[llvm] [X86] mtune should be generic (PR #142297)

Sat May 31 16:13:21 PDT 2025

llvmbot wrote:




@llvm/pr-subscribers-backend-x86

Author: AZero13 (AZero13)

<details>
<summary>Changes</summary>

Yes, a lot of tests were updated, but we cannot keep hiding bugs this way.

---

Patch is 33.74 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/142297.diff


794 Files Affected:

- (modified) llvm/lib/Target/X86/X86Subtarget.cpp (+1-1) 
- (modified) llvm/test/CodeGen/X86/2007-03-15-GEP-Idx-Sink.ll (+26-23) 
- (modified) llvm/test/CodeGen/X86/2008-12-23-crazy-address.ll (+2-1) 
- (modified) llvm/test/CodeGen/X86/2012-01-10-UndefExceptionEdge.ll (+17-28) 
- (modified) llvm/test/CodeGen/X86/2012-12-1-merge-multiple.ll (+2-4) 
- (modified) llvm/test/CodeGen/X86/64-bit-shift-by-32-minus-y.ll (+16-16) 
- (modified) llvm/test/CodeGen/X86/AMX/amx-greedy-ra-spill-shape.ll (+1-1) 
- (modified) llvm/test/CodeGen/X86/GlobalISel/add-scalar.ll (+3-3) 
- (modified) llvm/test/CodeGen/X86/GlobalISel/sub-scalar.ll (+3-3) 
- (modified) llvm/test/CodeGen/X86/MergeConsecutiveStores.ll (+140-86) 
- (modified) llvm/test/CodeGen/X86/PR71178-register-coalescer-crash.ll (+1-1) 
- (modified) llvm/test/CodeGen/X86/abds-neg.ll (+60-59) 
- (modified) llvm/test/CodeGen/X86/abds.ll (+79-79) 
- (modified) llvm/test/CodeGen/X86/abdu-neg.ll (+85-84) 
- (modified) llvm/test/CodeGen/X86/abdu-vector-128.ll (+5-5) 
- (modified) llvm/test/CodeGen/X86/abdu.ll (+100-100) 
- (modified) llvm/test/CodeGen/X86/abi-isel.ll (+448-224) 
- (modified) llvm/test/CodeGen/X86/abs.ll (+1-1) 
- (modified) llvm/test/CodeGen/X86/add-cmov.ll (+6-3) 
- (modified) llvm/test/CodeGen/X86/add-ext.ll (+14-7) 
- (modified) llvm/test/CodeGen/X86/add-of-carry.ll (+2-2) 
- (modified) llvm/test/CodeGen/X86/add-sub-bool.ll (+21-21) 
- (modified) llvm/test/CodeGen/X86/add.ll (+8-8) 
- (modified) llvm/test/CodeGen/X86/add_shl_constant.ll (+27-20) 
- (modified) llvm/test/CodeGen/X86/addcarry.ll (+18-18) 
- (modified) llvm/test/CodeGen/X86/addr-mode-matcher-2.ll (+4-2) 
- (modified) llvm/test/CodeGen/X86/and-sink.ll (+7-7) 
- (modified) llvm/test/CodeGen/X86/andnot-patterns.ll (+5-5) 
- (modified) llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast.ll (+471-472) 
- (modified) llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll (+189-187) 
- (modified) llvm/test/CodeGen/X86/apx/check-nf-in-suppress-reloc-pass.ll (+3-3) 
- (modified) llvm/test/CodeGen/X86/apx/cmov.ll (+6-6) 
- (modified) llvm/test/CodeGen/X86/apx/flags-copy-lowering.ll (+1-1) 
- (modified) llvm/test/CodeGen/X86/apx/mul-i1024.ll (+1120-1123) 
- (modified) llvm/test/CodeGen/X86/arithmetic_fence2.ll (+14-14) 
- (modified) llvm/test/CodeGen/X86/atomic-bit-test.ll (+1-1) 
- (modified) llvm/test/CodeGen/X86/atomic-eflags-reuse.ll (+8-8) 
- (modified) llvm/test/CodeGen/X86/atomic-fp.ll (+12-12) 
- (modified) llvm/test/CodeGen/X86/atomic-mi.ll (+2-2) 
- (modified) llvm/test/CodeGen/X86/atomic-rm-bit-test.ll (+52-52) 
- (modified) llvm/test/CodeGen/X86/avg.ll (+49-48) 
- (modified) llvm/test/CodeGen/X86/avgceils-scalar.ll (+20-10) 
- (modified) llvm/test/CodeGen/X86/avgceils.ll (+6-6) 
- (modified) llvm/test/CodeGen/X86/avgceilu-scalar.ll (+20-10) 
- (modified) llvm/test/CodeGen/X86/avgfloors-scalar.ll (+2-2) 
- (modified) llvm/test/CodeGen/X86/avgfloors.ll (+2-2) 
- (modified) llvm/test/CodeGen/X86/avgflooru-i128.ll (+11-11) 
- (modified) llvm/test/CodeGen/X86/avgflooru-scalar.ll (+4-4) 
- (modified) llvm/test/CodeGen/X86/avoid-lea-scale2.ll (+14-3) 
- (modified) llvm/test/CodeGen/X86/avx-basic.ll (+1-1) 
- (modified) llvm/test/CodeGen/X86/avx-cvt-3.ll (+4-4) 
- (modified) llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll (+28-28) 
- (modified) llvm/test/CodeGen/X86/avx-intrinsics-x86_64.ll (+22-14) 
- (modified) llvm/test/CodeGen/X86/avx-logic.ll (+8-8) 
- (modified) llvm/test/CodeGen/X86/avx-select.ll (+2-2) 
- (modified) llvm/test/CodeGen/X86/avx-splat.ll (+2-2) 
- (modified) llvm/test/CodeGen/X86/avx-vbroadcast.ll (+2-6) 
- (modified) llvm/test/CodeGen/X86/avx-vperm2x128.ll (+2-2) 
- (modified) llvm/test/CodeGen/X86/avx2-arith.ll (+1-1) 
- (modified) llvm/test/CodeGen/X86/avx2-conversions.ll (+2-4) 
- (modified) llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll (+182-190) 
- (modified) llvm/test/CodeGen/X86/avx2-nontemporal.ll (+9-9) 
- (modified) llvm/test/CodeGen/X86/avx2-vbroadcast.ll (+18-8) 
- (modified) llvm/test/CodeGen/X86/avx2-vector-shifts.ll (+18-18) 
- (modified) llvm/test/CodeGen/X86/avx512-arith.ll (+3-3) 
- (modified) llvm/test/CodeGen/X86/avx512-broadcast-unfold.ll (+601-481) 
- (modified) llvm/test/CodeGen/X86/avx512-cvt.ll (+183-183) 
- (modified) llvm/test/CodeGen/X86/avx512-extract-subvector-load-store.ll (+12-12) 
- (modified) llvm/test/CodeGen/X86/avx512-insert-extract.ll (+170-126) 
- (modified) llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll (+20-20) 
- (modified) llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll (+180-180) 
- (modified) llvm/test/CodeGen/X86/avx512-intrinsics.ll (+28-30) 
- (modified) llvm/test/CodeGen/X86/avx512-mask-op.ll (+2-2) 
- (modified) llvm/test/CodeGen/X86/avx512-masked-memop-64-32.ll (+6-6) 
- (modified) llvm/test/CodeGen/X86/avx512-nontemporal.ll (+2-2) 
- (modified) llvm/test/CodeGen/X86/avx512-regcall-Mask.ll (+1-1) 
- (modified) llvm/test/CodeGen/X86/avx512-regcall-NoMask.ll (+14-16) 
- (modified) llvm/test/CodeGen/X86/avx512-select.ll (+2-2) 
- (modified) llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll (+250-250) 
- (modified) llvm/test/CodeGen/X86/avx512-shuffles/permute.ll (+160-160) 
- (modified) llvm/test/CodeGen/X86/avx512-vec-cmp.ll (+8-8) 
- (modified) llvm/test/CodeGen/X86/avx512bf16-intrinsics-upgrade.ll (+2-2) 
- (modified) llvm/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll (+7-7) 
- (modified) llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll (+60-54) 
- (modified) llvm/test/CodeGen/X86/avx512bw-intrinsics.ll (+4-6) 
- (modified) llvm/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll (+326-346) 
- (modified) llvm/test/CodeGen/X86/avx512fp16-frem.ll (+204-204) 
- (modified) llvm/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll (+4-4) 
- (modified) llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll (+315-315) 
- (modified) llvm/test/CodeGen/X86/avx512vl-intrinsics.ll (+18-18) 
- (modified) llvm/test/CodeGen/X86/avx512vlvp2intersect-intrinsics.ll (+2-2) 
- (modified) llvm/test/CodeGen/X86/avx512vp2intersect-intrinsics.ll (+2-2) 
- (modified) llvm/test/CodeGen/X86/bfloat-calling-conv.ll (+7-7) 
- (modified) llvm/test/CodeGen/X86/bfloat.ll (+15-15) 
- (modified) llvm/test/CodeGen/X86/bitcast-and-setcc-512.ll (+23-23) 
- (modified) llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll (+16-16) 
- (modified) llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll (+24-24) 
- (modified) llvm/test/CodeGen/X86/bitcast-int-to-vector-bool.ll (+8-8) 
- (modified) llvm/test/CodeGen/X86/bitcast-vector-bool.ll (+1-1) 
- (modified) llvm/test/CodeGen/X86/bitreverse.ll (+68-68) 
- (modified) llvm/test/CodeGen/X86/bitselect.ll (+21-21) 
- (modified) llvm/test/CodeGen/X86/bmi2.ll (+6-6) 
- (modified) llvm/test/CodeGen/X86/bool-simplify.ll (+1-1) 
- (modified) llvm/test/CodeGen/X86/broadcast-elm-cross-splat-vec.ll (+40-48) 
- (modified) llvm/test/CodeGen/X86/bswap-wide-int.ll (+3-3) 
- (modified) llvm/test/CodeGen/X86/bswap.ll (+16-14) 
- (modified) llvm/test/CodeGen/X86/btc_bts_btr.ll (+59-59) 
- (modified) llvm/test/CodeGen/X86/build-vector-512.ll (+81-81) 
- (modified) llvm/test/CodeGen/X86/callbr-asm-blockplacement.ll (+6-3) 
- (modified) llvm/test/CodeGen/X86/canonicalize-vars.ll (+2-2) 
- (modified) llvm/test/CodeGen/X86/cast-vsel.ll (+26-26) 
- (modified) llvm/test/CodeGen/X86/clear-highbits.ll (+55-47) 
- (modified) llvm/test/CodeGen/X86/clear-lowbits.ll (+23-23) 
- (modified) llvm/test/CodeGen/X86/clobber_frame_ptr2.ll (+47-3) 
- (modified) llvm/test/CodeGen/X86/cmov-into-branch.ll (+2-2) 
- (modified) llvm/test/CodeGen/X86/cmov.ll (+2-1) 
- (modified) llvm/test/CodeGen/X86/cmovcmov.ll (+2-2) 
- (modified) llvm/test/CodeGen/X86/cmp-concat.ll (+8-8) 
- (modified) llvm/test/CodeGen/X86/cmp-shiftX-maskX.ll (+9-9) 
- (modified) llvm/test/CodeGen/X86/cmpf-avx.ll (+1-1) 
- (modified) llvm/test/CodeGen/X86/coalescer-breaks-subreg-to-reg-liveness-reduced.ll (+1-1) 
- (modified) llvm/test/CodeGen/X86/codegen-prepare-addrmode-tls.ll (+6-6) 
- (modified) llvm/test/CodeGen/X86/combine-add.ll (+5-5) 
- (modified) llvm/test/CodeGen/X86/combine-addo.ll (+1-1) 
- (modified) llvm/test/CodeGen/X86/combine-and.ll (+1-1) 
- (modified) llvm/test/CodeGen/X86/combine-bitselect.ll (+32-34) 
- (modified) llvm/test/CodeGen/X86/combine-concatvectors.ll (+1-1) 
- (modified) llvm/test/CodeGen/X86/combine-fcopysign.ll (+12-12) 
- (modified) llvm/test/CodeGen/X86/combine-fneg.ll (+2-2) 
- (modified) llvm/test/CodeGen/X86/combine-mul.ll (+4-4) 
- (modified) llvm/test/CodeGen/X86/combine-pavg.ll (+2-2) 
- (modified) llvm/test/CodeGen/X86/combine-pmadd.ll (+1-1) 
- (modified) llvm/test/CodeGen/X86/combine-pmuldq.ll (+41-31) 
- (modified) llvm/test/CodeGen/X86/combine-ptest-256.ll (+11-11) 
- (modified) llvm/test/CodeGen/X86/combine-ptest.ll (+20-20) 
- (modified) llvm/test/CodeGen/X86/combine-rotates.ll (+16-15) 
- (modified) llvm/test/CodeGen/X86/combine-sbb.ll (+28-32) 
- (modified) llvm/test/CodeGen/X86/combine-sdiv.ll (+64-59) 
- (modified) llvm/test/CodeGen/X86/combine-shl.ll (+15-16) 
- (modified) llvm/test/CodeGen/X86/combine-smax.ll (+1-1) 
- (modified) llvm/test/CodeGen/X86/combine-smin.ll (+1-1) 
- (modified) llvm/test/CodeGen/X86/combine-sra.ll (+43-43) 
- (modified) llvm/test/CodeGen/X86/combine-srem.ll (+4-4) 
- (modified) llvm/test/CodeGen/X86/combine-srl.ll (+47-47) 
- (modified) llvm/test/CodeGen/X86/combine-sse41-intrinsics.ll (+7-6) 
- (modified) llvm/test/CodeGen/X86/combine-sub-usat.ll (+10-30) 
- (modified) llvm/test/CodeGen/X86/combine-sub.ll (+2-2) 
- (modified) llvm/test/CodeGen/X86/combine-udiv.ll (+49-49) 
- (modified) llvm/test/CodeGen/X86/combine-undef-index-mscatter.ll (+5-5) 
- (modified) llvm/test/CodeGen/X86/combine-urem.ll (+5-5) 
- (modified) llvm/test/CodeGen/X86/comi-flags.ll (+112-56) 
- (modified) llvm/test/CodeGen/X86/concat-cast.ll (+21-21) 
- (modified) llvm/test/CodeGen/X86/concat-fpext-v2bf16.ll (+3-3) 
- (modified) llvm/test/CodeGen/X86/conditional-tailcall.ll (+73-81) 
- (modified) llvm/test/CodeGen/X86/copy-eflags.ll (+2-2) 
- (modified) llvm/test/CodeGen/X86/copy-low-subvec-elt-to-high-subvec-elt.ll (+4-8) 
- (modified) llvm/test/CodeGen/X86/critical-anti-dep-breaker.ll (+1-1) 
- (modified) llvm/test/CodeGen/X86/ctlz.ll (+5-5) 
- (modified) llvm/test/CodeGen/X86/dag-large-offset.ll (+2-1) 
- (modified) llvm/test/CodeGen/X86/dag-update-nodetomatch.ll (+61-58) 
- (modified) llvm/test/CodeGen/X86/dagcombine-cse.ll (+2-2) 
- (modified) llvm/test/CodeGen/X86/dagcombine-select.ll (+1-1) 
- (modified) llvm/test/CodeGen/X86/dagcombine-shifts.ll (+1-1) 
- (modified) llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll (+372-353) 
- (modified) llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll (+299-279) 
- (modified) llvm/test/CodeGen/X86/divide-by-constant.ll (+14-25) 
- (modified) llvm/test/CodeGen/X86/divmod128.ll (+18-18) 
- (modified) llvm/test/CodeGen/X86/divrem.ll (+26) 
- (modified) llvm/test/CodeGen/X86/dpbusd_i4.ll (+3-3) 
- (modified) llvm/test/CodeGen/X86/early-ifcvt.ll (+100-14) 
- (modified) llvm/test/CodeGen/X86/eq-or-eq-range-of-2.ll (+20-20) 
- (modified) llvm/test/CodeGen/X86/expand-vp-int-intrinsics.ll (+95-103) 
- (modified) llvm/test/CodeGen/X86/extract-bits.ll (+730-712) 
- (modified) llvm/test/CodeGen/X86/extract-concat.ll (+4-4) 
- (modified) llvm/test/CodeGen/X86/extract-lowbits.ll (+50-48) 
- (modified) llvm/test/CodeGen/X86/extract-store.ll (+4-24) 
- (modified) llvm/test/CodeGen/X86/extractelement-fp.ll (+2-2) 
- (modified) llvm/test/CodeGen/X86/extractelement-load.ll (+55-27) 
- (modified) llvm/test/CodeGen/X86/fast-isel-select-cmov2.ll (+8-64) 
- (modified) llvm/test/CodeGen/X86/fcmp-logic.ll (+26-14) 
- (modified) llvm/test/CodeGen/X86/fixup-bw-inst.ll (+26-20) 
- (modified) llvm/test/CodeGen/X86/fma-fneg-combine-2.ll (+2-2) 
- (modified) llvm/test/CodeGen/X86/fma-intrinsics-fast-isel.ll (+2-2) 
- (modified) llvm/test/CodeGen/X86/fma_patterns.ll (+4-4) 
- (modified) llvm/test/CodeGen/X86/fma_patterns_wide.ll (+30-30) 
- (modified) llvm/test/CodeGen/X86/fminimum-fmaximum.ll (+69-69) 
- (modified) llvm/test/CodeGen/X86/fminimumnum-fmaximumnum.ll (+282-279) 
- (modified) llvm/test/CodeGen/X86/fold-add.ll (+8-4) 
- (modified) llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll (+34-20) 
- (modified) llvm/test/CodeGen/X86/fold-loop-of-urem.ll (+17-12) 
- (modified) llvm/test/CodeGen/X86/fold-tied-op.ll (+61-56) 
- (modified) llvm/test/CodeGen/X86/fold-vector-sext-crash2.ll (+2-2) 
- (modified) llvm/test/CodeGen/X86/fp-round.ll (+4-4) 
- (modified) llvm/test/CodeGen/X86/fp-strict-scalar-cmp-fp16.ll (-392) 
- (modified) llvm/test/CodeGen/X86/fp-strict-scalar-cmp.ll (+32-256) 
- (modified) llvm/test/CodeGen/X86/fp-strict-scalar.ll (+2-4) 
- (modified) llvm/test/CodeGen/X86/fp128-cast.ll (+3-3) 
- (modified) llvm/test/CodeGen/X86/fp128-libcalls-strict.ll (+279-417) 
- (modified) llvm/test/CodeGen/X86/fp128-libcalls.ll (+222-313) 
- (modified) llvm/test/CodeGen/X86/fpclamptosat.ll (+3-3) 
- (modified) llvm/test/CodeGen/X86/fpclamptosat_vec.ll (+77-77) 
- (modified) llvm/test/CodeGen/X86/fpenv.ll (+6-6) 
- (modified) llvm/test/CodeGen/X86/fptosi-sat-scalar.ll (+3-3) 
- (modified) llvm/test/CodeGen/X86/fptosi-sat-vector-128.ll (+96-96) 
- (modified) llvm/test/CodeGen/X86/fptoui-sat-scalar.ll (+1-1) 
- (modified) llvm/test/CodeGen/X86/fptoui-sat-vector-128.ll (+141-144) 
- (modified) llvm/test/CodeGen/X86/frame-base.ll (+10-2) 
- (modified) llvm/test/CodeGen/X86/freeze-binary.ll (+21-21) 
- (modified) llvm/test/CodeGen/X86/freeze-vector.ll (+22-28) 
- (modified) llvm/test/CodeGen/X86/fshl.ll (+65-57) 
- (modified) llvm/test/CodeGen/X86/fshr.ll (+76-70) 
- (modified) llvm/test/CodeGen/X86/ftrunc.ll (+40-41) 
- (modified) llvm/test/CodeGen/X86/funnel-shift.ll (+26-27) 
- (modified) llvm/test/CodeGen/X86/gfni-funnel-shifts.ll (+300-327) 
- (modified) llvm/test/CodeGen/X86/gfni-lzcnt.ll (+10-20) 
- (modified) llvm/test/CodeGen/X86/gfni-rotates.ll (+274-290) 
- (modified) llvm/test/CodeGen/X86/gfni-shifts.ll (+116-116) 
- (modified) llvm/test/CodeGen/X86/gfni-tzcnt.ll (+24-34) 
- (modified) llvm/test/CodeGen/X86/ghc-cc64.ll (+49-23) 
- (modified) llvm/test/CodeGen/X86/haddsub-2.ll (+116-116) 
- (modified) llvm/test/CodeGen/X86/haddsub-4.ll (+33-37) 
- (modified) llvm/test/CodeGen/X86/haddsub-shuf.ll (+7-7) 
- (modified) llvm/test/CodeGen/X86/haddsub-undef.ll (+7-7) 
- (modified) llvm/test/CodeGen/X86/half.ll (+24-24) 
- (modified) llvm/test/CodeGen/X86/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll (+20-20) 
- (modified) llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll (+2-2) 
- (modified) llvm/test/CodeGen/X86/horizontal-reduce-umax.ll (+11-11) 
- (modified) llvm/test/CodeGen/X86/horizontal-reduce-umin.ll (+1-1) 
- (modified) llvm/test/CodeGen/X86/horizontal-shuffle-demanded.ll (+8-8) 
- (modified) llvm/test/CodeGen/X86/horizontal-sum.ll (+104-103) 
- (modified) llvm/test/CodeGen/X86/i128-add.ll (+18-18) 
- (modified) llvm/test/CodeGen/X86/i128-mul.ll (+68-72) 
- (modified) llvm/test/CodeGen/X86/i128-sdiv.ll (+348-30) 
- (modified) llvm/test/CodeGen/X86/i64-to-float.ll (+24-22) 
- (modified) llvm/test/CodeGen/X86/i686-win-shrink-wrapping.ll (+2-2) 
- (modified) llvm/test/CodeGen/X86/iabs.ll (+1-1) 
- (modified) llvm/test/CodeGen/X86/icmp-abs-C-vec.ll (+22-22) 
- (modified) llvm/test/CodeGen/X86/icmp-pow2-diff.ll (+29-39) 
- (modified) llvm/test/CodeGen/X86/icmp-shift-opt.ll (+17-17) 
- (modified) llvm/test/CodeGen/X86/immediate_merging.ll (+4-4) 
- (modified) llvm/test/CodeGen/X86/implicit-null-check.ll (+4-3) 
- (modified) llvm/test/CodeGen/X86/imul.ll (+45-25) 
- (modified) llvm/test/CodeGen/X86/insert-into-constant-vector.ll (+42-41) 
- (modified) llvm/test/CodeGen/X86/insertelement-duplicates.ll (+8-8) 
- (modified) llvm/test/CodeGen/X86/insertelement-legalize.ll (+6-6) 
- (modified) llvm/test/CodeGen/X86/insertelement-shuffle.ll (+4-4) 
- (modified) llvm/test/CodeGen/X86/insertelement-var-index.ll (+46-41) 
- (modified) llvm/test/CodeGen/X86/intrinsic-cttz-elts.ll (+9-31) 
- (modified) llvm/test/CodeGen/X86/is_fpclass-fp80.ll (+25-29) 
- (modified) llvm/test/CodeGen/X86/is_fpclass.ll (+10-15) 
- (modified) llvm/test/CodeGen/X86/isel-and.ll (+1-1) 
- (modified) llvm/test/CodeGen/X86/isel-buildvector-avx.ll (+4-9) 
- (modified) llvm/test/CodeGen/X86/isel-fp-to-int.ll (+2-2) 
- (modified) llvm/test/CodeGen/X86/isel-icmp.ll (+2-2) 
- (modified) llvm/test/CodeGen/X86/isel-or.ll (+1-1) 
- (modified) llvm/test/CodeGen/X86/isel-phi.ll (+6-6) 
- (modified) llvm/test/CodeGen/X86/isel-sdiv.ll (+1-8) 
- (modified) llvm/test/CodeGen/X86/isel-select-cmov.ll (+4-4) 
- (modified) llvm/test/CodeGen/X86/isel-srem.ll (+62-8) 
- (modified) llvm/test/CodeGen/X86/isel-udiv.ll (-7) 
- (modified) llvm/test/CodeGen/X86/isel-urem.ll (+61-7) 
- (modified) llvm/test/CodeGen/X86/isel-xor.ll (+3-3) 
- (modified) llvm/test/CodeGen/X86/ispow2.ll (+3-3) 
- (modified) llvm/test/CodeGen/X86/known-bits-vector.ll (+2-2) 
- (modified) llvm/test/CodeGen/X86/known-bits.ll (+4-6) 
- (modified) llvm/test/CodeGen/X86/known-never-zero.ll (+21-21) 
- (modified) llvm/test/CodeGen/X86/known-pow2.ll (+33-33) 
- (modified) llvm/test/CodeGen/X86/known-signbits-shl.ll (+2-2) 
- (modified) llvm/test/CodeGen/X86/known-signbits-vector.ll (+5-5) 
- (modified) llvm/test/CodeGen/X86/knownbits-hadd-hsub.ll (+2-2) 
- (modified) llvm/test/CodeGen/X86/lack-of-signed-truncation-check.ll (+2-2) 
- (modified) llvm/test/CodeGen/X86/lea-16bit.ll (+1-1) 
- (modified) llvm/test/CodeGen/X86/lea-2.ll (+7-4) 
- (modified) llvm/test/CodeGen/X86/lea-4.ll (+2-2) 
- (modified) llvm/test/CodeGen/X86/lea-5.ll (+51-8) 
- (modified) llvm/test/CodeGen/X86/lea-opt-cse1.ll (+8-4) 
- (modified) llvm/test/CodeGen/X86/lea-opt-cse2.ll (+8-4) 
- (modified) llvm/test/CodeGen/X86/lea-opt-cse3.ll (+32-16) 
- (modified) llvm/test/CodeGen/X86/lea-opt-cse4.ll (+16-8) 
- (modified) llvm/test/CodeGen/X86/lea-opt.ll (+20-10) 
- (modified) llvm/test/CodeGen/X86/lea-recursion.ll (+4-2) 
- (modified) llvm/test/CodeGen/X86/lea.ll (+4-2) 
- (modified) llvm/test/CodeGen/X86/legalize-shift-64.ll (+13-13) 
- (modified) llvm/test/CodeGen/X86/legalize-shl-vec.ll (+72-76) 
- (modified) llvm/test/CodeGen/X86/llvm.frexp.ll (+1-1) 
- (modified) llvm/test/CodeGen/X86/load-local-v3i1.ll (+1-1) 
- (modified) llvm/test/CodeGen/X86/load-scalar-as-vector.ll (+18-4) 
- (modified) llvm/test/CodeGen/X86/loop-strength-reduce-2.ll (+48-14) 
- (modified) llvm/test/CodeGen/X86/loop-strength-reduce-3.ll (+20-7) 
- (modified) llvm/test/CodeGen/X86/loop-strength-reduce.ll (+20-7) 
- (modified) llvm/test/CodeGen/X86/loop-strength-reduce4.ll (+60-14) 
- (modified) llvm/test/CodeGen/X86/loop-strength-reduce8.ll (+34-9) 
- (modified) llvm/test/CodeGen/X86/lrshrink-debug.ll (+1-1) 
- (modified) llvm/test/CodeGen/X86/lsr-i386.ll (+29-8) 
- (modified) llvm/test/CodeGen/X86/lsr-loop-exit-cond.ll (+25-30) 
- (modified) llvm/test/CodeGen/X86/lsr-negative-stride.ll (+2-2) 
- (modified) llvm/test/CodeGen/X86/machine-cp.ll (+22-9) 
- (modified) llvm/test/CodeGen/X86/madd.ll (+47-44) 
- (modified) llvm/test/CodeGen/X86/masked-iv-safe.ll (+16-13) 
- (modified) llvm/test/CodeGen/X86/masked-iv-unsafe.ll (+37-33) 
- (modified) llvm/test/CodeGen/X86/masked_compressstore.ll (+38-57) 
- (modified) llvm/test/CodeGen/X86/masked_expandload.ll (+10-10) 
- (modified) llvm/test/CodeGen/X86/masked_gather.ll (+27-27) 
- (modified) llvm/test/CodeGen/X86/masked_gather_scatter.ll (+102-86) 
- (modified) llvm/test/CodeGen/X86/masked_gather_scatter_widen.ll (+64-64) 
- (modified) llvm/test/CodeGen/X86/masked_load.ll (+67-67) 
- (modified) llvm/test/CodeGen/X86/masked_store.ll (+51-64) 
- (modified) llvm/test/CodeGen/X86/masked_store_trunc.ll (+64-64) 
- (modified) llvm/test/CodeGen/X86/masked_store_trunc_ssat.ll (+360-291) 
- (modified) llvm/test/CodeGen/X86/masked_store_trunc_usat.ll (+228-228) 
- (modified) llvm/test/CodeGen/X86/matrix-multiply.ll (+297-300) 
- (modified) llvm/test/CodeGen/X86/mem-intrin-base-reg.ll (+153-24) 
- (modified) llvm/test/CodeGen/X86/memcmp-more-load-pairs-x32.ll (+103-97) 
- (modified) llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll (+265-265) 
- (modified) llvm/test/CodeGen/X86/memcmp-optsize-x32.ll (+14-14) 
- (modified) llvm/test/CodeGen/X86/memcmp-optsize.ll (+39-39) 
- (modified) llvm/test/CodeGen/X86/memcmp-pgso-x32.ll (+14-14) 
- (modified) llvm/test/CodeGen/X86/memcmp-pgso.ll (+39-39) 
- (modified) llvm/test/CodeGen/X86/memcmp-x32.ll (+38-38) 


``````````diff

diff --git a/llvm/lib/Target/X86/X86Subtarget.cpp b/llvm/lib/Target/X86/X86Subtarget.cpp
index a8ee9f55611b6..95cef72057552 100644
--- a/llvm/lib/Target/X86/X86Subtarget.cpp
+++ b/llvm/lib/Target/X86/X86Subtarget.cpp
@@ -253,7 +253,7 @@ void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef TuneCPU,
     CPU = "generic";
 
   if (TuneCPU.empty())
-    TuneCPU = "i586"; // FIXME: "generic" is more modern than llc tests expect.
+    TuneCPU = "generic";
 
   std::string FullFS = X86_MC::ParseX86Triple(TargetTriple);
   assert(!FullFS.empty() && "Failed to parse X86 triple");
diff --git a/llvm/test/CodeGen/X86/2007-03-15-GEP-Idx-Sink.ll b/llvm/test/CodeGen/X86/2007-03-15-GEP-Idx-Sink.ll
index 49e2bf207e52a..dc7385658fce7 100644
--- a/llvm/test/CodeGen/X86/2007-03-15-GEP-Idx-Sink.ll
+++ b/llvm/test/CodeGen/X86/2007-03-15-GEP-Idx-Sink.ll
@@ -4,47 +4,50 @@
 define void @foo(ptr %buf, i32 %size, i32 %col, ptr %p) nounwind {
 ; CHECK-LABEL: foo:
 ; CHECK:       ## %bb.0: ## %entry
+; CHECK-NEXT:    pushl %ebp
 ; CHECK-NEXT:    pushl %ebx
 ; CHECK-NEXT:    pushl %edi
 ; CHECK-NEXT:    pushl %esi
-; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT:    testl %eax, %eax
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; CHECK-NEXT:    testl %ebp, %ebp
 ; CHECK-NEXT:    jle LBB0_3
 ; CHECK-NEXT:  ## %bb.1: ## %bb.preheader
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; CHECK-NEXT:    addl $8, %ecx
+; CHECK-NEXT:    xorl %edi, %edi
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  LBB0_2: ## %bb
 ; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    movl (%esi), %edi
-; CHECK-NEXT:    movzbl -8(%ecx), %ebx
-; CHECK-NEXT:    movb %bl, (%edi,%edx)
-; CHECK-NEXT:    movzbl -7(%ecx), %ebx
-; CHECK-NEXT:    movb %bl, 7(%edi,%edx)
-; CHECK-NEXT:    movzbl -6(%ecx), %ebx
-; CHECK-NEXT:    movb %bl, 5(%edi,%edx)
-; CHECK-NEXT:    movzbl -5(%ecx), %ebx
-; CHECK-NEXT:    movb %bl, 3(%edi,%edx)
-; CHECK-NEXT:    movzbl -4(%ecx), %ebx
-; CHECK-NEXT:    movb %bl, 2(%edi,%edx)
-; CHECK-NEXT:    movzbl -3(%ecx), %ebx
-; CHECK-NEXT:    movb %bl, 1(%edi,%edx)
-; CHECK-NEXT:    movzbl -2(%ecx), %ebx
-; CHECK-NEXT:    movb %bl, 2(%edi,%edx)
-; CHECK-NEXT:    movzbl -1(%ecx), %ebx
-; CHECK-NEXT:    movb %bl, 4(%edi,%edx)
-; CHECK-NEXT:    movzbl (%ecx), %ebx
-; CHECK-NEXT:    movb %bl, 6(%edi,%edx)
-; CHECK-NEXT:    addl $4, %esi
+; CHECK-NEXT:    movl (%esi,%edi,4), %ebx
+; CHECK-NEXT:    movzbl -8(%ecx), %eax
+; CHECK-NEXT:    movb %al, (%ebx,%edx)
+; CHECK-NEXT:    movzbl -7(%ecx), %eax
+; CHECK-NEXT:    movb %al, 7(%ebx,%edx)
+; CHECK-NEXT:    movzbl -6(%ecx), %eax
+; CHECK-NEXT:    movb %al, 5(%ebx,%edx)
+; CHECK-NEXT:    movzbl -5(%ecx), %eax
+; CHECK-NEXT:    movb %al, 3(%ebx,%edx)
+; CHECK-NEXT:    movzbl -4(%ecx), %eax
+; CHECK-NEXT:    movb %al, 2(%ebx,%edx)
+; CHECK-NEXT:    movzbl -3(%ecx), %eax
+; CHECK-NEXT:    movb %al, 1(%ebx,%edx)
+; CHECK-NEXT:    movzbl -2(%ecx), %eax
+; CHECK-NEXT:    movb %al, 2(%ebx,%edx)
+; CHECK-NEXT:    movzbl -1(%ecx), %eax
+; CHECK-NEXT:    movb %al, 4(%ebx,%edx)
+; CHECK-NEXT:    movzbl (%ecx), %eax
+; CHECK-NEXT:    movb %al, 6(%ebx,%edx)
+; CHECK-NEXT:    incl %edi
 ; CHECK-NEXT:    addl $9, %ecx
-; CHECK-NEXT:    decl %eax
+; CHECK-NEXT:    cmpl %edi, %ebp
 ; CHECK-NEXT:    jne LBB0_2
 ; CHECK-NEXT:  LBB0_3: ## %return
 ; CHECK-NEXT:    popl %esi
 ; CHECK-NEXT:    popl %edi
 ; CHECK-NEXT:    popl %ebx
+; CHECK-NEXT:    popl %ebp
 ; CHECK-NEXT:    retl
 entry:
 	icmp sgt i32 %size, 0		; <i1>:0 [#uses=1]
diff --git a/llvm/test/CodeGen/X86/2008-12-23-crazy-address.ll b/llvm/test/CodeGen/X86/2008-12-23-crazy-address.ll
index 54f85b8f73817..4f283c7a27b3c 100644
--- a/llvm/test/CodeGen/X86/2008-12-23-crazy-address.ll
+++ b/llvm/test/CodeGen/X86/2008-12-23-crazy-address.ll
@@ -36,7 +36,8 @@ define void @bar(i32 %i) nounwind {
 ; CHECK-NEXT:    pushl %eax
 ; CHECK-NEXT:    calll frob@PLT
 ; CHECK-NEXT:    addl $4, %esp
-; CHECK-NEXT:    leal X(%esp,%esi,4), %eax
+; CHECK-NEXT:    leal (%esp,%esi,4), %eax
+; CHECK-NEXT:    addl $X, %eax
 ; CHECK-NEXT:    pushl %eax
 ; CHECK-NEXT:    calll borf@PLT
 ; CHECK-NEXT:    addl $44, %esp
diff --git a/llvm/test/CodeGen/X86/2012-01-10-UndefExceptionEdge.ll b/llvm/test/CodeGen/X86/2012-01-10-UndefExceptionEdge.ll
index 1962ddebc2115..1a8fdc5b4bc8f 100644
--- a/llvm/test/CodeGen/X86/2012-01-10-UndefExceptionEdge.ll
+++ b/llvm/test/CodeGen/X86/2012-01-10-UndefExceptionEdge.ll
@@ -29,32 +29,29 @@ define void @f(ptr nocapture %arg, ptr nocapture %arg1, ptr nocapture %arg2, ptr
 ; CHECK-NEXT:    pushl %ebx
 ; CHECK-NEXT:    pushl %edi
 ; CHECK-NEXT:    pushl %esi
-; CHECK-NEXT:    subl $28, %esp
+; CHECK-NEXT:    subl $12, %esp
 ; CHECK-NEXT:    .cfi_offset %esi, -20
 ; CHECK-NEXT:    .cfi_offset %edi, -16
 ; CHECK-NEXT:    .cfi_offset %ebx, -12
 ; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    xorl %edi, %edi
 ; CHECK-NEXT:    testb %al, %al
 ; CHECK-NEXT:  Ltmp0:
-; CHECK-NEXT:    ## implicit-def: $ebx
+; CHECK-NEXT:    ## implicit-def: $edi
 ; CHECK-NEXT:    calll __Znam
 ; CHECK-NEXT:  Ltmp1:
 ; CHECK-NEXT:  ## %bb.1: ## %bb11
 ; CHECK-NEXT:    movl %eax, %esi
-; CHECK-NEXT:    movb $1, %al
-; CHECK-NEXT:    testb %al, %al
+; CHECK-NEXT:    movb $1, %bl
+; CHECK-NEXT:    testb %bl, %bl
 ; CHECK-NEXT:    jne LBB0_2
 ; CHECK-NEXT:  ## %bb.7: ## %bb31
-; CHECK-NEXT:    ## implicit-def: $eax
-; CHECK-NEXT:    ## kill: killed $eax
+; CHECK-NEXT:    ## implicit-def: $edi
 ; CHECK-NEXT:  LBB0_8: ## %bb38
 ; CHECK-NEXT:    ## =>This Loop Header: Depth=1
 ; CHECK-NEXT:    ## Child Loop BB0_13 Depth 2
 ; CHECK-NEXT:    ## Child Loop BB0_16 Depth 3
 ; CHECK-NEXT:    ## Child Loop BB0_21 Depth 2
-; CHECK-NEXT:    movb $1, %al
-; CHECK-NEXT:    testb %al, %al
+; CHECK-NEXT:    testb %bl, %bl
 ; CHECK-NEXT:    jne LBB0_9
 ; CHECK-NEXT:  ## %bb.10: ## %bb41
 ; CHECK-NEXT:    ## in Loop: Header=BB0_8 Depth=1
@@ -78,8 +75,7 @@ define void @f(ptr nocapture %arg, ptr nocapture %arg1, ptr nocapture %arg2, ptr
 ; CHECK-NEXT:    ## Parent Loop BB0_8 Depth=1
 ; CHECK-NEXT:    ## => This Loop Header: Depth=2
 ; CHECK-NEXT:    ## Child Loop BB0_16 Depth 3
-; CHECK-NEXT:    movb $1, %cl
-; CHECK-NEXT:    testb %cl, %cl
+; CHECK-NEXT:    testb %bl, %bl
 ; CHECK-NEXT:    jne LBB0_19
 ; CHECK-NEXT:  ## %bb.14: ## %bb48
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=2
@@ -87,15 +83,11 @@ define void @f(ptr nocapture %arg, ptr nocapture %arg1, ptr nocapture %arg2, ptr
 ; CHECK-NEXT:  ## %bb.15: ## %bb49.preheader
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=2
 ; CHECK-NEXT:    xorl %ecx, %ecx
-; CHECK-NEXT:    movl %esi, %edx
-; CHECK-NEXT:    movl %edi, %ebx
 ; CHECK-NEXT:  LBB0_16: ## %bb49
 ; CHECK-NEXT:    ## Parent Loop BB0_8 Depth=1
 ; CHECK-NEXT:    ## Parent Loop BB0_13 Depth=2
 ; CHECK-NEXT:    ## => This Inner Loop Header: Depth=3
 ; CHECK-NEXT:    incl %ecx
-; CHECK-NEXT:    addl $4, %edx
-; CHECK-NEXT:    decl %ebx
 ; CHECK-NEXT:    jne LBB0_16
 ; CHECK-NEXT:  LBB0_17: ## %bb57
 ; CHECK-NEXT:    ## in Loop: Header=BB0_13 Depth=2
@@ -107,33 +99,30 @@ define void @f(ptr nocapture %arg, ptr nocapture %arg1, ptr nocapture %arg2, ptr
 ; CHECK-NEXT:    movl %eax, {{[0-9]+}}(%esp)
 ; CHECK-NEXT:    movl $0, (%esp)
 ; CHECK-NEXT:    calll ___bzero
-; CHECK-NEXT:    movb $1, %al
-; CHECK-NEXT:    testb %al, %al
+; CHECK-NEXT:    testb %bl, %bl
 ; CHECK-NEXT:    jne LBB0_22
 ; CHECK-NEXT:  ## %bb.20: ## %bb61.preheader
 ; CHECK-NEXT:    ## in Loop: Header=BB0_8 Depth=1
-; CHECK-NEXT:    movl %esi, %eax
-; CHECK-NEXT:    movl %edi, %ecx
+; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:  LBB0_21: ## %bb61
 ; CHECK-NEXT:    ## Parent Loop BB0_8 Depth=1
 ; CHECK-NEXT:    ## => This Inner Loop Header: Depth=2
-; CHECK-NEXT:    movl $0, (%eax)
-; CHECK-NEXT:    addl $4, %eax
-; CHECK-NEXT:    decl %ecx
+; CHECK-NEXT:    movl $0, (%esi,%eax,4)
+; CHECK-NEXT:    incl %eax
 ; CHECK-NEXT:    jne LBB0_21
 ; CHECK-NEXT:  LBB0_22: ## %bb67
 ; CHECK-NEXT:    ## in Loop: Header=BB0_8 Depth=1
-; CHECK-NEXT:    decl {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; CHECK-NEXT:    decl %edi
 ; CHECK-NEXT:    jmp LBB0_8
 ; CHECK-NEXT:  LBB0_18: ## %bb43
 ; CHECK-NEXT:  Ltmp5:
-; CHECK-NEXT:    movl %esi, %ebx
+; CHECK-NEXT:    movl %esi, %edi
 ; CHECK-NEXT:    calll _OnOverFlow
 ; CHECK-NEXT:  Ltmp6:
 ; CHECK-NEXT:    jmp LBB0_3
 ; CHECK-NEXT:  LBB0_2: ## %bb29
 ; CHECK-NEXT:  Ltmp7:
-; CHECK-NEXT:    movl %esi, %ebx
+; CHECK-NEXT:    movl %esi, %edi
 ; CHECK-NEXT:    calll _OnOverFlow
 ; CHECK-NEXT:  Ltmp8:
 ; CHECK-NEXT:  LBB0_3: ## %bb30
@@ -141,10 +130,10 @@ define void @f(ptr nocapture %arg, ptr nocapture %arg1, ptr nocapture %arg2, ptr
 ; CHECK-NEXT:  LBB0_4: ## %bb20.loopexit
 ; CHECK-NEXT:  Ltmp4:
 ; CHECK-NEXT:  LBB0_9:
-; CHECK-NEXT:    movl %esi, %ebx
+; CHECK-NEXT:    movl %esi, %edi
 ; CHECK-NEXT:  LBB0_6: ## %bb23
-; CHECK-NEXT:    testl %ebx, %ebx
-; CHECK-NEXT:    addl $28, %esp
+; CHECK-NEXT:    testl %edi, %edi
+; CHECK-NEXT:    addl $12, %esp
 ; CHECK-NEXT:    popl %esi
 ; CHECK-NEXT:    popl %edi
 ; CHECK-NEXT:    popl %ebx
diff --git a/llvm/test/CodeGen/X86/2012-12-1-merge-multiple.ll b/llvm/test/CodeGen/X86/2012-12-1-merge-multiple.ll
index 86af5fc58c977..5a3ebeceb73c7 100644
--- a/llvm/test/CodeGen/X86/2012-12-1-merge-multiple.ll
+++ b/llvm/test/CodeGen/X86/2012-12-1-merge-multiple.ll
@@ -4,10 +4,8 @@
 define void @multiple_stores_on_chain(ptr %A) {
 ; CHECK-LABEL: multiple_stores_on_chain:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movabsq $844433520132096, %rax # imm = 0x3000200010000
-; CHECK-NEXT:    movq %rax, (%rdi)
-; CHECK-NEXT:    movabsq $1970350607106052, %rax # imm = 0x7000600050004
-; CHECK-NEXT:    movq %rax, 8(%rdi)
+; CHECK-NEXT:    movaps {{.*#+}} xmm0 = [0,1,2,3,4,5,6,7]
+; CHECK-NEXT:    movups %xmm0, (%rdi)
 ; CHECK-NEXT:    retq
 entry:
   %a1 = getelementptr inbounds i16, ptr %A, i64 1
diff --git a/llvm/test/CodeGen/X86/64-bit-shift-by-32-minus-y.ll b/llvm/test/CodeGen/X86/64-bit-shift-by-32-minus-y.ll
index 4c92adb25d0bd..d9ccdbdc9ea5f 100644
--- a/llvm/test/CodeGen/X86/64-bit-shift-by-32-minus-y.ll
+++ b/llvm/test/CodeGen/X86/64-bit-shift-by-32-minus-y.ll
@@ -307,19 +307,19 @@ define i64 @t5_cse(i64 %val, i64 %shamt, ptr%dst) nounwind {
 ; X86-NOBMI2-NEXT:    pushl %esi
 ; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-NOBMI2-NEXT:    movl %eax, %ebx
 ; X86-NOBMI2-NEXT:    addl $32, %ebx
 ; X86-NOBMI2-NEXT:    adcl $0, %edi
-; X86-NOBMI2-NEXT:    movl %ebx, (%ecx)
-; X86-NOBMI2-NEXT:    movl %edi, 4(%ecx)
 ; X86-NOBMI2-NEXT:    movb $32, %cl
 ; X86-NOBMI2-NEXT:    subb %al, %cl
 ; X86-NOBMI2-NEXT:    movl %esi, %eax
 ; X86-NOBMI2-NEXT:    shll %cl, %eax
 ; X86-NOBMI2-NEXT:    shldl %cl, %esi, %edx
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI2-NEXT:    movl %ebx, (%esi)
+; X86-NOBMI2-NEXT:    movl %edi, 4(%esi)
 ; X86-NOBMI2-NEXT:    testb $32, %cl
 ; X86-NOBMI2-NEXT:    je .LBB5_2
 ; X86-NOBMI2-NEXT:  # %bb.1:
@@ -338,17 +338,17 @@ define i64 @t5_cse(i64 %val, i64 %shamt, ptr%dst) nounwind {
 ; X86-BMI2-NEXT:    pushl %esi
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ebx
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-BMI2-NEXT:    movl %ebx, %edi
 ; X86-BMI2-NEXT:    addl $32, %edi
 ; X86-BMI2-NEXT:    adcl $0, %esi
-; X86-BMI2-NEXT:    movl %edi, (%ecx)
-; X86-BMI2-NEXT:    movl %esi, 4(%ecx)
 ; X86-BMI2-NEXT:    movb $32, %cl
 ; X86-BMI2-NEXT:    subb %bl, %cl
+; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %ebx
 ; X86-BMI2-NEXT:    shldl %cl, %eax, %edx
+; X86-BMI2-NEXT:    movl %edi, (%ebx)
+; X86-BMI2-NEXT:    movl %esi, 4(%ebx)
 ; X86-BMI2-NEXT:    shlxl %ecx, %eax, %eax
 ; X86-BMI2-NEXT:    testb $32, %cl
 ; X86-BMI2-NEXT:    je .LBB5_2
@@ -390,18 +390,18 @@ define i64 @t6_cse2(i64 %val, i64 %shamt, ptr%dst) nounwind {
 ; X86-NOBMI2:       # %bb.0:
 ; X86-NOBMI2-NEXT:    pushl %edi
 ; X86-NOBMI2-NEXT:    pushl %esi
-; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NOBMI2-NEXT:    xorl %edi, %edi
+; X86-NOBMI2-NEXT:    xorl %esi, %esi
 ; X86-NOBMI2-NEXT:    movl $32, %ecx
 ; X86-NOBMI2-NEXT:    subl {{[0-9]+}}(%esp), %ecx
-; X86-NOBMI2-NEXT:    sbbl {{[0-9]+}}(%esp), %edi
-; X86-NOBMI2-NEXT:    movl %ecx, (%eax)
-; X86-NOBMI2-NEXT:    movl %edi, 4(%eax)
-; X86-NOBMI2-NEXT:    movl %esi, %eax
+; X86-NOBMI2-NEXT:    sbbl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NOBMI2-NEXT:    movl %edi, %eax
 ; X86-NOBMI2-NEXT:    shll %cl, %eax
-; X86-NOBMI2-NEXT:    shldl %cl, %esi, %edx
+; X86-NOBMI2-NEXT:    shldl %cl, %edi, %edx
+; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NOBMI2-NEXT:    movl %ecx, (%edi)
+; X86-NOBMI2-NEXT:    movl %esi, 4(%edi)
 ; X86-NOBMI2-NEXT:    testb $32, %cl
 ; X86-NOBMI2-NEXT:    je .LBB6_2
 ; X86-NOBMI2-NEXT:  # %bb.1:
@@ -423,9 +423,9 @@ define i64 @t6_cse2(i64 %val, i64 %shamt, ptr%dst) nounwind {
 ; X86-BMI2-NEXT:    movl $32, %ecx
 ; X86-BMI2-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X86-BMI2-NEXT:    sbbl {{[0-9]+}}(%esp), %edi
+; X86-BMI2-NEXT:    shldl %cl, %eax, %edx
 ; X86-BMI2-NEXT:    movl %ecx, (%esi)
 ; X86-BMI2-NEXT:    movl %edi, 4(%esi)
-; X86-BMI2-NEXT:    shldl %cl, %eax, %edx
 ; X86-BMI2-NEXT:    shlxl %ecx, %eax, %eax
 ; X86-BMI2-NEXT:    testb $32, %cl
 ; X86-BMI2-NEXT:    je .LBB6_2
diff --git a/llvm/test/CodeGen/X86/AMX/amx-greedy-ra-spill-shape.ll b/llvm/test/CodeGen/X86/AMX/amx-greedy-ra-spill-shape.ll
index 0c349c3aa8ec1..7a301c06107ce 100644
--- a/llvm/test/CodeGen/X86/AMX/amx-greedy-ra-spill-shape.ll
+++ b/llvm/test/CodeGen/X86/AMX/amx-greedy-ra-spill-shape.ll
@@ -124,9 +124,9 @@ define void @foo(i32 %M, i32 %N, i32 %K, ptr %A, ptr %B_rcr4, ptr %C, i32 %c_row
   ; CHECK-NEXT:   successors: %bb.6(0x7c000000), %bb.5(0x04000000)
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[PTILEZEROV:%[0-9]+]]:tile = PTILEZEROV [[COPY6]].sub_16bit, [[MOV32rm2]].sub_16bit
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:gr64_nosp = MOVSX64rr32 [[COPY9]].sub_32bit
   ; CHECK-NEXT:   [[MOV64rm7:%[0-9]+]]:gr64 = MOV64rm %stack.13, 1, $noreg, 0, $noreg :: (load (s64) from %stack.13)
   ; CHECK-NEXT:   [[PTILELOADDV:%[0-9]+]]:tile = PTILELOADDV [[COPY6]].sub_16bit, [[COPY4]].sub_16bit, [[MOV64rm7]], 1, [[MOVSX64rr32_]], 0, $noreg
-  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:gr64_nosp = MOVSX64rr32 [[COPY9]].sub_32bit
   ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:gr32 = COPY [[LEA64_32r1]]
   ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:gr64 = COPY [[MOV64rm1]]
   ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:gr32 = COPY [[COPY4]]
diff --git a/llvm/test/CodeGen/X86/GlobalISel/add-scalar.ll b/llvm/test/CodeGen/X86/GlobalISel/add-scalar.ll
index 7bde1b7a7a8be..ac916ffe29ed7 100644
--- a/llvm/test/CodeGen/X86/GlobalISel/add-scalar.ll
+++ b/llvm/test/CodeGen/X86/GlobalISel/add-scalar.ll
@@ -18,10 +18,10 @@ define i128 @test_add_i128(i128 %arg1, i128 %arg2) nounwind {
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-NEXT:    addl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-NEXT:    adcl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-NEXT:    adcl {{[0-9]+}}(%esp), %esi
 ; X86-NEXT:    adcl {{[0-9]+}}(%esp), %edi
 ; X86-NEXT:    movl %ecx, (%eax)
@@ -44,8 +44,8 @@ define i64 @test_add_i64(i64 %arg1, i64 %arg2) {
 ; X86-LABEL: test_add_i64:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    addl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    adcl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    retl
   %ret = add i64 %arg1, %arg2
diff --git a/llvm/test/CodeGen/X86/GlobalISel/sub-scalar.ll b/llvm/test/CodeGen/X86/GlobalISel/sub-scalar.ll
index 7a035f5e4ad4d..d8f113753ed8f 100644
--- a/llvm/test/CodeGen/X86/GlobalISel/sub-scalar.ll
+++ b/llvm/test/CodeGen/X86/GlobalISel/sub-scalar.ll
@@ -18,10 +18,10 @@ define i128 @test_sub_i128(i128 %arg1, i128 %arg2) nounwind {
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-NEXT:    subl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %esi
 ; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %edi
 ; X86-NEXT:    movl %ecx, (%eax)
@@ -45,8 +45,8 @@ define i64 @test_sub_i64(i64 %arg1, i64 %arg2) {
 ; X86-LABEL: test_sub_i64:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    subl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    retl
   %ret = sub i64 %arg1, %arg2
diff --git a/llvm/test/CodeGen/X86/MergeConsecutiveStores.ll b/llvm/test/CodeGen/X86/MergeConsecutiveStores.ll
index 0103d2bf3cc2c..be670237ad7fd 100644
--- a/llvm/test/CodeGen/X86/MergeConsecutiveStores.ll
+++ b/llvm/test/CodeGen/X86/MergeConsecutiveStores.ll
@@ -17,13 +17,14 @@ define void @merge_const_store(i32 %count, ptr nocapture %p) nounwind uwtable no
 ; X86-NEXT:    jle .LBB0_3
 ; X86-NEXT:  # %bb.1: # %.lr.ph.preheader
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    xorl %edx, %edx
 ; X86-NEXT:    .p2align 4
 ; X86-NEXT:  .LBB0_2: # %.lr.ph
 ; X86-NEXT:    # =>This Inner Loop Header: Depth=1
-; X86-NEXT:    movl $67305985, (%ecx) # imm = 0x4030201
-; X86-NEXT:    movl $134678021, 4(%ecx) # imm = 0x8070605
-; X86-NEXT:    addl $8, %ecx
-; X86-NEXT:    decl %eax
+; X86-NEXT:    movl $67305985, (%ecx,%edx,8) # imm = 0x4030201
+; X86-NEXT:    movl $134678021, 4(%ecx,%edx,8) # imm = 0x8070605
+; X86-NEXT:    incl %edx
+; X86-NEXT:    cmpl %edx, %eax
 ; X86-NEXT:    jne .LBB0_2
 ; X86-NEXT:  .LBB0_3: # %._crit_edge
 ; X86-NEXT:    retl
@@ -33,13 +34,15 @@ define void @merge_const_store(i32 %count, ptr nocapture %p) nounwind uwtable no
 ; X64-NEXT:    testl %edi, %edi
 ; X64-NEXT:    jle .LBB0_3
 ; X64-NEXT:  # %bb.1: # %.lr.ph.preheader
-; X64-NEXT:    movabsq $578437695752307201, %rax # imm = 0x807060504030201
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    xorl %ecx, %ecx
+; X64-NEXT:    movabsq $578437695752307201, %rdx # imm = 0x807060504030201
 ; X64-NEXT:    .p2align 4
 ; X64-NEXT:  .LBB0_2: # %.lr.ph
 ; X64-NEXT:    # =>This Inner Loop Header: Depth=1
-; X64-NEXT:    movq %rax, (%rsi)
-; X64-NEXT:    addq $8, %rsi
-; X64-NEXT:    decl %edi
+; X64-NEXT:    movq %rdx, (%rsi,%rcx,8)
+; X64-NEXT:    incq %rcx
+; X64-NEXT:    cmpl %ecx, %eax
 ; X64-NEXT:    jne .LBB0_2
 ; X64-NEXT:  .LBB0_3: # %._crit_edge
 ; X64-NEXT:    retq
@@ -213,61 +216,76 @@ define void @merge_const_store_vec(i32 %count, ptr nocapture %p) nounwind uwtabl
 define void @merge_nonconst_store(i32 %count, i8 %zz, ptr nocapture %p) nounwind uwtable noinline ssp {
 ; X86-BWON-LABEL: merge_nonconst_store:
 ; X86-BWON:       # %bb.0:
+; X86-BWON-NEXT:    pushl %esi
+; X86-BWON-NEXT:    .cfi_def_cfa_offset 8
+; X86-BWON-NEXT:    .cfi_offset %esi, -8
 ; X86-BWON-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-BWON-NEXT:    testl %eax, %eax
 ; X86-BWON-NEXT:    jle .LBB3_3
 ; X86-BWON-NEXT:  # %bb.1: # %.lr.ph.preheader
 ; X86-BWON-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-BWON-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; X86-BWON-NEXT:    xorl %esi, %esi
 ; X86-BWON-NEXT:    .p2align 4
 ; X86-BWON-NEXT:  .LBB3_2: # %.lr.ph
 ; X86-BWON-NEXT:    # =>This Inner Loop Header: Depth=1
-; X86-BWON-NEXT:    movl $67305985, (%ecx) # imm = 0x4030201
-; X86-BWON-NEXT:    movb %dl, 4(%ecx)
-; X86-BWON-NEXT:    movw $1798, 5(%ecx) # imm = 0x706
-; X86-BWON-NEXT:    movb $8, 7(%ecx)
-; X86-BWON-NEXT:    addl ...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/142297