[llvm] Topodagworklistx86 (PR #77475)

Tue Jan 9 06:06:21 PST 2024

Amaury Séchet <deadalnix at gmail.com>,
Amaury Séchet <deadalnix at gmail.com>
Message-ID:
In-Reply-To: <llvm.org/llvm/llvm-project/pull/77475 at github.com>


llvmbot wrote:



@llvm/pr-subscribers-llvm-selectiondag

@llvm/pr-subscribers-backend-x86

Author: None (deadalnix)

<details>
<summary>Changes</summary>

This is a revival of D152928 now that things have moved to GitHub.

We want to process nodes in topological order in DAGCombine, and this is a step in that direction.

So far, only the X86 tests are regenerated. The diff is already huge, so we can benefit from keeping the scope under control while regressions are being worked on.

---

Patch is 28.48 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/77475.diff


553 Files Affected:

- (modified) llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (+59-22) 
- (modified) llvm/test/CodeGen/X86/2007-05-15-maskmovq.ll (+4-4) 
- (modified) llvm/test/CodeGen/X86/2008-05-12-tailmerge-5.ll (+3-2) 
- (modified) llvm/test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll (+7-5) 
- (modified) llvm/test/CodeGen/X86/2008-12-02-dagcombine-1.ll (+5-2) 
- (modified) llvm/test/CodeGen/X86/2009-04-29-IndirectDestOperands.ll (+2-2) 
- (modified) llvm/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll (+5-5) 
- (modified) llvm/test/CodeGen/X86/2011-10-19-LegelizeLoad.ll (+10-12) 
- (modified) llvm/test/CodeGen/X86/abds.ll (+44-20) 
- (modified) llvm/test/CodeGen/X86/abdu.ll (+52-51) 
- (modified) llvm/test/CodeGen/X86/absolute-constant.ll (+8-2) 
- (modified) llvm/test/CodeGen/X86/add-cmov.ll (+2-2) 
- (modified) llvm/test/CodeGen/X86/add-of-mul.ll (+2-1) 
- (modified) llvm/test/CodeGen/X86/add-sub-bool.ll (+13-13) 
- (modified) llvm/test/CodeGen/X86/addcarry.ll (+30-31) 
- (modified) llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast.ll (+743-700) 
- (modified) llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll (+1171-942) 
- (modified) llvm/test/CodeGen/X86/atomic-fp.ll (+5-5) 
- (modified) llvm/test/CodeGen/X86/atomic-mi.ll (+8-8) 
- (modified) llvm/test/CodeGen/X86/avg.ll (+1294-764) 
- (modified) llvm/test/CodeGen/X86/avoid-sfb.ll (+6-6) 
- (modified) llvm/test/CodeGen/X86/avx-logic.ll (+23-21) 
- (modified) llvm/test/CodeGen/X86/avx-vbroadcast.ll (+2-2) 
- (modified) llvm/test/CodeGen/X86/avx1-logical-load-folding.ll (+8-6) 
- (modified) llvm/test/CodeGen/X86/avx2-fma-fneg-combine.ll (+16-6) 
- (modified) llvm/test/CodeGen/X86/avx512-broadcast-arith.ll (+8-8) 
- (modified) llvm/test/CodeGen/X86/avx512-broadcast-unfold.ll (+24-25) 
- (modified) llvm/test/CodeGen/X86/avx512-build-vector.ll (+3-3) 
- (modified) llvm/test/CodeGen/X86/avx512-ext.ll (+4-5) 
- (modified) llvm/test/CodeGen/X86/avx512-extract-subvector-load-store.ll (+12-10) 
- (modified) llvm/test/CodeGen/X86/avx512-hadd-hsub.ll (+4-4) 
- (modified) llvm/test/CodeGen/X86/avx512-insert-extract.ll (+20-16) 
- (modified) llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll (+270-136) 
- (modified) llvm/test/CodeGen/X86/avx512-mask-op.ll (+45-57) 
- (modified) llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll (+99-119) 
- (modified) llvm/test/CodeGen/X86/avx512-trunc.ll (+11-15) 
- (modified) llvm/test/CodeGen/X86/avx512-vbroadcast.ll (+49-4) 
- (modified) llvm/test/CodeGen/X86/avx512-vec-cmp.ll (+16-8) 
- (modified) llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll (+139-99) 
- (modified) llvm/test/CodeGen/X86/avx512bw-mask-op.ll (+2-6) 
- (modified) llvm/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll (+464-268) 
- (modified) llvm/test/CodeGen/X86/avx512dq-intrinsics-fast-isel.ll (+4-4) 
- (modified) llvm/test/CodeGen/X86/avx512dq-mask-op.ll (+1-3) 
- (modified) llvm/test/CodeGen/X86/avx512dqvl-intrinsics-fast-isel.ll (+2-2) 
- (modified) llvm/test/CodeGen/X86/avx512f-vec-test-testn.ll (+4-4) 
- (modified) llvm/test/CodeGen/X86/avx512fp16-combine-shuffle-fma.ll (+14-3) 
- (modified) llvm/test/CodeGen/X86/avx512fp16-fma-intrinsics.ll (+1-1) 
- (modified) llvm/test/CodeGen/X86/avx512fp16-mov.ll (+20-13) 
- (modified) llvm/test/CodeGen/X86/avx512vbmi2-intrinsics-upgrade.ll (+1-1) 
- (modified) llvm/test/CodeGen/X86/avx512vbmi2-intrinsics.ll (+1-1) 
- (modified) llvm/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll (+13-13) 
- (modified) llvm/test/CodeGen/X86/bit-test-shift.ll (+6-4) 
- (modified) llvm/test/CodeGen/X86/bitcast-and-setcc-128.ll (+20-10) 
- (modified) llvm/test/CodeGen/X86/bitcast-setcc-128.ll (+24-28) 
- (modified) llvm/test/CodeGen/X86/bitcast-setcc-256.ll (+12-37) 
- (modified) llvm/test/CodeGen/X86/bitcast-setcc-512.ll (+8-23) 
- (modified) llvm/test/CodeGen/X86/bitcast-vector-bool.ll (+386-280) 
- (modified) llvm/test/CodeGen/X86/bitselect.ll (+8-8) 
- (modified) llvm/test/CodeGen/X86/bool-ext-inc.ll (+12-9) 
- (modified) llvm/test/CodeGen/X86/bool-math.ll (+6-5) 
- (modified) llvm/test/CodeGen/X86/bswap.ll (+9-2) 
- (modified) llvm/test/CodeGen/X86/bswap_tree2.ll (+16-8) 
- (modified) llvm/test/CodeGen/X86/bt.ll (+1-1) 
- (modified) llvm/test/CodeGen/X86/btc_bts_btr.ll (+9-9) 
- (modified) llvm/test/CodeGen/X86/buildvec-insertvec.ll (+22-23) 
- (modified) llvm/test/CodeGen/X86/bypass-slow-division-32.ll (+2-5) 
- (modified) llvm/test/CodeGen/X86/cfguard-x86-64-vectorcall.ll (+3-3) 
- (modified) llvm/test/CodeGen/X86/clz.ll (+2-11) 
- (modified) llvm/test/CodeGen/X86/cmov-promotion.ll (+36-40) 
- (modified) llvm/test/CodeGen/X86/cmov.ll (+1-1) 
- (modified) llvm/test/CodeGen/X86/cmp-bool.ll (+3-2) 
- (modified) llvm/test/CodeGen/X86/cmp-shiftX-maskX.ll (+3-2) 
- (modified) llvm/test/CodeGen/X86/cmp.ll (+4-2) 
- (modified) llvm/test/CodeGen/X86/combine-and.ll (+88-53) 
- (modified) llvm/test/CodeGen/X86/combine-avx2-intrinsics.ll (+1) 
- (modified) llvm/test/CodeGen/X86/combine-bitreverse.ll (+40-40) 
- (modified) llvm/test/CodeGen/X86/combine-bitselect.ll (+107-36) 
- (modified) llvm/test/CodeGen/X86/combine-bswap.ll (+13-8) 
- (modified) llvm/test/CodeGen/X86/combine-concatvectors.ll (+12-13) 
- (modified) llvm/test/CodeGen/X86/combine-multiplies.ll (+22-22) 
- (modified) llvm/test/CodeGen/X86/combine-or.ll (+3-1) 
- (modified) llvm/test/CodeGen/X86/combine-pmuldq.ll (+32-32) 
- (modified) llvm/test/CodeGen/X86/combine-rotates.ll (+4-2) 
- (modified) llvm/test/CodeGen/X86/combine-sdiv.ll (+87-86) 
- (modified) llvm/test/CodeGen/X86/combine-shl.ll (+4-7) 
- (modified) llvm/test/CodeGen/X86/combine-sra.ll (+4-2) 
- (modified) llvm/test/CodeGen/X86/combine-srem.ll (+2-2) 
- (modified) llvm/test/CodeGen/X86/combine-srl.ll (+2-4) 
- (modified) llvm/test/CodeGen/X86/combine-sse41-intrinsics.ll (+5-5) 
- (modified) llvm/test/CodeGen/X86/combine-sub.ll (+6-6) 
- (modified) llvm/test/CodeGen/X86/commute-blend-sse41.ll (+5-5) 
- (modified) llvm/test/CodeGen/X86/conditional-tailcall.ll (+1-1) 
- (modified) llvm/test/CodeGen/X86/const-shift-of-constmasked.ll (+21-17) 
- (modified) llvm/test/CodeGen/X86/constant-pool-sharing.ll (+4-4) 
- (modified) llvm/test/CodeGen/X86/dagcombine-cse.ll (+7-7) 
- (modified) llvm/test/CodeGen/X86/dagcombine-select.ll (+59-34) 
- (modified) llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll (+36-36) 
- (modified) llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll (+36-36) 
- (modified) llvm/test/CodeGen/X86/divide-by-constant.ll (+10-3) 
- (modified) llvm/test/CodeGen/X86/divmod128.ll (+21-20) 
- (modified) llvm/test/CodeGen/X86/dont-trunc-store-double-to-float.ll (+6-3) 
- (modified) llvm/test/CodeGen/X86/dpbusd.ll (+239-227) 
- (modified) llvm/test/CodeGen/X86/dpbusd_const.ll (+131-127) 
- (modified) llvm/test/CodeGen/X86/dpbusd_i4.ll (+54-31) 
- (modified) llvm/test/CodeGen/X86/elementwise-store-of-scalar-splat.ll (+15-15) 
- (modified) llvm/test/CodeGen/X86/expand-vp-cast-intrinsics.ll (+5-3) 
- (modified) llvm/test/CodeGen/X86/expand-vp-int-intrinsics.ll (+64-68) 
- (modified) llvm/test/CodeGen/X86/extract-bits.ll (+5-4) 
- (modified) llvm/test/CodeGen/X86/extract-concat.ll (+33-17) 
- (modified) llvm/test/CodeGen/X86/extract-fp.ll (+1-1) 
- (modified) llvm/test/CodeGen/X86/extract-insert.ll (+1-1) 
- (modified) llvm/test/CodeGen/X86/extract-lowbits.ll (+6-4) 
- (modified) llvm/test/CodeGen/X86/extractelement-fp.ll (+16-16) 
- (modified) llvm/test/CodeGen/X86/extractelement-load.ll (+5-5) 
- (modified) llvm/test/CodeGen/X86/f16c-intrinsics-upgrade.ll (+12-4) 
- (modified) llvm/test/CodeGen/X86/fdiv.ll (+4-4) 
- (modified) llvm/test/CodeGen/X86/fma-fneg-combine-2.ll (+4-4) 
- (modified) llvm/test/CodeGen/X86/fma.ll (+78-78) 
- (modified) llvm/test/CodeGen/X86/fma_patterns.ll (+68-55) 
- (modified) llvm/test/CodeGen/X86/fma_patterns_wide.ll (+27-22) 
- (modified) llvm/test/CodeGen/X86/fmul-combines.ll (+11-1) 
- (modified) llvm/test/CodeGen/X86/fold-call-3.ll (+1-1) 
- (modified) llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll (+369-55) 
- (modified) llvm/test/CodeGen/X86/fold-masked-merge.ll (+4-5) 
- (modified) llvm/test/CodeGen/X86/fold-rmw-ops.ll (+10-4) 
- (modified) llvm/test/CodeGen/X86/fp-intrinsics.ll (+6-6) 
- (modified) llvm/test/CodeGen/X86/fp-logic.ll (+3-2) 
- (modified) llvm/test/CodeGen/X86/fp128-cast-strict.ll (+36-36) 
- (modified) llvm/test/CodeGen/X86/fp128-cast.ll (+11-15) 
- (modified) llvm/test/CodeGen/X86/fpclamptosat.ll (+15-24) 
- (modified) llvm/test/CodeGen/X86/fpclamptosat_vec.ll (+378-368) 
- (modified) llvm/test/CodeGen/X86/fpenv-combine.ll (+6-6) 
- (modified) llvm/test/CodeGen/X86/fpenv.ll (+27-21) 
- (modified) llvm/test/CodeGen/X86/freeze-unary.ll (+2) 
- (modified) llvm/test/CodeGen/X86/freeze-vector.ll (+14-13) 
- (modified) llvm/test/CodeGen/X86/frem.ll (+14-14) 
- (modified) llvm/test/CodeGen/X86/funnel-shift.ll (+15-17) 
- (modified) llvm/test/CodeGen/X86/h-registers-2.ll (+1-2) 
- (modified) llvm/test/CodeGen/X86/haddsub-2.ll (+10-10) 
- (modified) llvm/test/CodeGen/X86/haddsub-3.ll (+5-5) 
- (modified) llvm/test/CodeGen/X86/haddsub-4.ll (+11-10) 
- (modified) llvm/test/CodeGen/X86/haddsub-shuf.ll (+42-8) 
- (modified) llvm/test/CodeGen/X86/haddsub-undef.ll (+163-92) 
- (modified) llvm/test/CodeGen/X86/haddsub.ll (+55-124) 
- (modified) llvm/test/CodeGen/X86/half.ll (+55-54) 
- (modified) llvm/test/CodeGen/X86/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll (+42-40) 
- (modified) llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll (+4-4) 
- (modified) llvm/test/CodeGen/X86/horizontal-reduce-fadd.ll (+2-2) 
- (modified) llvm/test/CodeGen/X86/horizontal-reduce-smax.ll (+322-162) 
- (modified) llvm/test/CodeGen/X86/horizontal-reduce-smin.ll (+322-162) 
- (modified) llvm/test/CodeGen/X86/horizontal-reduce-umax.ll (+257-153) 
- (modified) llvm/test/CodeGen/X86/horizontal-reduce-umin.ll (+331-103) 
- (modified) llvm/test/CodeGen/X86/horizontal-sum.ll (+237-191) 
- (modified) llvm/test/CodeGen/X86/i128-add.ll (+2-6) 
- (modified) llvm/test/CodeGen/X86/i64-to-float.ll (+24-23) 
- (modified) llvm/test/CodeGen/X86/iabs.ll (+1-1) 
- (modified) llvm/test/CodeGen/X86/icmp-abs-C.ll (+1-1) 
- (modified) llvm/test/CodeGen/X86/icmp-pow2-logic-npow2.ll (+1-1) 
- (modified) llvm/test/CodeGen/X86/icmp-shift-opt.ll (+81-46) 
- (modified) llvm/test/CodeGen/X86/illegal-bitfield-loadstore.ll (+20-10) 
- (modified) llvm/test/CodeGen/X86/indirect-branch-tracking-eh2.ll (+178-48) 
- (modified) llvm/test/CodeGen/X86/insertelement-duplicates.ll (+6-6) 
- (modified) llvm/test/CodeGen/X86/insertelement-var-index.ll (+7-7) 
- (modified) llvm/test/CodeGen/X86/insertelement-zero.ll (+10-7) 
- (modified) llvm/test/CodeGen/X86/is_fpclass-fp80.ll (+19-18) 
- (modified) llvm/test/CodeGen/X86/is_fpclass.ll (+12-6) 
- (modified) llvm/test/CodeGen/X86/ispow2.ll (+29-29) 
- (modified) llvm/test/CodeGen/X86/jump_sign.ll (+7-6) 
- (modified) llvm/test/CodeGen/X86/known-bits-vector.ll (+16-2) 
- (modified) llvm/test/CodeGen/X86/known-never-zero.ll (+2-3) 
- (modified) llvm/test/CodeGen/X86/known-pow2.ll (+5-8) 
- (modified) llvm/test/CodeGen/X86/known-signbits-vector.ll (+27-25) 
- (modified) llvm/test/CodeGen/X86/lea-recursion.ll (+10-10) 
- (modified) llvm/test/CodeGen/X86/legalize-shift.ll (+6-2) 
- (modified) llvm/test/CodeGen/X86/lifetime-alias.ll (+6-6) 
- (modified) llvm/test/CodeGen/X86/load-chain.ll (+2-2) 
- (modified) llvm/test/CodeGen/X86/load-combine.ll (+5-5) 
- (modified) llvm/test/CodeGen/X86/load-local-v3i1.ll (+4-4) 
- (modified) llvm/test/CodeGen/X86/load-local-v3i129.ll (+18-11) 
- (modified) llvm/test/CodeGen/X86/load-partial.ll (+66-37) 
- (modified) llvm/test/CodeGen/X86/load-scalar-as-vector.ll (+2-2) 
- (modified) llvm/test/CodeGen/X86/lzcnt-cmp.ll (+6-3) 
- (modified) llvm/test/CodeGen/X86/madd.ll (+1431-359) 
- (modified) llvm/test/CodeGen/X86/mask-negated-bool.ll (+8-2) 
- (modified) llvm/test/CodeGen/X86/masked_compressstore.ll (+7-5) 
- (modified) llvm/test/CodeGen/X86/masked_expandload.ll (+14-16) 
- (modified) llvm/test/CodeGen/X86/masked_gather.ll (+6-3) 
- (modified) llvm/test/CodeGen/X86/masked_gather_scatter.ll (+90-45) 
- (modified) llvm/test/CodeGen/X86/masked_gather_scatter_widen.ll (+12-6) 
- (modified) llvm/test/CodeGen/X86/masked_load.ll (+12-18) 
- (modified) llvm/test/CodeGen/X86/masked_store.ll (+43-39) 
- (modified) llvm/test/CodeGen/X86/masked_store_trunc.ll (+25-14) 
- (modified) llvm/test/CodeGen/X86/masked_store_trunc_ssat.ll (+469-449) 
- (modified) llvm/test/CodeGen/X86/masked_store_trunc_usat.ll (+262-236) 
- (modified) llvm/test/CodeGen/X86/matrix-multiply.ll (+164-172) 
- (modified) llvm/test/CodeGen/X86/memcmp-more-load-pairs-x32.ll (+12-12) 
- (modified) llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll (+833-64) 
- (modified) llvm/test/CodeGen/X86/memcmp-optsize-x32.ll (+6-6) 
- (modified) llvm/test/CodeGen/X86/memcmp-optsize.ll (+11-10) 
- (modified) llvm/test/CodeGen/X86/memcmp-pgso-x32.ll (+6-6) 
- (modified) llvm/test/CodeGen/X86/memcmp-pgso.ll (+11-10) 
- (modified) llvm/test/CodeGen/X86/memcmp-x32.ll (+4-4) 
- (modified) llvm/test/CodeGen/X86/memcmp.ll (+833-64) 
- (modified) llvm/test/CodeGen/X86/memset-zero.ll (+2-2) 
- (modified) llvm/test/CodeGen/X86/merge-consecutive-loads-128.ll (+2-2) 
- (modified) llvm/test/CodeGen/X86/merge-consecutive-loads-512.ll (+15-7) 
- (modified) llvm/test/CodeGen/X86/merge-consecutive-stores-nt.ll (+4-4) 
- (modified) llvm/test/CodeGen/X86/merge-store-constants.ll (+7-5) 
- (modified) llvm/test/CodeGen/X86/midpoint-int-vec-128.ll (+94-64) 
- (modified) llvm/test/CodeGen/X86/midpoint-int-vec-256.ll (+114-64) 
- (modified) llvm/test/CodeGen/X86/midpoint-int-vec-512.ll (+476-372) 
- (modified) llvm/test/CodeGen/X86/min-legal-vector-width.ll (+214-108) 
- (modified) llvm/test/CodeGen/X86/mmx-cvt.ll (+3-9) 
- (modified) llvm/test/CodeGen/X86/movmsk-cmp.ll (+918-739) 
- (modified) llvm/test/CodeGen/X86/mul128.ll (+6-6) 
- (modified) llvm/test/CodeGen/X86/mulvi32.ll (+8-8) 
- (modified) llvm/test/CodeGen/X86/musttail-varargs.ll (+10-10) 
- (modified) llvm/test/CodeGen/X86/neg-abs.ll (+2-2) 
- (modified) llvm/test/CodeGen/X86/neg_fp.ll (+8-6) 
- (modified) llvm/test/CodeGen/X86/no-wide-load.ll (+1-1) 
- (modified) llvm/test/CodeGen/X86/nontemporal-loads.ll (+72-72) 
- (modified) llvm/test/CodeGen/X86/oddshuffles.ll (+273-270) 
- (modified) llvm/test/CodeGen/X86/oddsubvector.ll (+124-81) 
- (modified) llvm/test/CodeGen/X86/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll (+134-83) 
- (modified) llvm/test/CodeGen/X86/overflow.ll (+3-1) 
- (modified) llvm/test/CodeGen/X86/packus.ll (+10-8) 
- (modified) llvm/test/CodeGen/X86/parity-vec.ll (+9-22) 
- (modified) llvm/test/CodeGen/X86/phaddsub-extract.ll (+465-126) 
- (modified) llvm/test/CodeGen/X86/phaddsub.ll (+23-63) 
- (modified) llvm/test/CodeGen/X86/pmaddubsw.ll (+1033-91) 
- (modified) llvm/test/CodeGen/X86/pmulh.ll (+637-312) 
- (modified) llvm/test/CodeGen/X86/pr15267.ll (+21-25) 
- (modified) llvm/test/CodeGen/X86/pr22338.ll (+10-4) 
- (modified) llvm/test/CodeGen/X86/pr23258.ll (+14-14) 
- (modified) llvm/test/CodeGen/X86/pr28472.ll (+5) 
- (modified) llvm/test/CodeGen/X86/pr31045.ll (+1-1) 
- (modified) llvm/test/CodeGen/X86/pr32284.ll (+24-33) 
- (modified) llvm/test/CodeGen/X86/pr32345.ll (+2-2) 
- (modified) llvm/test/CodeGen/X86/pr33290.ll (+1-1) 
- (modified) llvm/test/CodeGen/X86/pr33828.ll (+6-6) 
- (modified) llvm/test/CodeGen/X86/pr34137.ll (+5-8) 
- (modified) llvm/test/CodeGen/X86/pr34592.ll (+1-1) 
- (modified) llvm/test/CodeGen/X86/pr35316.ll (+7-7) 
- (modified) llvm/test/CodeGen/X86/pr35443.ll (+3-4) 
- (modified) llvm/test/CodeGen/X86/pr35765.ll (+2-3) 
- (modified) llvm/test/CodeGen/X86/pr35982.ll (+40-12) 
- (modified) llvm/test/CodeGen/X86/pr38185.ll (+4-5) 
- (modified) llvm/test/CodeGen/X86/pr38217.ll (+5-5) 
- (modified) llvm/test/CodeGen/X86/pr38539.ll (+6-6) 
- (modified) llvm/test/CodeGen/X86/pr38639.ll (+5-6) 
- (modified) llvm/test/CodeGen/X86/pr39666.ll (+2-1) 
- (modified) llvm/test/CodeGen/X86/pr40730.ll (+3-1) 
- (modified) llvm/test/CodeGen/X86/pr41619.ll (+1) 
- (modified) llvm/test/CodeGen/X86/pr42727.ll (+2-2) 
- (modified) llvm/test/CodeGen/X86/pr42905.ll (+4-10) 
- (modified) llvm/test/CodeGen/X86/pr44976.ll (+1-1) 
- (modified) llvm/test/CodeGen/X86/pr45563-2.ll (+84-63) 
- (modified) llvm/test/CodeGen/X86/pr45833.ll (+97-76) 
- (modified) llvm/test/CodeGen/X86/pr46820.ll (+8-7) 
- (modified) llvm/test/CodeGen/X86/pr46877.ll (+2-2) 
- (modified) llvm/test/CodeGen/X86/pr47517.ll (+13-1) 
- (modified) llvm/test/CodeGen/X86/pr49162.ll (+2-6) 
- (modified) llvm/test/CodeGen/X86/pr49451.ll (+1-2) 
- (modified) llvm/test/CodeGen/X86/pr50609.ll (+3-2) 
- (modified) llvm/test/CodeGen/X86/pr51615.ll (+9-8) 
- (modified) llvm/test/CodeGen/X86/pr53419.ll (+28-60) 
- (modified) llvm/test/CodeGen/X86/pr53842.ll (+2-14) 
- (modified) llvm/test/CodeGen/X86/pr56103.ll (+1-1) 
- (modified) llvm/test/CodeGen/X86/pr57340.ll (+4-5) 
- (modified) llvm/test/CodeGen/X86/pr57402.ll (+1-1) 
- (modified) llvm/test/CodeGen/X86/pr57658.ll (+1-2) 
- (modified) llvm/test/CodeGen/X86/pr59980.ll (+1-1) 
- (modified) llvm/test/CodeGen/X86/pr61923.ll (+6-4) 
- (modified) llvm/test/CodeGen/X86/pr62286.ll (+20-19) 
- (modified) llvm/test/CodeGen/X86/pr63439.ll (+2-1) 
- (modified) llvm/test/CodeGen/X86/pr64593.ll (+2-4) 
- (modified) llvm/test/CodeGen/X86/pr67333.ll (+34-36) 
- (modified) llvm/test/CodeGen/X86/pr74736.ll (+1-1) 
- (modified) llvm/test/CodeGen/X86/prefer-avx256-mask-shuffle.ll (+2-6) 
- (modified) llvm/test/CodeGen/X86/promote-vec3.ll (+1-1) 
- (modified) llvm/test/CodeGen/X86/psubus.ll (+482-410) 
- (modified) llvm/test/CodeGen/X86/pull-binop-through-shift.ll (+1-1) 
- (modified) llvm/test/CodeGen/X86/rotate-extract-vector.ll (+22-10) 
- (modified) llvm/test/CodeGen/X86/rotate-extract.ll (+42-34) 
- (modified) llvm/test/CodeGen/X86/rotate.ll (+5-5) 
- (modified) llvm/test/CodeGen/X86/rotate4.ll (+12-12) 
- (modified) llvm/test/CodeGen/X86/rotate_vec.ll (+50-14) 
- (modified) llvm/test/CodeGen/X86/sad.ll (+401-96) 
- (modified) llvm/test/CodeGen/X86/sad_variations.ll (+2-56) 
- (modified) llvm/test/CodeGen/X86/sadd_sat.ll (+2-2) 
- (modified) llvm/test/CodeGen/X86/sadd_sat_plus.ll (+2-2) 
- (modified) llvm/test/CodeGen/X86/sadd_sat_vec.ll (+94-87) 
- (modified) llvm/test/CodeGen/X86/sat-add.ll (+94-37) 
- (modified) llvm/test/CodeGen/X86/scalar_widen_div.ll (+14-13) 
- (modified) llvm/test/CodeGen/X86/scalarize-fp.ll (+17-3) 
- (modified) llvm/test/CodeGen/X86/sdiv-exact.ll (+17-13) 
- (modified) llvm/test/CodeGen/X86/sdiv_fix.ll (+107-106) 
- (modified) llvm/test/CodeGen/X86/sdiv_fix_sat.ll (+223-225) 
- (modified) llvm/test/CodeGen/X86/select-sra.ll (+1-1) 
- (modified) llvm/test/CodeGen/X86/setcc-combine.ll (+94-30) 
- (modified) llvm/test/CodeGen/X86/setcc-freeze.ll (+2-1) 
- (modified) llvm/test/CodeGen/X86/setcc-fsh.ll (+22-24) 
- (modified) llvm/test/CodeGen/X86/setcc-logic.ll (+13-15) 
- (modified) llvm/test/CodeGen/X86/setcc-wide-types.ll (+273-185) 
- (modified) llvm/test/CodeGen/X86/setcc.ll (+1-1) 
- (modified) llvm/test/CodeGen/X86/shift-amount-mod.ll (+10-11) 
- (modified) llvm/test/CodeGen/X86/shift-and.ll (+1-1) 
- (modified) llvm/test/CodeGen/X86/shift-by-signext.ll (+2-2) 
- (modified) llvm/test/CodeGen/X86/shift-combine.ll (+4-4) 
- (modified) llvm/test/CodeGen/X86/shift-i128.ll (+69-65) 
- (modified) llvm/test/CodeGen/X86/shift-i256.ll (+2-2) 
- (modified) llvm/test/CodeGen/X86/shift-mask.ll (+1-1) 
- (modified) llvm/test/CodeGen/X86/shift-parts.ll (+4-6) 
- (modified) llvm/test/CodeGen/X86/shl_elim.ll (+1-1) 
- (modified) llvm/test/CodeGen/X86/shrink_vmul.ll (+138-134) 
- (modified) llvm/test/CodeGen/X86/shuffle-combine-crash-5.ll (+2-3) 
- (modified) llvm/test/CodeGen/X86/shuffle-half.ll (+2-2) 
- (modified) llvm/test/CodeGen/X86/shuffle-strided-with-offset-128.ll (+4-4) 
- (modified) llvm/test/CodeGen/X86/shuffle-strided-with-offset-512.ll (+4-4) 
- (modified) llvm/test/CodeGen/X86/shuffle-vs-trunc-256.ll (+47-47) 
- (modified) llvm/test/CodeGen/X86/shuffle-vs-trunc-512.ll (+26-6) 
- (modified) llvm/test/CodeGen/X86/signbit-shift.ll (+3-2) 
- (modified) llvm/test/CodeGen/X86/single_elt_vector_memory_operation.ll (+6-6) 
- (modified) llvm/test/CodeGen/X86/smul-with-overflow.ll (+69-67) 
- (modified) llvm/test/CodeGen/X86/splat-for-size.ll (+1-1) 
- (modified) llvm/test/CodeGen/X86/srem-seteq-illegal-types.ll (+21-20) 


``````````diff

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 2327664516ccdae..4cb7d5db1d360b7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1817,13 +1817,15 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
 
   WorklistInserter AddNodes(*this);
 
+  DAG.AssignTopologicalOrder();
+
   // Add all the dag nodes to the worklist.
   //
   // Note: All nodes are not added to PruningList here, this is because the only
   // nodes which can be deleted are those which have no uses and all other nodes
   // which would otherwise be added to the worklist by the first call to
   // getNextWorklistEntry are already present in it.
-  for (SDNode &Node : DAG.allnodes())
+  for (SDNode &Node : reverse(DAG.allnodes()))
     AddToWorklist(&Node, /* IsCandidateForPruning */ Node.use_empty());
 
   // Create a dummy node (which is not added to allnodes), that adds a reference
@@ -3488,7 +3490,6 @@ static SDValue combineUADDO_CARRYDiamond(DAGCombiner &Combiner,
     return SDValue();
   }
 
-
   auto cancelDiamond = [&](SDValue A,SDValue B) {
     SDLoc DL(N);
     SDValue NewY =
@@ -3559,46 +3560,82 @@ static SDValue combineCarryDiamond(SelectionDAG &DAG, const TargetLowering &TLI,
   SDValue Carry0 = getAsCarry(TLI, N0);
   if (!Carry0)
     return SDValue();
+
   SDValue Carry1 = getAsCarry(TLI, N1);
   if (!Carry1)
     return SDValue();
 
-  unsigned Opcode = Carry0.getOpcode();
-  if (Opcode != Carry1.getOpcode())
+  auto matchCarry = [](SDValue N, SDValue &A, SDValue &B, bool &IsAdd) {
+    unsigned Opcode = N.getOpcode();
+    IsAdd = Opcode == ISD::UADDO || Opcode == ISD::UADDO_CARRY;
+
+    if (Opcode == ISD::UADDO || Opcode == ISD::USUBO) {
+      A = N.getOperand(0);
+      B = N.getOperand(1);
+      return true;
+    }
+
+    if ((Opcode == ISD::UADDO_CARRY || Opcode == ISD::USUBO_CARRY) &&
+        isNullConstant(N.getOperand(1))) {
+      A = N.getOperand(0);
+      B = N.getOperand(2);
+      return true;
+    }
+
+    return false;
+  };
+
+  SDValue X, Y;
+  bool IsAdd;
+  if (!matchCarry(Carry0, X, Y, IsAdd))
+    return SDValue();
+
+  SDValue A, B;
+  bool IsAdd1;
+  if (!matchCarry(Carry1, A, B, IsAdd1))
     return SDValue();
-  if (Opcode != ISD::UADDO && Opcode != ISD::USUBO)
+
+  if (IsAdd != IsAdd1)
+    return SDValue();
+
+  unsigned NewOp = IsAdd ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
+  if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType()))
     return SDValue();
 
   // Canonicalize the add/sub of A and B (the top node in the above ASCII art)
   // as Carry0 and the add/sub of the carry in as Carry1 (the middle node).
-  if (Carry1.getNode()->isOperandOf(Carry0.getNode()))
+  if (Carry0.getValue(0) == A || Carry0.getValue(0) == B) {
     std::swap(Carry0, Carry1);
+    std::swap(A, X);
+    std::swap(B, Y);
+  }
 
   // Check if nodes are connected in expected way.
-  if (Carry1.getOperand(0) != Carry0.getValue(0) &&
-      Carry1.getOperand(1) != Carry0.getValue(0))
-    return SDValue();
-
-  // The carry in value must be on the righthand side for subtraction.
-  unsigned CarryInOperandNum =
-      Carry1.getOperand(0) == Carry0.getValue(0) ? 1 : 0;
-  if (Opcode == ISD::USUBO && CarryInOperandNum != 1)
-    return SDValue();
-  SDValue CarryIn = Carry1.getOperand(CarryInOperandNum);
-
-  unsigned NewOp = Opcode == ISD::UADDO ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
-  if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType()))
+  if (Carry1.getValue(0) != X && (!IsAdd || Carry1.getValue(0) != Y))
     return SDValue();
 
   // Verify that the carry/borrow in is plausibly a carry/borrow bit.
+  // TODO: make getAsCarry() aware of how partial carries are merged.
+  SDValue CarryIn = Carry1.getValue(0) == X ? Y : X;
   CarryIn = getAsCarry(TLI, CarryIn, true);
   if (!CarryIn)
     return SDValue();
 
   SDLoc DL(N);
   SDValue Merged =
-      DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0),
-                  Carry0.getOperand(1), CarryIn);
+      DAG.getNode(NewOp, DL, Carry1->getVTList(), A, B, CarryIn);
+
+  LLVM_DEBUG(
+    dbgs() << "Combine carry diamond:\n";
+    dbgs() << "\tCarry0: "; Carry0->dump();
+    dbgs() << "\tX: "; X->dump();
+    dbgs() << "\tY: "; Y->dump();
+    dbgs() << "\tCarry1: "; Carry1->dump();
+    dbgs() << "\tA: "; A->dump();
+    dbgs() << "\tB: "; B->dump();
+    dbgs() << "\tCarryIn: "; CarryIn->dump();
+    dbgs() << "\tMerged: "; Merged->dump();
+    dbgs() << "\n");
 
   // Please note that because we have proven that the result of the UADDO/USUBO
   // of A and B feeds into the UADDO/USUBO that does the carry/borrow in, we can
@@ -3613,7 +3650,7 @@ static SDValue combineCarryDiamond(SelectionDAG &DAG, const TargetLowering &TLI,
   // carry flags; and that AND can return a constant zero.
   //
   // TODO: match other operations that can merge flags (ADD, etc)
-  DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0));
+  DAG.ReplaceAllUsesOfValueWith(Carry0.getValue(0), Merged.getValue(0));
   if (N->getOpcode() == ISD::AND)
     return DAG.getConstant(0, DL, MVT::i1);
   return Merged.getValue(1);
diff --git a/llvm/test/CodeGen/X86/2007-05-15-maskmovq.ll b/llvm/test/CodeGen/X86/2007-05-15-maskmovq.ll
index 69f733461efc77d..48fbd6b213695d5 100644
--- a/llvm/test/CodeGen/X86/2007-05-15-maskmovq.ll
+++ b/llvm/test/CodeGen/X86/2007-05-15-maskmovq.ll
@@ -12,14 +12,14 @@ define void @test(<1 x i64> %c64, <1 x i64> %mask1, ptr %P) {
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; CHECK-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
-; CHECK-NEXT:    movl %eax, (%esp)
+; CHECK-NEXT:    movl %eax, {{[0-9]+}}(%esp)
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; CHECK-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
-; CHECK-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; CHECK-NEXT:    movl %eax, (%esp)
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; CHECK-NEXT:    movq (%esp), %mm0
-; CHECK-NEXT:    movq {{[0-9]+}}(%esp), %mm1
+; CHECK-NEXT:    movq {{[0-9]+}}(%esp), %mm0
+; CHECK-NEXT:    movq (%esp), %mm1
 ; CHECK-NEXT:    maskmovq %mm0, %mm1
 ; CHECK-NEXT:    addl $16, %esp
 ; CHECK-NEXT:    popl %edi
diff --git a/llvm/test/CodeGen/X86/2008-05-12-tailmerge-5.ll b/llvm/test/CodeGen/X86/2008-05-12-tailmerge-5.ll
index 0d63779227554cf..003358f5f61c47a 100644
--- a/llvm/test/CodeGen/X86/2008-05-12-tailmerge-5.ll
+++ b/llvm/test/CodeGen/X86/2008-05-12-tailmerge-5.ll
@@ -15,9 +15,10 @@ define void @passing2(i64 %str.0, i64 %str.1, i16 signext  %s, i32 %j, i8 signex
 ; CHECK-NEXT:    movq %rdi, {{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    movb %al, {{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    movb %ah, {{[0-9]+}}(%rsp)
-; CHECK-NEXT:    shrq $16, %rax
+; CHECK-NEXT:    shrl $16, %esi
+; CHECK-NEXT:    movb %sil, {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    shrl $24, %eax
 ; CHECK-NEXT:    movb %al, {{[0-9]+}}(%rsp)
-; CHECK-NEXT:    movb %ah, {{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    movw %dx, {{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    movl %ecx, {{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    movb %r8b, {{[0-9]+}}(%rsp)
diff --git a/llvm/test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll b/llvm/test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll
index bf309f015668425..c699c74e3253747 100644
--- a/llvm/test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll
+++ b/llvm/test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll
@@ -10,11 +10,13 @@ define i16 @f(i64 %x, double %y) {
 ; CHECK-LABEL: f:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
-; CHECK-NEXT:    movsd %xmm1, atomic
-; CHECK-NEXT:    xorps %xmm1, %xmm1
-; CHECK-NEXT:    movsd %xmm1, atomic2
-; CHECK-NEXT:    movsd %xmm0, anything
+; CHECK-NEXT:    movsd %xmm0, atomic
+; CHECK-NEXT:    xorps %xmm0, %xmm0
+; CHECK-NEXT:    movsd %xmm0, atomic2
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT:    movl %ecx, anything+4
+; CHECK-NEXT:    movl %eax, anything
 ; CHECK-NEXT:    movl ioport, %ecx
 ; CHECK-NEXT:    movl ioport, %eax
 ; CHECK-NEXT:    shrl $16, %eax
diff --git a/llvm/test/CodeGen/X86/2008-12-02-dagcombine-1.ll b/llvm/test/CodeGen/X86/2008-12-02-dagcombine-1.ll
index 62998a593996b2d..b4d7ae6d2320bb3 100644
--- a/llvm/test/CodeGen/X86/2008-12-02-dagcombine-1.ll
+++ b/llvm/test/CodeGen/X86/2008-12-02-dagcombine-1.ll
@@ -8,8 +8,11 @@ define ptr @test(ptr %a, ptr %L, ptr %P) nounwind {
 ; CHECK-LABEL: test:
 ; CHECK:       ## %bb.0: ## %entry
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT:    subl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT:    addl $-2, %eax
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; CHECK-NEXT:    addl %ecx, %edx
+; CHECK-NEXT:    subl %edx, %eax
+; CHECK-NEXT:    leal -2(%eax,%ecx), %eax
 ; CHECK-NEXT:    retl
 entry:
         %0 = ptrtoint ptr %a to i32
diff --git a/llvm/test/CodeGen/X86/2009-04-29-IndirectDestOperands.ll b/llvm/test/CodeGen/X86/2009-04-29-IndirectDestOperands.ll
index 8fc701c8cf69425..697e606a853840b 100644
--- a/llvm/test/CodeGen/X86/2009-04-29-IndirectDestOperands.ll
+++ b/llvm/test/CodeGen/X86/2009-04-29-IndirectDestOperands.ll
@@ -16,9 +16,9 @@ define void @cpuid(ptr %data) nounwind {
 ; CHECK-NEXT:    ## InlineAsm Start
 ; CHECK-NEXT:    cpuid
 ; CHECK-NEXT:    ## InlineAsm End
-; CHECK-NEXT:    movl %ebx, 8(%esi)
-; CHECK-NEXT:    movl %ecx, 12(%esi)
 ; CHECK-NEXT:    movl %edx, 16(%esi)
+; CHECK-NEXT:    movl %ecx, 12(%esi)
+; CHECK-NEXT:    movl %ebx, 8(%esi)
 ; CHECK-NEXT:    movl %eax, 4(%esi)
 ; CHECK-NEXT:    popl %esi
 ; CHECK-NEXT:    popl %ebx
diff --git a/llvm/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll b/llvm/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll
index ecaa105dedcfe8d..7d61e3430fa1901 100644
--- a/llvm/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll
+++ b/llvm/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll
@@ -17,17 +17,17 @@ define fastcc i32 @cli_magic_scandesc(ptr %in) nounwind ssp {
 ; CHECK-NEXT:    movq %rdx, (%rsp)
 ; CHECK-NEXT:    movq 24(%rdi), %rdx
 ; CHECK-NEXT:    movq %rdx, {{[0-9]+}}(%rsp)
-; CHECK-NEXT:    movq %rsi, {{[0-9]+}}(%rsp)
-; CHECK-NEXT:    movq 16(%rdi), %rdx
+; CHECK-NEXT:    movq 56(%rdi), %rdx
 ; CHECK-NEXT:    movq %rdx, {{[0-9]+}}(%rsp)
-; CHECK-NEXT:    movq 32(%rdi), %rdx
+; CHECK-NEXT:    movq 48(%rdi), %rdx
 ; CHECK-NEXT:    movq %rdx, {{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    movq 40(%rdi), %rdx
 ; CHECK-NEXT:    movq %rdx, {{[0-9]+}}(%rsp)
-; CHECK-NEXT:    movq 48(%rdi), %rdx
+; CHECK-NEXT:    movq 32(%rdi), %rdx
 ; CHECK-NEXT:    movq %rdx, {{[0-9]+}}(%rsp)
-; CHECK-NEXT:    movq 56(%rdi), %rdx
+; CHECK-NEXT:    movq 16(%rdi), %rdx
 ; CHECK-NEXT:    movq %rdx, {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movq %rsi, {{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    movb %al, (%rsp)
 ; CHECK-NEXT:    movb %cl, {{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    movq __stack_chk_guard(%rip), %rax
diff --git a/llvm/test/CodeGen/X86/2011-10-19-LegelizeLoad.ll b/llvm/test/CodeGen/X86/2011-10-19-LegelizeLoad.ll
index 65ff22f960f233f..d325c267833d834 100644
--- a/llvm/test/CodeGen/X86/2011-10-19-LegelizeLoad.ll
+++ b/llvm/test/CodeGen/X86/2011-10-19-LegelizeLoad.ll
@@ -17,21 +17,19 @@ target triple = "x86_64-unknown-linux-gnu"
 define dso_local i32 @main() nounwind uwtable {
 ; CHECK-LABEL: main:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movl i(%rip), %esi
-; CHECK-NEXT:    movl j(%rip), %eax
-; CHECK-NEXT:    movl %esi, %edx
+; CHECK-NEXT:    movq i(%rip), %rdx
+; CHECK-NEXT:    movq j(%rip), %rsi
+; CHECK-NEXT:    movsbl %sil, %eax
+; CHECK-NEXT:    idivb %dl
+; CHECK-NEXT:    movl %eax, %ecx
 ; CHECK-NEXT:    shrl $8, %edx
-; CHECK-NEXT:    movsbl %al, %ecx
-; CHECK-NEXT:    shrl $8, %eax
-; CHECK-NEXT:    cbtw
+; CHECK-NEXT:    shrl $8, %esi
+; CHECK-NEXT:    movsbl %sil, %eax
 ; CHECK-NEXT:    idivb %dl
-; CHECK-NEXT:    movl %eax, %edx
-; CHECK-NEXT:    movl %ecx, %eax
-; CHECK-NEXT:    idivb %sil
-; CHECK-NEXT:    movzbl %dl, %ecx
+; CHECK-NEXT:    movzbl %cl, %ecx
+; CHECK-NEXT:    movd %ecx, %xmm0
 ; CHECK-NEXT:    movzbl %al, %eax
-; CHECK-NEXT:    movd %eax, %xmm0
-; CHECK-NEXT:    pinsrb $1, %ecx, %xmm0
+; CHECK-NEXT:    pinsrb $1, %eax, %xmm0
 ; CHECK-NEXT:    pextrw $0, %xmm0, res(%rip)
 ; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    retq
diff --git a/llvm/test/CodeGen/X86/abds.ll b/llvm/test/CodeGen/X86/abds.ll
index 39ac47e99e6e989..1cd5b7e785af595 100644
--- a/llvm/test/CodeGen/X86/abds.ll
+++ b/llvm/test/CodeGen/X86/abds.ll
@@ -129,21 +129,27 @@ define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind {
 define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind {
 ; X86-LABEL: abd_ext_i16_i32:
 ; X86:       # %bb.0:
+; X86-NEXT:    pushl %esi
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, %edx
+; X86-NEXT:    sarl $31, %edx
 ; X86-NEXT:    movswl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl %eax, %edx
-; X86-NEXT:    subl %ecx, %edx
-; X86-NEXT:    negl %edx
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    sarl $31, %esi
 ; X86-NEXT:    subl %ecx, %eax
-; X86-NEXT:    cmovlel %edx, %eax
+; X86-NEXT:    sbbl %edx, %esi
+; X86-NEXT:    sarl $31, %esi
+; X86-NEXT:    xorl %esi, %eax
+; X86-NEXT:    subl %esi, %eax
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
+; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: abd_ext_i16_i32:
 ; X64:       # %bb.0:
+; X64-NEXT:    # kill: def $edi killed $edi def $rdi
+; X64-NEXT:    movswq %di, %rcx
 ; X64-NEXT:    movslq %esi, %rax
-; X64-NEXT:    movswl %di, %ecx
-; X64-NEXT:    movslq %ecx, %rcx
 ; X64-NEXT:    subq %rax, %rcx
 ; X64-NEXT:    movq %rcx, %rax
 ; X64-NEXT:    negq %rax
@@ -191,13 +197,19 @@ define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind {
 define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind {
 ; X86-LABEL: abd_ext_i32:
 ; X86:       # %bb.0:
+; X86-NEXT:    pushl %esi
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, %edx
+; X86-NEXT:    sarl $31, %edx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl %eax, %edx
-; X86-NEXT:    subl %ecx, %edx
-; X86-NEXT:    negl %edx
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    sarl $31, %esi
 ; X86-NEXT:    subl %ecx, %eax
-; X86-NEXT:    cmovlel %edx, %eax
+; X86-NEXT:    sbbl %edx, %esi
+; X86-NEXT:    sarl $31, %esi
+; X86-NEXT:    xorl %esi, %eax
+; X86-NEXT:    subl %esi, %eax
+; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: abd_ext_i32:
@@ -221,20 +233,26 @@ define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind {
 define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
 ; X86-LABEL: abd_ext_i32_i16:
 ; X86:       # %bb.0:
+; X86-NEXT:    pushl %esi
 ; X86-NEXT:    movswl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, %edx
+; X86-NEXT:    sarl $31, %edx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl %eax, %edx
-; X86-NEXT:    subl %ecx, %edx
-; X86-NEXT:    negl %edx
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    sarl $31, %esi
 ; X86-NEXT:    subl %ecx, %eax
-; X86-NEXT:    cmovlel %edx, %eax
+; X86-NEXT:    sbbl %edx, %esi
+; X86-NEXT:    sarl $31, %esi
+; X86-NEXT:    xorl %esi, %eax
+; X86-NEXT:    subl %esi, %eax
+; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: abd_ext_i32_i16:
 ; X64:       # %bb.0:
+; X64-NEXT:    # kill: def $esi killed $esi def $rsi
 ; X64-NEXT:    movslq %edi, %rcx
-; X64-NEXT:    movswl %si, %eax
-; X64-NEXT:    cltq
+; X64-NEXT:    movswq %si, %rax
 ; X64-NEXT:    subq %rax, %rcx
 ; X64-NEXT:    movq %rcx, %rax
 ; X64-NEXT:    negq %rax
@@ -252,13 +270,19 @@ define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
 define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind {
 ; X86-LABEL: abd_ext_i32_undef:
 ; X86:       # %bb.0:
+; X86-NEXT:    pushl %esi
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, %edx
+; X86-NEXT:    sarl $31, %edx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl %eax, %edx
-; X86-NEXT:    subl %ecx, %edx
-; X86-NEXT:    negl %edx
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    sarl $31, %esi
 ; X86-NEXT:    subl %ecx, %eax
-; X86-NEXT:    cmovlel %edx, %eax
+; X86-NEXT:    sbbl %edx, %esi
+; X86-NEXT:    sarl $31, %esi
+; X86-NEXT:    xorl %esi, %eax
+; X86-NEXT:    subl %esi, %eax
+; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: abd_ext_i32_undef:
diff --git a/llvm/test/CodeGen/X86/abdu.ll b/llvm/test/CodeGen/X86/abdu.ll
index 11719be4ab5cd0c..be0e22c747eaff4 100644
--- a/llvm/test/CodeGen/X86/abdu.ll
+++ b/llvm/test/CodeGen/X86/abdu.ll
@@ -20,13 +20,13 @@ define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind {
 ;
 ; X64-LABEL: abd_ext_i8:
 ; X64:       # %bb.0:
-; X64-NEXT:    movzbl %sil, %eax
 ; X64-NEXT:    movzbl %dil, %ecx
-; X64-NEXT:    subl %eax, %ecx
-; X64-NEXT:    movl %ecx, %eax
-; X64-NEXT:    negl %eax
-; X64-NEXT:    cmovsl %ecx, %eax
-; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    movzbl %sil, %eax
+; X64-NEXT:    subq %rax, %rcx
+; X64-NEXT:    movq %rcx, %rax
+; X64-NEXT:    negq %rax
+; X64-NEXT:    cmovsq %rcx, %rax
+; X64-NEXT:    # kill: def $al killed $al killed $rax
 ; X64-NEXT:    retq
   %aext = zext i8 %a to i64
   %bext = zext i8 %b to i64
@@ -50,13 +50,13 @@ define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind {
 ;
 ; X64-LABEL: abd_ext_i8_i16:
 ; X64:       # %bb.0:
-; X64-NEXT:    movzwl %si, %eax
 ; X64-NEXT:    movzbl %dil, %ecx
-; X64-NEXT:    subl %eax, %ecx
-; X64-NEXT:    movl %ecx, %eax
-; X64-NEXT:    negl %eax
-; X64-NEXT:    cmovsl %ecx, %eax
-; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    movzwl %si, %eax
+; X64-NEXT:    subq %rax, %rcx
+; X64-NEXT:    movq %rcx, %rax
+; X64-NEXT:    negq %rax
+; X64-NEXT:    cmovsq %rcx, %rax
+; X64-NEXT:    # kill: def $al killed $al killed $rax
 ; X64-NEXT:    retq
   %aext = zext i8 %a to i64
   %bext = zext i16 %b to i64
@@ -80,13 +80,13 @@ define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind {
 ;
 ; X64-LABEL: abd_ext_i8_undef:
 ; X64:       # %bb.0:
-; X64-NEXT:    movzbl %sil, %eax
 ; X64-NEXT:    movzbl %dil, %ecx
-; X64-NEXT:    subl %eax, %ecx
-; X64-NEXT:    movl %ecx, %eax
-; X64-NEXT:    negl %eax
-; X64-NEXT:    cmovsl %ecx, %eax
-; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    movzbl %sil, %eax
+; X64-NEXT:    subq %rax, %rcx
+; X64-NEXT:    movq %rcx, %rax
+; X64-NEXT:    negq %rax
+; X64-NEXT:    cmovsq %rcx, %rax
+; X64-NEXT:    # kill: def $al killed $al killed $rax
 ; X64-NEXT:    retq
   %aext = zext i8 %a to i64
   %bext = zext i8 %b to i64
@@ -110,13 +110,13 @@ define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind {
 ;
 ; X64-LABEL: abd_ext_i16:
 ; X64:       # %bb.0:
-; X64-NEXT:    movzwl %si, %eax
 ; X64-NEXT:    movzwl %di, %ecx
-; X64-NEXT:    subl %eax, %ecx
-; X64-NEXT:    movl %ecx, %eax
-; X64-NEXT:    negl %eax
-; X64-NEXT:    cmovsl %ecx, %eax
-; X64-NEXT:    # kill: def $ax killed $ax killed $eax
+; X64-NEXT:    movzwl %si, %eax
+; X64-NEXT:    subq %rax, %rcx
+; X64-NEXT:    movq %rcx, %rax
+; X64-NEXT:    negq %rax
+; X64-NEXT:    cmovsq %rcx, %rax
+; X64-NEXT:    # kill: def $ax killed $ax killed $rax
 ; X64-NEXT:    retq
   %aext = zext i16 %a to i64
   %bext = zext i16 %b to i64
@@ -129,20 +129,20 @@ define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind {
 define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind {
 ; X86-LABEL: abd_ext_i16_i32:
 ; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl %eax, %edx
-; X86-NEXT:    subl %ecx, %edx
-; X86-NEXT:    negl %edx
+; X86-NEXT:    xorl %ecx, %ecx
+; X86-NEXT:    subl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    sbbl %ecx, %ecx
+; X86-NEXT:    sarl $31, %ecx
+; X86-NEXT:    xorl %ecx, %eax
 ; X86-NEXT:    subl %ecx, %eax
-; X86-NEXT:    cmovbel %edx, %eax
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: abd_ext_i16_i32:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %esi, %eax
 ; X64-NEXT:    movzwl %di, %ecx
+; X64-NEXT:    movl %esi, %eax
 ; X64-NEXT:    subq %rax, %rcx
 ; X64-NEXT:    movq %rcx, %rax
 ; X64-NEXT:    negq %rax
@@ -171,13 +171,13 @@ define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind {
 ;
 ; X64-LABEL: abd_ext_i16_undef:
 ; X64:       # %bb.0:
-; X64-NEXT:    movzwl %si, %eax
 ; X64-NEXT:    movzwl %di, %ecx
-; X64-NEXT:    subl %eax, %ecx
-; X64-NEXT:    movl %ecx, %eax
-; ...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/77475