[llvm] Topodagworklistx86 (PR #77475)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 9 06:06:21 PST 2024
Amaury Séchet <deadalnix at gmail.com>,
Amaury Séchet <deadalnix at gmail.com>
Message-ID:
In-Reply-To: <llvm.org/llvm/llvm-project/pull/77475 at github.com>
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-selectiondag
@llvm/pr-subscribers-backend-x86
Author: None (deadalnix)
<details>
<summary>Changes</summary>
This is a revival of D152928 now that things have moved to GitHub.
We want to process nodes in topological order in DAGCombine, and this is a step in that direction.
So far, only the X86 tests are regenerated. The diff is already huge, so we can benefit from keeping the scope under control while regressions are being worked on.
---
Patch is 28.48 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/77475.diff
553 Files Affected:
- (modified) llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (+59-22)
- (modified) llvm/test/CodeGen/X86/2007-05-15-maskmovq.ll (+4-4)
- (modified) llvm/test/CodeGen/X86/2008-05-12-tailmerge-5.ll (+3-2)
- (modified) llvm/test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll (+7-5)
- (modified) llvm/test/CodeGen/X86/2008-12-02-dagcombine-1.ll (+5-2)
- (modified) llvm/test/CodeGen/X86/2009-04-29-IndirectDestOperands.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll (+5-5)
- (modified) llvm/test/CodeGen/X86/2011-10-19-LegelizeLoad.ll (+10-12)
- (modified) llvm/test/CodeGen/X86/abds.ll (+44-20)
- (modified) llvm/test/CodeGen/X86/abdu.ll (+52-51)
- (modified) llvm/test/CodeGen/X86/absolute-constant.ll (+8-2)
- (modified) llvm/test/CodeGen/X86/add-cmov.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/add-of-mul.ll (+2-1)
- (modified) llvm/test/CodeGen/X86/add-sub-bool.ll (+13-13)
- (modified) llvm/test/CodeGen/X86/addcarry.ll (+30-31)
- (modified) llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast.ll (+743-700)
- (modified) llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll (+1171-942)
- (modified) llvm/test/CodeGen/X86/atomic-fp.ll (+5-5)
- (modified) llvm/test/CodeGen/X86/atomic-mi.ll (+8-8)
- (modified) llvm/test/CodeGen/X86/avg.ll (+1294-764)
- (modified) llvm/test/CodeGen/X86/avoid-sfb.ll (+6-6)
- (modified) llvm/test/CodeGen/X86/avx-logic.ll (+23-21)
- (modified) llvm/test/CodeGen/X86/avx-vbroadcast.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/avx1-logical-load-folding.ll (+8-6)
- (modified) llvm/test/CodeGen/X86/avx2-fma-fneg-combine.ll (+16-6)
- (modified) llvm/test/CodeGen/X86/avx512-broadcast-arith.ll (+8-8)
- (modified) llvm/test/CodeGen/X86/avx512-broadcast-unfold.ll (+24-25)
- (modified) llvm/test/CodeGen/X86/avx512-build-vector.ll (+3-3)
- (modified) llvm/test/CodeGen/X86/avx512-ext.ll (+4-5)
- (modified) llvm/test/CodeGen/X86/avx512-extract-subvector-load-store.ll (+12-10)
- (modified) llvm/test/CodeGen/X86/avx512-hadd-hsub.ll (+4-4)
- (modified) llvm/test/CodeGen/X86/avx512-insert-extract.ll (+20-16)
- (modified) llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll (+270-136)
- (modified) llvm/test/CodeGen/X86/avx512-mask-op.ll (+45-57)
- (modified) llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll (+99-119)
- (modified) llvm/test/CodeGen/X86/avx512-trunc.ll (+11-15)
- (modified) llvm/test/CodeGen/X86/avx512-vbroadcast.ll (+49-4)
- (modified) llvm/test/CodeGen/X86/avx512-vec-cmp.ll (+16-8)
- (modified) llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll (+139-99)
- (modified) llvm/test/CodeGen/X86/avx512bw-mask-op.ll (+2-6)
- (modified) llvm/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll (+464-268)
- (modified) llvm/test/CodeGen/X86/avx512dq-intrinsics-fast-isel.ll (+4-4)
- (modified) llvm/test/CodeGen/X86/avx512dq-mask-op.ll (+1-3)
- (modified) llvm/test/CodeGen/X86/avx512dqvl-intrinsics-fast-isel.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/avx512f-vec-test-testn.ll (+4-4)
- (modified) llvm/test/CodeGen/X86/avx512fp16-combine-shuffle-fma.ll (+14-3)
- (modified) llvm/test/CodeGen/X86/avx512fp16-fma-intrinsics.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/avx512fp16-mov.ll (+20-13)
- (modified) llvm/test/CodeGen/X86/avx512vbmi2-intrinsics-upgrade.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/avx512vbmi2-intrinsics.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll (+13-13)
- (modified) llvm/test/CodeGen/X86/bit-test-shift.ll (+6-4)
- (modified) llvm/test/CodeGen/X86/bitcast-and-setcc-128.ll (+20-10)
- (modified) llvm/test/CodeGen/X86/bitcast-setcc-128.ll (+24-28)
- (modified) llvm/test/CodeGen/X86/bitcast-setcc-256.ll (+12-37)
- (modified) llvm/test/CodeGen/X86/bitcast-setcc-512.ll (+8-23)
- (modified) llvm/test/CodeGen/X86/bitcast-vector-bool.ll (+386-280)
- (modified) llvm/test/CodeGen/X86/bitselect.ll (+8-8)
- (modified) llvm/test/CodeGen/X86/bool-ext-inc.ll (+12-9)
- (modified) llvm/test/CodeGen/X86/bool-math.ll (+6-5)
- (modified) llvm/test/CodeGen/X86/bswap.ll (+9-2)
- (modified) llvm/test/CodeGen/X86/bswap_tree2.ll (+16-8)
- (modified) llvm/test/CodeGen/X86/bt.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/btc_bts_btr.ll (+9-9)
- (modified) llvm/test/CodeGen/X86/buildvec-insertvec.ll (+22-23)
- (modified) llvm/test/CodeGen/X86/bypass-slow-division-32.ll (+2-5)
- (modified) llvm/test/CodeGen/X86/cfguard-x86-64-vectorcall.ll (+3-3)
- (modified) llvm/test/CodeGen/X86/clz.ll (+2-11)
- (modified) llvm/test/CodeGen/X86/cmov-promotion.ll (+36-40)
- (modified) llvm/test/CodeGen/X86/cmov.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/cmp-bool.ll (+3-2)
- (modified) llvm/test/CodeGen/X86/cmp-shiftX-maskX.ll (+3-2)
- (modified) llvm/test/CodeGen/X86/cmp.ll (+4-2)
- (modified) llvm/test/CodeGen/X86/combine-and.ll (+88-53)
- (modified) llvm/test/CodeGen/X86/combine-avx2-intrinsics.ll (+1)
- (modified) llvm/test/CodeGen/X86/combine-bitreverse.ll (+40-40)
- (modified) llvm/test/CodeGen/X86/combine-bitselect.ll (+107-36)
- (modified) llvm/test/CodeGen/X86/combine-bswap.ll (+13-8)
- (modified) llvm/test/CodeGen/X86/combine-concatvectors.ll (+12-13)
- (modified) llvm/test/CodeGen/X86/combine-multiplies.ll (+22-22)
- (modified) llvm/test/CodeGen/X86/combine-or.ll (+3-1)
- (modified) llvm/test/CodeGen/X86/combine-pmuldq.ll (+32-32)
- (modified) llvm/test/CodeGen/X86/combine-rotates.ll (+4-2)
- (modified) llvm/test/CodeGen/X86/combine-sdiv.ll (+87-86)
- (modified) llvm/test/CodeGen/X86/combine-shl.ll (+4-7)
- (modified) llvm/test/CodeGen/X86/combine-sra.ll (+4-2)
- (modified) llvm/test/CodeGen/X86/combine-srem.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/combine-srl.ll (+2-4)
- (modified) llvm/test/CodeGen/X86/combine-sse41-intrinsics.ll (+5-5)
- (modified) llvm/test/CodeGen/X86/combine-sub.ll (+6-6)
- (modified) llvm/test/CodeGen/X86/commute-blend-sse41.ll (+5-5)
- (modified) llvm/test/CodeGen/X86/conditional-tailcall.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/const-shift-of-constmasked.ll (+21-17)
- (modified) llvm/test/CodeGen/X86/constant-pool-sharing.ll (+4-4)
- (modified) llvm/test/CodeGen/X86/dagcombine-cse.ll (+7-7)
- (modified) llvm/test/CodeGen/X86/dagcombine-select.ll (+59-34)
- (modified) llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll (+36-36)
- (modified) llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll (+36-36)
- (modified) llvm/test/CodeGen/X86/divide-by-constant.ll (+10-3)
- (modified) llvm/test/CodeGen/X86/divmod128.ll (+21-20)
- (modified) llvm/test/CodeGen/X86/dont-trunc-store-double-to-float.ll (+6-3)
- (modified) llvm/test/CodeGen/X86/dpbusd.ll (+239-227)
- (modified) llvm/test/CodeGen/X86/dpbusd_const.ll (+131-127)
- (modified) llvm/test/CodeGen/X86/dpbusd_i4.ll (+54-31)
- (modified) llvm/test/CodeGen/X86/elementwise-store-of-scalar-splat.ll (+15-15)
- (modified) llvm/test/CodeGen/X86/expand-vp-cast-intrinsics.ll (+5-3)
- (modified) llvm/test/CodeGen/X86/expand-vp-int-intrinsics.ll (+64-68)
- (modified) llvm/test/CodeGen/X86/extract-bits.ll (+5-4)
- (modified) llvm/test/CodeGen/X86/extract-concat.ll (+33-17)
- (modified) llvm/test/CodeGen/X86/extract-fp.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/extract-insert.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/extract-lowbits.ll (+6-4)
- (modified) llvm/test/CodeGen/X86/extractelement-fp.ll (+16-16)
- (modified) llvm/test/CodeGen/X86/extractelement-load.ll (+5-5)
- (modified) llvm/test/CodeGen/X86/f16c-intrinsics-upgrade.ll (+12-4)
- (modified) llvm/test/CodeGen/X86/fdiv.ll (+4-4)
- (modified) llvm/test/CodeGen/X86/fma-fneg-combine-2.ll (+4-4)
- (modified) llvm/test/CodeGen/X86/fma.ll (+78-78)
- (modified) llvm/test/CodeGen/X86/fma_patterns.ll (+68-55)
- (modified) llvm/test/CodeGen/X86/fma_patterns_wide.ll (+27-22)
- (modified) llvm/test/CodeGen/X86/fmul-combines.ll (+11-1)
- (modified) llvm/test/CodeGen/X86/fold-call-3.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll (+369-55)
- (modified) llvm/test/CodeGen/X86/fold-masked-merge.ll (+4-5)
- (modified) llvm/test/CodeGen/X86/fold-rmw-ops.ll (+10-4)
- (modified) llvm/test/CodeGen/X86/fp-intrinsics.ll (+6-6)
- (modified) llvm/test/CodeGen/X86/fp-logic.ll (+3-2)
- (modified) llvm/test/CodeGen/X86/fp128-cast-strict.ll (+36-36)
- (modified) llvm/test/CodeGen/X86/fp128-cast.ll (+11-15)
- (modified) llvm/test/CodeGen/X86/fpclamptosat.ll (+15-24)
- (modified) llvm/test/CodeGen/X86/fpclamptosat_vec.ll (+378-368)
- (modified) llvm/test/CodeGen/X86/fpenv-combine.ll (+6-6)
- (modified) llvm/test/CodeGen/X86/fpenv.ll (+27-21)
- (modified) llvm/test/CodeGen/X86/freeze-unary.ll (+2)
- (modified) llvm/test/CodeGen/X86/freeze-vector.ll (+14-13)
- (modified) llvm/test/CodeGen/X86/frem.ll (+14-14)
- (modified) llvm/test/CodeGen/X86/funnel-shift.ll (+15-17)
- (modified) llvm/test/CodeGen/X86/h-registers-2.ll (+1-2)
- (modified) llvm/test/CodeGen/X86/haddsub-2.ll (+10-10)
- (modified) llvm/test/CodeGen/X86/haddsub-3.ll (+5-5)
- (modified) llvm/test/CodeGen/X86/haddsub-4.ll (+11-10)
- (modified) llvm/test/CodeGen/X86/haddsub-shuf.ll (+42-8)
- (modified) llvm/test/CodeGen/X86/haddsub-undef.ll (+163-92)
- (modified) llvm/test/CodeGen/X86/haddsub.ll (+55-124)
- (modified) llvm/test/CodeGen/X86/half.ll (+55-54)
- (modified) llvm/test/CodeGen/X86/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll (+42-40)
- (modified) llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll (+4-4)
- (modified) llvm/test/CodeGen/X86/horizontal-reduce-fadd.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/horizontal-reduce-smax.ll (+322-162)
- (modified) llvm/test/CodeGen/X86/horizontal-reduce-smin.ll (+322-162)
- (modified) llvm/test/CodeGen/X86/horizontal-reduce-umax.ll (+257-153)
- (modified) llvm/test/CodeGen/X86/horizontal-reduce-umin.ll (+331-103)
- (modified) llvm/test/CodeGen/X86/horizontal-sum.ll (+237-191)
- (modified) llvm/test/CodeGen/X86/i128-add.ll (+2-6)
- (modified) llvm/test/CodeGen/X86/i64-to-float.ll (+24-23)
- (modified) llvm/test/CodeGen/X86/iabs.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/icmp-abs-C.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/icmp-pow2-logic-npow2.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/icmp-shift-opt.ll (+81-46)
- (modified) llvm/test/CodeGen/X86/illegal-bitfield-loadstore.ll (+20-10)
- (modified) llvm/test/CodeGen/X86/indirect-branch-tracking-eh2.ll (+178-48)
- (modified) llvm/test/CodeGen/X86/insertelement-duplicates.ll (+6-6)
- (modified) llvm/test/CodeGen/X86/insertelement-var-index.ll (+7-7)
- (modified) llvm/test/CodeGen/X86/insertelement-zero.ll (+10-7)
- (modified) llvm/test/CodeGen/X86/is_fpclass-fp80.ll (+19-18)
- (modified) llvm/test/CodeGen/X86/is_fpclass.ll (+12-6)
- (modified) llvm/test/CodeGen/X86/ispow2.ll (+29-29)
- (modified) llvm/test/CodeGen/X86/jump_sign.ll (+7-6)
- (modified) llvm/test/CodeGen/X86/known-bits-vector.ll (+16-2)
- (modified) llvm/test/CodeGen/X86/known-never-zero.ll (+2-3)
- (modified) llvm/test/CodeGen/X86/known-pow2.ll (+5-8)
- (modified) llvm/test/CodeGen/X86/known-signbits-vector.ll (+27-25)
- (modified) llvm/test/CodeGen/X86/lea-recursion.ll (+10-10)
- (modified) llvm/test/CodeGen/X86/legalize-shift.ll (+6-2)
- (modified) llvm/test/CodeGen/X86/lifetime-alias.ll (+6-6)
- (modified) llvm/test/CodeGen/X86/load-chain.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/load-combine.ll (+5-5)
- (modified) llvm/test/CodeGen/X86/load-local-v3i1.ll (+4-4)
- (modified) llvm/test/CodeGen/X86/load-local-v3i129.ll (+18-11)
- (modified) llvm/test/CodeGen/X86/load-partial.ll (+66-37)
- (modified) llvm/test/CodeGen/X86/load-scalar-as-vector.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/lzcnt-cmp.ll (+6-3)
- (modified) llvm/test/CodeGen/X86/madd.ll (+1431-359)
- (modified) llvm/test/CodeGen/X86/mask-negated-bool.ll (+8-2)
- (modified) llvm/test/CodeGen/X86/masked_compressstore.ll (+7-5)
- (modified) llvm/test/CodeGen/X86/masked_expandload.ll (+14-16)
- (modified) llvm/test/CodeGen/X86/masked_gather.ll (+6-3)
- (modified) llvm/test/CodeGen/X86/masked_gather_scatter.ll (+90-45)
- (modified) llvm/test/CodeGen/X86/masked_gather_scatter_widen.ll (+12-6)
- (modified) llvm/test/CodeGen/X86/masked_load.ll (+12-18)
- (modified) llvm/test/CodeGen/X86/masked_store.ll (+43-39)
- (modified) llvm/test/CodeGen/X86/masked_store_trunc.ll (+25-14)
- (modified) llvm/test/CodeGen/X86/masked_store_trunc_ssat.ll (+469-449)
- (modified) llvm/test/CodeGen/X86/masked_store_trunc_usat.ll (+262-236)
- (modified) llvm/test/CodeGen/X86/matrix-multiply.ll (+164-172)
- (modified) llvm/test/CodeGen/X86/memcmp-more-load-pairs-x32.ll (+12-12)
- (modified) llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll (+833-64)
- (modified) llvm/test/CodeGen/X86/memcmp-optsize-x32.ll (+6-6)
- (modified) llvm/test/CodeGen/X86/memcmp-optsize.ll (+11-10)
- (modified) llvm/test/CodeGen/X86/memcmp-pgso-x32.ll (+6-6)
- (modified) llvm/test/CodeGen/X86/memcmp-pgso.ll (+11-10)
- (modified) llvm/test/CodeGen/X86/memcmp-x32.ll (+4-4)
- (modified) llvm/test/CodeGen/X86/memcmp.ll (+833-64)
- (modified) llvm/test/CodeGen/X86/memset-zero.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/merge-consecutive-loads-128.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/merge-consecutive-loads-512.ll (+15-7)
- (modified) llvm/test/CodeGen/X86/merge-consecutive-stores-nt.ll (+4-4)
- (modified) llvm/test/CodeGen/X86/merge-store-constants.ll (+7-5)
- (modified) llvm/test/CodeGen/X86/midpoint-int-vec-128.ll (+94-64)
- (modified) llvm/test/CodeGen/X86/midpoint-int-vec-256.ll (+114-64)
- (modified) llvm/test/CodeGen/X86/midpoint-int-vec-512.ll (+476-372)
- (modified) llvm/test/CodeGen/X86/min-legal-vector-width.ll (+214-108)
- (modified) llvm/test/CodeGen/X86/mmx-cvt.ll (+3-9)
- (modified) llvm/test/CodeGen/X86/movmsk-cmp.ll (+918-739)
- (modified) llvm/test/CodeGen/X86/mul128.ll (+6-6)
- (modified) llvm/test/CodeGen/X86/mulvi32.ll (+8-8)
- (modified) llvm/test/CodeGen/X86/musttail-varargs.ll (+10-10)
- (modified) llvm/test/CodeGen/X86/neg-abs.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/neg_fp.ll (+8-6)
- (modified) llvm/test/CodeGen/X86/no-wide-load.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/nontemporal-loads.ll (+72-72)
- (modified) llvm/test/CodeGen/X86/oddshuffles.ll (+273-270)
- (modified) llvm/test/CodeGen/X86/oddsubvector.ll (+124-81)
- (modified) llvm/test/CodeGen/X86/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll (+134-83)
- (modified) llvm/test/CodeGen/X86/overflow.ll (+3-1)
- (modified) llvm/test/CodeGen/X86/packus.ll (+10-8)
- (modified) llvm/test/CodeGen/X86/parity-vec.ll (+9-22)
- (modified) llvm/test/CodeGen/X86/phaddsub-extract.ll (+465-126)
- (modified) llvm/test/CodeGen/X86/phaddsub.ll (+23-63)
- (modified) llvm/test/CodeGen/X86/pmaddubsw.ll (+1033-91)
- (modified) llvm/test/CodeGen/X86/pmulh.ll (+637-312)
- (modified) llvm/test/CodeGen/X86/pr15267.ll (+21-25)
- (modified) llvm/test/CodeGen/X86/pr22338.ll (+10-4)
- (modified) llvm/test/CodeGen/X86/pr23258.ll (+14-14)
- (modified) llvm/test/CodeGen/X86/pr28472.ll (+5)
- (modified) llvm/test/CodeGen/X86/pr31045.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/pr32284.ll (+24-33)
- (modified) llvm/test/CodeGen/X86/pr32345.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/pr33290.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/pr33828.ll (+6-6)
- (modified) llvm/test/CodeGen/X86/pr34137.ll (+5-8)
- (modified) llvm/test/CodeGen/X86/pr34592.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/pr35316.ll (+7-7)
- (modified) llvm/test/CodeGen/X86/pr35443.ll (+3-4)
- (modified) llvm/test/CodeGen/X86/pr35765.ll (+2-3)
- (modified) llvm/test/CodeGen/X86/pr35982.ll (+40-12)
- (modified) llvm/test/CodeGen/X86/pr38185.ll (+4-5)
- (modified) llvm/test/CodeGen/X86/pr38217.ll (+5-5)
- (modified) llvm/test/CodeGen/X86/pr38539.ll (+6-6)
- (modified) llvm/test/CodeGen/X86/pr38639.ll (+5-6)
- (modified) llvm/test/CodeGen/X86/pr39666.ll (+2-1)
- (modified) llvm/test/CodeGen/X86/pr40730.ll (+3-1)
- (modified) llvm/test/CodeGen/X86/pr41619.ll (+1)
- (modified) llvm/test/CodeGen/X86/pr42727.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/pr42905.ll (+4-10)
- (modified) llvm/test/CodeGen/X86/pr44976.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/pr45563-2.ll (+84-63)
- (modified) llvm/test/CodeGen/X86/pr45833.ll (+97-76)
- (modified) llvm/test/CodeGen/X86/pr46820.ll (+8-7)
- (modified) llvm/test/CodeGen/X86/pr46877.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/pr47517.ll (+13-1)
- (modified) llvm/test/CodeGen/X86/pr49162.ll (+2-6)
- (modified) llvm/test/CodeGen/X86/pr49451.ll (+1-2)
- (modified) llvm/test/CodeGen/X86/pr50609.ll (+3-2)
- (modified) llvm/test/CodeGen/X86/pr51615.ll (+9-8)
- (modified) llvm/test/CodeGen/X86/pr53419.ll (+28-60)
- (modified) llvm/test/CodeGen/X86/pr53842.ll (+2-14)
- (modified) llvm/test/CodeGen/X86/pr56103.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/pr57340.ll (+4-5)
- (modified) llvm/test/CodeGen/X86/pr57402.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/pr57658.ll (+1-2)
- (modified) llvm/test/CodeGen/X86/pr59980.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/pr61923.ll (+6-4)
- (modified) llvm/test/CodeGen/X86/pr62286.ll (+20-19)
- (modified) llvm/test/CodeGen/X86/pr63439.ll (+2-1)
- (modified) llvm/test/CodeGen/X86/pr64593.ll (+2-4)
- (modified) llvm/test/CodeGen/X86/pr67333.ll (+34-36)
- (modified) llvm/test/CodeGen/X86/pr74736.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/prefer-avx256-mask-shuffle.ll (+2-6)
- (modified) llvm/test/CodeGen/X86/promote-vec3.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/psubus.ll (+482-410)
- (modified) llvm/test/CodeGen/X86/pull-binop-through-shift.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/rotate-extract-vector.ll (+22-10)
- (modified) llvm/test/CodeGen/X86/rotate-extract.ll (+42-34)
- (modified) llvm/test/CodeGen/X86/rotate.ll (+5-5)
- (modified) llvm/test/CodeGen/X86/rotate4.ll (+12-12)
- (modified) llvm/test/CodeGen/X86/rotate_vec.ll (+50-14)
- (modified) llvm/test/CodeGen/X86/sad.ll (+401-96)
- (modified) llvm/test/CodeGen/X86/sad_variations.ll (+2-56)
- (modified) llvm/test/CodeGen/X86/sadd_sat.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/sadd_sat_plus.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/sadd_sat_vec.ll (+94-87)
- (modified) llvm/test/CodeGen/X86/sat-add.ll (+94-37)
- (modified) llvm/test/CodeGen/X86/scalar_widen_div.ll (+14-13)
- (modified) llvm/test/CodeGen/X86/scalarize-fp.ll (+17-3)
- (modified) llvm/test/CodeGen/X86/sdiv-exact.ll (+17-13)
- (modified) llvm/test/CodeGen/X86/sdiv_fix.ll (+107-106)
- (modified) llvm/test/CodeGen/X86/sdiv_fix_sat.ll (+223-225)
- (modified) llvm/test/CodeGen/X86/select-sra.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/setcc-combine.ll (+94-30)
- (modified) llvm/test/CodeGen/X86/setcc-freeze.ll (+2-1)
- (modified) llvm/test/CodeGen/X86/setcc-fsh.ll (+22-24)
- (modified) llvm/test/CodeGen/X86/setcc-logic.ll (+13-15)
- (modified) llvm/test/CodeGen/X86/setcc-wide-types.ll (+273-185)
- (modified) llvm/test/CodeGen/X86/setcc.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/shift-amount-mod.ll (+10-11)
- (modified) llvm/test/CodeGen/X86/shift-and.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/shift-by-signext.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/shift-combine.ll (+4-4)
- (modified) llvm/test/CodeGen/X86/shift-i128.ll (+69-65)
- (modified) llvm/test/CodeGen/X86/shift-i256.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/shift-mask.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/shift-parts.ll (+4-6)
- (modified) llvm/test/CodeGen/X86/shl_elim.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/shrink_vmul.ll (+138-134)
- (modified) llvm/test/CodeGen/X86/shuffle-combine-crash-5.ll (+2-3)
- (modified) llvm/test/CodeGen/X86/shuffle-half.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/shuffle-strided-with-offset-128.ll (+4-4)
- (modified) llvm/test/CodeGen/X86/shuffle-strided-with-offset-512.ll (+4-4)
- (modified) llvm/test/CodeGen/X86/shuffle-vs-trunc-256.ll (+47-47)
- (modified) llvm/test/CodeGen/X86/shuffle-vs-trunc-512.ll (+26-6)
- (modified) llvm/test/CodeGen/X86/signbit-shift.ll (+3-2)
- (modified) llvm/test/CodeGen/X86/single_elt_vector_memory_operation.ll (+6-6)
- (modified) llvm/test/CodeGen/X86/smul-with-overflow.ll (+69-67)
- (modified) llvm/test/CodeGen/X86/splat-for-size.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/srem-seteq-illegal-types.ll (+21-20)
``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 2327664516ccdae..4cb7d5db1d360b7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1817,13 +1817,15 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
WorklistInserter AddNodes(*this);
+ DAG.AssignTopologicalOrder();
+
// Add all the dag nodes to the worklist.
//
// Note: All nodes are not added to PruningList here, this is because the only
// nodes which can be deleted are those which have no uses and all other nodes
// which would otherwise be added to the worklist by the first call to
// getNextWorklistEntry are already present in it.
- for (SDNode &Node : DAG.allnodes())
+ for (SDNode &Node : reverse(DAG.allnodes()))
AddToWorklist(&Node, /* IsCandidateForPruning */ Node.use_empty());
// Create a dummy node (which is not added to allnodes), that adds a reference
@@ -3488,7 +3490,6 @@ static SDValue combineUADDO_CARRYDiamond(DAGCombiner &Combiner,
return SDValue();
}
-
auto cancelDiamond = [&](SDValue A,SDValue B) {
SDLoc DL(N);
SDValue NewY =
@@ -3559,46 +3560,82 @@ static SDValue combineCarryDiamond(SelectionDAG &DAG, const TargetLowering &TLI,
SDValue Carry0 = getAsCarry(TLI, N0);
if (!Carry0)
return SDValue();
+
SDValue Carry1 = getAsCarry(TLI, N1);
if (!Carry1)
return SDValue();
- unsigned Opcode = Carry0.getOpcode();
- if (Opcode != Carry1.getOpcode())
+ auto matchCarry = [](SDValue N, SDValue &A, SDValue &B, bool &IsAdd) {
+ unsigned Opcode = N.getOpcode();
+ IsAdd = Opcode == ISD::UADDO || Opcode == ISD::UADDO_CARRY;
+
+ if (Opcode == ISD::UADDO || Opcode == ISD::USUBO) {
+ A = N.getOperand(0);
+ B = N.getOperand(1);
+ return true;
+ }
+
+ if ((Opcode == ISD::UADDO_CARRY || Opcode == ISD::USUBO_CARRY) &&
+ isNullConstant(N.getOperand(1))) {
+ A = N.getOperand(0);
+ B = N.getOperand(2);
+ return true;
+ }
+
+ return false;
+ };
+
+ SDValue X, Y;
+ bool IsAdd;
+ if (!matchCarry(Carry0, X, Y, IsAdd))
+ return SDValue();
+
+ SDValue A, B;
+ bool IsAdd1;
+ if (!matchCarry(Carry1, A, B, IsAdd1))
return SDValue();
- if (Opcode != ISD::UADDO && Opcode != ISD::USUBO)
+
+ if (IsAdd != IsAdd1)
+ return SDValue();
+
+ unsigned NewOp = IsAdd ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
+ if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType()))
return SDValue();
// Canonicalize the add/sub of A and B (the top node in the above ASCII art)
// as Carry0 and the add/sub of the carry in as Carry1 (the middle node).
- if (Carry1.getNode()->isOperandOf(Carry0.getNode()))
+ if (Carry0.getValue(0) == A || Carry0.getValue(0) == B) {
std::swap(Carry0, Carry1);
+ std::swap(A, X);
+ std::swap(B, Y);
+ }
// Check if nodes are connected in expected way.
- if (Carry1.getOperand(0) != Carry0.getValue(0) &&
- Carry1.getOperand(1) != Carry0.getValue(0))
- return SDValue();
-
- // The carry in value must be on the righthand side for subtraction.
- unsigned CarryInOperandNum =
- Carry1.getOperand(0) == Carry0.getValue(0) ? 1 : 0;
- if (Opcode == ISD::USUBO && CarryInOperandNum != 1)
- return SDValue();
- SDValue CarryIn = Carry1.getOperand(CarryInOperandNum);
-
- unsigned NewOp = Opcode == ISD::UADDO ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
- if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType()))
+ if (Carry1.getValue(0) != X && (!IsAdd || Carry1.getValue(0) != Y))
return SDValue();
// Verify that the carry/borrow in is plausibly a carry/borrow bit.
+ // TODO: make getAsCarry() aware of how partial carries are merged.
+ SDValue CarryIn = Carry1.getValue(0) == X ? Y : X;
CarryIn = getAsCarry(TLI, CarryIn, true);
if (!CarryIn)
return SDValue();
SDLoc DL(N);
SDValue Merged =
- DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0),
- Carry0.getOperand(1), CarryIn);
+ DAG.getNode(NewOp, DL, Carry1->getVTList(), A, B, CarryIn);
+
+ LLVM_DEBUG(
+ dbgs() << "Combine carry diamond:\n";
+ dbgs() << "\tCarry0: "; Carry0->dump();
+ dbgs() << "\tX: "; X->dump();
+ dbgs() << "\tY: "; Y->dump();
+ dbgs() << "\tCarry1: "; Carry1->dump();
+ dbgs() << "\tA: "; A->dump();
+ dbgs() << "\tB: "; B->dump();
+ dbgs() << "\tCarryIn: "; CarryIn->dump();
+ dbgs() << "\tMerged: "; Merged->dump();
+ dbgs() << "\n");
// Please note that because we have proven that the result of the UADDO/USUBO
// of A and B feeds into the UADDO/USUBO that does the carry/borrow in, we can
@@ -3613,7 +3650,7 @@ static SDValue combineCarryDiamond(SelectionDAG &DAG, const TargetLowering &TLI,
// carry flags; and that AND can return a constant zero.
//
// TODO: match other operations that can merge flags (ADD, etc)
- DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0));
+ DAG.ReplaceAllUsesOfValueWith(Carry0.getValue(0), Merged.getValue(0));
if (N->getOpcode() == ISD::AND)
return DAG.getConstant(0, DL, MVT::i1);
return Merged.getValue(1);
diff --git a/llvm/test/CodeGen/X86/2007-05-15-maskmovq.ll b/llvm/test/CodeGen/X86/2007-05-15-maskmovq.ll
index 69f733461efc77d..48fbd6b213695d5 100644
--- a/llvm/test/CodeGen/X86/2007-05-15-maskmovq.ll
+++ b/llvm/test/CodeGen/X86/2007-05-15-maskmovq.ll
@@ -12,14 +12,14 @@ define void @test(<1 x i64> %c64, <1 x i64> %mask1, ptr %P) {
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; CHECK-NEXT: movl %eax, (%esp)
+; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp)
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; CHECK-NEXT: movl %eax, (%esp)
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi
-; CHECK-NEXT: movq (%esp), %mm0
-; CHECK-NEXT: movq {{[0-9]+}}(%esp), %mm1
+; CHECK-NEXT: movq {{[0-9]+}}(%esp), %mm0
+; CHECK-NEXT: movq (%esp), %mm1
; CHECK-NEXT: maskmovq %mm0, %mm1
; CHECK-NEXT: addl $16, %esp
; CHECK-NEXT: popl %edi
diff --git a/llvm/test/CodeGen/X86/2008-05-12-tailmerge-5.ll b/llvm/test/CodeGen/X86/2008-05-12-tailmerge-5.ll
index 0d63779227554cf..003358f5f61c47a 100644
--- a/llvm/test/CodeGen/X86/2008-05-12-tailmerge-5.ll
+++ b/llvm/test/CodeGen/X86/2008-05-12-tailmerge-5.ll
@@ -15,9 +15,10 @@ define void @passing2(i64 %str.0, i64 %str.1, i16 signext %s, i32 %j, i8 signex
; CHECK-NEXT: movq %rdi, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movb %al, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movb %ah, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: shrq $16, %rax
+; CHECK-NEXT: shrl $16, %esi
+; CHECK-NEXT: movb %sil, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: shrl $24, %eax
; CHECK-NEXT: movb %al, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movb %ah, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movw %dx, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movl %ecx, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movb %r8b, {{[0-9]+}}(%rsp)
diff --git a/llvm/test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll b/llvm/test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll
index bf309f015668425..c699c74e3253747 100644
--- a/llvm/test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll
+++ b/llvm/test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll
@@ -10,11 +10,13 @@ define i16 @f(i64 %x, double %y) {
; CHECK-LABEL: f:
; CHECK: # %bb.0:
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
-; CHECK-NEXT: movsd %xmm1, atomic
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: movsd %xmm1, atomic2
-; CHECK-NEXT: movsd %xmm0, anything
+; CHECK-NEXT: movsd %xmm0, atomic
+; CHECK-NEXT: xorps %xmm0, %xmm0
+; CHECK-NEXT: movsd %xmm0, atomic2
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT: movl %ecx, anything+4
+; CHECK-NEXT: movl %eax, anything
; CHECK-NEXT: movl ioport, %ecx
; CHECK-NEXT: movl ioport, %eax
; CHECK-NEXT: shrl $16, %eax
diff --git a/llvm/test/CodeGen/X86/2008-12-02-dagcombine-1.ll b/llvm/test/CodeGen/X86/2008-12-02-dagcombine-1.ll
index 62998a593996b2d..b4d7ae6d2320bb3 100644
--- a/llvm/test/CodeGen/X86/2008-12-02-dagcombine-1.ll
+++ b/llvm/test/CodeGen/X86/2008-12-02-dagcombine-1.ll
@@ -8,8 +8,11 @@ define ptr @test(ptr %a, ptr %L, ptr %P) nounwind {
; CHECK-LABEL: test:
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: subl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: addl $-2, %eax
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
+; CHECK-NEXT: addl %ecx, %edx
+; CHECK-NEXT: subl %edx, %eax
+; CHECK-NEXT: leal -2(%eax,%ecx), %eax
; CHECK-NEXT: retl
entry:
%0 = ptrtoint ptr %a to i32
diff --git a/llvm/test/CodeGen/X86/2009-04-29-IndirectDestOperands.ll b/llvm/test/CodeGen/X86/2009-04-29-IndirectDestOperands.ll
index 8fc701c8cf69425..697e606a853840b 100644
--- a/llvm/test/CodeGen/X86/2009-04-29-IndirectDestOperands.ll
+++ b/llvm/test/CodeGen/X86/2009-04-29-IndirectDestOperands.ll
@@ -16,9 +16,9 @@ define void @cpuid(ptr %data) nounwind {
; CHECK-NEXT: ## InlineAsm Start
; CHECK-NEXT: cpuid
; CHECK-NEXT: ## InlineAsm End
-; CHECK-NEXT: movl %ebx, 8(%esi)
-; CHECK-NEXT: movl %ecx, 12(%esi)
; CHECK-NEXT: movl %edx, 16(%esi)
+; CHECK-NEXT: movl %ecx, 12(%esi)
+; CHECK-NEXT: movl %ebx, 8(%esi)
; CHECK-NEXT: movl %eax, 4(%esi)
; CHECK-NEXT: popl %esi
; CHECK-NEXT: popl %ebx
diff --git a/llvm/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll b/llvm/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll
index ecaa105dedcfe8d..7d61e3430fa1901 100644
--- a/llvm/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll
+++ b/llvm/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll
@@ -17,17 +17,17 @@ define fastcc i32 @cli_magic_scandesc(ptr %in) nounwind ssp {
; CHECK-NEXT: movq %rdx, (%rsp)
; CHECK-NEXT: movq 24(%rdi), %rdx
; CHECK-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movq %rsi, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movq 16(%rdi), %rdx
+; CHECK-NEXT: movq 56(%rdi), %rdx
; CHECK-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movq 32(%rdi), %rdx
+; CHECK-NEXT: movq 48(%rdi), %rdx
; CHECK-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movq 40(%rdi), %rdx
; CHECK-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movq 48(%rdi), %rdx
+; CHECK-NEXT: movq 32(%rdi), %rdx
; CHECK-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: movq 56(%rdi), %rdx
+; CHECK-NEXT: movq 16(%rdi), %rdx
; CHECK-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: movq %rsi, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movb %al, (%rsp)
; CHECK-NEXT: movb %cl, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movq __stack_chk_guard(%rip), %rax
diff --git a/llvm/test/CodeGen/X86/2011-10-19-LegelizeLoad.ll b/llvm/test/CodeGen/X86/2011-10-19-LegelizeLoad.ll
index 65ff22f960f233f..d325c267833d834 100644
--- a/llvm/test/CodeGen/X86/2011-10-19-LegelizeLoad.ll
+++ b/llvm/test/CodeGen/X86/2011-10-19-LegelizeLoad.ll
@@ -17,21 +17,19 @@ target triple = "x86_64-unknown-linux-gnu"
define dso_local i32 @main() nounwind uwtable {
; CHECK-LABEL: main:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: movl i(%rip), %esi
-; CHECK-NEXT: movl j(%rip), %eax
-; CHECK-NEXT: movl %esi, %edx
+; CHECK-NEXT: movq i(%rip), %rdx
+; CHECK-NEXT: movq j(%rip), %rsi
+; CHECK-NEXT: movsbl %sil, %eax
+; CHECK-NEXT: idivb %dl
+; CHECK-NEXT: movl %eax, %ecx
; CHECK-NEXT: shrl $8, %edx
-; CHECK-NEXT: movsbl %al, %ecx
-; CHECK-NEXT: shrl $8, %eax
-; CHECK-NEXT: cbtw
+; CHECK-NEXT: shrl $8, %esi
+; CHECK-NEXT: movsbl %sil, %eax
; CHECK-NEXT: idivb %dl
-; CHECK-NEXT: movl %eax, %edx
-; CHECK-NEXT: movl %ecx, %eax
-; CHECK-NEXT: idivb %sil
-; CHECK-NEXT: movzbl %dl, %ecx
+; CHECK-NEXT: movzbl %cl, %ecx
+; CHECK-NEXT: movd %ecx, %xmm0
; CHECK-NEXT: movzbl %al, %eax
-; CHECK-NEXT: movd %eax, %xmm0
-; CHECK-NEXT: pinsrb $1, %ecx, %xmm0
+; CHECK-NEXT: pinsrb $1, %eax, %xmm0
; CHECK-NEXT: pextrw $0, %xmm0, res(%rip)
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/abds.ll b/llvm/test/CodeGen/X86/abds.ll
index 39ac47e99e6e989..1cd5b7e785af595 100644
--- a/llvm/test/CodeGen/X86/abds.ll
+++ b/llvm/test/CodeGen/X86/abds.ll
@@ -129,21 +129,27 @@ define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind {
define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind {
; X86-LABEL: abd_ext_i16_i32:
; X86: # %bb.0:
+; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, %edx
+; X86-NEXT: sarl $31, %edx
; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %eax, %edx
-; X86-NEXT: subl %ecx, %edx
-; X86-NEXT: negl %edx
+; X86-NEXT: movl %eax, %esi
+; X86-NEXT: sarl $31, %esi
; X86-NEXT: subl %ecx, %eax
-; X86-NEXT: cmovlel %edx, %eax
+; X86-NEXT: sbbl %edx, %esi
+; X86-NEXT: sarl $31, %esi
+; X86-NEXT: xorl %esi, %eax
+; X86-NEXT: subl %esi, %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: abd_ext_i16_i32:
; X64: # %bb.0:
+; X64-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-NEXT: movswq %di, %rcx
; X64-NEXT: movslq %esi, %rax
-; X64-NEXT: movswl %di, %ecx
-; X64-NEXT: movslq %ecx, %rcx
; X64-NEXT: subq %rax, %rcx
; X64-NEXT: movq %rcx, %rax
; X64-NEXT: negq %rax
@@ -191,13 +197,19 @@ define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind {
define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind {
; X86-LABEL: abd_ext_i32:
; X86: # %bb.0:
+; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, %edx
+; X86-NEXT: sarl $31, %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %eax, %edx
-; X86-NEXT: subl %ecx, %edx
-; X86-NEXT: negl %edx
+; X86-NEXT: movl %eax, %esi
+; X86-NEXT: sarl $31, %esi
; X86-NEXT: subl %ecx, %eax
-; X86-NEXT: cmovlel %edx, %eax
+; X86-NEXT: sbbl %edx, %esi
+; X86-NEXT: sarl $31, %esi
+; X86-NEXT: xorl %esi, %eax
+; X86-NEXT: subl %esi, %eax
+; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: abd_ext_i32:
@@ -221,20 +233,26 @@ define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind {
define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
; X86-LABEL: abd_ext_i32_i16:
; X86: # %bb.0:
+; X86-NEXT: pushl %esi
; X86-NEXT: movswl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, %edx
+; X86-NEXT: sarl $31, %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %eax, %edx
-; X86-NEXT: subl %ecx, %edx
-; X86-NEXT: negl %edx
+; X86-NEXT: movl %eax, %esi
+; X86-NEXT: sarl $31, %esi
; X86-NEXT: subl %ecx, %eax
-; X86-NEXT: cmovlel %edx, %eax
+; X86-NEXT: sbbl %edx, %esi
+; X86-NEXT: sarl $31, %esi
+; X86-NEXT: xorl %esi, %eax
+; X86-NEXT: subl %esi, %eax
+; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: abd_ext_i32_i16:
; X64: # %bb.0:
+; X64-NEXT: # kill: def $esi killed $esi def $rsi
; X64-NEXT: movslq %edi, %rcx
-; X64-NEXT: movswl %si, %eax
-; X64-NEXT: cltq
+; X64-NEXT: movswq %si, %rax
; X64-NEXT: subq %rax, %rcx
; X64-NEXT: movq %rcx, %rax
; X64-NEXT: negq %rax
@@ -252,13 +270,19 @@ define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
define i32 @abd_ext_i32_undef(i32 %a, i32 %b) nounwind {
; X86-LABEL: abd_ext_i32_undef:
; X86: # %bb.0:
+; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, %edx
+; X86-NEXT: sarl $31, %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %eax, %edx
-; X86-NEXT: subl %ecx, %edx
-; X86-NEXT: negl %edx
+; X86-NEXT: movl %eax, %esi
+; X86-NEXT: sarl $31, %esi
; X86-NEXT: subl %ecx, %eax
-; X86-NEXT: cmovlel %edx, %eax
+; X86-NEXT: sbbl %edx, %esi
+; X86-NEXT: sarl $31, %esi
+; X86-NEXT: xorl %esi, %eax
+; X86-NEXT: subl %esi, %eax
+; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: abd_ext_i32_undef:
diff --git a/llvm/test/CodeGen/X86/abdu.ll b/llvm/test/CodeGen/X86/abdu.ll
index 11719be4ab5cd0c..be0e22c747eaff4 100644
--- a/llvm/test/CodeGen/X86/abdu.ll
+++ b/llvm/test/CodeGen/X86/abdu.ll
@@ -20,13 +20,13 @@ define i8 @abd_ext_i8(i8 %a, i8 %b) nounwind {
;
; X64-LABEL: abd_ext_i8:
; X64: # %bb.0:
-; X64-NEXT: movzbl %sil, %eax
; X64-NEXT: movzbl %dil, %ecx
-; X64-NEXT: subl %eax, %ecx
-; X64-NEXT: movl %ecx, %eax
-; X64-NEXT: negl %eax
-; X64-NEXT: cmovsl %ecx, %eax
-; X64-NEXT: # kill: def $al killed $al killed $eax
+; X64-NEXT: movzbl %sil, %eax
+; X64-NEXT: subq %rax, %rcx
+; X64-NEXT: movq %rcx, %rax
+; X64-NEXT: negq %rax
+; X64-NEXT: cmovsq %rcx, %rax
+; X64-NEXT: # kill: def $al killed $al killed $rax
; X64-NEXT: retq
%aext = zext i8 %a to i64
%bext = zext i8 %b to i64
@@ -50,13 +50,13 @@ define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind {
;
; X64-LABEL: abd_ext_i8_i16:
; X64: # %bb.0:
-; X64-NEXT: movzwl %si, %eax
; X64-NEXT: movzbl %dil, %ecx
-; X64-NEXT: subl %eax, %ecx
-; X64-NEXT: movl %ecx, %eax
-; X64-NEXT: negl %eax
-; X64-NEXT: cmovsl %ecx, %eax
-; X64-NEXT: # kill: def $al killed $al killed $eax
+; X64-NEXT: movzwl %si, %eax
+; X64-NEXT: subq %rax, %rcx
+; X64-NEXT: movq %rcx, %rax
+; X64-NEXT: negq %rax
+; X64-NEXT: cmovsq %rcx, %rax
+; X64-NEXT: # kill: def $al killed $al killed $rax
; X64-NEXT: retq
%aext = zext i8 %a to i64
%bext = zext i16 %b to i64
@@ -80,13 +80,13 @@ define i8 @abd_ext_i8_undef(i8 %a, i8 %b) nounwind {
;
; X64-LABEL: abd_ext_i8_undef:
; X64: # %bb.0:
-; X64-NEXT: movzbl %sil, %eax
; X64-NEXT: movzbl %dil, %ecx
-; X64-NEXT: subl %eax, %ecx
-; X64-NEXT: movl %ecx, %eax
-; X64-NEXT: negl %eax
-; X64-NEXT: cmovsl %ecx, %eax
-; X64-NEXT: # kill: def $al killed $al killed $eax
+; X64-NEXT: movzbl %sil, %eax
+; X64-NEXT: subq %rax, %rcx
+; X64-NEXT: movq %rcx, %rax
+; X64-NEXT: negq %rax
+; X64-NEXT: cmovsq %rcx, %rax
+; X64-NEXT: # kill: def $al killed $al killed $rax
; X64-NEXT: retq
%aext = zext i8 %a to i64
%bext = zext i8 %b to i64
@@ -110,13 +110,13 @@ define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind {
;
; X64-LABEL: abd_ext_i16:
; X64: # %bb.0:
-; X64-NEXT: movzwl %si, %eax
; X64-NEXT: movzwl %di, %ecx
-; X64-NEXT: subl %eax, %ecx
-; X64-NEXT: movl %ecx, %eax
-; X64-NEXT: negl %eax
-; X64-NEXT: cmovsl %ecx, %eax
-; X64-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-NEXT: movzwl %si, %eax
+; X64-NEXT: subq %rax, %rcx
+; X64-NEXT: movq %rcx, %rax
+; X64-NEXT: negq %rax
+; X64-NEXT: cmovsq %rcx, %rax
+; X64-NEXT: # kill: def $ax killed $ax killed $rax
; X64-NEXT: retq
%aext = zext i16 %a to i64
%bext = zext i16 %b to i64
@@ -129,20 +129,20 @@ define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind {
define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind {
; X86-LABEL: abd_ext_i16_i32:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %eax, %edx
-; X86-NEXT: subl %ecx, %edx
-; X86-NEXT: negl %edx
+; X86-NEXT: xorl %ecx, %ecx
+; X86-NEXT: subl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: sbbl %ecx, %ecx
+; X86-NEXT: sarl $31, %ecx
+; X86-NEXT: xorl %ecx, %eax
; X86-NEXT: subl %ecx, %eax
-; X86-NEXT: cmovbel %edx, %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl
;
; X64-LABEL: abd_ext_i16_i32:
; X64: # %bb.0:
-; X64-NEXT: movl %esi, %eax
; X64-NEXT: movzwl %di, %ecx
+; X64-NEXT: movl %esi, %eax
; X64-NEXT: subq %rax, %rcx
; X64-NEXT: movq %rcx, %rax
; X64-NEXT: negq %rax
@@ -171,13 +171,13 @@ define i16 @abd_ext_i16_undef(i16 %a, i16 %b) nounwind {
;
; X64-LABEL: abd_ext_i16_undef:
; X64: # %bb.0:
-; X64-NEXT: movzwl %si, %eax
; X64-NEXT: movzwl %di, %ecx
-; X64-NEXT: subl %eax, %ecx
-; X64-NEXT: movl %ecx, %eax
-; ...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/77475
More information about the llvm-commits mailing list