[llvm] [MCP] Move dependencies if they block copy propagation (PR #105562)

via llvm-commits llvm-commits at lists.llvm.org
Wed Aug 21 11:13:41 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-arm

Author: Gábor Spaits (spaits)

<details>
<summary>Changes</summary>

As we discussed in a previous PR (https://github.com/llvm/llvm-project/pull/98087), here is an implementation that uses ScheduleDAG in MCP.

This PR is not fully finished yet, and I have not done any precise benchmarking.

The only thing I have done is measure how long generating some regression tests takes before and after my patch. I have not seen any increase on my machine, but this is not a precise way of measuring.

I have not updated all the tests yet.

Could you please take a quick look at this PR and give some feedback?
Is this direction good? Should we continue with this? (If so, I will do some compile-time benchmarking and also update the tests.)
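
To make the idea easier to review at a glance, here is a minimal, framework-free sketch of the core check (this is not code from the patch; the toy dependency graph and all names are made up for illustration): the copy's dependency predecessors are walked with a BFS, and if none of the instructions between the copy source and the copy transitively depends on the source, those blockers can be hoisted above the source while keeping their relative order.

```cpp
// Toy model of the "move dependencies out of the way" check, independent of
// LLVM. Node 0 plays the role of the instruction whose def we want to rename
// (the "source"); the last node plays the role of the COPY (the "root").
#include <cstdio>
#include <queue>
#include <vector>

struct Node {
  std::vector<int> Preds; // indices of the instructions this one depends on
};

// Marks the intermediate nodes that would have to be hoisted above Source so
// that nothing between Source and Root blocks the propagation. Returns false
// if some blocker (transitively) depends on Source itself.
static bool collectMovable(const std::vector<Node> &Block, int Source, int Root,
                           std::vector<bool> &Movable) {
  Movable.assign(Block.size(), false);
  std::queue<int> Work;
  auto Visit = [&](int N, bool IsRoot) {
    for (int P : Block[N].Preds) {
      if (!IsRoot && P == Source)
        return false; // a blocker depends on the source: give up
      if (P != Source && P > Source && P < Root && !Movable[P]) {
        Movable[P] = true; // this instruction must be hoisted above Source
        Work.push(P);
      }
    }
    return true;
  };
  if (!Visit(Root, /*IsRoot=*/true))
    return false;
  while (!Work.empty()) {
    int N = Work.front();
    Work.pop();
    if (!Visit(N, /*IsRoot=*/false))
      return false;
  }
  return true;
}

int main() {
  // Block: I0 (source def), I1, I2, I3 (COPY). I3 depends on I0 and I2, and
  // I2 depends on I1; neither I1 nor I2 depends on I0, so both can be hoisted.
  std::vector<Node> Block = {{{}}, {{}}, {{1}}, {{0, 2}}};
  std::vector<bool> Movable;
  bool OK = collectMovable(Block, /*Source=*/0, /*Root=*/3, Movable);
  std::printf("can propagate: %s; hoist:", OK ? "yes" : "no");
  for (size_t I = 0; I < Movable.size(); ++I)
    if (Movable[I])
      std::printf(" I%zu", I);
  std::printf("\n");
}
```

The actual patch does the same walk over `SUnit::Preds` in `moveInstructionsOutOfTheWayIfWeCan` and then splices the marked instructions in front of the copy source.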

---

Patch is 1.38 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/105562.diff


241 Files Affected:

- (modified) llvm/lib/CodeGen/MachineCopyPropagation.cpp (+255-23) 
- (modified) llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll (+5-5) 
- (modified) llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll (+98-98) 
- (modified) llvm/test/CodeGen/AArch64/GlobalISel/merge-stores-truncating.ll (+1-2) 
- (modified) llvm/test/CodeGen/AArch64/aarch64-mulv.ll (+1-2) 
- (modified) llvm/test/CodeGen/AArch64/aarch64-wide-mul.ll (+4-6) 
- (modified) llvm/test/CodeGen/AArch64/addp-shuffle.ll (+2-4) 
- (added) llvm/test/CodeGen/AArch64/anti-dependencies-mcp.mir (+201) 
- (modified) llvm/test/CodeGen/AArch64/arm64-non-pow2-ldst.ll (+4-4) 
- (modified) llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll (+30-72) 
- (modified) llvm/test/CodeGen/AArch64/arm64-windows-calls.ll (+1-2) 
- (modified) llvm/test/CodeGen/AArch64/avoid-zero-copy.mir (+3) 
- (modified) llvm/test/CodeGen/AArch64/cgp-usubo.ll (+5-10) 
- (modified) llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll (+6-6) 
- (modified) llvm/test/CodeGen/AArch64/duplane-index-patfrags.ll (+4-8) 
- (modified) llvm/test/CodeGen/AArch64/fcmp.ll (+9-9) 
- (modified) llvm/test/CodeGen/AArch64/fexplog.ll (+180-330) 
- (modified) llvm/test/CodeGen/AArch64/fpext.ll (+14-32) 
- (modified) llvm/test/CodeGen/AArch64/fpow.ll (+20-36) 
- (modified) llvm/test/CodeGen/AArch64/fpowi.ll (+36-66) 
- (modified) llvm/test/CodeGen/AArch64/frem.ll (+20-36) 
- (modified) llvm/test/CodeGen/AArch64/fsincos.ll (+72-132) 
- (modified) llvm/test/CodeGen/AArch64/ldrpre-ldr-merge.mir (+76-76) 
- (modified) llvm/test/CodeGen/AArch64/llvm.exp10.ll (+6-12) 
- (modified) llvm/test/CodeGen/AArch64/load.ll (+1-2) 
- (modified) llvm/test/CodeGen/AArch64/lr-reserved-for-ra-live-in.ll (+2-2) 
- (modified) llvm/test/CodeGen/AArch64/machine-cp-sub-reg.mir (+3-3) 
- (modified) llvm/test/CodeGen/AArch64/machine-sink-kill-flags.ll (+2-3) 
- (modified) llvm/test/CodeGen/AArch64/named-vector-shuffles-neon.ll (+2-4) 
- (modified) llvm/test/CodeGen/AArch64/neon-extadd.ll (+18-36) 
- (modified) llvm/test/CodeGen/AArch64/neon-extmul.ll (+4-6) 
- (modified) llvm/test/CodeGen/AArch64/neon-perm.ll (+1-2) 
- (modified) llvm/test/CodeGen/AArch64/sext.ll (+27-54) 
- (modified) llvm/test/CodeGen/AArch64/shufflevector.ll (+5-12) 
- (modified) llvm/test/CodeGen/AArch64/spillfill-sve.mir (+6-106) 
- (modified) llvm/test/CodeGen/AArch64/streaming-compatible-memory-ops.ll (+2-3) 
- (modified) llvm/test/CodeGen/AArch64/sve-sext-zext.ll (+9-18) 
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc.ll (+2-2) 
- (modified) llvm/test/CodeGen/AArch64/sve-vector-deinterleave.ll (+5-10) 
- (modified) llvm/test/CodeGen/AArch64/sve-vector-interleave.ll (+2-4) 
- (modified) llvm/test/CodeGen/AArch64/vec_umulo.ll (+5-9) 
- (modified) llvm/test/CodeGen/AArch64/vecreduce-add.ll (+24-25) 
- (modified) llvm/test/CodeGen/AArch64/vselect-ext.ll (+7-10) 
- (modified) llvm/test/CodeGen/AArch64/zext-to-tbl.ll (+9-9) 
- (modified) llvm/test/CodeGen/AArch64/zext.ll (+27-54) 
- (modified) llvm/test/CodeGen/ARM/addsubo-legalization.ll (+2-4) 
- (modified) llvm/test/CodeGen/ARM/fpclamptosat_vec.ll (+19-24) 
- (modified) llvm/test/CodeGen/ARM/funnel-shift.ll (+2-3) 
- (modified) llvm/test/CodeGen/ARM/llvm.exp10.ll (+3-9) 
- (modified) llvm/test/CodeGen/ARM/load-combine-big-endian.ll (+3-9) 
- (modified) llvm/test/CodeGen/ARM/load-combine.ll (+2-6) 
- (modified) llvm/test/CodeGen/ARM/sub-cmp-peephole.ll (+6-14) 
- (modified) llvm/test/CodeGen/ARM/vecreduce-fadd-legalization-strict.ll (+16-18) 
- (modified) llvm/test/CodeGen/ARM/vlddup.ll (+10-20) 
- (modified) llvm/test/CodeGen/ARM/vldlane.ll (+9-22) 
- (modified) llvm/test/CodeGen/RISCV/alu64.ll (+2-4) 
- (modified) llvm/test/CodeGen/RISCV/branch-on-zero.ll (+2-4) 
- (modified) llvm/test/CodeGen/RISCV/condops.ll (+6-12) 
- (modified) llvm/test/CodeGen/RISCV/double-fcmp-strict.ll (+12-24) 
- (modified) llvm/test/CodeGen/RISCV/float-fcmp-strict.ll (+6-12) 
- (modified) llvm/test/CodeGen/RISCV/half-fcmp-strict.ll (+6-12) 
- (modified) llvm/test/CodeGen/RISCV/llvm.frexp.ll (+12-18) 
- (modified) llvm/test/CodeGen/RISCV/machine-cp.mir (+5-4) 
- (modified) llvm/test/CodeGen/RISCV/neg-abs.ll (+4-6) 
- (modified) llvm/test/CodeGen/RISCV/nontemporal.ll (+50-75) 
- (modified) llvm/test/CodeGen/RISCV/overflow-intrinsics.ll (+5-8) 
- (modified) llvm/test/CodeGen/RISCV/rv32zbb-zbkb.ll (+3-5) 
- (added) llvm/test/CodeGen/RISCV/rv64-legal-i32/xaluo.ll (+2603) 
- (modified) llvm/test/CodeGen/RISCV/rv64-statepoint-call-lowering.ll (+1-2) 
- (modified) llvm/test/CodeGen/RISCV/rvv/constant-folding-crash.ll (+2-4) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll (+1-1) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum-vp.ll (+17-25) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll (+20-32) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum-vp.ll (+17-25) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll (+20-32) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-fp.ll (+4-4) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-int.ll (+4-4) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll (+12-12) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll (+12-12) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll (+24-34) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll (+22-30) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll (+24-34) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fminimum-vp.ll (+22-30) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll (+3-6) 
- (modified) llvm/test/CodeGen/RISCV/rvv/mask-reg-alloc.mir (+1-1) 
- (modified) llvm/test/CodeGen/RISCV/rvv/no-reserved-frame.ll (+3-4) 
- (modified) llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll (+1-1) 
- (modified) llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll (+1-1) 
- (modified) llvm/test/CodeGen/RISCV/rvv/vmfeq.ll (+6-12) 
- (modified) llvm/test/CodeGen/RISCV/rvv/vmfge.ll (+6-12) 
- (modified) llvm/test/CodeGen/RISCV/rvv/vmfgt.ll (+6-12) 
- (modified) llvm/test/CodeGen/RISCV/rvv/vmfle.ll (+6-12) 
- (modified) llvm/test/CodeGen/RISCV/rvv/vmflt.ll (+6-12) 
- (modified) llvm/test/CodeGen/RISCV/rvv/vmfne.ll (+6-12) 
- (modified) llvm/test/CodeGen/RISCV/rvv/vmseq.ll (+10-20) 
- (modified) llvm/test/CodeGen/RISCV/rvv/vmsge.ll (+10-20) 
- (modified) llvm/test/CodeGen/RISCV/rvv/vmsgeu.ll (+10-20) 
- (modified) llvm/test/CodeGen/RISCV/rvv/vmsgt.ll (+10-20) 
- (modified) llvm/test/CodeGen/RISCV/rvv/vmsgtu.ll (+10-20) 
- (modified) llvm/test/CodeGen/RISCV/rvv/vmsle.ll (+10-20) 
- (modified) llvm/test/CodeGen/RISCV/rvv/vmsleu.ll (+10-20) 
- (modified) llvm/test/CodeGen/RISCV/rvv/vmslt.ll (+10-20) 
- (modified) llvm/test/CodeGen/RISCV/rvv/vmsltu.ll (+10-20) 
- (modified) llvm/test/CodeGen/RISCV/rvv/vmsne.ll (+10-20) 
- (modified) llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll (+1-1) 
- (modified) llvm/test/CodeGen/RISCV/rvv/vsetvli-regression.ll (+1-2) 
- (modified) llvm/test/CodeGen/RISCV/rvv/vxrm.mir (+1-1) 
- (modified) llvm/test/CodeGen/RISCV/shifts.ll (+2-4) 
- (modified) llvm/test/CodeGen/RISCV/srem-vector-lkk.ll (+11-20) 
- (modified) llvm/test/CodeGen/RISCV/tail-calls.ll (+6-8) 
- (modified) llvm/test/CodeGen/RISCV/unaligned-load-store.ll (+1-2) 
- (modified) llvm/test/CodeGen/RISCV/urem-vector-lkk.ll (+13-24) 
- (modified) llvm/test/CodeGen/RISCV/wide-mem.ll (+1-2) 
- (modified) llvm/test/CodeGen/RISCV/wide-scalar-shift-by-byte-multiple-legalization.ll (+2-4) 
- (modified) llvm/test/CodeGen/RISCV/wide-scalar-shift-legalization.ll (+2-4) 
- (modified) llvm/test/CodeGen/RISCV/xaluo.ll (+27-54) 
- (modified) llvm/test/CodeGen/RISCV/xtheadmemidx.ll (+1-1) 
- (modified) llvm/test/CodeGen/RISCV/zcmp-cm-popretz.mir (+8-8) 
- (modified) llvm/test/CodeGen/Thumb/smul_fix_sat.ll (+2-4) 
- (modified) llvm/test/CodeGen/Thumb/umulo-128-legalisation-lowering.ll (+2-2) 
- (modified) llvm/test/CodeGen/Thumb2/mve-div-expand.ll (+6-11) 
- (modified) llvm/test/CodeGen/Thumb2/mve-fmath.ll (+29-66) 
- (modified) llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll (+16-16) 
- (modified) llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll (+19-21) 
- (modified) llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll (+20-22) 
- (modified) llvm/test/CodeGen/Thumb2/mve-frint.ll (+6-18) 
- (modified) llvm/test/CodeGen/Thumb2/mve-laneinterleaving.ll (+3-3) 
- (modified) llvm/test/CodeGen/Thumb2/mve-sext-masked-load.ll (+3-6) 
- (modified) llvm/test/CodeGen/Thumb2/mve-shuffle.ll (+12-12) 
- (modified) llvm/test/CodeGen/Thumb2/mve-shufflemov.ll (+25-25) 
- (modified) llvm/test/CodeGen/Thumb2/mve-simple-arith.ll (+6-12) 
- (modified) llvm/test/CodeGen/Thumb2/mve-vabdus.ll (+3-3) 
- (modified) llvm/test/CodeGen/Thumb2/mve-vcvt.ll (+2-6) 
- (modified) llvm/test/CodeGen/Thumb2/mve-vcvt16.ll (+2-2) 
- (modified) llvm/test/CodeGen/Thumb2/mve-vld4.ll (+1-3) 
- (modified) llvm/test/CodeGen/Thumb2/mve-vmovn.ll (+2-2) 
- (modified) llvm/test/CodeGen/Thumb2/mve-vst4.ll (+2-2) 
- (modified) llvm/test/CodeGen/Thumb2/mve-zext-masked-load.ll (+3-7) 
- (modified) llvm/test/CodeGen/X86/apx/mul-i1024.ll (+13-6) 
- (modified) llvm/test/CodeGen/X86/atomic-unordered.ll (+1-1) 
- (modified) llvm/test/CodeGen/X86/avx10_2_512ni-intrinsics.ll (+2-4) 
- (modified) llvm/test/CodeGen/X86/avx10_2ni-intrinsics.ll (+4-8) 
- (modified) llvm/test/CodeGen/X86/avx512-calling-conv.ll (+19-19) 
- (modified) llvm/test/CodeGen/X86/avx512-gfni-intrinsics.ll (+24-36) 
- (modified) llvm/test/CodeGen/X86/avx512-insert-extract.ll (+7-7) 
- (modified) llvm/test/CodeGen/X86/avx512-intrinsics.ll (+2-4) 
- (modified) llvm/test/CodeGen/X86/avx512-mask-op.ll (+5-5) 
- (modified) llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll (+8-12) 
- (modified) llvm/test/CodeGen/X86/avx512bw-intrinsics.ll (+2-4) 
- (modified) llvm/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll (+4-8) 
- (modified) llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll (+10-20) 
- (modified) llvm/test/CodeGen/X86/avx512vbmi2vl-intrinsics-upgrade.ll (+28-56) 
- (modified) llvm/test/CodeGen/X86/avx512vbmi2vl-intrinsics.ll (+4-8) 
- (modified) llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll (+6-6) 
- (modified) llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll (+1-1) 
- (modified) llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll (+1-1) 
- (modified) llvm/test/CodeGen/X86/element-wise-atomic-memory-intrinsics.ll (+2-4) 
- (modified) llvm/test/CodeGen/X86/expand-vp-cast-intrinsics.ll (+1-2) 
- (modified) llvm/test/CodeGen/X86/extract-bits.ll (+13-20) 
- (modified) llvm/test/CodeGen/X86/icmp-abs-C-vec.ll (+2-4) 
- (modified) llvm/test/CodeGen/X86/is_fpclass.ll (+2-4) 
- (modified) llvm/test/CodeGen/X86/ldexp.ll (+1-2) 
- (modified) llvm/test/CodeGen/X86/legalize-shl-vec.ll (+8-8) 
- (modified) llvm/test/CodeGen/X86/matrix-multiply.ll (+29-31) 
- (modified) llvm/test/CodeGen/X86/mul-i1024.ll (+1-1) 
- (modified) llvm/test/CodeGen/X86/mul-i256.ll (+1-1) 
- (modified) llvm/test/CodeGen/X86/mul-i512.ll (+1-1) 
- (modified) llvm/test/CodeGen/X86/peephole-na-phys-copy-folding.ll (+1-1) 
- (modified) llvm/test/CodeGen/X86/pmul.ll (+1-2) 
- (modified) llvm/test/CodeGen/X86/pmulh.ll (+6-6) 
- (modified) llvm/test/CodeGen/X86/pointer-vector.ll (+1-2) 
- (modified) llvm/test/CodeGen/X86/pr11334.ll (+1-2) 
- (modified) llvm/test/CodeGen/X86/pr34177.ll (+1-1) 
- (modified) llvm/test/CodeGen/X86/pr61964.ll (+4-6) 
- (modified) llvm/test/CodeGen/X86/shift-i128.ll (+4-4) 
- (modified) llvm/test/CodeGen/X86/sibcall.ll (+1-2) 
- (modified) llvm/test/CodeGen/X86/smul_fix.ll (+1-1) 
- (modified) llvm/test/CodeGen/X86/smul_fix_sat.ll (+1-2) 
- (modified) llvm/test/CodeGen/X86/smulo-128-legalisation-lowering.ll (+6-6) 
- (modified) llvm/test/CodeGen/X86/subvectorwise-store-of-vector-splat.ll (+15-15) 
- (modified) llvm/test/CodeGen/X86/umul-with-overflow.ll (+3-4) 
- (modified) llvm/test/CodeGen/X86/umul_fix.ll (+1-1) 
- (modified) llvm/test/CodeGen/X86/umul_fix_sat.ll (+2-4) 
- (modified) llvm/test/CodeGen/X86/umulo-128-legalisation-lowering.ll (+1-2) 
- (modified) llvm/test/CodeGen/X86/vec_int_to_fp.ll (+5-10) 
- (modified) llvm/test/CodeGen/X86/vec_saddo.ll (+5-9) 
- (modified) llvm/test/CodeGen/X86/vec_ssubo.ll (+2-3) 
- (modified) llvm/test/CodeGen/X86/vec_umulo.ll (+11-18) 
- (modified) llvm/test/CodeGen/X86/vector-interleave.ll (+2-4) 
- (modified) llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-2.ll (+2-2) 
- (modified) llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-3.ll (+8-8) 
- (modified) llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-4.ll (+2-2) 
- (modified) llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-5.ll (+13-13) 
- (modified) llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-6.ll (+10-10) 
- (modified) llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-7.ll (+23-26) 
- (modified) llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-8.ll (+32-32) 
- (modified) llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-3.ll (+22-22) 
- (modified) llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-4.ll (+20-20) 
- (modified) llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-5.ll (+65-68) 
- (modified) llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-6.ll (+24-27) 
- (modified) llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-7.ll (+30-30) 
- (modified) llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-8.ll (+85-85) 
- (modified) llvm/test/CodeGen/X86/vector-interleaved-load-i64-stride-4.ll (+6-6) 
- (modified) llvm/test/CodeGen/X86/vector-interleaved-load-i64-stride-5.ll (+20-20) 
- (modified) llvm/test/CodeGen/X86/vector-interleaved-load-i64-stride-6.ll (+8-8) 
- (modified) llvm/test/CodeGen/X86/vector-interleaved-load-i64-stride-7.ll (+80-84) 
- (modified) llvm/test/CodeGen/X86/vector-interleaved-load-i64-stride-8.ll (+232-232) 
- (modified) llvm/test/CodeGen/X86/vector-interleaved-load-i8-stride-3.ll (+3-3) 
- (modified) llvm/test/CodeGen/X86/vector-interleaved-load-i8-stride-4.ll (+6-6) 
- (modified) llvm/test/CodeGen/X86/vector-interleaved-load-i8-stride-5.ll (+5-5) 
- (modified) llvm/test/CodeGen/X86/vector-interleaved-load-i8-stride-6.ll (+11-11) 
- (modified) llvm/test/CodeGen/X86/vector-interleaved-load-i8-stride-7.ll (+41-41) 
- (modified) llvm/test/CodeGen/X86/vector-interleaved-load-i8-stride-8.ll (+48-50) 
- (modified) llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-3.ll (+32-32) 
- (modified) llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-4.ll (+12-12) 
- (modified) llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-5.ll (+15-15) 
- (modified) llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-6.ll (+24-24) 
- (modified) llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-7.ll (+39-42) 
- (modified) llvm/test/CodeGen/X86/vector-interleaved-store-i32-stride-2.ll (+12-12) 
- (modified) llvm/test/CodeGen/X86/vector-interleaved-store-i32-stride-3.ll (+2-2) 
- (modified) llvm/test/CodeGen/X86/vector-interleaved-store-i32-stride-5.ll (+11-11) 
- (modified) llvm/test/CodeGen/X86/vector-interleaved-store-i32-stride-6.ll (+56-60) 
- (modified) llvm/test/CodeGen/X86/vector-interleaved-store-i32-stride-7.ll (+57-58) 
- (modified) llvm/test/CodeGen/X86/vector-interleaved-store-i32-stride-8.ll (+2-2) 
- (modified) llvm/test/CodeGen/X86/vector-interleaved-store-i64-stride-3.ll (+4-4) 
- (modified) llvm/test/CodeGen/X86/vector-interleaved-store-i64-stride-4.ll (+24-24) 
- (modified) llvm/test/CodeGen/X86/vector-interleaved-store-i64-stride-5.ll (+5-5) 
- (modified) llvm/test/CodeGen/X86/vector-interleaved-store-i64-stride-7.ll (+140-142) 
- (modified) llvm/test/CodeGen/X86/vector-interleaved-store-i64-stride-8.ll (+48-48) 
- (modified) llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-3.ll (+1-1) 
- (modified) llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-5.ll (+4-4) 
- (modified) llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-6.ll (+19-19) 
- (modified) llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-7.ll (+17-20) 
- (modified) llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-8.ll (+4-4) 
- (modified) llvm/test/CodeGen/X86/vector-intrinsics.ll (+2-2) 
- (modified) llvm/test/CodeGen/X86/vector-sext.ll (+5-10) 
- (modified) llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll (+10-11) 
- (modified) llvm/test/CodeGen/X86/vector-zext.ll (+3-3) 
- (modified) llvm/test/CodeGen/X86/wide-scalar-shift-legalization.ll (+1-1) 
- (modified) llvm/test/CodeGen/X86/x86-interleaved-access.ll (+5-5) 
- (modified) llvm/test/CodeGen/X86/xmulo.ll (+46-88) 


``````````diff
diff --git a/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/llvm/lib/CodeGen/MachineCopyPropagation.cpp
index b34e0939d1c7c6..493d7cd7d8c920 100644
--- a/llvm/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/llvm/lib/CodeGen/MachineCopyPropagation.cpp
@@ -48,19 +48,27 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/iterator_range.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/LiveIntervals.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
@@ -70,9 +78,15 @@
 #include "llvm/Pass.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/DebugCounter.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
+#include <algorithm>
 #include <cassert>
 #include <iterator>
+#include <optional>
+#include <queue>
+#include <utility>
+#include <variant>
 
 using namespace llvm;
 
@@ -92,6 +106,113 @@ static cl::opt<cl::boolOrDefault>
     EnableSpillageCopyElimination("enable-spill-copy-elim", cl::Hidden);
 
 namespace {
+// A ScheduleDAG subclass that is used as a dependency graph.
+class ScheduleDAGMCP : public ScheduleDAGInstrs {
+public:
+  void schedule() override {
+    llvm_unreachable("This schedule dag is only used as a dependency graph for "
+                     "Machine Copy Propagation\n");
+  }
+
+  ScheduleDAGMCP(MachineFunction &MF, const MachineLoopInfo *MLI,
+                 bool RemoveKillFlags = false)
+      : ScheduleDAGInstrs(MF, MLI, RemoveKillFlags) {
+    CanHandleTerminators = true;
+  }
+};
+
+static bool moveInstructionsOutOfTheWayIfWeCan(SUnit *Dst,
+                                               SUnit *Src,
+                                               ScheduleDAGMCP &DG) {
+  MachineInstr *DstInstr = Dst->getInstr();
+  MachineInstr *SrcInstr = Src->getInstr();
+
+  if (DstInstr == nullptr || SrcInstr == nullptr)
+    return false;
+  MachineBasicBlock *MBB = SrcInstr->getParent();
+  assert(DstInstr->getParent() == SrcInstr->getParent() &&
+         "This function only operates on a basic block level.");
+
+  int SectionSize =
+      std::distance(SrcInstr->getIterator(), DstInstr->getIterator());
+
+  // The bit vector representing the instructions in the section.
+  // This vector stores which instruction needs to be moved and which does not.
+  BitVector SectionInstr(SectionSize, false);
+
+  // The queue for the breadth first search.
+  std::queue<const SUnit *> Edges;
+
+  // Process the children of a node.
+  // Every node is checked before it is put into the queue.
+  // A node is enqueued if it has no dependency on the source of the copy
+  // (unless it is the destination node, which is a special case indicated
+  // by a flag) and is located between the source of the copy and the
+  // destination of the copy.
+  auto ProcessSNodeChildren = [SrcInstr, &SectionSize, &SectionInstr](
+                                  std::queue<const SUnit *> &Queue,
+                                  const SUnit *Node, bool IsRoot) -> bool {
+    for (llvm::SDep I : Node->Preds) {
+      SUnit *SU = I.getSUnit();
+      MachineInstr &MI = *(SU->getInstr());
+      if (!IsRoot && &MI == SrcInstr)
+        return false;
+
+      int DestinationFromSource =
+          std::distance(SrcInstr->getIterator(), MI.getIterator());
+
+      if (&MI != SrcInstr && DestinationFromSource > 0 &&
+          DestinationFromSource < SectionSize) {
+        // If an instruction is already marked to be moved, then it has
+        // already been processed together with all of its dependencies.
+        // We do not need to do anything with it again.
+        if (!SectionInstr[DestinationFromSource]) {
+          SectionInstr[DestinationFromSource] = true;
+          Queue.push(SU);
+        }
+      }
+    }
+    return true;
+  };
+
+  // The BFS happens here.
+  //
+  // We could not use the ADT implementation of BFS here: in the ADT graph
+  // traversals there is no way to select exactly which children are put
+  // into the "nodes to traverse" queue or stack.
+  //
+  // We also could not work around this by checking the need for the node
+  // in the processing stage, because in some contexts it matters what the
+  // parent of the instruction was: namely, when the traversal starts at
+  // the destination of the copy. That instruction is expected to have the
+  // copy source as a dependency, and the dependency must be ignored there,
+  // while for any other instruction a dependency on the copy source means
+  // the end of the traversal. If we could not control which nodes are
+  // processed and we came across the copy source, we would have to find
+  // out which node had it as a dependency. We could check which nodes the
+  // copy source is a dependency of, but to decide whether another already
+  // traversed instruction depends on the source we would have to search
+  // the traversed list for the instructions that depend on the source.
+  // This would introduce extra cost.
+  ProcessSNodeChildren(Edges, Dst, true);
+  while (!Edges.empty()) {
+    const auto *Current = Edges.front();
+    Edges.pop();
+    if (!ProcessSNodeChildren(Edges, Current, false))
+      return false;
+  }
+
+  // If all of the dependencies were deemed valid during the BFS, we move
+  // them before the copy source here, keeping their relative order to each
+  // other.
+  auto CurrentInst = SrcInstr->getIterator();
+  for (int I = 0; I < SectionSize; I++) {
+    if (SectionInstr[I])
+      MBB->splice(SrcInstr->getIterator(), MBB, CurrentInst->getIterator());
+    ++CurrentInst;
+  }
+  return true;
+}
 
 static std::optional<DestSourcePair> isCopyInstr(const MachineInstr &MI,
                                                  const TargetInstrInfo &TII,
@@ -114,6 +235,7 @@ class CopyTracker {
   };
 
   DenseMap<MCRegUnit, CopyInfo> Copies;
+  DenseMap<MCRegUnit, CopyInfo> InvalidCopies;
 
 public:
   /// Mark all of the given registers and their subregisters as unavailable for
@@ -130,9 +252,14 @@ class CopyTracker {
     }
   }
 
+  int getInvalidCopiesSize() {
+    return InvalidCopies.size();
+  }
+
   /// Remove register from copy maps.
   void invalidateRegister(MCRegister Reg, const TargetRegisterInfo &TRI,
-                          const TargetInstrInfo &TII, bool UseCopyInstr) {
+                          const TargetInstrInfo &TII, bool UseCopyInstr,
+                          bool MayStillBePropagated = false) {
     // Since Reg might be a subreg of some registers, only invalidate Reg is not
     // enough. We have to find the COPY defines Reg or registers defined by Reg
     // and invalidate all of them. Similarly, we must invalidate all of the
@@ -158,8 +285,11 @@ class CopyTracker {
           InvalidateCopy(MI);
       }
     }
-    for (MCRegUnit Unit : RegUnitsToInvalidate)
+    for (MCRegUnit Unit : RegUnitsToInvalidate) {
+      if (Copies.contains(Unit) && MayStillBePropagated)
+        InvalidCopies[Unit] = Copies[Unit];
       Copies.erase(Unit);
+    }
   }
 
   /// Clobber a single register, removing it from the tracker's copy maps.
@@ -252,6 +382,10 @@ class CopyTracker {
     return !Copies.empty();
   }
 
+  bool hasAnyInvalidCopies() {
+    return !InvalidCopies.empty();
+  }
+
   MachineInstr *findCopyForUnit(MCRegUnit RegUnit,
                                 const TargetRegisterInfo &TRI,
                                 bool MustBeAvailable = false) {
@@ -263,6 +397,17 @@ class CopyTracker {
     return CI->second.MI;
   }
 
+  MachineInstr *findInvalidCopyForUnit(MCRegUnit RegUnit,
+                                const TargetRegisterInfo &TRI,
+                                bool MustBeAvailable = false) {
+    auto CI = InvalidCopies.find(RegUnit);
+    if (CI == InvalidCopies.end())
+      return nullptr;
+    if (MustBeAvailable && !CI->second.Avail)
+      return nullptr;
+    return CI->second.MI;
+  }
+
   MachineInstr *findCopyDefViaUnit(MCRegUnit RegUnit,
                                    const TargetRegisterInfo &TRI) {
     auto CI = Copies.find(RegUnit);
@@ -274,12 +419,28 @@ class CopyTracker {
     return findCopyForUnit(RU, TRI, true);
   }
 
+  MachineInstr *findInvalidCopyDefViaUnit(MCRegUnit RegUnit,
+                                   const TargetRegisterInfo &TRI) {
+    auto CI = InvalidCopies.find(RegUnit);
+    if (CI == InvalidCopies.end())
+      return nullptr;
+    if (CI->second.DefRegs.size() != 1)
+      return nullptr;
+    MCRegUnit RU = *TRI.regunits(CI->second.DefRegs[0]).begin();
+    return findInvalidCopyForUnit(RU, TRI, false);
+  }
+
+  // TODO: This is ugly; there should be a more elegant solution for
+  //       searching invalid copies. Create a variant that returns either a
+  //       valid copy, an invalid copy, or no copy at all (std::monostate).
   MachineInstr *findAvailBackwardCopy(MachineInstr &I, MCRegister Reg,
                                       const TargetRegisterInfo &TRI,
                                       const TargetInstrInfo &TII,
-                                      bool UseCopyInstr) {
+                                      bool UseCopyInstr,
+                                      bool SearchInvalid = false) {
     MCRegUnit RU = *TRI.regunits(Reg).begin();
-    MachineInstr *AvailCopy = findCopyDefViaUnit(RU, TRI);
+    MachineInstr *AvailCopy = SearchInvalid ? findInvalidCopyDefViaUnit(RU, TRI)
+                                            : findCopyDefViaUnit(RU, TRI);
 
     if (!AvailCopy)
       return nullptr;
@@ -377,13 +538,20 @@ class CopyTracker {
 
   void clear() {
     Copies.clear();
+    InvalidCopies.clear();
   }
 };
 
+using Copy = MachineInstr*;
+using InvalidCopy = std::pair<Copy, MachineInstr *>;
+using CopyLookupResult = std::variant<std::monostate, Copy, InvalidCopy>;
+
 class MachineCopyPropagation : public MachineFunctionPass {
+  LiveIntervals *LIS = nullptr;
   const TargetRegisterInfo *TRI = nullptr;
   const TargetInstrInfo *TII = nullptr;
   const MachineRegisterInfo *MRI = nullptr;
+  AAResults *AA = nullptr;
 
   // Return true if this is a copy instruction and false otherwise.
   bool UseCopyInstr;
@@ -398,6 +566,7 @@ class MachineCopyPropagation : public MachineFunctionPass {
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.setPreservesCFG();
+    AU.addUsedIfAvailable<LiveIntervalsWrapperPass>();
     MachineFunctionPass::getAnalysisUsage(AU);
   }
 
@@ -414,11 +583,11 @@ class MachineCopyPropagation : public MachineFunctionPass {
   void ReadRegister(MCRegister Reg, MachineInstr &Reader, DebugType DT);
   void readSuccessorLiveIns(const MachineBasicBlock &MBB);
   void ForwardCopyPropagateBlock(MachineBasicBlock &MBB);
-  void BackwardCopyPropagateBlock(MachineBasicBlock &MBB);
+  void BackwardCopyPropagateBlock(MachineBasicBlock &MBB, bool ResolveAntiDeps = false);
   void EliminateSpillageCopies(MachineBasicBlock &MBB);
   bool eraseIfRedundant(MachineInstr &Copy, MCRegister Src, MCRegister Def);
   void forwardUses(MachineInstr &MI);
-  void propagateDefs(MachineInstr &MI);
+  void propagateDefs(MachineInstr &MI, ScheduleDAGMCP &DG, bool ResolveAntiDeps = false);
   bool isForwardableRegClassCopy(const MachineInstr &Copy,
                                  const MachineInstr &UseI, unsigned UseIdx);
   bool isBackwardPropagatableRegClassCopy(const MachineInstr &Copy,
@@ -427,7 +596,7 @@ class MachineCopyPropagation : public MachineFunctionPass {
   bool hasImplicitOverlap(const MachineInstr &MI, const MachineOperand &Use);
   bool hasOverlappingMultipleDef(const MachineInstr &MI,
                                  const MachineOperand &MODef, Register Def);
-
+  
   /// Candidates for deletion.
   SmallSetVector<MachineInstr *, 8> MaybeDeadCopies;
 
@@ -986,8 +1155,10 @@ static bool isBackwardPropagatableCopy(const DestSourcePair &CopyOperands,
   return CopyOperands.Source->isRenamable() && CopyOperands.Source->isKill();
 }
 
-void MachineCopyPropagation::propagateDefs(MachineInstr &MI) {
-  if (!Tracker.hasAnyCopies())
+void MachineCopyPropagation::propagateDefs(MachineInstr &MI,
+                                           ScheduleDAGMCP &DG,
+                                           bool MoveDependenciesForBetterCopyPropagation) {
+  if (!Tracker.hasAnyCopies() && !Tracker.hasAnyInvalidCopies())
     return;
 
   for (unsigned OpIdx = 0, OpEnd = MI.getNumOperands(); OpIdx != OpEnd;
@@ -1010,8 +1181,30 @@ void MachineCopyPropagation::propagateDefs(MachineInstr &MI) {
 
     MachineInstr *Copy = Tracker.findAvailBackwardCopy(
         MI, MODef.getReg().asMCReg(), *TRI, *TII, UseCopyInstr);
-    if (!Copy)
-      continue;
+    if (!Copy) {
+      if (!MoveDependenciesForBetterCopyPropagation)
+        continue;
+
+      LLVM_DEBUG(
+          dbgs()
+          << "MCP: Couldn't find any backward copy that has no dependency.\n");
+      Copy = Tracker.findAvailBackwardCopy(MI, MODef.getReg().asMCReg(), *TRI,
+                                           *TII, UseCopyInstr, true);
+      if (!Copy) {
+        LLVM_DEBUG(
+            dbgs()
+            << "MCP: Couldn't find any backward copy that has dependency.\n");
+        continue;
+      }
+      LLVM_DEBUG(
+          dbgs()
+          << "MCP: Found potential backward copy that has dependency.\n");
+      SUnit *DstSUnit = DG.getSUnit(Copy);
+      SUnit *SrcSUnit = DG.getSUnit(&MI);
+
+      if (!moveInstructionsOutOfTheWayIfWeCan(DstSUnit, SrcSUnit, DG))
+        continue;
+    }
 
     std::optional<DestSourcePair> CopyOperands =
         isCopyInstr(*Copy, *TII, UseCopyInstr);
@@ -1033,23 +1226,35 @@ void MachineCopyPropagation::propagateDefs(MachineInstr &MI) {
     LLVM_DEBUG(dbgs() << "MCP: Replacing " << printReg(MODef.getReg(), TRI)
                       << "\n     with " << printReg(Def, TRI) << "\n     in "
                       << MI << "     from " << *Copy);
+    if (!MoveDependenciesForBetterCopyPropagation) {
+      MODef.setReg(Def);
+      MODef.setIsRenamable(CopyOperands->Destination->isRenamable());
 
-    MODef.setReg(Def);
-    MODef.setIsRenamable(CopyOperands->Destination->isRenamable());
-
-    LLVM_DEBUG(dbgs() << "MCP: After replacement: " << MI << "\n");
-    MaybeDeadCopies.insert(Copy);
-    Changed = true;
-    ++NumCopyBackwardPropagated;
+      LLVM_DEBUG(dbgs() << "MCP: After replacement: " << MI << "\n");
+      MaybeDeadCopies.insert(Copy);
+      Changed = true;
+      ++NumCopyBackwardPropagated;
+    }
   }
 }
 
 void MachineCopyPropagation::BackwardCopyPropagateBlock(
-    MachineBasicBlock &MBB) {
+    MachineBasicBlock &MBB, bool MoveDependenciesForBetterCopyPropagation) {
+  ScheduleDAGMCP DG{*(MBB.getParent()), nullptr, false};
+  if (MoveDependenciesForBetterCopyPropagation) {
+    DG.startBlock(&MBB);
+    DG.enterRegion(&MBB, MBB.begin(), MBB.end(), MBB.size());
+    DG.buildSchedGraph(nullptr);
+    // DG.viewGraph();
+  }
+ 
+
   LLVM_DEBUG(dbgs() << "MCP: BackwardCopyPropagateBlock " << MBB.getName()
                     << "\n");
 
   for (MachineInstr &MI : llvm::make_early_inc_range(llvm::reverse(MBB))) {
+    //llvm::errs() << "Next MI: ";
+    //MI.dump();
     // Ignore non-trivial COPYs.
     std::optional<DestSourcePair> CopyOperands =
         isCopyInstr(MI, *TII, UseCopyInstr);
@@ -1062,7 +1267,7 @@ void MachineCopyPropagation::BackwardCopyPropagateBlock(
         // just let forward cp do COPY-to-COPY propagation.
         if (isBackwardPropagatableCopy(*CopyOperands, *MRI)) {
           Tracker.invalidateRegister(SrcReg.asMCReg(), *TRI, *TII,
-                                     UseCopyInstr);
+                                     UseCopyInstr, MoveDependenciesForBetterCopyPropagation);
           Tracker.invalidateRegister(DefReg.asMCReg(), *TRI, *TII,
                                      UseCopyInstr);
           Tracker.trackCopy(&MI, *TRI, *TII, UseCopyInstr);
@@ -1077,10 +1282,10 @@ void MachineCopyPropagation::BackwardCopyPropagateBlock(
         MCRegister Reg = MO.getReg().asMCReg();
         if (!Reg)
           continue;
-        Tracker.invalidateRegister(Reg, *TRI, *TII, UseCopyInstr);
+        Tracker.invalidateRegister(Reg, *TRI, *TII, UseCopyInstr, false);
       }
 
-    propagateDefs(MI);
+    propagateDefs(MI, DG, MoveDependenciesForBetterCopyPropagation);
     for (const MachineOperand &MO : MI.operands()) {
       if (!MO.isReg())
         continue;
@@ -1104,7 +1309,7 @@ void MachineCopyPropagation::BackwardCopyPropagateBlock(
           }
         } else {
           Tracker.invalidateRegister(MO.getReg().asMCReg(), *TRI, *TII,
-                                     UseCopyInstr);
+                                     UseCopyInstr, MoveDependenciesForBetterCopyPropagation);
         }
       }
     }
@@ -1122,6 +1327,15 @@ void MachineCopyPropagation::BackwardCopyPropagateBlock(
     Copy->eraseFromParent();
     ++NumDeletes;
   }
+  if (MoveDependenciesForBetterCopyPropagation) {
+    DG.exitRegion();
+    DG.finishBlock();
+    // QUESTION: Does it make sense to keep the kill flags here?
+    //           In other parts of this pass we just throw out
+    //           the kill flags.
+    DG.fixupKills(MBB);
+  }
+
 
   MaybeDeadCopies.clear();
   CopyDbgUsers.clear();
@@ -1472,11 +1686,29 @@ bool MachineCopyPropagation::runOnMachineFunction(MachineFunction &MF) {
   TRI = MF.getSubtarget().getRegisterInfo();
   TII = MF.getSubtarget().getInstrInfo();
   MRI = &MF.getRegInfo();
+  auto *LISWrapper = getAnalysisIfAvailable<LiveIntervalsWrapperPass>();
+  LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr;
 
   for (MachineBasicBlock &MBB : MF) {
     if (isSpillageCopyElimEnabled)
       EliminateSpillageCopies(MBB);
+
+    // BackwardCopyPropagateBlock happens in two stages.
+    // First we move out of the way those unnecessary dependencies
+    // that may block copy propagation.
+    //
+    // The reason for this two-stage approach is that the ScheduleDAG cannot
+    // handle register renaming.
+    // QUESTION: I think these two stages could be merged if I were to
+    // change the renaming mechanism.
+    //
+    // The renaming would not happen immediately. A data structure would
+    // record which register should be renamed to what, and the renaming
+    // would happen after the backward propagation has concluded.
+    BackwardCopyPropagateBlock(MBB, true);
+    // Then we do the actual copy propagation.
     BackwardCopyPropagateBlock(MBB);
+
     ForwardCopyPropagateBlock(MBB);
   }
 
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
index de3f323891a36a..92575d701f4281 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
@@ -6026,8 +6026,8 @@ define { i8, i1 } @cmpxchg_i8(ptr %ptr, i8 %desired, i8 %new) {
 ; CHECK-OUTLINE-O1-NEXT:    .cfi_offset w29, -16
 ; CHECK-OUTLINE-O1-NEXT:    .cfi_offset w19, -24
 ; CHECK-OUTLINE-O1-NEXT:    .cfi_offset w20, -32
-; CHECK-OUTLINE-O1-NEXT:    mov x3, x0
 ; CHECK-OUTLINE-O1-NEXT:    mov w19, w1
+; CHECK-OUTLINE-O1-NEXT:    mov x3, x0
 ; CHECK-OUTLINE-O1-NEXT:    mov w1, w2
 ; CHECK-OUTLINE-O1-NEXT:    mov w0, w19
 ; CHECK-OUTLINE-O1-NEXT:    mov x2, x3
@@ -6133,8 +6133,8 @@ define { i16, i1 } @cmpxchg_i16(ptr %ptr, i16 %desired, i16 %new) {
 ; CHECK-OUTLINE-O1-NEXT:    .cfi_offset w29, -16
 ; CHECK-OUTLINE-O1-NEXT:    .cfi_offset w19, -24
 ; CHECK-OUTLINE-O1-NEXT:    .cfi_offset w20, -32
-; CHECK-OUTLINE-O1-NEXT:    mov x3, x0
 ; CHECK-OUTLI...
[truncated]

``````````
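
Stepping back from the truncated diff, the overall shape of the change may be easier to see as a condensed sketch: the pass drives `ScheduleDAGInstrs` purely as a per-block dependency graph and never asks it to schedule anything. The sketch below is hypothetical glue code, not the patch itself; it only uses the `ScheduleDAGInstrs` calls that appear in the diff (the patch passes its `ScheduleDAGMCP` subclass) and omits the propagation logic.

```cpp
// Hypothetical condensed sketch of the dependency-graph plumbing around the
// backward-propagation walk; the function name is made up.
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"

using namespace llvm;

static void backwardPropagateWithDependencyGraph(ScheduleDAGInstrs &DG,
                                                 MachineBasicBlock &MBB) {
  // Build the per-block dependency DAG; schedule() is never called.
  DG.startBlock(&MBB);
  DG.enterRegion(&MBB, MBB.begin(), MBB.end(), MBB.size());
  DG.buildSchedGraph(/*AA=*/nullptr);

  for (MachineInstr &MI : make_early_inc_range(reverse(MBB))) {
    // For a COPY whose backward propagation is blocked, the patch maps MI and
    // the copy to their SUnits with DG.getSUnit() and runs the BFS over
    // SUnit::Preds (moveInstructionsOutOfTheWayIfWeCan) to decide whether the
    // blocking instructions can be spliced above the copy source.
    (void)MI;
  }

  DG.exitRegion();
  DG.finishBlock();
  // Splicing instructions invalidates kill flags, so they are recomputed for
  // the block afterwards (the patch raises the question of whether keeping
  // them is needed at all).
  DG.fixupKills(MBB);
}
```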

</details>


https://github.com/llvm/llvm-project/pull/105562

