[llvm] [PHIElimination] Reuse existing COPY in predecessor basic block (PR #131837)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 18 09:01:02 PDT 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-systemz
@llvm/pr-subscribers-backend-hexagon
Author: Guy David (guy-david)
Changes:
The insertion point of the COPY isn't always optimal and can lead to a worse block layout; see the regression test in the first commit (which still needs to be reduced).
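To illustrate the change, here is a minimal sketch based on the PHIElimination.cpp hunk and the new PHIElimination-reuse-copy.mir test below; the `%incoming` name and block layout are illustrative, not taken from the patch:

```
# Input to phi-node-elimination:
bb.1:
  %x:gpr32 = COPY $wzr
bb.2:
  %y:gpr32 = PHI %x:gpr32, %bb.1, undef %undef:gpr32, %bb.0

# Previously, lowering the PHI inserted a second COPY in the
# predecessor, even though %x is itself defined by a COPY there:
bb.1:
  %x:gpr32 = COPY $wzr
  %incoming:gpr32 = COPY killed %x
bb.2:
  %y:gpr32 = COPY %incoming

# With this patch, when %x has a unique defining COPY in the
# predecessor block and no remaining uses, that COPY's destination
# is rewritten to the incoming vreg instead of inserting a new one:
bb.1:
  %incoming:gpr32 = COPY $wzr
bb.2:
  %y:gpr32 = COPY %incoming
```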
---
Patch is 2.30 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/131837.diff
127 Files Affected:
- (modified) llvm/lib/CodeGen/PHIElimination.cpp (+9)
- (modified) llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-outline_atomics.ll (+8-8)
- (modified) llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-rcpc.ll (+24-24)
- (modified) llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-v8a.ll (+24-24)
- (modified) llvm/test/CodeGen/AArch64/PHIElimination-debugloc.mir (+1-1)
- (added) llvm/test/CodeGen/AArch64/PHIElimination-reuse-copy.mir (+35)
- (modified) llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/atomicrmw-O0.ll (+30-30)
- (modified) llvm/test/CodeGen/AArch64/bfis-in-loop.ll (+1-1)
- (added) llvm/test/CodeGen/AArch64/block-layout-regression.mir (+107)
- (modified) llvm/test/CodeGen/AArch64/complex-deinterleaving-crash.ll (+15-15)
- (modified) llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-predicated-scalable.ll (+14-14)
- (modified) llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions.ll (+6-6)
- (modified) llvm/test/CodeGen/AArch64/phi.ll (+20-20)
- (modified) llvm/test/CodeGen/AArch64/pr48188.ll (+6-6)
- (modified) llvm/test/CodeGen/AArch64/ragreedy-csr.ll (+11-11)
- (modified) llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll (+56-57)
- (modified) llvm/test/CodeGen/AArch64/reduce-or-opt.ll (+12-12)
- (modified) llvm/test/CodeGen/AArch64/sink-and-fold.ll (+3-3)
- (modified) llvm/test/CodeGen/AArch64/sve-lsrchain.ll (+7-7)
- (modified) llvm/test/CodeGen/AArch64/sve-ptest-removal-sink.ll (+4-4)
- (modified) llvm/test/CodeGen/AArch64/swifterror.ll (+8-8)
- (modified) llvm/test/CodeGen/AArch64/tbl-loops.ll (+8-8)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_fmax.ll (+74-72)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_fmin.ll (+74-72)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-i1.ll (+7-7)
- (modified) llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll (+832-789)
- (modified) llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll (+110-100)
- (modified) llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fadd.ll (+1387-1378)
- (modified) llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fmax.ll (+924-908)
- (modified) llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fmin.ll (+924-908)
- (modified) llvm/test/CodeGen/AMDGPU/div_i128.ll (+914-922)
- (modified) llvm/test/CodeGen/AMDGPU/div_v2i128.ll (+114-114)
- (modified) llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fmax.ll (+29-33)
- (modified) llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fmin.ll (+29-33)
- (modified) llvm/test/CodeGen/AMDGPU/global-atomicrmw-fadd.ll (+952-950)
- (modified) llvm/test/CodeGen/AMDGPU/global-atomicrmw-fmax.ll (+658-656)
- (modified) llvm/test/CodeGen/AMDGPU/global-atomicrmw-fmin.ll (+658-656)
- (modified) llvm/test/CodeGen/AMDGPU/global-atomicrmw-fsub.ll (+793-791)
- (modified) llvm/test/CodeGen/AMDGPU/global_atomics_i32_system.ll (+323-323)
- (modified) llvm/test/CodeGen/AMDGPU/global_atomics_i64_system.ll (+461-461)
- (modified) llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll (+255-255)
- (modified) llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll (+225-225)
- (modified) llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll (+225-225)
- (modified) llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll (+227-227)
- (modified) llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll (+62-77)
- (modified) llvm/test/CodeGen/AMDGPU/move-to-valu-atomicrmw-system.ll (+17-17)
- (modified) llvm/test/CodeGen/AMDGPU/mul.ll (+12-12)
- (modified) llvm/test/CodeGen/AMDGPU/rem_i128.ll (+869-871)
- (modified) llvm/test/CodeGen/AMDGPU/sdiv64.ll (+117-117)
- (modified) llvm/test/CodeGen/AMDGPU/srem64.ll (+117-117)
- (modified) llvm/test/CodeGen/AMDGPU/udiv64.ll (+105-105)
- (modified) llvm/test/CodeGen/AMDGPU/urem64.ll (+89-89)
- (modified) llvm/test/CodeGen/AMDGPU/vni8-across-blocks.ll (+42-41)
- (modified) llvm/test/CodeGen/AMDGPU/wave32.ll (+4-4)
- (modified) llvm/test/CodeGen/ARM/and-cmp0-sink.ll (+11-11)
- (modified) llvm/test/CodeGen/ARM/cttz.ll (+46-46)
- (modified) llvm/test/CodeGen/ARM/select-imm.ll (+8-8)
- (modified) llvm/test/CodeGen/ARM/struct-byval-loop.ll (+8-8)
- (modified) llvm/test/CodeGen/ARM/swifterror.ll (+154-154)
- (modified) llvm/test/CodeGen/AVR/bug-81911.ll (+17-17)
- (modified) llvm/test/CodeGen/Hexagon/swp-conv3x3-nested.ll (+1-2)
- (modified) llvm/test/CodeGen/Hexagon/swp-epilog-phi7.ll (+1)
- (modified) llvm/test/CodeGen/Hexagon/swp-matmul-bitext.ll (+1-1)
- (modified) llvm/test/CodeGen/Hexagon/swp-stages4.ll (+2-5)
- (modified) llvm/test/CodeGen/Hexagon/tinycore.ll (+8-3)
- (modified) llvm/test/CodeGen/LoongArch/machinelicm-address-pseudos.ll (+28-28)
- (modified) llvm/test/CodeGen/PowerPC/2013-07-01-PHIElimBug.mir (+1-2)
- (modified) llvm/test/CodeGen/PowerPC/disable-ctr-ppcf128.ll (+3-3)
- (modified) llvm/test/CodeGen/PowerPC/phi-eliminate.mir (+3-6)
- (modified) llvm/test/CodeGen/PowerPC/ppcf128-freeze.mir (+15-15)
- (modified) llvm/test/CodeGen/PowerPC/pr116071.ll (+18-7)
- (modified) llvm/test/CodeGen/PowerPC/sms-phi-2.ll (+6-7)
- (modified) llvm/test/CodeGen/PowerPC/sms-phi-3.ll (+12-12)
- (modified) llvm/test/CodeGen/PowerPC/stack-restore-with-setjmp.ll (+4-6)
- (modified) llvm/test/CodeGen/PowerPC/subreg-postra-2.ll (+9-9)
- (modified) llvm/test/CodeGen/PowerPC/vsx.ll (+1-1)
- (modified) llvm/test/CodeGen/RISCV/abds.ll (+100-100)
- (modified) llvm/test/CodeGen/RISCV/machine-pipeliner.ll (+13-11)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll (+60-60)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll (+30-31)
- (modified) llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll (+12-12)
- (modified) llvm/test/CodeGen/RISCV/xcvbi.ll (+30-30)
- (modified) llvm/test/CodeGen/SystemZ/swifterror.ll (+2-2)
- (modified) llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll (+48-48)
- (modified) llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-disabled-in-loloops.ll (+22-22)
- (modified) llvm/test/CodeGen/Thumb2/LowOverheadLoops/varying-outer-2d-reduction.ll (+16-16)
- (modified) llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-loops.ll (+53-58)
- (modified) llvm/test/CodeGen/Thumb2/mve-blockplacement.ll (+9-12)
- (modified) llvm/test/CodeGen/Thumb2/mve-float32regloops.ll (+23-20)
- (modified) llvm/test/CodeGen/Thumb2/mve-laneinterleaving-reduct.ll (+4-4)
- (modified) llvm/test/CodeGen/Thumb2/mve-memtp-loop.ll (+50-51)
- (modified) llvm/test/CodeGen/Thumb2/mve-phireg.ll (+7-7)
- (modified) llvm/test/CodeGen/Thumb2/mve-pipelineloops.ll (+41-44)
- (modified) llvm/test/CodeGen/Thumb2/mve-postinc-dct.ll (+8-11)
- (modified) llvm/test/CodeGen/Thumb2/mve-postinc-distribute.ll (+9-8)
- (modified) llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll (+22-22)
- (modified) llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll (+17-16)
- (modified) llvm/test/CodeGen/Thumb2/pr52817.ll (+8-8)
- (modified) llvm/test/CodeGen/VE/Scalar/br_jt.ll (+19-19)
- (modified) llvm/test/CodeGen/X86/2012-01-10-UndefExceptionEdge.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/AMX/amx-ldtilecfg-insert.ll (+9-9)
- (modified) llvm/test/CodeGen/X86/AMX/amx-spill-merge.ll (+16-16)
- (modified) llvm/test/CodeGen/X86/atomic32.ll (+72-54)
- (modified) llvm/test/CodeGen/X86/atomic64.ll (+20-15)
- (modified) llvm/test/CodeGen/X86/atomic6432.ll (+36-36)
- (modified) llvm/test/CodeGen/X86/callbr-asm-branch-folding.ll (+4-4)
- (modified) llvm/test/CodeGen/X86/callbr-asm-kill.mir (+3-6)
- (modified) llvm/test/CodeGen/X86/coalescer-breaks-subreg-to-reg-liveness-reduced.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/combine-pmuldq.ll (+4-4)
- (modified) llvm/test/CodeGen/X86/fp128-select.ll (+11-10)
- (modified) llvm/test/CodeGen/X86/madd.ll (+58-58)
- (modified) llvm/test/CodeGen/X86/masked_load.ll (+13-14)
- (modified) llvm/test/CodeGen/X86/min-legal-vector-width.ll (+15-15)
- (modified) llvm/test/CodeGen/X86/pcsections-atomics.ll (+158-138)
- (modified) llvm/test/CodeGen/X86/pr15705.ll (+9-8)
- (modified) llvm/test/CodeGen/X86/pr32256.ll (+6-6)
- (modified) llvm/test/CodeGen/X86/pr38795.ll (+9-6)
- (modified) llvm/test/CodeGen/X86/pr49451.ll (+3-3)
- (modified) llvm/test/CodeGen/X86/pr63108.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/sad.ll (+13-13)
- (modified) llvm/test/CodeGen/X86/sse-scalar-fp-arith.ll (+40-48)
- (modified) llvm/test/CodeGen/X86/statepoint-cmp-sunk-past-statepoint.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/swifterror.ll (+9-8)
- (modified) llvm/test/DebugInfo/MIR/InstrRef/phi-regallocd-to-stack.mir (+3-4)
- (modified) llvm/test/Transforms/LoopStrengthReduce/RISCV/lsr-drop-solution.ll (+7-11)
``````````diff
diff --git a/llvm/lib/CodeGen/PHIElimination.cpp b/llvm/lib/CodeGen/PHIElimination.cpp
index 14f91a87f75b4..cc3d4aac55b9d 100644
--- a/llvm/lib/CodeGen/PHIElimination.cpp
+++ b/llvm/lib/CodeGen/PHIElimination.cpp
@@ -587,6 +587,15 @@ void PHIEliminationImpl::LowerPHINode(MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsertPos =
findPHICopyInsertPoint(&opBlock, &MBB, SrcReg);
+ // Reuse an existing copy in the block if possible.
+ if (MachineInstr *DefMI = MRI->getUniqueVRegDef(SrcReg)) {
+ if (DefMI->isCopy() && DefMI->getParent() == &opBlock &&
+ MRI->use_empty(SrcReg)) {
+ DefMI->getOperand(0).setReg(IncomingReg);
+ continue;
+ }
+ }
+
// Insert the copy.
MachineInstr *NewSrcInstr = nullptr;
if (!reusedIncoming && IncomingReg) {
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-outline_atomics.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-outline_atomics.ll
index c1c5c53aa7df2..6c300b04508b2 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-outline_atomics.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-outline_atomics.ll
@@ -118,8 +118,8 @@ define dso_local void @store_atomic_i64_aligned_seq_cst(i64 %value, ptr %ptr) {
define dso_local void @store_atomic_i128_aligned_unordered(i128 %value, ptr %ptr) {
; -O0-LABEL: store_atomic_i128_aligned_unordered:
; -O0: bl __aarch64_cas16_relax
-; -O0: subs x10, x10, x11
-; -O0: ccmp x8, x9, #0, eq
+; -O0: subs x9, x0, x9
+; -O0: ccmp x1, x8, #0, eq
;
; -O1-LABEL: store_atomic_i128_aligned_unordered:
; -O1: ldxp xzr, x8, [x2]
@@ -131,8 +131,8 @@ define dso_local void @store_atomic_i128_aligned_unordered(i128 %value, ptr %ptr
define dso_local void @store_atomic_i128_aligned_monotonic(i128 %value, ptr %ptr) {
; -O0-LABEL: store_atomic_i128_aligned_monotonic:
; -O0: bl __aarch64_cas16_relax
-; -O0: subs x10, x10, x11
-; -O0: ccmp x8, x9, #0, eq
+; -O0: subs x9, x0, x9
+; -O0: ccmp x1, x8, #0, eq
;
; -O1-LABEL: store_atomic_i128_aligned_monotonic:
; -O1: ldxp xzr, x8, [x2]
@@ -144,8 +144,8 @@ define dso_local void @store_atomic_i128_aligned_monotonic(i128 %value, ptr %ptr
define dso_local void @store_atomic_i128_aligned_release(i128 %value, ptr %ptr) {
; -O0-LABEL: store_atomic_i128_aligned_release:
; -O0: bl __aarch64_cas16_rel
-; -O0: subs x10, x10, x11
-; -O0: ccmp x8, x9, #0, eq
+; -O0: subs x9, x0, x9
+; -O0: ccmp x1, x8, #0, eq
;
; -O1-LABEL: store_atomic_i128_aligned_release:
; -O1: ldxp xzr, x8, [x2]
@@ -157,8 +157,8 @@ define dso_local void @store_atomic_i128_aligned_release(i128 %value, ptr %ptr)
define dso_local void @store_atomic_i128_aligned_seq_cst(i128 %value, ptr %ptr) {
; -O0-LABEL: store_atomic_i128_aligned_seq_cst:
; -O0: bl __aarch64_cas16_acq_rel
-; -O0: subs x10, x10, x11
-; -O0: ccmp x8, x9, #0, eq
+; -O0: subs x9, x0, x9
+; -O0: ccmp x1, x8, #0, eq
;
; -O1-LABEL: store_atomic_i128_aligned_seq_cst:
; -O1: ldaxp xzr, x8, [x2]
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-rcpc.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-rcpc.ll
index d1047d84e2956..2a7bbad9d6454 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-rcpc.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-rcpc.ll
@@ -117,13 +117,13 @@ define dso_local void @store_atomic_i64_aligned_seq_cst(i64 %value, ptr %ptr) {
define dso_local void @store_atomic_i128_aligned_unordered(i128 %value, ptr %ptr) {
; -O0-LABEL: store_atomic_i128_aligned_unordered:
-; -O0: ldxp x10, x12, [x9]
+; -O0: ldxp x8, x10, [x13]
+; -O0: cmp x8, x9
; -O0: cmp x10, x11
-; -O0: cmp x12, x13
-; -O0: stxp w8, x14, x15, [x9]
-; -O0: stxp w8, x10, x12, [x9]
-; -O0: subs x12, x12, x13
-; -O0: ccmp x10, x11, #0, eq
+; -O0: stxp w12, x14, x15, [x13]
+; -O0: stxp w12, x8, x10, [x13]
+; -O0: subs x10, x10, x11
+; -O0: ccmp x8, x9, #0, eq
;
; -O1-LABEL: store_atomic_i128_aligned_unordered:
; -O1: ldxp xzr, x8, [x2]
@@ -134,13 +134,13 @@ define dso_local void @store_atomic_i128_aligned_unordered(i128 %value, ptr %ptr
define dso_local void @store_atomic_i128_aligned_monotonic(i128 %value, ptr %ptr) {
; -O0-LABEL: store_atomic_i128_aligned_monotonic:
-; -O0: ldxp x10, x12, [x9]
+; -O0: ldxp x8, x10, [x13]
+; -O0: cmp x8, x9
; -O0: cmp x10, x11
-; -O0: cmp x12, x13
-; -O0: stxp w8, x14, x15, [x9]
-; -O0: stxp w8, x10, x12, [x9]
-; -O0: subs x12, x12, x13
-; -O0: ccmp x10, x11, #0, eq
+; -O0: stxp w12, x14, x15, [x13]
+; -O0: stxp w12, x8, x10, [x13]
+; -O0: subs x10, x10, x11
+; -O0: ccmp x8, x9, #0, eq
;
; -O1-LABEL: store_atomic_i128_aligned_monotonic:
; -O1: ldxp xzr, x8, [x2]
@@ -151,13 +151,13 @@ define dso_local void @store_atomic_i128_aligned_monotonic(i128 %value, ptr %ptr
define dso_local void @store_atomic_i128_aligned_release(i128 %value, ptr %ptr) {
; -O0-LABEL: store_atomic_i128_aligned_release:
-; -O0: ldxp x10, x12, [x9]
+; -O0: ldxp x8, x10, [x13]
+; -O0: cmp x8, x9
; -O0: cmp x10, x11
-; -O0: cmp x12, x13
-; -O0: stlxp w8, x14, x15, [x9]
-; -O0: stlxp w8, x10, x12, [x9]
-; -O0: subs x12, x12, x13
-; -O0: ccmp x10, x11, #0, eq
+; -O0: stlxp w12, x14, x15, [x13]
+; -O0: stlxp w12, x8, x10, [x13]
+; -O0: subs x10, x10, x11
+; -O0: ccmp x8, x9, #0, eq
;
; -O1-LABEL: store_atomic_i128_aligned_release:
; -O1: ldxp xzr, x8, [x2]
@@ -168,13 +168,13 @@ define dso_local void @store_atomic_i128_aligned_release(i128 %value, ptr %ptr)
define dso_local void @store_atomic_i128_aligned_seq_cst(i128 %value, ptr %ptr) {
; -O0-LABEL: store_atomic_i128_aligned_seq_cst:
-; -O0: ldaxp x10, x12, [x9]
+; -O0: ldaxp x8, x10, [x13]
+; -O0: cmp x8, x9
; -O0: cmp x10, x11
-; -O0: cmp x12, x13
-; -O0: stlxp w8, x14, x15, [x9]
-; -O0: stlxp w8, x10, x12, [x9]
-; -O0: subs x12, x12, x13
-; -O0: ccmp x10, x11, #0, eq
+; -O0: stlxp w12, x14, x15, [x13]
+; -O0: stlxp w12, x8, x10, [x13]
+; -O0: subs x10, x10, x11
+; -O0: ccmp x8, x9, #0, eq
;
; -O1-LABEL: store_atomic_i128_aligned_seq_cst:
; -O1: ldaxp xzr, x8, [x2]
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-v8a.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-v8a.ll
index 1a79c73355143..493bc742f7663 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-v8a.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-v8a.ll
@@ -117,13 +117,13 @@ define dso_local void @store_atomic_i64_aligned_seq_cst(i64 %value, ptr %ptr) {
define dso_local void @store_atomic_i128_aligned_unordered(i128 %value, ptr %ptr) {
; -O0-LABEL: store_atomic_i128_aligned_unordered:
-; -O0: ldxp x10, x12, [x9]
+; -O0: ldxp x8, x10, [x13]
+; -O0: cmp x8, x9
; -O0: cmp x10, x11
-; -O0: cmp x12, x13
-; -O0: stxp w8, x14, x15, [x9]
-; -O0: stxp w8, x10, x12, [x9]
-; -O0: subs x12, x12, x13
-; -O0: ccmp x10, x11, #0, eq
+; -O0: stxp w12, x14, x15, [x13]
+; -O0: stxp w12, x8, x10, [x13]
+; -O0: subs x10, x10, x11
+; -O0: ccmp x8, x9, #0, eq
;
; -O1-LABEL: store_atomic_i128_aligned_unordered:
; -O1: ldxp xzr, x8, [x2]
@@ -134,13 +134,13 @@ define dso_local void @store_atomic_i128_aligned_unordered(i128 %value, ptr %ptr
define dso_local void @store_atomic_i128_aligned_monotonic(i128 %value, ptr %ptr) {
; -O0-LABEL: store_atomic_i128_aligned_monotonic:
-; -O0: ldxp x10, x12, [x9]
+; -O0: ldxp x8, x10, [x13]
+; -O0: cmp x8, x9
; -O0: cmp x10, x11
-; -O0: cmp x12, x13
-; -O0: stxp w8, x14, x15, [x9]
-; -O0: stxp w8, x10, x12, [x9]
-; -O0: subs x12, x12, x13
-; -O0: ccmp x10, x11, #0, eq
+; -O0: stxp w12, x14, x15, [x13]
+; -O0: stxp w12, x8, x10, [x13]
+; -O0: subs x10, x10, x11
+; -O0: ccmp x8, x9, #0, eq
;
; -O1-LABEL: store_atomic_i128_aligned_monotonic:
; -O1: ldxp xzr, x8, [x2]
@@ -151,13 +151,13 @@ define dso_local void @store_atomic_i128_aligned_monotonic(i128 %value, ptr %ptr
define dso_local void @store_atomic_i128_aligned_release(i128 %value, ptr %ptr) {
; -O0-LABEL: store_atomic_i128_aligned_release:
-; -O0: ldxp x10, x12, [x9]
+; -O0: ldxp x8, x10, [x13]
+; -O0: cmp x8, x9
; -O0: cmp x10, x11
-; -O0: cmp x12, x13
-; -O0: stlxp w8, x14, x15, [x9]
-; -O0: stlxp w8, x10, x12, [x9]
-; -O0: subs x12, x12, x13
-; -O0: ccmp x10, x11, #0, eq
+; -O0: stlxp w12, x14, x15, [x13]
+; -O0: stlxp w12, x8, x10, [x13]
+; -O0: subs x10, x10, x11
+; -O0: ccmp x8, x9, #0, eq
;
; -O1-LABEL: store_atomic_i128_aligned_release:
; -O1: ldxp xzr, x8, [x2]
@@ -168,13 +168,13 @@ define dso_local void @store_atomic_i128_aligned_release(i128 %value, ptr %ptr)
define dso_local void @store_atomic_i128_aligned_seq_cst(i128 %value, ptr %ptr) {
; -O0-LABEL: store_atomic_i128_aligned_seq_cst:
-; -O0: ldaxp x10, x12, [x9]
+; -O0: ldaxp x8, x10, [x13]
+; -O0: cmp x8, x9
; -O0: cmp x10, x11
-; -O0: cmp x12, x13
-; -O0: stlxp w8, x14, x15, [x9]
-; -O0: stlxp w8, x10, x12, [x9]
-; -O0: subs x12, x12, x13
-; -O0: ccmp x10, x11, #0, eq
+; -O0: stlxp w12, x14, x15, [x13]
+; -O0: stlxp w12, x8, x10, [x13]
+; -O0: subs x10, x10, x11
+; -O0: ccmp x8, x9, #0, eq
;
; -O1-LABEL: store_atomic_i128_aligned_seq_cst:
; -O1: ldaxp xzr, x8, [x2]
diff --git a/llvm/test/CodeGen/AArch64/PHIElimination-debugloc.mir b/llvm/test/CodeGen/AArch64/PHIElimination-debugloc.mir
index 01c44e3f253bb..993d1c1f1b5f0 100644
--- a/llvm/test/CodeGen/AArch64/PHIElimination-debugloc.mir
+++ b/llvm/test/CodeGen/AArch64/PHIElimination-debugloc.mir
@@ -37,7 +37,7 @@ body: |
bb.1:
%x:gpr32 = COPY $wzr
; Test that the debug location is not copied into bb1!
- ; CHECK: %3:gpr32 = COPY killed %x{{$}}
+ ; CHECK: %3:gpr32 = COPY $wzr
; CHECK-LABEL: bb.2:
bb.2:
%y:gpr32 = PHI %x:gpr32, %bb.1, undef %undef:gpr32, %bb.0, debug-location !14
diff --git a/llvm/test/CodeGen/AArch64/PHIElimination-reuse-copy.mir b/llvm/test/CodeGen/AArch64/PHIElimination-reuse-copy.mir
new file mode 100644
index 0000000000000..883d130bfac4e
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/PHIElimination-reuse-copy.mir
@@ -0,0 +1,35 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -run-pass=phi-node-elimination -mtriple=aarch64-linux-gnu -o - %s | FileCheck %s
+
+# Verify that the original COPY in bb.1 is reappropriated as the PHI source in bb.2,
+# instead of creating a new COPY with the same source register.
+
+---
+name: test
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: test
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: liveins: $nzcv, $wzr
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:gpr32 = IMPLICIT_DEF
+ ; CHECK-NEXT: Bcc 8, %bb.2, implicit $nzcv
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:gpr32 = COPY $wzr
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: %y:gpr32 = COPY [[DEF]]
+ ; CHECK-NEXT: $wzr = COPY %y
+ bb.0:
+ liveins: $nzcv, $wzr
+ Bcc 8, %bb.2, implicit $nzcv
+ bb.1:
+ %x:gpr32 = COPY $wzr
+ bb.2:
+ %y:gpr32 = PHI %x:gpr32, %bb.1, undef %undef:gpr32, %bb.0
+ $wzr = COPY %y:gpr32
+...
diff --git a/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
index fb6575cc0ee83..10fc431b07b18 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
@@ -587,8 +587,8 @@ define i16 @red_mla_dup_ext_u8_s8_s16(ptr noalias nocapture noundef readonly %A,
; CHECK-SD-NEXT: mov w10, w2
; CHECK-SD-NEXT: b.hi .LBB5_4
; CHECK-SD-NEXT: // %bb.2:
-; CHECK-SD-NEXT: mov x11, xzr
; CHECK-SD-NEXT: mov w8, wzr
+; CHECK-SD-NEXT: mov x11, xzr
; CHECK-SD-NEXT: b .LBB5_7
; CHECK-SD-NEXT: .LBB5_3:
; CHECK-SD-NEXT: mov w8, wzr
diff --git a/llvm/test/CodeGen/AArch64/atomicrmw-O0.ll b/llvm/test/CodeGen/AArch64/atomicrmw-O0.ll
index 37a7782caeed9..cab6fba59cbd1 100644
--- a/llvm/test/CodeGen/AArch64/atomicrmw-O0.ll
+++ b/llvm/test/CodeGen/AArch64/atomicrmw-O0.ll
@@ -45,7 +45,7 @@ define i8 @test_rmw_add_8(ptr %dst) {
;
; LSE-LABEL: test_rmw_add_8:
; LSE: // %bb.0: // %entry
-; LSE-NEXT: mov w8, #1
+; LSE-NEXT: mov w8, #1 // =0x1
; LSE-NEXT: ldaddalb w8, w0, [x0]
; LSE-NEXT: ret
entry:
@@ -94,7 +94,7 @@ define i16 @test_rmw_add_16(ptr %dst) {
;
; LSE-LABEL: test_rmw_add_16:
; LSE: // %bb.0: // %entry
-; LSE-NEXT: mov w8, #1
+; LSE-NEXT: mov w8, #1 // =0x1
; LSE-NEXT: ldaddalh w8, w0, [x0]
; LSE-NEXT: ret
entry:
@@ -143,7 +143,7 @@ define i32 @test_rmw_add_32(ptr %dst) {
;
; LSE-LABEL: test_rmw_add_32:
; LSE: // %bb.0: // %entry
-; LSE-NEXT: mov w8, #1
+; LSE-NEXT: mov w8, #1 // =0x1
; LSE-NEXT: ldaddal w8, w0, [x0]
; LSE-NEXT: ret
entry:
@@ -192,7 +192,7 @@ define i64 @test_rmw_add_64(ptr %dst) {
;
; LSE-LABEL: test_rmw_add_64:
; LSE: // %bb.0: // %entry
-; LSE-NEXT: mov w8, #1
+; LSE-NEXT: mov w8, #1 // =0x1
; LSE-NEXT: // kill: def $x8 killed $w8
; LSE-NEXT: ldaddal x8, x0, [x0]
; LSE-NEXT: ret
@@ -207,16 +207,16 @@ define i128 @test_rmw_add_128(ptr %dst) {
; NOLSE-NEXT: sub sp, sp, #48
; NOLSE-NEXT: .cfi_def_cfa_offset 48
; NOLSE-NEXT: str x0, [sp, #24] // 8-byte Folded Spill
-; NOLSE-NEXT: ldr x8, [x0, #8]
-; NOLSE-NEXT: ldr x9, [x0]
+; NOLSE-NEXT: ldr x9, [x0, #8]
+; NOLSE-NEXT: ldr x8, [x0]
; NOLSE-NEXT: str x9, [sp, #32] // 8-byte Folded Spill
; NOLSE-NEXT: str x8, [sp, #40] // 8-byte Folded Spill
; NOLSE-NEXT: b .LBB4_1
; NOLSE-NEXT: .LBB4_1: // %atomicrmw.start
; NOLSE-NEXT: // =>This Loop Header: Depth=1
; NOLSE-NEXT: // Child Loop BB4_2 Depth 2
-; NOLSE-NEXT: ldr x13, [sp, #40] // 8-byte Folded Reload
-; NOLSE-NEXT: ldr x11, [sp, #32] // 8-byte Folded Reload
+; NOLSE-NEXT: ldr x13, [sp, #32] // 8-byte Folded Reload
+; NOLSE-NEXT: ldr x11, [sp, #40] // 8-byte Folded Reload
; NOLSE-NEXT: ldr x9, [sp, #24] // 8-byte Folded Reload
; NOLSE-NEXT: adds x14, x11, #1
; NOLSE-NEXT: cinc x15, x13, hs
@@ -246,8 +246,8 @@ define i128 @test_rmw_add_128(ptr %dst) {
; NOLSE-NEXT: str x9, [sp, #16] // 8-byte Folded Spill
; NOLSE-NEXT: subs x12, x12, x13
; NOLSE-NEXT: ccmp x10, x11, #0, eq
-; NOLSE-NEXT: str x9, [sp, #32] // 8-byte Folded Spill
-; NOLSE-NEXT: str x8, [sp, #40] // 8-byte Folded Spill
+; NOLSE-NEXT: str x9, [sp, #40] // 8-byte Folded Spill
+; NOLSE-NEXT: str x8, [sp, #32] // 8-byte Folded Spill
; NOLSE-NEXT: b.ne .LBB4_1
; NOLSE-NEXT: b .LBB4_6
; NOLSE-NEXT: .LBB4_6: // %atomicrmw.end
@@ -261,15 +261,15 @@ define i128 @test_rmw_add_128(ptr %dst) {
; LSE-NEXT: sub sp, sp, #48
; LSE-NEXT: .cfi_def_cfa_offset 48
; LSE-NEXT: str x0, [sp, #24] // 8-byte Folded Spill
-; LSE-NEXT: ldr x8, [x0, #8]
-; LSE-NEXT: ldr x9, [x0]
+; LSE-NEXT: ldr x9, [x0, #8]
+; LSE-NEXT: ldr x8, [x0]
; LSE-NEXT: str x9, [sp, #32] // 8-byte Folded Spill
; LSE-NEXT: str x8, [sp, #40] // 8-byte Folded Spill
; LSE-NEXT: b .LBB4_1
; LSE-NEXT: .LBB4_1: // %atomicrmw.start
; LSE-NEXT: // =>This Inner Loop Header: Depth=1
-; LSE-NEXT: ldr x11, [sp, #40] // 8-byte Folded Reload
-; LSE-NEXT: ldr x10, [sp, #32] // 8-byte Folded Reload
+; LSE-NEXT: ldr x11, [sp, #32] // 8-byte Folded Reload
+; LSE-NEXT: ldr x10, [sp, #40] // 8-byte Folded Reload
; LSE-NEXT: ldr x8, [sp, #24] // 8-byte Folded Reload
; LSE-NEXT: mov x0, x10
; LSE-NEXT: mov x1, x11
@@ -284,8 +284,8 @@ define i128 @test_rmw_add_128(ptr %dst) {
; LSE-NEXT: str x8, [sp, #16] // 8-byte Folded Spill
; LSE-NEXT: subs x11, x8, x11
; LSE-NEXT: ccmp x9, x10, #0, eq
-; LSE-NEXT: str x9, [sp, #32] // 8-byte Folded Spill
-; LSE-NEXT: str x8, [sp, #40] // 8-byte Folded Spill
+; LSE-NEXT: str x9, [sp, #40] // 8-byte Folded Spill
+; LSE-NEXT: str x8, [sp, #32] // 8-byte Folded Spill
; LSE-NEXT: b.ne .LBB4_1
; LSE-NEXT: b .LBB4_2
; LSE-NEXT: .LBB4_2: // %atomicrmw.end
@@ -597,23 +597,23 @@ define i128 @test_rmw_nand_128(ptr %dst) {
; NOLSE-NEXT: sub sp, sp, #48
; NOLSE-NEXT: .cfi_def_cfa_offset 48
; NOLSE-NEXT: str x0, [sp, #24] // 8-byte Folded Spill
-; NOLSE-NEXT: ldr x8, [x0, #8]
-; NOLSE-NEXT: ldr x9, [x0]
+; NOLSE-NEXT: ldr x9, [x0, #8]
+; NOLSE-NEXT: ldr x8, [x0]
; NOLSE-NEXT: str x9, [sp, #32] // 8-byte Folded Spill
; NOLSE-NEXT: str x8, [sp, #40] // 8-byte Folded Spill
; NOLSE-NEXT: b .LBB9_1
; NOLSE-NEXT: .LBB9_1: // %atomicrmw.start
; NOLSE-NEXT: // =>This Loop Header: Depth=1
; NOLSE-NEXT: // Child Loop BB9_2 Depth 2
-; NOLSE-NEXT: ldr x13, [sp, #40] // 8-byte Folded Reload
-; NOLSE-NEXT: ldr x11, [sp, #32] // 8-byte Folded Reload
+; NOLSE-NEXT: ldr x13, [sp, #32] // 8-byte Folded Reload
+; NOLSE-NEXT: ldr x11, [sp, #40] // 8-byte Folded Reload
; NOLSE-NEXT: ldr x9, [sp, #24] // 8-byte Folded Reload
; NOLSE-NEXT: mov w8, w11
; NOLSE-NEXT: mvn w10, w8
; NOLSE-NEXT: // implicit-def: $x8
; NOLSE-NEXT: mov w8, w10
; NOLSE-NEXT: orr x14, x8, #0xfffffffffffffffe
-; NOLSE-NEXT: mov x15, #-1
+; NOLSE-NEXT: mov x15, #-1 // =0xffffffffffffffff
; NOLSE-NEXT: .LBB9_2: // %atomicrmw.start
; NOLSE-NEXT: // Parent Loop BB9_1 Depth=1
; NOLSE-NEXT: // => This Inner Loop Header: Depth=2
@@ -640,8 +640,8 @@ define i128 @test_rmw_nand_128(ptr %dst) {
; NOLSE-NEXT: str x9, [sp, #16] // 8-byte Folded Spill
; NOLSE-NEXT: subs x12, x12, x13
; NOLSE-NEXT: ccmp x10, x11, #0, eq
-; NOLSE-NEXT: str x9, [sp, #32] // 8-byte Folded Spill
-; NOLSE-NEXT: str x8, [sp, #40] // 8-byte Folded Spill
+; NOLSE-NEXT: str x9, [sp, #40] // 8-byte Folded Spill
+; NOLSE-NEXT: str x8, [sp, #32] // 8-byte Folded Spill
; NOLSE-NEXT: b.ne .LBB9_1
; NOLSE-NEXT: b .LBB9_6
; NOLSE-NEXT: .LBB9_6: // %atomicrmw.end
@@ -655,15 +655,15 @@ define i128 @test_rmw_nand_128(ptr %dst) {
; LSE-NEXT: sub sp, sp, #48
; LSE-NEXT: .cfi_def_cfa_offset 48
; LSE-NEXT: str x0, [sp, #24] // 8-byte Folded Spill
-; LSE-NEXT: ldr x8, [x0, #8]
-; LSE-NEXT: ldr x9, [x0]
+; LSE-NEXT: ldr x9, [x0, #8]
+; LSE-NEXT: ldr x8, [x0]
; LSE-NEXT: str x9, [sp, #32] // 8-byte Folded Spill
; LSE-NEXT: str x8, [sp, #40] // 8-byte Folded Spill
; LSE-NEXT: b .LBB9_1
; LSE-NEXT: .LBB9_1: // %atomicrmw.start
; LSE-NEXT: // =>This Inner Loop Header: Depth=1
-; LSE-NEXT: ldr x11, [sp, #40] // 8-byte Folded Reload
-; LSE-NEXT: ldr x10, [sp, #32] // 8-byte Folded Reload
+; LSE-NEXT: ldr x11, [sp, #32] // 8-byte Folded Reload
+; LSE-NEXT: ldr x10, [sp, #40] // 8-byte Folded Reload
; LSE-NEXT: ldr x8, [sp, #24] // 8-byte Folded Reload
; LSE-NEXT: mov x0, x10
; LSE-NEXT: mov x1, x11
@@ -672,7 +672,7 @@ define i128 @test_rmw_nand_128(ptr %dst) {
; LSE-NEXT: // implicit-def: $x9
; LSE-NEXT: mov w9, w12
; LSE-NEXT: orr x2, x9, #0xfffffffffffffffe
-; LSE-NEXT: mov x9, #-1
+; LSE-NEXT: mov x9, #-1 // =0xffffffffffffffff
; LSE-NEXT: // kill: def $x2 killed $x2 def $x2_x3
; LSE-NEXT: mov x3, x9
; LSE-NEXT: caspal x0, x1, x2, x3, [x8]
@@ -682,8 +682,8 @@ define i128 @test_rmw_nand_128(ptr %dst) {
; LSE-NEXT: str x8, [sp, #16] // 8-byte Folded Spill
; LSE-NEXT: subs x11, x8, x11
; LSE-NEXT: ccmp x9, x10, #0, eq
-; LSE-NEXT: str x9, [sp, #32] // 8-byte Folded Spill
-; LSE-NEXT: str x8, [sp, #40] // 8-byte Folded Spill
+; LSE-NEXT: str x9, [sp, #40] // 8-byte Folded Spill
+; LSE-NEXT: str x8, [sp, #32] // 8-byte Folded Spill
; LSE-NEXT: b.ne .LBB9_1
; LSE-NEXT: b .LBB9_2
; LSE-NEXT: .LBB9_2: // %atomicrmw.end
diff --git a/llvm/test/CodeGen/AArch64/bfis-in-loop.ll b/llvm/test/CodeGen/AArch64/bfis-in-loop.ll
index 43d49da1abd21..b0339222bc2df 100644
--- a/llvm/test/CodeGen/AArch64/bfis-in-loop.ll
+++ b/llvm/test/CodeGen/AArch64/bfis-in-loop.ll
@@ -14,8 +14,8 @@ define i64 @bfi...
[truncated]
``````````
https://github.com/llvm/llvm-project/pull/131837
More information about the llvm-commits mailing list