[llvm] Reland "[RegAlloc] Fix the terminal rule check for interfere with DstReg (#168661)" (PR #169219)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Nov 23 06:52:47 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-regalloc
@llvm/pr-subscribers-backend-aarch64
Author: None (hstk30-hw)
<details>
<summary>Changes</summary>
Reland d5f3ab8ec97786476a077b0c8e35c7c337dfddf2 with the affected test cases fixed.
---
Patch is 1023.97 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/169219.diff
55 Files Affected:
- (modified) llvm/lib/CodeGen/RegisterCoalescer.cpp (+1-1)
- (modified) llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll (+7-7)
- (modified) llvm/test/CodeGen/AArch64/complex-deinterleaving-crash.ll (+26-26)
- (modified) llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-scalable.ll (+12-12)
- (modified) llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions.ll (+15-15)
- (modified) llvm/test/CodeGen/AArch64/machine-sink-kill-flags.ll (+2-1)
- (modified) llvm/test/CodeGen/AArch64/sve-extract-fixed-from-scalable-vector.ll (+6-6)
- (modified) llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll (+16-16)
- (modified) llvm/test/CodeGen/AArch64/sve-fixed-length-reshuffle.ll (+6-6)
- (modified) llvm/test/CodeGen/AArch64/zext-to-tbl.ll (+46-46)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.ll (+4-4)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/mul-known-bits.i64.ll (+21-24)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll (+8-8)
- (modified) llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll (+8-8)
- (modified) llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll (+10-10)
- (modified) llvm/test/CodeGen/AMDGPU/local-atomicrmw-fadd.ll (+1159-1185)
- (modified) llvm/test/CodeGen/AMDGPU/local-atomicrmw-fmax.ll (+1231-1259)
- (modified) llvm/test/CodeGen/AMDGPU/local-atomicrmw-fmin.ll (+1231-1259)
- (modified) llvm/test/CodeGen/AMDGPU/local-atomicrmw-fsub.ll (+1379-1421)
- (modified) llvm/test/CodeGen/AMDGPU/set-inactive-wwm-overwrite.ll (+4-4)
- (modified) llvm/test/CodeGen/BPF/objdump_cond_op_2.ll (+2-1)
- (modified) llvm/test/CodeGen/Hexagon/swp-stages5.ll (-1)
- (modified) llvm/test/CodeGen/NVPTX/atomics-b128.ll (+75-75)
- (modified) llvm/test/CodeGen/NVPTX/atomics-sm70.ll (+20-20)
- (modified) llvm/test/CodeGen/NVPTX/atomics-sm90.ll (+20-20)
- (modified) llvm/test/CodeGen/NVPTX/atomics.ll (+6-6)
- (modified) llvm/test/CodeGen/PowerPC/ctrloop-fp128.ll (+3-3)
- (modified) llvm/test/CodeGen/PowerPC/licm-xxsplti.ll (+18-18)
- (modified) llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll (+5-3)
- (modified) llvm/test/CodeGen/PowerPC/sink-side-effect.ll (+1-1)
- (modified) llvm/test/CodeGen/PowerPC/sms-phi-1.ll (+2-3)
- (modified) llvm/test/CodeGen/PowerPC/vsx-fma-m-early.ll (+5-5)
- (modified) llvm/test/CodeGen/RISCV/branch-on-zero.ll (+10-6)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-exact-vlen.ll (+6-4)
- (modified) llvm/test/CodeGen/RISCV/rvv/pr95865.ll (+22-21)
- (modified) llvm/test/CodeGen/RISCV/rvv/remat.ll (+22-35)
- (modified) llvm/test/CodeGen/RISCV/rvv/vandn-sdnode.ll (+33-33)
- (modified) llvm/test/CodeGen/RISCV/rvv/vcpop-shl-zext-opt.ll (+14-14)
- (modified) llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll (+6-6)
- (modified) llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll (+12-12)
- (modified) llvm/test/CodeGen/Thumb2/LowOverheadLoops/varying-outer-2d-reduction.ll (+18-16)
- (modified) llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-loops.ll (+46-45)
- (modified) llvm/test/CodeGen/Thumb2/mve-gather-increment.ll (+12-12)
- (modified) llvm/test/CodeGen/Thumb2/mve-gather-scatter-optimisation.ll (+45-45)
- (modified) llvm/test/CodeGen/Thumb2/mve-laneinterleaving-reduct.ll (+47-42)
- (modified) llvm/test/CodeGen/Thumb2/mve-pipelineloops.ll (+26-26)
- (modified) llvm/test/CodeGen/WebAssembly/simd-shift-in-loop.ll (+8-6)
- (modified) llvm/test/CodeGen/X86/AMX/amx-ldtilecfg-insert.ll (+9-9)
- (modified) llvm/test/CodeGen/X86/i128-mul.ll (+91-87)
- (modified) llvm/test/CodeGen/X86/loop-strength-reduce5.ll (+5-5)
- (modified) llvm/test/CodeGen/X86/madd.ll (+11-11)
- (modified) llvm/test/CodeGen/X86/pr49451.ll (+3-3)
- (modified) llvm/test/CodeGen/X86/wide-scalar-shift-by-byte-multiple-legalization.ll (+57-57)
- (modified) llvm/test/CodeGen/X86/x86-shrink-wrapping.ll (+6-6)
- (modified) llvm/test/CodeGen/X86/xor.ll (+66-66)
``````````diff
diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp
index 25c4375a73ce0..e624088a0964e 100644
--- a/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -4150,7 +4150,7 @@ bool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const {
continue;
Register OtherSrcReg, OtherReg;
unsigned OtherSrcSubReg = 0, OtherSubReg = 0;
- if (!isMoveInstr(*TRI, &Copy, OtherSrcReg, OtherReg, OtherSrcSubReg,
+ if (!isMoveInstr(*TRI, &MI, OtherSrcReg, OtherReg, OtherSrcSubReg,
OtherSubReg))
return false;
if (OtherReg == SrcReg)
diff --git a/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
index 4894932d3c9b1..99c540366fb12 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
@@ -803,20 +803,20 @@ define i64 @red_mla_dup_ext_u8_s8_s64(ptr noalias noundef readonly captures(none
; CHECK-SD-NEXT: smlal2 v4.2d, v16.4s, v20.4s
; CHECK-SD-NEXT: smlal v6.2d, v16.2s, v20.2s
; CHECK-SD-NEXT: smlal v3.2d, v16.2s, v19.2s
-; CHECK-SD-NEXT: smlal2 v1.2d, v16.4s, v18.4s
+; CHECK-SD-NEXT: smlal2 v0.2d, v16.4s, v18.4s
; CHECK-SD-NEXT: smlal v7.2d, v16.2s, v17.2s
-; CHECK-SD-NEXT: smlal v0.2d, v16.2s, v18.2s
+; CHECK-SD-NEXT: smlal v1.2d, v16.2s, v18.2s
; CHECK-SD-NEXT: smlal2 v5.2d, v16.4s, v17.4s
; CHECK-SD-NEXT: b.ne .LBB6_7
; CHECK-SD-NEXT: // %bb.8: // %middle.block
-; CHECK-SD-NEXT: add v0.2d, v0.2d, v6.2d
+; CHECK-SD-NEXT: add v1.2d, v1.2d, v6.2d
; CHECK-SD-NEXT: add v3.2d, v3.2d, v7.2d
; CHECK-SD-NEXT: cmp x10, x9
-; CHECK-SD-NEXT: add v1.2d, v1.2d, v4.2d
+; CHECK-SD-NEXT: add v0.2d, v0.2d, v4.2d
; CHECK-SD-NEXT: add v2.2d, v2.2d, v5.2d
-; CHECK-SD-NEXT: add v0.2d, v0.2d, v3.2d
-; CHECK-SD-NEXT: add v1.2d, v1.2d, v2.2d
-; CHECK-SD-NEXT: add v0.2d, v0.2d, v1.2d
+; CHECK-SD-NEXT: add v1.2d, v1.2d, v3.2d
+; CHECK-SD-NEXT: add v0.2d, v0.2d, v2.2d
+; CHECK-SD-NEXT: add v0.2d, v1.2d, v0.2d
; CHECK-SD-NEXT: addp d0, v0.2d
; CHECK-SD-NEXT: fmov x8, d0
; CHECK-SD-NEXT: b.eq .LBB6_15
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-crash.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-crash.ll
index 7542e9c4b8f5b..a4f20905a85c2 100644
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-crash.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-crash.ll
@@ -35,15 +35,15 @@ define i32 @check_deinterleaving_has_deinterleave(ptr %a) {
; CHECK-LABEL: check_deinterleaving_has_deinterleave:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: movi v0.2d, #0000000000000000
-; CHECK-NEXT: movi v1.4s, #1
+; CHECK-NEXT: movi v2.4s, #1
; CHECK-NEXT: add x8, x0, #16
-; CHECK-NEXT: movi v3.2d, #0000000000000000
-; CHECK-NEXT: movi v2.2d, #0000000000000000
-; CHECK-NEXT: mov w9, #32 // =0x20
+; CHECK-NEXT: movi v1.2d, #0000000000000000
; CHECK-NEXT: movi v4.2d, #0000000000000000
+; CHECK-NEXT: mov w9, #32 // =0x20
+; CHECK-NEXT: movi v3.2d, #0000000000000000
; CHECK-NEXT: movi v5.2d, #0000000000000000
-; CHECK-NEXT: movi v7.2d, #0000000000000000
; CHECK-NEXT: movi v6.2d, #0000000000000000
+; CHECK-NEXT: movi v7.2d, #0000000000000000
; CHECK-NEXT: movi v16.2d, #0000000000000000
; CHECK-NEXT: .LBB1_1: // %vector.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
@@ -64,31 +64,31 @@ define i32 @check_deinterleaving_has_deinterleave(ptr %a) {
; CHECK-NEXT: ushll v24.4s, v18.4h, #0
; CHECK-NEXT: ushll2 v18.4s, v18.8h, #0
; CHECK-NEXT: ushll v20.4s, v20.4h, #0
-; CHECK-NEXT: and v21.16b, v21.16b, v1.16b
-; CHECK-NEXT: and v19.16b, v19.16b, v1.16b
-; CHECK-NEXT: and v22.16b, v22.16b, v1.16b
-; CHECK-NEXT: and v17.16b, v17.16b, v1.16b
-; CHECK-NEXT: and v23.16b, v23.16b, v1.16b
-; CHECK-NEXT: and v24.16b, v24.16b, v1.16b
-; CHECK-NEXT: and v18.16b, v18.16b, v1.16b
-; CHECK-NEXT: and v20.16b, v20.16b, v1.16b
-; CHECK-NEXT: add v4.4s, v4.4s, v19.4s
-; CHECK-NEXT: add v2.4s, v2.4s, v21.4s
-; CHECK-NEXT: add v0.4s, v0.4s, v22.4s
-; CHECK-NEXT: add v3.4s, v3.4s, v17.4s
+; CHECK-NEXT: and v21.16b, v21.16b, v2.16b
+; CHECK-NEXT: and v19.16b, v19.16b, v2.16b
+; CHECK-NEXT: and v22.16b, v22.16b, v2.16b
+; CHECK-NEXT: and v17.16b, v17.16b, v2.16b
+; CHECK-NEXT: and v23.16b, v23.16b, v2.16b
+; CHECK-NEXT: and v24.16b, v24.16b, v2.16b
+; CHECK-NEXT: and v18.16b, v18.16b, v2.16b
+; CHECK-NEXT: and v20.16b, v20.16b, v2.16b
+; CHECK-NEXT: add v5.4s, v5.4s, v19.4s
+; CHECK-NEXT: add v3.4s, v3.4s, v21.4s
+; CHECK-NEXT: add v1.4s, v1.4s, v22.4s
+; CHECK-NEXT: add v4.4s, v4.4s, v17.4s
; CHECK-NEXT: add v16.4s, v16.4s, v23.4s
-; CHECK-NEXT: add v5.4s, v5.4s, v24.4s
-; CHECK-NEXT: add v6.4s, v6.4s, v20.4s
-; CHECK-NEXT: add v7.4s, v7.4s, v18.4s
+; CHECK-NEXT: add v6.4s, v6.4s, v24.4s
+; CHECK-NEXT: add v7.4s, v7.4s, v20.4s
+; CHECK-NEXT: add v0.4s, v0.4s, v18.4s
; CHECK-NEXT: b.ne .LBB1_1
; CHECK-NEXT: // %bb.2: // %middle.block
-; CHECK-NEXT: add v1.4s, v7.4s, v3.4s
-; CHECK-NEXT: add v3.4s, v16.4s, v4.4s
-; CHECK-NEXT: add v0.4s, v5.4s, v0.4s
-; CHECK-NEXT: add v2.4s, v6.4s, v2.4s
-; CHECK-NEXT: add v1.4s, v1.4s, v3.4s
+; CHECK-NEXT: add v0.4s, v0.4s, v4.4s
+; CHECK-NEXT: add v2.4s, v16.4s, v5.4s
+; CHECK-NEXT: add v1.4s, v6.4s, v1.4s
+; CHECK-NEXT: add v3.4s, v7.4s, v3.4s
; CHECK-NEXT: add v0.4s, v0.4s, v2.4s
-; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: add v1.4s, v1.4s, v3.4s
+; CHECK-NEXT: add v0.4s, v1.4s, v0.4s
; CHECK-NEXT: addv s0, v0.4s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-scalable.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-scalable.ll
index 4f00aed3aa4bc..ddeeca7d5df50 100644
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-scalable.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-scalable.ll
@@ -31,14 +31,14 @@ define %"class.std::complex" @complex_mul_v2f64(ptr %a, ptr %b) {
; CHECK-NEXT: ldr z5, [x1]
; CHECK-NEXT: add x1, x1, x10
; CHECK-NEXT: add x0, x0, x10
-; CHECK-NEXT: fcmla z0.d, p0/m, z5.d, z3.d, #0
-; CHECK-NEXT: fcmla z1.d, p0/m, z4.d, z2.d, #0
-; CHECK-NEXT: fcmla z0.d, p0/m, z5.d, z3.d, #90
-; CHECK-NEXT: fcmla z1.d, p0/m, z4.d, z2.d, #90
+; CHECK-NEXT: fcmla z1.d, p0/m, z5.d, z3.d, #0
+; CHECK-NEXT: fcmla z0.d, p0/m, z4.d, z2.d, #0
+; CHECK-NEXT: fcmla z1.d, p0/m, z5.d, z3.d, #90
+; CHECK-NEXT: fcmla z0.d, p0/m, z4.d, z2.d, #90
; CHECK-NEXT: b.ne .LBB0_1
; CHECK-NEXT: // %bb.2: // %exit.block
-; CHECK-NEXT: uzp1 z2.d, z0.d, z1.d
-; CHECK-NEXT: uzp2 z1.d, z0.d, z1.d
+; CHECK-NEXT: uzp1 z2.d, z1.d, z0.d
+; CHECK-NEXT: uzp2 z1.d, z1.d, z0.d
; CHECK-NEXT: faddv d0, p0, z2.d
; CHECK-NEXT: faddv d1, p0, z1.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
@@ -205,20 +205,20 @@ define %"class.std::complex" @complex_mul_v2f64_unrolled(ptr %a, ptr %b) {
; CHECK-NEXT: ldr z18, [x1, #3, mul vl]
; CHECK-NEXT: ldr z19, [x1, #2, mul vl]
; CHECK-NEXT: add x1, x1, x10
-; CHECK-NEXT: fcmla z0.d, p0/m, z16.d, z5.d, #0
-; CHECK-NEXT: fcmla z1.d, p0/m, z7.d, z4.d, #0
+; CHECK-NEXT: fcmla z1.d, p0/m, z16.d, z5.d, #0
+; CHECK-NEXT: fcmla z0.d, p0/m, z7.d, z4.d, #0
; CHECK-NEXT: fcmla z3.d, p0/m, z18.d, z6.d, #0
; CHECK-NEXT: fcmla z2.d, p0/m, z19.d, z17.d, #0
-; CHECK-NEXT: fcmla z0.d, p0/m, z16.d, z5.d, #90
-; CHECK-NEXT: fcmla z1.d, p0/m, z7.d, z4.d, #90
+; CHECK-NEXT: fcmla z1.d, p0/m, z16.d, z5.d, #90
+; CHECK-NEXT: fcmla z0.d, p0/m, z7.d, z4.d, #90
; CHECK-NEXT: fcmla z3.d, p0/m, z18.d, z6.d, #90
; CHECK-NEXT: fcmla z2.d, p0/m, z19.d, z17.d, #90
; CHECK-NEXT: b.ne .LBB2_1
; CHECK-NEXT: // %bb.2: // %exit.block
; CHECK-NEXT: uzp1 z4.d, z2.d, z3.d
-; CHECK-NEXT: uzp1 z5.d, z0.d, z1.d
+; CHECK-NEXT: uzp1 z5.d, z1.d, z0.d
; CHECK-NEXT: uzp2 z2.d, z2.d, z3.d
-; CHECK-NEXT: uzp2 z0.d, z0.d, z1.d
+; CHECK-NEXT: uzp2 z0.d, z1.d, z0.d
; CHECK-NEXT: fadd z1.d, z4.d, z5.d
; CHECK-NEXT: fadd z2.d, z2.d, z0.d
; CHECK-NEXT: faddv d0, p0, z1.d
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions.ll
index aed3072bb4af3..355adec955e4b 100644
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions.ll
@@ -25,14 +25,14 @@ define dso_local %"struct.std::complex" @complex_mul_v2f64(ptr %a, ptr %b) {
; CHECK-NEXT: ldp q3, q2, [x9]
; CHECK-NEXT: cmp x8, #1600
; CHECK-NEXT: ldp q5, q4, [x10]
-; CHECK-NEXT: fcmla v0.2d, v5.2d, v3.2d, #0
-; CHECK-NEXT: fcmla v1.2d, v4.2d, v2.2d, #0
-; CHECK-NEXT: fcmla v0.2d, v5.2d, v3.2d, #90
-; CHECK-NEXT: fcmla v1.2d, v4.2d, v2.2d, #90
+; CHECK-NEXT: fcmla v1.2d, v5.2d, v3.2d, #0
+; CHECK-NEXT: fcmla v0.2d, v4.2d, v2.2d, #0
+; CHECK-NEXT: fcmla v1.2d, v5.2d, v3.2d, #90
+; CHECK-NEXT: fcmla v0.2d, v4.2d, v2.2d, #90
; CHECK-NEXT: b.ne .LBB0_1
; CHECK-NEXT: // %bb.2: // %middle.block
-; CHECK-NEXT: zip2 v2.2d, v0.2d, v1.2d
-; CHECK-NEXT: zip1 v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: zip2 v2.2d, v1.2d, v0.2d
+; CHECK-NEXT: zip1 v0.2d, v1.2d, v0.2d
; CHECK-NEXT: faddp d0, v0.2d
; CHECK-NEXT: faddp d1, v2.2d
; CHECK-NEXT: ret
@@ -159,20 +159,20 @@ define %"struct.std::complex" @complex_mul_v2f64_unrolled(ptr %a, ptr %b) {
; CHECK-NEXT: ldp q17, q16, [x8], #64
; CHECK-NEXT: ldp q19, q18, [x9], #64
; CHECK-NEXT: fcmla v2.2d, v7.2d, v5.2d, #0
-; CHECK-NEXT: fcmla v0.2d, v6.2d, v4.2d, #0
-; CHECK-NEXT: fcmla v1.2d, v19.2d, v17.2d, #0
+; CHECK-NEXT: fcmla v1.2d, v6.2d, v4.2d, #0
+; CHECK-NEXT: fcmla v0.2d, v19.2d, v17.2d, #0
; CHECK-NEXT: fcmla v3.2d, v18.2d, v16.2d, #0
; CHECK-NEXT: fcmla v2.2d, v7.2d, v5.2d, #90
-; CHECK-NEXT: fcmla v0.2d, v6.2d, v4.2d, #90
-; CHECK-NEXT: fcmla v1.2d, v19.2d, v17.2d, #90
+; CHECK-NEXT: fcmla v1.2d, v6.2d, v4.2d, #90
+; CHECK-NEXT: fcmla v0.2d, v19.2d, v17.2d, #90
; CHECK-NEXT: fcmla v3.2d, v18.2d, v16.2d, #90
; CHECK-NEXT: b.ne .LBB2_1
; CHECK-NEXT: // %bb.2: // %middle.block
-; CHECK-NEXT: zip2 v4.2d, v1.2d, v3.2d
-; CHECK-NEXT: zip1 v1.2d, v1.2d, v3.2d
-; CHECK-NEXT: zip2 v3.2d, v2.2d, v0.2d
-; CHECK-NEXT: zip1 v0.2d, v2.2d, v0.2d
-; CHECK-NEXT: fadd v0.2d, v1.2d, v0.2d
+; CHECK-NEXT: zip2 v4.2d, v0.2d, v3.2d
+; CHECK-NEXT: zip1 v0.2d, v0.2d, v3.2d
+; CHECK-NEXT: zip2 v3.2d, v2.2d, v1.2d
+; CHECK-NEXT: zip1 v1.2d, v2.2d, v1.2d
+; CHECK-NEXT: fadd v0.2d, v0.2d, v1.2d
; CHECK-NEXT: fadd v1.2d, v4.2d, v3.2d
; CHECK-NEXT: faddp d0, v0.2d
; CHECK-NEXT: faddp d1, v1.2d
diff --git a/llvm/test/CodeGen/AArch64/machine-sink-kill-flags.ll b/llvm/test/CodeGen/AArch64/machine-sink-kill-flags.ll
index 338084295fc7f..0fe4683d97a23 100644
--- a/llvm/test/CodeGen/AArch64/machine-sink-kill-flags.ll
+++ b/llvm/test/CodeGen/AArch64/machine-sink-kill-flags.ll
@@ -16,8 +16,9 @@ define i32 @test(ptr %ptr) {
; CHECK-NEXT: mov w9, wzr
; CHECK-NEXT: LBB0_1: ; %.thread
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: lsr w11, w9, #1
; CHECK-NEXT: sub w10, w9, #1
-; CHECK-NEXT: lsr w9, w9, #1
+; CHECK-NEXT: mov w9, w11
; CHECK-NEXT: tbnz w10, #0, LBB0_1
; CHECK-NEXT: ; %bb.2: ; %bb343
; CHECK-NEXT: and w9, w10, #0x1
diff --git a/llvm/test/CodeGen/AArch64/sve-extract-fixed-from-scalable-vector.ll b/llvm/test/CodeGen/AArch64/sve-extract-fixed-from-scalable-vector.ll
index 52a77cb396909..6c6a691760af3 100644
--- a/llvm/test/CodeGen/AArch64/sve-extract-fixed-from-scalable-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-extract-fixed-from-scalable-vector.ll
@@ -147,15 +147,15 @@ define <2 x float> @extract_v2f32_nxv16f32_2(<vscale x 16 x float> %arg) {
define <4 x i1> @extract_v4i1_nxv32i1_0(<vscale x 32 x i1> %arg) {
; CHECK-LABEL: extract_v4i1_nxv32i1_0:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z0.b, p0/z, #1 // =0x1
-; CHECK-NEXT: umov w8, v0.b[1]
-; CHECK-NEXT: mov v1.16b, v0.16b
+; CHECK-NEXT: mov z1.b, p0/z, #1 // =0x1
+; CHECK-NEXT: umov w8, v1.b[1]
+; CHECK-NEXT: mov v0.16b, v1.16b
+; CHECK-NEXT: umov w9, v1.b[2]
; CHECK-NEXT: mov v0.h[1], w8
-; CHECK-NEXT: umov w8, v1.b[2]
-; CHECK-NEXT: mov v0.h[2], w8
; CHECK-NEXT: umov w8, v1.b[3]
+; CHECK-NEXT: mov v0.h[2], w9
; CHECK-NEXT: mov v0.h[3], w8
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
%ext = call <4 x i1> @llvm.vector.extract.v4i1.nxv32i1(<vscale x 32 x i1> %arg, i64 0)
ret <4 x i1> %ext
diff --git a/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll b/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll
index 72994100b2970..1cefe96962e29 100644
--- a/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll
@@ -248,15 +248,15 @@ define <2 x i1> @extract_v2i1_nxv2i1(<vscale x 2 x i1> %inmask) {
define <4 x i1> @extract_v4i1_nxv4i1(<vscale x 4 x i1> %inmask) {
; CHECK-LABEL: extract_v4i1_nxv4i1:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z0.s, p0/z, #1 // =0x1
-; CHECK-NEXT: mov w8, v0.s[1]
-; CHECK-NEXT: mov v1.16b, v0.16b
+; CHECK-NEXT: mov z1.s, p0/z, #1 // =0x1
+; CHECK-NEXT: mov w8, v1.s[1]
+; CHECK-NEXT: mov v0.16b, v1.16b
+; CHECK-NEXT: mov w9, v1.s[2]
; CHECK-NEXT: mov v0.h[1], w8
-; CHECK-NEXT: mov w8, v1.s[2]
-; CHECK-NEXT: mov v0.h[2], w8
; CHECK-NEXT: mov w8, v1.s[3]
+; CHECK-NEXT: mov v0.h[2], w9
; CHECK-NEXT: mov v0.h[3], w8
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
%mask = call <4 x i1> @llvm.vector.extract.v4i1.nxv4i1(<vscale x 4 x i1> %inmask, i64 0)
ret <4 x i1> %mask
@@ -265,23 +265,23 @@ define <4 x i1> @extract_v4i1_nxv4i1(<vscale x 4 x i1> %inmask) {
define <8 x i1> @extract_v8i1_nxv8i1(<vscale x 8 x i1> %inmask) {
; CHECK-LABEL: extract_v8i1_nxv8i1:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z0.h, p0/z, #1 // =0x1
-; CHECK-NEXT: umov w8, v0.h[1]
-; CHECK-NEXT: mov v1.16b, v0.16b
+; CHECK-NEXT: mov z1.h, p0/z, #1 // =0x1
+; CHECK-NEXT: umov w8, v1.h[1]
+; CHECK-NEXT: mov v0.16b, v1.16b
+; CHECK-NEXT: umov w9, v1.h[2]
; CHECK-NEXT: mov v0.b[1], w8
-; CHECK-NEXT: umov w8, v1.h[2]
-; CHECK-NEXT: mov v0.b[2], w8
; CHECK-NEXT: umov w8, v1.h[3]
+; CHECK-NEXT: mov v0.b[2], w9
+; CHECK-NEXT: umov w9, v1.h[4]
; CHECK-NEXT: mov v0.b[3], w8
-; CHECK-NEXT: umov w8, v1.h[4]
-; CHECK-NEXT: mov v0.b[4], w8
; CHECK-NEXT: umov w8, v1.h[5]
+; CHECK-NEXT: mov v0.b[4], w9
+; CHECK-NEXT: umov w9, v1.h[6]
; CHECK-NEXT: mov v0.b[5], w8
-; CHECK-NEXT: umov w8, v1.h[6]
-; CHECK-NEXT: mov v0.b[6], w8
; CHECK-NEXT: umov w8, v1.h[7]
+; CHECK-NEXT: mov v0.b[6], w9
; CHECK-NEXT: mov v0.b[7], w8
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
%mask = call <8 x i1> @llvm.vector.extract.v8i1.nxv8i1(<vscale x 8 x i1> %inmask, i64 0)
ret <8 x i1> %mask
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-reshuffle.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-reshuffle.ll
index 8e807cda7166d..41e4a38fad90b 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-reshuffle.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-reshuffle.ll
@@ -8,15 +8,15 @@ target triple = "aarch64-unknown-linux-gnu"
define <4 x i1> @reshuffle_v4i1_nxv4i1(<vscale x 4 x i1> %a) #0 {
; CHECK-LABEL: reshuffle_v4i1_nxv4i1:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z0.s, p0/z, #1 // =0x1
-; CHECK-NEXT: mov w8, v0.s[1]
-; CHECK-NEXT: mov v1.16b, v0.16b
+; CHECK-NEXT: mov z1.s, p0/z, #1 // =0x1
+; CHECK-NEXT: mov w8, v1.s[1]
+; CHECK-NEXT: mov v0.16b, v1.16b
+; CHECK-NEXT: mov w9, v1.s[2]
; CHECK-NEXT: mov v0.h[1], w8
-; CHECK-NEXT: mov w8, v1.s[2]
-; CHECK-NEXT: mov v0.h[2], w8
; CHECK-NEXT: mov w8, v1.s[3]
+; CHECK-NEXT: mov v0.h[2], w9
; CHECK-NEXT: mov v0.h[3], w8
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
%el0 = extractelement <vscale x 4 x i1> %a, i32 0
%el1 = extractelement <vscale x 4 x i1> %a, i32 1
diff --git a/llvm/test/CodeGen/AArch64/zext-to-tbl.ll b/llvm/test/CodeGen/AArch64/zext-to-tbl.ll
index 935189dec48ac..74a717f1635a3 100644
--- a/llvm/test/CodeGen/AArch64/zext-to-tbl.ll
+++ b/llvm/test/CodeGen/AArch64/zext-to-tbl.ll
@@ -2835,11 +2835,11 @@ define i32 @test_widening_instr_mull(ptr %p1, ptr %p2, i32 %h) {
; CHECK-BE-NEXT: .LBB24_1: // %loop
; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-BE-NEXT: ld1 { v0.16b }, [x1], #16
-; CHECK-BE-NEXT: mov x8, x0
+; CHECK-BE-NEXT: add x8, x0, #16
; CHECK-BE-NEXT: ld1 { v1.8h }, [x0]
-; CHECK-BE-NEXT: add x0, x0, #16
-; CHECK-BE-NEXT: add x9, x8, #48
-; CHECK-BE-NEXT: ld1 { v3.8h }, [x0]
+; CHECK-BE-NEXT: ld1 { v3.8h }, [x8]
+; CHECK-BE-NEXT: add x9, x0, #48
+; CHECK-BE-NEXT: add x10, x0, #32
; CHECK-BE-NEXT: subs w2, w2, #1
; CHECK-BE-NEXT: ushll v2.8h, v0.8b, #0
; CHECK-BE-NEXT: ushll2 v0.8h, v0.16b, #0
@@ -2847,11 +2847,11 @@ define i32 @test_widening_instr_mull(ptr %p1, ptr %p2, i32 %h) {
; CHECK-BE-NEXT: umull2 v5.4s, v3.8h, v0.8h
; CHECK-BE-NEXT: umull v0.4s, v3.4h, v0.4h
; CHECK-BE-NEXT: umull2 v1.4s, v1.8h, v2.8h
-; CHECK-BE-NEXT: st1 { v4.4s }, [x8]
-; CHECK-BE-NEXT: add x8, x8, #32
+; CHECK-BE-NEXT: st1 { v4.4s }, [x0]
+; CHECK-BE-NEXT: mov x0, x8
; CHECK-BE-NEXT: st1 { v5.4s }, [x9]
-; CHECK-BE-NEXT: st1 { v0.4s }, [x8]
-; CHECK-BE-NEXT: st1 { v1.4s }, [x0]
+; CHECK-BE-NEXT: st1 { v0.4s }, [x10]
+; CHECK-BE-NEXT: st1 { v1.4s }, [x8]
; CHECK-BE-NEXT: b.ne .LBB24_1
; CHECK-BE-NEXT: // %bb.2: // %exit
; CHECK-BE-NEXT: mov w0, wzr
@@ -2950,26 +2950,26 @@ define i32 @test_widening_instr_mull_64(ptr %p1, ptr %p2, i32 %h) {
; CHECK-BE-NEXT: .LBB25_1: // %loop
; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-BE-NEXT: ld1 { v4.16b }, [x0]
-; CHECK-BE-NEXT: add x10, x1, #48
+; CHECK-BE-NEXT: add x9, x1, #48
+; CHECK-BE-NEXT: add x8, x1, #32
+; CHECK-BE-NEXT: ld1 { v18.4s }, [x9]
; CHECK-BE-NEXT: ld1 { v16.4s }, [x1]
-; CHECK-BE-NEXT: add x9, x1, #32
-; CHECK-BE-NEXT: ld1 { v18.4s }, [x10]
; CHECK-BE-NEXT: add x1, x1, #16
-; CHECK-BE-NEXT: ld1 { v20.4s }, [x9]
+; CHECK-BE-NEXT: ld1 { v20.4s }, [x8]
; CHECK-BE-NEXT: ld1 { v22.4s }, [x1]
-; CHECK-BE-NEXT: add x9, x0, #96
+; CHECK-BE-NEXT: add x8, x0, #96
; CHECK-BE-NEXT: tbl v5.16b, { v4.16b }, v3.16b
; CHECK-BE-NEXT: tbl v6.16b, { v4.16b }, v2.16b
; CHECK-BE-NEXT: tbl v7.16b, { v4.16b }, v1.16b
; CHECK-BE-NEXT: tbl v4.16b, { v4.16b }, v0.16b
; CHECK-BE-NEXT: ext v24.16b, v18.16b, v18.16b, #8
-; CHECK-BE-NEXT: mov x8, x0
+; CHECK-BE-NEXT: add x9, x0, #32
; CHECK-BE-NEXT: ext v25.16b, v20.16b, v20.16b, #8
-; CHECK-BE-NEXT: add x10, x0, #32
+; CHECK-BE-NEXT: add x10, x0, #16
; CHECK-BE-NEXT: subs w2, w2, #1
; CHECK-BE-NEXT: ext v17.16b, v5.16b, v5.16b, #8
-; CHECK-BE-NEXT: rev32 v5.8b, v5.8b
; CHECK-BE-NEXT: ext v19.16b, v6.16b, v6.16b, #8
+; CHECK-BE-NEXT: rev32 v5.8b, v5.8b
; CHECK-BE-NEXT: rev32 v21.8b, v7.8b
; CHECK-BE-NEXT: rev32 v23.8b, v4.8b
; CHECK-BE-NEXT: ext v7.16b, v7.16b, v7.16b, #8
@@ -2986,22 +2986,22 @@ define i32 @test_widening_instr_mull_64(ptr %p1, ptr %p2, i32 %h) {
; CHECK-BE-NEXT: rev32 v4.8b, v4.8b
; CHECK-BE-NEXT: umull v17.2d, v17.2s, v24.2s
; CHECK-BE-NEXT: umull v19.2d, v19.2s, v25.2s
-; CHECK-BE-NEXT: st1 { v5.2d }, [x9]
+; CHECK-BE-NEXT: st1 { v5.2d }, [x8]
; CHECK-BE-NEXT: umull v5.2d, v6.2s, v20.2s
; CHECK-BE-NEXT: umull v6.2d, v7.2s, v21.2s
-; CHECK-BE-NEXT: add x9, x0, #112
+; ...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/169219
More information about the llvm-commits mailing list