[llvm] f8e9723 - ARM: Enable terminal rule (#165958)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 10 12:49:05 PST 2025
Author: Matt Arsenault
Date: 2025-11-10T12:49:01-08:00
New Revision: f8e9723d17187d2b5ba867919ca2f5158fc28659
URL: https://github.com/llvm/llvm-project/commit/f8e9723d17187d2b5ba867919ca2f5158fc28659
DIFF: https://github.com/llvm/llvm-project/commit/f8e9723d17187d2b5ba867919ca2f5158fc28659.diff
LOG: ARM: Enable terminal rule (#165958)
Added:
Modified:
llvm/lib/Target/ARM/ARMSubtarget.h
llvm/test/CodeGen/Thumb2/LowOverheadLoops/constbound.ll
llvm/test/CodeGen/Thumb2/LowOverheadLoops/minloop.ll
llvm/test/CodeGen/Thumb2/LowOverheadLoops/varying-outer-2d-reduction.ll
llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-loops.ll
llvm/test/CodeGen/Thumb2/mve-float32regloops.ll
llvm/test/CodeGen/Thumb2/mve-gather-increment.ll
llvm/test/CodeGen/Thumb2/mve-gather-scatter-optimisation.ll
llvm/test/CodeGen/Thumb2/mve-pipelineloops.ll
llvm/test/CodeGen/Thumb2/mve-shuffle.ll
llvm/test/CodeGen/Thumb2/mve-vld4.ll
llvm/test/CodeGen/Thumb2/mve-vmaxnma-commute.ll
llvm/test/CodeGen/Thumb2/mve-vst4.ll
llvm/test/CodeGen/Thumb2/pacbti-m-vla.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h
index 4a0883cc662e7..34baa3108402c 100644
--- a/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -377,6 +377,7 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
bool isRWPI() const;
bool useMachineScheduler() const { return UseMISched; }
+ bool enableTerminalRule() const override { return true; }
bool useMachinePipeliner() const { return UseMIPipeliner; }
bool hasMinSize() const { return OptMinSize; }
bool isThumb1Only() const { return isThumb() && !hasThumb2(); }
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/constbound.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/constbound.ll
index 79665af17ef58..9632469261f4d 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/constbound.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/constbound.ll
@@ -7,22 +7,22 @@ define dso_local i32 @test_500_504(ptr nocapture readonly %x) {
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: mov.w lr, #126
-; CHECK-NEXT: adr r2, .LCPI0_0
-; CHECK-NEXT: vldrw.u32 q0, [r2]
-; CHECK-NEXT: mov.w r2, #500
-; CHECK-NEXT: vdup.32 q1, r2
-; CHECK-NEXT: movs r1, #0
+; CHECK-NEXT: adr r1, .LCPI0_0
+; CHECK-NEXT: vldrw.u32 q0, [r1]
+; CHECK-NEXT: mov.w r1, #500
+; CHECK-NEXT: mov.w r12, #0
+; CHECK-NEXT: vdup.32 q1, r1
; CHECK-NEXT: movs r2, #0
; CHECK-NEXT: .LBB0_1: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vqadd.u32 q2, q0, r1
-; CHECK-NEXT: adds r1, #4
+; CHECK-NEXT: vqadd.u32 q2, q0, r2
+; CHECK-NEXT: adds r2, #4
; CHECK-NEXT: vptt.u32 hi, q1, q2
; CHECK-NEXT: vldrwt.u32 q2, [r0], #16
-; CHECK-NEXT: vaddvat.u32 r2, q2
+; CHECK-NEXT: vaddvat.u32 r12, q2
; CHECK-NEXT: le lr, .LBB0_1
; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup
-; CHECK-NEXT: mov r0, r2
+; CHECK-NEXT: mov r0, r12
; CHECK-NEXT: pop {r7, pc}
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.3:
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/minloop.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/minloop.ll
index ec257bcf123f3..bcedcd40ba112 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/minloop.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/minloop.ll
@@ -28,29 +28,29 @@ define void @arm_min_q31(ptr nocapture readonly %pSrc, i32 %blockSize, ptr nocap
; CHECK-NEXT: str r6, [sp] @ 4-byte Spill
; CHECK-NEXT: subs r7, #4
; CHECK-NEXT: movs r6, #1
-; CHECK-NEXT: mov.w r8, #0
; CHECK-NEXT: mov.w r10, #0
+; CHECK-NEXT: mov.w r8, #0
; CHECK-NEXT: add.w lr, r6, r7, lsr #2
; CHECK-NEXT: .LBB0_5: @ %while.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldr r11, [r0, #16]!
-; CHECK-NEXT: ldrd r5, r7, [r0, #-12]
+; CHECK-NEXT: ldrd r5, r6, [r0, #-12]
; CHECK-NEXT: ldr r4, [r0, #-4]
; CHECK-NEXT: cmp r12, r5
; CHECK-NEXT: csel r5, r5, r12, gt
-; CHECK-NEXT: csinc r6, r10, r8, le
-; CHECK-NEXT: cmp r5, r7
+; CHECK-NEXT: csinc r7, r10, r8, le
+; CHECK-NEXT: cmp r5, r6
; CHECK-NEXT: it gt
-; CHECK-NEXT: addgt.w r6, r8, #2
-; CHECK-NEXT: csel r7, r7, r5, gt
-; CHECK-NEXT: cmp r7, r4
+; CHECK-NEXT: addgt.w r7, r8, #2
+; CHECK-NEXT: csel r6, r6, r5, gt
+; CHECK-NEXT: cmp r6, r4
; CHECK-NEXT: it gt
-; CHECK-NEXT: addgt.w r6, r8, #3
-; CHECK-NEXT: csel r7, r4, r7, gt
+; CHECK-NEXT: addgt.w r7, r8, #3
+; CHECK-NEXT: csel r6, r4, r6, gt
; CHECK-NEXT: add.w r8, r8, #4
-; CHECK-NEXT: cmp r7, r11
-; CHECK-NEXT: csel r10, r8, r6, gt
-; CHECK-NEXT: csel r12, r11, r7, gt
+; CHECK-NEXT: cmp r6, r11
+; CHECK-NEXT: csel r10, r8, r7, gt
+; CHECK-NEXT: csel r12, r11, r6, gt
; CHECK-NEXT: le lr, .LBB0_5
; CHECK-NEXT: @ %bb.6: @ %while.end.loopexit.unr-lcssa.loopexit
; CHECK-NEXT: ldr r6, [sp] @ 4-byte Reload
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/varying-outer-2d-reduction.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/varying-outer-2d-reduction.ll
index 1769c5d2fd385..98e082be4cad1 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/varying-outer-2d-reduction.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/varying-outer-2d-reduction.ll
@@ -21,11 +21,12 @@ define dso_local void @varying_outer_2d_reduction(ptr nocapture readonly %Input,
; ENABLED-NEXT: it lt
; ENABLED-NEXT: bxlt lr
; ENABLED-NEXT: .LBB0_1: @ %for.body.lr.ph
-; ENABLED-NEXT: push.w {r4, r5, r6, r7, r9, r10, r11, lr}
+; ENABLED-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; ENABLED-NEXT: mov r11, r0
-; ENABLED-NEXT: ldr r0, [sp, #32]
+; ENABLED-NEXT: ldr r0, [sp, #36]
; ENABLED-NEXT: add.w r9, r2, #3
; ENABLED-NEXT: mov.w r12, #0
+; ENABLED-NEXT: mov.w r8, #1
; ENABLED-NEXT: mov r10, r11
; ENABLED-NEXT: uxth r0, r0
; ENABLED-NEXT: rsbs r5, r0, #0
@@ -49,18 +50,16 @@ define dso_local void @varying_outer_2d_reduction(ptr nocapture readonly %Input,
; ENABLED-NEXT: @ %bb.5: @ %vector.ph
; ENABLED-NEXT: @ in Loop: Header=BB0_4 Depth=1
; ENABLED-NEXT: bic r0, r9, #3
-; ENABLED-NEXT: movs r7, #1
-; ENABLED-NEXT: subs r0, #4
; ENABLED-NEXT: sub.w r4, r2, r12
+; ENABLED-NEXT: subs r0, #4
; ENABLED-NEXT: vmov.i32 q1, #0x0
-; ENABLED-NEXT: add.w r6, r7, r0, lsr #2
+; ENABLED-NEXT: mov r7, r10
+; ENABLED-NEXT: add.w r6, r8, r0, lsr #2
; ENABLED-NEXT: adds r0, r2, #3
; ENABLED-NEXT: sub.w r0, r0, r12
; ENABLED-NEXT: bic r0, r0, #3
; ENABLED-NEXT: subs r0, #4
-; ENABLED-NEXT: add.w r0, r7, r0, lsr #2
-; ENABLED-NEXT: mov r7, r10
-; ENABLED-NEXT: dls lr, r0
+; ENABLED-NEXT: add.w lr, r8, r0, lsr #2
; ENABLED-NEXT: mov r0, r11
; ENABLED-NEXT: .LBB0_6: @ %vector.body
; ENABLED-NEXT: @ Parent Loop BB0_4 Depth=1
@@ -83,7 +82,7 @@ define dso_local void @varying_outer_2d_reduction(ptr nocapture readonly %Input,
; ENABLED-NEXT: vaddv.u32 r0, q0
; ENABLED-NEXT: b .LBB0_3
; ENABLED-NEXT: .LBB0_8:
-; ENABLED-NEXT: pop.w {r4, r5, r6, r7, r9, r10, r11, lr}
+; ENABLED-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; ENABLED-NEXT: bx lr
;
; NOREDUCTIONS-LABEL: varying_outer_2d_reduction:
@@ -92,11 +91,12 @@ define dso_local void @varying_outer_2d_reduction(ptr nocapture readonly %Input,
; NOREDUCTIONS-NEXT: it lt
; NOREDUCTIONS-NEXT: bxlt lr
; NOREDUCTIONS-NEXT: .LBB0_1: @ %for.body.lr.ph
-; NOREDUCTIONS-NEXT: push.w {r4, r5, r6, r7, r9, r10, r11, lr}
+; NOREDUCTIONS-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; NOREDUCTIONS-NEXT: mov r11, r0
-; NOREDUCTIONS-NEXT: ldr r0, [sp, #32]
+; NOREDUCTIONS-NEXT: ldr r0, [sp, #36]
; NOREDUCTIONS-NEXT: add.w r9, r2, #3
; NOREDUCTIONS-NEXT: mov.w r12, #0
+; NOREDUCTIONS-NEXT: mov.w r8, #1
; NOREDUCTIONS-NEXT: mov r10, r11
; NOREDUCTIONS-NEXT: uxth r0, r0
; NOREDUCTIONS-NEXT: rsbs r5, r0, #0
@@ -120,18 +120,16 @@ define dso_local void @varying_outer_2d_reduction(ptr nocapture readonly %Input,
; NOREDUCTIONS-NEXT: @ %bb.5: @ %vector.ph
; NOREDUCTIONS-NEXT: @ in Loop: Header=BB0_4 Depth=1
; NOREDUCTIONS-NEXT: bic r0, r9, #3
-; NOREDUCTIONS-NEXT: movs r7, #1
-; NOREDUCTIONS-NEXT: subs r0, #4
; NOREDUCTIONS-NEXT: sub.w r4, r2, r12
+; NOREDUCTIONS-NEXT: subs r0, #4
; NOREDUCTIONS-NEXT: vmov.i32 q1, #0x0
-; NOREDUCTIONS-NEXT: add.w r6, r7, r0, lsr #2
+; NOREDUCTIONS-NEXT: mov r7, r10
+; NOREDUCTIONS-NEXT: add.w r6, r8, r0, lsr #2
; NOREDUCTIONS-NEXT: adds r0, r2, #3
; NOREDUCTIONS-NEXT: sub.w r0, r0, r12
; NOREDUCTIONS-NEXT: bic r0, r0, #3
; NOREDUCTIONS-NEXT: subs r0, #4
-; NOREDUCTIONS-NEXT: add.w r0, r7, r0, lsr #2
-; NOREDUCTIONS-NEXT: mov r7, r10
-; NOREDUCTIONS-NEXT: dls lr, r0
+; NOREDUCTIONS-NEXT: add.w lr, r8, r0, lsr #2
; NOREDUCTIONS-NEXT: mov r0, r11
; NOREDUCTIONS-NEXT: .LBB0_6: @ %vector.body
; NOREDUCTIONS-NEXT: @ Parent Loop BB0_4 Depth=1
@@ -154,7 +152,7 @@ define dso_local void @varying_outer_2d_reduction(ptr nocapture readonly %Input,
; NOREDUCTIONS-NEXT: vaddv.u32 r0, q0
; NOREDUCTIONS-NEXT: b .LBB0_3
; NOREDUCTIONS-NEXT: .LBB0_8:
-; NOREDUCTIONS-NEXT: pop.w {r4, r5, r6, r7, r9, r10, r11, lr}
+; NOREDUCTIONS-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; NOREDUCTIONS-NEXT: bx lr
entry:
%conv = sext i16 %N to i32
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-loops.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-loops.ll
index cbcbf1f392ce8..435acc29f076e 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-loops.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-loops.ll
@@ -165,74 +165,73 @@ define dso_local i32 @b(ptr %c, i32 %d, i32 %e, ptr %n) "frame-pointer"="all" {
; CHECK-NEXT: sub sp, #16
; CHECK-NEXT: wls lr, r1, .LBB2_3
; CHECK-NEXT: @ %bb.1: @ %while.body.preheader
-; CHECK-NEXT: adds r6, r3, #4
-; CHECK-NEXT: adds r1, r0, #4
+; CHECK-NEXT: add.w r9, r3, #4
+; CHECK-NEXT: add.w r10, r0, #4
; CHECK-NEXT: mvn r8, #1
-; CHECK-NEXT: @ implicit-def: $r9
+; CHECK-NEXT: @ implicit-def: $r6
; CHECK-NEXT: @ implicit-def: $r4
; CHECK-NEXT: str r2, [sp] @ 4-byte Spill
; CHECK-NEXT: .LBB2_2: @ %while.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: str r1, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT: ldr.w r1, [r10]
; CHECK-NEXT: asrs r2, r4, #31
-; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT: ldr r1, [r1]
+; CHECK-NEXT: str r6, [sp, #4] @ 4-byte Spill
; CHECK-NEXT: muls r1, r3, r1
; CHECK-NEXT: adds r4, r4, r1
; CHECK-NEXT: adc.w r1, r2, r1, asr #31
; CHECK-NEXT: adds.w r2, r4, #-2147483648
-; CHECK-NEXT: ldrd r2, r4, [r8]
-; CHECK-NEXT: adc r5, r1, #0
-; CHECK-NEXT: str r2, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT: smull r4, r2, r4, r9
-; CHECK-NEXT: asrs r1, r5, #31
+; CHECK-NEXT: ldrd r5, r4, [r8]
+; CHECK-NEXT: adc r2, r1, #0
; CHECK-NEXT: str r5, [sp, #8] @ 4-byte Spill
-; CHECK-NEXT: subs r4, r5, r4
-; CHECK-NEXT: sbcs r1, r2
-; CHECK-NEXT: ldr r2, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT: adds.w r10, r4, #-2147483648
-; CHECK-NEXT: adc r1, r1, #0
-; CHECK-NEXT: ldr r4, [r2, #-4]
+; CHECK-NEXT: smull r4, r5, r4, r6
+; CHECK-NEXT: asrs r1, r2, #31
+; CHECK-NEXT: str r2, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT: subs r4, r2, r4
+; CHECK-NEXT: sbcs r1, r5
+; CHECK-NEXT: adds.w r6, r4, #-2147483648
+; CHECK-NEXT: ldr r4, [r10, #-4]
+; CHECK-NEXT: adc r11, r1, #0
+; CHECK-NEXT: mov r1, r9
+; CHECK-NEXT: add.w r10, r10, #4
; CHECK-NEXT: muls r4, r3, r4
; CHECK-NEXT: adds r3, #4
; CHECK-NEXT: adds.w r12, r4, #-2147483648
; CHECK-NEXT: asr.w r5, r4, #31
-; CHECK-NEXT: ldr r4, [r6]
+; CHECK-NEXT: ldr.w r4, [r9]
; CHECK-NEXT: adc r5, r5, #0
; CHECK-NEXT: mul r2, r4, r0
-; CHECK-NEXT: adds r0, #4
; CHECK-NEXT: add.w r2, r2, #-2147483648
; CHECK-NEXT: asrl r12, r5, r2
-; CHECK-NEXT: smull r2, r5, r4, r12
-; CHECK-NEXT: lsll r2, r5, #30
-; CHECK-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT: asr.w r11, r5, #31
-; CHECK-NEXT: mov r12, r5
-; CHECK-NEXT: lsll r12, r11, r4
-; CHECK-NEXT: mul r2, r2, r9
-; CHECK-NEXT: lsrl r12, r11, #2
-; CHECK-NEXT: adds r2, #2
-; CHECK-NEXT: lsll r12, r11, r2
+; CHECK-NEXT: smull r2, r9, r4, r12
+; CHECK-NEXT: mov r12, r0
+; CHECK-NEXT: lsll r2, r9, #30
+; CHECK-NEXT: asr.w r5, r9, #31
+; CHECK-NEXT: mov r2, r9
+; CHECK-NEXT: mov r9, r1
+; CHECK-NEXT: ldrd r1, r0, [sp, #4] @ 8-byte Folded Reload
+; CHECK-NEXT: lsll r2, r5, r4
+; CHECK-NEXT: lsrl r2, r5, #2
+; CHECK-NEXT: muls r0, r1, r0
+; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT: adds r0, #2
+; CHECK-NEXT: lsll r2, r5, r0
+; CHECK-NEXT: add.w r0, r2, #-2147483648
; CHECK-NEXT: ldr r2, [sp] @ 4-byte Reload
-; CHECK-NEXT: add.w r5, r12, #-2147483648
-; CHECK-NEXT: asrl r10, r1, r5
-; CHECK-NEXT: ldr r5, [sp, #8] @ 4-byte Reload
-; CHECK-NEXT: lsrl r10, r1, #2
-; CHECK-NEXT: movs r1, #2
-; CHECK-NEXT: mov r9, r10
-; CHECK-NEXT: str.w r10, [r1]
-; CHECK-NEXT: ldr r1, [r8], #-4
-; CHECK-NEXT: mls r5, r1, r4, r5
-; CHECK-NEXT: adds.w r4, r5, #-2147483648
-; CHECK-NEXT: asr.w r1, r5, #31
+; CHECK-NEXT: asrl r6, r11, r0
+; CHECK-NEXT: movs r0, #2
+; CHECK-NEXT: lsrl r6, r11, #2
+; CHECK-NEXT: str r6, [r0]
+; CHECK-NEXT: ldr r0, [r8], #-4
+; CHECK-NEXT: mls r0, r0, r4, r1
+; CHECK-NEXT: adds.w r4, r0, #-2147483648
+; CHECK-NEXT: asr.w r1, r0, #31
; CHECK-NEXT: adc r1, r1, #0
; CHECK-NEXT: lsrl r4, r1, #2
-; CHECK-NEXT: rsbs r1, r4, #0
-; CHECK-NEXT: str r1, [r2]
-; CHECK-NEXT: str r1, [r6, #-4]
-; CHECK-NEXT: adds r6, #4
-; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT: adds r1, #4
+; CHECK-NEXT: rsbs r0, r4, #0
+; CHECK-NEXT: str r0, [r2]
+; CHECK-NEXT: str r0, [r9, #-4]
+; CHECK-NEXT: add.w r9, r9, #4
+; CHECK-NEXT: add.w r0, r12, #4
; CHECK-NEXT: le lr, .LBB2_2
; CHECK-NEXT: .LBB2_3: @ %while.end
; CHECK-NEXT: add sp, #16
diff --git a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll
index f7b4548f127bf..b6657d607ce6d 100644
--- a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll
@@ -1573,120 +1573,115 @@ define arm_aapcs_vfpcc void @arm_biquad_cascade_df1_f32(ptr nocapture readonly %
; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEXT: .pad #16
; CHECK-NEXT: sub sp, #16
-; CHECK-NEXT: ldrd r7, r9, [r0]
-; CHECK-NEXT: and r6, r3, #3
-; CHECK-NEXT: ldr r0, [r0, #8]
-; CHECK-NEXT: lsrs r3, r3, #2
-; CHECK-NEXT: @ implicit-def: $r12
-; CHECK-NEXT: str r6, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT: str r3, [sp] @ 4-byte Spill
-; CHECK-NEXT: str r2, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT: ldm.w r0, {r7, r9, r11}
+; CHECK-NEXT: and r0, r3, #3
+; CHECK-NEXT: @ implicit-def: $r5
+; CHECK-NEXT: str r0, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT: lsrs r0, r3, #2
+; CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill
; CHECK-NEXT: b .LBB19_3
; CHECK-NEXT: .LBB19_1: @ in Loop: Header=BB19_3 Depth=1
-; CHECK-NEXT: mov r3, r8
-; CHECK-NEXT: mov r2, r5
-; CHECK-NEXT: mov r4, r11
-; CHECK-NEXT: mov r8, r10
+; CHECK-NEXT: mov r8, r3
+; CHECK-NEXT: mov r3, r12
+; CHECK-NEXT: mov r0, r4
+; CHECK-NEXT: mov r12, r10
; CHECK-NEXT: .LBB19_2: @ %if.end69
; CHECK-NEXT: @ in Loop: Header=BB19_3 Depth=1
; CHECK-NEXT: ldr r7, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT: adds r0, #128
-; CHECK-NEXT: strd r2, r4, [r9]
-; CHECK-NEXT: ldr r2, [sp, #8] @ 4-byte Reload
-; CHECK-NEXT: subs r7, #1
-; CHECK-NEXT: strd r3, r8, [r9, #8]
-; CHECK-NEXT: add.w r9, r9, #16
+; CHECK-NEXT: add.w r11, r11, #128
+; CHECK-NEXT: strd r8, r0, [r9]
; CHECK-NEXT: mov r1, r2
+; CHECK-NEXT: strd r3, r12, [r9, #8]
+; CHECK-NEXT: add.w r9, r9, #16
+; CHECK-NEXT: subs r7, #1
; CHECK-NEXT: beq.w .LBB19_13
; CHECK-NEXT: .LBB19_3: @ %do.body
; CHECK-NEXT: @ =>This Loop Header: Depth=1
; CHECK-NEXT: @ Child Loop BB19_5 Depth 2
-; CHECK-NEXT: ldrd r5, r11, [r9]
+; CHECK-NEXT: ldr.w r10, [r9, #12]
; CHECK-NEXT: mov r6, r2
-; CHECK-NEXT: ldrd r8, r10, [r9, #8]
-; CHECK-NEXT: ldr r2, [sp] @ 4-byte Reload
+; CHECK-NEXT: ldm.w r9, {r3, r4, r12}
+; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
; CHECK-NEXT: str r7, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT: wls lr, r2, .LBB19_6
+; CHECK-NEXT: wls lr, r0, .LBB19_6
; CHECK-NEXT: @ %bb.4: @ %while.body.lr.ph
; CHECK-NEXT: @ in Loop: Header=BB19_3 Depth=1
-; CHECK-NEXT: ldr r6, [sp, #8] @ 4-byte Reload
-; CHECK-NEXT: mov r4, r11
-; CHECK-NEXT: mov r3, r5
+; CHECK-NEXT: mov r6, r2
; CHECK-NEXT: .LBB19_5: @ %while.body
; CHECK-NEXT: @ Parent Loop BB19_3 Depth=1
; CHECK-NEXT: @ => This Inner Loop Header: Depth=2
-; CHECK-NEXT: ldr r5, [r1, #12]
-; CHECK-NEXT: vldrw.u32 q2, [r0]
-; CHECK-NEXT: vldrw.u32 q6, [r0, #16]
-; CHECK-NEXT: ldm.w r1, {r2, r7, r11}
-; CHECK-NEXT: vmul.f32 q2, q2, r5
-; CHECK-NEXT: vldrw.u32 q7, [r0, #32]
-; CHECK-NEXT: vfma.f32 q2, q6, r11
-; CHECK-NEXT: vldrw.u32 q4, [r0, #48]
+; CHECK-NEXT: mov r5, r3
+; CHECK-NEXT: mov r8, r4
+; CHECK-NEXT: ldrd r4, r3, [r1, #8]
+; CHECK-NEXT: vldrw.u32 q2, [r11]
+; CHECK-NEXT: vldrw.u32 q6, [r11, #16]
+; CHECK-NEXT: ldrd r0, r7, [r1]
+; CHECK-NEXT: vmul.f32 q2, q2, r3
+; CHECK-NEXT: vldrw.u32 q7, [r11, #32]
+; CHECK-NEXT: vfma.f32 q2, q6, r4
+; CHECK-NEXT: vldrw.u32 q4, [r11, #48]
; CHECK-NEXT: vfma.f32 q2, q7, r7
-; CHECK-NEXT: vldrw.u32 q5, [r0, #64]
-; CHECK-NEXT: vfma.f32 q2, q4, r2
-; CHECK-NEXT: vldrw.u32 q3, [r0, #80]
-; CHECK-NEXT: vfma.f32 q2, q5, r3
-; CHECK-NEXT: vldrw.u32 q1, [r0, #96]
-; CHECK-NEXT: vfma.f32 q2, q3, r4
-; CHECK-NEXT: vldrw.u32 q0, [r0, #112]
-; CHECK-NEXT: vfma.f32 q2, q1, r8
+; CHECK-NEXT: vldrw.u32 q5, [r11, #64]
+; CHECK-NEXT: vfma.f32 q2, q4, r0
+; CHECK-NEXT: vldrw.u32 q3, [r11, #80]
+; CHECK-NEXT: vfma.f32 q2, q5, r5
+; CHECK-NEXT: vldrw.u32 q1, [r11, #96]
+; CHECK-NEXT: vfma.f32 q2, q3, r8
+; CHECK-NEXT: vldrw.u32 q0, [r11, #112]
+; CHECK-NEXT: vfma.f32 q2, q1, r12
; CHECK-NEXT: adds r1, #16
; CHECK-NEXT: vfma.f32 q2, q0, r10
-; CHECK-NEXT: mov r4, r11
-; CHECK-NEXT: vmov r10, r8, d5
+; CHECK-NEXT: mov r5, r3
+; CHECK-NEXT: vmov r10, r12, d5
; CHECK-NEXT: vstrb.8 q2, [r6], #16
-; CHECK-NEXT: mov r3, r5
-; CHECK-NEXT: mov r12, r5
; CHECK-NEXT: le lr, .LBB19_5
; CHECK-NEXT: .LBB19_6: @ %while.end
; CHECK-NEXT: @ in Loop: Header=BB19_3 Depth=1
-; CHECK-NEXT: ldr r3, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT: cmp r3, #0
+; CHECK-NEXT: ldr r7, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT: cmp r7, #0
; CHECK-NEXT: beq .LBB19_1
; CHECK-NEXT: @ %bb.7: @ %if.then
; CHECK-NEXT: @ in Loop: Header=BB19_3 Depth=1
-; CHECK-NEXT: ldrd lr, r4, [r1]
-; CHECK-NEXT: vldrw.u32 q0, [r0]
-; CHECK-NEXT: ldrd r2, r1, [r1, #8]
-; CHECK-NEXT: vldrw.u32 q6, [r0, #16]
-; CHECK-NEXT: vldrw.u32 q7, [r0, #32]
-; CHECK-NEXT: vldrw.u32 q4, [r0, #48]
+; CHECK-NEXT: ldrd lr, r0, [r1]
+; CHECK-NEXT: vldrw.u32 q0, [r11]
+; CHECK-NEXT: ldrd r8, r1, [r1, #8]
+; CHECK-NEXT: vldrw.u32 q6, [r11, #16]
+; CHECK-NEXT: vldrw.u32 q7, [r11, #32]
+; CHECK-NEXT: vldrw.u32 q4, [r11, #48]
; CHECK-NEXT: vmul.f32 q0, q0, r1
-; CHECK-NEXT: vldrw.u32 q5, [r0, #64]
-; CHECK-NEXT: vfma.f32 q0, q6, r2
-; CHECK-NEXT: vldrw.u32 q3, [r0, #80]
-; CHECK-NEXT: vfma.f32 q0, q7, r4
-; CHECK-NEXT: vldrw.u32 q2, [r0, #96]
+; CHECK-NEXT: vldrw.u32 q5, [r11, #64]
+; CHECK-NEXT: vfma.f32 q0, q6, r8
+; CHECK-NEXT: vldrw.u32 q3, [r11, #80]
+; CHECK-NEXT: vfma.f32 q0, q7, r0
+; CHECK-NEXT: vldrw.u32 q2, [r11, #96]
; CHECK-NEXT: vfma.f32 q0, q4, lr
-; CHECK-NEXT: vldrw.u32 q1, [r0, #112]
-; CHECK-NEXT: vfma.f32 q0, q5, r5
-; CHECK-NEXT: cmp r3, #1
-; CHECK-NEXT: vfma.f32 q0, q3, r11
-; CHECK-NEXT: vfma.f32 q0, q2, r8
+; CHECK-NEXT: vldrw.u32 q1, [r11, #112]
+; CHECK-NEXT: vfma.f32 q0, q5, r3
+; CHECK-NEXT: cmp r7, #1
+; CHECK-NEXT: vfma.f32 q0, q3, r4
+; CHECK-NEXT: vfma.f32 q0, q2, r12
; CHECK-NEXT: vfma.f32 q0, q1, r10
-; CHECK-NEXT: vmov r5, s0
+; CHECK-NEXT: vmov r4, s0
; CHECK-NEXT: bne .LBB19_9
; CHECK-NEXT: @ %bb.8: @ %if.then58
; CHECK-NEXT: @ in Loop: Header=BB19_3 Depth=1
-; CHECK-NEXT: str r5, [r6]
-; CHECK-NEXT: mov r2, lr
-; CHECK-NEXT: mov r4, r12
-; CHECK-NEXT: mov r3, r5
+; CHECK-NEXT: str r4, [r6]
+; CHECK-NEXT: mov r8, lr
+; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: mov r3, r4
; CHECK-NEXT: b .LBB19_12
; CHECK-NEXT: .LBB19_9: @ %if.else
; CHECK-NEXT: @ in Loop: Header=BB19_3 Depth=1
-; CHECK-NEXT: vmov r8, s1
-; CHECK-NEXT: cmp r3, #2
+; CHECK-NEXT: vmov r12, s1
+; CHECK-NEXT: cmp r7, #2
; CHECK-NEXT: vstr s1, [r6, #4]
-; CHECK-NEXT: str r5, [r6]
+; CHECK-NEXT: str r4, [r6]
; CHECK-NEXT: bne .LBB19_11
; CHECK-NEXT: @ %bb.10: @ in Loop: Header=BB19_3 Depth=1
-; CHECK-NEXT: mov r2, r4
-; CHECK-NEXT: mov r3, r8
-; CHECK-NEXT: mov r4, lr
-; CHECK-NEXT: mov r8, r5
+; CHECK-NEXT: mov r8, r0
+; CHECK-NEXT: mov r3, r12
+; CHECK-NEXT: mov r0, lr
+; CHECK-NEXT: mov r12, r4
; CHECK-NEXT: b .LBB19_12
; CHECK-NEXT: .LBB19_11: @ %if.else64
; CHECK-NEXT: @ in Loop: Header=BB19_3 Depth=1
@@ -1694,7 +1689,7 @@ define arm_aapcs_vfpcc void @arm_biquad_cascade_df1_f32(ptr nocapture readonly %
; CHECK-NEXT: vstr s2, [r6, #8]
; CHECK-NEXT: .LBB19_12: @ %if.end69
; CHECK-NEXT: @ in Loop: Header=BB19_3 Depth=1
-; CHECK-NEXT: mov r12, r1
+; CHECK-NEXT: mov r5, r1
; CHECK-NEXT: b .LBB19_2
; CHECK-NEXT: .LBB19_13: @ %do.end
; CHECK-NEXT: add sp, #16
@@ -1901,8 +1896,8 @@ define void @arm_biquad_cascade_df2T_f32(ptr nocapture readonly %S, ptr nocaptur
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr}
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr}
-; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13}
-; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13}
+; CHECK-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-NEXT: vpush {d8, d9, d10, d11}
; CHECK-NEXT: ldrd r6, r12, [r0, #4]
; CHECK-NEXT: lsr.w r8, r3, #1
; CHECK-NEXT: ldrb r0, [r0]
@@ -1910,11 +1905,11 @@ define void @arm_biquad_cascade_df2T_f32(ptr nocapture readonly %S, ptr nocaptur
; CHECK-NEXT: b .LBB20_3
; CHECK-NEXT: .LBB20_1: @ %if.else
; CHECK-NEXT: @ in Loop: Header=BB20_3 Depth=1
-; CHECK-NEXT: vmov.f32 s14, s13
-; CHECK-NEXT: vstr s12, [r6]
+; CHECK-NEXT: vmov.f32 s6, s5
+; CHECK-NEXT: vstr s4, [r6]
; CHECK-NEXT: .LBB20_2: @ %if.end
; CHECK-NEXT: @ in Loop: Header=BB20_3 Depth=1
-; CHECK-NEXT: vstr s14, [r6, #4]
+; CHECK-NEXT: vstr s6, [r6, #4]
; CHECK-NEXT: add.w r12, r12, #20
; CHECK-NEXT: adds r6, #8
; CHECK-NEXT: subs r0, #1
@@ -1923,41 +1918,39 @@ define void @arm_biquad_cascade_df2T_f32(ptr nocapture readonly %S, ptr nocaptur
; CHECK-NEXT: .LBB20_3: @ %do.body
; CHECK-NEXT: @ =>This Loop Header: Depth=1
; CHECK-NEXT: @ Child Loop BB20_5 Depth 2
-; CHECK-NEXT: vldrw.u32 q2, [r12]
+; CHECK-NEXT: vldrw.u32 q3, [r12]
; CHECK-NEXT: movs r5, #0
-; CHECK-NEXT: vmov q4, q2
+; CHECK-NEXT: vmov q4, q3
; CHECK-NEXT: vshlc q4, r5, #32
-; CHECK-NEXT: vldrw.u32 q1, [r12, #8]
-; CHECK-NEXT: vmov q5, q1
+; CHECK-NEXT: vldrw.u32 q2, [r12, #8]
+; CHECK-NEXT: vmov q5, q2
; CHECK-NEXT: vshlc q5, r5, #32
-; CHECK-NEXT: vldrw.u32 q3, [r6]
-; CHECK-NEXT: vmov.f32 s14, s0
+; CHECK-NEXT: vldrw.u32 q1, [r6]
+; CHECK-NEXT: vmov.f32 s6, s0
; CHECK-NEXT: mov r5, r2
-; CHECK-NEXT: vmov.f32 s15, s0
+; CHECK-NEXT: vmov.f32 s7, s0
; CHECK-NEXT: wls lr, r8, .LBB20_6
; CHECK-NEXT: @ %bb.4: @ %while.body.preheader
; CHECK-NEXT: @ in Loop: Header=BB20_3 Depth=1
-; CHECK-NEXT: vmov q6, q3
; CHECK-NEXT: mov r5, r2
; CHECK-NEXT: .LBB20_5: @ %while.body
; CHECK-NEXT: @ Parent Loop BB20_3 Depth=1
; CHECK-NEXT: @ => This Inner Loop Header: Depth=2
; CHECK-NEXT: ldrd r7, r4, [r1], #8
-; CHECK-NEXT: vfma.f32 q6, q2, r7
-; CHECK-NEXT: vmov r7, s24
-; CHECK-NEXT: vmov q3, q6
-; CHECK-NEXT: vfma.f32 q3, q1, r7
-; CHECK-NEXT: vstr s24, [r5]
-; CHECK-NEXT: vmov.f32 s15, s0
-; CHECK-NEXT: vfma.f32 q3, q4, r4
-; CHECK-NEXT: vmov r4, s13
-; CHECK-NEXT: vstr s13, [r5, #4]
-; CHECK-NEXT: vfma.f32 q3, q5, r4
+; CHECK-NEXT: vfma.f32 q1, q3, r7
+; CHECK-NEXT: vmov r7, s4
+; CHECK-NEXT: vmov.f32 s2, s4
+; CHECK-NEXT: vfma.f32 q1, q2, r7
+; CHECK-NEXT: vmov.f32 s7, s0
+; CHECK-NEXT: vfma.f32 q1, q4, r4
+; CHECK-NEXT: vmov r4, s5
+; CHECK-NEXT: vstr s5, [r5, #4]
+; CHECK-NEXT: vfma.f32 q1, q5, r4
+; CHECK-NEXT: vmov.f32 s4, s6
+; CHECK-NEXT: vmov.f32 s5, s7
+; CHECK-NEXT: vmov.f32 s6, s0
+; CHECK-NEXT: vstr s2, [r5]
; CHECK-NEXT: adds r5, #8
-; CHECK-NEXT: vmov.f32 s12, s14
-; CHECK-NEXT: vmov.f32 s13, s15
-; CHECK-NEXT: vmov.f32 s14, s0
-; CHECK-NEXT: vmov q6, q3
; CHECK-NEXT: le lr, .LBB20_5
; CHECK-NEXT: .LBB20_6: @ %while.end
; CHECK-NEXT: @ in Loop: Header=BB20_3 Depth=1
@@ -1966,14 +1959,14 @@ define void @arm_biquad_cascade_df2T_f32(ptr nocapture readonly %S, ptr nocaptur
; CHECK-NEXT: @ %bb.7: @ %if.then
; CHECK-NEXT: @ in Loop: Header=BB20_3 Depth=1
; CHECK-NEXT: ldr r1, [r1]
-; CHECK-NEXT: vfma.f32 q3, q2, r1
-; CHECK-NEXT: vmov r1, s12
-; CHECK-NEXT: vstr s12, [r5]
-; CHECK-NEXT: vfma.f32 q3, q1, r1
-; CHECK-NEXT: vstr s13, [r6]
+; CHECK-NEXT: vfma.f32 q1, q3, r1
+; CHECK-NEXT: vmov r1, s4
+; CHECK-NEXT: vstr s4, [r5]
+; CHECK-NEXT: vfma.f32 q1, q2, r1
+; CHECK-NEXT: vstr s5, [r6]
; CHECK-NEXT: b .LBB20_2
; CHECK-NEXT: .LBB20_8: @ %do.end
-; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13}
+; CHECK-NEXT: vpop {d8, d9, d10, d11}
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc}
; CHECK-NEXT: .p2align 2
; CHECK-NEXT: @ %bb.9:
diff --git a/llvm/test/CodeGen/Thumb2/mve-gather-increment.ll b/llvm/test/CodeGen/Thumb2/mve-gather-increment.ll
index 0d86f22a321e0..b60ee7c6d406b 100644
--- a/llvm/test/CodeGen/Thumb2/mve-gather-increment.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-gather-increment.ll
@@ -1313,27 +1313,29 @@ define arm_aapcs_vfpcc void @gather_inc_v16i8_simple(ptr noalias nocapture reado
; CHECK-NEXT: @ Child Loop BB16_3 Depth 2
; CHECK-NEXT: ldr.w r8, [sp, #56] @ 4-byte Reload
; CHECK-NEXT: vldrw.u32 q5, [sp] @ 16-byte Reload
-; CHECK-NEXT: vldrw.u32 q0, [sp, #16] @ 16-byte Reload
+; CHECK-NEXT: vldrw.u32 q6, [sp, #16] @ 16-byte Reload
; CHECK-NEXT: vldrw.u32 q7, [sp, #32] @ 16-byte Reload
; CHECK-NEXT: vmov q4, q3
; CHECK-NEXT: .LBB16_3: @ %vector.body
; CHECK-NEXT: @ Parent Loop BB16_2 Depth=1
; CHECK-NEXT: @ => This Inner Loop Header: Depth=2
-; CHECK-NEXT: vadd.i32 q1, q5, r0
+; CHECK-NEXT: vmov q0, q6
+; CHECK-NEXT: vadd.i32 q6, q5, r0
+; CHECK-NEXT: vmov r7, r3, d13
; CHECK-NEXT: vadd.i32 q2, q4, r0
-; CHECK-NEXT: vmov r7, r3, d3
-; CHECK-NEXT: vadd.i32 q6, q0, lr
; CHECK-NEXT: vmov r5, r6, d5
+; CHECK-NEXT: vmov q1, q7
+; CHECK-NEXT: vmov r4, r10, d12
+; CHECK-NEXT: vadd.i32 q6, q0, lr
; CHECK-NEXT: subs.w r9, r9, #16
-; CHECK-NEXT: vmov r4, r10, d2
-; CHECK-NEXT: vadd.i32 q1, q7, lr
; CHECK-NEXT: vadd.i32 q4, q4, lr
; CHECK-NEXT: vadd.i32 q5, q5, lr
+; CHECK-NEXT: vadd.i32 q7, q7, lr
; CHECK-NEXT: ldrb.w r11, [r3]
; CHECK-NEXT: ldrb r3, [r7]
; CHECK-NEXT: vmov r7, r12, d4
-; CHECK-NEXT: vadd.i32 q2, q7, r0
-; CHECK-NEXT: vadd.i32 q7, q0, r0
+; CHECK-NEXT: vadd.i32 q2, q1, r0
+; CHECK-NEXT: vadd.i32 q1, q0, r0
; CHECK-NEXT: ldrb r5, [r5]
; CHECK-NEXT: ldrb r6, [r6]
; CHECK-NEXT: ldrb r4, [r4]
@@ -1342,7 +1344,7 @@ define arm_aapcs_vfpcc void @gather_inc_v16i8_simple(ptr noalias nocapture reado
; CHECK-NEXT: ldrb.w r1, [r12]
; CHECK-NEXT: vmov.8 q0[0], r7
; CHECK-NEXT: vmov.8 q0[1], r1
-; CHECK-NEXT: vmov r1, r7, d15
+; CHECK-NEXT: vmov r1, r7, d3
; CHECK-NEXT: vmov.8 q0[2], r5
; CHECK-NEXT: vmov.8 q0[3], r6
; CHECK-NEXT: vmov.8 q0[4], r4
@@ -1357,8 +1359,7 @@ define arm_aapcs_vfpcc void @gather_inc_v16i8_simple(ptr noalias nocapture reado
; CHECK-NEXT: ldrb r3, [r5]
; CHECK-NEXT: ldrb.w r12, [r7]
; CHECK-NEXT: ldrb r5, [r4]
-; CHECK-NEXT: vmov r4, r7, d14
-; CHECK-NEXT: vmov q7, q1
+; CHECK-NEXT: vmov r4, r7, d2
; CHECK-NEXT: ldrb r4, [r4]
; CHECK-NEXT: ldrb r7, [r7]
; CHECK-NEXT: vmov.8 q0[8], r4
@@ -1370,7 +1371,6 @@ define arm_aapcs_vfpcc void @gather_inc_v16i8_simple(ptr noalias nocapture reado
; CHECK-NEXT: vmov.8 q0[14], r3
; CHECK-NEXT: vmov.8 q0[15], r12
; CHECK-NEXT: vstrb.8 q0, [r8], #16
-; CHECK-NEXT: vmov q0, q6
; CHECK-NEXT: bne .LBB16_3
; CHECK-NEXT: @ %bb.4: @ %middle.block
; CHECK-NEXT: @ in Loop: Header=BB16_2 Depth=1
diff --git a/llvm/test/CodeGen/Thumb2/mve-gather-scatter-optimisation.ll b/llvm/test/CodeGen/Thumb2/mve-gather-scatter-optimisation.ll
index eedca2cd4a5d3..c0b2da7eff41b 100644
--- a/llvm/test/CodeGen/Thumb2/mve-gather-scatter-optimisation.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-gather-scatter-optimisation.ll
@@ -236,11 +236,11 @@ define arm_aapcs_vfpcc void @push_out_mul_gather_scatter(ptr noalias nocapture r
; CHECK-NEXT: vldrw.u32 q1, [r1]
; CHECK-NEXT: .LBB5_1: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vldrw.u32 q2, [r0, q1, uxtw #2]
-; CHECK-NEXT: vadd.i32 q3, q1, q0
+; CHECK-NEXT: vldrw.u32 q3, [r0, q1, uxtw #2]
; CHECK-NEXT: subs r2, #4
-; CHECK-NEXT: vstrw.32 q2, [r0, q1, uxtw #2]
-; CHECK-NEXT: vmov q1, q3
+; CHECK-NEXT: vmov q2, q1
+; CHECK-NEXT: vadd.i32 q1, q1, q0
+; CHECK-NEXT: vstrw.32 q3, [r0, q2, uxtw #2]
; CHECK-NEXT: bne .LBB5_1
; CHECK-NEXT: @ %bb.2: @ %end
; CHECK-NEXT: bx lr
@@ -330,20 +330,20 @@ define arm_aapcs_vfpcc void @non_gatscat_use1(ptr noalias nocapture readonly %da
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: adr r4, .LCPI7_0
; CHECK-NEXT: mov.w r12, #9
-; CHECK-NEXT: vldrw.u32 q1, [r4]
+; CHECK-NEXT: vldrw.u32 q0, [r4]
; CHECK-NEXT: mov.w lr, #12
; CHECK-NEXT: movs r4, #8
-; CHECK-NEXT: vdup.32 q0, r0
+; CHECK-NEXT: vdup.32 q1, r0
; CHECK-NEXT: .LBB7_1: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vmov q3, q0
-; CHECK-NEXT: vadd.i32 q2, q1, r4
-; CHECK-NEXT: vmla.i32 q3, q1, lr
-; CHECK-NEXT: vmul.i32 q1, q1, r12
-; CHECK-NEXT: vldrw.u32 q4, [q3, #24]
+; CHECK-NEXT: vmov q2, q0
+; CHECK-NEXT: vmov q3, q1
+; CHECK-NEXT: vmla.i32 q3, q2, lr
; CHECK-NEXT: subs r2, #4
-; CHECK-NEXT: vstrw.32 q1, [r3]
-; CHECK-NEXT: vmov q1, q2
+; CHECK-NEXT: vldrw.u32 q4, [q3, #24]
+; CHECK-NEXT: vmul.i32 q2, q2, r12
+; CHECK-NEXT: vadd.i32 q0, q0, r4
+; CHECK-NEXT: vstrw.32 q2, [r3]
; CHECK-NEXT: vstrb.8 q4, [r1], #16
; CHECK-NEXT: bne .LBB7_1
; CHECK-NEXT: @ %bb.2: @ %end
@@ -390,22 +390,22 @@ define arm_aapcs_vfpcc void @non_gatscat_use2(ptr noalias nocapture readonly %da
; CHECK-NEXT: vpush {d8, d9, d10, d11}
; CHECK-NEXT: adr r4, .LCPI8_0
; CHECK-NEXT: movs r5, #18
-; CHECK-NEXT: vldrw.u32 q2, [r4]
+; CHECK-NEXT: vldrw.u32 q0, [r4]
; CHECK-NEXT: mov.w r12, #9
; CHECK-NEXT: mov.w lr, #12
; CHECK-NEXT: movs r4, #8
-; CHECK-NEXT: vdup.32 q0, r0
-; CHECK-NEXT: vdup.32 q1, r5
+; CHECK-NEXT: vdup.32 q1, r0
+; CHECK-NEXT: vdup.32 q2, r5
; CHECK-NEXT: .LBB8_1: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vmov q4, q0
-; CHECK-NEXT: vadd.i32 q3, q2, r4
-; CHECK-NEXT: vmla.i32 q4, q2, lr
+; CHECK-NEXT: vmov q3, q0
+; CHECK-NEXT: vmov q4, q1
+; CHECK-NEXT: vmla.i32 q4, q3, lr
; CHECK-NEXT: subs r2, #4
; CHECK-NEXT: vldrw.u32 q5, [q4, #24]
-; CHECK-NEXT: vmov q4, q1
-; CHECK-NEXT: vmla.i32 q4, q2, r12
-; CHECK-NEXT: vmov q2, q3
+; CHECK-NEXT: vmov q4, q2
+; CHECK-NEXT: vmla.i32 q4, q3, r12
+; CHECK-NEXT: vadd.i32 q0, q0, r4
; CHECK-NEXT: vstrb.8 q5, [r1], #16
; CHECK-NEXT: vstrw.32 q4, [r3]
; CHECK-NEXT: bne .LBB8_1
@@ -487,21 +487,21 @@ define dso_local void @arm_mat_mult_q31(ptr noalias nocapture readonly %A, ptr n
; CHECK-NEXT: @ => This Loop Header: Depth=2
; CHECK-NEXT: @ Child Loop BB9_3 Depth 3
; CHECK-NEXT: vldrw.u32 q0, [sp, #16] @ 16-byte Reload
-; CHECK-NEXT: vmov q7, q2
+; CHECK-NEXT: vmov q1, q2
; CHECK-NEXT: dls lr, r10
; CHECK-NEXT: vmov.i32 q5, #0x0
-; CHECK-NEXT: vmlas.i32 q7, q0, r7
-; CHECK-NEXT: vmov q6, q4
+; CHECK-NEXT: vmlas.i32 q1, q0, r7
+; CHECK-NEXT: vmov q7, q4
; CHECK-NEXT: .LBB9_3: @ %vector.body
; CHECK-NEXT: @ Parent Loop BB9_1 Depth=1
; CHECK-NEXT: @ Parent Loop BB9_2 Depth=2
; CHECK-NEXT: @ => This Inner Loop Header: Depth=3
-; CHECK-NEXT: vadd.i32 q0, q7, q3
-; CHECK-NEXT: vldrw.u32 q1, [r1, q7, uxtw #2]
-; CHECK-NEXT: vldrw.u32 q7, [q6, #32]!
-; CHECK-NEXT: vmul.i32 q1, q1, q7
-; CHECK-NEXT: vmov q7, q0
-; CHECK-NEXT: vadd.i32 q5, q1, q5
+; CHECK-NEXT: vmov q6, q1
+; CHECK-NEXT: vadd.i32 q1, q1, q3
+; CHECK-NEXT: vldrw.u32 q0, [r1, q6, uxtw #2]
+; CHECK-NEXT: vldrw.u32 q6, [q7, #32]!
+; CHECK-NEXT: vmul.i32 q0, q0, q6
+; CHECK-NEXT: vadd.i32 q5, q0, q5
; CHECK-NEXT: le lr, .LBB9_3
; CHECK-NEXT: @ %bb.4: @ %middle.block
; CHECK-NEXT: @ in Loop: Header=BB9_2 Depth=2
@@ -702,12 +702,12 @@ define dso_local void @arm_mat_mult_q15(ptr noalias nocapture readonly %A, ptr n
; CHECK-NEXT: @ Parent Loop BB10_5 Depth=1
; CHECK-NEXT: @ Parent Loop BB10_8 Depth=2
; CHECK-NEXT: @ => This Inner Loop Header: Depth=3
-; CHECK-NEXT: vadd.i32 q6, q5, q3
-; CHECK-NEXT: vldrh.s32 q7, [r1, q5, uxtw #1]
-; CHECK-NEXT: vldrh.s32 q5, [r3], #8
-; CHECK-NEXT: vmul.i32 q5, q7, q5
-; CHECK-NEXT: vadd.i32 q4, q5, q4
-; CHECK-NEXT: vmov q5, q6
+; CHECK-NEXT: vmov q6, q5
+; CHECK-NEXT: vadd.i32 q5, q5, q3
+; CHECK-NEXT: vldrh.s32 q7, [r1, q6, uxtw #1]
+; CHECK-NEXT: vldrh.s32 q6, [r3], #8
+; CHECK-NEXT: vmul.i32 q6, q7, q6
+; CHECK-NEXT: vadd.i32 q4, q6, q4
; CHECK-NEXT: le lr, .LBB10_11
; CHECK-NEXT: @ %bb.12: @ %middle.block
; CHECK-NEXT: @ in Loop: Header=BB10_8 Depth=2
@@ -922,15 +922,15 @@ define hidden arm_aapcs_vfpcc i32 @arm_depthwise_conv_s8(ptr nocapture readonly
; CHECK-NEXT: @ Parent Loop BB11_3 Depth=3
; CHECK-NEXT: @ Parent Loop BB11_4 Depth=4
; CHECK-NEXT: @ => This Inner Loop Header: Depth=5
-; CHECK-NEXT: vldrb.s32 q2, [r0, q5]
-; CHECK-NEXT: vadd.i32 q7, q5, q0
-; CHECK-NEXT: vldrb.s32 q5, [r1, q4]
-; CHECK-NEXT: vadd.i32 q6, q4, q0
-; CHECK-NEXT: vadd.i32 q2, q2, r2
+; CHECK-NEXT: vmov q7, q5
+; CHECK-NEXT: vmov q6, q4
+; CHECK-NEXT: vldrb.s32 q2, [r0, q7]
+; CHECK-NEXT: vldrb.s32 q7, [r1, q6]
; CHECK-NEXT: subs r5, #4
-; CHECK-NEXT: vmlava.u32 r12, q2, q5
-; CHECK-NEXT: vmov q5, q7
-; CHECK-NEXT: vmov q4, q6
+; CHECK-NEXT: vadd.i32 q4, q4, q0
+; CHECK-NEXT: vadd.i32 q2, q2, r2
+; CHECK-NEXT: vadd.i32 q5, q5, q0
+; CHECK-NEXT: vmlava.u32 r12, q2, q7
; CHECK-NEXT: bne .LBB11_5
; CHECK-NEXT: @ %bb.6: @ %middle.block
; CHECK-NEXT: @ in Loop: Header=BB11_4 Depth=4
diff --git a/llvm/test/CodeGen/Thumb2/mve-pipelineloops.ll b/llvm/test/CodeGen/Thumb2/mve-pipelineloops.ll
index 43ed5eefbf4c7..d6c5cde30ed73 100644
--- a/llvm/test/CodeGen/Thumb2/mve-pipelineloops.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-pipelineloops.ll
@@ -18,50 +18,50 @@ define void @arm_cmplx_dot_prod_q15(ptr noundef %pSrcA, ptr noundef %pSrcB, i32
; CHECK-NEXT: csel r7, r6, r5, hs
; CHECK-NEXT: add.w lr, r7, #1
; CHECK-NEXT: mov r4, r5
-; CHECK-NEXT: vldrh.u16 q0, [r0], #32
+; CHECK-NEXT: vldrh.u16 q1, [r0], #32
; CHECK-NEXT: movs r7, #0
; CHECK-NEXT: mov r8, r5
+; CHECK-NEXT: vldrh.u16 q2, [r1], #32
+; CHECK-NEXT: vmlsldava.s16 r4, r7, q1, q2
+; CHECK-NEXT: vldrh.u16 q0, [r0, #-16]
+; CHECK-NEXT: vmlaldavax.s16 r8, r5, q1, q2
+; CHECK-NEXT: vldrh.u16 q2, [r1, #-16]
+; CHECK-NEXT: vmlsldava.s16 r4, r7, q0, q2
; CHECK-NEXT: vldrh.u16 q1, [r1], #32
-; CHECK-NEXT: vmlsldava.s16 r4, r7, q0, q1
-; CHECK-NEXT: vldrh.u16 q2, [r0, #-16]
-; CHECK-NEXT: vmlaldavax.s16 r8, r5, q0, q1
-; CHECK-NEXT: vldrh.u16 q3, [r1, #-16]
-; CHECK-NEXT: vmlsldava.s16 r4, r7, q2, q3
-; CHECK-NEXT: vldrh.u16 q0, [r1], #32
; CHECK-NEXT: sub.w lr, lr, #1
; CHECK-NEXT: cmp.w lr, #0
-; CHECK-NEXT: vldrh.u16 q1, [r0], #32
+; CHECK-NEXT: vldrh.u16 q3, [r0], #32
; CHECK-NEXT: beq .LBB0_3
; CHECK-NEXT: .p2align 2
; CHECK-NEXT: .LBB0_2: @ %while.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vmlaldavax.s16 r8, r5, q2, q3
-; CHECK-NEXT: vldrh.u16 q3, [r1, #-16]
-; CHECK-NEXT: vmlsldava.s16 r4, r7, q1, q0
-; CHECK-NEXT: vldrh.u16 q2, [r0, #-16]
-; CHECK-NEXT: vmlaldavax.s16 r8, r5, q1, q0
-; CHECK-NEXT: vldrh.u16 q1, [r0], #32
-; CHECK-NEXT: vmlsldava.s16 r4, r7, q2, q3
-; CHECK-NEXT: vldrh.u16 q0, [r1], #32
+; CHECK-NEXT: vmlaldavax.s16 r8, r5, q0, q2
+; CHECK-NEXT: vldrh.u16 q2, [r1, #-16]
+; CHECK-NEXT: vmlsldava.s16 r4, r7, q3, q1
+; CHECK-NEXT: vldrh.u16 q0, [r0, #-16]
+; CHECK-NEXT: vmlaldavax.s16 r8, r5, q3, q1
+; CHECK-NEXT: vldrh.u16 q3, [r0], #32
+; CHECK-NEXT: vmlsldava.s16 r4, r7, q0, q2
+; CHECK-NEXT: vldrh.u16 q1, [r1], #32
; CHECK-NEXT: le lr, .LBB0_2
; CHECK-NEXT: .LBB0_3:
-; CHECK-NEXT: vmlaldavax.s16 r8, r5, q2, q3
+; CHECK-NEXT: vmlaldavax.s16 r8, r5, q0, q2
; CHECK-NEXT: movs r6, #14
; CHECK-NEXT: and.w r2, r6, r2, lsl #1
-; CHECK-NEXT: vmlaldavax.s16 r8, r5, q1, q0
-; CHECK-NEXT: vldrh.u16 q2, [r0, #-16]
-; CHECK-NEXT: vmlsldava.s16 r4, r7, q1, q0
-; CHECK-NEXT: vldrh.u16 q0, [r1, #-16]
-; CHECK-NEXT: vmlaldavax.s16 r8, r5, q2, q0
+; CHECK-NEXT: vmlaldavax.s16 r8, r5, q3, q1
+; CHECK-NEXT: vldrh.u16 q0, [r0, #-16]
+; CHECK-NEXT: vmlsldava.s16 r4, r7, q3, q1
+; CHECK-NEXT: vldrh.u16 q1, [r1, #-16]
+; CHECK-NEXT: vmlaldavax.s16 r8, r5, q0, q1
; CHECK-NEXT: vctp.16 r2
-; CHECK-NEXT: vmlsldava.s16 r4, r7, q2, q0
+; CHECK-NEXT: vmlsldava.s16 r4, r7, q0, q1
; CHECK-NEXT: vpst
-; CHECK-NEXT: vldrht.u16 q1, [r0]
+; CHECK-NEXT: vldrht.u16 q2, [r0]
; CHECK-NEXT: cmp r2, #9
; CHECK-NEXT: vpsttt
; CHECK-NEXT: vldrht.u16 q0, [r1]
-; CHECK-NEXT: vmlsldavat.s16 r4, r7, q1, q0
-; CHECK-NEXT: vmlaldavaxt.s16 r8, r5, q1, q0
+; CHECK-NEXT: vmlsldavat.s16 r4, r7, q2, q0
+; CHECK-NEXT: vmlaldavaxt.s16 r8, r5, q2, q0
; CHECK-NEXT: blo .LBB0_10
; CHECK-NEXT: @ %bb.4: @ %do.body.1
; CHECK-NEXT: subs r2, #8
diff --git a/llvm/test/CodeGen/Thumb2/mve-shuffle.ll b/llvm/test/CodeGen/Thumb2/mve-shuffle.ll
index 94d5490cead2f..6f2a0b2debc47 100644
--- a/llvm/test/CodeGen/Thumb2/mve-shuffle.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-shuffle.ll
@@ -439,17 +439,18 @@ define arm_aapcs_vfpcc <8 x i16> @shuffle4step_i16(<32 x i16> %src) {
; CHECK-NEXT: vmovx.f16 s1, s14
; CHECK-NEXT: vmovx.f16 s20, s0
; CHECK-NEXT: vins.f16 s23, s1
-; CHECK-NEXT: vmovx.f16 s1, s2
-; CHECK-NEXT: vins.f16 s20, s1
+; CHECK-NEXT: vmov.f32 s1, s2
+; CHECK-NEXT: vmovx.f16 s2, s2
; CHECK-NEXT: vmovx.f16 s21, s4
-; CHECK-NEXT: vmovx.f16 s1, s6
+; CHECK-NEXT: vins.f16 s20, s2
+; CHECK-NEXT: vmovx.f16 s2, s6
; CHECK-NEXT: vins.f16 s12, s14
; CHECK-NEXT: vins.f16 s8, s10
; CHECK-NEXT: vins.f16 s4, s6
-; CHECK-NEXT: vins.f16 s21, s1
-; CHECK-NEXT: vins.f16 s0, s2
-; CHECK-NEXT: vmov.f32 s1, s4
+; CHECK-NEXT: vins.f16 s21, s2
+; CHECK-NEXT: vins.f16 s0, s1
; CHECK-NEXT: vmov.f32 s2, s8
+; CHECK-NEXT: vmov.f32 s1, s4
; CHECK-NEXT: vmov.f32 s3, s12
; CHECK-NEXT: vadd.i16 q0, q0, q5
; CHECK-NEXT: vadd.i16 q0, q0, q4
diff --git a/llvm/test/CodeGen/Thumb2/mve-vld4.ll b/llvm/test/CodeGen/Thumb2/mve-vld4.ll
index ab41069bfa258..ecb169898f9f0 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vld4.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vld4.ll
@@ -391,17 +391,18 @@ define void @vld4_v8i16_align1(ptr %src, ptr %dst) {
; CHECK-NEXT: vmovx.f16 s1, s2
; CHECK-NEXT: vmovx.f16 s20, s8
; CHECK-NEXT: vins.f16 s23, s1
-; CHECK-NEXT: vmovx.f16 s1, s10
-; CHECK-NEXT: vins.f16 s20, s1
+; CHECK-NEXT: vmov.f32 s1, s10
+; CHECK-NEXT: vmovx.f16 s10, s10
; CHECK-NEXT: vmovx.f16 s21, s12
-; CHECK-NEXT: vmovx.f16 s1, s14
+; CHECK-NEXT: vins.f16 s20, s10
+; CHECK-NEXT: vmovx.f16 s10, s14
; CHECK-NEXT: vins.f16 s0, s2
; CHECK-NEXT: vins.f16 s12, s14
; CHECK-NEXT: vins.f16 s4, s6
-; CHECK-NEXT: vins.f16 s8, s10
-; CHECK-NEXT: vins.f16 s21, s1
-; CHECK-NEXT: vmov.f32 s9, s12
+; CHECK-NEXT: vins.f16 s21, s10
; CHECK-NEXT: vmov.f32 s10, s4
+; CHECK-NEXT: vins.f16 s8, s1
+; CHECK-NEXT: vmov.f32 s9, s12
; CHECK-NEXT: vmov.f32 s11, s0
; CHECK-NEXT: vadd.i16 q0, q2, q5
; CHECK-NEXT: vadd.i16 q0, q0, q4
diff --git a/llvm/test/CodeGen/Thumb2/mve-vmaxnma-commute.ll b/llvm/test/CodeGen/Thumb2/mve-vmaxnma-commute.ll
index 04be18e3dd873..6656d44eec81e 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vmaxnma-commute.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vmaxnma-commute.ll
@@ -344,14 +344,14 @@ define void @loop_absmax32_pred_c(ptr %0, i32 %1, ptr nocapture %2) {
; CHECK-NEXT: vmov.i32 q0, #0x0
; CHECK-NEXT: dlstp.32 lr, r1
; CHECK-NEXT: .LBB19_1: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vldrw.u32 q1, [r0], #16
-; CHECK-NEXT: vmaxnma.f32 q1, q0
-; CHECK-NEXT: vmov q0, q1
+; CHECK-NEXT: vmov q1, q0
+; CHECK-NEXT: vldrw.u32 q0, [r0], #16
+; CHECK-NEXT: vmaxnma.f32 q0, q1
; CHECK-NEXT: letp lr, .LBB19_1
; CHECK-NEXT: @ %bb.2:
-; CHECK-NEXT: vldr s0, .LCPI19_0
-; CHECK-NEXT: vmov r0, s0
-; CHECK-NEXT: vmaxnmav.f32 r0, q1
+; CHECK-NEXT: vldr s4, .LCPI19_0
+; CHECK-NEXT: vmov r0, s4
+; CHECK-NEXT: vmaxnmav.f32 r0, q0
; CHECK-NEXT: vmov s0, r0
; CHECK-NEXT: vstr s0, [r2]
; CHECK-NEXT: pop {r7, pc}
@@ -538,14 +538,14 @@ define void @loop_absmax16_pred_c(ptr %0, i32 %1, ptr nocapture %2) {
; CHECK-NEXT: vmov.i32 q0, #0x0
; CHECK-NEXT: dlstp.16 lr, r1
; CHECK-NEXT: .LBB23_1: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vldrh.u16 q1, [r0], #8
-; CHECK-NEXT: vmaxnma.f16 q1, q0
-; CHECK-NEXT: vmov q0, q1
+; CHECK-NEXT: vmov q1, q0
+; CHECK-NEXT: vldrh.u16 q0, [r0], #8
+; CHECK-NEXT: vmaxnma.f16 q0, q1
; CHECK-NEXT: letp lr, .LBB23_1
; CHECK-NEXT: @ %bb.2:
-; CHECK-NEXT: vldr.16 s0, .LCPI23_0
-; CHECK-NEXT: vmov r0, s0
-; CHECK-NEXT: vmaxnmav.f16 r0, q1
+; CHECK-NEXT: vldr.16 s4, .LCPI23_0
+; CHECK-NEXT: vmov r0, s4
+; CHECK-NEXT: vmaxnmav.f16 r0, q0
; CHECK-NEXT: vmov s0, r0
; CHECK-NEXT: vstr.16 s0, [r2]
; CHECK-NEXT: pop {r7, pc}
diff --git a/llvm/test/CodeGen/Thumb2/mve-vst4.ll b/llvm/test/CodeGen/Thumb2/mve-vst4.ll
index 26ab555c2c593..fb5f543fd0d3a 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vst4.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vst4.ll
@@ -1055,18 +1055,18 @@ define void @vst4_v4f16(ptr %src, ptr %dst) {
; CHECK-NEXT: vins.f16 s12, s2
; CHECK-NEXT: vmovx.f16 s2, s3
; CHECK-NEXT: vins.f16 s11, s2
-; CHECK-NEXT: vmovx.f16 s2, s4
-; CHECK-NEXT: vins.f16 s4, s6
-; CHECK-NEXT: vmovx.f16 s6, s6
+; CHECK-NEXT: vmov.f32 s2, s6
+; CHECK-NEXT: vmovx.f16 s6, s4
+; CHECK-NEXT: vins.f16 s4, s2
+; CHECK-NEXT: vmovx.f16 s2, s2
; CHECK-NEXT: vins.f16 s1, s3
-; CHECK-NEXT: vins.f16 s2, s6
-; CHECK-NEXT: vmovx.f16 s6, s7
+; CHECK-NEXT: vins.f16 s6, s2
+; CHECK-NEXT: vmovx.f16 s2, s7
; CHECK-NEXT: vmov.f32 s8, s5
-; CHECK-NEXT: vins.f16 s10, s6
+; CHECK-NEXT: vins.f16 s10, s2
; CHECK-NEXT: vmov.f32 s9, s1
; CHECK-NEXT: vmov.f32 s5, s0
; CHECK-NEXT: vstrh.16 q2, [r1, #16]
-; CHECK-NEXT: vmov.f32 s6, s2
; CHECK-NEXT: vmov.f32 s7, s12
; CHECK-NEXT: vstrh.16 q1, [r1]
; CHECK-NEXT: pop {r4, r5, r6, pc}
diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-vla.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-vla.ll
index e6fcf56af6e8d..2929a04cc0637 100644
--- a/llvm/test/CodeGen/Thumb2/pacbti-m-vla.ll
+++ b/llvm/test/CodeGen/Thumb2/pacbti-m-vla.ll
@@ -63,8 +63,8 @@ define hidden i32 @f(i32 %n) local_unnamed_addr #0 {
; CHECK-NEXT: subs r0, #4
; CHECK-NEXT: sub.w r3, r4, #16
; CHECK-NEXT: add.w lr, r2, r0, lsr #2
-; CHECK-NEXT: movs r2, #0
; CHECK-NEXT: movs r0, #0
+; CHECK-NEXT: movs r2, #0
; CHECK-NEXT: .LBB0_5: @ %for.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldr r5, [r3, #16]!
More information about the llvm-commits mailing list