[llvm] ab3bb86 - Revert "[ARM] Adjust strd/ldrd codegen alignment requirements"
David Spickett via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 3 07:07:54 PDT 2023
Author: David Spickett
Date: 2023-07-03T14:05:49Z
New Revision: ab3bb86d4487fa904a18b6350901b3a4e9470196
URL: https://github.com/llvm/llvm-project/commit/ab3bb86d4487fa904a18b6350901b3a4e9470196
DIFF: https://github.com/llvm/llvm-project/commit/ab3bb86d4487fa904a18b6350901b3a4e9470196.diff
LOG: Revert "[ARM] Adjust strd/ldrd codegen alignment requirements"
This reverts commit 92a9c30c61da7f973d55cd84fade424159b9cac9.
This has caused a test failure in the 2nd stage of Linaro's
Arm 32-bit buildbots.
LLVM::simplified-template-names.s
7: error: Simplified template DW_AT_name could not be reconstituted:
check:10'0 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
8: original: f3<unsigned char, (unsigned char)'\x00'>
check:10'0 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
9: reconstituted: f3<unsigned char, (unsigned char)'\x7f'>
check:10'0 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
I suspect a load/store is slightly off.
Added:
Modified:
llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
llvm/test/CodeGen/ARM/copy-by-struct-i32.ll
llvm/test/CodeGen/ARM/ha-alignstack-call.ll
llvm/test/CodeGen/ARM/indexed-mem.ll
llvm/test/CodeGen/ARM/prera-ldst-aliasing.mir
llvm/test/CodeGen/ARM/prera-ldst-insertpt.mir
llvm/test/CodeGen/ARM/vector-DAGCombine.ll
llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-loops.ll
llvm/test/CodeGen/Thumb2/mve-float16regloops.ll
llvm/test/CodeGen/Thumb2/mve-float32regloops.ll
llvm/test/CodeGen/Thumb2/mve-gather-ptrs.ll
llvm/test/CodeGen/Thumb2/mve-masked-ldst.ll
llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll
llvm/test/CodeGen/Thumb2/mve-scatter-ptrs.ll
llvm/test/CodeGen/Thumb2/mve-vecreduce-slp.ll
llvm/test/CodeGen/Thumb2/mve-vld3.ll
llvm/test/CodeGen/Thumb2/mve-vst2.ll
llvm/test/CodeGen/Thumb2/postinc-distribute.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 93db983b92c05d..eb2f6cd87afc2f 100644
--- a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -2289,7 +2289,10 @@ bool ARMPreAllocLoadStoreOpt::CanFormLdStDWord(
return false;
Align Alignment = (*Op0->memoperands_begin())->getAlign();
- Align ReqAlign = STI->getDualLoadStoreAlignment();
+ const Function &Func = MF->getFunction();
+ Align ReqAlign =
+ STI->hasV6Ops() ? TD->getABITypeAlign(Type::getInt64Ty(Func.getContext()))
+ : Align(8); // Pre-v6 need 8-byte align
if (Alignment < ReqAlign)
return false;
diff --git a/llvm/test/CodeGen/ARM/copy-by-struct-i32.ll b/llvm/test/CodeGen/ARM/copy-by-struct-i32.ll
index 34aab4c04b1093..5d361861c27c5e 100644
--- a/llvm/test/CodeGen/ARM/copy-by-struct-i32.ll
+++ b/llvm/test/CodeGen/ARM/copy-by-struct-i32.ll
@@ -21,8 +21,9 @@ define arm_aapcscc void @s(ptr %q, ptr %p) {
; ASSEMBLY-NEXT: sbc r5, r5, #0
; ASSEMBLY-NEXT: ldr r2, [r1, #8]
; ASSEMBLY-NEXT: ldr r3, [r1, #12]
-; ASSEMBLY-NEXT: strd r4, r5, [sp, #128]
+; ASSEMBLY-NEXT: str r5, [sp, #132]
; ASSEMBLY-NEXT: add r5, r1, #16
+; ASSEMBLY-NEXT: str r4, [sp, #128]
; ASSEMBLY-NEXT: mov r4, sp
; ASSEMBLY-NEXT: vld1.32 {d16}, [r5]!
; ASSEMBLY-NEXT: vst1.32 {d16}, [r4]!
diff --git a/llvm/test/CodeGen/ARM/ha-alignstack-call.ll b/llvm/test/CodeGen/ARM/ha-alignstack-call.ll
index e861fe397f8494..678c987cf3d4f5 100644
--- a/llvm/test/CodeGen/ARM/ha-alignstack-call.ll
+++ b/llvm/test/CodeGen/ARM/ha-alignstack-call.ll
@@ -81,10 +81,11 @@ entry:
ret float %call
}
; CHECK-LABEL: f1_1_call:
-; CHECK: movw r0, #52429
-; CHECK: mov r1, #0
-; CHECK: movt r0, #16204
-; CHECK-DAG: strd r0, r1, [sp]
+; CHECK: movw r1, #52429
+; CHECK: mov r0, #0
+; CHECK: movt r1, #16204
+; CHECK-DAG: str r1, [sp]
+; CHECK-DAG: str r0, [sp, #4]
; CHECK: bl f1_1
; pass in memory, alignment 8
@@ -95,12 +96,13 @@ entry:
ret float %call
}
; CHECK-LABEL: f1_2_call:
-; CHECK-DAG: movw r0, #26214
-; CHECK-DAG: mov r1, #0
-; CHECK: movt r0, #16230
-; CHECK: strd r0, r1, [sp, #8]
+; CHECK-DAG: mov r0, #0
+; CHECK-DAG: movw r1, #26214
+; CHECK: str r0, [sp, #12]
; CHECK: movw r0, #52429
+; CHECK: movt r1, #16230
; CHECK: movt r0, #16204
+; CHECK-DAG: str r1, [sp, #8]
; CHECK-DAG: str r0, [sp]
; CHECK: bl f1_2
diff --git a/llvm/test/CodeGen/ARM/indexed-mem.ll b/llvm/test/CodeGen/ARM/indexed-mem.ll
index e5b3e925bb4b47..6e81a25dd1551d 100644
--- a/llvm/test/CodeGen/ARM/indexed-mem.ll
+++ b/llvm/test/CodeGen/ARM/indexed-mem.ll
@@ -225,8 +225,7 @@ define ptr @post_inc_ldrd(ptr %base, ptr %addr.3) {
;
; CHECK-V8A-LABEL: post_inc_ldrd:
; CHECK-V8A: @ %bb.0:
-; CHECK-V8A-NEXT: ldrd r2, r3, [r0]
-; CHECK-V8A-NEXT: add r0, r0, #8
+; CHECK-V8A-NEXT: ldm r0!, {r2, r3}
; CHECK-V8A-NEXT: add r2, r2, r3
; CHECK-V8A-NEXT: str r2, [r1]
; CHECK-V8A-NEXT: bx lr
@@ -249,8 +248,8 @@ define ptr @pre_inc_str_multi(ptr %base) {
;
; CHECK-V8A-LABEL: pre_inc_str_multi:
; CHECK-V8A: @ %bb.0:
-; CHECK-V8A-NEXT: ldrd r2, r3, [r0]
-; CHECK-V8A-NEXT: add r1, r2, r3
+; CHECK-V8A-NEXT: ldm r0, {r1, r2}
+; CHECK-V8A-NEXT: add r1, r1, r2
; CHECK-V8A-NEXT: str r1, [r0, #8]!
; CHECK-V8A-NEXT: bx lr
%addr.1 = getelementptr i32, ptr %base, i32 1
@@ -272,8 +271,8 @@ define ptr @pre_dec_str_multi(ptr %base) {
;
; CHECK-V8A-LABEL: pre_dec_str_multi:
; CHECK-V8A: @ %bb.0:
-; CHECK-V8A-NEXT: ldrd r2, r3, [r0]
-; CHECK-V8A-NEXT: add r1, r2, r3
+; CHECK-V8A-NEXT: ldm r0, {r1, r2}
+; CHECK-V8A-NEXT: add r1, r1, r2
; CHECK-V8A-NEXT: str r1, [r0, #-4]!
; CHECK-V8A-NEXT: bx lr
%addr.1 = getelementptr i32, ptr %base, i32 1
diff --git a/llvm/test/CodeGen/ARM/prera-ldst-aliasing.mir b/llvm/test/CodeGen/ARM/prera-ldst-aliasing.mir
index cea8cf7fdb8a6f..689aa3d70f03b7 100644
--- a/llvm/test/CodeGen/ARM/prera-ldst-aliasing.mir
+++ b/llvm/test/CodeGen/ARM/prera-ldst-aliasing.mir
@@ -30,8 +30,10 @@ body: |
t2STRi12 killed %2, %0, 0, 14, $noreg :: (store (s32) into %ir.x)
%3 : gpr = t2LDRi12 %1, 4, 14, $noreg :: (load (s32) from %ir.arrayidx2)
t2STRi12 killed %3, %0, 4, 14, $noreg :: (store (s32) into %ir.arrayidx3)
- ; CHECK: t2LDRDi8
- ; CHECK-NEXT: t2STRDi8
+ ; CHECK: t2LDRi12
+ ; CHECK-NEXT: t2LDRi12
+ ; CHECK-NEXT: t2STRi12
+ ; CHECK-NEXT: t2STRi12
tBX_RET 14, $noreg
...
diff --git a/llvm/test/CodeGen/ARM/prera-ldst-insertpt.mir b/llvm/test/CodeGen/ARM/prera-ldst-insertpt.mir
index bf03698a933be7..7d939458ee79ba 100644
--- a/llvm/test/CodeGen/ARM/prera-ldst-insertpt.mir
+++ b/llvm/test/CodeGen/ARM/prera-ldst-insertpt.mir
@@ -41,7 +41,8 @@ body: |
; Make sure we move the paired stores next to each other, and
; insert them in an appropriate location.
- ; CHECK: t2STRDi8 %1, %10,
+ ; CHECK: t2STRi12 %1,
+ ; CHECK-NEXT: t2STRi12 killed %10,
; CHECK-NEXT: t2MOVi
; CHECK-NEXT: t2ADDrs
@@ -52,7 +53,8 @@ body: |
t2STRi12 killed %13, %0, 20, 14, $noreg :: (store (s32))
; Make sure we move the paired stores next to each other.
- ; CHECK: t2STRDi8 %12, %13,
+ ; CHECK: t2STRi12 killed %12,
+ ; CHECK-NEXT: t2STRi12 killed %13,
tBX_RET 14, $noreg
---
@@ -86,7 +88,8 @@ body: |
; CHECK-NEXT: t2MOVi32imm
; CHECK-LIMIT-LABEL: name: b
- ; CHECK-LIMIT: t2STRDi8 {{.*}}, {{.*}}, {{.*}}, 0
+ ; CHECK-LIMIT: t2STRi12 {{.*}}, 0
+ ; CHECK-LIMIT-NEXT: t2STRi12 {{.*}}, 4
; CHECK-LIMIT-NEXT: t2MUL
; CHECK-LIMIT-NEXT: t2STRi12 {{.*}}, 8
@@ -102,7 +105,8 @@ body: |
t2STRi12 killed %13, %0, 20, 14, $noreg :: (store (s32))
; Make sure we move the paired stores next to each other.
- ; CHECK: t2STRDi8 %12, %13, %0, 16
+ ; CHECK: t2STRi12 {{.*}}, 16
+ ; CHECK-NEXT: t2STRi12 {{.*}}, 20
tBX_RET 14, $noreg
diff --git a/llvm/test/CodeGen/ARM/vector-DAGCombine.ll b/llvm/test/CodeGen/ARM/vector-DAGCombine.ll
index 7bc7b844396277..fb21d1682bb383 100644
--- a/llvm/test/CodeGen/ARM/vector-DAGCombine.ll
+++ b/llvm/test/CodeGen/ARM/vector-DAGCombine.ll
@@ -134,8 +134,8 @@ define void @i64_buildvector(ptr %ptr, ptr %vp) nounwind {
define void @i64_insertelement(ptr %ptr, ptr %vp) nounwind {
; CHECK-LABEL: i64_insertelement:
; CHECK: @ %bb.0:
-; CHECK-NEXT: ldm r0, {r0, r3}
-; CHECK-NEXT: stm r1, {r0, r3}
+; CHECK-NEXT: ldm r0, {r2, r3}
+; CHECK-NEXT: strd r2, r3, [r1]
; CHECK-NEXT: bx lr
%t0 = load i64, ptr %ptr, align 4
%vec = load <2 x i64>, ptr %vp
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-loops.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-loops.ll
index cbcbf1f392ce8b..3b42ee36e7c2eb 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-loops.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-loops.ll
@@ -165,74 +165,74 @@ define dso_local i32 @b(ptr %c, i32 %d, i32 %e, ptr %n) "frame-pointer"="all" {
; CHECK-NEXT: sub sp, #16
; CHECK-NEXT: wls lr, r1, .LBB2_3
; CHECK-NEXT: @ %bb.1: @ %while.body.preheader
-; CHECK-NEXT: adds r6, r3, #4
-; CHECK-NEXT: adds r1, r0, #4
-; CHECK-NEXT: mvn r8, #1
-; CHECK-NEXT: @ implicit-def: $r9
+; CHECK-NEXT: mov r12, r0
+; CHECK-NEXT: add.w r10, r3, #4
+; CHECK-NEXT: adds r0, #4
+; CHECK-NEXT: mvn r9, #1
+; CHECK-NEXT: @ implicit-def: $r8
; CHECK-NEXT: @ implicit-def: $r4
; CHECK-NEXT: str r2, [sp] @ 4-byte Spill
; CHECK-NEXT: .LBB2_2: @ %while.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: str r1, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT: asrs r2, r4, #31
-; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT: ldr r1, [r1]
-; CHECK-NEXT: muls r1, r3, r1
-; CHECK-NEXT: adds r4, r4, r1
-; CHECK-NEXT: adc.w r1, r2, r1, asr #31
-; CHECK-NEXT: adds.w r2, r4, #-2147483648
-; CHECK-NEXT: ldrd r2, r4, [r8]
-; CHECK-NEXT: adc r5, r1, #0
+; CHECK-NEXT: ldr r2, [r0]
+; CHECK-NEXT: asrs r5, r4, #31
+; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT: muls r2, r3, r2
+; CHECK-NEXT: adds r4, r4, r2
+; CHECK-NEXT: adc.w r2, r5, r2, asr #31
+; CHECK-NEXT: ldr.w r5, [r9, #4]
+; CHECK-NEXT: adds.w r4, r4, #-2147483648
+; CHECK-NEXT: adc r1, r2, #0
+; CHECK-NEXT: str r1, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT: smull r5, r6, r5, r8
+; CHECK-NEXT: ldr.w r2, [r9]
+; CHECK-NEXT: asrs r4, r1, #31
; CHECK-NEXT: str r2, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT: smull r4, r2, r4, r9
-; CHECK-NEXT: asrs r1, r5, #31
-; CHECK-NEXT: str r5, [sp, #8] @ 4-byte Spill
-; CHECK-NEXT: subs r4, r5, r4
-; CHECK-NEXT: sbcs r1, r2
-; CHECK-NEXT: ldr r2, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT: adds.w r10, r4, #-2147483648
-; CHECK-NEXT: adc r1, r1, #0
-; CHECK-NEXT: ldr r4, [r2, #-4]
+; CHECK-NEXT: subs r5, r1, r5
+; CHECK-NEXT: sbcs r4, r6
+; CHECK-NEXT: adds.w r6, r5, #-2147483648
+; CHECK-NEXT: adc r5, r4, #0
+; CHECK-NEXT: ldr r4, [r0, #-4]
; CHECK-NEXT: muls r4, r3, r4
; CHECK-NEXT: adds r3, #4
-; CHECK-NEXT: adds.w r12, r4, #-2147483648
-; CHECK-NEXT: asr.w r5, r4, #31
-; CHECK-NEXT: ldr r4, [r6]
-; CHECK-NEXT: adc r5, r5, #0
-; CHECK-NEXT: mul r2, r4, r0
-; CHECK-NEXT: adds r0, #4
+; CHECK-NEXT: adds.w r0, r4, #-2147483648
+; CHECK-NEXT: asr.w r1, r4, #31
+; CHECK-NEXT: ldr.w r4, [r10]
+; CHECK-NEXT: adc r1, r1, #0
+; CHECK-NEXT: mul r2, r4, r12
+; CHECK-NEXT: add.w r12, r12, #4
; CHECK-NEXT: add.w r2, r2, #-2147483648
-; CHECK-NEXT: asrl r12, r5, r2
-; CHECK-NEXT: smull r2, r5, r4, r12
-; CHECK-NEXT: lsll r2, r5, #30
-; CHECK-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT: asr.w r11, r5, #31
-; CHECK-NEXT: mov r12, r5
-; CHECK-NEXT: lsll r12, r11, r4
-; CHECK-NEXT: mul r2, r2, r9
-; CHECK-NEXT: lsrl r12, r11, #2
-; CHECK-NEXT: adds r2, #2
-; CHECK-NEXT: lsll r12, r11, r2
+; CHECK-NEXT: asrl r0, r1, r2
; CHECK-NEXT: ldr r2, [sp] @ 4-byte Reload
-; CHECK-NEXT: add.w r5, r12, #-2147483648
-; CHECK-NEXT: asrl r10, r1, r5
-; CHECK-NEXT: ldr r5, [sp, #8] @ 4-byte Reload
-; CHECK-NEXT: lsrl r10, r1, #2
-; CHECK-NEXT: movs r1, #2
-; CHECK-NEXT: mov r9, r10
-; CHECK-NEXT: str.w r10, [r1]
-; CHECK-NEXT: ldr r1, [r8], #-4
-; CHECK-NEXT: mls r5, r1, r4, r5
-; CHECK-NEXT: adds.w r4, r5, #-2147483648
-; CHECK-NEXT: asr.w r1, r5, #31
+; CHECK-NEXT: smull r0, r1, r4, r0
+; CHECK-NEXT: lsll r0, r1, #30
+; CHECK-NEXT: asr.w r11, r1, #31
+; CHECK-NEXT: mov r0, r1
+; CHECK-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT: lsll r0, r11, r4
+; CHECK-NEXT: lsrl r0, r11, #2
+; CHECK-NEXT: mul r1, r1, r8
+; CHECK-NEXT: adds r1, #2
+; CHECK-NEXT: lsll r0, r11, r1
+; CHECK-NEXT: ldr r1, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT: add.w r0, r0, #-2147483648
+; CHECK-NEXT: asrl r6, r5, r0
+; CHECK-NEXT: movs r0, #2
+; CHECK-NEXT: lsrl r6, r5, #2
+; CHECK-NEXT: str r6, [r0]
+; CHECK-NEXT: mov r8, r6
+; CHECK-NEXT: ldr r0, [r9], #-4
+; CHECK-NEXT: mls r0, r0, r4, r1
+; CHECK-NEXT: adds.w r4, r0, #-2147483648
+; CHECK-NEXT: asr.w r1, r0, #31
; CHECK-NEXT: adc r1, r1, #0
; CHECK-NEXT: lsrl r4, r1, #2
-; CHECK-NEXT: rsbs r1, r4, #0
-; CHECK-NEXT: str r1, [r2]
-; CHECK-NEXT: str r1, [r6, #-4]
-; CHECK-NEXT: adds r6, #4
-; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT: adds r1, #4
+; CHECK-NEXT: rsbs r0, r4, #0
+; CHECK-NEXT: str r0, [r2]
+; CHECK-NEXT: str r0, [r10, #-4]
+; CHECK-NEXT: add.w r10, r10, #4
+; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT: adds r0, #4
; CHECK-NEXT: le lr, .LBB2_2
; CHECK-NEXT: .LBB2_3: @ %while.end
; CHECK-NEXT: add sp, #16
diff --git a/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll b/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll
index 52d2a111ead696..bf0b49d92f5004 100644
--- a/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll
@@ -1350,7 +1350,7 @@ define void @arm_biquad_cascade_df2T_f16(ptr nocapture readonly %S, ptr nocaptur
; CHECK-NEXT: .vsave {d8, d9, d10, d11}
; CHECK-NEXT: vpush {d8, d9, d10, d11}
; CHECK-NEXT: vmov.i32 q0, #0x0
-; CHECK-NEXT: ldrd r6, r12, [r0, #4]
+; CHECK-NEXT: ldrd r12, r6, [r0, #4]
; CHECK-NEXT: ldrb.w r9, [r0]
; CHECK-NEXT: vldr.16 s0, .LCPI17_0
; CHECK-NEXT: lsr.w r8, r3, #1
@@ -1358,26 +1358,26 @@ define void @arm_biquad_cascade_df2T_f16(ptr nocapture readonly %S, ptr nocaptur
; CHECK-NEXT: .LBB17_1: @ %if.else
; CHECK-NEXT: @ in Loop: Header=BB17_3 Depth=1
; CHECK-NEXT: vmovx.f16 s5, s4
-; CHECK-NEXT: vstr.16 s4, [r6]
+; CHECK-NEXT: vstr.16 s4, [r12]
; CHECK-NEXT: .LBB17_2: @ %if.end
; CHECK-NEXT: @ in Loop: Header=BB17_3 Depth=1
-; CHECK-NEXT: vstr.16 s5, [r6, #2]
-; CHECK-NEXT: add.w r12, r12, #10
+; CHECK-NEXT: vstr.16 s5, [r12, #2]
+; CHECK-NEXT: adds r6, #10
; CHECK-NEXT: subs.w r9, r9, #1
-; CHECK-NEXT: add.w r6, r6, #4
+; CHECK-NEXT: add.w r12, r12, #4
; CHECK-NEXT: mov r1, r2
; CHECK-NEXT: beq .LBB17_8
; CHECK-NEXT: .LBB17_3: @ %do.body
; CHECK-NEXT: @ =>This Loop Header: Depth=1
; CHECK-NEXT: @ Child Loop BB17_5 Depth 2
-; CHECK-NEXT: vldrh.u16 q2, [r12]
+; CHECK-NEXT: vldrh.u16 q2, [r6]
; CHECK-NEXT: movs r5, #0
; CHECK-NEXT: vmov q4, q2
; CHECK-NEXT: vshlc q4, r5, #16
-; CHECK-NEXT: vldrh.u16 q3, [r12, #4]
+; CHECK-NEXT: vldrh.u16 q3, [r6, #4]
; CHECK-NEXT: vmov q5, q3
; CHECK-NEXT: vshlc q5, r5, #16
-; CHECK-NEXT: vldrh.u16 q1, [r6]
+; CHECK-NEXT: vldrh.u16 q1, [r12]
; CHECK-NEXT: vmov.f32 s5, s1
; CHECK-NEXT: mov r5, r2
; CHECK-NEXT: wls lr, r8, .LBB17_6
@@ -1414,7 +1414,7 @@ define void @arm_biquad_cascade_df2T_f16(ptr nocapture readonly %S, ptr nocaptur
; CHECK-NEXT: vfma.f16 q1, q3, r0
; CHECK-NEXT: strh r0, [r5]
; CHECK-NEXT: vmovx.f16 s2, s4
-; CHECK-NEXT: vstr.16 s2, [r6]
+; CHECK-NEXT: vstr.16 s2, [r12]
; CHECK-NEXT: b .LBB17_2
; CHECK-NEXT: .LBB17_8: @ %do.end
; CHECK-NEXT: vpop {d8, d9, d10, d11}
diff --git a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll
index ecfe1c7f9c9b53..b7b19a477ab0fd 100644
--- a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll
@@ -1331,7 +1331,7 @@ define arm_aapcs_vfpcc void @arm_biquad_cascade_stereo_df2T_f32(ptr nocapture re
; CHECK-NEXT: sub sp, #24
; CHECK-NEXT: mov r8, r3
; CHECK-NEXT: ldrb.w r12, [r0]
-; CHECK-NEXT: ldrd r0, r3, [r0, #4]
+; CHECK-NEXT: ldrd r3, r0, [r0, #4]
; CHECK-NEXT: movs r4, #0
; CHECK-NEXT: cmp.w r8, #0
; CHECK-NEXT: strd r4, r4, [sp, #16]
@@ -1343,13 +1343,13 @@ define arm_aapcs_vfpcc void @arm_biquad_cascade_stereo_df2T_f32(ptr nocapture re
; CHECK-NEXT: .LBB17_2: @ %bb29
; CHECK-NEXT: @ =>This Loop Header: Depth=1
; CHECK-NEXT: @ Child Loop BB17_3 Depth 2
-; CHECK-NEXT: ldrd r5, r7, [r3]
-; CHECK-NEXT: vldrw.u32 q1, [r0]
-; CHECK-NEXT: ldr r6, [r3, #12]
-; CHECK-NEXT: vldr s8, [r3, #8]
+; CHECK-NEXT: ldrd r5, r7, [r0]
+; CHECK-NEXT: vldrw.u32 q1, [r3]
+; CHECK-NEXT: ldr r6, [r0, #12]
+; CHECK-NEXT: vldr s8, [r0, #8]
; CHECK-NEXT: vstrw.32 q1, [r4]
; CHECK-NEXT: vdup.32 q1, r7
-; CHECK-NEXT: vldr s12, [r3, #16]
+; CHECK-NEXT: vldr s12, [r0, #16]
; CHECK-NEXT: vmov.f32 s6, s8
; CHECK-NEXT: dls lr, r8
; CHECK-NEXT: vmov.f32 s7, s8
@@ -1373,18 +1373,18 @@ define arm_aapcs_vfpcc void @arm_biquad_cascade_stereo_df2T_f32(ptr nocapture re
; CHECK-NEXT: @ %bb.4: @ %bb75
; CHECK-NEXT: @ in Loop: Header=BB17_2 Depth=1
; CHECK-NEXT: subs.w r12, r12, #1
-; CHECK-NEXT: add.w r3, r3, #20
-; CHECK-NEXT: vstrb.8 q3, [r0], #16
+; CHECK-NEXT: add.w r0, r0, #20
+; CHECK-NEXT: vstrb.8 q3, [r3], #16
; CHECK-NEXT: mov r1, r2
; CHECK-NEXT: bne .LBB17_2
; CHECK-NEXT: b .LBB17_7
; CHECK-NEXT: .LBB17_5: @ %bb21.preheader
; CHECK-NEXT: dls lr, r12
-; CHECK-NEXT: mov r1, sp
+; CHECK-NEXT: mov r0, sp
; CHECK-NEXT: .LBB17_6: @ %bb21
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vldrw.u32 q0, [r0], #16
-; CHECK-NEXT: vstrw.32 q0, [r1]
+; CHECK-NEXT: vldrw.u32 q0, [r3], #16
+; CHECK-NEXT: vstrw.32 q0, [r0]
; CHECK-NEXT: le lr, .LBB17_6
; CHECK-NEXT: .LBB17_7: @ %bb80
; CHECK-NEXT: add sp, #24
@@ -1918,7 +1918,7 @@ define void @arm_biquad_cascade_df2T_f32(ptr nocapture readonly %S, ptr nocaptur
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr}
; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13}
; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13}
-; CHECK-NEXT: ldrd r6, r12, [r0, #4]
+; CHECK-NEXT: ldrd r12, r6, [r0, #4]
; CHECK-NEXT: lsr.w r8, r3, #1
; CHECK-NEXT: ldrb r0, [r0]
; CHECK-NEXT: vldr s0, .LCPI20_0
@@ -1926,26 +1926,26 @@ define void @arm_biquad_cascade_df2T_f32(ptr nocapture readonly %S, ptr nocaptur
; CHECK-NEXT: .LBB20_1: @ %if.else
; CHECK-NEXT: @ in Loop: Header=BB20_3 Depth=1
; CHECK-NEXT: vmov.f32 s6, s5
-; CHECK-NEXT: vstr s4, [r6]
+; CHECK-NEXT: vstr s4, [r12]
; CHECK-NEXT: .LBB20_2: @ %if.end
; CHECK-NEXT: @ in Loop: Header=BB20_3 Depth=1
-; CHECK-NEXT: vstr s6, [r6, #4]
-; CHECK-NEXT: add.w r12, r12, #20
+; CHECK-NEXT: vstr s6, [r12, #4]
+; CHECK-NEXT: adds r6, #20
; CHECK-NEXT: subs r0, #1
-; CHECK-NEXT: add.w r6, r6, #8
+; CHECK-NEXT: add.w r12, r12, #8
; CHECK-NEXT: mov r1, r2
; CHECK-NEXT: beq .LBB20_8
; CHECK-NEXT: .LBB20_3: @ %do.body
; CHECK-NEXT: @ =>This Loop Header: Depth=1
; CHECK-NEXT: @ Child Loop BB20_5 Depth 2
-; CHECK-NEXT: vldrw.u32 q3, [r12]
+; CHECK-NEXT: vldrw.u32 q3, [r6]
; CHECK-NEXT: movs r5, #0
; CHECK-NEXT: vmov q4, q3
; CHECK-NEXT: vshlc q4, r5, #32
-; CHECK-NEXT: vldrw.u32 q2, [r12, #8]
+; CHECK-NEXT: vldrw.u32 q2, [r6, #8]
; CHECK-NEXT: vmov q5, q2
; CHECK-NEXT: vshlc q5, r5, #32
-; CHECK-NEXT: vldrw.u32 q1, [r6]
+; CHECK-NEXT: vldrw.u32 q1, [r12]
; CHECK-NEXT: vmov.f32 s6, s0
; CHECK-NEXT: mov r5, r2
; CHECK-NEXT: vmov.f32 s7, s0
@@ -1985,7 +1985,7 @@ define void @arm_biquad_cascade_df2T_f32(ptr nocapture readonly %S, ptr nocaptur
; CHECK-NEXT: vmov r1, s4
; CHECK-NEXT: vstr s4, [r5]
; CHECK-NEXT: vfma.f32 q1, q2, r1
-; CHECK-NEXT: vstr s5, [r6]
+; CHECK-NEXT: vstr s5, [r12]
; CHECK-NEXT: b .LBB20_2
; CHECK-NEXT: .LBB20_8: @ %do.end
; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13}
diff --git a/llvm/test/CodeGen/Thumb2/mve-gather-ptrs.ll b/llvm/test/CodeGen/Thumb2/mve-gather-ptrs.ll
index b45cca7e1b4c58..e80ffe053b152d 100644
--- a/llvm/test/CodeGen/Thumb2/mve-gather-ptrs.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-gather-ptrs.ll
@@ -6,10 +6,10 @@
define arm_aapcs_vfpcc <2 x i32> @ptr_v2i32(ptr %offptr) {
; CHECK-LABEL: ptr_v2i32:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: ldrd r0, r1, [r0]
-; CHECK-NEXT: ldr r1, [r1]
+; CHECK-NEXT: ldrd r1, r0, [r0]
; CHECK-NEXT: ldr r0, [r0]
-; CHECK-NEXT: vmov q0[2], q0[0], r0, r1
+; CHECK-NEXT: ldr r1, [r1]
+; CHECK-NEXT: vmov q0[2], q0[0], r1, r0
; CHECK-NEXT: bx lr
entry:
%offs = load <2 x ptr>, ptr %offptr, align 4
@@ -112,9 +112,9 @@ entry:
define arm_aapcs_vfpcc <2 x float> @ptr_v2f32(ptr %offptr) {
; CHECK-LABEL: ptr_v2f32:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: ldrd r0, r1, [r0]
-; CHECK-NEXT: vldr s1, [r1]
-; CHECK-NEXT: vldr s0, [r0]
+; CHECK-NEXT: ldrd r1, r0, [r0]
+; CHECK-NEXT: vldr s1, [r0]
+; CHECK-NEXT: vldr s0, [r1]
; CHECK-NEXT: bx lr
entry:
%offs = load <2 x ptr>, ptr %offptr, align 4
@@ -199,13 +199,13 @@ entry:
define arm_aapcs_vfpcc <2 x i32> @ptr_v2i16_sext(ptr %offptr) {
; CHECK-LABEL: ptr_v2i16_sext:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: ldrd r0, r1, [r0]
-; CHECK-NEXT: ldrsh.w r1, [r1]
+; CHECK-NEXT: ldrd r1, r0, [r0]
; CHECK-NEXT: ldrsh.w r0, [r0]
-; CHECK-NEXT: vmov q0[2], q0[0], r0, r1
-; CHECK-NEXT: asrs r1, r1, #31
+; CHECK-NEXT: ldrsh.w r1, [r1]
+; CHECK-NEXT: vmov q0[2], q0[0], r1, r0
; CHECK-NEXT: asrs r0, r0, #31
-; CHECK-NEXT: vmov q0[3], q0[1], r0, r1
+; CHECK-NEXT: asrs r1, r1, #31
+; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
; CHECK-NEXT: bx lr
entry:
%offs = load <2 x ptr>, ptr %offptr, align 4
@@ -217,11 +217,11 @@ entry:
define arm_aapcs_vfpcc <2 x i32> @ptr_v2i16_zext(ptr %offptr) {
; CHECK-LABEL: ptr_v2i16_zext:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: ldrd r0, r1, [r0]
+; CHECK-NEXT: ldrd r1, r0, [r0]
; CHECK-NEXT: vmov.i64 q0, #0xffff
-; CHECK-NEXT: ldrh r1, [r1]
; CHECK-NEXT: ldrh r0, [r0]
-; CHECK-NEXT: vmov q1[2], q1[0], r0, r1
+; CHECK-NEXT: ldrh r1, [r1]
+; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
; CHECK-NEXT: vand q0, q1, q0
; CHECK-NEXT: bx lr
entry:
diff --git a/llvm/test/CodeGen/Thumb2/mve-masked-ldst.ll b/llvm/test/CodeGen/Thumb2/mve-masked-ldst.ll
index c7c579f9d65362..ab43ff11358029 100644
--- a/llvm/test/CodeGen/Thumb2/mve-masked-ldst.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-masked-ldst.ll
@@ -94,22 +94,22 @@ define void @foo_sext_v2i64_v2i32(ptr %dest, ptr %mask, ptr %src) {
; CHECK-LE-NEXT: .pad #4
; CHECK-LE-NEXT: sub sp, #4
; CHECK-LE-NEXT: ldrd r12, lr, [r1]
-; CHECK-LE-NEXT: movs r1, #0
+; CHECK-LE-NEXT: movs r3, #0
; CHECK-LE-NEXT: @ implicit-def: $q1
-; CHECK-LE-NEXT: rsbs.w r3, r12, #0
+; CHECK-LE-NEXT: rsbs.w r1, r12, #0
; CHECK-LE-NEXT: vmov q0[2], q0[0], r12, lr
-; CHECK-LE-NEXT: sbcs.w r3, r1, r12, asr #31
-; CHECK-LE-NEXT: csetm r3, lt
+; CHECK-LE-NEXT: sbcs.w r1, r3, r12, asr #31
+; CHECK-LE-NEXT: csetm r1, lt
; CHECK-LE-NEXT: rsbs.w r4, lr, #0
-; CHECK-LE-NEXT: sbcs.w r4, r1, lr, asr #31
-; CHECK-LE-NEXT: bfi r1, r3, #0, #1
-; CHECK-LE-NEXT: csetm r3, lt
-; CHECK-LE-NEXT: bfi r1, r3, #1, #1
-; CHECK-LE-NEXT: lsls r3, r1, #31
+; CHECK-LE-NEXT: sbcs.w r4, r3, lr, asr #31
+; CHECK-LE-NEXT: bfi r3, r1, #0, #1
+; CHECK-LE-NEXT: csetm r1, lt
+; CHECK-LE-NEXT: bfi r3, r1, #1, #1
+; CHECK-LE-NEXT: lsls r1, r3, #31
; CHECK-LE-NEXT: itt ne
-; CHECK-LE-NEXT: ldrne r3, [r2]
-; CHECK-LE-NEXT: vmovne.32 q1[0], r3
-; CHECK-LE-NEXT: lsls r1, r1, #30
+; CHECK-LE-NEXT: ldrne r1, [r2]
+; CHECK-LE-NEXT: vmovne.32 q1[0], r1
+; CHECK-LE-NEXT: lsls r1, r3, #30
; CHECK-LE-NEXT: itt mi
; CHECK-LE-NEXT: ldrmi r1, [r2, #4]
; CHECK-LE-NEXT: vmovmi.32 q1[2], r1
@@ -218,22 +218,22 @@ define void @foo_sext_v2i64_v2i32_unaligned(ptr %dest, ptr %mask, ptr %src) {
; CHECK-LE-NEXT: .pad #4
; CHECK-LE-NEXT: sub sp, #4
; CHECK-LE-NEXT: ldrd r12, lr, [r1]
-; CHECK-LE-NEXT: movs r1, #0
+; CHECK-LE-NEXT: movs r3, #0
; CHECK-LE-NEXT: @ implicit-def: $q0
-; CHECK-LE-NEXT: rsbs.w r3, r12, #0
+; CHECK-LE-NEXT: rsbs.w r1, r12, #0
; CHECK-LE-NEXT: vmov q1[2], q1[0], r12, lr
-; CHECK-LE-NEXT: sbcs.w r3, r1, r12, asr #31
-; CHECK-LE-NEXT: csetm r3, lt
+; CHECK-LE-NEXT: sbcs.w r1, r3, r12, asr #31
+; CHECK-LE-NEXT: csetm r1, lt
; CHECK-LE-NEXT: rsbs.w r4, lr, #0
-; CHECK-LE-NEXT: sbcs.w r4, r1, lr, asr #31
-; CHECK-LE-NEXT: bfi r1, r3, #0, #1
-; CHECK-LE-NEXT: csetm r3, lt
-; CHECK-LE-NEXT: bfi r1, r3, #1, #1
-; CHECK-LE-NEXT: lsls r3, r1, #31
+; CHECK-LE-NEXT: sbcs.w r4, r3, lr, asr #31
+; CHECK-LE-NEXT: bfi r3, r1, #0, #1
+; CHECK-LE-NEXT: csetm r1, lt
+; CHECK-LE-NEXT: bfi r3, r1, #1, #1
+; CHECK-LE-NEXT: lsls r1, r3, #31
; CHECK-LE-NEXT: itt ne
-; CHECK-LE-NEXT: ldrne r3, [r2]
-; CHECK-LE-NEXT: vmovne.32 q0[0], r3
-; CHECK-LE-NEXT: lsls r1, r1, #30
+; CHECK-LE-NEXT: ldrne r1, [r2]
+; CHECK-LE-NEXT: vmovne.32 q0[0], r1
+; CHECK-LE-NEXT: lsls r1, r3, #30
; CHECK-LE-NEXT: itt mi
; CHECK-LE-NEXT: ldrmi r1, [r2, #4]
; CHECK-LE-NEXT: vmovmi.32 q0[2], r1
@@ -346,23 +346,23 @@ define void @foo_zext_v2i64_v2i32(ptr %dest, ptr %mask, ptr %src) {
; CHECK-LE-NEXT: .pad #4
; CHECK-LE-NEXT: sub sp, #4
; CHECK-LE-NEXT: ldrd r12, lr, [r1]
-; CHECK-LE-NEXT: movs r1, #0
+; CHECK-LE-NEXT: movs r3, #0
; CHECK-LE-NEXT: @ implicit-def: $q0
; CHECK-LE-NEXT: vmov.i64 q2, #0xffffffff
-; CHECK-LE-NEXT: rsbs.w r3, r12, #0
+; CHECK-LE-NEXT: rsbs.w r1, r12, #0
; CHECK-LE-NEXT: vmov q1[2], q1[0], r12, lr
-; CHECK-LE-NEXT: sbcs.w r3, r1, r12, asr #31
-; CHECK-LE-NEXT: csetm r3, lt
+; CHECK-LE-NEXT: sbcs.w r1, r3, r12, asr #31
+; CHECK-LE-NEXT: csetm r1, lt
; CHECK-LE-NEXT: rsbs.w r4, lr, #0
-; CHECK-LE-NEXT: sbcs.w r4, r1, lr, asr #31
-; CHECK-LE-NEXT: bfi r1, r3, #0, #1
-; CHECK-LE-NEXT: csetm r3, lt
-; CHECK-LE-NEXT: bfi r1, r3, #1, #1
-; CHECK-LE-NEXT: lsls r3, r1, #31
+; CHECK-LE-NEXT: sbcs.w r4, r3, lr, asr #31
+; CHECK-LE-NEXT: bfi r3, r1, #0, #1
+; CHECK-LE-NEXT: csetm r1, lt
+; CHECK-LE-NEXT: bfi r3, r1, #1, #1
+; CHECK-LE-NEXT: lsls r1, r3, #31
; CHECK-LE-NEXT: itt ne
-; CHECK-LE-NEXT: ldrne r3, [r2]
-; CHECK-LE-NEXT: vmovne.32 q0[0], r3
-; CHECK-LE-NEXT: lsls r1, r1, #30
+; CHECK-LE-NEXT: ldrne r1, [r2]
+; CHECK-LE-NEXT: vmovne.32 q0[0], r1
+; CHECK-LE-NEXT: lsls r1, r3, #30
; CHECK-LE-NEXT: itt mi
; CHECK-LE-NEXT: ldrmi r1, [r2, #4]
; CHECK-LE-NEXT: vmovmi.32 q0[2], r1
@@ -460,23 +460,23 @@ define void @foo_zext_v2i64_v2i32_unaligned(ptr %dest, ptr %mask, ptr %src) {
; CHECK-LE-NEXT: .pad #4
; CHECK-LE-NEXT: sub sp, #4
; CHECK-LE-NEXT: ldrd r12, lr, [r1]
-; CHECK-LE-NEXT: movs r1, #0
+; CHECK-LE-NEXT: movs r3, #0
; CHECK-LE-NEXT: @ implicit-def: $q0
; CHECK-LE-NEXT: vmov.i64 q2, #0xffffffff
-; CHECK-LE-NEXT: rsbs.w r3, r12, #0
+; CHECK-LE-NEXT: rsbs.w r1, r12, #0
; CHECK-LE-NEXT: vmov q1[2], q1[0], r12, lr
-; CHECK-LE-NEXT: sbcs.w r3, r1, r12, asr #31
-; CHECK-LE-NEXT: csetm r3, lt
+; CHECK-LE-NEXT: sbcs.w r1, r3, r12, asr #31
+; CHECK-LE-NEXT: csetm r1, lt
; CHECK-LE-NEXT: rsbs.w r4, lr, #0
-; CHECK-LE-NEXT: sbcs.w r4, r1, lr, asr #31
-; CHECK-LE-NEXT: bfi r1, r3, #0, #1
-; CHECK-LE-NEXT: csetm r3, lt
-; CHECK-LE-NEXT: bfi r1, r3, #1, #1
-; CHECK-LE-NEXT: lsls r3, r1, #31
+; CHECK-LE-NEXT: sbcs.w r4, r3, lr, asr #31
+; CHECK-LE-NEXT: bfi r3, r1, #0, #1
+; CHECK-LE-NEXT: csetm r1, lt
+; CHECK-LE-NEXT: bfi r3, r1, #1, #1
+; CHECK-LE-NEXT: lsls r1, r3, #31
; CHECK-LE-NEXT: itt ne
-; CHECK-LE-NEXT: ldrne r3, [r2]
-; CHECK-LE-NEXT: vmovne.32 q0[0], r3
-; CHECK-LE-NEXT: lsls r1, r1, #30
+; CHECK-LE-NEXT: ldrne r1, [r2]
+; CHECK-LE-NEXT: vmovne.32 q0[0], r1
+; CHECK-LE-NEXT: lsls r1, r3, #30
; CHECK-LE-NEXT: itt mi
; CHECK-LE-NEXT: ldrmi r1, [r2, #4]
; CHECK-LE-NEXT: vmovmi.32 q0[2], r1
diff --git a/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll b/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll
index b5d981ef340254..86319b01f48201 100644
--- a/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll
@@ -6,101 +6,102 @@ define arm_aapcs_vfpcc void @ssatmul_s_q31(ptr nocapture readonly %pSrcA, ptr no
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; CHECK-NEXT: .pad #8
-; CHECK-NEXT: sub sp, #8
+; CHECK-NEXT: .pad #12
+; CHECK-NEXT: sub sp, #12
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: beq.w .LBB0_8
; CHECK-NEXT: @ %bb.1: @ %entry
-; CHECK-NEXT: mov r11, r2
; CHECK-NEXT: cmp r3, #1
; CHECK-NEXT: bne .LBB0_3
; CHECK-NEXT: @ %bb.2:
-; CHECK-NEXT: movs r2, #0
+; CHECK-NEXT: movs r7, #0
; CHECK-NEXT: mov r12, r0
-; CHECK-NEXT: mov r8, r1
-; CHECK-NEXT: mov r10, r11
+; CHECK-NEXT: mov r6, r1
+; CHECK-NEXT: mov r10, r2
; CHECK-NEXT: b .LBB0_6
; CHECK-NEXT: .LBB0_3: @ %vector.ph
-; CHECK-NEXT: bic r2, r3, #1
-; CHECK-NEXT: adr r4, .LCPI0_0
-; CHECK-NEXT: subs r7, r2, #2
-; CHECK-NEXT: movs r6, #1
+; CHECK-NEXT: bic r5, r3, #1
; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT: add.w r10, r11, r2, lsl #2
+; CHECK-NEXT: subs r7, r5, #2
+; CHECK-NEXT: movs r6, #1
+; CHECK-NEXT: add.w r3, r1, r5, lsl #2
+; CHECK-NEXT: adr r4, .LCPI0_0
; CHECK-NEXT: add.w lr, r6, r7, lsr #1
-; CHECK-NEXT: str r2, [sp] @ 4-byte Spill
-; CHECK-NEXT: add.w r8, r1, r2, lsl #2
-; CHECK-NEXT: add.w r12, r0, r2, lsl #2
+; CHECK-NEXT: str r3, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT: str r5, [sp] @ 4-byte Spill
+; CHECK-NEXT: add.w r10, r2, r5, lsl #2
+; CHECK-NEXT: add.w r12, r0, r5, lsl #2
; CHECK-NEXT: vldrw.u32 q0, [r4]
; CHECK-NEXT: vmvn.i32 q1, #0x80000000
; CHECK-NEXT: .LBB0_4: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldrd r4, r2, [r0], #8
+; CHECK-NEXT: ldrd r4, r6, [r0], #8
; CHECK-NEXT: movs r5, #0
-; CHECK-NEXT: ldrd r7, r6, [r1], #8
-; CHECK-NEXT: smull r4, r7, r7, r4
-; CHECK-NEXT: asrl r4, r7, #31
+; CHECK-NEXT: ldrd r7, r8, [r1], #8
+; CHECK-NEXT: smull r4, r11, r7, r4
+; CHECK-NEXT: asrl r4, r11, #31
; CHECK-NEXT: rsbs.w r9, r4, #-2147483648
; CHECK-NEXT: mov.w r9, #-1
-; CHECK-NEXT: sbcs.w r3, r9, r7
+; CHECK-NEXT: sbcs.w r3, r9, r11
; CHECK-NEXT: csetm r3, lt
; CHECK-NEXT: bfi r5, r3, #0, #8
-; CHECK-NEXT: smull r2, r3, r6, r2
-; CHECK-NEXT: asrl r2, r3, #31
-; CHECK-NEXT: rsbs.w r6, r2, #-2147483648
-; CHECK-NEXT: vmov q2[2], q2[0], r4, r2
-; CHECK-NEXT: sbcs.w r6, r9, r3
-; CHECK-NEXT: vmov q2[3], q2[1], r7, r3
-; CHECK-NEXT: csetm r6, lt
-; CHECK-NEXT: bfi r5, r6, #8, #8
+; CHECK-NEXT: smull r6, r3, r8, r6
+; CHECK-NEXT: asrl r6, r3, #31
+; CHECK-NEXT: rsbs.w r7, r6, #-2147483648
+; CHECK-NEXT: vmov q2[2], q2[0], r4, r6
+; CHECK-NEXT: sbcs.w r7, r9, r3
+; CHECK-NEXT: vmov q2[3], q2[1], r11, r3
+; CHECK-NEXT: csetm r7, lt
+; CHECK-NEXT: mvn r6, #-2147483648
+; CHECK-NEXT: bfi r5, r7, #8, #8
; CHECK-NEXT: vmsr p0, r5
-; CHECK-NEXT: mvn r5, #-2147483648
; CHECK-NEXT: vpsel q2, q2, q0
-; CHECK-NEXT: vmov r2, r3, d4
-; CHECK-NEXT: subs r2, r2, r5
-; CHECK-NEXT: sbcs r2, r3, #0
-; CHECK-NEXT: mov.w r3, #0
-; CHECK-NEXT: csetm r2, lt
-; CHECK-NEXT: bfi r3, r2, #0, #8
-; CHECK-NEXT: vmov r2, r4, d5
-; CHECK-NEXT: subs r2, r2, r5
-; CHECK-NEXT: sbcs r2, r4, #0
-; CHECK-NEXT: csetm r2, lt
-; CHECK-NEXT: bfi r3, r2, #8, #8
-; CHECK-NEXT: vmsr p0, r3
+; CHECK-NEXT: vmov r3, r4, d4
+; CHECK-NEXT: subs r3, r3, r6
+; CHECK-NEXT: sbcs r3, r4, #0
+; CHECK-NEXT: mov.w r4, #0
+; CHECK-NEXT: csetm r3, lt
+; CHECK-NEXT: bfi r4, r3, #0, #8
+; CHECK-NEXT: vmov r3, r5, d5
+; CHECK-NEXT: subs r3, r3, r6
+; CHECK-NEXT: sbcs r3, r5, #0
+; CHECK-NEXT: csetm r3, lt
+; CHECK-NEXT: bfi r4, r3, #8, #8
+; CHECK-NEXT: vmsr p0, r4
; CHECK-NEXT: vpsel q2, q2, q1
-; CHECK-NEXT: vmov r2, s10
-; CHECK-NEXT: vmov r3, s8
-; CHECK-NEXT: strd r3, r2, [r11], #8
+; CHECK-NEXT: vmov r3, s10
+; CHECK-NEXT: vmov r4, s8
+; CHECK-NEXT: strd r4, r3, [r2], #8
; CHECK-NEXT: le lr, .LBB0_4
; CHECK-NEXT: @ %bb.5: @ %middle.block
-; CHECK-NEXT: ldrd r2, r3, [sp] @ 8-byte Folded Reload
-; CHECK-NEXT: cmp r2, r3
+; CHECK-NEXT: ldrd r7, r3, [sp] @ 8-byte Folded Reload
+; CHECK-NEXT: ldr r6, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT: cmp r7, r3
; CHECK-NEXT: beq .LBB0_8
; CHECK-NEXT: .LBB0_6: @ %for.body.preheader
-; CHECK-NEXT: sub.w lr, r3, r2
+; CHECK-NEXT: sub.w lr, r3, r7
; CHECK-NEXT: mov.w r0, #-1
; CHECK-NEXT: mov.w r1, #-2147483648
-; CHECK-NEXT: mvn r3, #-2147483648
+; CHECK-NEXT: mvn r2, #-2147483648
; CHECK-NEXT: .LBB0_7: @ %for.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldr r2, [r12], #4
-; CHECK-NEXT: ldr r4, [r8], #4
-; CHECK-NEXT: smull r2, r5, r4, r2
-; CHECK-NEXT: asrl r2, r5, #31
-; CHECK-NEXT: subs r4, r1, r2
-; CHECK-NEXT: sbcs.w r4, r0, r5
-; CHECK-NEXT: cset r4, lt
-; CHECK-NEXT: cmp r4, #0
-; CHECK-NEXT: csel r2, r2, r1, ne
-; CHECK-NEXT: csel r4, r5, r0, ne
-; CHECK-NEXT: subs r5, r2, r3
-; CHECK-NEXT: sbcs r4, r4, #0
-; CHECK-NEXT: csel r2, r2, r3, lt
-; CHECK-NEXT: str r2, [r10], #4
+; CHECK-NEXT: ldr r3, [r12], #4
+; CHECK-NEXT: ldr r4, [r6], #4
+; CHECK-NEXT: smull r4, r3, r4, r3
+; CHECK-NEXT: asrl r4, r3, #31
+; CHECK-NEXT: subs r5, r1, r4
+; CHECK-NEXT: sbcs.w r5, r0, r3
+; CHECK-NEXT: cset r5, lt
+; CHECK-NEXT: cmp r5, #0
+; CHECK-NEXT: csel r4, r4, r1, ne
+; CHECK-NEXT: csel r3, r3, r0, ne
+; CHECK-NEXT: subs r5, r4, r2
+; CHECK-NEXT: sbcs r3, r3, #0
+; CHECK-NEXT: csel r3, r4, r2, lt
+; CHECK-NEXT: str r3, [r10], #4
; CHECK-NEXT: le lr, .LBB0_7
; CHECK-NEXT: .LBB0_8: @ %for.cond.cleanup
-; CHECK-NEXT: add sp, #8
+; CHECK-NEXT: add sp, #12
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.9:
@@ -612,21 +613,20 @@ define arm_aapcs_vfpcc void @usatmul_2_q31(ptr nocapture readonly %pSrcA, ptr no
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: beq .LBB3_8
; CHECK-NEXT: @ %bb.1: @ %entry
-; CHECK-NEXT: mov r8, r2
; CHECK-NEXT: cmp r3, #1
; CHECK-NEXT: bne .LBB3_3
; CHECK-NEXT: @ %bb.2:
; CHECK-NEXT: movs r7, #0
; CHECK-NEXT: mov r12, r0
; CHECK-NEXT: mov r11, r1
-; CHECK-NEXT: mov r2, r8
+; CHECK-NEXT: mov r8, r2
; CHECK-NEXT: b .LBB3_6
; CHECK-NEXT: .LBB3_3: @ %vector.ph
; CHECK-NEXT: bic r5, r3, #1
; CHECK-NEXT: movs r6, #1
; CHECK-NEXT: subs r7, r5, #2
; CHECK-NEXT: str r5, [sp] @ 4-byte Spill
-; CHECK-NEXT: add.w r2, r8, r5, lsl #2
+; CHECK-NEXT: add.w r8, r2, r5, lsl #2
; CHECK-NEXT: add.w r11, r1, r5, lsl #2
; CHECK-NEXT: add.w lr, r6, r7, lsr #1
; CHECK-NEXT: add.w r12, r0, r5, lsl #2
@@ -653,7 +653,7 @@ define arm_aapcs_vfpcc void @usatmul_2_q31(ptr nocapture readonly %pSrcA, ptr no
; CHECK-NEXT: vpsel q1, q1, q0
; CHECK-NEXT: vmov r4, s6
; CHECK-NEXT: vmov r5, s4
-; CHECK-NEXT: strd r5, r4, [r8], #8
+; CHECK-NEXT: strd r5, r4, [r2], #8
; CHECK-NEXT: le lr, .LBB3_4
; CHECK-NEXT: @ %bb.5: @ %middle.block
; CHECK-NEXT: ldr r7, [sp] @ 4-byte Reload
@@ -667,11 +667,11 @@ define arm_aapcs_vfpcc void @usatmul_2_q31(ptr nocapture readonly %pSrcA, ptr no
; CHECK-NEXT: ldr r1, [r11], #4
; CHECK-NEXT: umull r0, r1, r1, r0
; CHECK-NEXT: lsrl r0, r1, #31
-; CHECK-NEXT: subs.w r3, r0, #-1
+; CHECK-NEXT: subs.w r2, r0, #-1
; CHECK-NEXT: sbcs r1, r1, #0
; CHECK-NEXT: it hs
; CHECK-NEXT: movhs.w r0, #-1
-; CHECK-NEXT: str r0, [r2], #4
+; CHECK-NEXT: str r0, [r8], #4
; CHECK-NEXT: le lr, .LBB3_7
; CHECK-NEXT: .LBB3_8: @ %for.cond.cleanup
; CHECK-NEXT: add sp, #4
diff --git a/llvm/test/CodeGen/Thumb2/mve-scatter-ptrs.ll b/llvm/test/CodeGen/Thumb2/mve-scatter-ptrs.ll
index 99fed2fb563312..38cccc60f04e04 100644
--- a/llvm/test/CodeGen/Thumb2/mve-scatter-ptrs.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-scatter-ptrs.ll
@@ -9,10 +9,10 @@ define arm_aapcs_vfpcc void @ptr_v2i32(<2 x i32> %v, ptr %offptr) {
; CHECK-LABEL: ptr_v2i32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov r2, s0
-; CHECK-NEXT: ldrd r0, r1, [r0]
-; CHECK-NEXT: str r2, [r0]
-; CHECK-NEXT: vmov r0, s2
-; CHECK-NEXT: str r0, [r1]
+; CHECK-NEXT: ldrd r1, r0, [r0]
+; CHECK-NEXT: str r2, [r1]
+; CHECK-NEXT: vmov r1, s2
+; CHECK-NEXT: str r1, [r0]
; CHECK-NEXT: bx lr
entry:
%offs = load <2 x ptr>, ptr %offptr, align 4
@@ -125,9 +125,9 @@ entry:
define arm_aapcs_vfpcc void @ptr_v2f32(<2 x float> %v, ptr %offptr) {
; CHECK-LABEL: ptr_v2f32:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: ldrd r0, r1, [r0]
-; CHECK-NEXT: vstr s0, [r0]
-; CHECK-NEXT: vstr s1, [r1]
+; CHECK-NEXT: ldrd r1, r0, [r0]
+; CHECK-NEXT: vstr s0, [r1]
+; CHECK-NEXT: vstr s1, [r0]
; CHECK-NEXT: bx lr
entry:
%offs = load <2 x ptr>, ptr %offptr, align 4
@@ -217,10 +217,10 @@ define arm_aapcs_vfpcc void @ptr_v2i16_trunc(<2 x i32> %v, ptr %offptr) {
; CHECK-LABEL: ptr_v2i16_trunc:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov r2, s0
-; CHECK-NEXT: ldrd r0, r1, [r0]
-; CHECK-NEXT: strh r2, [r0]
-; CHECK-NEXT: vmov r0, s2
-; CHECK-NEXT: strh r0, [r1]
+; CHECK-NEXT: ldrd r1, r0, [r0]
+; CHECK-NEXT: strh r2, [r1]
+; CHECK-NEXT: vmov r1, s2
+; CHECK-NEXT: strh r1, [r0]
; CHECK-NEXT: bx lr
entry:
%offs = load <2 x ptr>, ptr %offptr, align 4
diff --git a/llvm/test/CodeGen/Thumb2/mve-vecreduce-slp.ll b/llvm/test/CodeGen/Thumb2/mve-vecreduce-slp.ll
index d8c3e4ae3ffaf0..78693d55e1a6a5 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vecreduce-slp.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vecreduce-slp.ll
@@ -7,7 +7,7 @@
define i32 @addv2i32i32(ptr %x) {
; CHECK-LABEL: addv2i32i32:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: ldrd r0, r1, [r0]
+; CHECK-NEXT: ldrd r1, r0, [r0]
; CHECK-NEXT: add r0, r1
; CHECK-NEXT: bx lr
entry:
@@ -1308,10 +1308,10 @@ entry:
define i32 @mlav2i32i32(ptr %x, ptr %y) {
; CHECK-LABEL: mlav2i32i32:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: ldrd r0, r2, [r0]
-; CHECK-NEXT: ldrd r1, r3, [r1]
-; CHECK-NEXT: muls r0, r1, r0
-; CHECK-NEXT: mla r0, r3, r2, r0
+; CHECK-NEXT: ldrd r2, r0, [r0]
+; CHECK-NEXT: ldrd r3, r1, [r1]
+; CHECK-NEXT: muls r2, r3, r2
+; CHECK-NEXT: mla r0, r1, r0, r2
; CHECK-NEXT: bx lr
entry:
%0 = load i32, ptr %x, align 4
diff --git a/llvm/test/CodeGen/Thumb2/mve-vld3.ll b/llvm/test/CodeGen/Thumb2/mve-vld3.ll
index 4895eabb71ec06..ec74cba3d979eb 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vld3.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vld3.ll
@@ -9,11 +9,11 @@ define void @vld3_v2i32(ptr %src, ptr %dst) {
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: vldrw.u32 q0, [r0]
-; CHECK-NEXT: ldrd r0, r2, [r0, #16]
+; CHECK-NEXT: ldrd r2, r0, [r0, #16]
; CHECK-NEXT: vmov.f32 s6, s3
; CHECK-NEXT: vmov r12, lr, d0
; CHECK-NEXT: vmov r3, s6
-; CHECK-NEXT: add r0, r3
+; CHECK-NEXT: add r2, r3
; CHECK-NEXT: add.w r3, r12, lr
; CHECK-NEXT: add r0, r2
; CHECK-NEXT: vmov r2, s2
@@ -438,8 +438,8 @@ define void @vld3_v2i8(ptr %src, ptr %dst) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .pad #8
; CHECK-NEXT: sub sp, #8
-; CHECK-NEXT: ldrd r0, r2, [r0]
-; CHECK-NEXT: strd r0, r2, [sp]
+; CHECK-NEXT: ldrd r2, r0, [r0]
+; CHECK-NEXT: strd r2, r0, [sp]
; CHECK-NEXT: mov r0, sp
; CHECK-NEXT: vldrb.u16 q0, [r0]
; CHECK-NEXT: vmov.u16 r0, q0[4]
diff --git a/llvm/test/CodeGen/Thumb2/mve-vst2.ll b/llvm/test/CodeGen/Thumb2/mve-vst2.ll
index 57d08a7f3c4b25..483b838035ed66 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vst2.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vst2.ll
@@ -480,9 +480,9 @@ entry:
define void @vst2_v2f16(ptr %src, ptr %dst) {
; CHECK-LABEL: vst2_v2f16:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: ldrd r0, r2, [r0]
-; CHECK-NEXT: vmov.32 q1[0], r0
-; CHECK-NEXT: vmov.32 q0[0], r2
+; CHECK-NEXT: ldrd r2, r0, [r0]
+; CHECK-NEXT: vmov.32 q1[0], r2
+; CHECK-NEXT: vmov.32 q0[0], r0
; CHECK-NEXT: vmovx.f16 s5, s4
; CHECK-NEXT: vins.f16 s4, s0
; CHECK-NEXT: vmovx.f16 s0, s0
diff --git a/llvm/test/CodeGen/Thumb2/postinc-distribute.mir b/llvm/test/CodeGen/Thumb2/postinc-distribute.mir
index fd8c2a83786710..c54d2bb4209abc 100644
--- a/llvm/test/CodeGen/Thumb2/postinc-distribute.mir
+++ b/llvm/test/CodeGen/Thumb2/postinc-distribute.mir
@@ -366,7 +366,8 @@ body: |
; CHECK: liveins: $r0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gprnopc = COPY $r0
- ; CHECK-NEXT: [[t2LDRDi8_:%[0-9]+]]:rgpr, [[t2LDRDi8_1:%[0-9]+]]:rgpr = t2LDRDi8 [[COPY]], 0, 14 /* CC::al */, $noreg :: (load (s32))
+ ; CHECK-NEXT: [[t2LDRi12_:%[0-9]+]]:rgpr = t2LDRi12 [[COPY]], 0, 14 /* CC::al */, $noreg :: (load (s32))
+ ; CHECK-NEXT: [[t2LDRi12_1:%[0-9]+]]:rgpr = t2LDRi12 [[COPY]], 4, 14 /* CC::al */, $noreg :: (load (s32))
; CHECK-NEXT: [[t2ADDri:%[0-9]+]]:rgpr = nuw t2ADDri [[COPY]], 32, 14 /* CC::al */, $noreg, $noreg
; CHECK-NEXT: [[t2LDRi8_:%[0-9]+]]:rgpr = t2LDRi8 [[COPY]], -8, 14 /* CC::al */, $noreg :: (load (s32))
; CHECK-NEXT: $r0 = COPY [[t2ADDri]]
@@ -574,7 +575,8 @@ body: |
; CHECK: liveins: $r0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gprnopc = COPY $r0
- ; CHECK-NEXT: [[t2LDRDi8_:%[0-9]+]]:rgpr, [[t2LDRDi8_1:%[0-9]+]]:rgpr = t2LDRDi8 [[COPY]], 0, 14 /* CC::al */, $noreg :: (load (s32))
+ ; CHECK-NEXT: [[t2LDRi12_:%[0-9]+]]:rgpr = t2LDRi12 [[COPY]], 0, 14 /* CC::al */, $noreg :: (load (s32))
+ ; CHECK-NEXT: [[t2LDRi12_1:%[0-9]+]]:rgpr = t2LDRi12 [[COPY]], 4, 14 /* CC::al */, $noreg :: (load (s32))
; CHECK-NEXT: [[t2SUBri:%[0-9]+]]:rgpr = nuw t2SUBri [[COPY]], 32, 14 /* CC::al */, $noreg, $noreg
; CHECK-NEXT: [[t2LDRi8_:%[0-9]+]]:rgpr = t2LDRi8 [[COPY]], -8, 14 /* CC::al */, $noreg :: (load (s32))
; CHECK-NEXT: $r0 = COPY [[t2SUBri]]
More information about the llvm-commits
mailing list