[llvm] [LoopStrengthReduce] Encourage the creation of IVs whose increment can later be combined with memory instructions (PR #152995)
Sergey Shcherbinin via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 11 04:13:51 PDT 2025
https://github.com/SergeyShch01 created https://github.com/llvm/llvm-project/pull/152995
Encourage (via heuristics) the creation of IVs whose increment can later be combined with memory instructions as pre/post increments. Regression tests are updated accordingly.
>From 431ed6d7ab881c6b0f4991b3ce1b08326660e14d Mon Sep 17 00:00:00 2001
From: Sergey Shcherbinin <sscherbinin at nvidia.com>
Date: Mon, 11 Aug 2025 15:09:37 +0400
Subject: [PATCH] [LoopStrengthReduce] Encourage (via heuristics) the creation
of IVs whose increment can later be combined with a memory instruction as a
pre/post increment.
---
.../Transforms/Scalar/LoopStrengthReduce.cpp | 20 +-
llvm/test/CodeGen/AArch64/arm64-scaled_iv.ll | 33 +-
llvm/test/CodeGen/AArch64/cheap-as-a-move.ll | 8 +-
.../AArch64/complex-deinterleaving-crash.ll | 8 +-
.../complex-deinterleaving-reductions.ll | 22 +-
.../CodeGen/AArch64/fp-conversion-to-tbl.ll | 177 +++---
.../CodeGen/AArch64/machine-combiner-copy.ll | 13 +-
.../CodeGen/AArch64/machine-licm-sub-loop.ll | 19 +-
.../neon-partial-reduce-dot-product.ll | 151 +++---
.../AArch64/ragreedy-local-interval-cost.ll | 286 +++++-----
llvm/test/CodeGen/AArch64/reduce-or-opt.ll | 20 +-
llvm/test/CodeGen/AArch64/sink-and-fold.ll | 41 +-
llvm/test/CodeGen/AArch64/sink-mul-exts.ll | 42 +-
llvm/test/CodeGen/AArch64/sinksplat.ll | 21 +-
llvm/test/CodeGen/AArch64/sitofp-to-tbl.ll | 48 +-
llvm/test/CodeGen/AArch64/trunc-to-tbl.ll | 357 ++++++-------
llvm/test/CodeGen/AArch64/vecreduce-fadd.ll | 63 +--
llvm/test/CodeGen/AArch64/vldn_shuffle.ll | 55 +-
llvm/test/CodeGen/AArch64/vselect-ext.ll | 21 +-
llvm/test/CodeGen/AArch64/zext-to-tbl.ll | 502 ++++++++----------
.../ARM/ParallelDSP/multi-use-loads.ll | 111 ++--
llvm/test/CodeGen/ARM/branch-on-zero.ll | 30 +-
llvm/test/CodeGen/ARM/dsp-loop-indexing.ll | 111 ++--
llvm/test/CodeGen/ARM/fpclamptosat.ll | 142 ++---
llvm/test/CodeGen/ARM/loop-align-cortex-m.ll | 4 +-
llvm/test/CodeGen/ARM/loop-indexing.ll | 104 ++--
llvm/test/CodeGen/Thumb/mvn.ll | 99 +---
.../Thumb2/LowOverheadLoops/loop-guards.ll | 7 +-
.../Thumb2/LowOverheadLoops/lsr-le-cost.ll | 240 +++------
.../LowOverheadLoops/no-dec-le-simple.ll | 43 +-
.../Thumb2/LowOverheadLoops/sibling-loops.ll | 49 +-
.../test/CodeGen/Thumb2/pacbti-m-varargs-1.ll | 10 +-
llvm/test/CodeGen/Thumb2/pacbti-m-vla.ll | 45 +-
.../LoopStrengthReduce/AArch64/pr53625.ll | 27 +-
.../AArch64/small-constant.ll | 13 +-
.../AArch64/vscale-factor-out-constant.ll | 2 +-
.../ARM/2012-06-15-lsr-noaddrmode.ll | 4 +-
.../ARM/illegal-addr-modes.ll | 3 +-
38 files changed, 1309 insertions(+), 1642 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index e3ef9d8680b53..27d9190688ffa 100644
--- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -523,6 +523,8 @@ struct Formula {
bool countsDownToZero() const;
+ bool isBaseRegOnly() const;
+
size_t getNumRegs() const;
Type *getType() const;
@@ -717,6 +719,11 @@ bool Formula::countsDownToZero() const {
return StepInt->isNegative();
}
+bool Formula::isBaseRegOnly() const {
+ return BaseGV == nullptr && Scale == 0 && ScaledReg == nullptr &&
+ BaseOffset.isZero() && UnfoldedOffset.isZero() && BaseRegs.size() == 1;
+}
+
/// Return the total number of register operands used by this formula. This does
/// not include register uses implied by non-constant addrec strides.
size_t Formula::getNumRegs() const {
@@ -1425,12 +1432,17 @@ void Cost::RateRegister(const Formula &F, const SCEV *Reg,
const SCEV *Start;
const SCEVConstant *Step;
if (match(AR, m_scev_AffineAddRec(m_SCEV(Start), m_SCEVConstant(Step))))
- // If the step size matches the base offset, we could use pre-indexed
- // addressing.
- if ((AMK == TTI::AMK_PreIndexed && F.BaseOffset.isFixed() &&
+ if ( // If the step size matches the base offset, we could use
+ // pre-indexed addressing.
+ (AMK == TTI::AMK_PreIndexed && F.BaseOffset.isFixed() &&
Step->getAPInt() == F.BaseOffset.getFixedValue()) ||
(AMK == TTI::AMK_PostIndexed && !isa<SCEVConstant>(Start) &&
- SE->isLoopInvariant(Start, L)))
+ SE->isLoopInvariant(Start, L)) ||
+ // general check for post-indexed addressing with specific step
+ (LU.Kind == LSRUse::Address && F.isBaseRegOnly() &&
+ TTI->isLegalAddressingMode(LU.AccessTy.MemTy, nullptr,
+ Step->getAPInt().getSExtValue(), true,
+ 0, LU.AccessTy.AddrSpace)))
LoopCost = 0;
}
// If the loop counts down to zero and we'll be using a hardware loop then
diff --git a/llvm/test/CodeGen/AArch64/arm64-scaled_iv.ll b/llvm/test/CodeGen/AArch64/arm64-scaled_iv.ll
index c4da564434ee9..966ff15dff098 100644
--- a/llvm/test/CodeGen/AArch64/arm64-scaled_iv.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-scaled_iv.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
; RUN: opt -S -loop-reduce < %s | FileCheck %s
; Scaling factor in addressing mode are costly.
; Make loop-reduce prefer unscaled accesses.
@@ -7,20 +8,38 @@ target triple = "arm64-apple-ios7.0.0"
; Function Attrs: nounwind ssp
define void @mulDouble(ptr nocapture %a, ptr nocapture %b, ptr nocapture %c) {
-; CHECK: @mulDouble
+; CHECK-LABEL: define void @mulDouble(
+; CHECK-SAME: ptr captures(none) [[A:%.*]], ptr captures(none) [[B:%.*]], ptr captures(none) [[C:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr nuw i8, ptr [[A]], i64 8
+; CHECK-NEXT: [[SCEVGEP3:%.*]] = getelementptr nuw i8, ptr [[C]], i64 16
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[LSR_IV6:%.*]] = phi ptr [ [[SCEVGEP7:%.*]], [[FOR_BODY]] ], [ [[B]], [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[LSR_IV4:%.*]] = phi ptr [ [[SCEVGEP5:%.*]], [[FOR_BODY]] ], [ [[SCEVGEP3]], [[ENTRY]] ]
+; CHECK-NEXT: [[LSR_IV2:%.*]] = phi i32 [ [[LSR_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 19, [[ENTRY]] ]
+; CHECK-NEXT: [[LSR_IV:%.*]] = phi ptr [ [[SCEVGEP1:%.*]], [[FOR_BODY]] ], [ [[SCEVGEP]], [[ENTRY]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = load double, ptr [[LSR_IV6]], align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load double, ptr [[LSR_IV4]], align 8
+; CHECK-NEXT: [[MUL:%.*]] = fmul double [[TMP1]], [[TMP2]]
+; CHECK-NEXT: store double [[MUL]], ptr [[LSR_IV]], align 8
+; CHECK-NEXT: [[SCEVGEP1]] = getelementptr i8, ptr [[LSR_IV]], i64 8
+; CHECK-NEXT: [[LSR_IV_NEXT]] = add nsw i32 [[LSR_IV2]], -1
+; CHECK-NEXT: [[SCEVGEP5]] = getelementptr i8, ptr [[LSR_IV4]], i64 8
+; CHECK-NEXT: [[SCEVGEP7]] = getelementptr i8, ptr [[LSR_IV6]], i64 8
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LSR_IV_NEXT]], 0
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
+; CHECK: for.end:
+; CHECK-NEXT: ret void
+;
entry:
br label %for.body
for.body: ; preds = %for.body, %entry
-; CHECK: [[IV:%[^ ]+]] = phi i64 [ [[IVNEXT:%[^,]+]], %for.body ], [ 0, %entry ]
-; Only one induction variable should have been generated.
-; CHECK-NOT: phi
%indvars.iv = phi i64 [ 1, %entry ], [ %indvars.iv.next, %for.body ]
%tmp = add nsw i64 %indvars.iv, -1
%arrayidx = getelementptr inbounds double, ptr %b, i64 %tmp
%tmp1 = load double, ptr %arrayidx, align 8
-; The induction variable should carry the scaling factor: 1 * 8 = 8.
-; CHECK: [[IVNEXT]] = add nuw nsw i64 [[IV]], 8
%indvars.iv.next = add i64 %indvars.iv, 1
%arrayidx2 = getelementptr inbounds double, ptr %c, i64 %indvars.iv.next
%tmp2 = load double, ptr %arrayidx2, align 8
@@ -28,8 +47,6 @@ for.body: ; preds = %for.body, %entry
%arrayidx4 = getelementptr inbounds double, ptr %a, i64 %indvars.iv
store double %mul, ptr %arrayidx4, align 8
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
-; Comparison should be 19 * 8 = 152.
-; CHECK: icmp eq i32 {{%[^,]+}}, 152
%exitcond = icmp eq i32 %lftr.wideiv, 20
br i1 %exitcond, label %for.end, label %for.body
diff --git a/llvm/test/CodeGen/AArch64/cheap-as-a-move.ll b/llvm/test/CodeGen/AArch64/cheap-as-a-move.ll
index 50c70c5676c4a..673caa2a7e63c 100644
--- a/llvm/test/CodeGen/AArch64/cheap-as-a-move.ll
+++ b/llvm/test/CodeGen/AArch64/cheap-as-a-move.ll
@@ -26,11 +26,11 @@ define void @f0(ptr %a, i64 %n) {
; CHECK-NEXT: b.ge .LBB0_2
; CHECK-NEXT: .LBB0_1: // %loop.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldr w0, [x20, x22, lsl #2]
+; CHECK-NEXT: ldr w0, [x20]
; CHECK-NEXT: mov x1, x21
; CHECK-NEXT: bl g
-; CHECK-NEXT: str w0, [x20, x22, lsl #2]
; CHECK-NEXT: add x22, x22, #1
+; CHECK-NEXT: str w0, [x20], #4
; CHECK-NEXT: cmp x22, x19
; CHECK-NEXT: b.lt .LBB0_1
; CHECK-NEXT: .LBB0_2: // %exit
@@ -76,12 +76,12 @@ define void @f1(ptr %a, i64 %n) {
; CHECK-NEXT: b.ge .LBB1_2
; CHECK-NEXT: .LBB1_1: // %loop.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldr w0, [x20, x21, lsl #2]
+; CHECK-NEXT: ldr w0, [x20]
; CHECK-NEXT: mov x1, #1450704896 // =0x56780000
; CHECK-NEXT: movk x1, #4660, lsl #48
; CHECK-NEXT: bl g
-; CHECK-NEXT: str w0, [x20, x21, lsl #2]
; CHECK-NEXT: add x21, x21, #1
+; CHECK-NEXT: str w0, [x20], #4
; CHECK-NEXT: cmp x21, x19
; CHECK-NEXT: b.lt .LBB1_1
; CHECK-NEXT: .LBB1_2: // %exit
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-crash.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-crash.ll
index 7542e9c4b8f5b..279b5e0a6dd81 100644
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-crash.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-crash.ll
@@ -36,10 +36,9 @@ define i32 @check_deinterleaving_has_deinterleave(ptr %a) {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: movi v1.4s, #1
-; CHECK-NEXT: add x8, x0, #16
+; CHECK-NEXT: mov w8, #32 // =0x20
; CHECK-NEXT: movi v3.2d, #0000000000000000
; CHECK-NEXT: movi v2.2d, #0000000000000000
-; CHECK-NEXT: mov w9, #32 // =0x20
; CHECK-NEXT: movi v4.2d, #0000000000000000
; CHECK-NEXT: movi v5.2d, #0000000000000000
; CHECK-NEXT: movi v7.2d, #0000000000000000
@@ -47,9 +46,8 @@ define i32 @check_deinterleaving_has_deinterleave(ptr %a) {
; CHECK-NEXT: movi v16.2d, #0000000000000000
; CHECK-NEXT: .LBB1_1: // %vector.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldp q17, q18, [x8, #-16]
-; CHECK-NEXT: subs x9, x9, #32
-; CHECK-NEXT: add x8, x8, #32
+; CHECK-NEXT: ldp q17, q18, [x0], #32
+; CHECK-NEXT: subs x8, x8, #32
; CHECK-NEXT: cmeq v17.16b, v17.16b, #0
; CHECK-NEXT: cmeq v18.16b, v18.16b, #0
; CHECK-NEXT: ushll2 v19.8h, v17.16b, #0
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions.ll
index aed3072bb4af3..78ad7ad81f84d 100644
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions.ll
@@ -16,15 +16,12 @@ define dso_local %"struct.std::complex" @complex_mul_v2f64(ptr %a, ptr %b) {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: movi v1.2d, #0000000000000000
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: mov w8, #1600 // =0x640
; CHECK-NEXT: .LBB0_1: // %vector.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: add x9, x0, x8
-; CHECK-NEXT: add x10, x1, x8
-; CHECK-NEXT: add x8, x8, #32
-; CHECK-NEXT: ldp q3, q2, [x9]
-; CHECK-NEXT: cmp x8, #1600
-; CHECK-NEXT: ldp q5, q4, [x10]
+; CHECK-NEXT: ldp q3, q2, [x0], #32
+; CHECK-NEXT: ldp q5, q4, [x1], #32
+; CHECK-NEXT: subs x8, x8, #32
; CHECK-NEXT: fcmla v0.2d, v5.2d, v3.2d, #0
; CHECK-NEXT: fcmla v1.2d, v4.2d, v2.2d, #0
; CHECK-NEXT: fcmla v0.2d, v5.2d, v3.2d, #90
@@ -83,15 +80,12 @@ define %"struct.std::complex" @complex_mul_nonzero_init_v2f64(ptr %a, ptr %b) {
; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: adrp x8, .LCPI1_0
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI1_0]
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: mov w8, #1600 // =0x640
; CHECK-NEXT: .LBB1_1: // %vector.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: add x9, x0, x8
-; CHECK-NEXT: add x10, x1, x8
-; CHECK-NEXT: add x8, x8, #32
-; CHECK-NEXT: ldp q3, q2, [x9]
-; CHECK-NEXT: cmp x8, #1600
-; CHECK-NEXT: ldp q5, q4, [x10]
+; CHECK-NEXT: ldp q3, q2, [x0], #32
+; CHECK-NEXT: ldp q5, q4, [x1], #32
+; CHECK-NEXT: subs x8, x8, #32
; CHECK-NEXT: fcmla v1.2d, v5.2d, v3.2d, #0
; CHECK-NEXT: fcmla v0.2d, v4.2d, v2.2d, #0
; CHECK-NEXT: fcmla v1.2d, v5.2d, v3.2d, #90
diff --git a/llvm/test/CodeGen/AArch64/fp-conversion-to-tbl.ll b/llvm/test/CodeGen/AArch64/fp-conversion-to-tbl.ll
index 1fbca7ca2c27c..be20483b75f7e 100644
--- a/llvm/test/CodeGen/AArch64/fp-conversion-to-tbl.ll
+++ b/llvm/test/CodeGen/AArch64/fp-conversion-to-tbl.ll
@@ -32,13 +32,11 @@ define void @fptoui_v8f32_to_v8i8_in_loop(ptr %A, ptr %dst) {
; CHECK-NEXT: adrp x8, lCPI0_0@PAGE
; CHECK-NEXT: Lloh1:
; CHECK-NEXT: ldr q0, [x8, lCPI0_0@PAGEOFF]
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: mov w8, #1000 ; =0x3e8
; CHECK-NEXT: LBB0_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: add x9, x0, x8, lsl #5
-; CHECK-NEXT: add x8, x8, #1
-; CHECK-NEXT: cmp x8, #1000
-; CHECK-NEXT: ldp q2, q1, [x9]
+; CHECK-NEXT: ldp q2, q1, [x0], #32
+; CHECK-NEXT: subs x8, x8, #1
; CHECK-NEXT: fcvtzu.4s v4, v1
; CHECK-NEXT: fcvtzu.4s v3, v2
; CHECK-NEXT: tbl.16b v1, { v3, v4 }, v0
@@ -111,22 +109,18 @@ define void @fptoui_2x_v8f32_to_v8i8_in_loop(ptr %A, ptr %B, ptr %dst) {
; CHECK-NEXT: adrp x8, lCPI2_0@PAGE
; CHECK-NEXT: Lloh3:
; CHECK-NEXT: ldr q0, [x8, lCPI2_0@PAGEOFF]
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: mov w8, #1000 ; =0x3e8
; CHECK-NEXT: LBB2_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: lsl x9, x8, #5
-; CHECK-NEXT: add x10, x0, x9
-; CHECK-NEXT: add x9, x1, x9
-; CHECK-NEXT: ldp q2, q1, [x10]
+; CHECK-NEXT: ldp q2, q1, [x0], #32
+; CHECK-NEXT: subs x8, x8, #1
; CHECK-NEXT: fcvtzu.4s v4, v1
-; CHECK-NEXT: ldp q7, q1, [x9]
+; CHECK-NEXT: ldp q7, q1, [x1], #32
; CHECK-NEXT: fcvtzu.4s v3, v2
; CHECK-NEXT: fcvtzu.4s v6, v1
; CHECK-NEXT: fcvtzu.4s v5, v7
; CHECK-NEXT: tbl.16b v1, { v3, v4, v5, v6 }, v0
-; CHECK-NEXT: str q1, [x2, x8, lsl #4]
-; CHECK-NEXT: add x8, x8, #1
-; CHECK-NEXT: cmp x8, #1000
+; CHECK-NEXT: str q1, [x2], #16
; CHECK-NEXT: b.eq LBB2_1
; CHECK-NEXT: ; %bb.2: ; %exit
; CHECK-NEXT: ret
@@ -178,22 +172,18 @@ define void @fptoui_2x_v8f32_to_v8i8_in_loop_no_concat_shuffle(ptr %A, ptr %B, p
; CHECK-NEXT: adrp x8, lCPI3_0@PAGE
; CHECK-NEXT: Lloh5:
; CHECK-NEXT: ldr q0, [x8, lCPI3_0@PAGEOFF]
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: mov w8, #1000 ; =0x3e8
; CHECK-NEXT: LBB3_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: lsl x9, x8, #5
-; CHECK-NEXT: add x10, x0, x9
-; CHECK-NEXT: add x9, x1, x9
-; CHECK-NEXT: ldp q2, q1, [x10]
+; CHECK-NEXT: ldp q2, q1, [x0], #32
+; CHECK-NEXT: subs x8, x8, #1
; CHECK-NEXT: fcvtzu.4s v4, v1
-; CHECK-NEXT: ldp q7, q1, [x9]
+; CHECK-NEXT: ldp q7, q1, [x1], #32
; CHECK-NEXT: fcvtzu.4s v3, v2
; CHECK-NEXT: fcvtzu.4s v6, v1
; CHECK-NEXT: fcvtzu.4s v5, v7
; CHECK-NEXT: tbl.16b v1, { v3, v4, v5, v6 }, v0
-; CHECK-NEXT: str q1, [x2, x8, lsl #4]
-; CHECK-NEXT: add x8, x8, #1
-; CHECK-NEXT: cmp x8, #1000
+; CHECK-NEXT: str q1, [x2], #16
; CHECK-NEXT: b.eq LBB3_1
; CHECK-NEXT: ; %bb.2: ; %exit
; CHECK-NEXT: ret
@@ -245,15 +235,13 @@ define void @fptoui_v16f32_to_v16i8_in_loop(ptr %A, ptr %dst) {
; CHECK-NEXT: adrp x8, lCPI4_0@PAGE
; CHECK-NEXT: Lloh7:
; CHECK-NEXT: ldr q0, [x8, lCPI4_0@PAGEOFF]
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: mov w8, #1000 ; =0x3e8
; CHECK-NEXT: LBB4_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: add x9, x0, x8, lsl #6
-; CHECK-NEXT: add x8, x8, #1
-; CHECK-NEXT: cmp x8, #1000
-; CHECK-NEXT: ldp q2, q1, [x9, #32]
+; CHECK-NEXT: ldp q2, q1, [x0, #32]
+; CHECK-NEXT: subs x8, x8, #1
; CHECK-NEXT: fcvtzu.4s v6, v1
-; CHECK-NEXT: ldp q7, q1, [x9]
+; CHECK-NEXT: ldp q7, q1, [x0], #64
; CHECK-NEXT: fcvtzu.4s v5, v2
; CHECK-NEXT: fcvtzu.4s v4, v1
; CHECK-NEXT: fcvtzu.4s v3, v7
@@ -306,30 +294,25 @@ define void @fptoui_2x_v16f32_to_v16i8_in_loop(ptr %A, ptr %B, ptr %dst) {
; CHECK-NEXT: adrp x8, lCPI5_0 at PAGE
; CHECK-NEXT: Lloh9:
; CHECK-NEXT: ldr q0, [x8, lCPI5_0 at PAGEOFF]
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: mov w8, #1000 ; =0x3e8
; CHECK-NEXT: LBB5_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: lsl x9, x8, #6
-; CHECK-NEXT: add x10, x1, x9
-; CHECK-NEXT: add x9, x0, x9
-; CHECK-NEXT: ldp q2, q1, [x10, #32]
-; CHECK-NEXT: ldp q3, q4, [x9, #32]
-; CHECK-NEXT: ldp q5, q6, [x10]
+; CHECK-NEXT: ldp q2, q1, [x1, #32]
+; CHECK-NEXT: subs x8, x8, #1
+; CHECK-NEXT: ldp q3, q4, [x0, #32]
+; CHECK-NEXT: ldp q5, q6, [x1], #64
; CHECK-NEXT: fcvtzu.4s v19, v1
; CHECK-NEXT: fcvtzu.4s v18, v2
-; CHECK-NEXT: ldp q2, q1, [x9]
; CHECK-NEXT: fcvtzu.4s v23, v4
-; CHECK-NEXT: fcvtzu.4s v17, v6
-; CHECK-NEXT: add x9, x2, x8, lsl #5
+; CHECK-NEXT: ldp q2, q1, [x0], #64
; CHECK-NEXT: fcvtzu.4s v22, v3
+; CHECK-NEXT: fcvtzu.4s v17, v6
; CHECK-NEXT: fcvtzu.4s v16, v5
-; CHECK-NEXT: add x8, x8, #1
; CHECK-NEXT: fcvtzu.4s v21, v1
-; CHECK-NEXT: cmp x8, #1000
; CHECK-NEXT: fcvtzu.4s v20, v2
; CHECK-NEXT: tbl.16b v1, { v16, v17, v18, v19 }, v0
; CHECK-NEXT: tbl.16b v2, { v20, v21, v22, v23 }, v0
-; CHECK-NEXT: stp q2, q1, [x9]
+; CHECK-NEXT: stp q2, q1, [x2], #32
; CHECK-NEXT: b.eq LBB5_1
; CHECK-NEXT: ; %bb.2: ; %exit
; CHECK-NEXT: ret
@@ -359,17 +342,15 @@ exit:
define void @fptoui_v8f32_to_v8i16_in_loop(ptr %A, ptr %dst) {
; CHECK-LABEL: fptoui_v8f32_to_v8i16_in_loop:
; CHECK: ; %bb.0: ; %entry
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: mov w8, #1000 ; =0x3e8
; CHECK-NEXT: LBB6_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: add x9, x0, x8, lsl #5
-; CHECK-NEXT: ldp q0, q1, [x9]
+; CHECK-NEXT: ldp q0, q1, [x0], #32
+; CHECK-NEXT: subs x8, x8, #1
; CHECK-NEXT: fcvtzu.4s v1, v1
; CHECK-NEXT: fcvtzu.4s v0, v0
; CHECK-NEXT: uzp1.8h v0, v0, v1
-; CHECK-NEXT: str q0, [x1, x8, lsl #4]
-; CHECK-NEXT: add x8, x8, #1
-; CHECK-NEXT: cmp x8, #1000
+; CHECK-NEXT: str q0, [x1], #16
; CHECK-NEXT: b.eq LBB6_1
; CHECK-NEXT: ; %bb.2: ; %exit
; CHECK-NEXT: ret
@@ -394,24 +375,19 @@ exit:
define void @fptoui_2x_v8f32_to_v8i16_in_loop(ptr %A, ptr %B, ptr %dst) {
; CHECK-LABEL: fptoui_2x_v8f32_to_v8i16_in_loop:
; CHECK: ; %bb.0: ; %entry
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: mov w8, #1000 ; =0x3e8
; CHECK-NEXT: LBB7_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: lsl x9, x8, #5
-; CHECK-NEXT: add x8, x8, #1
-; CHECK-NEXT: cmp x8, #1000
-; CHECK-NEXT: add x10, x0, x9
-; CHECK-NEXT: add x11, x1, x9
-; CHECK-NEXT: add x9, x2, x9
-; CHECK-NEXT: ldp q0, q1, [x10]
-; CHECK-NEXT: ldp q2, q3, [x11]
+; CHECK-NEXT: ldp q0, q1, [x0], #32
+; CHECK-NEXT: ldp q2, q3, [x1], #32
+; CHECK-NEXT: subs x8, x8, #1
; CHECK-NEXT: fcvtzu.4s v1, v1
; CHECK-NEXT: fcvtzu.4s v0, v0
; CHECK-NEXT: fcvtzu.4s v3, v3
; CHECK-NEXT: fcvtzu.4s v2, v2
; CHECK-NEXT: uzp1.8h v0, v0, v1
; CHECK-NEXT: uzp1.8h v1, v2, v3
-; CHECK-NEXT: stp q0, q1, [x9]
+; CHECK-NEXT: stp q0, q1, [x2], #32
; CHECK-NEXT: b.eq LBB7_1
; CHECK-NEXT: ; %bb.2: ; %exit
; CHECK-NEXT: ret
@@ -483,18 +459,16 @@ define void @uitofp_v8i8_to_v8f32(ptr %src, ptr %dst) {
; CHECK-NEXT: ldr q0, [x8, lCPI8_0@PAGEOFF]
; CHECK-NEXT: Lloh13:
; CHECK-NEXT: ldr q1, [x9, lCPI8_1@PAGEOFF]
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: mov w8, #1000 ; =0x3e8
; CHECK-NEXT: LBB8_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldr d2, [x0, x8, lsl #3]
-; CHECK-NEXT: add x9, x1, x8, lsl #5
-; CHECK-NEXT: add x8, x8, #1
-; CHECK-NEXT: cmp x8, #1000
+; CHECK-NEXT: ldr d2, [x0], #8
+; CHECK-NEXT: subs x8, x8, #1
; CHECK-NEXT: tbl.16b v3, { v2 }, v0
; CHECK-NEXT: tbl.16b v2, { v2 }, v1
; CHECK-NEXT: ucvtf.4s v3, v3
; CHECK-NEXT: ucvtf.4s v2, v2
-; CHECK-NEXT: stp q2, q3, [x9]
+; CHECK-NEXT: stp q2, q3, [x1], #32
; CHECK-NEXT: b.eq LBB8_1
; CHECK-NEXT: ; %bb.2: ; %exit
; CHECK-NEXT: ret
@@ -606,13 +580,11 @@ define void @uitofp_v16i8_to_v16f32(ptr %src, ptr %dst) {
; CHECK-NEXT: ldr q2, [x10, lCPI9_2@PAGEOFF]
; CHECK-NEXT: Lloh21:
; CHECK-NEXT: ldr q3, [x8, lCPI9_3@PAGEOFF]
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: mov w8, #1000 ; =0x3e8
; CHECK-NEXT: LBB9_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldr q4, [x0, x8, lsl #4]
-; CHECK-NEXT: add x9, x1, x8, lsl #6
-; CHECK-NEXT: add x8, x8, #1
-; CHECK-NEXT: cmp x8, #1000
+; CHECK-NEXT: ldr q4, [x0], #16
+; CHECK-NEXT: subs x8, x8, #1
; CHECK-NEXT: tbl.16b v5, { v4 }, v0
; CHECK-NEXT: tbl.16b v6, { v4 }, v1
; CHECK-NEXT: tbl.16b v7, { v4 }, v2
@@ -621,8 +593,8 @@ define void @uitofp_v16i8_to_v16f32(ptr %src, ptr %dst) {
; CHECK-NEXT: ucvtf.4s v6, v6
; CHECK-NEXT: ucvtf.4s v7, v7
; CHECK-NEXT: ucvtf.4s v4, v4
-; CHECK-NEXT: stp q6, q5, [x9, #32]
-; CHECK-NEXT: stp q4, q7, [x9]
+; CHECK-NEXT: stp q6, q5, [x1, #32]
+; CHECK-NEXT: stp q4, q7, [x1], #64
; CHECK-NEXT: b.eq LBB9_1
; CHECK-NEXT: ; %bb.2: ; %exit
; CHECK-NEXT: ret
@@ -668,13 +640,11 @@ define void @uitofp_v8i16_to_v8f64(ptr nocapture noundef readonly %x, ptr nocapt
; CHECK-NEXT: ldr q2, [x10, lCPI10_2@PAGEOFF]
; CHECK-NEXT: Lloh29:
; CHECK-NEXT: ldr q3, [x8, lCPI10_3@PAGEOFF]
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: mov w8, #1024 ; =0x400
; CHECK-NEXT: LBB10_1: ; %vector.body
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldr q4, [x0, x8]
-; CHECK-NEXT: add x9, x1, x8
-; CHECK-NEXT: add x8, x8, #64
-; CHECK-NEXT: cmp x8, #2, lsl #12 ; =8192
+; CHECK-NEXT: ldr q4, [x0], #64
+; CHECK-NEXT: subs x8, x8, #8
; CHECK-NEXT: tbl.16b v5, { v4 }, v0
; CHECK-NEXT: tbl.16b v6, { v4 }, v1
; CHECK-NEXT: tbl.16b v7, { v4 }, v2
@@ -683,8 +653,8 @@ define void @uitofp_v8i16_to_v8f64(ptr nocapture noundef readonly %x, ptr nocapt
; CHECK-NEXT: ucvtf.2d v6, v6
; CHECK-NEXT: ucvtf.2d v7, v7
; CHECK-NEXT: ucvtf.2d v4, v4
-; CHECK-NEXT: stp q6, q5, [x9, #32]
-; CHECK-NEXT: stp q4, q7, [x9]
+; CHECK-NEXT: stp q6, q5, [x1, #32]
+; CHECK-NEXT: stp q4, q7, [x1], #64
; CHECK-NEXT: b.ne LBB10_1
; CHECK-NEXT: ; %bb.2: ; %for.cond.cleanup
; CHECK-NEXT: ret
@@ -731,61 +701,58 @@ define void @uitofp_ld4_v32i16_to_v8f64(ptr nocapture noundef readonly %x, ptr n
; CHECK-NEXT: ldr q2, [x10, lCPI11_2@PAGEOFF]
; CHECK-NEXT: Lloh37:
; CHECK-NEXT: ldr q3, [x8, lCPI11_3@PAGEOFF]
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: mov w8, #1024 ; =0x400
; CHECK-NEXT: LBB11_1: ; %vector.body
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: add x9, x0, x8
-; CHECK-NEXT: ldp q5, q4, [x9, #32]
-; CHECK-NEXT: ldp q7, q6, [x9]
-; CHECK-NEXT: add x9, x1, x8
-; CHECK-NEXT: add x8, x8, #64
+; CHECK-NEXT: ldp q4, q5, [x0, #32]
+; CHECK-NEXT: subs x8, x8, #8
+; CHECK-NEXT: ldp q7, q6, [x0], #64
; CHECK-NEXT: tbl.16b v16, { v4 }, v0
-; CHECK-NEXT: tbl.16b v17, { v5 }, v0
+; CHECK-NEXT: tbl.16b v19, { v5 }, v0
+; CHECK-NEXT: tbl.16b v20, { v5 }, v1
+; CHECK-NEXT: tbl.16b v17, { v6 }, v0
+; CHECK-NEXT: tbl.16b v18, { v7 }, v0
; CHECK-NEXT: tbl.16b v21, { v4 }, v1
-; CHECK-NEXT: tbl.16b v18, { v6 }, v0
-; CHECK-NEXT: tbl.16b v19, { v7 }, v0
-; CHECK-NEXT: tbl.16b v20, { v7 }, v1
-; CHECK-NEXT: tbl.16b v22, { v5 }, v1
+; CHECK-NEXT: tbl.16b v22, { v4 }, v2
; CHECK-NEXT: tbl.16b v23, { v5 }, v2
-; CHECK-NEXT: tbl.16b v24, { v4 }, v2
-; CHECK-NEXT: tbl.16b v25, { v7 }, v2
; CHECK-NEXT: tbl.16b v5, { v5 }, v3
; CHECK-NEXT: tbl.16b v4, { v4 }, v3
+; CHECK-NEXT: tbl.16b v24, { v7 }, v1
+; CHECK-NEXT: tbl.16b v25, { v7 }, v2
; CHECK-NEXT: tbl.16b v7, { v7 }, v3
; CHECK-NEXT: tbl.16b v26, { v6 }, v1
; CHECK-NEXT: tbl.16b v27, { v6 }, v2
; CHECK-NEXT: tbl.16b v6, { v6 }, v3
-; CHECK-NEXT: ucvtf.2d v17, v17
; CHECK-NEXT: ucvtf.2d v16, v16
; CHECK-NEXT: ucvtf.2d v19, v19
; CHECK-NEXT: ucvtf.2d v18, v18
+; CHECK-NEXT: ucvtf.2d v17, v17
+; CHECK-NEXT: ucvtf.2d v21, v21
; CHECK-NEXT: ucvtf.2d v22, v22
+; CHECK-NEXT: ucvtf.2d v4, v4
+; CHECK-NEXT: ucvtf.2d v20, v20
; CHECK-NEXT: ucvtf.2d v23, v23
; CHECK-NEXT: ucvtf.2d v5, v5
-; CHECK-NEXT: ucvtf.2d v21, v21
; CHECK-NEXT: ucvtf.2d v24, v24
-; CHECK-NEXT: ucvtf.2d v4, v4
-; CHECK-NEXT: cmp x8, #2, lsl #12 ; =8192
-; CHECK-NEXT: ucvtf.2d v20, v20
; CHECK-NEXT: ucvtf.2d v25, v25
; CHECK-NEXT: ucvtf.2d v7, v7
; CHECK-NEXT: ucvtf.2d v26, v26
; CHECK-NEXT: ucvtf.2d v27, v27
; CHECK-NEXT: ucvtf.2d v6, v6
-; CHECK-NEXT: fadd.2d v17, v22, v17
-; CHECK-NEXT: fadd.2d v5, v23, v5
; CHECK-NEXT: fadd.2d v16, v21, v16
-; CHECK-NEXT: fadd.2d v4, v24, v4
+; CHECK-NEXT: fadd.2d v4, v22, v4
; CHECK-NEXT: fadd.2d v19, v20, v19
+; CHECK-NEXT: fadd.2d v5, v23, v5
+; CHECK-NEXT: fadd.2d v18, v24, v18
; CHECK-NEXT: fadd.2d v7, v25, v7
-; CHECK-NEXT: fadd.2d v18, v26, v18
+; CHECK-NEXT: fadd.2d v17, v26, v17
; CHECK-NEXT: fadd.2d v6, v27, v6
-; CHECK-NEXT: fadd.2d v5, v17, v5
; CHECK-NEXT: fadd.2d v4, v16, v4
-; CHECK-NEXT: fadd.2d v7, v19, v7
-; CHECK-NEXT: fadd.2d v6, v18, v6
-; CHECK-NEXT: stp q5, q4, [x9, #32]
-; CHECK-NEXT: stp q7, q6, [x9]
+; CHECK-NEXT: fadd.2d v5, v19, v5
+; CHECK-NEXT: fadd.2d v7, v18, v7
+; CHECK-NEXT: fadd.2d v6, v17, v6
+; CHECK-NEXT: stp q4, q5, [x1, #32]
+; CHECK-NEXT: stp q7, q6, [x1], #64
; CHECK-NEXT: b.ne LBB11_1
; CHECK-NEXT: ; %bb.2: ; %for.cond.cleanup
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/machine-combiner-copy.ll b/llvm/test/CodeGen/AArch64/machine-combiner-copy.ll
index 4c8e589391c3a..c7eeccc96901e 100644
--- a/llvm/test/CodeGen/AArch64/machine-combiner-copy.ll
+++ b/llvm/test/CodeGen/AArch64/machine-combiner-copy.ll
@@ -15,19 +15,18 @@ define void @fma_dup_f16(ptr noalias nocapture noundef readonly %A, half noundef
; CHECK-NEXT: b .LBB0_6
; CHECK-NEXT: .LBB0_3: // %vector.ph
; CHECK-NEXT: and x9, x8, #0xfffffff0
-; CHECK-NEXT: add x10, x1, #16
-; CHECK-NEXT: add x11, x0, #16
+; CHECK-NEXT: add x10, x0, #16
+; CHECK-NEXT: mov x11, x1
; CHECK-NEXT: mov x12, x9
; CHECK-NEXT: .LBB0_4: // %vector.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldp q1, q4, [x10, #-16]
+; CHECK-NEXT: ldp q1, q4, [x11]
; CHECK-NEXT: subs x12, x12, #16
-; CHECK-NEXT: ldp q2, q3, [x11, #-16]
-; CHECK-NEXT: add x11, x11, #32
+; CHECK-NEXT: ldp q2, q3, [x10, #-16]
+; CHECK-NEXT: add x10, x10, #32
; CHECK-NEXT: fmla v1.8h, v2.8h, v0.h[0]
; CHECK-NEXT: fmla v4.8h, v3.8h, v0.h[0]
-; CHECK-NEXT: stp q1, q4, [x10, #-16]
-; CHECK-NEXT: add x10, x10, #32
+; CHECK-NEXT: stp q1, q4, [x11], #32
; CHECK-NEXT: b.ne .LBB0_4
; CHECK-NEXT: // %bb.5: // %middle.block
; CHECK-NEXT: cmp x9, x8
diff --git a/llvm/test/CodeGen/AArch64/machine-licm-sub-loop.ll b/llvm/test/CodeGen/AArch64/machine-licm-sub-loop.ll
index f6bbdf5d95d87..6d9c374f01351 100644
--- a/llvm/test/CodeGen/AArch64/machine-licm-sub-loop.ll
+++ b/llvm/test/CodeGen/AArch64/machine-licm-sub-loop.ll
@@ -13,13 +13,13 @@ define void @foo(i32 noundef %limit, ptr %out, ptr %y) {
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: mov x9, xzr
; CHECK-NEXT: and x12, x10, #0xfffffff0
-; CHECK-NEXT: add x13, x1, #32
-; CHECK-NEXT: add x14, x2, #16
+; CHECK-NEXT: add x13, x2, #16
+; CHECK-NEXT: mov x14, x1
; CHECK-NEXT: b .LBB0_3
; CHECK-NEXT: .LBB0_2: // %for.cond1.for.cond.cleanup3_crit_edge.us
; CHECK-NEXT: // in Loop: Header=BB0_3 Depth=1
; CHECK-NEXT: add x9, x9, #1
-; CHECK-NEXT: add x13, x13, x11
+; CHECK-NEXT: add x14, x14, x11
; CHECK-NEXT: add x8, x8, x10
; CHECK-NEXT: cmp x9, x10
; CHECK-NEXT: b.eq .LBB0_10
@@ -36,23 +36,24 @@ define void @foo(i32 noundef %limit, ptr %out, ptr %y) {
; CHECK-NEXT: .LBB0_5: // %vector.ph
; CHECK-NEXT: // in Loop: Header=BB0_3 Depth=1
; CHECK-NEXT: dup v0.8h, w15
-; CHECK-NEXT: mov x16, x14
-; CHECK-NEXT: mov x17, x13
+; CHECK-NEXT: mov x16, x13
+; CHECK-NEXT: mov x17, x14
; CHECK-NEXT: mov x18, x12
; CHECK-NEXT: .LBB0_6: // %vector.body
; CHECK-NEXT: // Parent Loop BB0_3 Depth=1
; CHECK-NEXT: // => This Inner Loop Header: Depth=2
; CHECK-NEXT: ldp q1, q4, [x16, #-16]
; CHECK-NEXT: subs x18, x18, #16
-; CHECK-NEXT: ldp q3, q2, [x17, #-32]
+; CHECK-NEXT: ldp q3, q2, [x17]
; CHECK-NEXT: add x16, x16, #32
-; CHECK-NEXT: ldp q6, q5, [x17]
+; CHECK-NEXT: ldp q6, q5, [x17, #32]
; CHECK-NEXT: smlal2 v2.4s, v0.8h, v1.8h
; CHECK-NEXT: smlal v3.4s, v0.4h, v1.4h
; CHECK-NEXT: smlal2 v5.4s, v0.8h, v4.8h
; CHECK-NEXT: smlal v6.4s, v0.4h, v4.4h
-; CHECK-NEXT: stp q3, q2, [x17, #-32]
-; CHECK-NEXT: stp q6, q5, [x17], #64
+; CHECK-NEXT: stp q3, q2, [x17]
+; CHECK-NEXT: stp q6, q5, [x17, #32]
+; CHECK-NEXT: add x17, x17, #64
; CHECK-NEXT: b.ne .LBB0_6
; CHECK-NEXT: // %bb.7: // %middle.block
; CHECK-NEXT: // in Loop: Header=BB0_3 Depth=1
diff --git a/llvm/test/CodeGen/AArch64/neon-partial-reduce-dot-product.ll b/llvm/test/CodeGen/AArch64/neon-partial-reduce-dot-product.ll
index 2d81a264e02bc..ac324fd289972 100644
--- a/llvm/test/CodeGen/AArch64/neon-partial-reduce-dot-product.ll
+++ b/llvm/test/CodeGen/AArch64/neon-partial-reduce-dot-product.ll
@@ -34,16 +34,15 @@ define <4 x i32> @udot_in_loop(ptr %p1, ptr %p2){
; CHECK-NODOT-LABEL: udot_in_loop:
; CHECK-NODOT: // %bb.0: // %entry
; CHECK-NODOT-NEXT: movi v1.2d, #0000000000000000
-; CHECK-NODOT-NEXT: mov x8, xzr
+; CHECK-NODOT-NEXT: mov w8, #16 // =0x10
; CHECK-NODOT-NEXT: .LBB1_1: // %vector.body
; CHECK-NODOT-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NODOT-NEXT: ldr q2, [x0, x8]
-; CHECK-NODOT-NEXT: ldr q3, [x1, x8]
+; CHECK-NODOT-NEXT: ldr q2, [x0], #16
; CHECK-NODOT-NEXT: mov v0.16b, v1.16b
-; CHECK-NODOT-NEXT: add x8, x8, #16
+; CHECK-NODOT-NEXT: ldr q3, [x1], #16
+; CHECK-NODOT-NEXT: subs x8, x8, #16
; CHECK-NODOT-NEXT: umull v4.8h, v2.8b, v3.8b
; CHECK-NODOT-NEXT: umull2 v2.8h, v2.16b, v3.16b
-; CHECK-NODOT-NEXT: cmp x8, #16
; CHECK-NODOT-NEXT: uaddw v1.4s, v1.4s, v4.4h
; CHECK-NODOT-NEXT: uaddw2 v1.4s, v1.4s, v4.8h
; CHECK-NODOT-NEXT: uaddw v1.4s, v1.4s, v2.4h
@@ -55,15 +54,14 @@ define <4 x i32> @udot_in_loop(ptr %p1, ptr %p2){
; CHECK-DOT-LABEL: udot_in_loop:
; CHECK-DOT: // %bb.0: // %entry
; CHECK-DOT-NEXT: movi v1.2d, #0000000000000000
-; CHECK-DOT-NEXT: mov x8, xzr
+; CHECK-DOT-NEXT: mov w8, #16 // =0x10
; CHECK-DOT-NEXT: .LBB1_1: // %vector.body
; CHECK-DOT-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-DOT-NEXT: ldr q2, [x0, x8]
-; CHECK-DOT-NEXT: ldr q3, [x1, x8]
+; CHECK-DOT-NEXT: ldr q2, [x0], #16
; CHECK-DOT-NEXT: mov v0.16b, v1.16b
-; CHECK-DOT-NEXT: add x8, x8, #16
+; CHECK-DOT-NEXT: ldr q3, [x1], #16
+; CHECK-DOT-NEXT: subs x8, x8, #16
; CHECK-DOT-NEXT: udot v1.4s, v2.16b, v3.16b
-; CHECK-DOT-NEXT: cmp x8, #16
; CHECK-DOT-NEXT: b.ne .LBB1_1
; CHECK-DOT-NEXT: // %bb.2: // %end
; CHECK-DOT-NEXT: ret
@@ -71,15 +69,14 @@ define <4 x i32> @udot_in_loop(ptr %p1, ptr %p2){
; CHECK-DOT-I8MM-LABEL: udot_in_loop:
; CHECK-DOT-I8MM: // %bb.0: // %entry
; CHECK-DOT-I8MM-NEXT: movi v1.2d, #0000000000000000
-; CHECK-DOT-I8MM-NEXT: mov x8, xzr
+; CHECK-DOT-I8MM-NEXT: mov w8, #16 // =0x10
; CHECK-DOT-I8MM-NEXT: .LBB1_1: // %vector.body
; CHECK-DOT-I8MM-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-DOT-I8MM-NEXT: ldr q2, [x0, x8]
-; CHECK-DOT-I8MM-NEXT: ldr q3, [x1, x8]
+; CHECK-DOT-I8MM-NEXT: ldr q2, [x0], #16
; CHECK-DOT-I8MM-NEXT: mov v0.16b, v1.16b
-; CHECK-DOT-I8MM-NEXT: add x8, x8, #16
+; CHECK-DOT-I8MM-NEXT: ldr q3, [x1], #16
+; CHECK-DOT-I8MM-NEXT: subs x8, x8, #16
; CHECK-DOT-I8MM-NEXT: udot v1.4s, v2.16b, v3.16b
-; CHECK-DOT-I8MM-NEXT: cmp x8, #16
; CHECK-DOT-I8MM-NEXT: b.ne .LBB1_1
; CHECK-DOT-I8MM-NEXT: // %bb.2: // %end
; CHECK-DOT-I8MM-NEXT: ret
@@ -236,18 +233,17 @@ define <4 x i32> @usdot_in_loop(ptr %p1, ptr %p2){
; CHECK-NODOT-LABEL: usdot_in_loop:
; CHECK-NODOT: // %bb.0: // %entry
; CHECK-NODOT-NEXT: movi v1.2d, #0000000000000000
-; CHECK-NODOT-NEXT: mov x8, xzr
+; CHECK-NODOT-NEXT: mov w8, #16 // =0x10
; CHECK-NODOT-NEXT: .LBB6_1: // %vector.body
; CHECK-NODOT-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NODOT-NEXT: ldr q2, [x0, x8]
-; CHECK-NODOT-NEXT: ldr q3, [x1, x8]
+; CHECK-NODOT-NEXT: ldr q2, [x0], #16
; CHECK-NODOT-NEXT: mov v0.16b, v1.16b
-; CHECK-NODOT-NEXT: add x8, x8, #16
+; CHECK-NODOT-NEXT: ldr q3, [x1], #16
+; CHECK-NODOT-NEXT: subs x8, x8, #16
; CHECK-NODOT-NEXT: sshll v4.8h, v2.8b, #0
-; CHECK-NODOT-NEXT: ushll v5.8h, v3.8b, #0
; CHECK-NODOT-NEXT: sshll2 v2.8h, v2.16b, #0
+; CHECK-NODOT-NEXT: ushll v5.8h, v3.8b, #0
; CHECK-NODOT-NEXT: ushll2 v3.8h, v3.16b, #0
-; CHECK-NODOT-NEXT: cmp x8, #16
; CHECK-NODOT-NEXT: smlal v1.4s, v4.4h, v5.4h
; CHECK-NODOT-NEXT: smlal2 v1.4s, v4.8h, v5.8h
; CHECK-NODOT-NEXT: smlal v1.4s, v2.4h, v3.4h
@@ -259,18 +255,17 @@ define <4 x i32> @usdot_in_loop(ptr %p1, ptr %p2){
; CHECK-DOT-LABEL: usdot_in_loop:
; CHECK-DOT: // %bb.0: // %entry
; CHECK-DOT-NEXT: movi v1.2d, #0000000000000000
-; CHECK-DOT-NEXT: mov x8, xzr
+; CHECK-DOT-NEXT: mov w8, #16 // =0x10
; CHECK-DOT-NEXT: .LBB6_1: // %vector.body
; CHECK-DOT-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-DOT-NEXT: ldr q2, [x0, x8]
-; CHECK-DOT-NEXT: ldr q3, [x1, x8]
+; CHECK-DOT-NEXT: ldr q2, [x0], #16
; CHECK-DOT-NEXT: mov v0.16b, v1.16b
-; CHECK-DOT-NEXT: add x8, x8, #16
+; CHECK-DOT-NEXT: ldr q3, [x1], #16
+; CHECK-DOT-NEXT: subs x8, x8, #16
; CHECK-DOT-NEXT: sshll v4.8h, v2.8b, #0
-; CHECK-DOT-NEXT: ushll v5.8h, v3.8b, #0
; CHECK-DOT-NEXT: sshll2 v2.8h, v2.16b, #0
+; CHECK-DOT-NEXT: ushll v5.8h, v3.8b, #0
; CHECK-DOT-NEXT: ushll2 v3.8h, v3.16b, #0
-; CHECK-DOT-NEXT: cmp x8, #16
; CHECK-DOT-NEXT: smlal v1.4s, v4.4h, v5.4h
; CHECK-DOT-NEXT: smlal2 v1.4s, v4.8h, v5.8h
; CHECK-DOT-NEXT: smlal v1.4s, v2.4h, v3.4h
@@ -282,15 +277,14 @@ define <4 x i32> @usdot_in_loop(ptr %p1, ptr %p2){
; CHECK-DOT-I8MM-LABEL: usdot_in_loop:
; CHECK-DOT-I8MM: // %bb.0: // %entry
; CHECK-DOT-I8MM-NEXT: movi v1.2d, #0000000000000000
-; CHECK-DOT-I8MM-NEXT: mov x8, xzr
+; CHECK-DOT-I8MM-NEXT: mov w8, #16 // =0x10
; CHECK-DOT-I8MM-NEXT: .LBB6_1: // %vector.body
; CHECK-DOT-I8MM-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-DOT-I8MM-NEXT: ldr q2, [x0, x8]
-; CHECK-DOT-I8MM-NEXT: ldr q3, [x1, x8]
+; CHECK-DOT-I8MM-NEXT: ldr q2, [x0], #16
; CHECK-DOT-I8MM-NEXT: mov v0.16b, v1.16b
-; CHECK-DOT-I8MM-NEXT: add x8, x8, #16
+; CHECK-DOT-I8MM-NEXT: ldr q3, [x1], #16
+; CHECK-DOT-I8MM-NEXT: subs x8, x8, #16
; CHECK-DOT-I8MM-NEXT: usdot v1.4s, v3.16b, v2.16b
-; CHECK-DOT-I8MM-NEXT: cmp x8, #16
; CHECK-DOT-I8MM-NEXT: b.ne .LBB6_1
; CHECK-DOT-I8MM-NEXT: // %bb.2: // %end
; CHECK-DOT-I8MM-NEXT: ret
@@ -402,18 +396,17 @@ define <4 x i32> @sudot_in_loop(ptr %p1, ptr %p2){
; CHECK-NODOT-LABEL: sudot_in_loop:
; CHECK-NODOT: // %bb.0: // %entry
; CHECK-NODOT-NEXT: movi v1.2d, #0000000000000000
-; CHECK-NODOT-NEXT: mov x8, xzr
+; CHECK-NODOT-NEXT: mov w8, #16 // =0x10
; CHECK-NODOT-NEXT: .LBB9_1: // %vector.body
; CHECK-NODOT-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NODOT-NEXT: ldr q2, [x0, x8]
-; CHECK-NODOT-NEXT: ldr q3, [x1, x8]
+; CHECK-NODOT-NEXT: ldr q2, [x0], #16
; CHECK-NODOT-NEXT: mov v0.16b, v1.16b
-; CHECK-NODOT-NEXT: add x8, x8, #16
+; CHECK-NODOT-NEXT: ldr q3, [x1], #16
+; CHECK-NODOT-NEXT: subs x8, x8, #16
; CHECK-NODOT-NEXT: ushll v4.8h, v2.8b, #0
-; CHECK-NODOT-NEXT: sshll v5.8h, v3.8b, #0
; CHECK-NODOT-NEXT: ushll2 v2.8h, v2.16b, #0
+; CHECK-NODOT-NEXT: sshll v5.8h, v3.8b, #0
; CHECK-NODOT-NEXT: sshll2 v3.8h, v3.16b, #0
-; CHECK-NODOT-NEXT: cmp x8, #16
; CHECK-NODOT-NEXT: smlal v1.4s, v4.4h, v5.4h
; CHECK-NODOT-NEXT: smlal2 v1.4s, v4.8h, v5.8h
; CHECK-NODOT-NEXT: smlal v1.4s, v2.4h, v3.4h
@@ -425,18 +418,17 @@ define <4 x i32> @sudot_in_loop(ptr %p1, ptr %p2){
; CHECK-DOT-LABEL: sudot_in_loop:
; CHECK-DOT: // %bb.0: // %entry
; CHECK-DOT-NEXT: movi v1.2d, #0000000000000000
-; CHECK-DOT-NEXT: mov x8, xzr
+; CHECK-DOT-NEXT: mov w8, #16 // =0x10
; CHECK-DOT-NEXT: .LBB9_1: // %vector.body
; CHECK-DOT-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-DOT-NEXT: ldr q2, [x0, x8]
-; CHECK-DOT-NEXT: ldr q3, [x1, x8]
+; CHECK-DOT-NEXT: ldr q2, [x0], #16
; CHECK-DOT-NEXT: mov v0.16b, v1.16b
-; CHECK-DOT-NEXT: add x8, x8, #16
+; CHECK-DOT-NEXT: ldr q3, [x1], #16
+; CHECK-DOT-NEXT: subs x8, x8, #16
; CHECK-DOT-NEXT: ushll v4.8h, v2.8b, #0
-; CHECK-DOT-NEXT: sshll v5.8h, v3.8b, #0
; CHECK-DOT-NEXT: ushll2 v2.8h, v2.16b, #0
+; CHECK-DOT-NEXT: sshll v5.8h, v3.8b, #0
; CHECK-DOT-NEXT: sshll2 v3.8h, v3.16b, #0
-; CHECK-DOT-NEXT: cmp x8, #16
; CHECK-DOT-NEXT: smlal v1.4s, v4.4h, v5.4h
; CHECK-DOT-NEXT: smlal2 v1.4s, v4.8h, v5.8h
; CHECK-DOT-NEXT: smlal v1.4s, v2.4h, v3.4h
@@ -448,15 +440,14 @@ define <4 x i32> @sudot_in_loop(ptr %p1, ptr %p2){
; CHECK-DOT-I8MM-LABEL: sudot_in_loop:
; CHECK-DOT-I8MM: // %bb.0: // %entry
; CHECK-DOT-I8MM-NEXT: movi v1.2d, #0000000000000000
-; CHECK-DOT-I8MM-NEXT: mov x8, xzr
+; CHECK-DOT-I8MM-NEXT: mov w8, #16 // =0x10
; CHECK-DOT-I8MM-NEXT: .LBB9_1: // %vector.body
; CHECK-DOT-I8MM-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-DOT-I8MM-NEXT: ldr q2, [x0, x8]
-; CHECK-DOT-I8MM-NEXT: ldr q3, [x1, x8]
+; CHECK-DOT-I8MM-NEXT: ldr q2, [x0], #16
; CHECK-DOT-I8MM-NEXT: mov v0.16b, v1.16b
-; CHECK-DOT-I8MM-NEXT: add x8, x8, #16
+; CHECK-DOT-I8MM-NEXT: ldr q3, [x1], #16
+; CHECK-DOT-I8MM-NEXT: subs x8, x8, #16
; CHECK-DOT-I8MM-NEXT: usdot v1.4s, v2.16b, v3.16b
-; CHECK-DOT-I8MM-NEXT: cmp x8, #16
; CHECK-DOT-I8MM-NEXT: b.ne .LBB9_1
; CHECK-DOT-I8MM-NEXT: // %bb.2: // %end
; CHECK-DOT-I8MM-NEXT: ret
@@ -775,13 +766,12 @@ define <4 x i32> @udot_no_bin_op_in_loop(ptr %p){
; CHECK-NODOT-LABEL: udot_no_bin_op_in_loop:
; CHECK-NODOT: // %bb.0: // %entry
; CHECK-NODOT-NEXT: movi v1.2d, #0000000000000000
-; CHECK-NODOT-NEXT: mov x8, xzr
+; CHECK-NODOT-NEXT: mov w8, #16 // =0x10
; CHECK-NODOT-NEXT: .LBB16_1: // %vector.body
; CHECK-NODOT-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NODOT-NEXT: ldr q2, [x0, x8]
+; CHECK-NODOT-NEXT: ldr q2, [x0], #16
; CHECK-NODOT-NEXT: mov v0.16b, v1.16b
-; CHECK-NODOT-NEXT: add x8, x8, #16
-; CHECK-NODOT-NEXT: cmp x8, #16
+; CHECK-NODOT-NEXT: subs x8, x8, #16
; CHECK-NODOT-NEXT: ushll v3.8h, v2.8b, #0
; CHECK-NODOT-NEXT: ushll2 v2.8h, v2.16b, #0
; CHECK-NODOT-NEXT: uaddw v1.4s, v1.4s, v3.4h
@@ -796,13 +786,12 @@ define <4 x i32> @udot_no_bin_op_in_loop(ptr %p){
; CHECK-DOT: // %bb.0: // %entry
; CHECK-DOT-NEXT: movi v1.2d, #0000000000000000
; CHECK-DOT-NEXT: movi v2.16b, #1
-; CHECK-DOT-NEXT: mov x8, xzr
+; CHECK-DOT-NEXT: mov w8, #16 // =0x10
; CHECK-DOT-NEXT: .LBB16_1: // %vector.body
; CHECK-DOT-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-DOT-NEXT: ldr q3, [x0, x8]
+; CHECK-DOT-NEXT: ldr q3, [x0], #16
; CHECK-DOT-NEXT: mov v0.16b, v1.16b
-; CHECK-DOT-NEXT: add x8, x8, #16
-; CHECK-DOT-NEXT: cmp x8, #16
+; CHECK-DOT-NEXT: subs x8, x8, #16
; CHECK-DOT-NEXT: udot v1.4s, v3.16b, v2.16b
; CHECK-DOT-NEXT: b.ne .LBB16_1
; CHECK-DOT-NEXT: // %bb.2: // %end
@@ -812,13 +801,12 @@ define <4 x i32> @udot_no_bin_op_in_loop(ptr %p){
; CHECK-DOT-I8MM: // %bb.0: // %entry
; CHECK-DOT-I8MM-NEXT: movi v1.2d, #0000000000000000
; CHECK-DOT-I8MM-NEXT: movi v2.16b, #1
-; CHECK-DOT-I8MM-NEXT: mov x8, xzr
+; CHECK-DOT-I8MM-NEXT: mov w8, #16 // =0x10
; CHECK-DOT-I8MM-NEXT: .LBB16_1: // %vector.body
; CHECK-DOT-I8MM-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-DOT-I8MM-NEXT: ldr q3, [x0, x8]
+; CHECK-DOT-I8MM-NEXT: ldr q3, [x0], #16
; CHECK-DOT-I8MM-NEXT: mov v0.16b, v1.16b
-; CHECK-DOT-I8MM-NEXT: add x8, x8, #16
-; CHECK-DOT-I8MM-NEXT: cmp x8, #16
+; CHECK-DOT-I8MM-NEXT: subs x8, x8, #16
; CHECK-DOT-I8MM-NEXT: udot v1.4s, v3.16b, v2.16b
; CHECK-DOT-I8MM-NEXT: b.ne .LBB16_1
; CHECK-DOT-I8MM-NEXT: // %bb.2: // %end
@@ -1135,20 +1123,19 @@ define <4 x i32> @usdot_multiple_zext_users(ptr %p1, ptr %p2, ptr %p3) {
; CHECK-NODOT: // %bb.0: // %entry
; CHECK-NODOT-NEXT: movi v0.2d, #0000000000000000
; CHECK-NODOT-NEXT: movi v1.2d, #0000000000000000
-; CHECK-NODOT-NEXT: mov x8, xzr
+; CHECK-NODOT-NEXT: mov w8, #1024 // =0x400
; CHECK-NODOT-NEXT: .LBB28_1: // %vector.body
; CHECK-NODOT-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NODOT-NEXT: ldr q2, [x0, x8]
-; CHECK-NODOT-NEXT: ldr q3, [x1, x8]
-; CHECK-NODOT-NEXT: ldr q4, [x2, x8]
-; CHECK-NODOT-NEXT: add x8, x8, #16
+; CHECK-NODOT-NEXT: ldr q2, [x0], #16
+; CHECK-NODOT-NEXT: subs x8, x8, #16
+; CHECK-NODOT-NEXT: ldr q3, [x1], #16
+; CHECK-NODOT-NEXT: ldr q4, [x2], #16
; CHECK-NODOT-NEXT: sshll v5.8h, v2.8b, #0
-; CHECK-NODOT-NEXT: ushll v6.8h, v4.8b, #0
; CHECK-NODOT-NEXT: sshll v7.8h, v3.8b, #0
; CHECK-NODOT-NEXT: sshll2 v2.8h, v2.16b, #0
-; CHECK-NODOT-NEXT: ushll2 v4.8h, v4.16b, #0
; CHECK-NODOT-NEXT: sshll2 v3.8h, v3.16b, #0
-; CHECK-NODOT-NEXT: cmp x8, #1024
+; CHECK-NODOT-NEXT: ushll v6.8h, v4.8b, #0
+; CHECK-NODOT-NEXT: ushll2 v4.8h, v4.16b, #0
; CHECK-NODOT-NEXT: smlal v0.4s, v5.4h, v6.4h
; CHECK-NODOT-NEXT: smlal v1.4s, v7.4h, v6.4h
; CHECK-NODOT-NEXT: smlal2 v0.4s, v5.8h, v6.8h
@@ -1166,20 +1153,19 @@ define <4 x i32> @usdot_multiple_zext_users(ptr %p1, ptr %p2, ptr %p3) {
; CHECK-DOT: // %bb.0: // %entry
; CHECK-DOT-NEXT: movi v0.2d, #0000000000000000
; CHECK-DOT-NEXT: movi v1.2d, #0000000000000000
-; CHECK-DOT-NEXT: mov x8, xzr
+; CHECK-DOT-NEXT: mov w8, #1024 // =0x400
; CHECK-DOT-NEXT: .LBB28_1: // %vector.body
; CHECK-DOT-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-DOT-NEXT: ldr q2, [x0, x8]
-; CHECK-DOT-NEXT: ldr q3, [x1, x8]
-; CHECK-DOT-NEXT: ldr q4, [x2, x8]
-; CHECK-DOT-NEXT: add x8, x8, #16
+; CHECK-DOT-NEXT: ldr q2, [x0], #16
+; CHECK-DOT-NEXT: subs x8, x8, #16
+; CHECK-DOT-NEXT: ldr q3, [x1], #16
+; CHECK-DOT-NEXT: ldr q4, [x2], #16
; CHECK-DOT-NEXT: sshll v5.8h, v2.8b, #0
-; CHECK-DOT-NEXT: ushll v6.8h, v4.8b, #0
; CHECK-DOT-NEXT: sshll v7.8h, v3.8b, #0
; CHECK-DOT-NEXT: sshll2 v2.8h, v2.16b, #0
-; CHECK-DOT-NEXT: ushll2 v4.8h, v4.16b, #0
; CHECK-DOT-NEXT: sshll2 v3.8h, v3.16b, #0
-; CHECK-DOT-NEXT: cmp x8, #1024
+; CHECK-DOT-NEXT: ushll v6.8h, v4.8b, #0
+; CHECK-DOT-NEXT: ushll2 v4.8h, v4.16b, #0
; CHECK-DOT-NEXT: smlal v0.4s, v5.4h, v6.4h
; CHECK-DOT-NEXT: smlal v1.4s, v7.4h, v6.4h
; CHECK-DOT-NEXT: smlal2 v0.4s, v5.8h, v6.8h
@@ -1197,16 +1183,15 @@ define <4 x i32> @usdot_multiple_zext_users(ptr %p1, ptr %p2, ptr %p3) {
; CHECK-DOT-I8MM: // %bb.0: // %entry
; CHECK-DOT-I8MM-NEXT: movi v0.2d, #0000000000000000
; CHECK-DOT-I8MM-NEXT: movi v1.2d, #0000000000000000
-; CHECK-DOT-I8MM-NEXT: mov x8, xzr
+; CHECK-DOT-I8MM-NEXT: mov w8, #1024 // =0x400
; CHECK-DOT-I8MM-NEXT: .LBB28_1: // %vector.body
; CHECK-DOT-I8MM-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-DOT-I8MM-NEXT: ldr q2, [x0, x8]
-; CHECK-DOT-I8MM-NEXT: ldr q3, [x1, x8]
-; CHECK-DOT-I8MM-NEXT: ldr q4, [x2, x8]
-; CHECK-DOT-I8MM-NEXT: add x8, x8, #16
+; CHECK-DOT-I8MM-NEXT: ldr q2, [x0], #16
+; CHECK-DOT-I8MM-NEXT: subs x8, x8, #16
+; CHECK-DOT-I8MM-NEXT: ldr q3, [x1], #16
+; CHECK-DOT-I8MM-NEXT: ldr q4, [x2], #16
; CHECK-DOT-I8MM-NEXT: usdot v0.4s, v4.16b, v2.16b
; CHECK-DOT-I8MM-NEXT: usdot v1.4s, v4.16b, v3.16b
-; CHECK-DOT-I8MM-NEXT: cmp x8, #1024
; CHECK-DOT-I8MM-NEXT: b.ne .LBB28_1
; CHECK-DOT-I8MM-NEXT: // %bb.2: // %end
; CHECK-DOT-I8MM-NEXT: add v0.4s, v1.4s, v0.4s
diff --git a/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll b/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll
index c91de8f3a0a47..7993155e79cfc 100644
--- a/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll
+++ b/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll
@@ -6,6 +6,7 @@
@C = external dso_local local_unnamed_addr global [8 x [8 x i64]], align 8
define dso_local void @run_test() local_unnamed_addr uwtable {
+; CH`ECK-NEXT: .cfi_offset b9, -16
; CHECK-LABEL: run_test:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: sub sp, sp, #208
@@ -30,194 +31,196 @@ define dso_local void @run_test() local_unnamed_addr uwtable {
; CHECK-NEXT: .cfi_offset b13, -96
; CHECK-NEXT: .cfi_offset b14, -104
; CHECK-NEXT: .cfi_offset b15, -112
-; CHECK-NEXT: movi v2.2d, #0000000000000000
-; CHECK-NEXT: // implicit-def: $q1
+; CHECK-NEXT: movi v1.2d, #0000000000000000
+; CHECK-NEXT: // implicit-def: $q6
; CHECK-NEXT: mov x8, xzr
-; CHECK-NEXT: adrp x9, B+48
-; CHECK-NEXT: add x9, x9, :lo12:B+48
-; CHECK-NEXT: adrp x10, A
-; CHECK-NEXT: add x10, x10, :lo12:A
-; CHECK-NEXT: mov x11, xzr
-; CHECK-NEXT: // kill: killed $q1
-; CHECK-NEXT: // implicit-def: $q1
+; CHECK-NEXT: adrp x9, A+128
+; CHECK-NEXT: add x9, x9, :lo12:A+128
+; CHECK-NEXT: adrp x10, B+48
+; CHECK-NEXT: add x10, x10, :lo12:B+48
+; CHECK-NEXT: mov w11, #8 // =0x8
+; CHECK-NEXT: // kill: killed $q6
+; CHECK-NEXT: // implicit-def: $q6
; CHECK-NEXT: mov x12, xzr
; CHECK-NEXT: // implicit-def: $q0
+; CHECK-NEXT: // implicit-def: $q2
; CHECK-NEXT: // implicit-def: $q3
; CHECK-NEXT: // implicit-def: $q4
; CHECK-NEXT: // implicit-def: $q5
; CHECK-NEXT: // implicit-def: $q7
-; CHECK-NEXT: // implicit-def: $q10
+; CHECK-NEXT: // implicit-def: $q16
; CHECK-NEXT: // implicit-def: $q17
-; CHECK-NEXT: // implicit-def: $q6
-; CHECK-NEXT: // implicit-def: $q18
+; CHECK-NEXT: // implicit-def: $q10
; CHECK-NEXT: // implicit-def: $q19
; CHECK-NEXT: // implicit-def: $q20
; CHECK-NEXT: // implicit-def: $q21
; CHECK-NEXT: // implicit-def: $q22
; CHECK-NEXT: // implicit-def: $q23
; CHECK-NEXT: // implicit-def: $q24
-; CHECK-NEXT: // implicit-def: $q9
+; CHECK-NEXT: // implicit-def: $q25
; CHECK-NEXT: // implicit-def: $q27
-; CHECK-NEXT: // implicit-def: $q12
+; CHECK-NEXT: // implicit-def: $q26
; CHECK-NEXT: // implicit-def: $q28
-; CHECK-NEXT: // implicit-def: $q14
-; CHECK-NEXT: // implicit-def: $q15
-; CHECK-NEXT: // implicit-def: $q29
; CHECK-NEXT: // implicit-def: $q30
-; CHECK-NEXT: // implicit-def: $q11
+; CHECK-NEXT: // implicit-def: $q18
+; CHECK-NEXT: // implicit-def: $q29
; CHECK-NEXT: // implicit-def: $q31
+; CHECK-NEXT: // implicit-def: $q12
; CHECK-NEXT: // implicit-def: $q13
-; CHECK-NEXT: // kill: killed $q1
-; CHECK-NEXT: // implicit-def: $q1
-; CHECK-NEXT: // kill: killed $q1
+; CHECK-NEXT: // implicit-def: $q11
+; CHECK-NEXT: // kill: killed $q6
+; CHECK-NEXT: // implicit-def: $q6
+; CHECK-NEXT: // kill: killed $q6
; CHECK-NEXT: .LBB0_1: // %for.cond1.preheader
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: stp q29, q15, [sp] // 32-byte Folded Spill
; CHECK-NEXT: ldr q15, [x8]
-; CHECK-NEXT: ldr x15, [x8]
-; CHECK-NEXT: str q14, [sp, #32] // 16-byte Folded Spill
-; CHECK-NEXT: add x20, x10, x11
-; CHECK-NEXT: mov v8.16b, v28.16b
+; CHECK-NEXT: ldr x16, [x8]
+; CHECK-NEXT: mov v6.16b, v0.16b
+; CHECK-NEXT: ldr q14, [x8]
+; CHECK-NEXT: str q18, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: mov v9.16b, v27.16b
; CHECK-NEXT: fmov x2, d15
; CHECK-NEXT: mov x17, v15.d[1]
-; CHECK-NEXT: ldr q14, [x8]
-; CHECK-NEXT: mov v28.16b, v24.16b
-; CHECK-NEXT: mov v24.16b, v20.16b
-; CHECK-NEXT: mov v20.16b, v17.16b
-; CHECK-NEXT: fmov x13, d14
-; CHECK-NEXT: mov x16, v14.d[1]
-; CHECK-NEXT: mov v17.16b, v5.16b
-; CHECK-NEXT: mul x3, x2, x15
-; CHECK-NEXT: ldr q14, [x9], #64
-; CHECK-NEXT: ldr q5, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: ldr x6, [x8]
-; CHECK-NEXT: ldr x20, [x20, #128]
-; CHECK-NEXT: mul x1, x17, x15
-; CHECK-NEXT: mov x14, v14.d[1]
+; CHECK-NEXT: fmov x14, d14
+; CHECK-NEXT: mov x13, v14.d[1]
+; CHECK-NEXT: ldr q18, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldr q14, [x10], #64
+; CHECK-NEXT: mov v27.16b, v23.16b
+; CHECK-NEXT: mul x3, x2, x16
+; CHECK-NEXT: ldr x20, [x9], #8
+; CHECK-NEXT: mov x15, v14.d[1]
; CHECK-NEXT: fmov x5, d14
-; CHECK-NEXT: mov v29.16b, v21.16b
-; CHECK-NEXT: mov v21.16b, v0.16b
-; CHECK-NEXT: mov v25.16b, v6.16b
-; CHECK-NEXT: mul x18, x13, x15
-; CHECK-NEXT: mov v6.16b, v2.16b
-; CHECK-NEXT: mov v26.16b, v22.16b
+; CHECK-NEXT: mov v23.16b, v19.16b
+; CHECK-NEXT: mul x1, x17, x16
+; CHECK-NEXT: mov v19.16b, v7.16b
+; CHECK-NEXT: mov v7.16b, v2.16b
+; CHECK-NEXT: stp q26, q31, [sp] // 32-byte Folded Spill
+; CHECK-NEXT: mov v31.16b, v22.16b
+; CHECK-NEXT: mov v26.16b, v10.16b
+; CHECK-NEXT: mul x18, x14, x16
+; CHECK-NEXT: mov v22.16b, v5.16b
+; CHECK-NEXT: mov v5.16b, v1.16b
; CHECK-NEXT: fmov d15, x3
-; CHECK-NEXT: mov v22.16b, v18.16b
-; CHECK-NEXT: mov v18.16b, v7.16b
-; CHECK-NEXT: mul x0, x16, x15
-; CHECK-NEXT: mov v7.16b, v3.16b
-; CHECK-NEXT: mov v16.16b, v4.16b
-; CHECK-NEXT: add x11, x11, #8
-; CHECK-NEXT: add x12, x12, #1
+; CHECK-NEXT: mov v8.16b, v20.16b
+; CHECK-NEXT: mov v20.16b, v16.16b
+; CHECK-NEXT: mul x0, x13, x16
+; CHECK-NEXT: mov v16.16b, v3.16b
+; CHECK-NEXT: mov v10.16b, v21.16b
+; CHECK-NEXT: mov v21.16b, v17.16b
+; CHECK-NEXT: mov v17.16b, v4.16b
+; CHECK-NEXT: subs x11, x11, #1
; CHECK-NEXT: mov v15.d[1], x1
-; CHECK-NEXT: mul x4, x14, x15
-; CHECK-NEXT: cmp x11, #64
+; CHECK-NEXT: mul x4, x15, x16
+; CHECK-NEXT: add x12, x12, #1
; CHECK-NEXT: fmov d14, x18
-; CHECK-NEXT: mul x15, x5, x15
-; CHECK-NEXT: add v5.2d, v5.2d, v15.2d
-; CHECK-NEXT: mul x21, x2, x6
+; CHECK-NEXT: mul x16, x5, x16
+; CHECK-NEXT: mul x19, x2, x6
+; CHECK-NEXT: add v18.2d, v18.2d, v15.2d
; CHECK-NEXT: mov v14.d[1], x0
; CHECK-NEXT: mul x2, x2, x20
-; CHECK-NEXT: fmov d0, x15
-; CHECK-NEXT: str q5, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT: ldr q5, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT: mul x22, x13, x20
-; CHECK-NEXT: add v5.2d, v5.2d, v14.2d
-; CHECK-NEXT: fmov d3, x21
-; CHECK-NEXT: mul x19, x17, x6
-; CHECK-NEXT: mov v0.d[1], x4
-; CHECK-NEXT: fmov d1, x2
+; CHECK-NEXT: str q18, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: ldp q18, q15, [sp, #32] // 32-byte Folded Reload
+; CHECK-NEXT: mul x7, x17, x6
+; CHECK-NEXT: fmov d0, x16
+; CHECK-NEXT: fmov d1, x19
; CHECK-NEXT: mul x17, x17, x20
-; CHECK-NEXT: str q5, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT: add v5.2d, v13.2d, v14.2d
-; CHECK-NEXT: fmov d2, x22
-; CHECK-NEXT: ldr q13, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT: mul x7, x16, x6
-; CHECK-NEXT: ldp q15, q14, [sp, #16] // 32-byte Folded Reload
-; CHECK-NEXT: mov v3.d[1], x19
-; CHECK-NEXT: add v13.2d, v13.2d, v0.2d
-; CHECK-NEXT: mul x16, x16, x20
-; CHECK-NEXT: mov v1.d[1], x17
-; CHECK-NEXT: mul x23, x5, x20
-; CHECK-NEXT: str q13, [sp, #80] // 16-byte Folded Spill
-; CHECK-NEXT: mov v13.16b, v5.16b
-; CHECK-NEXT: mov v5.16b, v17.16b
-; CHECK-NEXT: mov v17.16b, v20.16b
-; CHECK-NEXT: mov v20.16b, v24.16b
-; CHECK-NEXT: mul x13, x13, x6
-; CHECK-NEXT: mov v24.16b, v28.16b
-; CHECK-NEXT: add v11.2d, v11.2d, v3.2d
-; CHECK-NEXT: mov v2.d[1], x16
-; CHECK-NEXT: add v15.2d, v15.2d, v1.2d
-; CHECK-NEXT: add v27.2d, v27.2d, v3.2d
-; CHECK-NEXT: mul x18, x14, x20
-; CHECK-NEXT: add v23.2d, v23.2d, v3.2d
-; CHECK-NEXT: add v19.2d, v19.2d, v3.2d
-; CHECK-NEXT: fmov d4, x23
-; CHECK-NEXT: add v10.2d, v10.2d, v3.2d
-; CHECK-NEXT: mul x15, x5, x6
-; CHECK-NEXT: fmov d0, x13
-; CHECK-NEXT: add v14.2d, v14.2d, v2.2d
-; CHECK-NEXT: add v2.2d, v6.2d, v3.2d
+; CHECK-NEXT: add v15.2d, v15.2d, v14.2d
+; CHECK-NEXT: fmov d2, x2
+; CHECK-NEXT: mov v0.d[1], x4
+; CHECK-NEXT: mul x22, x14, x20
+; CHECK-NEXT: str q15, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: add v15.2d, v11.2d, v14.2d
+; CHECK-NEXT: ldr q11, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT: mul x18, x5, x20
+; CHECK-NEXT: mov v1.d[1], x7
+; CHECK-NEXT: mov v2.d[1], x17
+; CHECK-NEXT: add v11.2d, v11.2d, v0.2d
; CHECK-NEXT: mul x14, x14, x6
-; CHECK-NEXT: mov v3.16b, v7.16b
-; CHECK-NEXT: mov v7.16b, v18.16b
-; CHECK-NEXT: mov v4.d[1], x18
-; CHECK-NEXT: mov v18.16b, v22.16b
-; CHECK-NEXT: mov v0.d[1], x7
-; CHECK-NEXT: fmov d1, x15
-; CHECK-NEXT: add v28.2d, v8.2d, v4.2d
-; CHECK-NEXT: mov v1.d[1], x14
-; CHECK-NEXT: add v31.2d, v31.2d, v0.2d
-; CHECK-NEXT: add v30.2d, v30.2d, v0.2d
-; CHECK-NEXT: add v12.2d, v12.2d, v0.2d
-; CHECK-NEXT: add v24.2d, v24.2d, v0.2d
-; CHECK-NEXT: add v22.2d, v26.2d, v0.2d
-; CHECK-NEXT: add v20.2d, v20.2d, v0.2d
-; CHECK-NEXT: add v18.2d, v18.2d, v0.2d
+; CHECK-NEXT: fmov d3, x22
+; CHECK-NEXT: mul x21, x13, x20
+; CHECK-NEXT: add v12.2d, v12.2d, v1.2d
+; CHECK-NEXT: str q11, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: fmov d4, x18
+; CHECK-NEXT: add v18.2d, v18.2d, v2.2d
+; CHECK-NEXT: mov v2.16b, v7.16b
+; CHECK-NEXT: mul x23, x15, x20
+; CHECK-NEXT: mov v7.16b, v19.16b
+; CHECK-NEXT: mov v19.16b, v23.16b
+; CHECK-NEXT: fmov d14, x14
+; CHECK-NEXT: mov v23.16b, v27.16b
+; CHECK-NEXT: add v27.2d, v9.2d, v1.2d
+; CHECK-NEXT: mul x16, x5, x6
+; CHECK-NEXT: mov v11.16b, v15.16b
+; CHECK-NEXT: mov v3.d[1], x21
+; CHECK-NEXT: add v19.2d, v19.2d, v1.2d
+; CHECK-NEXT: add v7.2d, v7.2d, v1.2d
+; CHECK-NEXT: mul x13, x13, x6
+; CHECK-NEXT: add v23.2d, v23.2d, v1.2d
+; CHECK-NEXT: add v1.2d, v5.2d, v1.2d
+; CHECK-NEXT: mov v4.d[1], x23
+; CHECK-NEXT: mov v5.16b, v22.16b
+; CHECK-NEXT: mov v22.16b, v31.16b
+; CHECK-NEXT: mul x14, x15, x6
+; CHECK-NEXT: fmov d0, x16
+; CHECK-NEXT: add v30.2d, v30.2d, v3.2d
+; CHECK-NEXT: mov v3.16b, v16.16b
+; CHECK-NEXT: mov v16.16b, v20.16b
+; CHECK-NEXT: mov v14.d[1], x13
+; CHECK-NEXT: add v28.2d, v28.2d, v4.2d
+; CHECK-NEXT: mov v4.16b, v17.16b
+; CHECK-NEXT: mov v17.16b, v21.16b
+; CHECK-NEXT: mov v21.16b, v10.16b
+; CHECK-NEXT: mov v10.16b, v26.16b
+; CHECK-NEXT: mov v0.d[1], x14
+; CHECK-NEXT: ldp q26, q31, [sp] // 32-byte Folded Reload
+; CHECK-NEXT: add v13.2d, v13.2d, v14.2d
+; CHECK-NEXT: add v24.2d, v24.2d, v14.2d
+; CHECK-NEXT: add v22.2d, v22.2d, v14.2d
+; CHECK-NEXT: add v31.2d, v31.2d, v14.2d
+; CHECK-NEXT: add v26.2d, v26.2d, v14.2d
+; CHECK-NEXT: add v20.2d, v8.2d, v14.2d
+; CHECK-NEXT: add v10.2d, v10.2d, v14.2d
+; CHECK-NEXT: add v16.2d, v16.2d, v14.2d
+; CHECK-NEXT: add v5.2d, v5.2d, v14.2d
+; CHECK-NEXT: add v3.2d, v3.2d, v14.2d
+; CHECK-NEXT: add v2.2d, v2.2d, v14.2d
+; CHECK-NEXT: add v29.2d, v29.2d, v0.2d
+; CHECK-NEXT: add v25.2d, v25.2d, v0.2d
+; CHECK-NEXT: add v21.2d, v21.2d, v0.2d
; CHECK-NEXT: add v17.2d, v17.2d, v0.2d
-; CHECK-NEXT: add v7.2d, v7.2d, v0.2d
-; CHECK-NEXT: add v4.2d, v16.2d, v0.2d
-; CHECK-NEXT: add v3.2d, v3.2d, v0.2d
-; CHECK-NEXT: mov v0.16b, v21.16b
-; CHECK-NEXT: mov v21.16b, v29.16b
-; CHECK-NEXT: ldr q29, [sp] // 16-byte Folded Reload
-; CHECK-NEXT: add v9.2d, v9.2d, v1.2d
-; CHECK-NEXT: add v6.2d, v25.2d, v1.2d
-; CHECK-NEXT: add v5.2d, v5.2d, v1.2d
-; CHECK-NEXT: add v29.2d, v29.2d, v1.2d
-; CHECK-NEXT: add v21.2d, v21.2d, v1.2d
-; CHECK-NEXT: add v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: add v4.2d, v4.2d, v0.2d
+; CHECK-NEXT: add v0.2d, v6.2d, v0.2d
; CHECK-NEXT: b.ne .LBB0_1
; CHECK-NEXT: // %bb.2: // %for.cond.cleanup
-; CHECK-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldr q6, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: adrp x8, C
; CHECK-NEXT: add x8, x8, :lo12:C
-; CHECK-NEXT: stp q11, q30, [x8, #80]
+; CHECK-NEXT: stp q12, q31, [x8, #80]
; CHECK-NEXT: ldp x20, x19, [sp, #192] // 16-byte Folded Reload
-; CHECK-NEXT: str q1, [x8]
-; CHECK-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: str q6, [x8]
+; CHECK-NEXT: ldr q6, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: ldr x23, [sp, #160] // 8-byte Folded Reload
-; CHECK-NEXT: stp q15, q14, [x8, #144]
+; CHECK-NEXT: str q29, [x8, #112]
; CHECK-NEXT: ldp x22, x21, [sp, #176] // 16-byte Folded Reload
-; CHECK-NEXT: stp q1, q13, [x8, #16]
-; CHECK-NEXT: ldr q1, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT: stp q28, q12, [x8, #176]
+; CHECK-NEXT: stp q6, q11, [x8, #16]
+; CHECK-NEXT: ldr q6, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT: stp q18, q30, [x8, #144]
+; CHECK-NEXT: ldp d9, d8, [sp, #144] // 16-byte Folded Reload
+; CHECK-NEXT: stp q6, q13, [x8, #48]
; CHECK-NEXT: ldp d13, d12, [sp, #112] // 16-byte Folded Reload
-; CHECK-NEXT: stp q1, q31, [x8, #48]
+; CHECK-NEXT: stp q28, q26, [x8, #176]
; CHECK-NEXT: ldp d15, d14, [sp, #96] // 16-byte Folded Reload
-; CHECK-NEXT: stp q9, q24, [x8, #240]
-; CHECK-NEXT: ldp d9, d8, [sp, #144] // 16-byte Folded Reload
-; CHECK-NEXT: stp q19, q18, [x8, #336]
-; CHECK-NEXT: stp q10, q7, [x8, #400]
+; CHECK-NEXT: stp q19, q10, [x8, #336]
; CHECK-NEXT: ldp d11, d10, [sp, #128] // 16-byte Folded Reload
-; CHECK-NEXT: str q29, [x8, #112]
; CHECK-NEXT: str q27, [x8, #208]
+; CHECK-NEXT: stp q25, q24, [x8, #240]
; CHECK-NEXT: stp q23, q22, [x8, #272]
; CHECK-NEXT: stp q21, q20, [x8, #304]
-; CHECK-NEXT: stp q6, q17, [x8, #368]
-; CHECK-NEXT: stp q5, q4, [x8, #432]
-; CHECK-NEXT: stp q2, q3, [x8, #464]
+; CHECK-NEXT: stp q17, q16, [x8, #368]
+; CHECK-NEXT: stp q7, q5, [x8, #400]
+; CHECK-NEXT: stp q4, q3, [x8, #432]
+; CHECK-NEXT: stp q1, q2, [x8, #464]
; CHECK-NEXT: str q0, [x8, #496]
; CHECK-NEXT: add sp, sp, #208
; CHECK-NEXT: .cfi_def_cfa_offset 0
@@ -235,7 +238,6 @@ define dso_local void @run_test() local_unnamed_addr uwtable {
; CHECK-NEXT: .cfi_restore b14
; CHECK-NEXT: .cfi_restore b15
; CHECK-NEXT: ret
-; CH`ECK-NEXT: .cfi_offset b9, -16
entry:
br label %for.cond1.preheader
diff --git a/llvm/test/CodeGen/AArch64/reduce-or-opt.ll b/llvm/test/CodeGen/AArch64/reduce-or-opt.ll
index f5df5ea53c990..477b664a275f0 100644
--- a/llvm/test/CodeGen/AArch64/reduce-or-opt.ll
+++ b/llvm/test/CodeGen/AArch64/reduce-or-opt.ll
@@ -4,19 +4,19 @@
define i64 @select_or_reduce_v2i1(ptr nocapture noundef readonly %src) {
; CHECK-LABEL: select_or_reduce_v2i1:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: mov w8, #2 // =0x2
; CHECK-NEXT: .LBB0_1: // %vector.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldr q0, [x0, x8]
+; CHECK-NEXT: ldr q0, [x0], #16
; CHECK-NEXT: cmeq v0.2d, v0.2d, #0
; CHECK-NEXT: umaxv s0, v0.4s
; CHECK-NEXT: fmov w9, s0
; CHECK-NEXT: tbnz w9, #0, .LBB0_3
; CHECK-NEXT: // %bb.2: // %vector.body
; CHECK-NEXT: // in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT: cmp x8, #16
-; CHECK-NEXT: add x8, x8, #16
-; CHECK-NEXT: b.ne .LBB0_1
+; CHECK-NEXT: mov x10, x8
+; CHECK-NEXT: sub x8, x8, #2
+; CHECK-NEXT: cbnz x10, .LBB0_1
; CHECK-NEXT: .LBB0_3: // %middle.split
; CHECK-NEXT: and x0, x9, #0x1
; CHECK-NEXT: ret
@@ -42,19 +42,19 @@ middle.split:
define i64 @br_or_reduce_v2i1(ptr nocapture noundef readonly %src, ptr noundef readnone %p) {
; CHECK-LABEL: br_or_reduce_v2i1:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: mov w8, #2 // =0x2
; CHECK-NEXT: .LBB1_1: // %vector.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldr q0, [x0, x8]
+; CHECK-NEXT: ldr q0, [x0], #16
; CHECK-NEXT: cmeq v0.2d, v0.2d, #0
; CHECK-NEXT: umaxv s0, v0.4s
; CHECK-NEXT: fmov w9, s0
; CHECK-NEXT: tbnz w9, #0, .LBB1_3
; CHECK-NEXT: // %bb.2: // %vector.body
; CHECK-NEXT: // in Loop: Header=BB1_1 Depth=1
-; CHECK-NEXT: cmp x8, #16
-; CHECK-NEXT: add x8, x8, #16
-; CHECK-NEXT: b.ne .LBB1_1
+; CHECK-NEXT: mov x10, x8
+; CHECK-NEXT: sub x8, x8, #2
+; CHECK-NEXT: cbnz x10, .LBB1_1
; CHECK-NEXT: .LBB1_3: // %middle.split
; CHECK-NEXT: tbz w9, #0, .LBB1_5
; CHECK-NEXT: // %bb.4: // %found
diff --git a/llvm/test/CodeGen/AArch64/sink-and-fold.ll b/llvm/test/CodeGen/AArch64/sink-and-fold.ll
index 4d383fefc43c7..4798816a438a8 100644
--- a/llvm/test/CodeGen/AArch64/sink-and-fold.ll
+++ b/llvm/test/CodeGen/AArch64/sink-and-fold.ll
@@ -136,50 +136,51 @@ define void @f4(ptr %a, i64 %n) nounwind "target-features"="+alu-lsl-fast" {
; CHECK-NEXT: cmp x1, #1
; CHECK-NEXT: b.lt .LBB4_9
; CHECK-NEXT: // %bb.1: // %LI.preheader
-; CHECK-NEXT: stp x30, x23, [sp, #-48]! // 16-byte Folded Spill
-; CHECK-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT: mov x22, xzr
-; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: str x30, [sp, #-64]! // 8-byte Folded Spill
+; CHECK-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: mov x21, xzr
+; CHECK-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: mov x19, x1
; CHECK-NEXT: mov x20, x0
+; CHECK-NEXT: stp x24, x23, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: b .LBB4_3
; CHECK-NEXT: .LBB4_2: // %LI.latch
; CHECK-NEXT: // in Loop: Header=BB4_3 Depth=1
-; CHECK-NEXT: cmp x22, x19
-; CHECK-NEXT: mov x22, x23
+; CHECK-NEXT: cmp x21, x19
+; CHECK-NEXT: mov x21, x22
; CHECK-NEXT: b.ge .LBB4_8
; CHECK-NEXT: .LBB4_3: // %LI
; CHECK-NEXT: // =>This Loop Header: Depth=1
; CHECK-NEXT: // Child Loop BB4_6 Depth 2
-; CHECK-NEXT: mov x21, xzr
-; CHECK-NEXT: add x23, x22, #1
+; CHECK-NEXT: add x22, x21, #1
+; CHECK-NEXT: mov x23, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov x24, x20
; CHECK-NEXT: b .LBB4_6
; CHECK-NEXT: .LBB4_4: // %if.else
; CHECK-NEXT: // in Loop: Header=BB4_6 Depth=2
-; CHECK-NEXT: ldr w0, [x20, x22, lsl #2]
+; CHECK-NEXT: ldr w0, [x20, x21, lsl #2]
; CHECK-NEXT: .LBB4_5: // %LJ.latch
; CHECK-NEXT: // in Loop: Header=BB4_6 Depth=2
-; CHECK-NEXT: add x8, x21, #1
-; CHECK-NEXT: str w0, [x20, x21, lsl #2]
-; CHECK-NEXT: sub x9, x8, #1
-; CHECK-NEXT: mov x21, x8
-; CHECK-NEXT: cmp x9, x19
+; CHECK-NEXT: add x23, x23, #1
+; CHECK-NEXT: str w0, [x24], #4
+; CHECK-NEXT: cmp x23, x19
; CHECK-NEXT: b.ge .LBB4_2
; CHECK-NEXT: .LBB4_6: // %LJ
; CHECK-NEXT: // Parent Loop BB4_3 Depth=1
; CHECK-NEXT: // => This Inner Loop Header: Depth=2
-; CHECK-NEXT: ldr w8, [x20, x21, lsl #2]
+; CHECK-NEXT: ldr w8, [x24]
; CHECK-NEXT: tbz w8, #31, .LBB4_4
; CHECK-NEXT: // %bb.7: // %if.then
; CHECK-NEXT: // in Loop: Header=BB4_6 Depth=2
-; CHECK-NEXT: add x0, x20, x22, lsl #2
-; CHECK-NEXT: mov x1, x21
+; CHECK-NEXT: add x0, x20, x21, lsl #2
+; CHECK-NEXT: add x1, x23, #1
; CHECK-NEXT: bl use
; CHECK-NEXT: b .LBB4_5
; CHECK-NEXT: .LBB4_8:
-; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: ldp x30, x23, [sp], #48 // 16-byte Folded Reload
+; CHECK-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp x24, x23, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp], #64 // 8-byte Folded Reload
; CHECK-NEXT: .LBB4_9: // %exit
; CHECK-NEXT: ret
entry:
diff --git a/llvm/test/CodeGen/AArch64/sink-mul-exts.ll b/llvm/test/CodeGen/AArch64/sink-mul-exts.ll
index d52ac7847f814..a1c969f0b3626 100644
--- a/llvm/test/CodeGen/AArch64/sink-mul-exts.ll
+++ b/llvm/test/CodeGen/AArch64/sink-mul-exts.ll
@@ -6,13 +6,12 @@ define <8 x i16> @mul_splat_sext_v8i16(ptr %x, ptr %y) {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ldr d1, [x0]
; CHECK-NEXT: movi v0.2d, #0000000000000000
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: mov w8, #1 // =0x1
; CHECK-NEXT: dup v1.8b, v1.b[3]
; CHECK-NEXT: .LBB0_1: // %l1
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldr d2, [x1, x8]
-; CHECK-NEXT: add x8, x8, #4
-; CHECK-NEXT: cmp w8, #4
+; CHECK-NEXT: ldr d2, [x1], #4
+; CHECK-NEXT: subs w8, w8, #1
; CHECK-NEXT: smlal v0.8h, v2.8b, v1.8b
; CHECK-NEXT: b.eq .LBB0_1
; CHECK-NEXT: // %bb.2: // %l2
@@ -45,12 +44,11 @@ define <4 x i32> @mul_splat_sext_v4i32(ptr %x, ptr %y) {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: ldr d1, [x0]
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: mov w8, #1 // =0x1
; CHECK-NEXT: .LBB1_1: // %l1
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldr d2, [x1, x8]
-; CHECK-NEXT: add x8, x8, #8
-; CHECK-NEXT: cmp w8, #8
+; CHECK-NEXT: ldr d2, [x1], #8
+; CHECK-NEXT: subs w8, w8, #1
; CHECK-NEXT: smlal v0.4s, v2.4h, v1.h[3]
; CHECK-NEXT: b.eq .LBB1_1
; CHECK-NEXT: // %bb.2: // %l2
@@ -83,12 +81,11 @@ define <2 x i64> @mul_splat_sext_v2i64(ptr %x, ptr %y) {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: ldr d1, [x0]
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: mov w8, #1 // =0x1
; CHECK-NEXT: .LBB2_1: // %l1
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldr d2, [x1, x8]
-; CHECK-NEXT: add x8, x8, #16
-; CHECK-NEXT: cmp w8, #16
+; CHECK-NEXT: ldr d2, [x1], #16
+; CHECK-NEXT: subs w8, w8, #1
; CHECK-NEXT: smlal v0.2d, v2.2s, v1.s[1]
; CHECK-NEXT: b.eq .LBB2_1
; CHECK-NEXT: // %bb.2: // %l2
@@ -121,13 +118,12 @@ define <8 x i16> @mul_sext_splat_v8i16(ptr %x, ptr %y) {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ldr d1, [x0]
; CHECK-NEXT: movi v0.2d, #0000000000000000
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: mov w8, #1 // =0x1
; CHECK-NEXT: dup v1.8b, v1.b[3]
; CHECK-NEXT: .LBB3_1: // %l1
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldr d2, [x1, x8]
-; CHECK-NEXT: add x8, x8, #4
-; CHECK-NEXT: cmp w8, #4
+; CHECK-NEXT: ldr d2, [x1], #4
+; CHECK-NEXT: subs w8, w8, #1
; CHECK-NEXT: smlal v0.8h, v2.8b, v1.8b
; CHECK-NEXT: b.eq .LBB3_1
; CHECK-NEXT: // %bb.2: // %l2
@@ -160,12 +156,11 @@ define <4 x i32> @mul_sext_splat_v4i32(ptr %x, ptr %y) {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: ldr d1, [x0]
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: mov w8, #1 // =0x1
; CHECK-NEXT: .LBB4_1: // %l1
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldr d2, [x1, x8]
-; CHECK-NEXT: add x8, x8, #8
-; CHECK-NEXT: cmp w8, #8
+; CHECK-NEXT: ldr d2, [x1], #8
+; CHECK-NEXT: subs w8, w8, #1
; CHECK-NEXT: smlal v0.4s, v2.4h, v1.h[3]
; CHECK-NEXT: b.eq .LBB4_1
; CHECK-NEXT: // %bb.2: // %l2
@@ -198,12 +193,11 @@ define <2 x i64> @mul_sext_splat_v2i64(ptr %x, ptr %y) {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: ldr d1, [x0]
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: mov w8, #1 // =0x1
; CHECK-NEXT: .LBB5_1: // %l1
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldr d2, [x1, x8]
-; CHECK-NEXT: add x8, x8, #16
-; CHECK-NEXT: cmp w8, #16
+; CHECK-NEXT: ldr d2, [x1], #16
+; CHECK-NEXT: subs w8, w8, #1
; CHECK-NEXT: smlal v0.2d, v2.2s, v1.s[1]
; CHECK-NEXT: b.eq .LBB5_1
; CHECK-NEXT: // %bb.2: // %l2
diff --git a/llvm/test/CodeGen/AArch64/sinksplat.ll b/llvm/test/CodeGen/AArch64/sinksplat.ll
index 5743dc7cce580..a5ef9be557db0 100644
--- a/llvm/test/CodeGen/AArch64/sinksplat.ll
+++ b/llvm/test/CodeGen/AArch64/sinksplat.ll
@@ -234,12 +234,11 @@ define <4 x float> @fmul(ptr %x, ptr %y) {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: ldr s1, [x0]
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: mov w8, #1 // =0x1
; CHECK-NEXT: .LBB7_1: // %l1
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldr q2, [x1, x8]
-; CHECK-NEXT: add x8, x8, #16
-; CHECK-NEXT: cmp w8, #16
+; CHECK-NEXT: ldr q2, [x1], #16
+; CHECK-NEXT: subs w8, w8, #1
; CHECK-NEXT: fmul v2.4s, v2.4s, v1.s[0]
; CHECK-NEXT: fadd v0.4s, v2.4s, v0.4s
; CHECK-NEXT: b.eq .LBB7_1
@@ -427,14 +426,13 @@ define <4 x half> @fmul_half(ptr %x, ptr %y) {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ld1r { v1.4h }, [x0]
; CHECK-NEXT: movi d0, #0000000000000000
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: mov w8, #1 // =0x1
; CHECK-NEXT: fcvtl v1.4s, v1.4h
; CHECK-NEXT: .LBB13_1: // %l1
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldr d2, [x1, x8]
+; CHECK-NEXT: ldr d2, [x1], #8
; CHECK-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-NEXT: add x8, x8, #8
-; CHECK-NEXT: cmp w8, #8
+; CHECK-NEXT: subs w8, w8, #1
; CHECK-NEXT: fcvtl v2.4s, v2.4h
; CHECK-NEXT: fmul v2.4s, v2.4s, v1.4s
; CHECK-NEXT: fcvtn v2.4h, v2.4s
@@ -471,12 +469,11 @@ define <4 x half> @fmul_half_fullfp16(ptr %x, ptr %y) "target-features"="+fullfp
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: movi d0, #0000000000000000
; CHECK-NEXT: ldr h1, [x0]
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: mov w8, #1 // =0x1
; CHECK-NEXT: .LBB14_1: // %l1
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldr d2, [x1, x8]
-; CHECK-NEXT: add x8, x8, #8
-; CHECK-NEXT: cmp w8, #8
+; CHECK-NEXT: ldr d2, [x1], #8
+; CHECK-NEXT: subs w8, w8, #1
; CHECK-NEXT: fmul v2.4h, v2.4h, v1.h[0]
; CHECK-NEXT: fadd v0.4h, v2.4h, v0.4h
; CHECK-NEXT: b.eq .LBB14_1
diff --git a/llvm/test/CodeGen/AArch64/sitofp-to-tbl.ll b/llvm/test/CodeGen/AArch64/sitofp-to-tbl.ll
index 3e708b0678fbc..36c693332126f 100644
--- a/llvm/test/CodeGen/AArch64/sitofp-to-tbl.ll
+++ b/llvm/test/CodeGen/AArch64/sitofp-to-tbl.ll
@@ -45,18 +45,16 @@ define void @sitofp_v8i8_to_v8f32(ptr %src, ptr %dst) {
; CHECK-NEXT: adrp x9, .LCPI0_1
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI0_0]
; CHECK-NEXT: ldr q1, [x9, :lo12:.LCPI0_1]
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: mov w8, #1000 // =0x3e8
; CHECK-NEXT: .LBB0_1: // %loop
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldr d2, [x0, x8, lsl #3]
-; CHECK-NEXT: add x9, x1, x8, lsl #5
-; CHECK-NEXT: add x8, x8, #1
-; CHECK-NEXT: cmp x8, #1000
+; CHECK-NEXT: ldr d2, [x0], #8
+; CHECK-NEXT: subs x8, x8, #1
; CHECK-NEXT: tbl v3.16b, { v2.16b }, v0.16b
; CHECK-NEXT: tbl v2.16b, { v2.16b }, v1.16b
; CHECK-NEXT: scvtf v3.4s, v3.4s, #24
; CHECK-NEXT: scvtf v2.4s, v2.4s, #24
-; CHECK-NEXT: stp q2, q3, [x9]
+; CHECK-NEXT: stp q2, q3, [x1], #32
; CHECK-NEXT: b.eq .LBB0_1
; CHECK-NEXT: // %bb.2: // %exit
; CHECK-NEXT: ret
@@ -158,13 +156,11 @@ define void @sitofp_v16i8_to_v16f32(ptr %src, ptr %dst) {
; CHECK-NEXT: ldr q1, [x9, :lo12:.LCPI1_1]
; CHECK-NEXT: ldr q2, [x10, :lo12:.LCPI1_2]
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI1_3]
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: mov w8, #1000 // =0x3e8
; CHECK-NEXT: .LBB1_1: // %loop
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldr q4, [x0, x8, lsl #4]
-; CHECK-NEXT: add x9, x1, x8, lsl #6
-; CHECK-NEXT: add x8, x8, #1
-; CHECK-NEXT: cmp x8, #1000
+; CHECK-NEXT: ldr q4, [x0], #16
+; CHECK-NEXT: subs x8, x8, #1
; CHECK-NEXT: tbl v5.16b, { v4.16b }, v0.16b
; CHECK-NEXT: tbl v6.16b, { v4.16b }, v1.16b
; CHECK-NEXT: tbl v7.16b, { v4.16b }, v2.16b
@@ -173,8 +169,8 @@ define void @sitofp_v16i8_to_v16f32(ptr %src, ptr %dst) {
; CHECK-NEXT: scvtf v6.4s, v6.4s, #24
; CHECK-NEXT: scvtf v7.4s, v7.4s, #24
; CHECK-NEXT: scvtf v4.4s, v4.4s, #24
-; CHECK-NEXT: stp q6, q5, [x9, #32]
-; CHECK-NEXT: stp q4, q7, [x9]
+; CHECK-NEXT: stp q6, q5, [x1, #32]
+; CHECK-NEXT: stp q4, q7, [x1], #64
; CHECK-NEXT: b.eq .LBB1_1
; CHECK-NEXT: // %bb.2: // %exit
; CHECK-NEXT: ret
@@ -201,10 +197,11 @@ exit:
define void @sitofp_v8i8_to_v8f16(ptr %src, ptr %dst) {
; CHECK-LABEL: sitofp_v8i8_to_v8f16:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: mov w8, #1000 // =0x3e8
; CHECK-NEXT: .LBB2_1: // %loop
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldr d0, [x0, x8, lsl #3]
+; CHECK-NEXT: ldr d0, [x0], #8
+; CHECK-NEXT: subs x8, x8, #1
; CHECK-NEXT: sshll v0.8h, v0.8b, #0
; CHECK-NEXT: sshll v1.4s, v0.4h, #0
; CHECK-NEXT: sshll2 v0.4s, v0.8h, #0
@@ -212,9 +209,7 @@ define void @sitofp_v8i8_to_v8f16(ptr %src, ptr %dst) {
; CHECK-NEXT: scvtf v0.4s, v0.4s
; CHECK-NEXT: fcvtn v1.4h, v1.4s
; CHECK-NEXT: fcvtn2 v1.8h, v0.4s
-; CHECK-NEXT: str q1, [x1, x8, lsl #4]
-; CHECK-NEXT: add x8, x8, #1
-; CHECK-NEXT: cmp x8, #1000
+; CHECK-NEXT: str q1, [x1], #16
; CHECK-NEXT: b.eq .LBB2_1
; CHECK-NEXT: // %bb.2: // %exit
; CHECK-NEXT: ret
@@ -241,19 +236,18 @@ exit:
define void @sitofp_v2i8_to_v2f64(ptr %src, ptr %dst) {
; CHECK-LABEL: sitofp_v2i8_to_v2f64:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: mov w8, #1000 // =0x3e8
; CHECK-NEXT: .LBB3_1: // %loop
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: add x9, x0, x8, lsl #1
-; CHECK-NEXT: ldrsb w10, [x9]
-; CHECK-NEXT: ldrsb w9, [x9, #1]
-; CHECK-NEXT: fmov s0, w10
-; CHECK-NEXT: mov v0.s[1], w9
+; CHECK-NEXT: ldrsb w9, [x0]
+; CHECK-NEXT: ldrsb w10, [x0, #1]
+; CHECK-NEXT: subs x8, x8, #1
+; CHECK-NEXT: add x0, x0, #2
+; CHECK-NEXT: fmov s0, w9
+; CHECK-NEXT: mov v0.s[1], w10
; CHECK-NEXT: sshll v0.2d, v0.2s, #0
; CHECK-NEXT: scvtf v0.2d, v0.2d
-; CHECK-NEXT: str q0, [x1, x8, lsl #4]
-; CHECK-NEXT: add x8, x8, #1
-; CHECK-NEXT: cmp x8, #1000
+; CHECK-NEXT: str q0, [x1], #16
; CHECK-NEXT: b.eq .LBB3_1
; CHECK-NEXT: // %bb.2: // %exit
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/trunc-to-tbl.ll b/llvm/test/CodeGen/AArch64/trunc-to-tbl.ll
index fd23f3da18cd7..807c98c0f2d15 100644
--- a/llvm/test/CodeGen/AArch64/trunc-to-tbl.ll
+++ b/llvm/test/CodeGen/AArch64/trunc-to-tbl.ll
@@ -47,16 +47,15 @@ define void @trunc_v16i32_to_v16i8_in_loop(ptr %A, ptr %dst) {
; CHECK-NEXT: adrp x8, lCPI0_0 at PAGE
; CHECK-NEXT: Lloh1:
; CHECK-NEXT: ldr q0, [x8, lCPI0_0 at PAGEOFF]
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: mov w8, #1000 ; =0x3e8
; CHECK-NEXT: LBB0_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: add x9, x0, x8, lsl #6
-; CHECK-NEXT: ldp q1, q2, [x9]
-; CHECK-NEXT: ldp q3, q4, [x9, #32]
+; CHECK-NEXT: ldp q1, q2, [x0]
+; CHECK-NEXT: subs x8, x8, #1
+; CHECK-NEXT: ldp q3, q4, [x0, #32]
+; CHECK-NEXT: add x0, x0, #64
; CHECK-NEXT: tbl.16b v1, { v1, v2, v3, v4 }, v0
-; CHECK-NEXT: str q1, [x1, x8, lsl #4]
-; CHECK-NEXT: add x8, x8, #1
-; CHECK-NEXT: cmp x8, #1000
+; CHECK-NEXT: str q1, [x1], #16
; CHECK-NEXT: b.eq LBB0_1
; CHECK-NEXT: ; %bb.2: ; %exit
; CHECK-NEXT: ret
@@ -67,46 +66,42 @@ define void @trunc_v16i32_to_v16i8_in_loop(ptr %A, ptr %dst) {
; CHECK-BE-NEXT: adrp x8, .LCPI0_0
; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI0_0
; CHECK-BE-NEXT: ld1 { v0.16b }, [x8]
-; CHECK-BE-NEXT: mov x8, xzr
+; CHECK-BE-NEXT: mov w8, #1000 // =0x3e8
; CHECK-BE-NEXT: .LBB0_1: // %loop
; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-BE-NEXT: add x9, x0, x8, lsl #6
-; CHECK-BE-NEXT: add x10, x9, #16
-; CHECK-BE-NEXT: ld1 { v1.16b }, [x9]
-; CHECK-BE-NEXT: add x11, x9, #32
-; CHECK-BE-NEXT: ld1 { v2.16b }, [x10]
-; CHECK-BE-NEXT: add x9, x9, #48
-; CHECK-BE-NEXT: ld1 { v3.16b }, [x11]
+; CHECK-BE-NEXT: add x9, x0, #16
+; CHECK-BE-NEXT: ld1 { v1.16b }, [x0]
+; CHECK-BE-NEXT: add x10, x0, #32
+; CHECK-BE-NEXT: ld1 { v2.16b }, [x9]
+; CHECK-BE-NEXT: add x9, x0, #48
+; CHECK-BE-NEXT: subs x8, x8, #1
+; CHECK-BE-NEXT: ld1 { v3.16b }, [x10]
+; CHECK-BE-NEXT: add x0, x0, #64
; CHECK-BE-NEXT: ld1 { v4.16b }, [x9]
-; CHECK-BE-NEXT: add x9, x1, x8, lsl #4
-; CHECK-BE-NEXT: add x8, x8, #1
-; CHECK-BE-NEXT: cmp x8, #1000
; CHECK-BE-NEXT: tbl v1.16b, { v1.16b, v2.16b, v3.16b, v4.16b }, v0.16b
-; CHECK-BE-NEXT: st1 { v1.16b }, [x9]
+; CHECK-BE-NEXT: st1 { v1.16b }, [x1], #16
; CHECK-BE-NEXT: b.eq .LBB0_1
; CHECK-BE-NEXT: // %bb.2: // %exit
; CHECK-BE-NEXT: ret
;
; CHECK-DISABLE-LABEL: trunc_v16i32_to_v16i8_in_loop:
; CHECK-DISABLE: // %bb.0: // %entry
-; CHECK-DISABLE-NEXT: mov x8, xzr
+; CHECK-DISABLE-NEXT: mov w8, #1000 // =0x3e8
; CHECK-DISABLE-NEXT: .LBB0_1: // %loop
; CHECK-DISABLE-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-DISABLE-NEXT: add x9, x0, x8, lsl #6
-; CHECK-DISABLE-NEXT: ld1 { v0.4s }, [x9]
-; CHECK-DISABLE-NEXT: add x10, x9, #16
-; CHECK-DISABLE-NEXT: add x11, x9, #48
-; CHECK-DISABLE-NEXT: add x9, x9, #32
-; CHECK-DISABLE-NEXT: ld1 { v1.4s }, [x10]
-; CHECK-DISABLE-NEXT: ld1 { v2.4s }, [x11]
-; CHECK-DISABLE-NEXT: ld1 { v3.4s }, [x9]
-; CHECK-DISABLE-NEXT: add x9, x1, x8, lsl #4
-; CHECK-DISABLE-NEXT: add x8, x8, #1
-; CHECK-DISABLE-NEXT: cmp x8, #1000
+; CHECK-DISABLE-NEXT: add x9, x0, #16
+; CHECK-DISABLE-NEXT: add x10, x0, #48
+; CHECK-DISABLE-NEXT: add x11, x0, #32
+; CHECK-DISABLE-NEXT: ld1 { v0.4s }, [x0]
+; CHECK-DISABLE-NEXT: ld1 { v1.4s }, [x9]
+; CHECK-DISABLE-NEXT: ld1 { v2.4s }, [x10]
+; CHECK-DISABLE-NEXT: ld1 { v3.4s }, [x11]
+; CHECK-DISABLE-NEXT: subs x8, x8, #1
+; CHECK-DISABLE-NEXT: add x0, x0, #64
; CHECK-DISABLE-NEXT: uzp1 v0.8h, v0.8h, v1.8h
; CHECK-DISABLE-NEXT: uzp1 v2.8h, v3.8h, v2.8h
; CHECK-DISABLE-NEXT: uzp1 v0.16b, v0.16b, v2.16b
-; CHECK-DISABLE-NEXT: st1 { v0.16b }, [x9]
+; CHECK-DISABLE-NEXT: st1 { v0.16b }, [x1], #16
; CHECK-DISABLE-NEXT: b.eq .LBB0_1
; CHECK-DISABLE-NEXT: // %bb.2: // %exit
; CHECK-DISABLE-NEXT: ret
@@ -221,15 +216,13 @@ define void @trunc_v8i32_to_v8i8_in_loop(ptr %A, ptr %dst) {
; CHECK-NEXT: adrp x8, lCPI2_0 at PAGE
; CHECK-NEXT: Lloh3:
; CHECK-NEXT: ldr q0, [x8, lCPI2_0 at PAGEOFF]
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: mov w8, #1000 ; =0x3e8
; CHECK-NEXT: LBB2_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: add x9, x0, x8, lsl #5
-; CHECK-NEXT: ldp q1, q2, [x9]
+; CHECK-NEXT: ldp q1, q2, [x0], #32
+; CHECK-NEXT: subs x8, x8, #1
; CHECK-NEXT: tbl.16b v1, { v1, v2 }, v0
-; CHECK-NEXT: str d1, [x1, x8, lsl #3]
-; CHECK-NEXT: add x8, x8, #1
-; CHECK-NEXT: cmp x8, #1000
+; CHECK-NEXT: str d1, [x1], #8
; CHECK-NEXT: b.eq LBB2_1
; CHECK-NEXT: ; %bb.2: ; %exit
; CHECK-NEXT: ret
@@ -240,37 +233,33 @@ define void @trunc_v8i32_to_v8i8_in_loop(ptr %A, ptr %dst) {
; CHECK-BE-NEXT: adrp x8, .LCPI2_0
; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI2_0
; CHECK-BE-NEXT: ld1 { v0.16b }, [x8]
-; CHECK-BE-NEXT: mov x8, xzr
+; CHECK-BE-NEXT: mov w8, #1000 // =0x3e8
; CHECK-BE-NEXT: .LBB2_1: // %loop
; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-BE-NEXT: add x9, x0, x8, lsl #5
-; CHECK-BE-NEXT: add x10, x9, #16
-; CHECK-BE-NEXT: ld1 { v1.16b }, [x9]
-; CHECK-BE-NEXT: add x9, x1, x8, lsl #3
-; CHECK-BE-NEXT: ld1 { v2.16b }, [x10]
-; CHECK-BE-NEXT: add x8, x8, #1
-; CHECK-BE-NEXT: cmp x8, #1000
+; CHECK-BE-NEXT: add x9, x0, #16
+; CHECK-BE-NEXT: ld1 { v1.16b }, [x0]
+; CHECK-BE-NEXT: subs x8, x8, #1
+; CHECK-BE-NEXT: ld1 { v2.16b }, [x9]
+; CHECK-BE-NEXT: add x0, x0, #32
; CHECK-BE-NEXT: tbl v1.16b, { v1.16b, v2.16b }, v0.16b
-; CHECK-BE-NEXT: st1 { v1.8b }, [x9]
+; CHECK-BE-NEXT: st1 { v1.8b }, [x1], #8
; CHECK-BE-NEXT: b.eq .LBB2_1
; CHECK-BE-NEXT: // %bb.2: // %exit
; CHECK-BE-NEXT: ret
;
; CHECK-DISABLE-LABEL: trunc_v8i32_to_v8i8_in_loop:
; CHECK-DISABLE: // %bb.0: // %entry
-; CHECK-DISABLE-NEXT: mov x8, xzr
+; CHECK-DISABLE-NEXT: mov w8, #1000 // =0x3e8
; CHECK-DISABLE-NEXT: .LBB2_1: // %loop
; CHECK-DISABLE-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-DISABLE-NEXT: add x9, x0, x8, lsl #5
-; CHECK-DISABLE-NEXT: add x10, x9, #16
-; CHECK-DISABLE-NEXT: ld1 { v0.4s }, [x9]
-; CHECK-DISABLE-NEXT: add x9, x1, x8, lsl #3
-; CHECK-DISABLE-NEXT: ld1 { v1.4s }, [x10]
-; CHECK-DISABLE-NEXT: add x8, x8, #1
-; CHECK-DISABLE-NEXT: cmp x8, #1000
+; CHECK-DISABLE-NEXT: add x9, x0, #16
+; CHECK-DISABLE-NEXT: ld1 { v0.4s }, [x0]
+; CHECK-DISABLE-NEXT: subs x8, x8, #1
+; CHECK-DISABLE-NEXT: ld1 { v1.4s }, [x9]
+; CHECK-DISABLE-NEXT: add x0, x0, #32
; CHECK-DISABLE-NEXT: uzp1 v0.8h, v0.8h, v1.8h
; CHECK-DISABLE-NEXT: xtn v0.8b, v0.8h
-; CHECK-DISABLE-NEXT: st1 { v0.8b }, [x9]
+; CHECK-DISABLE-NEXT: st1 { v0.8b }, [x1], #8
; CHECK-DISABLE-NEXT: b.eq .LBB2_1
; CHECK-DISABLE-NEXT: // %bb.2: // %exit
; CHECK-DISABLE-NEXT: ret
@@ -334,20 +323,19 @@ define void @trunc_v16i64_to_v16i8_in_loop(ptr %A, ptr %dst) {
; CHECK-NEXT: adrp x8, lCPI3_0 at PAGE
; CHECK-NEXT: Lloh5:
; CHECK-NEXT: ldr q0, [x8, lCPI3_0 at PAGEOFF]
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: mov w8, #1000 ; =0x3e8
; CHECK-NEXT: LBB3_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: add x9, x0, x8, lsl #7
-; CHECK-NEXT: ldp q1, q2, [x9]
-; CHECK-NEXT: ldp q16, q17, [x9, #64]
-; CHECK-NEXT: ldp q3, q4, [x9, #32]
-; CHECK-NEXT: ldp q18, q19, [x9, #96]
+; CHECK-NEXT: ldp q1, q2, [x0]
+; CHECK-NEXT: subs x8, x8, #1
+; CHECK-NEXT: ldp q16, q17, [x0, #64]
+; CHECK-NEXT: ldp q3, q4, [x0, #32]
+; CHECK-NEXT: ldp q18, q19, [x0, #96]
+; CHECK-NEXT: add x0, x0, #128
; CHECK-NEXT: tbl.16b v1, { v1, v2, v3, v4 }, v0
; CHECK-NEXT: tbl.16b v2, { v16, v17, v18, v19 }, v0
; CHECK-NEXT: mov.d v1[1], v2[0]
-; CHECK-NEXT: str q1, [x1, x8, lsl #4]
-; CHECK-NEXT: add x8, x8, #1
-; CHECK-NEXT: cmp x8, #1000
+; CHECK-NEXT: str q1, [x1], #16
; CHECK-NEXT: b.eq LBB3_1
; CHECK-NEXT: ; %bb.2: ; %exit
; CHECK-NEXT: ret
@@ -358,68 +346,64 @@ define void @trunc_v16i64_to_v16i8_in_loop(ptr %A, ptr %dst) {
; CHECK-BE-NEXT: adrp x8, .LCPI3_0
; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI3_0
; CHECK-BE-NEXT: ld1 { v0.16b }, [x8]
-; CHECK-BE-NEXT: mov x8, xzr
+; CHECK-BE-NEXT: mov w8, #1000 // =0x3e8
; CHECK-BE-NEXT: .LBB3_1: // %loop
; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-BE-NEXT: add x9, x0, x8, lsl #7
-; CHECK-BE-NEXT: add x13, x9, #64
-; CHECK-BE-NEXT: add x12, x9, #80
-; CHECK-BE-NEXT: add x14, x9, #16
-; CHECK-BE-NEXT: ld1 { v1.16b }, [x9]
-; CHECK-BE-NEXT: ld1 { v16.16b }, [x13]
-; CHECK-BE-NEXT: add x11, x9, #96
-; CHECK-BE-NEXT: add x13, x9, #32
-; CHECK-BE-NEXT: ld1 { v2.16b }, [x14]
-; CHECK-BE-NEXT: ld1 { v17.16b }, [x12]
-; CHECK-BE-NEXT: add x10, x9, #112
-; CHECK-BE-NEXT: add x9, x9, #48
-; CHECK-BE-NEXT: ld1 { v3.16b }, [x13]
-; CHECK-BE-NEXT: ld1 { v18.16b }, [x11]
-; CHECK-BE-NEXT: ld1 { v4.16b }, [x9]
-; CHECK-BE-NEXT: add x9, x1, x8, lsl #4
-; CHECK-BE-NEXT: ld1 { v19.16b }, [x10]
-; CHECK-BE-NEXT: add x8, x8, #1
-; CHECK-BE-NEXT: cmp x8, #1000
+; CHECK-BE-NEXT: add x12, x0, #64
+; CHECK-BE-NEXT: add x11, x0, #80
+; CHECK-BE-NEXT: add x13, x0, #16
+; CHECK-BE-NEXT: ld1 { v1.16b }, [x0]
+; CHECK-BE-NEXT: ld1 { v16.16b }, [x12]
+; CHECK-BE-NEXT: add x10, x0, #96
+; CHECK-BE-NEXT: add x12, x0, #32
+; CHECK-BE-NEXT: ld1 { v2.16b }, [x13]
+; CHECK-BE-NEXT: ld1 { v17.16b }, [x11]
+; CHECK-BE-NEXT: add x9, x0, #112
+; CHECK-BE-NEXT: add x11, x0, #48
+; CHECK-BE-NEXT: ld1 { v3.16b }, [x12]
+; CHECK-BE-NEXT: ld1 { v18.16b }, [x10]
+; CHECK-BE-NEXT: ld1 { v4.16b }, [x11]
+; CHECK-BE-NEXT: subs x8, x8, #1
+; CHECK-BE-NEXT: ld1 { v19.16b }, [x9]
+; CHECK-BE-NEXT: add x0, x0, #128
; CHECK-BE-NEXT: tbl v1.16b, { v1.16b, v2.16b, v3.16b, v4.16b }, v0.16b
; CHECK-BE-NEXT: tbl v2.16b, { v16.16b, v17.16b, v18.16b, v19.16b }, v0.16b
; CHECK-BE-NEXT: mov v1.d[1], v2.d[0]
-; CHECK-BE-NEXT: st1 { v1.16b }, [x9]
+; CHECK-BE-NEXT: st1 { v1.16b }, [x1], #16
; CHECK-BE-NEXT: b.eq .LBB3_1
; CHECK-BE-NEXT: // %bb.2: // %exit
; CHECK-BE-NEXT: ret
;
; CHECK-DISABLE-LABEL: trunc_v16i64_to_v16i8_in_loop:
; CHECK-DISABLE: // %bb.0: // %entry
-; CHECK-DISABLE-NEXT: mov x8, xzr
+; CHECK-DISABLE-NEXT: mov w8, #1000 // =0x3e8
; CHECK-DISABLE-NEXT: .LBB3_1: // %loop
; CHECK-DISABLE-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-DISABLE-NEXT: add x9, x0, x8, lsl #7
-; CHECK-DISABLE-NEXT: add x10, x9, #16
-; CHECK-DISABLE-NEXT: add x11, x9, #48
+; CHECK-DISABLE-NEXT: add x9, x0, #16
+; CHECK-DISABLE-NEXT: add x10, x0, #48
+; CHECK-DISABLE-NEXT: ld1 { v6.2d }, [x0]
; CHECK-DISABLE-NEXT: ld1 { v0.2d }, [x9]
+; CHECK-DISABLE-NEXT: add x9, x0, #32
; CHECK-DISABLE-NEXT: ld1 { v1.2d }, [x10]
-; CHECK-DISABLE-NEXT: add x10, x9, #112
-; CHECK-DISABLE-NEXT: ld1 { v2.2d }, [x11]
+; CHECK-DISABLE-NEXT: add x10, x0, #112
+; CHECK-DISABLE-NEXT: ld1 { v2.2d }, [x9]
+; CHECK-DISABLE-NEXT: add x9, x0, #96
; CHECK-DISABLE-NEXT: ld1 { v3.2d }, [x10]
-; CHECK-DISABLE-NEXT: add x10, x9, #96
-; CHECK-DISABLE-NEXT: add x11, x9, #32
-; CHECK-DISABLE-NEXT: ld1 { v4.2d }, [x10]
-; CHECK-DISABLE-NEXT: add x10, x9, #80
-; CHECK-DISABLE-NEXT: add x9, x9, #64
-; CHECK-DISABLE-NEXT: ld1 { v5.2d }, [x11]
-; CHECK-DISABLE-NEXT: ld1 { v6.2d }, [x10]
+; CHECK-DISABLE-NEXT: add x10, x0, #80
+; CHECK-DISABLE-NEXT: ld1 { v4.2d }, [x9]
+; CHECK-DISABLE-NEXT: add x9, x0, #64
+; CHECK-DISABLE-NEXT: ld1 { v5.2d }, [x10]
+; CHECK-DISABLE-NEXT: uzp1 v0.4s, v6.4s, v0.4s
; CHECK-DISABLE-NEXT: ld1 { v7.2d }, [x9]
-; CHECK-DISABLE-NEXT: uzp1 v0.4s, v0.4s, v1.4s
-; CHECK-DISABLE-NEXT: add x9, x1, x8, lsl #4
-; CHECK-DISABLE-NEXT: add x8, x8, #1
+; CHECK-DISABLE-NEXT: uzp1 v1.4s, v2.4s, v1.4s
+; CHECK-DISABLE-NEXT: subs x8, x8, #1
; CHECK-DISABLE-NEXT: uzp1 v3.4s, v4.4s, v3.4s
-; CHECK-DISABLE-NEXT: cmp x8, #1000
-; CHECK-DISABLE-NEXT: uzp1 v4.4s, v7.4s, v6.4s
-; CHECK-DISABLE-NEXT: uzp1 v2.4s, v5.4s, v2.4s
-; CHECK-DISABLE-NEXT: uzp1 v1.8h, v4.8h, v3.8h
-; CHECK-DISABLE-NEXT: uzp1 v0.8h, v0.8h, v2.8h
-; CHECK-DISABLE-NEXT: uzp1 v0.16b, v0.16b, v1.16b
-; CHECK-DISABLE-NEXT: st1 { v0.16b }, [x9]
+; CHECK-DISABLE-NEXT: add x0, x0, #128
+; CHECK-DISABLE-NEXT: uzp1 v4.4s, v7.4s, v5.4s
+; CHECK-DISABLE-NEXT: uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-DISABLE-NEXT: uzp1 v2.8h, v4.8h, v3.8h
+; CHECK-DISABLE-NEXT: uzp1 v0.16b, v0.16b, v2.16b
+; CHECK-DISABLE-NEXT: st1 { v0.16b }, [x1], #16
; CHECK-DISABLE-NEXT: b.eq .LBB3_1
; CHECK-DISABLE-NEXT: // %bb.2: // %exit
; CHECK-DISABLE-NEXT: ret
@@ -483,16 +467,15 @@ define void @trunc_v8i64_to_v8i8_in_loop(ptr %A, ptr %dst) {
; CHECK-NEXT: adrp x8, lCPI4_0 at PAGE
; CHECK-NEXT: Lloh7:
; CHECK-NEXT: ldr q0, [x8, lCPI4_0 at PAGEOFF]
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: mov w8, #1000 ; =0x3e8
; CHECK-NEXT: LBB4_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: add x9, x0, x8, lsl #6
-; CHECK-NEXT: ldp q1, q2, [x9]
-; CHECK-NEXT: ldp q3, q4, [x9, #32]
+; CHECK-NEXT: ldp q1, q2, [x0]
+; CHECK-NEXT: subs x8, x8, #1
+; CHECK-NEXT: ldp q3, q4, [x0, #32]
+; CHECK-NEXT: add x0, x0, #64
; CHECK-NEXT: tbl.16b v1, { v1, v2, v3, v4 }, v0
-; CHECK-NEXT: str d1, [x1, x8, lsl #3]
-; CHECK-NEXT: add x8, x8, #1
-; CHECK-NEXT: cmp x8, #1000
+; CHECK-NEXT: str d1, [x1], #8
; CHECK-NEXT: b.eq LBB4_1
; CHECK-NEXT: ; %bb.2: ; %exit
; CHECK-NEXT: ret
@@ -503,47 +486,43 @@ define void @trunc_v8i64_to_v8i8_in_loop(ptr %A, ptr %dst) {
; CHECK-BE-NEXT: adrp x8, .LCPI4_0
; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI4_0
; CHECK-BE-NEXT: ld1 { v0.16b }, [x8]
-; CHECK-BE-NEXT: mov x8, xzr
+; CHECK-BE-NEXT: mov w8, #1000 // =0x3e8
; CHECK-BE-NEXT: .LBB4_1: // %loop
; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-BE-NEXT: add x9, x0, x8, lsl #6
-; CHECK-BE-NEXT: add x10, x9, #16
-; CHECK-BE-NEXT: ld1 { v1.16b }, [x9]
-; CHECK-BE-NEXT: add x11, x9, #32
-; CHECK-BE-NEXT: ld1 { v2.16b }, [x10]
-; CHECK-BE-NEXT: add x9, x9, #48
-; CHECK-BE-NEXT: ld1 { v3.16b }, [x11]
+; CHECK-BE-NEXT: add x9, x0, #16
+; CHECK-BE-NEXT: ld1 { v1.16b }, [x0]
+; CHECK-BE-NEXT: add x10, x0, #32
+; CHECK-BE-NEXT: ld1 { v2.16b }, [x9]
+; CHECK-BE-NEXT: add x9, x0, #48
+; CHECK-BE-NEXT: subs x8, x8, #1
+; CHECK-BE-NEXT: ld1 { v3.16b }, [x10]
+; CHECK-BE-NEXT: add x0, x0, #64
; CHECK-BE-NEXT: ld1 { v4.16b }, [x9]
-; CHECK-BE-NEXT: add x9, x1, x8, lsl #3
-; CHECK-BE-NEXT: add x8, x8, #1
-; CHECK-BE-NEXT: cmp x8, #1000
; CHECK-BE-NEXT: tbl v1.16b, { v1.16b, v2.16b, v3.16b, v4.16b }, v0.16b
-; CHECK-BE-NEXT: st1 { v1.8b }, [x9]
+; CHECK-BE-NEXT: st1 { v1.8b }, [x1], #8
; CHECK-BE-NEXT: b.eq .LBB4_1
; CHECK-BE-NEXT: // %bb.2: // %exit
; CHECK-BE-NEXT: ret
;
; CHECK-DISABLE-LABEL: trunc_v8i64_to_v8i8_in_loop:
; CHECK-DISABLE: // %bb.0: // %entry
-; CHECK-DISABLE-NEXT: mov x8, xzr
+; CHECK-DISABLE-NEXT: mov w8, #1000 // =0x3e8
; CHECK-DISABLE-NEXT: .LBB4_1: // %loop
; CHECK-DISABLE-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-DISABLE-NEXT: add x9, x0, x8, lsl #6
-; CHECK-DISABLE-NEXT: ld1 { v0.2d }, [x9]
-; CHECK-DISABLE-NEXT: add x10, x9, #16
-; CHECK-DISABLE-NEXT: add x11, x9, #48
-; CHECK-DISABLE-NEXT: add x9, x9, #32
-; CHECK-DISABLE-NEXT: ld1 { v1.2d }, [x10]
-; CHECK-DISABLE-NEXT: ld1 { v2.2d }, [x11]
-; CHECK-DISABLE-NEXT: ld1 { v3.2d }, [x9]
-; CHECK-DISABLE-NEXT: add x9, x1, x8, lsl #3
-; CHECK-DISABLE-NEXT: add x8, x8, #1
-; CHECK-DISABLE-NEXT: cmp x8, #1000
+; CHECK-DISABLE-NEXT: add x9, x0, #16
+; CHECK-DISABLE-NEXT: add x10, x0, #48
+; CHECK-DISABLE-NEXT: add x11, x0, #32
+; CHECK-DISABLE-NEXT: ld1 { v0.2d }, [x0]
+; CHECK-DISABLE-NEXT: ld1 { v1.2d }, [x9]
+; CHECK-DISABLE-NEXT: ld1 { v2.2d }, [x10]
+; CHECK-DISABLE-NEXT: ld1 { v3.2d }, [x11]
+; CHECK-DISABLE-NEXT: subs x8, x8, #1
+; CHECK-DISABLE-NEXT: add x0, x0, #64
; CHECK-DISABLE-NEXT: uzp1 v0.4s, v0.4s, v1.4s
; CHECK-DISABLE-NEXT: uzp1 v2.4s, v3.4s, v2.4s
; CHECK-DISABLE-NEXT: uzp1 v0.8h, v0.8h, v2.8h
; CHECK-DISABLE-NEXT: xtn v0.8b, v0.8h
-; CHECK-DISABLE-NEXT: st1 { v0.8b }, [x9]
+; CHECK-DISABLE-NEXT: st1 { v0.8b }, [x1], #8
; CHECK-DISABLE-NEXT: b.eq .LBB4_1
; CHECK-DISABLE-NEXT: // %bb.2: // %exit
; CHECK-DISABLE-NEXT: ret
@@ -568,10 +547,11 @@ exit:
define void @trunc_v8i19_to_v8i8_in_loop(ptr %A, ptr %dst) {
; CHECK-LABEL: trunc_v8i19_to_v8i8_in_loop:
; CHECK: ; %bb.0: ; %entry
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: mov w8, #1000 ; =0x3e8
; CHECK-NEXT: LBB5_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldp x9, x10, [x0]
+; CHECK-NEXT: subs x8, x8, #1
; CHECK-NEXT: ldrb w14, [x0, #18]
; CHECK-NEXT: ldrh w15, [x0, #16]
; CHECK-NEXT: add x0, x0, #32
@@ -593,19 +573,18 @@ define void @trunc_v8i19_to_v8i8_in_loop(ptr %A, ptr %dst) {
; CHECK-NEXT: mov.s v0[3], w10
; CHECK-NEXT: uzp1.8h v0, v1, v0
; CHECK-NEXT: xtn.8b v0, v0
-; CHECK-NEXT: str d0, [x1, x8, lsl #3]
-; CHECK-NEXT: add x8, x8, #1
-; CHECK-NEXT: cmp x8, #1000
+; CHECK-NEXT: str d0, [x1], #8
; CHECK-NEXT: b.eq LBB5_1
; CHECK-NEXT: ; %bb.2: ; %exit
; CHECK-NEXT: ret
;
; CHECK-BE-LABEL: trunc_v8i19_to_v8i8_in_loop:
; CHECK-BE: // %bb.0: // %entry
-; CHECK-BE-NEXT: mov x8, xzr
+; CHECK-BE-NEXT: mov w8, #1000 // =0x3e8
; CHECK-BE-NEXT: .LBB5_1: // %loop
; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-BE-NEXT: ldp x10, x9, [x0]
+; CHECK-BE-NEXT: subs x8, x8, #1
; CHECK-BE-NEXT: ldrh w16, [x0, #16]
; CHECK-BE-NEXT: ldrb w17, [x0, #18]
; CHECK-BE-NEXT: add x0, x0, #32
@@ -630,23 +609,21 @@ define void @trunc_v8i19_to_v8i8_in_loop(ptr %A, ptr %dst) {
; CHECK-BE-NEXT: mov v0.s[2], w13
; CHECK-BE-NEXT: mov v1.s[2], w11
; CHECK-BE-NEXT: mov v0.s[3], w9
-; CHECK-BE-NEXT: add x9, x1, x8, lsl #3
-; CHECK-BE-NEXT: add x8, x8, #1
-; CHECK-BE-NEXT: cmp x8, #1000
; CHECK-BE-NEXT: mov v1.s[3], w12
; CHECK-BE-NEXT: uzp1 v0.8h, v0.8h, v1.8h
; CHECK-BE-NEXT: xtn v0.8b, v0.8h
-; CHECK-BE-NEXT: st1 { v0.8b }, [x9]
+; CHECK-BE-NEXT: st1 { v0.8b }, [x1], #8
; CHECK-BE-NEXT: b.eq .LBB5_1
; CHECK-BE-NEXT: // %bb.2: // %exit
; CHECK-BE-NEXT: ret
;
; CHECK-DISABLE-LABEL: trunc_v8i19_to_v8i8_in_loop:
; CHECK-DISABLE: // %bb.0: // %entry
-; CHECK-DISABLE-NEXT: mov x8, xzr
+; CHECK-DISABLE-NEXT: mov w8, #1000 // =0x3e8
; CHECK-DISABLE-NEXT: .LBB5_1: // %loop
; CHECK-DISABLE-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-DISABLE-NEXT: ldp x10, x9, [x0]
+; CHECK-DISABLE-NEXT: subs x8, x8, #1
; CHECK-DISABLE-NEXT: ldrh w16, [x0, #16]
; CHECK-DISABLE-NEXT: ldrb w17, [x0, #18]
; CHECK-DISABLE-NEXT: add x0, x0, #32
@@ -671,13 +648,10 @@ define void @trunc_v8i19_to_v8i8_in_loop(ptr %A, ptr %dst) {
; CHECK-DISABLE-NEXT: mov v0.s[2], w13
; CHECK-DISABLE-NEXT: mov v1.s[2], w11
; CHECK-DISABLE-NEXT: mov v0.s[3], w9
-; CHECK-DISABLE-NEXT: add x9, x1, x8, lsl #3
-; CHECK-DISABLE-NEXT: add x8, x8, #1
-; CHECK-DISABLE-NEXT: cmp x8, #1000
; CHECK-DISABLE-NEXT: mov v1.s[3], w12
; CHECK-DISABLE-NEXT: uzp1 v0.8h, v0.8h, v1.8h
; CHECK-DISABLE-NEXT: xtn v0.8b, v0.8h
-; CHECK-DISABLE-NEXT: st1 { v0.8b }, [x9]
+; CHECK-DISABLE-NEXT: st1 { v0.8b }, [x1], #8
; CHECK-DISABLE-NEXT: b.eq .LBB5_1
; CHECK-DISABLE-NEXT: // %bb.2: // %exit
; CHECK-DISABLE-NEXT: ret
@@ -820,51 +794,45 @@ exit:
define void @trunc_v16i16_to_v16i8_in_loop(ptr %A, ptr %dst) {
; CHECK-LABEL: trunc_v16i16_to_v16i8_in_loop:
; CHECK: ; %bb.0: ; %entry
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: mov w8, #1000 ; =0x3e8
; CHECK-NEXT: LBB7_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: add x9, x0, x8, lsl #5
-; CHECK-NEXT: ldp q1, q0, [x9]
+; CHECK-NEXT: ldp q1, q0, [x0], #32
+; CHECK-NEXT: subs x8, x8, #1
; CHECK-NEXT: uzp1.16b v0, v1, v0
-; CHECK-NEXT: str q0, [x1, x8, lsl #4]
-; CHECK-NEXT: add x8, x8, #1
-; CHECK-NEXT: cmp x8, #1000
+; CHECK-NEXT: str q0, [x1], #16
; CHECK-NEXT: b.eq LBB7_1
; CHECK-NEXT: ; %bb.2: ; %exit
; CHECK-NEXT: ret
;
; CHECK-BE-LABEL: trunc_v16i16_to_v16i8_in_loop:
; CHECK-BE: // %bb.0: // %entry
-; CHECK-BE-NEXT: mov x8, xzr
+; CHECK-BE-NEXT: mov w8, #1000 // =0x3e8
; CHECK-BE-NEXT: .LBB7_1: // %loop
; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-BE-NEXT: add x9, x0, x8, lsl #5
-; CHECK-BE-NEXT: add x10, x9, #16
-; CHECK-BE-NEXT: ld1 { v0.8h }, [x9]
-; CHECK-BE-NEXT: add x9, x1, x8, lsl #4
-; CHECK-BE-NEXT: ld1 { v1.8h }, [x10]
-; CHECK-BE-NEXT: add x8, x8, #1
-; CHECK-BE-NEXT: cmp x8, #1000
+; CHECK-BE-NEXT: add x9, x0, #16
+; CHECK-BE-NEXT: ld1 { v0.8h }, [x0]
+; CHECK-BE-NEXT: subs x8, x8, #1
+; CHECK-BE-NEXT: ld1 { v1.8h }, [x9]
+; CHECK-BE-NEXT: add x0, x0, #32
; CHECK-BE-NEXT: uzp1 v0.16b, v0.16b, v1.16b
-; CHECK-BE-NEXT: st1 { v0.16b }, [x9]
+; CHECK-BE-NEXT: st1 { v0.16b }, [x1], #16
; CHECK-BE-NEXT: b.eq .LBB7_1
; CHECK-BE-NEXT: // %bb.2: // %exit
; CHECK-BE-NEXT: ret
;
; CHECK-DISABLE-LABEL: trunc_v16i16_to_v16i8_in_loop:
; CHECK-DISABLE: // %bb.0: // %entry
-; CHECK-DISABLE-NEXT: mov x8, xzr
+; CHECK-DISABLE-NEXT: mov w8, #1000 // =0x3e8
; CHECK-DISABLE-NEXT: .LBB7_1: // %loop
; CHECK-DISABLE-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-DISABLE-NEXT: add x9, x0, x8, lsl #5
-; CHECK-DISABLE-NEXT: add x10, x9, #16
-; CHECK-DISABLE-NEXT: ld1 { v0.8h }, [x9]
-; CHECK-DISABLE-NEXT: add x9, x1, x8, lsl #4
-; CHECK-DISABLE-NEXT: ld1 { v1.8h }, [x10]
-; CHECK-DISABLE-NEXT: add x8, x8, #1
-; CHECK-DISABLE-NEXT: cmp x8, #1000
+; CHECK-DISABLE-NEXT: add x9, x0, #16
+; CHECK-DISABLE-NEXT: ld1 { v0.8h }, [x0]
+; CHECK-DISABLE-NEXT: subs x8, x8, #1
+; CHECK-DISABLE-NEXT: ld1 { v1.8h }, [x9]
+; CHECK-DISABLE-NEXT: add x0, x0, #32
; CHECK-DISABLE-NEXT: uzp1 v0.16b, v0.16b, v1.16b
-; CHECK-DISABLE-NEXT: st1 { v0.16b }, [x9]
+; CHECK-DISABLE-NEXT: st1 { v0.16b }, [x1], #16
; CHECK-DISABLE-NEXT: b.eq .LBB7_1
; CHECK-DISABLE-NEXT: // %bb.2: // %exit
; CHECK-DISABLE-NEXT: ret
@@ -889,46 +857,39 @@ exit:
define void @trunc_v8i16_to_v8i8_in_loop(ptr %A, ptr %dst) {
; CHECK-LABEL: trunc_v8i16_to_v8i8_in_loop:
; CHECK: ; %bb.0: ; %entry
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: mov w8, #1000 ; =0x3e8
; CHECK-NEXT: LBB8_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldr q0, [x0, x8, lsl #4]
+; CHECK-NEXT: ldr q0, [x0], #16
+; CHECK-NEXT: subs x8, x8, #1
; CHECK-NEXT: xtn.8b v0, v0
-; CHECK-NEXT: str d0, [x1, x8, lsl #3]
-; CHECK-NEXT: add x8, x8, #1
-; CHECK-NEXT: cmp x8, #1000
+; CHECK-NEXT: str d0, [x1], #8
; CHECK-NEXT: b.eq LBB8_1
; CHECK-NEXT: ; %bb.2: ; %exit
; CHECK-NEXT: ret
;
; CHECK-BE-LABEL: trunc_v8i16_to_v8i8_in_loop:
; CHECK-BE: // %bb.0: // %entry
-; CHECK-BE-NEXT: mov x8, xzr
+; CHECK-BE-NEXT: mov w8, #1000 // =0x3e8
; CHECK-BE-NEXT: .LBB8_1: // %loop
; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-BE-NEXT: add x9, x0, x8, lsl #4
-; CHECK-BE-NEXT: ld1 { v0.8h }, [x9]
-; CHECK-BE-NEXT: add x9, x1, x8, lsl #3
-; CHECK-BE-NEXT: add x8, x8, #1
-; CHECK-BE-NEXT: cmp x8, #1000
+; CHECK-BE-NEXT: ld1 { v0.8h }, [x0], #16
+; CHECK-BE-NEXT: subs x8, x8, #1
; CHECK-BE-NEXT: xtn v0.8b, v0.8h
-; CHECK-BE-NEXT: st1 { v0.8b }, [x9]
+; CHECK-BE-NEXT: st1 { v0.8b }, [x1], #8
; CHECK-BE-NEXT: b.eq .LBB8_1
; CHECK-BE-NEXT: // %bb.2: // %exit
; CHECK-BE-NEXT: ret
;
; CHECK-DISABLE-LABEL: trunc_v8i16_to_v8i8_in_loop:
; CHECK-DISABLE: // %bb.0: // %entry
-; CHECK-DISABLE-NEXT: mov x8, xzr
+; CHECK-DISABLE-NEXT: mov w8, #1000 // =0x3e8
; CHECK-DISABLE-NEXT: .LBB8_1: // %loop
; CHECK-DISABLE-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-DISABLE-NEXT: add x9, x0, x8, lsl #4
-; CHECK-DISABLE-NEXT: ld1 { v0.8h }, [x9]
-; CHECK-DISABLE-NEXT: add x9, x1, x8, lsl #3
-; CHECK-DISABLE-NEXT: add x8, x8, #1
-; CHECK-DISABLE-NEXT: cmp x8, #1000
+; CHECK-DISABLE-NEXT: ld1 { v0.8h }, [x0], #16
+; CHECK-DISABLE-NEXT: subs x8, x8, #1
; CHECK-DISABLE-NEXT: xtn v0.8b, v0.8h
-; CHECK-DISABLE-NEXT: st1 { v0.8b }, [x9]
+; CHECK-DISABLE-NEXT: st1 { v0.8b }, [x1], #8
; CHECK-DISABLE-NEXT: b.eq .LBB8_1
; CHECK-DISABLE-NEXT: // %bb.2: // %exit
; CHECK-DISABLE-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll b/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll
index 59dfcf9850a49..6b48769131c85 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll
@@ -318,12 +318,11 @@ define float @fadd_reduction_v4f32_in_loop(ptr %ptr.start) {
; CHECK-LABEL: fadd_reduction_v4f32_in_loop:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: movi d0, #0000000000000000
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: mov w8, #7 // =0x7
; CHECK-NEXT: .LBB11_1: // %loop
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldr q1, [x0, x8]
-; CHECK-NEXT: add x8, x8, #16
-; CHECK-NEXT: cmp w8, #112
+; CHECK-NEXT: ldr q1, [x0], #16
+; CHECK-NEXT: subs w8, w8, #1
; CHECK-NEXT: faddp v1.4s, v1.4s, v1.4s
; CHECK-NEXT: faddp s1, v1.2s
; CHECK-NEXT: fadd s0, s1, s0
@@ -355,13 +354,12 @@ define half @fadd_reduction_v4f16_in_loop(ptr %ptr.start) {
; CHECK-SD-NOFP16-LABEL: fadd_reduction_v4f16_in_loop:
; CHECK-SD-NOFP16: // %bb.0: // %entry
; CHECK-SD-NOFP16-NEXT: movi d0, #0000000000000000
-; CHECK-SD-NOFP16-NEXT: mov x8, xzr
+; CHECK-SD-NOFP16-NEXT: mov w8, #7 // =0x7
; CHECK-SD-NOFP16-NEXT: .LBB12_1: // %loop
; CHECK-SD-NOFP16-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-SD-NOFP16-NEXT: ldr d1, [x0, x8]
+; CHECK-SD-NOFP16-NEXT: ldr d1, [x0], #8
; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
-; CHECK-SD-NOFP16-NEXT: add x8, x8, #8
-; CHECK-SD-NOFP16-NEXT: cmp w8, #56
+; CHECK-SD-NOFP16-NEXT: subs w8, w8, #1
; CHECK-SD-NOFP16-NEXT: mov h2, v1.h[1]
; CHECK-SD-NOFP16-NEXT: mov h3, v1.h[3]
; CHECK-SD-NOFP16-NEXT: mov h4, v1.h[2]
@@ -381,12 +379,11 @@ define half @fadd_reduction_v4f16_in_loop(ptr %ptr.start) {
; CHECK-SD-FP16-LABEL: fadd_reduction_v4f16_in_loop:
; CHECK-SD-FP16: // %bb.0: // %entry
; CHECK-SD-FP16-NEXT: movi d0, #0000000000000000
-; CHECK-SD-FP16-NEXT: mov x8, xzr
+; CHECK-SD-FP16-NEXT: mov w8, #7 // =0x7
; CHECK-SD-FP16-NEXT: .LBB12_1: // %loop
; CHECK-SD-FP16-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-SD-FP16-NEXT: ldr d1, [x0, x8]
-; CHECK-SD-FP16-NEXT: add x8, x8, #8
-; CHECK-SD-FP16-NEXT: cmp w8, #56
+; CHECK-SD-FP16-NEXT: ldr d1, [x0], #8
+; CHECK-SD-FP16-NEXT: subs w8, w8, #1
; CHECK-SD-FP16-NEXT: faddp v1.4h, v1.4h, v1.4h
; CHECK-SD-FP16-NEXT: faddp h1, v1.2h
; CHECK-SD-FP16-NEXT: fadd h0, h1, h0
@@ -396,14 +393,13 @@ define half @fadd_reduction_v4f16_in_loop(ptr %ptr.start) {
;
; CHECK-GI-NOFP16-LABEL: fadd_reduction_v4f16_in_loop:
; CHECK-GI-NOFP16: // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT: mov x8, xzr
; CHECK-GI-NOFP16-NEXT: mov w9, #0 // =0x0
+; CHECK-GI-NOFP16-NEXT: mov w8, #7 // =0x7
; CHECK-GI-NOFP16-NEXT: .LBB12_1: // %loop
; CHECK-GI-NOFP16-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-GI-NOFP16-NEXT: ldr d0, [x0, x8]
+; CHECK-GI-NOFP16-NEXT: ldr d0, [x0], #8
; CHECK-GI-NOFP16-NEXT: fmov s1, w9
-; CHECK-GI-NOFP16-NEXT: add x8, x8, #8
-; CHECK-GI-NOFP16-NEXT: cmp w8, #56
+; CHECK-GI-NOFP16-NEXT: subs w8, w8, #1
; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
; CHECK-GI-NOFP16-NEXT: faddp v0.4s, v0.4s, v0.4s
@@ -421,12 +417,11 @@ define half @fadd_reduction_v4f16_in_loop(ptr %ptr.start) {
; CHECK-GI-FP16-LABEL: fadd_reduction_v4f16_in_loop:
; CHECK-GI-FP16: // %bb.0: // %entry
; CHECK-GI-FP16-NEXT: movi d0, #0000000000000000
-; CHECK-GI-FP16-NEXT: mov x8, xzr
+; CHECK-GI-FP16-NEXT: mov w8, #7 // =0x7
; CHECK-GI-FP16-NEXT: .LBB12_1: // %loop
; CHECK-GI-FP16-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-GI-FP16-NEXT: ldr d1, [x0, x8]
-; CHECK-GI-FP16-NEXT: add x8, x8, #8
-; CHECK-GI-FP16-NEXT: cmp w8, #56
+; CHECK-GI-FP16-NEXT: ldr d1, [x0], #8
+; CHECK-GI-FP16-NEXT: subs w8, w8, #1
; CHECK-GI-FP16-NEXT: faddp v1.4h, v1.4h, v1.4h
; CHECK-GI-FP16-NEXT: faddp h1, v1.2h
; CHECK-GI-FP16-NEXT: fadd h0, h1, h0
@@ -458,13 +453,12 @@ define half @fadd_reduction_v8f16_in_loop(ptr %ptr.start) {
; CHECK-SD-NOFP16-LABEL: fadd_reduction_v8f16_in_loop:
; CHECK-SD-NOFP16: // %bb.0: // %entry
; CHECK-SD-NOFP16-NEXT: movi d0, #0000000000000000
-; CHECK-SD-NOFP16-NEXT: mov x8, xzr
+; CHECK-SD-NOFP16-NEXT: mov w8, #7 // =0x7
; CHECK-SD-NOFP16-NEXT: .LBB13_1: // %loop
; CHECK-SD-NOFP16-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-SD-NOFP16-NEXT: ldr q1, [x0, x8]
+; CHECK-SD-NOFP16-NEXT: ldr q1, [x0], #8
; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
-; CHECK-SD-NOFP16-NEXT: add x8, x8, #8
-; CHECK-SD-NOFP16-NEXT: cmp w8, #56
+; CHECK-SD-NOFP16-NEXT: subs w8, w8, #1
; CHECK-SD-NOFP16-NEXT: mov h2, v1.h[3]
; CHECK-SD-NOFP16-NEXT: mov h3, v1.h[2]
; CHECK-SD-NOFP16-NEXT: mov h4, v1.h[1]
@@ -496,12 +490,11 @@ define half @fadd_reduction_v8f16_in_loop(ptr %ptr.start) {
; CHECK-SD-FP16-LABEL: fadd_reduction_v8f16_in_loop:
; CHECK-SD-FP16: // %bb.0: // %entry
; CHECK-SD-FP16-NEXT: movi d0, #0000000000000000
-; CHECK-SD-FP16-NEXT: mov x8, xzr
+; CHECK-SD-FP16-NEXT: mov w8, #7 // =0x7
; CHECK-SD-FP16-NEXT: .LBB13_1: // %loop
; CHECK-SD-FP16-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-SD-FP16-NEXT: ldr q1, [x0, x8]
-; CHECK-SD-FP16-NEXT: add x8, x8, #8
-; CHECK-SD-FP16-NEXT: cmp w8, #56
+; CHECK-SD-FP16-NEXT: ldr q1, [x0], #8
+; CHECK-SD-FP16-NEXT: subs w8, w8, #1
; CHECK-SD-FP16-NEXT: faddp v2.8h, v1.8h, v1.8h
; CHECK-SD-FP16-NEXT: faddp v1.8h, v2.8h, v1.8h
; CHECK-SD-FP16-NEXT: faddp h1, v1.2h
@@ -512,13 +505,12 @@ define half @fadd_reduction_v8f16_in_loop(ptr %ptr.start) {
;
; CHECK-GI-NOFP16-LABEL: fadd_reduction_v8f16_in_loop:
; CHECK-GI-NOFP16: // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT: mov x8, xzr
; CHECK-GI-NOFP16-NEXT: mov w9, #0 // =0x0
+; CHECK-GI-NOFP16-NEXT: mov w8, #7 // =0x7
; CHECK-GI-NOFP16-NEXT: .LBB13_1: // %loop
; CHECK-GI-NOFP16-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-GI-NOFP16-NEXT: ldr q0, [x0, x8]
-; CHECK-GI-NOFP16-NEXT: add x8, x8, #8
-; CHECK-GI-NOFP16-NEXT: cmp w8, #56
+; CHECK-GI-NOFP16-NEXT: ldr q0, [x0], #8
+; CHECK-GI-NOFP16-NEXT: subs w8, w8, #1
; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v0.4h
; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
; CHECK-GI-NOFP16-NEXT: fadd v0.4s, v1.4s, v0.4s
@@ -539,12 +531,11 @@ define half @fadd_reduction_v8f16_in_loop(ptr %ptr.start) {
; CHECK-GI-FP16-LABEL: fadd_reduction_v8f16_in_loop:
; CHECK-GI-FP16: // %bb.0: // %entry
; CHECK-GI-FP16-NEXT: movi d0, #0000000000000000
-; CHECK-GI-FP16-NEXT: mov x8, xzr
+; CHECK-GI-FP16-NEXT: mov w8, #7 // =0x7
; CHECK-GI-FP16-NEXT: .LBB13_1: // %loop
; CHECK-GI-FP16-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-GI-FP16-NEXT: ldr q1, [x0, x8]
-; CHECK-GI-FP16-NEXT: add x8, x8, #8
-; CHECK-GI-FP16-NEXT: cmp w8, #56
+; CHECK-GI-FP16-NEXT: ldr q1, [x0], #8
+; CHECK-GI-FP16-NEXT: subs w8, w8, #1
; CHECK-GI-FP16-NEXT: faddp v2.8h, v1.8h, v1.8h
; CHECK-GI-FP16-NEXT: faddp v1.8h, v2.8h, v1.8h
; CHECK-GI-FP16-NEXT: faddp h1, v1.2h
diff --git a/llvm/test/CodeGen/AArch64/vldn_shuffle.ll b/llvm/test/CodeGen/AArch64/vldn_shuffle.ll
index 3685e9cf85bd6..2727cec125e78 100644
--- a/llvm/test/CodeGen/AArch64/vldn_shuffle.ll
+++ b/llvm/test/CodeGen/AArch64/vldn_shuffle.ll
@@ -6,15 +6,14 @@ define void @vld2(ptr nocapture readonly %pSrc, ptr noalias nocapture %pDst, i32
; CHECK: .Lfunc_begin0:
; CHECK-NEXT: .cfi_startproc
; CHECK-NEXT: // %bb.0: // %entry
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: mov w8, #1024 // =0x400
; CHECK-NEXT: .LBB0_1: // %vector.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ld2 { v0.4s, v1.4s }, [x0], #32
+; CHECK-NEXT: subs x8, x8, #4
; CHECK-NEXT: fmul v2.4s, v0.4s, v0.4s
; CHECK-NEXT: fmla v2.4s, v1.4s, v1.4s
-; CHECK-NEXT: str q2, [x1, x8]
-; CHECK-NEXT: add x8, x8, #16
-; CHECK-NEXT: cmp x8, #1, lsl #12 // =4096
+; CHECK-NEXT: str q2, [x1], #16
; CHECK-NEXT: b.ne .LBB0_1
; CHECK-NEXT: // %bb.2: // %while.end
; CHECK-NEXT: ret
@@ -46,16 +45,15 @@ define void @vld3(ptr nocapture readonly %pSrc, ptr noalias nocapture %pDst, i32
; CHECK: .Lfunc_begin1:
; CHECK-NEXT: .cfi_startproc
; CHECK-NEXT: // %bb.0: // %entry
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: mov w8, #1024 // =0x400
; CHECK-NEXT: .LBB1_1: // %vector.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ld3 { v0.4s, v1.4s, v2.4s }, [x0], #48
+; CHECK-NEXT: subs x8, x8, #4
; CHECK-NEXT: fmul v3.4s, v0.4s, v0.4s
; CHECK-NEXT: fmla v3.4s, v1.4s, v1.4s
; CHECK-NEXT: fmla v3.4s, v2.4s, v2.4s
-; CHECK-NEXT: str q3, [x1, x8]
-; CHECK-NEXT: add x8, x8, #16
-; CHECK-NEXT: cmp x8, #1, lsl #12 // =4096
+; CHECK-NEXT: str q3, [x1], #16
; CHECK-NEXT: b.ne .LBB1_1
; CHECK-NEXT: // %bb.2: // %while.end
; CHECK-NEXT: ret
@@ -90,18 +88,16 @@ define void @vld4(ptr nocapture readonly %pSrc, ptr noalias nocapture %pDst, i32
; CHECK: .Lfunc_begin2:
; CHECK-NEXT: .cfi_startproc
; CHECK-NEXT: // %bb.0: // %entry
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: mov w8, #1024 // =0x400
; CHECK-NEXT: .LBB2_1: // %vector.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ld4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x0], #64
-; CHECK-NEXT: add x9, x1, x8
-; CHECK-NEXT: add x8, x8, #32
-; CHECK-NEXT: cmp x8, #2, lsl #12 // =8192
+; CHECK-NEXT: subs x8, x8, #4
; CHECK-NEXT: fmul v4.4s, v0.4s, v0.4s
; CHECK-NEXT: fmla v4.4s, v1.4s, v1.4s
; CHECK-NEXT: fmul v5.4s, v2.4s, v2.4s
; CHECK-NEXT: fmla v5.4s, v3.4s, v3.4s
-; CHECK-NEXT: st2 { v4.4s, v5.4s }, [x9]
+; CHECK-NEXT: st2 { v4.4s, v5.4s }, [x1], #32
; CHECK-NEXT: b.ne .LBB2_1
; CHECK-NEXT: // %bb.2: // %while.end
; CHECK-NEXT: ret
@@ -140,15 +136,12 @@ define void @twosrc(ptr nocapture readonly %pSrc, ptr nocapture readonly %pSrc2,
; CHECK: .Lfunc_begin3:
; CHECK-NEXT: .cfi_startproc
; CHECK-NEXT: // %bb.0: // %entry
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: mov w8, #1024 // =0x400
; CHECK-NEXT: .LBB3_1: // %vector.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: add x9, x0, x8
-; CHECK-NEXT: add x10, x1, x8
-; CHECK-NEXT: add x8, x8, #32
-; CHECK-NEXT: ld2 { v0.4s, v1.4s }, [x9]
-; CHECK-NEXT: cmp x8, #2, lsl #12 // =8192
-; CHECK-NEXT: ld2 { v2.4s, v3.4s }, [x10]
+; CHECK-NEXT: ld2 { v0.4s, v1.4s }, [x0], #32
+; CHECK-NEXT: subs x8, x8, #4
+; CHECK-NEXT: ld2 { v2.4s, v3.4s }, [x1], #32
; CHECK-NEXT: fmul v4.4s, v2.4s, v0.4s
; CHECK-NEXT: fmla v4.4s, v1.4s, v3.4s
; CHECK-NEXT: str q4, [x2], #16
@@ -186,15 +179,14 @@ define void @vld2_multiuse(ptr nocapture readonly %pSrc, ptr noalias nocapture %
; CHECK: .Lfunc_begin4:
; CHECK-NEXT: .cfi_startproc
; CHECK-NEXT: // %bb.0: // %entry
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: mov w8, #1024 // =0x400
; CHECK-NEXT: .LBB4_1: // %vector.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ld2 { v0.4s, v1.4s }, [x0], #32
+; CHECK-NEXT: subs x8, x8, #4
; CHECK-NEXT: fmul v2.4s, v0.4s, v0.4s
; CHECK-NEXT: fmla v2.4s, v1.4s, v1.4s
-; CHECK-NEXT: str q2, [x1, x8]
-; CHECK-NEXT: add x8, x8, #16
-; CHECK-NEXT: cmp x8, #1, lsl #12 // =4096
+; CHECK-NEXT: str q2, [x1], #16
; CHECK-NEXT: b.ne .LBB4_1
; CHECK-NEXT: // %bb.2: // %while.end
; CHECK-NEXT: ret
@@ -225,16 +217,15 @@ define void @vld3_multiuse(ptr nocapture readonly %pSrc, ptr noalias nocapture %
; CHECK: .Lfunc_begin5:
; CHECK-NEXT: .cfi_startproc
; CHECK-NEXT: // %bb.0: // %entry
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: mov w8, #1024 // =0x400
; CHECK-NEXT: .LBB5_1: // %vector.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ld3 { v0.4s, v1.4s, v2.4s }, [x0], #48
+; CHECK-NEXT: subs x8, x8, #4
; CHECK-NEXT: fmul v3.4s, v0.4s, v0.4s
; CHECK-NEXT: fmla v3.4s, v1.4s, v1.4s
; CHECK-NEXT: fmla v3.4s, v2.4s, v2.4s
-; CHECK-NEXT: str q3, [x1, x8]
-; CHECK-NEXT: add x8, x8, #16
-; CHECK-NEXT: cmp x8, #1, lsl #12 // =4096
+; CHECK-NEXT: str q3, [x1], #16
; CHECK-NEXT: b.ne .LBB5_1
; CHECK-NEXT: // %bb.2: // %while.end
; CHECK-NEXT: ret
@@ -267,18 +258,16 @@ define void @vld4_multiuse(ptr nocapture readonly %pSrc, ptr noalias nocapture %
; CHECK: .Lfunc_begin6:
; CHECK-NEXT: .cfi_startproc
; CHECK-NEXT: // %bb.0: // %entry
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: mov w8, #1024 // =0x400
; CHECK-NEXT: .LBB6_1: // %vector.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ld4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x0], #64
-; CHECK-NEXT: add x9, x1, x8
-; CHECK-NEXT: add x8, x8, #32
-; CHECK-NEXT: cmp x8, #2, lsl #12 // =8192
+; CHECK-NEXT: subs x8, x8, #4
; CHECK-NEXT: fmul v4.4s, v0.4s, v0.4s
; CHECK-NEXT: fmla v4.4s, v1.4s, v1.4s
; CHECK-NEXT: fmul v5.4s, v2.4s, v2.4s
; CHECK-NEXT: fmla v5.4s, v3.4s, v3.4s
-; CHECK-NEXT: st2 { v4.4s, v5.4s }, [x9]
+; CHECK-NEXT: st2 { v4.4s, v5.4s }, [x1], #32
; CHECK-NEXT: b.ne .LBB6_1
; CHECK-NEXT: // %bb.2: // %while.end
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/vselect-ext.ll b/llvm/test/CodeGen/AArch64/vselect-ext.ll
index 4f2b9c5a62669..04f1fc5912255 100644
--- a/llvm/test/CodeGen/AArch64/vselect-ext.ll
+++ b/llvm/test/CodeGen/AArch64/vselect-ext.ll
@@ -587,12 +587,11 @@ define void @extension_in_loop_v16i8_to_v16i32(ptr %src, ptr %dst) {
; CHECK-NEXT: ldr q2, [x10, lCPI24_2 at PAGEOFF]
; CHECK-NEXT: Lloh9:
; CHECK-NEXT: ldr q3, [x8, lCPI24_3 at PAGEOFF]
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: mov w8, #128 ; =0x80
; CHECK-NEXT: LBB24_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldr q4, [x0, x8]
-; CHECK-NEXT: add x8, x8, #16
-; CHECK-NEXT: cmp x8, #128
+; CHECK-NEXT: ldr q4, [x0], #16
+; CHECK-NEXT: subs x8, x8, #16
; CHECK-NEXT: cmge.16b v5, v4, #0
; CHECK-NEXT: tbl.16b v7, { v4 }, v0
; CHECK-NEXT: tbl.16b v16, { v4 }, v1
@@ -657,12 +656,11 @@ define void @extension_in_loop_as_shuffle_v16i8_to_v16i32(ptr %src, ptr %dst) {
; CHECK-NEXT: ldr q2, [x10, lCPI25_2 at PAGEOFF]
; CHECK-NEXT: Lloh17:
; CHECK-NEXT: ldr q3, [x8, lCPI25_3 at PAGEOFF]
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: mov w8, #128 ; =0x80
; CHECK-NEXT: LBB25_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldr q4, [x0, x8]
-; CHECK-NEXT: add x8, x8, #16
-; CHECK-NEXT: cmp x8, #128
+; CHECK-NEXT: ldr q4, [x0], #16
+; CHECK-NEXT: subs x8, x8, #16
; CHECK-NEXT: cmge.16b v5, v4, #0
; CHECK-NEXT: tbl.16b v7, { v4 }, v0
; CHECK-NEXT: tbl.16b v16, { v4 }, v1
@@ -728,12 +726,11 @@ define void @shuffle_in_loop_is_no_extend_v16i8_to_v16i32(ptr %src, ptr %dst) {
; CHECK-NEXT: ldr q2, [x10, lCPI26_2 at PAGEOFF]
; CHECK-NEXT: Lloh25:
; CHECK-NEXT: ldr q3, [x8, lCPI26_3 at PAGEOFF]
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: mov w8, #128 ; =0x80
; CHECK-NEXT: LBB26_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldr q4, [x0, x8]
-; CHECK-NEXT: add x8, x8, #16
-; CHECK-NEXT: cmp x8, #128
+; CHECK-NEXT: ldr q4, [x0], #16
+; CHECK-NEXT: subs x8, x8, #16
; CHECK-NEXT: cmge.16b v5, v4, #0
; CHECK-NEXT: tbl.16b v7, { v4 }, v0
; CHECK-NEXT: tbl.16b v16, { v4 }, v1
diff --git a/llvm/test/CodeGen/AArch64/zext-to-tbl.ll b/llvm/test/CodeGen/AArch64/zext-to-tbl.ll
index 74a717f1635a3..631d04f4dbac7 100644
--- a/llvm/test/CodeGen/AArch64/zext-to-tbl.ll
+++ b/llvm/test/CodeGen/AArch64/zext-to-tbl.ll
@@ -161,12 +161,11 @@ define void @zext_v16i8_to_v16i32_in_loop(ptr %src, ptr %dst) {
; CHECK-NEXT: ldr q2, [x10, lCPI0_2 at PAGEOFF]
; CHECK-NEXT: Lloh7:
; CHECK-NEXT: ldr q3, [x8, lCPI0_3 at PAGEOFF]
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: mov w8, #128 ; =0x80
; CHECK-NEXT: LBB0_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldr q4, [x0, x8]
-; CHECK-NEXT: add x8, x8, #16
-; CHECK-NEXT: cmp x8, #128
+; CHECK-NEXT: ldr q4, [x0], #16
+; CHECK-NEXT: subs x8, x8, #16
; CHECK-NEXT: tbl.16b v5, { v4 }, v3
; CHECK-NEXT: tbl.16b v6, { v4 }, v2
; CHECK-NEXT: tbl.16b v7, { v4 }, v1
@@ -196,24 +195,22 @@ define void @zext_v16i8_to_v16i32_in_loop(ptr %src, ptr %dst) {
; CHECK-BE-NEXT: adrp x8, .LCPI0_3
; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI0_3
; CHECK-BE-NEXT: ld1 { v3.16b }, [x8]
-; CHECK-BE-NEXT: mov x8, xzr
+; CHECK-BE-NEXT: mov w8, #128 // =0x80
; CHECK-BE-NEXT: .LBB0_1: // %loop
; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-BE-NEXT: add x9, x0, x8
-; CHECK-BE-NEXT: add x8, x8, #16
-; CHECK-BE-NEXT: ld1 { v4.16b }, [x9]
+; CHECK-BE-NEXT: ld1 { v4.16b }, [x0], #16
; CHECK-BE-NEXT: add x9, x1, #48
-; CHECK-BE-NEXT: cmp x8, #128
+; CHECK-BE-NEXT: add x10, x1, #32
+; CHECK-BE-NEXT: subs x8, x8, #16
; CHECK-BE-NEXT: tbl v5.16b, { v4.16b }, v3.16b
; CHECK-BE-NEXT: tbl v6.16b, { v4.16b }, v2.16b
; CHECK-BE-NEXT: tbl v7.16b, { v4.16b }, v1.16b
; CHECK-BE-NEXT: tbl v4.16b, { v4.16b }, v0.16b
; CHECK-BE-NEXT: st1 { v5.16b }, [x9]
-; CHECK-BE-NEXT: add x9, x1, #32
-; CHECK-BE-NEXT: st1 { v6.16b }, [x9]
; CHECK-BE-NEXT: add x9, x1, #16
; CHECK-BE-NEXT: st1 { v4.16b }, [x1]
; CHECK-BE-NEXT: add x1, x1, #64
+; CHECK-BE-NEXT: st1 { v6.16b }, [x10]
; CHECK-BE-NEXT: st1 { v7.16b }, [x9]
; CHECK-BE-NEXT: b.ne .LBB0_1
; CHECK-BE-NEXT: // %bb.2: // %exit
@@ -239,20 +236,20 @@ exit:
define void @zext_v16i8_to_v16i32_in_loop_not_header(ptr %src, ptr %dst, i1 %c) {
; CHECK-LABEL: zext_v16i8_to_v16i32_in_loop_not_header:
; CHECK: ; %bb.0: ; %entry
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: mov w8, #128 ; =0x80
; CHECK-NEXT: b LBB1_2
; CHECK-NEXT: LBB1_1: ; %loop.latch
; CHECK-NEXT: ; in Loop: Header=BB1_2 Depth=1
-; CHECK-NEXT: add x8, x8, #16
+; CHECK-NEXT: subs x8, x8, #16
; CHECK-NEXT: add x1, x1, #64
-; CHECK-NEXT: cmp x8, #128
+; CHECK-NEXT: add x0, x0, #16
; CHECK-NEXT: b.eq LBB1_4
; CHECK-NEXT: LBB1_2: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: tbz w2, #0, LBB1_1
; CHECK-NEXT: ; %bb.3: ; %then
; CHECK-NEXT: ; in Loop: Header=BB1_2 Depth=1
-; CHECK-NEXT: ldr q0, [x0, x8]
+; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ushll2.8h v1, v0, #0
; CHECK-NEXT: ushll.8h v0, v0, #0
; CHECK-NEXT: ushll2.4s v2, v1, #0
@@ -267,23 +264,22 @@ define void @zext_v16i8_to_v16i32_in_loop_not_header(ptr %src, ptr %dst, i1 %c)
;
; CHECK-BE-LABEL: zext_v16i8_to_v16i32_in_loop_not_header:
; CHECK-BE: // %bb.0: // %entry
-; CHECK-BE-NEXT: mov x8, xzr
+; CHECK-BE-NEXT: mov w8, #128 // =0x80
; CHECK-BE-NEXT: b .LBB1_2
; CHECK-BE-NEXT: .LBB1_1: // %loop.latch
; CHECK-BE-NEXT: // in Loop: Header=BB1_2 Depth=1
-; CHECK-BE-NEXT: add x8, x8, #16
+; CHECK-BE-NEXT: subs x8, x8, #16
; CHECK-BE-NEXT: add x1, x1, #64
-; CHECK-BE-NEXT: cmp x8, #128
+; CHECK-BE-NEXT: add x0, x0, #16
; CHECK-BE-NEXT: b.eq .LBB1_4
; CHECK-BE-NEXT: .LBB1_2: // %loop
; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-BE-NEXT: tbz w2, #0, .LBB1_1
; CHECK-BE-NEXT: // %bb.3: // %then
; CHECK-BE-NEXT: // in Loop: Header=BB1_2 Depth=1
-; CHECK-BE-NEXT: add x9, x0, x8
-; CHECK-BE-NEXT: add x10, x1, #32
-; CHECK-BE-NEXT: ld1 { v0.16b }, [x9]
+; CHECK-BE-NEXT: ld1 { v0.16b }, [x0]
; CHECK-BE-NEXT: add x9, x1, #48
+; CHECK-BE-NEXT: add x10, x1, #32
; CHECK-BE-NEXT: ushll2 v1.8h, v0.16b, #0
; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-BE-NEXT: ushll2 v2.4s, v1.8h, #0
@@ -366,12 +362,11 @@ entry:
define void @zext_v16i8_to_v16i32_in_loop_optsize(ptr %src, ptr %dst) optsize {
; CHECK-LABEL: zext_v16i8_to_v16i32_in_loop_optsize:
; CHECK: ; %bb.0: ; %entry
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: mov w8, #128 ; =0x80
; CHECK-NEXT: LBB3_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldr q0, [x0, x8]
-; CHECK-NEXT: add x8, x8, #16
-; CHECK-NEXT: cmp x8, #128
+; CHECK-NEXT: ldr q0, [x0], #16
+; CHECK-NEXT: subs x8, x8, #16
; CHECK-NEXT: ushll2.8h v1, v0, #0
; CHECK-NEXT: ushll.8h v0, v0, #0
; CHECK-NEXT: ushll2.4s v2, v1, #0
@@ -386,14 +381,12 @@ define void @zext_v16i8_to_v16i32_in_loop_optsize(ptr %src, ptr %dst) optsize {
;
; CHECK-BE-LABEL: zext_v16i8_to_v16i32_in_loop_optsize:
; CHECK-BE: // %bb.0: // %entry
-; CHECK-BE-NEXT: mov x8, xzr
+; CHECK-BE-NEXT: mov w8, #128 // =0x80
; CHECK-BE-NEXT: .LBB3_1: // %loop
; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-BE-NEXT: add x9, x0, x8
-; CHECK-BE-NEXT: add x8, x8, #16
-; CHECK-BE-NEXT: ld1 { v0.16b }, [x9]
+; CHECK-BE-NEXT: ld1 { v0.16b }, [x0], #16
; CHECK-BE-NEXT: add x9, x1, #48
-; CHECK-BE-NEXT: cmp x8, #128
+; CHECK-BE-NEXT: subs x8, x8, #16
; CHECK-BE-NEXT: ushll2 v1.8h, v0.16b, #0
; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-BE-NEXT: ushll2 v2.4s, v1.8h, #0
@@ -432,12 +425,11 @@ exit:
define void @zext_v16i8_to_v16i32_in_loop_minsize(ptr %src, ptr %dst) minsize {
; CHECK-LABEL: zext_v16i8_to_v16i32_in_loop_minsize:
; CHECK: ; %bb.0: ; %entry
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: mov w8, #128 ; =0x80
; CHECK-NEXT: LBB4_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldr q0, [x0, x8]
-; CHECK-NEXT: add x8, x8, #16
-; CHECK-NEXT: cmp x8, #128
+; CHECK-NEXT: ldr q0, [x0], #16
+; CHECK-NEXT: subs x8, x8, #16
; CHECK-NEXT: ushll2.8h v1, v0, #0
; CHECK-NEXT: ushll.8h v0, v0, #0
; CHECK-NEXT: ushll2.4s v2, v1, #0
@@ -452,14 +444,12 @@ define void @zext_v16i8_to_v16i32_in_loop_minsize(ptr %src, ptr %dst) minsize {
;
; CHECK-BE-LABEL: zext_v16i8_to_v16i32_in_loop_minsize:
; CHECK-BE: // %bb.0: // %entry
-; CHECK-BE-NEXT: mov x8, xzr
+; CHECK-BE-NEXT: mov w8, #128 // =0x80
; CHECK-BE-NEXT: .LBB4_1: // %loop
; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-BE-NEXT: add x9, x0, x8
-; CHECK-BE-NEXT: add x8, x8, #16
-; CHECK-BE-NEXT: ld1 { v0.16b }, [x9]
+; CHECK-BE-NEXT: ld1 { v0.16b }, [x0], #16
; CHECK-BE-NEXT: add x9, x1, #48
-; CHECK-BE-NEXT: cmp x8, #128
+; CHECK-BE-NEXT: subs x8, x8, #16
; CHECK-BE-NEXT: ushll2 v1.8h, v0.16b, #0
; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-BE-NEXT: ushll2 v2.4s, v1.8h, #0
@@ -497,12 +487,11 @@ exit:
define void @zext_v16i8_to_v16i16_in_loop(ptr %src, ptr %dst) {
; CHECK-LABEL: zext_v16i8_to_v16i16_in_loop:
; CHECK: ; %bb.0: ; %entry
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: mov w8, #128 ; =0x80
; CHECK-NEXT: LBB5_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldr q0, [x0, x8]
-; CHECK-NEXT: add x8, x8, #16
-; CHECK-NEXT: cmp x8, #128
+; CHECK-NEXT: ldr q0, [x0], #16
+; CHECK-NEXT: subs x8, x8, #16
; CHECK-NEXT: ushll2.8h v1, v0, #0
; CHECK-NEXT: ushll.8h v0, v0, #0
; CHECK-NEXT: stp q0, q1, [x1], #32
@@ -512,14 +501,12 @@ define void @zext_v16i8_to_v16i16_in_loop(ptr %src, ptr %dst) {
;
; CHECK-BE-LABEL: zext_v16i8_to_v16i16_in_loop:
; CHECK-BE: // %bb.0: // %entry
-; CHECK-BE-NEXT: mov x8, xzr
+; CHECK-BE-NEXT: mov w8, #128 // =0x80
; CHECK-BE-NEXT: .LBB5_1: // %loop
; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-BE-NEXT: add x9, x0, x8
-; CHECK-BE-NEXT: add x8, x8, #16
-; CHECK-BE-NEXT: ld1 { v0.16b }, [x9]
+; CHECK-BE-NEXT: ld1 { v0.16b }, [x0], #16
; CHECK-BE-NEXT: add x9, x1, #16
-; CHECK-BE-NEXT: cmp x8, #128
+; CHECK-BE-NEXT: subs x8, x8, #16
; CHECK-BE-NEXT: ushll2 v1.8h, v0.16b, #0
; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-BE-NEXT: st1 { v0.8h }, [x1]
@@ -629,12 +616,11 @@ define void @zext_v8i8_to_v8i32_in_loop(ptr %src, ptr %dst) {
; CHECK-NEXT: ldr q0, [x8, lCPI6_0 at PAGEOFF]
; CHECK-NEXT: Lloh11:
; CHECK-NEXT: ldr q1, [x9, lCPI6_1 at PAGEOFF]
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: mov w8, #128 ; =0x80
; CHECK-NEXT: LBB6_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldr d2, [x0, x8]
-; CHECK-NEXT: add x8, x8, #16
-; CHECK-NEXT: cmp x8, #128
+; CHECK-NEXT: ldr d2, [x0], #16
+; CHECK-NEXT: subs x8, x8, #16
; CHECK-NEXT: tbl.16b v3, { v2 }, v1
; CHECK-NEXT: tbl.16b v2, { v2 }, v0
; CHECK-NEXT: stp q2, q3, [x1], #64
@@ -652,14 +638,13 @@ define void @zext_v8i8_to_v8i32_in_loop(ptr %src, ptr %dst) {
; CHECK-BE-NEXT: adrp x8, .LCPI6_1
; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI6_1
; CHECK-BE-NEXT: ld1 { v1.16b }, [x8]
-; CHECK-BE-NEXT: mov x8, xzr
+; CHECK-BE-NEXT: mov w8, #128 // =0x80
; CHECK-BE-NEXT: .LBB6_1: // %loop
; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-BE-NEXT: add x9, x0, x8
-; CHECK-BE-NEXT: add x8, x8, #16
-; CHECK-BE-NEXT: ld1 { v2.8b }, [x9]
+; CHECK-BE-NEXT: ld1 { v2.8b }, [x0]
; CHECK-BE-NEXT: add x9, x1, #16
-; CHECK-BE-NEXT: cmp x8, #128
+; CHECK-BE-NEXT: subs x8, x8, #16
+; CHECK-BE-NEXT: add x0, x0, #16
; CHECK-BE-NEXT: tbl v3.16b, { v2.16b }, v1.16b
; CHECK-BE-NEXT: tbl v2.16b, { v2.16b }, v0.16b
; CHECK-BE-NEXT: st1 { v2.16b }, [x1]
@@ -689,12 +674,11 @@ exit:
define void @zext_v16i8_to_v16i64_in_loop(ptr %src, ptr %dst) {
; CHECK-LABEL: zext_v16i8_to_v16i64_in_loop:
; CHECK: ; %bb.0: ; %entry
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: mov w8, #128 ; =0x80
; CHECK-NEXT: LBB7_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldr q0, [x0, x8]
-; CHECK-NEXT: add x8, x8, #16
-; CHECK-NEXT: cmp x8, #128
+; CHECK-NEXT: ldr q0, [x0], #16
+; CHECK-NEXT: subs x8, x8, #16
; CHECK-NEXT: ushll2.8h v1, v0, #0
; CHECK-NEXT: ushll.8h v0, v0, #0
; CHECK-NEXT: ushll2.4s v2, v1, #0
@@ -719,14 +703,12 @@ define void @zext_v16i8_to_v16i64_in_loop(ptr %src, ptr %dst) {
;
; CHECK-BE-LABEL: zext_v16i8_to_v16i64_in_loop:
; CHECK-BE: // %bb.0: // %entry
-; CHECK-BE-NEXT: mov x8, xzr
+; CHECK-BE-NEXT: mov w8, #128 // =0x80
; CHECK-BE-NEXT: .LBB7_1: // %loop
; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-BE-NEXT: add x9, x0, x8
-; CHECK-BE-NEXT: add x8, x8, #16
-; CHECK-BE-NEXT: ld1 { v0.16b }, [x9]
+; CHECK-BE-NEXT: ld1 { v0.16b }, [x0], #16
; CHECK-BE-NEXT: add x9, x1, #112
-; CHECK-BE-NEXT: cmp x8, #128
+; CHECK-BE-NEXT: subs x8, x8, #16
; CHECK-BE-NEXT: ushll2 v1.8h, v0.16b, #0
; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-BE-NEXT: ushll2 v2.4s, v1.8h, #0
@@ -782,12 +764,11 @@ exit:
define void @zext_v8i8_to_v8i64_in_loop(ptr %src, ptr %dst) {
; CHECK-LABEL: zext_v8i8_to_v8i64_in_loop:
; CHECK: ; %bb.0: ; %entry
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: mov w8, #128 ; =0x80
; CHECK-NEXT: LBB8_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldr d0, [x0, x8]
-; CHECK-NEXT: add x8, x8, #16
-; CHECK-NEXT: cmp x8, #128
+; CHECK-NEXT: ldr d0, [x0], #16
+; CHECK-NEXT: subs x8, x8, #16
; CHECK-NEXT: ushll.8h v0, v0, #0
; CHECK-NEXT: ushll2.4s v1, v0, #0
; CHECK-NEXT: ushll.4s v0, v0, #0
@@ -803,14 +784,13 @@ define void @zext_v8i8_to_v8i64_in_loop(ptr %src, ptr %dst) {
;
; CHECK-BE-LABEL: zext_v8i8_to_v8i64_in_loop:
; CHECK-BE: // %bb.0: // %entry
-; CHECK-BE-NEXT: mov x8, xzr
+; CHECK-BE-NEXT: mov w8, #128 // =0x80
; CHECK-BE-NEXT: .LBB8_1: // %loop
; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-BE-NEXT: add x9, x0, x8
-; CHECK-BE-NEXT: add x8, x8, #16
-; CHECK-BE-NEXT: ld1 { v0.8b }, [x9]
+; CHECK-BE-NEXT: ld1 { v0.8b }, [x0]
; CHECK-BE-NEXT: add x9, x1, #48
-; CHECK-BE-NEXT: cmp x8, #128
+; CHECK-BE-NEXT: subs x8, x8, #16
+; CHECK-BE-NEXT: add x0, x0, #16
; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-BE-NEXT: ushll2 v1.4s, v0.8h, #0
; CHECK-BE-NEXT: ushll v0.4s, v0.4h, #0
@@ -851,12 +831,11 @@ exit:
define void @zext_v8i8_to_v8i16_in_loop(ptr %src, ptr %dst) {
; CHECK-LABEL: zext_v8i8_to_v8i16_in_loop:
; CHECK: ; %bb.0: ; %entry
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: mov w8, #128 ; =0x80
; CHECK-NEXT: LBB9_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldr d0, [x0, x8]
-; CHECK-NEXT: add x8, x8, #16
-; CHECK-NEXT: cmp x8, #128
+; CHECK-NEXT: ldr d0, [x0], #16
+; CHECK-NEXT: subs x8, x8, #16
; CHECK-NEXT: ushll.8h v0, v0, #0
; CHECK-NEXT: str q0, [x1], #32
; CHECK-NEXT: b.ne LBB9_1
@@ -865,13 +844,12 @@ define void @zext_v8i8_to_v8i16_in_loop(ptr %src, ptr %dst) {
;
; CHECK-BE-LABEL: zext_v8i8_to_v8i16_in_loop:
; CHECK-BE: // %bb.0: // %entry
-; CHECK-BE-NEXT: mov x8, xzr
+; CHECK-BE-NEXT: mov w8, #128 // =0x80
; CHECK-BE-NEXT: .LBB9_1: // %loop
; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-BE-NEXT: add x9, x0, x8
-; CHECK-BE-NEXT: add x8, x8, #16
-; CHECK-BE-NEXT: ld1 { v0.8b }, [x9]
-; CHECK-BE-NEXT: cmp x8, #128
+; CHECK-BE-NEXT: ld1 { v0.8b }, [x0]
+; CHECK-BE-NEXT: subs x8, x8, #16
+; CHECK-BE-NEXT: add x0, x0, #16
; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-BE-NEXT: st1 { v0.8h }, [x1]
; CHECK-BE-NEXT: add x1, x1, #32
@@ -902,12 +880,11 @@ exit:
define void @zext_v8i8_to_v8i20_in_loop(ptr %src, ptr %dst) {
; CHECK-LABEL: zext_v8i8_to_v8i20_in_loop:
; CHECK: ; %bb.0: ; %entry
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: mov w8, #128 ; =0x80
; CHECK-NEXT: LBB10_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldr d0, [x0, x8]
-; CHECK-NEXT: add x8, x8, #16
-; CHECK-NEXT: cmp x8, #128
+; CHECK-NEXT: ldr d0, [x0], #16
+; CHECK-NEXT: subs x8, x8, #16
; CHECK-NEXT: ushll.8h v0, v0, #0
; CHECK-NEXT: ushll2.4s v1, v0, #0
; CHECK-NEXT: ushll.4s v0, v0, #0
@@ -937,13 +914,12 @@ define void @zext_v8i8_to_v8i20_in_loop(ptr %src, ptr %dst) {
;
; CHECK-BE-LABEL: zext_v8i8_to_v8i20_in_loop:
; CHECK-BE: // %bb.0: // %entry
-; CHECK-BE-NEXT: mov x8, xzr
+; CHECK-BE-NEXT: mov w8, #128 // =0x80
; CHECK-BE-NEXT: .LBB10_1: // %loop
; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-BE-NEXT: add x9, x0, x8
-; CHECK-BE-NEXT: add x8, x8, #16
-; CHECK-BE-NEXT: ld1 { v0.8b }, [x9]
-; CHECK-BE-NEXT: cmp x8, #128
+; CHECK-BE-NEXT: ld1 { v0.8b }, [x0]
+; CHECK-BE-NEXT: subs x8, x8, #16
+; CHECK-BE-NEXT: add x0, x0, #16
; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-BE-NEXT: ushll2 v1.4s, v0.8h, #0
; CHECK-BE-NEXT: ushll v0.4s, v0.4h, #0
@@ -997,12 +973,11 @@ exit:
define void @zext_v4i8_to_v4i32_in_loop(ptr %src, ptr %dst) {
; CHECK-LABEL: zext_v4i8_to_v4i32_in_loop:
; CHECK: ; %bb.0: ; %entry
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: mov w8, #128 ; =0x80
; CHECK-NEXT: LBB11_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldr s0, [x0, x8]
-; CHECK-NEXT: add x8, x8, #16
-; CHECK-NEXT: cmp x8, #128
+; CHECK-NEXT: ldr s0, [x0], #16
+; CHECK-NEXT: subs x8, x8, #16
; CHECK-NEXT: ushll.8h v0, v0, #0
; CHECK-NEXT: ushll.4s v0, v0, #0
; CHECK-NEXT: str q0, [x1], #64
@@ -1015,12 +990,11 @@ define void @zext_v4i8_to_v4i32_in_loop(ptr %src, ptr %dst) {
; CHECK-BE-NEXT: adrp x8, .LCPI11_0
; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI11_0
; CHECK-BE-NEXT: ld1 { v0.16b }, [x8]
-; CHECK-BE-NEXT: mov x8, xzr
+; CHECK-BE-NEXT: mov w8, #128 // =0x80
; CHECK-BE-NEXT: .LBB11_1: // %loop
; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-BE-NEXT: ldr s1, [x0, x8]
-; CHECK-BE-NEXT: add x8, x8, #16
-; CHECK-BE-NEXT: cmp x8, #128
+; CHECK-BE-NEXT: ldr s1, [x0], #16
+; CHECK-BE-NEXT: subs x8, x8, #16
; CHECK-BE-NEXT: rev32 v1.16b, v1.16b
; CHECK-BE-NEXT: tbl v1.16b, { v1.16b }, v0.16b
; CHECK-BE-NEXT: st1 { v1.16b }, [x1]
@@ -1167,12 +1141,11 @@ define void @zext_v12i8_to_v12i32_in_loop(ptr %src, ptr %dst) {
; CHECK-NEXT: ldr q1, [x9, lCPI12_1 at PAGEOFF]
; CHECK-NEXT: Lloh17:
; CHECK-NEXT: ldr q2, [x10, lCPI12_2 at PAGEOFF]
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: mov w8, #128 ; =0x80
; CHECK-NEXT: LBB12_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldr q3, [x0, x8]
-; CHECK-NEXT: add x8, x8, #16
-; CHECK-NEXT: cmp x8, #128
+; CHECK-NEXT: ldr q3, [x0], #16
+; CHECK-NEXT: subs x8, x8, #16
; CHECK-NEXT: tbl.16b v4, { v3 }, v2
; CHECK-NEXT: tbl.16b v5, { v3 }, v1
; CHECK-NEXT: tbl.16b v3, { v3 }, v0
@@ -1196,15 +1169,13 @@ define void @zext_v12i8_to_v12i32_in_loop(ptr %src, ptr %dst) {
; CHECK-BE-NEXT: adrp x8, .LCPI12_2
; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI12_2
; CHECK-BE-NEXT: ld1 { v2.16b }, [x8]
-; CHECK-BE-NEXT: mov x8, xzr
+; CHECK-BE-NEXT: mov w8, #128 // =0x80
; CHECK-BE-NEXT: .LBB12_1: // %loop
; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-BE-NEXT: add x9, x0, x8
-; CHECK-BE-NEXT: add x8, x8, #16
-; CHECK-BE-NEXT: add x10, x1, #16
-; CHECK-BE-NEXT: ld1 { v3.16b }, [x9]
+; CHECK-BE-NEXT: ld1 { v3.16b }, [x0], #16
; CHECK-BE-NEXT: add x9, x1, #32
-; CHECK-BE-NEXT: cmp x8, #128
+; CHECK-BE-NEXT: add x10, x1, #16
+; CHECK-BE-NEXT: subs x8, x8, #16
; CHECK-BE-NEXT: tbl v4.16b, { v3.16b }, v2.16b
; CHECK-BE-NEXT: tbl v5.16b, { v3.16b }, v1.16b
; CHECK-BE-NEXT: tbl v3.16b, { v3.16b }, v0.16b
@@ -1239,12 +1210,11 @@ define void @zext_v16i4_to_v16i32_in_loop(ptr %src, ptr %dst) {
; CHECK-LABEL: zext_v16i4_to_v16i32_in_loop:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: movi.4s v0, #15
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: mov w8, #128 ; =0x80
; CHECK-NEXT: LBB13_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldr x9, [x0, x8]
-; CHECK-NEXT: add x8, x8, #16
-; CHECK-NEXT: cmp x8, #128
+; CHECK-NEXT: ldr x9, [x0], #16
+; CHECK-NEXT: subs x8, x8, #16
; CHECK-NEXT: ubfx x12, x9, #48, #4
; CHECK-NEXT: lsr x10, x9, #52
; CHECK-NEXT: ubfx x13, x9, #32, #4
@@ -1294,12 +1264,11 @@ define void @zext_v16i4_to_v16i32_in_loop(ptr %src, ptr %dst) {
; CHECK-BE-LABEL: zext_v16i4_to_v16i32_in_loop:
; CHECK-BE: // %bb.0: // %entry
; CHECK-BE-NEXT: movi v0.4s, #15
-; CHECK-BE-NEXT: mov x8, xzr
+; CHECK-BE-NEXT: mov w8, #128 // =0x80
; CHECK-BE-NEXT: .LBB13_1: // %loop
; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-BE-NEXT: ldr x9, [x0, x8]
-; CHECK-BE-NEXT: add x8, x8, #16
-; CHECK-BE-NEXT: cmp x8, #128
+; CHECK-BE-NEXT: ldr x9, [x0], #16
+; CHECK-BE-NEXT: subs x8, x8, #16
; CHECK-BE-NEXT: ubfx w11, w9, #12, #4
; CHECK-BE-NEXT: lsr w14, w9, #28
; CHECK-BE-NEXT: lsr w10, w9, #8
@@ -1373,13 +1342,11 @@ exit:
define void @zext_v16i16_to_v16i64_in_loop(ptr %src, ptr %dst) {
; CHECK-LABEL: zext_v16i16_to_v16i64_in_loop:
; CHECK: ; %bb.0: ; %entry
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: mov w8, #128 ; =0x80
; CHECK-NEXT: LBB14_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: add x9, x0, x8
-; CHECK-NEXT: add x8, x8, #32
-; CHECK-NEXT: ldp q1, q0, [x9]
-; CHECK-NEXT: cmp x8, #256
+; CHECK-NEXT: ldp q1, q0, [x0], #32
+; CHECK-NEXT: subs x8, x8, #16
; CHECK-NEXT: ushll2.4s v2, v0, #0
; CHECK-NEXT: ushll2.4s v3, v1, #0
; CHECK-NEXT: ushll.4s v0, v0, #0
@@ -1402,16 +1369,15 @@ define void @zext_v16i16_to_v16i64_in_loop(ptr %src, ptr %dst) {
;
; CHECK-BE-LABEL: zext_v16i16_to_v16i64_in_loop:
; CHECK-BE: // %bb.0: // %entry
-; CHECK-BE-NEXT: mov x8, xzr
+; CHECK-BE-NEXT: mov w8, #128 // =0x80
; CHECK-BE-NEXT: .LBB14_1: // %loop
; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-BE-NEXT: add x9, x0, x8
-; CHECK-BE-NEXT: add x8, x8, #32
-; CHECK-BE-NEXT: ld1 { v0.8h }, [x9]
-; CHECK-BE-NEXT: add x9, x9, #16
-; CHECK-BE-NEXT: cmp x8, #256
+; CHECK-BE-NEXT: ld1 { v0.8h }, [x0]
+; CHECK-BE-NEXT: add x9, x0, #16
+; CHECK-BE-NEXT: subs x8, x8, #16
; CHECK-BE-NEXT: ld1 { v1.8h }, [x9]
; CHECK-BE-NEXT: add x9, x1, #48
+; CHECK-BE-NEXT: add x0, x0, #32
; CHECK-BE-NEXT: ushll2 v2.4s, v0.8h, #0
; CHECK-BE-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-BE-NEXT: ushll2 v3.4s, v1.8h, #0
@@ -1465,14 +1431,12 @@ exit:
define void @zext_v16i32_to_v16i64_in_loop(ptr %src, ptr %dst) {
; CHECK-LABEL: zext_v16i32_to_v16i64_in_loop:
; CHECK: ; %bb.0: ; %entry
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: mov w8, #128 ; =0x80
; CHECK-NEXT: LBB15_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: add x9, x0, x8
-; CHECK-NEXT: add x8, x8, #64
-; CHECK-NEXT: ldp q1, q0, [x9, #32]
-; CHECK-NEXT: cmp x8, #512
-; CHECK-NEXT: ldp q5, q4, [x9]
+; CHECK-NEXT: ldp q1, q0, [x0, #32]
+; CHECK-NEXT: subs x8, x8, #16
+; CHECK-NEXT: ldp q5, q4, [x0], #64
; CHECK-NEXT: ushll2.2d v2, v0, #0
; CHECK-NEXT: ushll2.2d v3, v1, #0
; CHECK-NEXT: ushll.2d v0, v0, #0
@@ -1491,43 +1455,42 @@ define void @zext_v16i32_to_v16i64_in_loop(ptr %src, ptr %dst) {
;
; CHECK-BE-LABEL: zext_v16i32_to_v16i64_in_loop:
; CHECK-BE: // %bb.0: // %entry
-; CHECK-BE-NEXT: mov x8, xzr
+; CHECK-BE-NEXT: mov w8, #128 // =0x80
; CHECK-BE-NEXT: .LBB15_1: // %loop
; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-BE-NEXT: add x9, x0, x8
-; CHECK-BE-NEXT: add x8, x8, #64
-; CHECK-BE-NEXT: ld1 { v0.4s }, [x9]
-; CHECK-BE-NEXT: add x10, x9, #48
-; CHECK-BE-NEXT: cmp x8, #512
-; CHECK-BE-NEXT: ld1 { v1.4s }, [x10]
-; CHECK-BE-NEXT: add x10, x9, #32
-; CHECK-BE-NEXT: add x9, x9, #16
+; CHECK-BE-NEXT: ld1 { v0.4s }, [x0]
+; CHECK-BE-NEXT: add x9, x0, #48
+; CHECK-BE-NEXT: subs x8, x8, #16
+; CHECK-BE-NEXT: ld1 { v1.4s }, [x9]
+; CHECK-BE-NEXT: add x9, x0, #32
+; CHECK-BE-NEXT: ld1 { v2.4s }, [x9]
+; CHECK-BE-NEXT: add x9, x0, #16
+; CHECK-BE-NEXT: add x0, x0, #64
+; CHECK-BE-NEXT: ushll2 v3.2d, v0.4s, #0
; CHECK-BE-NEXT: ld1 { v4.4s }, [x9]
-; CHECK-BE-NEXT: ld1 { v2.4s }, [x10]
; CHECK-BE-NEXT: add x9, x1, #16
-; CHECK-BE-NEXT: ushll2 v3.2d, v0.4s, #0
-; CHECK-BE-NEXT: ushll v0.2d, v0.2s, #0
-; CHECK-BE-NEXT: add x10, x1, #80
; CHECK-BE-NEXT: ushll2 v5.2d, v1.4s, #0
+; CHECK-BE-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-BE-NEXT: ushll v1.2d, v1.2s, #0
; CHECK-BE-NEXT: ushll2 v6.2d, v2.4s, #0
+; CHECK-BE-NEXT: ushll v2.2d, v2.2s, #0
; CHECK-BE-NEXT: st1 { v3.2d }, [x9]
-; CHECK-BE-NEXT: ushll2 v3.2d, v4.4s, #0
; CHECK-BE-NEXT: add x9, x1, #112
-; CHECK-BE-NEXT: st1 { v0.2d }, [x1]
-; CHECK-BE-NEXT: ushll v0.2d, v1.2s, #0
-; CHECK-BE-NEXT: ushll v1.2d, v2.2s, #0
+; CHECK-BE-NEXT: ushll2 v3.2d, v4.4s, #0
; CHECK-BE-NEXT: st1 { v5.2d }, [x9]
+; CHECK-BE-NEXT: add x9, x1, #80
+; CHECK-BE-NEXT: st1 { v6.2d }, [x9]
; CHECK-BE-NEXT: add x9, x1, #48
-; CHECK-BE-NEXT: ushll v2.2d, v4.2s, #0
+; CHECK-BE-NEXT: st1 { v0.2d }, [x1]
+; CHECK-BE-NEXT: ushll v0.2d, v4.2s, #0
; CHECK-BE-NEXT: st1 { v3.2d }, [x9]
-; CHECK-BE-NEXT: add x9, x1, #64
-; CHECK-BE-NEXT: st1 { v6.2d }, [x10]
-; CHECK-BE-NEXT: add x10, x1, #96
+; CHECK-BE-NEXT: add x9, x1, #96
; CHECK-BE-NEXT: st1 { v1.2d }, [x9]
+; CHECK-BE-NEXT: add x9, x1, #64
+; CHECK-BE-NEXT: st1 { v2.2d }, [x9]
; CHECK-BE-NEXT: add x9, x1, #32
; CHECK-BE-NEXT: add x1, x1, #128
-; CHECK-BE-NEXT: st1 { v0.2d }, [x10]
-; CHECK-BE-NEXT: st1 { v2.2d }, [x9]
+; CHECK-BE-NEXT: st1 { v0.2d }, [x9]
; CHECK-BE-NEXT: b.ne .LBB15_1
; CHECK-BE-NEXT: // %bb.2: // %exit
; CHECK-BE-NEXT: ret
@@ -1554,14 +1517,13 @@ exit:
define void @zext_v8i8_to_v8i128_in_loop(ptr %src, ptr %dst) {
; CHECK-LABEL: zext_v8i8_to_v8i128_in_loop:
; CHECK: ; %bb.0: ; %entry
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: mov w8, #128 ; =0x80
; CHECK-NEXT: LBB16_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldr d0, [x0, x8]
-; CHECK-NEXT: add x8, x8, #16
+; CHECK-NEXT: ldr d0, [x0], #16
; CHECK-NEXT: str xzr, [x1, #120]
; CHECK-NEXT: str xzr, [x1, #104]
-; CHECK-NEXT: cmp x8, #128
+; CHECK-NEXT: subs x8, x8, #16
; CHECK-NEXT: ushll.8h v0, v0, #0
; CHECK-NEXT: str xzr, [x1, #88]
; CHECK-NEXT: str xzr, [x1, #72]
@@ -1594,15 +1556,14 @@ define void @zext_v8i8_to_v8i128_in_loop(ptr %src, ptr %dst) {
;
; CHECK-BE-LABEL: zext_v8i8_to_v8i128_in_loop:
; CHECK-BE: // %bb.0: // %entry
-; CHECK-BE-NEXT: mov x8, xzr
+; CHECK-BE-NEXT: mov w8, #128 // =0x80
; CHECK-BE-NEXT: .LBB16_1: // %loop
; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-BE-NEXT: add x9, x0, x8
-; CHECK-BE-NEXT: add x8, x8, #16
-; CHECK-BE-NEXT: ld1 { v0.8b }, [x9]
+; CHECK-BE-NEXT: ld1 { v0.8b }, [x0]
; CHECK-BE-NEXT: str xzr, [x1, #112]
-; CHECK-BE-NEXT: cmp x8, #128
+; CHECK-BE-NEXT: subs x8, x8, #16
; CHECK-BE-NEXT: str xzr, [x1, #96]
+; CHECK-BE-NEXT: add x0, x0, #16
; CHECK-BE-NEXT: str xzr, [x1, #80]
; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-BE-NEXT: str xzr, [x1, #64]
@@ -1660,20 +1621,19 @@ define void @zext_v8i8_to_v8i64_with_add_in_sequence_in_loop(ptr %src, ptr %dst)
; CHECK-NEXT: adrp x8, lCPI17_0 at PAGE
; CHECK-NEXT: Lloh19:
; CHECK-NEXT: adrp x9, lCPI17_1 at PAGE
-; CHECK-NEXT: mov w10, #128 ; =0x80
; CHECK-NEXT: Lloh20:
; CHECK-NEXT: ldr q0, [x8, lCPI17_0 at PAGEOFF]
; CHECK-NEXT: Lloh21:
; CHECK-NEXT: ldr q1, [x9, lCPI17_1 at PAGEOFF]
-; CHECK-NEXT: add x8, x1, #64
-; CHECK-NEXT: add x9, x0, #8
+; CHECK-NEXT: add x8, x0, #8
+; CHECK-NEXT: mov w9, #128 ; =0x80
; CHECK-NEXT: LBB17_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldp d2, d3, [x9, #-8]
-; CHECK-NEXT: subs x10, x10, #16
-; CHECK-NEXT: ldp q7, q5, [x8, #-32]
-; CHECK-NEXT: add x9, x9, #16
-; CHECK-NEXT: ldp q17, q6, [x8, #-64]
+; CHECK-NEXT: ldp d2, d3, [x8, #-8]
+; CHECK-NEXT: subs x9, x9, #16
+; CHECK-NEXT: ldp q7, q5, [x1, #32]
+; CHECK-NEXT: add x8, x8, #16
+; CHECK-NEXT: ldp q17, q6, [x1]
; CHECK-NEXT: tbl.16b v4, { v2 }, v1
; CHECK-NEXT: tbl.16b v2, { v2 }, v0
; CHECK-NEXT: tbl.16b v16, { v3 }, v1
@@ -1681,17 +1641,18 @@ define void @zext_v8i8_to_v8i64_with_add_in_sequence_in_loop(ptr %src, ptr %dst)
; CHECK-NEXT: uaddw2.2d v5, v5, v4
; CHECK-NEXT: uaddw2.2d v6, v6, v2
; CHECK-NEXT: uaddw.2d v4, v7, v4
-; CHECK-NEXT: ldp q18, q7, [x8, #32]
+; CHECK-NEXT: ldp q18, q7, [x1, #96]
; CHECK-NEXT: uaddw.2d v2, v17, v2
-; CHECK-NEXT: stp q4, q5, [x8, #-32]
+; CHECK-NEXT: stp q4, q5, [x1, #32]
; CHECK-NEXT: uaddw2.2d v5, v7, v16
-; CHECK-NEXT: stp q2, q6, [x8, #-64]
+; CHECK-NEXT: stp q2, q6, [x1]
; CHECK-NEXT: uaddw.2d v16, v18, v16
-; CHECK-NEXT: ldp q7, q6, [x8]
-; CHECK-NEXT: stp q16, q5, [x8, #32]
+; CHECK-NEXT: ldp q7, q6, [x1, #64]
+; CHECK-NEXT: stp q16, q5, [x1, #96]
; CHECK-NEXT: uaddw2.2d v4, v6, v3
; CHECK-NEXT: uaddw.2d v2, v7, v3
-; CHECK-NEXT: stp q2, q4, [x8], #128
+; CHECK-NEXT: stp q2, q4, [x1, #64]
+; CHECK-NEXT: add x1, x1, #128
; CHECK-NEXT: b.ne LBB17_1
; CHECK-NEXT: ; %bb.2: ; %exit
; CHECK-NEXT: ret
@@ -1707,63 +1668,62 @@ define void @zext_v8i8_to_v8i64_with_add_in_sequence_in_loop(ptr %src, ptr %dst)
; CHECK-BE-NEXT: adrp x9, .LCPI17_1
; CHECK-BE-NEXT: add x9, x9, :lo12:.LCPI17_1
; CHECK-BE-NEXT: ld1 { v1.16b }, [x9]
-; CHECK-BE-NEXT: add x9, x1, #64
-; CHECK-BE-NEXT: add x10, x0, #8
+; CHECK-BE-NEXT: add x9, x0, #8
; CHECK-BE-NEXT: .LBB17_1: // %loop
; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-BE-NEXT: ld1 { v2.8b }, [x10]
-; CHECK-BE-NEXT: sub x11, x10, #8
-; CHECK-BE-NEXT: add x15, x9, #32
-; CHECK-BE-NEXT: ld1 { v3.8b }, [x11]
-; CHECK-BE-NEXT: ld1 { v16.2d }, [x15]
-; CHECK-BE-NEXT: sub x11, x9, #64
-; CHECK-BE-NEXT: sub x12, x9, #32
-; CHECK-BE-NEXT: ld1 { v6.2d }, [x9]
-; CHECK-BE-NEXT: ld1 { v21.2d }, [x11]
+; CHECK-BE-NEXT: sub x10, x9, #8
+; CHECK-BE-NEXT: ld1 { v2.8b }, [x9]
+; CHECK-BE-NEXT: add x13, x1, #64
+; CHECK-BE-NEXT: ld1 { v3.8b }, [x10]
+; CHECK-BE-NEXT: add x10, x1, #32
+; CHECK-BE-NEXT: add x14, x1, #96
+; CHECK-BE-NEXT: ld1 { v5.2d }, [x1]
+; CHECK-BE-NEXT: ld1 { v16.2d }, [x14]
+; CHECK-BE-NEXT: ld1 { v19.2d }, [x13]
; CHECK-BE-NEXT: tbl v4.16b, { v2.16b }, v1.16b
; CHECK-BE-NEXT: tbl v2.16b, { v2.16b }, v0.16b
-; CHECK-BE-NEXT: ld1 { v19.2d }, [x12]
-; CHECK-BE-NEXT: tbl v5.16b, { v3.16b }, v1.16b
+; CHECK-BE-NEXT: ld1 { v21.2d }, [x10]
+; CHECK-BE-NEXT: tbl v6.16b, { v3.16b }, v1.16b
; CHECK-BE-NEXT: tbl v3.16b, { v3.16b }, v0.16b
-; CHECK-BE-NEXT: sub x13, x9, #16
-; CHECK-BE-NEXT: sub x14, x9, #48
-; CHECK-BE-NEXT: add x16, x9, #48
-; CHECK-BE-NEXT: add x17, x9, #16
-; CHECK-BE-NEXT: ld1 { v22.2d }, [x13]
+; CHECK-BE-NEXT: add x11, x1, #48
+; CHECK-BE-NEXT: add x12, x1, #16
+; CHECK-BE-NEXT: add x15, x1, #112
+; CHECK-BE-NEXT: add x16, x1, #80
+; CHECK-BE-NEXT: ld1 { v22.2d }, [x11]
; CHECK-BE-NEXT: subs x8, x8, #16
-; CHECK-BE-NEXT: add x10, x10, #16
+; CHECK-BE-NEXT: add x9, x9, #16
; CHECK-BE-NEXT: rev32 v7.8b, v4.8b
; CHECK-BE-NEXT: ext v4.16b, v4.16b, v4.16b, #8
; CHECK-BE-NEXT: rev32 v17.8b, v2.8b
-; CHECK-BE-NEXT: ext v18.16b, v5.16b, v5.16b, #8
+; CHECK-BE-NEXT: ext v18.16b, v6.16b, v6.16b, #8
; CHECK-BE-NEXT: ext v20.16b, v3.16b, v3.16b, #8
; CHECK-BE-NEXT: ext v2.16b, v2.16b, v2.16b, #8
-; CHECK-BE-NEXT: rev32 v5.8b, v5.8b
+; CHECK-BE-NEXT: rev32 v6.8b, v6.8b
; CHECK-BE-NEXT: rev32 v3.8b, v3.8b
-; CHECK-BE-NEXT: uaddw v7.2d, v16.2d, v7.2s
; CHECK-BE-NEXT: rev32 v4.8b, v4.8b
-; CHECK-BE-NEXT: uaddw v6.2d, v6.2d, v17.2s
-; CHECK-BE-NEXT: rev32 v17.8b, v18.8b
+; CHECK-BE-NEXT: uaddw v7.2d, v16.2d, v7.2s
+; CHECK-BE-NEXT: ld1 { v16.2d }, [x15]
+; CHECK-BE-NEXT: rev32 v18.8b, v18.8b
; CHECK-BE-NEXT: rev32 v20.8b, v20.8b
; CHECK-BE-NEXT: rev32 v2.8b, v2.8b
-; CHECK-BE-NEXT: ld1 { v16.2d }, [x16]
-; CHECK-BE-NEXT: ld1 { v18.2d }, [x14]
-; CHECK-BE-NEXT: uaddw v5.2d, v19.2d, v5.2s
-; CHECK-BE-NEXT: uaddw v3.2d, v21.2d, v3.2s
-; CHECK-BE-NEXT: st1 { v7.2d }, [x15]
-; CHECK-BE-NEXT: ld1 { v7.2d }, [x17]
-; CHECK-BE-NEXT: st1 { v6.2d }, [x9]
-; CHECK-BE-NEXT: add x9, x9, #128
+; CHECK-BE-NEXT: uaddw v17.2d, v19.2d, v17.2s
+; CHECK-BE-NEXT: ld1 { v19.2d }, [x12]
+; CHECK-BE-NEXT: uaddw v6.2d, v21.2d, v6.2s
+; CHECK-BE-NEXT: uaddw v3.2d, v5.2d, v3.2s
+; CHECK-BE-NEXT: ld1 { v5.2d }, [x16]
+; CHECK-BE-NEXT: st1 { v7.2d }, [x14]
; CHECK-BE-NEXT: uaddw v4.2d, v16.2d, v4.2s
-; CHECK-BE-NEXT: st1 { v5.2d }, [x12]
-; CHECK-BE-NEXT: uaddw v5.2d, v22.2d, v17.2s
-; CHECK-BE-NEXT: st1 { v3.2d }, [x11]
-; CHECK-BE-NEXT: uaddw v3.2d, v18.2d, v20.2s
-; CHECK-BE-NEXT: uaddw v2.2d, v7.2d, v2.2s
-; CHECK-BE-NEXT: st1 { v4.2d }, [x16]
-; CHECK-BE-NEXT: st1 { v5.2d }, [x13]
-; CHECK-BE-NEXT: st1 { v3.2d }, [x14]
-; CHECK-BE-NEXT: st1 { v2.2d }, [x17]
+; CHECK-BE-NEXT: st1 { v6.2d }, [x10]
+; CHECK-BE-NEXT: uaddw v6.2d, v22.2d, v18.2s
+; CHECK-BE-NEXT: st1 { v3.2d }, [x1]
+; CHECK-BE-NEXT: uaddw v3.2d, v19.2d, v20.2s
+; CHECK-BE-NEXT: uaddw v2.2d, v5.2d, v2.2s
+; CHECK-BE-NEXT: add x1, x1, #128
+; CHECK-BE-NEXT: st1 { v17.2d }, [x13]
+; CHECK-BE-NEXT: st1 { v4.2d }, [x15]
+; CHECK-BE-NEXT: st1 { v6.2d }, [x11]
+; CHECK-BE-NEXT: st1 { v3.2d }, [x12]
+; CHECK-BE-NEXT: st1 { v2.2d }, [x16]
; CHECK-BE-NEXT: b.ne .LBB17_1
; CHECK-BE-NEXT: // %bb.2: // %exit
; CHECK-BE-NEXT: ret
@@ -2176,13 +2136,12 @@ define void @zext_v20i8_to_v20i24_in_loop(ptr %src, ptr %dst) {
; CHECK-NEXT: ldr q2, [x10, lCPI20_2 at PAGEOFF]
; CHECK-NEXT: Lloh29:
; CHECK-NEXT: ldr q3, [x8, lCPI20_3 at PAGEOFF]
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: mov w8, #128 ; =0x80
; CHECK-NEXT: LBB20_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: add x9, x0, x8
-; CHECK-NEXT: add x8, x8, #16
-; CHECK-NEXT: ldp q4, q5, [x9]
-; CHECK-NEXT: cmp x8, #128
+; CHECK-NEXT: ldr q4, [x0]
+; CHECK-NEXT: ldr q5, [x0, #16]!
+; CHECK-NEXT: subs x8, x8, #16
; CHECK-NEXT: tbl.16b v5, { v5 }, v0
; CHECK-NEXT: tbl.16b v6, { v4 }, v3
; CHECK-NEXT: tbl.16b v7, { v4 }, v2
@@ -2216,28 +2175,26 @@ define void @zext_v20i8_to_v20i24_in_loop(ptr %src, ptr %dst) {
; CHECK-BE-NEXT: adrp x8, .LCPI20_3
; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI20_3
; CHECK-BE-NEXT: ld1 { v3.16b }, [x8]
-; CHECK-BE-NEXT: mov x8, xzr
+; CHECK-BE-NEXT: mov w8, #128 // =0x80
; CHECK-BE-NEXT: .LBB20_1: // %loop
; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-BE-NEXT: add x9, x0, x8
-; CHECK-BE-NEXT: add x8, x8, #16
-; CHECK-BE-NEXT: add x10, x9, #16
-; CHECK-BE-NEXT: ld1 { v5.16b }, [x9]
+; CHECK-BE-NEXT: ld1 { v4.16b }, [x0]
+; CHECK-BE-NEXT: add x0, x0, #16
; CHECK-BE-NEXT: add x9, x1, #32
-; CHECK-BE-NEXT: ld1 { v4.16b }, [x10]
-; CHECK-BE-NEXT: cmp x8, #128
-; CHECK-BE-NEXT: tbl v6.16b, { v5.16b }, v3.16b
-; CHECK-BE-NEXT: tbl v16.16b, { v5.16b }, v2.16b
-; CHECK-BE-NEXT: tbl v5.16b, { v5.16b }, v1.16b
-; CHECK-BE-NEXT: tbl v4.16b, { v4.16b }, v0.16b
+; CHECK-BE-NEXT: ld1 { v5.16b }, [x0]
+; CHECK-BE-NEXT: subs x8, x8, #16
+; CHECK-BE-NEXT: tbl v6.16b, { v4.16b }, v3.16b
+; CHECK-BE-NEXT: tbl v16.16b, { v4.16b }, v2.16b
+; CHECK-BE-NEXT: tbl v4.16b, { v4.16b }, v1.16b
+; CHECK-BE-NEXT: tbl v5.16b, { v5.16b }, v0.16b
; CHECK-BE-NEXT: st1 { v6.16b }, [x9]
; CHECK-BE-NEXT: add x9, x1, #16
-; CHECK-BE-NEXT: rev32 v7.16b, v4.16b
-; CHECK-BE-NEXT: rev64 v4.16b, v4.16b
-; CHECK-BE-NEXT: st1 { v5.16b }, [x1]
+; CHECK-BE-NEXT: rev32 v7.16b, v5.16b
+; CHECK-BE-NEXT: rev64 v5.16b, v5.16b
+; CHECK-BE-NEXT: st1 { v4.16b }, [x1]
; CHECK-BE-NEXT: st1 { v16.16b }, [x9]
; CHECK-BE-NEXT: mov s6, v7.s[2]
-; CHECK-BE-NEXT: str d4, [x1, #48]
+; CHECK-BE-NEXT: str d5, [x1, #48]
; CHECK-BE-NEXT: str s6, [x1, #56]
; CHECK-BE-NEXT: add x1, x1, #64
; CHECK-BE-NEXT: b.ne .LBB20_1
@@ -2512,29 +2469,28 @@ define void @zext_v23i8_to_v23i48_in_loop(ptr %src, ptr %dst) {
; CHECK-NEXT: ldr q4, [x9, lCPI21_4 at PAGEOFF]
; CHECK-NEXT: Lloh41:
; CHECK-NEXT: ldr q5, [x10, lCPI21_5 at PAGEOFF]
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: mov w8, #128 ; =0x80
; CHECK-NEXT: LBB21_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: add x9, x0, x8
; CHECK-NEXT: movi.2d v19, #0000000000000000
-; CHECK-NEXT: add x8, x8, #16
-; CHECK-NEXT: ldp q7, q6, [x9]
-; CHECK-NEXT: cmp x8, #128
+; CHECK-NEXT: ldr q6, [x0]
+; CHECK-NEXT: subs x8, x8, #16
+; CHECK-NEXT: ldr q7, [x0, #16]!
; CHECK-NEXT: strh wzr, [x1, #136]
-; CHECK-NEXT: tbl.16b v16, { v6 }, v1
-; CHECK-NEXT: tbl.16b v17, { v6 }, v0
-; CHECK-NEXT: mov.b v19[4], v6[6]
-; CHECK-NEXT: tbl.16b v18, { v7 }, v5
-; CHECK-NEXT: tbl.16b v20, { v7 }, v4
-; CHECK-NEXT: tbl.16b v21, { v7 }, v3
+; CHECK-NEXT: tbl.16b v18, { v6 }, v5
+; CHECK-NEXT: tbl.16b v20, { v6 }, v4
+; CHECK-NEXT: tbl.16b v21, { v6 }, v3
+; CHECK-NEXT: tbl.16b v16, { v7 }, v1
+; CHECK-NEXT: tbl.16b v17, { v7 }, v0
+; CHECK-NEXT: mov.b v19[4], v7[6]
+; CHECK-NEXT: stp q20, q18, [x1, #64]
; CHECK-NEXT: stp q17, q16, [x1, #96]
-; CHECK-NEXT: tbl.16b v16, { v7 }, v2
-; CHECK-NEXT: tbl.16b v17, { v7 }, v1
-; CHECK-NEXT: tbl.16b v7, { v7 }, v0
+; CHECK-NEXT: tbl.16b v16, { v6 }, v2
+; CHECK-NEXT: tbl.16b v17, { v6 }, v1
+; CHECK-NEXT: tbl.16b v6, { v6 }, v0
; CHECK-NEXT: fmov x9, d19
-; CHECK-NEXT: stp q20, q18, [x1, #64]
; CHECK-NEXT: stp q16, q21, [x1, #32]
-; CHECK-NEXT: stp q7, q17, [x1]
+; CHECK-NEXT: stp q6, q17, [x1]
; CHECK-NEXT: str x9, [x1, #128]!
; CHECK-NEXT: b.ne LBB21_1
; CHECK-NEXT: ; %bb.2: ; %exit
@@ -2572,16 +2528,14 @@ define void @zext_v23i8_to_v23i48_in_loop(ptr %src, ptr %dst) {
; CHECK-BE-NEXT: adrp x8, .LCPI21_6
; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI21_6
; CHECK-BE-NEXT: ld1 { v6.16b }, [x8]
-; CHECK-BE-NEXT: mov x8, xzr
+; CHECK-BE-NEXT: mov w8, #128 // =0x80
; CHECK-BE-NEXT: .LBB21_1: // %loop
; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-BE-NEXT: add x9, x0, x8
-; CHECK-BE-NEXT: add x8, x8, #16
-; CHECK-BE-NEXT: ld1 { v7.16b }, [x9]
-; CHECK-BE-NEXT: add x9, x9, #16
-; CHECK-BE-NEXT: cmp x8, #128
-; CHECK-BE-NEXT: ld1 { v16.16b }, [x9]
+; CHECK-BE-NEXT: ld1 { v7.16b }, [x0]
+; CHECK-BE-NEXT: add x0, x0, #16
; CHECK-BE-NEXT: add x9, x1, #80
+; CHECK-BE-NEXT: ld1 { v16.16b }, [x0]
+; CHECK-BE-NEXT: subs x8, x8, #16
; CHECK-BE-NEXT: tbl v17.16b, { v7.16b }, v6.16b
; CHECK-BE-NEXT: tbl v18.16b, { v7.16b }, v5.16b
; CHECK-BE-NEXT: tbl v20.16b, { v7.16b }, v4.16b
@@ -2639,13 +2593,12 @@ exit:
define void @zext_v8i8_to_v8i33_in_loop(ptr %src, ptr %dst) {
; CHECK-LABEL: zext_v8i8_to_v8i33_in_loop:
; CHECK: ; %bb.0: ; %entry
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: mov w8, #128 ; =0x80
; CHECK-NEXT: LBB22_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldr d0, [x0, x8]
-; CHECK-NEXT: add x8, x8, #16
+; CHECK-NEXT: ldr d0, [x0], #16
+; CHECK-NEXT: subs x8, x8, #16
; CHECK-NEXT: strb wzr, [x1, #32]
-; CHECK-NEXT: cmp x8, #128
; CHECK-NEXT: ushll.8h v0, v0, #0
; CHECK-NEXT: ushll2.4s v1, v0, #0
; CHECK-NEXT: ushll.4s v0, v0, #0
@@ -2676,13 +2629,12 @@ define void @zext_v8i8_to_v8i33_in_loop(ptr %src, ptr %dst) {
;
; CHECK-BE-LABEL: zext_v8i8_to_v8i33_in_loop:
; CHECK-BE: // %bb.0: // %entry
-; CHECK-BE-NEXT: mov x8, xzr
+; CHECK-BE-NEXT: mov w8, #128 // =0x80
; CHECK-BE-NEXT: .LBB22_1: // %loop
; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-BE-NEXT: add x9, x0, x8
-; CHECK-BE-NEXT: add x8, x8, #16
-; CHECK-BE-NEXT: ld1 { v0.8b }, [x9]
-; CHECK-BE-NEXT: cmp x8, #128
+; CHECK-BE-NEXT: ld1 { v0.8b }, [x0]
+; CHECK-BE-NEXT: subs x8, x8, #16
+; CHECK-BE-NEXT: add x0, x0, #16
; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-BE-NEXT: ushll2 v1.4s, v0.8h, #0
; CHECK-BE-NEXT: ushll v0.4s, v0.4h, #0
diff --git a/llvm/test/CodeGen/ARM/ParallelDSP/multi-use-loads.ll b/llvm/test/CodeGen/ARM/ParallelDSP/multi-use-loads.ll
index e45985136cf34..25f0140055b77 100644
--- a/llvm/test/CodeGen/ARM/ParallelDSP/multi-use-loads.ll
+++ b/llvm/test/CodeGen/ARM/ParallelDSP/multi-use-loads.ll
@@ -10,17 +10,17 @@ define i32 @add_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocapture reado
; CHECK-LE-NEXT: @ %bb.1: @ %for.body.preheader
; CHECK-LE-NEXT: .save {r4, lr}
; CHECK-LE-NEXT: push {r4, lr}
-; CHECK-LE-NEXT: sub.w lr, r3, #2
-; CHECK-LE-NEXT: subs r2, #2
+; CHECK-LE-NEXT: adds r3, #2
; CHECK-LE-NEXT: mov.w r12, #0
; CHECK-LE-NEXT: movs r1, #0
; CHECK-LE-NEXT: .LBB0_2: @ %for.body
; CHECK-LE-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-LE-NEXT: ldr r3, [lr, #2]!
+; CHECK-LE-NEXT: ldr lr, [r3, #-2]
+; CHECK-LE-NEXT: adds r3, #2
+; CHECK-LE-NEXT: ldr r4, [r2], #2
; CHECK-LE-NEXT: subs r0, #1
-; CHECK-LE-NEXT: ldr r4, [r2, #2]!
-; CHECK-LE-NEXT: sxtah r1, r1, r3
-; CHECK-LE-NEXT: smlad r12, r4, r3, r12
+; CHECK-LE-NEXT: sxtah r1, r1, lr
+; CHECK-LE-NEXT: smlad r12, r4, lr, r12
; CHECK-LE-NEXT: bne .LBB0_2
; CHECK-LE-NEXT: @ %bb.3:
; CHECK-LE-NEXT: pop.w {r4, lr}
@@ -39,19 +39,18 @@ define i32 @add_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocapture reado
; CHECK-BE-NEXT: @ %bb.1: @ %for.body.preheader
; CHECK-BE-NEXT: .save {r4, r5, r7, lr}
; CHECK-BE-NEXT: push {r4, r5, r7, lr}
-; CHECK-BE-NEXT: subs r3, #2
-; CHECK-BE-NEXT: subs r2, #2
+; CHECK-BE-NEXT: adds r3, #2
; CHECK-BE-NEXT: mov.w r12, #0
; CHECK-BE-NEXT: movs r1, #0
; CHECK-BE-NEXT: .LBB0_2: @ %for.body
; CHECK-BE-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-BE-NEXT: ldrsh lr, [r3, #2]!
+; CHECK-BE-NEXT: ldrsh lr, [r3, #-2]
; CHECK-BE-NEXT: subs r0, #1
-; CHECK-BE-NEXT: ldrsh r4, [r2, #2]!
+; CHECK-BE-NEXT: ldrsh.w r4, [r2]
+; CHECK-BE-NEXT: ldrsh r5, [r2, #2]!
; CHECK-BE-NEXT: add r1, lr
-; CHECK-BE-NEXT: ldrsh.w r5, [r2, #2]
; CHECK-BE-NEXT: smlabb r12, r4, lr, r12
-; CHECK-BE-NEXT: ldrsh.w r4, [r3, #2]
+; CHECK-BE-NEXT: ldrsh r4, [r3], #2
; CHECK-BE-NEXT: smlabb r12, r5, r4, r12
; CHECK-BE-NEXT: bne .LBB0_2
; CHECK-BE-NEXT: @ %bb.3:
@@ -112,18 +111,18 @@ define i32 @mul_bottom_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocaptur
; CHECK-LE-NEXT: @ %bb.1: @ %for.body.preheader
; CHECK-LE-NEXT: .save {r4, lr}
; CHECK-LE-NEXT: push {r4, lr}
-; CHECK-LE-NEXT: sub.w lr, r3, #2
-; CHECK-LE-NEXT: subs r2, #2
+; CHECK-LE-NEXT: adds r3, #2
; CHECK-LE-NEXT: mov.w r12, #0
; CHECK-LE-NEXT: movs r1, #0
; CHECK-LE-NEXT: .LBB1_2: @ %for.body
; CHECK-LE-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-LE-NEXT: ldr r3, [lr, #2]!
+; CHECK-LE-NEXT: ldr lr, [r3, #-2]
+; CHECK-LE-NEXT: adds r3, #2
+; CHECK-LE-NEXT: ldr r4, [r2], #2
; CHECK-LE-NEXT: subs r0, #1
-; CHECK-LE-NEXT: ldr r4, [r2, #2]!
-; CHECK-LE-NEXT: smlad r12, r4, r3, r12
-; CHECK-LE-NEXT: sxth r3, r3
-; CHECK-LE-NEXT: mul r1, r3, r1
+; CHECK-LE-NEXT: smlad r12, r4, lr, r12
+; CHECK-LE-NEXT: sxth.w r4, lr
+; CHECK-LE-NEXT: mul r1, r4, r1
; CHECK-LE-NEXT: bne .LBB1_2
; CHECK-LE-NEXT: @ %bb.3:
; CHECK-LE-NEXT: pop.w {r4, lr}
@@ -142,19 +141,18 @@ define i32 @mul_bottom_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocaptur
; CHECK-BE-NEXT: @ %bb.1: @ %for.body.preheader
; CHECK-BE-NEXT: .save {r4, r5, r7, lr}
; CHECK-BE-NEXT: push {r4, r5, r7, lr}
-; CHECK-BE-NEXT: subs r3, #2
-; CHECK-BE-NEXT: subs r2, #2
+; CHECK-BE-NEXT: adds r3, #2
; CHECK-BE-NEXT: mov.w r12, #0
; CHECK-BE-NEXT: movs r1, #0
; CHECK-BE-NEXT: .LBB1_2: @ %for.body
; CHECK-BE-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-BE-NEXT: ldrsh lr, [r3, #2]!
+; CHECK-BE-NEXT: ldrsh lr, [r3, #-2]
; CHECK-BE-NEXT: subs r0, #1
-; CHECK-BE-NEXT: ldrsh r4, [r2, #2]!
-; CHECK-BE-NEXT: ldrsh.w r5, [r2, #2]
+; CHECK-BE-NEXT: ldrsh.w r4, [r2]
+; CHECK-BE-NEXT: ldrsh r5, [r2, #2]!
; CHECK-BE-NEXT: mul r1, lr, r1
; CHECK-BE-NEXT: smlabb r12, r4, lr, r12
-; CHECK-BE-NEXT: ldrsh.w r4, [r3, #2]
+; CHECK-BE-NEXT: ldrsh r4, [r3], #2
; CHECK-BE-NEXT: smlabb r12, r5, r4, r12
; CHECK-BE-NEXT: bne .LBB1_2
; CHECK-BE-NEXT: @ %bb.3:
@@ -215,15 +213,15 @@ define i32 @mul_top_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocapture r
; CHECK-LE-NEXT: @ %bb.1: @ %for.body.preheader
; CHECK-LE-NEXT: .save {r4, lr}
; CHECK-LE-NEXT: push {r4, lr}
-; CHECK-LE-NEXT: subs r3, #2
-; CHECK-LE-NEXT: subs r2, #2
+; CHECK-LE-NEXT: adds r3, #2
; CHECK-LE-NEXT: mov.w r12, #0
; CHECK-LE-NEXT: movs r1, #0
; CHECK-LE-NEXT: .LBB2_2: @ %for.body
; CHECK-LE-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-LE-NEXT: ldr lr, [r3, #2]!
+; CHECK-LE-NEXT: ldr lr, [r3, #-2]
+; CHECK-LE-NEXT: adds r3, #2
+; CHECK-LE-NEXT: ldr r4, [r2], #2
; CHECK-LE-NEXT: subs r0, #1
-; CHECK-LE-NEXT: ldr r4, [r2, #2]!
; CHECK-LE-NEXT: smlad r12, r4, lr, r12
; CHECK-LE-NEXT: asr.w r4, r4, #16
; CHECK-LE-NEXT: mul r1, r4, r1
@@ -245,18 +243,17 @@ define i32 @mul_top_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocapture r
; CHECK-BE-NEXT: @ %bb.1: @ %for.body.preheader
; CHECK-BE-NEXT: .save {r4, lr}
; CHECK-BE-NEXT: push {r4, lr}
-; CHECK-BE-NEXT: subs r3, #2
-; CHECK-BE-NEXT: subs r2, #2
+; CHECK-BE-NEXT: adds r3, #2
; CHECK-BE-NEXT: mov.w r12, #0
; CHECK-BE-NEXT: movs r1, #0
; CHECK-BE-NEXT: .LBB2_2: @ %for.body
; CHECK-BE-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-BE-NEXT: ldrsh lr, [r3, #2]!
+; CHECK-BE-NEXT: ldrsh lr, [r3, #-2]
; CHECK-BE-NEXT: subs r0, #1
-; CHECK-BE-NEXT: ldrsh r4, [r2, #2]!
+; CHECK-BE-NEXT: ldrsh.w r4, [r2]
; CHECK-BE-NEXT: smlabb r12, r4, lr, r12
-; CHECK-BE-NEXT: ldrsh.w r4, [r2, #2]
-; CHECK-BE-NEXT: ldrsh.w lr, [r3, #2]
+; CHECK-BE-NEXT: ldrsh r4, [r2, #2]!
+; CHECK-BE-NEXT: ldrsh lr, [r3], #2
; CHECK-BE-NEXT: mul r1, r4, r1
; CHECK-BE-NEXT: smlabb r12, r4, lr, r12
; CHECK-BE-NEXT: bne .LBB2_2
@@ -318,18 +315,18 @@ define i32 @and_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocapture reado
; CHECK-LE-NEXT: @ %bb.1: @ %for.body.preheader
; CHECK-LE-NEXT: .save {r4, lr}
; CHECK-LE-NEXT: push {r4, lr}
-; CHECK-LE-NEXT: sub.w lr, r3, #2
-; CHECK-LE-NEXT: subs r2, #2
+; CHECK-LE-NEXT: adds r3, #2
; CHECK-LE-NEXT: mov.w r12, #0
; CHECK-LE-NEXT: movs r1, #0
; CHECK-LE-NEXT: .LBB3_2: @ %for.body
; CHECK-LE-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-LE-NEXT: ldr r3, [lr, #2]!
+; CHECK-LE-NEXT: ldr lr, [r3, #-2]
+; CHECK-LE-NEXT: adds r3, #2
+; CHECK-LE-NEXT: ldr r4, [r2], #2
; CHECK-LE-NEXT: subs r0, #1
-; CHECK-LE-NEXT: ldr r4, [r2, #2]!
-; CHECK-LE-NEXT: smlad r12, r4, r3, r12
-; CHECK-LE-NEXT: uxth r3, r3
-; CHECK-LE-NEXT: mul r1, r3, r1
+; CHECK-LE-NEXT: smlad r12, r4, lr, r12
+; CHECK-LE-NEXT: uxth.w r4, lr
+; CHECK-LE-NEXT: mul r1, r4, r1
; CHECK-LE-NEXT: bne .LBB3_2
; CHECK-LE-NEXT: @ %bb.3:
; CHECK-LE-NEXT: pop.w {r4, lr}
@@ -348,19 +345,18 @@ define i32 @and_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocapture reado
; CHECK-BE-NEXT: @ %bb.1: @ %for.body.preheader
; CHECK-BE-NEXT: .save {r4, r5, r7, lr}
; CHECK-BE-NEXT: push {r4, r5, r7, lr}
-; CHECK-BE-NEXT: subs r3, #2
-; CHECK-BE-NEXT: subs r2, #2
+; CHECK-BE-NEXT: adds r3, #2
; CHECK-BE-NEXT: mov.w r12, #0
; CHECK-BE-NEXT: movs r1, #0
; CHECK-BE-NEXT: .LBB3_2: @ %for.body
; CHECK-BE-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-BE-NEXT: ldrh lr, [r3, #2]!
+; CHECK-BE-NEXT: ldrh lr, [r3, #-2]
; CHECK-BE-NEXT: subs r0, #1
-; CHECK-BE-NEXT: ldrsh r4, [r2, #2]!
-; CHECK-BE-NEXT: ldrsh.w r5, [r2, #2]
+; CHECK-BE-NEXT: ldrsh.w r4, [r2]
+; CHECK-BE-NEXT: ldrsh r5, [r2, #2]!
; CHECK-BE-NEXT: mul r1, lr, r1
; CHECK-BE-NEXT: smlabb r12, r4, lr, r12
-; CHECK-BE-NEXT: ldrsh.w r4, [r3, #2]
+; CHECK-BE-NEXT: ldrsh r4, [r3], #2
; CHECK-BE-NEXT: smlabb r12, r5, r4, r12
; CHECK-BE-NEXT: bne .LBB3_2
; CHECK-BE-NEXT: @ %bb.3:
@@ -422,15 +418,15 @@ define i32 @multi_uses(i32 %arg, ptr nocapture readnone %arg1, ptr nocapture rea
; CHECK-LE-NEXT: cmp r0, #1
; CHECK-LE-NEXT: blt .LBB4_4
; CHECK-LE-NEXT: @ %bb.1: @ %for.body.preheader
-; CHECK-LE-NEXT: subs r3, #2
-; CHECK-LE-NEXT: subs r2, #2
+; CHECK-LE-NEXT: adds r3, #2
; CHECK-LE-NEXT: mov.w lr, #0
; CHECK-LE-NEXT: mov.w r12, #0
; CHECK-LE-NEXT: .LBB4_2: @ %for.body
; CHECK-LE-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-LE-NEXT: ldr r1, [r3, #2]!
+; CHECK-LE-NEXT: ldr r1, [r3, #-2]
+; CHECK-LE-NEXT: adds r3, #2
+; CHECK-LE-NEXT: ldr r4, [r2], #2
; CHECK-LE-NEXT: subs r0, #1
-; CHECK-LE-NEXT: ldr r4, [r2, #2]!
; CHECK-LE-NEXT: smlad lr, r4, r1, lr
; CHECK-LE-NEXT: eor.w r4, r1, r12
; CHECK-LE-NEXT: mul r1, r4, r1
@@ -452,18 +448,17 @@ define i32 @multi_uses(i32 %arg, ptr nocapture readnone %arg1, ptr nocapture rea
; CHECK-BE-NEXT: cmp r0, #1
; CHECK-BE-NEXT: blt .LBB4_4
; CHECK-BE-NEXT: @ %bb.1: @ %for.body.preheader
-; CHECK-BE-NEXT: subs r3, #2
-; CHECK-BE-NEXT: subs r2, #2
+; CHECK-BE-NEXT: adds r3, #2
; CHECK-BE-NEXT: mov.w r12, #0
; CHECK-BE-NEXT: mov.w lr, #0
; CHECK-BE-NEXT: .LBB4_2: @ %for.body
; CHECK-BE-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-BE-NEXT: ldrsh r4, [r2, #2]!
+; CHECK-BE-NEXT: ldrsh.w r4, [r2]
; CHECK-BE-NEXT: subs r0, #1
-; CHECK-BE-NEXT: ldrsh r1, [r3, #2]!
-; CHECK-BE-NEXT: ldrsh.w r5, [r2, #2]
+; CHECK-BE-NEXT: ldrsh r1, [r3, #-2]
+; CHECK-BE-NEXT: ldrsh r5, [r2, #2]!
; CHECK-BE-NEXT: smlabb r12, r4, r1, r12
-; CHECK-BE-NEXT: ldrsh.w r4, [r3, #2]
+; CHECK-BE-NEXT: ldrsh r4, [r3], #2
; CHECK-BE-NEXT: smlabb r12, r5, r4, r12
; CHECK-BE-NEXT: eor.w r5, r1, lr
; CHECK-BE-NEXT: mul r1, r5, r1
diff --git a/llvm/test/CodeGen/ARM/branch-on-zero.ll b/llvm/test/CodeGen/ARM/branch-on-zero.ll
index 575176fc013c6..1cd5bd9c0d01b 100644
--- a/llvm/test/CodeGen/ARM/branch-on-zero.ll
+++ b/llvm/test/CodeGen/ARM/branch-on-zero.ll
@@ -23,18 +23,15 @@ define i32 @test_lshr(ptr nocapture %x, ptr nocapture readonly %y, i32 %n) {
; CHECK-V7M-LABEL: test_lshr:
; CHECK-V7M: @ %bb.0: @ %entry
; CHECK-V7M-NEXT: lsrs r2, r2, #2
-; CHECK-V7M-NEXT: beq .LBB0_3
-; CHECK-V7M-NEXT: @ %bb.1: @ %while.body.preheader
-; CHECK-V7M-NEXT: subs r1, #4
-; CHECK-V7M-NEXT: subs r0, #4
-; CHECK-V7M-NEXT: .LBB0_2: @ %while.body
+; CHECK-V7M-NEXT: beq .LBB0_2
+; CHECK-V7M-NEXT: .LBB0_1: @ %while.body
; CHECK-V7M-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-V7M-NEXT: ldr r3, [r1, #4]!
+; CHECK-V7M-NEXT: ldr r3, [r1], #4
; CHECK-V7M-NEXT: subs r2, #1
; CHECK-V7M-NEXT: lsl.w r3, r3, #1
-; CHECK-V7M-NEXT: str r3, [r0, #4]!
-; CHECK-V7M-NEXT: bne .LBB0_2
-; CHECK-V7M-NEXT: .LBB0_3: @ %while.end
+; CHECK-V7M-NEXT: str r3, [r0], #4
+; CHECK-V7M-NEXT: bne .LBB0_1
+; CHECK-V7M-NEXT: .LBB0_2: @ %while.end
; CHECK-V7M-NEXT: movs r0, #0
; CHECK-V7M-NEXT: bx lr
;
@@ -109,18 +106,15 @@ define i32 @test_lshr2(ptr nocapture %x, ptr nocapture readonly %y, i32 %n) {
; CHECK-V7M-LABEL: test_lshr2:
; CHECK-V7M: @ %bb.0: @ %entry
; CHECK-V7M-NEXT: lsrs r2, r2, #2
-; CHECK-V7M-NEXT: beq .LBB1_3
-; CHECK-V7M-NEXT: @ %bb.1: @ %while.body.preheader
-; CHECK-V7M-NEXT: subs r1, #4
-; CHECK-V7M-NEXT: subs r0, #4
-; CHECK-V7M-NEXT: .LBB1_2: @ %while.body
+; CHECK-V7M-NEXT: beq .LBB1_2
+; CHECK-V7M-NEXT: .LBB1_1: @ %while.body
; CHECK-V7M-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-V7M-NEXT: ldr r3, [r1, #4]!
+; CHECK-V7M-NEXT: ldr r3, [r1], #4
; CHECK-V7M-NEXT: subs r2, #1
; CHECK-V7M-NEXT: lsl.w r3, r3, #1
-; CHECK-V7M-NEXT: str r3, [r0, #4]!
-; CHECK-V7M-NEXT: bne .LBB1_2
-; CHECK-V7M-NEXT: .LBB1_3: @ %while.end
+; CHECK-V7M-NEXT: str r3, [r0], #4
+; CHECK-V7M-NEXT: bne .LBB1_1
+; CHECK-V7M-NEXT: .LBB1_2: @ %while.end
; CHECK-V7M-NEXT: movs r0, #0
; CHECK-V7M-NEXT: bx lr
;
diff --git a/llvm/test/CodeGen/ARM/dsp-loop-indexing.ll b/llvm/test/CodeGen/ARM/dsp-loop-indexing.ll
index 9fb64471e9881..5ed8949716b80 100644
--- a/llvm/test/CodeGen/ARM/dsp-loop-indexing.ll
+++ b/llvm/test/CodeGen/ARM/dsp-loop-indexing.ll
@@ -18,19 +18,19 @@
; CHECK-LABEL: test_qadd_2
; CHECK: @ %loop
-; CHECK-DEFAULT: ldr{{.*}}, #4]
-; CHECK-DEFAULT: ldr{{.*}}, #4]
-; CHECK-DEFAULT: str{{.*}}, #4]
-; CHECK-DEFAULT: ldr{{.*}}, #8]!
-; CHECK-DEAFULT: ldr{{.*}}, #8]!
-; CHECK-DEFAULT: str{{.*}}, #8]!
+; CHECK-DEFAULT: ldr{{.*}}, #-4]
+; CHECK-DEFAULT: ldr{{.*}}, #-4]
+; CHECK-DEFAULT: str{{.*}}, #-4]
+; CHECK-DEFAULT: ldr{{.*}}, #8
+; CHECK-DEFAULT: ldr{{.*}}, #8
+; CHECK-DEFAULT: str{{.*}}, #8
-; CHECK-COMPLEX: ldr{{.*}}, #8]!
-; CHECK-COMPLEX: ldr{{.*}}, #8]!
-; CHECK-COMPLEX: str{{.*}}, #8]!
-; CHECK-COMPLEX: ldr{{.*}}, #4]
-; CHECK-COMPLEX: ldr{{.*}}, #4]
-; CHECK-COMPLEX: str{{.*}}, #4]
+; CHECK-COMPLEX: ldr{{.*}}, #-4]
+; CHECK-COMPLEX: ldr{{.*}}, #-4]
+; CHECK-COMPLEX: str{{.*}}, #-4]
+; CHECK-COMPLEX: ldr{{.*}}, #8
+; CHECK-COMPLEX: ldr{{.*}}, #8
+; CHECK-COMPLEX: str{{.*}}, #8
; DISABLED-NOT: ldr{{.*}}]!
; DISABLED-NOT: str{{.*}}]!
@@ -74,9 +74,8 @@ exit:
; CHECK-DEFAULT: ldr{{.*}},
; CHECK-DEFAULT: ldr{{.*}},
; CHECK-DEFAULT: str{{.*}},
-; CHECK-DEFAULT: ldr{{.*}}, #-4]
-; CHECK-DEFAULT: ldr{{.*}}, #-4]
-; CHECK-DEFAULT: sub{{.*}}, #8
+; CHECK-DEFAULT: ldr{{.*}}, #-8
+; CHECK-DEFAULT: ldr{{.*}}, #-8
; CHECK-DEFAULT: str{{.*}}, #-4]
; CHECK-DEFAULT: sub{{.*}}, #8
@@ -124,22 +123,22 @@ exit:
; CHECK-LABEL: test_qadd_3
; CHECK: @ %loop
-; CHECK-DEFAULT: ldr{{.*}}, #8]
-; CHECK-DEFAULT: ldr{{.*}}, #8]
-; CHECK-DEFAULT: str{{.*}}, #8]
-; CHECK-DEFAULT: ldr{{.*}}, #12]!
-; CHECK-DEFAULT: ldr{{.*}}, #12]!
-; CHECK-DEFAULT: str{{.*}}, #12]!
-
-; CHECK-COMPLEX: ldr{{.*}}, #12]!
-; CHECK-COMPLEX: ldr{{.*}}, #12]!
-; CHECK-COMPLEX: str{{.*}}, #12]!
-; CHECK-COMPLEX: ldr{{.*}}, #4]
-; CHECK-COMPLEX: ldr{{.*}}, #4]
-; CHECK-COMPLEX: str{{.*}}, #4]
-; CHECK-COMPLEX: ldr{{.*}}, #8]
-; CHECK-COMPLEX: ldr{{.*}}, #8]
-; CHECK-COMPLEX: str{{.*}}, #8]
+; CHECK-DEFAULT: ldr{{.*}}, #-4]
+; CHECK-DEFAULT: ldr{{.*}}, #-4]
+; CHECK-DEFAULT: str{{.*}}, #-4]
+; CHECK-DEFAULT: ldr{{.*}}, [r{{[0-9]+}}]
+; CHECK-DEFAULT: ldr{{.*}}, [r{{[0-9]+}}]
+; CHECK-DEFAULT: str{{.*}}, [r{{[0-9]+}}]
+
+; CHECK-COMPLEX: ldr{{.*}}, #-8]
+; CHECK-COMPLEX: ldr{{.*}}, #-8]
+; CHECK-COMPLEX: str{{.*}}, #-8]
+; CHECK-COMPLEX: ldr{{.*}}, #-4]
+; CHECK-COMPLEX: ldr{{.*}}, #-4]
+; CHECK-COMPLEX: str{{.*}}, #-4]
+; CHECK-COMPLEX: ldr{{.*}}, #12
+; CHECK-COMPLEX: ldr{{.*}}, #12
+; CHECK-COMPLEX: str{{.*}}, #12
; DISABLED-NOT: ldr{{.*}}]!
; DISABLED-NOT: str{{.*}}]!
@@ -198,18 +197,18 @@ exit:
; CHECK-DEFAULT: ldr{{.*}}, #12]
; CHECK-DEFAULT: str{{.*}}, #12]
-; CHECK-COMPLEX: ldr{{.*}}, #16]!
-; CHECK-COMPLEX: ldr{{.*}}, #16]!
-; CHECK-COMPLEX: str{{.*}}, #16]!
-; CHECK-COMPLEX: ldr{{.*}}, #4]
-; CHECK-COMPLEX: ldr{{.*}}, #4]
-; CHECK-COMPLEX: str{{.*}}, #4]
-; CHECK-COMPLEX: ldr{{.*}}, #8]
-; CHECK-COMPLEX: ldr{{.*}}, #8]
-; CHECK-COMPLEX: str{{.*}}, #8]
-; CHECK-COMPLEX: ldr{{.*}}, #12]
-; CHECK-COMPLEX: ldr{{.*}}, #12]
-; CHECK-COMPLEX: str{{.*}}, #12]
+; CHECK-COMPLEX: ldr{{.*}}, #-12]
+; CHECK-COMPLEX: ldr{{.*}}, #-12]
+; CHECK-COMPLEX: str{{.*}}, #-12]
+; CHECK-COMPLEX: ldr{{.*}}, #-8]
+; CHECK-COMPLEX: ldr{{.*}}, #-8]
+; CHECK-COMPLEX: str{{.*}}, #-8]
+; CHECK-COMPLEX: ldr{{.*}}, #-4]
+; CHECK-COMPLEX: ldr{{.*}}, #-4]
+; CHECK-COMPLEX: str{{.*}}, #-4]
+; CHECK-COMPLEX: ldr{{.*}}, #16
+; CHECK-COMPLEX: ldr{{.*}}, #16
+; CHECK-COMPLEX: str{{.*}}, #16
; DISABLED-NOT: ldr{{.*}}]!
; DISABLED-NOT: str{{.*}}]!
@@ -265,19 +264,19 @@ exit:
; CHECK: @ %loop
; TODO: pre-inc store.
-; CHECK-DEFAULT: ldr{{.*}}, #4]
-; CHECK-DEFAULT: ldr{{.*}}, #4]
-; CHECK-DEFAULT: str{{.*}}, #8]
-; CHECK-DEFAULT: ldr{{.*}}, #8]!
-; CHECK-DEFAULT: ldr{{.*}}, #8]!
-; CHECK-DEFAULT: str{{.*}}, #16]!
-
-; CHECK-COMPLEX: ldr{{.*}}, #8]!
-; CHECK-COMPLEX: ldr{{.*}}, #8]!
-; CHECK-COMPLEX: str{{.*}}, #16]!
-; CHECK-COMPLEX: ldr{{.*}}, #4]
-; CHECK-COMPLEX: ldr{{.*}}, #4]
-; CHECK-COMPLEX: str{{.*}}, #8]
+; CHECK-DEFAULT: ldr{{.*}}, #-4]
+; CHECK-DEFAULT: ldr{{.*}}, #-4]
+; CHECK-DEFAULT: str{{.*}}, #-8]
+; CHECK-DEFAULT: ldr{{.*}}, #8
+; CHECK-DEFAULT: ldr{{.*}}, #8
+; CHECK-DEFAULT: str{{.*}}, #16
+
+; CHECK-COMPLEX: ldr{{.*}}, #-4]
+; CHECK-COMPLEX: ldr{{.*}}, #-4]
+; CHECK-COMPLEX: str{{.*}}, #-8]
+; CHECK-COMPLEX: ldr{{.*}}, #8
+; CHECK-COMPLEX: ldr{{.*}}, #8
+; CHECK-COMPLEX: str{{.*}}, #16
; DISABLED-NOT: ldr{{.*}}]!
; DISABLED-NOT: str{{.*}}]!
diff --git a/llvm/test/CodeGen/ARM/fpclamptosat.ll b/llvm/test/CodeGen/ARM/fpclamptosat.ll
index 8ab56b228d2a7..bee4630a583d5 100644
--- a/llvm/test/CodeGen/ARM/fpclamptosat.ll
+++ b/llvm/test/CodeGen/ARM/fpclamptosat.ll
@@ -3555,29 +3555,30 @@ define void @unroll_maxmin(ptr nocapture %0, ptr nocapture readonly %1, i32 %2)
; SOFT-NEXT: push {r4, r5, r6, r7, lr}
; SOFT-NEXT: .pad #20
; SOFT-NEXT: sub sp, #20
-; SOFT-NEXT: mov r4, r1
-; SOFT-NEXT: mov r5, r0
-; SOFT-NEXT: movs r0, #0
-; SOFT-NEXT: str r0, [sp, #16] @ 4-byte Spill
-; SOFT-NEXT: mvns r0, r0
-; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT: movs r0, #1
-; SOFT-NEXT: lsls r1, r0, #31
-; SOFT-NEXT: str r1, [sp, #12] @ 4-byte Spill
-; SOFT-NEXT: str r0, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT: lsls r7, r0, #10
+; SOFT-NEXT: movs r2, #0
+; SOFT-NEXT: str r2, [sp, #16] @ 4-byte Spill
+; SOFT-NEXT: mvns r2, r2
+; SOFT-NEXT: str r2, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT: movs r2, #1
+; SOFT-NEXT: lsls r3, r2, #31
+; SOFT-NEXT: str r3, [sp, #12] @ 4-byte Spill
+; SOFT-NEXT: str r2, [sp, #4] @ 4-byte Spill
+; SOFT-NEXT: lsls r7, r2, #10
+; SOFT-NEXT: adds r5, r1, #4
+; SOFT-NEXT: adds r6, r0, #4
; SOFT-NEXT: b .LBB54_2
; SOFT-NEXT: .LBB54_1: @ in Loop: Header=BB54_2 Depth=1
-; SOFT-NEXT: str r0, [r5, #4]
-; SOFT-NEXT: adds r4, #8
+; SOFT-NEXT: str r0, [r6]
; SOFT-NEXT: adds r5, #8
+; SOFT-NEXT: adds r6, #8
; SOFT-NEXT: subs r7, r7, #2
; SOFT-NEXT: beq .LBB54_18
; SOFT-NEXT: .LBB54_2: @ =>This Inner Loop Header: Depth=1
-; SOFT-NEXT: ldr r0, [r4]
-; SOFT-NEXT: movs r1, #79
-; SOFT-NEXT: lsls r6, r1, #24
-; SOFT-NEXT: mov r1, r6
+; SOFT-NEXT: movs r0, #79
+; SOFT-NEXT: lsls r4, r0, #24
+; SOFT-NEXT: subs r0, r5, #4
+; SOFT-NEXT: ldr r0, [r0]
+; SOFT-NEXT: mov r1, r4
; SOFT-NEXT: bl __aeabi_fmul
; SOFT-NEXT: bl __aeabi_f2lz
; SOFT-NEXT: ldr r2, .LCPI54_0
@@ -3603,9 +3604,10 @@ define void @unroll_maxmin(ptr nocapture %0, ptr nocapture readonly %1, i32 %2)
; SOFT-NEXT: @ %bb.7: @ in Loop: Header=BB54_2 Depth=1
; SOFT-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
; SOFT-NEXT: .LBB54_8: @ in Loop: Header=BB54_2 Depth=1
-; SOFT-NEXT: str r0, [r5]
-; SOFT-NEXT: ldr r0, [r4, #4]
-; SOFT-NEXT: mov r1, r6
+; SOFT-NEXT: subs r1, r6, #4
+; SOFT-NEXT: str r0, [r1]
+; SOFT-NEXT: ldr r0, [r5]
+; SOFT-NEXT: mov r1, r4
; SOFT-NEXT: bl __aeabi_fmul
; SOFT-NEXT: bl __aeabi_f2lz
; SOFT-NEXT: ldr r2, .LCPI54_0
@@ -3658,22 +3660,22 @@ define void @unroll_maxmin(ptr nocapture %0, ptr nocapture readonly %1, i32 %2)
;
; VFP2-LABEL: unroll_maxmin:
; VFP2: @ %bb.0:
-; VFP2-NEXT: subs r1, #8
-; VFP2-NEXT: subs r0, #8
+; VFP2-NEXT: adds r1, #4
+; VFP2-NEXT: adds r0, #4
; VFP2-NEXT: vldr s0, .LCPI54_0
; VFP2-NEXT: mov.w r2, #1024
; VFP2-NEXT: .LBB54_1: @ =>This Inner Loop Header: Depth=1
-; VFP2-NEXT: vldr s2, [r1, #8]
+; VFP2-NEXT: vldr s2, [r1, #-4]
; VFP2-NEXT: subs r2, #2
; VFP2-NEXT: vmul.f32 s2, s2, s0
; VFP2-NEXT: vcvt.s32.f32 s2, s2
-; VFP2-NEXT: vmov r3, s2
-; VFP2-NEXT: str r3, [r0, #8]!
-; VFP2-NEXT: vldr s2, [r1, #12]
+; VFP2-NEXT: vstr s2, [r0, #-4]
+; VFP2-NEXT: vldr s2, [r1]
; VFP2-NEXT: add.w r1, r1, #8
; VFP2-NEXT: vmul.f32 s2, s2, s0
; VFP2-NEXT: vcvt.s32.f32 s2, s2
-; VFP2-NEXT: vstr s2, [r0, #4]
+; VFP2-NEXT: vstr s2, [r0]
+; VFP2-NEXT: add.w r0, r0, #8
; VFP2-NEXT: bne .LBB54_1
; VFP2-NEXT: @ %bb.2:
; VFP2-NEXT: bx lr
@@ -3687,20 +3689,20 @@ define void @unroll_maxmin(ptr nocapture %0, ptr nocapture readonly %1, i32 %2)
; FULL-NEXT: .save {r7, lr}
; FULL-NEXT: push {r7, lr}
; FULL-NEXT: mov.w lr, #512
-; FULL-NEXT: subs r1, #8
-; FULL-NEXT: subs r0, #8
+; FULL-NEXT: adds r1, #4
+; FULL-NEXT: adds r0, #4
; FULL-NEXT: vldr s0, .LCPI54_0
; FULL-NEXT: .LBB54_1: @ =>This Inner Loop Header: Depth=1
-; FULL-NEXT: vldr s2, [r1, #8]
+; FULL-NEXT: vldr s2, [r1, #-4]
; FULL-NEXT: vmul.f32 s2, s2, s0
; FULL-NEXT: vcvt.s32.f32 s2, s2
-; FULL-NEXT: vmov r2, s2
-; FULL-NEXT: str r2, [r0, #8]!
-; FULL-NEXT: vldr s2, [r1, #12]
+; FULL-NEXT: vstr s2, [r0, #-4]
+; FULL-NEXT: vldr s2, [r1]
; FULL-NEXT: adds r1, #8
; FULL-NEXT: vmul.f32 s2, s2, s0
; FULL-NEXT: vcvt.s32.f32 s2, s2
-; FULL-NEXT: vstr s2, [r0, #4]
+; FULL-NEXT: vstr s2, [r0]
+; FULL-NEXT: adds r0, #8
; FULL-NEXT: le lr, .LBB54_1
; FULL-NEXT: @ %bb.2:
; FULL-NEXT: pop {r7, pc}
@@ -3750,28 +3752,29 @@ define void @unroll_minmax(ptr nocapture %0, ptr nocapture readonly %1, i32 %2)
; SOFT-NEXT: push {r4, r5, r6, r7, lr}
; SOFT-NEXT: .pad #12
; SOFT-NEXT: sub sp, #12
-; SOFT-NEXT: mov r4, r1
-; SOFT-NEXT: mov r5, r0
-; SOFT-NEXT: movs r0, #0
-; SOFT-NEXT: str r0, [sp] @ 4-byte Spill
-; SOFT-NEXT: mvns r0, r0
-; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill
-; SOFT-NEXT: movs r0, #1
-; SOFT-NEXT: lsls r1, r0, #31
-; SOFT-NEXT: str r1, [sp, #4] @ 4-byte Spill
-; SOFT-NEXT: lsls r7, r0, #10
+; SOFT-NEXT: movs r2, #0
+; SOFT-NEXT: str r2, [sp] @ 4-byte Spill
+; SOFT-NEXT: mvns r2, r2
+; SOFT-NEXT: str r2, [sp, #8] @ 4-byte Spill
+; SOFT-NEXT: movs r2, #1
+; SOFT-NEXT: lsls r3, r2, #31
+; SOFT-NEXT: str r3, [sp, #4] @ 4-byte Spill
+; SOFT-NEXT: lsls r5, r2, #10
+; SOFT-NEXT: adds r6, r1, #4
+; SOFT-NEXT: adds r7, r0, #4
; SOFT-NEXT: b .LBB55_2
; SOFT-NEXT: .LBB55_1: @ in Loop: Header=BB55_2 Depth=1
-; SOFT-NEXT: str r0, [r5, #4]
-; SOFT-NEXT: adds r4, #8
-; SOFT-NEXT: adds r5, #8
-; SOFT-NEXT: subs r7, r7, #2
+; SOFT-NEXT: str r0, [r7]
+; SOFT-NEXT: adds r6, #8
+; SOFT-NEXT: adds r7, #8
+; SOFT-NEXT: subs r5, r5, #2
; SOFT-NEXT: beq .LBB55_14
; SOFT-NEXT: .LBB55_2: @ =>This Inner Loop Header: Depth=1
-; SOFT-NEXT: ldr r0, [r4]
-; SOFT-NEXT: movs r1, #79
-; SOFT-NEXT: lsls r6, r1, #24
-; SOFT-NEXT: mov r1, r6
+; SOFT-NEXT: movs r0, #79
+; SOFT-NEXT: lsls r4, r0, #24
+; SOFT-NEXT: subs r0, r6, #4
+; SOFT-NEXT: ldr r0, [r0]
+; SOFT-NEXT: mov r1, r4
; SOFT-NEXT: bl __aeabi_fmul
; SOFT-NEXT: bl __aeabi_f2lz
; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
@@ -3794,9 +3797,10 @@ define void @unroll_minmax(ptr nocapture %0, ptr nocapture readonly %1, i32 %2)
; SOFT-NEXT: @ %bb.7: @ in Loop: Header=BB55_2 Depth=1
; SOFT-NEXT: ldr r0, .LCPI55_0
; SOFT-NEXT: .LBB55_8: @ in Loop: Header=BB55_2 Depth=1
-; SOFT-NEXT: str r0, [r5]
-; SOFT-NEXT: ldr r0, [r4, #4]
-; SOFT-NEXT: mov r1, r6
+; SOFT-NEXT: subs r1, r7, #4
+; SOFT-NEXT: str r0, [r1]
+; SOFT-NEXT: ldr r0, [r6]
+; SOFT-NEXT: mov r1, r4
; SOFT-NEXT: bl __aeabi_fmul
; SOFT-NEXT: bl __aeabi_f2lz
; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
@@ -3829,22 +3833,22 @@ define void @unroll_minmax(ptr nocapture %0, ptr nocapture readonly %1, i32 %2)
;
; VFP2-LABEL: unroll_minmax:
; VFP2: @ %bb.0:
-; VFP2-NEXT: subs r1, #8
-; VFP2-NEXT: subs r0, #8
+; VFP2-NEXT: adds r1, #4
+; VFP2-NEXT: adds r0, #4
; VFP2-NEXT: vldr s0, .LCPI55_0
; VFP2-NEXT: mov.w r2, #1024
; VFP2-NEXT: .LBB55_1: @ =>This Inner Loop Header: Depth=1
-; VFP2-NEXT: vldr s2, [r1, #8]
+; VFP2-NEXT: vldr s2, [r1, #-4]
; VFP2-NEXT: subs r2, #2
; VFP2-NEXT: vmul.f32 s2, s2, s0
; VFP2-NEXT: vcvt.s32.f32 s2, s2
-; VFP2-NEXT: vmov r3, s2
-; VFP2-NEXT: str r3, [r0, #8]!
-; VFP2-NEXT: vldr s2, [r1, #12]
+; VFP2-NEXT: vstr s2, [r0, #-4]
+; VFP2-NEXT: vldr s2, [r1]
; VFP2-NEXT: add.w r1, r1, #8
; VFP2-NEXT: vmul.f32 s2, s2, s0
; VFP2-NEXT: vcvt.s32.f32 s2, s2
-; VFP2-NEXT: vstr s2, [r0, #4]
+; VFP2-NEXT: vstr s2, [r0]
+; VFP2-NEXT: add.w r0, r0, #8
; VFP2-NEXT: bne .LBB55_1
; VFP2-NEXT: @ %bb.2:
; VFP2-NEXT: bx lr
@@ -3858,20 +3862,20 @@ define void @unroll_minmax(ptr nocapture %0, ptr nocapture readonly %1, i32 %2)
; FULL-NEXT: .save {r7, lr}
; FULL-NEXT: push {r7, lr}
; FULL-NEXT: mov.w lr, #512
-; FULL-NEXT: subs r1, #8
-; FULL-NEXT: subs r0, #8
+; FULL-NEXT: adds r1, #4
+; FULL-NEXT: adds r0, #4
; FULL-NEXT: vldr s0, .LCPI55_0
; FULL-NEXT: .LBB55_1: @ =>This Inner Loop Header: Depth=1
-; FULL-NEXT: vldr s2, [r1, #8]
+; FULL-NEXT: vldr s2, [r1, #-4]
; FULL-NEXT: vmul.f32 s2, s2, s0
; FULL-NEXT: vcvt.s32.f32 s2, s2
-; FULL-NEXT: vmov r2, s2
-; FULL-NEXT: str r2, [r0, #8]!
-; FULL-NEXT: vldr s2, [r1, #12]
+; FULL-NEXT: vstr s2, [r0, #-4]
+; FULL-NEXT: vldr s2, [r1]
; FULL-NEXT: adds r1, #8
; FULL-NEXT: vmul.f32 s2, s2, s0
; FULL-NEXT: vcvt.s32.f32 s2, s2
-; FULL-NEXT: vstr s2, [r0, #4]
+; FULL-NEXT: vstr s2, [r0]
+; FULL-NEXT: adds r0, #8
; FULL-NEXT: le lr, .LBB55_1
; FULL-NEXT: @ %bb.2:
; FULL-NEXT: pop {r7, pc}
diff --git a/llvm/test/CodeGen/ARM/loop-align-cortex-m.ll b/llvm/test/CodeGen/ARM/loop-align-cortex-m.ll
index bce0bbabfdc29..c18ce1a1ebdd2 100644
--- a/llvm/test/CodeGen/ARM/loop-align-cortex-m.ll
+++ b/llvm/test/CodeGen/ARM/loop-align-cortex-m.ll
@@ -4,7 +4,7 @@
define void @test_loop_alignment(ptr %in, ptr %out) optsize {
; CHECK-LABEL: test_loop_alignment:
-; CHECK: mov{{.*}}, #0
+; CHECK: mov{{.*}}, #1024
; CHECK: .p2align 2
entry:
@@ -27,7 +27,7 @@ end:
define void @test_loop_alignment_minsize(ptr %in, ptr %out) minsize {
; CHECK-LABEL: test_loop_alignment_minsize:
-; CHECK: movs {{r[0-9]+}}, #0
+; CHECK: mov.w {{r[0-9]+}}, #1024
; CHECK-NOT: .p2align
entry:
diff --git a/llvm/test/CodeGen/ARM/loop-indexing.ll b/llvm/test/CodeGen/ARM/loop-indexing.ll
index bb859b202bbc0..92d52645e8565 100644
--- a/llvm/test/CodeGen/ARM/loop-indexing.ll
+++ b/llvm/test/CodeGen/ARM/loop-indexing.ll
@@ -27,15 +27,15 @@
; CHECK-LABEL: test_fma
; CHECK: @ %loop
-; CHECK-DEFAULT: vldr s{{.*}}, #8]
-; CHECK-DEFAULT: vldr s{{.*}}, #8]
-; CHECK-DEFAULT: vldr s{{.*}}, #12]
-; CHECK-DEFAULT: vldr s{{.*}}, #12]
+; CHECK-DEFAULT: vldr s{{.*}}, #-4]
+; CHECK-DEFAULT: vldr s{{.*}}, #-4]
+; CHECK-DEFAULT: vldr s{{.*}}, [r{{[0-9]+}}]
+; CHECK-DEFAULT: vldr s{{.*}}, [r{{[0-9]+}}]
-; CHECK-COMPLEX: vldr s{{.*}}, #8]
-; CHECK-COMPLEX: vldr s{{.*}}, #8]
-; CHECK-COMPLEX: vldr s{{.*}}, #12]
-; CHECK-COMPLEX: vldr s{{.*}}, #12]
+; CHECK-COMPLEX: vldr s{{.*}}, #-4]
+; CHECK-COMPLEX: vldr s{{.*}}, #-4]
+; CHECK-COMPLEX: vldr s{{.*}}, [r{{[0-9]+}}]
+; CHECK-COMPLEX: vldr s{{.*}}, [r{{[0-9]+}}]
define float @test_fma(ptr %a, ptr %b, i32 %N) {
entry:
@@ -69,10 +69,10 @@ exit:
; CHECK-LABEL: convolve_16bit
; TODO: Both arrays should use indexing
-; CHECK-DEFAULT: ldr{{.*}}, #8]!
+; CHECK-DEFAULT: ldr{{.*}}, #-6]
; CHECK-DEFAULT-NOT: ldr{{.*}}]!
-; CHECK-COMPLEX: ldr{{.*}}, #8]!
+; CHECK-COMPLEX: ldr{{.*}}, #-6]
; CHECK-COMPLEX-NOT: ldr{{.*}}]!
; DISABLED-NOT: ldr{{.*}}]!
@@ -182,21 +182,21 @@ for.cond.cleanup: ; preds = %for.cond.cleanup3,
; CHECK-LABEL: mul_8x8
; CHECK: @ %for.body
-; CHECK-DEFAULT: str{{.*}}, #16]!
-; CHECK-DEFAULT: ldrb{{.*}}, #4]!
-; CHECK-DEFAULT: ldrb{{.*}}, #4]!
+; CHECK-DEFAULT: str{{.*}}, #-12]
+; CHECK-DEFAULT: ldrb{{.*}}, #4
+; CHECK-DEFAULT: ldrb{{.*}}, #4
-; CHECK-COMPLEX: str{{.*}}, #16]!
-; CHECK-COMPLEX: ldrb{{.*}}, #4]!
-; CHECK-COMPLEX: ldrb{{.*}}, #4]!
+; CHECK-COMPLEX: str{{.*}}, #-12]
+; CHECK-COMPLEX: ldrb{{.*}}, #4
+; CHECK-COMPLEX: ldrb{{.*}}, #4
; DISABLED-NOT: ldr{{.*}}]!
; DISABLED-NOT: str{{.*}}]!
; CHECK-T2: @ %for.body.epil
-; CHECK-T2: ldrb{{.*}}, #1]!
-; CHECK-T2: ldrb{{.*}}, #1]!
-; CHECK-T2: str{{.*}}, #4]!
+; CHECK-T2: ldrb{{.*}}, #1
+; CHECK-T2: ldrb{{.*}}, #1
+; CHECK-T2: str{{.*}}, #4
define void @mul_8x8(ptr nocapture readonly %A, ptr nocapture readonly %B, ptr nocapture %C, i32 %N) {
entry:
@@ -289,20 +289,20 @@ for.body: ; preds = %for.body, %for.body
; CHECK-LABEL: mul_16x8
; CHECK: @ %for.body
-; CHECK-DEFAULT: str{{.*}}, #16]!
-; CHECK-DEFAULT: ldrsh{{.*}}, #8]!
+; CHECK-DEFAULT: str{{.*}}, #-12]
+; CHECK-DEFAULT: ldrsh{{.*}}, #8
-; CHECK-COMPLEX: ldrsh{{.*}}, #8]!
-; CHECK-COMPLEX: str{{.*}}, #16]!
-; CHECK-COMPLEX: ldrb{{.*}}, #4]!
+; CHECK-COMPLEX: ldrsh{{.*}}, #-6]
+; CHECK-COMPLEX: str{{.*}}, #-12]
+; CHECK-COMPLEX: ldrb{{.*}}, #4
; DISABLED-NOT: ldr{{.*}}]!
; DISABLED-NOT: str{{.*}}]!
; CHECK-T2: @ %for.body.epil
-; CHECK-T2: ldrsh{{.*}}, #2]!
-; CHECK-T2: ldrb{{.*}}, #1]!
-; CHECK-T2: str{{.*}}, #4]!
+; CHECK-T2: ldrsh{{.*}}, #2
+; CHECK-T2: ldrb{{.*}}, #1
+; CHECK-T2: str{{.*}}, #4
define void @mul_16x8(ptr nocapture readonly %A, ptr nocapture readonly %B, ptr nocapture %C, i32 %N) {
entry:
@@ -397,20 +397,20 @@ for.body: ; preds = %for.body, %for.body
; TODO: pre-indexed loads
; CHECK-DEFAULT-NOT: ldrsh{{.*}}]!
-; CHECK-DEFAULT: str{{.*}}, #16]!
+; CHECK-DEFAULT: str{{.*}}, #-12]
; CHECK-DEFAULT-NOT: ldrsh{{.*}}]!
-; CHECK-COMPLEX: ldrsh{{.*}}]!
-; CHECK-COMPLEX: ldrsh{{.*}}]!
-; CHECK-COMPLEX: str{{.*}}]!
+; CHECK-COMPLEX: ldrsh{{.*}}]
+; CHECK-COMPLEX: ldrsh{{.*}}]
+; CHECK-COMPLEX: str{{.*}}]
; DISABLED-NOT: ldr{{.*}}]!
; DISABLED-NOT: str{{.*}}]!
; CHECK-T2: @ %for.body.epil
-; CHECK-T2: ldrsh{{.*}}, #2]!
-; CHECK-T2: ldrsh{{.*}}, #2]!
-; CHECK-T2: str{{.*}}, #4]!
+; CHECK-T2: ldrsh{{.*}}, #2
+; CHECK-T2: ldrsh{{.*}}, #2
+; CHECK-T2: str{{.*}}, #4
define void @mul_16x16(ptr nocapture readonly %A, ptr nocapture readonly %B, ptr nocapture %C, i32 %N) {
entry:
@@ -503,15 +503,15 @@ for.body: ; preds = %for.body, %for.body
; CHECK-LABEL: mul_8x8_2d
; CHECK: @ %for.body4.us
-; CHECK-DEFAULT: ldr{{.*}}, #16]!
-; CHECK-DEFAULT: ldrb{{.*}}, #4]!
+; CHECK-DEFAULT: ldr{{.*}}, #-12]
+; CHECK-DEFAULT: ldrb{{.*}}, #4
; DISABLED-NOT: ldr{{.*}}]!
; DISABLED-NOT: str{{.*}}]!
; CHECK-T2: @ %for.body4.us.epil
-; CHECK-T2: ldrb{{.*}}, #1]!
-; CHECK-T2: ldr{{.*}}, #4]!
+; CHECK-T2: ldrb{{.*}}, #1
+; CHECK-T2: ldr{{.*}}, [r{{[0-9]+}}]
define void @mul_8x8_2d(ptr nocapture readonly %A, ptr nocapture readonly %B, ptr nocapture readonly %C, i32 %N, i32 %M) {
entry:
@@ -622,15 +622,15 @@ for.cond.cleanup: ; preds = %for.cond1.for.cond.
; CHECK-LABEL: mul_16x16_2d
; CHECK: @ %for.body4.us
-; CHECK-DEFAULT: ldr{{.*}}, #16]!
-; CHECK-DEFAULT: ldrsh{{.*}}, #8]!
+; CHECK-DEFAULT: ldr{{.*}}, [r{{[0-9]+}}]
+; CHECK-DEFAULT: ldrsh{{.*}}, #8
; DISABLED-NOT: ldr{{.*}}]!
; DISABLED-NOT: str{{.*}}]!
; CHECK-T2: @ %for.body4.us.epil
-; CHECK-T2: ldrsh{{.*}}, #2]!
-; CHECK-T2: ldr{{.*}}, #4]!
+; CHECK-T2: ldrsh{{.*}}, #2
+; CHECK-T2: ldr{{.*}}, [r{{[0-9]+}}]
define void @mul_16x16_2d(ptr nocapture readonly %A, ptr nocapture readonly %B, ptr nocapture readonly %C, i32 %N, i32 %M) {
entry:
@@ -735,7 +735,7 @@ for.cond.cleanup: ; preds = %for.cond1.for.cond.
; TODO: Both input arrays could use pre-indexed loads.
; TODO: pre-indexed stores.
-; CHECK-DEFAULT: ldrb{{.*}}, #4]!
+; CHECK-DEFAULT: ldrb{{.*}}, #4
; CHECK-DEFAULT-NOT: ldr{{.*}}]!
; CHECK-DEFAULT-NOT: str{{.*}}]!
@@ -747,7 +747,7 @@ for.cond.cleanup: ; preds = %for.cond1.for.cond.
; DISABLED-NOT: str{{.*}}]!
; CHECK-T2: @ %for.body4.us.epil
-; CHECK-T2: ldrb{{.*}}, #1]!
+; CHECK-T2: ldrb{{.*}}, #1
define void @mac_8x8_2d(ptr nocapture readonly %A, ptr nocapture readonly %B, ptr nocapture %C, i32 %N, i32 %M) {
entry:
@@ -852,7 +852,7 @@ for.cond.cleanup: ; preds = %for.cond1.for.cond.
; CHECK: @ %for.body4.us
; TODO: pre-indexed loads for both input arrays.
-; CHECK-DEFAULT: ldrsh{{.*}}, #8]!
+; CHECK-DEFAULT: ldrsh{{.*}}, #-6]
; CHECK-DEFAULT-NOT: ldr{{.*}}]!
; TODO: increased complexity should lead to better codegen.
@@ -861,7 +861,7 @@ for.cond.cleanup: ; preds = %for.cond1.for.cond.
; DISABLED-NOT: ldr{{.*}}]!
; CHECK-T2: @ %for.body4.us.epil
-; CHECK-T2: ldrsh{{.*}}, #2]!
+; CHECK-T2: ldrsh{{.*}}, #2
define void @mac_16x16_2d(ptr nocapture readonly %A, ptr nocapture readonly %B, ptr nocapture %C, i32 %N, i32 %M) {
entry:
@@ -1043,14 +1043,14 @@ for.body: ; preds = %for.body, %for.body
; CHECK-DEFAULT-NOT: ldr{{.*}}]!
; CHECK-DEFAULT-NOT: str{{.*}}]!
-; CHECK-COMPLEX: ldr{{.*}}, #16]!
-; CHECK-COMPLEX: ldr{{.*}}, #16]!
-; CHECK-COMPLEX: str{{.*}}, #16]!
+; CHECK-COMPLEX: ldr{{.*}}, #-12]
+; CHECK-COMPLEX: ldr{{.*}}, #-12]
+; CHECK-COMPLEX: str{{.*}}, #-12]
; CHECK-T2: @ %for.body.epil
-; CHECK-T2: ldr{{.*}}, #4]!
-; CHECK-T2: ldr{{.*}}, #4]!
-; CHECK-T2: str{{.*}}, #4]!
+; CHECK-T2: ldr{{.*}}, #4
+; CHECK-T2: ldr{{.*}}, #4
+; CHECK-T2: str{{.*}}, #4
define void @mul32x32_forwards(ptr nocapture %a, ptr nocapture readonly %b, ptr nocapture readonly %c, i32 %N) {
entry:
diff --git a/llvm/test/CodeGen/Thumb/mvn.ll b/llvm/test/CodeGen/Thumb/mvn.ll
index a3656358beb8f..1ad5d424e8727 100644
--- a/llvm/test/CodeGen/Thumb/mvn.ll
+++ b/llvm/test/CodeGen/Thumb/mvn.ll
@@ -2,11 +2,6 @@
; RUN: llc -mtriple=thumbv6m-eabi -asm-verbose=false %s -o - | FileCheck %s
define void @test8(ptr %a) {
-; CHECK-LABEL: test8:
-; CHECK: ldrb r1, [r0]
-; CHECK-NEXT: mvns r1, r1
-; CHECK-NEXT: strb r1, [r0]
-; CHECK-NEXT: bx lr
%x = load i8, ptr %a
%xn = xor i8 %x, -1
store i8 %xn, ptr %a
@@ -14,14 +9,6 @@ define void @test8(ptr %a) {
}
define void @test8_2(ptr %a, ptr %b) {
-; CHECK-LABEL: test8_2:
-; CHECK: ldrb r2, [r1]
-; CHECK-NEXT: ldrb r3, [r0]
-; CHECK-NEXT: mvns r3, r3
-; CHECK-NEXT: strb r3, [r0]
-; CHECK-NEXT: mvns r0, r2
-; CHECK-NEXT: strb r0, [r1]
-; CHECK-NEXT: bx lr
%x = load i8, ptr %a
%y = load i8, ptr %b
%xn = xor i8 %x, -1
@@ -32,16 +19,6 @@ define void @test8_2(ptr %a, ptr %b) {
}
define void @loop8(ptr %a) {
-; CHECK-LABEL: loop8:
-; CHECK: movs r1, #0
-; CHECK-NEXT: .LBB2_1:
-; CHECK-NEXT: ldrb r2, [r0, r1]
-; CHECK-NEXT: mvns r2, r2
-; CHECK-NEXT: strb r2, [r0, r1]
-; CHECK-NEXT: adds r1, r1, #1
-; CHECK-NEXT: cmp r1, #10
-; CHECK-NEXT: bne .LBB2_1
-; CHECK-NEXT: bx lr
entry:
br label %for.body
@@ -60,21 +37,6 @@ for.cond.cleanup:
}
define void @loop8_2(ptr %a, ptr %b) {
-; CHECK-LABEL: loop8_2:
-; CHECK: .save {r4, lr}
-; CHECK-NEXT: push {r4, lr}
-; CHECK-NEXT: movs r2, #0
-; CHECK-NEXT: .LBB3_1:
-; CHECK-NEXT: ldrb r3, [r1, r2]
-; CHECK-NEXT: ldrb r4, [r0, r2]
-; CHECK-NEXT: mvns r4, r4
-; CHECK-NEXT: strb r4, [r0, r2]
-; CHECK-NEXT: mvns r3, r3
-; CHECK-NEXT: strb r3, [r1, r2]
-; CHECK-NEXT: adds r2, r2, #1
-; CHECK-NEXT: cmp r2, #10
-; CHECK-NEXT: bne .LBB3_1
-; CHECK-NEXT: pop {r4, pc}
entry:
br label %for.body
@@ -97,11 +59,6 @@ for.cond.cleanup:
}
define void @test32(ptr %a) {
-; CHECK-LABEL: test32:
-; CHECK: ldr r1, [r0]
-; CHECK-NEXT: mvns r1, r1
-; CHECK-NEXT: str r1, [r0]
-; CHECK-NEXT: bx lr
%x = load i32, ptr %a
%xn = xor i32 %x, -1
store i32 %xn, ptr %a
@@ -109,14 +66,6 @@ define void @test32(ptr %a) {
}
define void @test32_2(ptr %a, ptr %b) {
-; CHECK-LABEL: test32_2:
-; CHECK: ldr r2, [r1]
-; CHECK-NEXT: ldr r3, [r0]
-; CHECK-NEXT: mvns r3, r3
-; CHECK-NEXT: str r3, [r0]
-; CHECK-NEXT: mvns r0, r2
-; CHECK-NEXT: str r0, [r1]
-; CHECK-NEXT: bx lr
%x = load i32, ptr %a
%y = load i32, ptr %b
%xn = xor i32 %x, -1
@@ -127,16 +76,6 @@ define void @test32_2(ptr %a, ptr %b) {
}
define void @loop32(ptr %a) {
-; CHECK-LABEL: loop32:
-; CHECK: movs r1, #0
-; CHECK-NEXT: .LBB6_1:
-; CHECK-NEXT: ldr r2, [r0, r1]
-; CHECK-NEXT: mvns r2, r2
-; CHECK-NEXT: str r2, [r0, r1]
-; CHECK-NEXT: adds r1, r1, #4
-; CHECK-NEXT: cmp r1, #40
-; CHECK-NEXT: bne .LBB6_1
-; CHECK-NEXT: bx lr
entry:
br label %for.body
@@ -155,21 +94,6 @@ for.cond.cleanup:
}
define void @loop32_2(ptr %a, ptr %b) {
-; CHECK-LABEL: loop32_2:
-; CHECK: .save {r4, lr}
-; CHECK-NEXT: push {r4, lr}
-; CHECK-NEXT: movs r2, #0
-; CHECK-NEXT: .LBB7_1:
-; CHECK-NEXT: ldr r3, [r1, r2]
-; CHECK-NEXT: ldr r4, [r0, r2]
-; CHECK-NEXT: mvns r4, r4
-; CHECK-NEXT: str r4, [r0, r2]
-; CHECK-NEXT: mvns r3, r3
-; CHECK-NEXT: str r3, [r1, r2]
-; CHECK-NEXT: adds r2, r2, #4
-; CHECK-NEXT: cmp r2, #40
-; CHECK-NEXT: bne .LBB7_1
-; CHECK-NEXT: pop {r4, pc}
entry:
br label %for.body
@@ -193,30 +117,11 @@ for.cond.cleanup:
define void @test128(ptr %a) {
-; CHECK-LABEL: test128:
-; CHECK: ldr r1, [r0, #8]
-; CHECK-NEXT: ldr r2, .LCPI8_0
-; CHECK-NEXT: eors r2, r1
-; CHECK-NEXT: str r2, [r0, #8]
-; CHECK-NEXT: ldr r1, [r0]
-; CHECK-NEXT: ldr r2, .LCPI8_1
-; CHECK-NEXT: eors r2, r1
-; CHECK-NEXT: str r2, [r0]
-; CHECK-NEXT: ldr r1, [r0, #4]
-; CHECK-NEXT: ldr r2, .LCPI8_2
-; CHECK-NEXT: eors r2, r1
-; CHECK-NEXT: str r2, [r0, #4]
-; CHECK-NEXT: bx lr
-; CHECK-NEXT: .p2align 2
-; CHECK-NEXT: .LCPI8_0:
-; CHECK-NEXT: .long 6692605
-; CHECK-NEXT: .LCPI8_1:
-; CHECK-NEXT: .long 2080661269
-; CHECK-NEXT: .LCPI8_2:
-; CHECK-NEXT: .long 4075008415
%x = load i128, ptr %a
%xn = xor i128 %x, 123456789123456789123456789
store i128 %xn, ptr %a
ret void
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/loop-guards.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/loop-guards.ll
index 342b07e2a19d5..cc68881cb5c64 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/loop-guards.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/loop-guards.ll
@@ -182,10 +182,9 @@ if.end: ; preds = %do.body, %entry
; CHECK: ne_trip_count
; CHECK: body:
; CHECK: bb.0.entry:
-; CHECK: $lr = t2WLS killed renamable $r3, %bb.3
-; CHECK: bb.1.do.body.preheader:
-; CHECK: bb.2.do.body:
-; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.2
+; CHECK: $lr = t2WLS killed renamable $r3, %bb.2
+; CHECK: bb.1.do.body:
+; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.1
define void @ne_trip_count(i1 zeroext %t1, ptr nocapture %a, ptr nocapture readonly %b, i32 %N) {
entry:
br label %do.body.preheader
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/lsr-le-cost.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/lsr-le-cost.ll
index 037b272f60ec7..04c4e94ce726a 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/lsr-le-cost.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/lsr-le-cost.ll
@@ -7,54 +7,29 @@
; matter the preferred addressing mode.
define void @test(ptr %dst, i32 %n) {
-; CHECK-NOMVE-LABEL: test:
-; CHECK-NOMVE: @ %bb.0: @ %entry
-; CHECK-NOMVE-NEXT: push {r7, lr}
-; CHECK-NOMVE-NEXT: add.w r0, r0, r1, lsl #1
-; CHECK-NOMVE-NEXT: movs r2, #0
-; CHECK-NOMVE-NEXT: sub.w r12, r0, #2
-; CHECK-NOMVE-NEXT: movs r3, #0
-; CHECK-NOMVE-NEXT: .LBB0_1: @ %outer_loop
-; CHECK-NOMVE-NEXT: @ =>This Loop Header: Depth=1
-; CHECK-NOMVE-NEXT: @ Child Loop BB0_2 Depth 2
-; CHECK-NOMVE-NEXT: dls lr, r1
-; CHECK-NOMVE-NEXT: mov r0, r12
-; CHECK-NOMVE-NEXT: .LBB0_2: @ %inner_loop
-; CHECK-NOMVE-NEXT: @ Parent Loop BB0_1 Depth=1
-; CHECK-NOMVE-NEXT: @ => This Inner Loop Header: Depth=2
-; CHECK-NOMVE-NEXT: strh r2, [r0, #2]!
-; CHECK-NOMVE-NEXT: le lr, .LBB0_2
-; CHECK-NOMVE-NEXT: @ %bb.3: @ %outer_loop_end
-; CHECK-NOMVE-NEXT: @ in Loop: Header=BB0_1 Depth=1
-; CHECK-NOMVE-NEXT: adds r3, #1
-; CHECK-NOMVE-NEXT: cmp r3, r1
-; CHECK-NOMVE-NEXT: it eq
-; CHECK-NOMVE-NEXT: popeq {r7, pc}
-; CHECK-NOMVE-NEXT: b .LBB0_1
-;
-; CHECK-MVE-LABEL: test:
-; CHECK-MVE: @ %bb.0: @ %entry
-; CHECK-MVE-NEXT: push {r7, lr}
-; CHECK-MVE-NEXT: add.w r12, r0, r1, lsl #1
-; CHECK-MVE-NEXT: movs r2, #0
-; CHECK-MVE-NEXT: movs r3, #0
-; CHECK-MVE-NEXT: .LBB0_1: @ %outer_loop
-; CHECK-MVE-NEXT: @ =>This Loop Header: Depth=1
-; CHECK-MVE-NEXT: @ Child Loop BB0_2 Depth 2
-; CHECK-MVE-NEXT: dls lr, r1
-; CHECK-MVE-NEXT: mov r0, r12
-; CHECK-MVE-NEXT: .LBB0_2: @ %inner_loop
-; CHECK-MVE-NEXT: @ Parent Loop BB0_1 Depth=1
-; CHECK-MVE-NEXT: @ => This Inner Loop Header: Depth=2
-; CHECK-MVE-NEXT: strh r2, [r0], #2
-; CHECK-MVE-NEXT: le lr, .LBB0_2
-; CHECK-MVE-NEXT: @ %bb.3: @ %outer_loop_end
-; CHECK-MVE-NEXT: @ in Loop: Header=BB0_1 Depth=1
-; CHECK-MVE-NEXT: adds r3, #1
-; CHECK-MVE-NEXT: cmp r3, r1
-; CHECK-MVE-NEXT: it eq
-; CHECK-MVE-NEXT: popeq {r7, pc}
-; CHECK-MVE-NEXT: b .LBB0_1
+; CHECK-LABEL: test:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: add.w r12, r0, r1, lsl #1
+; CHECK-NEXT: movs r2, #0
+; CHECK-NEXT: movs r3, #0
+; CHECK-NEXT: .LBB0_1: @ %outer_loop
+; CHECK-NEXT: @ =>This Loop Header: Depth=1
+; CHECK-NEXT: @ Child Loop BB0_2 Depth 2
+; CHECK-NEXT: dls lr, r1
+; CHECK-NEXT: mov r0, r12
+; CHECK-NEXT: .LBB0_2: @ %inner_loop
+; CHECK-NEXT: @ Parent Loop BB0_1 Depth=1
+; CHECK-NEXT: @ => This Inner Loop Header: Depth=2
+; CHECK-NEXT: strh r2, [r0], #2
+; CHECK-NEXT: le lr, .LBB0_2
+; CHECK-NEXT: @ %bb.3: @ %outer_loop_end
+; CHECK-NEXT: @ in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT: adds r3, #1
+; CHECK-NEXT: cmp r3, r1
+; CHECK-NEXT: it eq
+; CHECK-NEXT: popeq {r7, pc}
+; CHECK-NEXT: b .LBB0_1
entry:
br label %outer_loop
@@ -136,61 +111,34 @@ exit:
declare void @otherfn()
define void @test_no_le(ptr %dst, i32 %n) {
-; CHECK-NOMVE-LABEL: test_no_le:
-; CHECK-NOMVE: @ %bb.0: @ %entry
-; CHECK-NOMVE-NEXT: push.w {r4, r5, r6, r7, r8, lr}
-; CHECK-NOMVE-NEXT: add.w r5, r0, r1, lsl #1
-; CHECK-NOMVE-NEXT: mov r4, r1
-; CHECK-NOMVE-NEXT: movs r6, #0
-; CHECK-NOMVE-NEXT: mov.w r8, #0
-; CHECK-NOMVE-NEXT: .LBB2_1: @ %outer_loop
-; CHECK-NOMVE-NEXT: @ =>This Loop Header: Depth=1
-; CHECK-NOMVE-NEXT: @ Child Loop BB2_2 Depth 2
-; CHECK-NOMVE-NEXT: movs r7, #0
-; CHECK-NOMVE-NEXT: .LBB2_2: @ %inner_loop
-; CHECK-NOMVE-NEXT: @ Parent Loop BB2_1 Depth=1
-; CHECK-NOMVE-NEXT: @ => This Inner Loop Header: Depth=2
-; CHECK-NOMVE-NEXT: bl otherfn
-; CHECK-NOMVE-NEXT: strh.w r6, [r5, r7, lsl #1]
-; CHECK-NOMVE-NEXT: adds r7, #1
-; CHECK-NOMVE-NEXT: cmp r4, r7
-; CHECK-NOMVE-NEXT: bne .LBB2_2
-; CHECK-NOMVE-NEXT: @ %bb.3: @ %outer_loop_end
-; CHECK-NOMVE-NEXT: @ in Loop: Header=BB2_1 Depth=1
-; CHECK-NOMVE-NEXT: add.w r8, r8, #1
-; CHECK-NOMVE-NEXT: cmp r8, r4
-; CHECK-NOMVE-NEXT: bne .LBB2_1
-; CHECK-NOMVE-NEXT: @ %bb.4: @ %exit
-; CHECK-NOMVE-NEXT: pop.w {r4, r5, r6, r7, r8, pc}
-;
-; CHECK-MVE-LABEL: test_no_le:
-; CHECK-MVE: @ %bb.0: @ %entry
-; CHECK-MVE-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr}
-; CHECK-MVE-NEXT: sub sp, #4
-; CHECK-MVE-NEXT: add.w r8, r0, r1, lsl #1
-; CHECK-MVE-NEXT: mov r9, r1
-; CHECK-MVE-NEXT: movs r6, #0
-; CHECK-MVE-NEXT: movs r7, #0
-; CHECK-MVE-NEXT: .LBB2_1: @ %outer_loop
-; CHECK-MVE-NEXT: @ =>This Loop Header: Depth=1
-; CHECK-MVE-NEXT: @ Child Loop BB2_2 Depth 2
-; CHECK-MVE-NEXT: mov r5, r8
-; CHECK-MVE-NEXT: mov r4, r9
-; CHECK-MVE-NEXT: .LBB2_2: @ %inner_loop
-; CHECK-MVE-NEXT: @ Parent Loop BB2_1 Depth=1
-; CHECK-MVE-NEXT: @ => This Inner Loop Header: Depth=2
-; CHECK-MVE-NEXT: bl otherfn
-; CHECK-MVE-NEXT: strh r6, [r5], #2
-; CHECK-MVE-NEXT: subs r4, #1
-; CHECK-MVE-NEXT: bne .LBB2_2
-; CHECK-MVE-NEXT: @ %bb.3: @ %outer_loop_end
-; CHECK-MVE-NEXT: @ in Loop: Header=BB2_1 Depth=1
-; CHECK-MVE-NEXT: adds r7, #1
-; CHECK-MVE-NEXT: cmp r7, r9
-; CHECK-MVE-NEXT: bne .LBB2_1
-; CHECK-MVE-NEXT: @ %bb.4: @ %exit
-; CHECK-MVE-NEXT: add sp, #4
-; CHECK-MVE-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc}
+; CHECK-LABEL: test_no_le:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr}
+; CHECK-NEXT: sub sp, #4
+; CHECK-NEXT: add.w r8, r0, r1, lsl #1
+; CHECK-NEXT: mov r9, r1
+; CHECK-NEXT: movs r6, #0
+; CHECK-NEXT: movs r7, #0
+; CHECK-NEXT: .LBB2_1: @ %outer_loop
+; CHECK-NEXT: @ =>This Loop Header: Depth=1
+; CHECK-NEXT: @ Child Loop BB2_2 Depth 2
+; CHECK-NEXT: mov r5, r8
+; CHECK-NEXT: mov r4, r9
+; CHECK-NEXT: .LBB2_2: @ %inner_loop
+; CHECK-NEXT: @ Parent Loop BB2_1 Depth=1
+; CHECK-NEXT: @ => This Inner Loop Header: Depth=2
+; CHECK-NEXT: bl otherfn
+; CHECK-NEXT: strh r6, [r5], #2
+; CHECK-NEXT: subs r4, #1
+; CHECK-NEXT: bne .LBB2_2
+; CHECK-NEXT: @ %bb.3: @ %outer_loop_end
+; CHECK-NEXT: @ in Loop: Header=BB2_1 Depth=1
+; CHECK-NEXT: adds r7, #1
+; CHECK-NEXT: cmp r7, r9
+; CHECK-NEXT: bne .LBB2_1
+; CHECK-NEXT: @ %bb.4: @ %exit
+; CHECK-NEXT: add sp, #4
+; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc}
entry:
br label %outer_loop
@@ -218,61 +166,34 @@ exit:
}
define void @test_no_le_optsize(ptr %dst, i32 %n) optsize {
-; CHECK-NOMVE-LABEL: test_no_le_optsize:
-; CHECK-NOMVE: @ %bb.0: @ %entry
-; CHECK-NOMVE-NEXT: push.w {r4, r5, r6, r7, r8, lr}
-; CHECK-NOMVE-NEXT: add.w r5, r0, r1, lsl #1
-; CHECK-NOMVE-NEXT: mov r4, r1
-; CHECK-NOMVE-NEXT: movs r6, #0
-; CHECK-NOMVE-NEXT: mov.w r8, #0
-; CHECK-NOMVE-NEXT: .LBB3_1: @ %outer_loop
-; CHECK-NOMVE-NEXT: @ =>This Loop Header: Depth=1
-; CHECK-NOMVE-NEXT: @ Child Loop BB3_2 Depth 2
-; CHECK-NOMVE-NEXT: movs r7, #0
-; CHECK-NOMVE-NEXT: .LBB3_2: @ %inner_loop
-; CHECK-NOMVE-NEXT: @ Parent Loop BB3_1 Depth=1
-; CHECK-NOMVE-NEXT: @ => This Inner Loop Header: Depth=2
-; CHECK-NOMVE-NEXT: bl otherfn
-; CHECK-NOMVE-NEXT: strh.w r6, [r5, r7, lsl #1]
-; CHECK-NOMVE-NEXT: adds r7, #1
-; CHECK-NOMVE-NEXT: cmp r4, r7
-; CHECK-NOMVE-NEXT: bne .LBB3_2
-; CHECK-NOMVE-NEXT: @ %bb.3: @ %outer_loop_end
-; CHECK-NOMVE-NEXT: @ in Loop: Header=BB3_1 Depth=1
-; CHECK-NOMVE-NEXT: add.w r8, r8, #1
-; CHECK-NOMVE-NEXT: cmp r8, r4
-; CHECK-NOMVE-NEXT: bne .LBB3_1
-; CHECK-NOMVE-NEXT: @ %bb.4: @ %exit
-; CHECK-NOMVE-NEXT: pop.w {r4, r5, r6, r7, r8, pc}
-;
-; CHECK-MVE-LABEL: test_no_le_optsize:
-; CHECK-MVE: @ %bb.0: @ %entry
-; CHECK-MVE-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr}
-; CHECK-MVE-NEXT: sub sp, #4
-; CHECK-MVE-NEXT: add.w r8, r0, r1, lsl #1
-; CHECK-MVE-NEXT: mov r9, r1
-; CHECK-MVE-NEXT: movs r6, #0
-; CHECK-MVE-NEXT: movs r7, #0
-; CHECK-MVE-NEXT: .LBB3_1: @ %outer_loop
-; CHECK-MVE-NEXT: @ =>This Loop Header: Depth=1
-; CHECK-MVE-NEXT: @ Child Loop BB3_2 Depth 2
-; CHECK-MVE-NEXT: mov r5, r8
-; CHECK-MVE-NEXT: mov r4, r9
-; CHECK-MVE-NEXT: .LBB3_2: @ %inner_loop
-; CHECK-MVE-NEXT: @ Parent Loop BB3_1 Depth=1
-; CHECK-MVE-NEXT: @ => This Inner Loop Header: Depth=2
-; CHECK-MVE-NEXT: bl otherfn
-; CHECK-MVE-NEXT: strh r6, [r5], #2
-; CHECK-MVE-NEXT: subs r4, #1
-; CHECK-MVE-NEXT: bne .LBB3_2
-; CHECK-MVE-NEXT: @ %bb.3: @ %outer_loop_end
-; CHECK-MVE-NEXT: @ in Loop: Header=BB3_1 Depth=1
-; CHECK-MVE-NEXT: adds r7, #1
-; CHECK-MVE-NEXT: cmp r7, r9
-; CHECK-MVE-NEXT: bne .LBB3_1
-; CHECK-MVE-NEXT: @ %bb.4: @ %exit
-; CHECK-MVE-NEXT: add sp, #4
-; CHECK-MVE-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc}
+; CHECK-LABEL: test_no_le_optsize:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr}
+; CHECK-NEXT: sub sp, #4
+; CHECK-NEXT: add.w r8, r0, r1, lsl #1
+; CHECK-NEXT: mov r9, r1
+; CHECK-NEXT: movs r6, #0
+; CHECK-NEXT: movs r7, #0
+; CHECK-NEXT: .LBB3_1: @ %outer_loop
+; CHECK-NEXT: @ =>This Loop Header: Depth=1
+; CHECK-NEXT: @ Child Loop BB3_2 Depth 2
+; CHECK-NEXT: mov r5, r8
+; CHECK-NEXT: mov r4, r9
+; CHECK-NEXT: .LBB3_2: @ %inner_loop
+; CHECK-NEXT: @ Parent Loop BB3_1 Depth=1
+; CHECK-NEXT: @ => This Inner Loop Header: Depth=2
+; CHECK-NEXT: bl otherfn
+; CHECK-NEXT: strh r6, [r5], #2
+; CHECK-NEXT: subs r4, #1
+; CHECK-NEXT: bne .LBB3_2
+; CHECK-NEXT: @ %bb.3: @ %outer_loop_end
+; CHECK-NEXT: @ in Loop: Header=BB3_1 Depth=1
+; CHECK-NEXT: adds r7, #1
+; CHECK-NEXT: cmp r7, r9
+; CHECK-NEXT: bne .LBB3_1
+; CHECK-NEXT: @ %bb.4: @ %exit
+; CHECK-NEXT: add sp, #4
+; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc}
entry:
br label %outer_loop
@@ -298,3 +219,6 @@ outer_loop_end:
exit:
ret void
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK-MVE: {{.*}}
+; CHECK-NOMVE: {{.*}}
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec-le-simple.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec-le-simple.ll
index 8e8934b6e9599..10aee24b2261e 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec-le-simple.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec-le-simple.ll
@@ -4,16 +4,15 @@
define void @cbz_exit(ptr %in, ptr %res) {
; CHECK-LABEL: cbz_exit:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: subs r2, r0, #4
-; CHECK-NEXT: mov.w r0, #-1
+; CHECK-NEXT: mov.w r2, #-1
; CHECK-NEXT: .LBB0_1: @ %loop
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldr r3, [r2, #4]!
-; CHECK-NEXT: adds r0, #1
+; CHECK-NEXT: ldr r3, [r0], #4
+; CHECK-NEXT: adds r2, #1
; CHECK-NEXT: cbz r3, .LBB0_2
; CHECK-NEXT: le .LBB0_1
; CHECK-NEXT: .LBB0_2: @ %exit
-; CHECK-NEXT: str r0, [r1]
+; CHECK-NEXT: str r2, [r1]
; CHECK-NEXT: bx lr
entry:
br label %loop
@@ -34,16 +33,15 @@ exit:
define void @cbnz_exit(ptr %in, ptr %res) {
; CHECK-LABEL: cbnz_exit:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: subs r2, r0, #4
-; CHECK-NEXT: mov.w r0, #-1
+; CHECK-NEXT: mov.w r2, #-1
; CHECK-NEXT: .LBB1_1: @ %loop
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldr r3, [r2, #4]!
-; CHECK-NEXT: adds r0, #1
+; CHECK-NEXT: ldr r3, [r0], #4
+; CHECK-NEXT: adds r2, #1
; CHECK-NEXT: cbnz r3, .LBB1_2
; CHECK-NEXT: le .LBB1_1
; CHECK-NEXT: .LBB1_2: @ %exit
-; CHECK-NEXT: str r0, [r1]
+; CHECK-NEXT: str r2, [r1]
; CHECK-NEXT: bx lr
entry:
br label %loop
@@ -64,17 +62,16 @@ exit:
define void @cbnz_exit_too_large(ptr %in, ptr %res) {
; CHECK-LABEL: cbnz_exit_too_large:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: subs r2, r0, #4
-; CHECK-NEXT: mov.w r0, #-1
+; CHECK-NEXT: mov.w r2, #-1
; CHECK-NEXT: .LBB2_1: @ %loop
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldr r12, [r2, #4]!
-; CHECK-NEXT: .zero 4090
-; CHECK-NEXT: adds r0, #1
+; CHECK-NEXT: ldr r12, [r0], #4
+; CHECK-NEXT: adds r2, #1
; CHECK-NEXT: cmp.w r12, #0
+; CHECK-NEXT: .zero 4090
; CHECK-NEXT: beq.w .LBB2_1
; CHECK-NEXT: @ %bb.2: @ %exit
-; CHECK-NEXT: str r0, [r1]
+; CHECK-NEXT: str r2, [r1]
; CHECK-NEXT: bx lr
entry:
br label %loop
@@ -96,16 +93,15 @@ exit:
define void @cbz_exit_minsize(ptr %in, ptr %res) #0 {
; CHECK-LABEL: cbz_exit_minsize:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: movs r2, #0
+; CHECK-NEXT: mov.w r2, #-1
; CHECK-NEXT: .LBB3_1: @ %loop
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldr.w r3, [r0, r2, lsl #2]
; CHECK-NEXT: adds r2, #1
+; CHECK-NEXT: ldm r0!, {r3}
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: bne .LBB3_1
; CHECK-NEXT: @ %bb.2: @ %exit
-; CHECK-NEXT: subs r0, r2, #1
-; CHECK-NEXT: str r0, [r1]
+; CHECK-NEXT: str r2, [r1]
; CHECK-NEXT: bx lr
entry:
br label %loop
@@ -126,16 +122,15 @@ exit:
define void @cbnz_exit_minsize(ptr %in, ptr %res) #0 {
; CHECK-LABEL: cbnz_exit_minsize:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: movs r2, #0
+; CHECK-NEXT: mov.w r2, #-1
; CHECK-NEXT: .LBB4_1: @ %loop
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldr.w r3, [r0, r2, lsl #2]
; CHECK-NEXT: adds r2, #1
+; CHECK-NEXT: ldm r0!, {r3}
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: beq .LBB4_1
; CHECK-NEXT: @ %bb.2: @ %exit
-; CHECK-NEXT: subs r0, r2, #1
-; CHECK-NEXT: str r0, [r1]
+; CHECK-NEXT: str r2, [r1]
; CHECK-NEXT: bx lr
entry:
br label %loop
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/sibling-loops.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/sibling-loops.ll
index c6158cb611a70..508595b0835f8 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/sibling-loops.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/sibling-loops.ll
@@ -9,49 +9,46 @@ define arm_aapcs_vfpcc void @test(ptr noalias nocapture readonly %off, ptr noali
; CHECK-NEXT: bxlt lr
; CHECK-NEXT: .LBB0_1: @ %for.cond1.preheader.us.preheader
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, lr}
-; CHECK-NEXT: sub.w r9, r1, #2
-; CHECK-NEXT: sub.w r8, r0, #2
-; CHECK-NEXT: subs r5, r2, #2
-; CHECK-NEXT: mov r10, r3
+; CHECK-NEXT: mov r8, r3
; CHECK-NEXT: lsl.w r12, r3, #1
-; CHECK-NEXT: movs r0, #0
-; CHECK-NEXT: mov r4, r9
+; CHECK-NEXT: mov.w r9, #0
+; CHECK-NEXT: mov r10, r1
; CHECK-NEXT: .LBB0_2: @ %for.cond1.preheader.us
; CHECK-NEXT: @ =>This Loop Header: Depth=1
; CHECK-NEXT: @ Child Loop BB0_3 Depth 2
; CHECK-NEXT: @ Child Loop BB0_5 Depth 2
-; CHECK-NEXT: dls lr, r10
-; CHECK-NEXT: mov r6, r8
-; CHECK-NEXT: mov r7, r9
-; CHECK-NEXT: mov r2, r4
+; CHECK-NEXT: dls lr, r8
+; CHECK-NEXT: mov r6, r0
+; CHECK-NEXT: mov r7, r1
+; CHECK-NEXT: mov r5, r10
; CHECK-NEXT: .LBB0_3: @ %for.body4.us
; CHECK-NEXT: @ Parent Loop BB0_2 Depth=1
; CHECK-NEXT: @ => This Inner Loop Header: Depth=2
-; CHECK-NEXT: ldrh r1, [r6, #2]!
-; CHECK-NEXT: ldrh r3, [r7, #2]!
-; CHECK-NEXT: add r1, r3
-; CHECK-NEXT: strh r1, [r2, #2]!
+; CHECK-NEXT: ldrh r3, [r6], #2
+; CHECK-NEXT: ldrh r4, [r7], #2
+; CHECK-NEXT: add r3, r4
+; CHECK-NEXT: strh r3, [r5], #2
; CHECK-NEXT: le lr, .LBB0_3
; CHECK-NEXT: @ %bb.4: @ %for.body15.us.preheader
; CHECK-NEXT: @ in Loop: Header=BB0_2 Depth=1
-; CHECK-NEXT: dls lr, r10
-; CHECK-NEXT: mov r6, r8
-; CHECK-NEXT: mov r7, r9
-; CHECK-NEXT: mov r2, r5
+; CHECK-NEXT: dls lr, r8
+; CHECK-NEXT: mov r6, r0
+; CHECK-NEXT: mov r7, r1
+; CHECK-NEXT: mov r5, r2
; CHECK-NEXT: .LBB0_5: @ %for.body15.us
; CHECK-NEXT: @ Parent Loop BB0_2 Depth=1
; CHECK-NEXT: @ => This Inner Loop Header: Depth=2
-; CHECK-NEXT: ldrh r1, [r6, #2]!
-; CHECK-NEXT: ldrh r3, [r7, #2]!
-; CHECK-NEXT: add r1, r3
-; CHECK-NEXT: strh r1, [r2, #2]!
+; CHECK-NEXT: ldrh r3, [r6], #2
+; CHECK-NEXT: ldrh r4, [r7], #2
+; CHECK-NEXT: add r3, r4
+; CHECK-NEXT: strh r3, [r5], #2
; CHECK-NEXT: le lr, .LBB0_5
; CHECK-NEXT: @ %bb.6: @ %for.cond.cleanup14.us
; CHECK-NEXT: @ in Loop: Header=BB0_2 Depth=1
-; CHECK-NEXT: adds r0, #1
-; CHECK-NEXT: add r5, r12
-; CHECK-NEXT: add r4, r12
-; CHECK-NEXT: cmp r0, r10
+; CHECK-NEXT: add.w r9, r9, #1
+; CHECK-NEXT: add r2, r12
+; CHECK-NEXT: add r10, r12
+; CHECK-NEXT: cmp r9, r8
; CHECK-NEXT: bne .LBB0_2
; CHECK-NEXT: @ %bb.7:
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, lr}
diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-varargs-1.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-varargs-1.ll
index 2b7abfabf7035..8766fefb8de40 100644
--- a/llvm/test/CodeGen/Thumb2/pacbti-m-varargs-1.ll
+++ b/llvm/test/CodeGen/Thumb2/pacbti-m-varargs-1.ll
@@ -33,13 +33,13 @@ define hidden i32 @_Z1fiz(i32 %n, ...) local_unnamed_addr #0 {
; CHECK-NEXT: ldr r1, [sp]
; CHECK-NEXT: dls lr, r0
; CHECK-NEXT: movs r0, #0
-; CHECK-NEXT: adds r1, #4
; CHECK-NEXT: .LBB0_2: @ %for.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: str r1, [sp]
-; CHECK-NEXT: ldr r2, [r1, #-4]
-; CHECK-NEXT: adds r1, #4
-; CHECK-NEXT: add r0, r2
+; CHECK-NEXT: adds r2, r1, #4
+; CHECK-NEXT: str r2, [sp]
+; CHECK-NEXT: ldr r1, [r1]
+; CHECK-NEXT: add r0, r1
+; CHECK-NEXT: mov r1, r2
; CHECK-NEXT: le lr, .LBB0_2
; CHECK-NEXT: b .LBB0_4
; CHECK-NEXT: .LBB0_3:
diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-vla.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-vla.ll
index e6fcf56af6e8d..8118830d46abe 100644
--- a/llvm/test/CodeGen/Thumb2/pacbti-m-vla.ll
+++ b/llvm/test/CodeGen/Thumb2/pacbti-m-vla.ll
@@ -20,28 +20,31 @@ define hidden i32 @f(i32 %n) local_unnamed_addr #0 {
; CHECK-NEXT: .cfi_startproc
; CHECK-NEXT: @ %bb.0: @ %entry
; CHECK-NEXT: pac r12, lr, sp
-; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, ra_auth_code, lr}
-; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r12, lr}
-; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, ra_auth_code, lr}
+; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r12, lr}
+; CHECK-NEXT: .cfi_def_cfa_offset 36
; CHECK-NEXT: .cfi_offset lr, -4
; CHECK-NEXT: .cfi_offset ra_auth_code, -8
-; CHECK-NEXT: .cfi_offset r9, -12
-; CHECK-NEXT: .cfi_offset r8, -16
-; CHECK-NEXT: .cfi_offset r7, -20
-; CHECK-NEXT: .cfi_offset r6, -24
-; CHECK-NEXT: .cfi_offset r5, -28
-; CHECK-NEXT: .cfi_offset r4, -32
+; CHECK-NEXT: .cfi_offset r10, -12
+; CHECK-NEXT: .cfi_offset r9, -16
+; CHECK-NEXT: .cfi_offset r8, -20
+; CHECK-NEXT: .cfi_offset r7, -24
+; CHECK-NEXT: .cfi_offset r6, -28
+; CHECK-NEXT: .cfi_offset r5, -32
+; CHECK-NEXT: .cfi_offset r4, -36
; CHECK-NEXT: .setfp r7, sp, #12
; CHECK-NEXT: add r7, sp, #12
-; CHECK-NEXT: .cfi_def_cfa r7, 20
+; CHECK-NEXT: .cfi_def_cfa r7, 24
+; CHECK-NEXT: .pad #4
+; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: mov r5, r0
; CHECK-NEXT: movs r0, #7
; CHECK-NEXT: add.w r0, r0, r5, lsl #2
; CHECK-NEXT: bic r0, r0, #7
-; CHECK-NEXT: sub.w r4, sp, r0
-; CHECK-NEXT: mov sp, r4
+; CHECK-NEXT: sub.w r8, sp, r0
+; CHECK-NEXT: mov sp, r8
; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: mov r1, r4
+; CHECK-NEXT: mov r1, r8
; CHECK-NEXT: bl g
; CHECK-NEXT: cmp r5, #1
; CHECK-NEXT: blt .LBB0_3
@@ -61,31 +64,31 @@ define hidden i32 @f(i32 %n) local_unnamed_addr #0 {
; CHECK-NEXT: bic r0, r5, #3
; CHECK-NEXT: movs r2, #1
; CHECK-NEXT: subs r0, #4
-; CHECK-NEXT: sub.w r3, r4, #16
+; CHECK-NEXT: add.w r3, r8, #12
; CHECK-NEXT: add.w lr, r2, r0, lsr #2
; CHECK-NEXT: movs r2, #0
; CHECK-NEXT: movs r0, #0
; CHECK-NEXT: .LBB0_5: @ %for.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldr r5, [r3, #16]!
+; CHECK-NEXT: ldrd r5, r1, [r3, #-12]
; CHECK-NEXT: adds r2, #4
+; CHECK-NEXT: ldr r6, [r3, #-4]
; CHECK-NEXT: add r0, r5
-; CHECK-NEXT: ldrd r5, r1, [r3, #4]
-; CHECK-NEXT: ldr r6, [r3, #12]
-; CHECK-NEXT: add r0, r5
+; CHECK-NEXT: ldr r4, [r3], #16
; CHECK-NEXT: add r0, r1
; CHECK-NEXT: add r0, r6
+; CHECK-NEXT: add r0, r4
; CHECK-NEXT: le lr, .LBB0_5
; CHECK-NEXT: .LBB0_6: @ %for.cond.cleanup.loopexit.unr-lcssa
; CHECK-NEXT: cmp.w r12, #0
; CHECK-NEXT: beq .LBB0_9
; CHECK-NEXT: @ %bb.7: @ %for.body.epil
-; CHECK-NEXT: ldr.w r3, [r4, r2, lsl #2]
+; CHECK-NEXT: ldr.w r3, [r8, r2, lsl #2]
; CHECK-NEXT: cmp.w r12, #1
; CHECK-NEXT: add r0, r3
; CHECK-NEXT: beq .LBB0_9
; CHECK-NEXT: @ %bb.8: @ %for.body.epil.1
-; CHECK-NEXT: add.w r2, r4, r2, lsl #2
+; CHECK-NEXT: add.w r2, r8, r2, lsl #2
; CHECK-NEXT: cmp.w r12, #2
; CHECK-NEXT: ldr r1, [r2, #4]
; CHECK-NEXT: add r0, r1
@@ -95,7 +98,7 @@ define hidden i32 @f(i32 %n) local_unnamed_addr #0 {
; CHECK-NEXT: .LBB0_9: @ %for.cond.cleanup
; CHECK-NEXT: sub.w r4, r7, #12
; CHECK-NEXT: mov sp, r4
-; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r12, lr}
+; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r12, lr}
; CHECK-NEXT: aut r12, lr, sp
; CHECK-NEXT: bx lr
entry:
diff --git a/llvm/test/Transforms/LoopStrengthReduce/AArch64/pr53625.ll b/llvm/test/Transforms/LoopStrengthReduce/AArch64/pr53625.ll
index ff2527d5bb6ad..6c73792351abe 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/AArch64/pr53625.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/AArch64/pr53625.ll
@@ -58,30 +58,29 @@ define i64 @IVIncHoist_not_all_user_in_header(i32 %c, ptr %a, ptr %b) {
; CHECK-NEXT: cmp w0, #1
; CHECK-NEXT: b.lt .LBB1_5
; CHECK-NEXT: // %bb.1: // %for.body.preheader
-; CHECK-NEXT: mov x8, xzr
-; CHECK-NEXT: mov w9, w0
-; CHECK-NEXT: add x10, x1, #4
-; CHECK-NEXT: add x11, x2, #8
-; CHECK-NEXT: mov w0, #1 // =0x1
+; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: add x9, x1, #4
+; CHECK-NEXT: add x10, x2, #8
+; CHECK-NEXT: mov w0, #3 // =0x3
; CHECK-NEXT: .LBB1_2: // %for.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldr w12, [x10, x8, lsl #2]
-; CHECK-NEXT: cbnz w12, .LBB1_7
+; CHECK-NEXT: ldr w11, [x9], #4
+; CHECK-NEXT: cbnz w11, .LBB1_7
; CHECK-NEXT: // %bb.3: // %if.then
; CHECK-NEXT: // in Loop: Header=BB1_2 Depth=1
-; CHECK-NEXT: ldr w12, [x11, x8, lsl #2]
-; CHECK-NEXT: cbnz w12, .LBB1_6
+; CHECK-NEXT: ldr w11, [x10], #4
+; CHECK-NEXT: cbnz w11, .LBB1_6
; CHECK-NEXT: // %bb.4: // %for.cond
; CHECK-NEXT: // in Loop: Header=BB1_2 Depth=1
-; CHECK-NEXT: add x8, x8, #1
-; CHECK-NEXT: cmp x9, x8
+; CHECK-NEXT: subs x8, x8, #1
+; CHECK-NEXT: add x0, x0, #1
; CHECK-NEXT: b.ne .LBB1_2
; CHECK-NEXT: .LBB1_5:
; CHECK-NEXT: mov x0, xzr
+; CHECK-NEXT: .LBB1_6: // %return
; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB1_6: // %if.then.return.loopexit_crit_edge
-; CHECK-NEXT: add x0, x8, #3
-; CHECK-NEXT: .LBB1_7: // %return
+; CHECK-NEXT: .LBB1_7:
+; CHECK-NEXT: mov w0, #1 // =0x1
; CHECK-NEXT: ret
entry:
%cmp13 = icmp sgt i32 %c, 0
diff --git a/llvm/test/Transforms/LoopStrengthReduce/AArch64/small-constant.ll b/llvm/test/Transforms/LoopStrengthReduce/AArch64/small-constant.ll
index cd6b410b67aa3..5e785f8005b5f 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/AArch64/small-constant.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/AArch64/small-constant.ll
@@ -19,19 +19,20 @@ define float @test1(ptr nocapture readonly %arr, i64 %start, float %threshold) {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: cbz x1, .LBB0_4
; CHECK-NEXT: // %bb.1: // %for.body.preheader
-; CHECK-NEXT: add x8, x0, #28
+; CHECK-NEXT: add x9, x0, x1, lsl #2
+; CHECK-NEXT: neg x8, x1
+; CHECK-NEXT: add x9, x9, #28
; CHECK-NEXT: .LBB0_2: // %for.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldr s1, [x8, x1, lsl #2]
+; CHECK-NEXT: ldr s1, [x9], #4
; CHECK-NEXT: fcmp s1, s0
; CHECK-NEXT: b.gt .LBB0_5
; CHECK-NEXT: // %bb.3: // %for.cond
; CHECK-NEXT: // in Loop: Header=BB0_2 Depth=1
-; CHECK-NEXT: add x1, x1, #1
-; CHECK-NEXT: cbnz x1, .LBB0_2
+; CHECK-NEXT: subs x8, x8, #1
+; CHECK-NEXT: b.ne .LBB0_2
; CHECK-NEXT: .LBB0_4:
-; CHECK-NEXT: fmov s0, #-7.00000000
-; CHECK-NEXT: ret
+; CHECK-NEXT: fmov s1, #-7.00000000
; CHECK-NEXT: .LBB0_5: // %cleanup2
; CHECK-NEXT: fmov s0, s1
; CHECK-NEXT: ret
diff --git a/llvm/test/Transforms/LoopStrengthReduce/AArch64/vscale-factor-out-constant.ll b/llvm/test/Transforms/LoopStrengthReduce/AArch64/vscale-factor-out-constant.ll
index c70537f6e561e..78392b61cc837 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/AArch64/vscale-factor-out-constant.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/AArch64/vscale-factor-out-constant.ll
@@ -8,7 +8,7 @@ target triple = "aarch64-unknown-linux-gnu"
; CHECK-LABEL: test
; FIXME: Handle VectorType in SCEVExpander::expandAddToGEP.
; The generated IR is not ideal with base 'scalar_vector' cast to i8*, and do ugly getelementptr over casted base.
-; CHECK: scevgep
+; CHECK: gep_vec
define void @test(ptr %a, i32 %v, i64 %n) {
entry:
%scalar_vector = alloca <vscale x 4 x i32>, align 16
diff --git a/llvm/test/Transforms/LoopStrengthReduce/ARM/2012-06-15-lsr-noaddrmode.ll b/llvm/test/Transforms/LoopStrengthReduce/ARM/2012-06-15-lsr-noaddrmode.ll
index 9fb4813a4f8d2..1c2eac1bfbb64 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/ARM/2012-06-15-lsr-noaddrmode.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/ARM/2012-06-15-lsr-noaddrmode.ll
@@ -46,8 +46,8 @@ declare ptr @getstruct() nounwind
; Check that the loop preheader contains no address computation.
; CHECK: %while.cond.i.i
; CHECK-NOT: add{{.*}}lsl
-; CHECK: ldr{{.*}}lsl #2
-; CHECK: ldr{{.*}}lsl #2
+; CHECK: ldr{{.*}}
+; CHECK: ldr{{.*}}
define i32 @main() nounwind ssp {
entry:
%v0 = load i32, ptr @ncol, align 4
diff --git a/llvm/test/Transforms/LoopStrengthReduce/ARM/illegal-addr-modes.ll b/llvm/test/Transforms/LoopStrengthReduce/ARM/illegal-addr-modes.ll
index 3844e00ae0a4f..b59a8439ce057 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/ARM/illegal-addr-modes.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/ARM/illegal-addr-modes.ll
@@ -33,11 +33,12 @@ define ptr @negativeOneCase(ptr returned %a, ptr nocapture readonly %b, i32 %n)
; CHECK-NEXT: br label [[WHILE_COND2:%.*]]
; CHECK: while.cond2:
; CHECK-NEXT: [[LSR_IV:%.*]] = phi i32 [ [[LSR_IV_NEXT:%.*]], [[WHILE_BODY5:%.*]] ], [ 0, [[WHILE_COND2_PREHEADER]] ]
-; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[B]], i32 [[LSR_IV]]
+; CHECK-NEXT: [[SCEVGEP1:%.*]] = phi ptr [ [[INCDEC_PTR6:%.*]], [[WHILE_BODY5]] ], [ [[B]], [[WHILE_COND2_PREHEADER]] ]
; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[INCDEC_PTR]], i32 [[LSR_IV]]
; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i32 [[N]], [[LSR_IV]]
; CHECK-NEXT: br i1 [[CMP3]], label [[WHILE_END8:%.*]], label [[WHILE_BODY5]]
; CHECK: while.body5:
+; CHECK-NEXT: [[INCDEC_PTR6]] = getelementptr inbounds i8, ptr [[SCEVGEP1]], i32 1
; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[SCEVGEP1]], align 1
; CHECK-NEXT: store i8 [[TMP1]], ptr [[SCEVGEP2]], align 1
; CHECK-NEXT: [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 1
More information about the llvm-commits
mailing list