[llvm] [AArch64] Disable Pre-RA Scheduler for Neoverse V2 (PR #127784)

Sjoerd Meijer via llvm-commits llvm-commits at lists.llvm.org
Wed Feb 26 05:32:15 PST 2025


https://github.com/sjoerdmeijer updated https://github.com/llvm/llvm-project/pull/127784

From f641df878dd41a283a7990a1108ae14abc8e55f7 Mon Sep 17 00:00:00 2001
From: Sjoerd Meijer <smeijer at nvidia.com>
Date: Wed, 19 Feb 2025 01:15:20 -0800
Subject: [PATCH] [AArch64] Disable Pre-RA Scheduler for Neoverse V2

We would like to disable the pre-RA machine scheduler for the Neoverse
V2 because we have a key workload that benefits massively from this
(a 25% uplift). Although the machine scheduler is register-pressure
aware, it still introduces spills for this workload. Disabling the
scheduler is a lot more attractive than trying to tweak the regalloc
heuristics:
- We have never seen scheduling provide a benefit on this big core;
  when we added the V2 scheduling model, it was not for performance
  reasons but only to enable llvm-mca.
- Scheduling can consume significant compile time without producing
  any performance gains, which is a bad deal.

FWIW, the GCC folks reached the same conclusion not long ago and made
exactly the same change; see also:
https://gcc.gnu.org/pipermail/gcc-patches/2024-October/667074.html

Other big cores could probably benefit from this too, but I would like
to leave that decision to folks with more experience on those cores,
which is why I propose to change this for the V2 only.

Numbers:
* We know the Eigen library is somewhat sensitive to scheduling; I
  found one kernel that regresses by ~2% and another that improves by
  ~2%. They cancel each other out, so overall the result is neutral.
* SPEC FP and INT appear to be unaffected.
* LLVM test-suite: slightly up and down, all within noise levels as far
  as I can tell, so neutral.
* Compile-time numbers: I see a 3% geomean improvement for the LLVM
  test-suite, and a very decent improvement for the sqlite amalgamation.

I haven't looked at post-RA scheduling yet; that might be interesting
as a follow-up.
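
For context on the mechanism: the generic MachineScheduler pass asks
the subtarget whether to run unless -enable-misched is passed
explicitly, in which case the flag wins. A paraphrased sketch (not part
of this patch) of the decision in llvm/lib/CodeGen/MachineScheduler.cpp:

    // Sketch of MachineScheduler::runOnMachineFunction(), paraphrased:
    // an explicit -enable-misched=<bool> on the command line wins,
    // otherwise the subtarget hook that this patch changes decides.
    if (EnableMachineSched.getNumOccurrences()) {
      if (!EnableMachineSched)
        return false;   // pre-RA scheduling explicitly disabled
    } else if (!MF.getSubtarget().enableMachineScheduler()) {
      return false;     // subtarget opted out, as the V2 now does
    }
    // ... otherwise build the DAG and schedule each region ...

So for experiments, -mllvm -enable-misched=1 should still force the old
behaviour back on for the V2, and -mllvm -enable-misched=0 lets other
cores try out the new behaviour.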
---
 llvm/lib/Target/AArch64/AArch64Features.td    |   3 +
 .../Target/AArch64/AArch64ISelLowering.cpp    |   3 +-
 llvm/lib/Target/AArch64/AArch64Processors.td  |   1 +
 llvm/lib/Target/AArch64/AArch64Subtarget.h    |   4 +-
 .../Atomics/aarch64-atomic-load-rcpc_immo.ll  | 284 +++++++++---------
 llvm/test/CodeGen/AArch64/selectopt-const.ll  |  10 +-
 .../CodeGen/AArch64/sve-fixed-vector-zext.ll  |  12 +-
 7 files changed, 162 insertions(+), 155 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64Features.td b/llvm/lib/Target/AArch64/AArch64Features.td
index 357f526d5e308..13c1a386149b7 100644
--- a/llvm/lib/Target/AArch64/AArch64Features.td
+++ b/llvm/lib/Target/AArch64/AArch64Features.td
@@ -669,6 +669,9 @@ def FeatureExynosCheapAsMoveHandling : SubtargetFeature<"exynos-cheap-as-move",
     "HasExynosCheapAsMoveHandling", "true",
     "Use Exynos specific handling of cheap instructions">;
 
+def FeatureDisablePreRAScheduler : SubtargetFeature<"disable-prera-scheduler",
+    "DisablePreRAScheduler", "true", "Disable scheduling before register allocation">;
+
 def FeaturePostRAScheduler : SubtargetFeature<"use-postra-scheduler",
     "UsePostRAScheduler", "true", "Schedule again after register allocation">;
 
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 50be082777835..327566994647a 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1173,7 +1173,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
 
   setStackPointerRegisterToSaveRestore(AArch64::SP);
 
-  setSchedulingPreference(Sched::Hybrid);
+  setSchedulingPreference(Subtarget->enableMachineScheduler() ? Sched::Hybrid
+                                                              : Sched::Source);
 
   EnableExtLdPromotion = true;
 
diff --git a/llvm/lib/Target/AArch64/AArch64Processors.td b/llvm/lib/Target/AArch64/AArch64Processors.td
index 30d9372e4afd1..265a044f7304a 100644
--- a/llvm/lib/Target/AArch64/AArch64Processors.td
+++ b/llvm/lib/Target/AArch64/AArch64Processors.td
@@ -540,6 +540,7 @@ def TuneNeoverseV2 : SubtargetFeature<"neoversev2", "ARMProcFamily", "NeoverseV2
                                       FeatureCmpBccFusion,
                                       FeatureFuseAdrpAdd,
                                       FeatureALULSLFast,
+                                      FeatureDisablePreRAScheduler,
                                       FeaturePostRAScheduler,
                                       FeatureEnableSelectOptimize,
                                       FeatureUseFixedOverScalableIfEqualCost,
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index c6eb77e3bc3ba..8689d13a38533 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -156,7 +156,9 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
   const LegalizerInfo *getLegalizerInfo() const override;
   const RegisterBankInfo *getRegBankInfo() const override;
   const Triple &getTargetTriple() const { return TargetTriple; }
-  bool enableMachineScheduler() const override { return true; }
+  bool enableMachineScheduler() const override {
+    return !disablePreRAScheduler();
+  }
   bool enablePostRAScheduler() const override { return usePostRAScheduler(); }
   bool enableSubRegLiveness() const override { return EnableSubregLiveness; }
 
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-load-rcpc_immo.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-load-rcpc_immo.ll
index 02ff12c27fcda..d29fdd23863a6 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-load-rcpc_immo.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-load-rcpc_immo.ll
@@ -48,12 +48,12 @@ define i8 @load_atomic_i8_aligned_acquire(ptr %ptr) {
 ; GISEL:    add x8, x0, #4
 ; GISEL:    ldaprb w0, [x8]
 ;
-; SDAG-NOAVOIDLDAPUR-LABEL: load_atomic_i8_aligned_acquire:
-; SDAG-NOAVOIDLDAPUR:    ldapurb w0, [x0, #4]
-;
 ; SDAG-AVOIDLDAPUR-LABEL: load_atomic_i8_aligned_acquire:
 ; SDAG-AVOIDLDAPUR:    add x8, x0, #4
 ; SDAG-AVOIDLDAPUR:    ldaprb w0, [x8]
+;
+; SDAG-NOAVOIDLDAPUR-LABEL: load_atomic_i8_aligned_acquire:
+; SDAG-NOAVOIDLDAPUR:    ldapurb w0, [x0, #4]
     %gep = getelementptr inbounds i8, ptr %ptr, i32 4
     %r = load atomic i8, ptr %gep acquire, align 1
     ret i8 %r
@@ -64,12 +64,12 @@ define i8 @load_atomic_i8_aligned_acquire_const(ptr readonly %ptr) {
 ; GISEL:    add x8, x0, #4
 ; GISEL:    ldaprb w0, [x8]
 ;
-; SDAG-NOAVOIDLDAPUR-LABEL: load_atomic_i8_aligned_acquire_const:
-; SDAG-NOAVOIDLDAPUR:    ldapurb w0, [x0, #4]
-;
 ; SDAG-AVOIDLDAPUR-LABEL: load_atomic_i8_aligned_acquire_const:
 ; SDAG-AVOIDLDAPUR:    add x8, x0, #4
 ; SDAG-AVOIDLDAPUR:    ldaprb w0, [x8]
+;
+; SDAG-NOAVOIDLDAPUR-LABEL: load_atomic_i8_aligned_acquire_const:
+; SDAG-NOAVOIDLDAPUR:    ldapurb w0, [x0, #4]
     %gep = getelementptr inbounds i8, ptr %ptr, i32 4
     %r = load atomic i8, ptr %gep acquire, align 1
     ret i8 %r
@@ -130,12 +130,12 @@ define i16 @load_atomic_i16_aligned_acquire(ptr %ptr) {
 ; GISEL:    add x8, x0, #8
 ; GISEL:    ldaprh w0, [x8]
 ;
-; SDAG-NOAVOIDLDAPUR-LABEL: load_atomic_i16_aligned_acquire:
-; SDAG-NOAVOIDLDAPUR:    ldapurh w0, [x0, #8]
-;
 ; SDAG-AVOIDLDAPUR-LABEL: load_atomic_i16_aligned_acquire:
 ; SDAG-AVOIDLDAPUR:    add x8, x0, #8
 ; SDAG-AVOIDLDAPUR:    ldaprh w0, [x8]
+;
+; SDAG-NOAVOIDLDAPUR-LABEL: load_atomic_i16_aligned_acquire:
+; SDAG-NOAVOIDLDAPUR:    ldapurh w0, [x0, #8]
     %gep = getelementptr inbounds i16, ptr %ptr, i32 4
     %r = load atomic i16, ptr %gep acquire, align 2
     ret i16 %r
@@ -146,12 +146,12 @@ define i16 @load_atomic_i16_aligned_acquire_const(ptr readonly %ptr) {
 ; GISEL:    add x8, x0, #8
 ; GISEL:    ldaprh w0, [x8]
 ;
-; SDAG-NOAVOIDLDAPUR-LABEL: load_atomic_i16_aligned_acquire_const:
-; SDAG-NOAVOIDLDAPUR:    ldapurh w0, [x0, #8]
-;
 ; SDAG-AVOIDLDAPUR-LABEL: load_atomic_i16_aligned_acquire_const:
 ; SDAG-AVOIDLDAPUR:    add x8, x0, #8
 ; SDAG-AVOIDLDAPUR:    ldaprh w0, [x8]
+;
+; SDAG-NOAVOIDLDAPUR-LABEL: load_atomic_i16_aligned_acquire_const:
+; SDAG-NOAVOIDLDAPUR:    ldapurh w0, [x0, #8]
     %gep = getelementptr inbounds i16, ptr %ptr, i32 4
     %r = load atomic i16, ptr %gep acquire, align 2
     ret i16 %r
@@ -211,12 +211,12 @@ define i32 @load_atomic_i32_aligned_acquire(ptr %ptr) {
 ; GISEL-LABEL: load_atomic_i32_aligned_acquire:
 ; GISEL:    ldapur w0, [x0, #16]
 ;
-; SDAG-NOAVOIDLDAPUR-LABEL: load_atomic_i32_aligned_acquire:
-; SDAG-NOAVOIDLDAPUR:    ldapur w0, [x0, #16]
-;
 ; SDAG-AVOIDLDAPUR-LABEL: load_atomic_i32_aligned_acquire:
 ; SDAG-AVOIDLDAPUR:    add x8, x0, #16
 ; SDAG-AVOIDLDAPUR:    ldapr w0, [x8]
+;
+; SDAG-NOAVOIDLDAPUR-LABEL: load_atomic_i32_aligned_acquire:
+; SDAG-NOAVOIDLDAPUR:    ldapur w0, [x0, #16]
     %gep = getelementptr inbounds i32, ptr %ptr, i32 4
     %r = load atomic i32, ptr %gep acquire, align 4
     ret i32 %r
@@ -226,12 +226,12 @@ define i32 @load_atomic_i32_aligned_acquire_const(ptr readonly %ptr) {
 ; GISEL-LABEL: load_atomic_i32_aligned_acquire_const:
 ; GISEL:    ldapur w0, [x0, #16]
 ;
-; SDAG-NOAVOIDLDAPUR-LABEL: load_atomic_i32_aligned_acquire_const:
-; SDAG-NOAVOIDLDAPUR:    ldapur w0, [x0, #16]
-;
 ; SDAG-AVOIDLDAPUR-LABEL: load_atomic_i32_aligned_acquire_const:
 ; SDAG-AVOIDLDAPUR:    add x8, x0, #16
 ; SDAG-AVOIDLDAPUR:    ldapr w0, [x8]
+;
+; SDAG-NOAVOIDLDAPUR-LABEL: load_atomic_i32_aligned_acquire_const:
+; SDAG-NOAVOIDLDAPUR:    ldapur w0, [x0, #16]
     %gep = getelementptr inbounds i32, ptr %ptr, i32 4
     %r = load atomic i32, ptr %gep acquire, align 4
     ret i32 %r
@@ -291,12 +291,12 @@ define i64 @load_atomic_i64_aligned_acquire(ptr %ptr) {
 ; GISEL-LABEL: load_atomic_i64_aligned_acquire:
 ; GISEL:    ldapur x0, [x0, #32]
 ;
-; SDAG-NOAVOIDLDAPUR-LABEL: load_atomic_i64_aligned_acquire:
-; SDAG-NOAVOIDLDAPUR:    ldapur x0, [x0, #32]
-;
 ; SDAG-AVOIDLDAPUR-LABEL: load_atomic_i64_aligned_acquire:
 ; SDAG-AVOIDLDAPUR:    add x8, x0, #32
 ; SDAG-AVOIDLDAPUR:    ldapr x0, [x8]
+;
+; SDAG-NOAVOIDLDAPUR-LABEL: load_atomic_i64_aligned_acquire:
+; SDAG-NOAVOIDLDAPUR:    ldapur x0, [x0, #32]
     %gep = getelementptr inbounds i64, ptr %ptr, i32 4
     %r = load atomic i64, ptr %gep acquire, align 8
     ret i64 %r
@@ -306,12 +306,12 @@ define i64 @load_atomic_i64_aligned_acquire_const(ptr readonly %ptr) {
 ; GISEL-LABEL: load_atomic_i64_aligned_acquire_const:
 ; GISEL:    ldapur x0, [x0, #32]
 ;
-; SDAG-NOAVOIDLDAPUR-LABEL: load_atomic_i64_aligned_acquire_const:
-; SDAG-NOAVOIDLDAPUR:    ldapur x0, [x0, #32]
-;
 ; SDAG-AVOIDLDAPUR-LABEL: load_atomic_i64_aligned_acquire_const:
 ; SDAG-AVOIDLDAPUR:    add x8, x0, #32
 ; SDAG-AVOIDLDAPUR:    ldapr x0, [x8]
+;
+; SDAG-NOAVOIDLDAPUR-LABEL: load_atomic_i64_aligned_acquire_const:
+; SDAG-NOAVOIDLDAPUR:    ldapur x0, [x0, #32]
     %gep = getelementptr inbounds i64, ptr %ptr, i32 4
     %r = load atomic i64, ptr %gep acquire, align 8
     ret i64 %r
@@ -440,12 +440,12 @@ define i8 @load_atomic_i8_unaligned_acquire(ptr %ptr) {
 ; GISEL:    add x8, x0, #4
 ; GISEL:    ldaprb w0, [x8]
 ;
-; SDAG-NOAVOIDLDAPUR-LABEL: load_atomic_i8_unaligned_acquire:
-; SDAG-NOAVOIDLDAPUR:    ldapurb w0, [x0, #4]
-;
 ; SDAG-AVOIDLDAPUR-LABEL: load_atomic_i8_unaligned_acquire:
 ; SDAG-AVOIDLDAPUR:    add x8, x0, #4
 ; SDAG-AVOIDLDAPUR:    ldaprb w0, [x8]
+;
+; SDAG-NOAVOIDLDAPUR-LABEL: load_atomic_i8_unaligned_acquire:
+; SDAG-NOAVOIDLDAPUR:    ldapurb w0, [x0, #4]
     %gep = getelementptr inbounds i8, ptr %ptr, i32 4
     %r = load atomic i8, ptr %gep acquire, align 1
     ret i8 %r
@@ -456,12 +456,12 @@ define i8 @load_atomic_i8_unaligned_acquire_const(ptr readonly %ptr) {
 ; GISEL:    add x8, x0, #4
 ; GISEL:    ldaprb w0, [x8]
 ;
-; SDAG-NOAVOIDLDAPUR-LABEL: load_atomic_i8_unaligned_acquire_const:
-; SDAG-NOAVOIDLDAPUR:    ldapurb w0, [x0, #4]
-;
 ; SDAG-AVOIDLDAPUR-LABEL: load_atomic_i8_unaligned_acquire_const:
 ; SDAG-AVOIDLDAPUR:    add x8, x0, #4
 ; SDAG-AVOIDLDAPUR:    ldaprb w0, [x8]
+;
+; SDAG-NOAVOIDLDAPUR-LABEL: load_atomic_i8_unaligned_acquire_const:
+; SDAG-NOAVOIDLDAPUR:    ldapurb w0, [x0, #4]
     %gep = getelementptr inbounds i8, ptr %ptr, i32 4
     %r = load atomic i8, ptr %gep acquire, align 1
     ret i8 %r
@@ -490,9 +490,9 @@ define i16 @load_atomic_i16_unaligned_unordered(ptr %ptr) {
 ; GISEL:    add x1, x8, #4
 ; GISEL:    bl __atomic_load
 ;
-; SDAG-LABEL: load_atomic_i16_unaligned_unordered:
-; SDAG:    add x1, x0, #4
-; SDAG:    bl __atomic_load
+; SDAG-NOAVOIDLDAPUR-LABEL: load_atomic_i16_unaligned_unordered:
+; SDAG-NOAVOIDLDAPUR:    add x1, x0, #4
+; SDAG-NOAVOIDLDAPUR:    bl __atomic_load
     %gep = getelementptr inbounds i8, ptr %ptr, i32 4
     %r = load atomic i16, ptr %gep unordered, align 1
     ret i16 %r
@@ -503,9 +503,9 @@ define i16 @load_atomic_i16_unaligned_unordered_const(ptr readonly %ptr) {
 ; GISEL:    add x1, x8, #4
 ; GISEL:    bl __atomic_load
 ;
-; SDAG-LABEL: load_atomic_i16_unaligned_unordered_const:
-; SDAG:    add x1, x0, #4
-; SDAG:    bl __atomic_load
+; SDAG-NOAVOIDLDAPUR-LABEL: load_atomic_i16_unaligned_unordered_const:
+; SDAG-NOAVOIDLDAPUR:    add x1, x0, #4
+; SDAG-NOAVOIDLDAPUR:    bl __atomic_load
     %gep = getelementptr inbounds i8, ptr %ptr, i32 4
     %r = load atomic i16, ptr %gep unordered, align 1
     ret i16 %r
@@ -516,9 +516,9 @@ define i16 @load_atomic_i16_unaligned_monotonic(ptr %ptr) {
 ; GISEL:    add x1, x8, #8
 ; GISEL:    bl __atomic_load
 ;
-; SDAG-LABEL: load_atomic_i16_unaligned_monotonic:
-; SDAG:    add x1, x0, #8
-; SDAG:    bl __atomic_load
+; SDAG-NOAVOIDLDAPUR-LABEL: load_atomic_i16_unaligned_monotonic:
+; SDAG-NOAVOIDLDAPUR:    add x1, x0, #8
+; SDAG-NOAVOIDLDAPUR:    bl __atomic_load
     %gep = getelementptr inbounds i16, ptr %ptr, i32 4
     %r = load atomic i16, ptr %gep monotonic, align 1
     ret i16 %r
@@ -529,9 +529,9 @@ define i16 @load_atomic_i16_unaligned_monotonic_const(ptr readonly %ptr) {
 ; GISEL:    add x1, x8, #8
 ; GISEL:    bl __atomic_load
 ;
-; SDAG-LABEL: load_atomic_i16_unaligned_monotonic_const:
-; SDAG:    add x1, x0, #8
-; SDAG:    bl __atomic_load
+; SDAG-NOAVOIDLDAPUR-LABEL: load_atomic_i16_unaligned_monotonic_const:
+; SDAG-NOAVOIDLDAPUR:    add x1, x0, #8
+; SDAG-NOAVOIDLDAPUR:    bl __atomic_load
     %gep = getelementptr inbounds i16, ptr %ptr, i32 4
     %r = load atomic i16, ptr %gep monotonic, align 1
     ret i16 %r
@@ -542,9 +542,9 @@ define i16 @load_atomic_i16_unaligned_acquire(ptr %ptr) {
 ; GISEL:    add x1, x8, #8
 ; GISEL:    bl __atomic_load
 ;
-; SDAG-LABEL: load_atomic_i16_unaligned_acquire:
-; SDAG:    add x1, x0, #8
-; SDAG:    bl __atomic_load
+; SDAG-NOAVOIDLDAPUR-LABEL: load_atomic_i16_unaligned_acquire:
+; SDAG-NOAVOIDLDAPUR:    add x1, x0, #8
+; SDAG-NOAVOIDLDAPUR:    bl __atomic_load
     %gep = getelementptr inbounds i16, ptr %ptr, i32 4
     %r = load atomic i16, ptr %gep acquire, align 1
     ret i16 %r
@@ -555,9 +555,9 @@ define i16 @load_atomic_i16_unaligned_acquire_const(ptr readonly %ptr) {
 ; GISEL:    add x1, x8, #8
 ; GISEL:    bl __atomic_load
 ;
-; SDAG-LABEL: load_atomic_i16_unaligned_acquire_const:
-; SDAG:    add x1, x0, #8
-; SDAG:    bl __atomic_load
+; SDAG-NOAVOIDLDAPUR-LABEL: load_atomic_i16_unaligned_acquire_const:
+; SDAG-NOAVOIDLDAPUR:    add x1, x0, #8
+; SDAG-NOAVOIDLDAPUR:    bl __atomic_load
     %gep = getelementptr inbounds i16, ptr %ptr, i32 4
     %r = load atomic i16, ptr %gep acquire, align 1
     ret i16 %r
@@ -568,9 +568,9 @@ define i16 @load_atomic_i16_unaligned_seq_cst(ptr %ptr) {
 ; GISEL:    add x1, x8, #8
 ; GISEL:    bl __atomic_load
 ;
-; SDAG-LABEL: load_atomic_i16_unaligned_seq_cst:
-; SDAG:    add x1, x0, #8
-; SDAG:    bl __atomic_load
+; SDAG-NOAVOIDLDAPUR-LABEL: load_atomic_i16_unaligned_seq_cst:
+; SDAG-NOAVOIDLDAPUR:    add x1, x0, #8
+; SDAG-NOAVOIDLDAPUR:    bl __atomic_load
     %gep = getelementptr inbounds i16, ptr %ptr, i32 4
     %r = load atomic i16, ptr %gep seq_cst, align 1
     ret i16 %r
@@ -581,9 +581,9 @@ define i16 @load_atomic_i16_unaligned_seq_cst_const(ptr readonly %ptr) {
 ; GISEL:    add x1, x8, #8
 ; GISEL:    bl __atomic_load
 ;
-; SDAG-LABEL: load_atomic_i16_unaligned_seq_cst_const:
-; SDAG:    add x1, x0, #8
-; SDAG:    bl __atomic_load
+; SDAG-NOAVOIDLDAPUR-LABEL: load_atomic_i16_unaligned_seq_cst_const:
+; SDAG-NOAVOIDLDAPUR:    add x1, x0, #8
+; SDAG-NOAVOIDLDAPUR:    bl __atomic_load
     %gep = getelementptr inbounds i16, ptr %ptr, i32 4
     %r = load atomic i16, ptr %gep seq_cst, align 1
     ret i16 %r
@@ -594,9 +594,9 @@ define i32 @load_atomic_i32_unaligned_unordered(ptr %ptr) {
 ; GISEL:    add x1, x8, #16
 ; GISEL:    bl __atomic_load
 ;
-; SDAG-LABEL: load_atomic_i32_unaligned_unordered:
-; SDAG:    add x1, x0, #16
-; SDAG:    bl __atomic_load
+; SDAG-NOAVOIDLDAPUR-LABEL: load_atomic_i32_unaligned_unordered:
+; SDAG-NOAVOIDLDAPUR:    add x1, x0, #16
+; SDAG-NOAVOIDLDAPUR:    bl __atomic_load
     %gep = getelementptr inbounds i32, ptr %ptr, i32 4
     %r = load atomic i32, ptr %gep unordered, align 1
     ret i32 %r
@@ -607,9 +607,9 @@ define i32 @load_atomic_i32_unaligned_unordered_const(ptr readonly %ptr) {
 ; GISEL:    add x1, x8, #16
 ; GISEL:    bl __atomic_load
 ;
-; SDAG-LABEL: load_atomic_i32_unaligned_unordered_const:
-; SDAG:    add x1, x0, #16
-; SDAG:    bl __atomic_load
+; SDAG-NOAVOIDLDAPUR-LABEL: load_atomic_i32_unaligned_unordered_const:
+; SDAG-NOAVOIDLDAPUR:    add x1, x0, #16
+; SDAG-NOAVOIDLDAPUR:    bl __atomic_load
     %gep = getelementptr inbounds i32, ptr %ptr, i32 4
     %r = load atomic i32, ptr %gep unordered, align 1
     ret i32 %r
@@ -620,9 +620,9 @@ define i32 @load_atomic_i32_unaligned_monotonic(ptr %ptr) {
 ; GISEL:    add x1, x8, #16
 ; GISEL:    bl __atomic_load
 ;
-; SDAG-LABEL: load_atomic_i32_unaligned_monotonic:
-; SDAG:    add x1, x0, #16
-; SDAG:    bl __atomic_load
+; SDAG-NOAVOIDLDAPUR-LABEL: load_atomic_i32_unaligned_monotonic:
+; SDAG-NOAVOIDLDAPUR:    add x1, x0, #16
+; SDAG-NOAVOIDLDAPUR:    bl __atomic_load
     %gep = getelementptr inbounds i32, ptr %ptr, i32 4
     %r = load atomic i32, ptr %gep monotonic, align 1
     ret i32 %r
@@ -633,9 +633,9 @@ define i32 @load_atomic_i32_unaligned_monotonic_const(ptr readonly %ptr) {
 ; GISEL:    add x1, x8, #16
 ; GISEL:    bl __atomic_load
 ;
-; SDAG-LABEL: load_atomic_i32_unaligned_monotonic_const:
-; SDAG:    add x1, x0, #16
-; SDAG:    bl __atomic_load
+; SDAG-NOAVOIDLDAPUR-LABEL: load_atomic_i32_unaligned_monotonic_const:
+; SDAG-NOAVOIDLDAPUR:    add x1, x0, #16
+; SDAG-NOAVOIDLDAPUR:    bl __atomic_load
     %gep = getelementptr inbounds i32, ptr %ptr, i32 4
     %r = load atomic i32, ptr %gep monotonic, align 1
     ret i32 %r
@@ -646,9 +646,9 @@ define i32 @load_atomic_i32_unaligned_acquire(ptr %ptr) {
 ; GISEL:    add x1, x8, #16
 ; GISEL:    bl __atomic_load
 ;
-; SDAG-LABEL: load_atomic_i32_unaligned_acquire:
-; SDAG:    add x1, x0, #16
-; SDAG:    bl __atomic_load
+; SDAG-NOAVOIDLDAPUR-LABEL: load_atomic_i32_unaligned_acquire:
+; SDAG-NOAVOIDLDAPUR:    add x1, x0, #16
+; SDAG-NOAVOIDLDAPUR:    bl __atomic_load
     %gep = getelementptr inbounds i32, ptr %ptr, i32 4
     %r = load atomic i32, ptr %gep acquire, align 1
     ret i32 %r
@@ -659,9 +659,9 @@ define i32 @load_atomic_i32_unaligned_acquire_const(ptr readonly %ptr) {
 ; GISEL:    add x1, x8, #16
 ; GISEL:    bl __atomic_load
 ;
-; SDAG-LABEL: load_atomic_i32_unaligned_acquire_const:
-; SDAG:    add x1, x0, #16
-; SDAG:    bl __atomic_load
+; SDAG-NOAVOIDLDAPUR-LABEL: load_atomic_i32_unaligned_acquire_const:
+; SDAG-NOAVOIDLDAPUR:    add x1, x0, #16
+; SDAG-NOAVOIDLDAPUR:    bl __atomic_load
     %gep = getelementptr inbounds i32, ptr %ptr, i32 4
     %r = load atomic i32, ptr %gep acquire, align 1
     ret i32 %r
@@ -672,9 +672,9 @@ define i32 @load_atomic_i32_unaligned_seq_cst(ptr %ptr) {
 ; GISEL:    add x1, x8, #16
 ; GISEL:    bl __atomic_load
 ;
-; SDAG-LABEL: load_atomic_i32_unaligned_seq_cst:
-; SDAG:    add x1, x0, #16
-; SDAG:    bl __atomic_load
+; SDAG-NOAVOIDLDAPUR-LABEL: load_atomic_i32_unaligned_seq_cst:
+; SDAG-NOAVOIDLDAPUR:    add x1, x0, #16
+; SDAG-NOAVOIDLDAPUR:    bl __atomic_load
     %gep = getelementptr inbounds i32, ptr %ptr, i32 4
     %r = load atomic i32, ptr %gep seq_cst, align 1
     ret i32 %r
@@ -685,9 +685,9 @@ define i32 @load_atomic_i32_unaligned_seq_cst_const(ptr readonly %ptr) {
 ; GISEL:    add x1, x8, #16
 ; GISEL:    bl __atomic_load
 ;
-; SDAG-LABEL: load_atomic_i32_unaligned_seq_cst_const:
-; SDAG:    add x1, x0, #16
-; SDAG:    bl __atomic_load
+; SDAG-NOAVOIDLDAPUR-LABEL: load_atomic_i32_unaligned_seq_cst_const:
+; SDAG-NOAVOIDLDAPUR:    add x1, x0, #16
+; SDAG-NOAVOIDLDAPUR:    bl __atomic_load
     %gep = getelementptr inbounds i32, ptr %ptr, i32 4
     %r = load atomic i32, ptr %gep seq_cst, align 1
     ret i32 %r
@@ -698,9 +698,9 @@ define i64 @load_atomic_i64_unaligned_unordered(ptr %ptr) {
 ; GISEL:    add x1, x8, #32
 ; GISEL:    bl __atomic_load
 ;
-; SDAG-LABEL: load_atomic_i64_unaligned_unordered:
-; SDAG:    add x1, x0, #32
-; SDAG:    bl __atomic_load
+; SDAG-NOAVOIDLDAPUR-LABEL: load_atomic_i64_unaligned_unordered:
+; SDAG-NOAVOIDLDAPUR:    add x1, x0, #32
+; SDAG-NOAVOIDLDAPUR:    bl __atomic_load
     %gep = getelementptr inbounds i64, ptr %ptr, i32 4
     %r = load atomic i64, ptr %gep unordered, align 1
     ret i64 %r
@@ -711,9 +711,9 @@ define i64 @load_atomic_i64_unaligned_unordered_const(ptr readonly %ptr) {
 ; GISEL:    add x1, x8, #32
 ; GISEL:    bl __atomic_load
 ;
-; SDAG-LABEL: load_atomic_i64_unaligned_unordered_const:
-; SDAG:    add x1, x0, #32
-; SDAG:    bl __atomic_load
+; SDAG-NOAVOIDLDAPUR-LABEL: load_atomic_i64_unaligned_unordered_const:
+; SDAG-NOAVOIDLDAPUR:    add x1, x0, #32
+; SDAG-NOAVOIDLDAPUR:    bl __atomic_load
     %gep = getelementptr inbounds i64, ptr %ptr, i32 4
     %r = load atomic i64, ptr %gep unordered, align 1
     ret i64 %r
@@ -724,9 +724,9 @@ define i64 @load_atomic_i64_unaligned_monotonic(ptr %ptr) {
 ; GISEL:    add x1, x8, #32
 ; GISEL:    bl __atomic_load
 ;
-; SDAG-LABEL: load_atomic_i64_unaligned_monotonic:
-; SDAG:    add x1, x0, #32
-; SDAG:    bl __atomic_load
+; SDAG-NOAVOIDLDAPUR-LABEL: load_atomic_i64_unaligned_monotonic:
+; SDAG-NOAVOIDLDAPUR:    add x1, x0, #32
+; SDAG-NOAVOIDLDAPUR:    bl __atomic_load
     %gep = getelementptr inbounds i64, ptr %ptr, i32 4
     %r = load atomic i64, ptr %gep monotonic, align 1
     ret i64 %r
@@ -737,9 +737,9 @@ define i64 @load_atomic_i64_unaligned_monotonic_const(ptr readonly %ptr) {
 ; GISEL:    add x1, x8, #32
 ; GISEL:    bl __atomic_load
 ;
-; SDAG-LABEL: load_atomic_i64_unaligned_monotonic_const:
-; SDAG:    add x1, x0, #32
-; SDAG:    bl __atomic_load
+; SDAG-NOAVOIDLDAPUR-LABEL: load_atomic_i64_unaligned_monotonic_const:
+; SDAG-NOAVOIDLDAPUR:    add x1, x0, #32
+; SDAG-NOAVOIDLDAPUR:    bl __atomic_load
     %gep = getelementptr inbounds i64, ptr %ptr, i32 4
     %r = load atomic i64, ptr %gep monotonic, align 1
     ret i64 %r
@@ -750,9 +750,9 @@ define i64 @load_atomic_i64_unaligned_acquire(ptr %ptr) {
 ; GISEL:    add x1, x8, #32
 ; GISEL:    bl __atomic_load
 ;
-; SDAG-LABEL: load_atomic_i64_unaligned_acquire:
-; SDAG:    add x1, x0, #32
-; SDAG:    bl __atomic_load
+; SDAG-NOAVOIDLDAPUR-LABEL: load_atomic_i64_unaligned_acquire:
+; SDAG-NOAVOIDLDAPUR:    add x1, x0, #32
+; SDAG-NOAVOIDLDAPUR:    bl __atomic_load
     %gep = getelementptr inbounds i64, ptr %ptr, i32 4
     %r = load atomic i64, ptr %gep acquire, align 1
     ret i64 %r
@@ -763,9 +763,9 @@ define i64 @load_atomic_i64_unaligned_acquire_const(ptr readonly %ptr) {
 ; GISEL:    add x1, x8, #32
 ; GISEL:    bl __atomic_load
 ;
-; SDAG-LABEL: load_atomic_i64_unaligned_acquire_const:
-; SDAG:    add x1, x0, #32
-; SDAG:    bl __atomic_load
+; SDAG-NOAVOIDLDAPUR-LABEL: load_atomic_i64_unaligned_acquire_const:
+; SDAG-NOAVOIDLDAPUR:    add x1, x0, #32
+; SDAG-NOAVOIDLDAPUR:    bl __atomic_load
     %gep = getelementptr inbounds i64, ptr %ptr, i32 4
     %r = load atomic i64, ptr %gep acquire, align 1
     ret i64 %r
@@ -776,9 +776,9 @@ define i64 @load_atomic_i64_unaligned_seq_cst(ptr %ptr) {
 ; GISEL:    add x1, x8, #32
 ; GISEL:    bl __atomic_load
 ;
-; SDAG-LABEL: load_atomic_i64_unaligned_seq_cst:
-; SDAG:    add x1, x0, #32
-; SDAG:    bl __atomic_load
+; SDAG-NOAVOIDLDAPUR-LABEL: load_atomic_i64_unaligned_seq_cst:
+; SDAG-NOAVOIDLDAPUR:    add x1, x0, #32
+; SDAG-NOAVOIDLDAPUR:    bl __atomic_load
     %gep = getelementptr inbounds i64, ptr %ptr, i32 4
     %r = load atomic i64, ptr %gep seq_cst, align 1
     ret i64 %r
@@ -789,9 +789,9 @@ define i64 @load_atomic_i64_unaligned_seq_cst_const(ptr readonly %ptr) {
 ; GISEL:    add x1, x8, #32
 ; GISEL:    bl __atomic_load
 ;
-; SDAG-LABEL: load_atomic_i64_unaligned_seq_cst_const:
-; SDAG:    add x1, x0, #32
-; SDAG:    bl __atomic_load
+; SDAG-NOAVOIDLDAPUR-LABEL: load_atomic_i64_unaligned_seq_cst_const:
+; SDAG-NOAVOIDLDAPUR:    add x1, x0, #32
+; SDAG-NOAVOIDLDAPUR:    bl __atomic_load
     %gep = getelementptr inbounds i64, ptr %ptr, i32 4
     %r = load atomic i64, ptr %gep seq_cst, align 1
     ret i64 %r
@@ -802,9 +802,9 @@ define i128 @load_atomic_i128_unaligned_unordered(ptr %ptr) {
 ; GISEL:    add x1, x8, #64
 ; GISEL:    bl __atomic_load
 ;
-; SDAG-LABEL: load_atomic_i128_unaligned_unordered:
-; SDAG:    add x1, x0, #64
-; SDAG:    bl __atomic_load
+; SDAG-NOAVOIDLDAPUR-LABEL: load_atomic_i128_unaligned_unordered:
+; SDAG-NOAVOIDLDAPUR:    add x1, x0, #64
+; SDAG-NOAVOIDLDAPUR:    bl __atomic_load
     %gep = getelementptr inbounds i128, ptr %ptr, i32 4
     %r = load atomic i128, ptr %gep unordered, align 1
     ret i128 %r
@@ -815,9 +815,9 @@ define i128 @load_atomic_i128_unaligned_unordered_const(ptr readonly %ptr) {
 ; GISEL:    add x1, x8, #64
 ; GISEL:    bl __atomic_load
 ;
-; SDAG-LABEL: load_atomic_i128_unaligned_unordered_const:
-; SDAG:    add x1, x0, #64
-; SDAG:    bl __atomic_load
+; SDAG-NOAVOIDLDAPUR-LABEL: load_atomic_i128_unaligned_unordered_const:
+; SDAG-NOAVOIDLDAPUR:    add x1, x0, #64
+; SDAG-NOAVOIDLDAPUR:    bl __atomic_load
     %gep = getelementptr inbounds i128, ptr %ptr, i32 4
     %r = load atomic i128, ptr %gep unordered, align 1
     ret i128 %r
@@ -828,9 +828,9 @@ define i128 @load_atomic_i128_unaligned_monotonic(ptr %ptr) {
 ; GISEL:    add x1, x8, #64
 ; GISEL:    bl __atomic_load
 ;
-; SDAG-LABEL: load_atomic_i128_unaligned_monotonic:
-; SDAG:    add x1, x0, #64
-; SDAG:    bl __atomic_load
+; SDAG-NOAVOIDLDAPUR-LABEL: load_atomic_i128_unaligned_monotonic:
+; SDAG-NOAVOIDLDAPUR:    add x1, x0, #64
+; SDAG-NOAVOIDLDAPUR:    bl __atomic_load
     %gep = getelementptr inbounds i128, ptr %ptr, i32 4
     %r = load atomic i128, ptr %gep monotonic, align 1
     ret i128 %r
@@ -841,9 +841,9 @@ define i128 @load_atomic_i128_unaligned_monotonic_const(ptr readonly %ptr) {
 ; GISEL:    add x1, x8, #64
 ; GISEL:    bl __atomic_load
 ;
-; SDAG-LABEL: load_atomic_i128_unaligned_monotonic_const:
-; SDAG:    add x1, x0, #64
-; SDAG:    bl __atomic_load
+; SDAG-NOAVOIDLDAPUR-LABEL: load_atomic_i128_unaligned_monotonic_const:
+; SDAG-NOAVOIDLDAPUR:    add x1, x0, #64
+; SDAG-NOAVOIDLDAPUR:    bl __atomic_load
     %gep = getelementptr inbounds i128, ptr %ptr, i32 4
     %r = load atomic i128, ptr %gep monotonic, align 1
     ret i128 %r
@@ -854,9 +854,9 @@ define i128 @load_atomic_i128_unaligned_acquire(ptr %ptr) {
 ; GISEL:    add x1, x8, #64
 ; GISEL:    bl __atomic_load
 ;
-; SDAG-LABEL: load_atomic_i128_unaligned_acquire:
-; SDAG:    add x1, x0, #64
-; SDAG:    bl __atomic_load
+; SDAG-NOAVOIDLDAPUR-LABEL: load_atomic_i128_unaligned_acquire:
+; SDAG-NOAVOIDLDAPUR:    add x1, x0, #64
+; SDAG-NOAVOIDLDAPUR:    bl __atomic_load
     %gep = getelementptr inbounds i128, ptr %ptr, i32 4
     %r = load atomic i128, ptr %gep acquire, align 1
     ret i128 %r
@@ -867,9 +867,9 @@ define i128 @load_atomic_i128_unaligned_acquire_const(ptr readonly %ptr) {
 ; GISEL:    add x1, x8, #64
 ; GISEL:    bl __atomic_load
 ;
-; SDAG-LABEL: load_atomic_i128_unaligned_acquire_const:
-; SDAG:    add x1, x0, #64
-; SDAG:    bl __atomic_load
+; SDAG-NOAVOIDLDAPUR-LABEL: load_atomic_i128_unaligned_acquire_const:
+; SDAG-NOAVOIDLDAPUR:    add x1, x0, #64
+; SDAG-NOAVOIDLDAPUR:    bl __atomic_load
     %gep = getelementptr inbounds i128, ptr %ptr, i32 4
     %r = load atomic i128, ptr %gep acquire, align 1
     ret i128 %r
@@ -880,9 +880,9 @@ define i128 @load_atomic_i128_unaligned_seq_cst(ptr %ptr) {
 ; GISEL:    add x1, x8, #64
 ; GISEL:    bl __atomic_load
 ;
-; SDAG-LABEL: load_atomic_i128_unaligned_seq_cst:
-; SDAG:    add x1, x0, #64
-; SDAG:    bl __atomic_load
+; SDAG-NOAVOIDLDAPUR-LABEL: load_atomic_i128_unaligned_seq_cst:
+; SDAG-NOAVOIDLDAPUR:    add x1, x0, #64
+; SDAG-NOAVOIDLDAPUR:    bl __atomic_load
     %gep = getelementptr inbounds i128, ptr %ptr, i32 4
     %r = load atomic i128, ptr %gep seq_cst, align 1
     ret i128 %r
@@ -893,9 +893,9 @@ define i128 @load_atomic_i128_unaligned_seq_cst_const(ptr readonly %ptr) {
 ; GISEL:    add x1, x8, #64
 ; GISEL:    bl __atomic_load
 ;
-; SDAG-LABEL: load_atomic_i128_unaligned_seq_cst_const:
-; SDAG:    add x1, x0, #64
-; SDAG:    bl __atomic_load
+; SDAG-NOAVOIDLDAPUR-LABEL: load_atomic_i128_unaligned_seq_cst_const:
+; SDAG-NOAVOIDLDAPUR:    add x1, x0, #64
+; SDAG-NOAVOIDLDAPUR:    bl __atomic_load
     %gep = getelementptr inbounds i128, ptr %ptr, i32 4
     %r = load atomic i128, ptr %gep seq_cst, align 1
     ret i128 %r
@@ -907,14 +907,14 @@ define i8 @load_atomic_i8_from_gep() {
 ; GISEL:    add x8, x8, #1
 ; GISEL:    ldaprb w0, [x8]
 ;
-; SDAG-NOAVOIDLDAPUR-LABEL: load_atomic_i8_from_gep:
-; SDAG-NOAVOIDLDAPUR:    bl init
-; SDAG-NOAVOIDLDAPUR:    ldapurb w0, [sp, #13]
-;
 ; SDAG-AVOIDLDAPUR-LABEL: load_atomic_i8_from_gep:
 ; SDAG-AVOIDLDAPUR:    bl init
 ; SDAG-AVOIDLDAPUR:    orr x8, x19, #0x1
 ; SDAG-AVOIDLDAPUR:    ldaprb w0, [x8]
+;
+; SDAG-NOAVOIDLDAPUR-LABEL: load_atomic_i8_from_gep:
+; SDAG-NOAVOIDLDAPUR:    bl init
+; SDAG-NOAVOIDLDAPUR:    ldapurb w0, [sp, #13]
   %a = alloca [3 x i8]
   call void @init(ptr %a)
   %arrayidx  = getelementptr [3 x i8], ptr %a, i64 0, i64 1
@@ -928,14 +928,14 @@ define i16 @load_atomic_i16_from_gep() {
 ; GISEL:    add x8, x8, #2
 ; GISEL:    ldaprh w0, [x8]
 ;
-; SDAG-NOAVOIDLDAPUR-LABEL: load_atomic_i16_from_gep:
-; SDAG-NOAVOIDLDAPUR:    bl init
-; SDAG-NOAVOIDLDAPUR:    ldapurh w0, [sp, #10]
-;
 ; SDAG-AVOIDLDAPUR-LABEL: load_atomic_i16_from_gep:
 ; SDAG-AVOIDLDAPUR:    bl init
 ; SDAG-AVOIDLDAPUR:    orr x8, x19, #0x2
 ; SDAG-AVOIDLDAPUR:    ldaprh w0, [x8]
+;
+; SDAG-NOAVOIDLDAPUR-LABEL: load_atomic_i16_from_gep:
+; SDAG-NOAVOIDLDAPUR:    bl init
+; SDAG-NOAVOIDLDAPUR:    ldapurh w0, [sp, #10]
   %a = alloca [3 x i16]
   call void @init(ptr %a)
   %arrayidx  = getelementptr [3 x i16], ptr %a, i64 0, i64 1
@@ -948,14 +948,14 @@ define i32 @load_atomic_i32_from_gep() {
 ; GISEL:    bl init
 ; GISEL:    ldapur w0, [x8, #4]
 ;
-; SDAG-NOAVOIDLDAPUR-LABEL: load_atomic_i32_from_gep:
-; SDAG-NOAVOIDLDAPUR:    bl init
-; SDAG-NOAVOIDLDAPUR:    ldapur w0, [sp, #8]
-;
 ; SDAG-AVOIDLDAPUR-LABEL: load_atomic_i32_from_gep:
 ; SDAG-AVOIDLDAPUR:    bl init
 ; SDAG-AVOIDLDAPUR:    add x8, x19, #4
 ; SDAG-AVOIDLDAPUR:    ldapr w0, [x8]
+;
+; SDAG-NOAVOIDLDAPUR-LABEL: load_atomic_i32_from_gep:
+; SDAG-NOAVOIDLDAPUR:    bl init
+; SDAG-NOAVOIDLDAPUR:    ldapur w0, [sp, #8]
   %a = alloca [3 x i32]
   call void @init(ptr %a)
   %arrayidx  = getelementptr [3 x i32], ptr %a, i64 0, i64 1
@@ -968,14 +968,14 @@ define i64 @load_atomic_i64_from_gep() {
 ; GISEL:    bl init
 ; GISEL:    ldapur x0, [x8, #8]
 ;
-; SDAG-NOAVOIDLDAPUR-LABEL: load_atomic_i64_from_gep:
-; SDAG-NOAVOIDLDAPUR:    bl init
-; SDAG-NOAVOIDLDAPUR:    ldapur x0, [sp, #16]
-;
 ; SDAG-AVOIDLDAPUR-LABEL: load_atomic_i64_from_gep:
 ; SDAG-AVOIDLDAPUR:    bl init
 ; SDAG-AVOIDLDAPUR:    add x8, x19, #8
 ; SDAG-AVOIDLDAPUR:    ldapr x0, [x8]
+;
+; SDAG-NOAVOIDLDAPUR-LABEL: load_atomic_i64_from_gep:
+; SDAG-NOAVOIDLDAPUR:    bl init
+; SDAG-NOAVOIDLDAPUR:    ldapur x0, [sp, #16]
   %a = alloca [3 x i64]
   call void @init(ptr %a)
   %arrayidx  = getelementptr [3 x i64], ptr %a, i64 0, i64 1
diff --git a/llvm/test/CodeGen/AArch64/selectopt-const.ll b/llvm/test/CodeGen/AArch64/selectopt-const.ll
index f10327e136ad1..96288a26f2530 100644
--- a/llvm/test/CodeGen/AArch64/selectopt-const.ll
+++ b/llvm/test/CodeGen/AArch64/selectopt-const.ll
@@ -7,30 +7,30 @@ define i32 @test_const(ptr %in1, ptr %in2, ptr %out, i32 %n, ptr %tbl) {
 ; CHECK-NEXT:    cmp w3, #1
 ; CHECK-NEXT:    b.lt .LBB0_3
 ; CHECK-NEXT:  // %bb.1: // %for.body.preheader
-; CHECK-NEXT:    mov w9, #1267 // =0x4f3
-; CHECK-NEXT:    fmov s1, #1.00000000
-; CHECK-NEXT:    fmov d2, #5.00000000
 ; CHECK-NEXT:    mov w8, w3
+; CHECK-NEXT:    mov w9, #1267 // =0x4f3
 ; CHECK-NEXT:    movk w9, #16309, lsl #16
 ; CHECK-NEXT:    fmov s0, w9
+; CHECK-NEXT:    fmov s1, #1.00000000
+; CHECK-NEXT:    fmov d2, #5.00000000
 ; CHECK-NEXT:    .p2align 5, , 16
 ; CHECK-NEXT:  .LBB0_2: // %for.body
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldr s4, [x1], #4
 ; CHECK-NEXT:    ldr w9, [x0], #4
 ; CHECK-NEXT:    add w9, w9, #10
 ; CHECK-NEXT:    scvtf d3, w9
+; CHECK-NEXT:    ldr s4, [x1], #4
 ; CHECK-NEXT:    fmadd s4, s4, s0, s1
 ; CHECK-NEXT:    fabs s4, s4
 ; CHECK-NEXT:    fcvt d4, s4
 ; CHECK-NEXT:    fdiv d3, d3, d4
 ; CHECK-NEXT:    fcmp d3, d2
 ; CHECK-NEXT:    cset w9, lt
-; CHECK-NEXT:    subs x8, x8, #1
 ; CHECK-NEXT:    ubfiz x9, x9, #4, #32
 ; CHECK-NEXT:    ldr s3, [x4, x9]
 ; CHECK-NEXT:    fcvtzs w9, s3
 ; CHECK-NEXT:    str w9, [x2], #4
+; CHECK-NEXT:    subs x8, x8, #1
 ; CHECK-NEXT:    b.ne .LBB0_2
 ; CHECK-NEXT:  .LBB0_3: // %for.cond.cleanup
 ; CHECK-NEXT:    mov w0, wzr
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-vector-zext.ll b/llvm/test/CodeGen/AArch64/sve-fixed-vector-zext.ll
index 1ab2589bccd5b..4870b828f2f80 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-vector-zext.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-vector-zext.ll
@@ -16,12 +16,12 @@ define internal i32 @test(ptr nocapture readonly %p1, i32 %i1, ptr nocapture rea
 ; SVE256-NEXT:  uaddv   d0, p1, z0.s
 
 ; NEON-LABEL: test:
-; NEON:       ldr q0, [x0, w9, sxtw]
-; NEON:       ldr q1, [x2, w10, sxtw]
-; NEON:       usubl2  v2.8h, v0.16b, v1.16b
-; NEON-NEXT:  usubl   v0.8h, v0.8b, v1.8b
-; NEON:       saddl2  v1.4s, v0.8h, v2.8h
-; NEON-NEXT:  saddl   v0.4s, v0.4h, v2.4h
+; NEON:       ldr {{q[0-9]}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw]
+; NEON:       ldr {{q[0-9]}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw]
+; NEON:       usubl2  v2.8h, {{v[0-9]}}.16b, {{v[0-9]}}.16b
+; NEON-NEXT:  usubl   v0.8h, {{v[0-9]}}.8b, {{v[0-9]}}.8b
+; NEON:       saddl2  v1.4s, {{v[0-9]}}.8h, {{v[0-9]}}.8h
+; NEON-NEXT:  saddl   v0.4s, {{v[0-9]}}.4h, {{v[0-9]}}.4h
 ; NEON-NEXT:  add v0.4s, v0.4s, v1.4s
 ; NEON-NEXT:  addv    s0, v0.4s
 


