[llvm] [AArch64][GlobalISel] Vector Constant Materialization (PR #67149)

Mark Harley via llvm-commits llvm-commits at lists.llvm.org
Mon Sep 25 02:13:48 PDT 2023


https://github.com/MarkAHarley updated https://github.com/llvm/llvm-project/pull/67149

>From db28871e27bbb38032d32c66d88e5023ede2fe84 Mon Sep 17 00:00:00 2001
From: Mark Harley <mark.harley at arm.com>
Date: Thu, 21 Sep 2023 11:21:06 +0100
Subject: [PATCH 1/2] [AArch64][GlobalISel] Vector Constant Materialization
 Vector constants are always lowered via constant pool loads. This patch
 selects MOVI/MVNI in more cases where appropriate.

---
 .../GISel/AArch64InstructionSelector.cpp      |  219 ++++
 .../AArch64/GlobalISel/combine-udiv.ll        |   85 +-
 ...legalizer-lowering-build-vector-to-dup.mir |  143 ++-
 .../CodeGen/AArch64/GlobalISel/select-dup.mir |  204 +--
 .../AArch64/GlobalISel/select-vector-icmp.mir | 1089 +++++++++--------
 .../GlobalISel/select-vector-shift.mir        |  251 ++--
 .../CodeGen/AArch64/arm64-subvector-extend.ll |   11 +-
 llvm/test/CodeGen/AArch64/bool-ext-inc.ll     |    5 +-
 llvm/test/CodeGen/AArch64/mul_pow2.ll         |    7 +-
 .../AArch64/neon-bitwise-instructions.ll      |  153 +--
 .../AArch64/neon-compare-instructions.ll      |  311 ++---
 llvm/test/CodeGen/AArch64/neon-mov.ll         |   94 +-
 llvm/test/CodeGen/AArch64/zext.ll             |  140 +--
 13 files changed, 1422 insertions(+), 1290 deletions(-)

diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 141821afa9c5915..7462965e5243e48 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -170,6 +170,21 @@ class AArch64InstructionSelector : public InstructionSelector {
                                    MachineIRBuilder &MIRBuilder,
                                    MachineRegisterInfo &MRI);
 
+  MachineInstr *tryAdvSIMDModImm8(Register Dst, unsigned DstSize, APInt Bits,
+                                  MachineIRBuilder &MIRBuilder);
+
+  MachineInstr *tryAdvSIMDModImm16(Register Dst, unsigned DstSize, APInt Bits,
+                                   MachineIRBuilder &MIRBuilder, bool inv);
+
+  MachineInstr *tryAdvSIMDModImm32(Register Dst, unsigned DstSize, APInt Bits,
+                                   MachineIRBuilder &MIRBuilder, bool inv);
+  MachineInstr *tryAdvSIMDModImm64(Register Dst, unsigned DstSize, APInt Bits,
+                                   MachineIRBuilder &MIRBuilder);
+  MachineInstr *tryAdvSIMDModImm321s(Register Dst, unsigned DstSize, APInt Bits,
+                                     MachineIRBuilder &MIRBuilder, bool inv);
+  MachineInstr *tryAdvSIMDModImmFP(Register Dst, unsigned DstSize, APInt Bits,
+                                   MachineIRBuilder &MIRBuilder);
+
   bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI);
   bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
                               MachineRegisterInfo &MRI);
@@ -5433,6 +5448,188 @@ bool AArch64InstructionSelector::selectInsertElt(MachineInstr &I,
   return true;
 }
 
+MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm8(
+    Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &builder) {
+  unsigned int Op;
+  if (DstSize == 128) {
+    if (Bits.getHiBits(64) != Bits.getLoBits(64))
+      return nullptr;
+    Op = AArch64::MOVIv16b_ns;
+  } else {
+    Op = AArch64::MOVIv8b_ns;
+  }
+
+  uint64_t val = Bits.zextOrTrunc(64).getZExtValue();
+
+  if (AArch64_AM::isAdvSIMDModImmType9(val)) {
+    val = AArch64_AM::encodeAdvSIMDModImmType9(val);
+    auto Mov = builder.buildInstr(Op, {Dst}, {}).addImm(val);
+    constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
+    return &*Mov;
+  }
+  return nullptr;
+}
+
+MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm16(
+    Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &builder,
+    bool inv) {
+
+  unsigned int Op;
+  if (DstSize == 128) {
+    if (Bits.getHiBits(64) != Bits.getLoBits(64))
+      return nullptr;
+    Op = inv ? AArch64::MVNIv8i16 : AArch64::MOVIv8i16;
+  } else {
+    Op = inv ? AArch64::MVNIv4i16 : AArch64::MOVIv4i16;
+  }
+
+  uint64_t val = Bits.zextOrTrunc(64).getZExtValue();
+
+  bool isAdvSIMDModImm = false;
+  uint64_t Shift;
+
+  if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType5(val))) {
+    val = AArch64_AM::encodeAdvSIMDModImmType5(val);
+    Shift = 0;
+  } else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType6(val))) {
+    val = AArch64_AM::encodeAdvSIMDModImmType6(val);
+    Shift = 8;
+  }
+
+  if (isAdvSIMDModImm) {
+    auto Mov = builder.buildInstr(Op, {Dst}, {}).addImm(val).addImm(Shift);
+    constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
+    return &*Mov;
+  }
+  return nullptr;
+}
+
+MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm32(
+    Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &builder,
+    bool inv) {
+
+  unsigned int Op;
+  if (DstSize == 128) {
+    if (Bits.getHiBits(64) != Bits.getLoBits(64))
+      return nullptr;
+    Op = inv ? AArch64::MVNIv4i32 : AArch64::MOVIv4i32;
+  } else {
+    Op = inv ? AArch64::MVNIv2i32 : AArch64::MOVIv2i32;
+  }
+
+  uint64_t val = Bits.zextOrTrunc(64).getZExtValue();
+  bool isAdvSIMDModImm = false;
+  uint64_t Shift;
+
+  if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType1(val))) {
+    val = AArch64_AM::encodeAdvSIMDModImmType1(val);
+    Shift = 0;
+  } else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType2(val))) {
+    val = AArch64_AM::encodeAdvSIMDModImmType2(val);
+    Shift = 8;
+  } else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType3(val))) {
+    val = AArch64_AM::encodeAdvSIMDModImmType3(val);
+    Shift = 16;
+  } else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType4(val))) {
+    val = AArch64_AM::encodeAdvSIMDModImmType4(val);
+    Shift = 24;
+  }
+
+  if (isAdvSIMDModImm) {
+    auto Mov = builder.buildInstr(Op, {Dst}, {}).addImm(val).addImm(Shift);
+    constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
+    return &*Mov;
+  }
+  return nullptr;
+}
+
+MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm64(
+    Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &builder) {
+
+  unsigned int Op;
+  if (DstSize == 128) {
+    if (Bits.getHiBits(64) != Bits.getLoBits(64))
+      return nullptr;
+    Op = AArch64::MOVIv2d_ns;
+  } else {
+    Op = AArch64::MOVID;
+  }
+
+  uint64_t val = Bits.zextOrTrunc(64).getZExtValue();
+  if (AArch64_AM::isAdvSIMDModImmType10(val)) {
+    val = AArch64_AM::encodeAdvSIMDModImmType10(val);
+    auto Mov = builder.buildInstr(Op, {Dst}, {}).addImm(val);
+    constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
+    return &*Mov;
+  }
+  return nullptr;
+}
+
+MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm321s(
+    Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &builder,
+    bool inv) {
+
+  unsigned int Op;
+  if (DstSize == 128) {
+    if (Bits.getHiBits(64) != Bits.getLoBits(64))
+      return nullptr;
+    Op = inv ? AArch64::MVNIv4s_msl : AArch64::MOVIv4s_msl;
+  } else {
+    Op = inv ? AArch64::MVNIv2s_msl : AArch64::MOVIv2s_msl;
+  }
+
+  uint64_t val = Bits.zextOrTrunc(64).getZExtValue();
+  bool isAdvSIMDModImm = false;
+  uint64_t Shift;
+
+  if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType7(val))) {
+    val = AArch64_AM::encodeAdvSIMDModImmType7(val);
+    Shift = 264;
+  } else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType8(val))) {
+    val = AArch64_AM::encodeAdvSIMDModImmType8(val);
+    Shift = 272;
+  }
+  if (isAdvSIMDModImm) {
+    auto Mov = builder.buildInstr(Op, {Dst}, {}).addImm(val).addImm(Shift);
+    constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
+    return &*Mov;
+  }
+  return nullptr;
+}
+
+MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImmFP(
+    Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &builder) {
+
+  unsigned int Op;
+  bool isWide = false;
+  if (DstSize == 128) {
+    if (Bits.getHiBits(64) != Bits.getLoBits(64))
+      return nullptr;
+    // TODO: Handle v4f32 (FMOVv4f32_ns); currently 128-bit falls through to v2f64 only.
+    Op = AArch64::FMOVv2f64_ns;
+    isWide = true;
+  } else {
+    Op = AArch64::FMOVv2f32_ns;
+  }
+
+  uint64_t val = Bits.zextOrTrunc(64).getZExtValue();
+  bool isAdvSIMDModImm = false;
+
+  if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType11(val))) {
+    val = AArch64_AM::encodeAdvSIMDModImmType11(val);
+  } else if (isWide &&
+             (isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType12(val))) {
+    val = AArch64_AM::encodeAdvSIMDModImmType12(val);
+  }
+
+  if (isAdvSIMDModImm) {
+    auto Mov = builder.buildInstr(Op, {Dst}, {}).addImm(val);
+    constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
+    return &*Mov;
+  }
+  return nullptr;
+}
+
 MachineInstr *
 AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
                                                MachineIRBuilder &MIRBuilder,
@@ -5459,6 +5656,28 @@ AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
     }
   }
 
+  if (CV->getSplatValue()) {
+    APInt DefBits = APInt::getSplat(DstSize, CV->getUniqueInteger());
+    MachineInstr *NewOp;
+    bool inv = false;
+    if ((NewOp = tryAdvSIMDModImm64(Dst, DstSize, DefBits, MIRBuilder)) ||
+        (NewOp = tryAdvSIMDModImm32(Dst, DstSize, DefBits, MIRBuilder, inv)) ||
+        (NewOp =
+             tryAdvSIMDModImm321s(Dst, DstSize, DefBits, MIRBuilder, inv)) ||
+        (NewOp = tryAdvSIMDModImm16(Dst, DstSize, DefBits, MIRBuilder, inv)) ||
+        (NewOp = tryAdvSIMDModImm8(Dst, DstSize, DefBits, MIRBuilder)) ||
+        (NewOp = tryAdvSIMDModImmFP(Dst, DstSize, DefBits, MIRBuilder)))
+      return NewOp;
+
+    DefBits = ~DefBits;
+    inv = true;
+    if ((NewOp = tryAdvSIMDModImm32(Dst, DstSize, DefBits, MIRBuilder, inv)) ||
+        (NewOp =
+             tryAdvSIMDModImm321s(Dst, DstSize, DefBits, MIRBuilder, inv)) ||
+        (NewOp = tryAdvSIMDModImm16(Dst, DstSize, DefBits, MIRBuilder, inv)))
+      return NewOp;
+  }
+
   auto *CPLoad = emitLoadFromConstantPool(CV, MIRBuilder);
   if (!CPLoad) {
     LLVM_DEBUG(dbgs() << "Could not generate cp load for constant vector!");
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll b/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll
index 56d851c52bb67a5..8aea944b55c2d0b 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll
@@ -56,31 +56,30 @@ define <8 x i16> @combine_vec_udiv_nonuniform(<8 x i16> %x) {
 ;
 ; GISEL-LABEL: combine_vec_udiv_nonuniform:
 ; GISEL:       // %bb.0:
-; GISEL-NEXT:    adrp x8, .LCPI1_4
-; GISEL-NEXT:    adrp x9, .LCPI1_0
-; GISEL-NEXT:    ldr q1, [x8, :lo12:.LCPI1_4]
 ; GISEL-NEXT:    adrp x8, .LCPI1_3
-; GISEL-NEXT:    ldr q2, [x8, :lo12:.LCPI1_3]
+; GISEL-NEXT:    ldr q1, [x8, :lo12:.LCPI1_3]
 ; GISEL-NEXT:    adrp x8, .LCPI1_2
+; GISEL-NEXT:    ldr q2, [x8, :lo12:.LCPI1_2]
+; GISEL-NEXT:    adrp x8, .LCPI1_1
 ; GISEL-NEXT:    neg v1.8h, v1.8h
 ; GISEL-NEXT:    ushl v1.8h, v0.8h, v1.8h
 ; GISEL-NEXT:    umull2 v3.4s, v1.8h, v2.8h
 ; GISEL-NEXT:    umull v1.4s, v1.4h, v2.4h
 ; GISEL-NEXT:    uzp2 v1.8h, v1.8h, v3.8h
-; GISEL-NEXT:    ldr q3, [x8, :lo12:.LCPI1_2]
-; GISEL-NEXT:    adrp x8, .LCPI1_1
+; GISEL-NEXT:    ldr q3, [x8, :lo12:.LCPI1_1]
+; GISEL-NEXT:    adrp x8, .LCPI1_0
 ; GISEL-NEXT:    sub v2.8h, v0.8h, v1.8h
 ; GISEL-NEXT:    umull2 v4.4s, v2.8h, v3.8h
 ; GISEL-NEXT:    umull v2.4s, v2.4h, v3.4h
-; GISEL-NEXT:    ldr q3, [x8, :lo12:.LCPI1_1]
-; GISEL-NEXT:    adrp x8, .LCPI1_5
+; GISEL-NEXT:    ldr q3, [x8, :lo12:.LCPI1_0]
+; GISEL-NEXT:    adrp x8, .LCPI1_4
 ; GISEL-NEXT:    uzp2 v2.8h, v2.8h, v4.8h
-; GISEL-NEXT:    ldr q4, [x9, :lo12:.LCPI1_0]
+; GISEL-NEXT:    ldr q4, [x8, :lo12:.LCPI1_4]
 ; GISEL-NEXT:    add v1.8h, v2.8h, v1.8h
 ; GISEL-NEXT:    neg v2.8h, v3.8h
-; GISEL-NEXT:    ldr q3, [x8, :lo12:.LCPI1_5]
+; GISEL-NEXT:    movi v3.8h, #1
 ; GISEL-NEXT:    ushl v1.8h, v1.8h, v2.8h
-; GISEL-NEXT:    cmeq v2.8h, v3.8h, v4.8h
+; GISEL-NEXT:    cmeq v2.8h, v4.8h, v3.8h
 ; GISEL-NEXT:    bif v0.16b, v1.16b, v2.16b
 ; GISEL-NEXT:    ret
   %1 = udiv <8 x i16> %x, <i16 23, i16 34, i16 -23, i16 56, i16 128, i16 -1, i16 -256, i16 -32768>
@@ -105,24 +104,23 @@ define <8 x i16> @combine_vec_udiv_nonuniform2(<8 x i16> %x) {
 ;
 ; GISEL-LABEL: combine_vec_udiv_nonuniform2:
 ; GISEL:       // %bb.0:
-; GISEL-NEXT:    adrp x8, .LCPI2_3
-; GISEL-NEXT:    adrp x9, .LCPI2_0
-; GISEL-NEXT:    ldr q1, [x8, :lo12:.LCPI2_3]
 ; GISEL-NEXT:    adrp x8, .LCPI2_2
-; GISEL-NEXT:    ldr q4, [x9, :lo12:.LCPI2_0]
-; GISEL-NEXT:    ldr q2, [x8, :lo12:.LCPI2_2]
+; GISEL-NEXT:    ldr q1, [x8, :lo12:.LCPI2_2]
 ; GISEL-NEXT:    adrp x8, .LCPI2_1
+; GISEL-NEXT:    ldr q2, [x8, :lo12:.LCPI2_1]
+; GISEL-NEXT:    adrp x8, .LCPI2_0
 ; GISEL-NEXT:    neg v1.8h, v1.8h
 ; GISEL-NEXT:    ushl v1.8h, v0.8h, v1.8h
 ; GISEL-NEXT:    umull2 v3.4s, v1.8h, v2.8h
 ; GISEL-NEXT:    umull v1.4s, v1.4h, v2.4h
-; GISEL-NEXT:    ldr q2, [x8, :lo12:.LCPI2_1]
-; GISEL-NEXT:    adrp x8, .LCPI2_4
+; GISEL-NEXT:    ldr q2, [x8, :lo12:.LCPI2_0]
+; GISEL-NEXT:    adrp x8, .LCPI2_3
 ; GISEL-NEXT:    neg v2.8h, v2.8h
+; GISEL-NEXT:    ldr q4, [x8, :lo12:.LCPI2_3]
 ; GISEL-NEXT:    uzp2 v1.8h, v1.8h, v3.8h
-; GISEL-NEXT:    ldr q3, [x8, :lo12:.LCPI2_4]
+; GISEL-NEXT:    movi v3.8h, #1
 ; GISEL-NEXT:    ushl v1.8h, v1.8h, v2.8h
-; GISEL-NEXT:    cmeq v2.8h, v3.8h, v4.8h
+; GISEL-NEXT:    cmeq v2.8h, v4.8h, v3.8h
 ; GISEL-NEXT:    bif v0.16b, v1.16b, v2.16b
 ; GISEL-NEXT:    ret
   %1 = udiv <8 x i16> %x, <i16 -34, i16 35, i16 36, i16 -37, i16 38, i16 -39, i16 40, i16 -41>
@@ -146,22 +144,21 @@ define <8 x i16> @combine_vec_udiv_nonuniform3(<8 x i16> %x) {
 ;
 ; GISEL-LABEL: combine_vec_udiv_nonuniform3:
 ; GISEL:       // %bb.0:
-; GISEL-NEXT:    adrp x8, .LCPI3_2
-; GISEL-NEXT:    adrp x9, .LCPI3_0
-; GISEL-NEXT:    ldr q1, [x8, :lo12:.LCPI3_2]
 ; GISEL-NEXT:    adrp x8, .LCPI3_1
-; GISEL-NEXT:    ldr q4, [x9, :lo12:.LCPI3_0]
+; GISEL-NEXT:    movi v3.8h, #1
+; GISEL-NEXT:    ldr q1, [x8, :lo12:.LCPI3_1]
+; GISEL-NEXT:    adrp x8, .LCPI3_0
 ; GISEL-NEXT:    umull2 v2.4s, v0.8h, v1.8h
 ; GISEL-NEXT:    umull v1.4s, v0.4h, v1.4h
 ; GISEL-NEXT:    uzp2 v1.8h, v1.8h, v2.8h
 ; GISEL-NEXT:    sub v2.8h, v0.8h, v1.8h
 ; GISEL-NEXT:    usra v1.8h, v2.8h, #1
-; GISEL-NEXT:    ldr q2, [x8, :lo12:.LCPI3_1]
-; GISEL-NEXT:    adrp x8, .LCPI3_3
-; GISEL-NEXT:    ldr q3, [x8, :lo12:.LCPI3_3]
+; GISEL-NEXT:    ldr q2, [x8, :lo12:.LCPI3_0]
+; GISEL-NEXT:    adrp x8, .LCPI3_2
+; GISEL-NEXT:    ldr q4, [x8, :lo12:.LCPI3_2]
 ; GISEL-NEXT:    neg v2.8h, v2.8h
 ; GISEL-NEXT:    ushl v1.8h, v1.8h, v2.8h
-; GISEL-NEXT:    cmeq v2.8h, v3.8h, v4.8h
+; GISEL-NEXT:    cmeq v2.8h, v4.8h, v3.8h
 ; GISEL-NEXT:    bif v0.16b, v1.16b, v2.16b
 ; GISEL-NEXT:    ret
   %1 = udiv <8 x i16> %x, <i16 7, i16 23, i16 25, i16 27, i16 31, i16 47, i16 63, i16 127>
@@ -187,20 +184,19 @@ define <16 x i8> @combine_vec_udiv_nonuniform4(<16 x i8> %x) {
 ;
 ; GISEL-LABEL: combine_vec_udiv_nonuniform4:
 ; GISEL:       // %bb.0:
-; GISEL-NEXT:    adrp x8, .LCPI4_2
-; GISEL-NEXT:    adrp x9, .LCPI4_0
-; GISEL-NEXT:    ldr q1, [x8, :lo12:.LCPI4_2]
 ; GISEL-NEXT:    adrp x8, .LCPI4_1
-; GISEL-NEXT:    ldr q4, [x9, :lo12:.LCPI4_0]
-; GISEL-NEXT:    ldr q3, [x8, :lo12:.LCPI4_1]
-; GISEL-NEXT:    adrp x8, .LCPI4_3
+; GISEL-NEXT:    ldr q1, [x8, :lo12:.LCPI4_1]
+; GISEL-NEXT:    adrp x8, .LCPI4_0
+; GISEL-NEXT:    ldr q3, [x8, :lo12:.LCPI4_0]
+; GISEL-NEXT:    adrp x8, .LCPI4_2
 ; GISEL-NEXT:    umull2 v2.8h, v0.16b, v1.16b
 ; GISEL-NEXT:    umull v1.8h, v0.8b, v1.8b
+; GISEL-NEXT:    ldr q4, [x8, :lo12:.LCPI4_2]
 ; GISEL-NEXT:    uzp2 v1.16b, v1.16b, v2.16b
 ; GISEL-NEXT:    neg v2.16b, v3.16b
-; GISEL-NEXT:    ldr q3, [x8, :lo12:.LCPI4_3]
+; GISEL-NEXT:    movi v3.16b, #1
 ; GISEL-NEXT:    ushl v1.16b, v1.16b, v2.16b
-; GISEL-NEXT:    cmeq v2.16b, v3.16b, v4.16b
+; GISEL-NEXT:    cmeq v2.16b, v4.16b, v3.16b
 ; GISEL-NEXT:    bif v0.16b, v1.16b, v2.16b
 ; GISEL-NEXT:    ret
   %div = udiv <16 x i8> %x, <i8 -64, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
@@ -236,27 +232,26 @@ define <8 x i16> @pr38477(<8 x i16> %a0) {
 ;
 ; GISEL-LABEL: pr38477:
 ; GISEL:       // %bb.0:
-; GISEL-NEXT:    adrp x8, .LCPI5_3
-; GISEL-NEXT:    adrp x9, .LCPI5_0
-; GISEL-NEXT:    ldr q1, [x8, :lo12:.LCPI5_3]
 ; GISEL-NEXT:    adrp x8, .LCPI5_2
-; GISEL-NEXT:    ldr q3, [x8, :lo12:.LCPI5_2]
+; GISEL-NEXT:    ldr q1, [x8, :lo12:.LCPI5_2]
 ; GISEL-NEXT:    adrp x8, .LCPI5_1
+; GISEL-NEXT:    ldr q3, [x8, :lo12:.LCPI5_1]
+; GISEL-NEXT:    adrp x8, .LCPI5_0
 ; GISEL-NEXT:    umull2 v2.4s, v0.8h, v1.8h
 ; GISEL-NEXT:    umull v1.4s, v0.4h, v1.4h
 ; GISEL-NEXT:    uzp2 v1.8h, v1.8h, v2.8h
 ; GISEL-NEXT:    sub v2.8h, v0.8h, v1.8h
 ; GISEL-NEXT:    umull2 v4.4s, v2.8h, v3.8h
 ; GISEL-NEXT:    umull v2.4s, v2.4h, v3.4h
-; GISEL-NEXT:    ldr q3, [x8, :lo12:.LCPI5_1]
-; GISEL-NEXT:    adrp x8, .LCPI5_4
+; GISEL-NEXT:    ldr q3, [x8, :lo12:.LCPI5_0]
+; GISEL-NEXT:    adrp x8, .LCPI5_3
 ; GISEL-NEXT:    uzp2 v2.8h, v2.8h, v4.8h
-; GISEL-NEXT:    ldr q4, [x9, :lo12:.LCPI5_0]
+; GISEL-NEXT:    ldr q4, [x8, :lo12:.LCPI5_3]
 ; GISEL-NEXT:    add v1.8h, v2.8h, v1.8h
 ; GISEL-NEXT:    neg v2.8h, v3.8h
-; GISEL-NEXT:    ldr q3, [x8, :lo12:.LCPI5_4]
+; GISEL-NEXT:    movi v3.8h, #1
 ; GISEL-NEXT:    ushl v1.8h, v1.8h, v2.8h
-; GISEL-NEXT:    cmeq v2.8h, v3.8h, v4.8h
+; GISEL-NEXT:    cmeq v2.8h, v4.8h, v3.8h
 ; GISEL-NEXT:    bif v0.16b, v1.16b, v2.16b
 ; GISEL-NEXT:    ret
   %1 = udiv <8 x i16> %a0, <i16 1, i16 119, i16 73, i16 -111, i16 -3, i16 118, i16 32, i16 31>
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-build-vector-to-dup.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-build-vector-to-dup.mir
index db17c66bde966c4..70867c2ea2842a8 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-build-vector-to-dup.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-build-vector-to-dup.mir
@@ -11,16 +11,19 @@ body:             |
     liveins: $d0
     ; LOWER-LABEL: name: same_reg
     ; LOWER: liveins: $d0
-    ; LOWER: %r:_(s8) = G_IMPLICIT_DEF
-    ; LOWER: %build_vector:_(<8 x s8>) = G_DUP %r(s8)
-    ; LOWER: $d0 = COPY %build_vector(<8 x s8>)
-    ; LOWER: RET_ReallyLR implicit $d0
+    ; LOWER-NEXT: {{  $}}
+    ; LOWER-NEXT: %r:_(s8) = G_IMPLICIT_DEF
+    ; LOWER-NEXT: %build_vector:_(<8 x s8>) = G_DUP %r(s8)
+    ; LOWER-NEXT: $d0 = COPY %build_vector(<8 x s8>)
+    ; LOWER-NEXT: RET_ReallyLR implicit $d0
+    ;
     ; SELECT-LABEL: name: same_reg
     ; SELECT: liveins: $d0
-    ; SELECT: %r:gpr32 = IMPLICIT_DEF
-    ; SELECT: %build_vector:fpr64 = DUPv8i8gpr %r
-    ; SELECT: $d0 = COPY %build_vector
-    ; SELECT: RET_ReallyLR implicit $d0
+    ; SELECT-NEXT: {{  $}}
+    ; SELECT-NEXT: %r:gpr32 = IMPLICIT_DEF
+    ; SELECT-NEXT: %build_vector:fpr64 = DUPv8i8gpr %r
+    ; SELECT-NEXT: $d0 = COPY %build_vector
+    ; SELECT-NEXT: RET_ReallyLR implicit $d0
     %r:_(s8) = G_IMPLICIT_DEF
     %build_vector:_(<8 x s8>) = G_BUILD_VECTOR %r, %r, %r, %r, %r, %r, %r, %r
     $d0 = COPY %build_vector(<8 x s8>)
@@ -36,21 +39,24 @@ body:             |
     liveins: $d0, $w0, $w1
     ; LOWER-LABEL: name: dont_combine_different_reg
     ; LOWER: liveins: $d0, $w0, $w1
-    ; LOWER: %r:_(s32) = COPY $w0
-    ; LOWER: %q:_(s32) = COPY $w1
-    ; LOWER: %build_vector:_(<2 x s32>) = G_BUILD_VECTOR %r(s32), %q(s32)
-    ; LOWER: $d0 = COPY %build_vector(<2 x s32>)
-    ; LOWER: RET_ReallyLR implicit $d0
+    ; LOWER-NEXT: {{  $}}
+    ; LOWER-NEXT: %r:_(s32) = COPY $w0
+    ; LOWER-NEXT: %q:_(s32) = COPY $w1
+    ; LOWER-NEXT: %build_vector:_(<2 x s32>) = G_BUILD_VECTOR %r(s32), %q(s32)
+    ; LOWER-NEXT: $d0 = COPY %build_vector(<2 x s32>)
+    ; LOWER-NEXT: RET_ReallyLR implicit $d0
+    ;
     ; SELECT-LABEL: name: dont_combine_different_reg
     ; SELECT: liveins: $d0, $w0, $w1
-    ; SELECT: %r:gpr32all = COPY $w0
-    ; SELECT: %q:gpr32 = COPY $w1
-    ; SELECT: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
-    ; SELECT: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], %r, %subreg.ssub
-    ; SELECT: [[INSvi32gpr:%[0-9]+]]:fpr128 = INSvi32gpr [[INSERT_SUBREG]], 1, %q
-    ; SELECT: %build_vector:fpr64 = COPY [[INSvi32gpr]].dsub
-    ; SELECT: $d0 = COPY %build_vector
-    ; SELECT: RET_ReallyLR implicit $d0
+    ; SELECT-NEXT: {{  $}}
+    ; SELECT-NEXT: %r:gpr32all = COPY $w0
+    ; SELECT-NEXT: %q:gpr32 = COPY $w1
+    ; SELECT-NEXT: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+    ; SELECT-NEXT: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], %r, %subreg.ssub
+    ; SELECT-NEXT: [[INSvi32gpr:%[0-9]+]]:fpr128 = INSvi32gpr [[INSERT_SUBREG]], 1, %q
+    ; SELECT-NEXT: %build_vector:fpr64 = COPY [[INSvi32gpr]].dsub
+    ; SELECT-NEXT: $d0 = COPY %build_vector
+    ; SELECT-NEXT: RET_ReallyLR implicit $d0
     %r:_(s32) = COPY $w0
     %q:_(s32) = COPY $w1
     %build_vector:_(<2 x s32>) = G_BUILD_VECTOR %r, %q
@@ -65,21 +71,22 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     liveins: $d0
-    ; Don't combine with 0. We want to avoid blocking immAllZerosV selection
-    ; patterns.
 
     ; LOWER-LABEL: name: dont_combine_zero
     ; LOWER: liveins: $d0
-    ; LOWER: %r:_(s8) = G_CONSTANT i8 0
-    ; LOWER: %build_vector:_(<8 x s8>) = G_BUILD_VECTOR %r(s8), %r(s8), %r(s8), %r(s8), %r(s8), %r(s8), %r(s8), %r(s8)
-    ; LOWER: $d0 = COPY %build_vector(<8 x s8>)
-    ; LOWER: RET_ReallyLR implicit $d0
+    ; LOWER-NEXT: {{  $}}
+    ; LOWER-NEXT: %r:_(s8) = G_CONSTANT i8 0
+    ; LOWER-NEXT: %build_vector:_(<8 x s8>) = G_BUILD_VECTOR %r(s8), %r(s8), %r(s8), %r(s8), %r(s8), %r(s8), %r(s8), %r(s8)
+    ; LOWER-NEXT: $d0 = COPY %build_vector(<8 x s8>)
+    ; LOWER-NEXT: RET_ReallyLR implicit $d0
+    ;
     ; SELECT-LABEL: name: dont_combine_zero
     ; SELECT: liveins: $d0
-    ; SELECT: [[MOVIv2d_ns:%[0-9]+]]:fpr128 = MOVIv2d_ns 0
-    ; SELECT: %build_vector:fpr64 = COPY [[MOVIv2d_ns]].dsub
-    ; SELECT: $d0 = COPY %build_vector
-    ; SELECT: RET_ReallyLR implicit $d0
+    ; SELECT-NEXT: {{  $}}
+    ; SELECT-NEXT: [[MOVIv2d_ns:%[0-9]+]]:fpr128 = MOVIv2d_ns 0
+    ; SELECT-NEXT: %build_vector:fpr64 = COPY [[MOVIv2d_ns]].dsub
+    ; SELECT-NEXT: $d0 = COPY %build_vector
+    ; SELECT-NEXT: RET_ReallyLR implicit $d0
     %r:_(s8) = G_CONSTANT i8 0
     %build_vector:_(<8 x s8>) = G_BUILD_VECTOR %r, %r, %r, %r, %r, %r, %r, %r
     $d0 = COPY %build_vector(<8 x s8>)
@@ -93,21 +100,21 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     liveins: $d0
-    ; Don't combine with -1. We want to avoid blocking immAllOnesV selection
-    ; patterns.
 
     ; LOWER-LABEL: name: dont_combine_all_ones
     ; LOWER: liveins: $d0
-    ; LOWER: %r:_(s8) = G_CONSTANT i8 -1
-    ; LOWER: %build_vector:_(<8 x s8>) = G_BUILD_VECTOR %r(s8), %r(s8), %r(s8), %r(s8), %r(s8), %r(s8), %r(s8), %r(s8)
-    ; LOWER: $d0 = COPY %build_vector(<8 x s8>)
-    ; LOWER: RET_ReallyLR implicit $d0
+    ; LOWER-NEXT: {{  $}}
+    ; LOWER-NEXT: %r:_(s8) = G_CONSTANT i8 -1
+    ; LOWER-NEXT: %build_vector:_(<8 x s8>) = G_BUILD_VECTOR %r(s8), %r(s8), %r(s8), %r(s8), %r(s8), %r(s8), %r(s8), %r(s8)
+    ; LOWER-NEXT: $d0 = COPY %build_vector(<8 x s8>)
+    ; LOWER-NEXT: RET_ReallyLR implicit $d0
+    ;
     ; SELECT-LABEL: name: dont_combine_all_ones
     ; SELECT: liveins: $d0
-    ; SELECT: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0
-    ; SELECT: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) %const.0
-    ; SELECT: $d0 = COPY [[LDRDui]]
-    ; SELECT: RET_ReallyLR implicit $d0
+    ; SELECT-NEXT: {{  $}}
+    ; SELECT-NEXT: %build_vector:fpr64 = MOVID 255
+    ; SELECT-NEXT: $d0 = COPY %build_vector
+    ; SELECT-NEXT: RET_ReallyLR implicit $d0
     %r:_(s8) = G_CONSTANT i8 -1
     %build_vector:_(<8 x s8>) = G_BUILD_VECTOR %r, %r, %r, %r, %r, %r, %r, %r
     $d0 = COPY %build_vector(<8 x s8>)
@@ -121,22 +128,24 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     liveins: $d0
-    ; We should get a NEGv2i32 here.
 
     ; LOWER-LABEL: name: all_zeros_pat_example
     ; LOWER: liveins: $d0
-    ; LOWER: %v:_(<2 x s32>) = COPY $d0
-    ; LOWER: %cst:_(s32) = G_CONSTANT i32 0
-    ; LOWER: %build_vector:_(<2 x s32>) = G_BUILD_VECTOR %cst(s32), %cst(s32)
-    ; LOWER: %sub:_(<2 x s32>) = G_SUB %build_vector, %v
-    ; LOWER: $d0 = COPY %sub(<2 x s32>)
-    ; LOWER: RET_ReallyLR implicit $d0
+    ; LOWER-NEXT: {{  $}}
+    ; LOWER-NEXT: %v:_(<2 x s32>) = COPY $d0
+    ; LOWER-NEXT: %cst:_(s32) = G_CONSTANT i32 0
+    ; LOWER-NEXT: %build_vector:_(<2 x s32>) = G_BUILD_VECTOR %cst(s32), %cst(s32)
+    ; LOWER-NEXT: %sub:_(<2 x s32>) = G_SUB %build_vector, %v
+    ; LOWER-NEXT: $d0 = COPY %sub(<2 x s32>)
+    ; LOWER-NEXT: RET_ReallyLR implicit $d0
+    ;
     ; SELECT-LABEL: name: all_zeros_pat_example
     ; SELECT: liveins: $d0
-    ; SELECT: %v:fpr64 = COPY $d0
-    ; SELECT: %sub:fpr64 = NEGv2i32 %v
-    ; SELECT: $d0 = COPY %sub
-    ; SELECT: RET_ReallyLR implicit $d0
+    ; SELECT-NEXT: {{  $}}
+    ; SELECT-NEXT: %v:fpr64 = COPY $d0
+    ; SELECT-NEXT: %sub:fpr64 = NEGv2i32 %v
+    ; SELECT-NEXT: $d0 = COPY %sub
+    ; SELECT-NEXT: RET_ReallyLR implicit $d0
       %v:_(<2 x s32>) = COPY $d0
     %cst:_(s32) = G_CONSTANT i32 0
     %build_vector:_(<2 x s32>) = G_BUILD_VECTOR %cst, %cst
@@ -152,25 +161,27 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     liveins: $d0, $d1
-    ; We should get a BICv8i8 here.
 
     ; LOWER-LABEL: name: all_ones_pat_example
     ; LOWER: liveins: $d0, $d1
-    ; LOWER: %v0:_(<2 x s32>) = COPY $d0
-    ; LOWER: %v1:_(<2 x s32>) = COPY $d1
-    ; LOWER: %cst:_(s32) = G_CONSTANT i32 -1
-    ; LOWER: %build_vector:_(<2 x s32>) = G_BUILD_VECTOR %cst(s32), %cst(s32)
-    ; LOWER: %xor:_(<2 x s32>) = G_XOR %v0, %build_vector
-    ; LOWER: %and:_(<2 x s32>) = G_AND %v1, %xor
-    ; LOWER: $d0 = COPY %and(<2 x s32>)
-    ; LOWER: RET_ReallyLR implicit $d0
+    ; LOWER-NEXT: {{  $}}
+    ; LOWER-NEXT: %v0:_(<2 x s32>) = COPY $d0
+    ; LOWER-NEXT: %v1:_(<2 x s32>) = COPY $d1
+    ; LOWER-NEXT: %cst:_(s32) = G_CONSTANT i32 -1
+    ; LOWER-NEXT: %build_vector:_(<2 x s32>) = G_BUILD_VECTOR %cst(s32), %cst(s32)
+    ; LOWER-NEXT: %xor:_(<2 x s32>) = G_XOR %v0, %build_vector
+    ; LOWER-NEXT: %and:_(<2 x s32>) = G_AND %v1, %xor
+    ; LOWER-NEXT: $d0 = COPY %and(<2 x s32>)
+    ; LOWER-NEXT: RET_ReallyLR implicit $d0
+    ;
     ; SELECT-LABEL: name: all_ones_pat_example
     ; SELECT: liveins: $d0, $d1
-    ; SELECT: %v0:fpr64 = COPY $d0
-    ; SELECT: %v1:fpr64 = COPY $d1
-    ; SELECT: %and:fpr64 = BICv8i8 %v1, %v0
-    ; SELECT: $d0 = COPY %and
-    ; SELECT: RET_ReallyLR implicit $d0
+    ; SELECT-NEXT: {{  $}}
+    ; SELECT-NEXT: %v0:fpr64 = COPY $d0
+    ; SELECT-NEXT: %v1:fpr64 = COPY $d1
+    ; SELECT-NEXT: %and:fpr64 = BICv8i8 %v1, %v0
+    ; SELECT-NEXT: $d0 = COPY %and
+    ; SELECT-NEXT: RET_ReallyLR implicit $d0
     %v0:_(<2 x s32>) = COPY $d0
     %v1:_(<2 x s32>) = COPY $d1
     %cst:_(s32) = G_CONSTANT i32 -1
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-dup.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-dup.mir
index 9dc0ba68fe04c99..809bdceb4aa25e1 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-dup.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-dup.mir
@@ -14,10 +14,11 @@ body:             |
     liveins: $w0
     ; CHECK-LABEL: name: DUPv4i32gpr
     ; CHECK: liveins: $w0
-    ; CHECK: %copy:gpr32 = COPY $w0
-    ; CHECK: %dup:fpr128 = DUPv4i32gpr %copy
-    ; CHECK: $q0 = COPY %dup
-    ; CHECK: RET_ReallyLR implicit $q0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %copy:gpr32 = COPY $w0
+    ; CHECK-NEXT: %dup:fpr128 = DUPv4i32gpr %copy
+    ; CHECK-NEXT: $q0 = COPY %dup
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
     %copy:gpr(s32) = COPY $w0
     %dup:fpr(<4 x s32>) = G_DUP %copy(s32)
     $q0 = COPY %dup(<4 x s32>)
@@ -35,10 +36,11 @@ body:             |
     liveins: $x0
     ; CHECK-LABEL: name: DUPv2i64gpr
     ; CHECK: liveins: $x0
-    ; CHECK: %copy:gpr64 = COPY $x0
-    ; CHECK: %dup:fpr128 = DUPv2i64gpr %copy
-    ; CHECK: $q0 = COPY %dup
-    ; CHECK: RET_ReallyLR implicit $q0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %copy:gpr64 = COPY $x0
+    ; CHECK-NEXT: %dup:fpr128 = DUPv2i64gpr %copy
+    ; CHECK-NEXT: $q0 = COPY %dup
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
     %copy:gpr(s64) = COPY $x0
     %dup:fpr(<2 x s64>) = G_DUP %copy(s64)
     $q0 = COPY %dup(<2 x s64>)
@@ -56,10 +58,11 @@ body:             |
     liveins: $w0
     ; CHECK-LABEL: name: DUPv2i32gpr
     ; CHECK: liveins: $w0
-    ; CHECK: %copy:gpr32 = COPY $w0
-    ; CHECK: %dup:fpr64 = DUPv2i32gpr %copy
-    ; CHECK: $d0 = COPY %dup
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %copy:gpr32 = COPY $w0
+    ; CHECK-NEXT: %dup:fpr64 = DUPv2i32gpr %copy
+    ; CHECK-NEXT: $d0 = COPY %dup
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %copy:gpr(s32) = COPY $w0
     %dup:fpr(<2 x s32>) = G_DUP %copy(s32)
     $d0 = COPY %dup(<2 x s32>)
@@ -78,12 +81,13 @@ body:             |
 
     ; CHECK-LABEL: name: DUPv4i32lane
     ; CHECK: liveins: $s0
-    ; CHECK: %copy:fpr32 = COPY $s0
-    ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
-    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], %copy, %subreg.ssub
-    ; CHECK: %dup:fpr128 = DUPv4i32lane [[INSERT_SUBREG]], 0
-    ; CHECK: $q0 = COPY %dup
-    ; CHECK: RET_ReallyLR implicit $q0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %copy:fpr32 = COPY $s0
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+    ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], %copy, %subreg.ssub
+    ; CHECK-NEXT: %dup:fpr128 = DUPv4i32lane [[INSERT_SUBREG]], 0
+    ; CHECK-NEXT: $q0 = COPY %dup
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
     %copy:fpr(s32) = COPY $s0
     %dup:fpr(<4 x s32>) = G_DUP %copy(s32)
     $q0 = COPY %dup(<4 x s32>)
@@ -101,12 +105,13 @@ body:             |
     liveins: $d0
     ; CHECK-LABEL: name: DUPv2i64lane
     ; CHECK: liveins: $d0
-    ; CHECK: %copy:fpr64 = COPY $d0
-    ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
-    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], %copy, %subreg.dsub
-    ; CHECK: %dup:fpr128 = DUPv2i64lane [[INSERT_SUBREG]], 0
-    ; CHECK: $q0 = COPY %dup
-    ; CHECK: RET_ReallyLR implicit $q0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %copy:fpr64 = COPY $d0
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+    ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], %copy, %subreg.dsub
+    ; CHECK-NEXT: %dup:fpr128 = DUPv2i64lane [[INSERT_SUBREG]], 0
+    ; CHECK-NEXT: $q0 = COPY %dup
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
     %copy:fpr(s64) = COPY $d0
     %dup:fpr(<2 x s64>) = G_DUP %copy(s64)
     $q0 = COPY %dup(<2 x s64>)
@@ -124,12 +129,13 @@ body:             |
     liveins: $s0
     ; CHECK-LABEL: name: DUPv2i32lane
     ; CHECK: liveins: $s0
-    ; CHECK: %copy:fpr32 = COPY $s0
-    ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
-    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], %copy, %subreg.ssub
-    ; CHECK: %dup:fpr64 = DUPv2i32lane [[INSERT_SUBREG]], 0
-    ; CHECK: $d0 = COPY %dup
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %copy:fpr32 = COPY $s0
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+    ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], %copy, %subreg.ssub
+    ; CHECK-NEXT: %dup:fpr64 = DUPv2i32lane [[INSERT_SUBREG]], 0
+    ; CHECK-NEXT: $d0 = COPY %dup
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %copy:fpr(s32) = COPY $s0
     %dup:fpr(<2 x s32>) = G_DUP %copy(s32)
     $d0 = COPY %dup(<2 x s32>)
@@ -148,12 +154,13 @@ body:             |
     liveins: $h0
     ; CHECK-LABEL: name: DUPv4i16lane
     ; CHECK: liveins: $h0
-    ; CHECK: %copy:fpr16 = COPY $h0
-    ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
-    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], %copy, %subreg.hsub
-    ; CHECK: %dup:fpr64 = DUPv4i16lane [[INSERT_SUBREG]], 0
-    ; CHECK: $d0 = COPY %dup
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %copy:fpr16 = COPY $h0
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+    ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], %copy, %subreg.hsub
+    ; CHECK-NEXT: %dup:fpr64 = DUPv4i16lane [[INSERT_SUBREG]], 0
+    ; CHECK-NEXT: $d0 = COPY %dup
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %copy:fpr(s16) = COPY $h0
     %dup:fpr(<4 x s16>) = G_DUP %copy(s16)
     $d0 = COPY %dup(<4 x s16>)
@@ -170,10 +177,11 @@ body:             |
     liveins: $w0
     ; CHECK-LABEL: name: DUPv4i16gpr
     ; CHECK: liveins: $w0
-    ; CHECK: %copy:gpr32 = COPY $w0
-    ; CHECK: %dup:fpr64 = DUPv4i16gpr %copy
-    ; CHECK: $d0 = COPY %dup
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %copy:gpr32 = COPY $w0
+    ; CHECK-NEXT: %dup:fpr64 = DUPv4i16gpr %copy
+    ; CHECK-NEXT: $d0 = COPY %dup
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %copy:gpr(s32) = COPY $w0
     %dup:fpr(<4 x s16>) = G_DUP %copy(s32)
     $d0 = COPY %dup(<4 x s16>)
@@ -191,12 +199,13 @@ body:             |
     liveins: $h0
     ; CHECK-LABEL: name: DUPv8i16lane
     ; CHECK: liveins: $h0
-    ; CHECK: %copy:fpr16 = COPY $h0
-    ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
-    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], %copy, %subreg.hsub
-    ; CHECK: %dup:fpr128 = DUPv8i16lane [[INSERT_SUBREG]], 0
-    ; CHECK: $q0 = COPY %dup
-    ; CHECK: RET_ReallyLR implicit $q0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %copy:fpr16 = COPY $h0
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+    ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], %copy, %subreg.hsub
+    ; CHECK-NEXT: %dup:fpr128 = DUPv8i16lane [[INSERT_SUBREG]], 0
+    ; CHECK-NEXT: $q0 = COPY %dup
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
     %copy:fpr(s16) = COPY $h0
     %dup:fpr(<8 x s16>) = G_DUP %copy(s16)
     $q0 = COPY %dup(<8 x s16>)
@@ -214,10 +223,11 @@ body:             |
     liveins: $w0
     ; CHECK-LABEL: name: DUPv8i16gpr
     ; CHECK: liveins: $w0
-    ; CHECK: %copy:gpr32 = COPY $w0
-    ; CHECK: %dup:fpr128 = DUPv8i16gpr %copy
-    ; CHECK: $q0 = COPY %dup
-    ; CHECK: RET_ReallyLR implicit $q0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %copy:gpr32 = COPY $w0
+    ; CHECK-NEXT: %dup:fpr128 = DUPv8i16gpr %copy
+    ; CHECK-NEXT: $q0 = COPY %dup
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
     %copy:gpr(s32) = COPY $w0
     %dup:fpr(<8 x s16>) = G_DUP %copy(s32)
     $q0 = COPY %dup(<8 x s16>)
@@ -233,13 +243,13 @@ tracksRegLiveness: true
 body:             |
   bb.0.entry:
     liveins: $w0
-    ; Checks that we can still select the gpr variant if the scalar is an s16.
     ; CHECK-LABEL: name: DUPv8i16gpr_s16_src
     ; CHECK: liveins: $w0
-    ; CHECK: %copy:gpr32 = COPY $w0
-    ; CHECK: %dup:fpr128 = DUPv8i16gpr %copy
-    ; CHECK: $q0 = COPY %dup
-    ; CHECK: RET_ReallyLR implicit $q0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %copy:gpr32 = COPY $w0
+    ; CHECK-NEXT: %dup:fpr128 = DUPv8i16gpr %copy
+    ; CHECK-NEXT: $q0 = COPY %dup
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
     %copy:gpr(s32) = COPY $w0
     %trunc:gpr(s16) = G_TRUNC %copy
     %dup:fpr(<8 x s16>) = G_DUP %trunc(s16)
@@ -258,10 +268,11 @@ body:             |
     liveins: $w0
     ; CHECK-LABEL: name: DUPv4s16gpr_s16_src
     ; CHECK: liveins: $w0
-    ; CHECK: %copy:gpr32 = COPY $w0
-    ; CHECK: %dup:fpr64 = DUPv4i16gpr %copy
-    ; CHECK: $d0 = COPY %dup
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %copy:gpr32 = COPY $w0
+    ; CHECK-NEXT: %dup:fpr64 = DUPv4i16gpr %copy
+    ; CHECK-NEXT: $d0 = COPY %dup
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %copy:gpr(s32) = COPY $w0
     %trunc:gpr(s16) = G_TRUNC %copy
     %dup:fpr(<4 x s16>) = G_DUP %trunc(s16)
@@ -280,10 +291,11 @@ body:             |
     liveins: $w0
     ; CHECK-LABEL: name: DUPv8i8gpr
     ; CHECK: liveins: $w0
-    ; CHECK: %copy:gpr32 = COPY $w0
-    ; CHECK: %dup:fpr64 = DUPv8i8gpr %copy
-    ; CHECK: $d0 = COPY %dup
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %copy:gpr32 = COPY $w0
+    ; CHECK-NEXT: %dup:fpr64 = DUPv8i8gpr %copy
+    ; CHECK-NEXT: $d0 = COPY %dup
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %copy:gpr(s32) = COPY $w0
     %dup:fpr(<8 x s8>) = G_DUP %copy(s32)
     $d0 = COPY %dup(<8 x s8>)
@@ -301,10 +313,11 @@ body:             |
     liveins: $w0
     ; CHECK-LABEL: name: DUPv8i8gpr_s8_src
     ; CHECK: liveins: $w0
-    ; CHECK: %copy:gpr32 = COPY $w0
-    ; CHECK: %dup:fpr64 = DUPv8i8gpr %copy
-    ; CHECK: $d0 = COPY %dup
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %copy:gpr32 = COPY $w0
+    ; CHECK-NEXT: %dup:fpr64 = DUPv8i8gpr %copy
+    ; CHECK-NEXT: $d0 = COPY %dup
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %copy:gpr(s32) = COPY $w0
     %trunc:gpr(s8) = G_TRUNC %copy(s32)
     %dup:fpr(<8 x s8>) = G_DUP %trunc(s8)
@@ -323,10 +336,11 @@ body:             |
     liveins: $w0
     ; CHECK-LABEL: name: DUPv16i8gpr
     ; CHECK: liveins: $w0
-    ; CHECK: %copy:gpr32 = COPY $w0
-    ; CHECK: %dup:fpr128 = DUPv16i8gpr %copy
-    ; CHECK: $q0 = COPY %dup
-    ; CHECK: RET_ReallyLR implicit $q0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %copy:gpr32 = COPY $w0
+    ; CHECK-NEXT: %dup:fpr128 = DUPv16i8gpr %copy
+    ; CHECK-NEXT: $q0 = COPY %dup
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
     %copy:gpr(s32) = COPY $w0
     %dup:fpr(<16 x s8>) = G_DUP %copy(s32)
     $q0 = COPY %dup(<16 x s8>)
@@ -341,13 +355,13 @@ tracksRegLiveness: true
 body:             |
   bb.0.entry:
     liveins: $w0
-    ; Check we still select the gpr variant when scalar is an s8.
     ; CHECK-LABEL: name: DUPv16i8gpr_s8_src
     ; CHECK: liveins: $w0
-    ; CHECK: %copy:gpr32 = COPY $w0
-    ; CHECK: %dup:fpr128 = DUPv16i8gpr %copy
-    ; CHECK: $q0 = COPY %dup
-    ; CHECK: RET_ReallyLR implicit $q0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %copy:gpr32 = COPY $w0
+    ; CHECK-NEXT: %dup:fpr128 = DUPv16i8gpr %copy
+    ; CHECK-NEXT: $q0 = COPY %dup
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
     %copy:gpr(s32) = COPY $w0
     %trunc:gpr(s8) = G_TRUNC %copy
     %dup:fpr(<16 x s8>) = G_DUP %trunc(s8)
@@ -368,11 +382,12 @@ body:             |
 
     ; CHECK-LABEL: name: dup_v2p0
     ; CHECK: liveins: $x0
-    ; CHECK: [[COPY:%[0-9]+]]:gpr64all = COPY $x0
-    ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY [[COPY]]
-    ; CHECK: [[DUPv2i64gpr:%[0-9]+]]:fpr128 = DUPv2i64gpr [[COPY1]]
-    ; CHECK: $q0 = COPY [[DUPv2i64gpr]]
-    ; CHECK: RET_ReallyLR implicit $q0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64all = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY [[COPY]]
+    ; CHECK-NEXT: [[DUPv2i64gpr:%[0-9]+]]:fpr128 = DUPv2i64gpr [[COPY1]]
+    ; CHECK-NEXT: $q0 = COPY [[DUPv2i64gpr]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
     %0:gpr(p0) = COPY $x0
     %4:fpr(<2 x p0>) = G_DUP %0(p0)
     $q0 = COPY %4(<2 x p0>)
@@ -389,10 +404,10 @@ body:             |
     liveins: $w0
     ; CHECK-LABEL: name: cst_v4s32
     ; CHECK: liveins: $w0
-    ; CHECK: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0
-    ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) %const.0
-    ; CHECK: $q0 = COPY [[LDRQui]]
-    ; CHECK: RET_ReallyLR implicit $q0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %dup:fpr128 = MOVIv4i32 3, 0
+    ; CHECK-NEXT: $q0 = COPY %dup
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
     %cst:gpr(s32) = G_CONSTANT i32 3
     %dup:fpr(<4 x s32>) = G_DUP %cst(s32)
     $q0 = COPY %dup(<4 x s32>)
@@ -409,10 +424,10 @@ body:             |
     liveins: $w0
     ; CHECK-LABEL: name: cst_v8s8
     ; CHECK: liveins: $w0
-    ; CHECK: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0
-    ; CHECK: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) %const.0
-    ; CHECK: $d0 = COPY [[LDRDui]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %dup:fpr64 = MOVIv8b_ns 3
+    ; CHECK-NEXT: $d0 = COPY %dup
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %cst:gpr(s8) = G_CONSTANT i8 3
     %dup:fpr(<8 x s8>) = G_DUP %cst(s8)
     $d0 = COPY %dup(<8 x s8>)
@@ -428,11 +443,12 @@ body:             |
     liveins: $w0
     ; CHECK-LABEL: name: cst_v2p0
     ; CHECK: liveins: $w0
-    ; CHECK: %cst:gpr64 = MOVi64imm 3
-    ; CHECK: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0
-    ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) %const.0
-    ; CHECK: $q0 = COPY [[LDRQui]]
-    ; CHECK: RET_ReallyLR implicit $q0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %cst:gpr64 = MOVi64imm 3
+    ; CHECK-NEXT: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0
+    ; CHECK-NEXT: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) %const.0 :: (load (s128) from constant-pool)
+    ; CHECK-NEXT: $q0 = COPY [[LDRQui]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
     %cst:gpr(p0) = G_CONSTANT i64 3
     %dup:fpr(<2 x p0>) = G_DUP %cst(p0)
     $q0 = COPY %dup(<2 x p0>)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-vector-icmp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-vector-icmp.mir
index 55313a43136753f..21e84ecaed32f94 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-vector-icmp.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-vector-icmp.mir
@@ -2,7 +2,6 @@
 # RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
 
 --- |
-  ; ModuleID = 'icmp-autogen-tests-with-ne.ll'
   source_filename = "icmp-autogen-tests-with-ne.ll"
   target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
   target triple = "aarch64"
@@ -427,12 +426,13 @@ body:             |
 
     ; CHECK-LABEL: name: test_v2i64_eq
     ; CHECK: liveins: $q0, $q1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
-    ; CHECK: [[CMEQv2i64_:%[0-9]+]]:fpr128 = CMEQv2i64 [[COPY]], [[COPY1]]
-    ; CHECK: [[XTNv2i32_:%[0-9]+]]:fpr64 = XTNv2i32 [[CMEQv2i64_]]
-    ; CHECK: $d0 = COPY [[XTNv2i32_]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK-NEXT: [[CMEQv2i64_:%[0-9]+]]:fpr128 = CMEQv2i64 [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: [[XTNv2i32_:%[0-9]+]]:fpr64 = XTNv2i32 [[CMEQv2i64_]]
+    ; CHECK-NEXT: $d0 = COPY [[XTNv2i32_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %0:fpr(<2 x s64>) = COPY $q0
     %1:fpr(<2 x s64>) = COPY $q1
     %4:fpr(<2 x s64>) = G_ICMP intpred(eq), %0(<2 x s64>), %1
@@ -460,12 +460,13 @@ body:             |
 
     ; CHECK-LABEL: name: test_v4i32_eq
     ; CHECK: liveins: $q0, $q1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
-    ; CHECK: [[CMEQv4i32_:%[0-9]+]]:fpr128 = CMEQv4i32 [[COPY]], [[COPY1]]
-    ; CHECK: [[XTNv4i16_:%[0-9]+]]:fpr64 = XTNv4i16 [[CMEQv4i32_]]
-    ; CHECK: $d0 = COPY [[XTNv4i16_]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK-NEXT: [[CMEQv4i32_:%[0-9]+]]:fpr128 = CMEQv4i32 [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: [[XTNv4i16_:%[0-9]+]]:fpr64 = XTNv4i16 [[CMEQv4i32_]]
+    ; CHECK-NEXT: $d0 = COPY [[XTNv4i16_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %0:fpr(<4 x s32>) = COPY $q0
     %1:fpr(<4 x s32>) = COPY $q1
     %4:fpr(<4 x s32>) = G_ICMP intpred(eq), %0(<4 x s32>), %1
@@ -493,11 +494,12 @@ body:             |
 
     ; CHECK-LABEL: name: test_v2i32_eq
     ; CHECK: liveins: $d0, $d1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
-    ; CHECK: [[CMEQv2i32_:%[0-9]+]]:fpr64 = CMEQv2i32 [[COPY]], [[COPY1]]
-    ; CHECK: $d0 = COPY [[CMEQv2i32_]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK-NEXT: [[CMEQv2i32_:%[0-9]+]]:fpr64 = CMEQv2i32 [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: $d0 = COPY [[CMEQv2i32_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %0:fpr(<2 x s32>) = COPY $d0
     %1:fpr(<2 x s32>) = COPY $d1
     %4:fpr(<2 x s32>) = G_ICMP intpred(eq), %0(<2 x s32>), %1
@@ -536,17 +538,16 @@ body:             |
 
     ; CHECK-LABEL: name: test_v2i16_eq
     ; CHECK: liveins: $d0, $d1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
-    ; CHECK: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0
-    ; CHECK: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) %const.0
-    ; CHECK: [[ANDv8i8_:%[0-9]+]]:fpr64 = ANDv8i8 [[COPY]], [[LDRDui]]
-    ; CHECK: [[ADRP1:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0
-    ; CHECK: [[LDRDui1:%[0-9]+]]:fpr64 = LDRDui [[ADRP1]], target-flags(aarch64-pageoff, aarch64-nc) %const.0
-    ; CHECK: [[ANDv8i8_1:%[0-9]+]]:fpr64 = ANDv8i8 [[COPY1]], [[LDRDui1]]
-    ; CHECK: [[CMEQv2i32_:%[0-9]+]]:fpr64 = CMEQv2i32 [[ANDv8i8_]], [[ANDv8i8_1]]
-    ; CHECK: $d0 = COPY [[CMEQv2i32_]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK-NEXT: [[MOVID:%[0-9]+]]:fpr64 = MOVID 51
+    ; CHECK-NEXT: [[ANDv8i8_:%[0-9]+]]:fpr64 = ANDv8i8 [[COPY]], [[MOVID]]
+    ; CHECK-NEXT: [[MOVID1:%[0-9]+]]:fpr64 = MOVID 51
+    ; CHECK-NEXT: [[ANDv8i8_1:%[0-9]+]]:fpr64 = ANDv8i8 [[COPY1]], [[MOVID1]]
+    ; CHECK-NEXT: [[CMEQv2i32_:%[0-9]+]]:fpr64 = CMEQv2i32 [[ANDv8i8_]], [[ANDv8i8_1]]
+    ; CHECK-NEXT: $d0 = COPY [[CMEQv2i32_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %2:fpr(<2 x s32>) = COPY $d0
     %3:fpr(<2 x s32>) = COPY $d1
     %13:gpr(s32) = G_CONSTANT i32 65535
@@ -582,12 +583,13 @@ body:             |
 
     ; CHECK-LABEL: name: test_v8i16_eq
     ; CHECK: liveins: $q0, $q1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
-    ; CHECK: [[CMEQv8i16_:%[0-9]+]]:fpr128 = CMEQv8i16 [[COPY]], [[COPY1]]
-    ; CHECK: [[XTNv8i8_:%[0-9]+]]:fpr64 = XTNv8i8 [[CMEQv8i16_]]
-    ; CHECK: $d0 = COPY [[XTNv8i8_]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK-NEXT: [[CMEQv8i16_:%[0-9]+]]:fpr128 = CMEQv8i16 [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: [[XTNv8i8_:%[0-9]+]]:fpr64 = XTNv8i8 [[CMEQv8i16_]]
+    ; CHECK-NEXT: $d0 = COPY [[XTNv8i8_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %0:fpr(<8 x s16>) = COPY $q0
     %1:fpr(<8 x s16>) = COPY $q1
     %4:fpr(<8 x s16>) = G_ICMP intpred(eq), %0(<8 x s16>), %1
@@ -615,11 +617,12 @@ body:             |
 
     ; CHECK-LABEL: name: test_v4i16_eq
     ; CHECK: liveins: $d0, $d1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
-    ; CHECK: [[CMEQv4i16_:%[0-9]+]]:fpr64 = CMEQv4i16 [[COPY]], [[COPY1]]
-    ; CHECK: $d0 = COPY [[CMEQv4i16_]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK-NEXT: [[CMEQv4i16_:%[0-9]+]]:fpr64 = CMEQv4i16 [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: $d0 = COPY [[CMEQv4i16_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %0:fpr(<4 x s16>) = COPY $d0
     %1:fpr(<4 x s16>) = COPY $d1
     %4:fpr(<4 x s16>) = G_ICMP intpred(eq), %0(<4 x s16>), %1
@@ -647,11 +650,12 @@ body:             |
 
     ; CHECK-LABEL: name: test_v16i8_eq
     ; CHECK: liveins: $q0, $q1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
-    ; CHECK: [[CMEQv16i8_:%[0-9]+]]:fpr128 = CMEQv16i8 [[COPY]], [[COPY1]]
-    ; CHECK: $q0 = COPY [[CMEQv16i8_]]
-    ; CHECK: RET_ReallyLR implicit $q0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK-NEXT: [[CMEQv16i8_:%[0-9]+]]:fpr128 = CMEQv16i8 [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: $q0 = COPY [[CMEQv16i8_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
     %0:fpr(<16 x s8>) = COPY $q0
     %1:fpr(<16 x s8>) = COPY $q1
     %4:fpr(<16 x s8>) = G_ICMP intpred(eq), %0(<16 x s8>), %1
@@ -679,11 +683,12 @@ body:             |
 
     ; CHECK-LABEL: name: test_v8i8_eq
     ; CHECK: liveins: $d0, $d1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
-    ; CHECK: [[CMEQv8i8_:%[0-9]+]]:fpr64 = CMEQv8i8 [[COPY]], [[COPY1]]
-    ; CHECK: $d0 = COPY [[CMEQv8i8_]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK-NEXT: [[CMEQv8i8_:%[0-9]+]]:fpr64 = CMEQv8i8 [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: $d0 = COPY [[CMEQv8i8_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %0:fpr(<8 x s8>) = COPY $d0
     %1:fpr(<8 x s8>) = COPY $d1
     %4:fpr(<8 x s8>) = G_ICMP intpred(eq), %0(<8 x s8>), %1
@@ -711,13 +716,14 @@ body:             |
 
     ; CHECK-LABEL: name: test_v2i64_ne
     ; CHECK: liveins: $q0, $q1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
-    ; CHECK: [[CMEQv2i64_:%[0-9]+]]:fpr128 = CMEQv2i64 [[COPY]], [[COPY1]]
-    ; CHECK: [[NOTv16i8_:%[0-9]+]]:fpr128 = NOTv16i8 [[CMEQv2i64_]]
-    ; CHECK: [[XTNv2i32_:%[0-9]+]]:fpr64 = XTNv2i32 [[NOTv16i8_]]
-    ; CHECK: $d0 = COPY [[XTNv2i32_]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK-NEXT: [[CMEQv2i64_:%[0-9]+]]:fpr128 = CMEQv2i64 [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: [[NOTv16i8_:%[0-9]+]]:fpr128 = NOTv16i8 [[CMEQv2i64_]]
+    ; CHECK-NEXT: [[XTNv2i32_:%[0-9]+]]:fpr64 = XTNv2i32 [[NOTv16i8_]]
+    ; CHECK-NEXT: $d0 = COPY [[XTNv2i32_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %0:fpr(<2 x s64>) = COPY $q0
     %1:fpr(<2 x s64>) = COPY $q1
     %4:fpr(<2 x s64>) = G_ICMP intpred(ne), %0(<2 x s64>), %1
@@ -745,13 +751,14 @@ body:             |
 
     ; CHECK-LABEL: name: test_v4i32_ne
     ; CHECK: liveins: $q0, $q1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
-    ; CHECK: [[CMEQv4i32_:%[0-9]+]]:fpr128 = CMEQv4i32 [[COPY]], [[COPY1]]
-    ; CHECK: [[NOTv16i8_:%[0-9]+]]:fpr128 = NOTv16i8 [[CMEQv4i32_]]
-    ; CHECK: [[XTNv4i16_:%[0-9]+]]:fpr64 = XTNv4i16 [[NOTv16i8_]]
-    ; CHECK: $d0 = COPY [[XTNv4i16_]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK-NEXT: [[CMEQv4i32_:%[0-9]+]]:fpr128 = CMEQv4i32 [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: [[NOTv16i8_:%[0-9]+]]:fpr128 = NOTv16i8 [[CMEQv4i32_]]
+    ; CHECK-NEXT: [[XTNv4i16_:%[0-9]+]]:fpr64 = XTNv4i16 [[NOTv16i8_]]
+    ; CHECK-NEXT: $d0 = COPY [[XTNv4i16_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %0:fpr(<4 x s32>) = COPY $q0
     %1:fpr(<4 x s32>) = COPY $q1
     %4:fpr(<4 x s32>) = G_ICMP intpred(ne), %0(<4 x s32>), %1
@@ -779,12 +786,13 @@ body:             |
 
     ; CHECK-LABEL: name: test_v2i32_ne
     ; CHECK: liveins: $d0, $d1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
-    ; CHECK: [[CMEQv2i32_:%[0-9]+]]:fpr64 = CMEQv2i32 [[COPY]], [[COPY1]]
-    ; CHECK: [[NOTv8i8_:%[0-9]+]]:fpr64 = NOTv8i8 [[CMEQv2i32_]]
-    ; CHECK: $d0 = COPY [[NOTv8i8_]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK-NEXT: [[CMEQv2i32_:%[0-9]+]]:fpr64 = CMEQv2i32 [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: [[NOTv8i8_:%[0-9]+]]:fpr64 = NOTv8i8 [[CMEQv2i32_]]
+    ; CHECK-NEXT: $d0 = COPY [[NOTv8i8_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %0:fpr(<2 x s32>) = COPY $d0
     %1:fpr(<2 x s32>) = COPY $d1
     %4:fpr(<2 x s32>) = G_ICMP intpred(ne), %0(<2 x s32>), %1
@@ -823,18 +831,17 @@ body:             |
 
     ; CHECK-LABEL: name: test_v2i16_ne
     ; CHECK: liveins: $d0, $d1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
-    ; CHECK: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0
-    ; CHECK: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) %const.0
-    ; CHECK: [[ANDv8i8_:%[0-9]+]]:fpr64 = ANDv8i8 [[COPY]], [[LDRDui]]
-    ; CHECK: [[ADRP1:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0
-    ; CHECK: [[LDRDui1:%[0-9]+]]:fpr64 = LDRDui [[ADRP1]], target-flags(aarch64-pageoff, aarch64-nc) %const.0
-    ; CHECK: [[ANDv8i8_1:%[0-9]+]]:fpr64 = ANDv8i8 [[COPY1]], [[LDRDui1]]
-    ; CHECK: [[CMEQv2i32_:%[0-9]+]]:fpr64 = CMEQv2i32 [[ANDv8i8_]], [[ANDv8i8_1]]
-    ; CHECK: [[NOTv8i8_:%[0-9]+]]:fpr64 = NOTv8i8 [[CMEQv2i32_]]
-    ; CHECK: $d0 = COPY [[NOTv8i8_]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK-NEXT: [[MOVID:%[0-9]+]]:fpr64 = MOVID 51
+    ; CHECK-NEXT: [[ANDv8i8_:%[0-9]+]]:fpr64 = ANDv8i8 [[COPY]], [[MOVID]]
+    ; CHECK-NEXT: [[MOVID1:%[0-9]+]]:fpr64 = MOVID 51
+    ; CHECK-NEXT: [[ANDv8i8_1:%[0-9]+]]:fpr64 = ANDv8i8 [[COPY1]], [[MOVID1]]
+    ; CHECK-NEXT: [[CMEQv2i32_:%[0-9]+]]:fpr64 = CMEQv2i32 [[ANDv8i8_]], [[ANDv8i8_1]]
+    ; CHECK-NEXT: [[NOTv8i8_:%[0-9]+]]:fpr64 = NOTv8i8 [[CMEQv2i32_]]
+    ; CHECK-NEXT: $d0 = COPY [[NOTv8i8_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %2:fpr(<2 x s32>) = COPY $d0
     %3:fpr(<2 x s32>) = COPY $d1
     %13:gpr(s32) = G_CONSTANT i32 65535
@@ -870,13 +877,14 @@ body:             |
 
     ; CHECK-LABEL: name: test_v8i16_ne
     ; CHECK: liveins: $q0, $q1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
-    ; CHECK: [[CMEQv8i16_:%[0-9]+]]:fpr128 = CMEQv8i16 [[COPY]], [[COPY1]]
-    ; CHECK: [[NOTv16i8_:%[0-9]+]]:fpr128 = NOTv16i8 [[CMEQv8i16_]]
-    ; CHECK: [[XTNv8i8_:%[0-9]+]]:fpr64 = XTNv8i8 [[NOTv16i8_]]
-    ; CHECK: $d0 = COPY [[XTNv8i8_]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK-NEXT: [[CMEQv8i16_:%[0-9]+]]:fpr128 = CMEQv8i16 [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: [[NOTv16i8_:%[0-9]+]]:fpr128 = NOTv16i8 [[CMEQv8i16_]]
+    ; CHECK-NEXT: [[XTNv8i8_:%[0-9]+]]:fpr64 = XTNv8i8 [[NOTv16i8_]]
+    ; CHECK-NEXT: $d0 = COPY [[XTNv8i8_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %0:fpr(<8 x s16>) = COPY $q0
     %1:fpr(<8 x s16>) = COPY $q1
     %4:fpr(<8 x s16>) = G_ICMP intpred(ne), %0(<8 x s16>), %1
@@ -904,12 +912,13 @@ body:             |
 
     ; CHECK-LABEL: name: test_v4i16_ne
     ; CHECK: liveins: $d0, $d1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
-    ; CHECK: [[CMEQv4i16_:%[0-9]+]]:fpr64 = CMEQv4i16 [[COPY]], [[COPY1]]
-    ; CHECK: [[NOTv8i8_:%[0-9]+]]:fpr64 = NOTv8i8 [[CMEQv4i16_]]
-    ; CHECK: $d0 = COPY [[NOTv8i8_]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK-NEXT: [[CMEQv4i16_:%[0-9]+]]:fpr64 = CMEQv4i16 [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: [[NOTv8i8_:%[0-9]+]]:fpr64 = NOTv8i8 [[CMEQv4i16_]]
+    ; CHECK-NEXT: $d0 = COPY [[NOTv8i8_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %0:fpr(<4 x s16>) = COPY $d0
     %1:fpr(<4 x s16>) = COPY $d1
     %4:fpr(<4 x s16>) = G_ICMP intpred(ne), %0(<4 x s16>), %1
@@ -937,12 +946,13 @@ body:             |
 
     ; CHECK-LABEL: name: test_v16i8_ne
     ; CHECK: liveins: $q0, $q1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
-    ; CHECK: [[CMEQv16i8_:%[0-9]+]]:fpr128 = CMEQv16i8 [[COPY]], [[COPY1]]
-    ; CHECK: [[NOTv16i8_:%[0-9]+]]:fpr128 = NOTv16i8 [[CMEQv16i8_]]
-    ; CHECK: $q0 = COPY [[NOTv16i8_]]
-    ; CHECK: RET_ReallyLR implicit $q0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK-NEXT: [[CMEQv16i8_:%[0-9]+]]:fpr128 = CMEQv16i8 [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: [[NOTv16i8_:%[0-9]+]]:fpr128 = NOTv16i8 [[CMEQv16i8_]]
+    ; CHECK-NEXT: $q0 = COPY [[NOTv16i8_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
     %0:fpr(<16 x s8>) = COPY $q0
     %1:fpr(<16 x s8>) = COPY $q1
     %4:fpr(<16 x s8>) = G_ICMP intpred(ne), %0(<16 x s8>), %1
@@ -970,12 +980,13 @@ body:             |
 
     ; CHECK-LABEL: name: test_v8i8_ne
     ; CHECK: liveins: $d0, $d1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
-    ; CHECK: [[CMEQv8i8_:%[0-9]+]]:fpr64 = CMEQv8i8 [[COPY]], [[COPY1]]
-    ; CHECK: [[NOTv8i8_:%[0-9]+]]:fpr64 = NOTv8i8 [[CMEQv8i8_]]
-    ; CHECK: $d0 = COPY [[NOTv8i8_]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK-NEXT: [[CMEQv8i8_:%[0-9]+]]:fpr64 = CMEQv8i8 [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: [[NOTv8i8_:%[0-9]+]]:fpr64 = NOTv8i8 [[CMEQv8i8_]]
+    ; CHECK-NEXT: $d0 = COPY [[NOTv8i8_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %0:fpr(<8 x s8>) = COPY $d0
     %1:fpr(<8 x s8>) = COPY $d1
     %4:fpr(<8 x s8>) = G_ICMP intpred(ne), %0(<8 x s8>), %1
@@ -1003,12 +1014,13 @@ body:             |
 
     ; CHECK-LABEL: name: test_v2i64_ugt
     ; CHECK: liveins: $q0, $q1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
-    ; CHECK: [[CMHIv2i64_:%[0-9]+]]:fpr128 = CMHIv2i64 [[COPY]], [[COPY1]]
-    ; CHECK: [[XTNv2i32_:%[0-9]+]]:fpr64 = XTNv2i32 [[CMHIv2i64_]]
-    ; CHECK: $d0 = COPY [[XTNv2i32_]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK-NEXT: [[CMHIv2i64_:%[0-9]+]]:fpr128 = CMHIv2i64 [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: [[XTNv2i32_:%[0-9]+]]:fpr64 = XTNv2i32 [[CMHIv2i64_]]
+    ; CHECK-NEXT: $d0 = COPY [[XTNv2i32_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %0:fpr(<2 x s64>) = COPY $q0
     %1:fpr(<2 x s64>) = COPY $q1
     %4:fpr(<2 x s64>) = G_ICMP intpred(ugt), %0(<2 x s64>), %1
@@ -1036,12 +1048,13 @@ body:             |
 
     ; CHECK-LABEL: name: test_v4i32_ugt
     ; CHECK: liveins: $q0, $q1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
-    ; CHECK: [[CMHIv4i32_:%[0-9]+]]:fpr128 = CMHIv4i32 [[COPY]], [[COPY1]]
-    ; CHECK: [[XTNv4i16_:%[0-9]+]]:fpr64 = XTNv4i16 [[CMHIv4i32_]]
-    ; CHECK: $d0 = COPY [[XTNv4i16_]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK-NEXT: [[CMHIv4i32_:%[0-9]+]]:fpr128 = CMHIv4i32 [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: [[XTNv4i16_:%[0-9]+]]:fpr64 = XTNv4i16 [[CMHIv4i32_]]
+    ; CHECK-NEXT: $d0 = COPY [[XTNv4i16_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %0:fpr(<4 x s32>) = COPY $q0
     %1:fpr(<4 x s32>) = COPY $q1
     %4:fpr(<4 x s32>) = G_ICMP intpred(ugt), %0(<4 x s32>), %1
@@ -1069,11 +1082,12 @@ body:             |
 
     ; CHECK-LABEL: name: test_v2i32_ugt
     ; CHECK: liveins: $d0, $d1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
-    ; CHECK: [[CMHIv2i32_:%[0-9]+]]:fpr64 = CMHIv2i32 [[COPY]], [[COPY1]]
-    ; CHECK: $d0 = COPY [[CMHIv2i32_]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK-NEXT: [[CMHIv2i32_:%[0-9]+]]:fpr64 = CMHIv2i32 [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: $d0 = COPY [[CMHIv2i32_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %0:fpr(<2 x s32>) = COPY $d0
     %1:fpr(<2 x s32>) = COPY $d1
     %4:fpr(<2 x s32>) = G_ICMP intpred(ugt), %0(<2 x s32>), %1
@@ -1112,17 +1126,16 @@ body:             |
 
     ; CHECK-LABEL: name: test_v2i16_ugt
     ; CHECK: liveins: $d0, $d1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
-    ; CHECK: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0
-    ; CHECK: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) %const.0
-    ; CHECK: [[ANDv8i8_:%[0-9]+]]:fpr64 = ANDv8i8 [[COPY]], [[LDRDui]]
-    ; CHECK: [[ADRP1:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0
-    ; CHECK: [[LDRDui1:%[0-9]+]]:fpr64 = LDRDui [[ADRP1]], target-flags(aarch64-pageoff, aarch64-nc) %const.0
-    ; CHECK: [[ANDv8i8_1:%[0-9]+]]:fpr64 = ANDv8i8 [[COPY1]], [[LDRDui1]]
-    ; CHECK: [[CMHIv2i32_:%[0-9]+]]:fpr64 = CMHIv2i32 [[ANDv8i8_]], [[ANDv8i8_1]]
-    ; CHECK: $d0 = COPY [[CMHIv2i32_]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK-NEXT: [[MOVID:%[0-9]+]]:fpr64 = MOVID 51
+    ; CHECK-NEXT: [[ANDv8i8_:%[0-9]+]]:fpr64 = ANDv8i8 [[COPY]], [[MOVID]]
+    ; CHECK-NEXT: [[MOVID1:%[0-9]+]]:fpr64 = MOVID 51
+    ; CHECK-NEXT: [[ANDv8i8_1:%[0-9]+]]:fpr64 = ANDv8i8 [[COPY1]], [[MOVID1]]
+    ; CHECK-NEXT: [[CMHIv2i32_:%[0-9]+]]:fpr64 = CMHIv2i32 [[ANDv8i8_]], [[ANDv8i8_1]]
+    ; CHECK-NEXT: $d0 = COPY [[CMHIv2i32_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %2:fpr(<2 x s32>) = COPY $d0
     %3:fpr(<2 x s32>) = COPY $d1
     %13:gpr(s32) = G_CONSTANT i32 65535
@@ -1158,12 +1171,13 @@ body:             |
 
     ; CHECK-LABEL: name: test_v8i16_ugt
     ; CHECK: liveins: $q0, $q1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
-    ; CHECK: [[CMHIv8i16_:%[0-9]+]]:fpr128 = CMHIv8i16 [[COPY]], [[COPY1]]
-    ; CHECK: [[XTNv8i8_:%[0-9]+]]:fpr64 = XTNv8i8 [[CMHIv8i16_]]
-    ; CHECK: $d0 = COPY [[XTNv8i8_]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK-NEXT: [[CMHIv8i16_:%[0-9]+]]:fpr128 = CMHIv8i16 [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: [[XTNv8i8_:%[0-9]+]]:fpr64 = XTNv8i8 [[CMHIv8i16_]]
+    ; CHECK-NEXT: $d0 = COPY [[XTNv8i8_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %0:fpr(<8 x s16>) = COPY $q0
     %1:fpr(<8 x s16>) = COPY $q1
     %4:fpr(<8 x s16>) = G_ICMP intpred(ugt), %0(<8 x s16>), %1
@@ -1191,11 +1205,12 @@ body:             |
 
     ; CHECK-LABEL: name: test_v4i16_ugt
     ; CHECK: liveins: $d0, $d1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
-    ; CHECK: [[CMHIv4i16_:%[0-9]+]]:fpr64 = CMHIv4i16 [[COPY]], [[COPY1]]
-    ; CHECK: $d0 = COPY [[CMHIv4i16_]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK-NEXT: [[CMHIv4i16_:%[0-9]+]]:fpr64 = CMHIv4i16 [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: $d0 = COPY [[CMHIv4i16_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %0:fpr(<4 x s16>) = COPY $d0
     %1:fpr(<4 x s16>) = COPY $d1
     %4:fpr(<4 x s16>) = G_ICMP intpred(ugt), %0(<4 x s16>), %1
@@ -1223,11 +1238,12 @@ body:             |
 
     ; CHECK-LABEL: name: test_v16i8_ugt
     ; CHECK: liveins: $q0, $q1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
-    ; CHECK: [[CMHIv16i8_:%[0-9]+]]:fpr128 = CMHIv16i8 [[COPY]], [[COPY1]]
-    ; CHECK: $q0 = COPY [[CMHIv16i8_]]
-    ; CHECK: RET_ReallyLR implicit $q0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK-NEXT: [[CMHIv16i8_:%[0-9]+]]:fpr128 = CMHIv16i8 [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: $q0 = COPY [[CMHIv16i8_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
     %0:fpr(<16 x s8>) = COPY $q0
     %1:fpr(<16 x s8>) = COPY $q1
     %4:fpr(<16 x s8>) = G_ICMP intpred(ugt), %0(<16 x s8>), %1
@@ -1255,11 +1271,12 @@ body:             |
 
     ; CHECK-LABEL: name: test_v8i8_ugt
     ; CHECK: liveins: $d0, $d1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
-    ; CHECK: [[CMHIv8i8_:%[0-9]+]]:fpr64 = CMHIv8i8 [[COPY]], [[COPY1]]
-    ; CHECK: $d0 = COPY [[CMHIv8i8_]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK-NEXT: [[CMHIv8i8_:%[0-9]+]]:fpr64 = CMHIv8i8 [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: $d0 = COPY [[CMHIv8i8_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %0:fpr(<8 x s8>) = COPY $d0
     %1:fpr(<8 x s8>) = COPY $d1
     %4:fpr(<8 x s8>) = G_ICMP intpred(ugt), %0(<8 x s8>), %1
@@ -1287,12 +1304,13 @@ body:             |
 
     ; CHECK-LABEL: name: test_v2i64_uge
     ; CHECK: liveins: $q0, $q1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
-    ; CHECK: [[CMHSv2i64_:%[0-9]+]]:fpr128 = CMHSv2i64 [[COPY]], [[COPY1]]
-    ; CHECK: [[XTNv2i32_:%[0-9]+]]:fpr64 = XTNv2i32 [[CMHSv2i64_]]
-    ; CHECK: $d0 = COPY [[XTNv2i32_]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK-NEXT: [[CMHSv2i64_:%[0-9]+]]:fpr128 = CMHSv2i64 [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: [[XTNv2i32_:%[0-9]+]]:fpr64 = XTNv2i32 [[CMHSv2i64_]]
+    ; CHECK-NEXT: $d0 = COPY [[XTNv2i32_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %0:fpr(<2 x s64>) = COPY $q0
     %1:fpr(<2 x s64>) = COPY $q1
     %4:fpr(<2 x s64>) = G_ICMP intpred(uge), %0(<2 x s64>), %1
@@ -1320,12 +1338,13 @@ body:             |
 
     ; CHECK-LABEL: name: test_v4i32_uge
     ; CHECK: liveins: $q0, $q1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
-    ; CHECK: [[CMHSv4i32_:%[0-9]+]]:fpr128 = CMHSv4i32 [[COPY]], [[COPY1]]
-    ; CHECK: [[XTNv4i16_:%[0-9]+]]:fpr64 = XTNv4i16 [[CMHSv4i32_]]
-    ; CHECK: $d0 = COPY [[XTNv4i16_]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK-NEXT: [[CMHSv4i32_:%[0-9]+]]:fpr128 = CMHSv4i32 [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: [[XTNv4i16_:%[0-9]+]]:fpr64 = XTNv4i16 [[CMHSv4i32_]]
+    ; CHECK-NEXT: $d0 = COPY [[XTNv4i16_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %0:fpr(<4 x s32>) = COPY $q0
     %1:fpr(<4 x s32>) = COPY $q1
     %4:fpr(<4 x s32>) = G_ICMP intpred(uge), %0(<4 x s32>), %1
@@ -1353,11 +1372,12 @@ body:             |
 
     ; CHECK-LABEL: name: test_v2i32_uge
     ; CHECK: liveins: $d0, $d1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
-    ; CHECK: [[CMHSv2i32_:%[0-9]+]]:fpr64 = CMHSv2i32 [[COPY]], [[COPY1]]
-    ; CHECK: $d0 = COPY [[CMHSv2i32_]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK-NEXT: [[CMHSv2i32_:%[0-9]+]]:fpr64 = CMHSv2i32 [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: $d0 = COPY [[CMHSv2i32_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %0:fpr(<2 x s32>) = COPY $d0
     %1:fpr(<2 x s32>) = COPY $d1
     %4:fpr(<2 x s32>) = G_ICMP intpred(uge), %0(<2 x s32>), %1
@@ -1396,17 +1416,16 @@ body:             |
 
     ; CHECK-LABEL: name: test_v2i16_uge
     ; CHECK: liveins: $d0, $d1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
-    ; CHECK: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0
-    ; CHECK: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) %const.0
-    ; CHECK: [[ANDv8i8_:%[0-9]+]]:fpr64 = ANDv8i8 [[COPY]], [[LDRDui]]
-    ; CHECK: [[ADRP1:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0
-    ; CHECK: [[LDRDui1:%[0-9]+]]:fpr64 = LDRDui [[ADRP1]], target-flags(aarch64-pageoff, aarch64-nc) %const.0
-    ; CHECK: [[ANDv8i8_1:%[0-9]+]]:fpr64 = ANDv8i8 [[COPY1]], [[LDRDui1]]
-    ; CHECK: [[CMHSv2i32_:%[0-9]+]]:fpr64 = CMHSv2i32 [[ANDv8i8_]], [[ANDv8i8_1]]
-    ; CHECK: $d0 = COPY [[CMHSv2i32_]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK-NEXT: [[MOVID:%[0-9]+]]:fpr64 = MOVID 51
+    ; CHECK-NEXT: [[ANDv8i8_:%[0-9]+]]:fpr64 = ANDv8i8 [[COPY]], [[MOVID]]
+    ; CHECK-NEXT: [[MOVID1:%[0-9]+]]:fpr64 = MOVID 51
+    ; CHECK-NEXT: [[ANDv8i8_1:%[0-9]+]]:fpr64 = ANDv8i8 [[COPY1]], [[MOVID1]]
+    ; CHECK-NEXT: [[CMHSv2i32_:%[0-9]+]]:fpr64 = CMHSv2i32 [[ANDv8i8_]], [[ANDv8i8_1]]
+    ; CHECK-NEXT: $d0 = COPY [[CMHSv2i32_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %2:fpr(<2 x s32>) = COPY $d0
     %3:fpr(<2 x s32>) = COPY $d1
     %13:gpr(s32) = G_CONSTANT i32 65535
@@ -1442,12 +1461,13 @@ body:             |
 
     ; CHECK-LABEL: name: test_v8i16_uge
     ; CHECK: liveins: $q0, $q1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
-    ; CHECK: [[CMHSv8i16_:%[0-9]+]]:fpr128 = CMHSv8i16 [[COPY]], [[COPY1]]
-    ; CHECK: [[XTNv8i8_:%[0-9]+]]:fpr64 = XTNv8i8 [[CMHSv8i16_]]
-    ; CHECK: $d0 = COPY [[XTNv8i8_]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK-NEXT: [[CMHSv8i16_:%[0-9]+]]:fpr128 = CMHSv8i16 [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: [[XTNv8i8_:%[0-9]+]]:fpr64 = XTNv8i8 [[CMHSv8i16_]]
+    ; CHECK-NEXT: $d0 = COPY [[XTNv8i8_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %0:fpr(<8 x s16>) = COPY $q0
     %1:fpr(<8 x s16>) = COPY $q1
     %4:fpr(<8 x s16>) = G_ICMP intpred(uge), %0(<8 x s16>), %1
@@ -1475,11 +1495,12 @@ body:             |
 
     ; CHECK-LABEL: name: test_v4i16_uge
     ; CHECK: liveins: $d0, $d1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
-    ; CHECK: [[CMHSv4i16_:%[0-9]+]]:fpr64 = CMHSv4i16 [[COPY]], [[COPY1]]
-    ; CHECK: $d0 = COPY [[CMHSv4i16_]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK-NEXT: [[CMHSv4i16_:%[0-9]+]]:fpr64 = CMHSv4i16 [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: $d0 = COPY [[CMHSv4i16_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %0:fpr(<4 x s16>) = COPY $d0
     %1:fpr(<4 x s16>) = COPY $d1
     %4:fpr(<4 x s16>) = G_ICMP intpred(uge), %0(<4 x s16>), %1
@@ -1507,11 +1528,12 @@ body:             |
 
     ; CHECK-LABEL: name: test_v16i8_uge
     ; CHECK: liveins: $q0, $q1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
-    ; CHECK: [[CMHSv16i8_:%[0-9]+]]:fpr128 = CMHSv16i8 [[COPY]], [[COPY1]]
-    ; CHECK: $q0 = COPY [[CMHSv16i8_]]
-    ; CHECK: RET_ReallyLR implicit $q0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK-NEXT: [[CMHSv16i8_:%[0-9]+]]:fpr128 = CMHSv16i8 [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: $q0 = COPY [[CMHSv16i8_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
     %0:fpr(<16 x s8>) = COPY $q0
     %1:fpr(<16 x s8>) = COPY $q1
     %4:fpr(<16 x s8>) = G_ICMP intpred(uge), %0(<16 x s8>), %1
@@ -1539,11 +1561,12 @@ body:             |
 
     ; CHECK-LABEL: name: test_v8i8_uge
     ; CHECK: liveins: $d0, $d1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
-    ; CHECK: [[CMHSv8i8_:%[0-9]+]]:fpr64 = CMHSv8i8 [[COPY]], [[COPY1]]
-    ; CHECK: $d0 = COPY [[CMHSv8i8_]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK-NEXT: [[CMHSv8i8_:%[0-9]+]]:fpr64 = CMHSv8i8 [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: $d0 = COPY [[CMHSv8i8_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %0:fpr(<8 x s8>) = COPY $d0
     %1:fpr(<8 x s8>) = COPY $d1
     %4:fpr(<8 x s8>) = G_ICMP intpred(uge), %0(<8 x s8>), %1
@@ -1571,12 +1594,13 @@ body:             |
 
     ; CHECK-LABEL: name: test_v2i64_ult
     ; CHECK: liveins: $q0, $q1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
-    ; CHECK: [[CMHIv2i64_:%[0-9]+]]:fpr128 = CMHIv2i64 [[COPY1]], [[COPY]]
-    ; CHECK: [[XTNv2i32_:%[0-9]+]]:fpr64 = XTNv2i32 [[CMHIv2i64_]]
-    ; CHECK: $d0 = COPY [[XTNv2i32_]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK-NEXT: [[CMHIv2i64_:%[0-9]+]]:fpr128 = CMHIv2i64 [[COPY1]], [[COPY]]
+    ; CHECK-NEXT: [[XTNv2i32_:%[0-9]+]]:fpr64 = XTNv2i32 [[CMHIv2i64_]]
+    ; CHECK-NEXT: $d0 = COPY [[XTNv2i32_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %0:fpr(<2 x s64>) = COPY $q0
     %1:fpr(<2 x s64>) = COPY $q1
     %4:fpr(<2 x s64>) = G_ICMP intpred(ult), %0(<2 x s64>), %1
@@ -1604,12 +1628,13 @@ body:             |
 
     ; CHECK-LABEL: name: test_v4i32_ult
     ; CHECK: liveins: $q0, $q1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
-    ; CHECK: [[CMHIv4i32_:%[0-9]+]]:fpr128 = CMHIv4i32 [[COPY1]], [[COPY]]
-    ; CHECK: [[XTNv4i16_:%[0-9]+]]:fpr64 = XTNv4i16 [[CMHIv4i32_]]
-    ; CHECK: $d0 = COPY [[XTNv4i16_]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK-NEXT: [[CMHIv4i32_:%[0-9]+]]:fpr128 = CMHIv4i32 [[COPY1]], [[COPY]]
+    ; CHECK-NEXT: [[XTNv4i16_:%[0-9]+]]:fpr64 = XTNv4i16 [[CMHIv4i32_]]
+    ; CHECK-NEXT: $d0 = COPY [[XTNv4i16_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %0:fpr(<4 x s32>) = COPY $q0
     %1:fpr(<4 x s32>) = COPY $q1
     %4:fpr(<4 x s32>) = G_ICMP intpred(ult), %0(<4 x s32>), %1
@@ -1637,11 +1662,12 @@ body:             |
 
     ; CHECK-LABEL: name: test_v2i32_ult
     ; CHECK: liveins: $d0, $d1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
-    ; CHECK: [[CMHIv2i32_:%[0-9]+]]:fpr64 = CMHIv2i32 [[COPY1]], [[COPY]]
-    ; CHECK: $d0 = COPY [[CMHIv2i32_]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK-NEXT: [[CMHIv2i32_:%[0-9]+]]:fpr64 = CMHIv2i32 [[COPY1]], [[COPY]]
+    ; CHECK-NEXT: $d0 = COPY [[CMHIv2i32_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %0:fpr(<2 x s32>) = COPY $d0
     %1:fpr(<2 x s32>) = COPY $d1
     %4:fpr(<2 x s32>) = G_ICMP intpred(ult), %0(<2 x s32>), %1
@@ -1680,17 +1706,16 @@ body:             |
 
     ; CHECK-LABEL: name: test_v2i16_ult
     ; CHECK: liveins: $d0, $d1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
-    ; CHECK: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0
-    ; CHECK: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) %const.0
-    ; CHECK: [[ANDv8i8_:%[0-9]+]]:fpr64 = ANDv8i8 [[COPY]], [[LDRDui]]
-    ; CHECK: [[ADRP1:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0
-    ; CHECK: [[LDRDui1:%[0-9]+]]:fpr64 = LDRDui [[ADRP1]], target-flags(aarch64-pageoff, aarch64-nc) %const.0
-    ; CHECK: [[ANDv8i8_1:%[0-9]+]]:fpr64 = ANDv8i8 [[COPY1]], [[LDRDui1]]
-    ; CHECK: [[CMHIv2i32_:%[0-9]+]]:fpr64 = CMHIv2i32 [[ANDv8i8_1]], [[ANDv8i8_]]
-    ; CHECK: $d0 = COPY [[CMHIv2i32_]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK-NEXT: [[MOVID:%[0-9]+]]:fpr64 = MOVID 51
+    ; CHECK-NEXT: [[ANDv8i8_:%[0-9]+]]:fpr64 = ANDv8i8 [[COPY]], [[MOVID]]
+    ; CHECK-NEXT: [[MOVID1:%[0-9]+]]:fpr64 = MOVID 51
+    ; CHECK-NEXT: [[ANDv8i8_1:%[0-9]+]]:fpr64 = ANDv8i8 [[COPY1]], [[MOVID1]]
+    ; CHECK-NEXT: [[CMHIv2i32_:%[0-9]+]]:fpr64 = CMHIv2i32 [[ANDv8i8_1]], [[ANDv8i8_]]
+    ; CHECK-NEXT: $d0 = COPY [[CMHIv2i32_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %2:fpr(<2 x s32>) = COPY $d0
     %3:fpr(<2 x s32>) = COPY $d1
     %13:gpr(s32) = G_CONSTANT i32 65535
@@ -1726,12 +1751,13 @@ body:             |
 
     ; CHECK-LABEL: name: test_v8i16_ult
     ; CHECK: liveins: $q0, $q1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
-    ; CHECK: [[CMHIv8i16_:%[0-9]+]]:fpr128 = CMHIv8i16 [[COPY1]], [[COPY]]
-    ; CHECK: [[XTNv8i8_:%[0-9]+]]:fpr64 = XTNv8i8 [[CMHIv8i16_]]
-    ; CHECK: $d0 = COPY [[XTNv8i8_]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK-NEXT: [[CMHIv8i16_:%[0-9]+]]:fpr128 = CMHIv8i16 [[COPY1]], [[COPY]]
+    ; CHECK-NEXT: [[XTNv8i8_:%[0-9]+]]:fpr64 = XTNv8i8 [[CMHIv8i16_]]
+    ; CHECK-NEXT: $d0 = COPY [[XTNv8i8_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %0:fpr(<8 x s16>) = COPY $q0
     %1:fpr(<8 x s16>) = COPY $q1
     %4:fpr(<8 x s16>) = G_ICMP intpred(ult), %0(<8 x s16>), %1
@@ -1759,11 +1785,12 @@ body:             |
 
     ; CHECK-LABEL: name: test_v4i16_ult
     ; CHECK: liveins: $d0, $d1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
-    ; CHECK: [[CMHIv4i16_:%[0-9]+]]:fpr64 = CMHIv4i16 [[COPY1]], [[COPY]]
-    ; CHECK: $d0 = COPY [[CMHIv4i16_]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK-NEXT: [[CMHIv4i16_:%[0-9]+]]:fpr64 = CMHIv4i16 [[COPY1]], [[COPY]]
+    ; CHECK-NEXT: $d0 = COPY [[CMHIv4i16_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %0:fpr(<4 x s16>) = COPY $d0
     %1:fpr(<4 x s16>) = COPY $d1
     %4:fpr(<4 x s16>) = G_ICMP intpred(ult), %0(<4 x s16>), %1
@@ -1791,11 +1818,12 @@ body:             |
 
     ; CHECK-LABEL: name: test_v16i8_ult
     ; CHECK: liveins: $q0, $q1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
-    ; CHECK: [[CMHIv16i8_:%[0-9]+]]:fpr128 = CMHIv16i8 [[COPY1]], [[COPY]]
-    ; CHECK: $q0 = COPY [[CMHIv16i8_]]
-    ; CHECK: RET_ReallyLR implicit $q0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK-NEXT: [[CMHIv16i8_:%[0-9]+]]:fpr128 = CMHIv16i8 [[COPY1]], [[COPY]]
+    ; CHECK-NEXT: $q0 = COPY [[CMHIv16i8_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
     %0:fpr(<16 x s8>) = COPY $q0
     %1:fpr(<16 x s8>) = COPY $q1
     %4:fpr(<16 x s8>) = G_ICMP intpred(ult), %0(<16 x s8>), %1
@@ -1823,11 +1851,12 @@ body:             |
 
     ; CHECK-LABEL: name: test_v8i8_ult
     ; CHECK: liveins: $d0, $d1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
-    ; CHECK: [[CMHIv8i8_:%[0-9]+]]:fpr64 = CMHIv8i8 [[COPY1]], [[COPY]]
-    ; CHECK: $d0 = COPY [[CMHIv8i8_]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK-NEXT: [[CMHIv8i8_:%[0-9]+]]:fpr64 = CMHIv8i8 [[COPY1]], [[COPY]]
+    ; CHECK-NEXT: $d0 = COPY [[CMHIv8i8_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %0:fpr(<8 x s8>) = COPY $d0
     %1:fpr(<8 x s8>) = COPY $d1
     %4:fpr(<8 x s8>) = G_ICMP intpred(ult), %0(<8 x s8>), %1
@@ -1855,12 +1884,13 @@ body:             |
 
     ; CHECK-LABEL: name: test_v2i64_ule
     ; CHECK: liveins: $q0, $q1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
-    ; CHECK: [[CMHSv2i64_:%[0-9]+]]:fpr128 = CMHSv2i64 [[COPY1]], [[COPY]]
-    ; CHECK: [[XTNv2i32_:%[0-9]+]]:fpr64 = XTNv2i32 [[CMHSv2i64_]]
-    ; CHECK: $d0 = COPY [[XTNv2i32_]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK-NEXT: [[CMHSv2i64_:%[0-9]+]]:fpr128 = CMHSv2i64 [[COPY1]], [[COPY]]
+    ; CHECK-NEXT: [[XTNv2i32_:%[0-9]+]]:fpr64 = XTNv2i32 [[CMHSv2i64_]]
+    ; CHECK-NEXT: $d0 = COPY [[XTNv2i32_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %0:fpr(<2 x s64>) = COPY $q0
     %1:fpr(<2 x s64>) = COPY $q1
     %4:fpr(<2 x s64>) = G_ICMP intpred(ule), %0(<2 x s64>), %1
@@ -1888,12 +1918,13 @@ body:             |
 
     ; CHECK-LABEL: name: test_v4i32_ule
     ; CHECK: liveins: $q0, $q1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
-    ; CHECK: [[CMHSv4i32_:%[0-9]+]]:fpr128 = CMHSv4i32 [[COPY1]], [[COPY]]
-    ; CHECK: [[XTNv4i16_:%[0-9]+]]:fpr64 = XTNv4i16 [[CMHSv4i32_]]
-    ; CHECK: $d0 = COPY [[XTNv4i16_]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK-NEXT: [[CMHSv4i32_:%[0-9]+]]:fpr128 = CMHSv4i32 [[COPY1]], [[COPY]]
+    ; CHECK-NEXT: [[XTNv4i16_:%[0-9]+]]:fpr64 = XTNv4i16 [[CMHSv4i32_]]
+    ; CHECK-NEXT: $d0 = COPY [[XTNv4i16_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %0:fpr(<4 x s32>) = COPY $q0
     %1:fpr(<4 x s32>) = COPY $q1
     %4:fpr(<4 x s32>) = G_ICMP intpred(ule), %0(<4 x s32>), %1
@@ -1921,11 +1952,12 @@ body:             |
 
     ; CHECK-LABEL: name: test_v2i32_ule
     ; CHECK: liveins: $d0, $d1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
-    ; CHECK: [[CMHSv2i32_:%[0-9]+]]:fpr64 = CMHSv2i32 [[COPY1]], [[COPY]]
-    ; CHECK: $d0 = COPY [[CMHSv2i32_]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK-NEXT: [[CMHSv2i32_:%[0-9]+]]:fpr64 = CMHSv2i32 [[COPY1]], [[COPY]]
+    ; CHECK-NEXT: $d0 = COPY [[CMHSv2i32_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %0:fpr(<2 x s32>) = COPY $d0
     %1:fpr(<2 x s32>) = COPY $d1
     %4:fpr(<2 x s32>) = G_ICMP intpred(ule), %0(<2 x s32>), %1
@@ -1964,17 +1996,16 @@ body:             |
 
     ; CHECK-LABEL: name: test_v2i16_ule
     ; CHECK: liveins: $d0, $d1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
-    ; CHECK: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0
-    ; CHECK: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) %const.0
-    ; CHECK: [[ANDv8i8_:%[0-9]+]]:fpr64 = ANDv8i8 [[COPY]], [[LDRDui]]
-    ; CHECK: [[ADRP1:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0
-    ; CHECK: [[LDRDui1:%[0-9]+]]:fpr64 = LDRDui [[ADRP1]], target-flags(aarch64-pageoff, aarch64-nc) %const.0
-    ; CHECK: [[ANDv8i8_1:%[0-9]+]]:fpr64 = ANDv8i8 [[COPY1]], [[LDRDui1]]
-    ; CHECK: [[CMHSv2i32_:%[0-9]+]]:fpr64 = CMHSv2i32 [[ANDv8i8_1]], [[ANDv8i8_]]
-    ; CHECK: $d0 = COPY [[CMHSv2i32_]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK-NEXT: [[MOVID:%[0-9]+]]:fpr64 = MOVID 51
+    ; CHECK-NEXT: [[ANDv8i8_:%[0-9]+]]:fpr64 = ANDv8i8 [[COPY]], [[MOVID]]
+    ; CHECK-NEXT: [[MOVID1:%[0-9]+]]:fpr64 = MOVID 51
+    ; CHECK-NEXT: [[ANDv8i8_1:%[0-9]+]]:fpr64 = ANDv8i8 [[COPY1]], [[MOVID1]]
+    ; CHECK-NEXT: [[CMHSv2i32_:%[0-9]+]]:fpr64 = CMHSv2i32 [[ANDv8i8_1]], [[ANDv8i8_]]
+    ; CHECK-NEXT: $d0 = COPY [[CMHSv2i32_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %2:fpr(<2 x s32>) = COPY $d0
     %3:fpr(<2 x s32>) = COPY $d1
     %13:gpr(s32) = G_CONSTANT i32 65535
@@ -2010,12 +2041,13 @@ body:             |
 
     ; CHECK-LABEL: name: test_v8i16_ule
     ; CHECK: liveins: $q0, $q1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
-    ; CHECK: [[CMHSv8i16_:%[0-9]+]]:fpr128 = CMHSv8i16 [[COPY1]], [[COPY]]
-    ; CHECK: [[XTNv8i8_:%[0-9]+]]:fpr64 = XTNv8i8 [[CMHSv8i16_]]
-    ; CHECK: $d0 = COPY [[XTNv8i8_]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK-NEXT: [[CMHSv8i16_:%[0-9]+]]:fpr128 = CMHSv8i16 [[COPY1]], [[COPY]]
+    ; CHECK-NEXT: [[XTNv8i8_:%[0-9]+]]:fpr64 = XTNv8i8 [[CMHSv8i16_]]
+    ; CHECK-NEXT: $d0 = COPY [[XTNv8i8_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %0:fpr(<8 x s16>) = COPY $q0
     %1:fpr(<8 x s16>) = COPY $q1
     %4:fpr(<8 x s16>) = G_ICMP intpred(ule), %0(<8 x s16>), %1
@@ -2043,11 +2075,12 @@ body:             |
 
     ; CHECK-LABEL: name: test_v4i16_ule
     ; CHECK: liveins: $d0, $d1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
-    ; CHECK: [[CMHSv4i16_:%[0-9]+]]:fpr64 = CMHSv4i16 [[COPY1]], [[COPY]]
-    ; CHECK: $d0 = COPY [[CMHSv4i16_]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK-NEXT: [[CMHSv4i16_:%[0-9]+]]:fpr64 = CMHSv4i16 [[COPY1]], [[COPY]]
+    ; CHECK-NEXT: $d0 = COPY [[CMHSv4i16_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %0:fpr(<4 x s16>) = COPY $d0
     %1:fpr(<4 x s16>) = COPY $d1
     %4:fpr(<4 x s16>) = G_ICMP intpred(ule), %0(<4 x s16>), %1
@@ -2075,11 +2108,12 @@ body:             |
 
     ; CHECK-LABEL: name: test_v16i8_ule
     ; CHECK: liveins: $q0, $q1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
-    ; CHECK: [[CMHSv16i8_:%[0-9]+]]:fpr128 = CMHSv16i8 [[COPY1]], [[COPY]]
-    ; CHECK: $q0 = COPY [[CMHSv16i8_]]
-    ; CHECK: RET_ReallyLR implicit $q0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK-NEXT: [[CMHSv16i8_:%[0-9]+]]:fpr128 = CMHSv16i8 [[COPY1]], [[COPY]]
+    ; CHECK-NEXT: $q0 = COPY [[CMHSv16i8_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
     %0:fpr(<16 x s8>) = COPY $q0
     %1:fpr(<16 x s8>) = COPY $q1
     %4:fpr(<16 x s8>) = G_ICMP intpred(ule), %0(<16 x s8>), %1
@@ -2107,11 +2141,12 @@ body:             |
 
     ; CHECK-LABEL: name: test_v8i8_ule
     ; CHECK: liveins: $d0, $d1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
-    ; CHECK: [[CMHSv8i8_:%[0-9]+]]:fpr64 = CMHSv8i8 [[COPY1]], [[COPY]]
-    ; CHECK: $d0 = COPY [[CMHSv8i8_]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK-NEXT: [[CMHSv8i8_:%[0-9]+]]:fpr64 = CMHSv8i8 [[COPY1]], [[COPY]]
+    ; CHECK-NEXT: $d0 = COPY [[CMHSv8i8_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %0:fpr(<8 x s8>) = COPY $d0
     %1:fpr(<8 x s8>) = COPY $d1
     %4:fpr(<8 x s8>) = G_ICMP intpred(ule), %0(<8 x s8>), %1
@@ -2139,12 +2174,13 @@ body:             |
 
     ; CHECK-LABEL: name: test_v2i64_sgt
     ; CHECK: liveins: $q0, $q1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
-    ; CHECK: [[CMGTv2i64_:%[0-9]+]]:fpr128 = CMGTv2i64 [[COPY]], [[COPY1]]
-    ; CHECK: [[XTNv2i32_:%[0-9]+]]:fpr64 = XTNv2i32 [[CMGTv2i64_]]
-    ; CHECK: $d0 = COPY [[XTNv2i32_]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK-NEXT: [[CMGTv2i64_:%[0-9]+]]:fpr128 = CMGTv2i64 [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: [[XTNv2i32_:%[0-9]+]]:fpr64 = XTNv2i32 [[CMGTv2i64_]]
+    ; CHECK-NEXT: $d0 = COPY [[XTNv2i32_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %0:fpr(<2 x s64>) = COPY $q0
     %1:fpr(<2 x s64>) = COPY $q1
     %4:fpr(<2 x s64>) = G_ICMP intpred(sgt), %0(<2 x s64>), %1
@@ -2172,12 +2208,13 @@ body:             |
 
     ; CHECK-LABEL: name: test_v4i32_sgt
     ; CHECK: liveins: $q0, $q1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
-    ; CHECK: [[CMGTv4i32_:%[0-9]+]]:fpr128 = CMGTv4i32 [[COPY]], [[COPY1]]
-    ; CHECK: [[XTNv4i16_:%[0-9]+]]:fpr64 = XTNv4i16 [[CMGTv4i32_]]
-    ; CHECK: $d0 = COPY [[XTNv4i16_]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK-NEXT: [[CMGTv4i32_:%[0-9]+]]:fpr128 = CMGTv4i32 [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: [[XTNv4i16_:%[0-9]+]]:fpr64 = XTNv4i16 [[CMGTv4i32_]]
+    ; CHECK-NEXT: $d0 = COPY [[XTNv4i16_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %0:fpr(<4 x s32>) = COPY $q0
     %1:fpr(<4 x s32>) = COPY $q1
     %4:fpr(<4 x s32>) = G_ICMP intpred(sgt), %0(<4 x s32>), %1
@@ -2205,11 +2242,12 @@ body:             |
 
     ; CHECK-LABEL: name: test_v2i32_sgt
     ; CHECK: liveins: $d0, $d1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
-    ; CHECK: [[CMGTv2i32_:%[0-9]+]]:fpr64 = CMGTv2i32 [[COPY]], [[COPY1]]
-    ; CHECK: $d0 = COPY [[CMGTv2i32_]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK-NEXT: [[CMGTv2i32_:%[0-9]+]]:fpr64 = CMGTv2i32 [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: $d0 = COPY [[CMGTv2i32_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %0:fpr(<2 x s32>) = COPY $d0
     %1:fpr(<2 x s32>) = COPY $d1
     %4:fpr(<2 x s32>) = G_ICMP intpred(sgt), %0(<2 x s32>), %1
@@ -2250,21 +2288,20 @@ body:             |
 
     ; CHECK-LABEL: name: test_v2i16_sgt
     ; CHECK: liveins: $d0, $d1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
-    ; CHECK: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0
-    ; CHECK: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) %const.0
-    ; CHECK: [[SHLv2i32_shift:%[0-9]+]]:fpr64 = SHLv2i32_shift [[COPY]], 16
-    ; CHECK: [[NEGv2i32_:%[0-9]+]]:fpr64 = NEGv2i32 [[LDRDui]]
-    ; CHECK: [[SSHLv2i32_:%[0-9]+]]:fpr64 = SSHLv2i32 [[SHLv2i32_shift]], [[NEGv2i32_]]
-    ; CHECK: [[ADRP1:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0
-    ; CHECK: [[LDRDui1:%[0-9]+]]:fpr64 = LDRDui [[ADRP1]], target-flags(aarch64-pageoff, aarch64-nc) %const.0
-    ; CHECK: [[SHLv2i32_shift1:%[0-9]+]]:fpr64 = SHLv2i32_shift [[COPY1]], 16
-    ; CHECK: [[NEGv2i32_1:%[0-9]+]]:fpr64 = NEGv2i32 [[LDRDui1]]
-    ; CHECK: [[SSHLv2i32_1:%[0-9]+]]:fpr64 = SSHLv2i32 [[SHLv2i32_shift1]], [[NEGv2i32_1]]
-    ; CHECK: [[CMGTv2i32_:%[0-9]+]]:fpr64 = CMGTv2i32 [[SSHLv2i32_]], [[SSHLv2i32_1]]
-    ; CHECK: $d0 = COPY [[CMGTv2i32_]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK-NEXT: [[MOVIv2i32_:%[0-9]+]]:fpr64 = MOVIv2i32 16, 0
+    ; CHECK-NEXT: [[SHLv2i32_shift:%[0-9]+]]:fpr64 = SHLv2i32_shift [[COPY]], 16
+    ; CHECK-NEXT: [[NEGv2i32_:%[0-9]+]]:fpr64 = NEGv2i32 [[MOVIv2i32_]]
+    ; CHECK-NEXT: [[SSHLv2i32_:%[0-9]+]]:fpr64 = SSHLv2i32 [[SHLv2i32_shift]], [[NEGv2i32_]]
+    ; CHECK-NEXT: [[MOVIv2i32_1:%[0-9]+]]:fpr64 = MOVIv2i32 16, 0
+    ; CHECK-NEXT: [[SHLv2i32_shift1:%[0-9]+]]:fpr64 = SHLv2i32_shift [[COPY1]], 16
+    ; CHECK-NEXT: [[NEGv2i32_1:%[0-9]+]]:fpr64 = NEGv2i32 [[MOVIv2i32_1]]
+    ; CHECK-NEXT: [[SSHLv2i32_1:%[0-9]+]]:fpr64 = SSHLv2i32 [[SHLv2i32_shift1]], [[NEGv2i32_1]]
+    ; CHECK-NEXT: [[CMGTv2i32_:%[0-9]+]]:fpr64 = CMGTv2i32 [[SSHLv2i32_]], [[SSHLv2i32_1]]
+    ; CHECK-NEXT: $d0 = COPY [[CMGTv2i32_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %2:fpr(<2 x s32>) = COPY $d0
     %3:fpr(<2 x s32>) = COPY $d1
     %14:gpr(s32) = G_CONSTANT i32 16
@@ -2302,12 +2339,13 @@ body:             |
 
     ; CHECK-LABEL: name: test_v8i16_sgt
     ; CHECK: liveins: $q0, $q1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
-    ; CHECK: [[CMGTv8i16_:%[0-9]+]]:fpr128 = CMGTv8i16 [[COPY]], [[COPY1]]
-    ; CHECK: [[XTNv8i8_:%[0-9]+]]:fpr64 = XTNv8i8 [[CMGTv8i16_]]
-    ; CHECK: $d0 = COPY [[XTNv8i8_]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK-NEXT: [[CMGTv8i16_:%[0-9]+]]:fpr128 = CMGTv8i16 [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: [[XTNv8i8_:%[0-9]+]]:fpr64 = XTNv8i8 [[CMGTv8i16_]]
+    ; CHECK-NEXT: $d0 = COPY [[XTNv8i8_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %0:fpr(<8 x s16>) = COPY $q0
     %1:fpr(<8 x s16>) = COPY $q1
     %4:fpr(<8 x s16>) = G_ICMP intpred(sgt), %0(<8 x s16>), %1
@@ -2335,11 +2373,12 @@ body:             |
 
     ; CHECK-LABEL: name: test_v4i16_sgt
     ; CHECK: liveins: $d0, $d1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
-    ; CHECK: [[CMGTv4i16_:%[0-9]+]]:fpr64 = CMGTv4i16 [[COPY]], [[COPY1]]
-    ; CHECK: $d0 = COPY [[CMGTv4i16_]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK-NEXT: [[CMGTv4i16_:%[0-9]+]]:fpr64 = CMGTv4i16 [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: $d0 = COPY [[CMGTv4i16_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %0:fpr(<4 x s16>) = COPY $d0
     %1:fpr(<4 x s16>) = COPY $d1
     %4:fpr(<4 x s16>) = G_ICMP intpred(sgt), %0(<4 x s16>), %1
@@ -2367,11 +2406,12 @@ body:             |
 
     ; CHECK-LABEL: name: test_v16i8_sgt
     ; CHECK: liveins: $q0, $q1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
-    ; CHECK: [[CMGTv16i8_:%[0-9]+]]:fpr128 = CMGTv16i8 [[COPY]], [[COPY1]]
-    ; CHECK: $q0 = COPY [[CMGTv16i8_]]
-    ; CHECK: RET_ReallyLR implicit $q0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK-NEXT: [[CMGTv16i8_:%[0-9]+]]:fpr128 = CMGTv16i8 [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: $q0 = COPY [[CMGTv16i8_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
     %0:fpr(<16 x s8>) = COPY $q0
     %1:fpr(<16 x s8>) = COPY $q1
     %4:fpr(<16 x s8>) = G_ICMP intpred(sgt), %0(<16 x s8>), %1
@@ -2399,11 +2439,12 @@ body:             |
 
     ; CHECK-LABEL: name: test_v8i8_sgt
     ; CHECK: liveins: $d0, $d1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
-    ; CHECK: [[CMGTv8i8_:%[0-9]+]]:fpr64 = CMGTv8i8 [[COPY]], [[COPY1]]
-    ; CHECK: $d0 = COPY [[CMGTv8i8_]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK-NEXT: [[CMGTv8i8_:%[0-9]+]]:fpr64 = CMGTv8i8 [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: $d0 = COPY [[CMGTv8i8_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %0:fpr(<8 x s8>) = COPY $d0
     %1:fpr(<8 x s8>) = COPY $d1
     %4:fpr(<8 x s8>) = G_ICMP intpred(sgt), %0(<8 x s8>), %1
@@ -2431,12 +2472,13 @@ body:             |
 
     ; CHECK-LABEL: name: test_v2i64_sge
     ; CHECK: liveins: $q0, $q1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
-    ; CHECK: [[CMGEv2i64_:%[0-9]+]]:fpr128 = CMGEv2i64 [[COPY]], [[COPY1]]
-    ; CHECK: [[XTNv2i32_:%[0-9]+]]:fpr64 = XTNv2i32 [[CMGEv2i64_]]
-    ; CHECK: $d0 = COPY [[XTNv2i32_]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK-NEXT: [[CMGEv2i64_:%[0-9]+]]:fpr128 = CMGEv2i64 [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: [[XTNv2i32_:%[0-9]+]]:fpr64 = XTNv2i32 [[CMGEv2i64_]]
+    ; CHECK-NEXT: $d0 = COPY [[XTNv2i32_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %0:fpr(<2 x s64>) = COPY $q0
     %1:fpr(<2 x s64>) = COPY $q1
     %4:fpr(<2 x s64>) = G_ICMP intpred(sge), %0(<2 x s64>), %1
@@ -2464,12 +2506,13 @@ body:             |
 
     ; CHECK-LABEL: name: test_v4i32_sge
     ; CHECK: liveins: $q0, $q1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
-    ; CHECK: [[CMGEv4i32_:%[0-9]+]]:fpr128 = CMGEv4i32 [[COPY]], [[COPY1]]
-    ; CHECK: [[XTNv4i16_:%[0-9]+]]:fpr64 = XTNv4i16 [[CMGEv4i32_]]
-    ; CHECK: $d0 = COPY [[XTNv4i16_]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK-NEXT: [[CMGEv4i32_:%[0-9]+]]:fpr128 = CMGEv4i32 [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: [[XTNv4i16_:%[0-9]+]]:fpr64 = XTNv4i16 [[CMGEv4i32_]]
+    ; CHECK-NEXT: $d0 = COPY [[XTNv4i16_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %0:fpr(<4 x s32>) = COPY $q0
     %1:fpr(<4 x s32>) = COPY $q1
     %4:fpr(<4 x s32>) = G_ICMP intpred(sge), %0(<4 x s32>), %1
@@ -2497,11 +2540,12 @@ body:             |
 
     ; CHECK-LABEL: name: test_v2i32_sge
     ; CHECK: liveins: $d0, $d1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
-    ; CHECK: [[CMGEv2i32_:%[0-9]+]]:fpr64 = CMGEv2i32 [[COPY]], [[COPY1]]
-    ; CHECK: $d0 = COPY [[CMGEv2i32_]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK-NEXT: [[CMGEv2i32_:%[0-9]+]]:fpr64 = CMGEv2i32 [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: $d0 = COPY [[CMGEv2i32_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %0:fpr(<2 x s32>) = COPY $d0
     %1:fpr(<2 x s32>) = COPY $d1
     %4:fpr(<2 x s32>) = G_ICMP intpred(sge), %0(<2 x s32>), %1
@@ -2542,21 +2586,20 @@ body:             |
 
     ; CHECK-LABEL: name: test_v2i16_sge
     ; CHECK: liveins: $d0, $d1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
-    ; CHECK: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0
-    ; CHECK: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) %const.0
-    ; CHECK: [[SHLv2i32_shift:%[0-9]+]]:fpr64 = SHLv2i32_shift [[COPY]], 16
-    ; CHECK: [[NEGv2i32_:%[0-9]+]]:fpr64 = NEGv2i32 [[LDRDui]]
-    ; CHECK: [[SSHLv2i32_:%[0-9]+]]:fpr64 = SSHLv2i32 [[SHLv2i32_shift]], [[NEGv2i32_]]
-    ; CHECK: [[ADRP1:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0
-    ; CHECK: [[LDRDui1:%[0-9]+]]:fpr64 = LDRDui [[ADRP1]], target-flags(aarch64-pageoff, aarch64-nc) %const.0
-    ; CHECK: [[SHLv2i32_shift1:%[0-9]+]]:fpr64 = SHLv2i32_shift [[COPY1]], 16
-    ; CHECK: [[NEGv2i32_1:%[0-9]+]]:fpr64 = NEGv2i32 [[LDRDui1]]
-    ; CHECK: [[SSHLv2i32_1:%[0-9]+]]:fpr64 = SSHLv2i32 [[SHLv2i32_shift1]], [[NEGv2i32_1]]
-    ; CHECK: [[CMGEv2i32_:%[0-9]+]]:fpr64 = CMGEv2i32 [[SSHLv2i32_]], [[SSHLv2i32_1]]
-    ; CHECK: $d0 = COPY [[CMGEv2i32_]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK-NEXT: [[MOVIv2i32_:%[0-9]+]]:fpr64 = MOVIv2i32 16, 0
+    ; CHECK-NEXT: [[SHLv2i32_shift:%[0-9]+]]:fpr64 = SHLv2i32_shift [[COPY]], 16
+    ; CHECK-NEXT: [[NEGv2i32_:%[0-9]+]]:fpr64 = NEGv2i32 [[MOVIv2i32_]]
+    ; CHECK-NEXT: [[SSHLv2i32_:%[0-9]+]]:fpr64 = SSHLv2i32 [[SHLv2i32_shift]], [[NEGv2i32_]]
+    ; CHECK-NEXT: [[MOVIv2i32_1:%[0-9]+]]:fpr64 = MOVIv2i32 16, 0
+    ; CHECK-NEXT: [[SHLv2i32_shift1:%[0-9]+]]:fpr64 = SHLv2i32_shift [[COPY1]], 16
+    ; CHECK-NEXT: [[NEGv2i32_1:%[0-9]+]]:fpr64 = NEGv2i32 [[MOVIv2i32_1]]
+    ; CHECK-NEXT: [[SSHLv2i32_1:%[0-9]+]]:fpr64 = SSHLv2i32 [[SHLv2i32_shift1]], [[NEGv2i32_1]]
+    ; CHECK-NEXT: [[CMGEv2i32_:%[0-9]+]]:fpr64 = CMGEv2i32 [[SSHLv2i32_]], [[SSHLv2i32_1]]
+    ; CHECK-NEXT: $d0 = COPY [[CMGEv2i32_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %2:fpr(<2 x s32>) = COPY $d0
     %3:fpr(<2 x s32>) = COPY $d1
     %14:gpr(s32) = G_CONSTANT i32 16
@@ -2594,12 +2637,13 @@ body:             |
 
     ; CHECK-LABEL: name: test_v8i16_sge
     ; CHECK: liveins: $q0, $q1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
-    ; CHECK: [[CMGEv8i16_:%[0-9]+]]:fpr128 = CMGEv8i16 [[COPY]], [[COPY1]]
-    ; CHECK: [[XTNv8i8_:%[0-9]+]]:fpr64 = XTNv8i8 [[CMGEv8i16_]]
-    ; CHECK: $d0 = COPY [[XTNv8i8_]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK-NEXT: [[CMGEv8i16_:%[0-9]+]]:fpr128 = CMGEv8i16 [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: [[XTNv8i8_:%[0-9]+]]:fpr64 = XTNv8i8 [[CMGEv8i16_]]
+    ; CHECK-NEXT: $d0 = COPY [[XTNv8i8_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %0:fpr(<8 x s16>) = COPY $q0
     %1:fpr(<8 x s16>) = COPY $q1
     %4:fpr(<8 x s16>) = G_ICMP intpred(sge), %0(<8 x s16>), %1
@@ -2627,11 +2671,12 @@ body:             |
 
     ; CHECK-LABEL: name: test_v4i16_sge
     ; CHECK: liveins: $d0, $d1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
-    ; CHECK: [[CMGEv4i16_:%[0-9]+]]:fpr64 = CMGEv4i16 [[COPY]], [[COPY1]]
-    ; CHECK: $d0 = COPY [[CMGEv4i16_]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK-NEXT: [[CMGEv4i16_:%[0-9]+]]:fpr64 = CMGEv4i16 [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: $d0 = COPY [[CMGEv4i16_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %0:fpr(<4 x s16>) = COPY $d0
     %1:fpr(<4 x s16>) = COPY $d1
     %4:fpr(<4 x s16>) = G_ICMP intpred(sge), %0(<4 x s16>), %1
@@ -2659,11 +2704,12 @@ body:             |
 
     ; CHECK-LABEL: name: test_v16i8_sge
     ; CHECK: liveins: $q0, $q1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
-    ; CHECK: [[CMGEv16i8_:%[0-9]+]]:fpr128 = CMGEv16i8 [[COPY]], [[COPY1]]
-    ; CHECK: $q0 = COPY [[CMGEv16i8_]]
-    ; CHECK: RET_ReallyLR implicit $q0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK-NEXT: [[CMGEv16i8_:%[0-9]+]]:fpr128 = CMGEv16i8 [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: $q0 = COPY [[CMGEv16i8_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
     %0:fpr(<16 x s8>) = COPY $q0
     %1:fpr(<16 x s8>) = COPY $q1
     %4:fpr(<16 x s8>) = G_ICMP intpred(sge), %0(<16 x s8>), %1
@@ -2691,11 +2737,12 @@ body:             |
 
     ; CHECK-LABEL: name: test_v8i8_sge
     ; CHECK: liveins: $d0, $d1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
-    ; CHECK: [[CMGEv8i8_:%[0-9]+]]:fpr64 = CMGEv8i8 [[COPY]], [[COPY1]]
-    ; CHECK: $d0 = COPY [[CMGEv8i8_]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK-NEXT: [[CMGEv8i8_:%[0-9]+]]:fpr64 = CMGEv8i8 [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: $d0 = COPY [[CMGEv8i8_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %0:fpr(<8 x s8>) = COPY $d0
     %1:fpr(<8 x s8>) = COPY $d1
     %4:fpr(<8 x s8>) = G_ICMP intpred(sge), %0(<8 x s8>), %1
@@ -2723,12 +2770,13 @@ body:             |
 
     ; CHECK-LABEL: name: test_v2i64_slt
     ; CHECK: liveins: $q0, $q1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
-    ; CHECK: [[CMGTv2i64_:%[0-9]+]]:fpr128 = CMGTv2i64 [[COPY1]], [[COPY]]
-    ; CHECK: [[XTNv2i32_:%[0-9]+]]:fpr64 = XTNv2i32 [[CMGTv2i64_]]
-    ; CHECK: $d0 = COPY [[XTNv2i32_]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK-NEXT: [[CMGTv2i64_:%[0-9]+]]:fpr128 = CMGTv2i64 [[COPY1]], [[COPY]]
+    ; CHECK-NEXT: [[XTNv2i32_:%[0-9]+]]:fpr64 = XTNv2i32 [[CMGTv2i64_]]
+    ; CHECK-NEXT: $d0 = COPY [[XTNv2i32_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %0:fpr(<2 x s64>) = COPY $q0
     %1:fpr(<2 x s64>) = COPY $q1
     %4:fpr(<2 x s64>) = G_ICMP intpred(slt), %0(<2 x s64>), %1
@@ -2756,12 +2804,13 @@ body:             |
 
     ; CHECK-LABEL: name: test_v4i32_slt
     ; CHECK: liveins: $q0, $q1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
-    ; CHECK: [[CMGTv4i32_:%[0-9]+]]:fpr128 = CMGTv4i32 [[COPY1]], [[COPY]]
-    ; CHECK: [[XTNv4i16_:%[0-9]+]]:fpr64 = XTNv4i16 [[CMGTv4i32_]]
-    ; CHECK: $d0 = COPY [[XTNv4i16_]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK-NEXT: [[CMGTv4i32_:%[0-9]+]]:fpr128 = CMGTv4i32 [[COPY1]], [[COPY]]
+    ; CHECK-NEXT: [[XTNv4i16_:%[0-9]+]]:fpr64 = XTNv4i16 [[CMGTv4i32_]]
+    ; CHECK-NEXT: $d0 = COPY [[XTNv4i16_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %0:fpr(<4 x s32>) = COPY $q0
     %1:fpr(<4 x s32>) = COPY $q1
     %4:fpr(<4 x s32>) = G_ICMP intpred(slt), %0(<4 x s32>), %1
@@ -2789,11 +2838,12 @@ body:             |
 
     ; CHECK-LABEL: name: test_v2i32_slt
     ; CHECK: liveins: $d0, $d1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
-    ; CHECK: [[CMGTv2i32_:%[0-9]+]]:fpr64 = CMGTv2i32 [[COPY1]], [[COPY]]
-    ; CHECK: $d0 = COPY [[CMGTv2i32_]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK-NEXT: [[CMGTv2i32_:%[0-9]+]]:fpr64 = CMGTv2i32 [[COPY1]], [[COPY]]
+    ; CHECK-NEXT: $d0 = COPY [[CMGTv2i32_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %0:fpr(<2 x s32>) = COPY $d0
     %1:fpr(<2 x s32>) = COPY $d1
     %4:fpr(<2 x s32>) = G_ICMP intpred(slt), %0(<2 x s32>), %1
@@ -2834,21 +2884,20 @@ body:             |
 
     ; CHECK-LABEL: name: test_v2i16_slt
     ; CHECK: liveins: $d0, $d1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
-    ; CHECK: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0
-    ; CHECK: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) %const.0
-    ; CHECK: [[SHLv2i32_shift:%[0-9]+]]:fpr64 = SHLv2i32_shift [[COPY]], 16
-    ; CHECK: [[NEGv2i32_:%[0-9]+]]:fpr64 = NEGv2i32 [[LDRDui]]
-    ; CHECK: [[SSHLv2i32_:%[0-9]+]]:fpr64 = SSHLv2i32 [[SHLv2i32_shift]], [[NEGv2i32_]]
-    ; CHECK: [[ADRP1:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0
-    ; CHECK: [[LDRDui1:%[0-9]+]]:fpr64 = LDRDui [[ADRP1]], target-flags(aarch64-pageoff, aarch64-nc) %const.0
-    ; CHECK: [[SHLv2i32_shift1:%[0-9]+]]:fpr64 = SHLv2i32_shift [[COPY1]], 16
-    ; CHECK: [[NEGv2i32_1:%[0-9]+]]:fpr64 = NEGv2i32 [[LDRDui1]]
-    ; CHECK: [[SSHLv2i32_1:%[0-9]+]]:fpr64 = SSHLv2i32 [[SHLv2i32_shift1]], [[NEGv2i32_1]]
-    ; CHECK: [[CMGTv2i32_:%[0-9]+]]:fpr64 = CMGTv2i32 [[SSHLv2i32_1]], [[SSHLv2i32_]]
-    ; CHECK: $d0 = COPY [[CMGTv2i32_]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK-NEXT: [[MOVIv2i32_:%[0-9]+]]:fpr64 = MOVIv2i32 16, 0
+    ; CHECK-NEXT: [[SHLv2i32_shift:%[0-9]+]]:fpr64 = SHLv2i32_shift [[COPY]], 16
+    ; CHECK-NEXT: [[NEGv2i32_:%[0-9]+]]:fpr64 = NEGv2i32 [[MOVIv2i32_]]
+    ; CHECK-NEXT: [[SSHLv2i32_:%[0-9]+]]:fpr64 = SSHLv2i32 [[SHLv2i32_shift]], [[NEGv2i32_]]
+    ; CHECK-NEXT: [[MOVIv2i32_1:%[0-9]+]]:fpr64 = MOVIv2i32 16, 0
+    ; CHECK-NEXT: [[SHLv2i32_shift1:%[0-9]+]]:fpr64 = SHLv2i32_shift [[COPY1]], 16
+    ; CHECK-NEXT: [[NEGv2i32_1:%[0-9]+]]:fpr64 = NEGv2i32 [[MOVIv2i32_1]]
+    ; CHECK-NEXT: [[SSHLv2i32_1:%[0-9]+]]:fpr64 = SSHLv2i32 [[SHLv2i32_shift1]], [[NEGv2i32_1]]
+    ; CHECK-NEXT: [[CMGTv2i32_:%[0-9]+]]:fpr64 = CMGTv2i32 [[SSHLv2i32_1]], [[SSHLv2i32_]]
+    ; CHECK-NEXT: $d0 = COPY [[CMGTv2i32_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %2:fpr(<2 x s32>) = COPY $d0
     %3:fpr(<2 x s32>) = COPY $d1
     %14:gpr(s32) = G_CONSTANT i32 16
@@ -2886,12 +2935,13 @@ body:             |
 
     ; CHECK-LABEL: name: test_v8i16_slt
     ; CHECK: liveins: $q0, $q1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
-    ; CHECK: [[CMGTv8i16_:%[0-9]+]]:fpr128 = CMGTv8i16 [[COPY1]], [[COPY]]
-    ; CHECK: [[XTNv8i8_:%[0-9]+]]:fpr64 = XTNv8i8 [[CMGTv8i16_]]
-    ; CHECK: $d0 = COPY [[XTNv8i8_]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK-NEXT: [[CMGTv8i16_:%[0-9]+]]:fpr128 = CMGTv8i16 [[COPY1]], [[COPY]]
+    ; CHECK-NEXT: [[XTNv8i8_:%[0-9]+]]:fpr64 = XTNv8i8 [[CMGTv8i16_]]
+    ; CHECK-NEXT: $d0 = COPY [[XTNv8i8_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %0:fpr(<8 x s16>) = COPY $q0
     %1:fpr(<8 x s16>) = COPY $q1
     %4:fpr(<8 x s16>) = G_ICMP intpred(slt), %0(<8 x s16>), %1
@@ -2919,11 +2969,12 @@ body:             |
 
     ; CHECK-LABEL: name: test_v4i16_slt
     ; CHECK: liveins: $d0, $d1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
-    ; CHECK: [[CMGTv4i16_:%[0-9]+]]:fpr64 = CMGTv4i16 [[COPY1]], [[COPY]]
-    ; CHECK: $d0 = COPY [[CMGTv4i16_]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK-NEXT: [[CMGTv4i16_:%[0-9]+]]:fpr64 = CMGTv4i16 [[COPY1]], [[COPY]]
+    ; CHECK-NEXT: $d0 = COPY [[CMGTv4i16_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %0:fpr(<4 x s16>) = COPY $d0
     %1:fpr(<4 x s16>) = COPY $d1
     %4:fpr(<4 x s16>) = G_ICMP intpred(slt), %0(<4 x s16>), %1
@@ -2951,11 +3002,12 @@ body:             |
 
     ; CHECK-LABEL: name: test_v16i8_slt
     ; CHECK: liveins: $q0, $q1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
-    ; CHECK: [[CMGTv16i8_:%[0-9]+]]:fpr128 = CMGTv16i8 [[COPY1]], [[COPY]]
-    ; CHECK: $q0 = COPY [[CMGTv16i8_]]
-    ; CHECK: RET_ReallyLR implicit $q0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK-NEXT: [[CMGTv16i8_:%[0-9]+]]:fpr128 = CMGTv16i8 [[COPY1]], [[COPY]]
+    ; CHECK-NEXT: $q0 = COPY [[CMGTv16i8_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
     %0:fpr(<16 x s8>) = COPY $q0
     %1:fpr(<16 x s8>) = COPY $q1
     %4:fpr(<16 x s8>) = G_ICMP intpred(slt), %0(<16 x s8>), %1
@@ -2983,11 +3035,12 @@ body:             |
 
     ; CHECK-LABEL: name: test_v8i8_slt
     ; CHECK: liveins: $d0, $d1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
-    ; CHECK: [[CMGTv8i8_:%[0-9]+]]:fpr64 = CMGTv8i8 [[COPY1]], [[COPY]]
-    ; CHECK: $d0 = COPY [[CMGTv8i8_]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK-NEXT: [[CMGTv8i8_:%[0-9]+]]:fpr64 = CMGTv8i8 [[COPY1]], [[COPY]]
+    ; CHECK-NEXT: $d0 = COPY [[CMGTv8i8_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %0:fpr(<8 x s8>) = COPY $d0
     %1:fpr(<8 x s8>) = COPY $d1
     %4:fpr(<8 x s8>) = G_ICMP intpred(slt), %0(<8 x s8>), %1
@@ -3015,12 +3068,13 @@ body:             |
 
     ; CHECK-LABEL: name: test_v2i64_sle
     ; CHECK: liveins: $q0, $q1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
-    ; CHECK: [[CMGEv2i64_:%[0-9]+]]:fpr128 = CMGEv2i64 [[COPY1]], [[COPY]]
-    ; CHECK: [[XTNv2i32_:%[0-9]+]]:fpr64 = XTNv2i32 [[CMGEv2i64_]]
-    ; CHECK: $d0 = COPY [[XTNv2i32_]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK-NEXT: [[CMGEv2i64_:%[0-9]+]]:fpr128 = CMGEv2i64 [[COPY1]], [[COPY]]
+    ; CHECK-NEXT: [[XTNv2i32_:%[0-9]+]]:fpr64 = XTNv2i32 [[CMGEv2i64_]]
+    ; CHECK-NEXT: $d0 = COPY [[XTNv2i32_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %0:fpr(<2 x s64>) = COPY $q0
     %1:fpr(<2 x s64>) = COPY $q1
     %4:fpr(<2 x s64>) = G_ICMP intpred(sle), %0(<2 x s64>), %1
@@ -3048,12 +3102,13 @@ body:             |
 
     ; CHECK-LABEL: name: test_v4i32_sle
     ; CHECK: liveins: $q0, $q1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
-    ; CHECK: [[CMGEv4i32_:%[0-9]+]]:fpr128 = CMGEv4i32 [[COPY1]], [[COPY]]
-    ; CHECK: [[XTNv4i16_:%[0-9]+]]:fpr64 = XTNv4i16 [[CMGEv4i32_]]
-    ; CHECK: $d0 = COPY [[XTNv4i16_]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK-NEXT: [[CMGEv4i32_:%[0-9]+]]:fpr128 = CMGEv4i32 [[COPY1]], [[COPY]]
+    ; CHECK-NEXT: [[XTNv4i16_:%[0-9]+]]:fpr64 = XTNv4i16 [[CMGEv4i32_]]
+    ; CHECK-NEXT: $d0 = COPY [[XTNv4i16_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %0:fpr(<4 x s32>) = COPY $q0
     %1:fpr(<4 x s32>) = COPY $q1
     %4:fpr(<4 x s32>) = G_ICMP intpred(sle), %0(<4 x s32>), %1
@@ -3081,11 +3136,12 @@ body:             |
 
     ; CHECK-LABEL: name: test_v2i32_sle
     ; CHECK: liveins: $d0, $d1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
-    ; CHECK: [[CMGEv2i32_:%[0-9]+]]:fpr64 = CMGEv2i32 [[COPY1]], [[COPY]]
-    ; CHECK: $d0 = COPY [[CMGEv2i32_]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK-NEXT: [[CMGEv2i32_:%[0-9]+]]:fpr64 = CMGEv2i32 [[COPY1]], [[COPY]]
+    ; CHECK-NEXT: $d0 = COPY [[CMGEv2i32_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %0:fpr(<2 x s32>) = COPY $d0
     %1:fpr(<2 x s32>) = COPY $d1
     %4:fpr(<2 x s32>) = G_ICMP intpred(sle), %0(<2 x s32>), %1
@@ -3126,21 +3182,20 @@ body:             |
 
     ; CHECK-LABEL: name: test_v2i16_sle
     ; CHECK: liveins: $d0, $d1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
-    ; CHECK: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0
-    ; CHECK: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) %const.0
-    ; CHECK: [[SHLv2i32_shift:%[0-9]+]]:fpr64 = SHLv2i32_shift [[COPY]], 16
-    ; CHECK: [[NEGv2i32_:%[0-9]+]]:fpr64 = NEGv2i32 [[LDRDui]]
-    ; CHECK: [[SSHLv2i32_:%[0-9]+]]:fpr64 = SSHLv2i32 [[SHLv2i32_shift]], [[NEGv2i32_]]
-    ; CHECK: [[ADRP1:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0
-    ; CHECK: [[LDRDui1:%[0-9]+]]:fpr64 = LDRDui [[ADRP1]], target-flags(aarch64-pageoff, aarch64-nc) %const.0
-    ; CHECK: [[SHLv2i32_shift1:%[0-9]+]]:fpr64 = SHLv2i32_shift [[COPY1]], 16
-    ; CHECK: [[NEGv2i32_1:%[0-9]+]]:fpr64 = NEGv2i32 [[LDRDui1]]
-    ; CHECK: [[SSHLv2i32_1:%[0-9]+]]:fpr64 = SSHLv2i32 [[SHLv2i32_shift1]], [[NEGv2i32_1]]
-    ; CHECK: [[CMGEv2i32_:%[0-9]+]]:fpr64 = CMGEv2i32 [[SSHLv2i32_1]], [[SSHLv2i32_]]
-    ; CHECK: $d0 = COPY [[CMGEv2i32_]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK-NEXT: [[MOVIv2i32_:%[0-9]+]]:fpr64 = MOVIv2i32 16, 0
+    ; CHECK-NEXT: [[SHLv2i32_shift:%[0-9]+]]:fpr64 = SHLv2i32_shift [[COPY]], 16
+    ; CHECK-NEXT: [[NEGv2i32_:%[0-9]+]]:fpr64 = NEGv2i32 [[MOVIv2i32_]]
+    ; CHECK-NEXT: [[SSHLv2i32_:%[0-9]+]]:fpr64 = SSHLv2i32 [[SHLv2i32_shift]], [[NEGv2i32_]]
+    ; CHECK-NEXT: [[MOVIv2i32_1:%[0-9]+]]:fpr64 = MOVIv2i32 16, 0
+    ; CHECK-NEXT: [[SHLv2i32_shift1:%[0-9]+]]:fpr64 = SHLv2i32_shift [[COPY1]], 16
+    ; CHECK-NEXT: [[NEGv2i32_1:%[0-9]+]]:fpr64 = NEGv2i32 [[MOVIv2i32_1]]
+    ; CHECK-NEXT: [[SSHLv2i32_1:%[0-9]+]]:fpr64 = SSHLv2i32 [[SHLv2i32_shift1]], [[NEGv2i32_1]]
+    ; CHECK-NEXT: [[CMGEv2i32_:%[0-9]+]]:fpr64 = CMGEv2i32 [[SSHLv2i32_1]], [[SSHLv2i32_]]
+    ; CHECK-NEXT: $d0 = COPY [[CMGEv2i32_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %2:fpr(<2 x s32>) = COPY $d0
     %3:fpr(<2 x s32>) = COPY $d1
     %14:gpr(s32) = G_CONSTANT i32 16
@@ -3178,12 +3233,13 @@ body:             |
 
     ; CHECK-LABEL: name: test_v8i16_sle
     ; CHECK: liveins: $q0, $q1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
-    ; CHECK: [[CMGEv8i16_:%[0-9]+]]:fpr128 = CMGEv8i16 [[COPY1]], [[COPY]]
-    ; CHECK: [[XTNv8i8_:%[0-9]+]]:fpr64 = XTNv8i8 [[CMGEv8i16_]]
-    ; CHECK: $d0 = COPY [[XTNv8i8_]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK-NEXT: [[CMGEv8i16_:%[0-9]+]]:fpr128 = CMGEv8i16 [[COPY1]], [[COPY]]
+    ; CHECK-NEXT: [[XTNv8i8_:%[0-9]+]]:fpr64 = XTNv8i8 [[CMGEv8i16_]]
+    ; CHECK-NEXT: $d0 = COPY [[XTNv8i8_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %0:fpr(<8 x s16>) = COPY $q0
     %1:fpr(<8 x s16>) = COPY $q1
     %4:fpr(<8 x s16>) = G_ICMP intpred(sle), %0(<8 x s16>), %1
@@ -3211,11 +3267,12 @@ body:             |
 
     ; CHECK-LABEL: name: test_v4i16_sle
     ; CHECK: liveins: $d0, $d1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
-    ; CHECK: [[CMGEv4i16_:%[0-9]+]]:fpr64 = CMGEv4i16 [[COPY1]], [[COPY]]
-    ; CHECK: $d0 = COPY [[CMGEv4i16_]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK-NEXT: [[CMGEv4i16_:%[0-9]+]]:fpr64 = CMGEv4i16 [[COPY1]], [[COPY]]
+    ; CHECK-NEXT: $d0 = COPY [[CMGEv4i16_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %0:fpr(<4 x s16>) = COPY $d0
     %1:fpr(<4 x s16>) = COPY $d1
     %4:fpr(<4 x s16>) = G_ICMP intpred(sle), %0(<4 x s16>), %1
@@ -3243,11 +3300,12 @@ body:             |
 
     ; CHECK-LABEL: name: test_v16i8_sle
     ; CHECK: liveins: $q0, $q1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
-    ; CHECK: [[CMGEv16i8_:%[0-9]+]]:fpr128 = CMGEv16i8 [[COPY1]], [[COPY]]
-    ; CHECK: $q0 = COPY [[CMGEv16i8_]]
-    ; CHECK: RET_ReallyLR implicit $q0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK-NEXT: [[CMGEv16i8_:%[0-9]+]]:fpr128 = CMGEv16i8 [[COPY1]], [[COPY]]
+    ; CHECK-NEXT: $q0 = COPY [[CMGEv16i8_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
     %0:fpr(<16 x s8>) = COPY $q0
     %1:fpr(<16 x s8>) = COPY $q1
     %4:fpr(<16 x s8>) = G_ICMP intpred(sle), %0(<16 x s8>), %1
@@ -3275,11 +3333,12 @@ body:             |
 
     ; CHECK-LABEL: name: test_v8i8_sle
     ; CHECK: liveins: $d0, $d1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
-    ; CHECK: [[CMGEv8i8_:%[0-9]+]]:fpr64 = CMGEv8i8 [[COPY1]], [[COPY]]
-    ; CHECK: $d0 = COPY [[CMGEv8i8_]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK-NEXT: [[CMGEv8i8_:%[0-9]+]]:fpr64 = CMGEv8i8 [[COPY1]], [[COPY]]
+    ; CHECK-NEXT: $d0 = COPY [[CMGEv8i8_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %0:fpr(<8 x s8>) = COPY $d0
     %1:fpr(<8 x s8>) = COPY $d1
     %4:fpr(<8 x s8>) = G_ICMP intpred(sle), %0(<8 x s8>), %1
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-vector-shift.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-vector-shift.mir
index 332b300ceaf8868..07061152f8f5099 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-vector-shift.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-vector-shift.mir
@@ -17,11 +17,12 @@ body:             |
 
     ; CHECK-LABEL: name: shl_v2i32
     ; CHECK: liveins: $d0, $d1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
-    ; CHECK: [[USHLv2i32_:%[0-9]+]]:fpr64 = USHLv2i32 [[COPY]], [[COPY1]]
-    ; CHECK: $d0 = COPY [[USHLv2i32_]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK-NEXT: [[USHLv2i32_:%[0-9]+]]:fpr64 = USHLv2i32 [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: $d0 = COPY [[USHLv2i32_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %0:fpr(<2 x s32>) = COPY $d0
     %1:fpr(<2 x s32>) = COPY $d1
     %2:fpr(<2 x s32>) = G_SHL %0, %1(<2 x s32>)
@@ -51,10 +52,11 @@ body:             |
 
     ; CHECK-LABEL: name: shl_v2i32_imm
     ; CHECK: liveins: $d0
-    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
-    ; CHECK: [[SHLv2i32_shift:%[0-9]+]]:fpr64 = SHLv2i32_shift [[COPY]], 24
-    ; CHECK: $d0 = COPY [[SHLv2i32_shift]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK-NEXT: [[SHLv2i32_shift:%[0-9]+]]:fpr64 = SHLv2i32_shift [[COPY]], 24
+    ; CHECK-NEXT: $d0 = COPY [[SHLv2i32_shift]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %0:fpr(<2 x s32>) = COPY $d0
     %2:gpr(s32) = G_CONSTANT i32 24
     %1:fpr(<2 x s32>) = G_BUILD_VECTOR %2(s32), %2(s32)
@@ -85,12 +87,12 @@ body:             |
 
     ; CHECK-LABEL: name: shl_v2i32_imm_out_of_range
     ; CHECK: liveins: $d0
-    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
-    ; CHECK: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0
-    ; CHECK: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) %const.0
-    ; CHECK: [[USHLv2i32_:%[0-9]+]]:fpr64 = USHLv2i32 [[COPY]], [[LDRDui]]
-    ; CHECK: $d0 = COPY [[USHLv2i32_]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK-NEXT: [[MOVIv2i32_:%[0-9]+]]:fpr64 = MOVIv2i32 40, 0
+    ; CHECK-NEXT: [[USHLv2i32_:%[0-9]+]]:fpr64 = USHLv2i32 [[COPY]], [[MOVIv2i32_]]
+    ; CHECK-NEXT: $d0 = COPY [[USHLv2i32_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %0:fpr(<2 x s32>) = COPY $d0
     %2:gpr(s32) = G_CONSTANT i32 40
     %1:fpr(<2 x s32>) = G_BUILD_VECTOR %2(s32), %2(s32)
@@ -116,11 +118,12 @@ body:             |
 
     ; CHECK-LABEL: name: shl_v4i32
     ; CHECK: liveins: $q0, $q1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
-    ; CHECK: [[USHLv4i32_:%[0-9]+]]:fpr128 = USHLv4i32 [[COPY]], [[COPY1]]
-    ; CHECK: $q0 = COPY [[USHLv4i32_]]
-    ; CHECK: RET_ReallyLR implicit $q0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK-NEXT: [[USHLv4i32_:%[0-9]+]]:fpr128 = USHLv4i32 [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: $q0 = COPY [[USHLv4i32_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
     %0:fpr(<4 x s32>) = COPY $q0
     %1:fpr(<4 x s32>) = COPY $q1
     %2:fpr(<4 x s32>) = G_SHL %0, %1(<4 x s32>)
@@ -150,10 +153,11 @@ body:             |
 
     ; CHECK-LABEL: name: shl_v4i32_imm
     ; CHECK: liveins: $q0
-    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
-    ; CHECK: [[SHLv4i32_shift:%[0-9]+]]:fpr128 = SHLv4i32_shift [[COPY]], 24
-    ; CHECK: $q0 = COPY [[SHLv4i32_shift]]
-    ; CHECK: RET_ReallyLR implicit $q0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK-NEXT: [[SHLv4i32_shift:%[0-9]+]]:fpr128 = SHLv4i32_shift [[COPY]], 24
+    ; CHECK-NEXT: $q0 = COPY [[SHLv4i32_shift]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
     %0:fpr(<4 x s32>) = COPY $q0
     %2:gpr(s32) = G_CONSTANT i32 24
     %1:fpr(<4 x s32>) = G_BUILD_VECTOR %2(s32), %2(s32), %2(s32), %2(s32)
@@ -179,11 +183,12 @@ body:             |
 
     ; CHECK-LABEL: name: shl_v2i64
     ; CHECK: liveins: $q0, $q1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
-    ; CHECK: [[USHLv2i64_:%[0-9]+]]:fpr128 = USHLv2i64 [[COPY]], [[COPY1]]
-    ; CHECK: $q0 = COPY [[USHLv2i64_]]
-    ; CHECK: RET_ReallyLR implicit $q0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK-NEXT: [[USHLv2i64_:%[0-9]+]]:fpr128 = USHLv2i64 [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: $q0 = COPY [[USHLv2i64_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
     %0:fpr(<2 x s64>) = COPY $q0
     %1:fpr(<2 x s64>) = COPY $q1
     %2:fpr(<2 x s64>) = G_SHL %0, %1(<2 x s64>)
@@ -213,10 +218,11 @@ body:             |
 
     ; CHECK-LABEL: name: shl_v2i64_imm
     ; CHECK: liveins: $q0
-    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
-    ; CHECK: [[SHLv2i64_shift:%[0-9]+]]:fpr128 = SHLv2i64_shift [[COPY]], 24
-    ; CHECK: $q0 = COPY [[SHLv2i64_shift]]
-    ; CHECK: RET_ReallyLR implicit $q0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK-NEXT: [[SHLv2i64_shift:%[0-9]+]]:fpr128 = SHLv2i64_shift [[COPY]], 24
+    ; CHECK-NEXT: $q0 = COPY [[SHLv2i64_shift]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
     %0:fpr(<2 x s64>) = COPY $q0
     %2:gpr(s64) = G_CONSTANT i64 24
     %1:fpr(<2 x s64>) = G_BUILD_VECTOR %2(s64), %2(s64)
@@ -247,12 +253,13 @@ body:             |
 
     ; CHECK-LABEL: name: shl_v2i64_imm_out_of_range
     ; CHECK: liveins: $q0
-    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
-    ; CHECK: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0
-    ; CHECK: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) %const.0
-    ; CHECK: [[USHLv2i64_:%[0-9]+]]:fpr128 = USHLv2i64 [[COPY]], [[LDRQui]]
-    ; CHECK: $q0 = COPY [[USHLv2i64_]]
-    ; CHECK: RET_ReallyLR implicit $q0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK-NEXT: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0
+    ; CHECK-NEXT: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) %const.0 :: (load (s128) from constant-pool)
+    ; CHECK-NEXT: [[USHLv2i64_:%[0-9]+]]:fpr128 = USHLv2i64 [[COPY]], [[LDRQui]]
+    ; CHECK-NEXT: $q0 = COPY [[USHLv2i64_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
     %0:fpr(<2 x s64>) = COPY $q0
     %2:gpr(s64) = G_CONSTANT i64 70
     %1:fpr(<2 x s64>) = G_BUILD_VECTOR %2(s64), %2(s64)
@@ -278,12 +285,13 @@ body:             |
 
     ; CHECK-LABEL: name: ashr_v2i32
     ; CHECK: liveins: $d0, $d1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
-    ; CHECK: [[NEGv2i32_:%[0-9]+]]:fpr64 = NEGv2i32 [[COPY1]]
-    ; CHECK: [[SSHLv2i32_:%[0-9]+]]:fpr64 = SSHLv2i32 [[COPY]], [[NEGv2i32_]]
-    ; CHECK: $d0 = COPY [[SSHLv2i32_]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK-NEXT: [[NEGv2i32_:%[0-9]+]]:fpr64 = NEGv2i32 [[COPY1]]
+    ; CHECK-NEXT: [[SSHLv2i32_:%[0-9]+]]:fpr64 = SSHLv2i32 [[COPY]], [[NEGv2i32_]]
+    ; CHECK-NEXT: $d0 = COPY [[SSHLv2i32_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %0:fpr(<2 x s32>) = COPY $d0
     %1:fpr(<2 x s32>) = COPY $d1
     %2:fpr(<2 x s32>) = G_ASHR %0, %1(<2 x s32>)
@@ -308,12 +316,13 @@ body:             |
 
     ; CHECK-LABEL: name: ashr_v4i32
     ; CHECK: liveins: $q0, $q1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
-    ; CHECK: [[NEGv4i32_:%[0-9]+]]:fpr128 = NEGv4i32 [[COPY1]]
-    ; CHECK: [[SSHLv4i32_:%[0-9]+]]:fpr128 = SSHLv4i32 [[COPY]], [[NEGv4i32_]]
-    ; CHECK: $q0 = COPY [[SSHLv4i32_]]
-    ; CHECK: RET_ReallyLR implicit $q0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK-NEXT: [[NEGv4i32_:%[0-9]+]]:fpr128 = NEGv4i32 [[COPY1]]
+    ; CHECK-NEXT: [[SSHLv4i32_:%[0-9]+]]:fpr128 = SSHLv4i32 [[COPY]], [[NEGv4i32_]]
+    ; CHECK-NEXT: $q0 = COPY [[SSHLv4i32_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
     %0:fpr(<4 x s32>) = COPY $q0
     %1:fpr(<4 x s32>) = COPY $q1
     %2:fpr(<4 x s32>) = G_ASHR %0, %1(<4 x s32>)
@@ -338,12 +347,13 @@ body:             |
 
     ; CHECK-LABEL: name: ashr_v2i64
     ; CHECK: liveins: $q0, $q1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
-    ; CHECK: [[NEGv2i64_:%[0-9]+]]:fpr128 = NEGv2i64 [[COPY1]]
-    ; CHECK: [[SSHLv2i64_:%[0-9]+]]:fpr128 = SSHLv2i64 [[COPY]], [[NEGv2i64_]]
-    ; CHECK: $q0 = COPY [[SSHLv2i64_]]
-    ; CHECK: RET_ReallyLR implicit $q0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK-NEXT: [[NEGv2i64_:%[0-9]+]]:fpr128 = NEGv2i64 [[COPY1]]
+    ; CHECK-NEXT: [[SSHLv2i64_:%[0-9]+]]:fpr128 = SSHLv2i64 [[COPY]], [[NEGv2i64_]]
+    ; CHECK-NEXT: $q0 = COPY [[SSHLv2i64_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
     %0:fpr(<2 x s64>) = COPY $q0
     %1:fpr(<2 x s64>) = COPY $q1
     %2:fpr(<2 x s64>) = G_ASHR %0, %1(<2 x s64>)
@@ -361,11 +371,12 @@ body:             |
     liveins: $d0, $d1
     ; CHECK-LABEL: name: shl_v4i16
     ; CHECK: liveins: $d0, $d1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
-    ; CHECK: [[USHLv4i16_:%[0-9]+]]:fpr64 = USHLv4i16 [[COPY]], [[COPY1]]
-    ; CHECK: $d0 = COPY [[USHLv4i16_]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK-NEXT: [[USHLv4i16_:%[0-9]+]]:fpr64 = USHLv4i16 [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: $d0 = COPY [[USHLv4i16_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %0:fpr(<4 x s16>) = COPY $d0
     %1:fpr(<4 x s16>) = COPY $d1
     %2:fpr(<4 x s16>) = G_SHL %0, %1(<4 x s16>)
@@ -382,12 +393,13 @@ body:             |
     liveins: $d0, $d1
     ; CHECK-LABEL: name: lshr_v4i16
     ; CHECK: liveins: $d0, $d1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
-    ; CHECK: [[NEGv4i16_:%[0-9]+]]:fpr64 = NEGv4i16 [[COPY1]]
-    ; CHECK: [[USHLv4i16_:%[0-9]+]]:fpr64 = USHLv4i16 [[COPY]], [[NEGv4i16_]]
-    ; CHECK: $d0 = COPY [[USHLv4i16_]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK-NEXT: [[NEGv4i16_:%[0-9]+]]:fpr64 = NEGv4i16 [[COPY1]]
+    ; CHECK-NEXT: [[USHLv4i16_:%[0-9]+]]:fpr64 = USHLv4i16 [[COPY]], [[NEGv4i16_]]
+    ; CHECK-NEXT: $d0 = COPY [[USHLv4i16_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %0:fpr(<4 x s16>) = COPY $d0
     %1:fpr(<4 x s16>) = COPY $d1
     %2:fpr(<4 x s16>) = G_LSHR %0, %1(<4 x s16>)
@@ -411,12 +423,13 @@ body:             |
 
     ; CHECK-LABEL: name: lshr_v4i32
     ; CHECK: liveins: $q0, $q1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
-    ; CHECK: [[NEGv4i32_:%[0-9]+]]:fpr128 = NEGv4i32 [[COPY1]]
-    ; CHECK: [[USHLv4i32_:%[0-9]+]]:fpr128 = USHLv4i32 [[COPY]], [[NEGv4i32_]]
-    ; CHECK: $q0 = COPY [[USHLv4i32_]]
-    ; CHECK: RET_ReallyLR implicit $q0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK-NEXT: [[NEGv4i32_:%[0-9]+]]:fpr128 = NEGv4i32 [[COPY1]]
+    ; CHECK-NEXT: [[USHLv4i32_:%[0-9]+]]:fpr128 = USHLv4i32 [[COPY]], [[NEGv4i32_]]
+    ; CHECK-NEXT: $q0 = COPY [[USHLv4i32_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
     %0:fpr(<4 x s32>) = COPY $q0
     %1:fpr(<4 x s32>) = COPY $q1
     %2:fpr(<4 x s32>) = G_LSHR %0, %1(<4 x s32>)
@@ -434,12 +447,13 @@ body:             |
     liveins: $q0, $q1
     ; CHECK-LABEL: name: lshr_v8i16
     ; CHECK: liveins: $q0, $q1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
-    ; CHECK: [[NEGv8i16_:%[0-9]+]]:fpr128 = NEGv8i16 [[COPY1]]
-    ; CHECK: [[USHLv8i16_:%[0-9]+]]:fpr128 = USHLv8i16 [[COPY]], [[NEGv8i16_]]
-    ; CHECK: $q0 = COPY [[USHLv8i16_]]
-    ; CHECK: RET_ReallyLR implicit $q0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK-NEXT: [[NEGv8i16_:%[0-9]+]]:fpr128 = NEGv8i16 [[COPY1]]
+    ; CHECK-NEXT: [[USHLv8i16_:%[0-9]+]]:fpr128 = USHLv8i16 [[COPY]], [[NEGv8i16_]]
+    ; CHECK-NEXT: $q0 = COPY [[USHLv8i16_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
     %0:fpr(<8 x s16>) = COPY $q0
     %1:fpr(<8 x s16>) = COPY $q1
     %2:fpr(<8 x s16>) = G_LSHR %0, %1(<8 x s16>)
@@ -456,12 +470,13 @@ body:             |
     liveins: $d0, $d1
     ; CHECK-LABEL: name: ashr_v4i16
     ; CHECK: liveins: $d0, $d1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
-    ; CHECK: [[NEGv4i16_:%[0-9]+]]:fpr64 = NEGv4i16 [[COPY1]]
-    ; CHECK: [[SSHLv4i16_:%[0-9]+]]:fpr64 = SSHLv4i16 [[COPY]], [[NEGv4i16_]]
-    ; CHECK: $d0 = COPY [[SSHLv4i16_]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK-NEXT: [[NEGv4i16_:%[0-9]+]]:fpr64 = NEGv4i16 [[COPY1]]
+    ; CHECK-NEXT: [[SSHLv4i16_:%[0-9]+]]:fpr64 = SSHLv4i16 [[COPY]], [[NEGv4i16_]]
+    ; CHECK-NEXT: $d0 = COPY [[SSHLv4i16_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %0:fpr(<4 x s16>) = COPY $d0
     %1:fpr(<4 x s16>) = COPY $d1
     %2:fpr(<4 x s16>) = G_ASHR %0, %1(<4 x s16>)
@@ -478,10 +493,11 @@ body:             |
     liveins: $d0, $d1
     ; CHECK-LABEL: name: vashr_v4i16_imm
     ; CHECK: liveins: $d0, $d1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
-    ; CHECK: [[SSHRv4i16_shift:%[0-9]+]]:fpr64 = SSHRv4i16_shift [[COPY]], 5
-    ; CHECK: $d0 = COPY [[SSHRv4i16_shift]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK-NEXT: [[SSHRv4i16_shift:%[0-9]+]]:fpr64 = SSHRv4i16_shift [[COPY]], 5
+    ; CHECK-NEXT: $d0 = COPY [[SSHRv4i16_shift]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %0:fpr(<4 x s16>) = COPY $d0
     %1:gpr(s32) = G_CONSTANT i32 5
     %2:fpr(<4 x s16>) = G_VASHR %0, %1
@@ -498,10 +514,11 @@ body:             |
     liveins: $d0, $d1
     ; CHECK-LABEL: name: vlshr_v4i16_imm
     ; CHECK: liveins: $d0, $d1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
-    ; CHECK: [[USHRv4i16_shift:%[0-9]+]]:fpr64 = USHRv4i16_shift [[COPY]], 5
-    ; CHECK: $d0 = COPY [[USHRv4i16_shift]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK-NEXT: [[USHRv4i16_shift:%[0-9]+]]:fpr64 = USHRv4i16_shift [[COPY]], 5
+    ; CHECK-NEXT: $d0 = COPY [[USHRv4i16_shift]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %0:fpr(<4 x s16>) = COPY $d0
     %1:gpr(s32) = G_CONSTANT i32 5
     %2:fpr(<4 x s16>) = G_VLSHR %0, %1
@@ -518,11 +535,12 @@ body:             |
     liveins: $q0, $q1
     ; CHECK-LABEL: name: shl_v8i16
     ; CHECK: liveins: $q0, $q1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
-    ; CHECK: [[USHLv8i16_:%[0-9]+]]:fpr128 = USHLv8i16 [[COPY]], [[COPY1]]
-    ; CHECK: $q0 = COPY [[USHLv8i16_]]
-    ; CHECK: RET_ReallyLR implicit $q0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK-NEXT: [[USHLv8i16_:%[0-9]+]]:fpr128 = USHLv8i16 [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: $q0 = COPY [[USHLv8i16_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
     %0:fpr(<8 x s16>) = COPY $q0
     %1:fpr(<8 x s16>) = COPY $q1
     %2:fpr(<8 x s16>) = G_SHL %0, %1(<8 x s16>)
@@ -539,11 +557,12 @@ body:             |
     liveins: $q0, $q1
     ; CHECK-LABEL: name: shl_v16i8
     ; CHECK: liveins: $q0, $q1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
-    ; CHECK: [[USHLv16i8_:%[0-9]+]]:fpr128 = USHLv16i8 [[COPY]], [[COPY1]]
-    ; CHECK: $q0 = COPY [[USHLv16i8_]]
-    ; CHECK: RET_ReallyLR implicit $q0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK-NEXT: [[USHLv16i8_:%[0-9]+]]:fpr128 = USHLv16i8 [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: $q0 = COPY [[USHLv16i8_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
     %0:fpr(<16 x s8>) = COPY $q0
     %1:fpr(<16 x s8>) = COPY $q1
     %2:fpr(<16 x s8>) = G_SHL %0, %1(<16 x s8>)
@@ -560,12 +579,13 @@ body:             |
     liveins: $q0, $q1
     ; CHECK-LABEL: name: lshr_v16i8
     ; CHECK: liveins: $q0, $q1
-    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
-    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
-    ; CHECK: [[NEGv16i8_:%[0-9]+]]:fpr128 = NEGv16i8 [[COPY1]]
-    ; CHECK: [[USHLv16i8_:%[0-9]+]]:fpr128 = USHLv16i8 [[COPY]], [[NEGv16i8_]]
-    ; CHECK: $q0 = COPY [[USHLv16i8_]]
-    ; CHECK: RET_ReallyLR implicit $q0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
+    ; CHECK-NEXT: [[NEGv16i8_:%[0-9]+]]:fpr128 = NEGv16i8 [[COPY1]]
+    ; CHECK-NEXT: [[USHLv16i8_:%[0-9]+]]:fpr128 = USHLv16i8 [[COPY]], [[NEGv16i8_]]
+    ; CHECK-NEXT: $q0 = COPY [[USHLv16i8_]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
     %0:fpr(<16 x s8>) = COPY $q0
     %1:fpr(<16 x s8>) = COPY $q1
     %2:fpr(<16 x s8>) = G_LSHR %0, %1(<16 x s8>)
@@ -592,15 +612,14 @@ body:             |
   bb.1:
     liveins: $d0
 
-    ; Should still be able to select immediate forms using a G_DUP from a
-    ; constant.
 
     ; CHECK-LABEL: name: shl_v2i32_imm_dup
     ; CHECK: liveins: $d0
-    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
-    ; CHECK: [[SHLv2i32_shift:%[0-9]+]]:fpr64 = SHLv2i32_shift [[COPY]], 24
-    ; CHECK: $d0 = COPY [[SHLv2i32_shift]]
-    ; CHECK: RET_ReallyLR implicit $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK-NEXT: [[SHLv2i32_shift:%[0-9]+]]:fpr64 = SHLv2i32_shift [[COPY]], 24
+    ; CHECK-NEXT: $d0 = COPY [[SHLv2i32_shift]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %0:fpr(<2 x s32>) = COPY $d0
     %2:gpr(s32) = G_CONSTANT i32 24
     %1:fpr(<2 x s32>) = G_DUP %2(s32)
diff --git a/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll b/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll
index 37c6202cda4a1f0..51f8c2ceceecb0a 100644
--- a/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll
@@ -244,12 +244,11 @@ define <4 x i64> @zext_v4i8_to_v4i64(<4 x i8> %v0) nounwind {
 ; CHECK-GI-LABEL: zext_v4i8_to_v4i64:
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    ushll.4s v0, v0, #0
-; CHECK-GI-NEXT:    adrp x8, .LCPI14_0
-; CHECK-GI-NEXT:    ldr q3, [x8, :lo12:.LCPI14_0]
-; CHECK-GI-NEXT:    ushll.2d v1, v0, #0
-; CHECK-GI-NEXT:    ushll2.2d v2, v0, #0
-; CHECK-GI-NEXT:    and.16b v0, v1, v3
-; CHECK-GI-NEXT:    and.16b v1, v2, v3
+; CHECK-GI-NEXT:    movi.2d v1, #0x000000000000ff
+; CHECK-GI-NEXT:    ushll.2d v2, v0, #0
+; CHECK-GI-NEXT:    ushll2.2d v3, v0, #0
+; CHECK-GI-NEXT:    and.16b v0, v2, v1
+; CHECK-GI-NEXT:    and.16b v1, v3, v1
 ; CHECK-GI-NEXT:    ret
   %r = zext <4 x i8> %v0 to <4 x i64>
   ret <4 x i64> %r
diff --git a/llvm/test/CodeGen/AArch64/bool-ext-inc.ll b/llvm/test/CodeGen/AArch64/bool-ext-inc.ll
index 8e6a70986c302fe..65c2cd85b71a93f 100644
--- a/llvm/test/CodeGen/AArch64/bool-ext-inc.ll
+++ b/llvm/test/CodeGen/AArch64/bool-ext-inc.ll
@@ -29,10 +29,9 @@ define <4 x i32> @zextbool_sub_vector(<4 x i32> %c1, <4 x i32> %c2, <4 x i32> %x
 ;
 ; GISEL-LABEL: zextbool_sub_vector:
 ; GISEL:       // %bb.0:
-; GISEL-NEXT:    adrp x8, .LCPI1_0
+; GISEL-NEXT:    movi v3.4s, #1
 ; GISEL-NEXT:    cmeq v0.4s, v0.4s, v1.4s
-; GISEL-NEXT:    ldr q1, [x8, :lo12:.LCPI1_0]
-; GISEL-NEXT:    and v0.16b, v0.16b, v1.16b
+; GISEL-NEXT:    and v0.16b, v0.16b, v3.16b
 ; GISEL-NEXT:    sub v0.4s, v2.4s, v0.4s
 ; GISEL-NEXT:    ret
   %c = icmp eq <4 x i32> %c1, %c2
diff --git a/llvm/test/CodeGen/AArch64/mul_pow2.ll b/llvm/test/CodeGen/AArch64/mul_pow2.ll
index 33c766f382e6b62..e16ee40c8dcb0d6 100644
--- a/llvm/test/CodeGen/AArch64/mul_pow2.ll
+++ b/llvm/test/CodeGen/AArch64/mul_pow2.ll
@@ -858,12 +858,11 @@ define <4 x i32> @muladd_demand_commute(<4 x i32> %x, <4 x i32> %y) {
 ;
 ; GISEL-LABEL: muladd_demand_commute:
 ; GISEL:       // %bb.0:
-; GISEL-NEXT:    adrp x8, .LCPI49_1
-; GISEL-NEXT:    ldr q2, [x8, :lo12:.LCPI49_1]
 ; GISEL-NEXT:    adrp x8, .LCPI49_0
+; GISEL-NEXT:    movi v3.4s, #1, msl #16
+; GISEL-NEXT:    ldr q2, [x8, :lo12:.LCPI49_0]
 ; GISEL-NEXT:    mla v1.4s, v0.4s, v2.4s
-; GISEL-NEXT:    ldr q0, [x8, :lo12:.LCPI49_0]
-; GISEL-NEXT:    and v0.16b, v1.16b, v0.16b
+; GISEL-NEXT:    and v0.16b, v1.16b, v3.16b
 ; GISEL-NEXT:    ret
   %m = mul <4 x i32> %x, <i32 131008, i32 131008, i32 131008, i32 131008>
   %a = add <4 x i32> %m, %y
diff --git a/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll b/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll
index 4f13b78d6c169a1..f17b9724aadba37 100644
--- a/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll
@@ -144,8 +144,7 @@ define <2 x i32> @orrimm2s_lsl0(<2 x i32> %a) {
 ;
 ; CHECK-GI-LABEL: orrimm2s_lsl0:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI12_0
-; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI12_0]
+; CHECK-GI-NEXT:    movi d1, #0x0000ff000000ff
 ; CHECK-GI-NEXT:    orr v0.8b, v0.8b, v1.8b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = or <2 x i32> %a, < i32 255, i32 255>
@@ -160,8 +159,7 @@ define <2 x i32> @orrimm2s_lsl8(<2 x i32> %a) {
 ;
 ; CHECK-GI-LABEL: orrimm2s_lsl8:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI13_0
-; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI13_0]
+; CHECK-GI-NEXT:    movi d1, #0x00ff000000ff00
 ; CHECK-GI-NEXT:    orr v0.8b, v0.8b, v1.8b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = or <2 x i32> %a, < i32 65280, i32 65280>
@@ -176,8 +174,7 @@ define <2 x i32> @orrimm2s_lsl16(<2 x i32> %a) {
 ;
 ; CHECK-GI-LABEL: orrimm2s_lsl16:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI14_0
-; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI14_0]
+; CHECK-GI-NEXT:    movi d1, #0xff000000ff0000
 ; CHECK-GI-NEXT:    orr v0.8b, v0.8b, v1.8b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = or <2 x i32> %a, < i32 16711680, i32 16711680>
@@ -192,8 +189,7 @@ define <2 x i32> @orrimm2s_lsl24(<2 x i32> %a) {
 ;
 ; CHECK-GI-LABEL: orrimm2s_lsl24:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI15_0
-; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI15_0]
+; CHECK-GI-NEXT:    movi d1, #0xff000000ff000000
 ; CHECK-GI-NEXT:    orr v0.8b, v0.8b, v1.8b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = or <2 x i32> %a, < i32 4278190080, i32 4278190080>
@@ -208,8 +204,7 @@ define <4 x i32> @orrimm4s_lsl0(<4 x i32> %a) {
 ;
 ; CHECK-GI-LABEL: orrimm4s_lsl0:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI16_0
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI16_0]
+; CHECK-GI-NEXT:    movi v1.2d, #0x0000ff000000ff
 ; CHECK-GI-NEXT:    orr v0.16b, v0.16b, v1.16b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = or <4 x i32> %a, < i32 255, i32 255, i32 255, i32 255>
@@ -224,8 +219,7 @@ define <4 x i32> @orrimm4s_lsl8(<4 x i32> %a) {
 ;
 ; CHECK-GI-LABEL: orrimm4s_lsl8:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI17_0
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI17_0]
+; CHECK-GI-NEXT:    movi v1.2d, #0x00ff000000ff00
 ; CHECK-GI-NEXT:    orr v0.16b, v0.16b, v1.16b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = or <4 x i32> %a, < i32 65280, i32 65280, i32 65280, i32 65280>
@@ -240,8 +234,7 @@ define <4 x i32> @orrimm4s_lsl16(<4 x i32> %a) {
 ;
 ; CHECK-GI-LABEL: orrimm4s_lsl16:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI18_0
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI18_0]
+; CHECK-GI-NEXT:    movi v1.2d, #0xff000000ff0000
 ; CHECK-GI-NEXT:    orr v0.16b, v0.16b, v1.16b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = or <4 x i32> %a, < i32 16711680, i32 16711680, i32 16711680, i32 16711680>
@@ -256,8 +249,7 @@ define <4 x i32> @orrimm4s_lsl24(<4 x i32> %a) {
 ;
 ; CHECK-GI-LABEL: orrimm4s_lsl24:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI19_0
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI19_0]
+; CHECK-GI-NEXT:    movi v1.2d, #0xff000000ff000000
 ; CHECK-GI-NEXT:    orr v0.16b, v0.16b, v1.16b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = or <4 x i32> %a, < i32 4278190080, i32 4278190080, i32 4278190080, i32 4278190080>
@@ -272,8 +264,7 @@ define <4 x i16> @orrimm4h_lsl0(<4 x i16> %a) {
 ;
 ; CHECK-GI-LABEL: orrimm4h_lsl0:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI20_0
-; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI20_0]
+; CHECK-GI-NEXT:    movi d1, #0xff00ff00ff00ff
 ; CHECK-GI-NEXT:    orr v0.8b, v0.8b, v1.8b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = or <4 x i16> %a, < i16 255, i16 255, i16 255, i16 255 >
@@ -288,8 +279,7 @@ define <4 x i16> @orrimm4h_lsl8(<4 x i16> %a) {
 ;
 ; CHECK-GI-LABEL: orrimm4h_lsl8:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI21_0
-; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI21_0]
+; CHECK-GI-NEXT:    movi d1, #0xff00ff00ff00ff00
 ; CHECK-GI-NEXT:    orr v0.8b, v0.8b, v1.8b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = or <4 x i16> %a, < i16 65280, i16 65280, i16 65280, i16 65280 >
@@ -304,8 +294,7 @@ define <8 x i16> @orrimm8h_lsl0(<8 x i16> %a) {
 ;
 ; CHECK-GI-LABEL: orrimm8h_lsl0:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI22_0
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI22_0]
+; CHECK-GI-NEXT:    movi v1.2d, #0xff00ff00ff00ff
 ; CHECK-GI-NEXT:    orr v0.16b, v0.16b, v1.16b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = or <8 x i16> %a, < i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255 >
@@ -320,8 +309,7 @@ define <8 x i16> @orrimm8h_lsl8(<8 x i16> %a) {
 ;
 ; CHECK-GI-LABEL: orrimm8h_lsl8:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI23_0
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI23_0]
+; CHECK-GI-NEXT:    movi v1.2d, #0xff00ff00ff00ff00
 ; CHECK-GI-NEXT:    orr v0.16b, v0.16b, v1.16b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = or <8 x i16> %a, < i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280 >
@@ -336,8 +324,7 @@ define <2 x i32> @bicimm2s_lsl0(<2 x i32> %a) {
 ;
 ; CHECK-GI-LABEL: bicimm2s_lsl0:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI24_0
-; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI24_0]
+; CHECK-GI-NEXT:    mvni v1.2s, #16
 ; CHECK-GI-NEXT:    and v0.8b, v0.8b, v1.8b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = and <2 x i32> %a, < i32 4294967279, i32 4294967279 >
@@ -352,8 +339,7 @@ define <2 x i32> @bicimm2s_lsl8(<2 x i32> %a) {
 ;
 ; CHECK-GI-LABEL: bicimm2s_lsl8:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI25_0
-; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI25_0]
+; CHECK-GI-NEXT:    mvni v1.2s, #16, lsl #8
 ; CHECK-GI-NEXT:    and v0.8b, v0.8b, v1.8b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = and <2 x i32> %a, < i32 4294963199, i32  4294963199 >
@@ -368,8 +354,7 @@ define <2 x i32> @bicimm2s_lsl16(<2 x i32> %a) {
 ;
 ; CHECK-GI-LABEL: bicimm2s_lsl16:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI26_0
-; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI26_0]
+; CHECK-GI-NEXT:    mvni v1.2s, #16, lsl #16
 ; CHECK-GI-NEXT:    and v0.8b, v0.8b, v1.8b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = and <2 x i32> %a, < i32 4293918719, i32 4293918719 >
@@ -384,8 +369,7 @@ define <2 x i32> @bicimm2s_lsl124(<2 x i32> %a) {
 ;
 ; CHECK-GI-LABEL: bicimm2s_lsl124:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI27_0
-; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI27_0]
+; CHECK-GI-NEXT:    mvni v1.2s, #16, lsl #24
 ; CHECK-GI-NEXT:    and v0.8b, v0.8b, v1.8b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = and <2 x i32> %a, < i32 4026531839, i32  4026531839>
@@ -400,8 +384,7 @@ define <4 x i32> @bicimm4s_lsl0(<4 x i32> %a) {
 ;
 ; CHECK-GI-LABEL: bicimm4s_lsl0:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI28_0
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI28_0]
+; CHECK-GI-NEXT:    mvni v1.4s, #16
 ; CHECK-GI-NEXT:    and v0.16b, v0.16b, v1.16b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = and <4 x i32> %a, < i32 4294967279, i32 4294967279, i32 4294967279, i32 4294967279 >
@@ -416,8 +399,7 @@ define <4 x i32> @bicimm4s_lsl8(<4 x i32> %a) {
 ;
 ; CHECK-GI-LABEL: bicimm4s_lsl8:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI29_0
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI29_0]
+; CHECK-GI-NEXT:    mvni v1.4s, #16, lsl #8
 ; CHECK-GI-NEXT:    and v0.16b, v0.16b, v1.16b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = and <4 x i32> %a, < i32 4294963199, i32  4294963199, i32  4294963199, i32  4294963199 >
@@ -432,8 +414,7 @@ define <4 x i32> @bicimm4s_lsl16(<4 x i32> %a) {
 ;
 ; CHECK-GI-LABEL: bicimm4s_lsl16:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI30_0
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI30_0]
+; CHECK-GI-NEXT:    mvni v1.4s, #16, lsl #16
 ; CHECK-GI-NEXT:    and v0.16b, v0.16b, v1.16b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = and <4 x i32> %a, < i32 4293918719, i32 4293918719, i32 4293918719, i32 4293918719 >
@@ -448,8 +429,7 @@ define <4 x i32> @bicimm4s_lsl124(<4 x i32> %a) {
 ;
 ; CHECK-GI-LABEL: bicimm4s_lsl124:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI31_0
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI31_0]
+; CHECK-GI-NEXT:    mvni v1.4s, #16, lsl #24
 ; CHECK-GI-NEXT:    and v0.16b, v0.16b, v1.16b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = and <4 x i32> %a, < i32 4026531839, i32  4026531839, i32  4026531839, i32  4026531839>
@@ -464,8 +444,7 @@ define <4 x i16> @bicimm4h_lsl0_a(<4 x i16> %a) {
 ;
 ; CHECK-GI-LABEL: bicimm4h_lsl0_a:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI32_0
-; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI32_0]
+; CHECK-GI-NEXT:    mvni v1.4h, #16
 ; CHECK-GI-NEXT:    and v0.8b, v0.8b, v1.8b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = and <4 x i16> %a, < i16 4294967279, i16  4294967279, i16  4294967279, i16  4294967279 >
@@ -480,8 +459,7 @@ define <4 x i16> @bicimm4h_lsl0_b(<4 x i16> %a) {
 ;
 ; CHECK-GI-LABEL: bicimm4h_lsl0_b:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI33_0
-; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI33_0]
+; CHECK-GI-NEXT:    movi d1, #0xff00ff00ff00ff00
 ; CHECK-GI-NEXT:    and v0.8b, v0.8b, v1.8b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = and <4 x i16> %a, < i16 65280, i16  65280, i16  65280, i16 65280 >
@@ -496,8 +474,7 @@ define <4 x i16> @bicimm4h_lsl8_a(<4 x i16> %a) {
 ;
 ; CHECK-GI-LABEL: bicimm4h_lsl8_a:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI34_0
-; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI34_0]
+; CHECK-GI-NEXT:    mvni v1.4h, #16, lsl #8
 ; CHECK-GI-NEXT:    and v0.8b, v0.8b, v1.8b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = and <4 x i16> %a, < i16 4294963199, i16  4294963199, i16  4294963199, i16  4294963199>
@@ -512,8 +489,7 @@ define <4 x i16> @bicimm4h_lsl8_b(<4 x i16> %a) {
 ;
 ; CHECK-GI-LABEL: bicimm4h_lsl8_b:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI35_0
-; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI35_0]
+; CHECK-GI-NEXT:    movi d1, #0xff00ff00ff00ff
 ; CHECK-GI-NEXT:    and v0.8b, v0.8b, v1.8b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = and <4 x i16> %a, < i16 255, i16 255, i16 255, i16 255>
@@ -528,8 +504,7 @@ define <8 x i16> @bicimm8h_lsl0_a(<8 x i16> %a) {
 ;
 ; CHECK-GI-LABEL: bicimm8h_lsl0_a:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI36_0
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI36_0]
+; CHECK-GI-NEXT:    mvni v1.8h, #16
 ; CHECK-GI-NEXT:    and v0.16b, v0.16b, v1.16b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = and <8 x i16> %a, < i16 4294967279, i16  4294967279, i16  4294967279, i16  4294967279,
@@ -545,8 +520,7 @@ define <8 x i16> @bicimm8h_lsl0_b(<8 x i16> %a) {
 ;
 ; CHECK-GI-LABEL: bicimm8h_lsl0_b:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI37_0
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI37_0]
+; CHECK-GI-NEXT:    movi v1.2d, #0xff00ff00ff00ff00
 ; CHECK-GI-NEXT:    and v0.16b, v0.16b, v1.16b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = and <8 x i16> %a, < i16 65280, i16  65280, i16  65280, i16 65280, i16 65280, i16  65280, i16  65280, i16 65280 >
@@ -561,8 +535,7 @@ define <8 x i16> @bicimm8h_lsl8_a(<8 x i16> %a) {
 ;
 ; CHECK-GI-LABEL: bicimm8h_lsl8_a:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI38_0
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI38_0]
+; CHECK-GI-NEXT:    mvni v1.8h, #16, lsl #8
 ; CHECK-GI-NEXT:    and v0.16b, v0.16b, v1.16b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = and <8 x i16> %a, < i16 4294963199, i16  4294963199, i16  4294963199, i16  4294963199,
@@ -578,8 +551,7 @@ define <8 x i16> @bicimm8h_lsl8_b(<8 x i16> %a) {
 ;
 ; CHECK-GI-LABEL: bicimm8h_lsl8_b:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI39_0
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI39_0]
+; CHECK-GI-NEXT:    movi v1.2d, #0xff00ff00ff00ff
 ; CHECK-GI-NEXT:    and v0.16b, v0.16b, v1.16b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = and <8 x i16> %a, < i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
@@ -1990,8 +1962,7 @@ define <2 x i64> @and64imm4s_lsl0(<2 x i64> %a) {
 ;
 ; CHECK-GI-LABEL: and64imm4s_lsl0:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI128_0
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI128_0]
+; CHECK-GI-NEXT:    movi v1.2d, #0xffffff00ffffff00
 ; CHECK-GI-NEXT:    and v0.16b, v0.16b, v1.16b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = and <2 x i64> %a, < i64 -1095216660736, i64 -1095216660736>
@@ -2006,8 +1977,7 @@ define <2 x i64> @and64imm4s_lsl8(<2 x i64> %a) {
 ;
 ; CHECK-GI-LABEL: and64imm4s_lsl8:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI129_0
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI129_0]
+; CHECK-GI-NEXT:    movi v1.2d, #0xffff00ffffff00ff
 ; CHECK-GI-NEXT:    and v0.16b, v0.16b, v1.16b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = and <2 x i64> %a, < i64 -280375465148161, i64 -280375465148161>
@@ -2022,8 +1992,7 @@ define <2 x i64> @and64imm4s_lsl16(<2 x i64> %a) {
 ;
 ; CHECK-GI-LABEL: and64imm4s_lsl16:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI130_0
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI130_0]
+; CHECK-GI-NEXT:    movi v1.2d, #0xff00ffffff00ffff
 ; CHECK-GI-NEXT:    and v0.16b, v0.16b, v1.16b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = and <2 x i64> %a, < i64 -71776119077928961, i64 -71776119077928961>
@@ -2038,8 +2007,7 @@ define <2 x i64> @and64imm4s_lsl24(<2 x i64> %a) {
 ;
 ; CHECK-GI-LABEL: and64imm4s_lsl24:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI131_0
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI131_0]
+; CHECK-GI-NEXT:    mvni v1.4s, #254, lsl #24
 ; CHECK-GI-NEXT:    and v0.16b, v0.16b, v1.16b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = and <2 x i64> %a, < i64 144115183814443007, i64 144115183814443007>
@@ -2086,8 +2054,7 @@ define <2 x i32> @and16imm4h_lsl0(<2 x i32> %a) {
 ;
 ; CHECK-GI-LABEL: and16imm4h_lsl0:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI134_0
-; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI134_0]
+; CHECK-GI-NEXT:    movi d1, #0xff00ff00ff00ff00
 ; CHECK-GI-NEXT:    and v0.8b, v0.8b, v1.8b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = and <2 x i32> %a, < i32 4278255360, i32 4278255360>
@@ -2102,8 +2069,7 @@ define <2 x i32> @and16imm4h_lsl8(<2 x i32> %a) {
 ;
 ; CHECK-GI-LABEL: and16imm4h_lsl8:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI135_0
-; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI135_0]
+; CHECK-GI-NEXT:    movi d1, #0xff00ff00ff00ff
 ; CHECK-GI-NEXT:    and v0.8b, v0.8b, v1.8b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = and <2 x i32> %a, < i32 16711935, i32 16711935>
@@ -2182,8 +2148,7 @@ define <4 x i32> @and16imm8h_lsl0(<4 x i32> %a) {
 ;
 ; CHECK-GI-LABEL: and16imm8h_lsl0:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI140_0
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI140_0]
+; CHECK-GI-NEXT:    movi v1.2d, #0xff00ff00ff00ff00
 ; CHECK-GI-NEXT:    and v0.16b, v0.16b, v1.16b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = and <4 x i32> %a, < i32 4278255360, i32 4278255360, i32 4278255360, i32 4278255360>
@@ -2198,8 +2163,7 @@ define <4 x i32> @and16imm8h_lsl8(<4 x i32> %a) {
 ;
 ; CHECK-GI-LABEL: and16imm8h_lsl8:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI141_0
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI141_0]
+; CHECK-GI-NEXT:    movi v1.2d, #0xff00ff00ff00ff
 ; CHECK-GI-NEXT:    and v0.16b, v0.16b, v1.16b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = and <4 x i32> %a, < i32 16711935, i32 16711935, i32 16711935, i32 16711935>
@@ -2214,8 +2178,7 @@ define <2 x i64> @and64imm8h_lsl0(<2 x i64> %a) {
 ;
 ; CHECK-GI-LABEL: and64imm8h_lsl0:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI142_0
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI142_0]
+; CHECK-GI-NEXT:    movi v1.2d, #0xff00ff00ff00ff00
 ; CHECK-GI-NEXT:    and v0.16b, v0.16b, v1.16b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = and <2 x i64> %a, < i64 -71777214294589696, i64 -71777214294589696>
@@ -2230,8 +2193,7 @@ define <2 x i64> @and64imm8h_lsl8(<2 x i64> %a) {
 ;
 ; CHECK-GI-LABEL: and64imm8h_lsl8:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI143_0
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI143_0]
+; CHECK-GI-NEXT:    movi v1.2d, #0xff00ff00ff00ff
 ; CHECK-GI-NEXT:    and v0.16b, v0.16b, v1.16b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = and <2 x i64> %a, < i64 71777214294589695, i64 71777214294589695>
@@ -2247,9 +2209,8 @@ define <8 x i16> @bic_shifted_knownbits(<8 x i16> %v) {
 ;
 ; CHECK-GI-LABEL: bic_shifted_knownbits:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    adrp x8, .LCPI144_0
+; CHECK-GI-NEXT:    movi v1.8h, #1
 ; CHECK-GI-NEXT:    ushr v0.8h, v0.8h, #9
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI144_0]
 ; CHECK-GI-NEXT:    and v0.16b, v0.16b, v1.16b
 ; CHECK-GI-NEXT:    ret
 entry:
@@ -2293,8 +2254,7 @@ define <8 x i32> @bic_shifted_knownbits3(<8 x i16> %v) {
 ;
 ; CHECK-GI-LABEL: bic_shifted_knownbits3:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI146_0
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI146_0]
+; CHECK-GI-NEXT:    movi v1.2d, #0xff00ff00ff00ff
 ; CHECK-GI-NEXT:    and v1.16b, v0.16b, v1.16b
 ; CHECK-GI-NEXT:    ushll v0.4s, v1.4h, #0
 ; CHECK-GI-NEXT:    ushll2 v1.4s, v1.8h, #0
@@ -2316,10 +2276,9 @@ define <8 x i32> @bic_shifted_knownbits4(<8 x i32> %v) {
 ;
 ; CHECK-GI-LABEL: bic_shifted_knownbits4:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    adrp x8, .LCPI147_0
+; CHECK-GI-NEXT:    movi v2.2d, #0xffff0000ffff0000
 ; CHECK-GI-NEXT:    shl v0.4s, v0.4s, #8
 ; CHECK-GI-NEXT:    shl v1.4s, v1.4s, #8
-; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI147_0]
 ; CHECK-GI-NEXT:    and v0.16b, v0.16b, v2.16b
 ; CHECK-GI-NEXT:    and v1.16b, v1.16b, v2.16b
 ; CHECK-GI-NEXT:    ret
@@ -2657,8 +2616,7 @@ define <2 x i64> @orr64imm4s_lsl0(<2 x i64> %a) {
 ;
 ; CHECK-GI-LABEL: orr64imm4s_lsl0:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI168_0
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI168_0]
+; CHECK-GI-NEXT:    movi v1.2d, #0x0000ff000000ff
 ; CHECK-GI-NEXT:    orr v0.16b, v0.16b, v1.16b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = or <2 x i64> %a, < i64 1095216660735, i64 1095216660735>
@@ -2673,8 +2631,7 @@ define <2 x i64> @orr64imm4s_lsl8(<2 x i64> %a) {
 ;
 ; CHECK-GI-LABEL: orr64imm4s_lsl8:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI169_0
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI169_0]
+; CHECK-GI-NEXT:    movi v1.2d, #0x00ff000000ff00
 ; CHECK-GI-NEXT:    orr v0.16b, v0.16b, v1.16b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = or <2 x i64> %a, < i64 280375465148160, i64 280375465148160>
@@ -2689,8 +2646,7 @@ define <2 x i64> @orr64imm4s_lsl16(<2 x i64> %a) {
 ;
 ; CHECK-GI-LABEL: orr64imm4s_lsl16:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI170_0
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI170_0]
+; CHECK-GI-NEXT:    movi v1.2d, #0xff000000ff0000
 ; CHECK-GI-NEXT:    orr v0.16b, v0.16b, v1.16b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = or <2 x i64> %a, < i64 71776119077928960, i64 71776119077928960>
@@ -2705,8 +2661,7 @@ define <2 x i64> @orr64imm4s_lsl24(<2 x i64> %a) {
 ;
 ; CHECK-GI-LABEL: orr64imm4s_lsl24:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI171_0
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI171_0]
+; CHECK-GI-NEXT:    movi v1.2d, #0xff000000ff000000
 ; CHECK-GI-NEXT:    orr v0.16b, v0.16b, v1.16b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = or <2 x i64> %a, < i64 -72057589759737856, i64 -72057589759737856>
@@ -2753,8 +2708,7 @@ define <2 x i32> @orr16imm4h_lsl0(<2 x i32> %a) {
 ;
 ; CHECK-GI-LABEL: orr16imm4h_lsl0:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI174_0
-; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI174_0]
+; CHECK-GI-NEXT:    movi d1, #0xff00ff00ff00ff
 ; CHECK-GI-NEXT:    orr v0.8b, v0.8b, v1.8b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = or <2 x i32> %a, < i32 16711935, i32 16711935>
@@ -2769,8 +2723,7 @@ define <2 x i32> @orr16imm4h_lsl8(<2 x i32> %a) {
 ;
 ; CHECK-GI-LABEL: orr16imm4h_lsl8:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI175_0
-; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI175_0]
+; CHECK-GI-NEXT:    movi d1, #0xff00ff00ff00ff00
 ; CHECK-GI-NEXT:    orr v0.8b, v0.8b, v1.8b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = or <2 x i32> %a, < i32 4278255360, i32 4278255360>
@@ -2849,8 +2802,7 @@ define <4 x i32> @orr16imm8h_lsl0(<4 x i32> %a) {
 ;
 ; CHECK-GI-LABEL: orr16imm8h_lsl0:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI180_0
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI180_0]
+; CHECK-GI-NEXT:    movi v1.2d, #0xff00ff00ff00ff
 ; CHECK-GI-NEXT:    orr v0.16b, v0.16b, v1.16b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = or <4 x i32> %a, < i32 16711935, i32 16711935, i32 16711935, i32 16711935>
@@ -2865,8 +2817,7 @@ define <4 x i32> @orr16imm8h_lsl8(<4 x i32> %a) {
 ;
 ; CHECK-GI-LABEL: orr16imm8h_lsl8:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI181_0
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI181_0]
+; CHECK-GI-NEXT:    movi v1.2d, #0xff00ff00ff00ff00
 ; CHECK-GI-NEXT:    orr v0.16b, v0.16b, v1.16b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = or <4 x i32> %a, < i32 4278255360, i32 4278255360, i32 4278255360, i32 4278255360>
@@ -2881,8 +2832,7 @@ define <2 x i64> @orr64imm8h_lsl0(<2 x i64> %a) {
 ;
 ; CHECK-GI-LABEL: orr64imm8h_lsl0:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI182_0
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI182_0]
+; CHECK-GI-NEXT:    movi v1.2d, #0xff00ff00ff00ff
 ; CHECK-GI-NEXT:    orr v0.16b, v0.16b, v1.16b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = or <2 x i64> %a, < i64 71777214294589695, i64 71777214294589695>
@@ -2897,8 +2847,7 @@ define <2 x i64> @orr64imm8h_lsl8(<2 x i64> %a) {
 ;
 ; CHECK-GI-LABEL: orr64imm8h_lsl8:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI183_0
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI183_0]
+; CHECK-GI-NEXT:    movi v1.2d, #0xff00ff00ff00ff00
 ; CHECK-GI-NEXT:    orr v0.16b, v0.16b, v1.16b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = or <2 x i64> %a, < i64 -71777214294589696, i64 -71777214294589696>
diff --git a/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll b/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll
index 1919a42e0a2f8ce..0abdda5954f570f 100644
--- a/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll
@@ -1667,108 +1667,66 @@ define <2 x i64> @cmneqz2xi64(<2 x i64> %A) {
 }
 
 define <8 x i8> @cmhsz8xi8(<8 x i8> %A) {
-; CHECK-SD-LABEL: cmhsz8xi8:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    movi v1.8b, #2
-; CHECK-SD-NEXT:    cmhs v0.8b, v0.8b, v1.8b
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: cmhsz8xi8:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI126_0
-; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI126_0]
-; CHECK-GI-NEXT:    cmhs v0.8b, v0.8b, v1.8b
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: cmhsz8xi8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.8b, #2
+; CHECK-NEXT:    cmhs v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    ret
   %tmp3 = icmp uge <8 x i8> %A, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
   %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
   ret <8 x i8> %tmp4
 }
 
 define <16 x i8> @cmhsz16xi8(<16 x i8> %A) {
-; CHECK-SD-LABEL: cmhsz16xi8:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    movi v1.16b, #2
-; CHECK-SD-NEXT:    cmhs v0.16b, v0.16b, v1.16b
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: cmhsz16xi8:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI127_0
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI127_0]
-; CHECK-GI-NEXT:    cmhs v0.16b, v0.16b, v1.16b
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: cmhsz16xi8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.16b, #2
+; CHECK-NEXT:    cmhs v0.16b, v0.16b, v1.16b
+; CHECK-NEXT:    ret
   %tmp3 = icmp uge <16 x i8> %A, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
   %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
   ret <16 x i8> %tmp4
 }
 
 define <4 x i16> @cmhsz4xi16(<4 x i16> %A) {
-; CHECK-SD-LABEL: cmhsz4xi16:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    movi v1.4h, #2
-; CHECK-SD-NEXT:    cmhs v0.4h, v0.4h, v1.4h
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: cmhsz4xi16:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI128_0
-; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI128_0]
-; CHECK-GI-NEXT:    cmhs v0.4h, v0.4h, v1.4h
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: cmhsz4xi16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.4h, #2
+; CHECK-NEXT:    cmhs v0.4h, v0.4h, v1.4h
+; CHECK-NEXT:    ret
   %tmp3 = icmp uge <4 x i16> %A, <i16 2, i16 2, i16 2, i16 2>
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
   ret <4 x i16> %tmp4
 }
 
 define <8 x i16> @cmhsz8xi16(<8 x i16> %A) {
-; CHECK-SD-LABEL: cmhsz8xi16:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    movi v1.8h, #2
-; CHECK-SD-NEXT:    cmhs v0.8h, v0.8h, v1.8h
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: cmhsz8xi16:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI129_0
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI129_0]
-; CHECK-GI-NEXT:    cmhs v0.8h, v0.8h, v1.8h
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: cmhsz8xi16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.8h, #2
+; CHECK-NEXT:    cmhs v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    ret
   %tmp3 = icmp uge <8 x i16> %A, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
   %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
   ret <8 x i16> %tmp4
 }
 
 define <2 x i32> @cmhsz2xi32(<2 x i32> %A) {
-; CHECK-SD-LABEL: cmhsz2xi32:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    movi v1.2s, #2
-; CHECK-SD-NEXT:    cmhs v0.2s, v0.2s, v1.2s
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: cmhsz2xi32:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI130_0
-; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI130_0]
-; CHECK-GI-NEXT:    cmhs v0.2s, v0.2s, v1.2s
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: cmhsz2xi32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.2s, #2
+; CHECK-NEXT:    cmhs v0.2s, v0.2s, v1.2s
+; CHECK-NEXT:    ret
   %tmp3 = icmp uge <2 x i32> %A, <i32 2, i32 2>
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
   ret <2 x i32> %tmp4
 }
 
 define <4 x i32> @cmhsz4xi32(<4 x i32> %A) {
-; CHECK-SD-LABEL: cmhsz4xi32:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    movi v1.4s, #2
-; CHECK-SD-NEXT:    cmhs v0.4s, v0.4s, v1.4s
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: cmhsz4xi32:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI131_0
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI131_0]
-; CHECK-GI-NEXT:    cmhs v0.4s, v0.4s, v1.4s
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: cmhsz4xi32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.4s, #2
+; CHECK-NEXT:    cmhs v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    ret
   %tmp3 = icmp uge <4 x i32> %A, <i32 2, i32 2, i32 2, i32 2>
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
   ret <4 x i32> %tmp4
@@ -1801,108 +1759,66 @@ define <2 x i64> @cmhsz2xi64(<2 x i64> %A) {
 
 
 define <8 x i8> @cmhiz8xi8(<8 x i8> %A) {
-; CHECK-SD-LABEL: cmhiz8xi8:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    movi v1.8b, #1
-; CHECK-SD-NEXT:    cmhi v0.8b, v0.8b, v1.8b
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: cmhiz8xi8:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI133_0
-; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI133_0]
-; CHECK-GI-NEXT:    cmhi v0.8b, v0.8b, v1.8b
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: cmhiz8xi8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.8b, #1
+; CHECK-NEXT:    cmhi v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    ret
   %tmp3 = icmp ugt <8 x i8> %A, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
   %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
   ret <8 x i8> %tmp4
 }
 
 define <16 x i8> @cmhiz16xi8(<16 x i8> %A) {
-; CHECK-SD-LABEL: cmhiz16xi8:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    movi v1.16b, #1
-; CHECK-SD-NEXT:    cmhi v0.16b, v0.16b, v1.16b
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: cmhiz16xi8:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI134_0
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI134_0]
-; CHECK-GI-NEXT:    cmhi v0.16b, v0.16b, v1.16b
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: cmhiz16xi8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.16b, #1
+; CHECK-NEXT:    cmhi v0.16b, v0.16b, v1.16b
+; CHECK-NEXT:    ret
   %tmp3 = icmp ugt <16 x i8> %A, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
   %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
   ret <16 x i8> %tmp4
 }
 
 define <4 x i16> @cmhiz4xi16(<4 x i16> %A) {
-; CHECK-SD-LABEL: cmhiz4xi16:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    movi v1.4h, #1
-; CHECK-SD-NEXT:    cmhi v0.4h, v0.4h, v1.4h
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: cmhiz4xi16:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI135_0
-; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI135_0]
-; CHECK-GI-NEXT:    cmhi v0.4h, v0.4h, v1.4h
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: cmhiz4xi16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.4h, #1
+; CHECK-NEXT:    cmhi v0.4h, v0.4h, v1.4h
+; CHECK-NEXT:    ret
   %tmp3 = icmp ugt <4 x i16> %A, <i16 1, i16 1, i16 1, i16 1>
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
   ret <4 x i16> %tmp4
 }
 
 define <8 x i16> @cmhiz8xi16(<8 x i16> %A) {
-; CHECK-SD-LABEL: cmhiz8xi16:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    movi v1.8h, #1
-; CHECK-SD-NEXT:    cmhi v0.8h, v0.8h, v1.8h
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: cmhiz8xi16:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI136_0
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI136_0]
-; CHECK-GI-NEXT:    cmhi v0.8h, v0.8h, v1.8h
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: cmhiz8xi16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.8h, #1
+; CHECK-NEXT:    cmhi v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    ret
   %tmp3 = icmp ugt <8 x i16> %A, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
   %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
   ret <8 x i16> %tmp4
 }
 
 define <2 x i32> @cmhiz2xi32(<2 x i32> %A) {
-; CHECK-SD-LABEL: cmhiz2xi32:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    movi v1.2s, #1
-; CHECK-SD-NEXT:    cmhi v0.2s, v0.2s, v1.2s
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: cmhiz2xi32:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI137_0
-; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI137_0]
-; CHECK-GI-NEXT:    cmhi v0.2s, v0.2s, v1.2s
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: cmhiz2xi32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.2s, #1
+; CHECK-NEXT:    cmhi v0.2s, v0.2s, v1.2s
+; CHECK-NEXT:    ret
   %tmp3 = icmp ugt <2 x i32> %A, <i32 1, i32 1>
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
   ret <2 x i32> %tmp4
 }
 
 define <4 x i32> @cmhiz4xi32(<4 x i32> %A) {
-; CHECK-SD-LABEL: cmhiz4xi32:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    movi v1.4s, #1
-; CHECK-SD-NEXT:    cmhi v0.4s, v0.4s, v1.4s
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: cmhiz4xi32:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI138_0
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI138_0]
-; CHECK-GI-NEXT:    cmhi v0.4s, v0.4s, v1.4s
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: cmhiz4xi32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.4s, #1
+; CHECK-NEXT:    cmhi v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    ret
   %tmp3 = icmp ugt <4 x i32> %A, <i32 1, i32 1, i32 1, i32 1>
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
   ret <4 x i32> %tmp4
@@ -2019,18 +1935,11 @@ define <2 x i64> @cmlsz2xi64(<2 x i64> %A) {
 
 ; LO implemented as HI, so check reversed operands.
 define <8 x i8> @cmloz8xi8(<8 x i8> %A) {
-; CHECK-SD-LABEL: cmloz8xi8:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    movi v1.8b, #2
-; CHECK-SD-NEXT:    cmhi v0.8b, v1.8b, v0.8b
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: cmloz8xi8:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI147_0
-; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI147_0]
-; CHECK-GI-NEXT:    cmhi v0.8b, v1.8b, v0.8b
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: cmloz8xi8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.8b, #2
+; CHECK-NEXT:    cmhi v0.8b, v1.8b, v0.8b
+; CHECK-NEXT:    ret
   %tmp3 = icmp ult <8 x i8> %A, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
   %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
   ret <8 x i8> %tmp4
@@ -2038,18 +1947,11 @@ define <8 x i8> @cmloz8xi8(<8 x i8> %A) {
 
 ; LO implemented as HI, so check reversed operands.
 define <16 x i8> @cmloz16xi8(<16 x i8> %A) {
-; CHECK-SD-LABEL: cmloz16xi8:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    movi v1.16b, #2
-; CHECK-SD-NEXT:    cmhi v0.16b, v1.16b, v0.16b
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: cmloz16xi8:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI148_0
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI148_0]
-; CHECK-GI-NEXT:    cmhi v0.16b, v1.16b, v0.16b
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: cmloz16xi8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.16b, #2
+; CHECK-NEXT:    cmhi v0.16b, v1.16b, v0.16b
+; CHECK-NEXT:    ret
   %tmp3 = icmp ult <16 x i8> %A, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
   %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
   ret <16 x i8> %tmp4
@@ -2057,18 +1959,11 @@ define <16 x i8> @cmloz16xi8(<16 x i8> %A) {
 
 ; LO implemented as HI, so check reversed operands.
 define <4 x i16> @cmloz4xi16(<4 x i16> %A) {
-; CHECK-SD-LABEL: cmloz4xi16:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    movi v1.4h, #2
-; CHECK-SD-NEXT:    cmhi v0.4h, v1.4h, v0.4h
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: cmloz4xi16:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI149_0
-; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI149_0]
-; CHECK-GI-NEXT:    cmhi v0.4h, v1.4h, v0.4h
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: cmloz4xi16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.4h, #2
+; CHECK-NEXT:    cmhi v0.4h, v1.4h, v0.4h
+; CHECK-NEXT:    ret
   %tmp3 = icmp ult <4 x i16> %A, <i16 2, i16 2, i16 2, i16 2>
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
   ret <4 x i16> %tmp4
@@ -2076,18 +1971,11 @@ define <4 x i16> @cmloz4xi16(<4 x i16> %A) {
 
 ; LO implemented as HI, so check reversed operands.
 define <8 x i16> @cmloz8xi16(<8 x i16> %A) {
-; CHECK-SD-LABEL: cmloz8xi16:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    movi v1.8h, #2
-; CHECK-SD-NEXT:    cmhi v0.8h, v1.8h, v0.8h
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: cmloz8xi16:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI150_0
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI150_0]
-; CHECK-GI-NEXT:    cmhi v0.8h, v1.8h, v0.8h
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: cmloz8xi16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.8h, #2
+; CHECK-NEXT:    cmhi v0.8h, v1.8h, v0.8h
+; CHECK-NEXT:    ret
   %tmp3 = icmp ult <8 x i16> %A, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
   %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
   ret <8 x i16> %tmp4
@@ -2095,18 +1983,11 @@ define <8 x i16> @cmloz8xi16(<8 x i16> %A) {
 
 ; LO implemented as HI, so check reversed operands.
 define <2 x i32> @cmloz2xi32(<2 x i32> %A) {
-; CHECK-SD-LABEL: cmloz2xi32:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    movi v1.2s, #2
-; CHECK-SD-NEXT:    cmhi v0.2s, v1.2s, v0.2s
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: cmloz2xi32:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI151_0
-; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI151_0]
-; CHECK-GI-NEXT:    cmhi v0.2s, v1.2s, v0.2s
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: cmloz2xi32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.2s, #2
+; CHECK-NEXT:    cmhi v0.2s, v1.2s, v0.2s
+; CHECK-NEXT:    ret
   %tmp3 = icmp ult <2 x i32> %A, <i32 2, i32 2>
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
   ret <2 x i32> %tmp4
@@ -2114,18 +1995,11 @@ define <2 x i32> @cmloz2xi32(<2 x i32> %A) {
 
 ; LO implemented as HI, so check reversed operands.
 define <4 x i32> @cmloz4xi32(<4 x i32> %A) {
-; CHECK-SD-LABEL: cmloz4xi32:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    movi v1.4s, #2
-; CHECK-SD-NEXT:    cmhi v0.4s, v1.4s, v0.4s
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: cmloz4xi32:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI152_0
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI152_0]
-; CHECK-GI-NEXT:    cmhi v0.4s, v1.4s, v0.4s
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: cmloz4xi32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.4s, #2
+; CHECK-NEXT:    cmhi v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    ret
   %tmp3 = icmp ult <4 x i32> %A, <i32 2, i32 2, i32 2, i32 2>
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
   ret <4 x i32> %tmp4
@@ -4297,10 +4171,9 @@ define <4 x i32> @fcmule4xfloat_fast_zext(<4 x float> %A, <4 x float> %B) {
 ;
 ; CHECK-GI-LABEL: fcmule4xfloat_fast_zext:
 ; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi v2.4s, #1
 ; CHECK-GI-NEXT:    fcmgt v0.4s, v0.4s, v1.4s
-; CHECK-GI-NEXT:    adrp x8, .LCPI322_0
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI322_0]
-; CHECK-GI-NEXT:    bic v0.16b, v1.16b, v0.16b
+; CHECK-GI-NEXT:    bic v0.16b, v2.16b, v0.16b
 ; CHECK-GI-NEXT:    ret
 ; GISEL-LABEL: fcmule4xfloat_fast_zext:
 ; GISEL:       // %bb.0:
diff --git a/llvm/test/CodeGen/AArch64/neon-mov.ll b/llvm/test/CodeGen/AArch64/neon-mov.ll
index 2d0842cadc280c9..48242d99a6002a8 100644
--- a/llvm/test/CodeGen/AArch64/neon-mov.ll
+++ b/llvm/test/CodeGen/AArch64/neon-mov.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon -global-isel %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI
 
 define <8 x i8> @movi8b() {
 ; CHECK-LABEL: movi8b:
@@ -289,54 +290,93 @@ define <2 x i64> @movi2d() {
 }
 
 define <1 x i64> @movid() {
-; CHECK-LABEL: movid:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi d0, #0xff0000ff0000ffff
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: movid:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    movi d0, #0xff0000ff0000ffff
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: movid:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    mov x8, #-72056494526300161 // =0xff0000ffffffffff
+; CHECK-GI-NEXT:    movk x8, #0, lsl #16
+; CHECK-GI-NEXT:    fmov d0, x8
+; CHECK-GI-NEXT:    ret
 	ret  <1 x i64> < i64 18374687574888349695 >
 }
 
 define <2 x float> @fmov2s() {
-; CHECK-LABEL: fmov2s:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fmov v0.2s, #-12.00000000
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fmov2s:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fmov v0.2s, #-12.00000000
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fmov2s:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    fmov s0, #-12.00000000
+; CHECK-GI-NEXT:    dup v0.2s, v0.s[0]
+; CHECK-GI-NEXT:    ret
 	ret <2 x float> < float -1.2e1, float -1.2e1>
 }
 
 define <4 x float> @fmov4s() {
-; CHECK-LABEL: fmov4s:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fmov v0.4s, #-12.00000000
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fmov4s:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fmov v0.4s, #-12.00000000
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fmov4s:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    fmov s0, #-12.00000000
+; CHECK-GI-NEXT:    dup v0.4s, v0.s[0]
+; CHECK-GI-NEXT:    ret
 	ret <4 x float> < float -1.2e1, float -1.2e1, float -1.2e1, float -1.2e1>
 }
 
 define <2 x double> @fmov2d() {
-; CHECK-LABEL: fmov2d:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fmov v0.2d, #-12.00000000
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: fmov2d:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    fmov v0.2d, #-12.00000000
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fmov2d:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    fmov d0, #-12.00000000
+; CHECK-GI-NEXT:    dup v0.2d, v0.d[0]
+; CHECK-GI-NEXT:    ret
 	ret <2 x double> < double -1.2e1, double -1.2e1>
 }
 
 define <2 x i32> @movi1d_1() {
-; CHECK-LABEL: movi1d_1:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi d0, #0x00ffffffff0000
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: movi1d_1:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    movi d0, #0x00ffffffff0000
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: movi1d_1:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    adrp x8, .LCPI39_0
+; CHECK-GI-NEXT:    ldr d0, [x8, :lo12:.LCPI39_0]
+; CHECK-GI-NEXT:    ret
   ret <2 x i32> < i32  -65536, i32 65535>
 }
 
 
 declare <2 x i32> @test_movi1d(<2 x i32>, <2 x i32>)
 define <2 x i32> @movi1d() {
-; CHECK-LABEL: movi1d:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi d1, #0x00ffffffff0000
-; CHECK-NEXT:    adrp x8, .LCPI40_0
-; CHECK-NEXT:    ldr d0, [x8, :lo12:.LCPI40_0]
-; CHECK-NEXT:    b test_movi1d
+; CHECK-SD-LABEL: movi1d:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    movi d1, #0x00ffffffff0000
+; CHECK-SD-NEXT:    adrp x8, .LCPI40_0
+; CHECK-SD-NEXT:    ldr d0, [x8, :lo12:.LCPI40_0]
+; CHECK-SD-NEXT:    b test_movi1d
+;
+; CHECK-GI-LABEL: movi1d:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    adrp x8, .LCPI40_1
+; CHECK-GI-NEXT:    adrp x9, .LCPI40_0
+; CHECK-GI-NEXT:    ldr d0, [x8, :lo12:.LCPI40_1]
+; CHECK-GI-NEXT:    ldr d1, [x9, :lo12:.LCPI40_0]
+; CHECK-GI-NEXT:    b test_movi1d
   %1 = tail call <2 x i32> @test_movi1d(<2 x i32> <i32 -2147483648, i32 2147450880>, <2 x i32> <i32 -65536, i32 65535>)
   ret <2 x i32> %1
 }
diff --git a/llvm/test/CodeGen/AArch64/zext.ll b/llvm/test/CodeGen/AArch64/zext.ll
index 979631adfe2a77f..8b30ee257ad2055 100644
--- a/llvm/test/CodeGen/AArch64/zext.ll
+++ b/llvm/test/CodeGen/AArch64/zext.ll
@@ -108,36 +108,22 @@ entry:
 }
 
 define <2 x i16> @zext_v2i8_v2i16(<2 x i8> %a) {
-; CHECK-SD-LABEL: zext_v2i8_v2i16:
-; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    movi d1, #0x0000ff000000ff
-; CHECK-SD-NEXT:    and v0.8b, v0.8b, v1.8b
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: zext_v2i8_v2i16:
-; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    adrp x8, .LCPI10_0
-; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI10_0]
-; CHECK-GI-NEXT:    and v0.8b, v0.8b, v1.8b
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: zext_v2i8_v2i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi d1, #0x0000ff000000ff
+; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    ret
 entry:
   %c = zext <2 x i8> %a to <2 x i16>
   ret <2 x i16> %c
 }
 
 define <2 x i32> @zext_v2i8_v2i32(<2 x i8> %a) {
-; CHECK-SD-LABEL: zext_v2i8_v2i32:
-; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    movi d1, #0x0000ff000000ff
-; CHECK-SD-NEXT:    and v0.8b, v0.8b, v1.8b
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: zext_v2i8_v2i32:
-; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    adrp x8, .LCPI11_0
-; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI11_0]
-; CHECK-GI-NEXT:    and v0.8b, v0.8b, v1.8b
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: zext_v2i8_v2i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi d1, #0x0000ff000000ff
+; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    ret
 entry:
   %c = zext <2 x i8> %a to <2 x i32>
   ret <2 x i32> %c
@@ -153,9 +139,8 @@ define <2 x i64> @zext_v2i8_v2i64(<2 x i8> %a) {
 ;
 ; CHECK-GI-LABEL: zext_v2i8_v2i64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    adrp x8, .LCPI12_0
+; CHECK-GI-NEXT:    movi v1.2d, #0x000000000000ff
 ; CHECK-GI-NEXT:    ushll v0.2d, v0.2s, #0
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI12_0]
 ; CHECK-GI-NEXT:    and v0.16b, v0.16b, v1.16b
 ; CHECK-GI-NEXT:    ret
 entry:
@@ -164,18 +149,11 @@ entry:
 }
 
 define <2 x i32> @zext_v2i16_v2i32(<2 x i16> %a) {
-; CHECK-SD-LABEL: zext_v2i16_v2i32:
-; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    movi d1, #0x00ffff0000ffff
-; CHECK-SD-NEXT:    and v0.8b, v0.8b, v1.8b
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: zext_v2i16_v2i32:
-; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    adrp x8, .LCPI13_0
-; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI13_0]
-; CHECK-GI-NEXT:    and v0.8b, v0.8b, v1.8b
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: zext_v2i16_v2i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi d1, #0x00ffff0000ffff
+; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    ret
 entry:
   %c = zext <2 x i16> %a to <2 x i32>
   ret <2 x i32> %c
@@ -191,9 +169,8 @@ define <2 x i64> @zext_v2i16_v2i64(<2 x i16> %a) {
 ;
 ; CHECK-GI-LABEL: zext_v2i16_v2i64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    adrp x8, .LCPI14_0
+; CHECK-GI-NEXT:    movi v1.2d, #0x0000000000ffff
 ; CHECK-GI-NEXT:    ushll v0.2d, v0.2s, #0
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI14_0]
 ; CHECK-GI-NEXT:    and v0.16b, v0.16b, v1.16b
 ; CHECK-GI-NEXT:    ret
 entry:
@@ -212,36 +189,22 @@ entry:
 }
 
 define <2 x i16> @zext_v2i10_v2i16(<2 x i10> %a) {
-; CHECK-SD-LABEL: zext_v2i10_v2i16:
-; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    movi v1.2s, #3, msl #8
-; CHECK-SD-NEXT:    and v0.8b, v0.8b, v1.8b
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: zext_v2i10_v2i16:
-; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    adrp x8, .LCPI16_0
-; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI16_0]
-; CHECK-GI-NEXT:    and v0.8b, v0.8b, v1.8b
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: zext_v2i10_v2i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi v1.2s, #3, msl #8
+; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    ret
 entry:
   %c = zext <2 x i10> %a to <2 x i16>
   ret <2 x i16> %c
 }
 
 define <2 x i32> @zext_v2i10_v2i32(<2 x i10> %a) {
-; CHECK-SD-LABEL: zext_v2i10_v2i32:
-; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    movi v1.2s, #3, msl #8
-; CHECK-SD-NEXT:    and v0.8b, v0.8b, v1.8b
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: zext_v2i10_v2i32:
-; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    adrp x8, .LCPI17_0
-; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI17_0]
-; CHECK-GI-NEXT:    and v0.8b, v0.8b, v1.8b
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: zext_v2i10_v2i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi v1.2s, #3, msl #8
+; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    ret
 entry:
   %c = zext <2 x i10> %a to <2 x i32>
   ret <2 x i32> %c
@@ -348,15 +311,14 @@ define <3 x i64> @zext_v3i8_v3i64(<3 x i8> %a) {
 ; CHECK-GI-LABEL: zext_v3i8_v3i64:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    // kill: def $w0 killed $w0 def $x0
-; CHECK-GI-NEXT:    fmov d0, x0
+; CHECK-GI-NEXT:    fmov d1, x0
 ; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
-; CHECK-GI-NEXT:    adrp x8, .LCPI21_0
+; CHECK-GI-NEXT:    movi v0.2d, #0x000000000000ff
 ; CHECK-GI-NEXT:    // kill: def $w2 killed $w2 def $x2
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI21_0]
 ; CHECK-GI-NEXT:    and x8, x2, #0xff
 ; CHECK-GI-NEXT:    fmov d2, x8
-; CHECK-GI-NEXT:    mov v0.d[1], x1
-; CHECK-GI-NEXT:    and v0.16b, v0.16b, v1.16b
+; CHECK-GI-NEXT:    mov v1.d[1], x1
+; CHECK-GI-NEXT:    and v0.16b, v1.16b, v0.16b
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
 ; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
@@ -556,8 +518,7 @@ define <4 x i16> @zext_v4i8_v4i16(<4 x i8> %a) {
 ;
 ; CHECK-GI-LABEL: zext_v4i8_v4i16:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    adrp x8, .LCPI28_0
-; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI28_0]
+; CHECK-GI-NEXT:    movi d1, #0xff00ff00ff00ff
 ; CHECK-GI-NEXT:    and v0.8b, v0.8b, v1.8b
 ; CHECK-GI-NEXT:    ret
 entry:
@@ -574,9 +535,8 @@ define <4 x i32> @zext_v4i8_v4i32(<4 x i8> %a) {
 ;
 ; CHECK-GI-LABEL: zext_v4i8_v4i32:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    adrp x8, .LCPI29_0
+; CHECK-GI-NEXT:    movi v1.2d, #0x0000ff000000ff
 ; CHECK-GI-NEXT:    ushll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI29_0]
 ; CHECK-GI-NEXT:    and v0.16b, v0.16b, v1.16b
 ; CHECK-GI-NEXT:    ret
 entry:
@@ -596,12 +556,11 @@ define <4 x i64> @zext_v4i8_v4i64(<4 x i8> %a) {
 ; CHECK-GI-LABEL: zext_v4i8_v4i64:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    ushll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT:    adrp x8, .LCPI30_0
-; CHECK-GI-NEXT:    ldr q3, [x8, :lo12:.LCPI30_0]
-; CHECK-GI-NEXT:    ushll v1.2d, v0.2s, #0
-; CHECK-GI-NEXT:    ushll2 v2.2d, v0.4s, #0
-; CHECK-GI-NEXT:    and v0.16b, v1.16b, v3.16b
-; CHECK-GI-NEXT:    and v1.16b, v2.16b, v3.16b
+; CHECK-GI-NEXT:    movi v1.2d, #0x000000000000ff
+; CHECK-GI-NEXT:    ushll v2.2d, v0.2s, #0
+; CHECK-GI-NEXT:    ushll2 v3.2d, v0.4s, #0
+; CHECK-GI-NEXT:    and v0.16b, v2.16b, v1.16b
+; CHECK-GI-NEXT:    and v1.16b, v3.16b, v1.16b
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = zext <4 x i8> %a to <4 x i64>
@@ -663,8 +622,7 @@ define <4 x i16> @zext_v4i10_v4i16(<4 x i10> %a) {
 ;
 ; CHECK-GI-LABEL: zext_v4i10_v4i16:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    adrp x8, .LCPI34_0
-; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI34_0]
+; CHECK-GI-NEXT:    mvni v1.4h, #252, lsl #8
 ; CHECK-GI-NEXT:    and v0.8b, v0.8b, v1.8b
 ; CHECK-GI-NEXT:    ret
 entry:
@@ -681,9 +639,8 @@ define <4 x i32> @zext_v4i10_v4i32(<4 x i10> %a) {
 ;
 ; CHECK-GI-LABEL: zext_v4i10_v4i32:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    adrp x8, .LCPI35_0
+; CHECK-GI-NEXT:    movi v1.4s, #3, msl #8
 ; CHECK-GI-NEXT:    ushll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI35_0]
 ; CHECK-GI-NEXT:    and v0.16b, v0.16b, v1.16b
 ; CHECK-GI-NEXT:    ret
 entry:
@@ -847,8 +804,7 @@ define <8 x i16> @zext_v8i10_v8i16(<8 x i10> %a) {
 ;
 ; CHECK-GI-LABEL: zext_v8i10_v8i16:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    adrp x8, .LCPI43_0
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI43_0]
+; CHECK-GI-NEXT:    mvni v1.8h, #252, lsl #8
 ; CHECK-GI-NEXT:    and v0.16b, v0.16b, v1.16b
 ; CHECK-GI-NEXT:    ret
 entry:
@@ -866,12 +822,11 @@ define <8 x i32> @zext_v8i10_v8i32(<8 x i10> %a) {
 ;
 ; CHECK-GI-LABEL: zext_v8i10_v8i32:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    adrp x8, .LCPI44_0
-; CHECK-GI-NEXT:    ushll v1.4s, v0.4h, #0
-; CHECK-GI-NEXT:    ushll2 v2.4s, v0.8h, #0
-; CHECK-GI-NEXT:    ldr q3, [x8, :lo12:.LCPI44_0]
-; CHECK-GI-NEXT:    and v0.16b, v1.16b, v3.16b
-; CHECK-GI-NEXT:    and v1.16b, v2.16b, v3.16b
+; CHECK-GI-NEXT:    movi v1.4s, #3, msl #8
+; CHECK-GI-NEXT:    ushll v2.4s, v0.4h, #0
+; CHECK-GI-NEXT:    ushll2 v3.4s, v0.8h, #0
+; CHECK-GI-NEXT:    and v0.16b, v2.16b, v1.16b
+; CHECK-GI-NEXT:    and v1.16b, v3.16b, v1.16b
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = zext <8 x i10> %a to <8 x i32>
@@ -1173,14 +1128,13 @@ define <16 x i32> @zext_v16i10_v16i32(<16 x i10> %a) {
 ; CHECK-GI-NEXT:    ldr s0, [sp, #8]
 ; CHECK-GI-NEXT:    ldr s3, [sp, #32]
 ; CHECK-GI-NEXT:    ldr s1, [sp, #40]
-; CHECK-GI-NEXT:    adrp x8, .LCPI53_0
+; CHECK-GI-NEXT:    movi v6.4s, #3, msl #8
 ; CHECK-GI-NEXT:    mov v4.s[1], w1
 ; CHECK-GI-NEXT:    mov v5.s[1], w5
 ; CHECK-GI-NEXT:    mov v2.s[1], v0.s[0]
 ; CHECK-GI-NEXT:    mov v3.s[1], v1.s[0]
 ; CHECK-GI-NEXT:    ldr s0, [sp, #16]
 ; CHECK-GI-NEXT:    ldr s1, [sp, #48]
-; CHECK-GI-NEXT:    ldr q6, [x8, :lo12:.LCPI53_0]
 ; CHECK-GI-NEXT:    mov v4.s[2], w2
 ; CHECK-GI-NEXT:    mov v5.s[2], w6
 ; CHECK-GI-NEXT:    mov v2.s[2], v0.s[0]

>From fa4e047688be5e4e83fa18225d72822f3284eb7c Mon Sep 17 00:00:00 2001
From: Mark Harley <mark.harley at arm.com>
Date: Mon, 25 Sep 2023 10:13:15 +0100
Subject: [PATCH 2/2] Remove isAdvSIMDModImm and capitalise variable names

---
 .../GISel/AArch64InstructionSelector.cpp      | 175 ++++++++----------
 1 file changed, 81 insertions(+), 94 deletions(-)

diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 7462965e5243e48..9e25f79705f4226 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -174,14 +174,14 @@ class AArch64InstructionSelector : public InstructionSelector {
                                   MachineIRBuilder &MIRBuilder);
 
   MachineInstr *tryAdvSIMDModImm16(Register Dst, unsigned DstSize, APInt Bits,
-                                   MachineIRBuilder &MIRBuilder, bool inv);
+                                   MachineIRBuilder &MIRBuilder, bool Inv);
 
   MachineInstr *tryAdvSIMDModImm32(Register Dst, unsigned DstSize, APInt Bits,
-                                   MachineIRBuilder &MIRBuilder, bool inv);
+                                   MachineIRBuilder &MIRBuilder, bool Inv);
   MachineInstr *tryAdvSIMDModImm64(Register Dst, unsigned DstSize, APInt Bits,
                                    MachineIRBuilder &MIRBuilder);
   MachineInstr *tryAdvSIMDModImm321s(Register Dst, unsigned DstSize, APInt Bits,
-                                     MachineIRBuilder &MIRBuilder, bool inv);
+                                     MachineIRBuilder &MIRBuilder, bool Inv);
   MachineInstr *tryAdvSIMDModImmFP(Register Dst, unsigned DstSize, APInt Bits,
                                    MachineIRBuilder &MIRBuilder);
 
@@ -5449,7 +5449,7 @@ bool AArch64InstructionSelector::selectInsertElt(MachineInstr &I,
 }
 
 MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm8(
-    Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &builder) {
+    Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
   unsigned int Op;
   if (DstSize == 128) {
     if (Bits.getHiBits(64) != Bits.getLoBits(64))
@@ -5459,11 +5459,11 @@ MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm8(
     Op = AArch64::MOVIv8b_ns;
   }
 
-  uint64_t val = Bits.zextOrTrunc(64).getZExtValue();
+  uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
 
-  if (AArch64_AM::isAdvSIMDModImmType9(val)) {
-    val = AArch64_AM::encodeAdvSIMDModImmType9(val);
-    auto Mov = builder.buildInstr(Op, {Dst}, {}).addImm(val);
+  if (AArch64_AM::isAdvSIMDModImmType9(Val)) {
+    Val = AArch64_AM::encodeAdvSIMDModImmType9(Val);
+    auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
     constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
     return &*Mov;
   }
@@ -5471,80 +5471,73 @@ MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm8(
 }
 
 MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm16(
-    Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &builder,
-    bool inv) {
+    Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
+    bool Inv) {
 
   unsigned int Op;
   if (DstSize == 128) {
     if (Bits.getHiBits(64) != Bits.getLoBits(64))
       return nullptr;
-    Op = inv ? AArch64::MVNIv8i16 : AArch64::MOVIv8i16;
+    Op = Inv ? AArch64::MVNIv8i16 : AArch64::MOVIv8i16;
   } else {
-    Op = inv ? AArch64::MVNIv4i16 : AArch64::MOVIv4i16;
+    Op = Inv ? AArch64::MVNIv4i16 : AArch64::MOVIv4i16;
   }
 
-  uint64_t val = Bits.zextOrTrunc(64).getZExtValue();
-
-  bool isAdvSIMDModImm = false;
+  uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
   uint64_t Shift;
 
-  if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType5(val))) {
-    val = AArch64_AM::encodeAdvSIMDModImmType5(val);
+  if (AArch64_AM::isAdvSIMDModImmType5(Val)) {
+    Val = AArch64_AM::encodeAdvSIMDModImmType5(Val);
     Shift = 0;
-  } else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType6(val))) {
-    val = AArch64_AM::encodeAdvSIMDModImmType6(val);
+  } else if (AArch64_AM::isAdvSIMDModImmType6(Val)) {
+    Val = AArch64_AM::encodeAdvSIMDModImmType6(Val);
     Shift = 8;
-  }
+  } else
+    return nullptr;
 
-  if (isAdvSIMDModImm) {
-    auto Mov = builder.buildInstr(Op, {Dst}, {}).addImm(val).addImm(Shift);
-    constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
-    return &*Mov;
-  }
-  return nullptr;
+  auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val).addImm(Shift);
+  constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
+  return &*Mov;
 }
 
 MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm32(
-    Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &builder,
-    bool inv) {
+    Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
+    bool Inv) {
 
   unsigned int Op;
   if (DstSize == 128) {
     if (Bits.getHiBits(64) != Bits.getLoBits(64))
       return nullptr;
-    Op = inv ? AArch64::MVNIv4i32 : AArch64::MOVIv4i32;
+    Op = Inv ? AArch64::MVNIv4i32 : AArch64::MOVIv4i32;
   } else {
-    Op = inv ? AArch64::MVNIv2i32 : AArch64::MOVIv2i32;
+    Op = Inv ? AArch64::MVNIv2i32 : AArch64::MOVIv2i32;
   }
 
-  uint64_t val = Bits.zextOrTrunc(64).getZExtValue();
-  bool isAdvSIMDModImm = false;
+  uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
   uint64_t Shift;
 
-  if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType1(val))) {
-    val = AArch64_AM::encodeAdvSIMDModImmType1(val);
+  if (AArch64_AM::isAdvSIMDModImmType1(Val)) {
+    Val = AArch64_AM::encodeAdvSIMDModImmType1(Val);
     Shift = 0;
-  } else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType2(val))) {
-    val = AArch64_AM::encodeAdvSIMDModImmType2(val);
+  } else if (AArch64_AM::isAdvSIMDModImmType2(Val)) {
+    Val = AArch64_AM::encodeAdvSIMDModImmType2(Val);
     Shift = 8;
-  } else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType3(val))) {
-    val = AArch64_AM::encodeAdvSIMDModImmType3(val);
+  } else if (AArch64_AM::isAdvSIMDModImmType3(Val)) {
+    Val = AArch64_AM::encodeAdvSIMDModImmType3(Val);
     Shift = 16;
-  } else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType4(val))) {
-    val = AArch64_AM::encodeAdvSIMDModImmType4(val);
+  } else if (AArch64_AM::isAdvSIMDModImmType4(Val)) {
+    Val = AArch64_AM::encodeAdvSIMDModImmType4(Val);
     Shift = 24;
-  }
+  } else
+    return nullptr;
 
-  if (isAdvSIMDModImm) {
-    auto Mov = builder.buildInstr(Op, {Dst}, {}).addImm(val).addImm(Shift);
-    constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
-    return &*Mov;
-  }
-  return nullptr;
+  auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val).addImm(Shift);
+  constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
+  return &*Mov;
 }
 
 MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm64(
-    Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &builder) {
+    Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
 
   unsigned int Op;
   if (DstSize == 128) {
@@ -5555,10 +5548,10 @@ MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm64(
     Op = AArch64::MOVID;
   }
 
-  uint64_t val = Bits.zextOrTrunc(64).getZExtValue();
-  if (AArch64_AM::isAdvSIMDModImmType10(val)) {
-    val = AArch64_AM::encodeAdvSIMDModImmType10(val);
-    auto Mov = builder.buildInstr(Op, {Dst}, {}).addImm(val);
+  uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
+  if (AArch64_AM::isAdvSIMDModImmType10(Val)) {
+    Val = AArch64_AM::encodeAdvSIMDModImmType10(Val);
+    auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
     constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
     return &*Mov;
   }
@@ -5566,68 +5559,62 @@ MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm64(
 }
 
 MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm321s(
-    Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &builder,
-    bool inv) {
+    Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
+    bool Inv) {
 
   unsigned int Op;
   if (DstSize == 128) {
     if (Bits.getHiBits(64) != Bits.getLoBits(64))
       return nullptr;
-    Op = inv ? AArch64::MVNIv4s_msl : AArch64::MOVIv4s_msl;
+    Op = Inv ? AArch64::MVNIv4s_msl : AArch64::MOVIv4s_msl;
   } else {
-    Op = inv ? AArch64::MVNIv2s_msl : AArch64::MOVIv2s_msl;
+    Op = Inv ? AArch64::MVNIv2s_msl : AArch64::MOVIv2s_msl;
   }
 
-  uint64_t val = Bits.zextOrTrunc(64).getZExtValue();
-  bool isAdvSIMDModImm = false;
+  uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
   uint64_t Shift;
 
-  if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType7(val))) {
-    val = AArch64_AM::encodeAdvSIMDModImmType7(val);
+  if (AArch64_AM::isAdvSIMDModImmType7(Val)) {
+    Val = AArch64_AM::encodeAdvSIMDModImmType7(Val);
     Shift = 264;
-  } else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType8(val))) {
-    val = AArch64_AM::encodeAdvSIMDModImmType8(val);
+  } else if (AArch64_AM::isAdvSIMDModImmType8(Val)) {
+    Val = AArch64_AM::encodeAdvSIMDModImmType8(Val);
     Shift = 272;
-  }
-  if (isAdvSIMDModImm) {
-    auto Mov = builder.buildInstr(Op, {Dst}, {}).addImm(val).addImm(Shift);
-    constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
-    return &*Mov;
-  }
-  return nullptr;
+  } else
+    return nullptr;
+
+  auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val).addImm(Shift);
+  constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
+  return &*Mov;
 }
 
 MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImmFP(
-    Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &builder) {
+    Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
 
   unsigned int Op;
-  bool isWide = false;
+  bool IsWide = false;
   if (DstSize == 128) {
     if (Bits.getHiBits(64) != Bits.getLoBits(64))
       return nullptr;
     // Need to deal with 4f32
     Op = AArch64::FMOVv2f64_ns;
-    isWide = true;
+    IsWide = true;
   } else {
     Op = AArch64::FMOVv2f32_ns;
   }
 
-  uint64_t val = Bits.zextOrTrunc(64).getZExtValue();
-  bool isAdvSIMDModImm = false;
+  uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
 
-  if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType11(val))) {
-    val = AArch64_AM::encodeAdvSIMDModImmType7(val);
-  } else if (isWide &&
-             (isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType12(val))) {
-    val = AArch64_AM::encodeAdvSIMDModImmType12(val);
-  }
+  if (AArch64_AM::isAdvSIMDModImmType11(Val)) {
+    Val = AArch64_AM::encodeAdvSIMDModImmType11(Val);
+  } else if (IsWide && AArch64_AM::isAdvSIMDModImmType12(Val)) {
+    Val = AArch64_AM::encodeAdvSIMDModImmType12(Val);
+  } else
+    return nullptr;
 
-  if (isAdvSIMDModImm) {
-    auto Mov = builder.buildInstr(Op, {Dst}, {}).addImm(val);
-    constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
-    return &*Mov;
-  }
-  return nullptr;
+  auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
+  constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
+  return &*Mov;
 }
 
 MachineInstr *
@@ -5659,22 +5646,22 @@ AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
   if (CV->getSplatValue()) {
     APInt DefBits = APInt::getSplat(DstSize, CV->getUniqueInteger());
     MachineInstr *NewOp;
-    bool inv = false;
+    bool Inv = false;
     if ((NewOp = tryAdvSIMDModImm64(Dst, DstSize, DefBits, MIRBuilder)) ||
-        (NewOp = tryAdvSIMDModImm32(Dst, DstSize, DefBits, MIRBuilder, inv)) ||
+        (NewOp = tryAdvSIMDModImm32(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
         (NewOp =
-             tryAdvSIMDModImm321s(Dst, DstSize, DefBits, MIRBuilder, inv)) ||
-        (NewOp = tryAdvSIMDModImm16(Dst, DstSize, DefBits, MIRBuilder, inv)) ||
+             tryAdvSIMDModImm321s(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
+        (NewOp = tryAdvSIMDModImm16(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
         (NewOp = tryAdvSIMDModImm8(Dst, DstSize, DefBits, MIRBuilder)) ||
         (NewOp = tryAdvSIMDModImmFP(Dst, DstSize, DefBits, MIRBuilder)))
       return NewOp;
 
     DefBits = ~DefBits;
-    inv = true;
-    if ((NewOp = tryAdvSIMDModImm32(Dst, DstSize, DefBits, MIRBuilder, inv)) ||
+    Inv = true;
+    if ((NewOp = tryAdvSIMDModImm32(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
         (NewOp =
-             tryAdvSIMDModImm321s(Dst, DstSize, DefBits, MIRBuilder, inv)) ||
-        (NewOp = tryAdvSIMDModImm16(Dst, DstSize, DefBits, MIRBuilder, inv)))
+             tryAdvSIMDModImm321s(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
+        (NewOp = tryAdvSIMDModImm16(Dst, DstSize, DefBits, MIRBuilder, Inv)))
       return NewOp;
   }
 



More information about the llvm-commits mailing list