[llvm] [AArch64] MOVI lowering (PR #185526)

David Green via llvm-commits llvm-commits at lists.llvm.org
Wed Apr 1 06:19:34 PDT 2026


https://github.com/davemgreen updated https://github.com/llvm/llvm-project/pull/185526

>From 413d21b35c37bad24ecdb8c902ce90d3fd131a5c Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Fri, 20 Mar 2026 16:36:20 +0000
Subject: [PATCH 1/3] [AArch64] MOVI lowering

This patch redoes how movi is lowered, adding an expandVectorMOVImm function
that, similar to expandMOVImm for scalar immediates, returns how the vector
immediate can be lowered. This is used to create a single MOVI ISD node that
can be used for all immediates. We then use the same method to work out if an
fp constant can be lowered to a vector move.

One test, qmovn.ll, hits an unfortunate case where demanded-bits is now able
to remove an and as the bits are unused after a truncation, leading to not
being able to generate a BSP.
---
 llvm/lib/Target/AArch64/AArch64Combine.td     |   11 +-
 llvm/lib/Target/AArch64/AArch64ExpandImm.cpp  |  141 +-
 llvm/lib/Target/AArch64/AArch64ExpandImm.h    |    8 +-
 .../Target/AArch64/AArch64ISelDAGToDAG.cpp    |  120 +-
 .../Target/AArch64/AArch64ISelLowering.cpp    |  308 +--
 .../lib/Target/AArch64/AArch64InstrFormats.td |   16 -
 llvm/lib/Target/AArch64/AArch64InstrInfo.td   |  111 +-
 .../GISel/AArch64InstructionSelector.cpp      |  514 ++---
 .../GISel/AArch64PostLegalizerCombiner.cpp    |    1 +
 .../GISel/AArch64PostLegalizerLowering.cpp    |   61 +
 .../AArch64/GISel/AArch64RegisterBankInfo.cpp |   12 +-
 .../GlobalISel/select-fp16-fconstant.mir      |    6 +-
 .../CodeGen/AArch64/arm64-build-vector.ll     |   26 +-
 .../test/CodeGen/AArch64/arm64-fp-imm-size.ll |   18 +-
 llvm/test/CodeGen/AArch64/arm64-fp-imm.ll     |    6 +-
 llvm/test/CodeGen/AArch64/arm64-fp128.ll      |   24 +-
 .../CodeGen/AArch64/arm64-neon-aba-abd.ll     |    9 +-
 llvm/test/CodeGen/AArch64/bf16-imm.ll         |   27 +-
 .../test/CodeGen/AArch64/bf16-instructions.ll |    6 +-
 llvm/test/CodeGen/AArch64/clmul-fixed.ll      | 2037 ++++++++---------
 llvm/test/CodeGen/AArch64/dup.ll              |    9 +-
 llvm/test/CodeGen/AArch64/f16-imm.ll          |   36 +-
 llvm/test/CodeGen/AArch64/f16-instructions.ll |   12 +-
 llvm/test/CodeGen/AArch64/fabs-combine.ll     |   19 +-
 llvm/test/CodeGen/AArch64/fcvt-fixed.ll       |  104 +-
 llvm/test/CodeGen/AArch64/fdiv-const.ll       |   36 +-
 .../AArch64/fp-maximumnum-minimumnum.ll       |    6 +-
 .../test/CodeGen/AArch64/fptosi-sat-scalar.ll |   36 +-
 .../test/CodeGen/AArch64/fptosi-sat-vector.ll |   48 +-
 .../test/CodeGen/AArch64/fptoui-sat-scalar.ll |    7 +-
 .../test/CodeGen/AArch64/fptoui-sat-vector.ll |  155 +-
 llvm/test/CodeGen/AArch64/frem-power2.ll      |    4 +-
 .../half-precision-signof-no-assert.ll        |    3 +-
 llvm/test/CodeGen/AArch64/isinf.ll            |    3 +-
 llvm/test/CodeGen/AArch64/known-never-nan.ll  |    9 +-
 llvm/test/CodeGen/AArch64/movi64_sve.ll       |  267 +--
 .../AArch64/neon-bitwise-instructions.ll      |  220 +-
 .../AArch64/neon-compare-instructions.ll      |   37 +-
 llvm/test/CodeGen/AArch64/neon-mov.ll         |  123 +-
 llvm/test/CodeGen/AArch64/qmovn.ll            |    8 +-
 .../sve-fixed-length-insert-vector-elt.ll     |    3 +-
 llvm/test/CodeGen/AArch64/vecreduce-fadd.ll   |    3 +-
 .../vecreduce-fmax-legalization-nan.ll        |   12 +-
 .../AArch64/vecreduce-fmax-legalization.ll    |   12 +-
 .../CodeGen/AArch64/vecreduce-fmaximum.ll     |   12 +-
 .../AArch64/AArch64SelectionDAGTest.cpp       |  155 +-
 46 files changed, 2097 insertions(+), 2704 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index fe953a627939d..285b3ab7ea026 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -273,7 +273,16 @@ def build_vector_to_vector_insert : GICombineRule<
   (apply [{ applyLowerBuildToInsertVecElt(*${root}, MRI, B); }])
 >;
 
-def build_vector_lowering : GICombineGroup<[build_vector_to_dup,
+def const_build_vector_to_fneg_matchdata : GIDefMatchData<"std::pair<APInt, unsigned>">;
+def const_build_vector_to_fneg : GICombineRule<
+  (defs root:$root, const_build_vector_to_fneg_matchdata:$matchinfo),
+  (match (wip_match_opcode G_BUILD_VECTOR):$root,
+          [{ return matchConstBuildVectorToFNeg(*${root}, MRI, STI, ${matchinfo}); }]),
+  (apply [{ applyConstBuildVectorToFNeg(*${root}, MRI, B, ${matchinfo}); }])
+>;
+
+def build_vector_lowering : GICombineGroup<[const_build_vector_to_fneg,
+                                            build_vector_to_dup,
                                             build_vector_to_vector_insert]>;
 
 def lower_vector_fcmp : GICombineRule<
diff --git a/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp b/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp
index f44cb8a0628d7..9f89efebea9e1 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp
@@ -10,8 +10,9 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "AArch64.h"
 #include "AArch64ExpandImm.h"
+#include "AArch64.h"
+#include "AArch64Subtarget.h"
 #include "MCTargetDesc/AArch64AddressingModes.h"
 
 using namespace llvm;
@@ -720,3 +721,141 @@ void AArch64_IMM::expandMOVImm(uint64_t Imm, unsigned BitSize,
   // four-instruction sequence.
   expandMOVImmSimple(Imm, BitSize, OneChunks, ZeroChunks, Insn);
 }
+
+bool AArch64_IMM::expandVectorMOVImm(
+    APInt Imm, const AArch64Subtarget *ST,
+    SmallVectorImpl<AArch64_IMM::ImmInsnModel> &Insn) {
+  assert((Imm.getBitWidth() == 64 || Imm.getBitWidth() == 128) &&
+         "Expected vector sized constant");
+  bool Is64Bit = Imm.getBitWidth() == 64;
+
+  if (ST->isNeonAvailable() && Imm.getHiBits(64) == Imm.getLoBits(64)) {
+    uint64_t Value = Imm.trunc(64).getZExtValue();
+    if (Value == 0) {
+      Insn.push_back({AArch64::FMOVD0, 0, 0});
+      return true;
+    }
+    if (AArch64_AM::isAdvSIMDModImmType10(Value)) {
+      Insn.push_back({Is64Bit ? AArch64::MOVID : AArch64::MOVIv2d_ns,
+                      AArch64_AM::encodeAdvSIMDModImmType10(Value), 0});
+      return true;
+    }
+    if (AArch64_AM::isAdvSIMDModImmType1(Value)) {
+      Insn.push_back({Is64Bit ? AArch64::MOVIv2i32 : AArch64::MOVIv4i32,
+                      AArch64_AM::encodeAdvSIMDModImmType1(Value), 0});
+      return true;
+    }
+    if (AArch64_AM::isAdvSIMDModImmType2(Value)) {
+      Insn.push_back({Is64Bit ? AArch64::MOVIv2i32 : AArch64::MOVIv4i32,
+                      AArch64_AM::encodeAdvSIMDModImmType2(Value), 8});
+      return true;
+    }
+    if (AArch64_AM::isAdvSIMDModImmType3(Value)) {
+      Insn.push_back({Is64Bit ? AArch64::MOVIv2i32 : AArch64::MOVIv4i32,
+                      AArch64_AM::encodeAdvSIMDModImmType3(Value), 16});
+      return true;
+    }
+    if (AArch64_AM::isAdvSIMDModImmType4(Value)) {
+      Insn.push_back({Is64Bit ? AArch64::MOVIv2i32 : AArch64::MOVIv4i32,
+                      AArch64_AM::encodeAdvSIMDModImmType4(Value), 24});
+      return true;
+    }
+    if (AArch64_AM::isAdvSIMDModImmType5(Value)) {
+      Insn.push_back({Is64Bit ? AArch64::MOVIv4i16 : AArch64::MOVIv8i16,
+                      AArch64_AM::encodeAdvSIMDModImmType5(Value), 0});
+      return true;
+    }
+    if (AArch64_AM::isAdvSIMDModImmType6(Value)) {
+      Insn.push_back({Is64Bit ? AArch64::MOVIv4i16 : AArch64::MOVIv8i16,
+                      AArch64_AM::encodeAdvSIMDModImmType6(Value), 8});
+      return true;
+    }
+    if (AArch64_AM::isAdvSIMDModImmType7(Value)) {
+      Insn.push_back({Is64Bit ? AArch64::MOVIv2s_msl : AArch64::MOVIv4s_msl,
+                      AArch64_AM::encodeAdvSIMDModImmType7(Value), 264});
+      return true;
+    }
+    if (AArch64_AM::isAdvSIMDModImmType8(Value)) {
+      Insn.push_back({Is64Bit ? AArch64::MOVIv2s_msl : AArch64::MOVIv4s_msl,
+                      AArch64_AM::encodeAdvSIMDModImmType8(Value), 272});
+      return true;
+    }
+    if (AArch64_AM::isAdvSIMDModImmType9(Value)) {
+      Insn.push_back({Is64Bit ? AArch64::MOVIv8b_ns : AArch64::MOVIv16b_ns,
+                      AArch64_AM::encodeAdvSIMDModImmType9(Value), 0});
+      return true;
+    }
+    if (AArch64_AM::isAdvSIMDModImmType11(Value)) {
+      Insn.push_back({Is64Bit ? AArch64::FMOVv2f32_ns : AArch64::FMOVv4f32_ns,
+                      AArch64_AM::encodeAdvSIMDModImmType11(Value), 0});
+      return true;
+    }
+    if (AArch64_AM::isAdvSIMDModImmType12(Value)) {
+      Insn.push_back({Is64Bit ? AArch64::FMOVDi : AArch64::FMOVv2f64_ns,
+                      AArch64_AM::encodeAdvSIMDModImmType12(Value), 0});
+      return true;
+    }
+
+    APInt NotImm = ~Imm;
+    Value = NotImm.trunc(64).getZExtValue();
+    if (AArch64_AM::isAdvSIMDModImmType1(Value)) {
+      Insn.push_back({Is64Bit ? AArch64::MVNIv2i32 : AArch64::MVNIv4i32,
+                      AArch64_AM::encodeAdvSIMDModImmType1(Value), 0});
+      return true;
+    }
+    if (AArch64_AM::isAdvSIMDModImmType2(Value)) {
+      Insn.push_back({Is64Bit ? AArch64::MVNIv2i32 : AArch64::MVNIv4i32,
+                      AArch64_AM::encodeAdvSIMDModImmType2(Value), 8});
+      return true;
+    }
+    if (AArch64_AM::isAdvSIMDModImmType3(Value)) {
+      Insn.push_back({Is64Bit ? AArch64::MVNIv2i32 : AArch64::MVNIv4i32,
+                      AArch64_AM::encodeAdvSIMDModImmType3(Value), 16});
+      return true;
+    }
+    if (AArch64_AM::isAdvSIMDModImmType4(Value)) {
+      Insn.push_back({Is64Bit ? AArch64::MVNIv2i32 : AArch64::MVNIv4i32,
+                      AArch64_AM::encodeAdvSIMDModImmType4(Value), 24});
+      return true;
+    }
+    if (AArch64_AM::isAdvSIMDModImmType5(Value)) {
+      Insn.push_back({Is64Bit ? AArch64::MVNIv4i16 : AArch64::MVNIv8i16,
+                      AArch64_AM::encodeAdvSIMDModImmType5(Value), 0});
+      return true;
+    }
+    if (AArch64_AM::isAdvSIMDModImmType6(Value)) {
+      Insn.push_back({Is64Bit ? AArch64::MVNIv4i16 : AArch64::MVNIv8i16,
+                      AArch64_AM::encodeAdvSIMDModImmType6(Value), 8});
+      return true;
+    }
+    if (AArch64_AM::isAdvSIMDModImmType7(Value)) {
+      Insn.push_back({Is64Bit ? AArch64::MVNIv2s_msl : AArch64::MVNIv4s_msl,
+                      AArch64_AM::encodeAdvSIMDModImmType7(Value), 264});
+      return true;
+    }
+    if (AArch64_AM::isAdvSIMDModImmType8(Value)) {
+      Insn.push_back({Is64Bit ? AArch64::MVNIv2s_msl : AArch64::MVNIv4s_msl,
+                      AArch64_AM::encodeAdvSIMDModImmType8(Value), 272});
+      return true;
+    }
+  }
+
+  // TODO: We should be able to support 64-bit destinations too
+  if (ST->hasSVE() && !Is64Bit && Imm.getHiBits(64) == Imm.getLoBits(64)) {
+    // See if we can make use of the SVE dup instruction.
+    APInt Val64 = Imm.trunc(64);
+    int32_t ImmVal, ShiftVal;
+    if (AArch64_AM::isSVECpyDupImm(64, Val64.getSExtValue(), ImmVal,
+                                   ShiftVal)) {
+      Insn.push_back({AArch64::DUP_ZI_D, (uint64_t)ImmVal, (uint64_t)ShiftVal});
+      return true;
+    }
+    uint64_t Encoding;
+    if (AArch64_AM::isSVELogicalImm(64, Val64.getZExtValue(), Encoding)) {
+      Insn.push_back({AArch64::DUPM_ZI, Encoding, 0});
+      return true;
+    }
+  }
+
+  return false;
+}
\ No newline at end of file
diff --git a/llvm/lib/Target/AArch64/AArch64ExpandImm.h b/llvm/lib/Target/AArch64/AArch64ExpandImm.h
index 42c97d2c3e9b5..d50cf3723cfc6 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandImm.h
+++ b/llvm/lib/Target/AArch64/AArch64ExpandImm.h
@@ -13,10 +13,13 @@
 #ifndef LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64EXPANDIMM_H
 #define LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64EXPANDIMM_H
 
+#include "llvm/ADT/APInt.h"
 #include "llvm/ADT/SmallVector.h"
 
 namespace llvm {
 
+class AArch64Subtarget;
+
 namespace AArch64_IMM {
 
 struct ImmInsnModel {
@@ -26,7 +29,10 @@ struct ImmInsnModel {
 };
 
 void expandMOVImm(uint64_t Imm, unsigned BitSize,
-		  SmallVectorImpl<ImmInsnModel> &Insn);
+                  SmallVectorImpl<ImmInsnModel> &Insn);
+
+bool expandVectorMOVImm(APInt Imm, const AArch64Subtarget *ST,
+                        SmallVectorImpl<ImmInsnModel> &Insn);
 
 } // end namespace AArch64_IMM
 
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 9d3e707cccaaa..c4e007e802d5a 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -10,6 +10,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "AArch64ExpandImm.h"
 #include "AArch64MachineFunctionInfo.h"
 #include "AArch64TargetMachine.h"
 #include "MCTargetDesc/AArch64AddressingModes.h"
@@ -193,10 +194,11 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
       return false;
 
     APInt Imm;
-    if (Op.getOperand(1).getOpcode() == AArch64ISD::MOVIshift)
-      Imm = APInt(VT.getScalarSizeInBits(),
-                  Op.getOperand(1).getConstantOperandVal(0)
-                      << Op.getOperand(1).getConstantOperandVal(1));
+    if (Op.getOperand(1).getOpcode() == AArch64ISD::MOVI &&
+        Op.getOperand(1).getConstantOperandAPInt(0).isSplat(
+            VT.getScalarSizeInBits()))
+      Imm = Op.getOperand(1).getConstantOperandAPInt(0).trunc(
+          VT.getScalarSizeInBits());
     else if (Op.getOperand(1).getOpcode() == AArch64ISD::DUP &&
              isa<ConstantSDNode>(Op.getOperand(1).getOperand(0)))
       Imm = APInt(VT.getScalarSizeInBits(),
@@ -4175,12 +4177,11 @@ bool AArch64DAGToDAGISel::SelectCVTFixedPointVec(SDValue N, SDValue &FixedPos,
 
   APFloat FVal(0.0);
   switch (N->getOpcode()) {
-  case AArch64ISD::MOVIshift:
-    FVal = ImmToFloat(APInt(RegWidth, N.getConstantOperandVal(0)
-                                          << N.getConstantOperandVal(1)));
-    break;
-  case AArch64ISD::FMOV:
-    FVal = ImmToFloat(DecodeFMOVImm(N.getConstantOperandVal(0), RegWidth));
+  case AArch64ISD::MOVI:
+    if (N.getConstantOperandAPInt(0).isSplat(RegWidth))
+      FVal = ImmToFloat(N.getConstantOperandAPInt(0).trunc(RegWidth));
+    else
+      return false;
     break;
   case AArch64ISD::DUP:
     if (isa<ConstantSDNode>(N.getOperand(0)))
@@ -4965,6 +4966,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
 
   // Few custom selection stuff.
   EVT VT = Node->getValueType(0);
+  auto *TLI = static_cast<const AArch64TargetLowering *>(getTargetLowering());
 
   switch (Node->getOpcode()) {
   default:
@@ -5054,11 +5056,107 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
     break;
   }
 
+  case ISD::ConstantFP:
+    // Leave legal fmov cases to tablegen.
+    if (TLI->isFPImmLegalAsFMov(cast<ConstantFPSDNode>(Node)->getValueAPF(),
+                                VT))
+      break;
+    [[fallthrough]];
+  case AArch64ISD::MOVI: {
+    APInt DefBits;
+    if (Node->getOpcode() == ISD::ConstantFP) {
+      APInt Imm = *Node->bitcastToAPInt();
+      DefBits = Imm.getBitWidth() >= 64 ? Imm : APInt::getSplat(64, Imm);
+    } else {
+      APInt Imm = Node->getConstantOperandAPInt(0);
+      DefBits = APInt::getSplat(VT.getSizeInBits(), Imm);
+    }
+
+    SmallVector<AArch64_IMM::ImmInsnModel> Insns;
+    if (AArch64_IMM::expandVectorMOVImm(DefBits, Subtarget, Insns)) {
+      SDNode *Src = nullptr;
+      SDLoc DL(Node);
+      EVT FVT = VT.getSizeInBits() < 64 ? MVT::f64 : VT;
+
+      for (AArch64_IMM::ImmInsnModel Insn : Insns) {
+        switch (Insn.Opcode) {
+        case AArch64::FMOVD0:
+          Src = CurDAG->getMachineNode(Insn.Opcode, DL, MVT::f64);
+          if (FVT.getSizeInBits() > 64)
+            Src = CurDAG->getMachineNode(
+                TargetOpcode::SUBREG_TO_REG, DL, VT, SDValue(Src, 0),
+                CurDAG->getTargetConstant(AArch64::dsub, DL, MVT::i32));
+          break;
+        case AArch64::MOVID:
+        case AArch64::MOVIv2d_ns:
+        case AArch64::MOVIv8b_ns:
+        case AArch64::MOVIv16b_ns:
+        case AArch64::FMOVv2f32_ns:
+        case AArch64::FMOVv4f32_ns:
+        case AArch64::FMOVDi:
+        case AArch64::FMOVv2f64_ns:
+          Src = CurDAG->getMachineNode(
+              Insn.Opcode, DL, FVT,
+              CurDAG->getTargetConstant(Insn.Op1, DL, MVT::i64));
+          break;
+        case AArch64::MOVIv2i32:
+        case AArch64::MOVIv4i32:
+        case AArch64::MOVIv4i16:
+        case AArch64::MOVIv8i16:
+        case AArch64::MOVIv2s_msl:
+        case AArch64::MOVIv4s_msl:
+        case AArch64::MVNIv2i32:
+        case AArch64::MVNIv4i32:
+        case AArch64::MVNIv4i16:
+        case AArch64::MVNIv8i16:
+        case AArch64::MVNIv2s_msl:
+        case AArch64::MVNIv4s_msl:
+          Src = CurDAG->getMachineNode(
+              Insn.Opcode, DL, FVT,
+              CurDAG->getTargetConstant(Insn.Op1, DL, MVT::i64),
+              CurDAG->getTargetConstant(Insn.Op2, DL, MVT::i64));
+          break;
+        case AArch64::DUPM_ZI:
+          Src = CurDAG->getMachineNode(
+              Insn.Opcode, DL, MVT::nxv2f64,
+              CurDAG->getTargetConstant(Insn.Op1, DL, MVT::i64));
+          Src = CurDAG
+                    ->getTargetExtractSubreg(AArch64::zsub, DL, FVT,
+                                             SDValue(Src, 0))
+                    .getNode();
+          break;
+        case AArch64::DUP_ZI_D:
+          Src = CurDAG->getMachineNode(
+              Insn.Opcode, DL, MVT::nxv2f64,
+              CurDAG->getTargetConstant(Insn.Op1, DL, MVT::i64),
+              CurDAG->getTargetConstant(Insn.Op2, DL, MVT::i64));
+          Src = CurDAG
+                    ->getTargetExtractSubreg(AArch64::zsub, DL, FVT,
+                                             SDValue(Src, 0))
+                    .getNode();
+          break;
+        default:
+          llvm_unreachable("Unexpected node in expandVectorMOVImm\n");
+        }
+      }
+
+      if (VT.getSizeInBits() < FVT.getSizeInBits())
+        Src = CurDAG->getMachineNode(
+            TargetOpcode::EXTRACT_SUBREG, DL, VT, SDValue(Src, 0),
+            CurDAG->getTargetConstant(VT.getSizeInBits() == 16 ? AArch64::hsub
+                                                               : AArch64::ssub,
+                                      DL, MVT::i32));
+
+      ReplaceNode(Node, Src);
+      return;
+    }
+    break;
+  }
+
   case ISD::FrameIndex: {
     // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
     int FI = cast<FrameIndexSDNode>(Node)->getIndex();
     unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
-    const TargetLowering *TLI = getTargetLowering();
     SDValue TFI = CurDAG->getTargetFrameIndex(
         FI, TLI->getPointerTy(CurDAG->getDataLayout()));
     SDLoc DL(Node);
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 38db1ac4a2fb9..8a1a9da8cbaa8 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -2812,40 +2812,18 @@ void AArch64TargetLowering::computeKnownBitsForTargetNode(
     break;
   }
   case AArch64ISD::MOVI: {
-    Known = KnownBits::makeConstant(
-        APInt(Known.getBitWidth(), Op->getConstantOperandVal(0)));
-    break;
-  }
-  case AArch64ISD::MOVIshift: {
-    Known = KnownBits::makeConstant(
-        APInt(Known.getBitWidth(), Op->getConstantOperandVal(0)
-                                       << Op->getConstantOperandVal(1)));
-    break;
-  }
-  case AArch64ISD::MOVImsl: {
-    unsigned ShiftAmt = AArch64_AM::getShiftValue(Op->getConstantOperandVal(1));
-    Known = KnownBits::makeConstant(APInt(
-        Known.getBitWidth(), ~(~Op->getConstantOperandVal(0) << ShiftAmt)));
-    break;
-  }
-  case AArch64ISD::MOVIedit: {
-    Known = KnownBits::makeConstant(APInt(
-        Known.getBitWidth(),
-        AArch64_AM::decodeAdvSIMDModImmType10(Op->getConstantOperandVal(0))));
-    break;
-  }
-  case AArch64ISD::MVNIshift: {
-    Known = KnownBits::makeConstant(
-        APInt(Known.getBitWidth(),
-              ~(Op->getConstantOperandVal(0) << Op->getConstantOperandVal(1)),
-              /*isSigned*/ false, /*implicitTrunc*/ true));
-    break;
-  }
-  case AArch64ISD::MVNImsl: {
-    unsigned ShiftAmt = AArch64_AM::getShiftValue(Op->getConstantOperandVal(1));
-    Known = KnownBits::makeConstant(
-        APInt(Known.getBitWidth(), (~Op->getConstantOperandVal(0) << ShiftAmt),
-              /*isSigned*/ false, /*implicitTrunc*/ true));
+    // MOVI has any type, the constant is the i64 value. Get the full width
+    // constant value and find the common bits of size EltSize.
+    EVT VT = Op.getValueType();
+    APInt Imm =
+        APInt::getSplat(VT.getSizeInBits(), Op->getConstantOperandAPInt(0));
+    unsigned EltSize = Known.getBitWidth();
+    unsigned Lanes = VT.getSizeInBits() / EltSize;
+    Known.setAllConflict();
+    for (unsigned I = 0; I < Lanes; I++)
+      if (DemandedElts[I])
+        Known = Known.intersectWith(
+            KnownBits::makeConstant(Imm.lshr(I * EltSize).trunc(EltSize)));
     break;
   }
   case AArch64ISD::LOADgot:
@@ -13290,6 +13268,14 @@ bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
   bool IsLegal = isFPImmLegalAsFMov(Imm, VT);
   const APInt ImmInt = Imm.bitcastToAPInt();
 
+  if (!IsLegal && ImmInt.getBitWidth() <= 128) {
+    // Try duplicating it to all lanes and see if we can use a vector movi.
+    APInt DefBits =
+        ImmInt.getBitWidth() == 128 ? ImmInt : APInt::getSplat(64, ImmInt);
+    SmallVector<AArch64_IMM::ImmInsnModel> Insn;
+    IsLegal = AArch64_IMM::expandVectorMOVImm(DefBits, Subtarget, Insn);
+  }
+
   // If we can not materialize in immediate field for fmov, check if the
   // value can be encoded as the immediate operand of a logical instruction.
   // The immediate value will be created with either MOVZ, MOVN, or ORR.
@@ -15515,27 +15501,6 @@ static bool resolveBuildVector(BuildVectorSDNode *BVN, APInt &CnstBits,
   return false;
 }
 
-// Try 64-bit splatted SIMD immediate.
-static SDValue tryAdvSIMDModImm64(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
-                                 const APInt &Bits) {
-  if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
-    uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
-    EVT VT = Op.getValueType();
-    MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v2i64 : MVT::f64;
-
-    if (AArch64_AM::isAdvSIMDModImmType10(Value)) {
-      Value = AArch64_AM::encodeAdvSIMDModImmType10(Value);
-
-      SDLoc DL(Op);
-      SDValue Mov =
-          DAG.getNode(NewOp, DL, MovTy, DAG.getConstant(Value, DL, MVT::i32));
-      return DAG.getNode(AArch64ISD::NVCAST, DL, VT, Mov);
-    }
-  }
-
-  return SDValue();
-}
-
 // Try 32-bit splatted SIMD immediate.
 static SDValue tryAdvSIMDModImm32(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
                                   const APInt &Bits,
@@ -15634,89 +15599,6 @@ static SDValue tryAdvSIMDModImm16(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
   return SDValue();
 }
 
-// Try 32-bit splatted SIMD immediate with shifted ones.
-static SDValue tryAdvSIMDModImm321s(unsigned NewOp, SDValue Op,
-                                    SelectionDAG &DAG, const APInt &Bits) {
-  if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
-    uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
-    EVT VT = Op.getValueType();
-    MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
-    bool isAdvSIMDModImm = false;
-    uint64_t Shift;
-
-    if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType7(Value))) {
-      Value = AArch64_AM::encodeAdvSIMDModImmType7(Value);
-      Shift = 264;
-    }
-    else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType8(Value))) {
-      Value = AArch64_AM::encodeAdvSIMDModImmType8(Value);
-      Shift = 272;
-    }
-
-    if (isAdvSIMDModImm) {
-      SDLoc DL(Op);
-      SDValue Mov =
-          DAG.getNode(NewOp, DL, MovTy, DAG.getConstant(Value, DL, MVT::i32),
-                      DAG.getConstant(Shift, DL, MVT::i32));
-      return DAG.getNode(AArch64ISD::NVCAST, DL, VT, Mov);
-    }
-  }
-
-  return SDValue();
-}
-
-// Try 8-bit splatted SIMD immediate.
-static SDValue tryAdvSIMDModImm8(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
-                                 const APInt &Bits) {
-  if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
-    uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
-    EVT VT = Op.getValueType();
-    MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v16i8 : MVT::v8i8;
-
-    if (AArch64_AM::isAdvSIMDModImmType9(Value)) {
-      Value = AArch64_AM::encodeAdvSIMDModImmType9(Value);
-
-      SDLoc DL(Op);
-      SDValue Mov =
-          DAG.getNode(NewOp, DL, MovTy, DAG.getConstant(Value, DL, MVT::i32));
-      return DAG.getNode(AArch64ISD::NVCAST, DL, VT, Mov);
-    }
-  }
-
-  return SDValue();
-}
-
-// Try FP splatted SIMD immediate.
-static SDValue tryAdvSIMDModImmFP(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
-                                  const APInt &Bits) {
-  if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
-    uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
-    EVT VT = Op.getValueType();
-    bool isWide = (VT.getSizeInBits() == 128);
-    MVT MovTy;
-    bool isAdvSIMDModImm = false;
-
-    if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType11(Value))) {
-      Value = AArch64_AM::encodeAdvSIMDModImmType11(Value);
-      MovTy = isWide ? MVT::v4f32 : MVT::v2f32;
-    }
-    else if (isWide &&
-             (isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType12(Value))) {
-      Value = AArch64_AM::encodeAdvSIMDModImmType12(Value);
-      MovTy = MVT::v2f64;
-    }
-
-    if (isAdvSIMDModImm) {
-      SDLoc DL(Op);
-      SDValue Mov =
-          DAG.getNode(NewOp, DL, MovTy, DAG.getConstant(Value, DL, MVT::i32));
-      return DAG.getNode(AArch64ISD::NVCAST, DL, VT, Mov);
-    }
-  }
-
-  return SDValue();
-}
-
 // Specialized code to quickly find if PotentialBVec is a BuildVector that
 // consists of only the same constant int value, returned in reference arg
 // ConstVal
@@ -16046,28 +15928,6 @@ static SDValue NormalizeBuildVector(SDValue Op,
   return DAG.getBuildVector(VT, DL, Ops);
 }
 
-static SDValue trySVESplat64(SDValue Op, SelectionDAG &DAG,
-                             const AArch64Subtarget *ST, APInt &DefBits) {
-  EVT VT = Op.getValueType();
-  // TODO: We should be able to support 64-bit destinations too
-  if (!ST->hasSVE() || !VT.is128BitVector() ||
-      DefBits.getHiBits(64) != DefBits.getLoBits(64))
-    return SDValue();
-
-  // See if we can make use of the SVE dup instruction.
-  APInt Val64 = DefBits.trunc(64);
-  int32_t ImmVal, ShiftVal;
-  uint64_t Encoding;
-  if (!AArch64_AM::isSVECpyDupImm(64, Val64.getSExtValue(), ImmVal, ShiftVal) &&
-      !AArch64_AM::isSVELogicalImm(64, Val64.getZExtValue(), Encoding))
-    return SDValue();
-
-  SDLoc DL(Op);
-  SDValue SplatVal = DAG.getNode(AArch64ISD::DUP, DL, MVT::v2i64,
-                                 DAG.getConstant(Val64, DL, MVT::i64));
-  return DAG.getNode(AArch64ISD::NVCAST, DL, VT, SplatVal);
-}
-
 static SDValue ConstantBuildVector(SDValue Op, SelectionDAG &DAG,
                                    const AArch64Subtarget *ST) {
   EVT VT = Op.getValueType();
@@ -16077,71 +15937,44 @@ static SDValue ConstantBuildVector(SDValue Op, SelectionDAG &DAG,
   APInt DefBits(VT.getSizeInBits(), 0);
   APInt UndefBits(VT.getSizeInBits(), 0);
   BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
-  if (resolveBuildVector(BVN, DefBits, UndefBits)) {
-    auto TryMOVIWithBits = [&](APInt DefBits) {
-      SDValue NewOp;
-      if ((NewOp =
-               tryAdvSIMDModImm64(AArch64ISD::MOVIedit, Op, DAG, DefBits)) ||
-          (NewOp =
-               tryAdvSIMDModImm32(AArch64ISD::MOVIshift, Op, DAG, DefBits)) ||
-          (NewOp =
-               tryAdvSIMDModImm321s(AArch64ISD::MOVImsl, Op, DAG, DefBits)) ||
-          (NewOp =
-               tryAdvSIMDModImm16(AArch64ISD::MOVIshift, Op, DAG, DefBits)) ||
-          (NewOp = tryAdvSIMDModImm8(AArch64ISD::MOVI, Op, DAG, DefBits)) ||
-          (NewOp = tryAdvSIMDModImmFP(AArch64ISD::FMOV, Op, DAG, DefBits)))
-        return NewOp;
-
-      APInt NotDefBits = ~DefBits;
-      if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::MVNIshift, Op, DAG,
-                                      NotDefBits)) ||
-          (NewOp = tryAdvSIMDModImm321s(AArch64ISD::MVNImsl, Op, DAG,
-                                        NotDefBits)) ||
-          (NewOp =
-               tryAdvSIMDModImm16(AArch64ISD::MVNIshift, Op, DAG, NotDefBits)))
-        return NewOp;
-      return SDValue();
-    };
-    if (SDValue R = TryMOVIWithBits(DefBits))
-      return R;
-    if (SDValue R = TryMOVIWithBits(UndefBits))
-      return R;
-
-    // Try to materialise the constant using SVE when available.
-    if (SDValue R = trySVESplat64(Op, DAG, ST, DefBits))
-      return R;
-
-    // See if a fneg of the constant can be materialized with a MOVI, etc
-    auto TryWithFNeg = [&](APInt DefBits, MVT FVT) {
-      // FNegate each sub-element of the constant
-      assert(VT.getSizeInBits() % FVT.getScalarSizeInBits() == 0);
-      APInt Neg = APInt::getHighBitsSet(FVT.getSizeInBits(), 1)
-                      .zext(VT.getSizeInBits());
-      APInt NegBits(VT.getSizeInBits(), 0);
-      unsigned NumElts = VT.getSizeInBits() / FVT.getScalarSizeInBits();
-      for (unsigned i = 0; i < NumElts; i++)
-        NegBits |= Neg << (FVT.getScalarSizeInBits() * i);
-      NegBits = DefBits ^ NegBits;
-
-      // Try to create the new constants with MOVI, and if so generate a fneg
-      // for it.
-      if (SDValue NewOp = TryMOVIWithBits(NegBits)) {
-        SDLoc DL(Op);
-        MVT VFVT = NumElts == 1 ? FVT : MVT::getVectorVT(FVT, NumElts);
-        return DAG.getNode(
-            AArch64ISD::NVCAST, DL, VT,
-            DAG.getNode(ISD::FNEG, DL, VFVT,
-                        DAG.getNode(AArch64ISD::NVCAST, DL, VFVT, NewOp)));
-      }
-      return SDValue();
-    };
-    SDValue R;
-    if ((R = TryWithFNeg(DefBits, MVT::f32)) ||
-        (R = TryWithFNeg(DefBits, MVT::f64)) ||
-        (ST->hasFullFP16() && (R = TryWithFNeg(DefBits, MVT::f16))))
-      return R;
-  }
+  if (!resolveBuildVector(BVN, DefBits, UndefBits))
+    return SDValue();
 
+  SDLoc DL(Op);
+  SmallVector<AArch64_IMM::ImmInsnModel> Insns;
+  if (expandVectorMOVImm(DefBits, ST, Insns))
+    return DAG.getNode(AArch64ISD::MOVI, DL, VT,
+                       DAG.getConstant(DefBits.trunc(64), DL, MVT::i64));
+
+  // See if a fneg of the constant can be materialized with a MOVI, etc
+  auto TryWithFNeg = [&](APInt DefBits, MVT FVT) {
+    // FNegate each sub-element of the constant
+    assert(VT.getSizeInBits() % FVT.getScalarSizeInBits() == 0);
+    APInt Neg =
+        APInt::getHighBitsSet(FVT.getSizeInBits(), 1).zext(VT.getSizeInBits());
+    APInt NegBits(VT.getSizeInBits(), 0);
+    unsigned NumElts = VT.getSizeInBits() / FVT.getScalarSizeInBits();
+    for (unsigned i = 0; i < NumElts; i++)
+      NegBits |= Neg << (FVT.getScalarSizeInBits() * i);
+    NegBits = DefBits ^ NegBits;
+
+    SmallVector<AArch64_IMM::ImmInsnModel> Insns;
+    if (expandVectorMOVImm(NegBits, ST, Insns)) {
+      SDLoc DL(Op);
+      MVT VFVT = NumElts == 1 ? FVT : MVT::getVectorVT(FVT, NumElts);
+      SDValue MOVI =
+          DAG.getNode(AArch64ISD::MOVI, DL, VFVT,
+                      DAG.getConstant(NegBits.trunc(64), DL, MVT::i64));
+      return DAG.getNode(AArch64ISD::NVCAST, DL, VT,
+                         DAG.getNode(ISD::FNEG, DL, VFVT, MOVI));
+    }
+    return SDValue();
+  };
+  SDValue R;
+  if ((R = TryWithFNeg(DefBits, MVT::f32)) ||
+      (R = TryWithFNeg(DefBits, MVT::f64)) ||
+      (ST->hasFullFP16() && (R = TryWithFNeg(DefBits, MVT::f16))))
+    return R;
   return SDValue();
 }
 
@@ -21901,10 +21734,11 @@ static SDValue performConcatVectorsCombine(SDNode *N,
       return false;
 
     APInt Imm;
-    if (Op.getOperand(1).getOpcode() == AArch64ISD::MOVIshift)
-      Imm = APInt(VT.getScalarSizeInBits(),
-                  Op.getOperand(1).getConstantOperandVal(0)
-                      << Op.getOperand(1).getConstantOperandVal(1));
+    if (Op.getOperand(1).getOpcode() == AArch64ISD::MOVI &&
+        Op.getOperand(1).getConstantOperandAPInt(0).isSplat(
+            VT.getScalarSizeInBits()))
+      Imm = Op.getOperand(1).getConstantOperandAPInt(0).trunc(
+          VT.getScalarSizeInBits());
     else if (Op.getOperand(1).getOpcode() == AArch64ISD::DUP &&
              isa<ConstantSDNode>(Op.getOperand(1).getOperand(0)))
       Imm = APInt(VT.getScalarSizeInBits(),
@@ -22136,11 +21970,6 @@ static SDValue tryExtendDUPToExtractHigh(SDValue N, SelectionDAG &DAG) {
   case AArch64ISD::DUPLANE32:
   case AArch64ISD::DUPLANE64:
   case AArch64ISD::MOVI:
-  case AArch64ISD::MOVIshift:
-  case AArch64ISD::MOVIedit:
-  case AArch64ISD::MOVImsl:
-  case AArch64ISD::MVNIshift:
-  case AArch64ISD::MVNImsl:
     break;
   default:
     // FMOV could be supported, but isn't very useful, as it would only occur
@@ -33245,11 +33074,6 @@ bool AArch64TargetLowering::canCreateUndefOrPoisonForTargetNode(
   // TODO: Add more target nodes.
   switch (Op.getOpcode()) {
   case AArch64ISD::MOVI:
-  case AArch64ISD::MOVIedit:
-  case AArch64ISD::MOVImsl:
-  case AArch64ISD::MOVIshift:
-  case AArch64ISD::MVNImsl:
-  case AArch64ISD::MVNIshift:
   case AArch64ISD::VASHR:
   case AArch64ISD::VLSHR:
   case AArch64ISD::VSHL:
@@ -33262,17 +33086,11 @@ bool AArch64TargetLowering::canCreateUndefOrPoisonForTargetNode(
 bool AArch64TargetLowering::isTargetCanonicalConstantNode(SDValue Op) const {
   return Op.getOpcode() == AArch64ISD::DUP ||
          Op.getOpcode() == AArch64ISD::MOVI ||
-         Op.getOpcode() == AArch64ISD::MOVIshift ||
-         Op.getOpcode() == AArch64ISD::MOVImsl ||
-         Op.getOpcode() == AArch64ISD::MOVIedit ||
-         Op.getOpcode() == AArch64ISD::MVNIshift ||
-         Op.getOpcode() == AArch64ISD::MVNImsl ||
          // Ignoring fneg(movi(0)), because if it is folded to FPConstant(-0.0),
          // ISel will select fmov(mov i64 0x8000000000000000), resulting in a
          // fmov from fpr to gpr, which is more expensive than fneg(movi(0))
          (Op.getOpcode() == ISD::FNEG &&
-          Op.getOperand(0).getOpcode() == AArch64ISD::MOVIedit &&
-          Op.getOperand(0).getConstantOperandVal(0) == 0) ||
+          Op.getOperand(0).getOpcode() == AArch64ISD::MOVI) ||
          (Op.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
           Op.getOperand(0).getOpcode() == AArch64ISD::DUP) ||
          TargetLowering::isTargetCanonicalConstantNode(Op);
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 1774927e9297d..e0bad63a11cbd 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -1494,13 +1494,6 @@ def fpimm32XForm : SDNodeXForm<fpimm, [{
       return CurDAG->getTargetConstant(Enc, SDLoc(N), MVT::i32);
     }]>;
 
-def fpimm32SIMDModImmType4XForm : SDNodeXForm<fpimm, [{
-      uint32_t enc = AArch64_AM::encodeAdvSIMDModImmType4(N->getValueAPF()
-                                                          .bitcastToAPInt()
-                                                          .getZExtValue());
-      return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i32);
-    }]>;
-
 def fpimm64XForm : SDNodeXForm<fpimm, [{
       uint32_t Enc = AArch64_AM::getFP64Imm(N->getValueAPF());
       return CurDAG->getTargetConstant(Enc, SDLoc(N), MVT::i32);
@@ -1527,12 +1520,6 @@ def fpimm32 : Operand<f32>,
   let PrintMethod = "printFPImmOperand";
 }
 
-def fpimm32SIMDModImmType4 : FPImmLeaf<f32, [{
-      uint64_t Enc = Imm.bitcastToAPInt().getZExtValue();
-      return Enc != 0 && AArch64_AM::isAdvSIMDModImmType4(Enc << 32 | Enc);
-    }], fpimm32SIMDModImmType4XForm> {
-}
-
 def fpimm64 : Operand<f64>,
               FPImmLeaf<f64, [{
       return AArch64_AM::getFP64Imm(Imm) != -1;
@@ -1573,9 +1560,6 @@ def gi_fpimm32 : GICustomOperandRenderer<"renderFPImm32">,
   GISDNodeXFormEquiv<fpimm32XForm>;
 def gi_fpimm64 : GICustomOperandRenderer<"renderFPImm64">,
   GISDNodeXFormEquiv<fpimm64XForm>;
-def gi_fpimm32SIMDModImmType4 :
-    GICustomOperandRenderer<"renderFPImm32SIMDModImmType4">,
-  GISDNodeXFormEquiv<fpimm32SIMDModImmType4XForm>;
 
 // Vector lane operands
 class AsmVectorIndex<int Min, int Max, string NamePrefix=""> : AsmOperandClass {
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 08512f6ed8df1..05d383e63167b 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -509,8 +509,7 @@ def SDT_AArch64Insr  : SDTypeProfile<1, 2, [SDTCisVec<0>]>;
 def SDT_AArch64Zip   : SDTypeProfile<1, 2, [SDTCisVec<0>,
                                           SDTCisSameAs<0, 1>,
                                           SDTCisSameAs<0, 2>]>;
-def SDT_AArch64MOVIedit : SDTypeProfile<1, 1, [SDTCisInt<1>]>;
-def SDT_AArch64MOVIshift : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>;
+def SDT_AArch64MOVI : SDTypeProfile<1, 1, [SDTCisInt<1>]>;
 def SDT_AArch64vecimm : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                            SDTCisInt<2>, SDTCisInt<3>]>;
 def SDT_AArch64UnaryVec: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
@@ -920,13 +919,7 @@ def AArch64trn1      : SDNode<"AArch64ISD::TRN1", SDT_AArch64Zip>;
 def AArch64trn2      : SDNode<"AArch64ISD::TRN2", SDT_AArch64Zip>;
 
 // Vector immediate moves
-def AArch64movi_edit : SDNode<"AArch64ISD::MOVIedit", SDT_AArch64MOVIedit>;
-def AArch64movi_shift : SDNode<"AArch64ISD::MOVIshift", SDT_AArch64MOVIshift>;
-def AArch64movi_msl : SDNode<"AArch64ISD::MOVImsl", SDT_AArch64MOVIshift>;
-def AArch64mvni_shift : SDNode<"AArch64ISD::MVNIshift", SDT_AArch64MOVIshift>;
-def AArch64mvni_msl : SDNode<"AArch64ISD::MVNImsl", SDT_AArch64MOVIshift>;
-def AArch64movi : SDNode<"AArch64ISD::MOVI", SDT_AArch64MOVIedit>;
-def AArch64fmov : SDNode<"AArch64ISD::FMOV", SDT_AArch64MOVIedit>;
+def AArch64movi : SDNode<"AArch64ISD::MOVI", SDT_AArch64MOVI>;
 
 def AArch64rev16 : SDNode<"AArch64ISD::REV16", SDT_AArch64Rev>;
 def AArch64rev32 : SDNode<"AArch64ISD::REV32", SDT_AArch64Rev>;
@@ -8673,46 +8666,32 @@ def : InstAlias<"orr.4s $Vd, $imm", (ORRv4i32 V128:$Vd, imm0_255:$imm, 0)>;
 }
 
 // AdvSIMD FMOV
-let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+let isReMaterializable = 1, isAsCheapAsAMove = 1, hasSideEffects = 0 in {
 def FMOVv2f64_ns : SIMDModifiedImmVectorNoShift<1, 1, 0, 0b1111, V128, fpimm8,
-                                              "fmov", ".2d",
-                       [(set (v2f64 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>;
+                                              "fmov", ".2d", []>;
 def FMOVv2f32_ns : SIMDModifiedImmVectorNoShift<0, 0, 0, 0b1111, V64,  fpimm8,
-                                              "fmov", ".2s",
-                       [(set (v2f32 V64:$Rd), (AArch64fmov imm0_255:$imm8))]>;
+                                              "fmov", ".2s", []>;
 def FMOVv4f32_ns : SIMDModifiedImmVectorNoShift<1, 0, 0, 0b1111, V128, fpimm8,
-                                              "fmov", ".4s",
-                       [(set (v4f32 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>;
+                                              "fmov", ".4s", []>;
 let Predicates = [HasNEON, HasFullFP16] in {
 def FMOVv4f16_ns : SIMDModifiedImmVectorNoShift<0, 0, 1, 0b1111, V64,  fpimm8,
-                                              "fmov", ".4h",
-                       [(set (v4f16 V64:$Rd), (AArch64fmov imm0_255:$imm8))]>;
+                                              "fmov", ".4h", []>;
 def FMOVv8f16_ns : SIMDModifiedImmVectorNoShift<1, 0, 1, 0b1111, V128, fpimm8,
-                                              "fmov", ".8h",
-                       [(set (v8f16 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>;
+                                              "fmov", ".8h", []>;
 } // Predicates = [HasNEON, HasFullFP16]
 }
 
 // AdvSIMD MOVI
 
 // EDIT byte mask: scalar
-let isReMaterializable = 1, isAsCheapAsAMove = 1 in
+let isReMaterializable = 1, isAsCheapAsAMove = 1, hasSideEffects = 0 in
 def MOVID      : SIMDModifiedImmScalarNoShift<0, 1, 0b1110, "movi",
                     [(set FPR64:$Rd, simdimmtype10:$imm8)]>;
-// The movi_edit node has the immediate value already encoded, so we use
-// a plain imm0_255 here.
-def : Pat<(f64 (AArch64movi_edit imm0_255:$shift)),
-          (MOVID imm0_255:$shift)>;
-
-// EDIT byte mask: 2d
 
-// The movi_edit node has the immediate value already encoded, so we use
-// a plain imm0_255 in the pattern
-let isReMaterializable = 1, isAsCheapAsAMove = 1 in
+// EDIT byte mask: 2d
+let isReMaterializable = 1, isAsCheapAsAMove = 1, hasSideEffects = 0 in
 def MOVIv2d_ns   : SIMDModifiedImmVectorNoShift<1, 1, 0, 0b1110, V128,
-                                                simdimmtype10,
-                                                "movi", ".2d",
-                   [(set (v2i64 V128:$Rd), (AArch64movi_edit imm0_255:$imm8))]>;
+                                                simdimmtype10, "movi", ".2d", []>;
 
 let Predicates = [HasNEON] in {
 def : Pat<(f128 fpimm0), (f128 (MOVIv2d_ns (i32 0)))>;
@@ -8766,17 +8745,9 @@ def : Pat<(v8i8  immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;
 }
 
 // EDIT per word & halfword: 2s, 4h, 4s, & 8h
-let isReMaterializable = 1, isAsCheapAsAMove = 1 in
+let isReMaterializable = 1, isAsCheapAsAMove = 1, hasSideEffects = 0 in
 defm MOVI      : SIMDModifiedImmVectorShift<0, 0b10, 0b00, "movi">;
 
-let Predicates = [HasNEON] in {
-  // Using the MOVI to materialize fp constants.
-  def : Pat<(f32 fpimm32SIMDModImmType4:$in),
-            (EXTRACT_SUBREG (MOVIv2i32 (fpimm32SIMDModImmType4XForm f32:$in),
-                                       (i32 24)),
-                            ssub)>;
-}
-
 let Predicates = [HasNEON] in {
 def : InstAlias<"movi $Vd.4h, $imm", (MOVIv4i16 V64:$Vd,  imm0_255:$imm, 0), 0>;
 def : InstAlias<"movi $Vd.8h, $imm", (MOVIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
@@ -8789,38 +8760,23 @@ def : InstAlias<"movi.2s $Vd, $imm", (MOVIv2i32 V64:$Vd,  imm0_255:$imm, 0), 0>;
 def : InstAlias<"movi.4s $Vd, $imm", (MOVIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;
 }
 
-def : Pat<(v2i32 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
-          (MOVIv2i32 imm0_255:$imm8, imm:$shift)>;
-def : Pat<(v4i32 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
-          (MOVIv4i32 imm0_255:$imm8, imm:$shift)>;
-def : Pat<(v4i16 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
-          (MOVIv4i16 imm0_255:$imm8, imm:$shift)>;
-def : Pat<(v8i16 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
-          (MOVIv8i16 imm0_255:$imm8, imm:$shift)>;
-
-let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+let isReMaterializable = 1, isAsCheapAsAMove = 1, hasSideEffects = 0 in {
 // EDIT per word: 2s & 4s with MSL shifter
-def MOVIv2s_msl  : SIMDModifiedImmMoveMSL<0, 0, {1,1,0,?}, V64, "movi", ".2s",
-                      [(set (v2i32 V64:$Rd),
-                            (AArch64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>;
-def MOVIv4s_msl  : SIMDModifiedImmMoveMSL<1, 0, {1,1,0,?}, V128, "movi", ".4s",
-                      [(set (v4i32 V128:$Rd),
-                            (AArch64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>;
+def MOVIv2s_msl  : SIMDModifiedImmMoveMSL<0, 0, {1,1,0,?}, V64, "movi", ".2s", []>;
+def MOVIv4s_msl  : SIMDModifiedImmMoveMSL<1, 0, {1,1,0,?}, V128, "movi", ".4s", []>;
 
 // Per byte: 8b & 16b
 def MOVIv8b_ns   : SIMDModifiedImmVectorNoShift<0, 0, 0, 0b1110, V64,  imm0_255,
-                                                 "movi", ".8b",
-                       [(set (v8i8 V64:$Rd), (AArch64movi imm0_255:$imm8))]>;
+                                                 "movi", ".8b", []>;
 
 def MOVIv16b_ns  : SIMDModifiedImmVectorNoShift<1, 0, 0, 0b1110, V128, imm0_255,
-                                                 "movi", ".16b",
-                       [(set (v16i8 V128:$Rd), (AArch64movi imm0_255:$imm8))]>;
+                                                 "movi", ".16b", []>;
 }
 
 // AdvSIMD MVNI
 
 // EDIT per word & halfword: 2s, 4h, 4s, & 8h
-let isReMaterializable = 1, isAsCheapAsAMove = 1 in
+let isReMaterializable = 1, isAsCheapAsAMove = 1, hasSideEffects = 0 in
 defm MVNI      : SIMDModifiedImmVectorShift<1, 0b10, 0b00, "mvni">;
 
 let Predicates = [HasNEON] in {
@@ -8835,23 +8791,10 @@ def : InstAlias<"mvni.2s $Vd, $imm", (MVNIv2i32 V64:$Vd,  imm0_255:$imm, 0), 0>;
 def : InstAlias<"mvni.4s $Vd, $imm", (MVNIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;
 }
 
-def : Pat<(v2i32 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
-          (MVNIv2i32 imm0_255:$imm8, imm:$shift)>;
-def : Pat<(v4i32 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
-          (MVNIv4i32 imm0_255:$imm8, imm:$shift)>;
-def : Pat<(v4i16 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
-          (MVNIv4i16 imm0_255:$imm8, imm:$shift)>;
-def : Pat<(v8i16 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
-          (MVNIv8i16 imm0_255:$imm8, imm:$shift)>;
-
 // EDIT per word: 2s & 4s with MSL shifter
-let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
-def MVNIv2s_msl   : SIMDModifiedImmMoveMSL<0, 1, {1,1,0,?}, V64, "mvni", ".2s",
-                      [(set (v2i32 V64:$Rd),
-                            (AArch64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>;
-def MVNIv4s_msl   : SIMDModifiedImmMoveMSL<1, 1, {1,1,0,?}, V128, "mvni", ".4s",
-                      [(set (v4i32 V128:$Rd),
-                            (AArch64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>;
+let isReMaterializable = 1, isAsCheapAsAMove = 1, hasSideEffects = 0 in {
+def MVNIv2s_msl   : SIMDModifiedImmMoveMSL<0, 1, {1,1,0,?}, V64, "mvni", ".2s", []>;
+def MVNIv4s_msl   : SIMDModifiedImmMoveMSL<1, 1, {1,1,0,?}, V128, "mvni", ".4s", []>;
 }
 
 // SABA patterns for add(x, abs(y)) -> saba(x, y, 0)
@@ -9319,16 +9262,15 @@ defm USHR    : SIMDVectorRShiftBHSD<1, 0b00000, "ushr", AArch64vlshr>;
 defm USRA    : SIMDVectorRShiftBHSDTied<1, 0b00010, "usra",
                 TriOpFrag<(add_like node:$LHS, (AArch64vlshr node:$MHS, node:$RHS))> >;
 
-def VImm0080:         PatLeaf<(AArch64movi_shift (i32 128), (i32 0))>;
-def VImm00008000:     PatLeaf<(AArch64movi_shift (i32 128), (i32 8))>;
-def VImm0000000080000000: PatLeaf<(AArch64NvCast (v2f64 (fneg (AArch64NvCast (v4i32 (AArch64movi_shift (i32 128), (i32 24)))))))>;
+def VImm0080:         PatLeaf<(AArch64movi (i64 0x0080008000800080))>;
+def VImm00008000:     PatLeaf<(AArch64movi (i64 0x0000800000008000))>;
+def VImm0000000080000000: PatLeaf<(AArch64NvCast (v2f64 (fneg (AArch64movi (i64 0x8000000080000000)))))>;
 
 // RADDHN patterns for when RSHRN shifts by half the size of the vector element
 def : Pat<(v8i8 (trunc (AArch64vlshr (add (v8i16 V128:$Vn), VImm0080), (i32 8)))),
           (RADDHNv8i16_v8i8 V128:$Vn, (v8i16 (MOVIv2d_ns (i32 0))))>;
 def : Pat<(v4i16 (trunc (AArch64vlshr (add (v4i32 V128:$Vn), VImm00008000), (i32 16)))),
           (RADDHNv4i32_v4i16 V128:$Vn, (v4i32 (MOVIv2d_ns (i32 0))))>;
-let AddedComplexity = 5 in
 def : Pat<(v2i32 (trunc (AArch64vlshr (add (v2i64 V128:$Vn), VImm0000000080000000), (i32 32)))),
           (RADDHNv2i64_v2i32 V128:$Vn, (v2i64 (MOVIv2d_ns (i32 0))))>;
 def : Pat<(v8i8 (int_aarch64_neon_rshrn (v8i16 V128:$Vn), (i32 8))),
@@ -9339,6 +9281,7 @@ def : Pat<(v2i32 (int_aarch64_neon_rshrn (v2i64 V128:$Vn), (i32 32))),
           (RADDHNv2i64_v2i32 V128:$Vn, (v2i64 (MOVIv2d_ns (i32 0))))>;
 
 // RADDHN2 patterns for when RSHRN shifts by half the size of the vector element
+let AddedComplexity = 5 in {
 def : Pat<(v16i8 (concat_vectors
                  (v8i8 V64:$Vd),
                  (v8i8 (trunc (AArch64vlshr (add (v8i16 V128:$Vn), VImm0080), (i32 8)))))),
@@ -9351,13 +9294,13 @@ def : Pat<(v8i16 (concat_vectors
           (RADDHNv4i32_v8i16
                  (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn,
                  (v4i32 (MOVIv2d_ns (i32 0))))>;
-let AddedComplexity = 5 in
 def : Pat<(v4i32 (concat_vectors
                  (v2i32 V64:$Vd),
                  (v2i32 (trunc (AArch64vlshr (add (v2i64 V128:$Vn), VImm0000000080000000), (i32 32)))))),
           (RADDHNv2i64_v4i32
                  (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn,
                  (v2i64 (MOVIv2d_ns (i32 0))))>;
+}
 def : Pat<(v16i8 (concat_vectors
                  (v8i8 V64:$Vd),
                  (v8i8 (int_aarch64_neon_rshrn (v8i16 V128:$Vn), (i32 8))))),
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 2fa0fca176c88..97546adf94a8a 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -11,6 +11,7 @@
 /// \todo This should be generated by TableGen.
 //===----------------------------------------------------------------------===//
 
+#include "AArch64ExpandImm.h"
 #include "AArch64GlobalISelUtils.h"
 #include "AArch64InstrInfo.h"
 #include "AArch64MachineFunctionInfo.h"
@@ -167,30 +168,21 @@ class AArch64InstructionSelector : public InstructionSelector {
                                const RegisterBank &RB,
                                MachineIRBuilder &MIRBuilder) const;
 
+  /// Emit a sequence of instructions representing a constant \p Imm for a
+  /// vector register \p Dst via MOVI.
+  MachineInstr *emitConstantMOVVector(DstOp Dst, APInt Imm,
+                                      MachineIRBuilder &MIRBuilder,
+                                      MachineRegisterInfo &MRI);
+
   /// Emit a sequence of instructions representing a constant \p CV for a
   /// vector register \p Dst. (E.g. a MOV, or a load from a constant pool.)
   ///
   /// \returns the last instruction in the sequence on success, and nullptr
   /// otherwise.
-  MachineInstr *emitConstantVector(Register Dst, Constant *CV,
+  MachineInstr *emitConstantVector(Register Dst, Constant *CV, APInt Imm,
                                    MachineIRBuilder &MIRBuilder,
                                    MachineRegisterInfo &MRI);
 
-  MachineInstr *tryAdvSIMDModImm8(Register Dst, unsigned DstSize, APInt Bits,
-                                  MachineIRBuilder &MIRBuilder);
-
-  MachineInstr *tryAdvSIMDModImm16(Register Dst, unsigned DstSize, APInt Bits,
-                                   MachineIRBuilder &MIRBuilder, bool Inv);
-
-  MachineInstr *tryAdvSIMDModImm32(Register Dst, unsigned DstSize, APInt Bits,
-                                   MachineIRBuilder &MIRBuilder, bool Inv);
-  MachineInstr *tryAdvSIMDModImm64(Register Dst, unsigned DstSize, APInt Bits,
-                                   MachineIRBuilder &MIRBuilder);
-  MachineInstr *tryAdvSIMDModImm321s(Register Dst, unsigned DstSize, APInt Bits,
-                                     MachineIRBuilder &MIRBuilder, bool Inv);
-  MachineInstr *tryAdvSIMDModImmFP(Register Dst, unsigned DstSize, APInt Bits,
-                                   MachineIRBuilder &MIRBuilder);
-
   bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
                               MachineRegisterInfo &MRI);
   /// \returns true if a G_BUILD_VECTOR instruction \p MI can be selected as a
@@ -502,9 +494,6 @@ class AArch64InstructionSelector : public InstructionSelector {
                      int OpIdx = -1) const;
   void renderFPImm64(MachineInstrBuilder &MIB, const MachineInstr &MI,
                      int OpIdx = -1) const;
-  void renderFPImm32SIMDModImmType4(MachineInstrBuilder &MIB,
-                                    const MachineInstr &MI,
-                                    int OpIdx = -1) const;
 
   // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
   void materializeLargeCMVal(MachineInstr &I, const Value *V, unsigned OpFlags);
@@ -2373,12 +2362,15 @@ bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
       return false;
     LLVMContext &Ctx = MF.getFunction().getContext();
     Register Dst = I.getOperand(0).getReg();
+    LLT DstTy = MRI.getType(Dst);
     auto *CV = ConstantDataVector::getSplat(
-        MRI.getType(Dst).getNumElements(),
-        ConstantInt::get(
-            Type::getIntNTy(Ctx, MRI.getType(Dst).getScalarSizeInBits()),
-            ValAndVReg->Value.trunc(MRI.getType(Dst).getScalarSizeInBits())));
-    if (!emitConstantVector(Dst, CV, MIB, MRI))
+        DstTy.getNumElements(),
+        ConstantInt::get(Type::getIntNTy(Ctx, DstTy.getScalarSizeInBits()),
+                         ValAndVReg->Value.trunc(DstTy.getScalarSizeInBits())));
+    APInt Imm =
+        APInt::getSplat(DstTy.getSizeInBits(),
+                        ValAndVReg->Value.trunc(DstTy.getScalarSizeInBits()));
+    if (!emitConstantVector(Dst, CV, Imm, MIB, MRI))
       return false;
     I.eraseFromParent();
     return true;
@@ -2699,58 +2691,68 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
     const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
 
     const TargetRegisterClass &FPRRC = *getRegClassForTypeOnBank(DefTy, RB);
-    // For 16, 64, and 128b values, emit a constant pool load.
-    switch (DefSize) {
-    default:
-      llvm_unreachable("Unexpected destination size for G_FCONSTANT?");
-    case 32:
-    case 64: {
-      bool OptForSize = shouldOptForSize(&MF);
-      const auto &TLI = MF.getSubtarget().getTargetLowering();
-      // If TLI says that this fpimm is illegal, then we'll expand to a
-      // constant pool load.
-      if (TLI->isFPImmLegal(I.getOperand(1).getFPImm()->getValueAPF(),
-                            EVT::getFloatingPointVT(DefSize), OptForSize))
-        break;
-      [[fallthrough]];
-    }
-    case 16:
-    case 128: {
-      auto *FPImm = I.getOperand(1).getFPImm();
-      auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
-      if (!LoadMI) {
-        LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n");
-        return false;
+    assert(
+        (DefSize == 16 || DefSize == 32 || DefSize == 64 || DefSize == 128) &&
+        "Unexpected destination size for G_FCONSTANT?");
+
+    // Try to emit as a vector movi.
+    const ConstantFP *FPImm = I.getOperand(1).getFPImm();
+    APInt Imm = DefSize >= 64
+                    ? FPImm->getValue().bitcastToAPInt()
+                    : APInt::getSplat(64, FPImm->getValue().bitcastToAPInt());
+    if (auto *Mov = emitConstantMOVVector(
+            DefSize >= 64 ? DstOp(DefReg) : DstOp(&AArch64::FPR64RegClass), Imm,
+            MIB, MRI)) {
+      if (DefSize < 64) {
+        MIB.buildInstr(TargetOpcode::COPY, {DefReg}, {})
+            .addReg(Mov->getOperand(0).getReg(), {},
+                    DefSize == 16 ? AArch64::hsub : AArch64::ssub);
+        RBI.constrainGenericRegister(DefReg, FPRRC, MRI);
       }
-      MIB.buildCopy({DefReg}, {LoadMI->getOperand(0).getReg()});
       I.eraseFromParent();
-      return RBI.constrainGenericRegister(DefReg, FPRRC, MRI);
-    }
+      return true;
     }
 
-    assert((DefSize == 32 || DefSize == 64) && "Unexpected const def size");
-    // Either emit a FMOV, or emit a copy to emit a normal mov.
-    const Register DefGPRReg = MRI.createVirtualRegister(
-        DefSize == 32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
-    MachineOperand &RegOp = I.getOperand(0);
-    RegOp.setReg(DefGPRReg);
-    MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
-    MIB.buildCopy({DefReg}, {DefGPRReg});
+    // If TLI says that this fpimm is legal then prefer a copy from GPR movimm,
+    // otherwise we'll expand to a constant pool load
+    bool OptForSize = shouldOptForSize(&MF);
+    const auto &TLI = MF.getSubtarget().getTargetLowering();
+    if (TLI->isFPImmLegal(I.getOperand(1).getFPImm()->getValueAPF(),
+                          EVT::getFloatingPointVT(DefSize), OptForSize)) {
+      assert((DefSize == 32 || DefSize == 64) && "Unexpected const def size");
+      // Emit a copy to emit a normal movimm.
+      const Register DefGPRReg = MRI.createVirtualRegister(
+          DefSize == 32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
+      MachineOperand &RegOp = I.getOperand(0);
+      RegOp.setReg(DefGPRReg);
+      MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
+      MIB.buildCopy({DefReg}, {DefGPRReg});
+
+      if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
+        LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
+        return false;
+      }
 
-    if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
-      LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
-      return false;
-    }
+      MachineOperand &ImmOp = I.getOperand(1);
+      ImmOp.ChangeToImmediate(
+          ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
 
-    MachineOperand &ImmOp = I.getOperand(1);
-    ImmOp.ChangeToImmediate(
-        ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
+      const unsigned MovOpc =
+          DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
+      I.setDesc(TII.get(MovOpc));
+      constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+      return true;
+    }
 
-    const unsigned MovOpc =
-        DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
-    I.setDesc(TII.get(MovOpc));
-    constrainSelectedInstRegOperands(I, TII, TRI, RBI);
-    return true;
+    // Expand to a constant pool.
+    auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
+    if (!LoadMI) {
+      LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n");
+      return false;
+    }
+    MIB.buildCopy({DefReg}, {LoadMI->getOperand(0).getReg()});
+    I.eraseFromParent();
+    return RBI.constrainGenericRegister(DefReg, FPRRC, MRI);
   }
   case TargetOpcode::G_EXTRACT: {
     Register DstReg = I.getOperand(0).getReg();
@@ -5276,175 +5278,6 @@ bool AArch64InstructionSelector::selectUSMovFromExtend(
   return true;
 }
 
-MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm8(
-    Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
-  unsigned int Op;
-  if (DstSize == 128) {
-    if (Bits.getHiBits(64) != Bits.getLoBits(64))
-      return nullptr;
-    Op = AArch64::MOVIv16b_ns;
-  } else {
-    Op = AArch64::MOVIv8b_ns;
-  }
-
-  uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
-
-  if (AArch64_AM::isAdvSIMDModImmType9(Val)) {
-    Val = AArch64_AM::encodeAdvSIMDModImmType9(Val);
-    auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
-    constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
-    return &*Mov;
-  }
-  return nullptr;
-}
-
-MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm16(
-    Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
-    bool Inv) {
-
-  unsigned int Op;
-  if (DstSize == 128) {
-    if (Bits.getHiBits(64) != Bits.getLoBits(64))
-      return nullptr;
-    Op = Inv ? AArch64::MVNIv8i16 : AArch64::MOVIv8i16;
-  } else {
-    Op = Inv ? AArch64::MVNIv4i16 : AArch64::MOVIv4i16;
-  }
-
-  uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
-  uint64_t Shift;
-
-  if (AArch64_AM::isAdvSIMDModImmType5(Val)) {
-    Val = AArch64_AM::encodeAdvSIMDModImmType5(Val);
-    Shift = 0;
-  } else if (AArch64_AM::isAdvSIMDModImmType6(Val)) {
-    Val = AArch64_AM::encodeAdvSIMDModImmType6(Val);
-    Shift = 8;
-  } else
-    return nullptr;
-
-  auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val).addImm(Shift);
-  constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
-  return &*Mov;
-}
-
-MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm32(
-    Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
-    bool Inv) {
-
-  unsigned int Op;
-  if (DstSize == 128) {
-    if (Bits.getHiBits(64) != Bits.getLoBits(64))
-      return nullptr;
-    Op = Inv ? AArch64::MVNIv4i32 : AArch64::MOVIv4i32;
-  } else {
-    Op = Inv ? AArch64::MVNIv2i32 : AArch64::MOVIv2i32;
-  }
-
-  uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
-  uint64_t Shift;
-
-  if ((AArch64_AM::isAdvSIMDModImmType1(Val))) {
-    Val = AArch64_AM::encodeAdvSIMDModImmType1(Val);
-    Shift = 0;
-  } else if ((AArch64_AM::isAdvSIMDModImmType2(Val))) {
-    Val = AArch64_AM::encodeAdvSIMDModImmType2(Val);
-    Shift = 8;
-  } else if ((AArch64_AM::isAdvSIMDModImmType3(Val))) {
-    Val = AArch64_AM::encodeAdvSIMDModImmType3(Val);
-    Shift = 16;
-  } else if ((AArch64_AM::isAdvSIMDModImmType4(Val))) {
-    Val = AArch64_AM::encodeAdvSIMDModImmType4(Val);
-    Shift = 24;
-  } else
-    return nullptr;
-
-  auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val).addImm(Shift);
-  constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
-  return &*Mov;
-}
-
-MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm64(
-    Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
-
-  unsigned int Op;
-  if (DstSize == 128) {
-    if (Bits.getHiBits(64) != Bits.getLoBits(64))
-      return nullptr;
-    Op = AArch64::MOVIv2d_ns;
-  } else {
-    Op = AArch64::MOVID;
-  }
-
-  uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
-  if (AArch64_AM::isAdvSIMDModImmType10(Val)) {
-    Val = AArch64_AM::encodeAdvSIMDModImmType10(Val);
-    auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
-    constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
-    return &*Mov;
-  }
-  return nullptr;
-}
-
-MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm321s(
-    Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
-    bool Inv) {
-
-  unsigned int Op;
-  if (DstSize == 128) {
-    if (Bits.getHiBits(64) != Bits.getLoBits(64))
-      return nullptr;
-    Op = Inv ? AArch64::MVNIv4s_msl : AArch64::MOVIv4s_msl;
-  } else {
-    Op = Inv ? AArch64::MVNIv2s_msl : AArch64::MOVIv2s_msl;
-  }
-
-  uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
-  uint64_t Shift;
-
-  if (AArch64_AM::isAdvSIMDModImmType7(Val)) {
-    Val = AArch64_AM::encodeAdvSIMDModImmType7(Val);
-    Shift = 264;
-  } else if (AArch64_AM::isAdvSIMDModImmType8(Val)) {
-    Val = AArch64_AM::encodeAdvSIMDModImmType8(Val);
-    Shift = 272;
-  } else
-    return nullptr;
-
-  auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val).addImm(Shift);
-  constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
-  return &*Mov;
-}
-
-MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImmFP(
-    Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
-
-  unsigned int Op;
-  bool IsWide = false;
-  if (DstSize == 128) {
-    if (Bits.getHiBits(64) != Bits.getLoBits(64))
-      return nullptr;
-    Op = AArch64::FMOVv4f32_ns;
-    IsWide = true;
-  } else {
-    Op = AArch64::FMOVv2f32_ns;
-  }
-
-  uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
-
-  if (AArch64_AM::isAdvSIMDModImmType11(Val)) {
-    Val = AArch64_AM::encodeAdvSIMDModImmType11(Val);
-  } else if (IsWide && AArch64_AM::isAdvSIMDModImmType12(Val)) {
-    Val = AArch64_AM::encodeAdvSIMDModImmType12(Val);
-    Op = AArch64::FMOVv2f64_ns;
-  } else
-    return nullptr;
-
-  auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
-  constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
-  return &*Mov;
-}
-
 bool AArch64InstructionSelector::selectIndexedExtLoad(
     MachineInstr &MI, MachineRegisterInfo &MRI) {
   auto &ExtLd = cast<GIndexedAnyExtLoad>(MI);
@@ -5653,103 +5486,108 @@ bool AArch64InstructionSelector::selectIndexedStore(GIndexedStore &I,
 }
 
 MachineInstr *
-AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
-                                               MachineIRBuilder &MIRBuilder,
-                                               MachineRegisterInfo &MRI) {
-  LLT DstTy = MRI.getType(Dst);
-  unsigned DstSize = DstTy.getSizeInBits();
+AArch64InstructionSelector::emitConstantMOVVector(DstOp Dst, APInt Imm,
+                                                  MachineIRBuilder &MIRBuilder,
+                                                  MachineRegisterInfo &MRI) {
+  unsigned DstSize = Imm.getBitWidth();
   assert((DstSize == 64 || DstSize == 128) &&
          "Unexpected vector constant size");
 
-  if (CV->isNullValue()) {
-    if (DstSize == 128) {
-      auto Mov =
-          MIRBuilder.buildInstr(AArch64::MOVIv2d_ns, {Dst}, {}).addImm(0);
-      constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
-      return &*Mov;
-    }
-
-    if (DstSize == 64) {
-      auto Mov =
-          MIRBuilder
-              .buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {})
-              .addImm(0);
-      auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {Dst}, {})
-                      .addReg(Mov.getReg(0), {}, AArch64::dsub);
-      RBI.constrainGenericRegister(Dst, AArch64::FPR64RegClass, MRI);
-      return &*Copy;
+  SmallVector<AArch64_IMM::ImmInsnModel> Insns;
+  if (AArch64_IMM::expandVectorMOVImm(Imm, &STI, Insns)) {
+    for (AArch64_IMM::ImmInsnModel Insn : Insns) {
+      switch (Insn.Opcode) {
+      case AArch64::FMOVD0: {
+        // Currently emit a MOVIv2d_ns in both cases to match SDAG.
+        auto Mov =
+            MIRBuilder
+                .buildInstr(
+                    AArch64::MOVIv2d_ns,
+                    {DstSize == 128 ? Dst : DstOp(&AArch64::FPR128RegClass)},
+                    {})
+                .addImm(0);
+        constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
+        if (DstSize != 128) {
+          Mov = MIB.buildInstr(TargetOpcode::COPY, {Dst}, {})
+                    .addReg(Mov.getReg(0), {}, AArch64::dsub);
+          RBI.constrainGenericRegister(Mov.getReg(0), AArch64::FPR64RegClass,
+                                       MRI);
+        }
+        return &*Mov;
+      }
+      case AArch64::MOVID:
+      case AArch64::MOVIv2d_ns:
+      case AArch64::MOVIv8b_ns:
+      case AArch64::MOVIv16b_ns:
+      case AArch64::FMOVv2f32_ns:
+      case AArch64::FMOVv4f32_ns:
+      case AArch64::FMOVDi:
+      case AArch64::FMOVv2f64_ns: {
+        auto Mov =
+            MIRBuilder.buildInstr(Insn.Opcode, {Dst}, {}).addImm(Insn.Op1);
+        constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
+        return &*Mov;
+      }
+      case AArch64::MOVIv2i32:
+      case AArch64::MOVIv4i32:
+      case AArch64::MOVIv4i16:
+      case AArch64::MOVIv8i16:
+      case AArch64::MOVIv2s_msl:
+      case AArch64::MOVIv4s_msl:
+      case AArch64::MVNIv2i32:
+      case AArch64::MVNIv4i32:
+      case AArch64::MVNIv4i16:
+      case AArch64::MVNIv8i16:
+      case AArch64::MVNIv2s_msl:
+      case AArch64::MVNIv4s_msl: {
+        auto Mov = MIRBuilder.buildInstr(Insn.Opcode, {Dst}, {})
+                       .addImm(Insn.Op1)
+                       .addImm(Insn.Op2);
+        constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
+        return &*Mov;
+      }
+      case AArch64::DUPM_ZI: {
+        auto Mov =
+            MIRBuilder.buildInstr(Insn.Opcode, {&AArch64::ZPRRegClass}, {})
+                .addImm(Insn.Op1);
+        constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
+        Mov = MIB.buildInstr(TargetOpcode::COPY, {Dst}, {})
+                  .addReg(Mov.getReg(0), {},
+                          DstSize == 64 ? AArch64::dsub : AArch64::zsub);
+        RBI.constrainGenericRegister(Mov.getReg(0),
+                                     DstSize == 64 ? AArch64::FPR64RegClass
+                                                   : AArch64::FPR128RegClass,
+                                     MRI);
+        return &*Mov;
+      }
+      case AArch64::DUP_ZI_D: {
+        auto Mov =
+            MIRBuilder.buildInstr(Insn.Opcode, {&AArch64::ZPRRegClass}, {})
+                .addImm(Insn.Op1)
+                .addImm(Insn.Op2);
+        constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
+        Mov = MIB.buildInstr(TargetOpcode::COPY, {Dst}, {})
+                  .addReg(Mov.getReg(0), {},
+                          DstSize == 64 ? AArch64::dsub : AArch64::zsub);
+        RBI.constrainGenericRegister(Mov.getReg(0),
+                                     DstSize == 64 ? AArch64::FPR64RegClass
+                                                   : AArch64::FPR128RegClass,
+                                     MRI);
+        return &*Mov;
+      }
+      default:
+        llvm_unreachable("Unexpected node in expandVectorMOVImm\n");
+      }
     }
   }
+  return nullptr;
+}
 
-  if (Constant *SplatValue = CV->getSplatValue()) {
-    APInt SplatValueAsInt =
-        isa<ConstantFP>(SplatValue)
-            ? cast<ConstantFP>(SplatValue)->getValueAPF().bitcastToAPInt()
-            : SplatValue->getUniqueInteger();
-    APInt DefBits = APInt::getSplat(
-        DstSize, SplatValueAsInt.trunc(DstTy.getScalarSizeInBits()));
-    auto TryMOVIWithBits = [&](APInt DefBits) -> MachineInstr * {
-      MachineInstr *NewOp;
-      bool Inv = false;
-      if ((NewOp = tryAdvSIMDModImm64(Dst, DstSize, DefBits, MIRBuilder)) ||
-          (NewOp =
-               tryAdvSIMDModImm32(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
-          (NewOp =
-               tryAdvSIMDModImm321s(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
-          (NewOp =
-               tryAdvSIMDModImm16(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
-          (NewOp = tryAdvSIMDModImm8(Dst, DstSize, DefBits, MIRBuilder)) ||
-          (NewOp = tryAdvSIMDModImmFP(Dst, DstSize, DefBits, MIRBuilder)))
-        return NewOp;
-
-      DefBits = ~DefBits;
-      Inv = true;
-      if ((NewOp =
-               tryAdvSIMDModImm32(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
-          (NewOp =
-               tryAdvSIMDModImm321s(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
-          (NewOp = tryAdvSIMDModImm16(Dst, DstSize, DefBits, MIRBuilder, Inv)))
-        return NewOp;
-      return nullptr;
-    };
-
-    if (auto *NewOp = TryMOVIWithBits(DefBits))
-      return NewOp;
-
-    // See if a fneg of the constant can be materialized with a MOVI, etc
-    auto TryWithFNeg = [&](APInt DefBits, int NumBits,
-                           unsigned NegOpc) -> MachineInstr * {
-      // FNegate each sub-element of the constant
-      APInt Neg = APInt::getHighBitsSet(NumBits, 1).zext(DstSize);
-      APInt NegBits(DstSize, 0);
-      unsigned NumElts = DstSize / NumBits;
-      for (unsigned i = 0; i < NumElts; i++)
-        NegBits |= Neg << (NumBits * i);
-      NegBits = DefBits ^ NegBits;
-
-      // Try to create the new constants with MOVI, and if so generate a fneg
-      // for it.
-      if (auto *NewOp = TryMOVIWithBits(NegBits)) {
-        Register NewDst = MRI.createVirtualRegister(
-            DstSize == 64 ? &AArch64::FPR64RegClass : &AArch64::FPR128RegClass);
-        NewOp->getOperand(0).setReg(NewDst);
-        return MIRBuilder.buildInstr(NegOpc, {Dst}, {NewDst});
-      }
-      return nullptr;
-    };
-    MachineInstr *R;
-    if ((R = TryWithFNeg(DefBits, 32,
-                         DstSize == 64 ? AArch64::FNEGv2f32
-                                       : AArch64::FNEGv4f32)) ||
-        (R = TryWithFNeg(DefBits, 64,
-                         DstSize == 64 ? AArch64::FNEGDr
-                                       : AArch64::FNEGv2f64)) ||
-        (STI.hasFullFP16() &&
-         (R = TryWithFNeg(DefBits, 16,
-                          DstSize == 64 ? AArch64::FNEGv4f16
-                                        : AArch64::FNEGv8f16))))
-      return R;
-  }
+MachineInstr *AArch64InstructionSelector::emitConstantVector(
+    Register Dst, Constant *CV, APInt Imm, MachineIRBuilder &MIRBuilder,
+    MachineRegisterInfo &MRI) {
+  if (auto *MovI = emitConstantMOVVector(Dst, Imm, MIRBuilder, MRI))
+    return MovI;
 
   auto *CPLoad = emitLoadFromConstantPool(CV, MIRBuilder);
   if (!CPLoad) {
@@ -5773,12 +5611,15 @@ bool AArch64InstructionSelector::tryOptConstantBuildVec(
   // Check if we're building a constant vector, in which case we want to
   // generate a constant pool load instead of a vector insert sequence.
   SmallVector<Constant *, 16> Csts;
+  APInt Imm(DstSize, 0);
   for (unsigned Idx = 1; Idx < I.getNumOperands(); ++Idx) {
     Register OpReg = I.getOperand(Idx).getReg();
     if (auto AnyConst = getAnyConstantVRegValWithLookThrough(
             OpReg, MRI, /*LookThroughInstrs=*/true,
             /*LookThroughAnyExt=*/true)) {
       MachineInstr *DefMI = MRI.getVRegDef(AnyConst->VReg);
+      Imm |= AnyConst->Value.zext(DstSize).shl((Idx - 1) *
+                                               DstTy.getScalarSizeInBits());
 
       if (DefMI->getOpcode() == TargetOpcode::G_CONSTANT) {
         Csts.emplace_back(
@@ -5796,7 +5637,7 @@ bool AArch64InstructionSelector::tryOptConstantBuildVec(
     return false;
   }
   Constant *CV = ConstantVector::get(Csts);
-  if (!emitConstantVector(I.getOperand(0).getReg(), CV, MIB, MRI))
+  if (!emitConstantVector(I.getOperand(0).getReg(), CV, Imm, MIB, MRI))
     return false;
   I.eraseFromParent();
   return true;
@@ -7871,7 +7712,7 @@ AArch64InstructionSelector::selectCVTFixedPointVecBase(
   APFloat FVal(0.0);
   switch (RegWidth) {
   case 16:
-    FVal = APFloat(APFloat::IEEEhalf(), CstVal->Value);
+    FVal = APFloat(APFloat::IEEEhalf(), CstVal->Value.trunc(RegWidth));
     break;
   case 32:
     FVal = APFloat(APFloat::IEEEsingle(), CstVal->Value);
@@ -7972,17 +7813,6 @@ void AArch64InstructionSelector::renderFPImm64(MachineInstrBuilder &MIB,
       AArch64_AM::getFP64Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
 }
 
-void AArch64InstructionSelector::renderFPImm32SIMDModImmType4(
-    MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
-  assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
-         "Expected G_FCONSTANT");
-  MIB.addImm(AArch64_AM::encodeAdvSIMDModImmType4(MI.getOperand(1)
-                                                      .getFPImm()
-                                                      ->getValueAPF()
-                                                      .bitcastToAPInt()
-                                                      .getZExtValue()));
-}
-
 bool AArch64InstructionSelector::isLoadStoreOfNumBytes(
     const MachineInstr &MI, unsigned NumBytes) const {
   if (!MI.mayLoadOrStore())
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
index 631c09db7242e..8e5618163d45d 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
@@ -19,6 +19,7 @@
 ///
 //===----------------------------------------------------------------------===//
 
+#include "AArch64ExpandImm.h"
 #include "AArch64TargetMachine.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/CodeGen/GlobalISel/CSEInfo.h"
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
index 2b34a89a1db64..2625b0053570d 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
@@ -1110,6 +1110,67 @@ void applyLowerBuildToInsertVecElt(MachineInstr &MI, MachineRegisterInfo &MRI,
   GBuildVec->eraseFromParent();
 }
 
+bool matchConstBuildVectorToFNeg(MachineInstr &MI, MachineRegisterInfo &MRI,
+                                 const AArch64Subtarget &ST,
+                                 std::pair<APInt, unsigned> &MatchInfo) {
+  assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
+  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+  // Limit to v2f32, v4f32, f64, v2f64 or v4f16 and v8f16 with fullfp16.
+  if (Ty.getSizeInBits() != 64 && Ty.getSizeInBits() != 128)
+    return false;
+  unsigned EltSize = Ty.getScalarSizeInBits();
+
+  APInt Imm(Ty.getSizeInBits(), 0);
+  for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx++) {
+    auto C =
+        getAnyConstantVRegValWithLookThrough(MI.getOperand(Idx).getReg(), MRI);
+    if (!C)
+      return false;
+    Imm |= C->Value.trunc(EltSize)
+               .zext(Ty.getSizeInBits())
+               .shl((Idx - 1) * EltSize);
+  }
+
+  SmallVector<AArch64_IMM::ImmInsnModel> Insns;
+  if (expandVectorMOVImm(Imm, &ST, Insns))
+    return false;
+
+  auto Check = [&](APInt &Imm, unsigned Size) {
+    APInt NImm = Imm ^ APInt::getSplat(Imm.getBitWidth(),
+                                       APInt::getHighBitsSet(Size, 1));
+    SmallVector<AArch64_IMM::ImmInsnModel> Insns;
+    MatchInfo.first = NImm;
+    MatchInfo.second = Size;
+    return expandVectorMOVImm(NImm, &ST, Insns);
+  };
+
+  return Check(Imm, 64) || Check(Imm, 32) ||
+         (ST.hasFullFP16() && Check(Imm, 16));
+}
+
+void applyConstBuildVectorToFNeg(MachineInstr &MI, MachineRegisterInfo &MRI,
+                                 MachineIRBuilder &B,
+                                 std::pair<APInt, unsigned> &MatchInfo) {
+  B.setInstrAndDebugLoc(MI);
+  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+  // Extract the Immediate into chunks of size EltSize
+  unsigned EltSize = MatchInfo.second;
+  unsigned NumLanes = Ty.getSizeInBits() / EltSize;
+  LLT VTy = NumLanes == 1 ? LLT::scalar(EltSize)
+                          : LLT::fixed_vector(NumLanes, EltSize);
+  SmallVector<APInt> Imms;
+  for (unsigned I = 0; I < NumLanes; I++)
+    Imms.push_back(MatchInfo.first.extractBits(EltSize, I * EltSize));
+  // Build the new BV and FNeg of it.
+  auto BV = NumLanes == 1 ? B.buildFConstant(VTy, APFloat(APFloat::IEEEdouble(),
+                                                          MatchInfo.first))
+                          : B.buildBuildVectorConstant(VTy, Imms);
+  auto FNeg = B.buildFNeg(Ty == VTy ? DstOp(MI.getOperand(0)) : DstOp(VTy), BV);
+  if (Ty != VTy)
+    B.buildBitcast(MI.getOperand(0), FNeg);
+  MI.eraseFromParent();
+}
+
 bool matchFormTruncstore(MachineInstr &MI, MachineRegisterInfo &MRI,
                          Register &SrcReg) {
   assert(MI.getOpcode() == TargetOpcode::G_STORE);
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
index 2657a89f9d9cf..482a85cd01bd1 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "AArch64RegisterBankInfo.h"
+#include "AArch64ExpandImm.h"
 #include "AArch64RegisterInfo.h"
 #include "AArch64Subtarget.h"
 #include "MCTargetDesc/AArch64AddressingModes.h"
@@ -379,13 +380,10 @@ static bool preferGPRForFPImm(const MachineInstr &MI,
   const APFloat Imm = MI.getOperand(1).getFPImm()->getValueAPF();
   const APInt ImmBits = Imm.bitcastToAPInt();
 
-  // Check if we can encode this as a movi. Note, we only have one pattern so
-  // far for movis, hence the one check.
-  if (Size == 32) {
-    uint64_t Val = APInt::getSplat(64, ImmBits).getZExtValue();
-    if (AArch64_AM::isAdvSIMDModImmType4(Val))
-      return false;
-  }
+  // Check if we can encode this as a movi.
+  SmallVector<AArch64_IMM::ImmInsnModel> Insn;
+  if (AArch64_IMM::expandVectorMOVImm(APInt::getSplat(64, ImmBits), &STI, Insn))
+    return false;
 
   // We want to use GPR when the value cannot be encoded as the immediate value
   // of a fmov and when it will not result in a constant pool load. As
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-fp16-fconstant.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-fp16-fconstant.mir
index 5b6726d6e5bf3..a6be213319e10 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-fp16-fconstant.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-fp16-fconstant.mir
@@ -39,9 +39,9 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     ; CHECK-LABEL: name: constant_pool_load
-    ; CHECK: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0
-    ; CHECK-NEXT: [[LDRHui:%[0-9]+]]:fpr16 = LDRHui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) %const.0 :: (load (s16) from constant-pool)
-    ; CHECK-NEXT: $h0 = COPY [[LDRHui]]
+    ; CHECK: [[MOVIv4i16_:%[0-9]+]]:fpr64 = MOVIv4i16 11, 0
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr16 = COPY [[MOVIv4i16_]].hsub
+    ; CHECK-NEXT: $h0 = COPY [[COPY]]
     ; CHECK-NEXT: RET_ReallyLR implicit $h0
     %0:fpr(s16) = G_FCONSTANT half 0xH000B
     $h0 = COPY %0(s16)
diff --git a/llvm/test/CodeGen/AArch64/arm64-build-vector.ll b/llvm/test/CodeGen/AArch64/arm64-build-vector.ll
index 914f431866cce..ff8f84b1ee51a 100644
--- a/llvm/test/CodeGen/AArch64/arm64-build-vector.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-build-vector.ll
@@ -125,10 +125,9 @@ define <1 x double> @convert_single_fp_vector_constant(i1 %cmp) {
 ; CHECK-SD-LABEL: convert_single_fp_vector_constant:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    tst w0, #0x1
-; CHECK-SD-NEXT:    mov x8, #4607182418800017408 // =0x3ff0000000000000
-; CHECK-SD-NEXT:    csetm x9, ne
-; CHECK-SD-NEXT:    fmov d0, x8
-; CHECK-SD-NEXT:    fmov d1, x9
+; CHECK-SD-NEXT:    fmov d0, #1.00000000
+; CHECK-SD-NEXT:    csetm x8, ne
+; CHECK-SD-NEXT:    fmov d1, x8
 ; CHECK-SD-NEXT:    and v0.8b, v0.8b, v1.8b
 ; CHECK-SD-NEXT:    ret
 ;
@@ -158,12 +157,19 @@ define <2 x double> @poszero_v2f64(<2 x double> %a) {
 }
 
 define <2 x double> @negzero_v2f64(<2 x double> %a) {
-; CHECK-LABEL: negzero_v2f64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi v1.2d, #0000000000000000
-; CHECK-NEXT:    fneg v1.2d, v1.2d
-; CHECK-NEXT:    fmul v0.2d, v0.2d, v1.2d
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: negzero_v2f64:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    movi d1, #0000000000000000
+; CHECK-SD-NEXT:    fneg v1.2d, v1.2d
+; CHECK-SD-NEXT:    fmul v0.2d, v0.2d, v1.2d
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: negzero_v2f64:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-GI-NEXT:    fneg v1.2d, v1.2d
+; CHECK-GI-NEXT:    fmul v0.2d, v0.2d, v1.2d
+; CHECK-GI-NEXT:    ret
   %b = fmul <2 x double> %a, <double -0.0, double -0.0>
   ret <2 x double> %b
 }
diff --git a/llvm/test/CodeGen/AArch64/arm64-fp-imm-size.ll b/llvm/test/CodeGen/AArch64/arm64-fp-imm-size.ll
index 9e187378eea68..036656e694190 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fp-imm-size.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fp-imm-size.ll
@@ -41,12 +41,8 @@ define float @bar() optsize {
 define fp128 @baz() optsize {
 ; CHECK-SD-LABEL: baz:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:  Lloh6:
-; CHECK-SD-NEXT:    adrp x8, lCPI3_0 at PAGE
-; CHECK-SD-NEXT:  Lloh7:
-; CHECK-SD-NEXT:    ldr q0, [x8, lCPI3_0 at PAGEOFF]
+; CHECK-SD-NEXT:    movi d0, #0000000000000000
 ; CHECK-SD-NEXT:    ret
-; CHECK-SD-NEXT:    .loh AdrpLdr Lloh6, Lloh7
 ;
 ; CHECK-GI-LABEL: baz:
 ; CHECK-GI:       ; %bb.0:
@@ -58,12 +54,12 @@ define fp128 @baz() optsize {
 define double @foo2_pgso() !prof !14 {
 ; CHECK-SD-LABEL: foo2_pgso:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:  Lloh8:
+; CHECK-SD-NEXT:  Lloh6:
 ; CHECK-SD-NEXT:    adrp x8, lCPI4_0 at PAGE
-; CHECK-SD-NEXT:  Lloh9:
+; CHECK-SD-NEXT:  Lloh7:
 ; CHECK-SD-NEXT:    ldr d0, [x8, lCPI4_0 at PAGEOFF]
 ; CHECK-SD-NEXT:    ret
-; CHECK-SD-NEXT:    .loh AdrpLdr Lloh8, Lloh9
+; CHECK-SD-NEXT:    .loh AdrpLdr Lloh6, Lloh7
 ;
 ; CHECK-GI-LABEL: foo2_pgso:
 ; CHECK-GI:       ; %bb.0:
@@ -77,12 +73,12 @@ define double @foo2_pgso() !prof !14 {
 define float @bar_pgso() !prof !14 {
 ; CHECK-SD-LABEL: bar_pgso:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:  Lloh10:
+; CHECK-SD-NEXT:  Lloh8:
 ; CHECK-SD-NEXT:    adrp x8, lCPI5_0 at PAGE
-; CHECK-SD-NEXT:  Lloh11:
+; CHECK-SD-NEXT:  Lloh9:
 ; CHECK-SD-NEXT:    ldr s0, [x8, lCPI5_0 at PAGEOFF]
 ; CHECK-SD-NEXT:    ret
-; CHECK-SD-NEXT:    .loh AdrpLdr Lloh10, Lloh11
+; CHECK-SD-NEXT:    .loh AdrpLdr Lloh8, Lloh9
 ;
 ; CHECK-GI-LABEL: bar_pgso:
 ; CHECK-GI:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AArch64/arm64-fp-imm.ll b/llvm/test/CodeGen/AArch64/arm64-fp-imm.ll
index d7c5f2ae35766..0e4ebfd867a21 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fp-imm.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fp-imm.ll
@@ -27,12 +27,8 @@ define float @bar() {
 define fp128 @baz() {
 ; CHECK-SD-LABEL: baz:
 ; CHECK-SD:       ; %bb.0:
-; CHECK-SD-NEXT:  Lloh2:
-; CHECK-SD-NEXT:    adrp x8, lCPI2_0 at PAGE
-; CHECK-SD-NEXT:  Lloh3:
-; CHECK-SD-NEXT:    ldr q0, [x8, lCPI2_0 at PAGEOFF]
+; CHECK-SD-NEXT:    movi d0, #0000000000000000
 ; CHECK-SD-NEXT:    ret
-; CHECK-SD-NEXT:    .loh AdrpLdr Lloh2, Lloh3
 ;
 ; CHECK-GI-LABEL: baz:
 ; CHECK-GI:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AArch64/arm64-fp128.ll b/llvm/test/CodeGen/AArch64/arm64-fp128.ll
index c4f91c66fb9a6..743bec78190cc 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fp128.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fp128.ll
@@ -1448,24 +1448,22 @@ define <2 x fp128> @vec_extend_f64(<2 x double> %val) {
 define <2 x fp128> @vec_neg_sub(<2 x fp128> %in) {
 ; CHECK-SD-LABEL: vec_neg_sub:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    sub sp, sp, #64
-; CHECK-SD-NEXT:    str x30, [sp, #48] // 8-byte Spill
-; CHECK-SD-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-SD-NEXT:    sub sp, sp, #48
+; CHECK-SD-NEXT:    str x30, [sp, #32] // 8-byte Spill
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-SD-NEXT:    .cfi_offset w30, -16
-; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Spill
+; CHECK-SD-NEXT:    str q1, [sp, #16] // 16-byte Spill
 ; CHECK-SD-NEXT:    mov v1.16b, v0.16b
-; CHECK-SD-NEXT:    adrp x8, .LCPI47_0
-; CHECK-SD-NEXT:    ldr q0, [x8, :lo12:.LCPI47_0]
-; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Spill
+; CHECK-SD-NEXT:    movi d0, #0000000000000000
 ; CHECK-SD-NEXT:    bl __subtf3
-; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Spill
-; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Reload
-; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Reload
+; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Spill
+; CHECK-SD-NEXT:    movi d0, #0000000000000000
+; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Reload
 ; CHECK-SD-NEXT:    bl __subtf3
 ; CHECK-SD-NEXT:    mov v1.16b, v0.16b
-; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Reload
-; CHECK-SD-NEXT:    ldr x30, [sp, #48] // 8-byte Reload
-; CHECK-SD-NEXT:    add sp, sp, #64
+; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Reload
+; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Reload
+; CHECK-SD-NEXT:    add sp, sp, #48
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: vec_neg_sub:
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-aba-abd.ll b/llvm/test/CodeGen/AArch64/arm64-neon-aba-abd.ll
index 062f5de38c45b..1df28931f700c 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-aba-abd.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-aba-abd.ll
@@ -206,11 +206,10 @@ define <2 x i32> @test_sabd_v2i32_const() {
 ;
 ; CHECK-GI-LABEL: test_sabd_v2i32_const:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI19_1
-; CHECK-GI-NEXT:    adrp x9, .LCPI19_0
-; CHECK-GI-NEXT:    ldr d0, [x8, :lo12:.LCPI19_1]
-; CHECK-GI-NEXT:    ldr d1, [x9, :lo12:.LCPI19_0]
-; CHECK-GI-NEXT:    sabd v0.2s, v0.2s, v1.2s
+; CHECK-GI-NEXT:    movi d0, #0x00ffffffff0000
+; CHECK-GI-NEXT:    adrp x8, .LCPI19_0
+; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI19_0]
+; CHECK-GI-NEXT:    sabd v0.2s, v1.2s, v0.2s
 ; CHECK-GI-NEXT:    ret
   %1 = tail call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(
     <2 x i32> <i32 -2147483648, i32 2147450880>,
diff --git a/llvm/test/CodeGen/AArch64/bf16-imm.ll b/llvm/test/CodeGen/AArch64/bf16-imm.ll
index 7383a741c5fc3..49c50dbc09146 100644
--- a/llvm/test/CodeGen/AArch64/bf16-imm.ll
+++ b/llvm/test/CodeGen/AArch64/bf16-imm.ll
@@ -21,8 +21,7 @@ define bfloat @Const1() {
 ;
 ; CHECK-NOFP16-LABEL: Const1:
 ; CHECK-NOFP16:       // %bb.0: // %entry
-; CHECK-NOFP16-NEXT:    adrp x8, .LCPI1_0
-; CHECK-NOFP16-NEXT:    ldr h0, [x8, :lo12:.LCPI1_0]
+; CHECK-NOFP16-NEXT:    movi v0.4h, #60, lsl #8
 ; CHECK-NOFP16-NEXT:    ret
 entry:
   ret bfloat 0xR3C00
@@ -36,8 +35,7 @@ define bfloat @Const2() {
 ;
 ; CHECK-NOFP16-LABEL: Const2:
 ; CHECK-NOFP16:       // %bb.0: // %entry
-; CHECK-NOFP16-NEXT:    adrp x8, .LCPI2_0
-; CHECK-NOFP16-NEXT:    ldr h0, [x8, :lo12:.LCPI2_0]
+; CHECK-NOFP16-NEXT:    movi v0.4h, #48, lsl #8
 ; CHECK-NOFP16-NEXT:    ret
 entry:
   ret bfloat 0xR3000
@@ -118,23 +116,10 @@ entry:
 }
 
 define bfloat @Const7() {
-; CHECK-FP16-SD-LABEL: Const7:
-; CHECK-FP16-SD:       // %bb.0: // %entry
-; CHECK-FP16-SD-NEXT:    mov w8, #20480 // =0x5000
-; CHECK-FP16-SD-NEXT:    fmov h0, w8
-; CHECK-FP16-SD-NEXT:    ret
-;
-; CHECK-NOFP16-LABEL: Const7:
-; CHECK-NOFP16:       // %bb.0: // %entry
-; CHECK-NOFP16-NEXT:    adrp x8, .LCPI7_0
-; CHECK-NOFP16-NEXT:    ldr h0, [x8, :lo12:.LCPI7_0]
-; CHECK-NOFP16-NEXT:    ret
-;
-; CHECK-FP16-GI-LABEL: Const7:
-; CHECK-FP16-GI:       // %bb.0: // %entry
-; CHECK-FP16-GI-NEXT:    adrp x8, .LCPI7_0
-; CHECK-FP16-GI-NEXT:    ldr h0, [x8, :lo12:.LCPI7_0]
-; CHECK-FP16-GI-NEXT:    ret
+; CHECK-LABEL: Const7:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi v0.4h, #80, lsl #8
+; CHECK-NEXT:    ret
 entry:
   ret bfloat 0xR5000
 }
diff --git a/llvm/test/CodeGen/AArch64/bf16-instructions.ll b/llvm/test/CodeGen/AArch64/bf16-instructions.ll
index c965d11ffc2e0..3b34c9a72e803 100644
--- a/llvm/test/CodeGen/AArch64/bf16-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/bf16-instructions.ll
@@ -671,10 +671,9 @@ define void @test_fccmp(bfloat %in, ptr %out) {
 ; CHECK-CVT-NEXT:    movi v1.2s, #69, lsl #24
 ; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $d0
 ; CHECK-CVT-NEXT:    shll v2.4s, v0.4h, #16
-; CHECK-CVT-NEXT:    adrp x8, .LCPI29_0
 ; CHECK-CVT-NEXT:    movi v3.2s, #72, lsl #24
 ; CHECK-CVT-NEXT:    fcmp s2, s1
-; CHECK-CVT-NEXT:    ldr h1, [x8, :lo12:.LCPI29_0]
+; CHECK-CVT-NEXT:    movi v1.4h, #69, lsl #8
 ; CHECK-CVT-NEXT:    fccmp s2, s3, #4, mi
 ; CHECK-CVT-NEXT:    fcsel s0, s0, s1, gt
 ; CHECK-CVT-NEXT:    str h0, [x0]
@@ -685,10 +684,9 @@ define void @test_fccmp(bfloat %in, ptr %out) {
 ; CHECK-SD-NEXT:    movi v1.2s, #69, lsl #24
 ; CHECK-SD-NEXT:    // kill: def $h0 killed $h0 def $d0
 ; CHECK-SD-NEXT:    shll v2.4s, v0.4h, #16
-; CHECK-SD-NEXT:    adrp x8, .LCPI29_0
 ; CHECK-SD-NEXT:    movi v3.2s, #72, lsl #24
 ; CHECK-SD-NEXT:    fcmp s2, s1
-; CHECK-SD-NEXT:    ldr h1, [x8, :lo12:.LCPI29_0]
+; CHECK-SD-NEXT:    movi v1.4h, #69, lsl #8
 ; CHECK-SD-NEXT:    fccmp s2, s3, #4, mi
 ; CHECK-SD-NEXT:    fcsel s0, s0, s1, gt
 ; CHECK-SD-NEXT:    str h0, [x0]
diff --git a/llvm/test/CodeGen/AArch64/clmul-fixed.ll b/llvm/test/CodeGen/AArch64/clmul-fixed.ll
index 70ddde74aafbc..e40ed025a6add 100644
--- a/llvm/test/CodeGen/AArch64/clmul-fixed.ll
+++ b/llvm/test/CodeGen/AArch64/clmul-fixed.ll
@@ -916,7 +916,7 @@ define <2 x i64> @clmul_v2i64_neon(<2 x i64> %x, <2 x i64> %y) {
 ; CHECK-NEON-NEXT:    dup v4.2d, x3
 ; CHECK-NEON-NEXT:    eor v2.16b, v2.16b, v16.16b
 ; CHECK-NEON-NEXT:    and v3.16b, v1.16b, v17.16b
-; CHECK-NEON-NEXT:    movi v17.2d, #0000000000000000
+; CHECK-NEON-NEXT:    movi d17, #0000000000000000
 ; CHECK-NEON-NEXT:    dup v16.2d, x20
 ; CHECK-NEON-NEXT:    mul x15, x8, x25
 ; CHECK-NEON-NEXT:    mov v5.d[1], x21
@@ -1028,442 +1028,441 @@ define <1 x i64> @clmul_v1i64_neon(<1 x i64> %x, <1 x i64> %y) {
 ; CHECK-NEON-NEXT:    mov w10, #8 // =0x8
 ; CHECK-NEON-NEXT:    fmov d2, x8
 ; CHECK-NEON-NEXT:    mov w8, #1 // =0x1
+; CHECK-NEON-NEXT:    mov w11, #16 // =0x10
 ; CHECK-NEON-NEXT:    fmov d3, x8
 ; CHECK-NEON-NEXT:    fmov x8, d0
 ; CHECK-NEON-NEXT:    fmov d0, x9
+; CHECK-NEON-NEXT:    mov w14, #64 // =0x40
 ; CHECK-NEON-NEXT:    and v2.8b, v1.8b, v2.8b
-; CHECK-NEON-NEXT:    and v3.8b, v1.8b, v3.8b
 ; CHECK-NEON-NEXT:    and v0.8b, v1.8b, v0.8b
+; CHECK-NEON-NEXT:    and v3.8b, v1.8b, v3.8b
 ; CHECK-NEON-NEXT:    fmov x9, d2
 ; CHECK-NEON-NEXT:    fmov d2, x10
-; CHECK-NEON-NEXT:    mov w10, #16 // =0x10
-; CHECK-NEON-NEXT:    mul x14, x8, x9
-; CHECK-NEON-NEXT:    fmov x9, d3
-; CHECK-NEON-NEXT:    and v2.8b, v1.8b, v2.8b
-; CHECK-NEON-NEXT:    mul x15, x8, x9
-; CHECK-NEON-NEXT:    fmov x9, d0
-; CHECK-NEON-NEXT:    fmov d0, x10
-; CHECK-NEON-NEXT:    fmov x10, d2
-; CHECK-NEON-NEXT:    fmov d3, x14
-; CHECK-NEON-NEXT:    mul x12, x8, x9
-; CHECK-NEON-NEXT:    and v0.8b, v1.8b, v0.8b
-; CHECK-NEON-NEXT:    mov w9, #32 // =0x20
-; CHECK-NEON-NEXT:    fmov d2, x9
-; CHECK-NEON-NEXT:    mov w9, #64 // =0x40
-; CHECK-NEON-NEXT:    mul x11, x8, x10
-; CHECK-NEON-NEXT:    fmov d4, x15
-; CHECK-NEON-NEXT:    fmov x10, d0
-; CHECK-NEON-NEXT:    fmov d0, x9
+; CHECK-NEON-NEXT:    fmov x12, d0
+; CHECK-NEON-NEXT:    fmov d0, x11
+; CHECK-NEON-NEXT:    fmov x10, d3
 ; CHECK-NEON-NEXT:    and v2.8b, v1.8b, v2.8b
-; CHECK-NEON-NEXT:    fmov d5, x12
-; CHECK-NEON-NEXT:    eor v3.8b, v4.8b, v3.8b
-; CHECK-NEON-NEXT:    mul x9, x8, x10
+; CHECK-NEON-NEXT:    mul x9, x8, x9
 ; CHECK-NEON-NEXT:    and v0.8b, v1.8b, v0.8b
-; CHECK-NEON-NEXT:    mov w10, #128 // =0x80
+; CHECK-NEON-NEXT:    mul x11, x8, x12
+; CHECK-NEON-NEXT:    mov w12, #32 // =0x20
 ; CHECK-NEON-NEXT:    fmov x13, d2
-; CHECK-NEON-NEXT:    fmov d2, x10
-; CHECK-NEON-NEXT:    mov w10, #256 // =0x100
-; CHECK-NEON-NEXT:    fmov d6, x11
-; CHECK-NEON-NEXT:    fmov x14, d0
-; CHECK-NEON-NEXT:    fmov d0, x10
+; CHECK-NEON-NEXT:    fmov d2, x12
+; CHECK-NEON-NEXT:    mul x10, x8, x10
 ; CHECK-NEON-NEXT:    and v2.8b, v1.8b, v2.8b
+; CHECK-NEON-NEXT:    mul x12, x8, x13
+; CHECK-NEON-NEXT:    fmov x13, d0
+; CHECK-NEON-NEXT:    fmov d0, x14
+; CHECK-NEON-NEXT:    fmov x14, d2
 ; CHECK-NEON-NEXT:    mul x13, x8, x13
-; CHECK-NEON-NEXT:    mul x10, x8, x14
-; CHECK-NEON-NEXT:    mov w14, #512 // =0x200
+; CHECK-NEON-NEXT:    fmov d2, x9
 ; CHECK-NEON-NEXT:    and v0.8b, v1.8b, v0.8b
-; CHECK-NEON-NEXT:    fmov x15, d2
-; CHECK-NEON-NEXT:    fmov d2, x14
-; CHECK-NEON-NEXT:    fmov x12, d0
-; CHECK-NEON-NEXT:    mul x14, x8, x15
+; CHECK-NEON-NEXT:    fmov d3, x10
+; CHECK-NEON-NEXT:    mov w10, #128 // =0x80
+; CHECK-NEON-NEXT:    fmov d4, x12
+; CHECK-NEON-NEXT:    fmov d5, x10
+; CHECK-NEON-NEXT:    mov w10, #256 // =0x100
+; CHECK-NEON-NEXT:    mul x14, x8, x14
+; CHECK-NEON-NEXT:    fmov x9, d0
+; CHECK-NEON-NEXT:    fmov d0, x11
+; CHECK-NEON-NEXT:    eor v2.8b, v3.8b, v2.8b
+; CHECK-NEON-NEXT:    fmov d3, x13
+; CHECK-NEON-NEXT:    mul x9, x8, x9
+; CHECK-NEON-NEXT:    eor v0.8b, v0.8b, v4.8b
+; CHECK-NEON-NEXT:    fmov d4, x14
+; CHECK-NEON-NEXT:    eor v0.8b, v2.8b, v0.8b
+; CHECK-NEON-NEXT:    fmov d2, x10
+; CHECK-NEON-NEXT:    eor v3.8b, v3.8b, v4.8b
+; CHECK-NEON-NEXT:    and v4.8b, v1.8b, v5.8b
+; CHECK-NEON-NEXT:    fmov d5, x9
+; CHECK-NEON-NEXT:    mov w9, #512 // =0x200
 ; CHECK-NEON-NEXT:    and v2.8b, v1.8b, v2.8b
-; CHECK-NEON-NEXT:    mov w15, #1024 // =0x400
-; CHECK-NEON-NEXT:    fmov d0, x15
-; CHECK-NEON-NEXT:    mov w15, #2048 // =0x800
-; CHECK-NEON-NEXT:    fmov d7, x10
-; CHECK-NEON-NEXT:    mul x12, x8, x12
-; CHECK-NEON-NEXT:    mov w10, #16384 // =0x4000
+; CHECK-NEON-NEXT:    fmov x10, d4
+; CHECK-NEON-NEXT:    fmov d4, x9
+; CHECK-NEON-NEXT:    mov w9, #2048 // =0x800
+; CHECK-NEON-NEXT:    eor v3.8b, v3.8b, v5.8b
 ; CHECK-NEON-NEXT:    fmov x11, d2
-; CHECK-NEON-NEXT:    fmov d2, x15
-; CHECK-NEON-NEXT:    mov w15, #4096 // =0x1000
-; CHECK-NEON-NEXT:    and v0.8b, v1.8b, v0.8b
-; CHECK-NEON-NEXT:    fmov d4, x15
+; CHECK-NEON-NEXT:    fmov d2, x9
+; CHECK-NEON-NEXT:    mov w9, #4096 // =0x1000
+; CHECK-NEON-NEXT:    and v4.8b, v1.8b, v4.8b
+; CHECK-NEON-NEXT:    mul x10, x8, x10
+; CHECK-NEON-NEXT:    eor v0.8b, v0.8b, v3.8b
+; CHECK-NEON-NEXT:    fmov d3, x9
+; CHECK-NEON-NEXT:    mul x11, x8, x11
 ; CHECK-NEON-NEXT:    and v2.8b, v1.8b, v2.8b
+; CHECK-NEON-NEXT:    fmov x12, d4
+; CHECK-NEON-NEXT:    and v3.8b, v1.8b, v3.8b
+; CHECK-NEON-NEXT:    fmov d4, x10
+; CHECK-NEON-NEXT:    fmov x10, d2
+; CHECK-NEON-NEXT:    mul x9, x8, x12
+; CHECK-NEON-NEXT:    mov w12, #1024 // =0x400
+; CHECK-NEON-NEXT:    fmov d5, x12
+; CHECK-NEON-NEXT:    mov w12, #8192 // =0x2000
+; CHECK-NEON-NEXT:    mul x10, x8, x10
+; CHECK-NEON-NEXT:    and v2.8b, v1.8b, v5.8b
+; CHECK-NEON-NEXT:    fmov d5, x11
+; CHECK-NEON-NEXT:    fmov x11, d3
+; CHECK-NEON-NEXT:    fmov d3, x12
+; CHECK-NEON-NEXT:    mov w12, #16384 // =0x4000
+; CHECK-NEON-NEXT:    fmov x13, d2
 ; CHECK-NEON-NEXT:    mul x11, x8, x11
-; CHECK-NEON-NEXT:    fmov x15, d0
-; CHECK-NEON-NEXT:    eor v0.8b, v5.8b, v6.8b
-; CHECK-NEON-NEXT:    and v4.8b, v1.8b, v4.8b
-; CHECK-NEON-NEXT:    fmov d5, x9
-; CHECK-NEON-NEXT:    fmov d6, x13
-; CHECK-NEON-NEXT:    mov w13, #8192 // =0x2000
-; CHECK-NEON-NEXT:    fmov x9, d2
-; CHECK-NEON-NEXT:    eor v0.8b, v3.8b, v0.8b
-; CHECK-NEON-NEXT:    fmov d3, x10
-; CHECK-NEON-NEXT:    mul x15, x8, x15
-; CHECK-NEON-NEXT:    eor v2.8b, v5.8b, v6.8b
-; CHECK-NEON-NEXT:    fmov d5, x14
-; CHECK-NEON-NEXT:    fmov x14, d4
-; CHECK-NEON-NEXT:    fmov d4, x13
-; CHECK-NEON-NEXT:    fmov d6, x12
-; CHECK-NEON-NEXT:    mul x9, x8, x9
+; CHECK-NEON-NEXT:    fmov d2, x12
 ; CHECK-NEON-NEXT:    and v3.8b, v1.8b, v3.8b
-; CHECK-NEON-NEXT:    mul x12, x8, x14
-; CHECK-NEON-NEXT:    eor v2.8b, v2.8b, v7.8b
-; CHECK-NEON-NEXT:    and v4.8b, v1.8b, v4.8b
-; CHECK-NEON-NEXT:    eor v5.8b, v5.8b, v6.8b
+; CHECK-NEON-NEXT:    eor v4.8b, v4.8b, v5.8b
+; CHECK-NEON-NEXT:    fmov d5, x10
+; CHECK-NEON-NEXT:    mul x12, x8, x13
+; CHECK-NEON-NEXT:    mov w13, #32768 // =0x8000
+; CHECK-NEON-NEXT:    and v2.8b, v1.8b, v2.8b
+; CHECK-NEON-NEXT:    fmov x10, d3
+; CHECK-NEON-NEXT:    fmov d3, x13
+; CHECK-NEON-NEXT:    mov w13, #65536 // =0x10000
 ; CHECK-NEON-NEXT:    fmov d6, x11
-; CHECK-NEON-NEXT:    fmov x11, d3
-; CHECK-NEON-NEXT:    eor v0.8b, v0.8b, v2.8b
-; CHECK-NEON-NEXT:    fmov x10, d4
-; CHECK-NEON-NEXT:    eor v4.8b, v5.8b, v6.8b
+; CHECK-NEON-NEXT:    fmov x11, d2
+; CHECK-NEON-NEXT:    fmov d2, x13
+; CHECK-NEON-NEXT:    mov w13, #131072 // =0x20000
+; CHECK-NEON-NEXT:    and v3.8b, v1.8b, v3.8b
+; CHECK-NEON-NEXT:    mul x10, x8, x10
+; CHECK-NEON-NEXT:    eor v5.8b, v5.8b, v6.8b
 ; CHECK-NEON-NEXT:    fmov d6, x9
-; CHECK-NEON-NEXT:    mov w9, #32768 // =0x8000
-; CHECK-NEON-NEXT:    fmov d5, x15
-; CHECK-NEON-NEXT:    fmov d7, x12
-; CHECK-NEON-NEXT:    fmov d3, x9
-; CHECK-NEON-NEXT:    mul x9, x8, x11
-; CHECK-NEON-NEXT:    mov w11, #65536 // =0x10000
+; CHECK-NEON-NEXT:    and v2.8b, v1.8b, v2.8b
+; CHECK-NEON-NEXT:    mul x11, x8, x11
+; CHECK-NEON-NEXT:    fmov x14, d3
+; CHECK-NEON-NEXT:    fmov d3, x13
+; CHECK-NEON-NEXT:    mov w13, #262144 // =0x40000
+; CHECK-NEON-NEXT:    eor v4.8b, v4.8b, v6.8b
+; CHECK-NEON-NEXT:    fmov d6, x10
+; CHECK-NEON-NEXT:    fmov x10, d2
+; CHECK-NEON-NEXT:    and v3.8b, v1.8b, v3.8b
+; CHECK-NEON-NEXT:    mul x9, x8, x14
+; CHECK-NEON-NEXT:    eor v2.8b, v5.8b, v6.8b
+; CHECK-NEON-NEXT:    fmov d5, x12
+; CHECK-NEON-NEXT:    fmov d6, x11
+; CHECK-NEON-NEXT:    fmov x12, d3
+; CHECK-NEON-NEXT:    fmov d3, x13
 ; CHECK-NEON-NEXT:    mul x10, x8, x10
-; CHECK-NEON-NEXT:    eor v2.8b, v4.8b, v5.8b
-; CHECK-NEON-NEXT:    fmov d4, x11
+; CHECK-NEON-NEXT:    eor v4.8b, v4.8b, v5.8b
+; CHECK-NEON-NEXT:    eor v2.8b, v2.8b, v6.8b
+; CHECK-NEON-NEXT:    fmov d6, x9
+; CHECK-NEON-NEXT:    mul x11, x8, x12
+; CHECK-NEON-NEXT:    mov w12, #524288 // =0x80000
 ; CHECK-NEON-NEXT:    and v3.8b, v1.8b, v3.8b
-; CHECK-NEON-NEXT:    eor v6.8b, v6.8b, v7.8b
-; CHECK-NEON-NEXT:    and v4.8b, v1.8b, v4.8b
+; CHECK-NEON-NEXT:    fmov d5, x12
+; CHECK-NEON-NEXT:    eor v0.8b, v0.8b, v4.8b
+; CHECK-NEON-NEXT:    eor v2.8b, v2.8b, v6.8b
+; CHECK-NEON-NEXT:    fmov d4, x10
+; CHECK-NEON-NEXT:    fmov x9, d3
+; CHECK-NEON-NEXT:    mov w10, #1048576 // =0x100000
+; CHECK-NEON-NEXT:    and v3.8b, v1.8b, v5.8b
+; CHECK-NEON-NEXT:    fmov d5, x11
 ; CHECK-NEON-NEXT:    eor v0.8b, v0.8b, v2.8b
+; CHECK-NEON-NEXT:    mul x9, x8, x9
 ; CHECK-NEON-NEXT:    fmov x11, d3
-; CHECK-NEON-NEXT:    fmov d5, x10
-; CHECK-NEON-NEXT:    mov w10, #131072 // =0x20000
 ; CHECK-NEON-NEXT:    fmov d3, x10
+; CHECK-NEON-NEXT:    eor v4.8b, v4.8b, v5.8b
 ; CHECK-NEON-NEXT:    mul x10, x8, x11
-; CHECK-NEON-NEXT:    mov w11, #262144 // =0x40000
-; CHECK-NEON-NEXT:    eor v5.8b, v6.8b, v5.8b
-; CHECK-NEON-NEXT:    and v6.8b, v1.8b, v3.8b
+; CHECK-NEON-NEXT:    mov w11, #4194304 // =0x400000
+; CHECK-NEON-NEXT:    and v3.8b, v1.8b, v3.8b
+; CHECK-NEON-NEXT:    fmov d2, x11
+; CHECK-NEON-NEXT:    fmov d5, x9
+; CHECK-NEON-NEXT:    mov w9, #8388608 // =0x800000
+; CHECK-NEON-NEXT:    fmov x11, d3
 ; CHECK-NEON-NEXT:    fmov d3, x9
-; CHECK-NEON-NEXT:    fmov x9, d4
+; CHECK-NEON-NEXT:    and v2.8b, v1.8b, v2.8b
+; CHECK-NEON-NEXT:    eor v4.8b, v4.8b, v5.8b
+; CHECK-NEON-NEXT:    fmov d5, x10
+; CHECK-NEON-NEXT:    mul x9, x8, x11
+; CHECK-NEON-NEXT:    mov w11, #2097152 // =0x200000
+; CHECK-NEON-NEXT:    and v3.8b, v1.8b, v3.8b
+; CHECK-NEON-NEXT:    fmov x10, d2
+; CHECK-NEON-NEXT:    fmov d6, x11
+; CHECK-NEON-NEXT:    eor v4.8b, v4.8b, v5.8b
+; CHECK-NEON-NEXT:    fmov x12, d3
+; CHECK-NEON-NEXT:    mul x11, x8, x10
+; CHECK-NEON-NEXT:    mov w10, #16777216 // =0x1000000
+; CHECK-NEON-NEXT:    and v2.8b, v1.8b, v6.8b
+; CHECK-NEON-NEXT:    fmov d3, x10
+; CHECK-NEON-NEXT:    fmov d5, x9
+; CHECK-NEON-NEXT:    mul x10, x8, x12
+; CHECK-NEON-NEXT:    mov w12, #33554432 // =0x2000000
+; CHECK-NEON-NEXT:    fmov x9, d2
+; CHECK-NEON-NEXT:    fmov d2, x12
+; CHECK-NEON-NEXT:    mov w12, #67108864 // =0x4000000
+; CHECK-NEON-NEXT:    and v3.8b, v1.8b, v3.8b
+; CHECK-NEON-NEXT:    and v6.8b, v1.8b, v2.8b
+; CHECK-NEON-NEXT:    eor v2.8b, v4.8b, v5.8b
 ; CHECK-NEON-NEXT:    fmov d4, x11
-; CHECK-NEON-NEXT:    mov w11, #524288 // =0x80000
-; CHECK-NEON-NEXT:    fmov x12, d6
-; CHECK-NEON-NEXT:    eor v3.8b, v5.8b, v3.8b
-; CHECK-NEON-NEXT:    fmov d5, x11
-; CHECK-NEON-NEXT:    and v4.8b, v1.8b, v4.8b
+; CHECK-NEON-NEXT:    fmov x13, d3
+; CHECK-NEON-NEXT:    fmov d3, x12
+; CHECK-NEON-NEXT:    movi v5.2s, #128, lsl #24
 ; CHECK-NEON-NEXT:    mul x9, x8, x9
+; CHECK-NEON-NEXT:    fmov x12, d6
 ; CHECK-NEON-NEXT:    fmov d6, x10
-; CHECK-NEON-NEXT:    mul x11, x8, x12
-; CHECK-NEON-NEXT:    mov w12, #1048576 // =0x100000
+; CHECK-NEON-NEXT:    mul x11, x8, x13
+; CHECK-NEON-NEXT:    mov w13, #536870912 // =0x20000000
+; CHECK-NEON-NEXT:    and v3.8b, v1.8b, v3.8b
+; CHECK-NEON-NEXT:    fmov d7, x13
+; CHECK-NEON-NEXT:    mov w13, #1073741824 // =0x40000000
+; CHECK-NEON-NEXT:    fneg d5, d5
+; CHECK-NEON-NEXT:    mul x10, x8, x12
+; CHECK-NEON-NEXT:    eor v4.8b, v4.8b, v6.8b
+; CHECK-NEON-NEXT:    fmov d6, x13
+; CHECK-NEON-NEXT:    fmov x12, d3
+; CHECK-NEON-NEXT:    mov w13, #134217728 // =0x8000000
+; CHECK-NEON-NEXT:    and v3.8b, v1.8b, v7.8b
+; CHECK-NEON-NEXT:    and v6.8b, v1.8b, v6.8b
+; CHECK-NEON-NEXT:    fmov d7, x11
 ; CHECK-NEON-NEXT:    and v5.8b, v1.8b, v5.8b
-; CHECK-NEON-NEXT:    fmov x10, d4
-; CHECK-NEON-NEXT:    fmov d4, x12
-; CHECK-NEON-NEXT:    mov w12, #4194304 // =0x400000
-; CHECK-NEON-NEXT:    eor v2.8b, v3.8b, v6.8b
+; CHECK-NEON-NEXT:    mul x12, x8, x12
+; CHECK-NEON-NEXT:    fmov x11, d3
+; CHECK-NEON-NEXT:    fmov d3, x13
+; CHECK-NEON-NEXT:    fmov x13, d6
+; CHECK-NEON-NEXT:    eor v4.8b, v4.8b, v7.8b
 ; CHECK-NEON-NEXT:    fmov d7, x9
-; CHECK-NEON-NEXT:    fmov x9, d5
-; CHECK-NEON-NEXT:    fmov d5, x12
-; CHECK-NEON-NEXT:    and v4.8b, v1.8b, v4.8b
-; CHECK-NEON-NEXT:    mul x10, x8, x10
-; CHECK-NEON-NEXT:    fmov d16, x11
+; CHECK-NEON-NEXT:    mov w9, #268435456 // =0x10000000
+; CHECK-NEON-NEXT:    fmov x14, d5
+; CHECK-NEON-NEXT:    and v3.8b, v1.8b, v3.8b
+; CHECK-NEON-NEXT:    fmov d6, x9
+; CHECK-NEON-NEXT:    mul x11, x8, x11
+; CHECK-NEON-NEXT:    eor v2.8b, v2.8b, v7.8b
+; CHECK-NEON-NEXT:    fmov d7, x12
+; CHECK-NEON-NEXT:    mul x9, x8, x13
+; CHECK-NEON-NEXT:    fmov x13, d3
+; CHECK-NEON-NEXT:    and v3.8b, v1.8b, v6.8b
+; CHECK-NEON-NEXT:    fmov d6, x10
+; CHECK-NEON-NEXT:    mul x14, x8, x14
 ; CHECK-NEON-NEXT:    eor v0.8b, v0.8b, v2.8b
-; CHECK-NEON-NEXT:    mul x12, x8, x9
-; CHECK-NEON-NEXT:    mov w9, #8388608 // =0x800000
-; CHECK-NEON-NEXT:    and v5.8b, v1.8b, v5.8b
-; CHECK-NEON-NEXT:    fmov x11, d4
-; CHECK-NEON-NEXT:    fmov d4, x9
-; CHECK-NEON-NEXT:    eor v7.8b, v7.8b, v16.8b
-; CHECK-NEON-NEXT:    fmov x13, d5
-; CHECK-NEON-NEXT:    fmov d16, x10
-; CHECK-NEON-NEXT:    mul x9, x8, x11
-; CHECK-NEON-NEXT:    mov w11, #16777216 // =0x1000000
-; CHECK-NEON-NEXT:    and v4.8b, v1.8b, v4.8b
-; CHECK-NEON-NEXT:    fmov d5, x11
-; CHECK-NEON-NEXT:    mov w11, #2097152 // =0x200000
-; CHECK-NEON-NEXT:    fmov d3, x12
-; CHECK-NEON-NEXT:    mul x13, x8, x13
-; CHECK-NEON-NEXT:    eor v7.8b, v7.8b, v16.8b
-; CHECK-NEON-NEXT:    movi v16.2s, #128, lsl #24
-; CHECK-NEON-NEXT:    fmov x10, d4
-; CHECK-NEON-NEXT:    fmov d4, x11
-; CHECK-NEON-NEXT:    and v5.8b, v1.8b, v5.8b
-; CHECK-NEON-NEXT:    eor v3.8b, v7.8b, v3.8b
-; CHECK-NEON-NEXT:    and v4.8b, v1.8b, v4.8b
-; CHECK-NEON-NEXT:    mul x11, x8, x10
-; CHECK-NEON-NEXT:    fmov x10, d5
+; CHECK-NEON-NEXT:    mul x10, x8, x13
+; CHECK-NEON-NEXT:    mov x13, #4294967296 // =0x100000000
+; CHECK-NEON-NEXT:    eor v4.8b, v4.8b, v6.8b
 ; CHECK-NEON-NEXT:    fmov d5, x13
-; CHECK-NEON-NEXT:    fmov x14, d4
-; CHECK-NEON-NEXT:    mul x12, x8, x10
-; CHECK-NEON-NEXT:    mov w10, #33554432 // =0x2000000
+; CHECK-NEON-NEXT:    fmov x13, d3
+; CHECK-NEON-NEXT:    fmov d6, x9
+; CHECK-NEON-NEXT:    mov x9, #8589934592 // =0x200000000
+; CHECK-NEON-NEXT:    eor v2.8b, v4.8b, v7.8b
+; CHECK-NEON-NEXT:    and v3.8b, v1.8b, v5.8b
+; CHECK-NEON-NEXT:    fmov d5, x11
+; CHECK-NEON-NEXT:    mul x11, x8, x13
 ; CHECK-NEON-NEXT:    fmov d4, x10
-; CHECK-NEON-NEXT:    fmov d6, x11
-; CHECK-NEON-NEXT:    mov w11, #134217728 // =0x8000000
-; CHECK-NEON-NEXT:    mul x10, x8, x14
-; CHECK-NEON-NEXT:    mov w14, #67108864 // =0x4000000
-; CHECK-NEON-NEXT:    fmov d7, x14
-; CHECK-NEON-NEXT:    and v4.8b, v1.8b, v4.8b
+; CHECK-NEON-NEXT:    mov x10, #17179869184 // =0x400000000
 ; CHECK-NEON-NEXT:    eor v5.8b, v5.8b, v6.8b
-; CHECK-NEON-NEXT:    and v6.8b, v1.8b, v7.8b
-; CHECK-NEON-NEXT:    fmov d7, x12
-; CHECK-NEON-NEXT:    fmov x12, d4
+; CHECK-NEON-NEXT:    fmov d6, x9
+; CHECK-NEON-NEXT:    fmov x9, d3
+; CHECK-NEON-NEXT:    eor v2.8b, v2.8b, v4.8b
 ; CHECK-NEON-NEXT:    fmov d4, x11
-; CHECK-NEON-NEXT:    fmov x13, d6
-; CHECK-NEON-NEXT:    eor v5.8b, v5.8b, v7.8b
-; CHECK-NEON-NEXT:    fneg d7, d16
-; CHECK-NEON-NEXT:    mul x11, x8, x12
-; CHECK-NEON-NEXT:    and v4.8b, v1.8b, v4.8b
-; CHECK-NEON-NEXT:    mov w12, #536870912 // =0x20000000
-; CHECK-NEON-NEXT:    fmov d6, x12
-; CHECK-NEON-NEXT:    mul x12, x8, x13
-; CHECK-NEON-NEXT:    mov w13, #1073741824 // =0x40000000
-; CHECK-NEON-NEXT:    fmov x14, d4
-; CHECK-NEON-NEXT:    fmov d4, x13
-; CHECK-NEON-NEXT:    and v6.8b, v1.8b, v6.8b
-; CHECK-NEON-NEXT:    mul x13, x8, x14
-; CHECK-NEON-NEXT:    mov w14, #268435456 // =0x10000000
-; CHECK-NEON-NEXT:    and v4.8b, v1.8b, v4.8b
-; CHECK-NEON-NEXT:    fmov x15, d6
-; CHECK-NEON-NEXT:    fmov d6, x14
-; CHECK-NEON-NEXT:    and v2.8b, v1.8b, v6.8b
-; CHECK-NEON-NEXT:    fmov d6, x9
-; CHECK-NEON-NEXT:    fmov x9, d4
-; CHECK-NEON-NEXT:    fmov d4, x11
-; CHECK-NEON-NEXT:    mov x11, #4294967296 // =0x100000000
-; CHECK-NEON-NEXT:    mul x14, x8, x15
-; CHECK-NEON-NEXT:    eor v3.8b, v3.8b, v6.8b
+; CHECK-NEON-NEXT:    and v3.8b, v1.8b, v6.8b
 ; CHECK-NEON-NEXT:    mul x9, x8, x9
-; CHECK-NEON-NEXT:    fmov d6, x12
-; CHECK-NEON-NEXT:    eor v4.8b, v5.8b, v4.8b
-; CHECK-NEON-NEXT:    fmov d5, x10
-; CHECK-NEON-NEXT:    fmov x10, d2
-; CHECK-NEON-NEXT:    and v2.8b, v1.8b, v7.8b
-; CHECK-NEON-NEXT:    eor v3.8b, v3.8b, v5.8b
-; CHECK-NEON-NEXT:    fmov d5, x11
-; CHECK-NEON-NEXT:    mul x10, x8, x10
-; CHECK-NEON-NEXT:    fmov x11, d2
-; CHECK-NEON-NEXT:    eor v4.8b, v4.8b, v6.8b
-; CHECK-NEON-NEXT:    fmov d6, x13
+; CHECK-NEON-NEXT:    fmov d6, x14
+; CHECK-NEON-NEXT:    eor v2.8b, v2.8b, v4.8b
+; CHECK-NEON-NEXT:    fmov x12, d3
+; CHECK-NEON-NEXT:    fmov d3, x10
+; CHECK-NEON-NEXT:    eor v5.8b, v5.8b, v6.8b
 ; CHECK-NEON-NEXT:    fmov d7, x9
-; CHECK-NEON-NEXT:    and v2.8b, v1.8b, v5.8b
-; CHECK-NEON-NEXT:    fmov d5, x14
-; CHECK-NEON-NEXT:    eor v0.8b, v0.8b, v3.8b
+; CHECK-NEON-NEXT:    mov x9, #68719476736 // =0x1000000000
+; CHECK-NEON-NEXT:    eor v0.8b, v0.8b, v2.8b
+; CHECK-NEON-NEXT:    and v3.8b, v1.8b, v3.8b
+; CHECK-NEON-NEXT:    mul x10, x8, x12
+; CHECK-NEON-NEXT:    mov x12, #34359738368 // =0x800000000
+; CHECK-NEON-NEXT:    fmov d6, x12
+; CHECK-NEON-NEXT:    eor v5.8b, v5.8b, v7.8b
+; CHECK-NEON-NEXT:    fmov x11, d3
+; CHECK-NEON-NEXT:    fmov d3, x9
+; CHECK-NEON-NEXT:    and v6.8b, v1.8b, v6.8b
+; CHECK-NEON-NEXT:    fmov d7, x10
 ; CHECK-NEON-NEXT:    mul x9, x8, x11
-; CHECK-NEON-NEXT:    mov x11, #8589934592 // =0x200000000
-; CHECK-NEON-NEXT:    eor v4.8b, v4.8b, v6.8b
-; CHECK-NEON-NEXT:    fmov d6, x10
-; CHECK-NEON-NEXT:    mov x14, #1152921504606846976 // =0x1000000000000000
-; CHECK-NEON-NEXT:    fmov x10, d2
-; CHECK-NEON-NEXT:    fmov d2, x11
-; CHECK-NEON-NEXT:    mov x11, #17179869184 // =0x400000000
+; CHECK-NEON-NEXT:    and v3.8b, v1.8b, v3.8b
+; CHECK-NEON-NEXT:    mov x11, #137438953472 // =0x2000000000
+; CHECK-NEON-NEXT:    fmov x10, d6
+; CHECK-NEON-NEXT:    fmov d6, x11
+; CHECK-NEON-NEXT:    mov x11, #274877906944 // =0x4000000000
 ; CHECK-NEON-NEXT:    eor v5.8b, v5.8b, v7.8b
-; CHECK-NEON-NEXT:    eor v3.8b, v4.8b, v6.8b
+; CHECK-NEON-NEXT:    fmov d7, #2.00000000
+; CHECK-NEON-NEXT:    fmov x12, d3
+; CHECK-NEON-NEXT:    fmov d3, x11
+; CHECK-NEON-NEXT:    mul x10, x8, x10
+; CHECK-NEON-NEXT:    and v6.8b, v1.8b, v6.8b
+; CHECK-NEON-NEXT:    fmov d4, x9
+; CHECK-NEON-NEXT:    mul x11, x8, x12
+; CHECK-NEON-NEXT:    and v3.8b, v1.8b, v3.8b
+; CHECK-NEON-NEXT:    mov x12, #549755813888 // =0x8000000000
+; CHECK-NEON-NEXT:    fmov x9, d6
+; CHECK-NEON-NEXT:    eor v4.8b, v5.8b, v4.8b
+; CHECK-NEON-NEXT:    fmov d5, x12
+; CHECK-NEON-NEXT:    fmov x12, d3
+; CHECK-NEON-NEXT:    mul x9, x8, x9
+; CHECK-NEON-NEXT:    and v3.8b, v1.8b, v5.8b
+; CHECK-NEON-NEXT:    fmov d5, x10
+; CHECK-NEON-NEXT:    mul x10, x8, x12
+; CHECK-NEON-NEXT:    mov x12, #1099511627776 // =0x10000000000
+; CHECK-NEON-NEXT:    eor v2.8b, v4.8b, v5.8b
+; CHECK-NEON-NEXT:    fmov d4, x12
+; CHECK-NEON-NEXT:    fmov x12, d3
+; CHECK-NEON-NEXT:    fmov d5, x9
+; CHECK-NEON-NEXT:    and v3.8b, v1.8b, v4.8b
+; CHECK-NEON-NEXT:    fmov d4, x11
+; CHECK-NEON-NEXT:    mov x11, #2199023255552 // =0x20000000000
+; CHECK-NEON-NEXT:    mul x9, x8, x12
+; CHECK-NEON-NEXT:    eor v2.8b, v2.8b, v5.8b
+; CHECK-NEON-NEXT:    eor v0.8b, v0.8b, v4.8b
+; CHECK-NEON-NEXT:    fmov d4, x10
+; CHECK-NEON-NEXT:    fmov x10, d3
+; CHECK-NEON-NEXT:    fmov d3, x11
+; CHECK-NEON-NEXT:    mov x11, #4398046511104 // =0x40000000000
+; CHECK-NEON-NEXT:    eor v0.8b, v0.8b, v4.8b
 ; CHECK-NEON-NEXT:    fmov d4, x11
-; CHECK-NEON-NEXT:    and v2.8b, v1.8b, v2.8b
 ; CHECK-NEON-NEXT:    mul x10, x8, x10
-; CHECK-NEON-NEXT:    fmov d6, x9
-; CHECK-NEON-NEXT:    mov x9, #34359738368 // =0x800000000
+; CHECK-NEON-NEXT:    and v3.8b, v1.8b, v3.8b
+; CHECK-NEON-NEXT:    fmov d5, x9
+; CHECK-NEON-NEXT:    mov x9, #8796093022208 // =0x80000000000
 ; CHECK-NEON-NEXT:    and v4.8b, v1.8b, v4.8b
-; CHECK-NEON-NEXT:    eor v0.8b, v0.8b, v3.8b
-; CHECK-NEON-NEXT:    fmov x11, d2
-; CHECK-NEON-NEXT:    fmov d2, x9
-; CHECK-NEON-NEXT:    eor v5.8b, v5.8b, v6.8b
-; CHECK-NEON-NEXT:    fmov d6, x10
+; CHECK-NEON-NEXT:    fmov x11, d3
+; CHECK-NEON-NEXT:    fmov d3, x9
+; CHECK-NEON-NEXT:    eor v2.8b, v2.8b, v5.8b
+; CHECK-NEON-NEXT:    fmov d5, x10
 ; CHECK-NEON-NEXT:    fmov x10, d4
 ; CHECK-NEON-NEXT:    mul x9, x8, x11
-; CHECK-NEON-NEXT:    and v2.8b, v1.8b, v2.8b
-; CHECK-NEON-NEXT:    mov x11, #68719476736 // =0x1000000000
+; CHECK-NEON-NEXT:    and v3.8b, v1.8b, v3.8b
+; CHECK-NEON-NEXT:    mov x11, #17592186044416 // =0x100000000000
 ; CHECK-NEON-NEXT:    fmov d4, x11
-; CHECK-NEON-NEXT:    mov x11, #137438953472 // =0x2000000000
+; CHECK-NEON-NEXT:    mov x11, #35184372088832 // =0x200000000000
+; CHECK-NEON-NEXT:    eor v0.8b, v0.8b, v5.8b
 ; CHECK-NEON-NEXT:    mul x10, x8, x10
-; CHECK-NEON-NEXT:    eor v5.8b, v5.8b, v6.8b
-; CHECK-NEON-NEXT:    fmov x12, d2
-; CHECK-NEON-NEXT:    fmov d2, x11
+; CHECK-NEON-NEXT:    fmov x12, d3
+; CHECK-NEON-NEXT:    fmov d3, x11
 ; CHECK-NEON-NEXT:    and v4.8b, v1.8b, v4.8b
-; CHECK-NEON-NEXT:    fmov d6, x9
+; CHECK-NEON-NEXT:    fmov d5, x9
 ; CHECK-NEON-NEXT:    mul x11, x8, x12
-; CHECK-NEON-NEXT:    and v2.8b, v1.8b, v2.8b
-; CHECK-NEON-NEXT:    mov x12, #274877906944 // =0x4000000000
+; CHECK-NEON-NEXT:    and v3.8b, v1.8b, v3.8b
+; CHECK-NEON-NEXT:    mov x12, #70368744177664 // =0x400000000000
 ; CHECK-NEON-NEXT:    fmov x9, d4
 ; CHECK-NEON-NEXT:    fmov d4, x12
-; CHECK-NEON-NEXT:    mov x12, #549755813888 // =0x8000000000
-; CHECK-NEON-NEXT:    eor v5.8b, v5.8b, v6.8b
-; CHECK-NEON-NEXT:    fmov d3, x10
-; CHECK-NEON-NEXT:    fmov d6, x14
-; CHECK-NEON-NEXT:    fmov x13, d2
-; CHECK-NEON-NEXT:    fmov d2, x12
-; CHECK-NEON-NEXT:    mov x14, #2305843009213693952 // =0x2000000000000000
+; CHECK-NEON-NEXT:    mov x12, #140737488355328 // =0x800000000000
+; CHECK-NEON-NEXT:    eor v2.8b, v2.8b, v5.8b
+; CHECK-NEON-NEXT:    fmov d5, x10
+; CHECK-NEON-NEXT:    fmov x13, d3
+; CHECK-NEON-NEXT:    fmov d3, x12
 ; CHECK-NEON-NEXT:    and v4.8b, v1.8b, v4.8b
 ; CHECK-NEON-NEXT:    mul x9, x8, x9
-; CHECK-NEON-NEXT:    eor v3.8b, v5.8b, v3.8b
+; CHECK-NEON-NEXT:    eor v0.8b, v0.8b, v5.8b
 ; CHECK-NEON-NEXT:    fmov d5, x11
 ; CHECK-NEON-NEXT:    mul x12, x8, x13
-; CHECK-NEON-NEXT:    and v2.8b, v1.8b, v2.8b
-; CHECK-NEON-NEXT:    mov x13, #1099511627776 // =0x10000000000
+; CHECK-NEON-NEXT:    mov x13, #281474976710656 // =0x1000000000000
+; CHECK-NEON-NEXT:    and v3.8b, v1.8b, v3.8b
 ; CHECK-NEON-NEXT:    fmov x10, d4
 ; CHECK-NEON-NEXT:    fmov d4, x13
-; CHECK-NEON-NEXT:    eor v3.8b, v3.8b, v5.8b
-; CHECK-NEON-NEXT:    fmov x11, d2
-; CHECK-NEON-NEXT:    and v2.8b, v1.8b, v4.8b
-; CHECK-NEON-NEXT:    fmov d4, x9
+; CHECK-NEON-NEXT:    mov x13, #562949953421312 // =0x2000000000000
+; CHECK-NEON-NEXT:    eor v2.8b, v2.8b, v5.8b
+; CHECK-NEON-NEXT:    fmov x11, d3
+; CHECK-NEON-NEXT:    fmov d3, x13
+; CHECK-NEON-NEXT:    fmov d5, x9
+; CHECK-NEON-NEXT:    and v4.8b, v1.8b, v4.8b
 ; CHECK-NEON-NEXT:    mul x10, x8, x10
-; CHECK-NEON-NEXT:    mul x9, x8, x11
-; CHECK-NEON-NEXT:    mov x11, #2199023255552 // =0x20000000000
-; CHECK-NEON-NEXT:    eor v0.8b, v0.8b, v4.8b
-; CHECK-NEON-NEXT:    fmov d4, x11
-; CHECK-NEON-NEXT:    fmov x11, d2
-; CHECK-NEON-NEXT:    fmov d5, x10
-; CHECK-NEON-NEXT:    and v2.8b, v1.8b, v4.8b
-; CHECK-NEON-NEXT:    fmov d4, x12
-; CHECK-NEON-NEXT:    mul x10, x8, x11
-; CHECK-NEON-NEXT:    mov x11, #4398046511104 // =0x40000000000
+; CHECK-NEON-NEXT:    mul x11, x8, x11
+; CHECK-NEON-NEXT:    and v3.8b, v1.8b, v3.8b
 ; CHECK-NEON-NEXT:    eor v0.8b, v0.8b, v5.8b
-; CHECK-NEON-NEXT:    eor v3.8b, v3.8b, v4.8b
+; CHECK-NEON-NEXT:    fmov x9, d4
+; CHECK-NEON-NEXT:    fmov d4, x12
+; CHECK-NEON-NEXT:    mov x12, #1125899906842624 // =0x4000000000000
+; CHECK-NEON-NEXT:    eor v2.8b, v2.8b, v4.8b
+; CHECK-NEON-NEXT:    fmov d4, x12
+; CHECK-NEON-NEXT:    mul x9, x8, x9
+; CHECK-NEON-NEXT:    fmov x12, d3
+; CHECK-NEON-NEXT:    fmov d5, x11
+; CHECK-NEON-NEXT:    mov x11, #2251799813685248 // =0x8000000000000
+; CHECK-NEON-NEXT:    and v3.8b, v1.8b, v4.8b
+; CHECK-NEON-NEXT:    fmov d4, x10
+; CHECK-NEON-NEXT:    mul x10, x8, x12
+; CHECK-NEON-NEXT:    eor v2.8b, v2.8b, v5.8b
+; CHECK-NEON-NEXT:    eor v0.8b, v0.8b, v4.8b
 ; CHECK-NEON-NEXT:    fmov d4, x9
-; CHECK-NEON-NEXT:    fmov x9, d2
-; CHECK-NEON-NEXT:    fmov d2, x11
-; CHECK-NEON-NEXT:    mov x11, #8796093022208 // =0x80000000000
-; CHECK-NEON-NEXT:    fmov d5, x10
-; CHECK-NEON-NEXT:    mov x10, #17592186044416 // =0x100000000000
-; CHECK-NEON-NEXT:    eor v3.8b, v3.8b, v4.8b
+; CHECK-NEON-NEXT:    fmov x9, d3
+; CHECK-NEON-NEXT:    fmov d3, x11
+; CHECK-NEON-NEXT:    mov x11, #4503599627370496 // =0x10000000000000
+; CHECK-NEON-NEXT:    eor v0.8b, v0.8b, v4.8b
 ; CHECK-NEON-NEXT:    fmov d4, x11
 ; CHECK-NEON-NEXT:    mul x9, x8, x9
-; CHECK-NEON-NEXT:    and v2.8b, v1.8b, v2.8b
-; CHECK-NEON-NEXT:    eor v0.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT:    and v3.8b, v1.8b, v3.8b
+; CHECK-NEON-NEXT:    fmov d5, x10
+; CHECK-NEON-NEXT:    mov x10, #9007199254740992 // =0x20000000000000
 ; CHECK-NEON-NEXT:    and v4.8b, v1.8b, v4.8b
-; CHECK-NEON-NEXT:    fmov x11, d2
-; CHECK-NEON-NEXT:    fmov d2, x10
+; CHECK-NEON-NEXT:    fmov x11, d3
+; CHECK-NEON-NEXT:    fmov d3, x10
+; CHECK-NEON-NEXT:    eor v2.8b, v2.8b, v5.8b
 ; CHECK-NEON-NEXT:    fmov d5, x9
 ; CHECK-NEON-NEXT:    fmov x9, d4
 ; CHECK-NEON-NEXT:    mul x10, x8, x11
-; CHECK-NEON-NEXT:    and v2.8b, v1.8b, v2.8b
-; CHECK-NEON-NEXT:    mov x11, #35184372088832 // =0x200000000000
+; CHECK-NEON-NEXT:    and v3.8b, v1.8b, v3.8b
+; CHECK-NEON-NEXT:    mov x11, #18014398509481984 // =0x40000000000000
 ; CHECK-NEON-NEXT:    fmov d4, x11
-; CHECK-NEON-NEXT:    mov x11, #70368744177664 // =0x400000000000
-; CHECK-NEON-NEXT:    eor v3.8b, v3.8b, v5.8b
+; CHECK-NEON-NEXT:    mov x11, #36028797018963968 // =0x80000000000000
+; CHECK-NEON-NEXT:    eor v0.8b, v0.8b, v5.8b
 ; CHECK-NEON-NEXT:    mul x9, x8, x9
-; CHECK-NEON-NEXT:    fmov x12, d2
-; CHECK-NEON-NEXT:    fmov d2, x11
+; CHECK-NEON-NEXT:    fmov x12, d3
+; CHECK-NEON-NEXT:    fmov d3, x11
 ; CHECK-NEON-NEXT:    and v4.8b, v1.8b, v4.8b
 ; CHECK-NEON-NEXT:    fmov d5, x10
 ; CHECK-NEON-NEXT:    mul x11, x8, x12
-; CHECK-NEON-NEXT:    and v2.8b, v1.8b, v2.8b
-; CHECK-NEON-NEXT:    mov x12, #140737488355328 // =0x800000000000
+; CHECK-NEON-NEXT:    and v3.8b, v1.8b, v3.8b
+; CHECK-NEON-NEXT:    mov x12, #72057594037927936 // =0x100000000000000
 ; CHECK-NEON-NEXT:    fmov x10, d4
 ; CHECK-NEON-NEXT:    fmov d4, x12
-; CHECK-NEON-NEXT:    mov x12, #281474976710656 // =0x1000000000000
-; CHECK-NEON-NEXT:    eor v0.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT:    mov x12, #144115188075855872 // =0x200000000000000
+; CHECK-NEON-NEXT:    eor v2.8b, v2.8b, v5.8b
 ; CHECK-NEON-NEXT:    fmov d5, x9
-; CHECK-NEON-NEXT:    fmov x13, d2
-; CHECK-NEON-NEXT:    fmov d2, x12
-; CHECK-NEON-NEXT:    and v4.8b, v1.8b, v4.8b
-; CHECK-NEON-NEXT:    mul x10, x8, x10
-; CHECK-NEON-NEXT:    eor v3.8b, v3.8b, v5.8b
-; CHECK-NEON-NEXT:    fmov d5, x11
-; CHECK-NEON-NEXT:    mul x12, x8, x13
-; CHECK-NEON-NEXT:    mov x13, #562949953421312 // =0x2000000000000
-; CHECK-NEON-NEXT:    and v2.8b, v1.8b, v2.8b
-; CHECK-NEON-NEXT:    fmov x9, d4
-; CHECK-NEON-NEXT:    fmov d4, x13
-; CHECK-NEON-NEXT:    mov x13, #1125899906842624 // =0x4000000000000
-; CHECK-NEON-NEXT:    eor v0.8b, v0.8b, v5.8b
-; CHECK-NEON-NEXT:    fmov x11, d2
-; CHECK-NEON-NEXT:    fmov d2, x13
-; CHECK-NEON-NEXT:    fmov d5, x10
+; CHECK-NEON-NEXT:    fmov x13, d3
+; CHECK-NEON-NEXT:    fmov d3, x12
 ; CHECK-NEON-NEXT:    and v4.8b, v1.8b, v4.8b
-; CHECK-NEON-NEXT:    mul x9, x8, x9
-; CHECK-NEON-NEXT:    and v2.8b, v1.8b, v2.8b
-; CHECK-NEON-NEXT:    mul x11, x8, x11
-; CHECK-NEON-NEXT:    eor v3.8b, v3.8b, v5.8b
-; CHECK-NEON-NEXT:    fmov x10, d4
-; CHECK-NEON-NEXT:    fmov d4, x12
-; CHECK-NEON-NEXT:    mov x12, #2251799813685248 // =0x8000000000000
-; CHECK-NEON-NEXT:    eor v0.8b, v0.8b, v4.8b
-; CHECK-NEON-NEXT:    fmov d4, x12
 ; CHECK-NEON-NEXT:    mul x10, x8, x10
-; CHECK-NEON-NEXT:    fmov x12, d2
-; CHECK-NEON-NEXT:    fmov d5, x11
-; CHECK-NEON-NEXT:    mov x11, #4503599627370496 // =0x10000000000000
-; CHECK-NEON-NEXT:    and v2.8b, v1.8b, v4.8b
-; CHECK-NEON-NEXT:    fmov d4, x9
-; CHECK-NEON-NEXT:    mul x9, x8, x12
 ; CHECK-NEON-NEXT:    eor v0.8b, v0.8b, v5.8b
-; CHECK-NEON-NEXT:    eor v3.8b, v3.8b, v4.8b
-; CHECK-NEON-NEXT:    fmov d4, x10
-; CHECK-NEON-NEXT:    fmov x10, d2
-; CHECK-NEON-NEXT:    fmov d2, x11
-; CHECK-NEON-NEXT:    mov x11, #9007199254740992 // =0x20000000000000
-; CHECK-NEON-NEXT:    eor v3.8b, v3.8b, v4.8b
+; CHECK-NEON-NEXT:    mul x9, x8, x13
+; CHECK-NEON-NEXT:    mov x13, #288230376151711744 // =0x400000000000000
+; CHECK-NEON-NEXT:    and v3.8b, v1.8b, v3.8b
+; CHECK-NEON-NEXT:    fmov x12, d4
 ; CHECK-NEON-NEXT:    fmov d4, x11
+; CHECK-NEON-NEXT:    fmov d5, x13
+; CHECK-NEON-NEXT:    mov x13, #576460752303423488 // =0x800000000000000
+; CHECK-NEON-NEXT:    fmov d6, x10
+; CHECK-NEON-NEXT:    mul x11, x8, x12
+; CHECK-NEON-NEXT:    fmov x12, d3
+; CHECK-NEON-NEXT:    and v3.8b, v1.8b, v5.8b
+; CHECK-NEON-NEXT:    eor v2.8b, v2.8b, v4.8b
+; CHECK-NEON-NEXT:    fmov d4, x13
+; CHECK-NEON-NEXT:    mov x13, #1152921504606846976 // =0x1000000000000000
+; CHECK-NEON-NEXT:    movi d5, #0000000000000000
+; CHECK-NEON-NEXT:    eor v0.8b, v0.8b, v6.8b
+; CHECK-NEON-NEXT:    fmov x10, d3
+; CHECK-NEON-NEXT:    fmov d3, x13
+; CHECK-NEON-NEXT:    mov x13, #2305843009213693952 // =0x2000000000000000
+; CHECK-NEON-NEXT:    and v4.8b, v1.8b, v4.8b
+; CHECK-NEON-NEXT:    mul x12, x8, x12
+; CHECK-NEON-NEXT:    and v3.8b, v1.8b, v3.8b
+; CHECK-NEON-NEXT:    fneg d5, d5
 ; CHECK-NEON-NEXT:    mul x10, x8, x10
-; CHECK-NEON-NEXT:    and v2.8b, v1.8b, v2.8b
-; CHECK-NEON-NEXT:    fmov d5, x9
-; CHECK-NEON-NEXT:    mov x9, #18014398509481984 // =0x40000000000000
+; CHECK-NEON-NEXT:    fmov x14, d4
+; CHECK-NEON-NEXT:    fmov d4, x13
+; CHECK-NEON-NEXT:    mul x13, x8, x14
 ; CHECK-NEON-NEXT:    and v4.8b, v1.8b, v4.8b
-; CHECK-NEON-NEXT:    fmov x11, d2
-; CHECK-NEON-NEXT:    fmov d2, x9
-; CHECK-NEON-NEXT:    eor v0.8b, v0.8b, v5.8b
-; CHECK-NEON-NEXT:    fmov d5, x10
-; CHECK-NEON-NEXT:    fmov x10, d4
-; CHECK-NEON-NEXT:    mul x9, x8, x11
-; CHECK-NEON-NEXT:    and v2.8b, v1.8b, v2.8b
-; CHECK-NEON-NEXT:    mov x11, #36028797018963968 // =0x80000000000000
+; CHECK-NEON-NEXT:    fmov x14, d3
+; CHECK-NEON-NEXT:    fmov d3, x9
+; CHECK-NEON-NEXT:    mul x9, x8, x14
+; CHECK-NEON-NEXT:    fmov x14, d4
 ; CHECK-NEON-NEXT:    fmov d4, x11
-; CHECK-NEON-NEXT:    eor v3.8b, v3.8b, v5.8b
-; CHECK-NEON-NEXT:    mul x12, x8, x10
-; CHECK-NEON-NEXT:    mov x10, #72057594037927936 // =0x100000000000000
-; CHECK-NEON-NEXT:    fmov x11, d2
-; CHECK-NEON-NEXT:    fmov d2, x10
-; CHECK-NEON-NEXT:    and v4.8b, v1.8b, v4.8b
-; CHECK-NEON-NEXT:    fmov d5, x9
-; CHECK-NEON-NEXT:    mul x10, x8, x11
-; CHECK-NEON-NEXT:    and v2.8b, v1.8b, v2.8b
-; CHECK-NEON-NEXT:    mov x11, #144115188075855872 // =0x200000000000000
-; CHECK-NEON-NEXT:    fmov x9, d4
-; CHECK-NEON-NEXT:    fmov d4, x11
-; CHECK-NEON-NEXT:    mov x11, #288230376151711744 // =0x400000000000000
-; CHECK-NEON-NEXT:    eor v0.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT:    eor v2.8b, v2.8b, v3.8b
+; CHECK-NEON-NEXT:    and v3.8b, v1.8b, v7.8b
+; CHECK-NEON-NEXT:    and v1.8b, v1.8b, v5.8b
 ; CHECK-NEON-NEXT:    fmov d5, x12
-; CHECK-NEON-NEXT:    fmov x13, d2
-; CHECK-NEON-NEXT:    fmov d2, x11
-; CHECK-NEON-NEXT:    and v4.8b, v1.8b, v4.8b
-; CHECK-NEON-NEXT:    mul x9, x8, x9
-; CHECK-NEON-NEXT:    eor v3.8b, v3.8b, v5.8b
-; CHECK-NEON-NEXT:    fmov d5, x10
-; CHECK-NEON-NEXT:    mul x11, x8, x13
-; CHECK-NEON-NEXT:    mov x13, #576460752303423488 // =0x800000000000000
-; CHECK-NEON-NEXT:    and v2.8b, v1.8b, v2.8b
-; CHECK-NEON-NEXT:    fmov x12, d4
-; CHECK-NEON-NEXT:    fmov d4, x13
-; CHECK-NEON-NEXT:    eor v0.8b, v0.8b, v5.8b
-; CHECK-NEON-NEXT:    fmov d5, x14
-; CHECK-NEON-NEXT:    mov x14, #4611686018427387904 // =0x4000000000000000
-; CHECK-NEON-NEXT:    fmov x13, d2
-; CHECK-NEON-NEXT:    movi d2, #0000000000000000
-; CHECK-NEON-NEXT:    and v4.8b, v1.8b, v4.8b
-; CHECK-NEON-NEXT:    mul x12, x8, x12
-; CHECK-NEON-NEXT:    and v5.8b, v1.8b, v5.8b
-; CHECK-NEON-NEXT:    mul x10, x8, x13
-; CHECK-NEON-NEXT:    fmov x13, d4
-; CHECK-NEON-NEXT:    and v4.8b, v1.8b, v6.8b
-; CHECK-NEON-NEXT:    fneg d2, d2
-; CHECK-NEON-NEXT:    fmov d6, x9
-; CHECK-NEON-NEXT:    fmov x9, d4
-; CHECK-NEON-NEXT:    fmov d4, x14
-; CHECK-NEON-NEXT:    mul x13, x8, x13
-; CHECK-NEON-NEXT:    eor v3.8b, v3.8b, v6.8b
-; CHECK-NEON-NEXT:    fmov x14, d5
-; CHECK-NEON-NEXT:    fmov d5, x11
-; CHECK-NEON-NEXT:    and v4.8b, v1.8b, v4.8b
-; CHECK-NEON-NEXT:    and v1.8b, v1.8b, v2.8b
-; CHECK-NEON-NEXT:    fmov d2, x12
-; CHECK-NEON-NEXT:    mul x9, x8, x9
-; CHECK-NEON-NEXT:    eor v0.8b, v0.8b, v5.8b
 ; CHECK-NEON-NEXT:    mul x11, x8, x14
-; CHECK-NEON-NEXT:    eor v2.8b, v3.8b, v2.8b
-; CHECK-NEON-NEXT:    fmov x12, d4
+; CHECK-NEON-NEXT:    eor v0.8b, v0.8b, v4.8b
+; CHECK-NEON-NEXT:    fmov x12, d3
 ; CHECK-NEON-NEXT:    fmov d3, x10
 ; CHECK-NEON-NEXT:    fmov x10, d1
+; CHECK-NEON-NEXT:    eor v2.8b, v2.8b, v5.8b
 ; CHECK-NEON-NEXT:    fmov d1, x13
 ; CHECK-NEON-NEXT:    mul x12, x8, x12
 ; CHECK-NEON-NEXT:    eor v0.8b, v0.8b, v3.8b
 ; CHECK-NEON-NEXT:    eor v1.8b, v2.8b, v1.8b
 ; CHECK-NEON-NEXT:    fmov d2, x9
-; CHECK-NEON-NEXT:    mul x8, x8, x10
 ; CHECK-NEON-NEXT:    fmov d3, x11
+; CHECK-NEON-NEXT:    mul x8, x8, x10
 ; CHECK-NEON-NEXT:    eor v0.8b, v0.8b, v2.8b
 ; CHECK-NEON-NEXT:    eor v1.8b, v1.8b, v3.8b
 ; CHECK-NEON-NEXT:    fmov d2, x12
@@ -6452,7 +6451,7 @@ define <2 x i64> @clmulr_v2i64_neon(<2 x i64> %a, <2 x i64> %b) nounwind {
 ; CHECK-NEON-NEXT:    eor v2.16b, v3.16b, v2.16b
 ; CHECK-NEON-NEXT:    mov v17.d[1], x0
 ; CHECK-NEON-NEXT:    eor v5.16b, v6.16b, v21.16b
-; CHECK-NEON-NEXT:    movi v6.2d, #0000000000000000
+; CHECK-NEON-NEXT:    movi d6, #0000000000000000
 ; CHECK-NEON-NEXT:    mul x0, x9, x2
 ; CHECK-NEON-NEXT:    fmov x2, d16
 ; CHECK-NEON-NEXT:    fmov v16.2d, #2.00000000
@@ -6565,7 +6564,9 @@ define <1 x i64> @clmulr_v1i64_neon(<1 x i64> %a, <1 x i64> %b) nounwind {
 ; CHECK-NEON-NEXT:    mov w12, #32 // =0x20
 ; CHECK-NEON-NEXT:    mov w13, #64 // =0x40
 ; CHECK-NEON-NEXT:    mov w14, #128 // =0x80
+; CHECK-NEON-NEXT:    mov w16, #512 // =0x200
 ; CHECK-NEON-NEXT:    mov w15, #256 // =0x100
+; CHECK-NEON-NEXT:    movi d18, #0000000000000000
 ; CHECK-NEON-NEXT:    rbit v0.8b, v1.8b
 ; CHECK-NEON-NEXT:    fmov d1, x8
 ; CHECK-NEON-NEXT:    mov w8, #1 // =0x1
@@ -6582,11 +6583,8 @@ define <1 x i64> @clmulr_v1i64_neon(<1 x i64> %a, <1 x i64> %b) nounwind {
 ; CHECK-NEON-NEXT:    fmov x10, d3
 ; CHECK-NEON-NEXT:    fmov d3, x11
 ; CHECK-NEON-NEXT:    and v2.8b, v0.8b, v2.8b
-; CHECK-NEON-NEXT:    fmov d4, x14
-; CHECK-NEON-NEXT:    mov w14, #512 // =0x200
 ; CHECK-NEON-NEXT:    fmov x11, d1
 ; CHECK-NEON-NEXT:    mul x9, x8, x9
-; CHECK-NEON-NEXT:    fmov d5, x14
 ; CHECK-NEON-NEXT:    and v1.8b, v0.8b, v3.8b
 ; CHECK-NEON-NEXT:    fmov d3, x12
 ; CHECK-NEON-NEXT:    fmov x12, d2
@@ -6595,412 +6593,412 @@ define <1 x i64> @clmulr_v1i64_neon(<1 x i64> %a, <1 x i64> %b) nounwind {
 ; CHECK-NEON-NEXT:    fmov d3, x13
 ; CHECK-NEON-NEXT:    fmov x13, d1
 ; CHECK-NEON-NEXT:    mul x11, x8, x11
+; CHECK-NEON-NEXT:    fmov d4, x9
 ; CHECK-NEON-NEXT:    and v1.8b, v0.8b, v3.8b
+; CHECK-NEON-NEXT:    fmov d3, x14
 ; CHECK-NEON-NEXT:    mul x12, x8, x12
-; CHECK-NEON-NEXT:    fmov d3, x15
 ; CHECK-NEON-NEXT:    fmov x14, d2
-; CHECK-NEON-NEXT:    and v2.8b, v0.8b, v4.8b
-; CHECK-NEON-NEXT:    fmov d4, x9
-; CHECK-NEON-NEXT:    mov w15, #1024 // =0x400
 ; CHECK-NEON-NEXT:    mul x13, x8, x13
-; CHECK-NEON-NEXT:    fmov x9, d1
-; CHECK-NEON-NEXT:    and v1.8b, v0.8b, v5.8b
-; CHECK-NEON-NEXT:    fmov d5, x10
-; CHECK-NEON-NEXT:    and v3.8b, v0.8b, v3.8b
-; CHECK-NEON-NEXT:    fmov x10, d2
-; CHECK-NEON-NEXT:    fmov d2, x11
-; CHECK-NEON-NEXT:    fmov d6, x12
+; CHECK-NEON-NEXT:    and v2.8b, v0.8b, v3.8b
+; CHECK-NEON-NEXT:    fmov d3, x16
+; CHECK-NEON-NEXT:    fmov x17, d1
+; CHECK-NEON-NEXT:    fmov d1, x15
+; CHECK-NEON-NEXT:    fmov d5, x11
 ; CHECK-NEON-NEXT:    mul x14, x8, x14
-; CHECK-NEON-NEXT:    mov w11, #2048 // =0x800
-; CHECK-NEON-NEXT:    eor v4.8b, v5.8b, v4.8b
-; CHECK-NEON-NEXT:    fmov d5, x15
-; CHECK-NEON-NEXT:    fmov x12, d3
-; CHECK-NEON-NEXT:    mul x10, x8, x10
-; CHECK-NEON-NEXT:    fmov d3, x11
-; CHECK-NEON-NEXT:    eor v2.8b, v2.8b, v6.8b
-; CHECK-NEON-NEXT:    and v5.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT:    fmov d6, x12
+; CHECK-NEON-NEXT:    mov w12, #4096 // =0x1000
+; CHECK-NEON-NEXT:    fmov x9, d2
+; CHECK-NEON-NEXT:    and v2.8b, v0.8b, v3.8b
+; CHECK-NEON-NEXT:    fmov d3, x10
+; CHECK-NEON-NEXT:    and v1.8b, v0.8b, v1.8b
+; CHECK-NEON-NEXT:    mov w10, #2048 // =0x800
+; CHECK-NEON-NEXT:    mul x15, x8, x17
+; CHECK-NEON-NEXT:    eor v3.8b, v3.8b, v4.8b
+; CHECK-NEON-NEXT:    eor v4.8b, v5.8b, v6.8b
+; CHECK-NEON-NEXT:    fmov d5, x10
+; CHECK-NEON-NEXT:    fmov x10, d1
+; CHECK-NEON-NEXT:    fmov x11, d2
+; CHECK-NEON-NEXT:    fmov d2, x13
 ; CHECK-NEON-NEXT:    mul x9, x8, x9
-; CHECK-NEON-NEXT:    and v3.8b, v0.8b, v3.8b
-; CHECK-NEON-NEXT:    mul x11, x8, x12
+; CHECK-NEON-NEXT:    mov w13, #8192 // =0x2000
+; CHECK-NEON-NEXT:    eor v3.8b, v3.8b, v4.8b
+; CHECK-NEON-NEXT:    fmov d4, x12
+; CHECK-NEON-NEXT:    and v1.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT:    fmov d5, x14
+; CHECK-NEON-NEXT:    mul x10, x8, x10
+; CHECK-NEON-NEXT:    mov w12, #1024 // =0x400
+; CHECK-NEON-NEXT:    and v4.8b, v0.8b, v4.8b
+; CHECK-NEON-NEXT:    mul x11, x8, x11
+; CHECK-NEON-NEXT:    eor v2.8b, v2.8b, v5.8b
+; CHECK-NEON-NEXT:    fmov d5, x12
 ; CHECK-NEON-NEXT:    fmov x12, d1
-; CHECK-NEON-NEXT:    fmov d1, x13
-; CHECK-NEON-NEXT:    mov w13, #4096 // =0x1000
-; CHECK-NEON-NEXT:    eor v2.8b, v4.8b, v2.8b
-; CHECK-NEON-NEXT:    fmov d4, x14
-; CHECK-NEON-NEXT:    fmov x14, d5
-; CHECK-NEON-NEXT:    fmov d5, x13
+; CHECK-NEON-NEXT:    fmov d6, x9
+; CHECK-NEON-NEXT:    fmov x14, d4
+; CHECK-NEON-NEXT:    fmov d4, x13
+; CHECK-NEON-NEXT:    fmov d7, x10
+; CHECK-NEON-NEXT:    and v1.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT:    mov w13, #16384 // =0x4000
 ; CHECK-NEON-NEXT:    mul x12, x8, x12
-; CHECK-NEON-NEXT:    eor v1.8b, v1.8b, v4.8b
-; CHECK-NEON-NEXT:    and v4.8b, v0.8b, v5.8b
-; CHECK-NEON-NEXT:    fmov d5, x10
+; CHECK-NEON-NEXT:    fmov d5, x15
+; CHECK-NEON-NEXT:    and v4.8b, v0.8b, v4.8b
+; CHECK-NEON-NEXT:    mul x9, x8, x14
+; CHECK-NEON-NEXT:    mov w14, #32768 // =0x8000
+; CHECK-NEON-NEXT:    fmov x10, d1
+; CHECK-NEON-NEXT:    eor v1.8b, v6.8b, v7.8b
+; CHECK-NEON-NEXT:    fmov d6, x13
+; CHECK-NEON-NEXT:    fmov d7, x14
+; CHECK-NEON-NEXT:    eor v2.8b, v2.8b, v5.8b
+; CHECK-NEON-NEXT:    fmov x13, d4
+; CHECK-NEON-NEXT:    mul x10, x8, x10
+; CHECK-NEON-NEXT:    and v4.8b, v0.8b, v6.8b
 ; CHECK-NEON-NEXT:    fmov d6, x11
-; CHECK-NEON-NEXT:    mov w11, #8192 // =0x2000
-; CHECK-NEON-NEXT:    fmov x10, d3
-; CHECK-NEON-NEXT:    mul x13, x8, x14
-; CHECK-NEON-NEXT:    eor v3.8b, v5.8b, v6.8b
-; CHECK-NEON-NEXT:    fmov d5, x9
+; CHECK-NEON-NEXT:    and v5.8b, v0.8b, v7.8b
+; CHECK-NEON-NEXT:    fmov d7, x9
+; CHECK-NEON-NEXT:    eor v2.8b, v3.8b, v2.8b
+; CHECK-NEON-NEXT:    mul x11, x8, x13
 ; CHECK-NEON-NEXT:    fmov x9, d4
-; CHECK-NEON-NEXT:    fmov d4, x11
-; CHECK-NEON-NEXT:    mov w11, #16384 // =0x4000
+; CHECK-NEON-NEXT:    eor v1.8b, v1.8b, v6.8b
 ; CHECK-NEON-NEXT:    fmov d6, x12
+; CHECK-NEON-NEXT:    fmov d4, x10
+; CHECK-NEON-NEXT:    fmov x10, d5
+; CHECK-NEON-NEXT:    mul x9, x8, x9
+; CHECK-NEON-NEXT:    eor v6.8b, v6.8b, v7.8b
+; CHECK-NEON-NEXT:    fmov d3, x11
+; CHECK-NEON-NEXT:    mov w11, #65536 // =0x10000
+; CHECK-NEON-NEXT:    eor v1.8b, v1.8b, v4.8b
+; CHECK-NEON-NEXT:    fmov d4, x11
 ; CHECK-NEON-NEXT:    mul x10, x8, x10
-; CHECK-NEON-NEXT:    eor v1.8b, v1.8b, v5.8b
+; CHECK-NEON-NEXT:    mov w11, #131072 // =0x20000
+; CHECK-NEON-NEXT:    eor v3.8b, v6.8b, v3.8b
 ; CHECK-NEON-NEXT:    fmov d5, x11
-; CHECK-NEON-NEXT:    mul x9, x8, x9
 ; CHECK-NEON-NEXT:    and v4.8b, v0.8b, v4.8b
-; CHECK-NEON-NEXT:    eor v3.8b, v3.8b, v6.8b
-; CHECK-NEON-NEXT:    fmov d6, x13
 ; CHECK-NEON-NEXT:    eor v1.8b, v2.8b, v1.8b
-; CHECK-NEON-NEXT:    fmov x11, d4
-; CHECK-NEON-NEXT:    and v4.8b, v0.8b, v5.8b
-; CHECK-NEON-NEXT:    eor v2.8b, v3.8b, v6.8b
-; CHECK-NEON-NEXT:    fmov d3, x10
-; CHECK-NEON-NEXT:    fmov d5, x9
-; CHECK-NEON-NEXT:    mov w9, #32768 // =0x8000
+; CHECK-NEON-NEXT:    fmov d2, x9
+; CHECK-NEON-NEXT:    mov w9, #262144 // =0x40000
+; CHECK-NEON-NEXT:    eor v2.8b, v3.8b, v2.8b
+; CHECK-NEON-NEXT:    and v3.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT:    fmov d5, x10
 ; CHECK-NEON-NEXT:    fmov x10, d4
 ; CHECK-NEON-NEXT:    fmov d4, x9
-; CHECK-NEON-NEXT:    mul x11, x8, x11
-; CHECK-NEON-NEXT:    eor v1.8b, v1.8b, v2.8b
-; CHECK-NEON-NEXT:    eor v3.8b, v3.8b, v5.8b
-; CHECK-NEON-NEXT:    mul x9, x8, x10
+; CHECK-NEON-NEXT:    eor v2.8b, v2.8b, v5.8b
+; CHECK-NEON-NEXT:    fmov x11, d3
 ; CHECK-NEON-NEXT:    and v4.8b, v0.8b, v4.8b
-; CHECK-NEON-NEXT:    mov w10, #65536 // =0x10000
+; CHECK-NEON-NEXT:    mul x9, x8, x10
+; CHECK-NEON-NEXT:    mov w10, #524288 // =0x80000
+; CHECK-NEON-NEXT:    fmov d3, x10
+; CHECK-NEON-NEXT:    mov w10, #1048576 // =0x100000
+; CHECK-NEON-NEXT:    eor v1.8b, v1.8b, v2.8b
+; CHECK-NEON-NEXT:    mul x11, x8, x11
+; CHECK-NEON-NEXT:    fmov x12, d4
+; CHECK-NEON-NEXT:    fmov d4, x10
+; CHECK-NEON-NEXT:    mov w10, #2097152 // =0x200000
 ; CHECK-NEON-NEXT:    fmov d2, x10
-; CHECK-NEON-NEXT:    mov w10, #131072 // =0x20000
+; CHECK-NEON-NEXT:    and v3.8b, v0.8b, v3.8b
+; CHECK-NEON-NEXT:    mov w10, #4194304 // =0x400000
+; CHECK-NEON-NEXT:    and v4.8b, v0.8b, v4.8b
+; CHECK-NEON-NEXT:    mul x12, x8, x12
+; CHECK-NEON-NEXT:    and v2.8b, v0.8b, v2.8b
+; CHECK-NEON-NEXT:    fmov x13, d3
+; CHECK-NEON-NEXT:    fmov d3, x9
 ; CHECK-NEON-NEXT:    fmov d5, x11
-; CHECK-NEON-NEXT:    fmov x11, d4
+; CHECK-NEON-NEXT:    fmov x9, d4
 ; CHECK-NEON-NEXT:    fmov d4, x10
-; CHECK-NEON-NEXT:    and v2.8b, v0.8b, v2.8b
+; CHECK-NEON-NEXT:    fmov x11, d2
+; CHECK-NEON-NEXT:    mul x13, x8, x13
 ; CHECK-NEON-NEXT:    eor v3.8b, v3.8b, v5.8b
-; CHECK-NEON-NEXT:    fmov d5, x9
-; CHECK-NEON-NEXT:    mul x10, x8, x11
-; CHECK-NEON-NEXT:    mov w11, #262144 // =0x40000
+; CHECK-NEON-NEXT:    fmov d5, x12
+; CHECK-NEON-NEXT:    mul x10, x8, x9
 ; CHECK-NEON-NEXT:    and v4.8b, v0.8b, v4.8b
-; CHECK-NEON-NEXT:    fmov x9, d2
-; CHECK-NEON-NEXT:    fmov d2, x11
-; CHECK-NEON-NEXT:    mov w11, #524288 // =0x80000
+; CHECK-NEON-NEXT:    mov w9, #8388608 // =0x800000
+; CHECK-NEON-NEXT:    fmov d2, x9
+; CHECK-NEON-NEXT:    mul x9, x8, x11
+; CHECK-NEON-NEXT:    mov w11, #16777216 // =0x1000000
 ; CHECK-NEON-NEXT:    eor v3.8b, v3.8b, v5.8b
-; CHECK-NEON-NEXT:    fmov x12, d4
+; CHECK-NEON-NEXT:    fmov x14, d4
 ; CHECK-NEON-NEXT:    fmov d4, x11
-; CHECK-NEON-NEXT:    mov w11, #1048576 // =0x100000
+; CHECK-NEON-NEXT:    fmov d5, x13
+; CHECK-NEON-NEXT:    and v2.8b, v0.8b, v2.8b
+; CHECK-NEON-NEXT:    mul x11, x8, x14
+; CHECK-NEON-NEXT:    and v4.8b, v0.8b, v4.8b
+; CHECK-NEON-NEXT:    mov w14, #33554432 // =0x2000000
+; CHECK-NEON-NEXT:    fmov x12, d2
+; CHECK-NEON-NEXT:    fmov d2, x14
+; CHECK-NEON-NEXT:    mov w14, #67108864 // =0x4000000
+; CHECK-NEON-NEXT:    eor v3.8b, v3.8b, v5.8b
+; CHECK-NEON-NEXT:    fmov x15, d4
+; CHECK-NEON-NEXT:    fmov d4, x14
 ; CHECK-NEON-NEXT:    and v2.8b, v0.8b, v2.8b
-; CHECK-NEON-NEXT:    mul x9, x8, x9
-; CHECK-NEON-NEXT:    fmov d5, x10
 ; CHECK-NEON-NEXT:    mul x12, x8, x12
+; CHECK-NEON-NEXT:    fmov d6, x11
+; CHECK-NEON-NEXT:    mul x13, x8, x15
+; CHECK-NEON-NEXT:    mov w15, #134217728 // =0x8000000
 ; CHECK-NEON-NEXT:    and v4.8b, v0.8b, v4.8b
-; CHECK-NEON-NEXT:    fmov x10, d2
-; CHECK-NEON-NEXT:    fmov d2, x11
-; CHECK-NEON-NEXT:    fmov x11, d4
-; CHECK-NEON-NEXT:    fmov d4, x9
-; CHECK-NEON-NEXT:    mul x13, x8, x10
-; CHECK-NEON-NEXT:    mov w10, #2097152 // =0x200000
-; CHECK-NEON-NEXT:    and v6.8b, v0.8b, v2.8b
-; CHECK-NEON-NEXT:    eor v2.8b, v3.8b, v5.8b
-; CHECK-NEON-NEXT:    fmov d3, x10
-; CHECK-NEON-NEXT:    mov w10, #4194304 // =0x400000
-; CHECK-NEON-NEXT:    fmov d5, x10
-; CHECK-NEON-NEXT:    mul x11, x8, x11
-; CHECK-NEON-NEXT:    fmov x9, d6
-; CHECK-NEON-NEXT:    fmov d6, x12
-; CHECK-NEON-NEXT:    and v3.8b, v0.8b, v3.8b
-; CHECK-NEON-NEXT:    eor v1.8b, v1.8b, v2.8b
+; CHECK-NEON-NEXT:    fmov x14, d2
+; CHECK-NEON-NEXT:    fmov d7, x15
+; CHECK-NEON-NEXT:    mov w15, #536870912 // =0x20000000
+; CHECK-NEON-NEXT:    fmov d5, x15
+; CHECK-NEON-NEXT:    mov w15, #1073741824 // =0x40000000
+; CHECK-NEON-NEXT:    movi v2.2s, #128, lsl #24
+; CHECK-NEON-NEXT:    mul x11, x8, x14
+; CHECK-NEON-NEXT:    fmov x14, d4
+; CHECK-NEON-NEXT:    and v4.8b, v0.8b, v7.8b
+; CHECK-NEON-NEXT:    fmov d7, x12
 ; CHECK-NEON-NEXT:    and v5.8b, v0.8b, v5.8b
-; CHECK-NEON-NEXT:    mul x10, x8, x9
-; CHECK-NEON-NEXT:    mov w9, #8388608 // =0x800000
-; CHECK-NEON-NEXT:    eor v4.8b, v4.8b, v6.8b
-; CHECK-NEON-NEXT:    fmov x12, d3
-; CHECK-NEON-NEXT:    fmov d3, x9
-; CHECK-NEON-NEXT:    fmov d6, x13
-; CHECK-NEON-NEXT:    fmov x14, d5
-; CHECK-NEON-NEXT:    mul x9, x8, x12
-; CHECK-NEON-NEXT:    mov w12, #16777216 // =0x1000000
-; CHECK-NEON-NEXT:    and v3.8b, v0.8b, v3.8b
-; CHECK-NEON-NEXT:    fmov d5, x12
-; CHECK-NEON-NEXT:    mov w12, #33554432 // =0x2000000
+; CHECK-NEON-NEXT:    fmov d16, x13
+; CHECK-NEON-NEXT:    fneg d2, d2
+; CHECK-NEON-NEXT:    mov w13, #268435456 // =0x10000000
+; CHECK-NEON-NEXT:    fmov x12, d4
+; CHECK-NEON-NEXT:    fmov d4, x15
+; CHECK-NEON-NEXT:    mov x15, #4294967296 // =0x100000000
+; CHECK-NEON-NEXT:    eor v6.8b, v6.8b, v7.8b
+; CHECK-NEON-NEXT:    fmov d7, x10
+; CHECK-NEON-NEXT:    fmov x10, d5
 ; CHECK-NEON-NEXT:    mul x14, x8, x14
-; CHECK-NEON-NEXT:    fmov x13, d3
-; CHECK-NEON-NEXT:    fmov d3, x12
-; CHECK-NEON-NEXT:    mov w12, #67108864 // =0x4000000
-; CHECK-NEON-NEXT:    and v5.8b, v0.8b, v5.8b
-; CHECK-NEON-NEXT:    and v7.8b, v0.8b, v3.8b
-; CHECK-NEON-NEXT:    eor v3.8b, v4.8b, v6.8b
-; CHECK-NEON-NEXT:    fmov d4, x12
-; CHECK-NEON-NEXT:    fmov x15, d5
-; CHECK-NEON-NEXT:    mul x13, x8, x13
-; CHECK-NEON-NEXT:    fmov d5, x11
 ; CHECK-NEON-NEXT:    and v4.8b, v0.8b, v4.8b
-; CHECK-NEON-NEXT:    fmov x11, d7
-; CHECK-NEON-NEXT:    fmov d7, x14
-; CHECK-NEON-NEXT:    mul x12, x8, x15
-; CHECK-NEON-NEXT:    mov w15, #134217728 // =0x8000000
-; CHECK-NEON-NEXT:    eor v3.8b, v3.8b, v5.8b
+; CHECK-NEON-NEXT:    and v2.8b, v0.8b, v2.8b
+; CHECK-NEON-NEXT:    eor v5.8b, v6.8b, v16.8b
+; CHECK-NEON-NEXT:    fmov d6, x13
+; CHECK-NEON-NEXT:    mul x10, x8, x10
+; CHECK-NEON-NEXT:    eor v3.8b, v3.8b, v7.8b
+; CHECK-NEON-NEXT:    fmov x13, d4
+; CHECK-NEON-NEXT:    mul x12, x8, x12
+; CHECK-NEON-NEXT:    and v4.8b, v0.8b, v6.8b
 ; CHECK-NEON-NEXT:    fmov d6, x15
-; CHECK-NEON-NEXT:    mov w15, #536870912 // =0x20000000
-; CHECK-NEON-NEXT:    fmov x14, d4
-; CHECK-NEON-NEXT:    fmov d16, x13
-; CHECK-NEON-NEXT:    fmov d17, x15
-; CHECK-NEON-NEXT:    movi v4.2s, #128, lsl #24
-; CHECK-NEON-NEXT:    mov w15, #1073741824 // =0x40000000
-; CHECK-NEON-NEXT:    mul x11, x8, x11
+; CHECK-NEON-NEXT:    fmov x15, d2
+; CHECK-NEON-NEXT:    fmov d2, x11
+; CHECK-NEON-NEXT:    mul x13, x8, x13
 ; CHECK-NEON-NEXT:    and v6.8b, v0.8b, v6.8b
-; CHECK-NEON-NEXT:    mul x13, x8, x14
-; CHECK-NEON-NEXT:    eor v7.8b, v7.8b, v16.8b
-; CHECK-NEON-NEXT:    fmov d16, x15
-; CHECK-NEON-NEXT:    mov w15, #268435456 // =0x10000000
-; CHECK-NEON-NEXT:    fmov x14, d6
-; CHECK-NEON-NEXT:    and v6.8b, v0.8b, v17.8b
-; CHECK-NEON-NEXT:    fneg d4, d4
-; CHECK-NEON-NEXT:    and v16.8b, v0.8b, v16.8b
-; CHECK-NEON-NEXT:    fmov d5, x15
-; CHECK-NEON-NEXT:    fmov x15, d6
-; CHECK-NEON-NEXT:    fmov d6, x12
-; CHECK-NEON-NEXT:    mul x14, x8, x14
-; CHECK-NEON-NEXT:    and v5.8b, v0.8b, v5.8b
-; CHECK-NEON-NEXT:    and v4.8b, v0.8b, v4.8b
-; CHECK-NEON-NEXT:    mul x12, x8, x15
-; CHECK-NEON-NEXT:    eor v2.8b, v7.8b, v6.8b
-; CHECK-NEON-NEXT:    fmov x15, d16
+; CHECK-NEON-NEXT:    eor v2.8b, v5.8b, v2.8b
+; CHECK-NEON-NEXT:    fmov d5, x14
+; CHECK-NEON-NEXT:    fmov x14, d4
+; CHECK-NEON-NEXT:    fmov d4, x10
+; CHECK-NEON-NEXT:    mov x10, #8589934592 // =0x200000000
+; CHECK-NEON-NEXT:    mul x11, x8, x15
+; CHECK-NEON-NEXT:    fmov d7, x13
+; CHECK-NEON-NEXT:    fmov x13, d6
 ; CHECK-NEON-NEXT:    fmov d6, x10
-; CHECK-NEON-NEXT:    mul x10, x8, x15
-; CHECK-NEON-NEXT:    mov x15, #4294967296 // =0x100000000
-; CHECK-NEON-NEXT:    eor v3.8b, v3.8b, v6.8b
-; CHECK-NEON-NEXT:    fmov d6, x11
-; CHECK-NEON-NEXT:    fmov x11, d5
-; CHECK-NEON-NEXT:    fmov d5, x9
-; CHECK-NEON-NEXT:    fmov x9, d4
-; CHECK-NEON-NEXT:    fmov d4, x15
-; CHECK-NEON-NEXT:    fmov d7, x12
-; CHECK-NEON-NEXT:    mov x15, #281474976710656 // =0x1000000000000
-; CHECK-NEON-NEXT:    eor v2.8b, v2.8b, v6.8b
-; CHECK-NEON-NEXT:    fmov d6, x13
-; CHECK-NEON-NEXT:    mov x13, #8589934592 // =0x200000000
-; CHECK-NEON-NEXT:    mul x9, x8, x9
-; CHECK-NEON-NEXT:    and v4.8b, v0.8b, v4.8b
-; CHECK-NEON-NEXT:    fmov d17, x13
-; CHECK-NEON-NEXT:    fmov d16, x10
-; CHECK-NEON-NEXT:    eor v3.8b, v3.8b, v5.8b
-; CHECK-NEON-NEXT:    mov x13, #549755813888 // =0x8000000000
-; CHECK-NEON-NEXT:    mul x11, x8, x11
-; CHECK-NEON-NEXT:    eor v2.8b, v2.8b, v6.8b
-; CHECK-NEON-NEXT:    fmov d6, x14
-; CHECK-NEON-NEXT:    fmov x10, d4
-; CHECK-NEON-NEXT:    and v4.8b, v0.8b, v17.8b
-; CHECK-NEON-NEXT:    mov x14, #17592186044416 // =0x100000000000
-; CHECK-NEON-NEXT:    eor v7.8b, v7.8b, v16.8b
-; CHECK-NEON-NEXT:    eor v1.8b, v1.8b, v3.8b
-; CHECK-NEON-NEXT:    eor v2.8b, v2.8b, v6.8b
-; CHECK-NEON-NEXT:    fmov d6, x9
+; CHECK-NEON-NEXT:    mul x14, x8, x14
+; CHECK-NEON-NEXT:    eor v2.8b, v2.8b, v5.8b
+; CHECK-NEON-NEXT:    fmov d5, x12
+; CHECK-NEON-NEXT:    mov x12, #1099511627776 // =0x10000000000
+; CHECK-NEON-NEXT:    eor v4.8b, v4.8b, v7.8b
+; CHECK-NEON-NEXT:    fmov d7, x9
 ; CHECK-NEON-NEXT:    mov x9, #17179869184 // =0x400000000
-; CHECK-NEON-NEXT:    mul x10, x8, x10
-; CHECK-NEON-NEXT:    fmov d5, x11
-; CHECK-NEON-NEXT:    fmov x11, d4
-; CHECK-NEON-NEXT:    fmov d4, x9
-; CHECK-NEON-NEXT:    eor v6.8b, v7.8b, v6.8b
-; CHECK-NEON-NEXT:    mul x9, x8, x11
-; CHECK-NEON-NEXT:    and v4.8b, v0.8b, v4.8b
-; CHECK-NEON-NEXT:    mov x11, #34359738368 // =0x800000000
-; CHECK-NEON-NEXT:    fmov d3, x11
+; CHECK-NEON-NEXT:    mul x10, x8, x13
+; CHECK-NEON-NEXT:    and v6.8b, v0.8b, v6.8b
+; CHECK-NEON-NEXT:    fmov d17, x9
+; CHECK-NEON-NEXT:    fmov d16, x11
 ; CHECK-NEON-NEXT:    eor v2.8b, v2.8b, v5.8b
-; CHECK-NEON-NEXT:    fmov d5, x10
-; CHECK-NEON-NEXT:    mov x10, #137438953472 // =0x2000000000
-; CHECK-NEON-NEXT:    fmov x11, d4
-; CHECK-NEON-NEXT:    fmov d4, x10
-; CHECK-NEON-NEXT:    and v3.8b, v0.8b, v3.8b
-; CHECK-NEON-NEXT:    eor v5.8b, v6.8b, v5.8b
-; CHECK-NEON-NEXT:    eor v1.8b, v1.8b, v2.8b
-; CHECK-NEON-NEXT:    fmov d6, x9
+; CHECK-NEON-NEXT:    eor v3.8b, v3.8b, v7.8b
+; CHECK-NEON-NEXT:    fmov d5, x14
+; CHECK-NEON-NEXT:    mov x14, #562949953421312 // =0x2000000000000
+; CHECK-NEON-NEXT:    fmov x9, d6
+; CHECK-NEON-NEXT:    and v6.8b, v0.8b, v17.8b
+; CHECK-NEON-NEXT:    eor v4.8b, v4.8b, v16.8b
+; CHECK-NEON-NEXT:    fmov d7, x10
+; CHECK-NEON-NEXT:    mov x10, #34359738368 // =0x800000000
+; CHECK-NEON-NEXT:    eor v1.8b, v1.8b, v3.8b
+; CHECK-NEON-NEXT:    mul x9, x8, x9
+; CHECK-NEON-NEXT:    fmov x11, d6
+; CHECK-NEON-NEXT:    fmov d6, x10
+; CHECK-NEON-NEXT:    eor v2.8b, v2.8b, v5.8b
+; CHECK-NEON-NEXT:    eor v4.8b, v4.8b, v7.8b
 ; CHECK-NEON-NEXT:    mul x10, x8, x11
-; CHECK-NEON-NEXT:    mov x11, #274877906944 // =0x4000000000
-; CHECK-NEON-NEXT:    and v4.8b, v0.8b, v4.8b
-; CHECK-NEON-NEXT:    fmov x9, d3
+; CHECK-NEON-NEXT:    and v6.8b, v0.8b, v6.8b
+; CHECK-NEON-NEXT:    mov x11, #137438953472 // =0x2000000000
 ; CHECK-NEON-NEXT:    fmov d3, x11
-; CHECK-NEON-NEXT:    mov x11, #68719476736 // =0x1000000000
-; CHECK-NEON-NEXT:    eor v5.8b, v5.8b, v6.8b
-; CHECK-NEON-NEXT:    fmov d6, x13
-; CHECK-NEON-NEXT:    mov x13, #1099511627776 // =0x10000000000
-; CHECK-NEON-NEXT:    fmov x12, d4
-; CHECK-NEON-NEXT:    fmov d4, x11
+; CHECK-NEON-NEXT:    eor v1.8b, v1.8b, v2.8b
+; CHECK-NEON-NEXT:    fmov d5, x9
+; CHECK-NEON-NEXT:    mov x9, #274877906944 // =0x4000000000
+; CHECK-NEON-NEXT:    fmov x11, d6
 ; CHECK-NEON-NEXT:    and v3.8b, v0.8b, v3.8b
-; CHECK-NEON-NEXT:    mul x9, x8, x9
-; CHECK-NEON-NEXT:    fmov d7, x10
-; CHECK-NEON-NEXT:    mul x11, x8, x12
-; CHECK-NEON-NEXT:    and v4.8b, v0.8b, v4.8b
-; CHECK-NEON-NEXT:    fmov x12, d3
-; CHECK-NEON-NEXT:    and v3.8b, v0.8b, v6.8b
-; CHECK-NEON-NEXT:    fmov d6, x13
-; CHECK-NEON-NEXT:    eor v2.8b, v5.8b, v7.8b
-; CHECK-NEON-NEXT:    fmov d7, x14
-; CHECK-NEON-NEXT:    mov x14, #35184372088832 // =0x200000000000
-; CHECK-NEON-NEXT:    mul x12, x8, x12
-; CHECK-NEON-NEXT:    and v6.8b, v0.8b, v6.8b
+; CHECK-NEON-NEXT:    eor v4.8b, v4.8b, v5.8b
+; CHECK-NEON-NEXT:    fmov d5, x9
+; CHECK-NEON-NEXT:    fmov d6, x10
+; CHECK-NEON-NEXT:    mul x9, x8, x11
+; CHECK-NEON-NEXT:    mov x11, #549755813888 // =0x8000000000
 ; CHECK-NEON-NEXT:    fmov x10, d3
-; CHECK-NEON-NEXT:    fmov d3, x9
-; CHECK-NEON-NEXT:    fmov x9, d4
-; CHECK-NEON-NEXT:    fmov d4, x11
-; CHECK-NEON-NEXT:    mov x11, #2199023255552 // =0x20000000000
-; CHECK-NEON-NEXT:    mul x13, x8, x10
-; CHECK-NEON-NEXT:    fmov x10, d6
-; CHECK-NEON-NEXT:    fmov d6, x11
-; CHECK-NEON-NEXT:    mov x11, #4398046511104 // =0x40000000000
-; CHECK-NEON-NEXT:    eor v2.8b, v2.8b, v3.8b
-; CHECK-NEON-NEXT:    fmov d5, x12
 ; CHECK-NEON-NEXT:    fmov d3, x11
-; CHECK-NEON-NEXT:    mul x12, x8, x10
-; CHECK-NEON-NEXT:    eor v4.8b, v4.8b, v5.8b
-; CHECK-NEON-NEXT:    and v5.8b, v0.8b, v6.8b
-; CHECK-NEON-NEXT:    mul x10, x8, x9
-; CHECK-NEON-NEXT:    fmov d6, x13
+; CHECK-NEON-NEXT:    mov x11, #68719476736 // =0x1000000000
+; CHECK-NEON-NEXT:    and v5.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT:    eor v4.8b, v4.8b, v6.8b
+; CHECK-NEON-NEXT:    fmov d6, x11
 ; CHECK-NEON-NEXT:    and v3.8b, v0.8b, v3.8b
-; CHECK-NEON-NEXT:    mov x9, #8796093022208 // =0x80000000000
+; CHECK-NEON-NEXT:    mul x10, x8, x10
 ; CHECK-NEON-NEXT:    fmov x11, d5
-; CHECK-NEON-NEXT:    fmov d5, x9
-; CHECK-NEON-NEXT:    eor v4.8b, v4.8b, v6.8b
-; CHECK-NEON-NEXT:    fmov d6, x12
-; CHECK-NEON-NEXT:    fmov x12, d3
-; CHECK-NEON-NEXT:    mul x9, x8, x11
+; CHECK-NEON-NEXT:    and v5.8b, v0.8b, v6.8b
+; CHECK-NEON-NEXT:    fmov d2, x9
+; CHECK-NEON-NEXT:    fmov x9, d3
+; CHECK-NEON-NEXT:    fmov d3, x12
+; CHECK-NEON-NEXT:    mov x12, #2199023255552 // =0x20000000000
+; CHECK-NEON-NEXT:    mul x11, x8, x11
+; CHECK-NEON-NEXT:    eor v2.8b, v4.8b, v2.8b
+; CHECK-NEON-NEXT:    fmov d4, x10
+; CHECK-NEON-NEXT:    mov x10, #4398046511104 // =0x40000000000
+; CHECK-NEON-NEXT:    mul x13, x8, x9
+; CHECK-NEON-NEXT:    fmov x9, d5
+; CHECK-NEON-NEXT:    and v3.8b, v0.8b, v3.8b
+; CHECK-NEON-NEXT:    fmov d5, x12
+; CHECK-NEON-NEXT:    fmov d6, x11
+; CHECK-NEON-NEXT:    fmov x11, d3
+; CHECK-NEON-NEXT:    fmov d3, x10
 ; CHECK-NEON-NEXT:    and v5.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT:    mul x9, x8, x9
+; CHECK-NEON-NEXT:    mul x10, x8, x11
+; CHECK-NEON-NEXT:    and v3.8b, v0.8b, v3.8b
 ; CHECK-NEON-NEXT:    mov x11, #70368744177664 // =0x400000000000
-; CHECK-NEON-NEXT:    fmov d3, x11
+; CHECK-NEON-NEXT:    fmov x12, d5
+; CHECK-NEON-NEXT:    fmov d5, x11
 ; CHECK-NEON-NEXT:    eor v4.8b, v4.8b, v6.8b
-; CHECK-NEON-NEXT:    and v6.8b, v0.8b, v7.8b
+; CHECK-NEON-NEXT:    fmov d6, x13
+; CHECK-NEON-NEXT:    fmov x13, d3
 ; CHECK-NEON-NEXT:    mul x11, x8, x12
 ; CHECK-NEON-NEXT:    mov x12, #140737488355328 // =0x800000000000
-; CHECK-NEON-NEXT:    fmov x13, d5
-; CHECK-NEON-NEXT:    fmov d5, x12
-; CHECK-NEON-NEXT:    and v3.8b, v0.8b, v3.8b
-; CHECK-NEON-NEXT:    fmov d16, x9
 ; CHECK-NEON-NEXT:    and v5.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT:    fmov d3, x12
+; CHECK-NEON-NEXT:    eor v4.8b, v4.8b, v6.8b
+; CHECK-NEON-NEXT:    fmov d6, x10
 ; CHECK-NEON-NEXT:    mul x12, x8, x13
+; CHECK-NEON-NEXT:    mov x13, #281474976710656 // =0x1000000000000
+; CHECK-NEON-NEXT:    fmov x10, d5
+; CHECK-NEON-NEXT:    fmov d5, x13
+; CHECK-NEON-NEXT:    mov x13, #8796093022208 // =0x80000000000
+; CHECK-NEON-NEXT:    and v3.8b, v0.8b, v3.8b
+; CHECK-NEON-NEXT:    eor v4.8b, v4.8b, v6.8b
+; CHECK-NEON-NEXT:    fmov d6, x13
+; CHECK-NEON-NEXT:    fmov d7, x11
+; CHECK-NEON-NEXT:    and v5.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT:    mul x10, x8, x10
 ; CHECK-NEON-NEXT:    fmov x13, d3
-; CHECK-NEON-NEXT:    fmov d3, x14
-; CHECK-NEON-NEXT:    eor v4.8b, v4.8b, v16.8b
-; CHECK-NEON-NEXT:    fmov x14, d5
-; CHECK-NEON-NEXT:    fmov d5, x15
-; CHECK-NEON-NEXT:    mov x15, #562949953421312 // =0x2000000000000
+; CHECK-NEON-NEXT:    and v3.8b, v0.8b, v6.8b
+; CHECK-NEON-NEXT:    fmov d6, x9
+; CHECK-NEON-NEXT:    eor v4.8b, v4.8b, v7.8b
+; CHECK-NEON-NEXT:    fmov x9, d5
+; CHECK-NEON-NEXT:    fmov d5, x14
+; CHECK-NEON-NEXT:    mov x14, #1125899906842624 // =0x4000000000000
 ; CHECK-NEON-NEXT:    mul x13, x8, x13
-; CHECK-NEON-NEXT:    fmov d7, x15
-; CHECK-NEON-NEXT:    fmov x15, d6
-; CHECK-NEON-NEXT:    and v6.8b, v0.8b, v3.8b
+; CHECK-NEON-NEXT:    fmov x11, d3
+; CHECK-NEON-NEXT:    fmov d7, x14
 ; CHECK-NEON-NEXT:    fmov d3, x10
+; CHECK-NEON-NEXT:    eor v2.8b, v2.8b, v6.8b
 ; CHECK-NEON-NEXT:    and v5.8b, v0.8b, v5.8b
-; CHECK-NEON-NEXT:    mul x14, x8, x14
-; CHECK-NEON-NEXT:    and v7.8b, v0.8b, v7.8b
-; CHECK-NEON-NEXT:    mul x9, x8, x15
-; CHECK-NEON-NEXT:    eor v2.8b, v2.8b, v3.8b
-; CHECK-NEON-NEXT:    fmov x10, d5
-; CHECK-NEON-NEXT:    fmov d5, x11
-; CHECK-NEON-NEXT:    fmov x11, d6
-; CHECK-NEON-NEXT:    fmov d6, x13
-; CHECK-NEON-NEXT:    mov x13, #1125899906842624 // =0x4000000000000
+; CHECK-NEON-NEXT:    mul x9, x8, x9
+; CHECK-NEON-NEXT:    mul x11, x8, x11
+; CHECK-NEON-NEXT:    eor v1.8b, v1.8b, v2.8b
 ; CHECK-NEON-NEXT:    fmov d16, x13
+; CHECK-NEON-NEXT:    fmov x10, d5
 ; CHECK-NEON-NEXT:    mov x13, #2251799813685248 // =0x8000000000000
-; CHECK-NEON-NEXT:    eor v1.8b, v1.8b, v2.8b
-; CHECK-NEON-NEXT:    mul x15, x8, x10
-; CHECK-NEON-NEXT:    fmov x10, d7
-; CHECK-NEON-NEXT:    fmov d7, x14
-; CHECK-NEON-NEXT:    fmov d17, x13
-; CHECK-NEON-NEXT:    eor v4.8b, v4.8b, v5.8b
-; CHECK-NEON-NEXT:    and v16.8b, v0.8b, v16.8b
-; CHECK-NEON-NEXT:    mul x14, x8, x10
-; CHECK-NEON-NEXT:    eor v7.8b, v6.8b, v7.8b
+; CHECK-NEON-NEXT:    and v5.8b, v0.8b, v7.8b
+; CHECK-NEON-NEXT:    fmov d7, x12
+; CHECK-NEON-NEXT:    mov x12, #17592186044416 // =0x100000000000
+; CHECK-NEON-NEXT:    fmov d17, x9
+; CHECK-NEON-NEXT:    eor v3.8b, v3.8b, v16.8b
+; CHECK-NEON-NEXT:    fmov d16, x13
+; CHECK-NEON-NEXT:    mul x10, x8, x10
+; CHECK-NEON-NEXT:    fmov x9, d5
+; CHECK-NEON-NEXT:    eor v4.8b, v4.8b, v7.8b
+; CHECK-NEON-NEXT:    fmov d7, x11
+; CHECK-NEON-NEXT:    mov x11, #35184372088832 // =0x200000000000
+; CHECK-NEON-NEXT:    mov x13, #9007199254740992 // =0x20000000000000
+; CHECK-NEON-NEXT:    and v5.8b, v0.8b, v16.8b
+; CHECK-NEON-NEXT:    fmov d16, x12
+; CHECK-NEON-NEXT:    eor v3.8b, v3.8b, v17.8b
+; CHECK-NEON-NEXT:    mul x9, x8, x9
+; CHECK-NEON-NEXT:    mov x12, #4503599627370496 // =0x10000000000000
+; CHECK-NEON-NEXT:    fmov d17, x10
 ; CHECK-NEON-NEXT:    fmov d6, x12
-; CHECK-NEON-NEXT:    and v17.8b, v0.8b, v17.8b
-; CHECK-NEON-NEXT:    mul x10, x8, x11
-; CHECK-NEON-NEXT:    mov x11, #4503599627370496 // =0x10000000000000
-; CHECK-NEON-NEXT:    fmov x12, d16
-; CHECK-NEON-NEXT:    fmov d16, x11
-; CHECK-NEON-NEXT:    fmov d18, x15
-; CHECK-NEON-NEXT:    mov x15, #288230376151711744 // =0x400000000000000
-; CHECK-NEON-NEXT:    fmov x13, d17
-; CHECK-NEON-NEXT:    eor v4.8b, v4.8b, v6.8b
-; CHECK-NEON-NEXT:    mul x11, x8, x12
-; CHECK-NEON-NEXT:    mov x12, #9007199254740992 // =0x20000000000000
+; CHECK-NEON-NEXT:    mov x12, #72057594037927936 // =0x100000000000000
+; CHECK-NEON-NEXT:    fmov x10, d5
 ; CHECK-NEON-NEXT:    and v16.8b, v0.8b, v16.8b
-; CHECK-NEON-NEXT:    fmov d17, x12
-; CHECK-NEON-NEXT:    eor v7.8b, v7.8b, v18.8b
-; CHECK-NEON-NEXT:    fmov d18, x14
+; CHECK-NEON-NEXT:    fmov d5, x11
+; CHECK-NEON-NEXT:    eor v17.8b, v3.8b, v17.8b
+; CHECK-NEON-NEXT:    eor v3.8b, v4.8b, v7.8b
+; CHECK-NEON-NEXT:    fmov d7, x12
+; CHECK-NEON-NEXT:    mul x11, x8, x10
+; CHECK-NEON-NEXT:    and v4.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT:    fmov x10, d16
+; CHECK-NEON-NEXT:    fmov d5, x9
+; CHECK-NEON-NEXT:    and v6.8b, v0.8b, v6.8b
+; CHECK-NEON-NEXT:    mul x9, x8, x10
+; CHECK-NEON-NEXT:    fmov x10, d4
+; CHECK-NEON-NEXT:    eor v5.8b, v17.8b, v5.8b
+; CHECK-NEON-NEXT:    fmov x12, d6
+; CHECK-NEON-NEXT:    fmov d4, x11
+; CHECK-NEON-NEXT:    mov x11, #144115188075855872 // =0x200000000000000
+; CHECK-NEON-NEXT:    fmov d6, x11
+; CHECK-NEON-NEXT:    mul x10, x8, x10
+; CHECK-NEON-NEXT:    eor v4.8b, v5.8b, v4.8b
+; CHECK-NEON-NEXT:    and v5.8b, v0.8b, v7.8b
+; CHECK-NEON-NEXT:    fmov d7, x13
+; CHECK-NEON-NEXT:    and v6.8b, v0.8b, v6.8b
+; CHECK-NEON-NEXT:    mul x11, x8, x12
+; CHECK-NEON-NEXT:    mov x12, #18014398509481984 // =0x40000000000000
+; CHECK-NEON-NEXT:    fmov x13, d5
+; CHECK-NEON-NEXT:    fmov d5, x12
+; CHECK-NEON-NEXT:    and v7.8b, v0.8b, v7.8b
+; CHECK-NEON-NEXT:    fmov x14, d6
 ; CHECK-NEON-NEXT:    mul x12, x8, x13
-; CHECK-NEON-NEXT:    mov x13, #72057594037927936 // =0x100000000000000
-; CHECK-NEON-NEXT:    fmov x14, d16
-; CHECK-NEON-NEXT:    and v17.8b, v0.8b, v17.8b
-; CHECK-NEON-NEXT:    fmov d16, x13
-; CHECK-NEON-NEXT:    eor v7.8b, v7.8b, v18.8b
-; CHECK-NEON-NEXT:    fmov d18, x11
+; CHECK-NEON-NEXT:    mov x13, #288230376151711744 // =0x400000000000000
+; CHECK-NEON-NEXT:    and v5.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT:    fmov d6, x13
+; CHECK-NEON-NEXT:    fmov d16, x11
+; CHECK-NEON-NEXT:    fmov x11, d7
 ; CHECK-NEON-NEXT:    mul x13, x8, x14
-; CHECK-NEON-NEXT:    mov x14, #144115188075855872 // =0x200000000000000
-; CHECK-NEON-NEXT:    fmov x11, d17
-; CHECK-NEON-NEXT:    fmov d17, x14
-; CHECK-NEON-NEXT:    mov x14, #18014398509481984 // =0x40000000000000
-; CHECK-NEON-NEXT:    and v16.8b, v0.8b, v16.8b
-; CHECK-NEON-NEXT:    eor v7.8b, v7.8b, v18.8b
-; CHECK-NEON-NEXT:    fmov d18, x14
+; CHECK-NEON-NEXT:    mov x14, #576460752303423488 // =0x800000000000000
+; CHECK-NEON-NEXT:    fmov d7, x14
+; CHECK-NEON-NEXT:    mov x14, #36028797018963968 // =0x80000000000000
+; CHECK-NEON-NEXT:    fmov x15, d5
+; CHECK-NEON-NEXT:    and v6.8b, v0.8b, v6.8b
+; CHECK-NEON-NEXT:    fmov d5, x14
 ; CHECK-NEON-NEXT:    mul x11, x8, x11
-; CHECK-NEON-NEXT:    and v17.8b, v0.8b, v17.8b
-; CHECK-NEON-NEXT:    fmov x14, d16
-; CHECK-NEON-NEXT:    and v16.8b, v0.8b, v18.8b
-; CHECK-NEON-NEXT:    fmov d18, x12
-; CHECK-NEON-NEXT:    fmov x12, d17
-; CHECK-NEON-NEXT:    fmov d17, x15
-; CHECK-NEON-NEXT:    mul x14, x8, x14
-; CHECK-NEON-NEXT:    mul x15, x8, x12
-; CHECK-NEON-NEXT:    mov x12, #576460752303423488 // =0x800000000000000
-; CHECK-NEON-NEXT:    and v17.8b, v0.8b, v17.8b
-; CHECK-NEON-NEXT:    fmov d5, x12
-; CHECK-NEON-NEXT:    fmov x12, d16
-; CHECK-NEON-NEXT:    fmov d6, x14
-; CHECK-NEON-NEXT:    and v3.8b, v0.8b, v5.8b
-; CHECK-NEON-NEXT:    eor v5.8b, v7.8b, v18.8b
-; CHECK-NEON-NEXT:    fmov d7, x13
-; CHECK-NEON-NEXT:    fmov x13, d17
-; CHECK-NEON-NEXT:    fmov d16, x15
-; CHECK-NEON-NEXT:    mov x15, #1152921504606846976 // =0x1000000000000000
-; CHECK-NEON-NEXT:    mul x12, x8, x12
-; CHECK-NEON-NEXT:    fmov x14, d3
-; CHECK-NEON-NEXT:    eor v3.8b, v5.8b, v7.8b
-; CHECK-NEON-NEXT:    fmov d5, x15
-; CHECK-NEON-NEXT:    mul x13, x8, x13
-; CHECK-NEON-NEXT:    mov x15, #2305843009213693952 // =0x2000000000000000
-; CHECK-NEON-NEXT:    eor v6.8b, v6.8b, v16.8b
-; CHECK-NEON-NEXT:    fmov d7, x15
-; CHECK-NEON-NEXT:    mov x15, #36028797018963968 // =0x80000000000000
-; CHECK-NEON-NEXT:    movi d16, #0000000000000000
-; CHECK-NEON-NEXT:    mul x14, x8, x14
+; CHECK-NEON-NEXT:    eor v4.8b, v4.8b, v16.8b
+; CHECK-NEON-NEXT:    and v7.8b, v0.8b, v7.8b
+; CHECK-NEON-NEXT:    fmov x14, d6
+; CHECK-NEON-NEXT:    fmov d6, x12
+; CHECK-NEON-NEXT:    fmov d17, x13
 ; CHECK-NEON-NEXT:    and v5.8b, v0.8b, v5.8b
-; CHECK-NEON-NEXT:    fmov d17, x15
+; CHECK-NEON-NEXT:    fmov x13, d7
+; CHECK-NEON-NEXT:    mul x12, x8, x14
+; CHECK-NEON-NEXT:    mov x14, #1152921504606846976 // =0x1000000000000000
+; CHECK-NEON-NEXT:    eor v6.8b, v6.8b, v17.8b
+; CHECK-NEON-NEXT:    fmov d7, x14
+; CHECK-NEON-NEXT:    mov x14, #2305843009213693952 // =0x2000000000000000
+; CHECK-NEON-NEXT:    fmov d17, x14
+; CHECK-NEON-NEXT:    mul x13, x8, x13
 ; CHECK-NEON-NEXT:    and v7.8b, v0.8b, v7.8b
-; CHECK-NEON-NEXT:    fmov d18, x13
-; CHECK-NEON-NEXT:    fmov x13, d5
+; CHECK-NEON-NEXT:    mul x14, x8, x15
+; CHECK-NEON-NEXT:    fmov d19, x12
 ; CHECK-NEON-NEXT:    and v17.8b, v0.8b, v17.8b
-; CHECK-NEON-NEXT:    fneg d16, d16
-; CHECK-NEON-NEXT:    fmov d5, x14
-; CHECK-NEON-NEXT:    mov x14, #4611686018427387904 // =0x4000000000000000
-; CHECK-NEON-NEXT:    fmov x15, d7
-; CHECK-NEON-NEXT:    eor v6.8b, v6.8b, v18.8b
-; CHECK-NEON-NEXT:    mul x13, x8, x13
-; CHECK-NEON-NEXT:    fmov d7, x14
-; CHECK-NEON-NEXT:    fmov x14, d17
-; CHECK-NEON-NEXT:    fmov d17, x9
+; CHECK-NEON-NEXT:    fmov x12, d7
+; CHECK-NEON-NEXT:    fmov d7, #2.00000000
+; CHECK-NEON-NEXT:    fmov d16, x13
+; CHECK-NEON-NEXT:    eor v6.8b, v6.8b, v19.8b
+; CHECK-NEON-NEXT:    fmov x15, d17
+; CHECK-NEON-NEXT:    fneg d17, d18
+; CHECK-NEON-NEXT:    fmov x13, d5
+; CHECK-NEON-NEXT:    mul x12, x8, x12
+; CHECK-NEON-NEXT:    and v5.8b, v0.8b, v7.8b
+; CHECK-NEON-NEXT:    fmov d7, x9
+; CHECK-NEON-NEXT:    eor v6.8b, v6.8b, v16.8b
+; CHECK-NEON-NEXT:    fmov d16, x11
 ; CHECK-NEON-NEXT:    mul x15, x8, x15
+; CHECK-NEON-NEXT:    and v0.8b, v0.8b, v17.8b
+; CHECK-NEON-NEXT:    fmov x11, d5
+; CHECK-NEON-NEXT:    eor v3.8b, v3.8b, v7.8b
+; CHECK-NEON-NEXT:    mul x9, x8, x13
+; CHECK-NEON-NEXT:    eor v4.8b, v4.8b, v16.8b
+; CHECK-NEON-NEXT:    fmov d5, x12
 ; CHECK-NEON-NEXT:    eor v5.8b, v6.8b, v5.8b
-; CHECK-NEON-NEXT:    fmov d6, x11
-; CHECK-NEON-NEXT:    and v7.8b, v0.8b, v7.8b
-; CHECK-NEON-NEXT:    and v0.8b, v0.8b, v16.8b
-; CHECK-NEON-NEXT:    eor v4.8b, v4.8b, v17.8b
-; CHECK-NEON-NEXT:    mul x9, x8, x14
-; CHECK-NEON-NEXT:    eor v3.8b, v3.8b, v6.8b
-; CHECK-NEON-NEXT:    fmov d6, x13
-; CHECK-NEON-NEXT:    fmov x11, d7
-; CHECK-NEON-NEXT:    eor v5.8b, v5.8b, v6.8b
 ; CHECK-NEON-NEXT:    fmov d6, x10
 ; CHECK-NEON-NEXT:    mul x10, x8, x11
 ; CHECK-NEON-NEXT:    fmov x11, d0
 ; CHECK-NEON-NEXT:    fmov d0, x15
-; CHECK-NEON-NEXT:    eor v2.8b, v4.8b, v6.8b
-; CHECK-NEON-NEXT:    fmov d4, x12
+; CHECK-NEON-NEXT:    eor v2.8b, v3.8b, v6.8b
+; CHECK-NEON-NEXT:    fmov d3, x14
 ; CHECK-NEON-NEXT:    mul x8, x8, x11
 ; CHECK-NEON-NEXT:    eor v0.8b, v5.8b, v0.8b
 ; CHECK-NEON-NEXT:    fmov d5, x10
-; CHECK-NEON-NEXT:    eor v3.8b, v3.8b, v4.8b
+; CHECK-NEON-NEXT:    eor v3.8b, v4.8b, v3.8b
 ; CHECK-NEON-NEXT:    fmov d4, x9
 ; CHECK-NEON-NEXT:    eor v1.8b, v1.8b, v2.8b
 ; CHECK-NEON-NEXT:    eor v0.8b, v0.8b, v5.8b
@@ -8004,7 +8002,7 @@ define <2 x i64> @clmulh_v2i64_neon(<2 x i64> %a, <2 x i64> %b) nounwind {
 ; CHECK-NEON-NEXT:    eor v2.16b, v3.16b, v2.16b
 ; CHECK-NEON-NEXT:    mov v17.d[1], x0
 ; CHECK-NEON-NEXT:    eor v5.16b, v6.16b, v21.16b
-; CHECK-NEON-NEXT:    movi v6.2d, #0000000000000000
+; CHECK-NEON-NEXT:    movi d6, #0000000000000000
 ; CHECK-NEON-NEXT:    mul x0, x9, x2
 ; CHECK-NEON-NEXT:    fmov x2, d16
 ; CHECK-NEON-NEXT:    fmov v16.2d, #2.00000000
@@ -8119,7 +8117,9 @@ define <1 x i64> @clmulh_v1i64_neon(<1 x i64> %a, <1 x i64> %b) nounwind {
 ; CHECK-NEON-NEXT:    mov w12, #32 // =0x20
 ; CHECK-NEON-NEXT:    mov w13, #64 // =0x40
 ; CHECK-NEON-NEXT:    mov w14, #128 // =0x80
+; CHECK-NEON-NEXT:    mov w16, #512 // =0x200
 ; CHECK-NEON-NEXT:    mov w15, #256 // =0x100
+; CHECK-NEON-NEXT:    movi d18, #0000000000000000
 ; CHECK-NEON-NEXT:    rbit v0.8b, v1.8b
 ; CHECK-NEON-NEXT:    fmov d1, x8
 ; CHECK-NEON-NEXT:    mov w8, #1 // =0x1
@@ -8136,11 +8136,8 @@ define <1 x i64> @clmulh_v1i64_neon(<1 x i64> %a, <1 x i64> %b) nounwind {
 ; CHECK-NEON-NEXT:    fmov x10, d3
 ; CHECK-NEON-NEXT:    fmov d3, x11
 ; CHECK-NEON-NEXT:    and v2.8b, v0.8b, v2.8b
-; CHECK-NEON-NEXT:    fmov d4, x14
-; CHECK-NEON-NEXT:    mov w14, #512 // =0x200
 ; CHECK-NEON-NEXT:    fmov x11, d1
 ; CHECK-NEON-NEXT:    mul x9, x8, x9
-; CHECK-NEON-NEXT:    fmov d5, x14
 ; CHECK-NEON-NEXT:    and v1.8b, v0.8b, v3.8b
 ; CHECK-NEON-NEXT:    fmov d3, x12
 ; CHECK-NEON-NEXT:    fmov x12, d2
@@ -8149,412 +8146,412 @@ define <1 x i64> @clmulh_v1i64_neon(<1 x i64> %a, <1 x i64> %b) nounwind {
 ; CHECK-NEON-NEXT:    fmov d3, x13
 ; CHECK-NEON-NEXT:    fmov x13, d1
 ; CHECK-NEON-NEXT:    mul x11, x8, x11
+; CHECK-NEON-NEXT:    fmov d4, x9
 ; CHECK-NEON-NEXT:    and v1.8b, v0.8b, v3.8b
+; CHECK-NEON-NEXT:    fmov d3, x14
 ; CHECK-NEON-NEXT:    mul x12, x8, x12
-; CHECK-NEON-NEXT:    fmov d3, x15
 ; CHECK-NEON-NEXT:    fmov x14, d2
-; CHECK-NEON-NEXT:    and v2.8b, v0.8b, v4.8b
-; CHECK-NEON-NEXT:    fmov d4, x9
-; CHECK-NEON-NEXT:    mov w15, #1024 // =0x400
 ; CHECK-NEON-NEXT:    mul x13, x8, x13
-; CHECK-NEON-NEXT:    fmov x9, d1
-; CHECK-NEON-NEXT:    and v1.8b, v0.8b, v5.8b
-; CHECK-NEON-NEXT:    fmov d5, x10
-; CHECK-NEON-NEXT:    and v3.8b, v0.8b, v3.8b
-; CHECK-NEON-NEXT:    fmov x10, d2
-; CHECK-NEON-NEXT:    fmov d2, x11
-; CHECK-NEON-NEXT:    fmov d6, x12
+; CHECK-NEON-NEXT:    and v2.8b, v0.8b, v3.8b
+; CHECK-NEON-NEXT:    fmov d3, x16
+; CHECK-NEON-NEXT:    fmov x17, d1
+; CHECK-NEON-NEXT:    fmov d1, x15
+; CHECK-NEON-NEXT:    fmov d5, x11
 ; CHECK-NEON-NEXT:    mul x14, x8, x14
-; CHECK-NEON-NEXT:    mov w11, #2048 // =0x800
-; CHECK-NEON-NEXT:    eor v4.8b, v5.8b, v4.8b
-; CHECK-NEON-NEXT:    fmov d5, x15
-; CHECK-NEON-NEXT:    fmov x12, d3
-; CHECK-NEON-NEXT:    mul x10, x8, x10
-; CHECK-NEON-NEXT:    fmov d3, x11
-; CHECK-NEON-NEXT:    eor v2.8b, v2.8b, v6.8b
-; CHECK-NEON-NEXT:    and v5.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT:    fmov d6, x12
+; CHECK-NEON-NEXT:    mov w12, #4096 // =0x1000
+; CHECK-NEON-NEXT:    fmov x9, d2
+; CHECK-NEON-NEXT:    and v2.8b, v0.8b, v3.8b
+; CHECK-NEON-NEXT:    fmov d3, x10
+; CHECK-NEON-NEXT:    and v1.8b, v0.8b, v1.8b
+; CHECK-NEON-NEXT:    mov w10, #2048 // =0x800
+; CHECK-NEON-NEXT:    mul x15, x8, x17
+; CHECK-NEON-NEXT:    eor v3.8b, v3.8b, v4.8b
+; CHECK-NEON-NEXT:    eor v4.8b, v5.8b, v6.8b
+; CHECK-NEON-NEXT:    fmov d5, x10
+; CHECK-NEON-NEXT:    fmov x10, d1
+; CHECK-NEON-NEXT:    fmov x11, d2
+; CHECK-NEON-NEXT:    fmov d2, x13
 ; CHECK-NEON-NEXT:    mul x9, x8, x9
-; CHECK-NEON-NEXT:    and v3.8b, v0.8b, v3.8b
-; CHECK-NEON-NEXT:    mul x11, x8, x12
+; CHECK-NEON-NEXT:    mov w13, #8192 // =0x2000
+; CHECK-NEON-NEXT:    eor v3.8b, v3.8b, v4.8b
+; CHECK-NEON-NEXT:    fmov d4, x12
+; CHECK-NEON-NEXT:    and v1.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT:    fmov d5, x14
+; CHECK-NEON-NEXT:    mul x10, x8, x10
+; CHECK-NEON-NEXT:    mov w12, #1024 // =0x400
+; CHECK-NEON-NEXT:    and v4.8b, v0.8b, v4.8b
+; CHECK-NEON-NEXT:    mul x11, x8, x11
+; CHECK-NEON-NEXT:    eor v2.8b, v2.8b, v5.8b
+; CHECK-NEON-NEXT:    fmov d5, x12
 ; CHECK-NEON-NEXT:    fmov x12, d1
-; CHECK-NEON-NEXT:    fmov d1, x13
-; CHECK-NEON-NEXT:    mov w13, #4096 // =0x1000
-; CHECK-NEON-NEXT:    eor v2.8b, v4.8b, v2.8b
-; CHECK-NEON-NEXT:    fmov d4, x14
-; CHECK-NEON-NEXT:    fmov x14, d5
-; CHECK-NEON-NEXT:    fmov d5, x13
+; CHECK-NEON-NEXT:    fmov d6, x9
+; CHECK-NEON-NEXT:    fmov x14, d4
+; CHECK-NEON-NEXT:    fmov d4, x13
+; CHECK-NEON-NEXT:    fmov d7, x10
+; CHECK-NEON-NEXT:    and v1.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT:    mov w13, #16384 // =0x4000
 ; CHECK-NEON-NEXT:    mul x12, x8, x12
-; CHECK-NEON-NEXT:    eor v1.8b, v1.8b, v4.8b
-; CHECK-NEON-NEXT:    and v4.8b, v0.8b, v5.8b
-; CHECK-NEON-NEXT:    fmov d5, x10
+; CHECK-NEON-NEXT:    fmov d5, x15
+; CHECK-NEON-NEXT:    and v4.8b, v0.8b, v4.8b
+; CHECK-NEON-NEXT:    mul x9, x8, x14
+; CHECK-NEON-NEXT:    mov w14, #32768 // =0x8000
+; CHECK-NEON-NEXT:    fmov x10, d1
+; CHECK-NEON-NEXT:    eor v1.8b, v6.8b, v7.8b
+; CHECK-NEON-NEXT:    fmov d6, x13
+; CHECK-NEON-NEXT:    fmov d7, x14
+; CHECK-NEON-NEXT:    eor v2.8b, v2.8b, v5.8b
+; CHECK-NEON-NEXT:    fmov x13, d4
+; CHECK-NEON-NEXT:    mul x10, x8, x10
+; CHECK-NEON-NEXT:    and v4.8b, v0.8b, v6.8b
 ; CHECK-NEON-NEXT:    fmov d6, x11
-; CHECK-NEON-NEXT:    mov w11, #8192 // =0x2000
-; CHECK-NEON-NEXT:    fmov x10, d3
-; CHECK-NEON-NEXT:    mul x13, x8, x14
-; CHECK-NEON-NEXT:    eor v3.8b, v5.8b, v6.8b
-; CHECK-NEON-NEXT:    fmov d5, x9
+; CHECK-NEON-NEXT:    and v5.8b, v0.8b, v7.8b
+; CHECK-NEON-NEXT:    fmov d7, x9
+; CHECK-NEON-NEXT:    eor v2.8b, v3.8b, v2.8b
+; CHECK-NEON-NEXT:    mul x11, x8, x13
 ; CHECK-NEON-NEXT:    fmov x9, d4
-; CHECK-NEON-NEXT:    fmov d4, x11
-; CHECK-NEON-NEXT:    mov w11, #16384 // =0x4000
+; CHECK-NEON-NEXT:    eor v1.8b, v1.8b, v6.8b
 ; CHECK-NEON-NEXT:    fmov d6, x12
+; CHECK-NEON-NEXT:    fmov d4, x10
+; CHECK-NEON-NEXT:    fmov x10, d5
+; CHECK-NEON-NEXT:    mul x9, x8, x9
+; CHECK-NEON-NEXT:    eor v6.8b, v6.8b, v7.8b
+; CHECK-NEON-NEXT:    fmov d3, x11
+; CHECK-NEON-NEXT:    mov w11, #65536 // =0x10000
+; CHECK-NEON-NEXT:    eor v1.8b, v1.8b, v4.8b
+; CHECK-NEON-NEXT:    fmov d4, x11
 ; CHECK-NEON-NEXT:    mul x10, x8, x10
-; CHECK-NEON-NEXT:    eor v1.8b, v1.8b, v5.8b
+; CHECK-NEON-NEXT:    mov w11, #131072 // =0x20000
+; CHECK-NEON-NEXT:    eor v3.8b, v6.8b, v3.8b
 ; CHECK-NEON-NEXT:    fmov d5, x11
-; CHECK-NEON-NEXT:    mul x9, x8, x9
 ; CHECK-NEON-NEXT:    and v4.8b, v0.8b, v4.8b
-; CHECK-NEON-NEXT:    eor v3.8b, v3.8b, v6.8b
-; CHECK-NEON-NEXT:    fmov d6, x13
 ; CHECK-NEON-NEXT:    eor v1.8b, v2.8b, v1.8b
-; CHECK-NEON-NEXT:    fmov x11, d4
-; CHECK-NEON-NEXT:    and v4.8b, v0.8b, v5.8b
-; CHECK-NEON-NEXT:    eor v2.8b, v3.8b, v6.8b
-; CHECK-NEON-NEXT:    fmov d3, x10
-; CHECK-NEON-NEXT:    fmov d5, x9
-; CHECK-NEON-NEXT:    mov w9, #32768 // =0x8000
+; CHECK-NEON-NEXT:    fmov d2, x9
+; CHECK-NEON-NEXT:    mov w9, #262144 // =0x40000
+; CHECK-NEON-NEXT:    eor v2.8b, v3.8b, v2.8b
+; CHECK-NEON-NEXT:    and v3.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT:    fmov d5, x10
 ; CHECK-NEON-NEXT:    fmov x10, d4
 ; CHECK-NEON-NEXT:    fmov d4, x9
-; CHECK-NEON-NEXT:    mul x11, x8, x11
-; CHECK-NEON-NEXT:    eor v1.8b, v1.8b, v2.8b
-; CHECK-NEON-NEXT:    eor v3.8b, v3.8b, v5.8b
-; CHECK-NEON-NEXT:    mul x9, x8, x10
+; CHECK-NEON-NEXT:    eor v2.8b, v2.8b, v5.8b
+; CHECK-NEON-NEXT:    fmov x11, d3
 ; CHECK-NEON-NEXT:    and v4.8b, v0.8b, v4.8b
-; CHECK-NEON-NEXT:    mov w10, #65536 // =0x10000
+; CHECK-NEON-NEXT:    mul x9, x8, x10
+; CHECK-NEON-NEXT:    mov w10, #524288 // =0x80000
+; CHECK-NEON-NEXT:    fmov d3, x10
+; CHECK-NEON-NEXT:    mov w10, #1048576 // =0x100000
+; CHECK-NEON-NEXT:    eor v1.8b, v1.8b, v2.8b
+; CHECK-NEON-NEXT:    mul x11, x8, x11
+; CHECK-NEON-NEXT:    fmov x12, d4
+; CHECK-NEON-NEXT:    fmov d4, x10
+; CHECK-NEON-NEXT:    mov w10, #2097152 // =0x200000
 ; CHECK-NEON-NEXT:    fmov d2, x10
-; CHECK-NEON-NEXT:    mov w10, #131072 // =0x20000
+; CHECK-NEON-NEXT:    and v3.8b, v0.8b, v3.8b
+; CHECK-NEON-NEXT:    mov w10, #4194304 // =0x400000
+; CHECK-NEON-NEXT:    and v4.8b, v0.8b, v4.8b
+; CHECK-NEON-NEXT:    mul x12, x8, x12
+; CHECK-NEON-NEXT:    and v2.8b, v0.8b, v2.8b
+; CHECK-NEON-NEXT:    fmov x13, d3
+; CHECK-NEON-NEXT:    fmov d3, x9
 ; CHECK-NEON-NEXT:    fmov d5, x11
-; CHECK-NEON-NEXT:    fmov x11, d4
+; CHECK-NEON-NEXT:    fmov x9, d4
 ; CHECK-NEON-NEXT:    fmov d4, x10
-; CHECK-NEON-NEXT:    and v2.8b, v0.8b, v2.8b
+; CHECK-NEON-NEXT:    fmov x11, d2
+; CHECK-NEON-NEXT:    mul x13, x8, x13
 ; CHECK-NEON-NEXT:    eor v3.8b, v3.8b, v5.8b
-; CHECK-NEON-NEXT:    fmov d5, x9
-; CHECK-NEON-NEXT:    mul x10, x8, x11
-; CHECK-NEON-NEXT:    mov w11, #262144 // =0x40000
+; CHECK-NEON-NEXT:    fmov d5, x12
+; CHECK-NEON-NEXT:    mul x10, x8, x9
 ; CHECK-NEON-NEXT:    and v4.8b, v0.8b, v4.8b
-; CHECK-NEON-NEXT:    fmov x9, d2
-; CHECK-NEON-NEXT:    fmov d2, x11
-; CHECK-NEON-NEXT:    mov w11, #524288 // =0x80000
+; CHECK-NEON-NEXT:    mov w9, #8388608 // =0x800000
+; CHECK-NEON-NEXT:    fmov d2, x9
+; CHECK-NEON-NEXT:    mul x9, x8, x11
+; CHECK-NEON-NEXT:    mov w11, #16777216 // =0x1000000
 ; CHECK-NEON-NEXT:    eor v3.8b, v3.8b, v5.8b
-; CHECK-NEON-NEXT:    fmov x12, d4
+; CHECK-NEON-NEXT:    fmov x14, d4
 ; CHECK-NEON-NEXT:    fmov d4, x11
-; CHECK-NEON-NEXT:    mov w11, #1048576 // =0x100000
+; CHECK-NEON-NEXT:    fmov d5, x13
 ; CHECK-NEON-NEXT:    and v2.8b, v0.8b, v2.8b
-; CHECK-NEON-NEXT:    mul x9, x8, x9
-; CHECK-NEON-NEXT:    fmov d5, x10
-; CHECK-NEON-NEXT:    mul x12, x8, x12
-; CHECK-NEON-NEXT:    and v4.8b, v0.8b, v4.8b
-; CHECK-NEON-NEXT:    fmov x10, d2
-; CHECK-NEON-NEXT:    fmov d2, x11
-; CHECK-NEON-NEXT:    fmov x11, d4
-; CHECK-NEON-NEXT:    fmov d4, x9
-; CHECK-NEON-NEXT:    mul x13, x8, x10
-; CHECK-NEON-NEXT:    mov w10, #2097152 // =0x200000
-; CHECK-NEON-NEXT:    and v6.8b, v0.8b, v2.8b
-; CHECK-NEON-NEXT:    eor v2.8b, v3.8b, v5.8b
-; CHECK-NEON-NEXT:    fmov d3, x10
-; CHECK-NEON-NEXT:    mov w10, #4194304 // =0x400000
-; CHECK-NEON-NEXT:    fmov d5, x10
-; CHECK-NEON-NEXT:    mul x11, x8, x11
-; CHECK-NEON-NEXT:    fmov x9, d6
-; CHECK-NEON-NEXT:    fmov d6, x12
-; CHECK-NEON-NEXT:    and v3.8b, v0.8b, v3.8b
-; CHECK-NEON-NEXT:    eor v1.8b, v1.8b, v2.8b
-; CHECK-NEON-NEXT:    and v5.8b, v0.8b, v5.8b
-; CHECK-NEON-NEXT:    mul x10, x8, x9
-; CHECK-NEON-NEXT:    mov w9, #8388608 // =0x800000
-; CHECK-NEON-NEXT:    eor v4.8b, v4.8b, v6.8b
-; CHECK-NEON-NEXT:    fmov x12, d3
-; CHECK-NEON-NEXT:    fmov d3, x9
-; CHECK-NEON-NEXT:    fmov d6, x13
-; CHECK-NEON-NEXT:    fmov x14, d5
-; CHECK-NEON-NEXT:    mul x9, x8, x12
-; CHECK-NEON-NEXT:    mov w12, #16777216 // =0x1000000
-; CHECK-NEON-NEXT:    and v3.8b, v0.8b, v3.8b
-; CHECK-NEON-NEXT:    fmov d5, x12
-; CHECK-NEON-NEXT:    mov w12, #33554432 // =0x2000000
-; CHECK-NEON-NEXT:    mul x14, x8, x14
-; CHECK-NEON-NEXT:    fmov x13, d3
-; CHECK-NEON-NEXT:    fmov d3, x12
-; CHECK-NEON-NEXT:    mov w12, #67108864 // =0x4000000
-; CHECK-NEON-NEXT:    and v5.8b, v0.8b, v5.8b
-; CHECK-NEON-NEXT:    and v7.8b, v0.8b, v3.8b
-; CHECK-NEON-NEXT:    eor v3.8b, v4.8b, v6.8b
-; CHECK-NEON-NEXT:    fmov d4, x12
-; CHECK-NEON-NEXT:    fmov x15, d5
-; CHECK-NEON-NEXT:    mul x13, x8, x13
-; CHECK-NEON-NEXT:    fmov d5, x11
+; CHECK-NEON-NEXT:    mul x11, x8, x14
 ; CHECK-NEON-NEXT:    and v4.8b, v0.8b, v4.8b
-; CHECK-NEON-NEXT:    fmov x11, d7
-; CHECK-NEON-NEXT:    fmov d7, x14
-; CHECK-NEON-NEXT:    mul x12, x8, x15
-; CHECK-NEON-NEXT:    mov w15, #134217728 // =0x8000000
+; CHECK-NEON-NEXT:    mov w14, #33554432 // =0x2000000
+; CHECK-NEON-NEXT:    fmov x12, d2
+; CHECK-NEON-NEXT:    fmov d2, x14
+; CHECK-NEON-NEXT:    mov w14, #67108864 // =0x4000000
 ; CHECK-NEON-NEXT:    eor v3.8b, v3.8b, v5.8b
-; CHECK-NEON-NEXT:    fmov d6, x15
+; CHECK-NEON-NEXT:    fmov x15, d4
+; CHECK-NEON-NEXT:    fmov d4, x14
+; CHECK-NEON-NEXT:    and v2.8b, v0.8b, v2.8b
+; CHECK-NEON-NEXT:    mul x12, x8, x12
+; CHECK-NEON-NEXT:    fmov d6, x11
+; CHECK-NEON-NEXT:    mul x13, x8, x15
+; CHECK-NEON-NEXT:    mov w15, #134217728 // =0x8000000
+; CHECK-NEON-NEXT:    and v4.8b, v0.8b, v4.8b
+; CHECK-NEON-NEXT:    fmov x14, d2
+; CHECK-NEON-NEXT:    fmov d7, x15
 ; CHECK-NEON-NEXT:    mov w15, #536870912 // =0x20000000
-; CHECK-NEON-NEXT:    fmov x14, d4
-; CHECK-NEON-NEXT:    fmov d16, x13
-; CHECK-NEON-NEXT:    fmov d17, x15
-; CHECK-NEON-NEXT:    movi v4.2s, #128, lsl #24
-; CHECK-NEON-NEXT:    mov w15, #1073741824 // =0x40000000
-; CHECK-NEON-NEXT:    mul x11, x8, x11
-; CHECK-NEON-NEXT:    and v6.8b, v0.8b, v6.8b
-; CHECK-NEON-NEXT:    mul x13, x8, x14
-; CHECK-NEON-NEXT:    eor v7.8b, v7.8b, v16.8b
-; CHECK-NEON-NEXT:    fmov d16, x15
-; CHECK-NEON-NEXT:    mov w15, #268435456 // =0x10000000
-; CHECK-NEON-NEXT:    fmov x14, d6
-; CHECK-NEON-NEXT:    and v6.8b, v0.8b, v17.8b
-; CHECK-NEON-NEXT:    fneg d4, d4
-; CHECK-NEON-NEXT:    and v16.8b, v0.8b, v16.8b
 ; CHECK-NEON-NEXT:    fmov d5, x15
-; CHECK-NEON-NEXT:    fmov x15, d6
-; CHECK-NEON-NEXT:    fmov d6, x12
-; CHECK-NEON-NEXT:    mul x14, x8, x14
-; CHECK-NEON-NEXT:    and v5.8b, v0.8b, v5.8b
-; CHECK-NEON-NEXT:    and v4.8b, v0.8b, v4.8b
-; CHECK-NEON-NEXT:    mul x12, x8, x15
-; CHECK-NEON-NEXT:    eor v2.8b, v7.8b, v6.8b
-; CHECK-NEON-NEXT:    fmov x15, d16
-; CHECK-NEON-NEXT:    fmov d6, x10
-; CHECK-NEON-NEXT:    mul x10, x8, x15
-; CHECK-NEON-NEXT:    mov x15, #4294967296 // =0x100000000
-; CHECK-NEON-NEXT:    eor v3.8b, v3.8b, v6.8b
-; CHECK-NEON-NEXT:    fmov d6, x11
-; CHECK-NEON-NEXT:    fmov x11, d5
-; CHECK-NEON-NEXT:    fmov d5, x9
-; CHECK-NEON-NEXT:    fmov x9, d4
-; CHECK-NEON-NEXT:    fmov d4, x15
+; CHECK-NEON-NEXT:    mov w15, #1073741824 // =0x40000000
+; CHECK-NEON-NEXT:    movi v2.2s, #128, lsl #24
+; CHECK-NEON-NEXT:    mul x11, x8, x14
+; CHECK-NEON-NEXT:    fmov x14, d4
+; CHECK-NEON-NEXT:    and v4.8b, v0.8b, v7.8b
 ; CHECK-NEON-NEXT:    fmov d7, x12
-; CHECK-NEON-NEXT:    mov x15, #281474976710656 // =0x1000000000000
-; CHECK-NEON-NEXT:    eor v2.8b, v2.8b, v6.8b
-; CHECK-NEON-NEXT:    fmov d6, x13
-; CHECK-NEON-NEXT:    mov x13, #8589934592 // =0x200000000
-; CHECK-NEON-NEXT:    mul x9, x8, x9
-; CHECK-NEON-NEXT:    and v4.8b, v0.8b, v4.8b
-; CHECK-NEON-NEXT:    fmov d17, x13
-; CHECK-NEON-NEXT:    fmov d16, x10
-; CHECK-NEON-NEXT:    eor v3.8b, v3.8b, v5.8b
-; CHECK-NEON-NEXT:    mov x13, #549755813888 // =0x8000000000
-; CHECK-NEON-NEXT:    mul x11, x8, x11
-; CHECK-NEON-NEXT:    eor v2.8b, v2.8b, v6.8b
-; CHECK-NEON-NEXT:    fmov d6, x14
-; CHECK-NEON-NEXT:    fmov x10, d4
-; CHECK-NEON-NEXT:    and v4.8b, v0.8b, v17.8b
-; CHECK-NEON-NEXT:    mov x14, #17592186044416 // =0x100000000000
-; CHECK-NEON-NEXT:    eor v7.8b, v7.8b, v16.8b
-; CHECK-NEON-NEXT:    eor v1.8b, v1.8b, v3.8b
-; CHECK-NEON-NEXT:    eor v2.8b, v2.8b, v6.8b
-; CHECK-NEON-NEXT:    fmov d6, x9
-; CHECK-NEON-NEXT:    mov x9, #17179869184 // =0x400000000
-; CHECK-NEON-NEXT:    mul x10, x8, x10
-; CHECK-NEON-NEXT:    fmov d5, x11
-; CHECK-NEON-NEXT:    fmov x11, d4
-; CHECK-NEON-NEXT:    fmov d4, x9
-; CHECK-NEON-NEXT:    eor v6.8b, v7.8b, v6.8b
-; CHECK-NEON-NEXT:    mul x9, x8, x11
-; CHECK-NEON-NEXT:    and v4.8b, v0.8b, v4.8b
-; CHECK-NEON-NEXT:    mov x11, #34359738368 // =0x800000000
-; CHECK-NEON-NEXT:    fmov d3, x11
-; CHECK-NEON-NEXT:    eor v2.8b, v2.8b, v5.8b
-; CHECK-NEON-NEXT:    fmov d5, x10
-; CHECK-NEON-NEXT:    mov x10, #137438953472 // =0x2000000000
-; CHECK-NEON-NEXT:    fmov x11, d4
-; CHECK-NEON-NEXT:    fmov d4, x10
-; CHECK-NEON-NEXT:    and v3.8b, v0.8b, v3.8b
-; CHECK-NEON-NEXT:    eor v5.8b, v6.8b, v5.8b
-; CHECK-NEON-NEXT:    eor v1.8b, v1.8b, v2.8b
-; CHECK-NEON-NEXT:    fmov d6, x9
-; CHECK-NEON-NEXT:    mul x10, x8, x11
-; CHECK-NEON-NEXT:    mov x11, #274877906944 // =0x4000000000
-; CHECK-NEON-NEXT:    and v4.8b, v0.8b, v4.8b
-; CHECK-NEON-NEXT:    fmov x9, d3
-; CHECK-NEON-NEXT:    fmov d3, x11
-; CHECK-NEON-NEXT:    mov x11, #68719476736 // =0x1000000000
-; CHECK-NEON-NEXT:    eor v5.8b, v5.8b, v6.8b
-; CHECK-NEON-NEXT:    fmov d6, x13
-; CHECK-NEON-NEXT:    mov x13, #1099511627776 // =0x10000000000
+; CHECK-NEON-NEXT:    and v5.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT:    fmov d16, x13
+; CHECK-NEON-NEXT:    fneg d2, d2
+; CHECK-NEON-NEXT:    mov w13, #268435456 // =0x10000000
 ; CHECK-NEON-NEXT:    fmov x12, d4
-; CHECK-NEON-NEXT:    fmov d4, x11
-; CHECK-NEON-NEXT:    and v3.8b, v0.8b, v3.8b
-; CHECK-NEON-NEXT:    mul x9, x8, x9
+; CHECK-NEON-NEXT:    fmov d4, x15
+; CHECK-NEON-NEXT:    mov x15, #4294967296 // =0x100000000
+; CHECK-NEON-NEXT:    eor v6.8b, v6.8b, v7.8b
 ; CHECK-NEON-NEXT:    fmov d7, x10
-; CHECK-NEON-NEXT:    mul x11, x8, x12
+; CHECK-NEON-NEXT:    fmov x10, d5
+; CHECK-NEON-NEXT:    mul x14, x8, x14
 ; CHECK-NEON-NEXT:    and v4.8b, v0.8b, v4.8b
-; CHECK-NEON-NEXT:    fmov x12, d3
-; CHECK-NEON-NEXT:    and v3.8b, v0.8b, v6.8b
+; CHECK-NEON-NEXT:    and v2.8b, v0.8b, v2.8b
+; CHECK-NEON-NEXT:    eor v5.8b, v6.8b, v16.8b
 ; CHECK-NEON-NEXT:    fmov d6, x13
-; CHECK-NEON-NEXT:    eor v2.8b, v5.8b, v7.8b
-; CHECK-NEON-NEXT:    fmov d7, x14
-; CHECK-NEON-NEXT:    mov x14, #35184372088832 // =0x200000000000
+; CHECK-NEON-NEXT:    mul x10, x8, x10
+; CHECK-NEON-NEXT:    eor v3.8b, v3.8b, v7.8b
+; CHECK-NEON-NEXT:    fmov x13, d4
 ; CHECK-NEON-NEXT:    mul x12, x8, x12
+; CHECK-NEON-NEXT:    and v4.8b, v0.8b, v6.8b
+; CHECK-NEON-NEXT:    fmov d6, x15
+; CHECK-NEON-NEXT:    fmov x15, d2
+; CHECK-NEON-NEXT:    fmov d2, x11
+; CHECK-NEON-NEXT:    mul x13, x8, x13
 ; CHECK-NEON-NEXT:    and v6.8b, v0.8b, v6.8b
-; CHECK-NEON-NEXT:    fmov x10, d3
-; CHECK-NEON-NEXT:    fmov d3, x9
-; CHECK-NEON-NEXT:    fmov x9, d4
-; CHECK-NEON-NEXT:    fmov d4, x11
-; CHECK-NEON-NEXT:    mov x11, #2199023255552 // =0x20000000000
-; CHECK-NEON-NEXT:    mul x13, x8, x10
-; CHECK-NEON-NEXT:    fmov x10, d6
-; CHECK-NEON-NEXT:    fmov d6, x11
-; CHECK-NEON-NEXT:    mov x11, #4398046511104 // =0x40000000000
-; CHECK-NEON-NEXT:    eor v2.8b, v2.8b, v3.8b
+; CHECK-NEON-NEXT:    eor v2.8b, v5.8b, v2.8b
+; CHECK-NEON-NEXT:    fmov d5, x14
+; CHECK-NEON-NEXT:    fmov x14, d4
+; CHECK-NEON-NEXT:    fmov d4, x10
+; CHECK-NEON-NEXT:    mov x10, #8589934592 // =0x200000000
+; CHECK-NEON-NEXT:    mul x11, x8, x15
+; CHECK-NEON-NEXT:    fmov d7, x13
+; CHECK-NEON-NEXT:    fmov x13, d6
+; CHECK-NEON-NEXT:    fmov d6, x10
+; CHECK-NEON-NEXT:    mul x14, x8, x14
+; CHECK-NEON-NEXT:    eor v2.8b, v2.8b, v5.8b
 ; CHECK-NEON-NEXT:    fmov d5, x12
+; CHECK-NEON-NEXT:    mov x12, #1099511627776 // =0x10000000000
+; CHECK-NEON-NEXT:    eor v4.8b, v4.8b, v7.8b
+; CHECK-NEON-NEXT:    fmov d7, x9
+; CHECK-NEON-NEXT:    mov x9, #17179869184 // =0x400000000
+; CHECK-NEON-NEXT:    mul x10, x8, x13
+; CHECK-NEON-NEXT:    and v6.8b, v0.8b, v6.8b
+; CHECK-NEON-NEXT:    fmov d17, x9
+; CHECK-NEON-NEXT:    fmov d16, x11
+; CHECK-NEON-NEXT:    eor v2.8b, v2.8b, v5.8b
+; CHECK-NEON-NEXT:    eor v3.8b, v3.8b, v7.8b
+; CHECK-NEON-NEXT:    fmov d5, x14
+; CHECK-NEON-NEXT:    mov x14, #562949953421312 // =0x2000000000000
+; CHECK-NEON-NEXT:    fmov x9, d6
+; CHECK-NEON-NEXT:    and v6.8b, v0.8b, v17.8b
+; CHECK-NEON-NEXT:    eor v4.8b, v4.8b, v16.8b
+; CHECK-NEON-NEXT:    fmov d7, x10
+; CHECK-NEON-NEXT:    mov x10, #34359738368 // =0x800000000
+; CHECK-NEON-NEXT:    eor v1.8b, v1.8b, v3.8b
+; CHECK-NEON-NEXT:    mul x9, x8, x9
+; CHECK-NEON-NEXT:    fmov x11, d6
+; CHECK-NEON-NEXT:    fmov d6, x10
+; CHECK-NEON-NEXT:    eor v2.8b, v2.8b, v5.8b
+; CHECK-NEON-NEXT:    eor v4.8b, v4.8b, v7.8b
+; CHECK-NEON-NEXT:    mul x10, x8, x11
+; CHECK-NEON-NEXT:    and v6.8b, v0.8b, v6.8b
+; CHECK-NEON-NEXT:    mov x11, #137438953472 // =0x2000000000
 ; CHECK-NEON-NEXT:    fmov d3, x11
-; CHECK-NEON-NEXT:    mul x12, x8, x10
-; CHECK-NEON-NEXT:    eor v4.8b, v4.8b, v5.8b
-; CHECK-NEON-NEXT:    and v5.8b, v0.8b, v6.8b
-; CHECK-NEON-NEXT:    mul x10, x8, x9
-; CHECK-NEON-NEXT:    fmov d6, x13
+; CHECK-NEON-NEXT:    eor v1.8b, v1.8b, v2.8b
+; CHECK-NEON-NEXT:    fmov d5, x9
+; CHECK-NEON-NEXT:    mov x9, #274877906944 // =0x4000000000
+; CHECK-NEON-NEXT:    fmov x11, d6
 ; CHECK-NEON-NEXT:    and v3.8b, v0.8b, v3.8b
-; CHECK-NEON-NEXT:    mov x9, #8796093022208 // =0x80000000000
-; CHECK-NEON-NEXT:    fmov x11, d5
+; CHECK-NEON-NEXT:    eor v4.8b, v4.8b, v5.8b
 ; CHECK-NEON-NEXT:    fmov d5, x9
-; CHECK-NEON-NEXT:    eor v4.8b, v4.8b, v6.8b
-; CHECK-NEON-NEXT:    fmov d6, x12
-; CHECK-NEON-NEXT:    fmov x12, d3
+; CHECK-NEON-NEXT:    fmov d6, x10
 ; CHECK-NEON-NEXT:    mul x9, x8, x11
+; CHECK-NEON-NEXT:    mov x11, #549755813888 // =0x8000000000
+; CHECK-NEON-NEXT:    fmov x10, d3
+; CHECK-NEON-NEXT:    fmov d3, x11
+; CHECK-NEON-NEXT:    mov x11, #68719476736 // =0x1000000000
+; CHECK-NEON-NEXT:    and v5.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT:    eor v4.8b, v4.8b, v6.8b
+; CHECK-NEON-NEXT:    fmov d6, x11
+; CHECK-NEON-NEXT:    and v3.8b, v0.8b, v3.8b
+; CHECK-NEON-NEXT:    mul x10, x8, x10
+; CHECK-NEON-NEXT:    fmov x11, d5
+; CHECK-NEON-NEXT:    and v5.8b, v0.8b, v6.8b
+; CHECK-NEON-NEXT:    fmov d2, x9
+; CHECK-NEON-NEXT:    fmov x9, d3
+; CHECK-NEON-NEXT:    fmov d3, x12
+; CHECK-NEON-NEXT:    mov x12, #2199023255552 // =0x20000000000
+; CHECK-NEON-NEXT:    mul x11, x8, x11
+; CHECK-NEON-NEXT:    eor v2.8b, v4.8b, v2.8b
+; CHECK-NEON-NEXT:    fmov d4, x10
+; CHECK-NEON-NEXT:    mov x10, #4398046511104 // =0x40000000000
+; CHECK-NEON-NEXT:    mul x13, x8, x9
+; CHECK-NEON-NEXT:    fmov x9, d5
+; CHECK-NEON-NEXT:    and v3.8b, v0.8b, v3.8b
+; CHECK-NEON-NEXT:    fmov d5, x12
+; CHECK-NEON-NEXT:    fmov d6, x11
+; CHECK-NEON-NEXT:    fmov x11, d3
+; CHECK-NEON-NEXT:    fmov d3, x10
 ; CHECK-NEON-NEXT:    and v5.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT:    mul x9, x8, x9
+; CHECK-NEON-NEXT:    mul x10, x8, x11
+; CHECK-NEON-NEXT:    and v3.8b, v0.8b, v3.8b
 ; CHECK-NEON-NEXT:    mov x11, #70368744177664 // =0x400000000000
-; CHECK-NEON-NEXT:    fmov d3, x11
+; CHECK-NEON-NEXT:    fmov x12, d5
+; CHECK-NEON-NEXT:    fmov d5, x11
 ; CHECK-NEON-NEXT:    eor v4.8b, v4.8b, v6.8b
-; CHECK-NEON-NEXT:    and v6.8b, v0.8b, v7.8b
+; CHECK-NEON-NEXT:    fmov d6, x13
+; CHECK-NEON-NEXT:    fmov x13, d3
 ; CHECK-NEON-NEXT:    mul x11, x8, x12
 ; CHECK-NEON-NEXT:    mov x12, #140737488355328 // =0x800000000000
-; CHECK-NEON-NEXT:    fmov x13, d5
-; CHECK-NEON-NEXT:    fmov d5, x12
-; CHECK-NEON-NEXT:    and v3.8b, v0.8b, v3.8b
-; CHECK-NEON-NEXT:    fmov d16, x9
 ; CHECK-NEON-NEXT:    and v5.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT:    fmov d3, x12
+; CHECK-NEON-NEXT:    eor v4.8b, v4.8b, v6.8b
+; CHECK-NEON-NEXT:    fmov d6, x10
 ; CHECK-NEON-NEXT:    mul x12, x8, x13
+; CHECK-NEON-NEXT:    mov x13, #281474976710656 // =0x1000000000000
+; CHECK-NEON-NEXT:    fmov x10, d5
+; CHECK-NEON-NEXT:    fmov d5, x13
+; CHECK-NEON-NEXT:    mov x13, #8796093022208 // =0x80000000000
+; CHECK-NEON-NEXT:    and v3.8b, v0.8b, v3.8b
+; CHECK-NEON-NEXT:    eor v4.8b, v4.8b, v6.8b
+; CHECK-NEON-NEXT:    fmov d6, x13
+; CHECK-NEON-NEXT:    fmov d7, x11
+; CHECK-NEON-NEXT:    and v5.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT:    mul x10, x8, x10
 ; CHECK-NEON-NEXT:    fmov x13, d3
-; CHECK-NEON-NEXT:    fmov d3, x14
-; CHECK-NEON-NEXT:    eor v4.8b, v4.8b, v16.8b
-; CHECK-NEON-NEXT:    fmov x14, d5
-; CHECK-NEON-NEXT:    fmov d5, x15
-; CHECK-NEON-NEXT:    mov x15, #562949953421312 // =0x2000000000000
+; CHECK-NEON-NEXT:    and v3.8b, v0.8b, v6.8b
+; CHECK-NEON-NEXT:    fmov d6, x9
+; CHECK-NEON-NEXT:    eor v4.8b, v4.8b, v7.8b
+; CHECK-NEON-NEXT:    fmov x9, d5
+; CHECK-NEON-NEXT:    fmov d5, x14
+; CHECK-NEON-NEXT:    mov x14, #1125899906842624 // =0x4000000000000
 ; CHECK-NEON-NEXT:    mul x13, x8, x13
-; CHECK-NEON-NEXT:    fmov d7, x15
-; CHECK-NEON-NEXT:    fmov x15, d6
-; CHECK-NEON-NEXT:    and v6.8b, v0.8b, v3.8b
+; CHECK-NEON-NEXT:    fmov x11, d3
+; CHECK-NEON-NEXT:    fmov d7, x14
 ; CHECK-NEON-NEXT:    fmov d3, x10
+; CHECK-NEON-NEXT:    eor v2.8b, v2.8b, v6.8b
 ; CHECK-NEON-NEXT:    and v5.8b, v0.8b, v5.8b
-; CHECK-NEON-NEXT:    mul x14, x8, x14
-; CHECK-NEON-NEXT:    and v7.8b, v0.8b, v7.8b
-; CHECK-NEON-NEXT:    mul x9, x8, x15
-; CHECK-NEON-NEXT:    eor v2.8b, v2.8b, v3.8b
-; CHECK-NEON-NEXT:    fmov x10, d5
-; CHECK-NEON-NEXT:    fmov d5, x11
-; CHECK-NEON-NEXT:    fmov x11, d6
-; CHECK-NEON-NEXT:    fmov d6, x13
-; CHECK-NEON-NEXT:    mov x13, #1125899906842624 // =0x4000000000000
+; CHECK-NEON-NEXT:    mul x9, x8, x9
+; CHECK-NEON-NEXT:    mul x11, x8, x11
+; CHECK-NEON-NEXT:    eor v1.8b, v1.8b, v2.8b
 ; CHECK-NEON-NEXT:    fmov d16, x13
+; CHECK-NEON-NEXT:    fmov x10, d5
 ; CHECK-NEON-NEXT:    mov x13, #2251799813685248 // =0x8000000000000
-; CHECK-NEON-NEXT:    eor v1.8b, v1.8b, v2.8b
-; CHECK-NEON-NEXT:    mul x15, x8, x10
-; CHECK-NEON-NEXT:    fmov x10, d7
-; CHECK-NEON-NEXT:    fmov d7, x14
-; CHECK-NEON-NEXT:    fmov d17, x13
-; CHECK-NEON-NEXT:    eor v4.8b, v4.8b, v5.8b
-; CHECK-NEON-NEXT:    and v16.8b, v0.8b, v16.8b
-; CHECK-NEON-NEXT:    mul x14, x8, x10
-; CHECK-NEON-NEXT:    eor v7.8b, v6.8b, v7.8b
+; CHECK-NEON-NEXT:    and v5.8b, v0.8b, v7.8b
+; CHECK-NEON-NEXT:    fmov d7, x12
+; CHECK-NEON-NEXT:    mov x12, #17592186044416 // =0x100000000000
+; CHECK-NEON-NEXT:    fmov d17, x9
+; CHECK-NEON-NEXT:    eor v3.8b, v3.8b, v16.8b
+; CHECK-NEON-NEXT:    fmov d16, x13
+; CHECK-NEON-NEXT:    mul x10, x8, x10
+; CHECK-NEON-NEXT:    fmov x9, d5
+; CHECK-NEON-NEXT:    eor v4.8b, v4.8b, v7.8b
+; CHECK-NEON-NEXT:    fmov d7, x11
+; CHECK-NEON-NEXT:    mov x11, #35184372088832 // =0x200000000000
+; CHECK-NEON-NEXT:    mov x13, #9007199254740992 // =0x20000000000000
+; CHECK-NEON-NEXT:    and v5.8b, v0.8b, v16.8b
+; CHECK-NEON-NEXT:    fmov d16, x12
+; CHECK-NEON-NEXT:    eor v3.8b, v3.8b, v17.8b
+; CHECK-NEON-NEXT:    mul x9, x8, x9
+; CHECK-NEON-NEXT:    mov x12, #4503599627370496 // =0x10000000000000
+; CHECK-NEON-NEXT:    fmov d17, x10
 ; CHECK-NEON-NEXT:    fmov d6, x12
-; CHECK-NEON-NEXT:    and v17.8b, v0.8b, v17.8b
-; CHECK-NEON-NEXT:    mul x10, x8, x11
-; CHECK-NEON-NEXT:    mov x11, #4503599627370496 // =0x10000000000000
-; CHECK-NEON-NEXT:    fmov x12, d16
-; CHECK-NEON-NEXT:    fmov d16, x11
-; CHECK-NEON-NEXT:    fmov d18, x15
-; CHECK-NEON-NEXT:    mov x15, #288230376151711744 // =0x400000000000000
-; CHECK-NEON-NEXT:    fmov x13, d17
-; CHECK-NEON-NEXT:    eor v4.8b, v4.8b, v6.8b
-; CHECK-NEON-NEXT:    mul x11, x8, x12
-; CHECK-NEON-NEXT:    mov x12, #9007199254740992 // =0x20000000000000
+; CHECK-NEON-NEXT:    mov x12, #72057594037927936 // =0x100000000000000
+; CHECK-NEON-NEXT:    fmov x10, d5
 ; CHECK-NEON-NEXT:    and v16.8b, v0.8b, v16.8b
-; CHECK-NEON-NEXT:    fmov d17, x12
-; CHECK-NEON-NEXT:    eor v7.8b, v7.8b, v18.8b
-; CHECK-NEON-NEXT:    fmov d18, x14
+; CHECK-NEON-NEXT:    fmov d5, x11
+; CHECK-NEON-NEXT:    eor v17.8b, v3.8b, v17.8b
+; CHECK-NEON-NEXT:    eor v3.8b, v4.8b, v7.8b
+; CHECK-NEON-NEXT:    fmov d7, x12
+; CHECK-NEON-NEXT:    mul x11, x8, x10
+; CHECK-NEON-NEXT:    and v4.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT:    fmov x10, d16
+; CHECK-NEON-NEXT:    fmov d5, x9
+; CHECK-NEON-NEXT:    and v6.8b, v0.8b, v6.8b
+; CHECK-NEON-NEXT:    mul x9, x8, x10
+; CHECK-NEON-NEXT:    fmov x10, d4
+; CHECK-NEON-NEXT:    eor v5.8b, v17.8b, v5.8b
+; CHECK-NEON-NEXT:    fmov x12, d6
+; CHECK-NEON-NEXT:    fmov d4, x11
+; CHECK-NEON-NEXT:    mov x11, #144115188075855872 // =0x200000000000000
+; CHECK-NEON-NEXT:    fmov d6, x11
+; CHECK-NEON-NEXT:    mul x10, x8, x10
+; CHECK-NEON-NEXT:    eor v4.8b, v5.8b, v4.8b
+; CHECK-NEON-NEXT:    and v5.8b, v0.8b, v7.8b
+; CHECK-NEON-NEXT:    fmov d7, x13
+; CHECK-NEON-NEXT:    and v6.8b, v0.8b, v6.8b
+; CHECK-NEON-NEXT:    mul x11, x8, x12
+; CHECK-NEON-NEXT:    mov x12, #18014398509481984 // =0x40000000000000
+; CHECK-NEON-NEXT:    fmov x13, d5
+; CHECK-NEON-NEXT:    fmov d5, x12
+; CHECK-NEON-NEXT:    and v7.8b, v0.8b, v7.8b
+; CHECK-NEON-NEXT:    fmov x14, d6
 ; CHECK-NEON-NEXT:    mul x12, x8, x13
-; CHECK-NEON-NEXT:    mov x13, #72057594037927936 // =0x100000000000000
-; CHECK-NEON-NEXT:    fmov x14, d16
-; CHECK-NEON-NEXT:    and v17.8b, v0.8b, v17.8b
-; CHECK-NEON-NEXT:    fmov d16, x13
-; CHECK-NEON-NEXT:    eor v7.8b, v7.8b, v18.8b
-; CHECK-NEON-NEXT:    fmov d18, x11
+; CHECK-NEON-NEXT:    mov x13, #288230376151711744 // =0x400000000000000
+; CHECK-NEON-NEXT:    and v5.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT:    fmov d6, x13
+; CHECK-NEON-NEXT:    fmov d16, x11
+; CHECK-NEON-NEXT:    fmov x11, d7
 ; CHECK-NEON-NEXT:    mul x13, x8, x14
-; CHECK-NEON-NEXT:    mov x14, #144115188075855872 // =0x200000000000000
-; CHECK-NEON-NEXT:    fmov x11, d17
-; CHECK-NEON-NEXT:    fmov d17, x14
-; CHECK-NEON-NEXT:    mov x14, #18014398509481984 // =0x40000000000000
-; CHECK-NEON-NEXT:    and v16.8b, v0.8b, v16.8b
-; CHECK-NEON-NEXT:    eor v7.8b, v7.8b, v18.8b
-; CHECK-NEON-NEXT:    fmov d18, x14
+; CHECK-NEON-NEXT:    mov x14, #576460752303423488 // =0x800000000000000
+; CHECK-NEON-NEXT:    fmov d7, x14
+; CHECK-NEON-NEXT:    mov x14, #36028797018963968 // =0x80000000000000
+; CHECK-NEON-NEXT:    fmov x15, d5
+; CHECK-NEON-NEXT:    and v6.8b, v0.8b, v6.8b
+; CHECK-NEON-NEXT:    fmov d5, x14
 ; CHECK-NEON-NEXT:    mul x11, x8, x11
-; CHECK-NEON-NEXT:    and v17.8b, v0.8b, v17.8b
-; CHECK-NEON-NEXT:    fmov x14, d16
-; CHECK-NEON-NEXT:    and v16.8b, v0.8b, v18.8b
-; CHECK-NEON-NEXT:    fmov d18, x12
-; CHECK-NEON-NEXT:    fmov x12, d17
-; CHECK-NEON-NEXT:    fmov d17, x15
-; CHECK-NEON-NEXT:    mul x14, x8, x14
-; CHECK-NEON-NEXT:    mul x15, x8, x12
-; CHECK-NEON-NEXT:    mov x12, #576460752303423488 // =0x800000000000000
-; CHECK-NEON-NEXT:    and v17.8b, v0.8b, v17.8b
-; CHECK-NEON-NEXT:    fmov d5, x12
-; CHECK-NEON-NEXT:    fmov x12, d16
-; CHECK-NEON-NEXT:    fmov d6, x14
-; CHECK-NEON-NEXT:    and v3.8b, v0.8b, v5.8b
-; CHECK-NEON-NEXT:    eor v5.8b, v7.8b, v18.8b
-; CHECK-NEON-NEXT:    fmov d7, x13
-; CHECK-NEON-NEXT:    fmov x13, d17
-; CHECK-NEON-NEXT:    fmov d16, x15
-; CHECK-NEON-NEXT:    mov x15, #1152921504606846976 // =0x1000000000000000
-; CHECK-NEON-NEXT:    mul x12, x8, x12
-; CHECK-NEON-NEXT:    fmov x14, d3
-; CHECK-NEON-NEXT:    eor v3.8b, v5.8b, v7.8b
-; CHECK-NEON-NEXT:    fmov d5, x15
-; CHECK-NEON-NEXT:    mul x13, x8, x13
-; CHECK-NEON-NEXT:    mov x15, #2305843009213693952 // =0x2000000000000000
-; CHECK-NEON-NEXT:    eor v6.8b, v6.8b, v16.8b
-; CHECK-NEON-NEXT:    fmov d7, x15
-; CHECK-NEON-NEXT:    mov x15, #36028797018963968 // =0x80000000000000
-; CHECK-NEON-NEXT:    movi d16, #0000000000000000
-; CHECK-NEON-NEXT:    mul x14, x8, x14
+; CHECK-NEON-NEXT:    eor v4.8b, v4.8b, v16.8b
+; CHECK-NEON-NEXT:    and v7.8b, v0.8b, v7.8b
+; CHECK-NEON-NEXT:    fmov x14, d6
+; CHECK-NEON-NEXT:    fmov d6, x12
+; CHECK-NEON-NEXT:    fmov d17, x13
 ; CHECK-NEON-NEXT:    and v5.8b, v0.8b, v5.8b
-; CHECK-NEON-NEXT:    fmov d17, x15
+; CHECK-NEON-NEXT:    fmov x13, d7
+; CHECK-NEON-NEXT:    mul x12, x8, x14
+; CHECK-NEON-NEXT:    mov x14, #1152921504606846976 // =0x1000000000000000
+; CHECK-NEON-NEXT:    eor v6.8b, v6.8b, v17.8b
+; CHECK-NEON-NEXT:    fmov d7, x14
+; CHECK-NEON-NEXT:    mov x14, #2305843009213693952 // =0x2000000000000000
+; CHECK-NEON-NEXT:    fmov d17, x14
+; CHECK-NEON-NEXT:    mul x13, x8, x13
 ; CHECK-NEON-NEXT:    and v7.8b, v0.8b, v7.8b
-; CHECK-NEON-NEXT:    fmov d18, x13
-; CHECK-NEON-NEXT:    fmov x13, d5
+; CHECK-NEON-NEXT:    mul x14, x8, x15
+; CHECK-NEON-NEXT:    fmov d19, x12
 ; CHECK-NEON-NEXT:    and v17.8b, v0.8b, v17.8b
-; CHECK-NEON-NEXT:    fneg d16, d16
-; CHECK-NEON-NEXT:    fmov d5, x14
-; CHECK-NEON-NEXT:    mov x14, #4611686018427387904 // =0x4000000000000000
-; CHECK-NEON-NEXT:    fmov x15, d7
-; CHECK-NEON-NEXT:    eor v6.8b, v6.8b, v18.8b
-; CHECK-NEON-NEXT:    mul x13, x8, x13
-; CHECK-NEON-NEXT:    fmov d7, x14
-; CHECK-NEON-NEXT:    fmov x14, d17
-; CHECK-NEON-NEXT:    fmov d17, x9
+; CHECK-NEON-NEXT:    fmov x12, d7
+; CHECK-NEON-NEXT:    fmov d7, #2.00000000
+; CHECK-NEON-NEXT:    fmov d16, x13
+; CHECK-NEON-NEXT:    eor v6.8b, v6.8b, v19.8b
+; CHECK-NEON-NEXT:    fmov x15, d17
+; CHECK-NEON-NEXT:    fneg d17, d18
+; CHECK-NEON-NEXT:    fmov x13, d5
+; CHECK-NEON-NEXT:    mul x12, x8, x12
+; CHECK-NEON-NEXT:    and v5.8b, v0.8b, v7.8b
+; CHECK-NEON-NEXT:    fmov d7, x9
+; CHECK-NEON-NEXT:    eor v6.8b, v6.8b, v16.8b
+; CHECK-NEON-NEXT:    fmov d16, x11
 ; CHECK-NEON-NEXT:    mul x15, x8, x15
+; CHECK-NEON-NEXT:    and v0.8b, v0.8b, v17.8b
+; CHECK-NEON-NEXT:    fmov x11, d5
+; CHECK-NEON-NEXT:    eor v3.8b, v3.8b, v7.8b
+; CHECK-NEON-NEXT:    mul x9, x8, x13
+; CHECK-NEON-NEXT:    eor v4.8b, v4.8b, v16.8b
+; CHECK-NEON-NEXT:    fmov d5, x12
 ; CHECK-NEON-NEXT:    eor v5.8b, v6.8b, v5.8b
-; CHECK-NEON-NEXT:    fmov d6, x11
-; CHECK-NEON-NEXT:    and v7.8b, v0.8b, v7.8b
-; CHECK-NEON-NEXT:    and v0.8b, v0.8b, v16.8b
-; CHECK-NEON-NEXT:    eor v4.8b, v4.8b, v17.8b
-; CHECK-NEON-NEXT:    mul x9, x8, x14
-; CHECK-NEON-NEXT:    eor v3.8b, v3.8b, v6.8b
-; CHECK-NEON-NEXT:    fmov d6, x13
-; CHECK-NEON-NEXT:    fmov x11, d7
-; CHECK-NEON-NEXT:    eor v5.8b, v5.8b, v6.8b
 ; CHECK-NEON-NEXT:    fmov d6, x10
 ; CHECK-NEON-NEXT:    mul x10, x8, x11
 ; CHECK-NEON-NEXT:    fmov x11, d0
 ; CHECK-NEON-NEXT:    fmov d0, x15
-; CHECK-NEON-NEXT:    eor v2.8b, v4.8b, v6.8b
-; CHECK-NEON-NEXT:    fmov d4, x12
+; CHECK-NEON-NEXT:    eor v2.8b, v3.8b, v6.8b
+; CHECK-NEON-NEXT:    fmov d3, x14
 ; CHECK-NEON-NEXT:    mul x8, x8, x11
 ; CHECK-NEON-NEXT:    eor v0.8b, v5.8b, v0.8b
 ; CHECK-NEON-NEXT:    fmov d5, x10
-; CHECK-NEON-NEXT:    eor v3.8b, v3.8b, v4.8b
+; CHECK-NEON-NEXT:    eor v3.8b, v4.8b, v3.8b
 ; CHECK-NEON-NEXT:    fmov d4, x9
 ; CHECK-NEON-NEXT:    eor v1.8b, v1.8b, v2.8b
 ; CHECK-NEON-NEXT:    eor v0.8b, v0.8b, v5.8b
diff --git a/llvm/test/CodeGen/AArch64/dup.ll b/llvm/test/CodeGen/AArch64/dup.ll
index 16748cf16cb19..6420b35827aca 100644
--- a/llvm/test/CodeGen/AArch64/dup.ll
+++ b/llvm/test/CodeGen/AArch64/dup.ll
@@ -2569,8 +2569,7 @@ define <2 x fp128> @loaddup_str_v2fp128(ptr %p) {
 ; CHECK-SD-LABEL: loaddup_str_v2fp128:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    ldr q0, [x0]
-; CHECK-SD-NEXT:    adrp x8, .LCPI155_0
-; CHECK-SD-NEXT:    ldr q2, [x8, :lo12:.LCPI155_0]
+; CHECK-SD-NEXT:    movi d2, #0000000000000000
 ; CHECK-SD-NEXT:    mov v1.16b, v0.16b
 ; CHECK-SD-NEXT:    str q2, [x0]
 ; CHECK-SD-NEXT:    ret
@@ -2631,8 +2630,7 @@ define <3 x fp128> @loaddup_str_v3fp128(ptr %p) {
 ; CHECK-SD-LABEL: loaddup_str_v3fp128:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    ldr q0, [x0]
-; CHECK-SD-NEXT:    adrp x8, .LCPI159_0
-; CHECK-SD-NEXT:    ldr q3, [x8, :lo12:.LCPI159_0]
+; CHECK-SD-NEXT:    movi d3, #0000000000000000
 ; CHECK-SD-NEXT:    mov v1.16b, v0.16b
 ; CHECK-SD-NEXT:    mov v2.16b, v0.16b
 ; CHECK-SD-NEXT:    str q3, [x0]
@@ -2698,8 +2696,7 @@ define <4 x fp128> @loaddup_str_v4fp128(ptr %p) {
 ; CHECK-SD-LABEL: loaddup_str_v4fp128:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    ldr q0, [x0]
-; CHECK-SD-NEXT:    adrp x8, .LCPI163_0
-; CHECK-SD-NEXT:    ldr q4, [x8, :lo12:.LCPI163_0]
+; CHECK-SD-NEXT:    movi d4, #0000000000000000
 ; CHECK-SD-NEXT:    mov v1.16b, v0.16b
 ; CHECK-SD-NEXT:    mov v2.16b, v0.16b
 ; CHECK-SD-NEXT:    mov v3.16b, v0.16b
diff --git a/llvm/test/CodeGen/AArch64/f16-imm.ll b/llvm/test/CodeGen/AArch64/f16-imm.ll
index 911bf44a3ce93..830e6093b1bb8 100644
--- a/llvm/test/CodeGen/AArch64/f16-imm.ll
+++ b/llvm/test/CodeGen/AArch64/f16-imm.ll
@@ -33,8 +33,7 @@ define half @Const1() {
 ;
 ; CHECK-NOFP16-LABEL: Const1:
 ; CHECK-NOFP16:       // %bb.0: // %entry
-; CHECK-NOFP16-NEXT:    adrp x8, .LCPI1_0
-; CHECK-NOFP16-NEXT:    ldr h0, [x8, :lo12:.LCPI1_0]
+; CHECK-NOFP16-NEXT:    movi v0.4h, #60, lsl #8
 ; CHECK-NOFP16-NEXT:    ret
 entry:
   ret half 0xH3C00
@@ -48,8 +47,7 @@ define half @Const2() {
 ;
 ; CHECK-NOFP16-LABEL: Const2:
 ; CHECK-NOFP16:       // %bb.0: // %entry
-; CHECK-NOFP16-NEXT:    adrp x8, .LCPI2_0
-; CHECK-NOFP16-NEXT:    ldr h0, [x8, :lo12:.LCPI2_0]
+; CHECK-NOFP16-NEXT:    movi v0.4h, #48, lsl #8
 ; CHECK-NOFP16-NEXT:    ret
 entry:
   ret half 0xH3000
@@ -154,35 +152,15 @@ entry:
 }
 
 define half @Const7() {
-; CHECK-NOZCZ-SD-LABEL: Const7:
-; CHECK-NOZCZ-SD:       // %bb.0: // %entry
-; CHECK-NOZCZ-SD-NEXT:    mov w8, #20480 // =0x5000
-; CHECK-NOZCZ-SD-NEXT:    fmov h0, w8
-; CHECK-NOZCZ-SD-NEXT:    ret
-;
-; CHECK-ZCZ-SD-LABEL: Const7:
-; CHECK-ZCZ-SD:       // %bb.0: // %entry
-; CHECK-ZCZ-SD-NEXT:    mov w8, #20480 // =0x5000
-; CHECK-ZCZ-SD-NEXT:    fmov h0, w8
-; CHECK-ZCZ-SD-NEXT:    ret
+; CHECK-FP16-LABEL: Const7:
+; CHECK-FP16:       // %bb.0: // %entry
+; CHECK-FP16-NEXT:    movi v0.4h, #80, lsl #8
+; CHECK-FP16-NEXT:    ret
 ;
 ; CHECK-NOFP16-LABEL: Const7:
 ; CHECK-NOFP16:       // %bb.0: // %entry
-; CHECK-NOFP16-NEXT:    adrp x8, .LCPI7_0
-; CHECK-NOFP16-NEXT:    ldr h0, [x8, :lo12:.LCPI7_0]
+; CHECK-NOFP16-NEXT:    movi v0.4h, #80, lsl #8
 ; CHECK-NOFP16-NEXT:    ret
-;
-; CHECK-NOZCZ-GI-LABEL: Const7:
-; CHECK-NOZCZ-GI:       // %bb.0: // %entry
-; CHECK-NOZCZ-GI-NEXT:    adrp x8, .LCPI7_0
-; CHECK-NOZCZ-GI-NEXT:    ldr h0, [x8, :lo12:.LCPI7_0]
-; CHECK-NOZCZ-GI-NEXT:    ret
-;
-; CHECK-ZCZ-GI-LABEL: Const7:
-; CHECK-ZCZ-GI:       // %bb.0: // %entry
-; CHECK-ZCZ-GI-NEXT:    adrp x8, .LCPI7_0
-; CHECK-ZCZ-GI-NEXT:    ldr h0, [x8, :lo12:.LCPI7_0]
-; CHECK-ZCZ-GI-NEXT:    ret
 entry:
   ret half 0xH5000
 }
diff --git a/llvm/test/CodeGen/AArch64/f16-instructions.ll b/llvm/test/CodeGen/AArch64/f16-instructions.ll
index f6d701b518699..d4a2fd68025f3 100644
--- a/llvm/test/CodeGen/AArch64/f16-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/f16-instructions.ll
@@ -761,12 +761,11 @@ define void @test_fccmp(half %in, ptr %out) {
 ; CHECK-CVT-SD-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-SD-NEXT:    fcvt s1, h0
 ; CHECK-CVT-SD-NEXT:    fmov s2, #5.00000000
-; CHECK-CVT-SD-NEXT:    adrp x8, .LCPI29_0
+; CHECK-CVT-SD-NEXT:    movi v3.4h, #69, lsl #8
 ; CHECK-CVT-SD-NEXT:    fcmp s1, s2
 ; CHECK-CVT-SD-NEXT:    fmov s2, #8.00000000
 ; CHECK-CVT-SD-NEXT:    fccmp s1, s2, #4, mi
-; CHECK-CVT-SD-NEXT:    ldr h1, [x8, :lo12:.LCPI29_0]
-; CHECK-CVT-SD-NEXT:    fcsel s0, s0, s1, gt
+; CHECK-CVT-SD-NEXT:    fcsel s0, s0, s3, gt
 ; CHECK-CVT-SD-NEXT:    str h0, [x0]
 ; CHECK-CVT-SD-NEXT:    ret
 ;
@@ -785,13 +784,12 @@ define void @test_fccmp(half %in, ptr %out) {
 ; CHECK-CVT-GI-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-GI-NEXT:    fcvt s1, h0
 ; CHECK-CVT-GI-NEXT:    fmov s2, #5.00000000
-; CHECK-CVT-GI-NEXT:    adrp x8, .LCPI29_0
 ; CHECK-CVT-GI-NEXT:    fmov s3, #8.00000000
-; CHECK-CVT-GI-NEXT:    fcmp s1, s2
-; CHECK-CVT-GI-NEXT:    ldr h2, [x8, :lo12:.LCPI29_0]
 ; CHECK-CVT-GI-NEXT:    fmov w8, s0
-; CHECK-CVT-GI-NEXT:    fmov w9, s2
+; CHECK-CVT-GI-NEXT:    fcmp s1, s2
+; CHECK-CVT-GI-NEXT:    movi v2.4h, #69, lsl #8
 ; CHECK-CVT-GI-NEXT:    fccmp s1, s3, #4, mi
+; CHECK-CVT-GI-NEXT:    fmov w9, s2
 ; CHECK-CVT-GI-NEXT:    csel w8, w8, w9, gt
 ; CHECK-CVT-GI-NEXT:    strh w8, [x0]
 ; CHECK-CVT-GI-NEXT:    ret
diff --git a/llvm/test/CodeGen/AArch64/fabs-combine.ll b/llvm/test/CodeGen/AArch64/fabs-combine.ll
index a7f6a251cdc1b..490754bf6fa02 100644
--- a/llvm/test/CodeGen/AArch64/fabs-combine.ll
+++ b/llvm/test/CodeGen/AArch64/fabs-combine.ll
@@ -93,12 +93,19 @@ define <4 x float> @nabsv4f32(<4 x float> %a) {
 }
 
 define <2 x double> @nabsv2d64(<2 x double> %a) {
-; CHECK-LABEL: nabsv2d64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi v1.2d, #0000000000000000
-; CHECK-NEXT:    fneg v1.2d, v1.2d
-; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: nabsv2d64:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    movi d1, #0000000000000000
+; CHECK-SD-NEXT:    fneg v1.2d, v1.2d
+; CHECK-SD-NEXT:    orr v0.16b, v0.16b, v1.16b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: nabsv2d64:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-GI-NEXT:    fneg v1.2d, v1.2d
+; CHECK-GI-NEXT:    orr v0.16b, v0.16b, v1.16b
+; CHECK-GI-NEXT:    ret
   %conv = bitcast <2 x double> %a to <2 x i64>
   %and = or <2 x i64> %conv, <i64 -9223372036854775808, i64 -9223372036854775808>
   %conv1 = bitcast <2 x i64> %and to <2 x double>
diff --git a/llvm/test/CodeGen/AArch64/fcvt-fixed.ll b/llvm/test/CodeGen/AArch64/fcvt-fixed.ll
index 743d1604388de..949291ce8576f 100644
--- a/llvm/test/CodeGen/AArch64/fcvt-fixed.ll
+++ b/llvm/test/CodeGen/AArch64/fcvt-fixed.ll
@@ -166,8 +166,7 @@ define i32 @fcvtzs_f16_i32_7(half %flt) {
 ;
 ; CHECK-GI-FP16-LABEL: fcvtzs_f16_i32_7:
 ; CHECK-GI-FP16:       // %bb.0:
-; CHECK-GI-FP16-NEXT:    adrp x8, .LCPI8_0
-; CHECK-GI-FP16-NEXT:    ldr h1, [x8, :lo12:.LCPI8_0]
+; CHECK-GI-FP16-NEXT:    movi v1.4h, #88, lsl #8
 ; CHECK-GI-FP16-NEXT:    fmul h0, h0, h1
 ; CHECK-GI-FP16-NEXT:    fcvtzs w0, h0
 ; CHECK-GI-FP16-NEXT:    ret
@@ -194,8 +193,7 @@ define i32 @fcvtzs_f16_i32_15(half %flt) {
 ;
 ; CHECK-GI-FP16-LABEL: fcvtzs_f16_i32_15:
 ; CHECK-GI-FP16:       // %bb.0:
-; CHECK-GI-FP16-NEXT:    adrp x8, .LCPI9_0
-; CHECK-GI-FP16-NEXT:    ldr h1, [x8, :lo12:.LCPI9_0]
+; CHECK-GI-FP16-NEXT:    movi v1.4h, #120, lsl #8
 ; CHECK-GI-FP16-NEXT:    fmul h0, h0, h1
 ; CHECK-GI-FP16-NEXT:    fcvtzs w0, h0
 ; CHECK-GI-FP16-NEXT:    ret
@@ -222,8 +220,7 @@ define i64 @fcvtzs_f16_i64_7(half %flt) {
 ;
 ; CHECK-GI-FP16-LABEL: fcvtzs_f16_i64_7:
 ; CHECK-GI-FP16:       // %bb.0:
-; CHECK-GI-FP16-NEXT:    adrp x8, .LCPI10_0
-; CHECK-GI-FP16-NEXT:    ldr h1, [x8, :lo12:.LCPI10_0]
+; CHECK-GI-FP16-NEXT:    movi v1.4h, #88, lsl #8
 ; CHECK-GI-FP16-NEXT:    fmul h0, h0, h1
 ; CHECK-GI-FP16-NEXT:    fcvtzs x0, h0
 ; CHECK-GI-FP16-NEXT:    ret
@@ -250,8 +247,7 @@ define i64 @fcvtzs_f16_i64_15(half %flt) {
 ;
 ; CHECK-GI-FP16-LABEL: fcvtzs_f16_i64_15:
 ; CHECK-GI-FP16:       // %bb.0:
-; CHECK-GI-FP16-NEXT:    adrp x8, .LCPI11_0
-; CHECK-GI-FP16-NEXT:    ldr h1, [x8, :lo12:.LCPI11_0]
+; CHECK-GI-FP16-NEXT:    movi v1.4h, #120, lsl #8
 ; CHECK-GI-FP16-NEXT:    fmul h0, h0, h1
 ; CHECK-GI-FP16-NEXT:    fcvtzs x0, h0
 ; CHECK-GI-FP16-NEXT:    ret
@@ -422,8 +418,7 @@ define i32 @fcvtzu_f16_i32_7(half %flt) {
 ;
 ; CHECK-GI-FP16-LABEL: fcvtzu_f16_i32_7:
 ; CHECK-GI-FP16:       // %bb.0:
-; CHECK-GI-FP16-NEXT:    adrp x8, .LCPI20_0
-; CHECK-GI-FP16-NEXT:    ldr h1, [x8, :lo12:.LCPI20_0]
+; CHECK-GI-FP16-NEXT:    movi v1.4h, #88, lsl #8
 ; CHECK-GI-FP16-NEXT:    fmul h0, h0, h1
 ; CHECK-GI-FP16-NEXT:    fcvtzu w0, h0
 ; CHECK-GI-FP16-NEXT:    ret
@@ -450,8 +445,7 @@ define i32 @fcvtzu_f16_i32_15(half %flt) {
 ;
 ; CHECK-GI-FP16-LABEL: fcvtzu_f16_i32_15:
 ; CHECK-GI-FP16:       // %bb.0:
-; CHECK-GI-FP16-NEXT:    adrp x8, .LCPI21_0
-; CHECK-GI-FP16-NEXT:    ldr h1, [x8, :lo12:.LCPI21_0]
+; CHECK-GI-FP16-NEXT:    movi v1.4h, #120, lsl #8
 ; CHECK-GI-FP16-NEXT:    fmul h0, h0, h1
 ; CHECK-GI-FP16-NEXT:    fcvtzu w0, h0
 ; CHECK-GI-FP16-NEXT:    ret
@@ -478,8 +472,7 @@ define i64 @fcvtzu_f16_i64_7(half %flt) {
 ;
 ; CHECK-GI-FP16-LABEL: fcvtzu_f16_i64_7:
 ; CHECK-GI-FP16:       // %bb.0:
-; CHECK-GI-FP16-NEXT:    adrp x8, .LCPI22_0
-; CHECK-GI-FP16-NEXT:    ldr h1, [x8, :lo12:.LCPI22_0]
+; CHECK-GI-FP16-NEXT:    movi v1.4h, #88, lsl #8
 ; CHECK-GI-FP16-NEXT:    fmul h0, h0, h1
 ; CHECK-GI-FP16-NEXT:    fcvtzu x0, h0
 ; CHECK-GI-FP16-NEXT:    ret
@@ -506,8 +499,7 @@ define i64 @fcvtzu_f16_i64_15(half %flt) {
 ;
 ; CHECK-GI-FP16-LABEL: fcvtzu_f16_i64_15:
 ; CHECK-GI-FP16:       // %bb.0:
-; CHECK-GI-FP16-NEXT:    adrp x8, .LCPI23_0
-; CHECK-GI-FP16-NEXT:    ldr h1, [x8, :lo12:.LCPI23_0]
+; CHECK-GI-FP16-NEXT:    movi v1.4h, #120, lsl #8
 ; CHECK-GI-FP16-NEXT:    fmul h0, h0, h1
 ; CHECK-GI-FP16-NEXT:    fcvtzu x0, h0
 ; CHECK-GI-FP16-NEXT:    ret
@@ -688,10 +680,9 @@ define half @scvtf_f16_i32_7(i32 %int) {
 ;
 ; CHECK-GI-FP16-LABEL: scvtf_f16_i32_7:
 ; CHECK-GI-FP16:       // %bb.0:
-; CHECK-GI-FP16-NEXT:    scvtf h0, w0
-; CHECK-GI-FP16-NEXT:    adrp x8, .LCPI32_0
-; CHECK-GI-FP16-NEXT:    ldr h1, [x8, :lo12:.LCPI32_0]
-; CHECK-GI-FP16-NEXT:    fdiv h0, h0, h1
+; CHECK-GI-FP16-NEXT:    movi v0.4h, #88, lsl #8
+; CHECK-GI-FP16-NEXT:    scvtf h1, w0
+; CHECK-GI-FP16-NEXT:    fdiv h0, h1, h0
 ; CHECK-GI-FP16-NEXT:    ret
   %cvt = sitofp i32 %int to half
   %fix = fdiv half %cvt, 128.0
@@ -726,10 +717,9 @@ define half @scvtf_f16_i32_15(i32 %int) {
 ;
 ; CHECK-GI-FP16-LABEL: scvtf_f16_i32_15:
 ; CHECK-GI-FP16:       // %bb.0:
-; CHECK-GI-FP16-NEXT:    scvtf h0, w0
-; CHECK-GI-FP16-NEXT:    adrp x8, .LCPI33_0
-; CHECK-GI-FP16-NEXT:    ldr h1, [x8, :lo12:.LCPI33_0]
-; CHECK-GI-FP16-NEXT:    fdiv h0, h0, h1
+; CHECK-GI-FP16-NEXT:    movi v0.4h, #120, lsl #8
+; CHECK-GI-FP16-NEXT:    scvtf h1, w0
+; CHECK-GI-FP16-NEXT:    fdiv h0, h1, h0
 ; CHECK-GI-FP16-NEXT:    ret
   %cvt = sitofp i32 %int to half
   %fix = fdiv half %cvt, 32768.0
@@ -764,10 +754,9 @@ define half @scvtf_f16_i64_7(i64 %long) {
 ;
 ; CHECK-GI-FP16-LABEL: scvtf_f16_i64_7:
 ; CHECK-GI-FP16:       // %bb.0:
-; CHECK-GI-FP16-NEXT:    scvtf h0, x0
-; CHECK-GI-FP16-NEXT:    adrp x8, .LCPI34_0
-; CHECK-GI-FP16-NEXT:    ldr h1, [x8, :lo12:.LCPI34_0]
-; CHECK-GI-FP16-NEXT:    fdiv h0, h0, h1
+; CHECK-GI-FP16-NEXT:    movi v0.4h, #88, lsl #8
+; CHECK-GI-FP16-NEXT:    scvtf h1, x0
+; CHECK-GI-FP16-NEXT:    fdiv h0, h1, h0
 ; CHECK-GI-FP16-NEXT:    ret
   %cvt = sitofp i64 %long to half
   %fix = fdiv half %cvt, 128.0
@@ -802,10 +791,9 @@ define half @scvtf_f16_i64_15(i64 %long) {
 ;
 ; CHECK-GI-FP16-LABEL: scvtf_f16_i64_15:
 ; CHECK-GI-FP16:       // %bb.0:
-; CHECK-GI-FP16-NEXT:    scvtf h0, x0
-; CHECK-GI-FP16-NEXT:    adrp x8, .LCPI35_0
-; CHECK-GI-FP16-NEXT:    ldr h1, [x8, :lo12:.LCPI35_0]
-; CHECK-GI-FP16-NEXT:    fdiv h0, h0, h1
+; CHECK-GI-FP16-NEXT:    movi v0.4h, #120, lsl #8
+; CHECK-GI-FP16-NEXT:    scvtf h1, x0
+; CHECK-GI-FP16-NEXT:    fdiv h0, h1, h0
 ; CHECK-GI-FP16-NEXT:    ret
   %cvt = sitofp i64 %long to half
   %fix = fdiv half %cvt, 32768.0
@@ -984,10 +972,9 @@ define half @ucvtf_f16_i32_7(i32 %int) {
 ;
 ; CHECK-GI-FP16-LABEL: ucvtf_f16_i32_7:
 ; CHECK-GI-FP16:       // %bb.0:
-; CHECK-GI-FP16-NEXT:    ucvtf h0, w0
-; CHECK-GI-FP16-NEXT:    adrp x8, .LCPI44_0
-; CHECK-GI-FP16-NEXT:    ldr h1, [x8, :lo12:.LCPI44_0]
-; CHECK-GI-FP16-NEXT:    fdiv h0, h0, h1
+; CHECK-GI-FP16-NEXT:    movi v0.4h, #88, lsl #8
+; CHECK-GI-FP16-NEXT:    ucvtf h1, w0
+; CHECK-GI-FP16-NEXT:    fdiv h0, h1, h0
 ; CHECK-GI-FP16-NEXT:    ret
   %cvt = uitofp i32 %int to half
   %fix = fdiv half %cvt, 128.0
@@ -1022,10 +1009,9 @@ define half @ucvtf_f16_i32_15(i32 %int) {
 ;
 ; CHECK-GI-FP16-LABEL: ucvtf_f16_i32_15:
 ; CHECK-GI-FP16:       // %bb.0:
-; CHECK-GI-FP16-NEXT:    ucvtf h0, w0
-; CHECK-GI-FP16-NEXT:    adrp x8, .LCPI45_0
-; CHECK-GI-FP16-NEXT:    ldr h1, [x8, :lo12:.LCPI45_0]
-; CHECK-GI-FP16-NEXT:    fdiv h0, h0, h1
+; CHECK-GI-FP16-NEXT:    movi v0.4h, #120, lsl #8
+; CHECK-GI-FP16-NEXT:    ucvtf h1, w0
+; CHECK-GI-FP16-NEXT:    fdiv h0, h1, h0
 ; CHECK-GI-FP16-NEXT:    ret
   %cvt = uitofp i32 %int to half
   %fix = fdiv half %cvt, 32768.0
@@ -1060,10 +1046,9 @@ define half @ucvtf_f16_i64_7(i64 %long) {
 ;
 ; CHECK-GI-FP16-LABEL: ucvtf_f16_i64_7:
 ; CHECK-GI-FP16:       // %bb.0:
-; CHECK-GI-FP16-NEXT:    ucvtf h0, x0
-; CHECK-GI-FP16-NEXT:    adrp x8, .LCPI46_0
-; CHECK-GI-FP16-NEXT:    ldr h1, [x8, :lo12:.LCPI46_0]
-; CHECK-GI-FP16-NEXT:    fdiv h0, h0, h1
+; CHECK-GI-FP16-NEXT:    movi v0.4h, #88, lsl #8
+; CHECK-GI-FP16-NEXT:    ucvtf h1, x0
+; CHECK-GI-FP16-NEXT:    fdiv h0, h1, h0
 ; CHECK-GI-FP16-NEXT:    ret
   %cvt = uitofp i64 %long to half
   %fix = fdiv half %cvt, 128.0
@@ -1098,10 +1083,9 @@ define half @ucvtf_f16_i64_15(i64 %long) {
 ;
 ; CHECK-GI-FP16-LABEL: ucvtf_f16_i64_15:
 ; CHECK-GI-FP16:       // %bb.0:
-; CHECK-GI-FP16-NEXT:    ucvtf h0, x0
-; CHECK-GI-FP16-NEXT:    adrp x8, .LCPI47_0
-; CHECK-GI-FP16-NEXT:    ldr h1, [x8, :lo12:.LCPI47_0]
-; CHECK-GI-FP16-NEXT:    fdiv h0, h0, h1
+; CHECK-GI-FP16-NEXT:    movi v0.4h, #120, lsl #8
+; CHECK-GI-FP16-NEXT:    ucvtf h1, x0
+; CHECK-GI-FP16-NEXT:    fdiv h0, h1, h0
 ; CHECK-GI-FP16-NEXT:    ret
   %cvt = uitofp i64 %long to half
   %fix = fdiv half %cvt, 32768.0
@@ -1261,8 +1245,7 @@ define i32 @fcvtzs_sat_f16_i32_7(half %dbl) {
 ;
 ; CHECK-GI-FP16-LABEL: fcvtzs_sat_f16_i32_7:
 ; CHECK-GI-FP16:       // %bb.0:
-; CHECK-GI-FP16-NEXT:    adrp x8, .LCPI55_0
-; CHECK-GI-FP16-NEXT:    ldr h1, [x8, :lo12:.LCPI55_0]
+; CHECK-GI-FP16-NEXT:    movi v1.4h, #88, lsl #8
 ; CHECK-GI-FP16-NEXT:    fmul h0, h0, h1
 ; CHECK-GI-FP16-NEXT:    fcvtzs w0, h0
 ; CHECK-GI-FP16-NEXT:    ret
@@ -1289,8 +1272,7 @@ define i32 @fcvtzs_sat_f16_i32_15(half %dbl) {
 ;
 ; CHECK-GI-FP16-LABEL: fcvtzs_sat_f16_i32_15:
 ; CHECK-GI-FP16:       // %bb.0:
-; CHECK-GI-FP16-NEXT:    adrp x8, .LCPI56_0
-; CHECK-GI-FP16-NEXT:    ldr h1, [x8, :lo12:.LCPI56_0]
+; CHECK-GI-FP16-NEXT:    movi v1.4h, #120, lsl #8
 ; CHECK-GI-FP16-NEXT:    fmul h0, h0, h1
 ; CHECK-GI-FP16-NEXT:    fcvtzs w0, h0
 ; CHECK-GI-FP16-NEXT:    ret
@@ -1317,8 +1299,7 @@ define i64 @fcvtzs_sat_f16_i64_7(half %dbl) {
 ;
 ; CHECK-GI-FP16-LABEL: fcvtzs_sat_f16_i64_7:
 ; CHECK-GI-FP16:       // %bb.0:
-; CHECK-GI-FP16-NEXT:    adrp x8, .LCPI57_0
-; CHECK-GI-FP16-NEXT:    ldr h1, [x8, :lo12:.LCPI57_0]
+; CHECK-GI-FP16-NEXT:    movi v1.4h, #88, lsl #8
 ; CHECK-GI-FP16-NEXT:    fmul h0, h0, h1
 ; CHECK-GI-FP16-NEXT:    fcvtzs x0, h0
 ; CHECK-GI-FP16-NEXT:    ret
@@ -1345,8 +1326,7 @@ define i64 @fcvtzs_sat_f16_i64_15(half %dbl) {
 ;
 ; CHECK-GI-FP16-LABEL: fcvtzs_sat_f16_i64_15:
 ; CHECK-GI-FP16:       // %bb.0:
-; CHECK-GI-FP16-NEXT:    adrp x8, .LCPI58_0
-; CHECK-GI-FP16-NEXT:    ldr h1, [x8, :lo12:.LCPI58_0]
+; CHECK-GI-FP16-NEXT:    movi v1.4h, #120, lsl #8
 ; CHECK-GI-FP16-NEXT:    fmul h0, h0, h1
 ; CHECK-GI-FP16-NEXT:    fcvtzs x0, h0
 ; CHECK-GI-FP16-NEXT:    ret
@@ -1507,8 +1487,7 @@ define i32 @fcvtzu_sat_f16_i32_7(half %dbl) {
 ;
 ; CHECK-GI-FP16-LABEL: fcvtzu_sat_f16_i32_7:
 ; CHECK-GI-FP16:       // %bb.0:
-; CHECK-GI-FP16-NEXT:    adrp x8, .LCPI66_0
-; CHECK-GI-FP16-NEXT:    ldr h1, [x8, :lo12:.LCPI66_0]
+; CHECK-GI-FP16-NEXT:    movi v1.4h, #88, lsl #8
 ; CHECK-GI-FP16-NEXT:    fmul h0, h0, h1
 ; CHECK-GI-FP16-NEXT:    fcvtzu w0, h0
 ; CHECK-GI-FP16-NEXT:    ret
@@ -1535,8 +1514,7 @@ define i32 @fcvtzu_sat_f16_i32_15(half %dbl) {
 ;
 ; CHECK-GI-FP16-LABEL: fcvtzu_sat_f16_i32_15:
 ; CHECK-GI-FP16:       // %bb.0:
-; CHECK-GI-FP16-NEXT:    adrp x8, .LCPI67_0
-; CHECK-GI-FP16-NEXT:    ldr h1, [x8, :lo12:.LCPI67_0]
+; CHECK-GI-FP16-NEXT:    movi v1.4h, #120, lsl #8
 ; CHECK-GI-FP16-NEXT:    fmul h0, h0, h1
 ; CHECK-GI-FP16-NEXT:    fcvtzu w0, h0
 ; CHECK-GI-FP16-NEXT:    ret
@@ -1563,8 +1541,7 @@ define i64 @fcvtzu_sat_f16_i64_7(half %dbl) {
 ;
 ; CHECK-GI-FP16-LABEL: fcvtzu_sat_f16_i64_7:
 ; CHECK-GI-FP16:       // %bb.0:
-; CHECK-GI-FP16-NEXT:    adrp x8, .LCPI68_0
-; CHECK-GI-FP16-NEXT:    ldr h1, [x8, :lo12:.LCPI68_0]
+; CHECK-GI-FP16-NEXT:    movi v1.4h, #88, lsl #8
 ; CHECK-GI-FP16-NEXT:    fmul h0, h0, h1
 ; CHECK-GI-FP16-NEXT:    fcvtzu x0, h0
 ; CHECK-GI-FP16-NEXT:    ret
@@ -1591,8 +1568,7 @@ define i64 @fcvtzu_sat_f16_i64_15(half %dbl) {
 ;
 ; CHECK-GI-FP16-LABEL: fcvtzu_sat_f16_i64_15:
 ; CHECK-GI-FP16:       // %bb.0:
-; CHECK-GI-FP16-NEXT:    adrp x8, .LCPI69_0
-; CHECK-GI-FP16-NEXT:    ldr h1, [x8, :lo12:.LCPI69_0]
+; CHECK-GI-FP16-NEXT:    movi v1.4h, #120, lsl #8
 ; CHECK-GI-FP16-NEXT:    fmul h0, h0, h1
 ; CHECK-GI-FP16-NEXT:    fcvtzu x0, h0
 ; CHECK-GI-FP16-NEXT:    ret
diff --git a/llvm/test/CodeGen/AArch64/fdiv-const.ll b/llvm/test/CodeGen/AArch64/fdiv-const.ll
index 2866b5f6d8f20..ba507bae9366b 100644
--- a/llvm/test/CodeGen/AArch64/fdiv-const.ll
+++ b/llvm/test/CodeGen/AArch64/fdiv-const.ll
@@ -81,37 +81,21 @@ define half @divf16_2(half %a) nounwind {
 }
 
 define half @divf16_32768(half %a) nounwind {
-; CHECK-SD-LABEL: divf16_32768:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    mov w8, #30720 // =0x7800
-; CHECK-SD-NEXT:    fmov h1, w8
-; CHECK-SD-NEXT:    fdiv h0, h0, h1
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: divf16_32768:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI5_0
-; CHECK-GI-NEXT:    ldr h1, [x8, :lo12:.LCPI5_0]
-; CHECK-GI-NEXT:    fdiv h0, h0, h1
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: divf16_32768:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.4h, #120, lsl #8
+; CHECK-NEXT:    fdiv h0, h0, h1
+; CHECK-NEXT:    ret
   %r = fdiv half %a, 32768.0
   ret half %r
 }
 
 define half @divf16_32768_arcp(half %a) nounwind {
-; CHECK-SD-LABEL: divf16_32768_arcp:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    mov w8, #30720 // =0x7800
-; CHECK-SD-NEXT:    fmov h1, w8
-; CHECK-SD-NEXT:    fdiv h0, h0, h1
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: divf16_32768_arcp:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI6_0
-; CHECK-GI-NEXT:    ldr h1, [x8, :lo12:.LCPI6_0]
-; CHECK-GI-NEXT:    fdiv h0, h0, h1
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: divf16_32768_arcp:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.4h, #120, lsl #8
+; CHECK-NEXT:    fdiv h0, h0, h1
+; CHECK-NEXT:    ret
   %r = fdiv arcp half %a, 32768.0
   ret half %r
 }
diff --git a/llvm/test/CodeGen/AArch64/fp-maximumnum-minimumnum.ll b/llvm/test/CodeGen/AArch64/fp-maximumnum-minimumnum.ll
index 5353920ed5667..3473c4c093fcc 100644
--- a/llvm/test/CodeGen/AArch64/fp-maximumnum-minimumnum.ll
+++ b/llvm/test/CodeGen/AArch64/fp-maximumnum-minimumnum.ll
@@ -1256,9 +1256,8 @@ define fp128 @max_fp128(fp128 %x, fp128 %y) {
 ; CHECK-NEXT:  // %bb.7: // %start
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Reload
 ; CHECK-NEXT:  .LBB32_8: // %start
-; CHECK-NEXT:    adrp x8, .LCPI32_0
 ; CHECK-NEXT:    str q1, [sp] // 16-byte Spill
-; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI32_0]
+; CHECK-NEXT:    movi d1, #0000000000000000
 ; CHECK-NEXT:    bl __eqtf2
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Reload
 ; CHECK-NEXT:    cmp w0, #0
@@ -1900,9 +1899,8 @@ define fp128 @min_fp128(fp128 %x, fp128 %y) {
 ; CHECK-NEXT:  // %bb.7: // %start
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Reload
 ; CHECK-NEXT:  .LBB49_8: // %start
-; CHECK-NEXT:    adrp x8, .LCPI49_0
 ; CHECK-NEXT:    str q1, [sp] // 16-byte Spill
-; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI49_0]
+; CHECK-NEXT:    movi d1, #0000000000000000
 ; CHECK-NEXT:    bl __eqtf2
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Reload
 ; CHECK-NEXT:    cmp w0, #0
diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll
index b6cbe9eb46389..a055f5e681a9c 100644
--- a/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll
+++ b/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll
@@ -153,12 +153,11 @@ define i100 @test_signed_i100_f32(float %f) nounwind {
 ; CHECK-SD-NEXT:    fmov s8, s0
 ; CHECK-SD-NEXT:    bl __fixsfti
 ; CHECK-SD-NEXT:    movi v0.2s, #241, lsl #24
-; CHECK-SD-NEXT:    mov w8, #1895825407 // =0x70ffffff
+; CHECK-SD-NEXT:    mov x8, #-34359738368 // =0xfffffff800000000
 ; CHECK-SD-NEXT:    mov x10, #34359738367 // =0x7ffffffff
 ; CHECK-SD-NEXT:    ldr x30, [sp, #8] // 8-byte Reload
 ; CHECK-SD-NEXT:    fcmp s8, s0
-; CHECK-SD-NEXT:    fmov s0, w8
-; CHECK-SD-NEXT:    mov x8, #-34359738368 // =0xfffffff800000000
+; CHECK-SD-NEXT:    mvni v0.2s, #143, lsl #24
 ; CHECK-SD-NEXT:    csel x9, xzr, x0, lt
 ; CHECK-SD-NEXT:    csel x8, x8, x1, lt
 ; CHECK-SD-NEXT:    fcmp s8, s0
@@ -177,12 +176,11 @@ define i100 @test_signed_i100_f32(float %f) nounwind {
 ; CHECK-GI-NEXT:    fmov s8, s0
 ; CHECK-GI-NEXT:    bl __fixsfti
 ; CHECK-GI-NEXT:    movi v0.2s, #241, lsl #24
-; CHECK-GI-NEXT:    mov w8, #1895825407 // =0x70ffffff
+; CHECK-GI-NEXT:    mov x8, #34359738368 // =0x800000000
 ; CHECK-GI-NEXT:    mov x10, #34359738367 // =0x7ffffffff
 ; CHECK-GI-NEXT:    ldr x30, [sp, #8] // 8-byte Reload
 ; CHECK-GI-NEXT:    fcmp s8, s0
-; CHECK-GI-NEXT:    fmov s0, w8
-; CHECK-GI-NEXT:    mov x8, #34359738368 // =0x800000000
+; CHECK-GI-NEXT:    mvni v0.2s, #143, lsl #24
 ; CHECK-GI-NEXT:    csel x9, xzr, x0, lt
 ; CHECK-GI-NEXT:    csel x8, x8, x1, lt
 ; CHECK-GI-NEXT:    fcmp s8, s0
@@ -204,13 +202,12 @@ define i128 @test_signed_i128_f32(float %f) nounwind {
 ; CHECK-SD-NEXT:    str x30, [sp, #8] // 8-byte Spill
 ; CHECK-SD-NEXT:    fmov s8, s0
 ; CHECK-SD-NEXT:    bl __fixsfti
-; CHECK-SD-NEXT:    movi v0.2s, #255, lsl #24
-; CHECK-SD-NEXT:    mov w8, #2130706431 // =0x7effffff
+; CHECK-SD-NEXT:    movi d0, #0xff000000ff000000
+; CHECK-SD-NEXT:    mov x8, #-9223372036854775808 // =0x8000000000000000
 ; CHECK-SD-NEXT:    mov x10, #9223372036854775807 // =0x7fffffffffffffff
 ; CHECK-SD-NEXT:    ldr x30, [sp, #8] // 8-byte Reload
 ; CHECK-SD-NEXT:    fcmp s8, s0
-; CHECK-SD-NEXT:    fmov s0, w8
-; CHECK-SD-NEXT:    mov x8, #-9223372036854775808 // =0x8000000000000000
+; CHECK-SD-NEXT:    mvni v0.2s, #129, lsl #24
 ; CHECK-SD-NEXT:    csel x9, xzr, x0, lt
 ; CHECK-SD-NEXT:    csel x8, x8, x1, lt
 ; CHECK-SD-NEXT:    fcmp s8, s0
@@ -228,13 +225,12 @@ define i128 @test_signed_i128_f32(float %f) nounwind {
 ; CHECK-GI-NEXT:    str x30, [sp, #8] // 8-byte Spill
 ; CHECK-GI-NEXT:    fmov s8, s0
 ; CHECK-GI-NEXT:    bl __fixsfti
-; CHECK-GI-NEXT:    movi v0.2s, #255, lsl #24
-; CHECK-GI-NEXT:    mov w8, #2130706431 // =0x7effffff
+; CHECK-GI-NEXT:    movi d0, #0xff000000ff000000
+; CHECK-GI-NEXT:    mov x8, #-9223372036854775808 // =0x8000000000000000
 ; CHECK-GI-NEXT:    mov x10, #9223372036854775807 // =0x7fffffffffffffff
 ; CHECK-GI-NEXT:    ldr x30, [sp, #8] // 8-byte Reload
 ; CHECK-GI-NEXT:    fcmp s8, s0
-; CHECK-GI-NEXT:    fmov s0, w8
-; CHECK-GI-NEXT:    mov x8, #-9223372036854775808 // =0x8000000000000000
+; CHECK-GI-NEXT:    mvni v0.2s, #129, lsl #24
 ; CHECK-GI-NEXT:    csel x9, xzr, x0, lt
 ; CHECK-GI-NEXT:    csel x8, x8, x1, lt
 ; CHECK-GI-NEXT:    fcmp s8, s0
@@ -857,12 +853,11 @@ define i100 @test_signed_i100_f16(half %f) nounwind {
 ; CHECK-SD-NEXT:    fmov s0, s8
 ; CHECK-SD-NEXT:    bl __fixsfti
 ; CHECK-SD-NEXT:    movi v0.2s, #241, lsl #24
-; CHECK-SD-NEXT:    mov w8, #1895825407 // =0x70ffffff
+; CHECK-SD-NEXT:    mov x8, #-34359738368 // =0xfffffff800000000
 ; CHECK-SD-NEXT:    mov x10, #34359738367 // =0x7ffffffff
 ; CHECK-SD-NEXT:    ldr x30, [sp, #8] // 8-byte Reload
 ; CHECK-SD-NEXT:    fcmp s8, s0
-; CHECK-SD-NEXT:    fmov s0, w8
-; CHECK-SD-NEXT:    mov x8, #-34359738368 // =0xfffffff800000000
+; CHECK-SD-NEXT:    mvni v0.2s, #143, lsl #24
 ; CHECK-SD-NEXT:    csel x9, xzr, x0, lt
 ; CHECK-SD-NEXT:    csel x8, x8, x1, lt
 ; CHECK-SD-NEXT:    fcmp s8, s0
@@ -898,13 +893,12 @@ define i128 @test_signed_i128_f16(half %f) nounwind {
 ; CHECK-SD-NEXT:    str x30, [sp, #8] // 8-byte Spill
 ; CHECK-SD-NEXT:    fmov s0, s8
 ; CHECK-SD-NEXT:    bl __fixsfti
-; CHECK-SD-NEXT:    movi v0.2s, #255, lsl #24
-; CHECK-SD-NEXT:    mov w8, #2130706431 // =0x7effffff
+; CHECK-SD-NEXT:    movi d0, #0xff000000ff000000
+; CHECK-SD-NEXT:    mov x8, #-9223372036854775808 // =0x8000000000000000
 ; CHECK-SD-NEXT:    mov x10, #9223372036854775807 // =0x7fffffffffffffff
 ; CHECK-SD-NEXT:    ldr x30, [sp, #8] // 8-byte Reload
 ; CHECK-SD-NEXT:    fcmp s8, s0
-; CHECK-SD-NEXT:    fmov s0, w8
-; CHECK-SD-NEXT:    mov x8, #-9223372036854775808 // =0x8000000000000000
+; CHECK-SD-NEXT:    mvni v0.2s, #129, lsl #24
 ; CHECK-SD-NEXT:    csel x9, xzr, x0, lt
 ; CHECK-SD-NEXT:    csel x8, x8, x1, lt
 ; CHECK-SD-NEXT:    fcmp s8, s0
diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
index 6a06d99689df9..978b404a28938 100644
--- a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
@@ -1552,8 +1552,7 @@ define <2 x i100> @test_signed_v2f32_v2i100(<2 x float> %f) {
 ; CHECK-SD-NEXT:    bl __fixsfti
 ; CHECK-SD-NEXT:    movi v9.2s, #241, lsl #24
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Reload
-; CHECK-SD-NEXT:    mov w8, #1895825407 // =0x70ffffff
-; CHECK-SD-NEXT:    fmov s10, w8
+; CHECK-SD-NEXT:    mvni v10.2s, #143, lsl #24
 ; CHECK-SD-NEXT:    mov x21, #-34359738368 // =0xfffffff800000000
 ; CHECK-SD-NEXT:    mov x22, #34359738367 // =0x7ffffffff
 ; CHECK-SD-NEXT:    mov s8, v0.s[1]
@@ -1611,8 +1610,7 @@ define <2 x i100> @test_signed_v2f32_v2i100(<2 x float> %f) {
 ; CHECK-GI-NEXT:    bl __fixsfti
 ; CHECK-GI-NEXT:    movi v9.2s, #241, lsl #24
 ; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Reload
-; CHECK-GI-NEXT:    mov w8, #1895825407 // =0x70ffffff
-; CHECK-GI-NEXT:    fmov s10, w8
+; CHECK-GI-NEXT:    mvni v10.2s, #143, lsl #24
 ; CHECK-GI-NEXT:    mov x21, #34359738368 // =0x800000000
 ; CHECK-GI-NEXT:    mov x22, #34359738367 // =0x7ffffffff
 ; CHECK-GI-NEXT:    fcmp s0, s9
@@ -1670,10 +1668,9 @@ define <2 x i128> @test_signed_v2f32_v2i128(<2 x float> %f) {
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Spill
 ; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl __fixsfti
-; CHECK-SD-NEXT:    movi v9.2s, #255, lsl #24
+; CHECK-SD-NEXT:    movi d9, #0xff000000ff000000
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Reload
-; CHECK-SD-NEXT:    mov w8, #2130706431 // =0x7effffff
-; CHECK-SD-NEXT:    fmov s10, w8
+; CHECK-SD-NEXT:    mvni v10.2s, #129, lsl #24
 ; CHECK-SD-NEXT:    mov x21, #-9223372036854775808 // =0x8000000000000000
 ; CHECK-SD-NEXT:    mov x22, #9223372036854775807 // =0x7fffffffffffffff
 ; CHECK-SD-NEXT:    mov s8, v0.s[1]
@@ -1729,10 +1726,9 @@ define <2 x i128> @test_signed_v2f32_v2i128(<2 x float> %f) {
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
 ; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl __fixsfti
-; CHECK-GI-NEXT:    movi v9.2s, #255, lsl #24
+; CHECK-GI-NEXT:    movi d9, #0xff000000ff000000
 ; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Reload
-; CHECK-GI-NEXT:    mov w8, #2130706431 // =0x7effffff
-; CHECK-GI-NEXT:    fmov s10, w8
+; CHECK-GI-NEXT:    mvni v10.2s, #129, lsl #24
 ; CHECK-GI-NEXT:    mov x21, #-9223372036854775808 // =0x8000000000000000
 ; CHECK-GI-NEXT:    mov x22, #9223372036854775807 // =0x7fffffffffffffff
 ; CHECK-GI-NEXT:    fcmp s0, s9
@@ -1967,8 +1963,7 @@ define <4 x i100> @test_signed_v4f32_v4i100(<4 x float> %f) {
 ; CHECK-SD-NEXT:    bl __fixsfti
 ; CHECK-SD-NEXT:    movi v9.2s, #241, lsl #24
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Reload
-; CHECK-SD-NEXT:    mov w8, #1895825407 // =0x70ffffff
-; CHECK-SD-NEXT:    fmov s10, w8
+; CHECK-SD-NEXT:    mvni v10.2s, #143, lsl #24
 ; CHECK-SD-NEXT:    mov x25, #-34359738368 // =0xfffffff800000000
 ; CHECK-SD-NEXT:    mov x26, #34359738367 // =0x7ffffffff
 ; CHECK-SD-NEXT:    mov s8, v0.s[1]
@@ -2069,8 +2064,7 @@ define <4 x i100> @test_signed_v4f32_v4i100(<4 x float> %f) {
 ; CHECK-GI-NEXT:    bl __fixsfti
 ; CHECK-GI-NEXT:    movi v11.2s, #241, lsl #24
 ; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Reload
-; CHECK-GI-NEXT:    mov w8, #1895825407 // =0x70ffffff
-; CHECK-GI-NEXT:    fmov s12, w8
+; CHECK-GI-NEXT:    mvni v12.2s, #143, lsl #24
 ; CHECK-GI-NEXT:    mov x25, #34359738368 // =0x800000000
 ; CHECK-GI-NEXT:    mov x26, #34359738367 // =0x7ffffffff
 ; CHECK-GI-NEXT:    fcmp s0, s11
@@ -2162,10 +2156,9 @@ define <4 x i128> @test_signed_v4f32_v4i128(<4 x float> %f) {
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Spill
 ; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl __fixsfti
-; CHECK-SD-NEXT:    movi v9.2s, #255, lsl #24
+; CHECK-SD-NEXT:    movi d9, #0xff000000ff000000
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Reload
-; CHECK-SD-NEXT:    mov w8, #2130706431 // =0x7effffff
-; CHECK-SD-NEXT:    fmov s10, w8
+; CHECK-SD-NEXT:    mvni v10.2s, #129, lsl #24
 ; CHECK-SD-NEXT:    mov x25, #-9223372036854775808 // =0x8000000000000000
 ; CHECK-SD-NEXT:    mov x26, #9223372036854775807 // =0x7fffffffffffffff
 ; CHECK-SD-NEXT:    mov s8, v0.s[1]
@@ -2264,10 +2257,9 @@ define <4 x i128> @test_signed_v4f32_v4i128(<4 x float> %f) {
 ; CHECK-GI-NEXT:    mov s8, v0.s[3]
 ; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl __fixsfti
-; CHECK-GI-NEXT:    movi v11.2s, #255, lsl #24
+; CHECK-GI-NEXT:    movi d11, #0xff000000ff000000
 ; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Reload
-; CHECK-GI-NEXT:    mov w8, #2130706431 // =0x7effffff
-; CHECK-GI-NEXT:    fmov s12, w8
+; CHECK-GI-NEXT:    mvni v12.2s, #129, lsl #24
 ; CHECK-GI-NEXT:    mov x25, #-9223372036854775808 // =0x8000000000000000
 ; CHECK-GI-NEXT:    mov x26, #9223372036854775807 // =0x7fffffffffffffff
 ; CHECK-GI-NEXT:    fcmp s0, s11
@@ -3163,9 +3155,8 @@ define <4 x i100> @test_signed_v4f16_v4i100(<4 x half> %f) {
 ; CHECK-SD-NEXT:    fmov s0, s8
 ; CHECK-SD-NEXT:    bl __fixsfti
 ; CHECK-SD-NEXT:    movi v9.2s, #241, lsl #24
-; CHECK-SD-NEXT:    mov w8, #1895825407 // =0x70ffffff
+; CHECK-SD-NEXT:    mvni v10.2s, #143, lsl #24
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Reload
-; CHECK-SD-NEXT:    fmov s10, w8
 ; CHECK-SD-NEXT:    mov x25, #-34359738368 // =0xfffffff800000000
 ; CHECK-SD-NEXT:    mov x26, #34359738367 // =0x7ffffffff
 ; CHECK-SD-NEXT:    mov h0, v0.h[1]
@@ -3302,10 +3293,9 @@ define <4 x i128> @test_signed_v4f16_v4i128(<4 x half> %f) {
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Spill
 ; CHECK-SD-NEXT:    fmov s0, s8
 ; CHECK-SD-NEXT:    bl __fixsfti
-; CHECK-SD-NEXT:    movi v9.2s, #255, lsl #24
-; CHECK-SD-NEXT:    mov w8, #2130706431 // =0x7effffff
+; CHECK-SD-NEXT:    movi d9, #0xff000000ff000000
+; CHECK-SD-NEXT:    mvni v10.2s, #129, lsl #24
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Reload
-; CHECK-SD-NEXT:    fmov s10, w8
 ; CHECK-SD-NEXT:    mov x25, #-9223372036854775808 // =0x8000000000000000
 ; CHECK-SD-NEXT:    mov x26, #9223372036854775807 // =0x7fffffffffffffff
 ; CHECK-SD-NEXT:    mov h0, v0.h[1]
@@ -3868,9 +3858,8 @@ define <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) {
 ; CHECK-NEXT:    fmov s0, s8
 ; CHECK-NEXT:    bl __fixsfti
 ; CHECK-NEXT:    movi v10.2s, #241, lsl #24
-; CHECK-NEXT:    mov w8, #1895825407 // =0x70ffffff
+; CHECK-NEXT:    mvni v9.2s, #143, lsl #24
 ; CHECK-NEXT:    ldr q0, [sp, #32] // 16-byte Reload
-; CHECK-NEXT:    fmov s9, w8
 ; CHECK-NEXT:    mov x22, #-34359738368 // =0xfffffff800000000
 ; CHECK-NEXT:    mov x23, #34359738367 // =0x7ffffffff
 ; CHECK-NEXT:    mov h0, v0.h[3]
@@ -4063,10 +4052,9 @@ define <8 x i128> @test_signed_v8f16_v8i128(<8 x half> %f) {
 ; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Spill
 ; CHECK-SD-NEXT:    fmov s0, s8
 ; CHECK-SD-NEXT:    bl __fixsfti
-; CHECK-SD-NEXT:    movi v9.2s, #255, lsl #24
-; CHECK-SD-NEXT:    mov w8, #2130706431 // =0x7effffff
+; CHECK-SD-NEXT:    movi d9, #0xff000000ff000000
+; CHECK-SD-NEXT:    mvni v10.2s, #129, lsl #24
 ; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Reload
-; CHECK-SD-NEXT:    fmov s10, w8
 ; CHECK-SD-NEXT:    mov x22, #-9223372036854775808 // =0x8000000000000000
 ; CHECK-SD-NEXT:    mov x23, #9223372036854775807 // =0x7fffffffffffffff
 ; CHECK-SD-NEXT:    mov h0, v0.h[1]
diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll
index 7a5fe0f4222bc..343c9901ce1c1 100644
--- a/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll
+++ b/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll
@@ -762,17 +762,16 @@ define i32 @test_unsigned_f128_i32(fp128 %f) {
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-SD-NEXT:    .cfi_offset w19, -8
 ; CHECK-SD-NEXT:    .cfi_offset w30, -16
-; CHECK-SD-NEXT:    adrp x8, .LCPI30_0
+; CHECK-SD-NEXT:    movi d1, #0000000000000000
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Spill
-; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI30_0]
 ; CHECK-SD-NEXT:    bl __getf2
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Reload
 ; CHECK-SD-NEXT:    mov w19, w0
 ; CHECK-SD-NEXT:    bl __fixunstfsi
-; CHECK-SD-NEXT:    adrp x8, .LCPI30_1
+; CHECK-SD-NEXT:    adrp x8, .LCPI30_0
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Reload
 ; CHECK-SD-NEXT:    cmp w19, #0
-; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI30_1]
+; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI30_0]
 ; CHECK-SD-NEXT:    csel w19, wzr, w0, mi
 ; CHECK-SD-NEXT:    bl __gttf2
 ; CHECK-SD-NEXT:    cmp w0, #0
diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
index d4feab2ea5d9b..62eb711989046 100644
--- a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
@@ -451,17 +451,16 @@ define <1 x i32> @test_unsigned_v1f128_v1i32(<1 x fp128> %f) {
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-SD-NEXT:    .cfi_offset w19, -8
 ; CHECK-SD-NEXT:    .cfi_offset w30, -16
-; CHECK-SD-NEXT:    adrp x8, .LCPI14_0
+; CHECK-SD-NEXT:    movi d1, #0000000000000000
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Spill
-; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI14_0]
 ; CHECK-SD-NEXT:    bl __getf2
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Reload
 ; CHECK-SD-NEXT:    mov w19, w0
 ; CHECK-SD-NEXT:    bl __fixunstfsi
-; CHECK-SD-NEXT:    adrp x8, .LCPI14_1
+; CHECK-SD-NEXT:    adrp x8, .LCPI14_0
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Reload
 ; CHECK-SD-NEXT:    cmp w19, #0
-; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI14_1]
+; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI14_0]
 ; CHECK-SD-NEXT:    csel w19, wzr, w0, mi
 ; CHECK-SD-NEXT:    bl __gttf2
 ; CHECK-SD-NEXT:    cmp w0, #0
@@ -514,51 +513,49 @@ define <1 x i32> @test_unsigned_v1f128_v1i32(<1 x fp128> %f) {
 define <2 x i32> @test_unsigned_v2f128_v2i32(<2 x fp128> %f) {
 ; CHECK-SD-LABEL: test_unsigned_v2f128_v2i32:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    sub sp, sp, #96
-; CHECK-SD-NEXT:    str x30, [sp, #64] // 8-byte Spill
-; CHECK-SD-NEXT:    stp x20, x19, [sp, #80] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    .cfi_def_cfa_offset 96
+; CHECK-SD-NEXT:    sub sp, sp, #80
+; CHECK-SD-NEXT:    str x30, [sp, #48] // 8-byte Spill
+; CHECK-SD-NEXT:    stp x20, x19, [sp, #64] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 80
 ; CHECK-SD-NEXT:    .cfi_offset w19, -8
 ; CHECK-SD-NEXT:    .cfi_offset w20, -16
 ; CHECK-SD-NEXT:    .cfi_offset w30, -32
 ; CHECK-SD-NEXT:    mov v2.16b, v1.16b
-; CHECK-SD-NEXT:    stp q1, q0, [sp, #32] // 32-byte Folded Spill
-; CHECK-SD-NEXT:    adrp x8, .LCPI15_0
-; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI15_0]
+; CHECK-SD-NEXT:    stp q1, q0, [sp, #16] // 32-byte Folded Spill
+; CHECK-SD-NEXT:    movi d1, #0000000000000000
 ; CHECK-SD-NEXT:    mov v0.16b, v2.16b
-; CHECK-SD-NEXT:    str q1, [sp, #16] // 16-byte Spill
 ; CHECK-SD-NEXT:    bl __getf2
-; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Reload
+; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Reload
 ; CHECK-SD-NEXT:    mov w19, w0
 ; CHECK-SD-NEXT:    bl __fixunstfsi
-; CHECK-SD-NEXT:    adrp x8, .LCPI15_1
-; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Reload
+; CHECK-SD-NEXT:    adrp x8, .LCPI15_0
+; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Reload
 ; CHECK-SD-NEXT:    cmp w19, #0
-; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI15_1]
+; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI15_0]
 ; CHECK-SD-NEXT:    csel w19, wzr, w0, mi
 ; CHECK-SD-NEXT:    str q1, [sp] // 16-byte Spill
 ; CHECK-SD-NEXT:    bl __gttf2
-; CHECK-SD-NEXT:    ldr q0, [sp, #48] // 16-byte Reload
-; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Reload
+; CHECK-SD-NEXT:    movi d1, #0000000000000000
+; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Reload
 ; CHECK-SD-NEXT:    cmp w0, #0
 ; CHECK-SD-NEXT:    csinv w20, w19, wzr, le
 ; CHECK-SD-NEXT:    bl __getf2
-; CHECK-SD-NEXT:    ldr q0, [sp, #48] // 16-byte Reload
+; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Reload
 ; CHECK-SD-NEXT:    mov w19, w0
 ; CHECK-SD-NEXT:    bl __fixunstfsi
-; CHECK-SD-NEXT:    ldr q0, [sp, #48] // 16-byte Reload
+; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Reload
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Reload
 ; CHECK-SD-NEXT:    cmp w19, #0
 ; CHECK-SD-NEXT:    csel w19, wzr, w0, mi
 ; CHECK-SD-NEXT:    bl __gttf2
 ; CHECK-SD-NEXT:    cmp w0, #0
-; CHECK-SD-NEXT:    ldr x30, [sp, #64] // 8-byte Reload
+; CHECK-SD-NEXT:    ldr x30, [sp, #48] // 8-byte Reload
 ; CHECK-SD-NEXT:    csinv w8, w19, wzr, le
 ; CHECK-SD-NEXT:    fmov s0, w8
 ; CHECK-SD-NEXT:    mov v0.s[1], w20
-; CHECK-SD-NEXT:    ldp x20, x19, [sp, #80] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldp x20, x19, [sp, #64] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT:    add sp, sp, #96
+; CHECK-SD-NEXT:    add sp, sp, #80
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_unsigned_v2f128_v2i32:
@@ -630,52 +627,50 @@ define <2 x i32> @test_unsigned_v2f128_v2i32(<2 x fp128> %f) {
 define <3 x i32> @test_unsigned_v3f128_v3i32(<3 x fp128> %f) {
 ; CHECK-SD-LABEL: test_unsigned_v3f128_v3i32:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    sub sp, sp, #112
-; CHECK-SD-NEXT:    stp x30, x21, [sp, #80] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    stp x20, x19, [sp, #96] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    .cfi_def_cfa_offset 112
+; CHECK-SD-NEXT:    sub sp, sp, #96
+; CHECK-SD-NEXT:    stp x30, x21, [sp, #64] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    stp x20, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 96
 ; CHECK-SD-NEXT:    .cfi_offset w19, -8
 ; CHECK-SD-NEXT:    .cfi_offset w20, -16
 ; CHECK-SD-NEXT:    .cfi_offset w21, -24
 ; CHECK-SD-NEXT:    .cfi_offset w30, -32
-; CHECK-SD-NEXT:    stp q1, q0, [sp, #48] // 32-byte Folded Spill
-; CHECK-SD-NEXT:    adrp x8, .LCPI16_0
+; CHECK-SD-NEXT:    stp q1, q0, [sp, #32] // 32-byte Folded Spill
+; CHECK-SD-NEXT:    movi d1, #0000000000000000
 ; CHECK-SD-NEXT:    mov v0.16b, v2.16b
-; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI16_0]
 ; CHECK-SD-NEXT:    str q2, [sp] // 16-byte Spill
-; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Spill
 ; CHECK-SD-NEXT:    bl __getf2
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Reload
 ; CHECK-SD-NEXT:    mov w19, w0
 ; CHECK-SD-NEXT:    bl __fixunstfsi
-; CHECK-SD-NEXT:    adrp x8, .LCPI16_1
+; CHECK-SD-NEXT:    adrp x8, .LCPI16_0
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Reload
 ; CHECK-SD-NEXT:    cmp w19, #0
-; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI16_1]
+; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI16_0]
 ; CHECK-SD-NEXT:    csel w19, wzr, w0, mi
 ; CHECK-SD-NEXT:    str q1, [sp, #16] // 16-byte Spill
 ; CHECK-SD-NEXT:    bl __gttf2
-; CHECK-SD-NEXT:    ldp q1, q0, [sp, #32] // 32-byte Folded Reload
+; CHECK-SD-NEXT:    movi d1, #0000000000000000
+; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Reload
 ; CHECK-SD-NEXT:    cmp w0, #0
 ; CHECK-SD-NEXT:    csinv w20, w19, wzr, le
 ; CHECK-SD-NEXT:    bl __getf2
-; CHECK-SD-NEXT:    ldr q0, [sp, #48] // 16-byte Reload
+; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Reload
 ; CHECK-SD-NEXT:    mov w19, w0
 ; CHECK-SD-NEXT:    bl __fixunstfsi
-; CHECK-SD-NEXT:    ldr q0, [sp, #48] // 16-byte Reload
-; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Reload
+; CHECK-SD-NEXT:    ldp q1, q0, [sp, #16] // 32-byte Folded Reload
 ; CHECK-SD-NEXT:    cmp w19, #0
 ; CHECK-SD-NEXT:    csel w19, wzr, w0, mi
 ; CHECK-SD-NEXT:    bl __gttf2
-; CHECK-SD-NEXT:    ldr q0, [sp, #64] // 16-byte Reload
-; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Reload
+; CHECK-SD-NEXT:    movi d1, #0000000000000000
+; CHECK-SD-NEXT:    ldr q0, [sp, #48] // 16-byte Reload
 ; CHECK-SD-NEXT:    cmp w0, #0
 ; CHECK-SD-NEXT:    csinv w21, w19, wzr, le
 ; CHECK-SD-NEXT:    bl __getf2
-; CHECK-SD-NEXT:    ldr q0, [sp, #64] // 16-byte Reload
+; CHECK-SD-NEXT:    ldr q0, [sp, #48] // 16-byte Reload
 ; CHECK-SD-NEXT:    mov w19, w0
 ; CHECK-SD-NEXT:    bl __fixunstfsi
-; CHECK-SD-NEXT:    ldr q0, [sp, #64] // 16-byte Reload
+; CHECK-SD-NEXT:    ldr q0, [sp, #48] // 16-byte Reload
 ; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Reload
 ; CHECK-SD-NEXT:    cmp w19, #0
 ; CHECK-SD-NEXT:    csel w19, wzr, w0, mi
@@ -684,10 +679,10 @@ define <3 x i32> @test_unsigned_v3f128_v3i32(<3 x fp128> %f) {
 ; CHECK-SD-NEXT:    csinv w8, w19, wzr, le
 ; CHECK-SD-NEXT:    fmov s0, w8
 ; CHECK-SD-NEXT:    mov v0.s[1], w21
-; CHECK-SD-NEXT:    ldp x30, x21, [sp, #80] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldp x30, x21, [sp, #64] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.s[2], w20
-; CHECK-SD-NEXT:    ldp x20, x19, [sp, #96] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    add sp, sp, #112
+; CHECK-SD-NEXT:    ldp x20, x19, [sp, #80] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    add sp, sp, #96
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_unsigned_v3f128_v3i32:
@@ -780,34 +775,32 @@ define <3 x i32> @test_unsigned_v3f128_v3i32(<3 x fp128> %f) {
 define <4 x i32> @test_unsigned_v4f128_v4i32(<4 x fp128> %f) {
 ; CHECK-SD-LABEL: test_unsigned_v4f128_v4i32:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    sub sp, sp, #128
-; CHECK-SD-NEXT:    str x30, [sp, #96] // 8-byte Spill
-; CHECK-SD-NEXT:    stp x20, x19, [sp, #112] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    .cfi_def_cfa_offset 128
+; CHECK-SD-NEXT:    sub sp, sp, #112
+; CHECK-SD-NEXT:    str x30, [sp, #80] // 8-byte Spill
+; CHECK-SD-NEXT:    stp x20, x19, [sp, #96] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 112
 ; CHECK-SD-NEXT:    .cfi_offset w19, -8
 ; CHECK-SD-NEXT:    .cfi_offset w20, -16
 ; CHECK-SD-NEXT:    .cfi_offset w30, -32
 ; CHECK-SD-NEXT:    stp q0, q2, [sp, #16] // 32-byte Folded Spill
 ; CHECK-SD-NEXT:    mov v2.16b, v1.16b
-; CHECK-SD-NEXT:    adrp x8, .LCPI17_0
 ; CHECK-SD-NEXT:    str q1, [sp] // 16-byte Spill
-; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI17_0]
-; CHECK-SD-NEXT:    str q3, [sp, #80] // 16-byte Spill
+; CHECK-SD-NEXT:    movi d1, #0000000000000000
+; CHECK-SD-NEXT:    str q3, [sp, #64] // 16-byte Spill
 ; CHECK-SD-NEXT:    mov v0.16b, v2.16b
-; CHECK-SD-NEXT:    str q1, [sp, #64] // 16-byte Spill
 ; CHECK-SD-NEXT:    bl __getf2
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Reload
 ; CHECK-SD-NEXT:    mov w19, w0
 ; CHECK-SD-NEXT:    bl __fixunstfsi
-; CHECK-SD-NEXT:    adrp x8, .LCPI17_1
+; CHECK-SD-NEXT:    adrp x8, .LCPI17_0
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Reload
 ; CHECK-SD-NEXT:    cmp w19, #0
-; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI17_1]
+; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI17_0]
 ; CHECK-SD-NEXT:    csel w19, wzr, w0, mi
 ; CHECK-SD-NEXT:    str q1, [sp, #48] // 16-byte Spill
 ; CHECK-SD-NEXT:    bl __gttf2
+; CHECK-SD-NEXT:    movi d1, #0000000000000000
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Reload
-; CHECK-SD-NEXT:    ldr q1, [sp, #64] // 16-byte Reload
 ; CHECK-SD-NEXT:    cmp w0, #0
 ; CHECK-SD-NEXT:    csinv w20, w19, wzr, le
 ; CHECK-SD-NEXT:    bl __getf2
@@ -820,7 +813,7 @@ define <4 x i32> @test_unsigned_v4f128_v4i32(<4 x fp128> %f) {
 ; CHECK-SD-NEXT:    csel w19, wzr, w0, mi
 ; CHECK-SD-NEXT:    bl __gttf2
 ; CHECK-SD-NEXT:    cmp w0, #0
-; CHECK-SD-NEXT:    ldr q1, [sp, #64] // 16-byte Reload
+; CHECK-SD-NEXT:    movi d1, #0000000000000000
 ; CHECK-SD-NEXT:    csinv w8, w19, wzr, le
 ; CHECK-SD-NEXT:    fmov s0, w8
 ; CHECK-SD-NEXT:    mov v0.s[1], w20
@@ -836,26 +829,26 @@ define <4 x i32> @test_unsigned_v4f128_v4i32(<4 x fp128> %f) {
 ; CHECK-SD-NEXT:    bl __gttf2
 ; CHECK-SD-NEXT:    cmp w0, #0
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Reload
+; CHECK-SD-NEXT:    movi d1, #0000000000000000
 ; CHECK-SD-NEXT:    csinv w8, w19, wzr, le
 ; CHECK-SD-NEXT:    mov v0.s[2], w8
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Spill
-; CHECK-SD-NEXT:    ldp q1, q0, [sp, #64] // 32-byte Folded Reload
+; CHECK-SD-NEXT:    ldr q0, [sp, #64] // 16-byte Reload
 ; CHECK-SD-NEXT:    bl __getf2
-; CHECK-SD-NEXT:    ldr q0, [sp, #80] // 16-byte Reload
+; CHECK-SD-NEXT:    ldr q0, [sp, #64] // 16-byte Reload
 ; CHECK-SD-NEXT:    mov w19, w0
 ; CHECK-SD-NEXT:    bl __fixunstfsi
-; CHECK-SD-NEXT:    ldr q0, [sp, #80] // 16-byte Reload
-; CHECK-SD-NEXT:    ldr q1, [sp, #48] // 16-byte Reload
+; CHECK-SD-NEXT:    ldp q1, q0, [sp, #48] // 32-byte Folded Reload
 ; CHECK-SD-NEXT:    cmp w19, #0
 ; CHECK-SD-NEXT:    csel w19, wzr, w0, mi
 ; CHECK-SD-NEXT:    bl __gttf2
 ; CHECK-SD-NEXT:    cmp w0, #0
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Reload
-; CHECK-SD-NEXT:    ldr x30, [sp, #96] // 8-byte Reload
+; CHECK-SD-NEXT:    ldr x30, [sp, #80] // 8-byte Reload
 ; CHECK-SD-NEXT:    csinv w8, w19, wzr, le
-; CHECK-SD-NEXT:    ldp x20, x19, [sp, #112] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldp x20, x19, [sp, #96] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.s[3], w8
-; CHECK-SD-NEXT:    add sp, sp, #128
+; CHECK-SD-NEXT:    add sp, sp, #112
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_unsigned_v4f128_v4i32:
@@ -4076,50 +4069,48 @@ define <16 x i16> @test_unsigned_v16f64_v16i16(<16 x double> %f) {
 define <2 x i64> @test_signed_v2f128_v2i64(<2 x fp128> %f) {
 ; CHECK-SD-LABEL: test_signed_v2f128_v2i64:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    sub sp, sp, #80
-; CHECK-SD-NEXT:    stp x30, x19, [sp, #64] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    .cfi_def_cfa_offset 80
+; CHECK-SD-NEXT:    sub sp, sp, #64
+; CHECK-SD-NEXT:    stp x30, x19, [sp, #48] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 64
 ; CHECK-SD-NEXT:    .cfi_offset w19, -8
 ; CHECK-SD-NEXT:    .cfi_offset w30, -16
 ; CHECK-SD-NEXT:    mov v2.16b, v1.16b
-; CHECK-SD-NEXT:    stp q1, q0, [sp, #32] // 32-byte Folded Spill
-; CHECK-SD-NEXT:    adrp x8, .LCPI86_0
-; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI86_0]
+; CHECK-SD-NEXT:    stp q1, q0, [sp, #16] // 32-byte Folded Spill
+; CHECK-SD-NEXT:    movi d1, #0000000000000000
 ; CHECK-SD-NEXT:    mov v0.16b, v2.16b
-; CHECK-SD-NEXT:    str q1, [sp, #16] // 16-byte Spill
 ; CHECK-SD-NEXT:    bl __getf2
-; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Reload
+; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Reload
 ; CHECK-SD-NEXT:    mov w19, w0
 ; CHECK-SD-NEXT:    bl __fixunstfdi
-; CHECK-SD-NEXT:    adrp x8, .LCPI86_1
-; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Reload
+; CHECK-SD-NEXT:    adrp x8, .LCPI86_0
+; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Reload
 ; CHECK-SD-NEXT:    cmp w19, #0
-; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI86_1]
+; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI86_0]
 ; CHECK-SD-NEXT:    csel x19, xzr, x0, mi
 ; CHECK-SD-NEXT:    str q1, [sp] // 16-byte Spill
 ; CHECK-SD-NEXT:    bl __gttf2
 ; CHECK-SD-NEXT:    cmp w0, #0
-; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Reload
+; CHECK-SD-NEXT:    movi d1, #0000000000000000
 ; CHECK-SD-NEXT:    csinv x8, x19, xzr, le
 ; CHECK-SD-NEXT:    fmov d0, x8
-; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Spill
-; CHECK-SD-NEXT:    ldr q0, [sp, #48] // 16-byte Reload
+; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Spill
+; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Reload
 ; CHECK-SD-NEXT:    bl __getf2
-; CHECK-SD-NEXT:    ldr q0, [sp, #48] // 16-byte Reload
+; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Reload
 ; CHECK-SD-NEXT:    mov w19, w0
 ; CHECK-SD-NEXT:    bl __fixunstfdi
-; CHECK-SD-NEXT:    ldr q0, [sp, #48] // 16-byte Reload
+; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Reload
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Reload
 ; CHECK-SD-NEXT:    cmp w19, #0
 ; CHECK-SD-NEXT:    csel x19, xzr, x0, mi
 ; CHECK-SD-NEXT:    bl __gttf2
 ; CHECK-SD-NEXT:    cmp w0, #0
-; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Reload
+; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Reload
 ; CHECK-SD-NEXT:    csinv x8, x19, xzr, le
-; CHECK-SD-NEXT:    ldp x30, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldp x30, x19, [sp, #48] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    fmov d0, x8
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
-; CHECK-SD-NEXT:    add sp, sp, #80
+; CHECK-SD-NEXT:    add sp, sp, #64
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_signed_v2f128_v2i64:
diff --git a/llvm/test/CodeGen/AArch64/frem-power2.ll b/llvm/test/CodeGen/AArch64/frem-power2.ll
index 548079dc5c4aa..019c92e8e1e13 100644
--- a/llvm/test/CodeGen/AArch64/frem-power2.ll
+++ b/llvm/test/CodeGen/AArch64/frem-power2.ll
@@ -419,11 +419,11 @@ define <4 x float> @frem2_vec(<4 x float> %x) {
 ; CHECK-SD-LABEL: frem2_vec:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    movi v1.4s, #63, lsl #24
-; CHECK-SD-NEXT:    movi v2.4s, #64, lsl #24
+; CHECK-SD-NEXT:    movi v2.4s, #192, lsl #24
 ; CHECK-SD-NEXT:    mov v3.16b, v0.16b
 ; CHECK-SD-NEXT:    fmul v1.4s, v0.4s, v1.4s
 ; CHECK-SD-NEXT:    frintz v1.4s, v1.4s
-; CHECK-SD-NEXT:    fmls v3.4s, v2.4s, v1.4s
+; CHECK-SD-NEXT:    fmla v3.4s, v2.4s, v1.4s
 ; CHECK-SD-NEXT:    mvni v1.4s, #128, lsl #24
 ; CHECK-SD-NEXT:    bit v0.16b, v3.16b, v1.16b
 ; CHECK-SD-NEXT:    ret
diff --git a/llvm/test/CodeGen/AArch64/half-precision-signof-no-assert.ll b/llvm/test/CodeGen/AArch64/half-precision-signof-no-assert.ll
index 4bf2b8c7e6a57..b46be46d2b95f 100644
--- a/llvm/test/CodeGen/AArch64/half-precision-signof-no-assert.ll
+++ b/llvm/test/CodeGen/AArch64/half-precision-signof-no-assert.ll
@@ -10,12 +10,11 @@ define ptr @fn(ptr %in, ptr %out) {
 ; CHECK-SD:       // %bb.0: // %fn
 ; CHECK-SD-NEXT:    ldr d1, [x0]
 ; CHECK-SD-NEXT:    movi v0.4h, #60, lsl #8
-; CHECK-SD-NEXT:    adrp x8, .LCPI0_0
 ; CHECK-SD-NEXT:    fcvtl v1.4s, v1.4h
 ; CHECK-SD-NEXT:    fcmgt v2.4s, v1.4s, #0.0
 ; CHECK-SD-NEXT:    fcmlt v1.4s, v1.4s, #0.0
 ; CHECK-SD-NEXT:    orr v1.16b, v1.16b, v2.16b
-; CHECK-SD-NEXT:    ldr h2, [x8, :lo12:.LCPI0_0]
+; CHECK-SD-NEXT:    movi v2.4h, #60, lsl #8
 ; CHECK-SD-NEXT:    xtn v1.4h, v1.4s
 ; CHECK-SD-NEXT:    and v0.8b, v1.8b, v0.8b
 ; CHECK-SD-NEXT:    movi d1, #0000000000000000
diff --git a/llvm/test/CodeGen/AArch64/isinf.ll b/llvm/test/CodeGen/AArch64/isinf.ll
index f3283d2cf7ec2..2f79e083c5787 100644
--- a/llvm/test/CodeGen/AArch64/isinf.ll
+++ b/llvm/test/CodeGen/AArch64/isinf.ll
@@ -6,9 +6,8 @@
 define i32 @replace_isinf_call_f16(half %x) {
 ; CHECK-SD-LABEL: replace_isinf_call_f16:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    mov w8, #31744 // =0x7c00
+; CHECK-SD-NEXT:    movi v1.4h, #124, lsl #8
 ; CHECK-SD-NEXT:    fabs h0, h0
-; CHECK-SD-NEXT:    fmov h1, w8
 ; CHECK-SD-NEXT:    fcmp h0, h1
 ; CHECK-SD-NEXT:    cset w0, eq
 ; CHECK-SD-NEXT:    ret
diff --git a/llvm/test/CodeGen/AArch64/known-never-nan.ll b/llvm/test/CodeGen/AArch64/known-never-nan.ll
index bd080e29890e2..2e8e5d1e4cee3 100644
--- a/llvm/test/CodeGen/AArch64/known-never-nan.ll
+++ b/llvm/test/CodeGen/AArch64/known-never-nan.ll
@@ -31,11 +31,10 @@ define float @not_fmaxnm_maybe_nan(i32 %i1, i32 %i2) #0 {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ucvtf s0, w0
 ; CHECK-NEXT:    ucvtf s1, w1
-; CHECK-NEXT:    mov w8, #-8388608 // =0xff800000
-; CHECK-NEXT:    fmov s2, #17.00000000
-; CHECK-NEXT:    fmov s3, w8
-; CHECK-NEXT:    fmul s0, s0, s3
-; CHECK-NEXT:    fadd s1, s1, s2
+; CHECK-NEXT:    mvni v2.2s, #127, msl #16
+; CHECK-NEXT:    fmov s3, #17.00000000
+; CHECK-NEXT:    fmul s0, s0, s2
+; CHECK-NEXT:    fadd s1, s1, s3
 ; CHECK-NEXT:    fcmp s0, s1
 ; CHECK-NEXT:    fcsel s0, s0, s1, pl
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/AArch64/movi64_sve.ll b/llvm/test/CodeGen/AArch64/movi64_sve.ll
index 63b6a2a957b63..ea1a3d79e9e0b 100644
--- a/llvm/test/CodeGen/AArch64/movi64_sve.ll
+++ b/llvm/test/CodeGen/AArch64/movi64_sve.ll
@@ -11,22 +11,16 @@ define <2 x i64> @movi_1_v2i64() {
 ; CHECK-NEON-SD-NEXT:    dup v0.2d, x8
 ; CHECK-NEON-SD-NEXT:    ret
 ;
-; CHECK-SVE-SD-LABEL: movi_1_v2i64:
-; CHECK-SVE-SD:       // %bb.0:
-; CHECK-SVE-SD-NEXT:    mov z0.d, #1 // =0x1
-; CHECK-SVE-SD-NEXT:    ret
+; CHECK-SVE-LABEL: movi_1_v2i64:
+; CHECK-SVE:       // %bb.0:
+; CHECK-SVE-NEXT:    mov z0.d, #1 // =0x1
+; CHECK-SVE-NEXT:    ret
 ;
 ; CHECK-NEON-GI-LABEL: movi_1_v2i64:
 ; CHECK-NEON-GI:       // %bb.0:
 ; CHECK-NEON-GI-NEXT:    adrp x8, .LCPI0_0
 ; CHECK-NEON-GI-NEXT:    ldr q0, [x8, :lo12:.LCPI0_0]
 ; CHECK-NEON-GI-NEXT:    ret
-;
-; CHECK-SVE-GI-LABEL: movi_1_v2i64:
-; CHECK-SVE-GI:       // %bb.0:
-; CHECK-SVE-GI-NEXT:    adrp x8, .LCPI0_0
-; CHECK-SVE-GI-NEXT:    ldr q0, [x8, :lo12:.LCPI0_0]
-; CHECK-SVE-GI-NEXT:    ret
   ret <2 x i64> splat (i64 1)
 }
 
@@ -37,22 +31,16 @@ define <2 x i64> @movi_127_v2i64() {
 ; CHECK-NEON-SD-NEXT:    dup v0.2d, x8
 ; CHECK-NEON-SD-NEXT:    ret
 ;
-; CHECK-SVE-SD-LABEL: movi_127_v2i64:
-; CHECK-SVE-SD:       // %bb.0:
-; CHECK-SVE-SD-NEXT:    mov z0.d, #127 // =0x7f
-; CHECK-SVE-SD-NEXT:    ret
+; CHECK-SVE-LABEL: movi_127_v2i64:
+; CHECK-SVE:       // %bb.0:
+; CHECK-SVE-NEXT:    mov z0.d, #127 // =0x7f
+; CHECK-SVE-NEXT:    ret
 ;
 ; CHECK-NEON-GI-LABEL: movi_127_v2i64:
 ; CHECK-NEON-GI:       // %bb.0:
 ; CHECK-NEON-GI-NEXT:    adrp x8, .LCPI1_0
 ; CHECK-NEON-GI-NEXT:    ldr q0, [x8, :lo12:.LCPI1_0]
 ; CHECK-NEON-GI-NEXT:    ret
-;
-; CHECK-SVE-GI-LABEL: movi_127_v2i64:
-; CHECK-SVE-GI:       // %bb.0:
-; CHECK-SVE-GI-NEXT:    adrp x8, .LCPI1_0
-; CHECK-SVE-GI-NEXT:    ldr q0, [x8, :lo12:.LCPI1_0]
-; CHECK-SVE-GI-NEXT:    ret
   ret <2 x i64> splat (i64 127)
 }
 
@@ -63,22 +51,16 @@ define <2 x i64> @movi_m128_v2i64() {
 ; CHECK-NEON-SD-NEXT:    dup v0.2d, x8
 ; CHECK-NEON-SD-NEXT:    ret
 ;
-; CHECK-SVE-SD-LABEL: movi_m128_v2i64:
-; CHECK-SVE-SD:       // %bb.0:
-; CHECK-SVE-SD-NEXT:    mov z0.d, #-128 // =0xffffffffffffff80
-; CHECK-SVE-SD-NEXT:    ret
+; CHECK-SVE-LABEL: movi_m128_v2i64:
+; CHECK-SVE:       // %bb.0:
+; CHECK-SVE-NEXT:    mov z0.d, #-128 // =0xffffffffffffff80
+; CHECK-SVE-NEXT:    ret
 ;
 ; CHECK-NEON-GI-LABEL: movi_m128_v2i64:
 ; CHECK-NEON-GI:       // %bb.0:
 ; CHECK-NEON-GI-NEXT:    adrp x8, .LCPI2_0
 ; CHECK-NEON-GI-NEXT:    ldr q0, [x8, :lo12:.LCPI2_0]
 ; CHECK-NEON-GI-NEXT:    ret
-;
-; CHECK-SVE-GI-LABEL: movi_m128_v2i64:
-; CHECK-SVE-GI:       // %bb.0:
-; CHECK-SVE-GI-NEXT:    adrp x8, .LCPI2_0
-; CHECK-SVE-GI-NEXT:    ldr q0, [x8, :lo12:.LCPI2_0]
-; CHECK-SVE-GI-NEXT:    ret
   ret <2 x i64> splat (i64 -128)
 }
 
@@ -89,22 +71,16 @@ define <2 x i64> @movi_256_v2i64() {
 ; CHECK-NEON-SD-NEXT:    dup v0.2d, x8
 ; CHECK-NEON-SD-NEXT:    ret
 ;
-; CHECK-SVE-SD-LABEL: movi_256_v2i64:
-; CHECK-SVE-SD:       // %bb.0:
-; CHECK-SVE-SD-NEXT:    mov z0.d, #256 // =0x100
-; CHECK-SVE-SD-NEXT:    ret
+; CHECK-SVE-LABEL: movi_256_v2i64:
+; CHECK-SVE:       // %bb.0:
+; CHECK-SVE-NEXT:    mov z0.d, #256 // =0x100
+; CHECK-SVE-NEXT:    ret
 ;
 ; CHECK-NEON-GI-LABEL: movi_256_v2i64:
 ; CHECK-NEON-GI:       // %bb.0:
 ; CHECK-NEON-GI-NEXT:    adrp x8, .LCPI3_0
 ; CHECK-NEON-GI-NEXT:    ldr q0, [x8, :lo12:.LCPI3_0]
 ; CHECK-NEON-GI-NEXT:    ret
-;
-; CHECK-SVE-GI-LABEL: movi_256_v2i64:
-; CHECK-SVE-GI:       // %bb.0:
-; CHECK-SVE-GI-NEXT:    adrp x8, .LCPI3_0
-; CHECK-SVE-GI-NEXT:    ldr q0, [x8, :lo12:.LCPI3_0]
-; CHECK-SVE-GI-NEXT:    ret
   ret <2 x i64> splat (i64 256)
 }
 
@@ -115,22 +91,16 @@ define <2 x i64> @movi_32512_v2i64() {
 ; CHECK-NEON-SD-NEXT:    dup v0.2d, x8
 ; CHECK-NEON-SD-NEXT:    ret
 ;
-; CHECK-SVE-SD-LABEL: movi_32512_v2i64:
-; CHECK-SVE-SD:       // %bb.0:
-; CHECK-SVE-SD-NEXT:    mov z0.d, #32512 // =0x7f00
-; CHECK-SVE-SD-NEXT:    ret
+; CHECK-SVE-LABEL: movi_32512_v2i64:
+; CHECK-SVE:       // %bb.0:
+; CHECK-SVE-NEXT:    mov z0.d, #32512 // =0x7f00
+; CHECK-SVE-NEXT:    ret
 ;
 ; CHECK-NEON-GI-LABEL: movi_32512_v2i64:
 ; CHECK-NEON-GI:       // %bb.0:
 ; CHECK-NEON-GI-NEXT:    adrp x8, .LCPI4_0
 ; CHECK-NEON-GI-NEXT:    ldr q0, [x8, :lo12:.LCPI4_0]
 ; CHECK-NEON-GI-NEXT:    ret
-;
-; CHECK-SVE-GI-LABEL: movi_32512_v2i64:
-; CHECK-SVE-GI:       // %bb.0:
-; CHECK-SVE-GI-NEXT:    adrp x8, .LCPI4_0
-; CHECK-SVE-GI-NEXT:    ldr q0, [x8, :lo12:.LCPI4_0]
-; CHECK-SVE-GI-NEXT:    ret
   ret <2 x i64> splat (i64 32512)
 }
 
@@ -141,22 +111,16 @@ define <2 x i64> @movi_m32768_v2i64() {
 ; CHECK-NEON-SD-NEXT:    dup v0.2d, x8
 ; CHECK-NEON-SD-NEXT:    ret
 ;
-; CHECK-SVE-SD-LABEL: movi_m32768_v2i64:
-; CHECK-SVE-SD:       // %bb.0:
-; CHECK-SVE-SD-NEXT:    mov z0.d, #-32768 // =0xffffffffffff8000
-; CHECK-SVE-SD-NEXT:    ret
+; CHECK-SVE-LABEL: movi_m32768_v2i64:
+; CHECK-SVE:       // %bb.0:
+; CHECK-SVE-NEXT:    mov z0.d, #-32768 // =0xffffffffffff8000
+; CHECK-SVE-NEXT:    ret
 ;
 ; CHECK-NEON-GI-LABEL: movi_m32768_v2i64:
 ; CHECK-NEON-GI:       // %bb.0:
 ; CHECK-NEON-GI-NEXT:    adrp x8, .LCPI5_0
 ; CHECK-NEON-GI-NEXT:    ldr q0, [x8, :lo12:.LCPI5_0]
 ; CHECK-NEON-GI-NEXT:    ret
-;
-; CHECK-SVE-GI-LABEL: movi_m32768_v2i64:
-; CHECK-SVE-GI:       // %bb.0:
-; CHECK-SVE-GI-NEXT:    adrp x8, .LCPI5_0
-; CHECK-SVE-GI-NEXT:    ldr q0, [x8, :lo12:.LCPI5_0]
-; CHECK-SVE-GI-NEXT:    ret
   ret <2 x i64> splat (i64 -32768)
 }
 
@@ -169,16 +133,10 @@ define <4 x i32> @movi_v4i32_1() {
 ; CHECK-NEON-NEXT:    ldr q0, [x8, :lo12:.LCPI6_0]
 ; CHECK-NEON-NEXT:    ret
 ;
-; CHECK-SVE-SD-LABEL: movi_v4i32_1:
-; CHECK-SVE-SD:       // %bb.0:
-; CHECK-SVE-SD-NEXT:    mov z0.d, #127 // =0x7f
-; CHECK-SVE-SD-NEXT:    ret
-;
-; CHECK-SVE-GI-LABEL: movi_v4i32_1:
-; CHECK-SVE-GI:       // %bb.0:
-; CHECK-SVE-GI-NEXT:    adrp x8, .LCPI6_0
-; CHECK-SVE-GI-NEXT:    ldr q0, [x8, :lo12:.LCPI6_0]
-; CHECK-SVE-GI-NEXT:    ret
+; CHECK-SVE-LABEL: movi_v4i32_1:
+; CHECK-SVE:       // %bb.0:
+; CHECK-SVE-NEXT:    mov z0.d, #127 // =0x7f
+; CHECK-SVE-NEXT:    ret
   ret <4 x i32> <i32 127, i32 0, i32 127, i32 0>
 }
 
@@ -189,16 +147,10 @@ define <4 x i32> @movi_v4i32_2() {
 ; CHECK-NEON-NEXT:    ldr q0, [x8, :lo12:.LCPI7_0]
 ; CHECK-NEON-NEXT:    ret
 ;
-; CHECK-SVE-SD-LABEL: movi_v4i32_2:
-; CHECK-SVE-SD:       // %bb.0:
-; CHECK-SVE-SD-NEXT:    mov z0.d, #32512 // =0x7f00
-; CHECK-SVE-SD-NEXT:    ret
-;
-; CHECK-SVE-GI-LABEL: movi_v4i32_2:
-; CHECK-SVE-GI:       // %bb.0:
-; CHECK-SVE-GI-NEXT:    adrp x8, .LCPI7_0
-; CHECK-SVE-GI-NEXT:    ldr q0, [x8, :lo12:.LCPI7_0]
-; CHECK-SVE-GI-NEXT:    ret
+; CHECK-SVE-LABEL: movi_v4i32_2:
+; CHECK-SVE:       // %bb.0:
+; CHECK-SVE-NEXT:    mov z0.d, #32512 // =0x7f00
+; CHECK-SVE-NEXT:    ret
   ret <4 x i32> <i32 32512, i32 0, i32 32512, i32 0>
 }
 
@@ -209,22 +161,16 @@ define <4 x i32> @movi_v4i32_4092() {
 ; CHECK-NEON-SD-NEXT:    dup v0.4s, w8
 ; CHECK-NEON-SD-NEXT:    ret
 ;
-; CHECK-SVE-SD-LABEL: movi_v4i32_4092:
-; CHECK-SVE-SD:       // %bb.0:
-; CHECK-SVE-SD-NEXT:    mov z0.s, #4092 // =0xffc
-; CHECK-SVE-SD-NEXT:    ret
+; CHECK-SVE-LABEL: movi_v4i32_4092:
+; CHECK-SVE:       // %bb.0:
+; CHECK-SVE-NEXT:    mov z0.s, #4092 // =0xffc
+; CHECK-SVE-NEXT:    ret
 ;
 ; CHECK-NEON-GI-LABEL: movi_v4i32_4092:
 ; CHECK-NEON-GI:       // %bb.0:
 ; CHECK-NEON-GI-NEXT:    adrp x8, .LCPI8_0
 ; CHECK-NEON-GI-NEXT:    ldr q0, [x8, :lo12:.LCPI8_0]
 ; CHECK-NEON-GI-NEXT:    ret
-;
-; CHECK-SVE-GI-LABEL: movi_v4i32_4092:
-; CHECK-SVE-GI:       // %bb.0:
-; CHECK-SVE-GI-NEXT:    adrp x8, .LCPI8_0
-; CHECK-SVE-GI-NEXT:    ldr q0, [x8, :lo12:.LCPI8_0]
-; CHECK-SVE-GI-NEXT:    ret
   ret <4 x i32> splat (i32 4092)
 }
 
@@ -235,16 +181,10 @@ define <8 x i16> @movi_v8i16_1() {
 ; CHECK-NEON-NEXT:    ldr q0, [x8, :lo12:.LCPI9_0]
 ; CHECK-NEON-NEXT:    ret
 ;
-; CHECK-SVE-SD-LABEL: movi_v8i16_1:
-; CHECK-SVE-SD:       // %bb.0:
-; CHECK-SVE-SD-NEXT:    mov z0.d, #127 // =0x7f
-; CHECK-SVE-SD-NEXT:    ret
-;
-; CHECK-SVE-GI-LABEL: movi_v8i16_1:
-; CHECK-SVE-GI:       // %bb.0:
-; CHECK-SVE-GI-NEXT:    adrp x8, .LCPI9_0
-; CHECK-SVE-GI-NEXT:    ldr q0, [x8, :lo12:.LCPI9_0]
-; CHECK-SVE-GI-NEXT:    ret
+; CHECK-SVE-LABEL: movi_v8i16_1:
+; CHECK-SVE:       // %bb.0:
+; CHECK-SVE-NEXT:    mov z0.d, #127 // =0x7f
+; CHECK-SVE-NEXT:    ret
   ret <8 x i16> <i16 127, i16 0, i16 0, i16 0, i16 127, i16 0, i16 0, i16 0>
 }
 
@@ -255,16 +195,10 @@ define <8 x i16> @movi_v8i16_2() {
 ; CHECK-NEON-NEXT:    ldr q0, [x8, :lo12:.LCPI10_0]
 ; CHECK-NEON-NEXT:    ret
 ;
-; CHECK-SVE-SD-LABEL: movi_v8i16_2:
-; CHECK-SVE-SD:       // %bb.0:
-; CHECK-SVE-SD-NEXT:    mov z0.d, #32512 // =0x7f00
-; CHECK-SVE-SD-NEXT:    ret
-;
-; CHECK-SVE-GI-LABEL: movi_v8i16_2:
-; CHECK-SVE-GI:       // %bb.0:
-; CHECK-SVE-GI-NEXT:    adrp x8, .LCPI10_0
-; CHECK-SVE-GI-NEXT:    ldr q0, [x8, :lo12:.LCPI10_0]
-; CHECK-SVE-GI-NEXT:    ret
+; CHECK-SVE-LABEL: movi_v8i16_2:
+; CHECK-SVE:       // %bb.0:
+; CHECK-SVE-NEXT:    mov z0.d, #32512 // =0x7f00
+; CHECK-SVE-NEXT:    ret
   ret <8 x i16> <i16 32512, i16 0, i16 0, i16 0, i16 32512, i16 0, i16 0, i16 0>
 }
 
@@ -275,22 +209,16 @@ define <8 x i16> @movi_v8i16_510() {
 ; CHECK-NEON-SD-NEXT:    dup v0.8h, w8
 ; CHECK-NEON-SD-NEXT:    ret
 ;
-; CHECK-SVE-SD-LABEL: movi_v8i16_510:
-; CHECK-SVE-SD:       // %bb.0:
-; CHECK-SVE-SD-NEXT:    mov z0.h, #510 // =0x1fe
-; CHECK-SVE-SD-NEXT:    ret
+; CHECK-SVE-LABEL: movi_v8i16_510:
+; CHECK-SVE:       // %bb.0:
+; CHECK-SVE-NEXT:    mov z0.h, #510 // =0x1fe
+; CHECK-SVE-NEXT:    ret
 ;
 ; CHECK-NEON-GI-LABEL: movi_v8i16_510:
 ; CHECK-NEON-GI:       // %bb.0:
 ; CHECK-NEON-GI-NEXT:    adrp x8, .LCPI11_0
 ; CHECK-NEON-GI-NEXT:    ldr q0, [x8, :lo12:.LCPI11_0]
 ; CHECK-NEON-GI-NEXT:    ret
-;
-; CHECK-SVE-GI-LABEL: movi_v8i16_510:
-; CHECK-SVE-GI:       // %bb.0:
-; CHECK-SVE-GI-NEXT:    adrp x8, .LCPI11_0
-; CHECK-SVE-GI-NEXT:    ldr q0, [x8, :lo12:.LCPI11_0]
-; CHECK-SVE-GI-NEXT:    ret
   ret <8 x i16> splat (i16 510)
 }
 
@@ -301,16 +229,10 @@ define <16 x i8> @movi_v16i8_1() {
 ; CHECK-NEON-NEXT:    ldr q0, [x8, :lo12:.LCPI12_0]
 ; CHECK-NEON-NEXT:    ret
 ;
-; CHECK-SVE-SD-LABEL: movi_v16i8_1:
-; CHECK-SVE-SD:       // %bb.0:
-; CHECK-SVE-SD-NEXT:    mov z0.d, #127 // =0x7f
-; CHECK-SVE-SD-NEXT:    ret
-;
-; CHECK-SVE-GI-LABEL: movi_v16i8_1:
-; CHECK-SVE-GI:       // %bb.0:
-; CHECK-SVE-GI-NEXT:    adrp x8, .LCPI12_0
-; CHECK-SVE-GI-NEXT:    ldr q0, [x8, :lo12:.LCPI12_0]
-; CHECK-SVE-GI-NEXT:    ret
+; CHECK-SVE-LABEL: movi_v16i8_1:
+; CHECK-SVE:       // %bb.0:
+; CHECK-SVE-NEXT:    mov z0.d, #127 // =0x7f
+; CHECK-SVE-NEXT:    ret
   ret <16 x i8> <i8 127, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 127, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
 }
 
@@ -321,16 +243,10 @@ define <16 x i8> @movi_v16i8_2() {
 ; CHECK-NEON-NEXT:    ldr q0, [x8, :lo12:.LCPI13_0]
 ; CHECK-NEON-NEXT:    ret
 ;
-; CHECK-SVE-SD-LABEL: movi_v16i8_2:
-; CHECK-SVE-SD:       // %bb.0:
-; CHECK-SVE-SD-NEXT:    mov z0.d, #32512 // =0x7f00
-; CHECK-SVE-SD-NEXT:    ret
-;
-; CHECK-SVE-GI-LABEL: movi_v16i8_2:
-; CHECK-SVE-GI:       // %bb.0:
-; CHECK-SVE-GI-NEXT:    adrp x8, .LCPI13_0
-; CHECK-SVE-GI-NEXT:    ldr q0, [x8, :lo12:.LCPI13_0]
-; CHECK-SVE-GI-NEXT:    ret
+; CHECK-SVE-LABEL: movi_v16i8_2:
+; CHECK-SVE:       // %bb.0:
+; CHECK-SVE-NEXT:    mov z0.d, #32512 // =0x7f00
+; CHECK-SVE-NEXT:    ret
   ret <16 x i8> <i8 0, i8 127, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 127, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
 }
 
@@ -343,22 +259,16 @@ define <2 x i64> @movi_128_v2i64() {
 ; CHECK-NEON-SD-NEXT:    dup v0.2d, x8
 ; CHECK-NEON-SD-NEXT:    ret
 ;
-; CHECK-SVE-SD-LABEL: movi_128_v2i64:
-; CHECK-SVE-SD:       // %bb.0:
-; CHECK-SVE-SD-NEXT:    mov z0.d, #128 // =0x80
-; CHECK-SVE-SD-NEXT:    ret
+; CHECK-SVE-LABEL: movi_128_v2i64:
+; CHECK-SVE:       // %bb.0:
+; CHECK-SVE-NEXT:    mov z0.d, #128 // =0x80
+; CHECK-SVE-NEXT:    ret
 ;
 ; CHECK-NEON-GI-LABEL: movi_128_v2i64:
 ; CHECK-NEON-GI:       // %bb.0:
 ; CHECK-NEON-GI-NEXT:    adrp x8, .LCPI14_0
 ; CHECK-NEON-GI-NEXT:    ldr q0, [x8, :lo12:.LCPI14_0]
 ; CHECK-NEON-GI-NEXT:    ret
-;
-; CHECK-SVE-GI-LABEL: movi_128_v2i64:
-; CHECK-SVE-GI:       // %bb.0:
-; CHECK-SVE-GI-NEXT:    adrp x8, .LCPI14_0
-; CHECK-SVE-GI-NEXT:    ldr q0, [x8, :lo12:.LCPI14_0]
-; CHECK-SVE-GI-NEXT:    ret
   ret <2 x i64> splat (i64 128)
 }
 
@@ -369,22 +279,16 @@ define <2 x i64> @movi_m127_v2i64() {
 ; CHECK-NEON-SD-NEXT:    dup v0.2d, x8
 ; CHECK-NEON-SD-NEXT:    ret
 ;
-; CHECK-SVE-SD-LABEL: movi_m127_v2i64:
-; CHECK-SVE-SD:       // %bb.0:
-; CHECK-SVE-SD-NEXT:    mov z0.d, #-129 // =0xffffffffffffff7f
-; CHECK-SVE-SD-NEXT:    ret
+; CHECK-SVE-LABEL: movi_m127_v2i64:
+; CHECK-SVE:       // %bb.0:
+; CHECK-SVE-NEXT:    mov z0.d, #-129 // =0xffffffffffffff7f
+; CHECK-SVE-NEXT:    ret
 ;
 ; CHECK-NEON-GI-LABEL: movi_m127_v2i64:
 ; CHECK-NEON-GI:       // %bb.0:
 ; CHECK-NEON-GI-NEXT:    adrp x8, .LCPI15_0
 ; CHECK-NEON-GI-NEXT:    ldr q0, [x8, :lo12:.LCPI15_0]
 ; CHECK-NEON-GI-NEXT:    ret
-;
-; CHECK-SVE-GI-LABEL: movi_m127_v2i64:
-; CHECK-SVE-GI:       // %bb.0:
-; CHECK-SVE-GI-NEXT:    adrp x8, .LCPI15_0
-; CHECK-SVE-GI-NEXT:    ldr q0, [x8, :lo12:.LCPI15_0]
-; CHECK-SVE-GI-NEXT:    ret
   ret <2 x i64> splat (i64 -129)
 }
 
@@ -422,22 +326,16 @@ define <2 x i64> @movi_m32769_v2i64() {
 ; CHECK-NEON-SD-NEXT:    dup v0.2d, x8
 ; CHECK-NEON-SD-NEXT:    ret
 ;
-; CHECK-SVE-SD-LABEL: movi_m32769_v2i64:
-; CHECK-SVE-SD:       // %bb.0:
-; CHECK-SVE-SD-NEXT:    mov z0.d, #0xffffffffffff7fff
-; CHECK-SVE-SD-NEXT:    ret
+; CHECK-SVE-LABEL: movi_m32769_v2i64:
+; CHECK-SVE:       // %bb.0:
+; CHECK-SVE-NEXT:    mov z0.d, #0xffffffffffff7fff
+; CHECK-SVE-NEXT:    ret
 ;
 ; CHECK-NEON-GI-LABEL: movi_m32769_v2i64:
 ; CHECK-NEON-GI:       // %bb.0:
 ; CHECK-NEON-GI-NEXT:    adrp x8, .LCPI17_0
 ; CHECK-NEON-GI-NEXT:    ldr q0, [x8, :lo12:.LCPI17_0]
 ; CHECK-NEON-GI-NEXT:    ret
-;
-; CHECK-SVE-GI-LABEL: movi_m32769_v2i64:
-; CHECK-SVE-GI:       // %bb.0:
-; CHECK-SVE-GI-NEXT:    adrp x8, .LCPI17_0
-; CHECK-SVE-GI-NEXT:    ldr q0, [x8, :lo12:.LCPI17_0]
-; CHECK-SVE-GI-NEXT:    ret
   ret <2 x i64> splat (i64 -32769)
 }
 
@@ -475,16 +373,10 @@ define <4 x i32> @movi_v4i32_3() {
 ; CHECK-NEON-NEXT:    ldr q0, [x8, :lo12:.LCPI19_0]
 ; CHECK-NEON-NEXT:    ret
 ;
-; CHECK-SVE-SD-LABEL: movi_v4i32_3:
-; CHECK-SVE-SD:       // %bb.0:
-; CHECK-SVE-SD-NEXT:    mov z0.d, #0xffffff80
-; CHECK-SVE-SD-NEXT:    ret
-;
-; CHECK-SVE-GI-LABEL: movi_v4i32_3:
-; CHECK-SVE-GI:       // %bb.0:
-; CHECK-SVE-GI-NEXT:    adrp x8, .LCPI19_0
-; CHECK-SVE-GI-NEXT:    ldr q0, [x8, :lo12:.LCPI19_0]
-; CHECK-SVE-GI-NEXT:    ret
+; CHECK-SVE-LABEL: movi_v4i32_3:
+; CHECK-SVE:       // %bb.0:
+; CHECK-SVE-NEXT:    mov z0.d, #0xffffff80
+; CHECK-SVE-NEXT:    ret
   ret <4 x i32> <i32 -128, i32 0, i32 -128, i32 0>
 }
 
@@ -495,18 +387,11 @@ define <16 x i8> @movi_v16i8_3() {
 ; CHECK-NEON-NEXT:    ldr q0, [x8, :lo12:.LCPI20_0]
 ; CHECK-NEON-NEXT:    ret
 ;
-; CHECK-SVE-SD-LABEL: movi_v16i8_3:
-; CHECK-SVE-SD:       // %bb.0:
-; CHECK-SVE-SD-NEXT:    mov z0.d, #0x7f0000
-; CHECK-SVE-SD-NEXT:    ret
-;
-; CHECK-SVE-GI-LABEL: movi_v16i8_3:
-; CHECK-SVE-GI:       // %bb.0:
-; CHECK-SVE-GI-NEXT:    adrp x8, .LCPI20_0
-; CHECK-SVE-GI-NEXT:    ldr q0, [x8, :lo12:.LCPI20_0]
-; CHECK-SVE-GI-NEXT:    ret
+; CHECK-SVE-LABEL: movi_v16i8_3:
+; CHECK-SVE:       // %bb.0:
+; CHECK-SVE-NEXT:    mov z0.d, #0x7f0000
+; CHECK-SVE-NEXT:    ret
   ret <16 x i8> <i8 0, i8 0, i8 127, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 127, i8 0, i8 0, i8 0, i8 0, i8 0>
 }
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
 ; CHECK: {{.*}}
-; CHECK-SVE: {{.*}}
diff --git a/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll b/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll
index e72d79ef45757..e157f85e54988 100644
--- a/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll
@@ -59,18 +59,11 @@ define <16 x i8> @xor16xi8(<16 x i8> %a, <16 x i8> %b) {
 }
 
 define <8 x i8> @bsl8xi8_const(<8 x i8> %a, <8 x i8> %b)  {
-; CHECK-SD-LABEL: bsl8xi8_const:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    movi d2, #0x00ffff0000ffff
-; CHECK-SD-NEXT:    bif v0.8b, v1.8b, v2.8b
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: bsl8xi8_const:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI6_0
-; CHECK-GI-NEXT:    ldr d2, [x8, :lo12:.LCPI6_0]
-; CHECK-GI-NEXT:    bif v0.8b, v1.8b, v2.8b
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: bsl8xi8_const:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi d2, #0x00ffff0000ffff
+; CHECK-NEXT:    bif v0.8b, v1.8b, v2.8b
+; CHECK-NEXT:    ret
 	%tmp1 = and <8 x i8> %a, < i8 -1, i8 -1, i8 0, i8 0, i8 -1, i8 -1, i8 0, i8 0 >
 	%tmp2 = and <8 x i8> %b, < i8 0, i8 0, i8 -1, i8 -1, i8 0, i8 0, i8 -1, i8 -1 >
 	%tmp3 = or <8 x i8> %tmp1, %tmp2
@@ -78,18 +71,11 @@ define <8 x i8> @bsl8xi8_const(<8 x i8> %a, <8 x i8> %b)  {
 }
 
 define <16 x i8> @bsl16xi8_const(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-SD-LABEL: bsl16xi8_const:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    movi v2.2d, #0x000000ffffffff
-; CHECK-SD-NEXT:    bif v0.16b, v1.16b, v2.16b
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: bsl16xi8_const:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI7_0
-; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI7_0]
-; CHECK-GI-NEXT:    bif v0.16b, v1.16b, v2.16b
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: bsl16xi8_const:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v2.2d, #0x000000ffffffff
+; CHECK-NEXT:    bif v0.16b, v1.16b, v2.16b
+; CHECK-NEXT:    ret
 	%tmp1 = and <16 x i8> %a, < i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 0, i8 0, i8 0, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 0, i8 0, i8 0 >
 	%tmp2 = and <16 x i8> %b, < i8 0, i8 0, i8 0, i8 0, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 0, i8 0, i8 0, i8 -1, i8 -1, i8 -1, i8 -1 >
 	%tmp3 = or <16 x i8> %tmp1, %tmp2
@@ -892,8 +878,7 @@ define <2 x i32> @bsl2xi32_const(<2 x i32> %a, <2 x i32> %b)  {
 ;
 ; CHECK-GI-LABEL: bsl2xi32_const:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI70_0
-; CHECK-GI-NEXT:    ldr d2, [x8, :lo12:.LCPI70_0]
+; CHECK-GI-NEXT:    movi d2, #0x000000ffffffff
 ; CHECK-GI-NEXT:    bif v0.8b, v1.8b, v2.8b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = and <2 x i32> %a, < i32 -1, i32 0 >
@@ -904,18 +889,11 @@ define <2 x i32> @bsl2xi32_const(<2 x i32> %a, <2 x i32> %b)  {
 
 
 define <4 x i16> @bsl4xi16_const(<4 x i16> %a, <4 x i16> %b)  {
-; CHECK-SD-LABEL: bsl4xi16_const:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    movi d2, #0x00ffff0000ffff
-; CHECK-SD-NEXT:    bif v0.8b, v1.8b, v2.8b
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: bsl4xi16_const:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI71_0
-; CHECK-GI-NEXT:    ldr d2, [x8, :lo12:.LCPI71_0]
-; CHECK-GI-NEXT:    bif v0.8b, v1.8b, v2.8b
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: bsl4xi16_const:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi d2, #0x00ffff0000ffff
+; CHECK-NEXT:    bif v0.8b, v1.8b, v2.8b
+; CHECK-NEXT:    ret
 	%tmp1 = and <4 x i16> %a, < i16 -1, i16 0, i16 -1,i16 0 >
 	%tmp2 = and <4 x i16> %b, < i16 0, i16 -1,i16 0, i16 -1 >
 	%tmp3 = or <4 x i16> %tmp1, %tmp2
@@ -947,18 +925,11 @@ define <1 x i64> @bsl1xi64_const(<1 x i64> %a, <1 x i64> %b)  {
 }
 
 define <4 x i32> @bsl4xi32_const(<4 x i32> %a, <4 x i32> %b)  {
-; CHECK-SD-LABEL: bsl4xi32_const:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    movi v2.2d, #0x000000ffffffff
-; CHECK-SD-NEXT:    bif v0.16b, v1.16b, v2.16b
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: bsl4xi32_const:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI73_0
-; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI73_0]
-; CHECK-GI-NEXT:    bif v0.16b, v1.16b, v2.16b
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: bsl4xi32_const:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v2.2d, #0x000000ffffffff
+; CHECK-NEXT:    bif v0.16b, v1.16b, v2.16b
+; CHECK-NEXT:    ret
 	%tmp1 = and <4 x i32> %a, < i32 -1, i32 0, i32 -1, i32 0 >
 	%tmp2 = and <4 x i32> %b, < i32 0, i32 -1, i32 0, i32 -1 >
 	%tmp3 = or <4 x i32> %tmp1, %tmp2
@@ -966,18 +937,11 @@ define <4 x i32> @bsl4xi32_const(<4 x i32> %a, <4 x i32> %b)  {
 }
 
 define <8 x i16> @bsl8xi16_const(<8 x i16> %a, <8 x i16> %b)  {
-; CHECK-SD-LABEL: bsl8xi16_const:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    movi v2.2d, #0x000000ffffffff
-; CHECK-SD-NEXT:    bif v0.16b, v1.16b, v2.16b
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: bsl8xi16_const:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI74_0
-; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI74_0]
-; CHECK-GI-NEXT:    bif v0.16b, v1.16b, v2.16b
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: bsl8xi16_const:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v2.2d, #0x000000ffffffff
+; CHECK-NEXT:    bif v0.16b, v1.16b, v2.16b
+; CHECK-NEXT:    ret
 	%tmp1 = and <8 x i16> %a, < i16 -1, i16 -1, i16 0,i16 0, i16 -1, i16 -1, i16 0,i16 0 >
 	%tmp2 = and <8 x i16> %b, < i16 0, i16 0, i16 -1, i16 -1, i16 0, i16 0, i16 -1, i16 -1 >
 	%tmp3 = or <8 x i16> %tmp1, %tmp2
@@ -1561,8 +1525,7 @@ define <8 x i8> @orrimm8b_as_orrimm4h_lsl0(<8 x i8> %a) {
 ;
 ; CHECK-GI-LABEL: orrimm8b_as_orrimm4h_lsl0:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI104_0
-; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI104_0]
+; CHECK-GI-NEXT:    movi d1, #0xff00ff00ff00ff
 ; CHECK-GI-NEXT:    orr v0.8b, v0.8b, v1.8b
 ; CHECK-GI-NEXT:    ret
   %val = or <8 x i8> %a, <i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0>
@@ -1577,8 +1540,7 @@ define <8 x i8> @orrimm8b_as_orimm4h_lsl8(<8 x i8> %a) {
 ;
 ; CHECK-GI-LABEL: orrimm8b_as_orimm4h_lsl8:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI105_0
-; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI105_0]
+; CHECK-GI-NEXT:    movi d1, #0xff00ff00ff00ff00
 ; CHECK-GI-NEXT:    orr v0.8b, v0.8b, v1.8b
 ; CHECK-GI-NEXT:    ret
   %val = or <8 x i8> %a, <i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255>
@@ -1593,8 +1555,7 @@ define <16 x i8> @orimm16b_as_orrimm8h_lsl0(<16 x i8> %a) {
 ;
 ; CHECK-GI-LABEL: orimm16b_as_orrimm8h_lsl0:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI106_0
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI106_0]
+; CHECK-GI-NEXT:    movi v1.2d, #0xff00ff00ff00ff
 ; CHECK-GI-NEXT:    orr v0.16b, v0.16b, v1.16b
 ; CHECK-GI-NEXT:    ret
   %val = or <16 x i8> %a, <i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0>
@@ -1609,8 +1570,7 @@ define <16 x i8> @orimm16b_as_orrimm8h_lsl8(<16 x i8> %a) {
 ;
 ; CHECK-GI-LABEL: orimm16b_as_orrimm8h_lsl8:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI107_0
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI107_0]
+; CHECK-GI-NEXT:    movi v1.2d, #0xff00ff00ff00ff00
 ; CHECK-GI-NEXT:    orr v0.16b, v0.16b, v1.16b
 ; CHECK-GI-NEXT:    ret
   %val = or <16 x i8> %a, <i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255>
@@ -1625,8 +1585,7 @@ define <8 x i8> @and8imm2s_lsl0(<8 x i8> %a) {
 ;
 ; CHECK-GI-LABEL: and8imm2s_lsl0:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI108_0
-; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI108_0]
+; CHECK-GI-NEXT:    movi d1, #0xffffff00ffffff00
 ; CHECK-GI-NEXT:    and v0.8b, v0.8b, v1.8b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = and <8 x i8> %a, < i8 0, i8 255, i8 255, i8 255, i8 0, i8 255, i8 255, i8 255>
@@ -1641,8 +1600,7 @@ define <8 x i8> @and8imm2s_lsl8(<8 x i8> %a) {
 ;
 ; CHECK-GI-LABEL: and8imm2s_lsl8:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI109_0
-; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI109_0]
+; CHECK-GI-NEXT:    movi d1, #0xffff00ffffff00ff
 ; CHECK-GI-NEXT:    and v0.8b, v0.8b, v1.8b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = and <8 x i8> %a, < i8 255, i8 0, i8 255, i8 255, i8 255, i8 0, i8 255, i8 255>
@@ -1657,8 +1615,7 @@ define <8 x i8> @and8imm2s_lsl16(<8 x i8> %a) {
 ;
 ; CHECK-GI-LABEL: and8imm2s_lsl16:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI110_0
-; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI110_0]
+; CHECK-GI-NEXT:    movi d1, #0xff00ffffff00ffff
 ; CHECK-GI-NEXT:    and v0.8b, v0.8b, v1.8b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = and <8 x i8> %a, < i8 255, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0, i8 255>
@@ -1673,8 +1630,7 @@ define <8 x i8> @and8imm2s_lsl24(<8 x i8> %a) {
 ;
 ; CHECK-GI-LABEL: and8imm2s_lsl24:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI111_0
-; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI111_0]
+; CHECK-GI-NEXT:    mvni v1.2s, #254, lsl #24
 ; CHECK-GI-NEXT:    and v0.8b, v0.8b, v1.8b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = and <8 x i8> %a, < i8 255, i8 255, i8 255, i8 1, i8 255, i8 255, i8 255, i8 1>
@@ -1689,8 +1645,7 @@ define <4 x i16> @and16imm2s_lsl0(<4 x i16> %a) {
 ;
 ; CHECK-GI-LABEL: and16imm2s_lsl0:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI112_0
-; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI112_0]
+; CHECK-GI-NEXT:    movi d1, #0xffffff00ffffff00
 ; CHECK-GI-NEXT:    and v0.8b, v0.8b, v1.8b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = and <4 x i16> %a, < i16 65280, i16 65535, i16 65280, i16 65535>
@@ -1705,8 +1660,7 @@ define <4 x i16> @and16imm2s_lsl8(<4 x i16> %a) {
 ;
 ; CHECK-GI-LABEL: and16imm2s_lsl8:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI113_0
-; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI113_0]
+; CHECK-GI-NEXT:    movi d1, #0xffff00ffffff00ff
 ; CHECK-GI-NEXT:    and v0.8b, v0.8b, v1.8b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = and <4 x i16> %a, < i16 255, i16 65535, i16 255, i16 65535>
@@ -1721,8 +1675,7 @@ define <4 x i16> @and16imm2s_lsl16(<4 x i16> %a) {
 ;
 ; CHECK-GI-LABEL: and16imm2s_lsl16:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI114_0
-; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI114_0]
+; CHECK-GI-NEXT:    movi d1, #0xff00ffffff00ffff
 ; CHECK-GI-NEXT:    and v0.8b, v0.8b, v1.8b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = and <4 x i16> %a, < i16 65535, i16 65280, i16 65535, i16 65280>
@@ -1737,8 +1690,7 @@ define <4 x i16> @and16imm2s_lsl24(<4 x i16> %a) {
 ;
 ; CHECK-GI-LABEL: and16imm2s_lsl24:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI115_0
-; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI115_0]
+; CHECK-GI-NEXT:    mvni v1.2s, #254, lsl #24
 ; CHECK-GI-NEXT:    and v0.8b, v0.8b, v1.8b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = and <4 x i16> %a, < i16 65535, i16 511, i16 65535, i16 511>
@@ -1818,8 +1770,7 @@ define <16 x i8> @and8imm4s_lsl0(<16 x i8> %a) {
 ;
 ; CHECK-GI-LABEL: and8imm4s_lsl0:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI120_0
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI120_0]
+; CHECK-GI-NEXT:    movi v1.2d, #0xffffff00ffffff00
 ; CHECK-GI-NEXT:    and v0.16b, v0.16b, v1.16b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = and <16 x i8> %a, < i8 0, i8 255, i8 255, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0, i8 255, i8 255, i8 255>
@@ -1834,8 +1785,7 @@ define <16 x i8> @and8imm4s_lsl8(<16 x i8> %a) {
 ;
 ; CHECK-GI-LABEL: and8imm4s_lsl8:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI121_0
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI121_0]
+; CHECK-GI-NEXT:    movi v1.2d, #0xffff00ffffff00ff
 ; CHECK-GI-NEXT:    and v0.16b, v0.16b, v1.16b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = and <16 x i8> %a, < i8 255, i8 0, i8 255, i8 255, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0, i8 255, i8 255>
@@ -1850,8 +1800,7 @@ define <16 x i8> @and8imm4s_lsl16(<16 x i8> %a) {
 ;
 ; CHECK-GI-LABEL: and8imm4s_lsl16:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI122_0
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI122_0]
+; CHECK-GI-NEXT:    movi v1.2d, #0xff00ffffff00ffff
 ; CHECK-GI-NEXT:    and v0.16b, v0.16b, v1.16b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = and <16 x i8> %a, < i8 255, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0, i8 255>
@@ -1866,8 +1815,7 @@ define <16 x i8> @and8imm4s_lsl24(<16 x i8> %a) {
 ;
 ; CHECK-GI-LABEL: and8imm4s_lsl24:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI123_0
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI123_0]
+; CHECK-GI-NEXT:    mvni v1.4s, #254, lsl #24
 ; CHECK-GI-NEXT:    and v0.16b, v0.16b, v1.16b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = and <16 x i8> %a, < i8 255, i8 255, i8 255, i8 1, i8 255, i8 255, i8 255, i8 1, i8 255, i8 255, i8 255, i8 1, i8 255, i8 255, i8 255, i8 1>
@@ -1882,8 +1830,7 @@ define <8 x i16> @and16imm4s_lsl0(<8 x i16> %a) {
 ;
 ; CHECK-GI-LABEL: and16imm4s_lsl0:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI124_0
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI124_0]
+; CHECK-GI-NEXT:    movi v1.2d, #0xffffff00ffffff00
 ; CHECK-GI-NEXT:    and v0.16b, v0.16b, v1.16b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = and <8 x i16> %a, < i16 65280, i16 65535, i16 65280, i16 65535, i16 65280, i16 65535, i16 65280, i16 65535>
@@ -1898,8 +1845,7 @@ define <8 x i16> @and16imm4s_lsl8(<8 x i16> %a) {
 ;
 ; CHECK-GI-LABEL: and16imm4s_lsl8:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI125_0
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI125_0]
+; CHECK-GI-NEXT:    movi v1.2d, #0xffff00ffffff00ff
 ; CHECK-GI-NEXT:    and v0.16b, v0.16b, v1.16b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = and <8 x i16> %a, < i16 255, i16 65535, i16 255, i16 65535, i16 255, i16 65535, i16 255, i16 65535>
@@ -1914,8 +1860,7 @@ define <8 x i16> @and16imm4s_lsl16(<8 x i16> %a) {
 ;
 ; CHECK-GI-LABEL: and16imm4s_lsl16:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI126_0
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI126_0]
+; CHECK-GI-NEXT:    movi v1.2d, #0xff00ffffff00ffff
 ; CHECK-GI-NEXT:    and v0.16b, v0.16b, v1.16b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = and <8 x i16> %a, < i16 65535, i16 65280, i16 65535, i16 65280, i16 65535, i16 65280, i16 65535, i16 65280>
@@ -1930,8 +1875,7 @@ define <8 x i16> @and16imm4s_lsl24(<8 x i16> %a) {
 ;
 ; CHECK-GI-LABEL: and16imm4s_lsl24:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI127_0
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI127_0]
+; CHECK-GI-NEXT:    mvni v1.4s, #254, lsl #24
 ; CHECK-GI-NEXT:    and v0.16b, v0.16b, v1.16b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = and <8 x i16> %a, < i16 65535, i16 511, i16 65535, i16 511, i16 65535, i16 511, i16 65535, i16 511>
@@ -2006,8 +1950,7 @@ define <8 x i8> @and8imm4h_lsl0(<8 x i8> %a) {
 ;
 ; CHECK-GI-LABEL: and8imm4h_lsl0:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI132_0
-; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI132_0]
+; CHECK-GI-NEXT:    movi d1, #0xff00ff00ff00ff00
 ; CHECK-GI-NEXT:    and v0.8b, v0.8b, v1.8b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = and <8 x i8> %a, < i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255>
@@ -2022,8 +1965,7 @@ define <8 x i8> @and8imm4h_lsl8(<8 x i8> %a) {
 ;
 ; CHECK-GI-LABEL: and8imm4h_lsl8:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI133_0
-; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI133_0]
+; CHECK-GI-NEXT:    movi d1, #0xff00ff00ff00ff
 ; CHECK-GI-NEXT:    and v0.8b, v0.8b, v1.8b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = and <8 x i8> %a, < i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0>
@@ -2100,8 +2042,7 @@ define <16 x i8> @and8imm8h_lsl0(<16 x i8> %a) {
 ;
 ; CHECK-GI-LABEL: and8imm8h_lsl0:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI138_0
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI138_0]
+; CHECK-GI-NEXT:    movi v1.2d, #0xff00ff00ff00ff00
 ; CHECK-GI-NEXT:    and v0.16b, v0.16b, v1.16b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = and <16 x i8> %a, < i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255 >
@@ -2116,8 +2057,7 @@ define <16 x i8> @and8imm8h_lsl8(<16 x i8> %a) {
 ;
 ; CHECK-GI-LABEL: and8imm8h_lsl8:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI139_0
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI139_0]
+; CHECK-GI-NEXT:    movi v1.2d, #0xff00ff00ff00ff
 ; CHECK-GI-NEXT:    and v0.16b, v0.16b, v1.16b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = and <16 x i8> %a, <i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0 >
@@ -2280,8 +2220,7 @@ define <8 x i8> @orr8imm2s_lsl0(<8 x i8> %a) {
 ;
 ; CHECK-GI-LABEL: orr8imm2s_lsl0:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI148_0
-; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI148_0]
+; CHECK-GI-NEXT:    movi d1, #0x0000ff000000ff
 ; CHECK-GI-NEXT:    orr v0.8b, v0.8b, v1.8b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = or <8 x i8> %a, < i8 255, i8 0, i8 0, i8 0, i8 255, i8 0, i8 0, i8 0>
@@ -2296,8 +2235,7 @@ define <8 x i8> @orr8imm2s_lsl8(<8 x i8> %a) {
 ;
 ; CHECK-GI-LABEL: orr8imm2s_lsl8:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI149_0
-; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI149_0]
+; CHECK-GI-NEXT:    movi d1, #0x00ff000000ff00
 ; CHECK-GI-NEXT:    orr v0.8b, v0.8b, v1.8b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = or <8 x i8> %a, < i8 0, i8 255, i8 0, i8 0, i8 0, i8 255, i8 0, i8 0>
@@ -2312,8 +2250,7 @@ define <8 x i8> @orr8imm2s_lsl16(<8 x i8> %a) {
 ;
 ; CHECK-GI-LABEL: orr8imm2s_lsl16:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI150_0
-; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI150_0]
+; CHECK-GI-NEXT:    movi d1, #0xff000000ff0000
 ; CHECK-GI-NEXT:    orr v0.8b, v0.8b, v1.8b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = or <8 x i8> %a, < i8 0, i8 0, i8 255, i8 0, i8 0, i8 0, i8 255, i8 0>
@@ -2328,8 +2265,7 @@ define <8 x i8> @orr8imm2s_lsl24(<8 x i8> %a) {
 ;
 ; CHECK-GI-LABEL: orr8imm2s_lsl24:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI151_0
-; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI151_0]
+; CHECK-GI-NEXT:    movi d1, #0xff000000ff000000
 ; CHECK-GI-NEXT:    orr v0.8b, v0.8b, v1.8b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = or <8 x i8> %a, < i8 0, i8 0, i8 0, i8 255, i8 0, i8 0, i8 0, i8 255>
@@ -2344,8 +2280,7 @@ define <4 x i16> @orr16imm2s_lsl0(<4 x i16> %a) {
 ;
 ; CHECK-GI-LABEL: orr16imm2s_lsl0:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI152_0
-; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI152_0]
+; CHECK-GI-NEXT:    movi d1, #0x0000ff000000ff
 ; CHECK-GI-NEXT:    orr v0.8b, v0.8b, v1.8b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = or <4 x i16> %a, < i16 255, i16 0, i16 255, i16 0>
@@ -2360,8 +2295,7 @@ define <4 x i16> @orr16imm2s_lsl8(<4 x i16> %a) {
 ;
 ; CHECK-GI-LABEL: orr16imm2s_lsl8:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI153_0
-; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI153_0]
+; CHECK-GI-NEXT:    movi d1, #0x00ff000000ff00
 ; CHECK-GI-NEXT:    orr v0.8b, v0.8b, v1.8b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = or <4 x i16> %a, < i16 65280, i16 0, i16 65280, i16 0>
@@ -2376,8 +2310,7 @@ define <4 x i16> @orr16imm2s_lsl16(<4 x i16> %a) {
 ;
 ; CHECK-GI-LABEL: orr16imm2s_lsl16:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI154_0
-; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI154_0]
+; CHECK-GI-NEXT:    movi d1, #0xff000000ff0000
 ; CHECK-GI-NEXT:    orr v0.8b, v0.8b, v1.8b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = or <4 x i16> %a, < i16 0, i16 255, i16 0, i16 255>
@@ -2392,8 +2325,7 @@ define <4 x i16> @orr16imm2s_lsl24(<4 x i16> %a) {
 ;
 ; CHECK-GI-LABEL: orr16imm2s_lsl24:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI155_0
-; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI155_0]
+; CHECK-GI-NEXT:    movi d1, #0xff000000ff000000
 ; CHECK-GI-NEXT:    orr v0.8b, v0.8b, v1.8b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = or <4 x i16> %a, < i16 0, i16 65280, i16 0, i16 65280>
@@ -2472,8 +2404,7 @@ define <16 x i8> @orr8imm4s_lsl0(<16 x i8> %a) {
 ;
 ; CHECK-GI-LABEL: orr8imm4s_lsl0:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI160_0
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI160_0]
+; CHECK-GI-NEXT:    movi v1.2d, #0x0000ff000000ff
 ; CHECK-GI-NEXT:    orr v0.16b, v0.16b, v1.16b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = or <16 x i8> %a, < i8 255, i8 0, i8 0, i8 0, i8 255, i8 0, i8 0, i8 0, i8 255, i8 0, i8 0, i8 0, i8 255, i8 0, i8 0, i8 0>
@@ -2488,8 +2419,7 @@ define <16 x i8> @orr8imm4s_lsl8(<16 x i8> %a) {
 ;
 ; CHECK-GI-LABEL: orr8imm4s_lsl8:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI161_0
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI161_0]
+; CHECK-GI-NEXT:    movi v1.2d, #0x00ff000000ff00
 ; CHECK-GI-NEXT:    orr v0.16b, v0.16b, v1.16b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = or <16 x i8> %a, < i8 0, i8 255, i8 0, i8 0, i8 0, i8 255, i8 0, i8 0, i8 0, i8 255, i8 0, i8 0, i8 0, i8 255, i8 0, i8 0>
@@ -2504,8 +2434,7 @@ define <16 x i8> @orr8imm4s_lsl16(<16 x i8> %a) {
 ;
 ; CHECK-GI-LABEL: orr8imm4s_lsl16:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI162_0
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI162_0]
+; CHECK-GI-NEXT:    movi v1.2d, #0xff000000ff0000
 ; CHECK-GI-NEXT:    orr v0.16b, v0.16b, v1.16b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = or <16 x i8> %a, < i8 0, i8 0, i8 255, i8 0, i8 0, i8 0, i8 255, i8 0, i8 0, i8 0, i8 255, i8 0, i8 0, i8 0, i8 255, i8 0>
@@ -2520,8 +2449,7 @@ define <16 x i8> @orr8imm4s_lsl24(<16 x i8> %a) {
 ;
 ; CHECK-GI-LABEL: orr8imm4s_lsl24:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI163_0
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI163_0]
+; CHECK-GI-NEXT:    movi v1.2d, #0xff000000ff000000
 ; CHECK-GI-NEXT:    orr v0.16b, v0.16b, v1.16b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = or <16 x i8> %a, < i8 0, i8 0, i8 0, i8 255, i8 0, i8 0, i8 0, i8 255, i8 0, i8 0, i8 0, i8 255, i8 0, i8 0, i8 0, i8 255>
@@ -2536,8 +2464,7 @@ define <8 x i16> @orr16imm4s_lsl0(<8 x i16> %a) {
 ;
 ; CHECK-GI-LABEL: orr16imm4s_lsl0:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI164_0
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI164_0]
+; CHECK-GI-NEXT:    movi v1.2d, #0x0000ff000000ff
 ; CHECK-GI-NEXT:    orr v0.16b, v0.16b, v1.16b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = or <8 x i16> %a, < i16 255, i16 0, i16 255, i16 0, i16 255, i16 0, i16 255, i16 0>
@@ -2552,8 +2479,7 @@ define <8 x i16> @orr16imm4s_lsl8(<8 x i16> %a) {
 ;
 ; CHECK-GI-LABEL: orr16imm4s_lsl8:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI165_0
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI165_0]
+; CHECK-GI-NEXT:    movi v1.2d, #0x00ff000000ff00
 ; CHECK-GI-NEXT:    orr v0.16b, v0.16b, v1.16b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = or <8 x i16> %a, < i16 65280, i16 0, i16 65280, i16 0, i16 65280, i16 0, i16 65280, i16 0>
@@ -2568,8 +2494,7 @@ define <8 x i16> @orr16imm4s_lsl16(<8 x i16> %a) {
 ;
 ; CHECK-GI-LABEL: orr16imm4s_lsl16:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI166_0
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI166_0]
+; CHECK-GI-NEXT:    movi v1.2d, #0xff000000ff0000
 ; CHECK-GI-NEXT:    orr v0.16b, v0.16b, v1.16b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = or <8 x i16> %a, < i16 0, i16 255, i16 0, i16 255, i16 0, i16 255, i16 0, i16 255>
@@ -2584,8 +2509,7 @@ define <8 x i16> @orr16imm4s_lsl24(<8 x i16> %a) {
 ;
 ; CHECK-GI-LABEL: orr16imm4s_lsl24:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI167_0
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI167_0]
+; CHECK-GI-NEXT:    movi v1.2d, #0xff000000ff000000
 ; CHECK-GI-NEXT:    orr v0.16b, v0.16b, v1.16b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = or <8 x i16> %a, < i16 0, i16 65280, i16 0, i16 65280, i16 0, i16 65280, i16 0, i16 65280>
@@ -2660,8 +2584,7 @@ define <8 x i8> @orr8imm4h_lsl0(<8 x i8> %a) {
 ;
 ; CHECK-GI-LABEL: orr8imm4h_lsl0:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI172_0
-; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI172_0]
+; CHECK-GI-NEXT:    movi d1, #0xff00ff00ff00ff
 ; CHECK-GI-NEXT:    orr v0.8b, v0.8b, v1.8b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = or <8 x i8> %a, < i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0>
@@ -2676,8 +2599,7 @@ define <8 x i8> @orr8imm4h_lsl8(<8 x i8> %a) {
 ;
 ; CHECK-GI-LABEL: orr8imm4h_lsl8:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI173_0
-; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI173_0]
+; CHECK-GI-NEXT:    movi d1, #0xff00ff00ff00ff00
 ; CHECK-GI-NEXT:    orr v0.8b, v0.8b, v1.8b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = or <8 x i8> %a, < i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255>
@@ -2754,8 +2676,7 @@ define <16 x i8> @orr8imm8h_lsl0(<16 x i8> %a) {
 ;
 ; CHECK-GI-LABEL: orr8imm8h_lsl0:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI178_0
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI178_0]
+; CHECK-GI-NEXT:    movi v1.2d, #0xff00ff00ff00ff
 ; CHECK-GI-NEXT:    orr v0.16b, v0.16b, v1.16b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = or <16 x i8> %a, < i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0>
@@ -2770,8 +2691,7 @@ define <16 x i8> @orr8imm8h_lsl8(<16 x i8> %a) {
 ;
 ; CHECK-GI-LABEL: orr8imm8h_lsl8:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI179_0
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI179_0]
+; CHECK-GI-NEXT:    movi v1.2d, #0xff00ff00ff00ff00
 ; CHECK-GI-NEXT:    orr v0.16b, v0.16b, v1.16b
 ; CHECK-GI-NEXT:    ret
 	%tmp1 = or <16 x i8> %a, < i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255>
diff --git a/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll b/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll
index 9f646c28ce74a..9147183bf5ea5 100644
--- a/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll
@@ -1465,37 +1465,22 @@ define <2 x i64> @cmltz2xi64(<2 x i64> %A) {
 }
 
 define <8 x i1> @not_cmle8xi8(<8 x i8> %0) {
-; CHECK-SD-LABEL: not_cmle8xi8:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    movi v1.2s, #1
-; CHECK-SD-NEXT:    cmgt v0.8b, v1.8b, v0.8b
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: not_cmle8xi8:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    adrp x8, .LCPI133_0
-; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI133_0]
-; CHECK-GI-NEXT:    cmgt v0.8b, v1.8b, v0.8b
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: not_cmle8xi8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.2s, #1
+; CHECK-NEXT:    cmgt v0.8b, v1.8b, v0.8b
+; CHECK-NEXT:    ret
   %cmp.i = icmp slt <8 x i8> %0, <i8 1, i8 0, i8 0, i8 0, i8 1, i8 0, i8 0, i8 0>
   ret <8 x i1> %cmp.i
 }
 
 define <4 x i1> @not_cmle16xi8(<4 x i32> %0) {
-; CHECK-SD-LABEL: not_cmle16xi8:
-; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    movi v1.8h, #1
-; CHECK-SD-NEXT:    cmgt v0.4s, v1.4s, v0.4s
-; CHECK-SD-NEXT:    xtn v0.4h, v0.4s
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: not_cmle16xi8:
-; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    adrp x8, .LCPI134_0
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI134_0]
-; CHECK-GI-NEXT:    cmgt v0.4s, v1.4s, v0.4s
-; CHECK-GI-NEXT:    xtn v0.4h, v0.4s
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: not_cmle16xi8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi v1.8h, #1
+; CHECK-NEXT:    cmgt v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    xtn v0.4h, v0.4s
+; CHECK-NEXT:    ret
 entry:
   %bc = bitcast <16 x i8> <i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0> to <4 x i32>
   %cmp.i = icmp slt <4 x i32> %0, %bc
diff --git a/llvm/test/CodeGen/AArch64/neon-mov.ll b/llvm/test/CodeGen/AArch64/neon-mov.ll
index ca5af2c7c452e..417c1de0c250c 100644
--- a/llvm/test/CodeGen/AArch64/neon-mov.ll
+++ b/llvm/test/CodeGen/AArch64/neon-mov.ll
@@ -159,29 +159,11 @@ define <4 x i16> @movi4h_lsl8() {
 }
 
 define <4 x i16> @movi4h_fneg() {
-; CHECK-NOFP16-SD-LABEL: movi4h_fneg:
-; CHECK-NOFP16-SD:       // %bb.0:
-; CHECK-NOFP16-SD-NEXT:    movi v0.4h, #127, lsl #8
-; CHECK-NOFP16-SD-NEXT:    fneg v0.2s, v0.2s
-; CHECK-NOFP16-SD-NEXT:    ret
-;
-; CHECK-FP16-SD-LABEL: movi4h_fneg:
-; CHECK-FP16-SD:       // %bb.0:
-; CHECK-FP16-SD-NEXT:    movi v0.4h, #127, lsl #8
-; CHECK-FP16-SD-NEXT:    fneg v0.2s, v0.2s
-; CHECK-FP16-SD-NEXT:    ret
-;
-; CHECK-NOFP16-GI-LABEL: movi4h_fneg:
-; CHECK-NOFP16-GI:       // %bb.0:
-; CHECK-NOFP16-GI-NEXT:    adrp x8, .LCPI18_0
-; CHECK-NOFP16-GI-NEXT:    ldr d0, [x8, :lo12:.LCPI18_0]
-; CHECK-NOFP16-GI-NEXT:    ret
-;
-; CHECK-FP16-GI-LABEL: movi4h_fneg:
-; CHECK-FP16-GI:       // %bb.0:
-; CHECK-FP16-GI-NEXT:    adrp x8, .LCPI18_0
-; CHECK-FP16-GI-NEXT:    ldr d0, [x8, :lo12:.LCPI18_0]
-; CHECK-FP16-GI-NEXT:    ret
+; CHECK-LABEL: movi4h_fneg:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v0.4h, #127, lsl #8
+; CHECK-NEXT:    fneg v0.2s, v0.2s
+; CHECK-NEXT:    ret
    ret <4 x i16> <i16 32512, i16 65280, i16 32512, i16 65280>
 }
 
@@ -202,29 +184,11 @@ define <8 x i16> @movi8h_lsl8() {
 }
 
 define <8 x i16> @movi8h_fneg() {
-; CHECK-NOFP16-SD-LABEL: movi8h_fneg:
-; CHECK-NOFP16-SD:       // %bb.0:
-; CHECK-NOFP16-SD-NEXT:    movi v0.8h, #127, lsl #8
-; CHECK-NOFP16-SD-NEXT:    fneg v0.4s, v0.4s
-; CHECK-NOFP16-SD-NEXT:    ret
-;
-; CHECK-FP16-SD-LABEL: movi8h_fneg:
-; CHECK-FP16-SD:       // %bb.0:
-; CHECK-FP16-SD-NEXT:    movi v0.8h, #127, lsl #8
-; CHECK-FP16-SD-NEXT:    fneg v0.4s, v0.4s
-; CHECK-FP16-SD-NEXT:    ret
-;
-; CHECK-NOFP16-GI-LABEL: movi8h_fneg:
-; CHECK-NOFP16-GI:       // %bb.0:
-; CHECK-NOFP16-GI-NEXT:    adrp x8, .LCPI21_0
-; CHECK-NOFP16-GI-NEXT:    ldr q0, [x8, :lo12:.LCPI21_0]
-; CHECK-NOFP16-GI-NEXT:    ret
-;
-; CHECK-FP16-GI-LABEL: movi8h_fneg:
-; CHECK-FP16-GI:       // %bb.0:
-; CHECK-FP16-GI-NEXT:    adrp x8, .LCPI21_0
-; CHECK-FP16-GI-NEXT:    ldr q0, [x8, :lo12:.LCPI21_0]
-; CHECK-FP16-GI-NEXT:    ret
+; CHECK-LABEL: movi8h_fneg:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v0.8h, #127, lsl #8
+; CHECK-NEXT:    fneg v0.4s, v0.4s
+; CHECK-NEXT:    ret
    ret <8 x i16> <i16 32512, i16 65280, i16 32512, i16 65280, i16 32512, i16 65280, i16 32512, i16 65280>
 }
 
@@ -535,11 +499,29 @@ define <2 x double> @fmov2d() {
 }
 
 define <2 x double> @fmov2d_neg0() {
-; CHECK-LABEL: fmov2d_neg0:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    movi v0.2d, #0000000000000000
-; CHECK-NEXT:    fneg v0.2d, v0.2d
-; CHECK-NEXT:    ret
+; CHECK-NOFP16-SD-LABEL: fmov2d_neg0:
+; CHECK-NOFP16-SD:       // %bb.0:
+; CHECK-NOFP16-SD-NEXT:    movi d0, #0000000000000000
+; CHECK-NOFP16-SD-NEXT:    fneg v0.2d, v0.2d
+; CHECK-NOFP16-SD-NEXT:    ret
+;
+; CHECK-FP16-SD-LABEL: fmov2d_neg0:
+; CHECK-FP16-SD:       // %bb.0:
+; CHECK-FP16-SD-NEXT:    movi d0, #0000000000000000
+; CHECK-FP16-SD-NEXT:    fneg v0.2d, v0.2d
+; CHECK-FP16-SD-NEXT:    ret
+;
+; CHECK-NOFP16-GI-LABEL: fmov2d_neg0:
+; CHECK-NOFP16-GI:       // %bb.0:
+; CHECK-NOFP16-GI-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NOFP16-GI-NEXT:    fneg v0.2d, v0.2d
+; CHECK-NOFP16-GI-NEXT:    ret
+;
+; CHECK-FP16-GI-LABEL: fmov2d_neg0:
+; CHECK-FP16-GI:       // %bb.0:
+; CHECK-FP16-GI-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-FP16-GI-NEXT:    fneg v0.2d, v0.2d
+; CHECK-FP16-GI-NEXT:    ret
 	ret <2 x double> <double -0.0, double -0.0>
 }
 
@@ -571,27 +553,10 @@ define <1 x double> @fmov1d_neg0() {
 }
 
 define <2 x i32> @movi1d_1() {
-; CHECK-NOFP16-SD-LABEL: movi1d_1:
-; CHECK-NOFP16-SD:       // %bb.0:
-; CHECK-NOFP16-SD-NEXT:    movi d0, #0x00ffffffff0000
-; CHECK-NOFP16-SD-NEXT:    ret
-;
-; CHECK-FP16-SD-LABEL: movi1d_1:
-; CHECK-FP16-SD:       // %bb.0:
-; CHECK-FP16-SD-NEXT:    movi d0, #0x00ffffffff0000
-; CHECK-FP16-SD-NEXT:    ret
-;
-; CHECK-NOFP16-GI-LABEL: movi1d_1:
-; CHECK-NOFP16-GI:       // %bb.0:
-; CHECK-NOFP16-GI-NEXT:    adrp x8, .LCPI56_0
-; CHECK-NOFP16-GI-NEXT:    ldr d0, [x8, :lo12:.LCPI56_0]
-; CHECK-NOFP16-GI-NEXT:    ret
-;
-; CHECK-FP16-GI-LABEL: movi1d_1:
-; CHECK-FP16-GI:       // %bb.0:
-; CHECK-FP16-GI-NEXT:    adrp x8, .LCPI56_0
-; CHECK-FP16-GI-NEXT:    ldr d0, [x8, :lo12:.LCPI56_0]
-; CHECK-FP16-GI-NEXT:    ret
+; CHECK-LABEL: movi1d_1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi d0, #0x00ffffffff0000
+; CHECK-NEXT:    ret
   ret <2 x i32> <i32  -65536, i32 65535>
 }
 
@@ -616,18 +581,16 @@ define <2 x i32> @movi1d() {
 ;
 ; CHECK-NOFP16-GI-LABEL: movi1d:
 ; CHECK-NOFP16-GI:       // %bb.0:
-; CHECK-NOFP16-GI-NEXT:    adrp x8, .LCPI57_1
-; CHECK-NOFP16-GI-NEXT:    adrp x9, .LCPI57_0
-; CHECK-NOFP16-GI-NEXT:    ldr d0, [x8, :lo12:.LCPI57_1]
-; CHECK-NOFP16-GI-NEXT:    ldr d1, [x9, :lo12:.LCPI57_0]
+; CHECK-NOFP16-GI-NEXT:    movi d1, #0x00ffffffff0000
+; CHECK-NOFP16-GI-NEXT:    adrp x8, .LCPI57_0
+; CHECK-NOFP16-GI-NEXT:    ldr d0, [x8, :lo12:.LCPI57_0]
 ; CHECK-NOFP16-GI-NEXT:    b test_movi1d
 ;
 ; CHECK-FP16-GI-LABEL: movi1d:
 ; CHECK-FP16-GI:       // %bb.0:
-; CHECK-FP16-GI-NEXT:    adrp x8, .LCPI57_1
-; CHECK-FP16-GI-NEXT:    adrp x9, .LCPI57_0
-; CHECK-FP16-GI-NEXT:    ldr d0, [x8, :lo12:.LCPI57_1]
-; CHECK-FP16-GI-NEXT:    ldr d1, [x9, :lo12:.LCPI57_0]
+; CHECK-FP16-GI-NEXT:    movi d1, #0x00ffffffff0000
+; CHECK-FP16-GI-NEXT:    adrp x8, .LCPI57_0
+; CHECK-FP16-GI-NEXT:    ldr d0, [x8, :lo12:.LCPI57_0]
 ; CHECK-FP16-GI-NEXT:    b test_movi1d
   %1 = tail call <2 x i32> @test_movi1d(<2 x i32> <i32 -2147483648, i32 2147450880>, <2 x i32> <i32 -65536, i32 65535>)
   ret <2 x i32> %1
diff --git a/llvm/test/CodeGen/AArch64/qmovn.ll b/llvm/test/CodeGen/AArch64/qmovn.ll
index dc1b7df9b32ae..9d58e94d791a7 100644
--- a/llvm/test/CodeGen/AArch64/qmovn.ll
+++ b/llvm/test/CodeGen/AArch64/qmovn.ll
@@ -638,7 +638,8 @@ define <4 x i16> @sminsmax_range_unsigned_i64_to_i16(<2 x i16> %x, <2 x i64> %y)
 ; CHECK-SD-NEXT:    movi v3.2d, #0x0000000000ffff
 ; CHECK-SD-NEXT:    and v1.16b, v1.16b, v2.16b
 ; CHECK-SD-NEXT:    cmgt v2.2d, v3.2d, v1.2d
-; CHECK-SD-NEXT:    bif v1.16b, v3.16b, v2.16b
+; CHECK-SD-NEXT:    and v1.16b, v1.16b, v2.16b
+; CHECK-SD-NEXT:    orn v1.16b, v1.16b, v2.16b
 ; CHECK-SD-NEXT:    xtn v1.2s, v1.2d
 ; CHECK-SD-NEXT:    uzp1 v0.4h, v0.4h, v1.4h
 ; CHECK-SD-NEXT:    ret
@@ -709,8 +710,9 @@ define <4 x i16> @umin_range_unsigned_i64_to_i16(<2 x i16> %x, <2 x i64> %y) {
 ; CHECK-SD-LABEL: umin_range_unsigned_i64_to_i16:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    movi v2.2d, #0x0000000000ffff
-; CHECK-SD-NEXT:    cmhi v3.2d, v2.2d, v1.2d
-; CHECK-SD-NEXT:    bif v1.16b, v2.16b, v3.16b
+; CHECK-SD-NEXT:    cmhi v2.2d, v2.2d, v1.2d
+; CHECK-SD-NEXT:    and v1.16b, v1.16b, v2.16b
+; CHECK-SD-NEXT:    orn v1.16b, v1.16b, v2.16b
 ; CHECK-SD-NEXT:    xtn v1.2s, v1.2d
 ; CHECK-SD-NEXT:    uzp1 v0.4h, v0.4h, v1.4h
 ; CHECK-SD-NEXT:    ret
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-insert-vector-elt.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-insert-vector-elt.ll
index 6f4d257039bca..8f2e068fd020a 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-insert-vector-elt.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-insert-vector-elt.ll
@@ -255,8 +255,7 @@ define void @insertelement_v64f32(ptr %a, ptr %b) vscale_range(16,0) #0 {
 define <1 x double> @insertelement_v1f64(<1 x double> %op1) vscale_range(2,0) #0 {
 ; CHECK-LABEL: insertelement_v1f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov x8, #4617315517961601024 // =0x4014000000000000
-; CHECK-NEXT:    fmov d0, x8
+; CHECK-NEXT:    fmov d0, #5.00000000
 ; CHECK-NEXT:    ret
   %r = insertelement <1 x double> %op1, double 5.0, i64 0
   ret <1 x double> %r
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll b/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll
index f561502445bbe..b8a3aa46bdb53 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll
@@ -81,9 +81,8 @@ define half @add_v3HalfH(<3 x half> %bin.rdx)  {
 ;
 ; CHECK-GI-FP16-LABEL: add_v3HalfH:
 ; CHECK-GI-FP16:       // %bb.0:
-; CHECK-GI-FP16-NEXT:    adrp x8, .LCPI2_0
+; CHECK-GI-FP16-NEXT:    movi v1.4h, #128, lsl #8
 ; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-FP16-NEXT:    ldr h1, [x8, :lo12:.LCPI2_0]
 ; CHECK-GI-FP16-NEXT:    mov v0.h[3], v1.h[0]
 ; CHECK-GI-FP16-NEXT:    faddp v0.4h, v0.4h, v0.4h
 ; CHECK-GI-FP16-NEXT:    faddp h0, v0.2h
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization-nan.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization-nan.ll
index 2368b0288ccb7..1e04a0341ad17 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization-nan.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization-nan.ll
@@ -553,16 +553,14 @@ define half @test_v11f16_ninf(<11 x half> %a) nounwind {
 define float @test_v3f32(<3 x float> %a) nounwind {
 ; CHECK-NOFP-SD-LABEL: test_v3f32:
 ; CHECK-NOFP-SD:       // %bb.0:
-; CHECK-NOFP-SD-NEXT:    mov w8, #-4194304 // =0xffc00000
-; CHECK-NOFP-SD-NEXT:    fmov s1, w8
+; CHECK-NOFP-SD-NEXT:    mvni v1.2s, #63, msl #16
 ; CHECK-NOFP-SD-NEXT:    mov v0.s[3], v1.s[0]
 ; CHECK-NOFP-SD-NEXT:    fmaxnmv s0, v0.4s
 ; CHECK-NOFP-SD-NEXT:    ret
 ;
 ; CHECK-FP-SD-LABEL: test_v3f32:
 ; CHECK-FP-SD:       // %bb.0:
-; CHECK-FP-SD-NEXT:    mov w8, #-4194304 // =0xffc00000
-; CHECK-FP-SD-NEXT:    fmov s1, w8
+; CHECK-FP-SD-NEXT:    mvni v1.2s, #63, msl #16
 ; CHECK-FP-SD-NEXT:    mov v0.s[3], v1.s[0]
 ; CHECK-FP-SD-NEXT:    fmaxnmv s0, v0.4s
 ; CHECK-FP-SD-NEXT:    ret
@@ -589,16 +587,14 @@ define float @test_v3f32(<3 x float> %a) nounwind {
 define float @test_v3f32_ninf(<3 x float> %a) nounwind {
 ; CHECK-NOFP-SD-LABEL: test_v3f32_ninf:
 ; CHECK-NOFP-SD:       // %bb.0:
-; CHECK-NOFP-SD-NEXT:    mov w8, #-4194304 // =0xffc00000
-; CHECK-NOFP-SD-NEXT:    fmov s1, w8
+; CHECK-NOFP-SD-NEXT:    mvni v1.2s, #63, msl #16
 ; CHECK-NOFP-SD-NEXT:    mov v0.s[3], v1.s[0]
 ; CHECK-NOFP-SD-NEXT:    fmaxnmv s0, v0.4s
 ; CHECK-NOFP-SD-NEXT:    ret
 ;
 ; CHECK-FP-SD-LABEL: test_v3f32_ninf:
 ; CHECK-FP-SD:       // %bb.0:
-; CHECK-FP-SD-NEXT:    mov w8, #-4194304 // =0xffc00000
-; CHECK-FP-SD-NEXT:    fmov s1, w8
+; CHECK-FP-SD-NEXT:    mvni v1.2s, #63, msl #16
 ; CHECK-FP-SD-NEXT:    mov v0.s[3], v1.s[0]
 ; CHECK-FP-SD-NEXT:    fmaxnmv s0, v0.4s
 ; CHECK-FP-SD-NEXT:    ret
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll
index d81952087086e..4d665b2a599a7 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll
@@ -553,16 +553,14 @@ define half @test_v11f16_ninf(<11 x half> %a) nounwind {
 define float @test_v3f32(<3 x float> %a) nounwind {
 ; CHECK-NOFP-SD-LABEL: test_v3f32:
 ; CHECK-NOFP-SD:       // %bb.0:
-; CHECK-NOFP-SD-NEXT:    mov w8, #-8388608 // =0xff800000
-; CHECK-NOFP-SD-NEXT:    fmov s1, w8
+; CHECK-NOFP-SD-NEXT:    mvni v1.2s, #127, msl #16
 ; CHECK-NOFP-SD-NEXT:    mov v0.s[3], v1.s[0]
 ; CHECK-NOFP-SD-NEXT:    fmaxnmv s0, v0.4s
 ; CHECK-NOFP-SD-NEXT:    ret
 ;
 ; CHECK-FP-SD-LABEL: test_v3f32:
 ; CHECK-FP-SD:       // %bb.0:
-; CHECK-FP-SD-NEXT:    mov w8, #-8388608 // =0xff800000
-; CHECK-FP-SD-NEXT:    fmov s1, w8
+; CHECK-FP-SD-NEXT:    mvni v1.2s, #127, msl #16
 ; CHECK-FP-SD-NEXT:    mov v0.s[3], v1.s[0]
 ; CHECK-FP-SD-NEXT:    fmaxnmv s0, v0.4s
 ; CHECK-FP-SD-NEXT:    ret
@@ -589,16 +587,14 @@ define float @test_v3f32(<3 x float> %a) nounwind {
 define float @test_v3f32_ninf(<3 x float> %a) nounwind {
 ; CHECK-NOFP-SD-LABEL: test_v3f32_ninf:
 ; CHECK-NOFP-SD:       // %bb.0:
-; CHECK-NOFP-SD-NEXT:    mov w8, #-8388609 // =0xff7fffff
-; CHECK-NOFP-SD-NEXT:    fmov s1, w8
+; CHECK-NOFP-SD-NEXT:    mvni v1.2s, #128, lsl #16
 ; CHECK-NOFP-SD-NEXT:    mov v0.s[3], v1.s[0]
 ; CHECK-NOFP-SD-NEXT:    fmaxnmv s0, v0.4s
 ; CHECK-NOFP-SD-NEXT:    ret
 ;
 ; CHECK-FP-SD-LABEL: test_v3f32_ninf:
 ; CHECK-FP-SD:       // %bb.0:
-; CHECK-FP-SD-NEXT:    mov w8, #-8388609 // =0xff7fffff
-; CHECK-FP-SD-NEXT:    fmov s1, w8
+; CHECK-FP-SD-NEXT:    mvni v1.2s, #128, lsl #16
 ; CHECK-FP-SD-NEXT:    mov v0.s[3], v1.s[0]
 ; CHECK-FP-SD-NEXT:    fmaxnmv s0, v0.4s
 ; CHECK-FP-SD-NEXT:    ret
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmaximum.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmaximum.ll
index 1d5b70796bdb1..fcb06fb6c9725 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fmaximum.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fmaximum.ll
@@ -438,16 +438,14 @@ define half @test_v11f16(<11 x half> %a) nounwind {
 define float @test_v3f32(<3 x float> %a) nounwind {
 ; CHECK-NOFP-SD-LABEL: test_v3f32:
 ; CHECK-NOFP-SD:       // %bb.0:
-; CHECK-NOFP-SD-NEXT:    mov w8, #-8388608 // =0xff800000
-; CHECK-NOFP-SD-NEXT:    fmov s1, w8
+; CHECK-NOFP-SD-NEXT:    mvni v1.2s, #127, msl #16
 ; CHECK-NOFP-SD-NEXT:    mov v0.s[3], v1.s[0]
 ; CHECK-NOFP-SD-NEXT:    fmaxv s0, v0.4s
 ; CHECK-NOFP-SD-NEXT:    ret
 ;
 ; CHECK-FP-SD-LABEL: test_v3f32:
 ; CHECK-FP-SD:       // %bb.0:
-; CHECK-FP-SD-NEXT:    mov w8, #-8388608 // =0xff800000
-; CHECK-FP-SD-NEXT:    fmov s1, w8
+; CHECK-FP-SD-NEXT:    mvni v1.2s, #127, msl #16
 ; CHECK-FP-SD-NEXT:    mov v0.s[3], v1.s[0]
 ; CHECK-FP-SD-NEXT:    fmaxv s0, v0.4s
 ; CHECK-FP-SD-NEXT:    ret
@@ -475,16 +473,14 @@ define float @test_v3f32(<3 x float> %a) nounwind {
 define float @test_v3f32_ninf(<3 x float> %a) nounwind {
 ; CHECK-NOFP-SD-LABEL: test_v3f32_ninf:
 ; CHECK-NOFP-SD:       // %bb.0:
-; CHECK-NOFP-SD-NEXT:    mov w8, #-8388609 // =0xff7fffff
-; CHECK-NOFP-SD-NEXT:    fmov s1, w8
+; CHECK-NOFP-SD-NEXT:    mvni v1.2s, #128, lsl #16
 ; CHECK-NOFP-SD-NEXT:    mov v0.s[3], v1.s[0]
 ; CHECK-NOFP-SD-NEXT:    fmaxv s0, v0.4s
 ; CHECK-NOFP-SD-NEXT:    ret
 ;
 ; CHECK-FP-SD-LABEL: test_v3f32_ninf:
 ; CHECK-FP-SD:       // %bb.0:
-; CHECK-FP-SD-NEXT:    mov w8, #-8388609 // =0xff7fffff
-; CHECK-FP-SD-NEXT:    fmov s1, w8
+; CHECK-FP-SD-NEXT:    mvni v1.2s, #128, lsl #16
 ; CHECK-FP-SD-NEXT:    mov v0.s[3], v1.s[0]
 ; CHECK-FP-SD-NEXT:    fmaxv s0, v0.4s
 ; CHECK-FP-SD-NEXT:    ret
diff --git a/llvm/unittests/Target/AArch64/AArch64SelectionDAGTest.cpp b/llvm/unittests/Target/AArch64/AArch64SelectionDAGTest.cpp
index 12b7763274f6c..35f34f8604437 100644
--- a/llvm/unittests/Target/AArch64/AArch64SelectionDAGTest.cpp
+++ b/llvm/unittests/Target/AArch64/AArch64SelectionDAGTest.cpp
@@ -549,158 +549,41 @@ TEST_F(AArch64SelectionDAGTest, ComputeKnownBits_UADDO_CARRY) {
 }
 
 // Piggy-backing on the AArch64 tests to verify SelectionDAG::computeKnownBits.
-// Attempt to FREEZE the MOV/MVN nodes to show that they can still be analysed.
+// Attempt to FREEZE the MOV nodes to show that they can still be analysed.
 TEST_F(AArch64SelectionDAGTest, ComputeKnownBits_MOVI) {
   SDLoc Loc;
-  auto IntSca32VT = MVT::i32;
-  auto Int8Vec8VT = MVT::v8i8;
+  auto IntSca64VT = MVT::i64;
   auto Int16Vec8VT = MVT::v16i8;
-  auto Int4Vec16VT = MVT::v4i16;
-  auto Int8Vec16VT = MVT::v8i16;
-  auto Int2Vec32VT = MVT::v2i32;
-  auto Int4Vec32VT = MVT::v4i32;
   auto IntVec64VT = MVT::v1i64;
   auto Int2Vec64VT = MVT::v2i64;
-  auto N165 = DAG->getConstant(0x000000A5, Loc, IntSca32VT);
   KnownBits Known;
 
-  auto OpMOVIedit64 = DAG->getNode(AArch64ISD::MOVIedit, Loc, IntVec64VT, N165);
-  Known = DAG->computeKnownBits(OpMOVIedit64);
+  auto N165 = DAG->getConstant(0xFF00FF0000FF00FF, Loc, IntSca64VT);
+  auto OpMOVI64 = DAG->getNode(AArch64ISD::MOVI, Loc, IntVec64VT, N165);
+  Known = DAG->computeKnownBits(OpMOVI64);
   EXPECT_EQ(Known.Zero, APInt(64, 0x00FF00FFFF00FF00));
   EXPECT_EQ(Known.One, APInt(64, 0xFF00FF0000FF00FF));
 
-  auto OpMOVIedit128 =
-      DAG->getNode(AArch64ISD::MOVIedit, Loc, Int2Vec64VT, N165);
-  Known = DAG->computeKnownBits(OpMOVIedit128);
+  auto OpMOVI128 = DAG->getNode(AArch64ISD::MOVI, Loc, Int2Vec64VT, N165);
+  Known = DAG->computeKnownBits(OpMOVI128);
   EXPECT_EQ(Known.Zero, APInt(64, 0x00FF00FFFF00FF00));
   EXPECT_EQ(Known.One, APInt(64, 0xFF00FF0000FF00FF));
 
-  auto FrMOVIedit128 = DAG->getFreeze(OpMOVIedit128);
-  Known = DAG->computeKnownBits(FrMOVIedit128);
+  auto FrMOVI128 = DAG->getFreeze(OpMOVI128);
+  Known = DAG->computeKnownBits(FrMOVI128);
   EXPECT_EQ(Known.Zero, APInt(64, 0x00FF00FFFF00FF00));
   EXPECT_EQ(Known.One, APInt(64, 0xFF00FF0000FF00FF));
 
-  auto N264 = DAG->getConstant(264, Loc, IntSca32VT);
-  auto OpMOVImsl64 =
-      DAG->getNode(AArch64ISD::MOVImsl, Loc, Int2Vec32VT, N165, N264);
-  Known = DAG->computeKnownBits(OpMOVImsl64);
-  EXPECT_EQ(Known.Zero, APInt(32, 0xFFFF5A00));
-  EXPECT_EQ(Known.One, APInt(32, 0x0000A5FF));
-
-  auto N272 = DAG->getConstant(272, Loc, IntSca32VT);
-  auto OpMOVImsl128 =
-      DAG->getNode(AArch64ISD::MOVImsl, Loc, Int4Vec32VT, N165, N272);
-  Known = DAG->computeKnownBits(OpMOVImsl128);
-  EXPECT_EQ(Known.Zero, APInt(32, 0xFF5A0000));
-  EXPECT_EQ(Known.One, APInt(32, 0x00A5FFFF));
-
-  auto FrMOVImsl128 = DAG->getFreeze(OpMOVImsl128);
-  Known = DAG->computeKnownBits(FrMOVImsl128);
-  EXPECT_EQ(Known.Zero, APInt(32, 0xFF5A0000));
-  EXPECT_EQ(Known.One, APInt(32, 0x00A5FFFF));
-
-  auto OpMVNImsl64 =
-      DAG->getNode(AArch64ISD::MVNImsl, Loc, Int2Vec32VT, N165, N272);
-  Known = DAG->computeKnownBits(OpMVNImsl64);
-  EXPECT_EQ(Known.Zero, APInt(32, 0x00A5FFFF));
-  EXPECT_EQ(Known.One, APInt(32, 0xFF5A0000));
-
-  auto OpMVNImsl128 =
-      DAG->getNode(AArch64ISD::MVNImsl, Loc, Int4Vec32VT, N165, N264);
-  Known = DAG->computeKnownBits(OpMVNImsl128);
-  EXPECT_EQ(Known.Zero, APInt(32, 0x0000A5FF));
-  EXPECT_EQ(Known.One, APInt(32, 0xFFFF5A00));
-
-  auto FrMVNImsl128 = DAG->getFreeze(OpMVNImsl128);
-  Known = DAG->computeKnownBits(FrMVNImsl128);
-  EXPECT_EQ(Known.Zero, APInt(32, 0x0000A5FF));
-  EXPECT_EQ(Known.One, APInt(32, 0xFFFF5A00));
-
-  auto N0 = DAG->getConstant(0, Loc, IntSca32VT);
-  auto OpMOVIshift2Vec32 =
-      DAG->getNode(AArch64ISD::MOVIshift, Loc, Int2Vec32VT, N165, N0);
-  Known = DAG->computeKnownBits(OpMOVIshift2Vec32);
-  EXPECT_EQ(Known.Zero, APInt(32, 0xFFFFFF5A));
-  EXPECT_EQ(Known.One, APInt(32, 0x000000A5));
-
-  auto N24 = DAG->getConstant(24, Loc, IntSca32VT);
-  auto OpMOVIshift4Vec32 =
-      DAG->getNode(AArch64ISD::MOVIshift, Loc, Int4Vec32VT, N165, N24);
-  Known = DAG->computeKnownBits(OpMOVIshift4Vec32);
-  EXPECT_EQ(Known.Zero, APInt(32, 0x5AFFFFFF));
-  EXPECT_EQ(Known.One, APInt(32, 0xA5000000));
-
-  auto FrMOVIshift4Vec32 = DAG->getFreeze(OpMOVIshift4Vec32);
-  Known = DAG->computeKnownBits(FrMOVIshift4Vec32);
-  EXPECT_EQ(Known.Zero, APInt(32, 0x5AFFFFFF));
-  EXPECT_EQ(Known.One, APInt(32, 0xA5000000));
-
-  auto OpMVNIshift2Vec32 =
-      DAG->getNode(AArch64ISD::MVNIshift, Loc, Int2Vec32VT, N165, N24);
-  Known = DAG->computeKnownBits(OpMVNIshift2Vec32);
-  EXPECT_EQ(Known.Zero, APInt(32, 0xA5000000));
-  EXPECT_EQ(Known.One, APInt(32, 0x5AFFFFFF));
-
-  auto OpMVNIshift4Vec32 =
-      DAG->getNode(AArch64ISD::MVNIshift, Loc, Int4Vec32VT, N165, N0);
-  Known = DAG->computeKnownBits(OpMVNIshift4Vec32);
-  EXPECT_EQ(Known.Zero, APInt(32, 0x000000A5));
-  EXPECT_EQ(Known.One, APInt(32, 0xFFFFFF5A));
-
-  auto FrMVNIshift4Vec32 = DAG->getFreeze(OpMVNIshift4Vec32);
-  Known = DAG->computeKnownBits(FrMVNIshift4Vec32);
-  EXPECT_EQ(Known.Zero, APInt(32, 0x000000A5));
-  EXPECT_EQ(Known.One, APInt(32, 0xFFFFFF5A));
-
-  auto N8 = DAG->getConstant(8, Loc, IntSca32VT);
-  auto OpMOVIshift4Vec16 =
-      DAG->getNode(AArch64ISD::MOVIshift, Loc, Int4Vec16VT, N165, N0);
-  Known = DAG->computeKnownBits(OpMOVIshift4Vec16);
-  EXPECT_EQ(Known.Zero, APInt(16, 0xFF5A));
-  EXPECT_EQ(Known.One, APInt(16, 0x00A5));
-
-  auto OpMOVIshift8Vec16 =
-      DAG->getNode(AArch64ISD::MOVIshift, Loc, Int8Vec16VT, N165, N8);
-  Known = DAG->computeKnownBits(OpMOVIshift8Vec16);
-  EXPECT_EQ(Known.Zero, APInt(16, 0x5AFF));
-  EXPECT_EQ(Known.One, APInt(16, 0xA500));
-
-  auto FrMOVIshift8Vec16 = DAG->getFreeze(OpMOVIshift8Vec16);
-  Known = DAG->computeKnownBits(FrMOVIshift8Vec16);
-  EXPECT_EQ(Known.Zero, APInt(16, 0x5AFF));
-  EXPECT_EQ(Known.One, APInt(16, 0xA500));
-
-  auto OpMVNIshift4Vec16 =
-      DAG->getNode(AArch64ISD::MVNIshift, Loc, Int4Vec16VT, N165, N8);
-  Known = DAG->computeKnownBits(OpMVNIshift4Vec16);
-  EXPECT_EQ(Known.Zero, APInt(16, 0xA500));
-  EXPECT_EQ(Known.One, APInt(16, 0x5AFF));
-
-  auto OpMVNIshift8Vec16 =
-      DAG->getNode(AArch64ISD::MVNIshift, Loc, Int8Vec16VT, N165, N0);
-  Known = DAG->computeKnownBits(OpMVNIshift8Vec16);
-  EXPECT_EQ(Known.Zero, APInt(16, 0x00A5));
-  EXPECT_EQ(Known.One, APInt(16, 0xFF5A));
-
-  auto FrMVNIshift8Vec16 = DAG->getFreeze(OpMVNIshift8Vec16);
-  Known = DAG->computeKnownBits(FrMVNIshift8Vec16);
-  EXPECT_EQ(Known.Zero, APInt(16, 0x00A5));
-  EXPECT_EQ(Known.One, APInt(16, 0xFF5A));
-
-  auto OpMOVI8Vec8 = DAG->getNode(AArch64ISD::MOVI, Loc, Int8Vec8VT, N165);
-  Known = DAG->computeKnownBits(OpMOVI8Vec8);
-  EXPECT_EQ(Known.Zero, APInt(8, 0x5A));
-  EXPECT_EQ(Known.One, APInt(8, 0xA5));
-
-  auto OpMOVI16Vec8 = DAG->getNode(AArch64ISD::MOVI, Loc, Int16Vec8VT, N165);
-  Known = DAG->computeKnownBits(OpMOVI16Vec8);
-  EXPECT_EQ(Known.Zero, APInt(8, 0x5A));
-  EXPECT_EQ(Known.One, APInt(8, 0xA5));
-
-  auto FrMOVI16Vec8 = DAG->getFreeze(OpMOVI16Vec8);
-  Known = DAG->computeKnownBits(FrMOVI16Vec8);
-  EXPECT_EQ(Known.Zero, APInt(8, 0x5A));
-  EXPECT_EQ(Known.One, APInt(8, 0xA5));
+  auto OpMOVI8 = DAG->getNode(AArch64ISD::MOVI, Loc, Int16Vec8VT, N165);
+  Known = DAG->computeKnownBits(OpMOVI8);
+  EXPECT_EQ(Known.Zero, APInt(8, 0x00));
+  EXPECT_EQ(Known.One, APInt(8, 0x00));
+
+  auto OneLow = DAG->getConstant(0x1133557799bbddff, Loc, IntSca64VT);
+  auto OpMOVI8OneLow = DAG->getNode(AArch64ISD::MOVI, Loc, Int16Vec8VT, OneLow);
+  Known = DAG->computeKnownBits(OpMOVI8OneLow);
+  EXPECT_EQ(Known.Zero, APInt(8, 0x00));
+  EXPECT_EQ(Known.One, APInt(8, 0x11));
 }
 
 // Piggy-backing on the AArch64 tests to verify SelectionDAG::computeKnownBits.

>From 29785d53a7f63b75868f46329a33d2b48295e73f Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Fri, 20 Mar 2026 17:04:34 +0000
Subject: [PATCH 2/3] Rebase and address comments / windows build failure.

---
 llvm/lib/Target/AArch64/AArch64ExpandImm.cpp  | 80 +++++++++----------
 .../Target/AArch64/AArch64ISelDAGToDAG.cpp    | 49 ++----------
 .../Target/AArch64/AArch64ISelLowering.cpp    |  7 +-
 3 files changed, 48 insertions(+), 88 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp b/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp
index 9f89efebea9e1..818b207391e77 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp
@@ -736,106 +736,106 @@ bool AArch64_IMM::expandVectorMOVImm(
       return true;
     }
     if (AArch64_AM::isAdvSIMDModImmType10(Value)) {
-      Insn.push_back({Is64Bit ? AArch64::MOVID : AArch64::MOVIv2d_ns,
-                      AArch64_AM::encodeAdvSIMDModImmType10(Value), 0});
+      unsigned Opc = Is64Bit ? AArch64::MOVID : AArch64::MOVIv2d_ns;
+      Insn.push_back({Opc, AArch64_AM::encodeAdvSIMDModImmType10(Value), 0});
       return true;
     }
     if (AArch64_AM::isAdvSIMDModImmType1(Value)) {
-      Insn.push_back({Is64Bit ? AArch64::MOVIv2i32 : AArch64::MOVIv4i32,
-                      AArch64_AM::encodeAdvSIMDModImmType1(Value), 0});
+      unsigned Opc = Is64Bit ? AArch64::MOVIv2i32 : AArch64::MOVIv4i32;
+      Insn.push_back({Opc, AArch64_AM::encodeAdvSIMDModImmType1(Value), 0});
       return true;
     }
     if (AArch64_AM::isAdvSIMDModImmType2(Value)) {
-      Insn.push_back({Is64Bit ? AArch64::MOVIv2i32 : AArch64::MOVIv4i32,
-                      AArch64_AM::encodeAdvSIMDModImmType2(Value), 8});
+      unsigned Opc = Is64Bit ? AArch64::MOVIv2i32 : AArch64::MOVIv4i32;
+      Insn.push_back({Opc, AArch64_AM::encodeAdvSIMDModImmType2(Value), 8});
       return true;
     }
     if (AArch64_AM::isAdvSIMDModImmType3(Value)) {
-      Insn.push_back({Is64Bit ? AArch64::MOVIv2i32 : AArch64::MOVIv4i32,
-                      AArch64_AM::encodeAdvSIMDModImmType3(Value), 16});
+      unsigned Opc = Is64Bit ? AArch64::MOVIv2i32 : AArch64::MOVIv4i32;
+      Insn.push_back({Opc, AArch64_AM::encodeAdvSIMDModImmType3(Value), 16});
       return true;
     }
     if (AArch64_AM::isAdvSIMDModImmType4(Value)) {
-      Insn.push_back({Is64Bit ? AArch64::MOVIv2i32 : AArch64::MOVIv4i32,
-                      AArch64_AM::encodeAdvSIMDModImmType4(Value), 24});
+      unsigned Opc = Is64Bit ? AArch64::MOVIv2i32 : AArch64::MOVIv4i32;
+      Insn.push_back({Opc, AArch64_AM::encodeAdvSIMDModImmType4(Value), 24});
       return true;
     }
     if (AArch64_AM::isAdvSIMDModImmType5(Value)) {
-      Insn.push_back({Is64Bit ? AArch64::MOVIv4i16 : AArch64::MOVIv8i16,
-                      AArch64_AM::encodeAdvSIMDModImmType5(Value), 0});
+      unsigned Opc = Is64Bit ? AArch64::MOVIv4i16 : AArch64::MOVIv8i16;
+      Insn.push_back({Opc, AArch64_AM::encodeAdvSIMDModImmType5(Value), 0});
       return true;
     }
     if (AArch64_AM::isAdvSIMDModImmType6(Value)) {
-      Insn.push_back({Is64Bit ? AArch64::MOVIv4i16 : AArch64::MOVIv8i16,
-                      AArch64_AM::encodeAdvSIMDModImmType6(Value), 8});
+      unsigned Opc = Is64Bit ? AArch64::MOVIv4i16 : AArch64::MOVIv8i16;
+      Insn.push_back({Opc, AArch64_AM::encodeAdvSIMDModImmType6(Value), 8});
       return true;
     }
     if (AArch64_AM::isAdvSIMDModImmType7(Value)) {
-      Insn.push_back({Is64Bit ? AArch64::MOVIv2s_msl : AArch64::MOVIv4s_msl,
-                      AArch64_AM::encodeAdvSIMDModImmType7(Value), 264});
+      unsigned Opc = Is64Bit ? AArch64::MOVIv2s_msl : AArch64::MOVIv4s_msl;
+      Insn.push_back({Opc, AArch64_AM::encodeAdvSIMDModImmType7(Value), 264});
       return true;
     }
     if (AArch64_AM::isAdvSIMDModImmType8(Value)) {
-      Insn.push_back({Is64Bit ? AArch64::MOVIv2s_msl : AArch64::MOVIv4s_msl,
-                      AArch64_AM::encodeAdvSIMDModImmType8(Value), 272});
+      unsigned Opc = Is64Bit ? AArch64::MOVIv2s_msl : AArch64::MOVIv4s_msl;
+      Insn.push_back({Opc, AArch64_AM::encodeAdvSIMDModImmType8(Value), 272});
       return true;
     }
     if (AArch64_AM::isAdvSIMDModImmType9(Value)) {
-      Insn.push_back({Is64Bit ? AArch64::MOVIv8b_ns : AArch64::MOVIv16b_ns,
-                      AArch64_AM::encodeAdvSIMDModImmType9(Value), 0});
+      unsigned Opc = Is64Bit ? AArch64::MOVIv8b_ns : AArch64::MOVIv16b_ns;
+      Insn.push_back({Opc, AArch64_AM::encodeAdvSIMDModImmType9(Value), 0});
       return true;
     }
     if (AArch64_AM::isAdvSIMDModImmType11(Value)) {
-      Insn.push_back({Is64Bit ? AArch64::FMOVv2f32_ns : AArch64::FMOVv4f32_ns,
-                      AArch64_AM::encodeAdvSIMDModImmType11(Value), 0});
+      unsigned Opc = Is64Bit ? AArch64::FMOVv2f32_ns : AArch64::FMOVv4f32_ns;
+      Insn.push_back({Opc, AArch64_AM::encodeAdvSIMDModImmType11(Value), 0});
       return true;
     }
     if (AArch64_AM::isAdvSIMDModImmType12(Value)) {
-      Insn.push_back({Is64Bit ? AArch64::FMOVDi : AArch64::FMOVv2f64_ns,
-                      AArch64_AM::encodeAdvSIMDModImmType12(Value), 0});
+      unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVv2f64_ns;
+      Insn.push_back({Opc, AArch64_AM::encodeAdvSIMDModImmType12(Value), 0});
       return true;
     }
 
     APInt NotImm = ~Imm;
     Value = NotImm.trunc(64).getZExtValue();
     if (AArch64_AM::isAdvSIMDModImmType1(Value)) {
-      Insn.push_back({Is64Bit ? AArch64::MVNIv2i32 : AArch64::MVNIv4i32,
-                      AArch64_AM::encodeAdvSIMDModImmType1(Value), 0});
+      unsigned Opc = Is64Bit ? AArch64::MVNIv2i32 : AArch64::MVNIv4i32;
+      Insn.push_back({Opc, AArch64_AM::encodeAdvSIMDModImmType1(Value), 0});
       return true;
     }
     if (AArch64_AM::isAdvSIMDModImmType2(Value)) {
-      Insn.push_back({Is64Bit ? AArch64::MVNIv2i32 : AArch64::MVNIv4i32,
-                      AArch64_AM::encodeAdvSIMDModImmType2(Value), 8});
+      unsigned Opc = Is64Bit ? AArch64::MVNIv2i32 : AArch64::MVNIv4i32;
+      Insn.push_back({Opc, AArch64_AM::encodeAdvSIMDModImmType2(Value), 8});
       return true;
     }
     if (AArch64_AM::isAdvSIMDModImmType3(Value)) {
-      Insn.push_back({Is64Bit ? AArch64::MVNIv2i32 : AArch64::MVNIv4i32,
-                      AArch64_AM::encodeAdvSIMDModImmType3(Value), 16});
+      unsigned Opc = Is64Bit ? AArch64::MVNIv2i32 : AArch64::MVNIv4i32;
+      Insn.push_back({Opc, AArch64_AM::encodeAdvSIMDModImmType3(Value), 16});
       return true;
     }
     if (AArch64_AM::isAdvSIMDModImmType4(Value)) {
-      Insn.push_back({Is64Bit ? AArch64::MVNIv2i32 : AArch64::MVNIv4i32,
-                      AArch64_AM::encodeAdvSIMDModImmType4(Value), 24});
+      unsigned Opc = Is64Bit ? AArch64::MVNIv2i32 : AArch64::MVNIv4i32;
+      Insn.push_back({Opc, AArch64_AM::encodeAdvSIMDModImmType4(Value), 24});
       return true;
     }
     if (AArch64_AM::isAdvSIMDModImmType5(Value)) {
-      Insn.push_back({Is64Bit ? AArch64::MVNIv4i16 : AArch64::MVNIv8i16,
-                      AArch64_AM::encodeAdvSIMDModImmType5(Value), 0});
+      unsigned Opc = Is64Bit ? AArch64::MVNIv4i16 : AArch64::MVNIv8i16;
+      Insn.push_back({Opc, AArch64_AM::encodeAdvSIMDModImmType5(Value), 0});
       return true;
     }
     if (AArch64_AM::isAdvSIMDModImmType6(Value)) {
-      Insn.push_back({Is64Bit ? AArch64::MVNIv4i16 : AArch64::MVNIv8i16,
-                      AArch64_AM::encodeAdvSIMDModImmType6(Value), 8});
+      unsigned Opc = Is64Bit ? AArch64::MVNIv4i16 : AArch64::MVNIv8i16;
+      Insn.push_back({Opc, AArch64_AM::encodeAdvSIMDModImmType6(Value), 8});
       return true;
     }
     if (AArch64_AM::isAdvSIMDModImmType7(Value)) {
-      Insn.push_back({Is64Bit ? AArch64::MVNIv2s_msl : AArch64::MVNIv4s_msl,
-                      AArch64_AM::encodeAdvSIMDModImmType7(Value), 264});
+      unsigned Opc = Is64Bit ? AArch64::MVNIv2s_msl : AArch64::MVNIv4s_msl;
+      Insn.push_back({Opc, AArch64_AM::encodeAdvSIMDModImmType7(Value), 264});
       return true;
     }
     if (AArch64_AM::isAdvSIMDModImmType8(Value)) {
-      Insn.push_back({Is64Bit ? AArch64::MVNIv2s_msl : AArch64::MVNIv4s_msl,
-                      AArch64_AM::encodeAdvSIMDModImmType8(Value), 272});
+      unsigned Opc = Is64Bit ? AArch64::MVNIv2s_msl : AArch64::MVNIv4s_msl;
+      Insn.push_back({Opc, AArch64_AM::encodeAdvSIMDModImmType8(Value), 272});
       return true;
     }
   }
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index c4e007e802d5a..c283d69a3d938 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -607,51 +607,12 @@ static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected) {
 }
 #endif
 
-static APInt DecodeFMOVImm(uint64_t Imm, unsigned RegWidth) {
-  assert(RegWidth == 32 || RegWidth == 64);
-  if (RegWidth == 32)
-    return APInt(RegWidth,
-                 uint32_t(AArch64_AM::decodeAdvSIMDModImmType11(Imm)));
-  return APInt(RegWidth, AArch64_AM::decodeAdvSIMDModImmType12(Imm));
-}
-
-// Decodes the raw integer splat value from a NEON splat operation.
-static std::optional<APInt> DecodeNEONSplat(SDValue N) {
-  assert(N.getValueType().isInteger() && "Only integers are supported");
-  if (N->getOpcode() == AArch64ISD::NVCAST)
-    N = N->getOperand(0);
-  unsigned SplatWidth = N.getScalarValueSizeInBits();
-  if (N.getOpcode() == AArch64ISD::FMOV)
-    return DecodeFMOVImm(N.getConstantOperandVal(0), SplatWidth);
-  if (N->getOpcode() == AArch64ISD::MOVI)
-    return APInt(SplatWidth, N.getConstantOperandVal(0));
-  if (N->getOpcode() == AArch64ISD::MOVIshift)
-    return APInt(SplatWidth, N.getConstantOperandVal(0)
-                                 << N.getConstantOperandVal(1));
-  if (N->getOpcode() == AArch64ISD::MVNIshift)
-    return ~APInt(SplatWidth, N.getConstantOperandVal(0)
-                                  << N.getConstantOperandVal(1));
-  if (N->getOpcode() == AArch64ISD::MOVIedit)
-    return APInt(SplatWidth, AArch64_AM::decodeAdvSIMDModImmType10(
-                                 N.getConstantOperandVal(0)));
-  if (N->getOpcode() == AArch64ISD::DUP)
-    if (auto *Const = dyn_cast<ConstantSDNode>(N->getOperand(0)))
-      return Const->getAPIntValue().trunc(SplatWidth);
-  // TODO: Recognize more splat-like NEON operations. See ConstantBuildVector
-  // in AArch64ISelLowering.
-  return std::nullopt;
-}
-
-// If \p N is a NEON splat operation (movi, fmov, etc), return the splat value
-// matching the element size of N.
+// Decodes the integer splat value from a NEON splat operation.
 static std::optional<APInt> GetNEONSplatValue(SDValue N) {
-  unsigned SplatWidth = N.getScalarValueSizeInBits();
-  if (std::optional<APInt> SplatVal = DecodeNEONSplat(N)) {
-    if (SplatVal->getBitWidth() <= SplatWidth)
-      return APInt::getSplat(SplatWidth, *SplatVal);
-    if (SplatVal->isSplat(SplatWidth))
-      return SplatVal->trunc(SplatWidth);
-  }
+  unsigned RegWidth = N.getScalarValueSizeInBits();
+  if (N.getOpcode() == AArch64ISD::MOVI &&
+      N.getConstantOperandAPInt(0).isSplat(RegWidth))
+    return N.getConstantOperandAPInt(0).trunc(RegWidth);
   return std::nullopt;
 }
 
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 8a1a9da8cbaa8..fe6bbf1e0e0bd 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -13269,7 +13269,7 @@ bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
   const APInt ImmInt = Imm.bitcastToAPInt();
 
   if (!IsLegal && ImmInt.getBitWidth() <= 128) {
-    // Try duplicating it to all lanes and see if we can usea vector movi.
+    // Try duplicating it to all lanes and see if we can use a vector movi.
     APInt DefBits =
         ImmInt.getBitWidth() == 128 ? ImmInt : APInt::getSplat(64, ImmInt);
     SmallVector<AArch64_IMM::ImmInsnModel> Insn;
@@ -33086,9 +33086,8 @@ bool AArch64TargetLowering::canCreateUndefOrPoisonForTargetNode(
 bool AArch64TargetLowering::isTargetCanonicalConstantNode(SDValue Op) const {
   return Op.getOpcode() == AArch64ISD::DUP ||
          Op.getOpcode() == AArch64ISD::MOVI ||
-         // Ignoring fneg(movi(0)), because if it is folded to FPConstant(-0.0),
-         // ISel will select fmov(mov i64 0x8000000000000000), resulting in a
-         // fmov from fpr to gpr, which is more expensive than fneg(movi(0))
+         // ConstantBuildVector / TryWithFNeg may represent a negated constant
+         // as fneg(movi).
          (Op.getOpcode() == ISD::FNEG &&
           Op.getOperand(0).getOpcode() == AArch64ISD::MOVI) ||
          (Op.getOpcode() == ISD::EXTRACT_SUBVECTOR &&

>From 4201ee04cf253e4affd94228ad90204e6b74e00e Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Wed, 1 Apr 2026 14:19:17 +0100
Subject: [PATCH 3/3] Rebase and use optional for operands to simplify
 instruction generation

---
 llvm/lib/Target/AArch64/AArch64ExpandImm.cpp  | 16 +++++---
 llvm/lib/Target/AArch64/AArch64ExpandImm.h    |  4 +-
 .../AArch64/AArch64ExpandPseudoInsts.cpp      | 35 +++++++++---------
 .../Target/AArch64/AArch64ISelDAGToDAG.cpp    | 23 ++++++------
 .../GISel/AArch64InstructionSelector.cpp      | 37 +++++++++----------
 5 files changed, 58 insertions(+), 57 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp b/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp
index 818b207391e77..da5ab84be76c7 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp
@@ -732,12 +732,13 @@ bool AArch64_IMM::expandVectorMOVImm(
   if (ST->isNeonAvailable() && Imm.getHiBits(64) == Imm.getLoBits(64)) {
     uint64_t Value = Imm.trunc(64).getZExtValue();
     if (Value == 0) {
-      Insn.push_back({AArch64::FMOVD0, 0, 0});
+      Insn.push_back({AArch64::FMOVD0, std::nullopt, std::nullopt});
       return true;
     }
     if (AArch64_AM::isAdvSIMDModImmType10(Value)) {
       unsigned Opc = Is64Bit ? AArch64::MOVID : AArch64::MOVIv2d_ns;
-      Insn.push_back({Opc, AArch64_AM::encodeAdvSIMDModImmType10(Value), 0});
+      Insn.push_back(
+          {Opc, AArch64_AM::encodeAdvSIMDModImmType10(Value), std::nullopt});
       return true;
     }
     if (AArch64_AM::isAdvSIMDModImmType1(Value)) {
@@ -782,17 +783,20 @@ bool AArch64_IMM::expandVectorMOVImm(
     }
     if (AArch64_AM::isAdvSIMDModImmType9(Value)) {
       unsigned Opc = Is64Bit ? AArch64::MOVIv8b_ns : AArch64::MOVIv16b_ns;
-      Insn.push_back({Opc, AArch64_AM::encodeAdvSIMDModImmType9(Value), 0});
+      Insn.push_back(
+          {Opc, AArch64_AM::encodeAdvSIMDModImmType9(Value), std::nullopt});
       return true;
     }
     if (AArch64_AM::isAdvSIMDModImmType11(Value)) {
       unsigned Opc = Is64Bit ? AArch64::FMOVv2f32_ns : AArch64::FMOVv4f32_ns;
-      Insn.push_back({Opc, AArch64_AM::encodeAdvSIMDModImmType11(Value), 0});
+      Insn.push_back(
+          {Opc, AArch64_AM::encodeAdvSIMDModImmType11(Value), std::nullopt});
       return true;
     }
     if (AArch64_AM::isAdvSIMDModImmType12(Value)) {
       unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVv2f64_ns;
-      Insn.push_back({Opc, AArch64_AM::encodeAdvSIMDModImmType12(Value), 0});
+      Insn.push_back(
+          {Opc, AArch64_AM::encodeAdvSIMDModImmType12(Value), std::nullopt});
       return true;
     }
 
@@ -852,7 +856,7 @@ bool AArch64_IMM::expandVectorMOVImm(
     }
     uint64_t Encoding;
     if (AArch64_AM::isSVELogicalImm(64, Val64.getZExtValue(), Encoding)) {
-      Insn.push_back({AArch64::DUPM_ZI, Encoding, 0});
+      Insn.push_back({AArch64::DUPM_ZI, Encoding, std::nullopt});
       return true;
     }
   }
diff --git a/llvm/lib/Target/AArch64/AArch64ExpandImm.h b/llvm/lib/Target/AArch64/AArch64ExpandImm.h
index d50cf3723cfc6..73390454e4184 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandImm.h
+++ b/llvm/lib/Target/AArch64/AArch64ExpandImm.h
@@ -24,8 +24,8 @@ namespace AArch64_IMM {
 
 struct ImmInsnModel {
   unsigned Opcode;
-  uint64_t Op1;
-  uint64_t Op2;
+  std::optional<uint32_t> Op1;
+  std::optional<uint32_t> Op2;
 };
 
 void expandMOVImm(uint64_t Imm, unsigned BitSize,
diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index 4ab8adeb2c9bc..d528742da8c11 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -177,7 +177,7 @@ bool AArch64ExpandPseudoImpl::expandMOVImm(MachineBasicBlock &MBB,
         MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
                            .add(MI.getOperand(0))
                            .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
-                           .addImm(I->Op2));
+                           .addImm(*I->Op2));
       } else {
         Register DstReg = MI.getOperand(0).getReg();
         bool DstIsDead = MI.getOperand(0).isDead();
@@ -187,7 +187,7 @@ bool AArch64ExpandPseudoImpl::expandMOVImm(MachineBasicBlock &MBB,
                                     getDeadRegState(DstIsDead && LastItem) |
                                     RenamableState)
                 .addReg(DstReg)
-                .addImm(I->Op2));
+                .addImm(*I->Op2));
       }
       break;
     case AArch64::EONXrs:
@@ -203,32 +203,33 @@ bool AArch64ExpandPseudoImpl::expandMOVImm(MachineBasicBlock &MBB,
                                   RenamableState)
               .addReg(DstReg)
               .addReg(DstReg)
-              .addImm(I->Op2));
+              .addImm(*I->Op2));
     } break;
     case AArch64::MOVNWi:
     case AArch64::MOVNXi:
     case AArch64::MOVZWi:
     case AArch64::MOVZXi: {
       bool DstIsDead = MI.getOperand(0).isDead();
-      MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
-        .addReg(DstReg, RegState::Define |
-                getDeadRegState(DstIsDead && LastItem) |
-                RenamableState)
-        .addImm(I->Op1)
-        .addImm(I->Op2));
+      MIBS.push_back(
+          BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
+              .addReg(DstReg, RegState::Define |
+                                  getDeadRegState(DstIsDead && LastItem) |
+                                  RenamableState)
+              .addImm(*I->Op1)
+              .addImm(*I->Op2));
       } break;
     case AArch64::MOVKWi:
     case AArch64::MOVKXi: {
       Register DstReg = MI.getOperand(0).getReg();
       bool DstIsDead = MI.getOperand(0).isDead();
-      MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
-        .addReg(DstReg,
-                RegState::Define |
-                getDeadRegState(DstIsDead && LastItem) |
-                RenamableState)
-        .addReg(DstReg)
-        .addImm(I->Op1)
-        .addImm(I->Op2));
+      MIBS.push_back(
+          BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
+              .addReg(DstReg, RegState::Define |
+                                  getDeadRegState(DstIsDead && LastItem) |
+                                  RenamableState)
+              .addReg(DstReg)
+              .addImm(*I->Op1)
+              .addImm(*I->Op2));
       } break;
     }
   }
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index c283d69a3d938..e97263db19575 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -5056,10 +5056,6 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
         case AArch64::FMOVv4f32_ns:
         case AArch64::FMOVDi:
         case AArch64::FMOVv2f64_ns:
-          Src = CurDAG->getMachineNode(
-              Insn.Opcode, DL, FVT,
-              CurDAG->getTargetConstant(Insn.Op1, DL, MVT::i64));
-          break;
         case AArch64::MOVIv2i32:
         case AArch64::MOVIv4i32:
         case AArch64::MOVIv4i16:
@@ -5071,16 +5067,19 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
         case AArch64::MVNIv4i16:
         case AArch64::MVNIv8i16:
         case AArch64::MVNIv2s_msl:
-        case AArch64::MVNIv4s_msl:
-          Src = CurDAG->getMachineNode(
-              Insn.Opcode, DL, FVT,
-              CurDAG->getTargetConstant(Insn.Op1, DL, MVT::i64),
-              CurDAG->getTargetConstant(Insn.Op2, DL, MVT::i64));
+        case AArch64::MVNIv4s_msl: {
+          SmallVector<SDValue> Ops;
+          if (Insn.Op1)
+            Ops.push_back(CurDAG->getTargetConstant(*Insn.Op1, DL, MVT::i64));
+          if (Insn.Op2)
+            Ops.push_back(CurDAG->getTargetConstant(*Insn.Op2, DL, MVT::i64));
+          Src = CurDAG->getMachineNode(Insn.Opcode, DL, FVT, Ops);
           break;
+        }
         case AArch64::DUPM_ZI:
           Src = CurDAG->getMachineNode(
               Insn.Opcode, DL, MVT::nxv2f64,
-              CurDAG->getTargetConstant(Insn.Op1, DL, MVT::i64));
+              CurDAG->getTargetConstant(*Insn.Op1, DL, MVT::i64));
           Src = CurDAG
                     ->getTargetExtractSubreg(AArch64::zsub, DL, FVT,
                                              SDValue(Src, 0))
@@ -5089,8 +5088,8 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
         case AArch64::DUP_ZI_D:
           Src = CurDAG->getMachineNode(
               Insn.Opcode, DL, MVT::nxv2f64,
-              CurDAG->getTargetConstant(Insn.Op1, DL, MVT::i64),
-              CurDAG->getTargetConstant(Insn.Op2, DL, MVT::i64));
+              CurDAG->getTargetConstant(*Insn.Op1, DL, MVT::i64),
+              CurDAG->getTargetConstant(*Insn.Op2, DL, MVT::i64));
           Src = CurDAG
                     ->getTargetExtractSubreg(AArch64::zsub, DL, FVT,
                                              SDValue(Src, 0))
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 97546adf94a8a..716789c404cad 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -5515,19 +5515,6 @@ AArch64InstructionSelector::emitConstantMOVVector(DstOp Dst, APInt Imm,
         }
         return &*Mov;
       }
-      case AArch64::MOVID:
-      case AArch64::MOVIv2d_ns:
-      case AArch64::MOVIv8b_ns:
-      case AArch64::MOVIv16b_ns:
-      case AArch64::FMOVv2f32_ns:
-      case AArch64::FMOVv4f32_ns:
-      case AArch64::FMOVDi:
-      case AArch64::FMOVv2f64_ns: {
-        auto Mov =
-            MIRBuilder.buildInstr(Insn.Opcode, {Dst}, {}).addImm(Insn.Op1);
-        constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
-        return &*Mov;
-      }
       case AArch64::MOVIv2i32:
       case AArch64::MOVIv4i32:
       case AArch64::MOVIv4i16:
@@ -5539,17 +5526,27 @@ AArch64InstructionSelector::emitConstantMOVVector(DstOp Dst, APInt Imm,
       case AArch64::MVNIv4i16:
       case AArch64::MVNIv8i16:
       case AArch64::MVNIv2s_msl:
-      case AArch64::MVNIv4s_msl: {
-        auto Mov = MIRBuilder.buildInstr(Insn.Opcode, {Dst}, {})
-                       .addImm(Insn.Op1)
-                       .addImm(Insn.Op2);
+      case AArch64::MVNIv4s_msl:
+      case AArch64::MOVID:
+      case AArch64::MOVIv2d_ns:
+      case AArch64::MOVIv8b_ns:
+      case AArch64::MOVIv16b_ns:
+      case AArch64::FMOVv2f32_ns:
+      case AArch64::FMOVv4f32_ns:
+      case AArch64::FMOVDi:
+      case AArch64::FMOVv2f64_ns: {
+        auto Mov = MIRBuilder.buildInstr(Insn.Opcode, {Dst}, {});
+        if (Insn.Op1)
+          Mov.addImm(*Insn.Op1);
+        if (Insn.Op2)
+          Mov.addImm(*Insn.Op2);
         constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
         return &*Mov;
       }
       case AArch64::DUPM_ZI: {
         auto Mov =
             MIRBuilder.buildInstr(Insn.Opcode, {&AArch64::ZPRRegClass}, {})
-                .addImm(Insn.Op1);
+                .addImm(*Insn.Op1);
         constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
         Mov = MIB.buildInstr(TargetOpcode::COPY, {Dst}, {})
                   .addReg(Mov.getReg(0), {},
@@ -5563,8 +5560,8 @@ AArch64InstructionSelector::emitConstantMOVVector(DstOp Dst, APInt Imm,
       case AArch64::DUP_ZI_D: {
         auto Mov =
             MIRBuilder.buildInstr(Insn.Opcode, {&AArch64::ZPRRegClass}, {})
-                .addImm(Insn.Op1)
-                .addImm(Insn.Op2);
+                .addImm(*Insn.Op1)
+                .addImm(*Insn.Op2);
         constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
         Mov = MIB.buildInstr(TargetOpcode::COPY, {Dst}, {})
                   .addReg(Mov.getReg(0), {},



More information about the llvm-commits mailing list