[llvm] [AArch64] MOVI lowering (PR #185526)
David Green via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 1 06:19:34 PDT 2026
https://github.com/davemgreen updated https://github.com/llvm/llvm-project/pull/185526
>From 413d21b35c37bad24ecdb8c902ce90d3fd131a5c Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Fri, 20 Mar 2026 16:36:20 +0000
Subject: [PATCH 1/3] [AArch64] MOVI lowering
This patch redoes how movi is lowered, adding an expandVectorMOVImm function
that, similar to expandMOVImm for scalar immediates, returns how the vector
immediate can be lowered. This is used to create a single MOVI ISD node that
can be used for all immediates. We then use the same method to work out if an
fp constant can be lowered to a vector move.
One test, qmovn.ll, hits an unfortunate case where demanded-bits is now able
to remove an and as the bits are unused after a truncation, leading to not
being able to generate a BSP.
---
llvm/lib/Target/AArch64/AArch64Combine.td | 11 +-
llvm/lib/Target/AArch64/AArch64ExpandImm.cpp | 141 +-
llvm/lib/Target/AArch64/AArch64ExpandImm.h | 8 +-
.../Target/AArch64/AArch64ISelDAGToDAG.cpp | 120 +-
.../Target/AArch64/AArch64ISelLowering.cpp | 308 +--
.../lib/Target/AArch64/AArch64InstrFormats.td | 16 -
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 111 +-
.../GISel/AArch64InstructionSelector.cpp | 514 ++---
.../GISel/AArch64PostLegalizerCombiner.cpp | 1 +
.../GISel/AArch64PostLegalizerLowering.cpp | 61 +
.../AArch64/GISel/AArch64RegisterBankInfo.cpp | 12 +-
.../GlobalISel/select-fp16-fconstant.mir | 6 +-
.../CodeGen/AArch64/arm64-build-vector.ll | 26 +-
.../test/CodeGen/AArch64/arm64-fp-imm-size.ll | 18 +-
llvm/test/CodeGen/AArch64/arm64-fp-imm.ll | 6 +-
llvm/test/CodeGen/AArch64/arm64-fp128.ll | 24 +-
.../CodeGen/AArch64/arm64-neon-aba-abd.ll | 9 +-
llvm/test/CodeGen/AArch64/bf16-imm.ll | 27 +-
.../test/CodeGen/AArch64/bf16-instructions.ll | 6 +-
llvm/test/CodeGen/AArch64/clmul-fixed.ll | 2037 ++++++++---------
llvm/test/CodeGen/AArch64/dup.ll | 9 +-
llvm/test/CodeGen/AArch64/f16-imm.ll | 36 +-
llvm/test/CodeGen/AArch64/f16-instructions.ll | 12 +-
llvm/test/CodeGen/AArch64/fabs-combine.ll | 19 +-
llvm/test/CodeGen/AArch64/fcvt-fixed.ll | 104 +-
llvm/test/CodeGen/AArch64/fdiv-const.ll | 36 +-
.../AArch64/fp-maximumnum-minimumnum.ll | 6 +-
.../test/CodeGen/AArch64/fptosi-sat-scalar.ll | 36 +-
.../test/CodeGen/AArch64/fptosi-sat-vector.ll | 48 +-
.../test/CodeGen/AArch64/fptoui-sat-scalar.ll | 7 +-
.../test/CodeGen/AArch64/fptoui-sat-vector.ll | 155 +-
llvm/test/CodeGen/AArch64/frem-power2.ll | 4 +-
.../half-precision-signof-no-assert.ll | 3 +-
llvm/test/CodeGen/AArch64/isinf.ll | 3 +-
llvm/test/CodeGen/AArch64/known-never-nan.ll | 9 +-
llvm/test/CodeGen/AArch64/movi64_sve.ll | 267 +--
.../AArch64/neon-bitwise-instructions.ll | 220 +-
.../AArch64/neon-compare-instructions.ll | 37 +-
llvm/test/CodeGen/AArch64/neon-mov.ll | 123 +-
llvm/test/CodeGen/AArch64/qmovn.ll | 8 +-
.../sve-fixed-length-insert-vector-elt.ll | 3 +-
llvm/test/CodeGen/AArch64/vecreduce-fadd.ll | 3 +-
.../vecreduce-fmax-legalization-nan.ll | 12 +-
.../AArch64/vecreduce-fmax-legalization.ll | 12 +-
.../CodeGen/AArch64/vecreduce-fmaximum.ll | 12 +-
.../AArch64/AArch64SelectionDAGTest.cpp | 155 +-
46 files changed, 2097 insertions(+), 2704 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index fe953a627939d..285b3ab7ea026 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -273,7 +273,16 @@ def build_vector_to_vector_insert : GICombineRule<
(apply [{ applyLowerBuildToInsertVecElt(*${root}, MRI, B); }])
>;
-def build_vector_lowering : GICombineGroup<[build_vector_to_dup,
+def const_build_vector_to_fneg_matchdata : GIDefMatchData<"std::pair<APInt, unsigned>">;
+def const_build_vector_to_fneg : GICombineRule<
+ (defs root:$root, const_build_vector_to_fneg_matchdata:$matchinfo),
+ (match (wip_match_opcode G_BUILD_VECTOR):$root,
+ [{ return matchConstBuildVectorToFNeg(*${root}, MRI, STI, ${matchinfo}); }]),
+ (apply [{ applyConstBuildVectorToFNeg(*${root}, MRI, B, ${matchinfo}); }])
+>;
+
+def build_vector_lowering : GICombineGroup<[const_build_vector_to_fneg,
+ build_vector_to_dup,
build_vector_to_vector_insert]>;
def lower_vector_fcmp : GICombineRule<
diff --git a/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp b/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp
index f44cb8a0628d7..9f89efebea9e1 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp
@@ -10,8 +10,9 @@
//
//===----------------------------------------------------------------------===//
-#include "AArch64.h"
#include "AArch64ExpandImm.h"
+#include "AArch64.h"
+#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
using namespace llvm;
@@ -720,3 +721,141 @@ void AArch64_IMM::expandMOVImm(uint64_t Imm, unsigned BitSize,
// four-instruction sequence.
expandMOVImmSimple(Imm, BitSize, OneChunks, ZeroChunks, Insn);
}
+
+bool AArch64_IMM::expandVectorMOVImm(
+ APInt Imm, const AArch64Subtarget *ST,
+ SmallVectorImpl<AArch64_IMM::ImmInsnModel> &Insn) {
+ assert((Imm.getBitWidth() == 64 || Imm.getBitWidth() == 128) &&
+ "Expected vector sized constant");
+ bool Is64Bit = Imm.getBitWidth() == 64;
+
+ if (ST->isNeonAvailable() && Imm.getHiBits(64) == Imm.getLoBits(64)) {
+ uint64_t Value = Imm.trunc(64).getZExtValue();
+ if (Value == 0) {
+ Insn.push_back({AArch64::FMOVD0, 0, 0});
+ return true;
+ }
+ if (AArch64_AM::isAdvSIMDModImmType10(Value)) {
+ Insn.push_back({Is64Bit ? AArch64::MOVID : AArch64::MOVIv2d_ns,
+ AArch64_AM::encodeAdvSIMDModImmType10(Value), 0});
+ return true;
+ }
+ if (AArch64_AM::isAdvSIMDModImmType1(Value)) {
+ Insn.push_back({Is64Bit ? AArch64::MOVIv2i32 : AArch64::MOVIv4i32,
+ AArch64_AM::encodeAdvSIMDModImmType1(Value), 0});
+ return true;
+ }
+ if (AArch64_AM::isAdvSIMDModImmType2(Value)) {
+ Insn.push_back({Is64Bit ? AArch64::MOVIv2i32 : AArch64::MOVIv4i32,
+ AArch64_AM::encodeAdvSIMDModImmType2(Value), 8});
+ return true;
+ }
+ if (AArch64_AM::isAdvSIMDModImmType3(Value)) {
+ Insn.push_back({Is64Bit ? AArch64::MOVIv2i32 : AArch64::MOVIv4i32,
+ AArch64_AM::encodeAdvSIMDModImmType3(Value), 16});
+ return true;
+ }
+ if (AArch64_AM::isAdvSIMDModImmType4(Value)) {
+ Insn.push_back({Is64Bit ? AArch64::MOVIv2i32 : AArch64::MOVIv4i32,
+ AArch64_AM::encodeAdvSIMDModImmType4(Value), 24});
+ return true;
+ }
+ if (AArch64_AM::isAdvSIMDModImmType5(Value)) {
+ Insn.push_back({Is64Bit ? AArch64::MOVIv4i16 : AArch64::MOVIv8i16,
+ AArch64_AM::encodeAdvSIMDModImmType5(Value), 0});
+ return true;
+ }
+ if (AArch64_AM::isAdvSIMDModImmType6(Value)) {
+ Insn.push_back({Is64Bit ? AArch64::MOVIv4i16 : AArch64::MOVIv8i16,
+ AArch64_AM::encodeAdvSIMDModImmType6(Value), 8});
+ return true;
+ }
+ if (AArch64_AM::isAdvSIMDModImmType7(Value)) {
+ Insn.push_back({Is64Bit ? AArch64::MOVIv2s_msl : AArch64::MOVIv4s_msl,
+ AArch64_AM::encodeAdvSIMDModImmType7(Value), 264});
+ return true;
+ }
+ if (AArch64_AM::isAdvSIMDModImmType8(Value)) {
+ Insn.push_back({Is64Bit ? AArch64::MOVIv2s_msl : AArch64::MOVIv4s_msl,
+ AArch64_AM::encodeAdvSIMDModImmType8(Value), 272});
+ return true;
+ }
+ if (AArch64_AM::isAdvSIMDModImmType9(Value)) {
+ Insn.push_back({Is64Bit ? AArch64::MOVIv8b_ns : AArch64::MOVIv16b_ns,
+ AArch64_AM::encodeAdvSIMDModImmType9(Value), 0});
+ return true;
+ }
+ if (AArch64_AM::isAdvSIMDModImmType11(Value)) {
+ Insn.push_back({Is64Bit ? AArch64::FMOVv2f32_ns : AArch64::FMOVv4f32_ns,
+ AArch64_AM::encodeAdvSIMDModImmType11(Value), 0});
+ return true;
+ }
+ if (AArch64_AM::isAdvSIMDModImmType12(Value)) {
+ Insn.push_back({Is64Bit ? AArch64::FMOVDi : AArch64::FMOVv2f64_ns,
+ AArch64_AM::encodeAdvSIMDModImmType12(Value), 0});
+ return true;
+ }
+
+ APInt NotImm = ~Imm;
+ Value = NotImm.trunc(64).getZExtValue();
+ if (AArch64_AM::isAdvSIMDModImmType1(Value)) {
+ Insn.push_back({Is64Bit ? AArch64::MVNIv2i32 : AArch64::MVNIv4i32,
+ AArch64_AM::encodeAdvSIMDModImmType1(Value), 0});
+ return true;
+ }
+ if (AArch64_AM::isAdvSIMDModImmType2(Value)) {
+ Insn.push_back({Is64Bit ? AArch64::MVNIv2i32 : AArch64::MVNIv4i32,
+ AArch64_AM::encodeAdvSIMDModImmType2(Value), 8});
+ return true;
+ }
+ if (AArch64_AM::isAdvSIMDModImmType3(Value)) {
+ Insn.push_back({Is64Bit ? AArch64::MVNIv2i32 : AArch64::MVNIv4i32,
+ AArch64_AM::encodeAdvSIMDModImmType3(Value), 16});
+ return true;
+ }
+ if (AArch64_AM::isAdvSIMDModImmType4(Value)) {
+ Insn.push_back({Is64Bit ? AArch64::MVNIv2i32 : AArch64::MVNIv4i32,
+ AArch64_AM::encodeAdvSIMDModImmType4(Value), 24});
+ return true;
+ }
+ if (AArch64_AM::isAdvSIMDModImmType5(Value)) {
+ Insn.push_back({Is64Bit ? AArch64::MVNIv4i16 : AArch64::MVNIv8i16,
+ AArch64_AM::encodeAdvSIMDModImmType5(Value), 0});
+ return true;
+ }
+ if (AArch64_AM::isAdvSIMDModImmType6(Value)) {
+ Insn.push_back({Is64Bit ? AArch64::MVNIv4i16 : AArch64::MVNIv8i16,
+ AArch64_AM::encodeAdvSIMDModImmType6(Value), 8});
+ return true;
+ }
+ if (AArch64_AM::isAdvSIMDModImmType7(Value)) {
+ Insn.push_back({Is64Bit ? AArch64::MVNIv2s_msl : AArch64::MVNIv4s_msl,
+ AArch64_AM::encodeAdvSIMDModImmType7(Value), 264});
+ return true;
+ }
+ if (AArch64_AM::isAdvSIMDModImmType8(Value)) {
+ Insn.push_back({Is64Bit ? AArch64::MVNIv2s_msl : AArch64::MVNIv4s_msl,
+ AArch64_AM::encodeAdvSIMDModImmType8(Value), 272});
+ return true;
+ }
+ }
+
+ // TODO: We should be able to support 64-bit destinations too
+ if (ST->hasSVE() && !Is64Bit && Imm.getHiBits(64) == Imm.getLoBits(64)) {
+ // See if we can make use of the SVE dup instruction.
+ APInt Val64 = Imm.trunc(64);
+ int32_t ImmVal, ShiftVal;
+ if (AArch64_AM::isSVECpyDupImm(64, Val64.getSExtValue(), ImmVal,
+ ShiftVal)) {
+ Insn.push_back({AArch64::DUP_ZI_D, (uint64_t)ImmVal, (uint64_t)ShiftVal});
+ return true;
+ }
+ uint64_t Encoding;
+ if (AArch64_AM::isSVELogicalImm(64, Val64.getZExtValue(), Encoding)) {
+ Insn.push_back({AArch64::DUPM_ZI, Encoding, 0});
+ return true;
+ }
+ }
+
+ return false;
+}
\ No newline at end of file
diff --git a/llvm/lib/Target/AArch64/AArch64ExpandImm.h b/llvm/lib/Target/AArch64/AArch64ExpandImm.h
index 42c97d2c3e9b5..d50cf3723cfc6 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandImm.h
+++ b/llvm/lib/Target/AArch64/AArch64ExpandImm.h
@@ -13,10 +13,13 @@
#ifndef LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64EXPANDIMM_H
#define LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64EXPANDIMM_H
+#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallVector.h"
namespace llvm {
+class AArch64Subtarget;
+
namespace AArch64_IMM {
struct ImmInsnModel {
@@ -26,7 +29,10 @@ struct ImmInsnModel {
};
void expandMOVImm(uint64_t Imm, unsigned BitSize,
- SmallVectorImpl<ImmInsnModel> &Insn);
+ SmallVectorImpl<ImmInsnModel> &Insn);
+
+bool expandVectorMOVImm(APInt Imm, const AArch64Subtarget *ST,
+ SmallVectorImpl<ImmInsnModel> &Insn);
} // end namespace AArch64_IMM
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 9d3e707cccaaa..c4e007e802d5a 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -10,6 +10,7 @@
//
//===----------------------------------------------------------------------===//
+#include "AArch64ExpandImm.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
@@ -193,10 +194,11 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
return false;
APInt Imm;
- if (Op.getOperand(1).getOpcode() == AArch64ISD::MOVIshift)
- Imm = APInt(VT.getScalarSizeInBits(),
- Op.getOperand(1).getConstantOperandVal(0)
- << Op.getOperand(1).getConstantOperandVal(1));
+ if (Op.getOperand(1).getOpcode() == AArch64ISD::MOVI &&
+ Op.getOperand(1).getConstantOperandAPInt(0).isSplat(
+ VT.getScalarSizeInBits()))
+ Imm = Op.getOperand(1).getConstantOperandAPInt(0).trunc(
+ VT.getScalarSizeInBits());
else if (Op.getOperand(1).getOpcode() == AArch64ISD::DUP &&
isa<ConstantSDNode>(Op.getOperand(1).getOperand(0)))
Imm = APInt(VT.getScalarSizeInBits(),
@@ -4175,12 +4177,11 @@ bool AArch64DAGToDAGISel::SelectCVTFixedPointVec(SDValue N, SDValue &FixedPos,
APFloat FVal(0.0);
switch (N->getOpcode()) {
- case AArch64ISD::MOVIshift:
- FVal = ImmToFloat(APInt(RegWidth, N.getConstantOperandVal(0)
- << N.getConstantOperandVal(1)));
- break;
- case AArch64ISD::FMOV:
- FVal = ImmToFloat(DecodeFMOVImm(N.getConstantOperandVal(0), RegWidth));
+ case AArch64ISD::MOVI:
+ if (N.getConstantOperandAPInt(0).isSplat(RegWidth))
+ FVal = ImmToFloat(N.getConstantOperandAPInt(0).trunc(RegWidth));
+ else
+ return false;
break;
case AArch64ISD::DUP:
if (isa<ConstantSDNode>(N.getOperand(0)))
@@ -4965,6 +4966,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
// Few custom selection stuff.
EVT VT = Node->getValueType(0);
+ auto *TLI = static_cast<const AArch64TargetLowering *>(getTargetLowering());
switch (Node->getOpcode()) {
default:
@@ -5054,11 +5056,107 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
break;
}
+ case ISD::ConstantFP:
+ // Leave legal fmov cases to tablegen.
+ if (TLI->isFPImmLegalAsFMov(cast<ConstantFPSDNode>(Node)->getValueAPF(),
+ VT))
+ break;
+ [[fallthrough]];
+ case AArch64ISD::MOVI: {
+ APInt DefBits;
+ if (Node->getOpcode() == ISD::ConstantFP) {
+ APInt Imm = *Node->bitcastToAPInt();
+ DefBits = Imm.getBitWidth() >= 64 ? Imm : APInt::getSplat(64, Imm);
+ } else {
+ APInt Imm = Node->getConstantOperandAPInt(0);
+ DefBits = APInt::getSplat(VT.getSizeInBits(), Imm);
+ }
+
+ SmallVector<AArch64_IMM::ImmInsnModel> Insns;
+ if (AArch64_IMM::expandVectorMOVImm(DefBits, Subtarget, Insns)) {
+ SDNode *Src = nullptr;
+ SDLoc DL(Node);
+ EVT FVT = VT.getSizeInBits() < 64 ? MVT::f64 : VT;
+
+ for (AArch64_IMM::ImmInsnModel Insn : Insns) {
+ switch (Insn.Opcode) {
+ case AArch64::FMOVD0:
+ Src = CurDAG->getMachineNode(Insn.Opcode, DL, MVT::f64);
+ if (FVT.getSizeInBits() > 64)
+ Src = CurDAG->getMachineNode(
+ TargetOpcode::SUBREG_TO_REG, DL, VT, SDValue(Src, 0),
+ CurDAG->getTargetConstant(AArch64::dsub, DL, MVT::i32));
+ break;
+ case AArch64::MOVID:
+ case AArch64::MOVIv2d_ns:
+ case AArch64::MOVIv8b_ns:
+ case AArch64::MOVIv16b_ns:
+ case AArch64::FMOVv2f32_ns:
+ case AArch64::FMOVv4f32_ns:
+ case AArch64::FMOVDi:
+ case AArch64::FMOVv2f64_ns:
+ Src = CurDAG->getMachineNode(
+ Insn.Opcode, DL, FVT,
+ CurDAG->getTargetConstant(Insn.Op1, DL, MVT::i64));
+ break;
+ case AArch64::MOVIv2i32:
+ case AArch64::MOVIv4i32:
+ case AArch64::MOVIv4i16:
+ case AArch64::MOVIv8i16:
+ case AArch64::MOVIv2s_msl:
+ case AArch64::MOVIv4s_msl:
+ case AArch64::MVNIv2i32:
+ case AArch64::MVNIv4i32:
+ case AArch64::MVNIv4i16:
+ case AArch64::MVNIv8i16:
+ case AArch64::MVNIv2s_msl:
+ case AArch64::MVNIv4s_msl:
+ Src = CurDAG->getMachineNode(
+ Insn.Opcode, DL, FVT,
+ CurDAG->getTargetConstant(Insn.Op1, DL, MVT::i64),
+ CurDAG->getTargetConstant(Insn.Op2, DL, MVT::i64));
+ break;
+ case AArch64::DUPM_ZI:
+ Src = CurDAG->getMachineNode(
+ Insn.Opcode, DL, MVT::nxv2f64,
+ CurDAG->getTargetConstant(Insn.Op1, DL, MVT::i64));
+ Src = CurDAG
+ ->getTargetExtractSubreg(AArch64::zsub, DL, FVT,
+ SDValue(Src, 0))
+ .getNode();
+ break;
+ case AArch64::DUP_ZI_D:
+ Src = CurDAG->getMachineNode(
+ Insn.Opcode, DL, MVT::nxv2f64,
+ CurDAG->getTargetConstant(Insn.Op1, DL, MVT::i64),
+ CurDAG->getTargetConstant(Insn.Op2, DL, MVT::i64));
+ Src = CurDAG
+ ->getTargetExtractSubreg(AArch64::zsub, DL, FVT,
+ SDValue(Src, 0))
+ .getNode();
+ break;
+ default:
+ llvm_unreachable("Unexpected node in expandVectorMOVImm\n");
+ }
+ }
+
+ if (VT.getSizeInBits() < FVT.getSizeInBits())
+ Src = CurDAG->getMachineNode(
+ TargetOpcode::EXTRACT_SUBREG, DL, VT, SDValue(Src, 0),
+ CurDAG->getTargetConstant(VT.getSizeInBits() == 16 ? AArch64::hsub
+ : AArch64::ssub,
+ DL, MVT::i32));
+
+ ReplaceNode(Node, Src);
+ return;
+ }
+ break;
+ }
+
case ISD::FrameIndex: {
// Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
int FI = cast<FrameIndexSDNode>(Node)->getIndex();
unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
- const TargetLowering *TLI = getTargetLowering();
SDValue TFI = CurDAG->getTargetFrameIndex(
FI, TLI->getPointerTy(CurDAG->getDataLayout()));
SDLoc DL(Node);
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 38db1ac4a2fb9..8a1a9da8cbaa8 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -2812,40 +2812,18 @@ void AArch64TargetLowering::computeKnownBitsForTargetNode(
break;
}
case AArch64ISD::MOVI: {
- Known = KnownBits::makeConstant(
- APInt(Known.getBitWidth(), Op->getConstantOperandVal(0)));
- break;
- }
- case AArch64ISD::MOVIshift: {
- Known = KnownBits::makeConstant(
- APInt(Known.getBitWidth(), Op->getConstantOperandVal(0)
- << Op->getConstantOperandVal(1)));
- break;
- }
- case AArch64ISD::MOVImsl: {
- unsigned ShiftAmt = AArch64_AM::getShiftValue(Op->getConstantOperandVal(1));
- Known = KnownBits::makeConstant(APInt(
- Known.getBitWidth(), ~(~Op->getConstantOperandVal(0) << ShiftAmt)));
- break;
- }
- case AArch64ISD::MOVIedit: {
- Known = KnownBits::makeConstant(APInt(
- Known.getBitWidth(),
- AArch64_AM::decodeAdvSIMDModImmType10(Op->getConstantOperandVal(0))));
- break;
- }
- case AArch64ISD::MVNIshift: {
- Known = KnownBits::makeConstant(
- APInt(Known.getBitWidth(),
- ~(Op->getConstantOperandVal(0) << Op->getConstantOperandVal(1)),
- /*isSigned*/ false, /*implicitTrunc*/ true));
- break;
- }
- case AArch64ISD::MVNImsl: {
- unsigned ShiftAmt = AArch64_AM::getShiftValue(Op->getConstantOperandVal(1));
- Known = KnownBits::makeConstant(
- APInt(Known.getBitWidth(), (~Op->getConstantOperandVal(0) << ShiftAmt),
- /*isSigned*/ false, /*implicitTrunc*/ true));
+ // MOVI has any type, the constant is the i64 value. Get the full width
+ // constant value and find the common bits of size EltSize.
+ EVT VT = Op.getValueType();
+ APInt Imm =
+ APInt::getSplat(VT.getSizeInBits(), Op->getConstantOperandAPInt(0));
+ unsigned EltSize = Known.getBitWidth();
+ unsigned Lanes = VT.getSizeInBits() / EltSize;
+ Known.setAllConflict();
+ for (unsigned I = 0; I < Lanes; I++)
+ if (DemandedElts[I])
+ Known = Known.intersectWith(
+ KnownBits::makeConstant(Imm.lshr(I * EltSize).trunc(EltSize)));
break;
}
case AArch64ISD::LOADgot:
@@ -13290,6 +13268,14 @@ bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
bool IsLegal = isFPImmLegalAsFMov(Imm, VT);
const APInt ImmInt = Imm.bitcastToAPInt();
+ if (!IsLegal && ImmInt.getBitWidth() <= 128) {
+ // Try duplicating it to all lanes and see if we can use a vector movi.
+ APInt DefBits =
+ ImmInt.getBitWidth() == 128 ? ImmInt : APInt::getSplat(64, ImmInt);
+ SmallVector<AArch64_IMM::ImmInsnModel> Insn;
+ IsLegal = AArch64_IMM::expandVectorMOVImm(DefBits, Subtarget, Insn);
+ }
+
// If we can not materialize in immediate field for fmov, check if the
// value can be encoded as the immediate operand of a logical instruction.
// The immediate value will be created with either MOVZ, MOVN, or ORR.
@@ -15515,27 +15501,6 @@ static bool resolveBuildVector(BuildVectorSDNode *BVN, APInt &CnstBits,
return false;
}
-// Try 64-bit splatted SIMD immediate.
-static SDValue tryAdvSIMDModImm64(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
- const APInt &Bits) {
- if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
- uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
- EVT VT = Op.getValueType();
- MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v2i64 : MVT::f64;
-
- if (AArch64_AM::isAdvSIMDModImmType10(Value)) {
- Value = AArch64_AM::encodeAdvSIMDModImmType10(Value);
-
- SDLoc DL(Op);
- SDValue Mov =
- DAG.getNode(NewOp, DL, MovTy, DAG.getConstant(Value, DL, MVT::i32));
- return DAG.getNode(AArch64ISD::NVCAST, DL, VT, Mov);
- }
- }
-
- return SDValue();
-}
-
// Try 32-bit splatted SIMD immediate.
static SDValue tryAdvSIMDModImm32(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
const APInt &Bits,
@@ -15634,89 +15599,6 @@ static SDValue tryAdvSIMDModImm16(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
return SDValue();
}
-// Try 32-bit splatted SIMD immediate with shifted ones.
-static SDValue tryAdvSIMDModImm321s(unsigned NewOp, SDValue Op,
- SelectionDAG &DAG, const APInt &Bits) {
- if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
- uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
- EVT VT = Op.getValueType();
- MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
- bool isAdvSIMDModImm = false;
- uint64_t Shift;
-
- if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType7(Value))) {
- Value = AArch64_AM::encodeAdvSIMDModImmType7(Value);
- Shift = 264;
- }
- else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType8(Value))) {
- Value = AArch64_AM::encodeAdvSIMDModImmType8(Value);
- Shift = 272;
- }
-
- if (isAdvSIMDModImm) {
- SDLoc DL(Op);
- SDValue Mov =
- DAG.getNode(NewOp, DL, MovTy, DAG.getConstant(Value, DL, MVT::i32),
- DAG.getConstant(Shift, DL, MVT::i32));
- return DAG.getNode(AArch64ISD::NVCAST, DL, VT, Mov);
- }
- }
-
- return SDValue();
-}
-
-// Try 8-bit splatted SIMD immediate.
-static SDValue tryAdvSIMDModImm8(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
- const APInt &Bits) {
- if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
- uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
- EVT VT = Op.getValueType();
- MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v16i8 : MVT::v8i8;
-
- if (AArch64_AM::isAdvSIMDModImmType9(Value)) {
- Value = AArch64_AM::encodeAdvSIMDModImmType9(Value);
-
- SDLoc DL(Op);
- SDValue Mov =
- DAG.getNode(NewOp, DL, MovTy, DAG.getConstant(Value, DL, MVT::i32));
- return DAG.getNode(AArch64ISD::NVCAST, DL, VT, Mov);
- }
- }
-
- return SDValue();
-}
-
-// Try FP splatted SIMD immediate.
-static SDValue tryAdvSIMDModImmFP(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
- const APInt &Bits) {
- if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
- uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
- EVT VT = Op.getValueType();
- bool isWide = (VT.getSizeInBits() == 128);
- MVT MovTy;
- bool isAdvSIMDModImm = false;
-
- if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType11(Value))) {
- Value = AArch64_AM::encodeAdvSIMDModImmType11(Value);
- MovTy = isWide ? MVT::v4f32 : MVT::v2f32;
- }
- else if (isWide &&
- (isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType12(Value))) {
- Value = AArch64_AM::encodeAdvSIMDModImmType12(Value);
- MovTy = MVT::v2f64;
- }
-
- if (isAdvSIMDModImm) {
- SDLoc DL(Op);
- SDValue Mov =
- DAG.getNode(NewOp, DL, MovTy, DAG.getConstant(Value, DL, MVT::i32));
- return DAG.getNode(AArch64ISD::NVCAST, DL, VT, Mov);
- }
- }
-
- return SDValue();
-}
-
// Specialized code to quickly find if PotentialBVec is a BuildVector that
// consists of only the same constant int value, returned in reference arg
// ConstVal
@@ -16046,28 +15928,6 @@ static SDValue NormalizeBuildVector(SDValue Op,
return DAG.getBuildVector(VT, DL, Ops);
}
-static SDValue trySVESplat64(SDValue Op, SelectionDAG &DAG,
- const AArch64Subtarget *ST, APInt &DefBits) {
- EVT VT = Op.getValueType();
- // TODO: We should be able to support 64-bit destinations too
- if (!ST->hasSVE() || !VT.is128BitVector() ||
- DefBits.getHiBits(64) != DefBits.getLoBits(64))
- return SDValue();
-
- // See if we can make use of the SVE dup instruction.
- APInt Val64 = DefBits.trunc(64);
- int32_t ImmVal, ShiftVal;
- uint64_t Encoding;
- if (!AArch64_AM::isSVECpyDupImm(64, Val64.getSExtValue(), ImmVal, ShiftVal) &&
- !AArch64_AM::isSVELogicalImm(64, Val64.getZExtValue(), Encoding))
- return SDValue();
-
- SDLoc DL(Op);
- SDValue SplatVal = DAG.getNode(AArch64ISD::DUP, DL, MVT::v2i64,
- DAG.getConstant(Val64, DL, MVT::i64));
- return DAG.getNode(AArch64ISD::NVCAST, DL, VT, SplatVal);
-}
-
static SDValue ConstantBuildVector(SDValue Op, SelectionDAG &DAG,
const AArch64Subtarget *ST) {
EVT VT = Op.getValueType();
@@ -16077,71 +15937,44 @@ static SDValue ConstantBuildVector(SDValue Op, SelectionDAG &DAG,
APInt DefBits(VT.getSizeInBits(), 0);
APInt UndefBits(VT.getSizeInBits(), 0);
BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
- if (resolveBuildVector(BVN, DefBits, UndefBits)) {
- auto TryMOVIWithBits = [&](APInt DefBits) {
- SDValue NewOp;
- if ((NewOp =
- tryAdvSIMDModImm64(AArch64ISD::MOVIedit, Op, DAG, DefBits)) ||
- (NewOp =
- tryAdvSIMDModImm32(AArch64ISD::MOVIshift, Op, DAG, DefBits)) ||
- (NewOp =
- tryAdvSIMDModImm321s(AArch64ISD::MOVImsl, Op, DAG, DefBits)) ||
- (NewOp =
- tryAdvSIMDModImm16(AArch64ISD::MOVIshift, Op, DAG, DefBits)) ||
- (NewOp = tryAdvSIMDModImm8(AArch64ISD::MOVI, Op, DAG, DefBits)) ||
- (NewOp = tryAdvSIMDModImmFP(AArch64ISD::FMOV, Op, DAG, DefBits)))
- return NewOp;
-
- APInt NotDefBits = ~DefBits;
- if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::MVNIshift, Op, DAG,
- NotDefBits)) ||
- (NewOp = tryAdvSIMDModImm321s(AArch64ISD::MVNImsl, Op, DAG,
- NotDefBits)) ||
- (NewOp =
- tryAdvSIMDModImm16(AArch64ISD::MVNIshift, Op, DAG, NotDefBits)))
- return NewOp;
- return SDValue();
- };
- if (SDValue R = TryMOVIWithBits(DefBits))
- return R;
- if (SDValue R = TryMOVIWithBits(UndefBits))
- return R;
-
- // Try to materialise the constant using SVE when available.
- if (SDValue R = trySVESplat64(Op, DAG, ST, DefBits))
- return R;
-
- // See if a fneg of the constant can be materialized with a MOVI, etc
- auto TryWithFNeg = [&](APInt DefBits, MVT FVT) {
- // FNegate each sub-element of the constant
- assert(VT.getSizeInBits() % FVT.getScalarSizeInBits() == 0);
- APInt Neg = APInt::getHighBitsSet(FVT.getSizeInBits(), 1)
- .zext(VT.getSizeInBits());
- APInt NegBits(VT.getSizeInBits(), 0);
- unsigned NumElts = VT.getSizeInBits() / FVT.getScalarSizeInBits();
- for (unsigned i = 0; i < NumElts; i++)
- NegBits |= Neg << (FVT.getScalarSizeInBits() * i);
- NegBits = DefBits ^ NegBits;
-
- // Try to create the new constants with MOVI, and if so generate a fneg
- // for it.
- if (SDValue NewOp = TryMOVIWithBits(NegBits)) {
- SDLoc DL(Op);
- MVT VFVT = NumElts == 1 ? FVT : MVT::getVectorVT(FVT, NumElts);
- return DAG.getNode(
- AArch64ISD::NVCAST, DL, VT,
- DAG.getNode(ISD::FNEG, DL, VFVT,
- DAG.getNode(AArch64ISD::NVCAST, DL, VFVT, NewOp)));
- }
- return SDValue();
- };
- SDValue R;
- if ((R = TryWithFNeg(DefBits, MVT::f32)) ||
- (R = TryWithFNeg(DefBits, MVT::f64)) ||
- (ST->hasFullFP16() && (R = TryWithFNeg(DefBits, MVT::f16))))
- return R;
- }
+ if (!resolveBuildVector(BVN, DefBits, UndefBits))
+ return SDValue();
+ SDLoc DL(Op);
+ SmallVector<AArch64_IMM::ImmInsnModel> Insns;
+ if (expandVectorMOVImm(DefBits, ST, Insns))
+ return DAG.getNode(AArch64ISD::MOVI, DL, VT,
+ DAG.getConstant(DefBits.trunc(64), DL, MVT::i64));
+
+ // See if a fneg of the constant can be materialized with a MOVI, etc
+ auto TryWithFNeg = [&](APInt DefBits, MVT FVT) {
+ // FNegate each sub-element of the constant
+ assert(VT.getSizeInBits() % FVT.getScalarSizeInBits() == 0);
+ APInt Neg =
+ APInt::getHighBitsSet(FVT.getSizeInBits(), 1).zext(VT.getSizeInBits());
+ APInt NegBits(VT.getSizeInBits(), 0);
+ unsigned NumElts = VT.getSizeInBits() / FVT.getScalarSizeInBits();
+ for (unsigned i = 0; i < NumElts; i++)
+ NegBits |= Neg << (FVT.getScalarSizeInBits() * i);
+ NegBits = DefBits ^ NegBits;
+
+ SmallVector<AArch64_IMM::ImmInsnModel> Insns;
+ if (expandVectorMOVImm(NegBits, ST, Insns)) {
+ SDLoc DL(Op);
+ MVT VFVT = NumElts == 1 ? FVT : MVT::getVectorVT(FVT, NumElts);
+ SDValue MOVI =
+ DAG.getNode(AArch64ISD::MOVI, DL, VFVT,
+ DAG.getConstant(NegBits.trunc(64), DL, MVT::i64));
+ return DAG.getNode(AArch64ISD::NVCAST, DL, VT,
+ DAG.getNode(ISD::FNEG, DL, VFVT, MOVI));
+ }
+ return SDValue();
+ };
+ SDValue R;
+ if ((R = TryWithFNeg(DefBits, MVT::f32)) ||
+ (R = TryWithFNeg(DefBits, MVT::f64)) ||
+ (ST->hasFullFP16() && (R = TryWithFNeg(DefBits, MVT::f16))))
+ return R;
return SDValue();
}
@@ -21901,10 +21734,11 @@ static SDValue performConcatVectorsCombine(SDNode *N,
return false;
APInt Imm;
- if (Op.getOperand(1).getOpcode() == AArch64ISD::MOVIshift)
- Imm = APInt(VT.getScalarSizeInBits(),
- Op.getOperand(1).getConstantOperandVal(0)
- << Op.getOperand(1).getConstantOperandVal(1));
+ if (Op.getOperand(1).getOpcode() == AArch64ISD::MOVI &&
+ Op.getOperand(1).getConstantOperandAPInt(0).isSplat(
+ VT.getScalarSizeInBits()))
+ Imm = Op.getOperand(1).getConstantOperandAPInt(0).trunc(
+ VT.getScalarSizeInBits());
else if (Op.getOperand(1).getOpcode() == AArch64ISD::DUP &&
isa<ConstantSDNode>(Op.getOperand(1).getOperand(0)))
Imm = APInt(VT.getScalarSizeInBits(),
@@ -22136,11 +21970,6 @@ static SDValue tryExtendDUPToExtractHigh(SDValue N, SelectionDAG &DAG) {
case AArch64ISD::DUPLANE32:
case AArch64ISD::DUPLANE64:
case AArch64ISD::MOVI:
- case AArch64ISD::MOVIshift:
- case AArch64ISD::MOVIedit:
- case AArch64ISD::MOVImsl:
- case AArch64ISD::MVNIshift:
- case AArch64ISD::MVNImsl:
break;
default:
// FMOV could be supported, but isn't very useful, as it would only occur
@@ -33245,11 +33074,6 @@ bool AArch64TargetLowering::canCreateUndefOrPoisonForTargetNode(
// TODO: Add more target nodes.
switch (Op.getOpcode()) {
case AArch64ISD::MOVI:
- case AArch64ISD::MOVIedit:
- case AArch64ISD::MOVImsl:
- case AArch64ISD::MOVIshift:
- case AArch64ISD::MVNImsl:
- case AArch64ISD::MVNIshift:
case AArch64ISD::VASHR:
case AArch64ISD::VLSHR:
case AArch64ISD::VSHL:
@@ -33262,17 +33086,11 @@ bool AArch64TargetLowering::canCreateUndefOrPoisonForTargetNode(
bool AArch64TargetLowering::isTargetCanonicalConstantNode(SDValue Op) const {
return Op.getOpcode() == AArch64ISD::DUP ||
Op.getOpcode() == AArch64ISD::MOVI ||
- Op.getOpcode() == AArch64ISD::MOVIshift ||
- Op.getOpcode() == AArch64ISD::MOVImsl ||
- Op.getOpcode() == AArch64ISD::MOVIedit ||
- Op.getOpcode() == AArch64ISD::MVNIshift ||
- Op.getOpcode() == AArch64ISD::MVNImsl ||
// Ignoring fneg(movi(0)), because if it is folded to FPConstant(-0.0),
// ISel will select fmov(mov i64 0x8000000000000000), resulting in a
// fmov from fpr to gpr, which is more expensive than fneg(movi(0))
(Op.getOpcode() == ISD::FNEG &&
- Op.getOperand(0).getOpcode() == AArch64ISD::MOVIedit &&
- Op.getOperand(0).getConstantOperandVal(0) == 0) ||
+ Op.getOperand(0).getOpcode() == AArch64ISD::MOVI) ||
(Op.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
Op.getOperand(0).getOpcode() == AArch64ISD::DUP) ||
TargetLowering::isTargetCanonicalConstantNode(Op);
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 1774927e9297d..e0bad63a11cbd 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -1494,13 +1494,6 @@ def fpimm32XForm : SDNodeXForm<fpimm, [{
return CurDAG->getTargetConstant(Enc, SDLoc(N), MVT::i32);
}]>;
-def fpimm32SIMDModImmType4XForm : SDNodeXForm<fpimm, [{
- uint32_t enc = AArch64_AM::encodeAdvSIMDModImmType4(N->getValueAPF()
- .bitcastToAPInt()
- .getZExtValue());
- return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i32);
- }]>;
-
def fpimm64XForm : SDNodeXForm<fpimm, [{
uint32_t Enc = AArch64_AM::getFP64Imm(N->getValueAPF());
return CurDAG->getTargetConstant(Enc, SDLoc(N), MVT::i32);
@@ -1527,12 +1520,6 @@ def fpimm32 : Operand<f32>,
let PrintMethod = "printFPImmOperand";
}
-def fpimm32SIMDModImmType4 : FPImmLeaf<f32, [{
- uint64_t Enc = Imm.bitcastToAPInt().getZExtValue();
- return Enc != 0 && AArch64_AM::isAdvSIMDModImmType4(Enc << 32 | Enc);
- }], fpimm32SIMDModImmType4XForm> {
-}
-
def fpimm64 : Operand<f64>,
FPImmLeaf<f64, [{
return AArch64_AM::getFP64Imm(Imm) != -1;
@@ -1573,9 +1560,6 @@ def gi_fpimm32 : GICustomOperandRenderer<"renderFPImm32">,
GISDNodeXFormEquiv<fpimm32XForm>;
def gi_fpimm64 : GICustomOperandRenderer<"renderFPImm64">,
GISDNodeXFormEquiv<fpimm64XForm>;
-def gi_fpimm32SIMDModImmType4 :
- GICustomOperandRenderer<"renderFPImm32SIMDModImmType4">,
- GISDNodeXFormEquiv<fpimm32SIMDModImmType4XForm>;
// Vector lane operands
class AsmVectorIndex<int Min, int Max, string NamePrefix=""> : AsmOperandClass {
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 08512f6ed8df1..05d383e63167b 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -509,8 +509,7 @@ def SDT_AArch64Insr : SDTypeProfile<1, 2, [SDTCisVec<0>]>;
def SDT_AArch64Zip : SDTypeProfile<1, 2, [SDTCisVec<0>,
SDTCisSameAs<0, 1>,
SDTCisSameAs<0, 2>]>;
-def SDT_AArch64MOVIedit : SDTypeProfile<1, 1, [SDTCisInt<1>]>;
-def SDT_AArch64MOVIshift : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>;
+def SDT_AArch64MOVI : SDTypeProfile<1, 1, [SDTCisInt<1>]>;
def SDT_AArch64vecimm : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisInt<2>, SDTCisInt<3>]>;
def SDT_AArch64UnaryVec: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
@@ -920,13 +919,7 @@ def AArch64trn1 : SDNode<"AArch64ISD::TRN1", SDT_AArch64Zip>;
def AArch64trn2 : SDNode<"AArch64ISD::TRN2", SDT_AArch64Zip>;
// Vector immediate moves
-def AArch64movi_edit : SDNode<"AArch64ISD::MOVIedit", SDT_AArch64MOVIedit>;
-def AArch64movi_shift : SDNode<"AArch64ISD::MOVIshift", SDT_AArch64MOVIshift>;
-def AArch64movi_msl : SDNode<"AArch64ISD::MOVImsl", SDT_AArch64MOVIshift>;
-def AArch64mvni_shift : SDNode<"AArch64ISD::MVNIshift", SDT_AArch64MOVIshift>;
-def AArch64mvni_msl : SDNode<"AArch64ISD::MVNImsl", SDT_AArch64MOVIshift>;
-def AArch64movi : SDNode<"AArch64ISD::MOVI", SDT_AArch64MOVIedit>;
-def AArch64fmov : SDNode<"AArch64ISD::FMOV", SDT_AArch64MOVIedit>;
+def AArch64movi : SDNode<"AArch64ISD::MOVI", SDT_AArch64MOVI>;
def AArch64rev16 : SDNode<"AArch64ISD::REV16", SDT_AArch64Rev>;
def AArch64rev32 : SDNode<"AArch64ISD::REV32", SDT_AArch64Rev>;
@@ -8673,46 +8666,32 @@ def : InstAlias<"orr.4s $Vd, $imm", (ORRv4i32 V128:$Vd, imm0_255:$imm, 0)>;
}
// AdvSIMD FMOV
-let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+let isReMaterializable = 1, isAsCheapAsAMove = 1, hasSideEffects = 0 in {
def FMOVv2f64_ns : SIMDModifiedImmVectorNoShift<1, 1, 0, 0b1111, V128, fpimm8,
- "fmov", ".2d",
- [(set (v2f64 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>;
+ "fmov", ".2d", []>;
def FMOVv2f32_ns : SIMDModifiedImmVectorNoShift<0, 0, 0, 0b1111, V64, fpimm8,
- "fmov", ".2s",
- [(set (v2f32 V64:$Rd), (AArch64fmov imm0_255:$imm8))]>;
+ "fmov", ".2s", []>;
def FMOVv4f32_ns : SIMDModifiedImmVectorNoShift<1, 0, 0, 0b1111, V128, fpimm8,
- "fmov", ".4s",
- [(set (v4f32 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>;
+ "fmov", ".4s", []>;
let Predicates = [HasNEON, HasFullFP16] in {
def FMOVv4f16_ns : SIMDModifiedImmVectorNoShift<0, 0, 1, 0b1111, V64, fpimm8,
- "fmov", ".4h",
- [(set (v4f16 V64:$Rd), (AArch64fmov imm0_255:$imm8))]>;
+ "fmov", ".4h", []>;
def FMOVv8f16_ns : SIMDModifiedImmVectorNoShift<1, 0, 1, 0b1111, V128, fpimm8,
- "fmov", ".8h",
- [(set (v8f16 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>;
+ "fmov", ".8h", []>;
} // Predicates = [HasNEON, HasFullFP16]
}
// AdvSIMD MOVI
// EDIT byte mask: scalar
-let isReMaterializable = 1, isAsCheapAsAMove = 1 in
+let isReMaterializable = 1, isAsCheapAsAMove = 1, hasSideEffects = 0 in
def MOVID : SIMDModifiedImmScalarNoShift<0, 1, 0b1110, "movi",
[(set FPR64:$Rd, simdimmtype10:$imm8)]>;
-// The movi_edit node has the immediate value already encoded, so we use
-// a plain imm0_255 here.
-def : Pat<(f64 (AArch64movi_edit imm0_255:$shift)),
- (MOVID imm0_255:$shift)>;
-
-// EDIT byte mask: 2d
-// The movi_edit node has the immediate value already encoded, so we use
-// a plain imm0_255 in the pattern
-let isReMaterializable = 1, isAsCheapAsAMove = 1 in
+// EDIT byte mask: 2d
+let isReMaterializable = 1, isAsCheapAsAMove = 1, hasSideEffects = 0 in
def MOVIv2d_ns : SIMDModifiedImmVectorNoShift<1, 1, 0, 0b1110, V128,
- simdimmtype10,
- "movi", ".2d",
- [(set (v2i64 V128:$Rd), (AArch64movi_edit imm0_255:$imm8))]>;
+ simdimmtype10, "movi", ".2d", []>;
let Predicates = [HasNEON] in {
def : Pat<(f128 fpimm0), (f128 (MOVIv2d_ns (i32 0)))>;
@@ -8766,17 +8745,9 @@ def : Pat<(v8i8 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;
}
// EDIT per word & halfword: 2s, 4h, 4s, & 8h
-let isReMaterializable = 1, isAsCheapAsAMove = 1 in
+let isReMaterializable = 1, isAsCheapAsAMove = 1, hasSideEffects = 0 in
defm MOVI : SIMDModifiedImmVectorShift<0, 0b10, 0b00, "movi">;
-let Predicates = [HasNEON] in {
- // Using the MOVI to materialize fp constants.
- def : Pat<(f32 fpimm32SIMDModImmType4:$in),
- (EXTRACT_SUBREG (MOVIv2i32 (fpimm32SIMDModImmType4XForm f32:$in),
- (i32 24)),
- ssub)>;
-}
-
let Predicates = [HasNEON] in {
def : InstAlias<"movi $Vd.4h, $imm", (MOVIv4i16 V64:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"movi $Vd.8h, $imm", (MOVIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
@@ -8789,38 +8760,23 @@ def : InstAlias<"movi.2s $Vd, $imm", (MOVIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"movi.4s $Vd, $imm", (MOVIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;
}
-def : Pat<(v2i32 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
- (MOVIv2i32 imm0_255:$imm8, imm:$shift)>;
-def : Pat<(v4i32 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
- (MOVIv4i32 imm0_255:$imm8, imm:$shift)>;
-def : Pat<(v4i16 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
- (MOVIv4i16 imm0_255:$imm8, imm:$shift)>;
-def : Pat<(v8i16 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
- (MOVIv8i16 imm0_255:$imm8, imm:$shift)>;
-
-let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+let isReMaterializable = 1, isAsCheapAsAMove = 1, hasSideEffects = 0 in {
// EDIT per word: 2s & 4s with MSL shifter
-def MOVIv2s_msl : SIMDModifiedImmMoveMSL<0, 0, {1,1,0,?}, V64, "movi", ".2s",
- [(set (v2i32 V64:$Rd),
- (AArch64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>;
-def MOVIv4s_msl : SIMDModifiedImmMoveMSL<1, 0, {1,1,0,?}, V128, "movi", ".4s",
- [(set (v4i32 V128:$Rd),
- (AArch64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>;
+def MOVIv2s_msl : SIMDModifiedImmMoveMSL<0, 0, {1,1,0,?}, V64, "movi", ".2s", []>;
+def MOVIv4s_msl : SIMDModifiedImmMoveMSL<1, 0, {1,1,0,?}, V128, "movi", ".4s", []>;
// Per byte: 8b & 16b
def MOVIv8b_ns : SIMDModifiedImmVectorNoShift<0, 0, 0, 0b1110, V64, imm0_255,
- "movi", ".8b",
- [(set (v8i8 V64:$Rd), (AArch64movi imm0_255:$imm8))]>;
+ "movi", ".8b", []>;
def MOVIv16b_ns : SIMDModifiedImmVectorNoShift<1, 0, 0, 0b1110, V128, imm0_255,
- "movi", ".16b",
- [(set (v16i8 V128:$Rd), (AArch64movi imm0_255:$imm8))]>;
+ "movi", ".16b", []>;
}
// AdvSIMD MVNI
// EDIT per word & halfword: 2s, 4h, 4s, & 8h
-let isReMaterializable = 1, isAsCheapAsAMove = 1 in
+let isReMaterializable = 1, isAsCheapAsAMove = 1, hasSideEffects = 0 in
defm MVNI : SIMDModifiedImmVectorShift<1, 0b10, 0b00, "mvni">;
let Predicates = [HasNEON] in {
@@ -8835,23 +8791,10 @@ def : InstAlias<"mvni.2s $Vd, $imm", (MVNIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"mvni.4s $Vd, $imm", (MVNIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;
}
-def : Pat<(v2i32 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
- (MVNIv2i32 imm0_255:$imm8, imm:$shift)>;
-def : Pat<(v4i32 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
- (MVNIv4i32 imm0_255:$imm8, imm:$shift)>;
-def : Pat<(v4i16 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
- (MVNIv4i16 imm0_255:$imm8, imm:$shift)>;
-def : Pat<(v8i16 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
- (MVNIv8i16 imm0_255:$imm8, imm:$shift)>;
-
// EDIT per word: 2s & 4s with MSL shifter
-let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
-def MVNIv2s_msl : SIMDModifiedImmMoveMSL<0, 1, {1,1,0,?}, V64, "mvni", ".2s",
- [(set (v2i32 V64:$Rd),
- (AArch64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>;
-def MVNIv4s_msl : SIMDModifiedImmMoveMSL<1, 1, {1,1,0,?}, V128, "mvni", ".4s",
- [(set (v4i32 V128:$Rd),
- (AArch64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>;
+let isReMaterializable = 1, isAsCheapAsAMove = 1, hasSideEffects = 0 in {
+def MVNIv2s_msl : SIMDModifiedImmMoveMSL<0, 1, {1,1,0,?}, V64, "mvni", ".2s", []>;
+def MVNIv4s_msl : SIMDModifiedImmMoveMSL<1, 1, {1,1,0,?}, V128, "mvni", ".4s", []>;
}
// SABA patterns for add(x, abs(y)) -> saba(x, y, 0)
@@ -9319,16 +9262,15 @@ defm USHR : SIMDVectorRShiftBHSD<1, 0b00000, "ushr", AArch64vlshr>;
defm USRA : SIMDVectorRShiftBHSDTied<1, 0b00010, "usra",
TriOpFrag<(add_like node:$LHS, (AArch64vlshr node:$MHS, node:$RHS))> >;
-def VImm0080: PatLeaf<(AArch64movi_shift (i32 128), (i32 0))>;
-def VImm00008000: PatLeaf<(AArch64movi_shift (i32 128), (i32 8))>;
-def VImm0000000080000000: PatLeaf<(AArch64NvCast (v2f64 (fneg (AArch64NvCast (v4i32 (AArch64movi_shift (i32 128), (i32 24)))))))>;
+def VImm0080: PatLeaf<(AArch64movi (i64 0x0080008000800080))>;
+def VImm00008000: PatLeaf<(AArch64movi (i64 0x0000800000008000))>;
+def VImm0000000080000000: PatLeaf<(AArch64NvCast (v2f64 (fneg (AArch64movi (i64 0x8000000080000000)))))>;
// RADDHN patterns for when RSHRN shifts by half the size of the vector element
def : Pat<(v8i8 (trunc (AArch64vlshr (add (v8i16 V128:$Vn), VImm0080), (i32 8)))),
(RADDHNv8i16_v8i8 V128:$Vn, (v8i16 (MOVIv2d_ns (i32 0))))>;
def : Pat<(v4i16 (trunc (AArch64vlshr (add (v4i32 V128:$Vn), VImm00008000), (i32 16)))),
(RADDHNv4i32_v4i16 V128:$Vn, (v4i32 (MOVIv2d_ns (i32 0))))>;
-let AddedComplexity = 5 in
def : Pat<(v2i32 (trunc (AArch64vlshr (add (v2i64 V128:$Vn), VImm0000000080000000), (i32 32)))),
(RADDHNv2i64_v2i32 V128:$Vn, (v2i64 (MOVIv2d_ns (i32 0))))>;
def : Pat<(v8i8 (int_aarch64_neon_rshrn (v8i16 V128:$Vn), (i32 8))),
@@ -9339,6 +9281,7 @@ def : Pat<(v2i32 (int_aarch64_neon_rshrn (v2i64 V128:$Vn), (i32 32))),
(RADDHNv2i64_v2i32 V128:$Vn, (v2i64 (MOVIv2d_ns (i32 0))))>;
// RADDHN2 patterns for when RSHRN shifts by half the size of the vector element
+let AddedComplexity = 5 in {
def : Pat<(v16i8 (concat_vectors
(v8i8 V64:$Vd),
(v8i8 (trunc (AArch64vlshr (add (v8i16 V128:$Vn), VImm0080), (i32 8)))))),
@@ -9351,13 +9294,13 @@ def : Pat<(v8i16 (concat_vectors
(RADDHNv4i32_v8i16
(INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn,
(v4i32 (MOVIv2d_ns (i32 0))))>;
-let AddedComplexity = 5 in
def : Pat<(v4i32 (concat_vectors
(v2i32 V64:$Vd),
(v2i32 (trunc (AArch64vlshr (add (v2i64 V128:$Vn), VImm0000000080000000), (i32 32)))))),
(RADDHNv2i64_v4i32
(INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn,
(v2i64 (MOVIv2d_ns (i32 0))))>;
+}
def : Pat<(v16i8 (concat_vectors
(v8i8 V64:$Vd),
(v8i8 (int_aarch64_neon_rshrn (v8i16 V128:$Vn), (i32 8))))),
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 2fa0fca176c88..97546adf94a8a 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -11,6 +11,7 @@
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//
+#include "AArch64ExpandImm.h"
#include "AArch64GlobalISelUtils.h"
#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
@@ -167,30 +168,21 @@ class AArch64InstructionSelector : public InstructionSelector {
const RegisterBank &RB,
MachineIRBuilder &MIRBuilder) const;
+ /// Emit a sequence of instructions representing a constant \p Imm for a
+ /// vector register \p Dst via MOVI.
+ MachineInstr *emitConstantMOVVector(DstOp Dst, APInt Imm,
+ MachineIRBuilder &MIRBuilder,
+ MachineRegisterInfo &MRI);
+
/// Emit a sequence of instructions representing a constant \p CV for a
/// vector register \p Dst. (E.g. a MOV, or a load from a constant pool.)
///
/// \returns the last instruction in the sequence on success, and nullptr
/// otherwise.
- MachineInstr *emitConstantVector(Register Dst, Constant *CV,
+ MachineInstr *emitConstantVector(Register Dst, Constant *CV, APInt Imm,
MachineIRBuilder &MIRBuilder,
MachineRegisterInfo &MRI);
- MachineInstr *tryAdvSIMDModImm8(Register Dst, unsigned DstSize, APInt Bits,
- MachineIRBuilder &MIRBuilder);
-
- MachineInstr *tryAdvSIMDModImm16(Register Dst, unsigned DstSize, APInt Bits,
- MachineIRBuilder &MIRBuilder, bool Inv);
-
- MachineInstr *tryAdvSIMDModImm32(Register Dst, unsigned DstSize, APInt Bits,
- MachineIRBuilder &MIRBuilder, bool Inv);
- MachineInstr *tryAdvSIMDModImm64(Register Dst, unsigned DstSize, APInt Bits,
- MachineIRBuilder &MIRBuilder);
- MachineInstr *tryAdvSIMDModImm321s(Register Dst, unsigned DstSize, APInt Bits,
- MachineIRBuilder &MIRBuilder, bool Inv);
- MachineInstr *tryAdvSIMDModImmFP(Register Dst, unsigned DstSize, APInt Bits,
- MachineIRBuilder &MIRBuilder);
-
bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
MachineRegisterInfo &MRI);
/// \returns true if a G_BUILD_VECTOR instruction \p MI can be selected as a
@@ -502,9 +494,6 @@ class AArch64InstructionSelector : public InstructionSelector {
int OpIdx = -1) const;
void renderFPImm64(MachineInstrBuilder &MIB, const MachineInstr &MI,
int OpIdx = -1) const;
- void renderFPImm32SIMDModImmType4(MachineInstrBuilder &MIB,
- const MachineInstr &MI,
- int OpIdx = -1) const;
// Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
void materializeLargeCMVal(MachineInstr &I, const Value *V, unsigned OpFlags);
@@ -2373,12 +2362,15 @@ bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
return false;
LLVMContext &Ctx = MF.getFunction().getContext();
Register Dst = I.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(Dst);
auto *CV = ConstantDataVector::getSplat(
- MRI.getType(Dst).getNumElements(),
- ConstantInt::get(
- Type::getIntNTy(Ctx, MRI.getType(Dst).getScalarSizeInBits()),
- ValAndVReg->Value.trunc(MRI.getType(Dst).getScalarSizeInBits())));
- if (!emitConstantVector(Dst, CV, MIB, MRI))
+ DstTy.getNumElements(),
+ ConstantInt::get(Type::getIntNTy(Ctx, DstTy.getScalarSizeInBits()),
+ ValAndVReg->Value.trunc(DstTy.getScalarSizeInBits())));
+ APInt Imm =
+ APInt::getSplat(DstTy.getSizeInBits(),
+ ValAndVReg->Value.trunc(DstTy.getScalarSizeInBits()));
+ if (!emitConstantVector(Dst, CV, Imm, MIB, MRI))
return false;
I.eraseFromParent();
return true;
@@ -2699,58 +2691,68 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
const TargetRegisterClass &FPRRC = *getRegClassForTypeOnBank(DefTy, RB);
- // For 16, 64, and 128b values, emit a constant pool load.
- switch (DefSize) {
- default:
- llvm_unreachable("Unexpected destination size for G_FCONSTANT?");
- case 32:
- case 64: {
- bool OptForSize = shouldOptForSize(&MF);
- const auto &TLI = MF.getSubtarget().getTargetLowering();
- // If TLI says that this fpimm is illegal, then we'll expand to a
- // constant pool load.
- if (TLI->isFPImmLegal(I.getOperand(1).getFPImm()->getValueAPF(),
- EVT::getFloatingPointVT(DefSize), OptForSize))
- break;
- [[fallthrough]];
- }
- case 16:
- case 128: {
- auto *FPImm = I.getOperand(1).getFPImm();
- auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
- if (!LoadMI) {
- LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n");
- return false;
+ assert(
+ (DefSize == 16 || DefSize == 32 || DefSize == 64 || DefSize == 128) &&
+ "Unexpected destination size for G_FCONSTANT?");
+
+ // Try to emit as a vector movi.
+ const ConstantFP *FPImm = I.getOperand(1).getFPImm();
+ APInt Imm = DefSize >= 64
+ ? FPImm->getValue().bitcastToAPInt()
+ : APInt::getSplat(64, FPImm->getValue().bitcastToAPInt());
+ if (auto *Mov = emitConstantMOVVector(
+ DefSize >= 64 ? DstOp(DefReg) : DstOp(&AArch64::FPR64RegClass), Imm,
+ MIB, MRI)) {
+ if (DefSize < 64) {
+ MIB.buildInstr(TargetOpcode::COPY, {DefReg}, {})
+ .addReg(Mov->getOperand(0).getReg(), {},
+ DefSize == 16 ? AArch64::hsub : AArch64::ssub);
+ RBI.constrainGenericRegister(DefReg, FPRRC, MRI);
}
- MIB.buildCopy({DefReg}, {LoadMI->getOperand(0).getReg()});
I.eraseFromParent();
- return RBI.constrainGenericRegister(DefReg, FPRRC, MRI);
- }
+ return true;
}
- assert((DefSize == 32 || DefSize == 64) && "Unexpected const def size");
- // Either emit a FMOV, or emit a copy to emit a normal mov.
- const Register DefGPRReg = MRI.createVirtualRegister(
- DefSize == 32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
- MachineOperand &RegOp = I.getOperand(0);
- RegOp.setReg(DefGPRReg);
- MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
- MIB.buildCopy({DefReg}, {DefGPRReg});
+  // If TLI says that this fpimm is legal, then prefer a copy from a GPR
+  // movimm; otherwise we'll expand to a constant pool load.
+ bool OptForSize = shouldOptForSize(&MF);
+ const auto &TLI = MF.getSubtarget().getTargetLowering();
+ if (TLI->isFPImmLegal(I.getOperand(1).getFPImm()->getValueAPF(),
+ EVT::getFloatingPointVT(DefSize), OptForSize)) {
+ assert((DefSize == 32 || DefSize == 64) && "Unexpected const def size");
+ // Emit a copy to emit a normal movimm.
+ const Register DefGPRReg = MRI.createVirtualRegister(
+ DefSize == 32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
+ MachineOperand &RegOp = I.getOperand(0);
+ RegOp.setReg(DefGPRReg);
+ MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
+ MIB.buildCopy({DefReg}, {DefGPRReg});
+
+ if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
+ LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
+ return false;
+ }
- if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
- LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
- return false;
- }
+ MachineOperand &ImmOp = I.getOperand(1);
+ ImmOp.ChangeToImmediate(
+ ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
- MachineOperand &ImmOp = I.getOperand(1);
- ImmOp.ChangeToImmediate(
- ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
+ const unsigned MovOpc =
+ DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
+ I.setDesc(TII.get(MovOpc));
+ constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+ return true;
+ }
- const unsigned MovOpc =
- DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
- I.setDesc(TII.get(MovOpc));
- constrainSelectedInstRegOperands(I, TII, TRI, RBI);
- return true;
+ // Expand to a constant pool.
+ auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
+ if (!LoadMI) {
+ LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n");
+ return false;
+ }
+ MIB.buildCopy({DefReg}, {LoadMI->getOperand(0).getReg()});
+ I.eraseFromParent();
+ return RBI.constrainGenericRegister(DefReg, FPRRC, MRI);
}
case TargetOpcode::G_EXTRACT: {
Register DstReg = I.getOperand(0).getReg();
@@ -5276,175 +5278,6 @@ bool AArch64InstructionSelector::selectUSMovFromExtend(
return true;
}
-MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm8(
- Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
- unsigned int Op;
- if (DstSize == 128) {
- if (Bits.getHiBits(64) != Bits.getLoBits(64))
- return nullptr;
- Op = AArch64::MOVIv16b_ns;
- } else {
- Op = AArch64::MOVIv8b_ns;
- }
-
- uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
-
- if (AArch64_AM::isAdvSIMDModImmType9(Val)) {
- Val = AArch64_AM::encodeAdvSIMDModImmType9(Val);
- auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
- constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
- return &*Mov;
- }
- return nullptr;
-}
-
-MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm16(
- Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
- bool Inv) {
-
- unsigned int Op;
- if (DstSize == 128) {
- if (Bits.getHiBits(64) != Bits.getLoBits(64))
- return nullptr;
- Op = Inv ? AArch64::MVNIv8i16 : AArch64::MOVIv8i16;
- } else {
- Op = Inv ? AArch64::MVNIv4i16 : AArch64::MOVIv4i16;
- }
-
- uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
- uint64_t Shift;
-
- if (AArch64_AM::isAdvSIMDModImmType5(Val)) {
- Val = AArch64_AM::encodeAdvSIMDModImmType5(Val);
- Shift = 0;
- } else if (AArch64_AM::isAdvSIMDModImmType6(Val)) {
- Val = AArch64_AM::encodeAdvSIMDModImmType6(Val);
- Shift = 8;
- } else
- return nullptr;
-
- auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val).addImm(Shift);
- constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
- return &*Mov;
-}
-
-MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm32(
- Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
- bool Inv) {
-
- unsigned int Op;
- if (DstSize == 128) {
- if (Bits.getHiBits(64) != Bits.getLoBits(64))
- return nullptr;
- Op = Inv ? AArch64::MVNIv4i32 : AArch64::MOVIv4i32;
- } else {
- Op = Inv ? AArch64::MVNIv2i32 : AArch64::MOVIv2i32;
- }
-
- uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
- uint64_t Shift;
-
- if ((AArch64_AM::isAdvSIMDModImmType1(Val))) {
- Val = AArch64_AM::encodeAdvSIMDModImmType1(Val);
- Shift = 0;
- } else if ((AArch64_AM::isAdvSIMDModImmType2(Val))) {
- Val = AArch64_AM::encodeAdvSIMDModImmType2(Val);
- Shift = 8;
- } else if ((AArch64_AM::isAdvSIMDModImmType3(Val))) {
- Val = AArch64_AM::encodeAdvSIMDModImmType3(Val);
- Shift = 16;
- } else if ((AArch64_AM::isAdvSIMDModImmType4(Val))) {
- Val = AArch64_AM::encodeAdvSIMDModImmType4(Val);
- Shift = 24;
- } else
- return nullptr;
-
- auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val).addImm(Shift);
- constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
- return &*Mov;
-}
-
-MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm64(
- Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
-
- unsigned int Op;
- if (DstSize == 128) {
- if (Bits.getHiBits(64) != Bits.getLoBits(64))
- return nullptr;
- Op = AArch64::MOVIv2d_ns;
- } else {
- Op = AArch64::MOVID;
- }
-
- uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
- if (AArch64_AM::isAdvSIMDModImmType10(Val)) {
- Val = AArch64_AM::encodeAdvSIMDModImmType10(Val);
- auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
- constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
- return &*Mov;
- }
- return nullptr;
-}
-
-MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm321s(
- Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
- bool Inv) {
-
- unsigned int Op;
- if (DstSize == 128) {
- if (Bits.getHiBits(64) != Bits.getLoBits(64))
- return nullptr;
- Op = Inv ? AArch64::MVNIv4s_msl : AArch64::MOVIv4s_msl;
- } else {
- Op = Inv ? AArch64::MVNIv2s_msl : AArch64::MOVIv2s_msl;
- }
-
- uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
- uint64_t Shift;
-
- if (AArch64_AM::isAdvSIMDModImmType7(Val)) {
- Val = AArch64_AM::encodeAdvSIMDModImmType7(Val);
- Shift = 264;
- } else if (AArch64_AM::isAdvSIMDModImmType8(Val)) {
- Val = AArch64_AM::encodeAdvSIMDModImmType8(Val);
- Shift = 272;
- } else
- return nullptr;
-
- auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val).addImm(Shift);
- constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
- return &*Mov;
-}
-
-MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImmFP(
- Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
-
- unsigned int Op;
- bool IsWide = false;
- if (DstSize == 128) {
- if (Bits.getHiBits(64) != Bits.getLoBits(64))
- return nullptr;
- Op = AArch64::FMOVv4f32_ns;
- IsWide = true;
- } else {
- Op = AArch64::FMOVv2f32_ns;
- }
-
- uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
-
- if (AArch64_AM::isAdvSIMDModImmType11(Val)) {
- Val = AArch64_AM::encodeAdvSIMDModImmType11(Val);
- } else if (IsWide && AArch64_AM::isAdvSIMDModImmType12(Val)) {
- Val = AArch64_AM::encodeAdvSIMDModImmType12(Val);
- Op = AArch64::FMOVv2f64_ns;
- } else
- return nullptr;
-
- auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
- constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
- return &*Mov;
-}
-
bool AArch64InstructionSelector::selectIndexedExtLoad(
MachineInstr &MI, MachineRegisterInfo &MRI) {
auto &ExtLd = cast<GIndexedAnyExtLoad>(MI);
@@ -5653,103 +5486,108 @@ bool AArch64InstructionSelector::selectIndexedStore(GIndexedStore &I,
}
MachineInstr *
-AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
- MachineIRBuilder &MIRBuilder,
- MachineRegisterInfo &MRI) {
- LLT DstTy = MRI.getType(Dst);
- unsigned DstSize = DstTy.getSizeInBits();
+AArch64InstructionSelector::emitConstantMOVVector(DstOp Dst, APInt Imm,
+ MachineIRBuilder &MIRBuilder,
+ MachineRegisterInfo &MRI) {
+ unsigned DstSize = Imm.getBitWidth();
assert((DstSize == 64 || DstSize == 128) &&
"Unexpected vector constant size");
- if (CV->isNullValue()) {
- if (DstSize == 128) {
- auto Mov =
- MIRBuilder.buildInstr(AArch64::MOVIv2d_ns, {Dst}, {}).addImm(0);
- constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
- return &*Mov;
- }
-
- if (DstSize == 64) {
- auto Mov =
- MIRBuilder
- .buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {})
- .addImm(0);
- auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {Dst}, {})
- .addReg(Mov.getReg(0), {}, AArch64::dsub);
- RBI.constrainGenericRegister(Dst, AArch64::FPR64RegClass, MRI);
- return &*Copy;
+ SmallVector<AArch64_IMM::ImmInsnModel> Insns;
+ if (AArch64_IMM::expandVectorMOVImm(Imm, &STI, Insns)) {
+ for (AArch64_IMM::ImmInsnModel Insn : Insns) {
+ switch (Insn.Opcode) {
+ case AArch64::FMOVD0: {
+        // Currently emit a MOVIv2d_ns for both 64-bit and 128-bit
+ auto Mov =
+ MIRBuilder
+ .buildInstr(
+ AArch64::MOVIv2d_ns,
+ {DstSize == 128 ? Dst : DstOp(&AArch64::FPR128RegClass)},
+ {})
+ .addImm(0);
+ constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
+ if (DstSize != 128) {
+ Mov = MIB.buildInstr(TargetOpcode::COPY, {Dst}, {})
+ .addReg(Mov.getReg(0), {}, AArch64::dsub);
+ RBI.constrainGenericRegister(Mov.getReg(0), AArch64::FPR64RegClass,
+ MRI);
+ }
+ return &*Mov;
+ }
+ case AArch64::MOVID:
+ case AArch64::MOVIv2d_ns:
+ case AArch64::MOVIv8b_ns:
+ case AArch64::MOVIv16b_ns:
+ case AArch64::FMOVv2f32_ns:
+ case AArch64::FMOVv4f32_ns:
+ case AArch64::FMOVDi:
+ case AArch64::FMOVv2f64_ns: {
+ auto Mov =
+ MIRBuilder.buildInstr(Insn.Opcode, {Dst}, {}).addImm(Insn.Op1);
+ constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
+ return &*Mov;
+ }
+ case AArch64::MOVIv2i32:
+ case AArch64::MOVIv4i32:
+ case AArch64::MOVIv4i16:
+ case AArch64::MOVIv8i16:
+ case AArch64::MOVIv2s_msl:
+ case AArch64::MOVIv4s_msl:
+ case AArch64::MVNIv2i32:
+ case AArch64::MVNIv4i32:
+ case AArch64::MVNIv4i16:
+ case AArch64::MVNIv8i16:
+ case AArch64::MVNIv2s_msl:
+ case AArch64::MVNIv4s_msl: {
+ auto Mov = MIRBuilder.buildInstr(Insn.Opcode, {Dst}, {})
+ .addImm(Insn.Op1)
+ .addImm(Insn.Op2);
+ constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
+ return &*Mov;
+ }
+ case AArch64::DUPM_ZI: {
+ auto Mov =
+ MIRBuilder.buildInstr(Insn.Opcode, {&AArch64::ZPRRegClass}, {})
+ .addImm(Insn.Op1);
+ constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
+ Mov = MIB.buildInstr(TargetOpcode::COPY, {Dst}, {})
+ .addReg(Mov.getReg(0), {},
+ DstSize == 64 ? AArch64::dsub : AArch64::zsub);
+ RBI.constrainGenericRegister(Mov.getReg(0),
+ DstSize == 64 ? AArch64::FPR64RegClass
+ : AArch64::FPR128RegClass,
+ MRI);
+ return &*Mov;
+ }
+ case AArch64::DUP_ZI_D: {
+ auto Mov =
+ MIRBuilder.buildInstr(Insn.Opcode, {&AArch64::ZPRRegClass}, {})
+ .addImm(Insn.Op1)
+ .addImm(Insn.Op2);
+ constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
+ Mov = MIB.buildInstr(TargetOpcode::COPY, {Dst}, {})
+ .addReg(Mov.getReg(0), {},
+ DstSize == 64 ? AArch64::dsub : AArch64::zsub);
+ RBI.constrainGenericRegister(Mov.getReg(0),
+ DstSize == 64 ? AArch64::FPR64RegClass
+ : AArch64::FPR128RegClass,
+ MRI);
+ return &*Mov;
+ }
+ default:
+ llvm_unreachable("Unexpected node in expandVectorMOVImm\n");
+ }
}
}
+ return nullptr;
+}
- if (Constant *SplatValue = CV->getSplatValue()) {
- APInt SplatValueAsInt =
- isa<ConstantFP>(SplatValue)
- ? cast<ConstantFP>(SplatValue)->getValueAPF().bitcastToAPInt()
- : SplatValue->getUniqueInteger();
- APInt DefBits = APInt::getSplat(
- DstSize, SplatValueAsInt.trunc(DstTy.getScalarSizeInBits()));
- auto TryMOVIWithBits = [&](APInt DefBits) -> MachineInstr * {
- MachineInstr *NewOp;
- bool Inv = false;
- if ((NewOp = tryAdvSIMDModImm64(Dst, DstSize, DefBits, MIRBuilder)) ||
- (NewOp =
- tryAdvSIMDModImm32(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
- (NewOp =
- tryAdvSIMDModImm321s(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
- (NewOp =
- tryAdvSIMDModImm16(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
- (NewOp = tryAdvSIMDModImm8(Dst, DstSize, DefBits, MIRBuilder)) ||
- (NewOp = tryAdvSIMDModImmFP(Dst, DstSize, DefBits, MIRBuilder)))
- return NewOp;
-
- DefBits = ~DefBits;
- Inv = true;
- if ((NewOp =
- tryAdvSIMDModImm32(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
- (NewOp =
- tryAdvSIMDModImm321s(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
- (NewOp = tryAdvSIMDModImm16(Dst, DstSize, DefBits, MIRBuilder, Inv)))
- return NewOp;
- return nullptr;
- };
-
- if (auto *NewOp = TryMOVIWithBits(DefBits))
- return NewOp;
-
- // See if a fneg of the constant can be materialized with a MOVI, etc
- auto TryWithFNeg = [&](APInt DefBits, int NumBits,
- unsigned NegOpc) -> MachineInstr * {
- // FNegate each sub-element of the constant
- APInt Neg = APInt::getHighBitsSet(NumBits, 1).zext(DstSize);
- APInt NegBits(DstSize, 0);
- unsigned NumElts = DstSize / NumBits;
- for (unsigned i = 0; i < NumElts; i++)
- NegBits |= Neg << (NumBits * i);
- NegBits = DefBits ^ NegBits;
-
- // Try to create the new constants with MOVI, and if so generate a fneg
- // for it.
- if (auto *NewOp = TryMOVIWithBits(NegBits)) {
- Register NewDst = MRI.createVirtualRegister(
- DstSize == 64 ? &AArch64::FPR64RegClass : &AArch64::FPR128RegClass);
- NewOp->getOperand(0).setReg(NewDst);
- return MIRBuilder.buildInstr(NegOpc, {Dst}, {NewDst});
- }
- return nullptr;
- };
- MachineInstr *R;
- if ((R = TryWithFNeg(DefBits, 32,
- DstSize == 64 ? AArch64::FNEGv2f32
- : AArch64::FNEGv4f32)) ||
- (R = TryWithFNeg(DefBits, 64,
- DstSize == 64 ? AArch64::FNEGDr
- : AArch64::FNEGv2f64)) ||
- (STI.hasFullFP16() &&
- (R = TryWithFNeg(DefBits, 16,
- DstSize == 64 ? AArch64::FNEGv4f16
- : AArch64::FNEGv8f16))))
- return R;
- }
+MachineInstr *AArch64InstructionSelector::emitConstantVector(
+ Register Dst, Constant *CV, APInt Imm, MachineIRBuilder &MIRBuilder,
+ MachineRegisterInfo &MRI) {
+ if (auto *MovI = emitConstantMOVVector(Dst, Imm, MIRBuilder, MRI))
+ return MovI;
auto *CPLoad = emitLoadFromConstantPool(CV, MIRBuilder);
if (!CPLoad) {
@@ -5773,12 +5611,15 @@ bool AArch64InstructionSelector::tryOptConstantBuildVec(
// Check if we're building a constant vector, in which case we want to
// generate a constant pool load instead of a vector insert sequence.
SmallVector<Constant *, 16> Csts;
+ APInt Imm(DstSize, 0);
for (unsigned Idx = 1; Idx < I.getNumOperands(); ++Idx) {
Register OpReg = I.getOperand(Idx).getReg();
if (auto AnyConst = getAnyConstantVRegValWithLookThrough(
OpReg, MRI, /*LookThroughInstrs=*/true,
/*LookThroughAnyExt=*/true)) {
MachineInstr *DefMI = MRI.getVRegDef(AnyConst->VReg);
+ Imm |= AnyConst->Value.zext(DstSize).shl((Idx - 1) *
+ DstTy.getScalarSizeInBits());
if (DefMI->getOpcode() == TargetOpcode::G_CONSTANT) {
Csts.emplace_back(
@@ -5796,7 +5637,7 @@ bool AArch64InstructionSelector::tryOptConstantBuildVec(
return false;
}
Constant *CV = ConstantVector::get(Csts);
- if (!emitConstantVector(I.getOperand(0).getReg(), CV, MIB, MRI))
+ if (!emitConstantVector(I.getOperand(0).getReg(), CV, Imm, MIB, MRI))
return false;
I.eraseFromParent();
return true;
@@ -7871,7 +7712,7 @@ AArch64InstructionSelector::selectCVTFixedPointVecBase(
APFloat FVal(0.0);
switch (RegWidth) {
case 16:
- FVal = APFloat(APFloat::IEEEhalf(), CstVal->Value);
+ FVal = APFloat(APFloat::IEEEhalf(), CstVal->Value.trunc(RegWidth));
break;
case 32:
FVal = APFloat(APFloat::IEEEsingle(), CstVal->Value);
@@ -7972,17 +7813,6 @@ void AArch64InstructionSelector::renderFPImm64(MachineInstrBuilder &MIB,
AArch64_AM::getFP64Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
}
-void AArch64InstructionSelector::renderFPImm32SIMDModImmType4(
- MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
- assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
- "Expected G_FCONSTANT");
- MIB.addImm(AArch64_AM::encodeAdvSIMDModImmType4(MI.getOperand(1)
- .getFPImm()
- ->getValueAPF()
- .bitcastToAPInt()
- .getZExtValue()));
-}
-
bool AArch64InstructionSelector::isLoadStoreOfNumBytes(
const MachineInstr &MI, unsigned NumBytes) const {
if (!MI.mayLoadOrStore())
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
index 631c09db7242e..8e5618163d45d 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
@@ -19,6 +19,7 @@
///
//===----------------------------------------------------------------------===//
+#include "AArch64ExpandImm.h"
#include "AArch64TargetMachine.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
index 2b34a89a1db64..2625b0053570d 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
@@ -1110,6 +1110,67 @@ void applyLowerBuildToInsertVecElt(MachineInstr &MI, MachineRegisterInfo &MRI,
GBuildVec->eraseFromParent();
}
+bool matchConstBuildVectorToFNeg(MachineInstr &MI, MachineRegisterInfo &MRI,
+ const AArch64Subtarget &ST,
+ std::pair<APInt, unsigned> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+ // Limit to v2f32, v4f32, f64, v2f64 or v4f16 and v8f16 with fullfp16.
+ if (Ty.getSizeInBits() != 64 && Ty.getSizeInBits() != 128)
+ return false;
+ unsigned EltSize = Ty.getScalarSizeInBits();
+
+ APInt Imm(Ty.getSizeInBits(), 0);
+ for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx++) {
+ auto C =
+ getAnyConstantVRegValWithLookThrough(MI.getOperand(Idx).getReg(), MRI);
+ if (!C)
+ return false;
+ Imm |= C->Value.trunc(EltSize)
+ .zext(Ty.getSizeInBits())
+ .shl((Idx - 1) * EltSize);
+ }
+
+ SmallVector<AArch64_IMM::ImmInsnModel> Insns;
+ if (expandVectorMOVImm(Imm, &ST, Insns))
+ return false;
+
+ auto Check = [&](APInt &Imm, unsigned Size) {
+ APInt NImm = Imm ^ APInt::getSplat(Imm.getBitWidth(),
+ APInt::getHighBitsSet(Size, 1));
+ SmallVector<AArch64_IMM::ImmInsnModel> Insns;
+ MatchInfo.first = NImm;
+ MatchInfo.second = Size;
+ return expandVectorMOVImm(NImm, &ST, Insns);
+ };
+
+ return Check(Imm, 64) || Check(Imm, 32) ||
+ (ST.hasFullFP16() && Check(Imm, 16));
+}
+
+void applyConstBuildVectorToFNeg(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B,
+ std::pair<APInt, unsigned> &MatchInfo) {
+ B.setInstrAndDebugLoc(MI);
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+ // Extract the Immediate into chunks of size EltSize
+ unsigned EltSize = MatchInfo.second;
+ unsigned NumLanes = Ty.getSizeInBits() / EltSize;
+ LLT VTy = NumLanes == 1 ? LLT::scalar(EltSize)
+ : LLT::fixed_vector(NumLanes, EltSize);
+ SmallVector<APInt> Imms;
+ for (unsigned I = 0; I < NumLanes; I++)
+ Imms.push_back(MatchInfo.first.extractBits(EltSize, I * EltSize));
+ // Build the new BV and FNeg of it.
+ auto BV = NumLanes == 1 ? B.buildFConstant(VTy, APFloat(APFloat::IEEEdouble(),
+ MatchInfo.first))
+ : B.buildBuildVectorConstant(VTy, Imms);
+ auto FNeg = B.buildFNeg(Ty == VTy ? DstOp(MI.getOperand(0)) : DstOp(VTy), BV);
+ if (Ty != VTy)
+ B.buildBitcast(MI.getOperand(0), FNeg);
+ MI.eraseFromParent();
+}
+
bool matchFormTruncstore(MachineInstr &MI, MachineRegisterInfo &MRI,
Register &SrcReg) {
assert(MI.getOpcode() == TargetOpcode::G_STORE);
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
index 2657a89f9d9cf..482a85cd01bd1 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "AArch64RegisterBankInfo.h"
+#include "AArch64ExpandImm.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
@@ -379,13 +380,10 @@ static bool preferGPRForFPImm(const MachineInstr &MI,
const APFloat Imm = MI.getOperand(1).getFPImm()->getValueAPF();
const APInt ImmBits = Imm.bitcastToAPInt();
- // Check if we can encode this as a movi. Note, we only have one pattern so
- // far for movis, hence the one check.
- if (Size == 32) {
- uint64_t Val = APInt::getSplat(64, ImmBits).getZExtValue();
- if (AArch64_AM::isAdvSIMDModImmType4(Val))
- return false;
- }
+ // Check if we can encode this as a movi.
+ SmallVector<AArch64_IMM::ImmInsnModel> Insn;
+ if (AArch64_IMM::expandVectorMOVImm(APInt::getSplat(64, ImmBits), &STI, Insn))
+ return false;
// We want to use GPR when the value cannot be encoded as the immediate value
// of a fmov and when it will not result in a constant pool load. As
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-fp16-fconstant.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-fp16-fconstant.mir
index 5b6726d6e5bf3..a6be213319e10 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-fp16-fconstant.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-fp16-fconstant.mir
@@ -39,9 +39,9 @@ tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: constant_pool_load
- ; CHECK: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0
- ; CHECK-NEXT: [[LDRHui:%[0-9]+]]:fpr16 = LDRHui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) %const.0 :: (load (s16) from constant-pool)
- ; CHECK-NEXT: $h0 = COPY [[LDRHui]]
+ ; CHECK: [[MOVIv4i16_:%[0-9]+]]:fpr64 = MOVIv4i16 11, 0
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr16 = COPY [[MOVIv4i16_]].hsub
+ ; CHECK-NEXT: $h0 = COPY [[COPY]]
; CHECK-NEXT: RET_ReallyLR implicit $h0
%0:fpr(s16) = G_FCONSTANT half 0xH000B
$h0 = COPY %0(s16)
diff --git a/llvm/test/CodeGen/AArch64/arm64-build-vector.ll b/llvm/test/CodeGen/AArch64/arm64-build-vector.ll
index 914f431866cce..ff8f84b1ee51a 100644
--- a/llvm/test/CodeGen/AArch64/arm64-build-vector.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-build-vector.ll
@@ -125,10 +125,9 @@ define <1 x double> @convert_single_fp_vector_constant(i1 %cmp) {
; CHECK-SD-LABEL: convert_single_fp_vector_constant:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: tst w0, #0x1
-; CHECK-SD-NEXT: mov x8, #4607182418800017408 // =0x3ff0000000000000
-; CHECK-SD-NEXT: csetm x9, ne
-; CHECK-SD-NEXT: fmov d0, x8
-; CHECK-SD-NEXT: fmov d1, x9
+; CHECK-SD-NEXT: fmov d0, #1.00000000
+; CHECK-SD-NEXT: csetm x8, ne
+; CHECK-SD-NEXT: fmov d1, x8
; CHECK-SD-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-SD-NEXT: ret
;
@@ -158,12 +157,19 @@ define <2 x double> @poszero_v2f64(<2 x double> %a) {
}
define <2 x double> @negzero_v2f64(<2 x double> %a) {
-; CHECK-LABEL: negzero_v2f64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v1.2d, #0000000000000000
-; CHECK-NEXT: fneg v1.2d, v1.2d
-; CHECK-NEXT: fmul v0.2d, v0.2d, v1.2d
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: negzero_v2f64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi d1, #0000000000000000
+; CHECK-SD-NEXT: fneg v1.2d, v1.2d
+; CHECK-SD-NEXT: fmul v0.2d, v0.2d, v1.2d
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: negzero_v2f64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v1.2d, #0000000000000000
+; CHECK-GI-NEXT: fneg v1.2d, v1.2d
+; CHECK-GI-NEXT: fmul v0.2d, v0.2d, v1.2d
+; CHECK-GI-NEXT: ret
%b = fmul <2 x double> %a, <double -0.0, double -0.0>
ret <2 x double> %b
}
diff --git a/llvm/test/CodeGen/AArch64/arm64-fp-imm-size.ll b/llvm/test/CodeGen/AArch64/arm64-fp-imm-size.ll
index 9e187378eea68..036656e694190 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fp-imm-size.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fp-imm-size.ll
@@ -41,12 +41,8 @@ define float @bar() optsize {
define fp128 @baz() optsize {
; CHECK-SD-LABEL: baz:
; CHECK-SD: ; %bb.0:
-; CHECK-SD-NEXT: Lloh6:
-; CHECK-SD-NEXT: adrp x8, lCPI3_0 at PAGE
-; CHECK-SD-NEXT: Lloh7:
-; CHECK-SD-NEXT: ldr q0, [x8, lCPI3_0 at PAGEOFF]
+; CHECK-SD-NEXT: movi d0, #0000000000000000
; CHECK-SD-NEXT: ret
-; CHECK-SD-NEXT: .loh AdrpLdr Lloh6, Lloh7
;
; CHECK-GI-LABEL: baz:
; CHECK-GI: ; %bb.0:
@@ -58,12 +54,12 @@ define fp128 @baz() optsize {
define double @foo2_pgso() !prof !14 {
; CHECK-SD-LABEL: foo2_pgso:
; CHECK-SD: ; %bb.0:
-; CHECK-SD-NEXT: Lloh8:
+; CHECK-SD-NEXT: Lloh6:
; CHECK-SD-NEXT: adrp x8, lCPI4_0 at PAGE
-; CHECK-SD-NEXT: Lloh9:
+; CHECK-SD-NEXT: Lloh7:
; CHECK-SD-NEXT: ldr d0, [x8, lCPI4_0 at PAGEOFF]
; CHECK-SD-NEXT: ret
-; CHECK-SD-NEXT: .loh AdrpLdr Lloh8, Lloh9
+; CHECK-SD-NEXT: .loh AdrpLdr Lloh6, Lloh7
;
; CHECK-GI-LABEL: foo2_pgso:
; CHECK-GI: ; %bb.0:
@@ -77,12 +73,12 @@ define double @foo2_pgso() !prof !14 {
define float @bar_pgso() !prof !14 {
; CHECK-SD-LABEL: bar_pgso:
; CHECK-SD: ; %bb.0:
-; CHECK-SD-NEXT: Lloh10:
+; CHECK-SD-NEXT: Lloh8:
; CHECK-SD-NEXT: adrp x8, lCPI5_0 at PAGE
-; CHECK-SD-NEXT: Lloh11:
+; CHECK-SD-NEXT: Lloh9:
; CHECK-SD-NEXT: ldr s0, [x8, lCPI5_0 at PAGEOFF]
; CHECK-SD-NEXT: ret
-; CHECK-SD-NEXT: .loh AdrpLdr Lloh10, Lloh11
+; CHECK-SD-NEXT: .loh AdrpLdr Lloh8, Lloh9
;
; CHECK-GI-LABEL: bar_pgso:
; CHECK-GI: ; %bb.0:
diff --git a/llvm/test/CodeGen/AArch64/arm64-fp-imm.ll b/llvm/test/CodeGen/AArch64/arm64-fp-imm.ll
index d7c5f2ae35766..0e4ebfd867a21 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fp-imm.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fp-imm.ll
@@ -27,12 +27,8 @@ define float @bar() {
define fp128 @baz() {
; CHECK-SD-LABEL: baz:
; CHECK-SD: ; %bb.0:
-; CHECK-SD-NEXT: Lloh2:
-; CHECK-SD-NEXT: adrp x8, lCPI2_0 at PAGE
-; CHECK-SD-NEXT: Lloh3:
-; CHECK-SD-NEXT: ldr q0, [x8, lCPI2_0 at PAGEOFF]
+; CHECK-SD-NEXT: movi d0, #0000000000000000
; CHECK-SD-NEXT: ret
-; CHECK-SD-NEXT: .loh AdrpLdr Lloh2, Lloh3
;
; CHECK-GI-LABEL: baz:
; CHECK-GI: ; %bb.0:
diff --git a/llvm/test/CodeGen/AArch64/arm64-fp128.ll b/llvm/test/CodeGen/AArch64/arm64-fp128.ll
index c4f91c66fb9a6..743bec78190cc 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fp128.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fp128.ll
@@ -1448,24 +1448,22 @@ define <2 x fp128> @vec_extend_f64(<2 x double> %val) {
define <2 x fp128> @vec_neg_sub(<2 x fp128> %in) {
; CHECK-SD-LABEL: vec_neg_sub:
; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: sub sp, sp, #64
-; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Spill
-; CHECK-SD-NEXT: .cfi_def_cfa_offset 64
+; CHECK-SD-NEXT: sub sp, sp, #48
+; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 48
; CHECK-SD-NEXT: .cfi_offset w30, -16
-; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill
+; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill
; CHECK-SD-NEXT: mov v1.16b, v0.16b
-; CHECK-SD-NEXT: adrp x8, .LCPI47_0
-; CHECK-SD-NEXT: ldr q0, [x8, :lo12:.LCPI47_0]
-; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill
+; CHECK-SD-NEXT: movi d0, #0000000000000000
; CHECK-SD-NEXT: bl __subtf3
-; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill
-; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload
-; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload
+; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill
+; CHECK-SD-NEXT: movi d0, #0000000000000000
+; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload
; CHECK-SD-NEXT: bl __subtf3
; CHECK-SD-NEXT: mov v1.16b, v0.16b
-; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload
-; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Reload
-; CHECK-SD-NEXT: add sp, sp, #64
+; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload
+; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Reload
+; CHECK-SD-NEXT: add sp, sp, #48
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: vec_neg_sub:
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-aba-abd.ll b/llvm/test/CodeGen/AArch64/arm64-neon-aba-abd.ll
index 062f5de38c45b..1df28931f700c 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-aba-abd.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-aba-abd.ll
@@ -206,11 +206,10 @@ define <2 x i32> @test_sabd_v2i32_const() {
;
; CHECK-GI-LABEL: test_sabd_v2i32_const:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI19_1
-; CHECK-GI-NEXT: adrp x9, .LCPI19_0
-; CHECK-GI-NEXT: ldr d0, [x8, :lo12:.LCPI19_1]
-; CHECK-GI-NEXT: ldr d1, [x9, :lo12:.LCPI19_0]
-; CHECK-GI-NEXT: sabd v0.2s, v0.2s, v1.2s
+; CHECK-GI-NEXT: movi d0, #0x00ffffffff0000
+; CHECK-GI-NEXT: adrp x8, .LCPI19_0
+; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI19_0]
+; CHECK-GI-NEXT: sabd v0.2s, v1.2s, v0.2s
; CHECK-GI-NEXT: ret
%1 = tail call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(
<2 x i32> <i32 -2147483648, i32 2147450880>,
diff --git a/llvm/test/CodeGen/AArch64/bf16-imm.ll b/llvm/test/CodeGen/AArch64/bf16-imm.ll
index 7383a741c5fc3..49c50dbc09146 100644
--- a/llvm/test/CodeGen/AArch64/bf16-imm.ll
+++ b/llvm/test/CodeGen/AArch64/bf16-imm.ll
@@ -21,8 +21,7 @@ define bfloat @Const1() {
;
; CHECK-NOFP16-LABEL: Const1:
; CHECK-NOFP16: // %bb.0: // %entry
-; CHECK-NOFP16-NEXT: adrp x8, .LCPI1_0
-; CHECK-NOFP16-NEXT: ldr h0, [x8, :lo12:.LCPI1_0]
+; CHECK-NOFP16-NEXT: movi v0.4h, #60, lsl #8
; CHECK-NOFP16-NEXT: ret
entry:
ret bfloat 0xR3C00
@@ -36,8 +35,7 @@ define bfloat @Const2() {
;
; CHECK-NOFP16-LABEL: Const2:
; CHECK-NOFP16: // %bb.0: // %entry
-; CHECK-NOFP16-NEXT: adrp x8, .LCPI2_0
-; CHECK-NOFP16-NEXT: ldr h0, [x8, :lo12:.LCPI2_0]
+; CHECK-NOFP16-NEXT: movi v0.4h, #48, lsl #8
; CHECK-NOFP16-NEXT: ret
entry:
ret bfloat 0xR3000
@@ -118,23 +116,10 @@ entry:
}
define bfloat @Const7() {
-; CHECK-FP16-SD-LABEL: Const7:
-; CHECK-FP16-SD: // %bb.0: // %entry
-; CHECK-FP16-SD-NEXT: mov w8, #20480 // =0x5000
-; CHECK-FP16-SD-NEXT: fmov h0, w8
-; CHECK-FP16-SD-NEXT: ret
-;
-; CHECK-NOFP16-LABEL: Const7:
-; CHECK-NOFP16: // %bb.0: // %entry
-; CHECK-NOFP16-NEXT: adrp x8, .LCPI7_0
-; CHECK-NOFP16-NEXT: ldr h0, [x8, :lo12:.LCPI7_0]
-; CHECK-NOFP16-NEXT: ret
-;
-; CHECK-FP16-GI-LABEL: Const7:
-; CHECK-FP16-GI: // %bb.0: // %entry
-; CHECK-FP16-GI-NEXT: adrp x8, .LCPI7_0
-; CHECK-FP16-GI-NEXT: ldr h0, [x8, :lo12:.LCPI7_0]
-; CHECK-FP16-GI-NEXT: ret
+; CHECK-LABEL: Const7:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movi v0.4h, #80, lsl #8
+; CHECK-NEXT: ret
entry:
ret bfloat 0xR5000
}
diff --git a/llvm/test/CodeGen/AArch64/bf16-instructions.ll b/llvm/test/CodeGen/AArch64/bf16-instructions.ll
index c965d11ffc2e0..3b34c9a72e803 100644
--- a/llvm/test/CodeGen/AArch64/bf16-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/bf16-instructions.ll
@@ -671,10 +671,9 @@ define void @test_fccmp(bfloat %in, ptr %out) {
; CHECK-CVT-NEXT: movi v1.2s, #69, lsl #24
; CHECK-CVT-NEXT: // kill: def $h0 killed $h0 def $d0
; CHECK-CVT-NEXT: shll v2.4s, v0.4h, #16
-; CHECK-CVT-NEXT: adrp x8, .LCPI29_0
; CHECK-CVT-NEXT: movi v3.2s, #72, lsl #24
; CHECK-CVT-NEXT: fcmp s2, s1
-; CHECK-CVT-NEXT: ldr h1, [x8, :lo12:.LCPI29_0]
+; CHECK-CVT-NEXT: movi v1.4h, #69, lsl #8
; CHECK-CVT-NEXT: fccmp s2, s3, #4, mi
; CHECK-CVT-NEXT: fcsel s0, s0, s1, gt
; CHECK-CVT-NEXT: str h0, [x0]
@@ -685,10 +684,9 @@ define void @test_fccmp(bfloat %in, ptr %out) {
; CHECK-SD-NEXT: movi v1.2s, #69, lsl #24
; CHECK-SD-NEXT: // kill: def $h0 killed $h0 def $d0
; CHECK-SD-NEXT: shll v2.4s, v0.4h, #16
-; CHECK-SD-NEXT: adrp x8, .LCPI29_0
; CHECK-SD-NEXT: movi v3.2s, #72, lsl #24
; CHECK-SD-NEXT: fcmp s2, s1
-; CHECK-SD-NEXT: ldr h1, [x8, :lo12:.LCPI29_0]
+; CHECK-SD-NEXT: movi v1.4h, #69, lsl #8
; CHECK-SD-NEXT: fccmp s2, s3, #4, mi
; CHECK-SD-NEXT: fcsel s0, s0, s1, gt
; CHECK-SD-NEXT: str h0, [x0]
diff --git a/llvm/test/CodeGen/AArch64/clmul-fixed.ll b/llvm/test/CodeGen/AArch64/clmul-fixed.ll
index 70ddde74aafbc..e40ed025a6add 100644
--- a/llvm/test/CodeGen/AArch64/clmul-fixed.ll
+++ b/llvm/test/CodeGen/AArch64/clmul-fixed.ll
@@ -916,7 +916,7 @@ define <2 x i64> @clmul_v2i64_neon(<2 x i64> %x, <2 x i64> %y) {
; CHECK-NEON-NEXT: dup v4.2d, x3
; CHECK-NEON-NEXT: eor v2.16b, v2.16b, v16.16b
; CHECK-NEON-NEXT: and v3.16b, v1.16b, v17.16b
-; CHECK-NEON-NEXT: movi v17.2d, #0000000000000000
+; CHECK-NEON-NEXT: movi d17, #0000000000000000
; CHECK-NEON-NEXT: dup v16.2d, x20
; CHECK-NEON-NEXT: mul x15, x8, x25
; CHECK-NEON-NEXT: mov v5.d[1], x21
@@ -1028,442 +1028,441 @@ define <1 x i64> @clmul_v1i64_neon(<1 x i64> %x, <1 x i64> %y) {
; CHECK-NEON-NEXT: mov w10, #8 // =0x8
; CHECK-NEON-NEXT: fmov d2, x8
; CHECK-NEON-NEXT: mov w8, #1 // =0x1
+; CHECK-NEON-NEXT: mov w11, #16 // =0x10
; CHECK-NEON-NEXT: fmov d3, x8
; CHECK-NEON-NEXT: fmov x8, d0
; CHECK-NEON-NEXT: fmov d0, x9
+; CHECK-NEON-NEXT: mov w14, #64 // =0x40
; CHECK-NEON-NEXT: and v2.8b, v1.8b, v2.8b
-; CHECK-NEON-NEXT: and v3.8b, v1.8b, v3.8b
; CHECK-NEON-NEXT: and v0.8b, v1.8b, v0.8b
+; CHECK-NEON-NEXT: and v3.8b, v1.8b, v3.8b
; CHECK-NEON-NEXT: fmov x9, d2
; CHECK-NEON-NEXT: fmov d2, x10
-; CHECK-NEON-NEXT: mov w10, #16 // =0x10
-; CHECK-NEON-NEXT: mul x14, x8, x9
-; CHECK-NEON-NEXT: fmov x9, d3
-; CHECK-NEON-NEXT: and v2.8b, v1.8b, v2.8b
-; CHECK-NEON-NEXT: mul x15, x8, x9
-; CHECK-NEON-NEXT: fmov x9, d0
-; CHECK-NEON-NEXT: fmov d0, x10
-; CHECK-NEON-NEXT: fmov x10, d2
-; CHECK-NEON-NEXT: fmov d3, x14
-; CHECK-NEON-NEXT: mul x12, x8, x9
-; CHECK-NEON-NEXT: and v0.8b, v1.8b, v0.8b
-; CHECK-NEON-NEXT: mov w9, #32 // =0x20
-; CHECK-NEON-NEXT: fmov d2, x9
-; CHECK-NEON-NEXT: mov w9, #64 // =0x40
-; CHECK-NEON-NEXT: mul x11, x8, x10
-; CHECK-NEON-NEXT: fmov d4, x15
-; CHECK-NEON-NEXT: fmov x10, d0
-; CHECK-NEON-NEXT: fmov d0, x9
+; CHECK-NEON-NEXT: fmov x12, d0
+; CHECK-NEON-NEXT: fmov d0, x11
+; CHECK-NEON-NEXT: fmov x10, d3
; CHECK-NEON-NEXT: and v2.8b, v1.8b, v2.8b
-; CHECK-NEON-NEXT: fmov d5, x12
-; CHECK-NEON-NEXT: eor v3.8b, v4.8b, v3.8b
-; CHECK-NEON-NEXT: mul x9, x8, x10
+; CHECK-NEON-NEXT: mul x9, x8, x9
; CHECK-NEON-NEXT: and v0.8b, v1.8b, v0.8b
-; CHECK-NEON-NEXT: mov w10, #128 // =0x80
+; CHECK-NEON-NEXT: mul x11, x8, x12
+; CHECK-NEON-NEXT: mov w12, #32 // =0x20
; CHECK-NEON-NEXT: fmov x13, d2
-; CHECK-NEON-NEXT: fmov d2, x10
-; CHECK-NEON-NEXT: mov w10, #256 // =0x100
-; CHECK-NEON-NEXT: fmov d6, x11
-; CHECK-NEON-NEXT: fmov x14, d0
-; CHECK-NEON-NEXT: fmov d0, x10
+; CHECK-NEON-NEXT: fmov d2, x12
+; CHECK-NEON-NEXT: mul x10, x8, x10
; CHECK-NEON-NEXT: and v2.8b, v1.8b, v2.8b
+; CHECK-NEON-NEXT: mul x12, x8, x13
+; CHECK-NEON-NEXT: fmov x13, d0
+; CHECK-NEON-NEXT: fmov d0, x14
+; CHECK-NEON-NEXT: fmov x14, d2
; CHECK-NEON-NEXT: mul x13, x8, x13
-; CHECK-NEON-NEXT: mul x10, x8, x14
-; CHECK-NEON-NEXT: mov w14, #512 // =0x200
+; CHECK-NEON-NEXT: fmov d2, x9
; CHECK-NEON-NEXT: and v0.8b, v1.8b, v0.8b
-; CHECK-NEON-NEXT: fmov x15, d2
-; CHECK-NEON-NEXT: fmov d2, x14
-; CHECK-NEON-NEXT: fmov x12, d0
-; CHECK-NEON-NEXT: mul x14, x8, x15
+; CHECK-NEON-NEXT: fmov d3, x10
+; CHECK-NEON-NEXT: mov w10, #128 // =0x80
+; CHECK-NEON-NEXT: fmov d4, x12
+; CHECK-NEON-NEXT: fmov d5, x10
+; CHECK-NEON-NEXT: mov w10, #256 // =0x100
+; CHECK-NEON-NEXT: mul x14, x8, x14
+; CHECK-NEON-NEXT: fmov x9, d0
+; CHECK-NEON-NEXT: fmov d0, x11
+; CHECK-NEON-NEXT: eor v2.8b, v3.8b, v2.8b
+; CHECK-NEON-NEXT: fmov d3, x13
+; CHECK-NEON-NEXT: mul x9, x8, x9
+; CHECK-NEON-NEXT: eor v0.8b, v0.8b, v4.8b
+; CHECK-NEON-NEXT: fmov d4, x14
+; CHECK-NEON-NEXT: eor v0.8b, v2.8b, v0.8b
+; CHECK-NEON-NEXT: fmov d2, x10
+; CHECK-NEON-NEXT: eor v3.8b, v3.8b, v4.8b
+; CHECK-NEON-NEXT: and v4.8b, v1.8b, v5.8b
+; CHECK-NEON-NEXT: fmov d5, x9
+; CHECK-NEON-NEXT: mov w9, #512 // =0x200
; CHECK-NEON-NEXT: and v2.8b, v1.8b, v2.8b
-; CHECK-NEON-NEXT: mov w15, #1024 // =0x400
-; CHECK-NEON-NEXT: fmov d0, x15
-; CHECK-NEON-NEXT: mov w15, #2048 // =0x800
-; CHECK-NEON-NEXT: fmov d7, x10
-; CHECK-NEON-NEXT: mul x12, x8, x12
-; CHECK-NEON-NEXT: mov w10, #16384 // =0x4000
+; CHECK-NEON-NEXT: fmov x10, d4
+; CHECK-NEON-NEXT: fmov d4, x9
+; CHECK-NEON-NEXT: mov w9, #2048 // =0x800
+; CHECK-NEON-NEXT: eor v3.8b, v3.8b, v5.8b
; CHECK-NEON-NEXT: fmov x11, d2
-; CHECK-NEON-NEXT: fmov d2, x15
-; CHECK-NEON-NEXT: mov w15, #4096 // =0x1000
-; CHECK-NEON-NEXT: and v0.8b, v1.8b, v0.8b
-; CHECK-NEON-NEXT: fmov d4, x15
+; CHECK-NEON-NEXT: fmov d2, x9
+; CHECK-NEON-NEXT: mov w9, #4096 // =0x1000
+; CHECK-NEON-NEXT: and v4.8b, v1.8b, v4.8b
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: eor v0.8b, v0.8b, v3.8b
+; CHECK-NEON-NEXT: fmov d3, x9
+; CHECK-NEON-NEXT: mul x11, x8, x11
; CHECK-NEON-NEXT: and v2.8b, v1.8b, v2.8b
+; CHECK-NEON-NEXT: fmov x12, d4
+; CHECK-NEON-NEXT: and v3.8b, v1.8b, v3.8b
+; CHECK-NEON-NEXT: fmov d4, x10
+; CHECK-NEON-NEXT: fmov x10, d2
+; CHECK-NEON-NEXT: mul x9, x8, x12
+; CHECK-NEON-NEXT: mov w12, #1024 // =0x400
+; CHECK-NEON-NEXT: fmov d5, x12
+; CHECK-NEON-NEXT: mov w12, #8192 // =0x2000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: and v2.8b, v1.8b, v5.8b
+; CHECK-NEON-NEXT: fmov d5, x11
+; CHECK-NEON-NEXT: fmov x11, d3
+; CHECK-NEON-NEXT: fmov d3, x12
+; CHECK-NEON-NEXT: mov w12, #16384 // =0x4000
+; CHECK-NEON-NEXT: fmov x13, d2
; CHECK-NEON-NEXT: mul x11, x8, x11
-; CHECK-NEON-NEXT: fmov x15, d0
-; CHECK-NEON-NEXT: eor v0.8b, v5.8b, v6.8b
-; CHECK-NEON-NEXT: and v4.8b, v1.8b, v4.8b
-; CHECK-NEON-NEXT: fmov d5, x9
-; CHECK-NEON-NEXT: fmov d6, x13
-; CHECK-NEON-NEXT: mov w13, #8192 // =0x2000
-; CHECK-NEON-NEXT: fmov x9, d2
-; CHECK-NEON-NEXT: eor v0.8b, v3.8b, v0.8b
-; CHECK-NEON-NEXT: fmov d3, x10
-; CHECK-NEON-NEXT: mul x15, x8, x15
-; CHECK-NEON-NEXT: eor v2.8b, v5.8b, v6.8b
-; CHECK-NEON-NEXT: fmov d5, x14
-; CHECK-NEON-NEXT: fmov x14, d4
-; CHECK-NEON-NEXT: fmov d4, x13
-; CHECK-NEON-NEXT: fmov d6, x12
-; CHECK-NEON-NEXT: mul x9, x8, x9
+; CHECK-NEON-NEXT: fmov d2, x12
; CHECK-NEON-NEXT: and v3.8b, v1.8b, v3.8b
-; CHECK-NEON-NEXT: mul x12, x8, x14
-; CHECK-NEON-NEXT: eor v2.8b, v2.8b, v7.8b
-; CHECK-NEON-NEXT: and v4.8b, v1.8b, v4.8b
-; CHECK-NEON-NEXT: eor v5.8b, v5.8b, v6.8b
+; CHECK-NEON-NEXT: eor v4.8b, v4.8b, v5.8b
+; CHECK-NEON-NEXT: fmov d5, x10
+; CHECK-NEON-NEXT: mul x12, x8, x13
+; CHECK-NEON-NEXT: mov w13, #32768 // =0x8000
+; CHECK-NEON-NEXT: and v2.8b, v1.8b, v2.8b
+; CHECK-NEON-NEXT: fmov x10, d3
+; CHECK-NEON-NEXT: fmov d3, x13
+; CHECK-NEON-NEXT: mov w13, #65536 // =0x10000
; CHECK-NEON-NEXT: fmov d6, x11
-; CHECK-NEON-NEXT: fmov x11, d3
-; CHECK-NEON-NEXT: eor v0.8b, v0.8b, v2.8b
-; CHECK-NEON-NEXT: fmov x10, d4
-; CHECK-NEON-NEXT: eor v4.8b, v5.8b, v6.8b
+; CHECK-NEON-NEXT: fmov x11, d2
+; CHECK-NEON-NEXT: fmov d2, x13
+; CHECK-NEON-NEXT: mov w13, #131072 // =0x20000
+; CHECK-NEON-NEXT: and v3.8b, v1.8b, v3.8b
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: eor v5.8b, v5.8b, v6.8b
; CHECK-NEON-NEXT: fmov d6, x9
-; CHECK-NEON-NEXT: mov w9, #32768 // =0x8000
-; CHECK-NEON-NEXT: fmov d5, x15
-; CHECK-NEON-NEXT: fmov d7, x12
-; CHECK-NEON-NEXT: fmov d3, x9
-; CHECK-NEON-NEXT: mul x9, x8, x11
-; CHECK-NEON-NEXT: mov w11, #65536 // =0x10000
+; CHECK-NEON-NEXT: and v2.8b, v1.8b, v2.8b
+; CHECK-NEON-NEXT: mul x11, x8, x11
+; CHECK-NEON-NEXT: fmov x14, d3
+; CHECK-NEON-NEXT: fmov d3, x13
+; CHECK-NEON-NEXT: mov w13, #262144 // =0x40000
+; CHECK-NEON-NEXT: eor v4.8b, v4.8b, v6.8b
+; CHECK-NEON-NEXT: fmov d6, x10
+; CHECK-NEON-NEXT: fmov x10, d2
+; CHECK-NEON-NEXT: and v3.8b, v1.8b, v3.8b
+; CHECK-NEON-NEXT: mul x9, x8, x14
+; CHECK-NEON-NEXT: eor v2.8b, v5.8b, v6.8b
+; CHECK-NEON-NEXT: fmov d5, x12
+; CHECK-NEON-NEXT: fmov d6, x11
+; CHECK-NEON-NEXT: fmov x12, d3
+; CHECK-NEON-NEXT: fmov d3, x13
; CHECK-NEON-NEXT: mul x10, x8, x10
-; CHECK-NEON-NEXT: eor v2.8b, v4.8b, v5.8b
-; CHECK-NEON-NEXT: fmov d4, x11
+; CHECK-NEON-NEXT: eor v4.8b, v4.8b, v5.8b
+; CHECK-NEON-NEXT: eor v2.8b, v2.8b, v6.8b
+; CHECK-NEON-NEXT: fmov d6, x9
+; CHECK-NEON-NEXT: mul x11, x8, x12
+; CHECK-NEON-NEXT: mov w12, #524288 // =0x80000
; CHECK-NEON-NEXT: and v3.8b, v1.8b, v3.8b
-; CHECK-NEON-NEXT: eor v6.8b, v6.8b, v7.8b
-; CHECK-NEON-NEXT: and v4.8b, v1.8b, v4.8b
+; CHECK-NEON-NEXT: fmov d5, x12
+; CHECK-NEON-NEXT: eor v0.8b, v0.8b, v4.8b
+; CHECK-NEON-NEXT: eor v2.8b, v2.8b, v6.8b
+; CHECK-NEON-NEXT: fmov d4, x10
+; CHECK-NEON-NEXT: fmov x9, d3
+; CHECK-NEON-NEXT: mov w10, #1048576 // =0x100000
+; CHECK-NEON-NEXT: and v3.8b, v1.8b, v5.8b
+; CHECK-NEON-NEXT: fmov d5, x11
; CHECK-NEON-NEXT: eor v0.8b, v0.8b, v2.8b
+; CHECK-NEON-NEXT: mul x9, x8, x9
; CHECK-NEON-NEXT: fmov x11, d3
-; CHECK-NEON-NEXT: fmov d5, x10
-; CHECK-NEON-NEXT: mov w10, #131072 // =0x20000
; CHECK-NEON-NEXT: fmov d3, x10
+; CHECK-NEON-NEXT: eor v4.8b, v4.8b, v5.8b
; CHECK-NEON-NEXT: mul x10, x8, x11
-; CHECK-NEON-NEXT: mov w11, #262144 // =0x40000
-; CHECK-NEON-NEXT: eor v5.8b, v6.8b, v5.8b
-; CHECK-NEON-NEXT: and v6.8b, v1.8b, v3.8b
+; CHECK-NEON-NEXT: mov w11, #4194304 // =0x400000
+; CHECK-NEON-NEXT: and v3.8b, v1.8b, v3.8b
+; CHECK-NEON-NEXT: fmov d2, x11
+; CHECK-NEON-NEXT: fmov d5, x9
+; CHECK-NEON-NEXT: mov w9, #8388608 // =0x800000
+; CHECK-NEON-NEXT: fmov x11, d3
; CHECK-NEON-NEXT: fmov d3, x9
-; CHECK-NEON-NEXT: fmov x9, d4
+; CHECK-NEON-NEXT: and v2.8b, v1.8b, v2.8b
+; CHECK-NEON-NEXT: eor v4.8b, v4.8b, v5.8b
+; CHECK-NEON-NEXT: fmov d5, x10
+; CHECK-NEON-NEXT: mul x9, x8, x11
+; CHECK-NEON-NEXT: mov w11, #2097152 // =0x200000
+; CHECK-NEON-NEXT: and v3.8b, v1.8b, v3.8b
+; CHECK-NEON-NEXT: fmov x10, d2
+; CHECK-NEON-NEXT: fmov d6, x11
+; CHECK-NEON-NEXT: eor v4.8b, v4.8b, v5.8b
+; CHECK-NEON-NEXT: fmov x12, d3
+; CHECK-NEON-NEXT: mul x11, x8, x10
+; CHECK-NEON-NEXT: mov w10, #16777216 // =0x1000000
+; CHECK-NEON-NEXT: and v2.8b, v1.8b, v6.8b
+; CHECK-NEON-NEXT: fmov d3, x10
+; CHECK-NEON-NEXT: fmov d5, x9
+; CHECK-NEON-NEXT: mul x10, x8, x12
+; CHECK-NEON-NEXT: mov w12, #33554432 // =0x2000000
+; CHECK-NEON-NEXT: fmov x9, d2
+; CHECK-NEON-NEXT: fmov d2, x12
+; CHECK-NEON-NEXT: mov w12, #67108864 // =0x4000000
+; CHECK-NEON-NEXT: and v3.8b, v1.8b, v3.8b
+; CHECK-NEON-NEXT: and v6.8b, v1.8b, v2.8b
+; CHECK-NEON-NEXT: eor v2.8b, v4.8b, v5.8b
; CHECK-NEON-NEXT: fmov d4, x11
-; CHECK-NEON-NEXT: mov w11, #524288 // =0x80000
-; CHECK-NEON-NEXT: fmov x12, d6
-; CHECK-NEON-NEXT: eor v3.8b, v5.8b, v3.8b
-; CHECK-NEON-NEXT: fmov d5, x11
-; CHECK-NEON-NEXT: and v4.8b, v1.8b, v4.8b
+; CHECK-NEON-NEXT: fmov x13, d3
+; CHECK-NEON-NEXT: fmov d3, x12
+; CHECK-NEON-NEXT: movi v5.2s, #128, lsl #24
; CHECK-NEON-NEXT: mul x9, x8, x9
+; CHECK-NEON-NEXT: fmov x12, d6
; CHECK-NEON-NEXT: fmov d6, x10
-; CHECK-NEON-NEXT: mul x11, x8, x12
-; CHECK-NEON-NEXT: mov w12, #1048576 // =0x100000
+; CHECK-NEON-NEXT: mul x11, x8, x13
+; CHECK-NEON-NEXT: mov w13, #536870912 // =0x20000000
+; CHECK-NEON-NEXT: and v3.8b, v1.8b, v3.8b
+; CHECK-NEON-NEXT: fmov d7, x13
+; CHECK-NEON-NEXT: mov w13, #1073741824 // =0x40000000
+; CHECK-NEON-NEXT: fneg d5, d5
+; CHECK-NEON-NEXT: mul x10, x8, x12
+; CHECK-NEON-NEXT: eor v4.8b, v4.8b, v6.8b
+; CHECK-NEON-NEXT: fmov d6, x13
+; CHECK-NEON-NEXT: fmov x12, d3
+; CHECK-NEON-NEXT: mov w13, #134217728 // =0x8000000
+; CHECK-NEON-NEXT: and v3.8b, v1.8b, v7.8b
+; CHECK-NEON-NEXT: and v6.8b, v1.8b, v6.8b
+; CHECK-NEON-NEXT: fmov d7, x11
; CHECK-NEON-NEXT: and v5.8b, v1.8b, v5.8b
-; CHECK-NEON-NEXT: fmov x10, d4
-; CHECK-NEON-NEXT: fmov d4, x12
-; CHECK-NEON-NEXT: mov w12, #4194304 // =0x400000
-; CHECK-NEON-NEXT: eor v2.8b, v3.8b, v6.8b
+; CHECK-NEON-NEXT: mul x12, x8, x12
+; CHECK-NEON-NEXT: fmov x11, d3
+; CHECK-NEON-NEXT: fmov d3, x13
+; CHECK-NEON-NEXT: fmov x13, d6
+; CHECK-NEON-NEXT: eor v4.8b, v4.8b, v7.8b
; CHECK-NEON-NEXT: fmov d7, x9
-; CHECK-NEON-NEXT: fmov x9, d5
-; CHECK-NEON-NEXT: fmov d5, x12
-; CHECK-NEON-NEXT: and v4.8b, v1.8b, v4.8b
-; CHECK-NEON-NEXT: mul x10, x8, x10
-; CHECK-NEON-NEXT: fmov d16, x11
+; CHECK-NEON-NEXT: mov w9, #268435456 // =0x10000000
+; CHECK-NEON-NEXT: fmov x14, d5
+; CHECK-NEON-NEXT: and v3.8b, v1.8b, v3.8b
+; CHECK-NEON-NEXT: fmov d6, x9
+; CHECK-NEON-NEXT: mul x11, x8, x11
+; CHECK-NEON-NEXT: eor v2.8b, v2.8b, v7.8b
+; CHECK-NEON-NEXT: fmov d7, x12
+; CHECK-NEON-NEXT: mul x9, x8, x13
+; CHECK-NEON-NEXT: fmov x13, d3
+; CHECK-NEON-NEXT: and v3.8b, v1.8b, v6.8b
+; CHECK-NEON-NEXT: fmov d6, x10
+; CHECK-NEON-NEXT: mul x14, x8, x14
; CHECK-NEON-NEXT: eor v0.8b, v0.8b, v2.8b
-; CHECK-NEON-NEXT: mul x12, x8, x9
-; CHECK-NEON-NEXT: mov w9, #8388608 // =0x800000
-; CHECK-NEON-NEXT: and v5.8b, v1.8b, v5.8b
-; CHECK-NEON-NEXT: fmov x11, d4
-; CHECK-NEON-NEXT: fmov d4, x9
-; CHECK-NEON-NEXT: eor v7.8b, v7.8b, v16.8b
-; CHECK-NEON-NEXT: fmov x13, d5
-; CHECK-NEON-NEXT: fmov d16, x10
-; CHECK-NEON-NEXT: mul x9, x8, x11
-; CHECK-NEON-NEXT: mov w11, #16777216 // =0x1000000
-; CHECK-NEON-NEXT: and v4.8b, v1.8b, v4.8b
-; CHECK-NEON-NEXT: fmov d5, x11
-; CHECK-NEON-NEXT: mov w11, #2097152 // =0x200000
-; CHECK-NEON-NEXT: fmov d3, x12
-; CHECK-NEON-NEXT: mul x13, x8, x13
-; CHECK-NEON-NEXT: eor v7.8b, v7.8b, v16.8b
-; CHECK-NEON-NEXT: movi v16.2s, #128, lsl #24
-; CHECK-NEON-NEXT: fmov x10, d4
-; CHECK-NEON-NEXT: fmov d4, x11
-; CHECK-NEON-NEXT: and v5.8b, v1.8b, v5.8b
-; CHECK-NEON-NEXT: eor v3.8b, v7.8b, v3.8b
-; CHECK-NEON-NEXT: and v4.8b, v1.8b, v4.8b
-; CHECK-NEON-NEXT: mul x11, x8, x10
-; CHECK-NEON-NEXT: fmov x10, d5
+; CHECK-NEON-NEXT: mul x10, x8, x13
+; CHECK-NEON-NEXT: mov x13, #4294967296 // =0x100000000
+; CHECK-NEON-NEXT: eor v4.8b, v4.8b, v6.8b
; CHECK-NEON-NEXT: fmov d5, x13
-; CHECK-NEON-NEXT: fmov x14, d4
-; CHECK-NEON-NEXT: mul x12, x8, x10
-; CHECK-NEON-NEXT: mov w10, #33554432 // =0x2000000
+; CHECK-NEON-NEXT: fmov x13, d3
+; CHECK-NEON-NEXT: fmov d6, x9
+; CHECK-NEON-NEXT: mov x9, #8589934592 // =0x200000000
+; CHECK-NEON-NEXT: eor v2.8b, v4.8b, v7.8b
+; CHECK-NEON-NEXT: and v3.8b, v1.8b, v5.8b
+; CHECK-NEON-NEXT: fmov d5, x11
+; CHECK-NEON-NEXT: mul x11, x8, x13
; CHECK-NEON-NEXT: fmov d4, x10
-; CHECK-NEON-NEXT: fmov d6, x11
-; CHECK-NEON-NEXT: mov w11, #134217728 // =0x8000000
-; CHECK-NEON-NEXT: mul x10, x8, x14
-; CHECK-NEON-NEXT: mov w14, #67108864 // =0x4000000
-; CHECK-NEON-NEXT: fmov d7, x14
-; CHECK-NEON-NEXT: and v4.8b, v1.8b, v4.8b
+; CHECK-NEON-NEXT: mov x10, #17179869184 // =0x400000000
; CHECK-NEON-NEXT: eor v5.8b, v5.8b, v6.8b
-; CHECK-NEON-NEXT: and v6.8b, v1.8b, v7.8b
-; CHECK-NEON-NEXT: fmov d7, x12
-; CHECK-NEON-NEXT: fmov x12, d4
+; CHECK-NEON-NEXT: fmov d6, x9
+; CHECK-NEON-NEXT: fmov x9, d3
+; CHECK-NEON-NEXT: eor v2.8b, v2.8b, v4.8b
; CHECK-NEON-NEXT: fmov d4, x11
-; CHECK-NEON-NEXT: fmov x13, d6
-; CHECK-NEON-NEXT: eor v5.8b, v5.8b, v7.8b
-; CHECK-NEON-NEXT: fneg d7, d16
-; CHECK-NEON-NEXT: mul x11, x8, x12
-; CHECK-NEON-NEXT: and v4.8b, v1.8b, v4.8b
-; CHECK-NEON-NEXT: mov w12, #536870912 // =0x20000000
-; CHECK-NEON-NEXT: fmov d6, x12
-; CHECK-NEON-NEXT: mul x12, x8, x13
-; CHECK-NEON-NEXT: mov w13, #1073741824 // =0x40000000
-; CHECK-NEON-NEXT: fmov x14, d4
-; CHECK-NEON-NEXT: fmov d4, x13
-; CHECK-NEON-NEXT: and v6.8b, v1.8b, v6.8b
-; CHECK-NEON-NEXT: mul x13, x8, x14
-; CHECK-NEON-NEXT: mov w14, #268435456 // =0x10000000
-; CHECK-NEON-NEXT: and v4.8b, v1.8b, v4.8b
-; CHECK-NEON-NEXT: fmov x15, d6
-; CHECK-NEON-NEXT: fmov d6, x14
-; CHECK-NEON-NEXT: and v2.8b, v1.8b, v6.8b
-; CHECK-NEON-NEXT: fmov d6, x9
-; CHECK-NEON-NEXT: fmov x9, d4
-; CHECK-NEON-NEXT: fmov d4, x11
-; CHECK-NEON-NEXT: mov x11, #4294967296 // =0x100000000
-; CHECK-NEON-NEXT: mul x14, x8, x15
-; CHECK-NEON-NEXT: eor v3.8b, v3.8b, v6.8b
+; CHECK-NEON-NEXT: and v3.8b, v1.8b, v6.8b
; CHECK-NEON-NEXT: mul x9, x8, x9
-; CHECK-NEON-NEXT: fmov d6, x12
-; CHECK-NEON-NEXT: eor v4.8b, v5.8b, v4.8b
-; CHECK-NEON-NEXT: fmov d5, x10
-; CHECK-NEON-NEXT: fmov x10, d2
-; CHECK-NEON-NEXT: and v2.8b, v1.8b, v7.8b
-; CHECK-NEON-NEXT: eor v3.8b, v3.8b, v5.8b
-; CHECK-NEON-NEXT: fmov d5, x11
-; CHECK-NEON-NEXT: mul x10, x8, x10
-; CHECK-NEON-NEXT: fmov x11, d2
-; CHECK-NEON-NEXT: eor v4.8b, v4.8b, v6.8b
-; CHECK-NEON-NEXT: fmov d6, x13
+; CHECK-NEON-NEXT: fmov d6, x14
+; CHECK-NEON-NEXT: eor v2.8b, v2.8b, v4.8b
+; CHECK-NEON-NEXT: fmov x12, d3
+; CHECK-NEON-NEXT: fmov d3, x10
+; CHECK-NEON-NEXT: eor v5.8b, v5.8b, v6.8b
; CHECK-NEON-NEXT: fmov d7, x9
-; CHECK-NEON-NEXT: and v2.8b, v1.8b, v5.8b
-; CHECK-NEON-NEXT: fmov d5, x14
-; CHECK-NEON-NEXT: eor v0.8b, v0.8b, v3.8b
+; CHECK-NEON-NEXT: mov x9, #68719476736 // =0x1000000000
+; CHECK-NEON-NEXT: eor v0.8b, v0.8b, v2.8b
+; CHECK-NEON-NEXT: and v3.8b, v1.8b, v3.8b
+; CHECK-NEON-NEXT: mul x10, x8, x12
+; CHECK-NEON-NEXT: mov x12, #34359738368 // =0x800000000
+; CHECK-NEON-NEXT: fmov d6, x12
+; CHECK-NEON-NEXT: eor v5.8b, v5.8b, v7.8b
+; CHECK-NEON-NEXT: fmov x11, d3
+; CHECK-NEON-NEXT: fmov d3, x9
+; CHECK-NEON-NEXT: and v6.8b, v1.8b, v6.8b
+; CHECK-NEON-NEXT: fmov d7, x10
; CHECK-NEON-NEXT: mul x9, x8, x11
-; CHECK-NEON-NEXT: mov x11, #8589934592 // =0x200000000
-; CHECK-NEON-NEXT: eor v4.8b, v4.8b, v6.8b
-; CHECK-NEON-NEXT: fmov d6, x10
-; CHECK-NEON-NEXT: mov x14, #1152921504606846976 // =0x1000000000000000
-; CHECK-NEON-NEXT: fmov x10, d2
-; CHECK-NEON-NEXT: fmov d2, x11
-; CHECK-NEON-NEXT: mov x11, #17179869184 // =0x400000000
+; CHECK-NEON-NEXT: and v3.8b, v1.8b, v3.8b
+; CHECK-NEON-NEXT: mov x11, #137438953472 // =0x2000000000
+; CHECK-NEON-NEXT: fmov x10, d6
+; CHECK-NEON-NEXT: fmov d6, x11
+; CHECK-NEON-NEXT: mov x11, #274877906944 // =0x4000000000
; CHECK-NEON-NEXT: eor v5.8b, v5.8b, v7.8b
-; CHECK-NEON-NEXT: eor v3.8b, v4.8b, v6.8b
+; CHECK-NEON-NEXT: fmov d7, #2.00000000
+; CHECK-NEON-NEXT: fmov x12, d3
+; CHECK-NEON-NEXT: fmov d3, x11
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: and v6.8b, v1.8b, v6.8b
+; CHECK-NEON-NEXT: fmov d4, x9
+; CHECK-NEON-NEXT: mul x11, x8, x12
+; CHECK-NEON-NEXT: and v3.8b, v1.8b, v3.8b
+; CHECK-NEON-NEXT: mov x12, #549755813888 // =0x8000000000
+; CHECK-NEON-NEXT: fmov x9, d6
+; CHECK-NEON-NEXT: eor v4.8b, v5.8b, v4.8b
+; CHECK-NEON-NEXT: fmov d5, x12
+; CHECK-NEON-NEXT: fmov x12, d3
+; CHECK-NEON-NEXT: mul x9, x8, x9
+; CHECK-NEON-NEXT: and v3.8b, v1.8b, v5.8b
+; CHECK-NEON-NEXT: fmov d5, x10
+; CHECK-NEON-NEXT: mul x10, x8, x12
+; CHECK-NEON-NEXT: mov x12, #1099511627776 // =0x10000000000
+; CHECK-NEON-NEXT: eor v2.8b, v4.8b, v5.8b
+; CHECK-NEON-NEXT: fmov d4, x12
+; CHECK-NEON-NEXT: fmov x12, d3
+; CHECK-NEON-NEXT: fmov d5, x9
+; CHECK-NEON-NEXT: and v3.8b, v1.8b, v4.8b
+; CHECK-NEON-NEXT: fmov d4, x11
+; CHECK-NEON-NEXT: mov x11, #2199023255552 // =0x20000000000
+; CHECK-NEON-NEXT: mul x9, x8, x12
+; CHECK-NEON-NEXT: eor v2.8b, v2.8b, v5.8b
+; CHECK-NEON-NEXT: eor v0.8b, v0.8b, v4.8b
+; CHECK-NEON-NEXT: fmov d4, x10
+; CHECK-NEON-NEXT: fmov x10, d3
+; CHECK-NEON-NEXT: fmov d3, x11
+; CHECK-NEON-NEXT: mov x11, #4398046511104 // =0x40000000000
+; CHECK-NEON-NEXT: eor v0.8b, v0.8b, v4.8b
; CHECK-NEON-NEXT: fmov d4, x11
-; CHECK-NEON-NEXT: and v2.8b, v1.8b, v2.8b
; CHECK-NEON-NEXT: mul x10, x8, x10
-; CHECK-NEON-NEXT: fmov d6, x9
-; CHECK-NEON-NEXT: mov x9, #34359738368 // =0x800000000
+; CHECK-NEON-NEXT: and v3.8b, v1.8b, v3.8b
+; CHECK-NEON-NEXT: fmov d5, x9
+; CHECK-NEON-NEXT: mov x9, #8796093022208 // =0x80000000000
; CHECK-NEON-NEXT: and v4.8b, v1.8b, v4.8b
-; CHECK-NEON-NEXT: eor v0.8b, v0.8b, v3.8b
-; CHECK-NEON-NEXT: fmov x11, d2
-; CHECK-NEON-NEXT: fmov d2, x9
-; CHECK-NEON-NEXT: eor v5.8b, v5.8b, v6.8b
-; CHECK-NEON-NEXT: fmov d6, x10
+; CHECK-NEON-NEXT: fmov x11, d3
+; CHECK-NEON-NEXT: fmov d3, x9
+; CHECK-NEON-NEXT: eor v2.8b, v2.8b, v5.8b
+; CHECK-NEON-NEXT: fmov d5, x10
; CHECK-NEON-NEXT: fmov x10, d4
; CHECK-NEON-NEXT: mul x9, x8, x11
-; CHECK-NEON-NEXT: and v2.8b, v1.8b, v2.8b
-; CHECK-NEON-NEXT: mov x11, #68719476736 // =0x1000000000
+; CHECK-NEON-NEXT: and v3.8b, v1.8b, v3.8b
+; CHECK-NEON-NEXT: mov x11, #17592186044416 // =0x100000000000
; CHECK-NEON-NEXT: fmov d4, x11
-; CHECK-NEON-NEXT: mov x11, #137438953472 // =0x2000000000
+; CHECK-NEON-NEXT: mov x11, #35184372088832 // =0x200000000000
+; CHECK-NEON-NEXT: eor v0.8b, v0.8b, v5.8b
; CHECK-NEON-NEXT: mul x10, x8, x10
-; CHECK-NEON-NEXT: eor v5.8b, v5.8b, v6.8b
-; CHECK-NEON-NEXT: fmov x12, d2
-; CHECK-NEON-NEXT: fmov d2, x11
+; CHECK-NEON-NEXT: fmov x12, d3
+; CHECK-NEON-NEXT: fmov d3, x11
; CHECK-NEON-NEXT: and v4.8b, v1.8b, v4.8b
-; CHECK-NEON-NEXT: fmov d6, x9
+; CHECK-NEON-NEXT: fmov d5, x9
; CHECK-NEON-NEXT: mul x11, x8, x12
-; CHECK-NEON-NEXT: and v2.8b, v1.8b, v2.8b
-; CHECK-NEON-NEXT: mov x12, #274877906944 // =0x4000000000
+; CHECK-NEON-NEXT: and v3.8b, v1.8b, v3.8b
+; CHECK-NEON-NEXT: mov x12, #70368744177664 // =0x400000000000
; CHECK-NEON-NEXT: fmov x9, d4
; CHECK-NEON-NEXT: fmov d4, x12
-; CHECK-NEON-NEXT: mov x12, #549755813888 // =0x8000000000
-; CHECK-NEON-NEXT: eor v5.8b, v5.8b, v6.8b
-; CHECK-NEON-NEXT: fmov d3, x10
-; CHECK-NEON-NEXT: fmov d6, x14
-; CHECK-NEON-NEXT: fmov x13, d2
-; CHECK-NEON-NEXT: fmov d2, x12
-; CHECK-NEON-NEXT: mov x14, #2305843009213693952 // =0x2000000000000000
+; CHECK-NEON-NEXT: mov x12, #140737488355328 // =0x800000000000
+; CHECK-NEON-NEXT: eor v2.8b, v2.8b, v5.8b
+; CHECK-NEON-NEXT: fmov d5, x10
+; CHECK-NEON-NEXT: fmov x13, d3
+; CHECK-NEON-NEXT: fmov d3, x12
; CHECK-NEON-NEXT: and v4.8b, v1.8b, v4.8b
; CHECK-NEON-NEXT: mul x9, x8, x9
-; CHECK-NEON-NEXT: eor v3.8b, v5.8b, v3.8b
+; CHECK-NEON-NEXT: eor v0.8b, v0.8b, v5.8b
; CHECK-NEON-NEXT: fmov d5, x11
; CHECK-NEON-NEXT: mul x12, x8, x13
-; CHECK-NEON-NEXT: and v2.8b, v1.8b, v2.8b
-; CHECK-NEON-NEXT: mov x13, #1099511627776 // =0x10000000000
+; CHECK-NEON-NEXT: mov x13, #281474976710656 // =0x1000000000000
+; CHECK-NEON-NEXT: and v3.8b, v1.8b, v3.8b
; CHECK-NEON-NEXT: fmov x10, d4
; CHECK-NEON-NEXT: fmov d4, x13
-; CHECK-NEON-NEXT: eor v3.8b, v3.8b, v5.8b
-; CHECK-NEON-NEXT: fmov x11, d2
-; CHECK-NEON-NEXT: and v2.8b, v1.8b, v4.8b
-; CHECK-NEON-NEXT: fmov d4, x9
+; CHECK-NEON-NEXT: mov x13, #562949953421312 // =0x2000000000000
+; CHECK-NEON-NEXT: eor v2.8b, v2.8b, v5.8b
+; CHECK-NEON-NEXT: fmov x11, d3
+; CHECK-NEON-NEXT: fmov d3, x13
+; CHECK-NEON-NEXT: fmov d5, x9
+; CHECK-NEON-NEXT: and v4.8b, v1.8b, v4.8b
; CHECK-NEON-NEXT: mul x10, x8, x10
-; CHECK-NEON-NEXT: mul x9, x8, x11
-; CHECK-NEON-NEXT: mov x11, #2199023255552 // =0x20000000000
-; CHECK-NEON-NEXT: eor v0.8b, v0.8b, v4.8b
-; CHECK-NEON-NEXT: fmov d4, x11
-; CHECK-NEON-NEXT: fmov x11, d2
-; CHECK-NEON-NEXT: fmov d5, x10
-; CHECK-NEON-NEXT: and v2.8b, v1.8b, v4.8b
-; CHECK-NEON-NEXT: fmov d4, x12
-; CHECK-NEON-NEXT: mul x10, x8, x11
-; CHECK-NEON-NEXT: mov x11, #4398046511104 // =0x40000000000
+; CHECK-NEON-NEXT: mul x11, x8, x11
+; CHECK-NEON-NEXT: and v3.8b, v1.8b, v3.8b
; CHECK-NEON-NEXT: eor v0.8b, v0.8b, v5.8b
-; CHECK-NEON-NEXT: eor v3.8b, v3.8b, v4.8b
+; CHECK-NEON-NEXT: fmov x9, d4
+; CHECK-NEON-NEXT: fmov d4, x12
+; CHECK-NEON-NEXT: mov x12, #1125899906842624 // =0x4000000000000
+; CHECK-NEON-NEXT: eor v2.8b, v2.8b, v4.8b
+; CHECK-NEON-NEXT: fmov d4, x12
+; CHECK-NEON-NEXT: mul x9, x8, x9
+; CHECK-NEON-NEXT: fmov x12, d3
+; CHECK-NEON-NEXT: fmov d5, x11
+; CHECK-NEON-NEXT: mov x11, #2251799813685248 // =0x8000000000000
+; CHECK-NEON-NEXT: and v3.8b, v1.8b, v4.8b
+; CHECK-NEON-NEXT: fmov d4, x10
+; CHECK-NEON-NEXT: mul x10, x8, x12
+; CHECK-NEON-NEXT: eor v2.8b, v2.8b, v5.8b
+; CHECK-NEON-NEXT: eor v0.8b, v0.8b, v4.8b
; CHECK-NEON-NEXT: fmov d4, x9
-; CHECK-NEON-NEXT: fmov x9, d2
-; CHECK-NEON-NEXT: fmov d2, x11
-; CHECK-NEON-NEXT: mov x11, #8796093022208 // =0x80000000000
-; CHECK-NEON-NEXT: fmov d5, x10
-; CHECK-NEON-NEXT: mov x10, #17592186044416 // =0x100000000000
-; CHECK-NEON-NEXT: eor v3.8b, v3.8b, v4.8b
+; CHECK-NEON-NEXT: fmov x9, d3
+; CHECK-NEON-NEXT: fmov d3, x11
+; CHECK-NEON-NEXT: mov x11, #4503599627370496 // =0x10000000000000
+; CHECK-NEON-NEXT: eor v0.8b, v0.8b, v4.8b
; CHECK-NEON-NEXT: fmov d4, x11
; CHECK-NEON-NEXT: mul x9, x8, x9
-; CHECK-NEON-NEXT: and v2.8b, v1.8b, v2.8b
-; CHECK-NEON-NEXT: eor v0.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT: and v3.8b, v1.8b, v3.8b
+; CHECK-NEON-NEXT: fmov d5, x10
+; CHECK-NEON-NEXT: mov x10, #9007199254740992 // =0x20000000000000
; CHECK-NEON-NEXT: and v4.8b, v1.8b, v4.8b
-; CHECK-NEON-NEXT: fmov x11, d2
-; CHECK-NEON-NEXT: fmov d2, x10
+; CHECK-NEON-NEXT: fmov x11, d3
+; CHECK-NEON-NEXT: fmov d3, x10
+; CHECK-NEON-NEXT: eor v2.8b, v2.8b, v5.8b
; CHECK-NEON-NEXT: fmov d5, x9
; CHECK-NEON-NEXT: fmov x9, d4
; CHECK-NEON-NEXT: mul x10, x8, x11
-; CHECK-NEON-NEXT: and v2.8b, v1.8b, v2.8b
-; CHECK-NEON-NEXT: mov x11, #35184372088832 // =0x200000000000
+; CHECK-NEON-NEXT: and v3.8b, v1.8b, v3.8b
+; CHECK-NEON-NEXT: mov x11, #18014398509481984 // =0x40000000000000
; CHECK-NEON-NEXT: fmov d4, x11
-; CHECK-NEON-NEXT: mov x11, #70368744177664 // =0x400000000000
-; CHECK-NEON-NEXT: eor v3.8b, v3.8b, v5.8b
+; CHECK-NEON-NEXT: mov x11, #36028797018963968 // =0x80000000000000
+; CHECK-NEON-NEXT: eor v0.8b, v0.8b, v5.8b
; CHECK-NEON-NEXT: mul x9, x8, x9
-; CHECK-NEON-NEXT: fmov x12, d2
-; CHECK-NEON-NEXT: fmov d2, x11
+; CHECK-NEON-NEXT: fmov x12, d3
+; CHECK-NEON-NEXT: fmov d3, x11
; CHECK-NEON-NEXT: and v4.8b, v1.8b, v4.8b
; CHECK-NEON-NEXT: fmov d5, x10
; CHECK-NEON-NEXT: mul x11, x8, x12
-; CHECK-NEON-NEXT: and v2.8b, v1.8b, v2.8b
-; CHECK-NEON-NEXT: mov x12, #140737488355328 // =0x800000000000
+; CHECK-NEON-NEXT: and v3.8b, v1.8b, v3.8b
+; CHECK-NEON-NEXT: mov x12, #72057594037927936 // =0x100000000000000
; CHECK-NEON-NEXT: fmov x10, d4
; CHECK-NEON-NEXT: fmov d4, x12
-; CHECK-NEON-NEXT: mov x12, #281474976710656 // =0x1000000000000
-; CHECK-NEON-NEXT: eor v0.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT: mov x12, #144115188075855872 // =0x200000000000000
+; CHECK-NEON-NEXT: eor v2.8b, v2.8b, v5.8b
; CHECK-NEON-NEXT: fmov d5, x9
-; CHECK-NEON-NEXT: fmov x13, d2
-; CHECK-NEON-NEXT: fmov d2, x12
-; CHECK-NEON-NEXT: and v4.8b, v1.8b, v4.8b
-; CHECK-NEON-NEXT: mul x10, x8, x10
-; CHECK-NEON-NEXT: eor v3.8b, v3.8b, v5.8b
-; CHECK-NEON-NEXT: fmov d5, x11
-; CHECK-NEON-NEXT: mul x12, x8, x13
-; CHECK-NEON-NEXT: mov x13, #562949953421312 // =0x2000000000000
-; CHECK-NEON-NEXT: and v2.8b, v1.8b, v2.8b
-; CHECK-NEON-NEXT: fmov x9, d4
-; CHECK-NEON-NEXT: fmov d4, x13
-; CHECK-NEON-NEXT: mov x13, #1125899906842624 // =0x4000000000000
-; CHECK-NEON-NEXT: eor v0.8b, v0.8b, v5.8b
-; CHECK-NEON-NEXT: fmov x11, d2
-; CHECK-NEON-NEXT: fmov d2, x13
-; CHECK-NEON-NEXT: fmov d5, x10
+; CHECK-NEON-NEXT: fmov x13, d3
+; CHECK-NEON-NEXT: fmov d3, x12
; CHECK-NEON-NEXT: and v4.8b, v1.8b, v4.8b
-; CHECK-NEON-NEXT: mul x9, x8, x9
-; CHECK-NEON-NEXT: and v2.8b, v1.8b, v2.8b
-; CHECK-NEON-NEXT: mul x11, x8, x11
-; CHECK-NEON-NEXT: eor v3.8b, v3.8b, v5.8b
-; CHECK-NEON-NEXT: fmov x10, d4
-; CHECK-NEON-NEXT: fmov d4, x12
-; CHECK-NEON-NEXT: mov x12, #2251799813685248 // =0x8000000000000
-; CHECK-NEON-NEXT: eor v0.8b, v0.8b, v4.8b
-; CHECK-NEON-NEXT: fmov d4, x12
; CHECK-NEON-NEXT: mul x10, x8, x10
-; CHECK-NEON-NEXT: fmov x12, d2
-; CHECK-NEON-NEXT: fmov d5, x11
-; CHECK-NEON-NEXT: mov x11, #4503599627370496 // =0x10000000000000
-; CHECK-NEON-NEXT: and v2.8b, v1.8b, v4.8b
-; CHECK-NEON-NEXT: fmov d4, x9
-; CHECK-NEON-NEXT: mul x9, x8, x12
; CHECK-NEON-NEXT: eor v0.8b, v0.8b, v5.8b
-; CHECK-NEON-NEXT: eor v3.8b, v3.8b, v4.8b
-; CHECK-NEON-NEXT: fmov d4, x10
-; CHECK-NEON-NEXT: fmov x10, d2
-; CHECK-NEON-NEXT: fmov d2, x11
-; CHECK-NEON-NEXT: mov x11, #9007199254740992 // =0x20000000000000
-; CHECK-NEON-NEXT: eor v3.8b, v3.8b, v4.8b
+; CHECK-NEON-NEXT: mul x9, x8, x13
+; CHECK-NEON-NEXT: mov x13, #288230376151711744 // =0x400000000000000
+; CHECK-NEON-NEXT: and v3.8b, v1.8b, v3.8b
+; CHECK-NEON-NEXT: fmov x12, d4
; CHECK-NEON-NEXT: fmov d4, x11
+; CHECK-NEON-NEXT: fmov d5, x13
+; CHECK-NEON-NEXT: mov x13, #576460752303423488 // =0x800000000000000
+; CHECK-NEON-NEXT: fmov d6, x10
+; CHECK-NEON-NEXT: mul x11, x8, x12
+; CHECK-NEON-NEXT: fmov x12, d3
+; CHECK-NEON-NEXT: and v3.8b, v1.8b, v5.8b
+; CHECK-NEON-NEXT: eor v2.8b, v2.8b, v4.8b
+; CHECK-NEON-NEXT: fmov d4, x13
+; CHECK-NEON-NEXT: mov x13, #1152921504606846976 // =0x1000000000000000
+; CHECK-NEON-NEXT: movi d5, #0000000000000000
+; CHECK-NEON-NEXT: eor v0.8b, v0.8b, v6.8b
+; CHECK-NEON-NEXT: fmov x10, d3
+; CHECK-NEON-NEXT: fmov d3, x13
+; CHECK-NEON-NEXT: mov x13, #2305843009213693952 // =0x2000000000000000
+; CHECK-NEON-NEXT: and v4.8b, v1.8b, v4.8b
+; CHECK-NEON-NEXT: mul x12, x8, x12
+; CHECK-NEON-NEXT: and v3.8b, v1.8b, v3.8b
+; CHECK-NEON-NEXT: fneg d5, d5
; CHECK-NEON-NEXT: mul x10, x8, x10
-; CHECK-NEON-NEXT: and v2.8b, v1.8b, v2.8b
-; CHECK-NEON-NEXT: fmov d5, x9
-; CHECK-NEON-NEXT: mov x9, #18014398509481984 // =0x40000000000000
+; CHECK-NEON-NEXT: fmov x14, d4
+; CHECK-NEON-NEXT: fmov d4, x13
+; CHECK-NEON-NEXT: mul x13, x8, x14
; CHECK-NEON-NEXT: and v4.8b, v1.8b, v4.8b
-; CHECK-NEON-NEXT: fmov x11, d2
-; CHECK-NEON-NEXT: fmov d2, x9
-; CHECK-NEON-NEXT: eor v0.8b, v0.8b, v5.8b
-; CHECK-NEON-NEXT: fmov d5, x10
-; CHECK-NEON-NEXT: fmov x10, d4
-; CHECK-NEON-NEXT: mul x9, x8, x11
-; CHECK-NEON-NEXT: and v2.8b, v1.8b, v2.8b
-; CHECK-NEON-NEXT: mov x11, #36028797018963968 // =0x80000000000000
+; CHECK-NEON-NEXT: fmov x14, d3
+; CHECK-NEON-NEXT: fmov d3, x9
+; CHECK-NEON-NEXT: mul x9, x8, x14
+; CHECK-NEON-NEXT: fmov x14, d4
; CHECK-NEON-NEXT: fmov d4, x11
-; CHECK-NEON-NEXT: eor v3.8b, v3.8b, v5.8b
-; CHECK-NEON-NEXT: mul x12, x8, x10
-; CHECK-NEON-NEXT: mov x10, #72057594037927936 // =0x100000000000000
-; CHECK-NEON-NEXT: fmov x11, d2
-; CHECK-NEON-NEXT: fmov d2, x10
-; CHECK-NEON-NEXT: and v4.8b, v1.8b, v4.8b
-; CHECK-NEON-NEXT: fmov d5, x9
-; CHECK-NEON-NEXT: mul x10, x8, x11
-; CHECK-NEON-NEXT: and v2.8b, v1.8b, v2.8b
-; CHECK-NEON-NEXT: mov x11, #144115188075855872 // =0x200000000000000
-; CHECK-NEON-NEXT: fmov x9, d4
-; CHECK-NEON-NEXT: fmov d4, x11
-; CHECK-NEON-NEXT: mov x11, #288230376151711744 // =0x400000000000000
-; CHECK-NEON-NEXT: eor v0.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT: eor v2.8b, v2.8b, v3.8b
+; CHECK-NEON-NEXT: and v3.8b, v1.8b, v7.8b
+; CHECK-NEON-NEXT: and v1.8b, v1.8b, v5.8b
; CHECK-NEON-NEXT: fmov d5, x12
-; CHECK-NEON-NEXT: fmov x13, d2
-; CHECK-NEON-NEXT: fmov d2, x11
-; CHECK-NEON-NEXT: and v4.8b, v1.8b, v4.8b
-; CHECK-NEON-NEXT: mul x9, x8, x9
-; CHECK-NEON-NEXT: eor v3.8b, v3.8b, v5.8b
-; CHECK-NEON-NEXT: fmov d5, x10
-; CHECK-NEON-NEXT: mul x11, x8, x13
-; CHECK-NEON-NEXT: mov x13, #576460752303423488 // =0x800000000000000
-; CHECK-NEON-NEXT: and v2.8b, v1.8b, v2.8b
-; CHECK-NEON-NEXT: fmov x12, d4
-; CHECK-NEON-NEXT: fmov d4, x13
-; CHECK-NEON-NEXT: eor v0.8b, v0.8b, v5.8b
-; CHECK-NEON-NEXT: fmov d5, x14
-; CHECK-NEON-NEXT: mov x14, #4611686018427387904 // =0x4000000000000000
-; CHECK-NEON-NEXT: fmov x13, d2
-; CHECK-NEON-NEXT: movi d2, #0000000000000000
-; CHECK-NEON-NEXT: and v4.8b, v1.8b, v4.8b
-; CHECK-NEON-NEXT: mul x12, x8, x12
-; CHECK-NEON-NEXT: and v5.8b, v1.8b, v5.8b
-; CHECK-NEON-NEXT: mul x10, x8, x13
-; CHECK-NEON-NEXT: fmov x13, d4
-; CHECK-NEON-NEXT: and v4.8b, v1.8b, v6.8b
-; CHECK-NEON-NEXT: fneg d2, d2
-; CHECK-NEON-NEXT: fmov d6, x9
-; CHECK-NEON-NEXT: fmov x9, d4
-; CHECK-NEON-NEXT: fmov d4, x14
-; CHECK-NEON-NEXT: mul x13, x8, x13
-; CHECK-NEON-NEXT: eor v3.8b, v3.8b, v6.8b
-; CHECK-NEON-NEXT: fmov x14, d5
-; CHECK-NEON-NEXT: fmov d5, x11
-; CHECK-NEON-NEXT: and v4.8b, v1.8b, v4.8b
-; CHECK-NEON-NEXT: and v1.8b, v1.8b, v2.8b
-; CHECK-NEON-NEXT: fmov d2, x12
-; CHECK-NEON-NEXT: mul x9, x8, x9
-; CHECK-NEON-NEXT: eor v0.8b, v0.8b, v5.8b
; CHECK-NEON-NEXT: mul x11, x8, x14
-; CHECK-NEON-NEXT: eor v2.8b, v3.8b, v2.8b
-; CHECK-NEON-NEXT: fmov x12, d4
+; CHECK-NEON-NEXT: eor v0.8b, v0.8b, v4.8b
+; CHECK-NEON-NEXT: fmov x12, d3
; CHECK-NEON-NEXT: fmov d3, x10
; CHECK-NEON-NEXT: fmov x10, d1
+; CHECK-NEON-NEXT: eor v2.8b, v2.8b, v5.8b
; CHECK-NEON-NEXT: fmov d1, x13
; CHECK-NEON-NEXT: mul x12, x8, x12
; CHECK-NEON-NEXT: eor v0.8b, v0.8b, v3.8b
; CHECK-NEON-NEXT: eor v1.8b, v2.8b, v1.8b
; CHECK-NEON-NEXT: fmov d2, x9
-; CHECK-NEON-NEXT: mul x8, x8, x10
; CHECK-NEON-NEXT: fmov d3, x11
+; CHECK-NEON-NEXT: mul x8, x8, x10
; CHECK-NEON-NEXT: eor v0.8b, v0.8b, v2.8b
; CHECK-NEON-NEXT: eor v1.8b, v1.8b, v3.8b
; CHECK-NEON-NEXT: fmov d2, x12
@@ -6452,7 +6451,7 @@ define <2 x i64> @clmulr_v2i64_neon(<2 x i64> %a, <2 x i64> %b) nounwind {
; CHECK-NEON-NEXT: eor v2.16b, v3.16b, v2.16b
; CHECK-NEON-NEXT: mov v17.d[1], x0
; CHECK-NEON-NEXT: eor v5.16b, v6.16b, v21.16b
-; CHECK-NEON-NEXT: movi v6.2d, #0000000000000000
+; CHECK-NEON-NEXT: movi d6, #0000000000000000
; CHECK-NEON-NEXT: mul x0, x9, x2
; CHECK-NEON-NEXT: fmov x2, d16
; CHECK-NEON-NEXT: fmov v16.2d, #2.00000000
@@ -6565,7 +6564,9 @@ define <1 x i64> @clmulr_v1i64_neon(<1 x i64> %a, <1 x i64> %b) nounwind {
; CHECK-NEON-NEXT: mov w12, #32 // =0x20
; CHECK-NEON-NEXT: mov w13, #64 // =0x40
; CHECK-NEON-NEXT: mov w14, #128 // =0x80
+; CHECK-NEON-NEXT: mov w16, #512 // =0x200
; CHECK-NEON-NEXT: mov w15, #256 // =0x100
+; CHECK-NEON-NEXT: movi d18, #0000000000000000
; CHECK-NEON-NEXT: rbit v0.8b, v1.8b
; CHECK-NEON-NEXT: fmov d1, x8
; CHECK-NEON-NEXT: mov w8, #1 // =0x1
@@ -6582,11 +6583,8 @@ define <1 x i64> @clmulr_v1i64_neon(<1 x i64> %a, <1 x i64> %b) nounwind {
; CHECK-NEON-NEXT: fmov x10, d3
; CHECK-NEON-NEXT: fmov d3, x11
; CHECK-NEON-NEXT: and v2.8b, v0.8b, v2.8b
-; CHECK-NEON-NEXT: fmov d4, x14
-; CHECK-NEON-NEXT: mov w14, #512 // =0x200
; CHECK-NEON-NEXT: fmov x11, d1
; CHECK-NEON-NEXT: mul x9, x8, x9
-; CHECK-NEON-NEXT: fmov d5, x14
; CHECK-NEON-NEXT: and v1.8b, v0.8b, v3.8b
; CHECK-NEON-NEXT: fmov d3, x12
; CHECK-NEON-NEXT: fmov x12, d2
@@ -6595,412 +6593,412 @@ define <1 x i64> @clmulr_v1i64_neon(<1 x i64> %a, <1 x i64> %b) nounwind {
; CHECK-NEON-NEXT: fmov d3, x13
; CHECK-NEON-NEXT: fmov x13, d1
; CHECK-NEON-NEXT: mul x11, x8, x11
+; CHECK-NEON-NEXT: fmov d4, x9
; CHECK-NEON-NEXT: and v1.8b, v0.8b, v3.8b
+; CHECK-NEON-NEXT: fmov d3, x14
; CHECK-NEON-NEXT: mul x12, x8, x12
-; CHECK-NEON-NEXT: fmov d3, x15
; CHECK-NEON-NEXT: fmov x14, d2
-; CHECK-NEON-NEXT: and v2.8b, v0.8b, v4.8b
-; CHECK-NEON-NEXT: fmov d4, x9
-; CHECK-NEON-NEXT: mov w15, #1024 // =0x400
; CHECK-NEON-NEXT: mul x13, x8, x13
-; CHECK-NEON-NEXT: fmov x9, d1
-; CHECK-NEON-NEXT: and v1.8b, v0.8b, v5.8b
-; CHECK-NEON-NEXT: fmov d5, x10
-; CHECK-NEON-NEXT: and v3.8b, v0.8b, v3.8b
-; CHECK-NEON-NEXT: fmov x10, d2
-; CHECK-NEON-NEXT: fmov d2, x11
-; CHECK-NEON-NEXT: fmov d6, x12
+; CHECK-NEON-NEXT: and v2.8b, v0.8b, v3.8b
+; CHECK-NEON-NEXT: fmov d3, x16
+; CHECK-NEON-NEXT: fmov x17, d1
+; CHECK-NEON-NEXT: fmov d1, x15
+; CHECK-NEON-NEXT: fmov d5, x11
; CHECK-NEON-NEXT: mul x14, x8, x14
-; CHECK-NEON-NEXT: mov w11, #2048 // =0x800
-; CHECK-NEON-NEXT: eor v4.8b, v5.8b, v4.8b
-; CHECK-NEON-NEXT: fmov d5, x15
-; CHECK-NEON-NEXT: fmov x12, d3
-; CHECK-NEON-NEXT: mul x10, x8, x10
-; CHECK-NEON-NEXT: fmov d3, x11
-; CHECK-NEON-NEXT: eor v2.8b, v2.8b, v6.8b
-; CHECK-NEON-NEXT: and v5.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT: fmov d6, x12
+; CHECK-NEON-NEXT: mov w12, #4096 // =0x1000
+; CHECK-NEON-NEXT: fmov x9, d2
+; CHECK-NEON-NEXT: and v2.8b, v0.8b, v3.8b
+; CHECK-NEON-NEXT: fmov d3, x10
+; CHECK-NEON-NEXT: and v1.8b, v0.8b, v1.8b
+; CHECK-NEON-NEXT: mov w10, #2048 // =0x800
+; CHECK-NEON-NEXT: mul x15, x8, x17
+; CHECK-NEON-NEXT: eor v3.8b, v3.8b, v4.8b
+; CHECK-NEON-NEXT: eor v4.8b, v5.8b, v6.8b
+; CHECK-NEON-NEXT: fmov d5, x10
+; CHECK-NEON-NEXT: fmov x10, d1
+; CHECK-NEON-NEXT: fmov x11, d2
+; CHECK-NEON-NEXT: fmov d2, x13
; CHECK-NEON-NEXT: mul x9, x8, x9
-; CHECK-NEON-NEXT: and v3.8b, v0.8b, v3.8b
-; CHECK-NEON-NEXT: mul x11, x8, x12
+; CHECK-NEON-NEXT: mov w13, #8192 // =0x2000
+; CHECK-NEON-NEXT: eor v3.8b, v3.8b, v4.8b
+; CHECK-NEON-NEXT: fmov d4, x12
+; CHECK-NEON-NEXT: and v1.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT: fmov d5, x14
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: mov w12, #1024 // =0x400
+; CHECK-NEON-NEXT: and v4.8b, v0.8b, v4.8b
+; CHECK-NEON-NEXT: mul x11, x8, x11
+; CHECK-NEON-NEXT: eor v2.8b, v2.8b, v5.8b
+; CHECK-NEON-NEXT: fmov d5, x12
; CHECK-NEON-NEXT: fmov x12, d1
-; CHECK-NEON-NEXT: fmov d1, x13
-; CHECK-NEON-NEXT: mov w13, #4096 // =0x1000
-; CHECK-NEON-NEXT: eor v2.8b, v4.8b, v2.8b
-; CHECK-NEON-NEXT: fmov d4, x14
-; CHECK-NEON-NEXT: fmov x14, d5
-; CHECK-NEON-NEXT: fmov d5, x13
+; CHECK-NEON-NEXT: fmov d6, x9
+; CHECK-NEON-NEXT: fmov x14, d4
+; CHECK-NEON-NEXT: fmov d4, x13
+; CHECK-NEON-NEXT: fmov d7, x10
+; CHECK-NEON-NEXT: and v1.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT: mov w13, #16384 // =0x4000
; CHECK-NEON-NEXT: mul x12, x8, x12
-; CHECK-NEON-NEXT: eor v1.8b, v1.8b, v4.8b
-; CHECK-NEON-NEXT: and v4.8b, v0.8b, v5.8b
-; CHECK-NEON-NEXT: fmov d5, x10
+; CHECK-NEON-NEXT: fmov d5, x15
+; CHECK-NEON-NEXT: and v4.8b, v0.8b, v4.8b
+; CHECK-NEON-NEXT: mul x9, x8, x14
+; CHECK-NEON-NEXT: mov w14, #32768 // =0x8000
+; CHECK-NEON-NEXT: fmov x10, d1
+; CHECK-NEON-NEXT: eor v1.8b, v6.8b, v7.8b
+; CHECK-NEON-NEXT: fmov d6, x13
+; CHECK-NEON-NEXT: fmov d7, x14
+; CHECK-NEON-NEXT: eor v2.8b, v2.8b, v5.8b
+; CHECK-NEON-NEXT: fmov x13, d4
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: and v4.8b, v0.8b, v6.8b
; CHECK-NEON-NEXT: fmov d6, x11
-; CHECK-NEON-NEXT: mov w11, #8192 // =0x2000
-; CHECK-NEON-NEXT: fmov x10, d3
-; CHECK-NEON-NEXT: mul x13, x8, x14
-; CHECK-NEON-NEXT: eor v3.8b, v5.8b, v6.8b
-; CHECK-NEON-NEXT: fmov d5, x9
+; CHECK-NEON-NEXT: and v5.8b, v0.8b, v7.8b
+; CHECK-NEON-NEXT: fmov d7, x9
+; CHECK-NEON-NEXT: eor v2.8b, v3.8b, v2.8b
+; CHECK-NEON-NEXT: mul x11, x8, x13
; CHECK-NEON-NEXT: fmov x9, d4
-; CHECK-NEON-NEXT: fmov d4, x11
-; CHECK-NEON-NEXT: mov w11, #16384 // =0x4000
+; CHECK-NEON-NEXT: eor v1.8b, v1.8b, v6.8b
; CHECK-NEON-NEXT: fmov d6, x12
+; CHECK-NEON-NEXT: fmov d4, x10
+; CHECK-NEON-NEXT: fmov x10, d5
+; CHECK-NEON-NEXT: mul x9, x8, x9
+; CHECK-NEON-NEXT: eor v6.8b, v6.8b, v7.8b
+; CHECK-NEON-NEXT: fmov d3, x11
+; CHECK-NEON-NEXT: mov w11, #65536 // =0x10000
+; CHECK-NEON-NEXT: eor v1.8b, v1.8b, v4.8b
+; CHECK-NEON-NEXT: fmov d4, x11
; CHECK-NEON-NEXT: mul x10, x8, x10
-; CHECK-NEON-NEXT: eor v1.8b, v1.8b, v5.8b
+; CHECK-NEON-NEXT: mov w11, #131072 // =0x20000
+; CHECK-NEON-NEXT: eor v3.8b, v6.8b, v3.8b
; CHECK-NEON-NEXT: fmov d5, x11
-; CHECK-NEON-NEXT: mul x9, x8, x9
; CHECK-NEON-NEXT: and v4.8b, v0.8b, v4.8b
-; CHECK-NEON-NEXT: eor v3.8b, v3.8b, v6.8b
-; CHECK-NEON-NEXT: fmov d6, x13
; CHECK-NEON-NEXT: eor v1.8b, v2.8b, v1.8b
-; CHECK-NEON-NEXT: fmov x11, d4
-; CHECK-NEON-NEXT: and v4.8b, v0.8b, v5.8b
-; CHECK-NEON-NEXT: eor v2.8b, v3.8b, v6.8b
-; CHECK-NEON-NEXT: fmov d3, x10
-; CHECK-NEON-NEXT: fmov d5, x9
-; CHECK-NEON-NEXT: mov w9, #32768 // =0x8000
+; CHECK-NEON-NEXT: fmov d2, x9
+; CHECK-NEON-NEXT: mov w9, #262144 // =0x40000
+; CHECK-NEON-NEXT: eor v2.8b, v3.8b, v2.8b
+; CHECK-NEON-NEXT: and v3.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT: fmov d5, x10
; CHECK-NEON-NEXT: fmov x10, d4
; CHECK-NEON-NEXT: fmov d4, x9
-; CHECK-NEON-NEXT: mul x11, x8, x11
-; CHECK-NEON-NEXT: eor v1.8b, v1.8b, v2.8b
-; CHECK-NEON-NEXT: eor v3.8b, v3.8b, v5.8b
-; CHECK-NEON-NEXT: mul x9, x8, x10
+; CHECK-NEON-NEXT: eor v2.8b, v2.8b, v5.8b
+; CHECK-NEON-NEXT: fmov x11, d3
; CHECK-NEON-NEXT: and v4.8b, v0.8b, v4.8b
-; CHECK-NEON-NEXT: mov w10, #65536 // =0x10000
+; CHECK-NEON-NEXT: mul x9, x8, x10
+; CHECK-NEON-NEXT: mov w10, #524288 // =0x80000
+; CHECK-NEON-NEXT: fmov d3, x10
+; CHECK-NEON-NEXT: mov w10, #1048576 // =0x100000
+; CHECK-NEON-NEXT: eor v1.8b, v1.8b, v2.8b
+; CHECK-NEON-NEXT: mul x11, x8, x11
+; CHECK-NEON-NEXT: fmov x12, d4
+; CHECK-NEON-NEXT: fmov d4, x10
+; CHECK-NEON-NEXT: mov w10, #2097152 // =0x200000
; CHECK-NEON-NEXT: fmov d2, x10
-; CHECK-NEON-NEXT: mov w10, #131072 // =0x20000
+; CHECK-NEON-NEXT: and v3.8b, v0.8b, v3.8b
+; CHECK-NEON-NEXT: mov w10, #4194304 // =0x400000
+; CHECK-NEON-NEXT: and v4.8b, v0.8b, v4.8b
+; CHECK-NEON-NEXT: mul x12, x8, x12
+; CHECK-NEON-NEXT: and v2.8b, v0.8b, v2.8b
+; CHECK-NEON-NEXT: fmov x13, d3
+; CHECK-NEON-NEXT: fmov d3, x9
; CHECK-NEON-NEXT: fmov d5, x11
-; CHECK-NEON-NEXT: fmov x11, d4
+; CHECK-NEON-NEXT: fmov x9, d4
; CHECK-NEON-NEXT: fmov d4, x10
-; CHECK-NEON-NEXT: and v2.8b, v0.8b, v2.8b
+; CHECK-NEON-NEXT: fmov x11, d2
+; CHECK-NEON-NEXT: mul x13, x8, x13
; CHECK-NEON-NEXT: eor v3.8b, v3.8b, v5.8b
-; CHECK-NEON-NEXT: fmov d5, x9
-; CHECK-NEON-NEXT: mul x10, x8, x11
-; CHECK-NEON-NEXT: mov w11, #262144 // =0x40000
+; CHECK-NEON-NEXT: fmov d5, x12
+; CHECK-NEON-NEXT: mul x10, x8, x9
; CHECK-NEON-NEXT: and v4.8b, v0.8b, v4.8b
-; CHECK-NEON-NEXT: fmov x9, d2
-; CHECK-NEON-NEXT: fmov d2, x11
-; CHECK-NEON-NEXT: mov w11, #524288 // =0x80000
+; CHECK-NEON-NEXT: mov w9, #8388608 // =0x800000
+; CHECK-NEON-NEXT: fmov d2, x9
+; CHECK-NEON-NEXT: mul x9, x8, x11
+; CHECK-NEON-NEXT: mov w11, #16777216 // =0x1000000
; CHECK-NEON-NEXT: eor v3.8b, v3.8b, v5.8b
-; CHECK-NEON-NEXT: fmov x12, d4
+; CHECK-NEON-NEXT: fmov x14, d4
; CHECK-NEON-NEXT: fmov d4, x11
-; CHECK-NEON-NEXT: mov w11, #1048576 // =0x100000
+; CHECK-NEON-NEXT: fmov d5, x13
+; CHECK-NEON-NEXT: and v2.8b, v0.8b, v2.8b
+; CHECK-NEON-NEXT: mul x11, x8, x14
+; CHECK-NEON-NEXT: and v4.8b, v0.8b, v4.8b
+; CHECK-NEON-NEXT: mov w14, #33554432 // =0x2000000
+; CHECK-NEON-NEXT: fmov x12, d2
+; CHECK-NEON-NEXT: fmov d2, x14
+; CHECK-NEON-NEXT: mov w14, #67108864 // =0x4000000
+; CHECK-NEON-NEXT: eor v3.8b, v3.8b, v5.8b
+; CHECK-NEON-NEXT: fmov x15, d4
+; CHECK-NEON-NEXT: fmov d4, x14
; CHECK-NEON-NEXT: and v2.8b, v0.8b, v2.8b
-; CHECK-NEON-NEXT: mul x9, x8, x9
-; CHECK-NEON-NEXT: fmov d5, x10
; CHECK-NEON-NEXT: mul x12, x8, x12
+; CHECK-NEON-NEXT: fmov d6, x11
+; CHECK-NEON-NEXT: mul x13, x8, x15
+; CHECK-NEON-NEXT: mov w15, #134217728 // =0x8000000
; CHECK-NEON-NEXT: and v4.8b, v0.8b, v4.8b
-; CHECK-NEON-NEXT: fmov x10, d2
-; CHECK-NEON-NEXT: fmov d2, x11
-; CHECK-NEON-NEXT: fmov x11, d4
-; CHECK-NEON-NEXT: fmov d4, x9
-; CHECK-NEON-NEXT: mul x13, x8, x10
-; CHECK-NEON-NEXT: mov w10, #2097152 // =0x200000
-; CHECK-NEON-NEXT: and v6.8b, v0.8b, v2.8b
-; CHECK-NEON-NEXT: eor v2.8b, v3.8b, v5.8b
-; CHECK-NEON-NEXT: fmov d3, x10
-; CHECK-NEON-NEXT: mov w10, #4194304 // =0x400000
-; CHECK-NEON-NEXT: fmov d5, x10
-; CHECK-NEON-NEXT: mul x11, x8, x11
-; CHECK-NEON-NEXT: fmov x9, d6
-; CHECK-NEON-NEXT: fmov d6, x12
-; CHECK-NEON-NEXT: and v3.8b, v0.8b, v3.8b
-; CHECK-NEON-NEXT: eor v1.8b, v1.8b, v2.8b
+; CHECK-NEON-NEXT: fmov x14, d2
+; CHECK-NEON-NEXT: fmov d7, x15
+; CHECK-NEON-NEXT: mov w15, #536870912 // =0x20000000
+; CHECK-NEON-NEXT: fmov d5, x15
+; CHECK-NEON-NEXT: mov w15, #1073741824 // =0x40000000
+; CHECK-NEON-NEXT: movi v2.2s, #128, lsl #24
+; CHECK-NEON-NEXT: mul x11, x8, x14
+; CHECK-NEON-NEXT: fmov x14, d4
+; CHECK-NEON-NEXT: and v4.8b, v0.8b, v7.8b
+; CHECK-NEON-NEXT: fmov d7, x12
; CHECK-NEON-NEXT: and v5.8b, v0.8b, v5.8b
-; CHECK-NEON-NEXT: mul x10, x8, x9
-; CHECK-NEON-NEXT: mov w9, #8388608 // =0x800000
-; CHECK-NEON-NEXT: eor v4.8b, v4.8b, v6.8b
-; CHECK-NEON-NEXT: fmov x12, d3
-; CHECK-NEON-NEXT: fmov d3, x9
-; CHECK-NEON-NEXT: fmov d6, x13
-; CHECK-NEON-NEXT: fmov x14, d5
-; CHECK-NEON-NEXT: mul x9, x8, x12
-; CHECK-NEON-NEXT: mov w12, #16777216 // =0x1000000
-; CHECK-NEON-NEXT: and v3.8b, v0.8b, v3.8b
-; CHECK-NEON-NEXT: fmov d5, x12
-; CHECK-NEON-NEXT: mov w12, #33554432 // =0x2000000
+; CHECK-NEON-NEXT: fmov d16, x13
+; CHECK-NEON-NEXT: fneg d2, d2
+; CHECK-NEON-NEXT: mov w13, #268435456 // =0x10000000
+; CHECK-NEON-NEXT: fmov x12, d4
+; CHECK-NEON-NEXT: fmov d4, x15
+; CHECK-NEON-NEXT: mov x15, #4294967296 // =0x100000000
+; CHECK-NEON-NEXT: eor v6.8b, v6.8b, v7.8b
+; CHECK-NEON-NEXT: fmov d7, x10
+; CHECK-NEON-NEXT: fmov x10, d5
; CHECK-NEON-NEXT: mul x14, x8, x14
-; CHECK-NEON-NEXT: fmov x13, d3
-; CHECK-NEON-NEXT: fmov d3, x12
-; CHECK-NEON-NEXT: mov w12, #67108864 // =0x4000000
-; CHECK-NEON-NEXT: and v5.8b, v0.8b, v5.8b
-; CHECK-NEON-NEXT: and v7.8b, v0.8b, v3.8b
-; CHECK-NEON-NEXT: eor v3.8b, v4.8b, v6.8b
-; CHECK-NEON-NEXT: fmov d4, x12
-; CHECK-NEON-NEXT: fmov x15, d5
-; CHECK-NEON-NEXT: mul x13, x8, x13
-; CHECK-NEON-NEXT: fmov d5, x11
; CHECK-NEON-NEXT: and v4.8b, v0.8b, v4.8b
-; CHECK-NEON-NEXT: fmov x11, d7
-; CHECK-NEON-NEXT: fmov d7, x14
-; CHECK-NEON-NEXT: mul x12, x8, x15
-; CHECK-NEON-NEXT: mov w15, #134217728 // =0x8000000
-; CHECK-NEON-NEXT: eor v3.8b, v3.8b, v5.8b
+; CHECK-NEON-NEXT: and v2.8b, v0.8b, v2.8b
+; CHECK-NEON-NEXT: eor v5.8b, v6.8b, v16.8b
+; CHECK-NEON-NEXT: fmov d6, x13
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: eor v3.8b, v3.8b, v7.8b
+; CHECK-NEON-NEXT: fmov x13, d4
+; CHECK-NEON-NEXT: mul x12, x8, x12
+; CHECK-NEON-NEXT: and v4.8b, v0.8b, v6.8b
; CHECK-NEON-NEXT: fmov d6, x15
-; CHECK-NEON-NEXT: mov w15, #536870912 // =0x20000000
-; CHECK-NEON-NEXT: fmov x14, d4
-; CHECK-NEON-NEXT: fmov d16, x13
-; CHECK-NEON-NEXT: fmov d17, x15
-; CHECK-NEON-NEXT: movi v4.2s, #128, lsl #24
-; CHECK-NEON-NEXT: mov w15, #1073741824 // =0x40000000
-; CHECK-NEON-NEXT: mul x11, x8, x11
+; CHECK-NEON-NEXT: fmov x15, d2
+; CHECK-NEON-NEXT: fmov d2, x11
+; CHECK-NEON-NEXT: mul x13, x8, x13
; CHECK-NEON-NEXT: and v6.8b, v0.8b, v6.8b
-; CHECK-NEON-NEXT: mul x13, x8, x14
-; CHECK-NEON-NEXT: eor v7.8b, v7.8b, v16.8b
-; CHECK-NEON-NEXT: fmov d16, x15
-; CHECK-NEON-NEXT: mov w15, #268435456 // =0x10000000
-; CHECK-NEON-NEXT: fmov x14, d6
-; CHECK-NEON-NEXT: and v6.8b, v0.8b, v17.8b
-; CHECK-NEON-NEXT: fneg d4, d4
-; CHECK-NEON-NEXT: and v16.8b, v0.8b, v16.8b
-; CHECK-NEON-NEXT: fmov d5, x15
-; CHECK-NEON-NEXT: fmov x15, d6
-; CHECK-NEON-NEXT: fmov d6, x12
-; CHECK-NEON-NEXT: mul x14, x8, x14
-; CHECK-NEON-NEXT: and v5.8b, v0.8b, v5.8b
-; CHECK-NEON-NEXT: and v4.8b, v0.8b, v4.8b
-; CHECK-NEON-NEXT: mul x12, x8, x15
-; CHECK-NEON-NEXT: eor v2.8b, v7.8b, v6.8b
-; CHECK-NEON-NEXT: fmov x15, d16
+; CHECK-NEON-NEXT: eor v2.8b, v5.8b, v2.8b
+; CHECK-NEON-NEXT: fmov d5, x14
+; CHECK-NEON-NEXT: fmov x14, d4
+; CHECK-NEON-NEXT: fmov d4, x10
+; CHECK-NEON-NEXT: mov x10, #8589934592 // =0x200000000
+; CHECK-NEON-NEXT: mul x11, x8, x15
+; CHECK-NEON-NEXT: fmov d7, x13
+; CHECK-NEON-NEXT: fmov x13, d6
; CHECK-NEON-NEXT: fmov d6, x10
-; CHECK-NEON-NEXT: mul x10, x8, x15
-; CHECK-NEON-NEXT: mov x15, #4294967296 // =0x100000000
-; CHECK-NEON-NEXT: eor v3.8b, v3.8b, v6.8b
-; CHECK-NEON-NEXT: fmov d6, x11
-; CHECK-NEON-NEXT: fmov x11, d5
-; CHECK-NEON-NEXT: fmov d5, x9
-; CHECK-NEON-NEXT: fmov x9, d4
-; CHECK-NEON-NEXT: fmov d4, x15
-; CHECK-NEON-NEXT: fmov d7, x12
-; CHECK-NEON-NEXT: mov x15, #281474976710656 // =0x1000000000000
-; CHECK-NEON-NEXT: eor v2.8b, v2.8b, v6.8b
-; CHECK-NEON-NEXT: fmov d6, x13
-; CHECK-NEON-NEXT: mov x13, #8589934592 // =0x200000000
-; CHECK-NEON-NEXT: mul x9, x8, x9
-; CHECK-NEON-NEXT: and v4.8b, v0.8b, v4.8b
-; CHECK-NEON-NEXT: fmov d17, x13
-; CHECK-NEON-NEXT: fmov d16, x10
-; CHECK-NEON-NEXT: eor v3.8b, v3.8b, v5.8b
-; CHECK-NEON-NEXT: mov x13, #549755813888 // =0x8000000000
-; CHECK-NEON-NEXT: mul x11, x8, x11
-; CHECK-NEON-NEXT: eor v2.8b, v2.8b, v6.8b
-; CHECK-NEON-NEXT: fmov d6, x14
-; CHECK-NEON-NEXT: fmov x10, d4
-; CHECK-NEON-NEXT: and v4.8b, v0.8b, v17.8b
-; CHECK-NEON-NEXT: mov x14, #17592186044416 // =0x100000000000
-; CHECK-NEON-NEXT: eor v7.8b, v7.8b, v16.8b
-; CHECK-NEON-NEXT: eor v1.8b, v1.8b, v3.8b
-; CHECK-NEON-NEXT: eor v2.8b, v2.8b, v6.8b
-; CHECK-NEON-NEXT: fmov d6, x9
+; CHECK-NEON-NEXT: mul x14, x8, x14
+; CHECK-NEON-NEXT: eor v2.8b, v2.8b, v5.8b
+; CHECK-NEON-NEXT: fmov d5, x12
+; CHECK-NEON-NEXT: mov x12, #1099511627776 // =0x10000000000
+; CHECK-NEON-NEXT: eor v4.8b, v4.8b, v7.8b
+; CHECK-NEON-NEXT: fmov d7, x9
; CHECK-NEON-NEXT: mov x9, #17179869184 // =0x400000000
-; CHECK-NEON-NEXT: mul x10, x8, x10
-; CHECK-NEON-NEXT: fmov d5, x11
-; CHECK-NEON-NEXT: fmov x11, d4
-; CHECK-NEON-NEXT: fmov d4, x9
-; CHECK-NEON-NEXT: eor v6.8b, v7.8b, v6.8b
-; CHECK-NEON-NEXT: mul x9, x8, x11
-; CHECK-NEON-NEXT: and v4.8b, v0.8b, v4.8b
-; CHECK-NEON-NEXT: mov x11, #34359738368 // =0x800000000
-; CHECK-NEON-NEXT: fmov d3, x11
+; CHECK-NEON-NEXT: mul x10, x8, x13
+; CHECK-NEON-NEXT: and v6.8b, v0.8b, v6.8b
+; CHECK-NEON-NEXT: fmov d17, x9
+; CHECK-NEON-NEXT: fmov d16, x11
; CHECK-NEON-NEXT: eor v2.8b, v2.8b, v5.8b
-; CHECK-NEON-NEXT: fmov d5, x10
-; CHECK-NEON-NEXT: mov x10, #137438953472 // =0x2000000000
-; CHECK-NEON-NEXT: fmov x11, d4
-; CHECK-NEON-NEXT: fmov d4, x10
-; CHECK-NEON-NEXT: and v3.8b, v0.8b, v3.8b
-; CHECK-NEON-NEXT: eor v5.8b, v6.8b, v5.8b
-; CHECK-NEON-NEXT: eor v1.8b, v1.8b, v2.8b
-; CHECK-NEON-NEXT: fmov d6, x9
+; CHECK-NEON-NEXT: eor v3.8b, v3.8b, v7.8b
+; CHECK-NEON-NEXT: fmov d5, x14
+; CHECK-NEON-NEXT: mov x14, #562949953421312 // =0x2000000000000
+; CHECK-NEON-NEXT: fmov x9, d6
+; CHECK-NEON-NEXT: and v6.8b, v0.8b, v17.8b
+; CHECK-NEON-NEXT: eor v4.8b, v4.8b, v16.8b
+; CHECK-NEON-NEXT: fmov d7, x10
+; CHECK-NEON-NEXT: mov x10, #34359738368 // =0x800000000
+; CHECK-NEON-NEXT: eor v1.8b, v1.8b, v3.8b
+; CHECK-NEON-NEXT: mul x9, x8, x9
+; CHECK-NEON-NEXT: fmov x11, d6
+; CHECK-NEON-NEXT: fmov d6, x10
+; CHECK-NEON-NEXT: eor v2.8b, v2.8b, v5.8b
+; CHECK-NEON-NEXT: eor v4.8b, v4.8b, v7.8b
; CHECK-NEON-NEXT: mul x10, x8, x11
-; CHECK-NEON-NEXT: mov x11, #274877906944 // =0x4000000000
-; CHECK-NEON-NEXT: and v4.8b, v0.8b, v4.8b
-; CHECK-NEON-NEXT: fmov x9, d3
+; CHECK-NEON-NEXT: and v6.8b, v0.8b, v6.8b
+; CHECK-NEON-NEXT: mov x11, #137438953472 // =0x2000000000
; CHECK-NEON-NEXT: fmov d3, x11
-; CHECK-NEON-NEXT: mov x11, #68719476736 // =0x1000000000
-; CHECK-NEON-NEXT: eor v5.8b, v5.8b, v6.8b
-; CHECK-NEON-NEXT: fmov d6, x13
-; CHECK-NEON-NEXT: mov x13, #1099511627776 // =0x10000000000
-; CHECK-NEON-NEXT: fmov x12, d4
-; CHECK-NEON-NEXT: fmov d4, x11
+; CHECK-NEON-NEXT: eor v1.8b, v1.8b, v2.8b
+; CHECK-NEON-NEXT: fmov d5, x9
+; CHECK-NEON-NEXT: mov x9, #274877906944 // =0x4000000000
+; CHECK-NEON-NEXT: fmov x11, d6
; CHECK-NEON-NEXT: and v3.8b, v0.8b, v3.8b
-; CHECK-NEON-NEXT: mul x9, x8, x9
-; CHECK-NEON-NEXT: fmov d7, x10
-; CHECK-NEON-NEXT: mul x11, x8, x12
-; CHECK-NEON-NEXT: and v4.8b, v0.8b, v4.8b
-; CHECK-NEON-NEXT: fmov x12, d3
-; CHECK-NEON-NEXT: and v3.8b, v0.8b, v6.8b
-; CHECK-NEON-NEXT: fmov d6, x13
-; CHECK-NEON-NEXT: eor v2.8b, v5.8b, v7.8b
-; CHECK-NEON-NEXT: fmov d7, x14
-; CHECK-NEON-NEXT: mov x14, #35184372088832 // =0x200000000000
-; CHECK-NEON-NEXT: mul x12, x8, x12
-; CHECK-NEON-NEXT: and v6.8b, v0.8b, v6.8b
+; CHECK-NEON-NEXT: eor v4.8b, v4.8b, v5.8b
+; CHECK-NEON-NEXT: fmov d5, x9
+; CHECK-NEON-NEXT: fmov d6, x10
+; CHECK-NEON-NEXT: mul x9, x8, x11
+; CHECK-NEON-NEXT: mov x11, #549755813888 // =0x8000000000
; CHECK-NEON-NEXT: fmov x10, d3
-; CHECK-NEON-NEXT: fmov d3, x9
-; CHECK-NEON-NEXT: fmov x9, d4
-; CHECK-NEON-NEXT: fmov d4, x11
-; CHECK-NEON-NEXT: mov x11, #2199023255552 // =0x20000000000
-; CHECK-NEON-NEXT: mul x13, x8, x10
-; CHECK-NEON-NEXT: fmov x10, d6
-; CHECK-NEON-NEXT: fmov d6, x11
-; CHECK-NEON-NEXT: mov x11, #4398046511104 // =0x40000000000
-; CHECK-NEON-NEXT: eor v2.8b, v2.8b, v3.8b
-; CHECK-NEON-NEXT: fmov d5, x12
; CHECK-NEON-NEXT: fmov d3, x11
-; CHECK-NEON-NEXT: mul x12, x8, x10
-; CHECK-NEON-NEXT: eor v4.8b, v4.8b, v5.8b
-; CHECK-NEON-NEXT: and v5.8b, v0.8b, v6.8b
-; CHECK-NEON-NEXT: mul x10, x8, x9
-; CHECK-NEON-NEXT: fmov d6, x13
+; CHECK-NEON-NEXT: mov x11, #68719476736 // =0x1000000000
+; CHECK-NEON-NEXT: and v5.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT: eor v4.8b, v4.8b, v6.8b
+; CHECK-NEON-NEXT: fmov d6, x11
; CHECK-NEON-NEXT: and v3.8b, v0.8b, v3.8b
-; CHECK-NEON-NEXT: mov x9, #8796093022208 // =0x80000000000
+; CHECK-NEON-NEXT: mul x10, x8, x10
; CHECK-NEON-NEXT: fmov x11, d5
-; CHECK-NEON-NEXT: fmov d5, x9
-; CHECK-NEON-NEXT: eor v4.8b, v4.8b, v6.8b
-; CHECK-NEON-NEXT: fmov d6, x12
-; CHECK-NEON-NEXT: fmov x12, d3
-; CHECK-NEON-NEXT: mul x9, x8, x11
+; CHECK-NEON-NEXT: and v5.8b, v0.8b, v6.8b
+; CHECK-NEON-NEXT: fmov d2, x9
+; CHECK-NEON-NEXT: fmov x9, d3
+; CHECK-NEON-NEXT: fmov d3, x12
+; CHECK-NEON-NEXT: mov x12, #2199023255552 // =0x20000000000
+; CHECK-NEON-NEXT: mul x11, x8, x11
+; CHECK-NEON-NEXT: eor v2.8b, v4.8b, v2.8b
+; CHECK-NEON-NEXT: fmov d4, x10
+; CHECK-NEON-NEXT: mov x10, #4398046511104 // =0x40000000000
+; CHECK-NEON-NEXT: mul x13, x8, x9
+; CHECK-NEON-NEXT: fmov x9, d5
+; CHECK-NEON-NEXT: and v3.8b, v0.8b, v3.8b
+; CHECK-NEON-NEXT: fmov d5, x12
+; CHECK-NEON-NEXT: fmov d6, x11
+; CHECK-NEON-NEXT: fmov x11, d3
+; CHECK-NEON-NEXT: fmov d3, x10
; CHECK-NEON-NEXT: and v5.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT: mul x9, x8, x9
+; CHECK-NEON-NEXT: mul x10, x8, x11
+; CHECK-NEON-NEXT: and v3.8b, v0.8b, v3.8b
; CHECK-NEON-NEXT: mov x11, #70368744177664 // =0x400000000000
-; CHECK-NEON-NEXT: fmov d3, x11
+; CHECK-NEON-NEXT: fmov x12, d5
+; CHECK-NEON-NEXT: fmov d5, x11
; CHECK-NEON-NEXT: eor v4.8b, v4.8b, v6.8b
-; CHECK-NEON-NEXT: and v6.8b, v0.8b, v7.8b
+; CHECK-NEON-NEXT: fmov d6, x13
+; CHECK-NEON-NEXT: fmov x13, d3
; CHECK-NEON-NEXT: mul x11, x8, x12
; CHECK-NEON-NEXT: mov x12, #140737488355328 // =0x800000000000
-; CHECK-NEON-NEXT: fmov x13, d5
-; CHECK-NEON-NEXT: fmov d5, x12
-; CHECK-NEON-NEXT: and v3.8b, v0.8b, v3.8b
-; CHECK-NEON-NEXT: fmov d16, x9
; CHECK-NEON-NEXT: and v5.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT: fmov d3, x12
+; CHECK-NEON-NEXT: eor v4.8b, v4.8b, v6.8b
+; CHECK-NEON-NEXT: fmov d6, x10
; CHECK-NEON-NEXT: mul x12, x8, x13
+; CHECK-NEON-NEXT: mov x13, #281474976710656 // =0x1000000000000
+; CHECK-NEON-NEXT: fmov x10, d5
+; CHECK-NEON-NEXT: fmov d5, x13
+; CHECK-NEON-NEXT: mov x13, #8796093022208 // =0x80000000000
+; CHECK-NEON-NEXT: and v3.8b, v0.8b, v3.8b
+; CHECK-NEON-NEXT: eor v4.8b, v4.8b, v6.8b
+; CHECK-NEON-NEXT: fmov d6, x13
+; CHECK-NEON-NEXT: fmov d7, x11
+; CHECK-NEON-NEXT: and v5.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT: mul x10, x8, x10
; CHECK-NEON-NEXT: fmov x13, d3
-; CHECK-NEON-NEXT: fmov d3, x14
-; CHECK-NEON-NEXT: eor v4.8b, v4.8b, v16.8b
-; CHECK-NEON-NEXT: fmov x14, d5
-; CHECK-NEON-NEXT: fmov d5, x15
-; CHECK-NEON-NEXT: mov x15, #562949953421312 // =0x2000000000000
+; CHECK-NEON-NEXT: and v3.8b, v0.8b, v6.8b
+; CHECK-NEON-NEXT: fmov d6, x9
+; CHECK-NEON-NEXT: eor v4.8b, v4.8b, v7.8b
+; CHECK-NEON-NEXT: fmov x9, d5
+; CHECK-NEON-NEXT: fmov d5, x14
+; CHECK-NEON-NEXT: mov x14, #1125899906842624 // =0x4000000000000
; CHECK-NEON-NEXT: mul x13, x8, x13
-; CHECK-NEON-NEXT: fmov d7, x15
-; CHECK-NEON-NEXT: fmov x15, d6
-; CHECK-NEON-NEXT: and v6.8b, v0.8b, v3.8b
+; CHECK-NEON-NEXT: fmov x11, d3
+; CHECK-NEON-NEXT: fmov d7, x14
; CHECK-NEON-NEXT: fmov d3, x10
+; CHECK-NEON-NEXT: eor v2.8b, v2.8b, v6.8b
; CHECK-NEON-NEXT: and v5.8b, v0.8b, v5.8b
-; CHECK-NEON-NEXT: mul x14, x8, x14
-; CHECK-NEON-NEXT: and v7.8b, v0.8b, v7.8b
-; CHECK-NEON-NEXT: mul x9, x8, x15
-; CHECK-NEON-NEXT: eor v2.8b, v2.8b, v3.8b
-; CHECK-NEON-NEXT: fmov x10, d5
-; CHECK-NEON-NEXT: fmov d5, x11
-; CHECK-NEON-NEXT: fmov x11, d6
-; CHECK-NEON-NEXT: fmov d6, x13
-; CHECK-NEON-NEXT: mov x13, #1125899906842624 // =0x4000000000000
+; CHECK-NEON-NEXT: mul x9, x8, x9
+; CHECK-NEON-NEXT: mul x11, x8, x11
+; CHECK-NEON-NEXT: eor v1.8b, v1.8b, v2.8b
; CHECK-NEON-NEXT: fmov d16, x13
+; CHECK-NEON-NEXT: fmov x10, d5
; CHECK-NEON-NEXT: mov x13, #2251799813685248 // =0x8000000000000
-; CHECK-NEON-NEXT: eor v1.8b, v1.8b, v2.8b
-; CHECK-NEON-NEXT: mul x15, x8, x10
-; CHECK-NEON-NEXT: fmov x10, d7
-; CHECK-NEON-NEXT: fmov d7, x14
-; CHECK-NEON-NEXT: fmov d17, x13
-; CHECK-NEON-NEXT: eor v4.8b, v4.8b, v5.8b
-; CHECK-NEON-NEXT: and v16.8b, v0.8b, v16.8b
-; CHECK-NEON-NEXT: mul x14, x8, x10
-; CHECK-NEON-NEXT: eor v7.8b, v6.8b, v7.8b
+; CHECK-NEON-NEXT: and v5.8b, v0.8b, v7.8b
+; CHECK-NEON-NEXT: fmov d7, x12
+; CHECK-NEON-NEXT: mov x12, #17592186044416 // =0x100000000000
+; CHECK-NEON-NEXT: fmov d17, x9
+; CHECK-NEON-NEXT: eor v3.8b, v3.8b, v16.8b
+; CHECK-NEON-NEXT: fmov d16, x13
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: fmov x9, d5
+; CHECK-NEON-NEXT: eor v4.8b, v4.8b, v7.8b
+; CHECK-NEON-NEXT: fmov d7, x11
+; CHECK-NEON-NEXT: mov x11, #35184372088832 // =0x200000000000
+; CHECK-NEON-NEXT: mov x13, #9007199254740992 // =0x20000000000000
+; CHECK-NEON-NEXT: and v5.8b, v0.8b, v16.8b
+; CHECK-NEON-NEXT: fmov d16, x12
+; CHECK-NEON-NEXT: eor v3.8b, v3.8b, v17.8b
+; CHECK-NEON-NEXT: mul x9, x8, x9
+; CHECK-NEON-NEXT: mov x12, #4503599627370496 // =0x10000000000000
+; CHECK-NEON-NEXT: fmov d17, x10
; CHECK-NEON-NEXT: fmov d6, x12
-; CHECK-NEON-NEXT: and v17.8b, v0.8b, v17.8b
-; CHECK-NEON-NEXT: mul x10, x8, x11
-; CHECK-NEON-NEXT: mov x11, #4503599627370496 // =0x10000000000000
-; CHECK-NEON-NEXT: fmov x12, d16
-; CHECK-NEON-NEXT: fmov d16, x11
-; CHECK-NEON-NEXT: fmov d18, x15
-; CHECK-NEON-NEXT: mov x15, #288230376151711744 // =0x400000000000000
-; CHECK-NEON-NEXT: fmov x13, d17
-; CHECK-NEON-NEXT: eor v4.8b, v4.8b, v6.8b
-; CHECK-NEON-NEXT: mul x11, x8, x12
-; CHECK-NEON-NEXT: mov x12, #9007199254740992 // =0x20000000000000
+; CHECK-NEON-NEXT: mov x12, #72057594037927936 // =0x100000000000000
+; CHECK-NEON-NEXT: fmov x10, d5
; CHECK-NEON-NEXT: and v16.8b, v0.8b, v16.8b
-; CHECK-NEON-NEXT: fmov d17, x12
-; CHECK-NEON-NEXT: eor v7.8b, v7.8b, v18.8b
-; CHECK-NEON-NEXT: fmov d18, x14
+; CHECK-NEON-NEXT: fmov d5, x11
+; CHECK-NEON-NEXT: eor v17.8b, v3.8b, v17.8b
+; CHECK-NEON-NEXT: eor v3.8b, v4.8b, v7.8b
+; CHECK-NEON-NEXT: fmov d7, x12
+; CHECK-NEON-NEXT: mul x11, x8, x10
+; CHECK-NEON-NEXT: and v4.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT: fmov x10, d16
+; CHECK-NEON-NEXT: fmov d5, x9
+; CHECK-NEON-NEXT: and v6.8b, v0.8b, v6.8b
+; CHECK-NEON-NEXT: mul x9, x8, x10
+; CHECK-NEON-NEXT: fmov x10, d4
+; CHECK-NEON-NEXT: eor v5.8b, v17.8b, v5.8b
+; CHECK-NEON-NEXT: fmov x12, d6
+; CHECK-NEON-NEXT: fmov d4, x11
+; CHECK-NEON-NEXT: mov x11, #144115188075855872 // =0x200000000000000
+; CHECK-NEON-NEXT: fmov d6, x11
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: eor v4.8b, v5.8b, v4.8b
+; CHECK-NEON-NEXT: and v5.8b, v0.8b, v7.8b
+; CHECK-NEON-NEXT: fmov d7, x13
+; CHECK-NEON-NEXT: and v6.8b, v0.8b, v6.8b
+; CHECK-NEON-NEXT: mul x11, x8, x12
+; CHECK-NEON-NEXT: mov x12, #18014398509481984 // =0x40000000000000
+; CHECK-NEON-NEXT: fmov x13, d5
+; CHECK-NEON-NEXT: fmov d5, x12
+; CHECK-NEON-NEXT: and v7.8b, v0.8b, v7.8b
+; CHECK-NEON-NEXT: fmov x14, d6
; CHECK-NEON-NEXT: mul x12, x8, x13
-; CHECK-NEON-NEXT: mov x13, #72057594037927936 // =0x100000000000000
-; CHECK-NEON-NEXT: fmov x14, d16
-; CHECK-NEON-NEXT: and v17.8b, v0.8b, v17.8b
-; CHECK-NEON-NEXT: fmov d16, x13
-; CHECK-NEON-NEXT: eor v7.8b, v7.8b, v18.8b
-; CHECK-NEON-NEXT: fmov d18, x11
+; CHECK-NEON-NEXT: mov x13, #288230376151711744 // =0x400000000000000
+; CHECK-NEON-NEXT: and v5.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT: fmov d6, x13
+; CHECK-NEON-NEXT: fmov d16, x11
+; CHECK-NEON-NEXT: fmov x11, d7
; CHECK-NEON-NEXT: mul x13, x8, x14
-; CHECK-NEON-NEXT: mov x14, #144115188075855872 // =0x200000000000000
-; CHECK-NEON-NEXT: fmov x11, d17
-; CHECK-NEON-NEXT: fmov d17, x14
-; CHECK-NEON-NEXT: mov x14, #18014398509481984 // =0x40000000000000
-; CHECK-NEON-NEXT: and v16.8b, v0.8b, v16.8b
-; CHECK-NEON-NEXT: eor v7.8b, v7.8b, v18.8b
-; CHECK-NEON-NEXT: fmov d18, x14
+; CHECK-NEON-NEXT: mov x14, #576460752303423488 // =0x800000000000000
+; CHECK-NEON-NEXT: fmov d7, x14
+; CHECK-NEON-NEXT: mov x14, #36028797018963968 // =0x80000000000000
+; CHECK-NEON-NEXT: fmov x15, d5
+; CHECK-NEON-NEXT: and v6.8b, v0.8b, v6.8b
+; CHECK-NEON-NEXT: fmov d5, x14
; CHECK-NEON-NEXT: mul x11, x8, x11
-; CHECK-NEON-NEXT: and v17.8b, v0.8b, v17.8b
-; CHECK-NEON-NEXT: fmov x14, d16
-; CHECK-NEON-NEXT: and v16.8b, v0.8b, v18.8b
-; CHECK-NEON-NEXT: fmov d18, x12
-; CHECK-NEON-NEXT: fmov x12, d17
-; CHECK-NEON-NEXT: fmov d17, x15
-; CHECK-NEON-NEXT: mul x14, x8, x14
-; CHECK-NEON-NEXT: mul x15, x8, x12
-; CHECK-NEON-NEXT: mov x12, #576460752303423488 // =0x800000000000000
-; CHECK-NEON-NEXT: and v17.8b, v0.8b, v17.8b
-; CHECK-NEON-NEXT: fmov d5, x12
-; CHECK-NEON-NEXT: fmov x12, d16
-; CHECK-NEON-NEXT: fmov d6, x14
-; CHECK-NEON-NEXT: and v3.8b, v0.8b, v5.8b
-; CHECK-NEON-NEXT: eor v5.8b, v7.8b, v18.8b
-; CHECK-NEON-NEXT: fmov d7, x13
-; CHECK-NEON-NEXT: fmov x13, d17
-; CHECK-NEON-NEXT: fmov d16, x15
-; CHECK-NEON-NEXT: mov x15, #1152921504606846976 // =0x1000000000000000
-; CHECK-NEON-NEXT: mul x12, x8, x12
-; CHECK-NEON-NEXT: fmov x14, d3
-; CHECK-NEON-NEXT: eor v3.8b, v5.8b, v7.8b
-; CHECK-NEON-NEXT: fmov d5, x15
-; CHECK-NEON-NEXT: mul x13, x8, x13
-; CHECK-NEON-NEXT: mov x15, #2305843009213693952 // =0x2000000000000000
-; CHECK-NEON-NEXT: eor v6.8b, v6.8b, v16.8b
-; CHECK-NEON-NEXT: fmov d7, x15
-; CHECK-NEON-NEXT: mov x15, #36028797018963968 // =0x80000000000000
-; CHECK-NEON-NEXT: movi d16, #0000000000000000
-; CHECK-NEON-NEXT: mul x14, x8, x14
+; CHECK-NEON-NEXT: eor v4.8b, v4.8b, v16.8b
+; CHECK-NEON-NEXT: and v7.8b, v0.8b, v7.8b
+; CHECK-NEON-NEXT: fmov x14, d6
+; CHECK-NEON-NEXT: fmov d6, x12
+; CHECK-NEON-NEXT: fmov d17, x13
; CHECK-NEON-NEXT: and v5.8b, v0.8b, v5.8b
-; CHECK-NEON-NEXT: fmov d17, x15
+; CHECK-NEON-NEXT: fmov x13, d7
+; CHECK-NEON-NEXT: mul x12, x8, x14
+; CHECK-NEON-NEXT: mov x14, #1152921504606846976 // =0x1000000000000000
+; CHECK-NEON-NEXT: eor v6.8b, v6.8b, v17.8b
+; CHECK-NEON-NEXT: fmov d7, x14
+; CHECK-NEON-NEXT: mov x14, #2305843009213693952 // =0x2000000000000000
+; CHECK-NEON-NEXT: fmov d17, x14
+; CHECK-NEON-NEXT: mul x13, x8, x13
; CHECK-NEON-NEXT: and v7.8b, v0.8b, v7.8b
-; CHECK-NEON-NEXT: fmov d18, x13
-; CHECK-NEON-NEXT: fmov x13, d5
+; CHECK-NEON-NEXT: mul x14, x8, x15
+; CHECK-NEON-NEXT: fmov d19, x12
; CHECK-NEON-NEXT: and v17.8b, v0.8b, v17.8b
-; CHECK-NEON-NEXT: fneg d16, d16
-; CHECK-NEON-NEXT: fmov d5, x14
-; CHECK-NEON-NEXT: mov x14, #4611686018427387904 // =0x4000000000000000
-; CHECK-NEON-NEXT: fmov x15, d7
-; CHECK-NEON-NEXT: eor v6.8b, v6.8b, v18.8b
-; CHECK-NEON-NEXT: mul x13, x8, x13
-; CHECK-NEON-NEXT: fmov d7, x14
-; CHECK-NEON-NEXT: fmov x14, d17
-; CHECK-NEON-NEXT: fmov d17, x9
+; CHECK-NEON-NEXT: fmov x12, d7
+; CHECK-NEON-NEXT: fmov d7, #2.00000000
+; CHECK-NEON-NEXT: fmov d16, x13
+; CHECK-NEON-NEXT: eor v6.8b, v6.8b, v19.8b
+; CHECK-NEON-NEXT: fmov x15, d17
+; CHECK-NEON-NEXT: fneg d17, d18
+; CHECK-NEON-NEXT: fmov x13, d5
+; CHECK-NEON-NEXT: mul x12, x8, x12
+; CHECK-NEON-NEXT: and v5.8b, v0.8b, v7.8b
+; CHECK-NEON-NEXT: fmov d7, x9
+; CHECK-NEON-NEXT: eor v6.8b, v6.8b, v16.8b
+; CHECK-NEON-NEXT: fmov d16, x11
; CHECK-NEON-NEXT: mul x15, x8, x15
+; CHECK-NEON-NEXT: and v0.8b, v0.8b, v17.8b
+; CHECK-NEON-NEXT: fmov x11, d5
+; CHECK-NEON-NEXT: eor v3.8b, v3.8b, v7.8b
+; CHECK-NEON-NEXT: mul x9, x8, x13
+; CHECK-NEON-NEXT: eor v4.8b, v4.8b, v16.8b
+; CHECK-NEON-NEXT: fmov d5, x12
; CHECK-NEON-NEXT: eor v5.8b, v6.8b, v5.8b
-; CHECK-NEON-NEXT: fmov d6, x11
-; CHECK-NEON-NEXT: and v7.8b, v0.8b, v7.8b
-; CHECK-NEON-NEXT: and v0.8b, v0.8b, v16.8b
-; CHECK-NEON-NEXT: eor v4.8b, v4.8b, v17.8b
-; CHECK-NEON-NEXT: mul x9, x8, x14
-; CHECK-NEON-NEXT: eor v3.8b, v3.8b, v6.8b
-; CHECK-NEON-NEXT: fmov d6, x13
-; CHECK-NEON-NEXT: fmov x11, d7
-; CHECK-NEON-NEXT: eor v5.8b, v5.8b, v6.8b
; CHECK-NEON-NEXT: fmov d6, x10
; CHECK-NEON-NEXT: mul x10, x8, x11
; CHECK-NEON-NEXT: fmov x11, d0
; CHECK-NEON-NEXT: fmov d0, x15
-; CHECK-NEON-NEXT: eor v2.8b, v4.8b, v6.8b
-; CHECK-NEON-NEXT: fmov d4, x12
+; CHECK-NEON-NEXT: eor v2.8b, v3.8b, v6.8b
+; CHECK-NEON-NEXT: fmov d3, x14
; CHECK-NEON-NEXT: mul x8, x8, x11
; CHECK-NEON-NEXT: eor v0.8b, v5.8b, v0.8b
; CHECK-NEON-NEXT: fmov d5, x10
-; CHECK-NEON-NEXT: eor v3.8b, v3.8b, v4.8b
+; CHECK-NEON-NEXT: eor v3.8b, v4.8b, v3.8b
; CHECK-NEON-NEXT: fmov d4, x9
; CHECK-NEON-NEXT: eor v1.8b, v1.8b, v2.8b
; CHECK-NEON-NEXT: eor v0.8b, v0.8b, v5.8b
@@ -8004,7 +8002,7 @@ define <2 x i64> @clmulh_v2i64_neon(<2 x i64> %a, <2 x i64> %b) nounwind {
; CHECK-NEON-NEXT: eor v2.16b, v3.16b, v2.16b
; CHECK-NEON-NEXT: mov v17.d[1], x0
; CHECK-NEON-NEXT: eor v5.16b, v6.16b, v21.16b
-; CHECK-NEON-NEXT: movi v6.2d, #0000000000000000
+; CHECK-NEON-NEXT: movi d6, #0000000000000000
; CHECK-NEON-NEXT: mul x0, x9, x2
; CHECK-NEON-NEXT: fmov x2, d16
; CHECK-NEON-NEXT: fmov v16.2d, #2.00000000
@@ -8119,7 +8117,9 @@ define <1 x i64> @clmulh_v1i64_neon(<1 x i64> %a, <1 x i64> %b) nounwind {
; CHECK-NEON-NEXT: mov w12, #32 // =0x20
; CHECK-NEON-NEXT: mov w13, #64 // =0x40
; CHECK-NEON-NEXT: mov w14, #128 // =0x80
+; CHECK-NEON-NEXT: mov w16, #512 // =0x200
; CHECK-NEON-NEXT: mov w15, #256 // =0x100
+; CHECK-NEON-NEXT: movi d18, #0000000000000000
; CHECK-NEON-NEXT: rbit v0.8b, v1.8b
; CHECK-NEON-NEXT: fmov d1, x8
; CHECK-NEON-NEXT: mov w8, #1 // =0x1
@@ -8136,11 +8136,8 @@ define <1 x i64> @clmulh_v1i64_neon(<1 x i64> %a, <1 x i64> %b) nounwind {
; CHECK-NEON-NEXT: fmov x10, d3
; CHECK-NEON-NEXT: fmov d3, x11
; CHECK-NEON-NEXT: and v2.8b, v0.8b, v2.8b
-; CHECK-NEON-NEXT: fmov d4, x14
-; CHECK-NEON-NEXT: mov w14, #512 // =0x200
; CHECK-NEON-NEXT: fmov x11, d1
; CHECK-NEON-NEXT: mul x9, x8, x9
-; CHECK-NEON-NEXT: fmov d5, x14
; CHECK-NEON-NEXT: and v1.8b, v0.8b, v3.8b
; CHECK-NEON-NEXT: fmov d3, x12
; CHECK-NEON-NEXT: fmov x12, d2
@@ -8149,412 +8146,412 @@ define <1 x i64> @clmulh_v1i64_neon(<1 x i64> %a, <1 x i64> %b) nounwind {
; CHECK-NEON-NEXT: fmov d3, x13
; CHECK-NEON-NEXT: fmov x13, d1
; CHECK-NEON-NEXT: mul x11, x8, x11
+; CHECK-NEON-NEXT: fmov d4, x9
; CHECK-NEON-NEXT: and v1.8b, v0.8b, v3.8b
+; CHECK-NEON-NEXT: fmov d3, x14
; CHECK-NEON-NEXT: mul x12, x8, x12
-; CHECK-NEON-NEXT: fmov d3, x15
; CHECK-NEON-NEXT: fmov x14, d2
-; CHECK-NEON-NEXT: and v2.8b, v0.8b, v4.8b
-; CHECK-NEON-NEXT: fmov d4, x9
-; CHECK-NEON-NEXT: mov w15, #1024 // =0x400
; CHECK-NEON-NEXT: mul x13, x8, x13
-; CHECK-NEON-NEXT: fmov x9, d1
-; CHECK-NEON-NEXT: and v1.8b, v0.8b, v5.8b
-; CHECK-NEON-NEXT: fmov d5, x10
-; CHECK-NEON-NEXT: and v3.8b, v0.8b, v3.8b
-; CHECK-NEON-NEXT: fmov x10, d2
-; CHECK-NEON-NEXT: fmov d2, x11
-; CHECK-NEON-NEXT: fmov d6, x12
+; CHECK-NEON-NEXT: and v2.8b, v0.8b, v3.8b
+; CHECK-NEON-NEXT: fmov d3, x16
+; CHECK-NEON-NEXT: fmov x17, d1
+; CHECK-NEON-NEXT: fmov d1, x15
+; CHECK-NEON-NEXT: fmov d5, x11
; CHECK-NEON-NEXT: mul x14, x8, x14
-; CHECK-NEON-NEXT: mov w11, #2048 // =0x800
-; CHECK-NEON-NEXT: eor v4.8b, v5.8b, v4.8b
-; CHECK-NEON-NEXT: fmov d5, x15
-; CHECK-NEON-NEXT: fmov x12, d3
-; CHECK-NEON-NEXT: mul x10, x8, x10
-; CHECK-NEON-NEXT: fmov d3, x11
-; CHECK-NEON-NEXT: eor v2.8b, v2.8b, v6.8b
-; CHECK-NEON-NEXT: and v5.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT: fmov d6, x12
+; CHECK-NEON-NEXT: mov w12, #4096 // =0x1000
+; CHECK-NEON-NEXT: fmov x9, d2
+; CHECK-NEON-NEXT: and v2.8b, v0.8b, v3.8b
+; CHECK-NEON-NEXT: fmov d3, x10
+; CHECK-NEON-NEXT: and v1.8b, v0.8b, v1.8b
+; CHECK-NEON-NEXT: mov w10, #2048 // =0x800
+; CHECK-NEON-NEXT: mul x15, x8, x17
+; CHECK-NEON-NEXT: eor v3.8b, v3.8b, v4.8b
+; CHECK-NEON-NEXT: eor v4.8b, v5.8b, v6.8b
+; CHECK-NEON-NEXT: fmov d5, x10
+; CHECK-NEON-NEXT: fmov x10, d1
+; CHECK-NEON-NEXT: fmov x11, d2
+; CHECK-NEON-NEXT: fmov d2, x13
; CHECK-NEON-NEXT: mul x9, x8, x9
-; CHECK-NEON-NEXT: and v3.8b, v0.8b, v3.8b
-; CHECK-NEON-NEXT: mul x11, x8, x12
+; CHECK-NEON-NEXT: mov w13, #8192 // =0x2000
+; CHECK-NEON-NEXT: eor v3.8b, v3.8b, v4.8b
+; CHECK-NEON-NEXT: fmov d4, x12
+; CHECK-NEON-NEXT: and v1.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT: fmov d5, x14
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: mov w12, #1024 // =0x400
+; CHECK-NEON-NEXT: and v4.8b, v0.8b, v4.8b
+; CHECK-NEON-NEXT: mul x11, x8, x11
+; CHECK-NEON-NEXT: eor v2.8b, v2.8b, v5.8b
+; CHECK-NEON-NEXT: fmov d5, x12
; CHECK-NEON-NEXT: fmov x12, d1
-; CHECK-NEON-NEXT: fmov d1, x13
-; CHECK-NEON-NEXT: mov w13, #4096 // =0x1000
-; CHECK-NEON-NEXT: eor v2.8b, v4.8b, v2.8b
-; CHECK-NEON-NEXT: fmov d4, x14
-; CHECK-NEON-NEXT: fmov x14, d5
-; CHECK-NEON-NEXT: fmov d5, x13
+; CHECK-NEON-NEXT: fmov d6, x9
+; CHECK-NEON-NEXT: fmov x14, d4
+; CHECK-NEON-NEXT: fmov d4, x13
+; CHECK-NEON-NEXT: fmov d7, x10
+; CHECK-NEON-NEXT: and v1.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT: mov w13, #16384 // =0x4000
; CHECK-NEON-NEXT: mul x12, x8, x12
-; CHECK-NEON-NEXT: eor v1.8b, v1.8b, v4.8b
-; CHECK-NEON-NEXT: and v4.8b, v0.8b, v5.8b
-; CHECK-NEON-NEXT: fmov d5, x10
+; CHECK-NEON-NEXT: fmov d5, x15
+; CHECK-NEON-NEXT: and v4.8b, v0.8b, v4.8b
+; CHECK-NEON-NEXT: mul x9, x8, x14
+; CHECK-NEON-NEXT: mov w14, #32768 // =0x8000
+; CHECK-NEON-NEXT: fmov x10, d1
+; CHECK-NEON-NEXT: eor v1.8b, v6.8b, v7.8b
+; CHECK-NEON-NEXT: fmov d6, x13
+; CHECK-NEON-NEXT: fmov d7, x14
+; CHECK-NEON-NEXT: eor v2.8b, v2.8b, v5.8b
+; CHECK-NEON-NEXT: fmov x13, d4
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: and v4.8b, v0.8b, v6.8b
; CHECK-NEON-NEXT: fmov d6, x11
-; CHECK-NEON-NEXT: mov w11, #8192 // =0x2000
-; CHECK-NEON-NEXT: fmov x10, d3
-; CHECK-NEON-NEXT: mul x13, x8, x14
-; CHECK-NEON-NEXT: eor v3.8b, v5.8b, v6.8b
-; CHECK-NEON-NEXT: fmov d5, x9
+; CHECK-NEON-NEXT: and v5.8b, v0.8b, v7.8b
+; CHECK-NEON-NEXT: fmov d7, x9
+; CHECK-NEON-NEXT: eor v2.8b, v3.8b, v2.8b
+; CHECK-NEON-NEXT: mul x11, x8, x13
; CHECK-NEON-NEXT: fmov x9, d4
-; CHECK-NEON-NEXT: fmov d4, x11
-; CHECK-NEON-NEXT: mov w11, #16384 // =0x4000
+; CHECK-NEON-NEXT: eor v1.8b, v1.8b, v6.8b
; CHECK-NEON-NEXT: fmov d6, x12
+; CHECK-NEON-NEXT: fmov d4, x10
+; CHECK-NEON-NEXT: fmov x10, d5
+; CHECK-NEON-NEXT: mul x9, x8, x9
+; CHECK-NEON-NEXT: eor v6.8b, v6.8b, v7.8b
+; CHECK-NEON-NEXT: fmov d3, x11
+; CHECK-NEON-NEXT: mov w11, #65536 // =0x10000
+; CHECK-NEON-NEXT: eor v1.8b, v1.8b, v4.8b
+; CHECK-NEON-NEXT: fmov d4, x11
; CHECK-NEON-NEXT: mul x10, x8, x10
-; CHECK-NEON-NEXT: eor v1.8b, v1.8b, v5.8b
+; CHECK-NEON-NEXT: mov w11, #131072 // =0x20000
+; CHECK-NEON-NEXT: eor v3.8b, v6.8b, v3.8b
; CHECK-NEON-NEXT: fmov d5, x11
-; CHECK-NEON-NEXT: mul x9, x8, x9
; CHECK-NEON-NEXT: and v4.8b, v0.8b, v4.8b
-; CHECK-NEON-NEXT: eor v3.8b, v3.8b, v6.8b
-; CHECK-NEON-NEXT: fmov d6, x13
; CHECK-NEON-NEXT: eor v1.8b, v2.8b, v1.8b
-; CHECK-NEON-NEXT: fmov x11, d4
-; CHECK-NEON-NEXT: and v4.8b, v0.8b, v5.8b
-; CHECK-NEON-NEXT: eor v2.8b, v3.8b, v6.8b
-; CHECK-NEON-NEXT: fmov d3, x10
-; CHECK-NEON-NEXT: fmov d5, x9
-; CHECK-NEON-NEXT: mov w9, #32768 // =0x8000
+; CHECK-NEON-NEXT: fmov d2, x9
+; CHECK-NEON-NEXT: mov w9, #262144 // =0x40000
+; CHECK-NEON-NEXT: eor v2.8b, v3.8b, v2.8b
+; CHECK-NEON-NEXT: and v3.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT: fmov d5, x10
; CHECK-NEON-NEXT: fmov x10, d4
; CHECK-NEON-NEXT: fmov d4, x9
-; CHECK-NEON-NEXT: mul x11, x8, x11
-; CHECK-NEON-NEXT: eor v1.8b, v1.8b, v2.8b
-; CHECK-NEON-NEXT: eor v3.8b, v3.8b, v5.8b
-; CHECK-NEON-NEXT: mul x9, x8, x10
+; CHECK-NEON-NEXT: eor v2.8b, v2.8b, v5.8b
+; CHECK-NEON-NEXT: fmov x11, d3
; CHECK-NEON-NEXT: and v4.8b, v0.8b, v4.8b
-; CHECK-NEON-NEXT: mov w10, #65536 // =0x10000
+; CHECK-NEON-NEXT: mul x9, x8, x10
+; CHECK-NEON-NEXT: mov w10, #524288 // =0x80000
+; CHECK-NEON-NEXT: fmov d3, x10
+; CHECK-NEON-NEXT: mov w10, #1048576 // =0x100000
+; CHECK-NEON-NEXT: eor v1.8b, v1.8b, v2.8b
+; CHECK-NEON-NEXT: mul x11, x8, x11
+; CHECK-NEON-NEXT: fmov x12, d4
+; CHECK-NEON-NEXT: fmov d4, x10
+; CHECK-NEON-NEXT: mov w10, #2097152 // =0x200000
; CHECK-NEON-NEXT: fmov d2, x10
-; CHECK-NEON-NEXT: mov w10, #131072 // =0x20000
+; CHECK-NEON-NEXT: and v3.8b, v0.8b, v3.8b
+; CHECK-NEON-NEXT: mov w10, #4194304 // =0x400000
+; CHECK-NEON-NEXT: and v4.8b, v0.8b, v4.8b
+; CHECK-NEON-NEXT: mul x12, x8, x12
+; CHECK-NEON-NEXT: and v2.8b, v0.8b, v2.8b
+; CHECK-NEON-NEXT: fmov x13, d3
+; CHECK-NEON-NEXT: fmov d3, x9
; CHECK-NEON-NEXT: fmov d5, x11
-; CHECK-NEON-NEXT: fmov x11, d4
+; CHECK-NEON-NEXT: fmov x9, d4
; CHECK-NEON-NEXT: fmov d4, x10
-; CHECK-NEON-NEXT: and v2.8b, v0.8b, v2.8b
+; CHECK-NEON-NEXT: fmov x11, d2
+; CHECK-NEON-NEXT: mul x13, x8, x13
; CHECK-NEON-NEXT: eor v3.8b, v3.8b, v5.8b
-; CHECK-NEON-NEXT: fmov d5, x9
-; CHECK-NEON-NEXT: mul x10, x8, x11
-; CHECK-NEON-NEXT: mov w11, #262144 // =0x40000
+; CHECK-NEON-NEXT: fmov d5, x12
+; CHECK-NEON-NEXT: mul x10, x8, x9
; CHECK-NEON-NEXT: and v4.8b, v0.8b, v4.8b
-; CHECK-NEON-NEXT: fmov x9, d2
-; CHECK-NEON-NEXT: fmov d2, x11
-; CHECK-NEON-NEXT: mov w11, #524288 // =0x80000
+; CHECK-NEON-NEXT: mov w9, #8388608 // =0x800000
+; CHECK-NEON-NEXT: fmov d2, x9
+; CHECK-NEON-NEXT: mul x9, x8, x11
+; CHECK-NEON-NEXT: mov w11, #16777216 // =0x1000000
; CHECK-NEON-NEXT: eor v3.8b, v3.8b, v5.8b
-; CHECK-NEON-NEXT: fmov x12, d4
+; CHECK-NEON-NEXT: fmov x14, d4
; CHECK-NEON-NEXT: fmov d4, x11
-; CHECK-NEON-NEXT: mov w11, #1048576 // =0x100000
+; CHECK-NEON-NEXT: fmov d5, x13
; CHECK-NEON-NEXT: and v2.8b, v0.8b, v2.8b
-; CHECK-NEON-NEXT: mul x9, x8, x9
-; CHECK-NEON-NEXT: fmov d5, x10
-; CHECK-NEON-NEXT: mul x12, x8, x12
-; CHECK-NEON-NEXT: and v4.8b, v0.8b, v4.8b
-; CHECK-NEON-NEXT: fmov x10, d2
-; CHECK-NEON-NEXT: fmov d2, x11
-; CHECK-NEON-NEXT: fmov x11, d4
-; CHECK-NEON-NEXT: fmov d4, x9
-; CHECK-NEON-NEXT: mul x13, x8, x10
-; CHECK-NEON-NEXT: mov w10, #2097152 // =0x200000
-; CHECK-NEON-NEXT: and v6.8b, v0.8b, v2.8b
-; CHECK-NEON-NEXT: eor v2.8b, v3.8b, v5.8b
-; CHECK-NEON-NEXT: fmov d3, x10
-; CHECK-NEON-NEXT: mov w10, #4194304 // =0x400000
-; CHECK-NEON-NEXT: fmov d5, x10
-; CHECK-NEON-NEXT: mul x11, x8, x11
-; CHECK-NEON-NEXT: fmov x9, d6
-; CHECK-NEON-NEXT: fmov d6, x12
-; CHECK-NEON-NEXT: and v3.8b, v0.8b, v3.8b
-; CHECK-NEON-NEXT: eor v1.8b, v1.8b, v2.8b
-; CHECK-NEON-NEXT: and v5.8b, v0.8b, v5.8b
-; CHECK-NEON-NEXT: mul x10, x8, x9
-; CHECK-NEON-NEXT: mov w9, #8388608 // =0x800000
-; CHECK-NEON-NEXT: eor v4.8b, v4.8b, v6.8b
-; CHECK-NEON-NEXT: fmov x12, d3
-; CHECK-NEON-NEXT: fmov d3, x9
-; CHECK-NEON-NEXT: fmov d6, x13
-; CHECK-NEON-NEXT: fmov x14, d5
-; CHECK-NEON-NEXT: mul x9, x8, x12
-; CHECK-NEON-NEXT: mov w12, #16777216 // =0x1000000
-; CHECK-NEON-NEXT: and v3.8b, v0.8b, v3.8b
-; CHECK-NEON-NEXT: fmov d5, x12
-; CHECK-NEON-NEXT: mov w12, #33554432 // =0x2000000
-; CHECK-NEON-NEXT: mul x14, x8, x14
-; CHECK-NEON-NEXT: fmov x13, d3
-; CHECK-NEON-NEXT: fmov d3, x12
-; CHECK-NEON-NEXT: mov w12, #67108864 // =0x4000000
-; CHECK-NEON-NEXT: and v5.8b, v0.8b, v5.8b
-; CHECK-NEON-NEXT: and v7.8b, v0.8b, v3.8b
-; CHECK-NEON-NEXT: eor v3.8b, v4.8b, v6.8b
-; CHECK-NEON-NEXT: fmov d4, x12
-; CHECK-NEON-NEXT: fmov x15, d5
-; CHECK-NEON-NEXT: mul x13, x8, x13
-; CHECK-NEON-NEXT: fmov d5, x11
+; CHECK-NEON-NEXT: mul x11, x8, x14
; CHECK-NEON-NEXT: and v4.8b, v0.8b, v4.8b
-; CHECK-NEON-NEXT: fmov x11, d7
-; CHECK-NEON-NEXT: fmov d7, x14
-; CHECK-NEON-NEXT: mul x12, x8, x15
-; CHECK-NEON-NEXT: mov w15, #134217728 // =0x8000000
+; CHECK-NEON-NEXT: mov w14, #33554432 // =0x2000000
+; CHECK-NEON-NEXT: fmov x12, d2
+; CHECK-NEON-NEXT: fmov d2, x14
+; CHECK-NEON-NEXT: mov w14, #67108864 // =0x4000000
; CHECK-NEON-NEXT: eor v3.8b, v3.8b, v5.8b
-; CHECK-NEON-NEXT: fmov d6, x15
+; CHECK-NEON-NEXT: fmov x15, d4
+; CHECK-NEON-NEXT: fmov d4, x14
+; CHECK-NEON-NEXT: and v2.8b, v0.8b, v2.8b
+; CHECK-NEON-NEXT: mul x12, x8, x12
+; CHECK-NEON-NEXT: fmov d6, x11
+; CHECK-NEON-NEXT: mul x13, x8, x15
+; CHECK-NEON-NEXT: mov w15, #134217728 // =0x8000000
+; CHECK-NEON-NEXT: and v4.8b, v0.8b, v4.8b
+; CHECK-NEON-NEXT: fmov x14, d2
+; CHECK-NEON-NEXT: fmov d7, x15
; CHECK-NEON-NEXT: mov w15, #536870912 // =0x20000000
-; CHECK-NEON-NEXT: fmov x14, d4
-; CHECK-NEON-NEXT: fmov d16, x13
-; CHECK-NEON-NEXT: fmov d17, x15
-; CHECK-NEON-NEXT: movi v4.2s, #128, lsl #24
-; CHECK-NEON-NEXT: mov w15, #1073741824 // =0x40000000
-; CHECK-NEON-NEXT: mul x11, x8, x11
-; CHECK-NEON-NEXT: and v6.8b, v0.8b, v6.8b
-; CHECK-NEON-NEXT: mul x13, x8, x14
-; CHECK-NEON-NEXT: eor v7.8b, v7.8b, v16.8b
-; CHECK-NEON-NEXT: fmov d16, x15
-; CHECK-NEON-NEXT: mov w15, #268435456 // =0x10000000
-; CHECK-NEON-NEXT: fmov x14, d6
-; CHECK-NEON-NEXT: and v6.8b, v0.8b, v17.8b
-; CHECK-NEON-NEXT: fneg d4, d4
-; CHECK-NEON-NEXT: and v16.8b, v0.8b, v16.8b
; CHECK-NEON-NEXT: fmov d5, x15
-; CHECK-NEON-NEXT: fmov x15, d6
-; CHECK-NEON-NEXT: fmov d6, x12
-; CHECK-NEON-NEXT: mul x14, x8, x14
-; CHECK-NEON-NEXT: and v5.8b, v0.8b, v5.8b
-; CHECK-NEON-NEXT: and v4.8b, v0.8b, v4.8b
-; CHECK-NEON-NEXT: mul x12, x8, x15
-; CHECK-NEON-NEXT: eor v2.8b, v7.8b, v6.8b
-; CHECK-NEON-NEXT: fmov x15, d16
-; CHECK-NEON-NEXT: fmov d6, x10
-; CHECK-NEON-NEXT: mul x10, x8, x15
-; CHECK-NEON-NEXT: mov x15, #4294967296 // =0x100000000
-; CHECK-NEON-NEXT: eor v3.8b, v3.8b, v6.8b
-; CHECK-NEON-NEXT: fmov d6, x11
-; CHECK-NEON-NEXT: fmov x11, d5
-; CHECK-NEON-NEXT: fmov d5, x9
-; CHECK-NEON-NEXT: fmov x9, d4
-; CHECK-NEON-NEXT: fmov d4, x15
+; CHECK-NEON-NEXT: mov w15, #1073741824 // =0x40000000
+; CHECK-NEON-NEXT: movi v2.2s, #128, lsl #24
+; CHECK-NEON-NEXT: mul x11, x8, x14
+; CHECK-NEON-NEXT: fmov x14, d4
+; CHECK-NEON-NEXT: and v4.8b, v0.8b, v7.8b
; CHECK-NEON-NEXT: fmov d7, x12
-; CHECK-NEON-NEXT: mov x15, #281474976710656 // =0x1000000000000
-; CHECK-NEON-NEXT: eor v2.8b, v2.8b, v6.8b
-; CHECK-NEON-NEXT: fmov d6, x13
-; CHECK-NEON-NEXT: mov x13, #8589934592 // =0x200000000
-; CHECK-NEON-NEXT: mul x9, x8, x9
-; CHECK-NEON-NEXT: and v4.8b, v0.8b, v4.8b
-; CHECK-NEON-NEXT: fmov d17, x13
-; CHECK-NEON-NEXT: fmov d16, x10
-; CHECK-NEON-NEXT: eor v3.8b, v3.8b, v5.8b
-; CHECK-NEON-NEXT: mov x13, #549755813888 // =0x8000000000
-; CHECK-NEON-NEXT: mul x11, x8, x11
-; CHECK-NEON-NEXT: eor v2.8b, v2.8b, v6.8b
-; CHECK-NEON-NEXT: fmov d6, x14
-; CHECK-NEON-NEXT: fmov x10, d4
-; CHECK-NEON-NEXT: and v4.8b, v0.8b, v17.8b
-; CHECK-NEON-NEXT: mov x14, #17592186044416 // =0x100000000000
-; CHECK-NEON-NEXT: eor v7.8b, v7.8b, v16.8b
-; CHECK-NEON-NEXT: eor v1.8b, v1.8b, v3.8b
-; CHECK-NEON-NEXT: eor v2.8b, v2.8b, v6.8b
-; CHECK-NEON-NEXT: fmov d6, x9
-; CHECK-NEON-NEXT: mov x9, #17179869184 // =0x400000000
-; CHECK-NEON-NEXT: mul x10, x8, x10
-; CHECK-NEON-NEXT: fmov d5, x11
-; CHECK-NEON-NEXT: fmov x11, d4
-; CHECK-NEON-NEXT: fmov d4, x9
-; CHECK-NEON-NEXT: eor v6.8b, v7.8b, v6.8b
-; CHECK-NEON-NEXT: mul x9, x8, x11
-; CHECK-NEON-NEXT: and v4.8b, v0.8b, v4.8b
-; CHECK-NEON-NEXT: mov x11, #34359738368 // =0x800000000
-; CHECK-NEON-NEXT: fmov d3, x11
-; CHECK-NEON-NEXT: eor v2.8b, v2.8b, v5.8b
-; CHECK-NEON-NEXT: fmov d5, x10
-; CHECK-NEON-NEXT: mov x10, #137438953472 // =0x2000000000
-; CHECK-NEON-NEXT: fmov x11, d4
-; CHECK-NEON-NEXT: fmov d4, x10
-; CHECK-NEON-NEXT: and v3.8b, v0.8b, v3.8b
-; CHECK-NEON-NEXT: eor v5.8b, v6.8b, v5.8b
-; CHECK-NEON-NEXT: eor v1.8b, v1.8b, v2.8b
-; CHECK-NEON-NEXT: fmov d6, x9
-; CHECK-NEON-NEXT: mul x10, x8, x11
-; CHECK-NEON-NEXT: mov x11, #274877906944 // =0x4000000000
-; CHECK-NEON-NEXT: and v4.8b, v0.8b, v4.8b
-; CHECK-NEON-NEXT: fmov x9, d3
-; CHECK-NEON-NEXT: fmov d3, x11
-; CHECK-NEON-NEXT: mov x11, #68719476736 // =0x1000000000
-; CHECK-NEON-NEXT: eor v5.8b, v5.8b, v6.8b
-; CHECK-NEON-NEXT: fmov d6, x13
-; CHECK-NEON-NEXT: mov x13, #1099511627776 // =0x10000000000
+; CHECK-NEON-NEXT: and v5.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT: fmov d16, x13
+; CHECK-NEON-NEXT: fneg d2, d2
+; CHECK-NEON-NEXT: mov w13, #268435456 // =0x10000000
; CHECK-NEON-NEXT: fmov x12, d4
-; CHECK-NEON-NEXT: fmov d4, x11
-; CHECK-NEON-NEXT: and v3.8b, v0.8b, v3.8b
-; CHECK-NEON-NEXT: mul x9, x8, x9
+; CHECK-NEON-NEXT: fmov d4, x15
+; CHECK-NEON-NEXT: mov x15, #4294967296 // =0x100000000
+; CHECK-NEON-NEXT: eor v6.8b, v6.8b, v7.8b
; CHECK-NEON-NEXT: fmov d7, x10
-; CHECK-NEON-NEXT: mul x11, x8, x12
+; CHECK-NEON-NEXT: fmov x10, d5
+; CHECK-NEON-NEXT: mul x14, x8, x14
; CHECK-NEON-NEXT: and v4.8b, v0.8b, v4.8b
-; CHECK-NEON-NEXT: fmov x12, d3
-; CHECK-NEON-NEXT: and v3.8b, v0.8b, v6.8b
+; CHECK-NEON-NEXT: and v2.8b, v0.8b, v2.8b
+; CHECK-NEON-NEXT: eor v5.8b, v6.8b, v16.8b
; CHECK-NEON-NEXT: fmov d6, x13
-; CHECK-NEON-NEXT: eor v2.8b, v5.8b, v7.8b
-; CHECK-NEON-NEXT: fmov d7, x14
-; CHECK-NEON-NEXT: mov x14, #35184372088832 // =0x200000000000
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: eor v3.8b, v3.8b, v7.8b
+; CHECK-NEON-NEXT: fmov x13, d4
; CHECK-NEON-NEXT: mul x12, x8, x12
+; CHECK-NEON-NEXT: and v4.8b, v0.8b, v6.8b
+; CHECK-NEON-NEXT: fmov d6, x15
+; CHECK-NEON-NEXT: fmov x15, d2
+; CHECK-NEON-NEXT: fmov d2, x11
+; CHECK-NEON-NEXT: mul x13, x8, x13
; CHECK-NEON-NEXT: and v6.8b, v0.8b, v6.8b
-; CHECK-NEON-NEXT: fmov x10, d3
-; CHECK-NEON-NEXT: fmov d3, x9
-; CHECK-NEON-NEXT: fmov x9, d4
-; CHECK-NEON-NEXT: fmov d4, x11
-; CHECK-NEON-NEXT: mov x11, #2199023255552 // =0x20000000000
-; CHECK-NEON-NEXT: mul x13, x8, x10
-; CHECK-NEON-NEXT: fmov x10, d6
-; CHECK-NEON-NEXT: fmov d6, x11
-; CHECK-NEON-NEXT: mov x11, #4398046511104 // =0x40000000000
-; CHECK-NEON-NEXT: eor v2.8b, v2.8b, v3.8b
+; CHECK-NEON-NEXT: eor v2.8b, v5.8b, v2.8b
+; CHECK-NEON-NEXT: fmov d5, x14
+; CHECK-NEON-NEXT: fmov x14, d4
+; CHECK-NEON-NEXT: fmov d4, x10
+; CHECK-NEON-NEXT: mov x10, #8589934592 // =0x200000000
+; CHECK-NEON-NEXT: mul x11, x8, x15
+; CHECK-NEON-NEXT: fmov d7, x13
+; CHECK-NEON-NEXT: fmov x13, d6
+; CHECK-NEON-NEXT: fmov d6, x10
+; CHECK-NEON-NEXT: mul x14, x8, x14
+; CHECK-NEON-NEXT: eor v2.8b, v2.8b, v5.8b
; CHECK-NEON-NEXT: fmov d5, x12
+; CHECK-NEON-NEXT: mov x12, #1099511627776 // =0x10000000000
+; CHECK-NEON-NEXT: eor v4.8b, v4.8b, v7.8b
+; CHECK-NEON-NEXT: fmov d7, x9
+; CHECK-NEON-NEXT: mov x9, #17179869184 // =0x400000000
+; CHECK-NEON-NEXT: mul x10, x8, x13
+; CHECK-NEON-NEXT: and v6.8b, v0.8b, v6.8b
+; CHECK-NEON-NEXT: fmov d17, x9
+; CHECK-NEON-NEXT: fmov d16, x11
+; CHECK-NEON-NEXT: eor v2.8b, v2.8b, v5.8b
+; CHECK-NEON-NEXT: eor v3.8b, v3.8b, v7.8b
+; CHECK-NEON-NEXT: fmov d5, x14
+; CHECK-NEON-NEXT: mov x14, #562949953421312 // =0x2000000000000
+; CHECK-NEON-NEXT: fmov x9, d6
+; CHECK-NEON-NEXT: and v6.8b, v0.8b, v17.8b
+; CHECK-NEON-NEXT: eor v4.8b, v4.8b, v16.8b
+; CHECK-NEON-NEXT: fmov d7, x10
+; CHECK-NEON-NEXT: mov x10, #34359738368 // =0x800000000
+; CHECK-NEON-NEXT: eor v1.8b, v1.8b, v3.8b
+; CHECK-NEON-NEXT: mul x9, x8, x9
+; CHECK-NEON-NEXT: fmov x11, d6
+; CHECK-NEON-NEXT: fmov d6, x10
+; CHECK-NEON-NEXT: eor v2.8b, v2.8b, v5.8b
+; CHECK-NEON-NEXT: eor v4.8b, v4.8b, v7.8b
+; CHECK-NEON-NEXT: mul x10, x8, x11
+; CHECK-NEON-NEXT: and v6.8b, v0.8b, v6.8b
+; CHECK-NEON-NEXT: mov x11, #137438953472 // =0x2000000000
; CHECK-NEON-NEXT: fmov d3, x11
-; CHECK-NEON-NEXT: mul x12, x8, x10
-; CHECK-NEON-NEXT: eor v4.8b, v4.8b, v5.8b
-; CHECK-NEON-NEXT: and v5.8b, v0.8b, v6.8b
-; CHECK-NEON-NEXT: mul x10, x8, x9
-; CHECK-NEON-NEXT: fmov d6, x13
+; CHECK-NEON-NEXT: eor v1.8b, v1.8b, v2.8b
+; CHECK-NEON-NEXT: fmov d5, x9
+; CHECK-NEON-NEXT: mov x9, #274877906944 // =0x4000000000
+; CHECK-NEON-NEXT: fmov x11, d6
; CHECK-NEON-NEXT: and v3.8b, v0.8b, v3.8b
-; CHECK-NEON-NEXT: mov x9, #8796093022208 // =0x80000000000
-; CHECK-NEON-NEXT: fmov x11, d5
+; CHECK-NEON-NEXT: eor v4.8b, v4.8b, v5.8b
; CHECK-NEON-NEXT: fmov d5, x9
-; CHECK-NEON-NEXT: eor v4.8b, v4.8b, v6.8b
-; CHECK-NEON-NEXT: fmov d6, x12
-; CHECK-NEON-NEXT: fmov x12, d3
+; CHECK-NEON-NEXT: fmov d6, x10
; CHECK-NEON-NEXT: mul x9, x8, x11
+; CHECK-NEON-NEXT: mov x11, #549755813888 // =0x8000000000
+; CHECK-NEON-NEXT: fmov x10, d3
+; CHECK-NEON-NEXT: fmov d3, x11
+; CHECK-NEON-NEXT: mov x11, #68719476736 // =0x1000000000
+; CHECK-NEON-NEXT: and v5.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT: eor v4.8b, v4.8b, v6.8b
+; CHECK-NEON-NEXT: fmov d6, x11
+; CHECK-NEON-NEXT: and v3.8b, v0.8b, v3.8b
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: fmov x11, d5
+; CHECK-NEON-NEXT: and v5.8b, v0.8b, v6.8b
+; CHECK-NEON-NEXT: fmov d2, x9
+; CHECK-NEON-NEXT: fmov x9, d3
+; CHECK-NEON-NEXT: fmov d3, x12
+; CHECK-NEON-NEXT: mov x12, #2199023255552 // =0x20000000000
+; CHECK-NEON-NEXT: mul x11, x8, x11
+; CHECK-NEON-NEXT: eor v2.8b, v4.8b, v2.8b
+; CHECK-NEON-NEXT: fmov d4, x10
+; CHECK-NEON-NEXT: mov x10, #4398046511104 // =0x40000000000
+; CHECK-NEON-NEXT: mul x13, x8, x9
+; CHECK-NEON-NEXT: fmov x9, d5
+; CHECK-NEON-NEXT: and v3.8b, v0.8b, v3.8b
+; CHECK-NEON-NEXT: fmov d5, x12
+; CHECK-NEON-NEXT: fmov d6, x11
+; CHECK-NEON-NEXT: fmov x11, d3
+; CHECK-NEON-NEXT: fmov d3, x10
; CHECK-NEON-NEXT: and v5.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT: mul x9, x8, x9
+; CHECK-NEON-NEXT: mul x10, x8, x11
+; CHECK-NEON-NEXT: and v3.8b, v0.8b, v3.8b
; CHECK-NEON-NEXT: mov x11, #70368744177664 // =0x400000000000
-; CHECK-NEON-NEXT: fmov d3, x11
+; CHECK-NEON-NEXT: fmov x12, d5
+; CHECK-NEON-NEXT: fmov d5, x11
; CHECK-NEON-NEXT: eor v4.8b, v4.8b, v6.8b
-; CHECK-NEON-NEXT: and v6.8b, v0.8b, v7.8b
+; CHECK-NEON-NEXT: fmov d6, x13
+; CHECK-NEON-NEXT: fmov x13, d3
; CHECK-NEON-NEXT: mul x11, x8, x12
; CHECK-NEON-NEXT: mov x12, #140737488355328 // =0x800000000000
-; CHECK-NEON-NEXT: fmov x13, d5
-; CHECK-NEON-NEXT: fmov d5, x12
-; CHECK-NEON-NEXT: and v3.8b, v0.8b, v3.8b
-; CHECK-NEON-NEXT: fmov d16, x9
; CHECK-NEON-NEXT: and v5.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT: fmov d3, x12
+; CHECK-NEON-NEXT: eor v4.8b, v4.8b, v6.8b
+; CHECK-NEON-NEXT: fmov d6, x10
; CHECK-NEON-NEXT: mul x12, x8, x13
+; CHECK-NEON-NEXT: mov x13, #281474976710656 // =0x1000000000000
+; CHECK-NEON-NEXT: fmov x10, d5
+; CHECK-NEON-NEXT: fmov d5, x13
+; CHECK-NEON-NEXT: mov x13, #8796093022208 // =0x80000000000
+; CHECK-NEON-NEXT: and v3.8b, v0.8b, v3.8b
+; CHECK-NEON-NEXT: eor v4.8b, v4.8b, v6.8b
+; CHECK-NEON-NEXT: fmov d6, x13
+; CHECK-NEON-NEXT: fmov d7, x11
+; CHECK-NEON-NEXT: and v5.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT: mul x10, x8, x10
; CHECK-NEON-NEXT: fmov x13, d3
-; CHECK-NEON-NEXT: fmov d3, x14
-; CHECK-NEON-NEXT: eor v4.8b, v4.8b, v16.8b
-; CHECK-NEON-NEXT: fmov x14, d5
-; CHECK-NEON-NEXT: fmov d5, x15
-; CHECK-NEON-NEXT: mov x15, #562949953421312 // =0x2000000000000
+; CHECK-NEON-NEXT: and v3.8b, v0.8b, v6.8b
+; CHECK-NEON-NEXT: fmov d6, x9
+; CHECK-NEON-NEXT: eor v4.8b, v4.8b, v7.8b
+; CHECK-NEON-NEXT: fmov x9, d5
+; CHECK-NEON-NEXT: fmov d5, x14
+; CHECK-NEON-NEXT: mov x14, #1125899906842624 // =0x4000000000000
; CHECK-NEON-NEXT: mul x13, x8, x13
-; CHECK-NEON-NEXT: fmov d7, x15
-; CHECK-NEON-NEXT: fmov x15, d6
-; CHECK-NEON-NEXT: and v6.8b, v0.8b, v3.8b
+; CHECK-NEON-NEXT: fmov x11, d3
+; CHECK-NEON-NEXT: fmov d7, x14
; CHECK-NEON-NEXT: fmov d3, x10
+; CHECK-NEON-NEXT: eor v2.8b, v2.8b, v6.8b
; CHECK-NEON-NEXT: and v5.8b, v0.8b, v5.8b
-; CHECK-NEON-NEXT: mul x14, x8, x14
-; CHECK-NEON-NEXT: and v7.8b, v0.8b, v7.8b
-; CHECK-NEON-NEXT: mul x9, x8, x15
-; CHECK-NEON-NEXT: eor v2.8b, v2.8b, v3.8b
-; CHECK-NEON-NEXT: fmov x10, d5
-; CHECK-NEON-NEXT: fmov d5, x11
-; CHECK-NEON-NEXT: fmov x11, d6
-; CHECK-NEON-NEXT: fmov d6, x13
-; CHECK-NEON-NEXT: mov x13, #1125899906842624 // =0x4000000000000
+; CHECK-NEON-NEXT: mul x9, x8, x9
+; CHECK-NEON-NEXT: mul x11, x8, x11
+; CHECK-NEON-NEXT: eor v1.8b, v1.8b, v2.8b
; CHECK-NEON-NEXT: fmov d16, x13
+; CHECK-NEON-NEXT: fmov x10, d5
; CHECK-NEON-NEXT: mov x13, #2251799813685248 // =0x8000000000000
-; CHECK-NEON-NEXT: eor v1.8b, v1.8b, v2.8b
-; CHECK-NEON-NEXT: mul x15, x8, x10
-; CHECK-NEON-NEXT: fmov x10, d7
-; CHECK-NEON-NEXT: fmov d7, x14
-; CHECK-NEON-NEXT: fmov d17, x13
-; CHECK-NEON-NEXT: eor v4.8b, v4.8b, v5.8b
-; CHECK-NEON-NEXT: and v16.8b, v0.8b, v16.8b
-; CHECK-NEON-NEXT: mul x14, x8, x10
-; CHECK-NEON-NEXT: eor v7.8b, v6.8b, v7.8b
+; CHECK-NEON-NEXT: and v5.8b, v0.8b, v7.8b
+; CHECK-NEON-NEXT: fmov d7, x12
+; CHECK-NEON-NEXT: mov x12, #17592186044416 // =0x100000000000
+; CHECK-NEON-NEXT: fmov d17, x9
+; CHECK-NEON-NEXT: eor v3.8b, v3.8b, v16.8b
+; CHECK-NEON-NEXT: fmov d16, x13
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: fmov x9, d5
+; CHECK-NEON-NEXT: eor v4.8b, v4.8b, v7.8b
+; CHECK-NEON-NEXT: fmov d7, x11
+; CHECK-NEON-NEXT: mov x11, #35184372088832 // =0x200000000000
+; CHECK-NEON-NEXT: mov x13, #9007199254740992 // =0x20000000000000
+; CHECK-NEON-NEXT: and v5.8b, v0.8b, v16.8b
+; CHECK-NEON-NEXT: fmov d16, x12
+; CHECK-NEON-NEXT: eor v3.8b, v3.8b, v17.8b
+; CHECK-NEON-NEXT: mul x9, x8, x9
+; CHECK-NEON-NEXT: mov x12, #4503599627370496 // =0x10000000000000
+; CHECK-NEON-NEXT: fmov d17, x10
; CHECK-NEON-NEXT: fmov d6, x12
-; CHECK-NEON-NEXT: and v17.8b, v0.8b, v17.8b
-; CHECK-NEON-NEXT: mul x10, x8, x11
-; CHECK-NEON-NEXT: mov x11, #4503599627370496 // =0x10000000000000
-; CHECK-NEON-NEXT: fmov x12, d16
-; CHECK-NEON-NEXT: fmov d16, x11
-; CHECK-NEON-NEXT: fmov d18, x15
-; CHECK-NEON-NEXT: mov x15, #288230376151711744 // =0x400000000000000
-; CHECK-NEON-NEXT: fmov x13, d17
-; CHECK-NEON-NEXT: eor v4.8b, v4.8b, v6.8b
-; CHECK-NEON-NEXT: mul x11, x8, x12
-; CHECK-NEON-NEXT: mov x12, #9007199254740992 // =0x20000000000000
+; CHECK-NEON-NEXT: mov x12, #72057594037927936 // =0x100000000000000
+; CHECK-NEON-NEXT: fmov x10, d5
; CHECK-NEON-NEXT: and v16.8b, v0.8b, v16.8b
-; CHECK-NEON-NEXT: fmov d17, x12
-; CHECK-NEON-NEXT: eor v7.8b, v7.8b, v18.8b
-; CHECK-NEON-NEXT: fmov d18, x14
+; CHECK-NEON-NEXT: fmov d5, x11
+; CHECK-NEON-NEXT: eor v17.8b, v3.8b, v17.8b
+; CHECK-NEON-NEXT: eor v3.8b, v4.8b, v7.8b
+; CHECK-NEON-NEXT: fmov d7, x12
+; CHECK-NEON-NEXT: mul x11, x8, x10
+; CHECK-NEON-NEXT: and v4.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT: fmov x10, d16
+; CHECK-NEON-NEXT: fmov d5, x9
+; CHECK-NEON-NEXT: and v6.8b, v0.8b, v6.8b
+; CHECK-NEON-NEXT: mul x9, x8, x10
+; CHECK-NEON-NEXT: fmov x10, d4
+; CHECK-NEON-NEXT: eor v5.8b, v17.8b, v5.8b
+; CHECK-NEON-NEXT: fmov x12, d6
+; CHECK-NEON-NEXT: fmov d4, x11
+; CHECK-NEON-NEXT: mov x11, #144115188075855872 // =0x200000000000000
+; CHECK-NEON-NEXT: fmov d6, x11
+; CHECK-NEON-NEXT: mul x10, x8, x10
+; CHECK-NEON-NEXT: eor v4.8b, v5.8b, v4.8b
+; CHECK-NEON-NEXT: and v5.8b, v0.8b, v7.8b
+; CHECK-NEON-NEXT: fmov d7, x13
+; CHECK-NEON-NEXT: and v6.8b, v0.8b, v6.8b
+; CHECK-NEON-NEXT: mul x11, x8, x12
+; CHECK-NEON-NEXT: mov x12, #18014398509481984 // =0x40000000000000
+; CHECK-NEON-NEXT: fmov x13, d5
+; CHECK-NEON-NEXT: fmov d5, x12
+; CHECK-NEON-NEXT: and v7.8b, v0.8b, v7.8b
+; CHECK-NEON-NEXT: fmov x14, d6
; CHECK-NEON-NEXT: mul x12, x8, x13
-; CHECK-NEON-NEXT: mov x13, #72057594037927936 // =0x100000000000000
-; CHECK-NEON-NEXT: fmov x14, d16
-; CHECK-NEON-NEXT: and v17.8b, v0.8b, v17.8b
-; CHECK-NEON-NEXT: fmov d16, x13
-; CHECK-NEON-NEXT: eor v7.8b, v7.8b, v18.8b
-; CHECK-NEON-NEXT: fmov d18, x11
+; CHECK-NEON-NEXT: mov x13, #288230376151711744 // =0x400000000000000
+; CHECK-NEON-NEXT: and v5.8b, v0.8b, v5.8b
+; CHECK-NEON-NEXT: fmov d6, x13
+; CHECK-NEON-NEXT: fmov d16, x11
+; CHECK-NEON-NEXT: fmov x11, d7
; CHECK-NEON-NEXT: mul x13, x8, x14
-; CHECK-NEON-NEXT: mov x14, #144115188075855872 // =0x200000000000000
-; CHECK-NEON-NEXT: fmov x11, d17
-; CHECK-NEON-NEXT: fmov d17, x14
-; CHECK-NEON-NEXT: mov x14, #18014398509481984 // =0x40000000000000
-; CHECK-NEON-NEXT: and v16.8b, v0.8b, v16.8b
-; CHECK-NEON-NEXT: eor v7.8b, v7.8b, v18.8b
-; CHECK-NEON-NEXT: fmov d18, x14
+; CHECK-NEON-NEXT: mov x14, #576460752303423488 // =0x800000000000000
+; CHECK-NEON-NEXT: fmov d7, x14
+; CHECK-NEON-NEXT: mov x14, #36028797018963968 // =0x80000000000000
+; CHECK-NEON-NEXT: fmov x15, d5
+; CHECK-NEON-NEXT: and v6.8b, v0.8b, v6.8b
+; CHECK-NEON-NEXT: fmov d5, x14
; CHECK-NEON-NEXT: mul x11, x8, x11
-; CHECK-NEON-NEXT: and v17.8b, v0.8b, v17.8b
-; CHECK-NEON-NEXT: fmov x14, d16
-; CHECK-NEON-NEXT: and v16.8b, v0.8b, v18.8b
-; CHECK-NEON-NEXT: fmov d18, x12
-; CHECK-NEON-NEXT: fmov x12, d17
-; CHECK-NEON-NEXT: fmov d17, x15
-; CHECK-NEON-NEXT: mul x14, x8, x14
-; CHECK-NEON-NEXT: mul x15, x8, x12
-; CHECK-NEON-NEXT: mov x12, #576460752303423488 // =0x800000000000000
-; CHECK-NEON-NEXT: and v17.8b, v0.8b, v17.8b
-; CHECK-NEON-NEXT: fmov d5, x12
-; CHECK-NEON-NEXT: fmov x12, d16
-; CHECK-NEON-NEXT: fmov d6, x14
-; CHECK-NEON-NEXT: and v3.8b, v0.8b, v5.8b
-; CHECK-NEON-NEXT: eor v5.8b, v7.8b, v18.8b
-; CHECK-NEON-NEXT: fmov d7, x13
-; CHECK-NEON-NEXT: fmov x13, d17
-; CHECK-NEON-NEXT: fmov d16, x15
-; CHECK-NEON-NEXT: mov x15, #1152921504606846976 // =0x1000000000000000
-; CHECK-NEON-NEXT: mul x12, x8, x12
-; CHECK-NEON-NEXT: fmov x14, d3
-; CHECK-NEON-NEXT: eor v3.8b, v5.8b, v7.8b
-; CHECK-NEON-NEXT: fmov d5, x15
-; CHECK-NEON-NEXT: mul x13, x8, x13
-; CHECK-NEON-NEXT: mov x15, #2305843009213693952 // =0x2000000000000000
-; CHECK-NEON-NEXT: eor v6.8b, v6.8b, v16.8b
-; CHECK-NEON-NEXT: fmov d7, x15
-; CHECK-NEON-NEXT: mov x15, #36028797018963968 // =0x80000000000000
-; CHECK-NEON-NEXT: movi d16, #0000000000000000
-; CHECK-NEON-NEXT: mul x14, x8, x14
+; CHECK-NEON-NEXT: eor v4.8b, v4.8b, v16.8b
+; CHECK-NEON-NEXT: and v7.8b, v0.8b, v7.8b
+; CHECK-NEON-NEXT: fmov x14, d6
+; CHECK-NEON-NEXT: fmov d6, x12
+; CHECK-NEON-NEXT: fmov d17, x13
; CHECK-NEON-NEXT: and v5.8b, v0.8b, v5.8b
-; CHECK-NEON-NEXT: fmov d17, x15
+; CHECK-NEON-NEXT: fmov x13, d7
+; CHECK-NEON-NEXT: mul x12, x8, x14
+; CHECK-NEON-NEXT: mov x14, #1152921504606846976 // =0x1000000000000000
+; CHECK-NEON-NEXT: eor v6.8b, v6.8b, v17.8b
+; CHECK-NEON-NEXT: fmov d7, x14
+; CHECK-NEON-NEXT: mov x14, #2305843009213693952 // =0x2000000000000000
+; CHECK-NEON-NEXT: fmov d17, x14
+; CHECK-NEON-NEXT: mul x13, x8, x13
; CHECK-NEON-NEXT: and v7.8b, v0.8b, v7.8b
-; CHECK-NEON-NEXT: fmov d18, x13
-; CHECK-NEON-NEXT: fmov x13, d5
+; CHECK-NEON-NEXT: mul x14, x8, x15
+; CHECK-NEON-NEXT: fmov d19, x12
; CHECK-NEON-NEXT: and v17.8b, v0.8b, v17.8b
-; CHECK-NEON-NEXT: fneg d16, d16
-; CHECK-NEON-NEXT: fmov d5, x14
-; CHECK-NEON-NEXT: mov x14, #4611686018427387904 // =0x4000000000000000
-; CHECK-NEON-NEXT: fmov x15, d7
-; CHECK-NEON-NEXT: eor v6.8b, v6.8b, v18.8b
-; CHECK-NEON-NEXT: mul x13, x8, x13
-; CHECK-NEON-NEXT: fmov d7, x14
-; CHECK-NEON-NEXT: fmov x14, d17
-; CHECK-NEON-NEXT: fmov d17, x9
+; CHECK-NEON-NEXT: fmov x12, d7
+; CHECK-NEON-NEXT: fmov d7, #2.00000000
+; CHECK-NEON-NEXT: fmov d16, x13
+; CHECK-NEON-NEXT: eor v6.8b, v6.8b, v19.8b
+; CHECK-NEON-NEXT: fmov x15, d17
+; CHECK-NEON-NEXT: fneg d17, d18
+; CHECK-NEON-NEXT: fmov x13, d5
+; CHECK-NEON-NEXT: mul x12, x8, x12
+; CHECK-NEON-NEXT: and v5.8b, v0.8b, v7.8b
+; CHECK-NEON-NEXT: fmov d7, x9
+; CHECK-NEON-NEXT: eor v6.8b, v6.8b, v16.8b
+; CHECK-NEON-NEXT: fmov d16, x11
; CHECK-NEON-NEXT: mul x15, x8, x15
+; CHECK-NEON-NEXT: and v0.8b, v0.8b, v17.8b
+; CHECK-NEON-NEXT: fmov x11, d5
+; CHECK-NEON-NEXT: eor v3.8b, v3.8b, v7.8b
+; CHECK-NEON-NEXT: mul x9, x8, x13
+; CHECK-NEON-NEXT: eor v4.8b, v4.8b, v16.8b
+; CHECK-NEON-NEXT: fmov d5, x12
; CHECK-NEON-NEXT: eor v5.8b, v6.8b, v5.8b
-; CHECK-NEON-NEXT: fmov d6, x11
-; CHECK-NEON-NEXT: and v7.8b, v0.8b, v7.8b
-; CHECK-NEON-NEXT: and v0.8b, v0.8b, v16.8b
-; CHECK-NEON-NEXT: eor v4.8b, v4.8b, v17.8b
-; CHECK-NEON-NEXT: mul x9, x8, x14
-; CHECK-NEON-NEXT: eor v3.8b, v3.8b, v6.8b
-; CHECK-NEON-NEXT: fmov d6, x13
-; CHECK-NEON-NEXT: fmov x11, d7
-; CHECK-NEON-NEXT: eor v5.8b, v5.8b, v6.8b
; CHECK-NEON-NEXT: fmov d6, x10
; CHECK-NEON-NEXT: mul x10, x8, x11
; CHECK-NEON-NEXT: fmov x11, d0
; CHECK-NEON-NEXT: fmov d0, x15
-; CHECK-NEON-NEXT: eor v2.8b, v4.8b, v6.8b
-; CHECK-NEON-NEXT: fmov d4, x12
+; CHECK-NEON-NEXT: eor v2.8b, v3.8b, v6.8b
+; CHECK-NEON-NEXT: fmov d3, x14
; CHECK-NEON-NEXT: mul x8, x8, x11
; CHECK-NEON-NEXT: eor v0.8b, v5.8b, v0.8b
; CHECK-NEON-NEXT: fmov d5, x10
-; CHECK-NEON-NEXT: eor v3.8b, v3.8b, v4.8b
+; CHECK-NEON-NEXT: eor v3.8b, v4.8b, v3.8b
; CHECK-NEON-NEXT: fmov d4, x9
; CHECK-NEON-NEXT: eor v1.8b, v1.8b, v2.8b
; CHECK-NEON-NEXT: eor v0.8b, v0.8b, v5.8b
diff --git a/llvm/test/CodeGen/AArch64/dup.ll b/llvm/test/CodeGen/AArch64/dup.ll
index 16748cf16cb19..6420b35827aca 100644
--- a/llvm/test/CodeGen/AArch64/dup.ll
+++ b/llvm/test/CodeGen/AArch64/dup.ll
@@ -2569,8 +2569,7 @@ define <2 x fp128> @loaddup_str_v2fp128(ptr %p) {
; CHECK-SD-LABEL: loaddup_str_v2fp128:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: ldr q0, [x0]
-; CHECK-SD-NEXT: adrp x8, .LCPI155_0
-; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI155_0]
+; CHECK-SD-NEXT: movi d2, #0000000000000000
; CHECK-SD-NEXT: mov v1.16b, v0.16b
; CHECK-SD-NEXT: str q2, [x0]
; CHECK-SD-NEXT: ret
@@ -2631,8 +2630,7 @@ define <3 x fp128> @loaddup_str_v3fp128(ptr %p) {
; CHECK-SD-LABEL: loaddup_str_v3fp128:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: ldr q0, [x0]
-; CHECK-SD-NEXT: adrp x8, .LCPI159_0
-; CHECK-SD-NEXT: ldr q3, [x8, :lo12:.LCPI159_0]
+; CHECK-SD-NEXT: movi d3, #0000000000000000
; CHECK-SD-NEXT: mov v1.16b, v0.16b
; CHECK-SD-NEXT: mov v2.16b, v0.16b
; CHECK-SD-NEXT: str q3, [x0]
@@ -2698,8 +2696,7 @@ define <4 x fp128> @loaddup_str_v4fp128(ptr %p) {
; CHECK-SD-LABEL: loaddup_str_v4fp128:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: ldr q0, [x0]
-; CHECK-SD-NEXT: adrp x8, .LCPI163_0
-; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI163_0]
+; CHECK-SD-NEXT: movi d4, #0000000000000000
; CHECK-SD-NEXT: mov v1.16b, v0.16b
; CHECK-SD-NEXT: mov v2.16b, v0.16b
; CHECK-SD-NEXT: mov v3.16b, v0.16b
diff --git a/llvm/test/CodeGen/AArch64/f16-imm.ll b/llvm/test/CodeGen/AArch64/f16-imm.ll
index 911bf44a3ce93..830e6093b1bb8 100644
--- a/llvm/test/CodeGen/AArch64/f16-imm.ll
+++ b/llvm/test/CodeGen/AArch64/f16-imm.ll
@@ -33,8 +33,7 @@ define half @Const1() {
;
; CHECK-NOFP16-LABEL: Const1:
; CHECK-NOFP16: // %bb.0: // %entry
-; CHECK-NOFP16-NEXT: adrp x8, .LCPI1_0
-; CHECK-NOFP16-NEXT: ldr h0, [x8, :lo12:.LCPI1_0]
+; CHECK-NOFP16-NEXT: movi v0.4h, #60, lsl #8
; CHECK-NOFP16-NEXT: ret
entry:
ret half 0xH3C00
@@ -48,8 +47,7 @@ define half @Const2() {
;
; CHECK-NOFP16-LABEL: Const2:
; CHECK-NOFP16: // %bb.0: // %entry
-; CHECK-NOFP16-NEXT: adrp x8, .LCPI2_0
-; CHECK-NOFP16-NEXT: ldr h0, [x8, :lo12:.LCPI2_0]
+; CHECK-NOFP16-NEXT: movi v0.4h, #48, lsl #8
; CHECK-NOFP16-NEXT: ret
entry:
ret half 0xH3000
@@ -154,35 +152,15 @@ entry:
}
define half @Const7() {
-; CHECK-NOZCZ-SD-LABEL: Const7:
-; CHECK-NOZCZ-SD: // %bb.0: // %entry
-; CHECK-NOZCZ-SD-NEXT: mov w8, #20480 // =0x5000
-; CHECK-NOZCZ-SD-NEXT: fmov h0, w8
-; CHECK-NOZCZ-SD-NEXT: ret
-;
-; CHECK-ZCZ-SD-LABEL: Const7:
-; CHECK-ZCZ-SD: // %bb.0: // %entry
-; CHECK-ZCZ-SD-NEXT: mov w8, #20480 // =0x5000
-; CHECK-ZCZ-SD-NEXT: fmov h0, w8
-; CHECK-ZCZ-SD-NEXT: ret
+; CHECK-FP16-LABEL: Const7:
+; CHECK-FP16: // %bb.0: // %entry
+; CHECK-FP16-NEXT: movi v0.4h, #80, lsl #8
+; CHECK-FP16-NEXT: ret
;
; CHECK-NOFP16-LABEL: Const7:
; CHECK-NOFP16: // %bb.0: // %entry
-; CHECK-NOFP16-NEXT: adrp x8, .LCPI7_0
-; CHECK-NOFP16-NEXT: ldr h0, [x8, :lo12:.LCPI7_0]
+; CHECK-NOFP16-NEXT: movi v0.4h, #80, lsl #8
; CHECK-NOFP16-NEXT: ret
-;
-; CHECK-NOZCZ-GI-LABEL: Const7:
-; CHECK-NOZCZ-GI: // %bb.0: // %entry
-; CHECK-NOZCZ-GI-NEXT: adrp x8, .LCPI7_0
-; CHECK-NOZCZ-GI-NEXT: ldr h0, [x8, :lo12:.LCPI7_0]
-; CHECK-NOZCZ-GI-NEXT: ret
-;
-; CHECK-ZCZ-GI-LABEL: Const7:
-; CHECK-ZCZ-GI: // %bb.0: // %entry
-; CHECK-ZCZ-GI-NEXT: adrp x8, .LCPI7_0
-; CHECK-ZCZ-GI-NEXT: ldr h0, [x8, :lo12:.LCPI7_0]
-; CHECK-ZCZ-GI-NEXT: ret
entry:
ret half 0xH5000
}
diff --git a/llvm/test/CodeGen/AArch64/f16-instructions.ll b/llvm/test/CodeGen/AArch64/f16-instructions.ll
index f6d701b518699..d4a2fd68025f3 100644
--- a/llvm/test/CodeGen/AArch64/f16-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/f16-instructions.ll
@@ -761,12 +761,11 @@ define void @test_fccmp(half %in, ptr %out) {
; CHECK-CVT-SD-NEXT: // kill: def $h0 killed $h0 def $s0
; CHECK-CVT-SD-NEXT: fcvt s1, h0
; CHECK-CVT-SD-NEXT: fmov s2, #5.00000000
-; CHECK-CVT-SD-NEXT: adrp x8, .LCPI29_0
+; CHECK-CVT-SD-NEXT: movi v3.4h, #69, lsl #8
; CHECK-CVT-SD-NEXT: fcmp s1, s2
; CHECK-CVT-SD-NEXT: fmov s2, #8.00000000
; CHECK-CVT-SD-NEXT: fccmp s1, s2, #4, mi
-; CHECK-CVT-SD-NEXT: ldr h1, [x8, :lo12:.LCPI29_0]
-; CHECK-CVT-SD-NEXT: fcsel s0, s0, s1, gt
+; CHECK-CVT-SD-NEXT: fcsel s0, s0, s3, gt
; CHECK-CVT-SD-NEXT: str h0, [x0]
; CHECK-CVT-SD-NEXT: ret
;
@@ -785,13 +784,12 @@ define void @test_fccmp(half %in, ptr %out) {
; CHECK-CVT-GI-NEXT: // kill: def $h0 killed $h0 def $s0
; CHECK-CVT-GI-NEXT: fcvt s1, h0
; CHECK-CVT-GI-NEXT: fmov s2, #5.00000000
-; CHECK-CVT-GI-NEXT: adrp x8, .LCPI29_0
; CHECK-CVT-GI-NEXT: fmov s3, #8.00000000
-; CHECK-CVT-GI-NEXT: fcmp s1, s2
-; CHECK-CVT-GI-NEXT: ldr h2, [x8, :lo12:.LCPI29_0]
; CHECK-CVT-GI-NEXT: fmov w8, s0
-; CHECK-CVT-GI-NEXT: fmov w9, s2
+; CHECK-CVT-GI-NEXT: fcmp s1, s2
+; CHECK-CVT-GI-NEXT: movi v2.4h, #69, lsl #8
; CHECK-CVT-GI-NEXT: fccmp s1, s3, #4, mi
+; CHECK-CVT-GI-NEXT: fmov w9, s2
; CHECK-CVT-GI-NEXT: csel w8, w8, w9, gt
; CHECK-CVT-GI-NEXT: strh w8, [x0]
; CHECK-CVT-GI-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/fabs-combine.ll b/llvm/test/CodeGen/AArch64/fabs-combine.ll
index a7f6a251cdc1b..490754bf6fa02 100644
--- a/llvm/test/CodeGen/AArch64/fabs-combine.ll
+++ b/llvm/test/CodeGen/AArch64/fabs-combine.ll
@@ -93,12 +93,19 @@ define <4 x float> @nabsv4f32(<4 x float> %a) {
}
define <2 x double> @nabsv2d64(<2 x double> %a) {
-; CHECK-LABEL: nabsv2d64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v1.2d, #0000000000000000
-; CHECK-NEXT: fneg v1.2d, v1.2d
-; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: nabsv2d64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi d1, #0000000000000000
+; CHECK-SD-NEXT: fneg v1.2d, v1.2d
+; CHECK-SD-NEXT: orr v0.16b, v0.16b, v1.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: nabsv2d64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v1.2d, #0000000000000000
+; CHECK-GI-NEXT: fneg v1.2d, v1.2d
+; CHECK-GI-NEXT: orr v0.16b, v0.16b, v1.16b
+; CHECK-GI-NEXT: ret
%conv = bitcast <2 x double> %a to <2 x i64>
%and = or <2 x i64> %conv, <i64 -9223372036854775808, i64 -9223372036854775808>
%conv1 = bitcast <2 x i64> %and to <2 x double>
diff --git a/llvm/test/CodeGen/AArch64/fcvt-fixed.ll b/llvm/test/CodeGen/AArch64/fcvt-fixed.ll
index 743d1604388de..949291ce8576f 100644
--- a/llvm/test/CodeGen/AArch64/fcvt-fixed.ll
+++ b/llvm/test/CodeGen/AArch64/fcvt-fixed.ll
@@ -166,8 +166,7 @@ define i32 @fcvtzs_f16_i32_7(half %flt) {
;
; CHECK-GI-FP16-LABEL: fcvtzs_f16_i32_7:
; CHECK-GI-FP16: // %bb.0:
-; CHECK-GI-FP16-NEXT: adrp x8, .LCPI8_0
-; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI8_0]
+; CHECK-GI-FP16-NEXT: movi v1.4h, #88, lsl #8
; CHECK-GI-FP16-NEXT: fmul h0, h0, h1
; CHECK-GI-FP16-NEXT: fcvtzs w0, h0
; CHECK-GI-FP16-NEXT: ret
@@ -194,8 +193,7 @@ define i32 @fcvtzs_f16_i32_15(half %flt) {
;
; CHECK-GI-FP16-LABEL: fcvtzs_f16_i32_15:
; CHECK-GI-FP16: // %bb.0:
-; CHECK-GI-FP16-NEXT: adrp x8, .LCPI9_0
-; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI9_0]
+; CHECK-GI-FP16-NEXT: movi v1.4h, #120, lsl #8
; CHECK-GI-FP16-NEXT: fmul h0, h0, h1
; CHECK-GI-FP16-NEXT: fcvtzs w0, h0
; CHECK-GI-FP16-NEXT: ret
@@ -222,8 +220,7 @@ define i64 @fcvtzs_f16_i64_7(half %flt) {
;
; CHECK-GI-FP16-LABEL: fcvtzs_f16_i64_7:
; CHECK-GI-FP16: // %bb.0:
-; CHECK-GI-FP16-NEXT: adrp x8, .LCPI10_0
-; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI10_0]
+; CHECK-GI-FP16-NEXT: movi v1.4h, #88, lsl #8
; CHECK-GI-FP16-NEXT: fmul h0, h0, h1
; CHECK-GI-FP16-NEXT: fcvtzs x0, h0
; CHECK-GI-FP16-NEXT: ret
@@ -250,8 +247,7 @@ define i64 @fcvtzs_f16_i64_15(half %flt) {
;
; CHECK-GI-FP16-LABEL: fcvtzs_f16_i64_15:
; CHECK-GI-FP16: // %bb.0:
-; CHECK-GI-FP16-NEXT: adrp x8, .LCPI11_0
-; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI11_0]
+; CHECK-GI-FP16-NEXT: movi v1.4h, #120, lsl #8
; CHECK-GI-FP16-NEXT: fmul h0, h0, h1
; CHECK-GI-FP16-NEXT: fcvtzs x0, h0
; CHECK-GI-FP16-NEXT: ret
@@ -422,8 +418,7 @@ define i32 @fcvtzu_f16_i32_7(half %flt) {
;
; CHECK-GI-FP16-LABEL: fcvtzu_f16_i32_7:
; CHECK-GI-FP16: // %bb.0:
-; CHECK-GI-FP16-NEXT: adrp x8, .LCPI20_0
-; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI20_0]
+; CHECK-GI-FP16-NEXT: movi v1.4h, #88, lsl #8
; CHECK-GI-FP16-NEXT: fmul h0, h0, h1
; CHECK-GI-FP16-NEXT: fcvtzu w0, h0
; CHECK-GI-FP16-NEXT: ret
@@ -450,8 +445,7 @@ define i32 @fcvtzu_f16_i32_15(half %flt) {
;
; CHECK-GI-FP16-LABEL: fcvtzu_f16_i32_15:
; CHECK-GI-FP16: // %bb.0:
-; CHECK-GI-FP16-NEXT: adrp x8, .LCPI21_0
-; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI21_0]
+; CHECK-GI-FP16-NEXT: movi v1.4h, #120, lsl #8
; CHECK-GI-FP16-NEXT: fmul h0, h0, h1
; CHECK-GI-FP16-NEXT: fcvtzu w0, h0
; CHECK-GI-FP16-NEXT: ret
@@ -478,8 +472,7 @@ define i64 @fcvtzu_f16_i64_7(half %flt) {
;
; CHECK-GI-FP16-LABEL: fcvtzu_f16_i64_7:
; CHECK-GI-FP16: // %bb.0:
-; CHECK-GI-FP16-NEXT: adrp x8, .LCPI22_0
-; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI22_0]
+; CHECK-GI-FP16-NEXT: movi v1.4h, #88, lsl #8
; CHECK-GI-FP16-NEXT: fmul h0, h0, h1
; CHECK-GI-FP16-NEXT: fcvtzu x0, h0
; CHECK-GI-FP16-NEXT: ret
@@ -506,8 +499,7 @@ define i64 @fcvtzu_f16_i64_15(half %flt) {
;
; CHECK-GI-FP16-LABEL: fcvtzu_f16_i64_15:
; CHECK-GI-FP16: // %bb.0:
-; CHECK-GI-FP16-NEXT: adrp x8, .LCPI23_0
-; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI23_0]
+; CHECK-GI-FP16-NEXT: movi v1.4h, #120, lsl #8
; CHECK-GI-FP16-NEXT: fmul h0, h0, h1
; CHECK-GI-FP16-NEXT: fcvtzu x0, h0
; CHECK-GI-FP16-NEXT: ret
@@ -688,10 +680,9 @@ define half @scvtf_f16_i32_7(i32 %int) {
;
; CHECK-GI-FP16-LABEL: scvtf_f16_i32_7:
; CHECK-GI-FP16: // %bb.0:
-; CHECK-GI-FP16-NEXT: scvtf h0, w0
-; CHECK-GI-FP16-NEXT: adrp x8, .LCPI32_0
-; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI32_0]
-; CHECK-GI-FP16-NEXT: fdiv h0, h0, h1
+; CHECK-GI-FP16-NEXT: movi v0.4h, #88, lsl #8
+; CHECK-GI-FP16-NEXT: scvtf h1, w0
+; CHECK-GI-FP16-NEXT: fdiv h0, h1, h0
; CHECK-GI-FP16-NEXT: ret
%cvt = sitofp i32 %int to half
%fix = fdiv half %cvt, 128.0
@@ -726,10 +717,9 @@ define half @scvtf_f16_i32_15(i32 %int) {
;
; CHECK-GI-FP16-LABEL: scvtf_f16_i32_15:
; CHECK-GI-FP16: // %bb.0:
-; CHECK-GI-FP16-NEXT: scvtf h0, w0
-; CHECK-GI-FP16-NEXT: adrp x8, .LCPI33_0
-; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI33_0]
-; CHECK-GI-FP16-NEXT: fdiv h0, h0, h1
+; CHECK-GI-FP16-NEXT: movi v0.4h, #120, lsl #8
+; CHECK-GI-FP16-NEXT: scvtf h1, w0
+; CHECK-GI-FP16-NEXT: fdiv h0, h1, h0
; CHECK-GI-FP16-NEXT: ret
%cvt = sitofp i32 %int to half
%fix = fdiv half %cvt, 32768.0
@@ -764,10 +754,9 @@ define half @scvtf_f16_i64_7(i64 %long) {
;
; CHECK-GI-FP16-LABEL: scvtf_f16_i64_7:
; CHECK-GI-FP16: // %bb.0:
-; CHECK-GI-FP16-NEXT: scvtf h0, x0
-; CHECK-GI-FP16-NEXT: adrp x8, .LCPI34_0
-; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI34_0]
-; CHECK-GI-FP16-NEXT: fdiv h0, h0, h1
+; CHECK-GI-FP16-NEXT: movi v0.4h, #88, lsl #8
+; CHECK-GI-FP16-NEXT: scvtf h1, x0
+; CHECK-GI-FP16-NEXT: fdiv h0, h1, h0
; CHECK-GI-FP16-NEXT: ret
%cvt = sitofp i64 %long to half
%fix = fdiv half %cvt, 128.0
@@ -802,10 +791,9 @@ define half @scvtf_f16_i64_15(i64 %long) {
;
; CHECK-GI-FP16-LABEL: scvtf_f16_i64_15:
; CHECK-GI-FP16: // %bb.0:
-; CHECK-GI-FP16-NEXT: scvtf h0, x0
-; CHECK-GI-FP16-NEXT: adrp x8, .LCPI35_0
-; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI35_0]
-; CHECK-GI-FP16-NEXT: fdiv h0, h0, h1
+; CHECK-GI-FP16-NEXT: movi v0.4h, #120, lsl #8
+; CHECK-GI-FP16-NEXT: scvtf h1, x0
+; CHECK-GI-FP16-NEXT: fdiv h0, h1, h0
; CHECK-GI-FP16-NEXT: ret
%cvt = sitofp i64 %long to half
%fix = fdiv half %cvt, 32768.0
@@ -984,10 +972,9 @@ define half @ucvtf_f16_i32_7(i32 %int) {
;
; CHECK-GI-FP16-LABEL: ucvtf_f16_i32_7:
; CHECK-GI-FP16: // %bb.0:
-; CHECK-GI-FP16-NEXT: ucvtf h0, w0
-; CHECK-GI-FP16-NEXT: adrp x8, .LCPI44_0
-; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI44_0]
-; CHECK-GI-FP16-NEXT: fdiv h0, h0, h1
+; CHECK-GI-FP16-NEXT: movi v0.4h, #88, lsl #8
+; CHECK-GI-FP16-NEXT: ucvtf h1, w0
+; CHECK-GI-FP16-NEXT: fdiv h0, h1, h0
; CHECK-GI-FP16-NEXT: ret
%cvt = uitofp i32 %int to half
%fix = fdiv half %cvt, 128.0
@@ -1022,10 +1009,9 @@ define half @ucvtf_f16_i32_15(i32 %int) {
;
; CHECK-GI-FP16-LABEL: ucvtf_f16_i32_15:
; CHECK-GI-FP16: // %bb.0:
-; CHECK-GI-FP16-NEXT: ucvtf h0, w0
-; CHECK-GI-FP16-NEXT: adrp x8, .LCPI45_0
-; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI45_0]
-; CHECK-GI-FP16-NEXT: fdiv h0, h0, h1
+; CHECK-GI-FP16-NEXT: movi v0.4h, #120, lsl #8
+; CHECK-GI-FP16-NEXT: ucvtf h1, w0
+; CHECK-GI-FP16-NEXT: fdiv h0, h1, h0
; CHECK-GI-FP16-NEXT: ret
%cvt = uitofp i32 %int to half
%fix = fdiv half %cvt, 32768.0
@@ -1060,10 +1046,9 @@ define half @ucvtf_f16_i64_7(i64 %long) {
;
; CHECK-GI-FP16-LABEL: ucvtf_f16_i64_7:
; CHECK-GI-FP16: // %bb.0:
-; CHECK-GI-FP16-NEXT: ucvtf h0, x0
-; CHECK-GI-FP16-NEXT: adrp x8, .LCPI46_0
-; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI46_0]
-; CHECK-GI-FP16-NEXT: fdiv h0, h0, h1
+; CHECK-GI-FP16-NEXT: movi v0.4h, #88, lsl #8
+; CHECK-GI-FP16-NEXT: ucvtf h1, x0
+; CHECK-GI-FP16-NEXT: fdiv h0, h1, h0
; CHECK-GI-FP16-NEXT: ret
%cvt = uitofp i64 %long to half
%fix = fdiv half %cvt, 128.0
@@ -1098,10 +1083,9 @@ define half @ucvtf_f16_i64_15(i64 %long) {
;
; CHECK-GI-FP16-LABEL: ucvtf_f16_i64_15:
; CHECK-GI-FP16: // %bb.0:
-; CHECK-GI-FP16-NEXT: ucvtf h0, x0
-; CHECK-GI-FP16-NEXT: adrp x8, .LCPI47_0
-; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI47_0]
-; CHECK-GI-FP16-NEXT: fdiv h0, h0, h1
+; CHECK-GI-FP16-NEXT: movi v0.4h, #120, lsl #8
+; CHECK-GI-FP16-NEXT: ucvtf h1, x0
+; CHECK-GI-FP16-NEXT: fdiv h0, h1, h0
; CHECK-GI-FP16-NEXT: ret
%cvt = uitofp i64 %long to half
%fix = fdiv half %cvt, 32768.0
@@ -1261,8 +1245,7 @@ define i32 @fcvtzs_sat_f16_i32_7(half %dbl) {
;
; CHECK-GI-FP16-LABEL: fcvtzs_sat_f16_i32_7:
; CHECK-GI-FP16: // %bb.0:
-; CHECK-GI-FP16-NEXT: adrp x8, .LCPI55_0
-; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI55_0]
+; CHECK-GI-FP16-NEXT: movi v1.4h, #88, lsl #8
; CHECK-GI-FP16-NEXT: fmul h0, h0, h1
; CHECK-GI-FP16-NEXT: fcvtzs w0, h0
; CHECK-GI-FP16-NEXT: ret
@@ -1289,8 +1272,7 @@ define i32 @fcvtzs_sat_f16_i32_15(half %dbl) {
;
; CHECK-GI-FP16-LABEL: fcvtzs_sat_f16_i32_15:
; CHECK-GI-FP16: // %bb.0:
-; CHECK-GI-FP16-NEXT: adrp x8, .LCPI56_0
-; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI56_0]
+; CHECK-GI-FP16-NEXT: movi v1.4h, #120, lsl #8
; CHECK-GI-FP16-NEXT: fmul h0, h0, h1
; CHECK-GI-FP16-NEXT: fcvtzs w0, h0
; CHECK-GI-FP16-NEXT: ret
@@ -1317,8 +1299,7 @@ define i64 @fcvtzs_sat_f16_i64_7(half %dbl) {
;
; CHECK-GI-FP16-LABEL: fcvtzs_sat_f16_i64_7:
; CHECK-GI-FP16: // %bb.0:
-; CHECK-GI-FP16-NEXT: adrp x8, .LCPI57_0
-; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI57_0]
+; CHECK-GI-FP16-NEXT: movi v1.4h, #88, lsl #8
; CHECK-GI-FP16-NEXT: fmul h0, h0, h1
; CHECK-GI-FP16-NEXT: fcvtzs x0, h0
; CHECK-GI-FP16-NEXT: ret
@@ -1345,8 +1326,7 @@ define i64 @fcvtzs_sat_f16_i64_15(half %dbl) {
;
; CHECK-GI-FP16-LABEL: fcvtzs_sat_f16_i64_15:
; CHECK-GI-FP16: // %bb.0:
-; CHECK-GI-FP16-NEXT: adrp x8, .LCPI58_0
-; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI58_0]
+; CHECK-GI-FP16-NEXT: movi v1.4h, #120, lsl #8
; CHECK-GI-FP16-NEXT: fmul h0, h0, h1
; CHECK-GI-FP16-NEXT: fcvtzs x0, h0
; CHECK-GI-FP16-NEXT: ret
@@ -1507,8 +1487,7 @@ define i32 @fcvtzu_sat_f16_i32_7(half %dbl) {
;
; CHECK-GI-FP16-LABEL: fcvtzu_sat_f16_i32_7:
; CHECK-GI-FP16: // %bb.0:
-; CHECK-GI-FP16-NEXT: adrp x8, .LCPI66_0
-; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI66_0]
+; CHECK-GI-FP16-NEXT: movi v1.4h, #88, lsl #8
; CHECK-GI-FP16-NEXT: fmul h0, h0, h1
; CHECK-GI-FP16-NEXT: fcvtzu w0, h0
; CHECK-GI-FP16-NEXT: ret
@@ -1535,8 +1514,7 @@ define i32 @fcvtzu_sat_f16_i32_15(half %dbl) {
;
; CHECK-GI-FP16-LABEL: fcvtzu_sat_f16_i32_15:
; CHECK-GI-FP16: // %bb.0:
-; CHECK-GI-FP16-NEXT: adrp x8, .LCPI67_0
-; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI67_0]
+; CHECK-GI-FP16-NEXT: movi v1.4h, #120, lsl #8
; CHECK-GI-FP16-NEXT: fmul h0, h0, h1
; CHECK-GI-FP16-NEXT: fcvtzu w0, h0
; CHECK-GI-FP16-NEXT: ret
@@ -1563,8 +1541,7 @@ define i64 @fcvtzu_sat_f16_i64_7(half %dbl) {
;
; CHECK-GI-FP16-LABEL: fcvtzu_sat_f16_i64_7:
; CHECK-GI-FP16: // %bb.0:
-; CHECK-GI-FP16-NEXT: adrp x8, .LCPI68_0
-; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI68_0]
+; CHECK-GI-FP16-NEXT: movi v1.4h, #88, lsl #8
; CHECK-GI-FP16-NEXT: fmul h0, h0, h1
; CHECK-GI-FP16-NEXT: fcvtzu x0, h0
; CHECK-GI-FP16-NEXT: ret
@@ -1591,8 +1568,7 @@ define i64 @fcvtzu_sat_f16_i64_15(half %dbl) {
;
; CHECK-GI-FP16-LABEL: fcvtzu_sat_f16_i64_15:
; CHECK-GI-FP16: // %bb.0:
-; CHECK-GI-FP16-NEXT: adrp x8, .LCPI69_0
-; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI69_0]
+; CHECK-GI-FP16-NEXT: movi v1.4h, #120, lsl #8
; CHECK-GI-FP16-NEXT: fmul h0, h0, h1
; CHECK-GI-FP16-NEXT: fcvtzu x0, h0
; CHECK-GI-FP16-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/fdiv-const.ll b/llvm/test/CodeGen/AArch64/fdiv-const.ll
index 2866b5f6d8f20..ba507bae9366b 100644
--- a/llvm/test/CodeGen/AArch64/fdiv-const.ll
+++ b/llvm/test/CodeGen/AArch64/fdiv-const.ll
@@ -81,37 +81,21 @@ define half @divf16_2(half %a) nounwind {
}
define half @divf16_32768(half %a) nounwind {
-; CHECK-SD-LABEL: divf16_32768:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: mov w8, #30720 // =0x7800
-; CHECK-SD-NEXT: fmov h1, w8
-; CHECK-SD-NEXT: fdiv h0, h0, h1
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: divf16_32768:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI5_0
-; CHECK-GI-NEXT: ldr h1, [x8, :lo12:.LCPI5_0]
-; CHECK-GI-NEXT: fdiv h0, h0, h1
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: divf16_32768:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v1.4h, #120, lsl #8
+; CHECK-NEXT: fdiv h0, h0, h1
+; CHECK-NEXT: ret
%r = fdiv half %a, 32768.0
ret half %r
}
define half @divf16_32768_arcp(half %a) nounwind {
-; CHECK-SD-LABEL: divf16_32768_arcp:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: mov w8, #30720 // =0x7800
-; CHECK-SD-NEXT: fmov h1, w8
-; CHECK-SD-NEXT: fdiv h0, h0, h1
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: divf16_32768_arcp:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI6_0
-; CHECK-GI-NEXT: ldr h1, [x8, :lo12:.LCPI6_0]
-; CHECK-GI-NEXT: fdiv h0, h0, h1
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: divf16_32768_arcp:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v1.4h, #120, lsl #8
+; CHECK-NEXT: fdiv h0, h0, h1
+; CHECK-NEXT: ret
%r = fdiv arcp half %a, 32768.0
ret half %r
}
diff --git a/llvm/test/CodeGen/AArch64/fp-maximumnum-minimumnum.ll b/llvm/test/CodeGen/AArch64/fp-maximumnum-minimumnum.ll
index 5353920ed5667..3473c4c093fcc 100644
--- a/llvm/test/CodeGen/AArch64/fp-maximumnum-minimumnum.ll
+++ b/llvm/test/CodeGen/AArch64/fp-maximumnum-minimumnum.ll
@@ -1256,9 +1256,8 @@ define fp128 @max_fp128(fp128 %x, fp128 %y) {
; CHECK-NEXT: // %bb.7: // %start
; CHECK-NEXT: ldr q1, [sp] // 16-byte Reload
; CHECK-NEXT: .LBB32_8: // %start
-; CHECK-NEXT: adrp x8, .LCPI32_0
; CHECK-NEXT: str q1, [sp] // 16-byte Spill
-; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI32_0]
+; CHECK-NEXT: movi d1, #0000000000000000
; CHECK-NEXT: bl __eqtf2
; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Reload
; CHECK-NEXT: cmp w0, #0
@@ -1900,9 +1899,8 @@ define fp128 @min_fp128(fp128 %x, fp128 %y) {
; CHECK-NEXT: // %bb.7: // %start
; CHECK-NEXT: ldr q1, [sp] // 16-byte Reload
; CHECK-NEXT: .LBB49_8: // %start
-; CHECK-NEXT: adrp x8, .LCPI49_0
; CHECK-NEXT: str q1, [sp] // 16-byte Spill
-; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI49_0]
+; CHECK-NEXT: movi d1, #0000000000000000
; CHECK-NEXT: bl __eqtf2
; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Reload
; CHECK-NEXT: cmp w0, #0
diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll
index b6cbe9eb46389..a055f5e681a9c 100644
--- a/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll
+++ b/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll
@@ -153,12 +153,11 @@ define i100 @test_signed_i100_f32(float %f) nounwind {
; CHECK-SD-NEXT: fmov s8, s0
; CHECK-SD-NEXT: bl __fixsfti
; CHECK-SD-NEXT: movi v0.2s, #241, lsl #24
-; CHECK-SD-NEXT: mov w8, #1895825407 // =0x70ffffff
+; CHECK-SD-NEXT: mov x8, #-34359738368 // =0xfffffff800000000
; CHECK-SD-NEXT: mov x10, #34359738367 // =0x7ffffffff
; CHECK-SD-NEXT: ldr x30, [sp, #8] // 8-byte Reload
; CHECK-SD-NEXT: fcmp s8, s0
-; CHECK-SD-NEXT: fmov s0, w8
-; CHECK-SD-NEXT: mov x8, #-34359738368 // =0xfffffff800000000
+; CHECK-SD-NEXT: mvni v0.2s, #143, lsl #24
; CHECK-SD-NEXT: csel x9, xzr, x0, lt
; CHECK-SD-NEXT: csel x8, x8, x1, lt
; CHECK-SD-NEXT: fcmp s8, s0
@@ -177,12 +176,11 @@ define i100 @test_signed_i100_f32(float %f) nounwind {
; CHECK-GI-NEXT: fmov s8, s0
; CHECK-GI-NEXT: bl __fixsfti
; CHECK-GI-NEXT: movi v0.2s, #241, lsl #24
-; CHECK-GI-NEXT: mov w8, #1895825407 // =0x70ffffff
+; CHECK-GI-NEXT: mov x8, #34359738368 // =0x800000000
; CHECK-GI-NEXT: mov x10, #34359738367 // =0x7ffffffff
; CHECK-GI-NEXT: ldr x30, [sp, #8] // 8-byte Reload
; CHECK-GI-NEXT: fcmp s8, s0
-; CHECK-GI-NEXT: fmov s0, w8
-; CHECK-GI-NEXT: mov x8, #34359738368 // =0x800000000
+; CHECK-GI-NEXT: mvni v0.2s, #143, lsl #24
; CHECK-GI-NEXT: csel x9, xzr, x0, lt
; CHECK-GI-NEXT: csel x8, x8, x1, lt
; CHECK-GI-NEXT: fcmp s8, s0
@@ -204,13 +202,12 @@ define i128 @test_signed_i128_f32(float %f) nounwind {
; CHECK-SD-NEXT: str x30, [sp, #8] // 8-byte Spill
; CHECK-SD-NEXT: fmov s8, s0
; CHECK-SD-NEXT: bl __fixsfti
-; CHECK-SD-NEXT: movi v0.2s, #255, lsl #24
-; CHECK-SD-NEXT: mov w8, #2130706431 // =0x7effffff
+; CHECK-SD-NEXT: movi d0, #0xff000000ff000000
+; CHECK-SD-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000
; CHECK-SD-NEXT: mov x10, #9223372036854775807 // =0x7fffffffffffffff
; CHECK-SD-NEXT: ldr x30, [sp, #8] // 8-byte Reload
; CHECK-SD-NEXT: fcmp s8, s0
-; CHECK-SD-NEXT: fmov s0, w8
-; CHECK-SD-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000
+; CHECK-SD-NEXT: mvni v0.2s, #129, lsl #24
; CHECK-SD-NEXT: csel x9, xzr, x0, lt
; CHECK-SD-NEXT: csel x8, x8, x1, lt
; CHECK-SD-NEXT: fcmp s8, s0
@@ -228,13 +225,12 @@ define i128 @test_signed_i128_f32(float %f) nounwind {
; CHECK-GI-NEXT: str x30, [sp, #8] // 8-byte Spill
; CHECK-GI-NEXT: fmov s8, s0
; CHECK-GI-NEXT: bl __fixsfti
-; CHECK-GI-NEXT: movi v0.2s, #255, lsl #24
-; CHECK-GI-NEXT: mov w8, #2130706431 // =0x7effffff
+; CHECK-GI-NEXT: movi d0, #0xff000000ff000000
+; CHECK-GI-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000
; CHECK-GI-NEXT: mov x10, #9223372036854775807 // =0x7fffffffffffffff
; CHECK-GI-NEXT: ldr x30, [sp, #8] // 8-byte Reload
; CHECK-GI-NEXT: fcmp s8, s0
-; CHECK-GI-NEXT: fmov s0, w8
-; CHECK-GI-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000
+; CHECK-GI-NEXT: mvni v0.2s, #129, lsl #24
; CHECK-GI-NEXT: csel x9, xzr, x0, lt
; CHECK-GI-NEXT: csel x8, x8, x1, lt
; CHECK-GI-NEXT: fcmp s8, s0
@@ -857,12 +853,11 @@ define i100 @test_signed_i100_f16(half %f) nounwind {
; CHECK-SD-NEXT: fmov s0, s8
; CHECK-SD-NEXT: bl __fixsfti
; CHECK-SD-NEXT: movi v0.2s, #241, lsl #24
-; CHECK-SD-NEXT: mov w8, #1895825407 // =0x70ffffff
+; CHECK-SD-NEXT: mov x8, #-34359738368 // =0xfffffff800000000
; CHECK-SD-NEXT: mov x10, #34359738367 // =0x7ffffffff
; CHECK-SD-NEXT: ldr x30, [sp, #8] // 8-byte Reload
; CHECK-SD-NEXT: fcmp s8, s0
-; CHECK-SD-NEXT: fmov s0, w8
-; CHECK-SD-NEXT: mov x8, #-34359738368 // =0xfffffff800000000
+; CHECK-SD-NEXT: mvni v0.2s, #143, lsl #24
; CHECK-SD-NEXT: csel x9, xzr, x0, lt
; CHECK-SD-NEXT: csel x8, x8, x1, lt
; CHECK-SD-NEXT: fcmp s8, s0
@@ -898,13 +893,12 @@ define i128 @test_signed_i128_f16(half %f) nounwind {
; CHECK-SD-NEXT: str x30, [sp, #8] // 8-byte Spill
; CHECK-SD-NEXT: fmov s0, s8
; CHECK-SD-NEXT: bl __fixsfti
-; CHECK-SD-NEXT: movi v0.2s, #255, lsl #24
-; CHECK-SD-NEXT: mov w8, #2130706431 // =0x7effffff
+; CHECK-SD-NEXT: movi d0, #0xff000000ff000000
+; CHECK-SD-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000
; CHECK-SD-NEXT: mov x10, #9223372036854775807 // =0x7fffffffffffffff
; CHECK-SD-NEXT: ldr x30, [sp, #8] // 8-byte Reload
; CHECK-SD-NEXT: fcmp s8, s0
-; CHECK-SD-NEXT: fmov s0, w8
-; CHECK-SD-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000
+; CHECK-SD-NEXT: mvni v0.2s, #129, lsl #24
; CHECK-SD-NEXT: csel x9, xzr, x0, lt
; CHECK-SD-NEXT: csel x8, x8, x1, lt
; CHECK-SD-NEXT: fcmp s8, s0
diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
index 6a06d99689df9..978b404a28938 100644
--- a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
@@ -1552,8 +1552,7 @@ define <2 x i100> @test_signed_v2f32_v2i100(<2 x float> %f) {
; CHECK-SD-NEXT: bl __fixsfti
; CHECK-SD-NEXT: movi v9.2s, #241, lsl #24
; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload
-; CHECK-SD-NEXT: mov w8, #1895825407 // =0x70ffffff
-; CHECK-SD-NEXT: fmov s10, w8
+; CHECK-SD-NEXT: mvni v10.2s, #143, lsl #24
; CHECK-SD-NEXT: mov x21, #-34359738368 // =0xfffffff800000000
; CHECK-SD-NEXT: mov x22, #34359738367 // =0x7ffffffff
; CHECK-SD-NEXT: mov s8, v0.s[1]
@@ -1611,8 +1610,7 @@ define <2 x i100> @test_signed_v2f32_v2i100(<2 x float> %f) {
; CHECK-GI-NEXT: bl __fixsfti
; CHECK-GI-NEXT: movi v9.2s, #241, lsl #24
; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Reload
-; CHECK-GI-NEXT: mov w8, #1895825407 // =0x70ffffff
-; CHECK-GI-NEXT: fmov s10, w8
+; CHECK-GI-NEXT: mvni v10.2s, #143, lsl #24
; CHECK-GI-NEXT: mov x21, #34359738368 // =0x800000000
; CHECK-GI-NEXT: mov x22, #34359738367 // =0x7ffffffff
; CHECK-GI-NEXT: fcmp s0, s9
@@ -1670,10 +1668,9 @@ define <2 x i128> @test_signed_v2f32_v2i128(<2 x float> %f) {
; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill
; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0
; CHECK-SD-NEXT: bl __fixsfti
-; CHECK-SD-NEXT: movi v9.2s, #255, lsl #24
+; CHECK-SD-NEXT: movi d9, #0xff000000ff000000
; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload
-; CHECK-SD-NEXT: mov w8, #2130706431 // =0x7effffff
-; CHECK-SD-NEXT: fmov s10, w8
+; CHECK-SD-NEXT: mvni v10.2s, #129, lsl #24
; CHECK-SD-NEXT: mov x21, #-9223372036854775808 // =0x8000000000000000
; CHECK-SD-NEXT: mov x22, #9223372036854775807 // =0x7fffffffffffffff
; CHECK-SD-NEXT: mov s8, v0.s[1]
@@ -1729,10 +1726,9 @@ define <2 x i128> @test_signed_v2f32_v2i128(<2 x float> %f) {
; CHECK-GI-NEXT: mov s8, v0.s[1]
; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0
; CHECK-GI-NEXT: bl __fixsfti
-; CHECK-GI-NEXT: movi v9.2s, #255, lsl #24
+; CHECK-GI-NEXT: movi d9, #0xff000000ff000000
; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Reload
-; CHECK-GI-NEXT: mov w8, #2130706431 // =0x7effffff
-; CHECK-GI-NEXT: fmov s10, w8
+; CHECK-GI-NEXT: mvni v10.2s, #129, lsl #24
; CHECK-GI-NEXT: mov x21, #-9223372036854775808 // =0x8000000000000000
; CHECK-GI-NEXT: mov x22, #9223372036854775807 // =0x7fffffffffffffff
; CHECK-GI-NEXT: fcmp s0, s9
@@ -1967,8 +1963,7 @@ define <4 x i100> @test_signed_v4f32_v4i100(<4 x float> %f) {
; CHECK-SD-NEXT: bl __fixsfti
; CHECK-SD-NEXT: movi v9.2s, #241, lsl #24
; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload
-; CHECK-SD-NEXT: mov w8, #1895825407 // =0x70ffffff
-; CHECK-SD-NEXT: fmov s10, w8
+; CHECK-SD-NEXT: mvni v10.2s, #143, lsl #24
; CHECK-SD-NEXT: mov x25, #-34359738368 // =0xfffffff800000000
; CHECK-SD-NEXT: mov x26, #34359738367 // =0x7ffffffff
; CHECK-SD-NEXT: mov s8, v0.s[1]
@@ -2069,8 +2064,7 @@ define <4 x i100> @test_signed_v4f32_v4i100(<4 x float> %f) {
; CHECK-GI-NEXT: bl __fixsfti
; CHECK-GI-NEXT: movi v11.2s, #241, lsl #24
; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Reload
-; CHECK-GI-NEXT: mov w8, #1895825407 // =0x70ffffff
-; CHECK-GI-NEXT: fmov s12, w8
+; CHECK-GI-NEXT: mvni v12.2s, #143, lsl #24
; CHECK-GI-NEXT: mov x25, #34359738368 // =0x800000000
; CHECK-GI-NEXT: mov x26, #34359738367 // =0x7ffffffff
; CHECK-GI-NEXT: fcmp s0, s11
@@ -2162,10 +2156,9 @@ define <4 x i128> @test_signed_v4f32_v4i128(<4 x float> %f) {
; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill
; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0
; CHECK-SD-NEXT: bl __fixsfti
-; CHECK-SD-NEXT: movi v9.2s, #255, lsl #24
+; CHECK-SD-NEXT: movi d9, #0xff000000ff000000
; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload
-; CHECK-SD-NEXT: mov w8, #2130706431 // =0x7effffff
-; CHECK-SD-NEXT: fmov s10, w8
+; CHECK-SD-NEXT: mvni v10.2s, #129, lsl #24
; CHECK-SD-NEXT: mov x25, #-9223372036854775808 // =0x8000000000000000
; CHECK-SD-NEXT: mov x26, #9223372036854775807 // =0x7fffffffffffffff
; CHECK-SD-NEXT: mov s8, v0.s[1]
@@ -2264,10 +2257,9 @@ define <4 x i128> @test_signed_v4f32_v4i128(<4 x float> %f) {
; CHECK-GI-NEXT: mov s8, v0.s[3]
; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0
; CHECK-GI-NEXT: bl __fixsfti
-; CHECK-GI-NEXT: movi v11.2s, #255, lsl #24
+; CHECK-GI-NEXT: movi d11, #0xff000000ff000000
; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Reload
-; CHECK-GI-NEXT: mov w8, #2130706431 // =0x7effffff
-; CHECK-GI-NEXT: fmov s12, w8
+; CHECK-GI-NEXT: mvni v12.2s, #129, lsl #24
; CHECK-GI-NEXT: mov x25, #-9223372036854775808 // =0x8000000000000000
; CHECK-GI-NEXT: mov x26, #9223372036854775807 // =0x7fffffffffffffff
; CHECK-GI-NEXT: fcmp s0, s11
@@ -3163,9 +3155,8 @@ define <4 x i100> @test_signed_v4f16_v4i100(<4 x half> %f) {
; CHECK-SD-NEXT: fmov s0, s8
; CHECK-SD-NEXT: bl __fixsfti
; CHECK-SD-NEXT: movi v9.2s, #241, lsl #24
-; CHECK-SD-NEXT: mov w8, #1895825407 // =0x70ffffff
+; CHECK-SD-NEXT: mvni v10.2s, #143, lsl #24
; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload
-; CHECK-SD-NEXT: fmov s10, w8
; CHECK-SD-NEXT: mov x25, #-34359738368 // =0xfffffff800000000
; CHECK-SD-NEXT: mov x26, #34359738367 // =0x7ffffffff
; CHECK-SD-NEXT: mov h0, v0.h[1]
@@ -3302,10 +3293,9 @@ define <4 x i128> @test_signed_v4f16_v4i128(<4 x half> %f) {
; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill
; CHECK-SD-NEXT: fmov s0, s8
; CHECK-SD-NEXT: bl __fixsfti
-; CHECK-SD-NEXT: movi v9.2s, #255, lsl #24
-; CHECK-SD-NEXT: mov w8, #2130706431 // =0x7effffff
+; CHECK-SD-NEXT: movi d9, #0xff000000ff000000
+; CHECK-SD-NEXT: mvni v10.2s, #129, lsl #24
; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload
-; CHECK-SD-NEXT: fmov s10, w8
; CHECK-SD-NEXT: mov x25, #-9223372036854775808 // =0x8000000000000000
; CHECK-SD-NEXT: mov x26, #9223372036854775807 // =0x7fffffffffffffff
; CHECK-SD-NEXT: mov h0, v0.h[1]
@@ -3868,9 +3858,8 @@ define <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) {
; CHECK-NEXT: fmov s0, s8
; CHECK-NEXT: bl __fixsfti
; CHECK-NEXT: movi v10.2s, #241, lsl #24
-; CHECK-NEXT: mov w8, #1895825407 // =0x70ffffff
+; CHECK-NEXT: mvni v9.2s, #143, lsl #24
; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Reload
-; CHECK-NEXT: fmov s9, w8
; CHECK-NEXT: mov x22, #-34359738368 // =0xfffffff800000000
; CHECK-NEXT: mov x23, #34359738367 // =0x7ffffffff
; CHECK-NEXT: mov h0, v0.h[3]
@@ -4063,10 +4052,9 @@ define <8 x i128> @test_signed_v8f16_v8i128(<8 x half> %f) {
; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Spill
; CHECK-SD-NEXT: fmov s0, s8
; CHECK-SD-NEXT: bl __fixsfti
-; CHECK-SD-NEXT: movi v9.2s, #255, lsl #24
-; CHECK-SD-NEXT: mov w8, #2130706431 // =0x7effffff
+; CHECK-SD-NEXT: movi d9, #0xff000000ff000000
+; CHECK-SD-NEXT: mvni v10.2s, #129, lsl #24
; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Reload
-; CHECK-SD-NEXT: fmov s10, w8
; CHECK-SD-NEXT: mov x22, #-9223372036854775808 // =0x8000000000000000
; CHECK-SD-NEXT: mov x23, #9223372036854775807 // =0x7fffffffffffffff
; CHECK-SD-NEXT: mov h0, v0.h[1]
diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll
index 7a5fe0f4222bc..343c9901ce1c1 100644
--- a/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll
+++ b/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll
@@ -762,17 +762,16 @@ define i32 @test_unsigned_f128_i32(fp128 %f) {
; CHECK-SD-NEXT: .cfi_def_cfa_offset 32
; CHECK-SD-NEXT: .cfi_offset w19, -8
; CHECK-SD-NEXT: .cfi_offset w30, -16
-; CHECK-SD-NEXT: adrp x8, .LCPI30_0
+; CHECK-SD-NEXT: movi d1, #0000000000000000
; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill
-; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI30_0]
; CHECK-SD-NEXT: bl __getf2
; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload
; CHECK-SD-NEXT: mov w19, w0
; CHECK-SD-NEXT: bl __fixunstfsi
-; CHECK-SD-NEXT: adrp x8, .LCPI30_1
+; CHECK-SD-NEXT: adrp x8, .LCPI30_0
; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload
; CHECK-SD-NEXT: cmp w19, #0
-; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI30_1]
+; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI30_0]
; CHECK-SD-NEXT: csel w19, wzr, w0, mi
; CHECK-SD-NEXT: bl __gttf2
; CHECK-SD-NEXT: cmp w0, #0
diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
index d4feab2ea5d9b..62eb711989046 100644
--- a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
@@ -451,17 +451,16 @@ define <1 x i32> @test_unsigned_v1f128_v1i32(<1 x fp128> %f) {
; CHECK-SD-NEXT: .cfi_def_cfa_offset 32
; CHECK-SD-NEXT: .cfi_offset w19, -8
; CHECK-SD-NEXT: .cfi_offset w30, -16
-; CHECK-SD-NEXT: adrp x8, .LCPI14_0
+; CHECK-SD-NEXT: movi d1, #0000000000000000
; CHECK-SD-NEXT: str q0, [sp] // 16-byte Spill
-; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI14_0]
; CHECK-SD-NEXT: bl __getf2
; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload
; CHECK-SD-NEXT: mov w19, w0
; CHECK-SD-NEXT: bl __fixunstfsi
-; CHECK-SD-NEXT: adrp x8, .LCPI14_1
+; CHECK-SD-NEXT: adrp x8, .LCPI14_0
; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload
; CHECK-SD-NEXT: cmp w19, #0
-; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI14_1]
+; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI14_0]
; CHECK-SD-NEXT: csel w19, wzr, w0, mi
; CHECK-SD-NEXT: bl __gttf2
; CHECK-SD-NEXT: cmp w0, #0
@@ -514,51 +513,49 @@ define <1 x i32> @test_unsigned_v1f128_v1i32(<1 x fp128> %f) {
define <2 x i32> @test_unsigned_v2f128_v2i32(<2 x fp128> %f) {
; CHECK-SD-LABEL: test_unsigned_v2f128_v2i32:
; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: sub sp, sp, #96
-; CHECK-SD-NEXT: str x30, [sp, #64] // 8-byte Spill
-; CHECK-SD-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill
-; CHECK-SD-NEXT: .cfi_def_cfa_offset 96
+; CHECK-SD-NEXT: sub sp, sp, #80
+; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Spill
+; CHECK-SD-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 80
; CHECK-SD-NEXT: .cfi_offset w19, -8
; CHECK-SD-NEXT: .cfi_offset w20, -16
; CHECK-SD-NEXT: .cfi_offset w30, -32
; CHECK-SD-NEXT: mov v2.16b, v1.16b
-; CHECK-SD-NEXT: stp q1, q0, [sp, #32] // 32-byte Folded Spill
-; CHECK-SD-NEXT: adrp x8, .LCPI15_0
-; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI15_0]
+; CHECK-SD-NEXT: stp q1, q0, [sp, #16] // 32-byte Folded Spill
+; CHECK-SD-NEXT: movi d1, #0000000000000000
; CHECK-SD-NEXT: mov v0.16b, v2.16b
-; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill
; CHECK-SD-NEXT: bl __getf2
-; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Reload
+; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload
; CHECK-SD-NEXT: mov w19, w0
; CHECK-SD-NEXT: bl __fixunstfsi
-; CHECK-SD-NEXT: adrp x8, .LCPI15_1
-; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Reload
+; CHECK-SD-NEXT: adrp x8, .LCPI15_0
+; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload
; CHECK-SD-NEXT: cmp w19, #0
-; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI15_1]
+; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI15_0]
; CHECK-SD-NEXT: csel w19, wzr, w0, mi
; CHECK-SD-NEXT: str q1, [sp] // 16-byte Spill
; CHECK-SD-NEXT: bl __gttf2
-; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Reload
-; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload
+; CHECK-SD-NEXT: movi d1, #0000000000000000
+; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Reload
; CHECK-SD-NEXT: cmp w0, #0
; CHECK-SD-NEXT: csinv w20, w19, wzr, le
; CHECK-SD-NEXT: bl __getf2
-; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Reload
+; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Reload
; CHECK-SD-NEXT: mov w19, w0
; CHECK-SD-NEXT: bl __fixunstfsi
-; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Reload
+; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Reload
; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload
; CHECK-SD-NEXT: cmp w19, #0
; CHECK-SD-NEXT: csel w19, wzr, w0, mi
; CHECK-SD-NEXT: bl __gttf2
; CHECK-SD-NEXT: cmp w0, #0
-; CHECK-SD-NEXT: ldr x30, [sp, #64] // 8-byte Reload
+; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Reload
; CHECK-SD-NEXT: csinv w8, w19, wzr, le
; CHECK-SD-NEXT: fmov s0, w8
; CHECK-SD-NEXT: mov v0.s[1], w20
-; CHECK-SD-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload
+; CHECK-SD-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-SD-NEXT: add sp, sp, #96
+; CHECK-SD-NEXT: add sp, sp, #80
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_unsigned_v2f128_v2i32:
@@ -630,52 +627,50 @@ define <2 x i32> @test_unsigned_v2f128_v2i32(<2 x fp128> %f) {
define <3 x i32> @test_unsigned_v3f128_v3i32(<3 x fp128> %f) {
; CHECK-SD-LABEL: test_unsigned_v3f128_v3i32:
; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: sub sp, sp, #112
-; CHECK-SD-NEXT: stp x30, x21, [sp, #80] // 16-byte Folded Spill
-; CHECK-SD-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill
-; CHECK-SD-NEXT: .cfi_def_cfa_offset 112
+; CHECK-SD-NEXT: sub sp, sp, #96
+; CHECK-SD-NEXT: stp x30, x21, [sp, #64] // 16-byte Folded Spill
+; CHECK-SD-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 96
; CHECK-SD-NEXT: .cfi_offset w19, -8
; CHECK-SD-NEXT: .cfi_offset w20, -16
; CHECK-SD-NEXT: .cfi_offset w21, -24
; CHECK-SD-NEXT: .cfi_offset w30, -32
-; CHECK-SD-NEXT: stp q1, q0, [sp, #48] // 32-byte Folded Spill
-; CHECK-SD-NEXT: adrp x8, .LCPI16_0
+; CHECK-SD-NEXT: stp q1, q0, [sp, #32] // 32-byte Folded Spill
+; CHECK-SD-NEXT: movi d1, #0000000000000000
; CHECK-SD-NEXT: mov v0.16b, v2.16b
-; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI16_0]
; CHECK-SD-NEXT: str q2, [sp] // 16-byte Spill
-; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Spill
; CHECK-SD-NEXT: bl __getf2
; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload
; CHECK-SD-NEXT: mov w19, w0
; CHECK-SD-NEXT: bl __fixunstfsi
-; CHECK-SD-NEXT: adrp x8, .LCPI16_1
+; CHECK-SD-NEXT: adrp x8, .LCPI16_0
; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload
; CHECK-SD-NEXT: cmp w19, #0
-; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI16_1]
+; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI16_0]
; CHECK-SD-NEXT: csel w19, wzr, w0, mi
; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill
; CHECK-SD-NEXT: bl __gttf2
-; CHECK-SD-NEXT: ldp q1, q0, [sp, #32] // 32-byte Folded Reload
+; CHECK-SD-NEXT: movi d1, #0000000000000000
+; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Reload
; CHECK-SD-NEXT: cmp w0, #0
; CHECK-SD-NEXT: csinv w20, w19, wzr, le
; CHECK-SD-NEXT: bl __getf2
-; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Reload
+; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Reload
; CHECK-SD-NEXT: mov w19, w0
; CHECK-SD-NEXT: bl __fixunstfsi
-; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Reload
-; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload
+; CHECK-SD-NEXT: ldp q1, q0, [sp, #16] // 32-byte Folded Reload
; CHECK-SD-NEXT: cmp w19, #0
; CHECK-SD-NEXT: csel w19, wzr, w0, mi
; CHECK-SD-NEXT: bl __gttf2
-; CHECK-SD-NEXT: ldr q0, [sp, #64] // 16-byte Reload
-; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload
+; CHECK-SD-NEXT: movi d1, #0000000000000000
+; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Reload
; CHECK-SD-NEXT: cmp w0, #0
; CHECK-SD-NEXT: csinv w21, w19, wzr, le
; CHECK-SD-NEXT: bl __getf2
-; CHECK-SD-NEXT: ldr q0, [sp, #64] // 16-byte Reload
+; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Reload
; CHECK-SD-NEXT: mov w19, w0
; CHECK-SD-NEXT: bl __fixunstfsi
-; CHECK-SD-NEXT: ldr q0, [sp, #64] // 16-byte Reload
+; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Reload
; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload
; CHECK-SD-NEXT: cmp w19, #0
; CHECK-SD-NEXT: csel w19, wzr, w0, mi
@@ -684,10 +679,10 @@ define <3 x i32> @test_unsigned_v3f128_v3i32(<3 x fp128> %f) {
; CHECK-SD-NEXT: csinv w8, w19, wzr, le
; CHECK-SD-NEXT: fmov s0, w8
; CHECK-SD-NEXT: mov v0.s[1], w21
-; CHECK-SD-NEXT: ldp x30, x21, [sp, #80] // 16-byte Folded Reload
+; CHECK-SD-NEXT: ldp x30, x21, [sp, #64] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov v0.s[2], w20
-; CHECK-SD-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload
-; CHECK-SD-NEXT: add sp, sp, #112
+; CHECK-SD-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload
+; CHECK-SD-NEXT: add sp, sp, #96
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_unsigned_v3f128_v3i32:
@@ -780,34 +775,32 @@ define <3 x i32> @test_unsigned_v3f128_v3i32(<3 x fp128> %f) {
define <4 x i32> @test_unsigned_v4f128_v4i32(<4 x fp128> %f) {
; CHECK-SD-LABEL: test_unsigned_v4f128_v4i32:
; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: sub sp, sp, #128
-; CHECK-SD-NEXT: str x30, [sp, #96] // 8-byte Spill
-; CHECK-SD-NEXT: stp x20, x19, [sp, #112] // 16-byte Folded Spill
-; CHECK-SD-NEXT: .cfi_def_cfa_offset 128
+; CHECK-SD-NEXT: sub sp, sp, #112
+; CHECK-SD-NEXT: str x30, [sp, #80] // 8-byte Spill
+; CHECK-SD-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 112
; CHECK-SD-NEXT: .cfi_offset w19, -8
; CHECK-SD-NEXT: .cfi_offset w20, -16
; CHECK-SD-NEXT: .cfi_offset w30, -32
; CHECK-SD-NEXT: stp q0, q2, [sp, #16] // 32-byte Folded Spill
; CHECK-SD-NEXT: mov v2.16b, v1.16b
-; CHECK-SD-NEXT: adrp x8, .LCPI17_0
; CHECK-SD-NEXT: str q1, [sp] // 16-byte Spill
-; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI17_0]
-; CHECK-SD-NEXT: str q3, [sp, #80] // 16-byte Spill
+; CHECK-SD-NEXT: movi d1, #0000000000000000
+; CHECK-SD-NEXT: str q3, [sp, #64] // 16-byte Spill
; CHECK-SD-NEXT: mov v0.16b, v2.16b
-; CHECK-SD-NEXT: str q1, [sp, #64] // 16-byte Spill
; CHECK-SD-NEXT: bl __getf2
; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload
; CHECK-SD-NEXT: mov w19, w0
; CHECK-SD-NEXT: bl __fixunstfsi
-; CHECK-SD-NEXT: adrp x8, .LCPI17_1
+; CHECK-SD-NEXT: adrp x8, .LCPI17_0
; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Reload
; CHECK-SD-NEXT: cmp w19, #0
-; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI17_1]
+; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI17_0]
; CHECK-SD-NEXT: csel w19, wzr, w0, mi
; CHECK-SD-NEXT: str q1, [sp, #48] // 16-byte Spill
; CHECK-SD-NEXT: bl __gttf2
+; CHECK-SD-NEXT: movi d1, #0000000000000000
; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload
-; CHECK-SD-NEXT: ldr q1, [sp, #64] // 16-byte Reload
; CHECK-SD-NEXT: cmp w0, #0
; CHECK-SD-NEXT: csinv w20, w19, wzr, le
; CHECK-SD-NEXT: bl __getf2
@@ -820,7 +813,7 @@ define <4 x i32> @test_unsigned_v4f128_v4i32(<4 x fp128> %f) {
; CHECK-SD-NEXT: csel w19, wzr, w0, mi
; CHECK-SD-NEXT: bl __gttf2
; CHECK-SD-NEXT: cmp w0, #0
-; CHECK-SD-NEXT: ldr q1, [sp, #64] // 16-byte Reload
+; CHECK-SD-NEXT: movi d1, #0000000000000000
; CHECK-SD-NEXT: csinv w8, w19, wzr, le
; CHECK-SD-NEXT: fmov s0, w8
; CHECK-SD-NEXT: mov v0.s[1], w20
@@ -836,26 +829,26 @@ define <4 x i32> @test_unsigned_v4f128_v4i32(<4 x fp128> %f) {
; CHECK-SD-NEXT: bl __gttf2
; CHECK-SD-NEXT: cmp w0, #0
; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload
+; CHECK-SD-NEXT: movi d1, #0000000000000000
; CHECK-SD-NEXT: csinv w8, w19, wzr, le
; CHECK-SD-NEXT: mov v0.s[2], w8
; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill
-; CHECK-SD-NEXT: ldp q1, q0, [sp, #64] // 32-byte Folded Reload
+; CHECK-SD-NEXT: ldr q0, [sp, #64] // 16-byte Reload
; CHECK-SD-NEXT: bl __getf2
-; CHECK-SD-NEXT: ldr q0, [sp, #80] // 16-byte Reload
+; CHECK-SD-NEXT: ldr q0, [sp, #64] // 16-byte Reload
; CHECK-SD-NEXT: mov w19, w0
; CHECK-SD-NEXT: bl __fixunstfsi
-; CHECK-SD-NEXT: ldr q0, [sp, #80] // 16-byte Reload
-; CHECK-SD-NEXT: ldr q1, [sp, #48] // 16-byte Reload
+; CHECK-SD-NEXT: ldp q1, q0, [sp, #48] // 32-byte Folded Reload
; CHECK-SD-NEXT: cmp w19, #0
; CHECK-SD-NEXT: csel w19, wzr, w0, mi
; CHECK-SD-NEXT: bl __gttf2
; CHECK-SD-NEXT: cmp w0, #0
; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload
-; CHECK-SD-NEXT: ldr x30, [sp, #96] // 8-byte Reload
+; CHECK-SD-NEXT: ldr x30, [sp, #80] // 8-byte Reload
; CHECK-SD-NEXT: csinv w8, w19, wzr, le
-; CHECK-SD-NEXT: ldp x20, x19, [sp, #112] // 16-byte Folded Reload
+; CHECK-SD-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov v0.s[3], w8
-; CHECK-SD-NEXT: add sp, sp, #128
+; CHECK-SD-NEXT: add sp, sp, #112
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_unsigned_v4f128_v4i32:
@@ -4076,50 +4069,48 @@ define <16 x i16> @test_unsigned_v16f64_v16i16(<16 x double> %f) {
define <2 x i64> @test_signed_v2f128_v2i64(<2 x fp128> %f) {
; CHECK-SD-LABEL: test_signed_v2f128_v2i64:
; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: sub sp, sp, #80
-; CHECK-SD-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
-; CHECK-SD-NEXT: .cfi_def_cfa_offset 80
+; CHECK-SD-NEXT: sub sp, sp, #64
+; CHECK-SD-NEXT: stp x30, x19, [sp, #48] // 16-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 64
; CHECK-SD-NEXT: .cfi_offset w19, -8
; CHECK-SD-NEXT: .cfi_offset w30, -16
; CHECK-SD-NEXT: mov v2.16b, v1.16b
-; CHECK-SD-NEXT: stp q1, q0, [sp, #32] // 32-byte Folded Spill
-; CHECK-SD-NEXT: adrp x8, .LCPI86_0
-; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI86_0]
+; CHECK-SD-NEXT: stp q1, q0, [sp, #16] // 32-byte Folded Spill
+; CHECK-SD-NEXT: movi d1, #0000000000000000
; CHECK-SD-NEXT: mov v0.16b, v2.16b
-; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Spill
; CHECK-SD-NEXT: bl __getf2
-; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Reload
+; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload
; CHECK-SD-NEXT: mov w19, w0
; CHECK-SD-NEXT: bl __fixunstfdi
-; CHECK-SD-NEXT: adrp x8, .LCPI86_1
-; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Reload
+; CHECK-SD-NEXT: adrp x8, .LCPI86_0
+; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Reload
; CHECK-SD-NEXT: cmp w19, #0
-; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI86_1]
+; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI86_0]
; CHECK-SD-NEXT: csel x19, xzr, x0, mi
; CHECK-SD-NEXT: str q1, [sp] // 16-byte Spill
; CHECK-SD-NEXT: bl __gttf2
; CHECK-SD-NEXT: cmp w0, #0
-; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload
+; CHECK-SD-NEXT: movi d1, #0000000000000000
; CHECK-SD-NEXT: csinv x8, x19, xzr, le
; CHECK-SD-NEXT: fmov d0, x8
-; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Spill
-; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Reload
+; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Spill
+; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Reload
; CHECK-SD-NEXT: bl __getf2
-; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Reload
+; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Reload
; CHECK-SD-NEXT: mov w19, w0
; CHECK-SD-NEXT: bl __fixunstfdi
-; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Reload
+; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Reload
; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Reload
; CHECK-SD-NEXT: cmp w19, #0
; CHECK-SD-NEXT: csel x19, xzr, x0, mi
; CHECK-SD-NEXT: bl __gttf2
; CHECK-SD-NEXT: cmp w0, #0
-; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Reload
+; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Reload
; CHECK-SD-NEXT: csinv x8, x19, xzr, le
-; CHECK-SD-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-SD-NEXT: ldp x30, x19, [sp, #48] // 16-byte Folded Reload
; CHECK-SD-NEXT: fmov d0, x8
; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
-; CHECK-SD-NEXT: add sp, sp, #80
+; CHECK-SD-NEXT: add sp, sp, #64
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_signed_v2f128_v2i64:
diff --git a/llvm/test/CodeGen/AArch64/frem-power2.ll b/llvm/test/CodeGen/AArch64/frem-power2.ll
index 548079dc5c4aa..019c92e8e1e13 100644
--- a/llvm/test/CodeGen/AArch64/frem-power2.ll
+++ b/llvm/test/CodeGen/AArch64/frem-power2.ll
@@ -419,11 +419,11 @@ define <4 x float> @frem2_vec(<4 x float> %x) {
; CHECK-SD-LABEL: frem2_vec:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: movi v1.4s, #63, lsl #24
-; CHECK-SD-NEXT: movi v2.4s, #64, lsl #24
+; CHECK-SD-NEXT: movi v2.4s, #192, lsl #24
; CHECK-SD-NEXT: mov v3.16b, v0.16b
; CHECK-SD-NEXT: fmul v1.4s, v0.4s, v1.4s
; CHECK-SD-NEXT: frintz v1.4s, v1.4s
-; CHECK-SD-NEXT: fmls v3.4s, v2.4s, v1.4s
+; CHECK-SD-NEXT: fmla v3.4s, v2.4s, v1.4s
; CHECK-SD-NEXT: mvni v1.4s, #128, lsl #24
; CHECK-SD-NEXT: bit v0.16b, v3.16b, v1.16b
; CHECK-SD-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/half-precision-signof-no-assert.ll b/llvm/test/CodeGen/AArch64/half-precision-signof-no-assert.ll
index 4bf2b8c7e6a57..b46be46d2b95f 100644
--- a/llvm/test/CodeGen/AArch64/half-precision-signof-no-assert.ll
+++ b/llvm/test/CodeGen/AArch64/half-precision-signof-no-assert.ll
@@ -10,12 +10,11 @@ define ptr @fn(ptr %in, ptr %out) {
; CHECK-SD: // %bb.0: // %fn
; CHECK-SD-NEXT: ldr d1, [x0]
; CHECK-SD-NEXT: movi v0.4h, #60, lsl #8
-; CHECK-SD-NEXT: adrp x8, .LCPI0_0
; CHECK-SD-NEXT: fcvtl v1.4s, v1.4h
; CHECK-SD-NEXT: fcmgt v2.4s, v1.4s, #0.0
; CHECK-SD-NEXT: fcmlt v1.4s, v1.4s, #0.0
; CHECK-SD-NEXT: orr v1.16b, v1.16b, v2.16b
-; CHECK-SD-NEXT: ldr h2, [x8, :lo12:.LCPI0_0]
+; CHECK-SD-NEXT: movi v2.4h, #60, lsl #8
; CHECK-SD-NEXT: xtn v1.4h, v1.4s
; CHECK-SD-NEXT: and v0.8b, v1.8b, v0.8b
; CHECK-SD-NEXT: movi d1, #0000000000000000
diff --git a/llvm/test/CodeGen/AArch64/isinf.ll b/llvm/test/CodeGen/AArch64/isinf.ll
index f3283d2cf7ec2..2f79e083c5787 100644
--- a/llvm/test/CodeGen/AArch64/isinf.ll
+++ b/llvm/test/CodeGen/AArch64/isinf.ll
@@ -6,9 +6,8 @@
define i32 @replace_isinf_call_f16(half %x) {
; CHECK-SD-LABEL: replace_isinf_call_f16:
; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: mov w8, #31744 // =0x7c00
+; CHECK-SD-NEXT: movi v1.4h, #124, lsl #8
; CHECK-SD-NEXT: fabs h0, h0
-; CHECK-SD-NEXT: fmov h1, w8
; CHECK-SD-NEXT: fcmp h0, h1
; CHECK-SD-NEXT: cset w0, eq
; CHECK-SD-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/known-never-nan.ll b/llvm/test/CodeGen/AArch64/known-never-nan.ll
index bd080e29890e2..2e8e5d1e4cee3 100644
--- a/llvm/test/CodeGen/AArch64/known-never-nan.ll
+++ b/llvm/test/CodeGen/AArch64/known-never-nan.ll
@@ -31,11 +31,10 @@ define float @not_fmaxnm_maybe_nan(i32 %i1, i32 %i2) #0 {
; CHECK: // %bb.0:
; CHECK-NEXT: ucvtf s0, w0
; CHECK-NEXT: ucvtf s1, w1
-; CHECK-NEXT: mov w8, #-8388608 // =0xff800000
-; CHECK-NEXT: fmov s2, #17.00000000
-; CHECK-NEXT: fmov s3, w8
-; CHECK-NEXT: fmul s0, s0, s3
-; CHECK-NEXT: fadd s1, s1, s2
+; CHECK-NEXT: mvni v2.2s, #127, msl #16
+; CHECK-NEXT: fmov s3, #17.00000000
+; CHECK-NEXT: fmul s0, s0, s2
+; CHECK-NEXT: fadd s1, s1, s3
; CHECK-NEXT: fcmp s0, s1
; CHECK-NEXT: fcsel s0, s0, s1, pl
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/movi64_sve.ll b/llvm/test/CodeGen/AArch64/movi64_sve.ll
index 63b6a2a957b63..ea1a3d79e9e0b 100644
--- a/llvm/test/CodeGen/AArch64/movi64_sve.ll
+++ b/llvm/test/CodeGen/AArch64/movi64_sve.ll
@@ -11,22 +11,16 @@ define <2 x i64> @movi_1_v2i64() {
; CHECK-NEON-SD-NEXT: dup v0.2d, x8
; CHECK-NEON-SD-NEXT: ret
;
-; CHECK-SVE-SD-LABEL: movi_1_v2i64:
-; CHECK-SVE-SD: // %bb.0:
-; CHECK-SVE-SD-NEXT: mov z0.d, #1 // =0x1
-; CHECK-SVE-SD-NEXT: ret
+; CHECK-SVE-LABEL: movi_1_v2i64:
+; CHECK-SVE: // %bb.0:
+; CHECK-SVE-NEXT: mov z0.d, #1 // =0x1
+; CHECK-SVE-NEXT: ret
;
; CHECK-NEON-GI-LABEL: movi_1_v2i64:
; CHECK-NEON-GI: // %bb.0:
; CHECK-NEON-GI-NEXT: adrp x8, .LCPI0_0
; CHECK-NEON-GI-NEXT: ldr q0, [x8, :lo12:.LCPI0_0]
; CHECK-NEON-GI-NEXT: ret
-;
-; CHECK-SVE-GI-LABEL: movi_1_v2i64:
-; CHECK-SVE-GI: // %bb.0:
-; CHECK-SVE-GI-NEXT: adrp x8, .LCPI0_0
-; CHECK-SVE-GI-NEXT: ldr q0, [x8, :lo12:.LCPI0_0]
-; CHECK-SVE-GI-NEXT: ret
ret <2 x i64> splat (i64 1)
}
@@ -37,22 +31,16 @@ define <2 x i64> @movi_127_v2i64() {
; CHECK-NEON-SD-NEXT: dup v0.2d, x8
; CHECK-NEON-SD-NEXT: ret
;
-; CHECK-SVE-SD-LABEL: movi_127_v2i64:
-; CHECK-SVE-SD: // %bb.0:
-; CHECK-SVE-SD-NEXT: mov z0.d, #127 // =0x7f
-; CHECK-SVE-SD-NEXT: ret
+; CHECK-SVE-LABEL: movi_127_v2i64:
+; CHECK-SVE: // %bb.0:
+; CHECK-SVE-NEXT: mov z0.d, #127 // =0x7f
+; CHECK-SVE-NEXT: ret
;
; CHECK-NEON-GI-LABEL: movi_127_v2i64:
; CHECK-NEON-GI: // %bb.0:
; CHECK-NEON-GI-NEXT: adrp x8, .LCPI1_0
; CHECK-NEON-GI-NEXT: ldr q0, [x8, :lo12:.LCPI1_0]
; CHECK-NEON-GI-NEXT: ret
-;
-; CHECK-SVE-GI-LABEL: movi_127_v2i64:
-; CHECK-SVE-GI: // %bb.0:
-; CHECK-SVE-GI-NEXT: adrp x8, .LCPI1_0
-; CHECK-SVE-GI-NEXT: ldr q0, [x8, :lo12:.LCPI1_0]
-; CHECK-SVE-GI-NEXT: ret
ret <2 x i64> splat (i64 127)
}
@@ -63,22 +51,16 @@ define <2 x i64> @movi_m128_v2i64() {
; CHECK-NEON-SD-NEXT: dup v0.2d, x8
; CHECK-NEON-SD-NEXT: ret
;
-; CHECK-SVE-SD-LABEL: movi_m128_v2i64:
-; CHECK-SVE-SD: // %bb.0:
-; CHECK-SVE-SD-NEXT: mov z0.d, #-128 // =0xffffffffffffff80
-; CHECK-SVE-SD-NEXT: ret
+; CHECK-SVE-LABEL: movi_m128_v2i64:
+; CHECK-SVE: // %bb.0:
+; CHECK-SVE-NEXT: mov z0.d, #-128 // =0xffffffffffffff80
+; CHECK-SVE-NEXT: ret
;
; CHECK-NEON-GI-LABEL: movi_m128_v2i64:
; CHECK-NEON-GI: // %bb.0:
; CHECK-NEON-GI-NEXT: adrp x8, .LCPI2_0
; CHECK-NEON-GI-NEXT: ldr q0, [x8, :lo12:.LCPI2_0]
; CHECK-NEON-GI-NEXT: ret
-;
-; CHECK-SVE-GI-LABEL: movi_m128_v2i64:
-; CHECK-SVE-GI: // %bb.0:
-; CHECK-SVE-GI-NEXT: adrp x8, .LCPI2_0
-; CHECK-SVE-GI-NEXT: ldr q0, [x8, :lo12:.LCPI2_0]
-; CHECK-SVE-GI-NEXT: ret
ret <2 x i64> splat (i64 -128)
}
@@ -89,22 +71,16 @@ define <2 x i64> @movi_256_v2i64() {
; CHECK-NEON-SD-NEXT: dup v0.2d, x8
; CHECK-NEON-SD-NEXT: ret
;
-; CHECK-SVE-SD-LABEL: movi_256_v2i64:
-; CHECK-SVE-SD: // %bb.0:
-; CHECK-SVE-SD-NEXT: mov z0.d, #256 // =0x100
-; CHECK-SVE-SD-NEXT: ret
+; CHECK-SVE-LABEL: movi_256_v2i64:
+; CHECK-SVE: // %bb.0:
+; CHECK-SVE-NEXT: mov z0.d, #256 // =0x100
+; CHECK-SVE-NEXT: ret
;
; CHECK-NEON-GI-LABEL: movi_256_v2i64:
; CHECK-NEON-GI: // %bb.0:
; CHECK-NEON-GI-NEXT: adrp x8, .LCPI3_0
; CHECK-NEON-GI-NEXT: ldr q0, [x8, :lo12:.LCPI3_0]
; CHECK-NEON-GI-NEXT: ret
-;
-; CHECK-SVE-GI-LABEL: movi_256_v2i64:
-; CHECK-SVE-GI: // %bb.0:
-; CHECK-SVE-GI-NEXT: adrp x8, .LCPI3_0
-; CHECK-SVE-GI-NEXT: ldr q0, [x8, :lo12:.LCPI3_0]
-; CHECK-SVE-GI-NEXT: ret
ret <2 x i64> splat (i64 256)
}
@@ -115,22 +91,16 @@ define <2 x i64> @movi_32512_v2i64() {
; CHECK-NEON-SD-NEXT: dup v0.2d, x8
; CHECK-NEON-SD-NEXT: ret
;
-; CHECK-SVE-SD-LABEL: movi_32512_v2i64:
-; CHECK-SVE-SD: // %bb.0:
-; CHECK-SVE-SD-NEXT: mov z0.d, #32512 // =0x7f00
-; CHECK-SVE-SD-NEXT: ret
+; CHECK-SVE-LABEL: movi_32512_v2i64:
+; CHECK-SVE: // %bb.0:
+; CHECK-SVE-NEXT: mov z0.d, #32512 // =0x7f00
+; CHECK-SVE-NEXT: ret
;
; CHECK-NEON-GI-LABEL: movi_32512_v2i64:
; CHECK-NEON-GI: // %bb.0:
; CHECK-NEON-GI-NEXT: adrp x8, .LCPI4_0
; CHECK-NEON-GI-NEXT: ldr q0, [x8, :lo12:.LCPI4_0]
; CHECK-NEON-GI-NEXT: ret
-;
-; CHECK-SVE-GI-LABEL: movi_32512_v2i64:
-; CHECK-SVE-GI: // %bb.0:
-; CHECK-SVE-GI-NEXT: adrp x8, .LCPI4_0
-; CHECK-SVE-GI-NEXT: ldr q0, [x8, :lo12:.LCPI4_0]
-; CHECK-SVE-GI-NEXT: ret
ret <2 x i64> splat (i64 32512)
}
@@ -141,22 +111,16 @@ define <2 x i64> @movi_m32768_v2i64() {
; CHECK-NEON-SD-NEXT: dup v0.2d, x8
; CHECK-NEON-SD-NEXT: ret
;
-; CHECK-SVE-SD-LABEL: movi_m32768_v2i64:
-; CHECK-SVE-SD: // %bb.0:
-; CHECK-SVE-SD-NEXT: mov z0.d, #-32768 // =0xffffffffffff8000
-; CHECK-SVE-SD-NEXT: ret
+; CHECK-SVE-LABEL: movi_m32768_v2i64:
+; CHECK-SVE: // %bb.0:
+; CHECK-SVE-NEXT: mov z0.d, #-32768 // =0xffffffffffff8000
+; CHECK-SVE-NEXT: ret
;
; CHECK-NEON-GI-LABEL: movi_m32768_v2i64:
; CHECK-NEON-GI: // %bb.0:
; CHECK-NEON-GI-NEXT: adrp x8, .LCPI5_0
; CHECK-NEON-GI-NEXT: ldr q0, [x8, :lo12:.LCPI5_0]
; CHECK-NEON-GI-NEXT: ret
-;
-; CHECK-SVE-GI-LABEL: movi_m32768_v2i64:
-; CHECK-SVE-GI: // %bb.0:
-; CHECK-SVE-GI-NEXT: adrp x8, .LCPI5_0
-; CHECK-SVE-GI-NEXT: ldr q0, [x8, :lo12:.LCPI5_0]
-; CHECK-SVE-GI-NEXT: ret
ret <2 x i64> splat (i64 -32768)
}
@@ -169,16 +133,10 @@ define <4 x i32> @movi_v4i32_1() {
; CHECK-NEON-NEXT: ldr q0, [x8, :lo12:.LCPI6_0]
; CHECK-NEON-NEXT: ret
;
-; CHECK-SVE-SD-LABEL: movi_v4i32_1:
-; CHECK-SVE-SD: // %bb.0:
-; CHECK-SVE-SD-NEXT: mov z0.d, #127 // =0x7f
-; CHECK-SVE-SD-NEXT: ret
-;
-; CHECK-SVE-GI-LABEL: movi_v4i32_1:
-; CHECK-SVE-GI: // %bb.0:
-; CHECK-SVE-GI-NEXT: adrp x8, .LCPI6_0
-; CHECK-SVE-GI-NEXT: ldr q0, [x8, :lo12:.LCPI6_0]
-; CHECK-SVE-GI-NEXT: ret
+; CHECK-SVE-LABEL: movi_v4i32_1:
+; CHECK-SVE: // %bb.0:
+; CHECK-SVE-NEXT: mov z0.d, #127 // =0x7f
+; CHECK-SVE-NEXT: ret
ret <4 x i32> <i32 127, i32 0, i32 127, i32 0>
}
@@ -189,16 +147,10 @@ define <4 x i32> @movi_v4i32_2() {
; CHECK-NEON-NEXT: ldr q0, [x8, :lo12:.LCPI7_0]
; CHECK-NEON-NEXT: ret
;
-; CHECK-SVE-SD-LABEL: movi_v4i32_2:
-; CHECK-SVE-SD: // %bb.0:
-; CHECK-SVE-SD-NEXT: mov z0.d, #32512 // =0x7f00
-; CHECK-SVE-SD-NEXT: ret
-;
-; CHECK-SVE-GI-LABEL: movi_v4i32_2:
-; CHECK-SVE-GI: // %bb.0:
-; CHECK-SVE-GI-NEXT: adrp x8, .LCPI7_0
-; CHECK-SVE-GI-NEXT: ldr q0, [x8, :lo12:.LCPI7_0]
-; CHECK-SVE-GI-NEXT: ret
+; CHECK-SVE-LABEL: movi_v4i32_2:
+; CHECK-SVE: // %bb.0:
+; CHECK-SVE-NEXT: mov z0.d, #32512 // =0x7f00
+; CHECK-SVE-NEXT: ret
ret <4 x i32> <i32 32512, i32 0, i32 32512, i32 0>
}
@@ -209,22 +161,16 @@ define <4 x i32> @movi_v4i32_4092() {
; CHECK-NEON-SD-NEXT: dup v0.4s, w8
; CHECK-NEON-SD-NEXT: ret
;
-; CHECK-SVE-SD-LABEL: movi_v4i32_4092:
-; CHECK-SVE-SD: // %bb.0:
-; CHECK-SVE-SD-NEXT: mov z0.s, #4092 // =0xffc
-; CHECK-SVE-SD-NEXT: ret
+; CHECK-SVE-LABEL: movi_v4i32_4092:
+; CHECK-SVE: // %bb.0:
+; CHECK-SVE-NEXT: mov z0.s, #4092 // =0xffc
+; CHECK-SVE-NEXT: ret
;
; CHECK-NEON-GI-LABEL: movi_v4i32_4092:
; CHECK-NEON-GI: // %bb.0:
; CHECK-NEON-GI-NEXT: adrp x8, .LCPI8_0
; CHECK-NEON-GI-NEXT: ldr q0, [x8, :lo12:.LCPI8_0]
; CHECK-NEON-GI-NEXT: ret
-;
-; CHECK-SVE-GI-LABEL: movi_v4i32_4092:
-; CHECK-SVE-GI: // %bb.0:
-; CHECK-SVE-GI-NEXT: adrp x8, .LCPI8_0
-; CHECK-SVE-GI-NEXT: ldr q0, [x8, :lo12:.LCPI8_0]
-; CHECK-SVE-GI-NEXT: ret
ret <4 x i32> splat (i32 4092)
}
@@ -235,16 +181,10 @@ define <8 x i16> @movi_v8i16_1() {
; CHECK-NEON-NEXT: ldr q0, [x8, :lo12:.LCPI9_0]
; CHECK-NEON-NEXT: ret
;
-; CHECK-SVE-SD-LABEL: movi_v8i16_1:
-; CHECK-SVE-SD: // %bb.0:
-; CHECK-SVE-SD-NEXT: mov z0.d, #127 // =0x7f
-; CHECK-SVE-SD-NEXT: ret
-;
-; CHECK-SVE-GI-LABEL: movi_v8i16_1:
-; CHECK-SVE-GI: // %bb.0:
-; CHECK-SVE-GI-NEXT: adrp x8, .LCPI9_0
-; CHECK-SVE-GI-NEXT: ldr q0, [x8, :lo12:.LCPI9_0]
-; CHECK-SVE-GI-NEXT: ret
+; CHECK-SVE-LABEL: movi_v8i16_1:
+; CHECK-SVE: // %bb.0:
+; CHECK-SVE-NEXT: mov z0.d, #127 // =0x7f
+; CHECK-SVE-NEXT: ret
ret <8 x i16> <i16 127, i16 0, i16 0, i16 0, i16 127, i16 0, i16 0, i16 0>
}
@@ -255,16 +195,10 @@ define <8 x i16> @movi_v8i16_2() {
; CHECK-NEON-NEXT: ldr q0, [x8, :lo12:.LCPI10_0]
; CHECK-NEON-NEXT: ret
;
-; CHECK-SVE-SD-LABEL: movi_v8i16_2:
-; CHECK-SVE-SD: // %bb.0:
-; CHECK-SVE-SD-NEXT: mov z0.d, #32512 // =0x7f00
-; CHECK-SVE-SD-NEXT: ret
-;
-; CHECK-SVE-GI-LABEL: movi_v8i16_2:
-; CHECK-SVE-GI: // %bb.0:
-; CHECK-SVE-GI-NEXT: adrp x8, .LCPI10_0
-; CHECK-SVE-GI-NEXT: ldr q0, [x8, :lo12:.LCPI10_0]
-; CHECK-SVE-GI-NEXT: ret
+; CHECK-SVE-LABEL: movi_v8i16_2:
+; CHECK-SVE: // %bb.0:
+; CHECK-SVE-NEXT: mov z0.d, #32512 // =0x7f00
+; CHECK-SVE-NEXT: ret
ret <8 x i16> <i16 32512, i16 0, i16 0, i16 0, i16 32512, i16 0, i16 0, i16 0>
}
@@ -275,22 +209,16 @@ define <8 x i16> @movi_v8i16_510() {
; CHECK-NEON-SD-NEXT: dup v0.8h, w8
; CHECK-NEON-SD-NEXT: ret
;
-; CHECK-SVE-SD-LABEL: movi_v8i16_510:
-; CHECK-SVE-SD: // %bb.0:
-; CHECK-SVE-SD-NEXT: mov z0.h, #510 // =0x1fe
-; CHECK-SVE-SD-NEXT: ret
+; CHECK-SVE-LABEL: movi_v8i16_510:
+; CHECK-SVE: // %bb.0:
+; CHECK-SVE-NEXT: mov z0.h, #510 // =0x1fe
+; CHECK-SVE-NEXT: ret
;
; CHECK-NEON-GI-LABEL: movi_v8i16_510:
; CHECK-NEON-GI: // %bb.0:
; CHECK-NEON-GI-NEXT: adrp x8, .LCPI11_0
; CHECK-NEON-GI-NEXT: ldr q0, [x8, :lo12:.LCPI11_0]
; CHECK-NEON-GI-NEXT: ret
-;
-; CHECK-SVE-GI-LABEL: movi_v8i16_510:
-; CHECK-SVE-GI: // %bb.0:
-; CHECK-SVE-GI-NEXT: adrp x8, .LCPI11_0
-; CHECK-SVE-GI-NEXT: ldr q0, [x8, :lo12:.LCPI11_0]
-; CHECK-SVE-GI-NEXT: ret
ret <8 x i16> splat (i16 510)
}
@@ -301,16 +229,10 @@ define <16 x i8> @movi_v16i8_1() {
; CHECK-NEON-NEXT: ldr q0, [x8, :lo12:.LCPI12_0]
; CHECK-NEON-NEXT: ret
;
-; CHECK-SVE-SD-LABEL: movi_v16i8_1:
-; CHECK-SVE-SD: // %bb.0:
-; CHECK-SVE-SD-NEXT: mov z0.d, #127 // =0x7f
-; CHECK-SVE-SD-NEXT: ret
-;
-; CHECK-SVE-GI-LABEL: movi_v16i8_1:
-; CHECK-SVE-GI: // %bb.0:
-; CHECK-SVE-GI-NEXT: adrp x8, .LCPI12_0
-; CHECK-SVE-GI-NEXT: ldr q0, [x8, :lo12:.LCPI12_0]
-; CHECK-SVE-GI-NEXT: ret
+; CHECK-SVE-LABEL: movi_v16i8_1:
+; CHECK-SVE: // %bb.0:
+; CHECK-SVE-NEXT: mov z0.d, #127 // =0x7f
+; CHECK-SVE-NEXT: ret
ret <16 x i8> <i8 127, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 127, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
}
@@ -321,16 +243,10 @@ define <16 x i8> @movi_v16i8_2() {
; CHECK-NEON-NEXT: ldr q0, [x8, :lo12:.LCPI13_0]
; CHECK-NEON-NEXT: ret
;
-; CHECK-SVE-SD-LABEL: movi_v16i8_2:
-; CHECK-SVE-SD: // %bb.0:
-; CHECK-SVE-SD-NEXT: mov z0.d, #32512 // =0x7f00
-; CHECK-SVE-SD-NEXT: ret
-;
-; CHECK-SVE-GI-LABEL: movi_v16i8_2:
-; CHECK-SVE-GI: // %bb.0:
-; CHECK-SVE-GI-NEXT: adrp x8, .LCPI13_0
-; CHECK-SVE-GI-NEXT: ldr q0, [x8, :lo12:.LCPI13_0]
-; CHECK-SVE-GI-NEXT: ret
+; CHECK-SVE-LABEL: movi_v16i8_2:
+; CHECK-SVE: // %bb.0:
+; CHECK-SVE-NEXT: mov z0.d, #32512 // =0x7f00
+; CHECK-SVE-NEXT: ret
ret <16 x i8> <i8 0, i8 127, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 127, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
}
@@ -343,22 +259,16 @@ define <2 x i64> @movi_128_v2i64() {
; CHECK-NEON-SD-NEXT: dup v0.2d, x8
; CHECK-NEON-SD-NEXT: ret
;
-; CHECK-SVE-SD-LABEL: movi_128_v2i64:
-; CHECK-SVE-SD: // %bb.0:
-; CHECK-SVE-SD-NEXT: mov z0.d, #128 // =0x80
-; CHECK-SVE-SD-NEXT: ret
+; CHECK-SVE-LABEL: movi_128_v2i64:
+; CHECK-SVE: // %bb.0:
+; CHECK-SVE-NEXT: mov z0.d, #128 // =0x80
+; CHECK-SVE-NEXT: ret
;
; CHECK-NEON-GI-LABEL: movi_128_v2i64:
; CHECK-NEON-GI: // %bb.0:
; CHECK-NEON-GI-NEXT: adrp x8, .LCPI14_0
; CHECK-NEON-GI-NEXT: ldr q0, [x8, :lo12:.LCPI14_0]
; CHECK-NEON-GI-NEXT: ret
-;
-; CHECK-SVE-GI-LABEL: movi_128_v2i64:
-; CHECK-SVE-GI: // %bb.0:
-; CHECK-SVE-GI-NEXT: adrp x8, .LCPI14_0
-; CHECK-SVE-GI-NEXT: ldr q0, [x8, :lo12:.LCPI14_0]
-; CHECK-SVE-GI-NEXT: ret
ret <2 x i64> splat (i64 128)
}
@@ -369,22 +279,16 @@ define <2 x i64> @movi_m127_v2i64() {
; CHECK-NEON-SD-NEXT: dup v0.2d, x8
; CHECK-NEON-SD-NEXT: ret
;
-; CHECK-SVE-SD-LABEL: movi_m127_v2i64:
-; CHECK-SVE-SD: // %bb.0:
-; CHECK-SVE-SD-NEXT: mov z0.d, #-129 // =0xffffffffffffff7f
-; CHECK-SVE-SD-NEXT: ret
+; CHECK-SVE-LABEL: movi_m127_v2i64:
+; CHECK-SVE: // %bb.0:
+; CHECK-SVE-NEXT: mov z0.d, #-129 // =0xffffffffffffff7f
+; CHECK-SVE-NEXT: ret
;
; CHECK-NEON-GI-LABEL: movi_m127_v2i64:
; CHECK-NEON-GI: // %bb.0:
; CHECK-NEON-GI-NEXT: adrp x8, .LCPI15_0
; CHECK-NEON-GI-NEXT: ldr q0, [x8, :lo12:.LCPI15_0]
; CHECK-NEON-GI-NEXT: ret
-;
-; CHECK-SVE-GI-LABEL: movi_m127_v2i64:
-; CHECK-SVE-GI: // %bb.0:
-; CHECK-SVE-GI-NEXT: adrp x8, .LCPI15_0
-; CHECK-SVE-GI-NEXT: ldr q0, [x8, :lo12:.LCPI15_0]
-; CHECK-SVE-GI-NEXT: ret
ret <2 x i64> splat (i64 -129)
}
@@ -422,22 +326,16 @@ define <2 x i64> @movi_m32769_v2i64() {
; CHECK-NEON-SD-NEXT: dup v0.2d, x8
; CHECK-NEON-SD-NEXT: ret
;
-; CHECK-SVE-SD-LABEL: movi_m32769_v2i64:
-; CHECK-SVE-SD: // %bb.0:
-; CHECK-SVE-SD-NEXT: mov z0.d, #0xffffffffffff7fff
-; CHECK-SVE-SD-NEXT: ret
+; CHECK-SVE-LABEL: movi_m32769_v2i64:
+; CHECK-SVE: // %bb.0:
+; CHECK-SVE-NEXT: mov z0.d, #0xffffffffffff7fff
+; CHECK-SVE-NEXT: ret
;
; CHECK-NEON-GI-LABEL: movi_m32769_v2i64:
; CHECK-NEON-GI: // %bb.0:
; CHECK-NEON-GI-NEXT: adrp x8, .LCPI17_0
; CHECK-NEON-GI-NEXT: ldr q0, [x8, :lo12:.LCPI17_0]
; CHECK-NEON-GI-NEXT: ret
-;
-; CHECK-SVE-GI-LABEL: movi_m32769_v2i64:
-; CHECK-SVE-GI: // %bb.0:
-; CHECK-SVE-GI-NEXT: adrp x8, .LCPI17_0
-; CHECK-SVE-GI-NEXT: ldr q0, [x8, :lo12:.LCPI17_0]
-; CHECK-SVE-GI-NEXT: ret
ret <2 x i64> splat (i64 -32769)
}
@@ -475,16 +373,10 @@ define <4 x i32> @movi_v4i32_3() {
; CHECK-NEON-NEXT: ldr q0, [x8, :lo12:.LCPI19_0]
; CHECK-NEON-NEXT: ret
;
-; CHECK-SVE-SD-LABEL: movi_v4i32_3:
-; CHECK-SVE-SD: // %bb.0:
-; CHECK-SVE-SD-NEXT: mov z0.d, #0xffffff80
-; CHECK-SVE-SD-NEXT: ret
-;
-; CHECK-SVE-GI-LABEL: movi_v4i32_3:
-; CHECK-SVE-GI: // %bb.0:
-; CHECK-SVE-GI-NEXT: adrp x8, .LCPI19_0
-; CHECK-SVE-GI-NEXT: ldr q0, [x8, :lo12:.LCPI19_0]
-; CHECK-SVE-GI-NEXT: ret
+; CHECK-SVE-LABEL: movi_v4i32_3:
+; CHECK-SVE: // %bb.0:
+; CHECK-SVE-NEXT: mov z0.d, #0xffffff80
+; CHECK-SVE-NEXT: ret
ret <4 x i32> <i32 -128, i32 0, i32 -128, i32 0>
}
@@ -495,18 +387,11 @@ define <16 x i8> @movi_v16i8_3() {
; CHECK-NEON-NEXT: ldr q0, [x8, :lo12:.LCPI20_0]
; CHECK-NEON-NEXT: ret
;
-; CHECK-SVE-SD-LABEL: movi_v16i8_3:
-; CHECK-SVE-SD: // %bb.0:
-; CHECK-SVE-SD-NEXT: mov z0.d, #0x7f0000
-; CHECK-SVE-SD-NEXT: ret
-;
-; CHECK-SVE-GI-LABEL: movi_v16i8_3:
-; CHECK-SVE-GI: // %bb.0:
-; CHECK-SVE-GI-NEXT: adrp x8, .LCPI20_0
-; CHECK-SVE-GI-NEXT: ldr q0, [x8, :lo12:.LCPI20_0]
-; CHECK-SVE-GI-NEXT: ret
+; CHECK-SVE-LABEL: movi_v16i8_3:
+; CHECK-SVE: // %bb.0:
+; CHECK-SVE-NEXT: mov z0.d, #0x7f0000
+; CHECK-SVE-NEXT: ret
ret <16 x i8> <i8 0, i8 0, i8 127, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 127, i8 0, i8 0, i8 0, i8 0, i8 0>
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; CHECK: {{.*}}
-; CHECK-SVE: {{.*}}
diff --git a/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll b/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll
index e72d79ef45757..e157f85e54988 100644
--- a/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll
@@ -59,18 +59,11 @@ define <16 x i8> @xor16xi8(<16 x i8> %a, <16 x i8> %b) {
}
define <8 x i8> @bsl8xi8_const(<8 x i8> %a, <8 x i8> %b) {
-; CHECK-SD-LABEL: bsl8xi8_const:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: movi d2, #0x00ffff0000ffff
-; CHECK-SD-NEXT: bif v0.8b, v1.8b, v2.8b
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: bsl8xi8_const:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI6_0
-; CHECK-GI-NEXT: ldr d2, [x8, :lo12:.LCPI6_0]
-; CHECK-GI-NEXT: bif v0.8b, v1.8b, v2.8b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: bsl8xi8_const:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi d2, #0x00ffff0000ffff
+; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
+; CHECK-NEXT: ret
%tmp1 = and <8 x i8> %a, < i8 -1, i8 -1, i8 0, i8 0, i8 -1, i8 -1, i8 0, i8 0 >
%tmp2 = and <8 x i8> %b, < i8 0, i8 0, i8 -1, i8 -1, i8 0, i8 0, i8 -1, i8 -1 >
%tmp3 = or <8 x i8> %tmp1, %tmp2
@@ -78,18 +71,11 @@ define <8 x i8> @bsl8xi8_const(<8 x i8> %a, <8 x i8> %b) {
}
define <16 x i8> @bsl16xi8_const(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-SD-LABEL: bsl16xi8_const:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: movi v2.2d, #0x000000ffffffff
-; CHECK-SD-NEXT: bif v0.16b, v1.16b, v2.16b
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: bsl16xi8_const:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI7_0
-; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI7_0]
-; CHECK-GI-NEXT: bif v0.16b, v1.16b, v2.16b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: bsl16xi8_const:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v2.2d, #0x000000ffffffff
+; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
+; CHECK-NEXT: ret
%tmp1 = and <16 x i8> %a, < i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 0, i8 0, i8 0, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 0, i8 0, i8 0 >
%tmp2 = and <16 x i8> %b, < i8 0, i8 0, i8 0, i8 0, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 0, i8 0, i8 0, i8 -1, i8 -1, i8 -1, i8 -1 >
%tmp3 = or <16 x i8> %tmp1, %tmp2
@@ -892,8 +878,7 @@ define <2 x i32> @bsl2xi32_const(<2 x i32> %a, <2 x i32> %b) {
;
; CHECK-GI-LABEL: bsl2xi32_const:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI70_0
-; CHECK-GI-NEXT: ldr d2, [x8, :lo12:.LCPI70_0]
+; CHECK-GI-NEXT: movi d2, #0x000000ffffffff
; CHECK-GI-NEXT: bif v0.8b, v1.8b, v2.8b
; CHECK-GI-NEXT: ret
%tmp1 = and <2 x i32> %a, < i32 -1, i32 0 >
@@ -904,18 +889,11 @@ define <2 x i32> @bsl2xi32_const(<2 x i32> %a, <2 x i32> %b) {
define <4 x i16> @bsl4xi16_const(<4 x i16> %a, <4 x i16> %b) {
-; CHECK-SD-LABEL: bsl4xi16_const:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: movi d2, #0x00ffff0000ffff
-; CHECK-SD-NEXT: bif v0.8b, v1.8b, v2.8b
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: bsl4xi16_const:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI71_0
-; CHECK-GI-NEXT: ldr d2, [x8, :lo12:.LCPI71_0]
-; CHECK-GI-NEXT: bif v0.8b, v1.8b, v2.8b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: bsl4xi16_const:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi d2, #0x00ffff0000ffff
+; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
+; CHECK-NEXT: ret
%tmp1 = and <4 x i16> %a, < i16 -1, i16 0, i16 -1,i16 0 >
%tmp2 = and <4 x i16> %b, < i16 0, i16 -1,i16 0, i16 -1 >
%tmp3 = or <4 x i16> %tmp1, %tmp2
@@ -947,18 +925,11 @@ define <1 x i64> @bsl1xi64_const(<1 x i64> %a, <1 x i64> %b) {
}
define <4 x i32> @bsl4xi32_const(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-SD-LABEL: bsl4xi32_const:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: movi v2.2d, #0x000000ffffffff
-; CHECK-SD-NEXT: bif v0.16b, v1.16b, v2.16b
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: bsl4xi32_const:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI73_0
-; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI73_0]
-; CHECK-GI-NEXT: bif v0.16b, v1.16b, v2.16b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: bsl4xi32_const:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v2.2d, #0x000000ffffffff
+; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
+; CHECK-NEXT: ret
%tmp1 = and <4 x i32> %a, < i32 -1, i32 0, i32 -1, i32 0 >
%tmp2 = and <4 x i32> %b, < i32 0, i32 -1, i32 0, i32 -1 >
%tmp3 = or <4 x i32> %tmp1, %tmp2
@@ -966,18 +937,11 @@ define <4 x i32> @bsl4xi32_const(<4 x i32> %a, <4 x i32> %b) {
}
define <8 x i16> @bsl8xi16_const(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SD-LABEL: bsl8xi16_const:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: movi v2.2d, #0x000000ffffffff
-; CHECK-SD-NEXT: bif v0.16b, v1.16b, v2.16b
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: bsl8xi16_const:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI74_0
-; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI74_0]
-; CHECK-GI-NEXT: bif v0.16b, v1.16b, v2.16b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: bsl8xi16_const:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v2.2d, #0x000000ffffffff
+; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
+; CHECK-NEXT: ret
%tmp1 = and <8 x i16> %a, < i16 -1, i16 -1, i16 0,i16 0, i16 -1, i16 -1, i16 0,i16 0 >
%tmp2 = and <8 x i16> %b, < i16 0, i16 0, i16 -1, i16 -1, i16 0, i16 0, i16 -1, i16 -1 >
%tmp3 = or <8 x i16> %tmp1, %tmp2
@@ -1561,8 +1525,7 @@ define <8 x i8> @orrimm8b_as_orrimm4h_lsl0(<8 x i8> %a) {
;
; CHECK-GI-LABEL: orrimm8b_as_orrimm4h_lsl0:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI104_0
-; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI104_0]
+; CHECK-GI-NEXT: movi d1, #0xff00ff00ff00ff
; CHECK-GI-NEXT: orr v0.8b, v0.8b, v1.8b
; CHECK-GI-NEXT: ret
%val = or <8 x i8> %a, <i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0>
@@ -1577,8 +1540,7 @@ define <8 x i8> @orrimm8b_as_orimm4h_lsl8(<8 x i8> %a) {
;
; CHECK-GI-LABEL: orrimm8b_as_orimm4h_lsl8:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI105_0
-; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI105_0]
+; CHECK-GI-NEXT: movi d1, #0xff00ff00ff00ff00
; CHECK-GI-NEXT: orr v0.8b, v0.8b, v1.8b
; CHECK-GI-NEXT: ret
%val = or <8 x i8> %a, <i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255>
@@ -1593,8 +1555,7 @@ define <16 x i8> @orimm16b_as_orrimm8h_lsl0(<16 x i8> %a) {
;
; CHECK-GI-LABEL: orimm16b_as_orrimm8h_lsl0:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI106_0
-; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI106_0]
+; CHECK-GI-NEXT: movi v1.2d, #0xff00ff00ff00ff
; CHECK-GI-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-GI-NEXT: ret
%val = or <16 x i8> %a, <i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0>
@@ -1609,8 +1570,7 @@ define <16 x i8> @orimm16b_as_orrimm8h_lsl8(<16 x i8> %a) {
;
; CHECK-GI-LABEL: orimm16b_as_orrimm8h_lsl8:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI107_0
-; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI107_0]
+; CHECK-GI-NEXT: movi v1.2d, #0xff00ff00ff00ff00
; CHECK-GI-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-GI-NEXT: ret
%val = or <16 x i8> %a, <i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255>
@@ -1625,8 +1585,7 @@ define <8 x i8> @and8imm2s_lsl0(<8 x i8> %a) {
;
; CHECK-GI-LABEL: and8imm2s_lsl0:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI108_0
-; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI108_0]
+; CHECK-GI-NEXT: movi d1, #0xffffff00ffffff00
; CHECK-GI-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-GI-NEXT: ret
%tmp1 = and <8 x i8> %a, < i8 0, i8 255, i8 255, i8 255, i8 0, i8 255, i8 255, i8 255>
@@ -1641,8 +1600,7 @@ define <8 x i8> @and8imm2s_lsl8(<8 x i8> %a) {
;
; CHECK-GI-LABEL: and8imm2s_lsl8:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI109_0
-; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI109_0]
+; CHECK-GI-NEXT: movi d1, #0xffff00ffffff00ff
; CHECK-GI-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-GI-NEXT: ret
%tmp1 = and <8 x i8> %a, < i8 255, i8 0, i8 255, i8 255, i8 255, i8 0, i8 255, i8 255>
@@ -1657,8 +1615,7 @@ define <8 x i8> @and8imm2s_lsl16(<8 x i8> %a) {
;
; CHECK-GI-LABEL: and8imm2s_lsl16:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI110_0
-; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI110_0]
+; CHECK-GI-NEXT: movi d1, #0xff00ffffff00ffff
; CHECK-GI-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-GI-NEXT: ret
%tmp1 = and <8 x i8> %a, < i8 255, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0, i8 255>
@@ -1673,8 +1630,7 @@ define <8 x i8> @and8imm2s_lsl24(<8 x i8> %a) {
;
; CHECK-GI-LABEL: and8imm2s_lsl24:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI111_0
-; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI111_0]
+; CHECK-GI-NEXT: mvni v1.2s, #254, lsl #24
; CHECK-GI-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-GI-NEXT: ret
%tmp1 = and <8 x i8> %a, < i8 255, i8 255, i8 255, i8 1, i8 255, i8 255, i8 255, i8 1>
@@ -1689,8 +1645,7 @@ define <4 x i16> @and16imm2s_lsl0(<4 x i16> %a) {
;
; CHECK-GI-LABEL: and16imm2s_lsl0:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI112_0
-; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI112_0]
+; CHECK-GI-NEXT: movi d1, #0xffffff00ffffff00
; CHECK-GI-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-GI-NEXT: ret
%tmp1 = and <4 x i16> %a, < i16 65280, i16 65535, i16 65280, i16 65535>
@@ -1705,8 +1660,7 @@ define <4 x i16> @and16imm2s_lsl8(<4 x i16> %a) {
;
; CHECK-GI-LABEL: and16imm2s_lsl8:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI113_0
-; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI113_0]
+; CHECK-GI-NEXT: movi d1, #0xffff00ffffff00ff
; CHECK-GI-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-GI-NEXT: ret
%tmp1 = and <4 x i16> %a, < i16 255, i16 65535, i16 255, i16 65535>
@@ -1721,8 +1675,7 @@ define <4 x i16> @and16imm2s_lsl16(<4 x i16> %a) {
;
; CHECK-GI-LABEL: and16imm2s_lsl16:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI114_0
-; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI114_0]
+; CHECK-GI-NEXT: movi d1, #0xff00ffffff00ffff
; CHECK-GI-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-GI-NEXT: ret
%tmp1 = and <4 x i16> %a, < i16 65535, i16 65280, i16 65535, i16 65280>
@@ -1737,8 +1690,7 @@ define <4 x i16> @and16imm2s_lsl24(<4 x i16> %a) {
;
; CHECK-GI-LABEL: and16imm2s_lsl24:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI115_0
-; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI115_0]
+; CHECK-GI-NEXT: mvni v1.2s, #254, lsl #24
; CHECK-GI-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-GI-NEXT: ret
%tmp1 = and <4 x i16> %a, < i16 65535, i16 511, i16 65535, i16 511>
@@ -1818,8 +1770,7 @@ define <16 x i8> @and8imm4s_lsl0(<16 x i8> %a) {
;
; CHECK-GI-LABEL: and8imm4s_lsl0:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI120_0
-; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI120_0]
+; CHECK-GI-NEXT: movi v1.2d, #0xffffff00ffffff00
; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-GI-NEXT: ret
%tmp1 = and <16 x i8> %a, < i8 0, i8 255, i8 255, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0, i8 255, i8 255, i8 255>
@@ -1834,8 +1785,7 @@ define <16 x i8> @and8imm4s_lsl8(<16 x i8> %a) {
;
; CHECK-GI-LABEL: and8imm4s_lsl8:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI121_0
-; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI121_0]
+; CHECK-GI-NEXT: movi v1.2d, #0xffff00ffffff00ff
; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-GI-NEXT: ret
%tmp1 = and <16 x i8> %a, < i8 255, i8 0, i8 255, i8 255, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0, i8 255, i8 255>
@@ -1850,8 +1800,7 @@ define <16 x i8> @and8imm4s_lsl16(<16 x i8> %a) {
;
; CHECK-GI-LABEL: and8imm4s_lsl16:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI122_0
-; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI122_0]
+; CHECK-GI-NEXT: movi v1.2d, #0xff00ffffff00ffff
; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-GI-NEXT: ret
%tmp1 = and <16 x i8> %a, < i8 255, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0, i8 255>
@@ -1866,8 +1815,7 @@ define <16 x i8> @and8imm4s_lsl24(<16 x i8> %a) {
;
; CHECK-GI-LABEL: and8imm4s_lsl24:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI123_0
-; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI123_0]
+; CHECK-GI-NEXT: mvni v1.4s, #254, lsl #24
; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-GI-NEXT: ret
%tmp1 = and <16 x i8> %a, < i8 255, i8 255, i8 255, i8 1, i8 255, i8 255, i8 255, i8 1, i8 255, i8 255, i8 255, i8 1, i8 255, i8 255, i8 255, i8 1>
@@ -1882,8 +1830,7 @@ define <8 x i16> @and16imm4s_lsl0(<8 x i16> %a) {
;
; CHECK-GI-LABEL: and16imm4s_lsl0:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI124_0
-; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI124_0]
+; CHECK-GI-NEXT: movi v1.2d, #0xffffff00ffffff00
; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-GI-NEXT: ret
%tmp1 = and <8 x i16> %a, < i16 65280, i16 65535, i16 65280, i16 65535, i16 65280, i16 65535, i16 65280, i16 65535>
@@ -1898,8 +1845,7 @@ define <8 x i16> @and16imm4s_lsl8(<8 x i16> %a) {
;
; CHECK-GI-LABEL: and16imm4s_lsl8:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI125_0
-; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI125_0]
+; CHECK-GI-NEXT: movi v1.2d, #0xffff00ffffff00ff
; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-GI-NEXT: ret
%tmp1 = and <8 x i16> %a, < i16 255, i16 65535, i16 255, i16 65535, i16 255, i16 65535, i16 255, i16 65535>
@@ -1914,8 +1860,7 @@ define <8 x i16> @and16imm4s_lsl16(<8 x i16> %a) {
;
; CHECK-GI-LABEL: and16imm4s_lsl16:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI126_0
-; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI126_0]
+; CHECK-GI-NEXT: movi v1.2d, #0xff00ffffff00ffff
; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-GI-NEXT: ret
%tmp1 = and <8 x i16> %a, < i16 65535, i16 65280, i16 65535, i16 65280, i16 65535, i16 65280, i16 65535, i16 65280>
@@ -1930,8 +1875,7 @@ define <8 x i16> @and16imm4s_lsl24(<8 x i16> %a) {
;
; CHECK-GI-LABEL: and16imm4s_lsl24:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI127_0
-; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI127_0]
+; CHECK-GI-NEXT: mvni v1.4s, #254, lsl #24
; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-GI-NEXT: ret
%tmp1 = and <8 x i16> %a, < i16 65535, i16 511, i16 65535, i16 511, i16 65535, i16 511, i16 65535, i16 511>
@@ -2006,8 +1950,7 @@ define <8 x i8> @and8imm4h_lsl0(<8 x i8> %a) {
;
; CHECK-GI-LABEL: and8imm4h_lsl0:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI132_0
-; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI132_0]
+; CHECK-GI-NEXT: movi d1, #0xff00ff00ff00ff00
; CHECK-GI-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-GI-NEXT: ret
%tmp1 = and <8 x i8> %a, < i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255>
@@ -2022,8 +1965,7 @@ define <8 x i8> @and8imm4h_lsl8(<8 x i8> %a) {
;
; CHECK-GI-LABEL: and8imm4h_lsl8:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI133_0
-; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI133_0]
+; CHECK-GI-NEXT: movi d1, #0xff00ff00ff00ff
; CHECK-GI-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-GI-NEXT: ret
%tmp1 = and <8 x i8> %a, < i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0>
@@ -2100,8 +2042,7 @@ define <16 x i8> @and8imm8h_lsl0(<16 x i8> %a) {
;
; CHECK-GI-LABEL: and8imm8h_lsl0:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI138_0
-; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI138_0]
+; CHECK-GI-NEXT: movi v1.2d, #0xff00ff00ff00ff00
; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-GI-NEXT: ret
%tmp1 = and <16 x i8> %a, < i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255 >
@@ -2116,8 +2057,7 @@ define <16 x i8> @and8imm8h_lsl8(<16 x i8> %a) {
;
; CHECK-GI-LABEL: and8imm8h_lsl8:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI139_0
-; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI139_0]
+; CHECK-GI-NEXT: movi v1.2d, #0xff00ff00ff00ff
; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-GI-NEXT: ret
%tmp1 = and <16 x i8> %a, <i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0 >
@@ -2280,8 +2220,7 @@ define <8 x i8> @orr8imm2s_lsl0(<8 x i8> %a) {
;
; CHECK-GI-LABEL: orr8imm2s_lsl0:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI148_0
-; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI148_0]
+; CHECK-GI-NEXT: movi d1, #0x0000ff000000ff
; CHECK-GI-NEXT: orr v0.8b, v0.8b, v1.8b
; CHECK-GI-NEXT: ret
%tmp1 = or <8 x i8> %a, < i8 255, i8 0, i8 0, i8 0, i8 255, i8 0, i8 0, i8 0>
@@ -2296,8 +2235,7 @@ define <8 x i8> @orr8imm2s_lsl8(<8 x i8> %a) {
;
; CHECK-GI-LABEL: orr8imm2s_lsl8:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI149_0
-; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI149_0]
+; CHECK-GI-NEXT: movi d1, #0x00ff000000ff00
; CHECK-GI-NEXT: orr v0.8b, v0.8b, v1.8b
; CHECK-GI-NEXT: ret
%tmp1 = or <8 x i8> %a, < i8 0, i8 255, i8 0, i8 0, i8 0, i8 255, i8 0, i8 0>
@@ -2312,8 +2250,7 @@ define <8 x i8> @orr8imm2s_lsl16(<8 x i8> %a) {
;
; CHECK-GI-LABEL: orr8imm2s_lsl16:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI150_0
-; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI150_0]
+; CHECK-GI-NEXT: movi d1, #0xff000000ff0000
; CHECK-GI-NEXT: orr v0.8b, v0.8b, v1.8b
; CHECK-GI-NEXT: ret
%tmp1 = or <8 x i8> %a, < i8 0, i8 0, i8 255, i8 0, i8 0, i8 0, i8 255, i8 0>
@@ -2328,8 +2265,7 @@ define <8 x i8> @orr8imm2s_lsl24(<8 x i8> %a) {
;
; CHECK-GI-LABEL: orr8imm2s_lsl24:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI151_0
-; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI151_0]
+; CHECK-GI-NEXT: movi d1, #0xff000000ff000000
; CHECK-GI-NEXT: orr v0.8b, v0.8b, v1.8b
; CHECK-GI-NEXT: ret
%tmp1 = or <8 x i8> %a, < i8 0, i8 0, i8 0, i8 255, i8 0, i8 0, i8 0, i8 255>
@@ -2344,8 +2280,7 @@ define <4 x i16> @orr16imm2s_lsl0(<4 x i16> %a) {
;
; CHECK-GI-LABEL: orr16imm2s_lsl0:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI152_0
-; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI152_0]
+; CHECK-GI-NEXT: movi d1, #0x0000ff000000ff
; CHECK-GI-NEXT: orr v0.8b, v0.8b, v1.8b
; CHECK-GI-NEXT: ret
%tmp1 = or <4 x i16> %a, < i16 255, i16 0, i16 255, i16 0>
@@ -2360,8 +2295,7 @@ define <4 x i16> @orr16imm2s_lsl8(<4 x i16> %a) {
;
; CHECK-GI-LABEL: orr16imm2s_lsl8:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI153_0
-; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI153_0]
+; CHECK-GI-NEXT: movi d1, #0x00ff000000ff00
; CHECK-GI-NEXT: orr v0.8b, v0.8b, v1.8b
; CHECK-GI-NEXT: ret
%tmp1 = or <4 x i16> %a, < i16 65280, i16 0, i16 65280, i16 0>
@@ -2376,8 +2310,7 @@ define <4 x i16> @orr16imm2s_lsl16(<4 x i16> %a) {
;
; CHECK-GI-LABEL: orr16imm2s_lsl16:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI154_0
-; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI154_0]
+; CHECK-GI-NEXT: movi d1, #0xff000000ff0000
; CHECK-GI-NEXT: orr v0.8b, v0.8b, v1.8b
; CHECK-GI-NEXT: ret
%tmp1 = or <4 x i16> %a, < i16 0, i16 255, i16 0, i16 255>
@@ -2392,8 +2325,7 @@ define <4 x i16> @orr16imm2s_lsl24(<4 x i16> %a) {
;
; CHECK-GI-LABEL: orr16imm2s_lsl24:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI155_0
-; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI155_0]
+; CHECK-GI-NEXT: movi d1, #0xff000000ff000000
; CHECK-GI-NEXT: orr v0.8b, v0.8b, v1.8b
; CHECK-GI-NEXT: ret
%tmp1 = or <4 x i16> %a, < i16 0, i16 65280, i16 0, i16 65280>
@@ -2472,8 +2404,7 @@ define <16 x i8> @orr8imm4s_lsl0(<16 x i8> %a) {
;
; CHECK-GI-LABEL: orr8imm4s_lsl0:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI160_0
-; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI160_0]
+; CHECK-GI-NEXT: movi v1.2d, #0x0000ff000000ff
; CHECK-GI-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-GI-NEXT: ret
%tmp1 = or <16 x i8> %a, < i8 255, i8 0, i8 0, i8 0, i8 255, i8 0, i8 0, i8 0, i8 255, i8 0, i8 0, i8 0, i8 255, i8 0, i8 0, i8 0>
@@ -2488,8 +2419,7 @@ define <16 x i8> @orr8imm4s_lsl8(<16 x i8> %a) {
;
; CHECK-GI-LABEL: orr8imm4s_lsl8:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI161_0
-; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI161_0]
+; CHECK-GI-NEXT: movi v1.2d, #0x00ff000000ff00
; CHECK-GI-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-GI-NEXT: ret
%tmp1 = or <16 x i8> %a, < i8 0, i8 255, i8 0, i8 0, i8 0, i8 255, i8 0, i8 0, i8 0, i8 255, i8 0, i8 0, i8 0, i8 255, i8 0, i8 0>
@@ -2504,8 +2434,7 @@ define <16 x i8> @orr8imm4s_lsl16(<16 x i8> %a) {
;
; CHECK-GI-LABEL: orr8imm4s_lsl16:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI162_0
-; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI162_0]
+; CHECK-GI-NEXT: movi v1.2d, #0xff000000ff0000
; CHECK-GI-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-GI-NEXT: ret
%tmp1 = or <16 x i8> %a, < i8 0, i8 0, i8 255, i8 0, i8 0, i8 0, i8 255, i8 0, i8 0, i8 0, i8 255, i8 0, i8 0, i8 0, i8 255, i8 0>
@@ -2520,8 +2449,7 @@ define <16 x i8> @orr8imm4s_lsl24(<16 x i8> %a) {
;
; CHECK-GI-LABEL: orr8imm4s_lsl24:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI163_0
-; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI163_0]
+; CHECK-GI-NEXT: movi v1.2d, #0xff000000ff000000
; CHECK-GI-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-GI-NEXT: ret
%tmp1 = or <16 x i8> %a, < i8 0, i8 0, i8 0, i8 255, i8 0, i8 0, i8 0, i8 255, i8 0, i8 0, i8 0, i8 255, i8 0, i8 0, i8 0, i8 255>
@@ -2536,8 +2464,7 @@ define <8 x i16> @orr16imm4s_lsl0(<8 x i16> %a) {
;
; CHECK-GI-LABEL: orr16imm4s_lsl0:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI164_0
-; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI164_0]
+; CHECK-GI-NEXT: movi v1.2d, #0x0000ff000000ff
; CHECK-GI-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-GI-NEXT: ret
%tmp1 = or <8 x i16> %a, < i16 255, i16 0, i16 255, i16 0, i16 255, i16 0, i16 255, i16 0>
@@ -2552,8 +2479,7 @@ define <8 x i16> @orr16imm4s_lsl8(<8 x i16> %a) {
;
; CHECK-GI-LABEL: orr16imm4s_lsl8:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI165_0
-; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI165_0]
+; CHECK-GI-NEXT: movi v1.2d, #0x00ff000000ff00
; CHECK-GI-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-GI-NEXT: ret
%tmp1 = or <8 x i16> %a, < i16 65280, i16 0, i16 65280, i16 0, i16 65280, i16 0, i16 65280, i16 0>
@@ -2568,8 +2494,7 @@ define <8 x i16> @orr16imm4s_lsl16(<8 x i16> %a) {
;
; CHECK-GI-LABEL: orr16imm4s_lsl16:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI166_0
-; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI166_0]
+; CHECK-GI-NEXT: movi v1.2d, #0xff000000ff0000
; CHECK-GI-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-GI-NEXT: ret
%tmp1 = or <8 x i16> %a, < i16 0, i16 255, i16 0, i16 255, i16 0, i16 255, i16 0, i16 255>
@@ -2584,8 +2509,7 @@ define <8 x i16> @orr16imm4s_lsl24(<8 x i16> %a) {
;
; CHECK-GI-LABEL: orr16imm4s_lsl24:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI167_0
-; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI167_0]
+; CHECK-GI-NEXT: movi v1.2d, #0xff000000ff000000
; CHECK-GI-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-GI-NEXT: ret
%tmp1 = or <8 x i16> %a, < i16 0, i16 65280, i16 0, i16 65280, i16 0, i16 65280, i16 0, i16 65280>
@@ -2660,8 +2584,7 @@ define <8 x i8> @orr8imm4h_lsl0(<8 x i8> %a) {
;
; CHECK-GI-LABEL: orr8imm4h_lsl0:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI172_0
-; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI172_0]
+; CHECK-GI-NEXT: movi d1, #0xff00ff00ff00ff
; CHECK-GI-NEXT: orr v0.8b, v0.8b, v1.8b
; CHECK-GI-NEXT: ret
%tmp1 = or <8 x i8> %a, < i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0>
@@ -2676,8 +2599,7 @@ define <8 x i8> @orr8imm4h_lsl8(<8 x i8> %a) {
;
; CHECK-GI-LABEL: orr8imm4h_lsl8:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI173_0
-; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI173_0]
+; CHECK-GI-NEXT: movi d1, #0xff00ff00ff00ff00
; CHECK-GI-NEXT: orr v0.8b, v0.8b, v1.8b
; CHECK-GI-NEXT: ret
%tmp1 = or <8 x i8> %a, < i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255>
@@ -2754,8 +2676,7 @@ define <16 x i8> @orr8imm8h_lsl0(<16 x i8> %a) {
;
; CHECK-GI-LABEL: orr8imm8h_lsl0:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI178_0
-; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI178_0]
+; CHECK-GI-NEXT: movi v1.2d, #0xff00ff00ff00ff
; CHECK-GI-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-GI-NEXT: ret
%tmp1 = or <16 x i8> %a, < i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0>
@@ -2770,8 +2691,7 @@ define <16 x i8> @orr8imm8h_lsl8(<16 x i8> %a) {
;
; CHECK-GI-LABEL: orr8imm8h_lsl8:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI179_0
-; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI179_0]
+; CHECK-GI-NEXT: movi v1.2d, #0xff00ff00ff00ff00
; CHECK-GI-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-GI-NEXT: ret
%tmp1 = or <16 x i8> %a, < i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255>
diff --git a/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll b/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll
index 9f646c28ce74a..9147183bf5ea5 100644
--- a/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll
@@ -1465,37 +1465,22 @@ define <2 x i64> @cmltz2xi64(<2 x i64> %A) {
}
define <8 x i1> @not_cmle8xi8(<8 x i8> %0) {
-; CHECK-SD-LABEL: not_cmle8xi8:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: movi v1.2s, #1
-; CHECK-SD-NEXT: cmgt v0.8b, v1.8b, v0.8b
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: not_cmle8xi8:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI133_0
-; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI133_0]
-; CHECK-GI-NEXT: cmgt v0.8b, v1.8b, v0.8b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: not_cmle8xi8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v1.2s, #1
+; CHECK-NEXT: cmgt v0.8b, v1.8b, v0.8b
+; CHECK-NEXT: ret
%cmp.i = icmp slt <8 x i8> %0, <i8 1, i8 0, i8 0, i8 0, i8 1, i8 0, i8 0, i8 0>
ret <8 x i1> %cmp.i
}
define <4 x i1> @not_cmle16xi8(<4 x i32> %0) {
-; CHECK-SD-LABEL: not_cmle16xi8:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: movi v1.8h, #1
-; CHECK-SD-NEXT: cmgt v0.4s, v1.4s, v0.4s
-; CHECK-SD-NEXT: xtn v0.4h, v0.4s
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: not_cmle16xi8:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: adrp x8, .LCPI134_0
-; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI134_0]
-; CHECK-GI-NEXT: cmgt v0.4s, v1.4s, v0.4s
-; CHECK-GI-NEXT: xtn v0.4h, v0.4s
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: not_cmle16xi8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movi v1.8h, #1
+; CHECK-NEXT: cmgt v0.4s, v1.4s, v0.4s
+; CHECK-NEXT: xtn v0.4h, v0.4s
+; CHECK-NEXT: ret
entry:
%bc = bitcast <16 x i8> <i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0> to <4 x i32>
%cmp.i = icmp slt <4 x i32> %0, %bc
diff --git a/llvm/test/CodeGen/AArch64/neon-mov.ll b/llvm/test/CodeGen/AArch64/neon-mov.ll
index ca5af2c7c452e..417c1de0c250c 100644
--- a/llvm/test/CodeGen/AArch64/neon-mov.ll
+++ b/llvm/test/CodeGen/AArch64/neon-mov.ll
@@ -159,29 +159,11 @@ define <4 x i16> @movi4h_lsl8() {
}
define <4 x i16> @movi4h_fneg() {
-; CHECK-NOFP16-SD-LABEL: movi4h_fneg:
-; CHECK-NOFP16-SD: // %bb.0:
-; CHECK-NOFP16-SD-NEXT: movi v0.4h, #127, lsl #8
-; CHECK-NOFP16-SD-NEXT: fneg v0.2s, v0.2s
-; CHECK-NOFP16-SD-NEXT: ret
-;
-; CHECK-FP16-SD-LABEL: movi4h_fneg:
-; CHECK-FP16-SD: // %bb.0:
-; CHECK-FP16-SD-NEXT: movi v0.4h, #127, lsl #8
-; CHECK-FP16-SD-NEXT: fneg v0.2s, v0.2s
-; CHECK-FP16-SD-NEXT: ret
-;
-; CHECK-NOFP16-GI-LABEL: movi4h_fneg:
-; CHECK-NOFP16-GI: // %bb.0:
-; CHECK-NOFP16-GI-NEXT: adrp x8, .LCPI18_0
-; CHECK-NOFP16-GI-NEXT: ldr d0, [x8, :lo12:.LCPI18_0]
-; CHECK-NOFP16-GI-NEXT: ret
-;
-; CHECK-FP16-GI-LABEL: movi4h_fneg:
-; CHECK-FP16-GI: // %bb.0:
-; CHECK-FP16-GI-NEXT: adrp x8, .LCPI18_0
-; CHECK-FP16-GI-NEXT: ldr d0, [x8, :lo12:.LCPI18_0]
-; CHECK-FP16-GI-NEXT: ret
+; CHECK-LABEL: movi4h_fneg:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v0.4h, #127, lsl #8
+; CHECK-NEXT: fneg v0.2s, v0.2s
+; CHECK-NEXT: ret
ret <4 x i16> <i16 32512, i16 65280, i16 32512, i16 65280>
}
@@ -202,29 +184,11 @@ define <8 x i16> @movi8h_lsl8() {
}
define <8 x i16> @movi8h_fneg() {
-; CHECK-NOFP16-SD-LABEL: movi8h_fneg:
-; CHECK-NOFP16-SD: // %bb.0:
-; CHECK-NOFP16-SD-NEXT: movi v0.8h, #127, lsl #8
-; CHECK-NOFP16-SD-NEXT: fneg v0.4s, v0.4s
-; CHECK-NOFP16-SD-NEXT: ret
-;
-; CHECK-FP16-SD-LABEL: movi8h_fneg:
-; CHECK-FP16-SD: // %bb.0:
-; CHECK-FP16-SD-NEXT: movi v0.8h, #127, lsl #8
-; CHECK-FP16-SD-NEXT: fneg v0.4s, v0.4s
-; CHECK-FP16-SD-NEXT: ret
-;
-; CHECK-NOFP16-GI-LABEL: movi8h_fneg:
-; CHECK-NOFP16-GI: // %bb.0:
-; CHECK-NOFP16-GI-NEXT: adrp x8, .LCPI21_0
-; CHECK-NOFP16-GI-NEXT: ldr q0, [x8, :lo12:.LCPI21_0]
-; CHECK-NOFP16-GI-NEXT: ret
-;
-; CHECK-FP16-GI-LABEL: movi8h_fneg:
-; CHECK-FP16-GI: // %bb.0:
-; CHECK-FP16-GI-NEXT: adrp x8, .LCPI21_0
-; CHECK-FP16-GI-NEXT: ldr q0, [x8, :lo12:.LCPI21_0]
-; CHECK-FP16-GI-NEXT: ret
+; CHECK-LABEL: movi8h_fneg:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v0.8h, #127, lsl #8
+; CHECK-NEXT: fneg v0.4s, v0.4s
+; CHECK-NEXT: ret
ret <8 x i16> <i16 32512, i16 65280, i16 32512, i16 65280, i16 32512, i16 65280, i16 32512, i16 65280>
}
@@ -535,11 +499,29 @@ define <2 x double> @fmov2d() {
}
define <2 x double> @fmov2d_neg0() {
-; CHECK-LABEL: fmov2d_neg0:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v0.2d, #0000000000000000
-; CHECK-NEXT: fneg v0.2d, v0.2d
-; CHECK-NEXT: ret
+; CHECK-NOFP16-SD-LABEL: fmov2d_neg0:
+; CHECK-NOFP16-SD: // %bb.0:
+; CHECK-NOFP16-SD-NEXT: movi d0, #0000000000000000
+; CHECK-NOFP16-SD-NEXT: fneg v0.2d, v0.2d
+; CHECK-NOFP16-SD-NEXT: ret
+;
+; CHECK-FP16-SD-LABEL: fmov2d_neg0:
+; CHECK-FP16-SD: // %bb.0:
+; CHECK-FP16-SD-NEXT: movi d0, #0000000000000000
+; CHECK-FP16-SD-NEXT: fneg v0.2d, v0.2d
+; CHECK-FP16-SD-NEXT: ret
+;
+; CHECK-NOFP16-GI-LABEL: fmov2d_neg0:
+; CHECK-NOFP16-GI: // %bb.0:
+; CHECK-NOFP16-GI-NEXT: movi v0.2d, #0000000000000000
+; CHECK-NOFP16-GI-NEXT: fneg v0.2d, v0.2d
+; CHECK-NOFP16-GI-NEXT: ret
+;
+; CHECK-FP16-GI-LABEL: fmov2d_neg0:
+; CHECK-FP16-GI: // %bb.0:
+; CHECK-FP16-GI-NEXT: movi v0.2d, #0000000000000000
+; CHECK-FP16-GI-NEXT: fneg v0.2d, v0.2d
+; CHECK-FP16-GI-NEXT: ret
ret <2 x double> <double -0.0, double -0.0>
}
@@ -571,27 +553,10 @@ define <1 x double> @fmov1d_neg0() {
}
define <2 x i32> @movi1d_1() {
-; CHECK-NOFP16-SD-LABEL: movi1d_1:
-; CHECK-NOFP16-SD: // %bb.0:
-; CHECK-NOFP16-SD-NEXT: movi d0, #0x00ffffffff0000
-; CHECK-NOFP16-SD-NEXT: ret
-;
-; CHECK-FP16-SD-LABEL: movi1d_1:
-; CHECK-FP16-SD: // %bb.0:
-; CHECK-FP16-SD-NEXT: movi d0, #0x00ffffffff0000
-; CHECK-FP16-SD-NEXT: ret
-;
-; CHECK-NOFP16-GI-LABEL: movi1d_1:
-; CHECK-NOFP16-GI: // %bb.0:
-; CHECK-NOFP16-GI-NEXT: adrp x8, .LCPI56_0
-; CHECK-NOFP16-GI-NEXT: ldr d0, [x8, :lo12:.LCPI56_0]
-; CHECK-NOFP16-GI-NEXT: ret
-;
-; CHECK-FP16-GI-LABEL: movi1d_1:
-; CHECK-FP16-GI: // %bb.0:
-; CHECK-FP16-GI-NEXT: adrp x8, .LCPI56_0
-; CHECK-FP16-GI-NEXT: ldr d0, [x8, :lo12:.LCPI56_0]
-; CHECK-FP16-GI-NEXT: ret
+; CHECK-LABEL: movi1d_1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi d0, #0x00ffffffff0000
+; CHECK-NEXT: ret
ret <2 x i32> <i32 -65536, i32 65535>
}
@@ -616,18 +581,16 @@ define <2 x i32> @movi1d() {
;
; CHECK-NOFP16-GI-LABEL: movi1d:
; CHECK-NOFP16-GI: // %bb.0:
-; CHECK-NOFP16-GI-NEXT: adrp x8, .LCPI57_1
-; CHECK-NOFP16-GI-NEXT: adrp x9, .LCPI57_0
-; CHECK-NOFP16-GI-NEXT: ldr d0, [x8, :lo12:.LCPI57_1]
-; CHECK-NOFP16-GI-NEXT: ldr d1, [x9, :lo12:.LCPI57_0]
+; CHECK-NOFP16-GI-NEXT: movi d1, #0x00ffffffff0000
+; CHECK-NOFP16-GI-NEXT: adrp x8, .LCPI57_0
+; CHECK-NOFP16-GI-NEXT: ldr d0, [x8, :lo12:.LCPI57_0]
; CHECK-NOFP16-GI-NEXT: b test_movi1d
;
; CHECK-FP16-GI-LABEL: movi1d:
; CHECK-FP16-GI: // %bb.0:
-; CHECK-FP16-GI-NEXT: adrp x8, .LCPI57_1
-; CHECK-FP16-GI-NEXT: adrp x9, .LCPI57_0
-; CHECK-FP16-GI-NEXT: ldr d0, [x8, :lo12:.LCPI57_1]
-; CHECK-FP16-GI-NEXT: ldr d1, [x9, :lo12:.LCPI57_0]
+; CHECK-FP16-GI-NEXT: movi d1, #0x00ffffffff0000
+; CHECK-FP16-GI-NEXT: adrp x8, .LCPI57_0
+; CHECK-FP16-GI-NEXT: ldr d0, [x8, :lo12:.LCPI57_0]
; CHECK-FP16-GI-NEXT: b test_movi1d
%1 = tail call <2 x i32> @test_movi1d(<2 x i32> <i32 -2147483648, i32 2147450880>, <2 x i32> <i32 -65536, i32 65535>)
ret <2 x i32> %1
diff --git a/llvm/test/CodeGen/AArch64/qmovn.ll b/llvm/test/CodeGen/AArch64/qmovn.ll
index dc1b7df9b32ae..9d58e94d791a7 100644
--- a/llvm/test/CodeGen/AArch64/qmovn.ll
+++ b/llvm/test/CodeGen/AArch64/qmovn.ll
@@ -638,7 +638,8 @@ define <4 x i16> @sminsmax_range_unsigned_i64_to_i16(<2 x i16> %x, <2 x i64> %y)
; CHECK-SD-NEXT: movi v3.2d, #0x0000000000ffff
; CHECK-SD-NEXT: and v1.16b, v1.16b, v2.16b
; CHECK-SD-NEXT: cmgt v2.2d, v3.2d, v1.2d
-; CHECK-SD-NEXT: bif v1.16b, v3.16b, v2.16b
+; CHECK-SD-NEXT: and v1.16b, v1.16b, v2.16b
+; CHECK-SD-NEXT: orn v1.16b, v1.16b, v2.16b
; CHECK-SD-NEXT: xtn v1.2s, v1.2d
; CHECK-SD-NEXT: uzp1 v0.4h, v0.4h, v1.4h
; CHECK-SD-NEXT: ret
@@ -709,8 +710,9 @@ define <4 x i16> @umin_range_unsigned_i64_to_i16(<2 x i16> %x, <2 x i64> %y) {
; CHECK-SD-LABEL: umin_range_unsigned_i64_to_i16:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: movi v2.2d, #0x0000000000ffff
-; CHECK-SD-NEXT: cmhi v3.2d, v2.2d, v1.2d
-; CHECK-SD-NEXT: bif v1.16b, v2.16b, v3.16b
+; CHECK-SD-NEXT: cmhi v2.2d, v2.2d, v1.2d
+; CHECK-SD-NEXT: and v1.16b, v1.16b, v2.16b
+; CHECK-SD-NEXT: orn v1.16b, v1.16b, v2.16b
; CHECK-SD-NEXT: xtn v1.2s, v1.2d
; CHECK-SD-NEXT: uzp1 v0.4h, v0.4h, v1.4h
; CHECK-SD-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-insert-vector-elt.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-insert-vector-elt.ll
index 6f4d257039bca..8f2e068fd020a 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-insert-vector-elt.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-insert-vector-elt.ll
@@ -255,8 +255,7 @@ define void @insertelement_v64f32(ptr %a, ptr %b) vscale_range(16,0) #0 {
define <1 x double> @insertelement_v1f64(<1 x double> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: insertelement_v1f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov x8, #4617315517961601024 // =0x4014000000000000
-; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: fmov d0, #5.00000000
; CHECK-NEXT: ret
%r = insertelement <1 x double> %op1, double 5.0, i64 0
ret <1 x double> %r
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll b/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll
index f561502445bbe..b8a3aa46bdb53 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll
@@ -81,9 +81,8 @@ define half @add_v3HalfH(<3 x half> %bin.rdx) {
;
; CHECK-GI-FP16-LABEL: add_v3HalfH:
; CHECK-GI-FP16: // %bb.0:
-; CHECK-GI-FP16-NEXT: adrp x8, .LCPI2_0
+; CHECK-GI-FP16-NEXT: movi v1.4h, #128, lsl #8
; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI2_0]
; CHECK-GI-FP16-NEXT: mov v0.h[3], v1.h[0]
; CHECK-GI-FP16-NEXT: faddp v0.4h, v0.4h, v0.4h
; CHECK-GI-FP16-NEXT: faddp h0, v0.2h
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization-nan.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization-nan.ll
index 2368b0288ccb7..1e04a0341ad17 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization-nan.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization-nan.ll
@@ -553,16 +553,14 @@ define half @test_v11f16_ninf(<11 x half> %a) nounwind {
define float @test_v3f32(<3 x float> %a) nounwind {
; CHECK-NOFP-SD-LABEL: test_v3f32:
; CHECK-NOFP-SD: // %bb.0:
-; CHECK-NOFP-SD-NEXT: mov w8, #-4194304 // =0xffc00000
-; CHECK-NOFP-SD-NEXT: fmov s1, w8
+; CHECK-NOFP-SD-NEXT: mvni v1.2s, #63, msl #16
; CHECK-NOFP-SD-NEXT: mov v0.s[3], v1.s[0]
; CHECK-NOFP-SD-NEXT: fmaxnmv s0, v0.4s
; CHECK-NOFP-SD-NEXT: ret
;
; CHECK-FP-SD-LABEL: test_v3f32:
; CHECK-FP-SD: // %bb.0:
-; CHECK-FP-SD-NEXT: mov w8, #-4194304 // =0xffc00000
-; CHECK-FP-SD-NEXT: fmov s1, w8
+; CHECK-FP-SD-NEXT: mvni v1.2s, #63, msl #16
; CHECK-FP-SD-NEXT: mov v0.s[3], v1.s[0]
; CHECK-FP-SD-NEXT: fmaxnmv s0, v0.4s
; CHECK-FP-SD-NEXT: ret
@@ -589,16 +587,14 @@ define float @test_v3f32(<3 x float> %a) nounwind {
define float @test_v3f32_ninf(<3 x float> %a) nounwind {
; CHECK-NOFP-SD-LABEL: test_v3f32_ninf:
; CHECK-NOFP-SD: // %bb.0:
-; CHECK-NOFP-SD-NEXT: mov w8, #-4194304 // =0xffc00000
-; CHECK-NOFP-SD-NEXT: fmov s1, w8
+; CHECK-NOFP-SD-NEXT: mvni v1.2s, #63, msl #16
; CHECK-NOFP-SD-NEXT: mov v0.s[3], v1.s[0]
; CHECK-NOFP-SD-NEXT: fmaxnmv s0, v0.4s
; CHECK-NOFP-SD-NEXT: ret
;
; CHECK-FP-SD-LABEL: test_v3f32_ninf:
; CHECK-FP-SD: // %bb.0:
-; CHECK-FP-SD-NEXT: mov w8, #-4194304 // =0xffc00000
-; CHECK-FP-SD-NEXT: fmov s1, w8
+; CHECK-FP-SD-NEXT: mvni v1.2s, #63, msl #16
; CHECK-FP-SD-NEXT: mov v0.s[3], v1.s[0]
; CHECK-FP-SD-NEXT: fmaxnmv s0, v0.4s
; CHECK-FP-SD-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll
index d81952087086e..4d665b2a599a7 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll
@@ -553,16 +553,14 @@ define half @test_v11f16_ninf(<11 x half> %a) nounwind {
define float @test_v3f32(<3 x float> %a) nounwind {
; CHECK-NOFP-SD-LABEL: test_v3f32:
; CHECK-NOFP-SD: // %bb.0:
-; CHECK-NOFP-SD-NEXT: mov w8, #-8388608 // =0xff800000
-; CHECK-NOFP-SD-NEXT: fmov s1, w8
+; CHECK-NOFP-SD-NEXT: mvni v1.2s, #127, msl #16
; CHECK-NOFP-SD-NEXT: mov v0.s[3], v1.s[0]
; CHECK-NOFP-SD-NEXT: fmaxnmv s0, v0.4s
; CHECK-NOFP-SD-NEXT: ret
;
; CHECK-FP-SD-LABEL: test_v3f32:
; CHECK-FP-SD: // %bb.0:
-; CHECK-FP-SD-NEXT: mov w8, #-8388608 // =0xff800000
-; CHECK-FP-SD-NEXT: fmov s1, w8
+; CHECK-FP-SD-NEXT: mvni v1.2s, #127, msl #16
; CHECK-FP-SD-NEXT: mov v0.s[3], v1.s[0]
; CHECK-FP-SD-NEXT: fmaxnmv s0, v0.4s
; CHECK-FP-SD-NEXT: ret
@@ -589,16 +587,14 @@ define float @test_v3f32(<3 x float> %a) nounwind {
define float @test_v3f32_ninf(<3 x float> %a) nounwind {
; CHECK-NOFP-SD-LABEL: test_v3f32_ninf:
; CHECK-NOFP-SD: // %bb.0:
-; CHECK-NOFP-SD-NEXT: mov w8, #-8388609 // =0xff7fffff
-; CHECK-NOFP-SD-NEXT: fmov s1, w8
+; CHECK-NOFP-SD-NEXT: mvni v1.2s, #128, lsl #16
; CHECK-NOFP-SD-NEXT: mov v0.s[3], v1.s[0]
; CHECK-NOFP-SD-NEXT: fmaxnmv s0, v0.4s
; CHECK-NOFP-SD-NEXT: ret
;
; CHECK-FP-SD-LABEL: test_v3f32_ninf:
; CHECK-FP-SD: // %bb.0:
-; CHECK-FP-SD-NEXT: mov w8, #-8388609 // =0xff7fffff
-; CHECK-FP-SD-NEXT: fmov s1, w8
+; CHECK-FP-SD-NEXT: mvni v1.2s, #128, lsl #16
; CHECK-FP-SD-NEXT: mov v0.s[3], v1.s[0]
; CHECK-FP-SD-NEXT: fmaxnmv s0, v0.4s
; CHECK-FP-SD-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmaximum.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmaximum.ll
index 1d5b70796bdb1..fcb06fb6c9725 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fmaximum.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fmaximum.ll
@@ -438,16 +438,14 @@ define half @test_v11f16(<11 x half> %a) nounwind {
define float @test_v3f32(<3 x float> %a) nounwind {
; CHECK-NOFP-SD-LABEL: test_v3f32:
; CHECK-NOFP-SD: // %bb.0:
-; CHECK-NOFP-SD-NEXT: mov w8, #-8388608 // =0xff800000
-; CHECK-NOFP-SD-NEXT: fmov s1, w8
+; CHECK-NOFP-SD-NEXT: mvni v1.2s, #127, msl #16
; CHECK-NOFP-SD-NEXT: mov v0.s[3], v1.s[0]
; CHECK-NOFP-SD-NEXT: fmaxv s0, v0.4s
; CHECK-NOFP-SD-NEXT: ret
;
; CHECK-FP-SD-LABEL: test_v3f32:
; CHECK-FP-SD: // %bb.0:
-; CHECK-FP-SD-NEXT: mov w8, #-8388608 // =0xff800000
-; CHECK-FP-SD-NEXT: fmov s1, w8
+; CHECK-FP-SD-NEXT: mvni v1.2s, #127, msl #16
; CHECK-FP-SD-NEXT: mov v0.s[3], v1.s[0]
; CHECK-FP-SD-NEXT: fmaxv s0, v0.4s
; CHECK-FP-SD-NEXT: ret
@@ -475,16 +473,14 @@ define float @test_v3f32(<3 x float> %a) nounwind {
define float @test_v3f32_ninf(<3 x float> %a) nounwind {
; CHECK-NOFP-SD-LABEL: test_v3f32_ninf:
; CHECK-NOFP-SD: // %bb.0:
-; CHECK-NOFP-SD-NEXT: mov w8, #-8388609 // =0xff7fffff
-; CHECK-NOFP-SD-NEXT: fmov s1, w8
+; CHECK-NOFP-SD-NEXT: mvni v1.2s, #128, lsl #16
; CHECK-NOFP-SD-NEXT: mov v0.s[3], v1.s[0]
; CHECK-NOFP-SD-NEXT: fmaxv s0, v0.4s
; CHECK-NOFP-SD-NEXT: ret
;
; CHECK-FP-SD-LABEL: test_v3f32_ninf:
; CHECK-FP-SD: // %bb.0:
-; CHECK-FP-SD-NEXT: mov w8, #-8388609 // =0xff7fffff
-; CHECK-FP-SD-NEXT: fmov s1, w8
+; CHECK-FP-SD-NEXT: mvni v1.2s, #128, lsl #16
; CHECK-FP-SD-NEXT: mov v0.s[3], v1.s[0]
; CHECK-FP-SD-NEXT: fmaxv s0, v0.4s
; CHECK-FP-SD-NEXT: ret
diff --git a/llvm/unittests/Target/AArch64/AArch64SelectionDAGTest.cpp b/llvm/unittests/Target/AArch64/AArch64SelectionDAGTest.cpp
index 12b7763274f6c..35f34f8604437 100644
--- a/llvm/unittests/Target/AArch64/AArch64SelectionDAGTest.cpp
+++ b/llvm/unittests/Target/AArch64/AArch64SelectionDAGTest.cpp
@@ -549,158 +549,41 @@ TEST_F(AArch64SelectionDAGTest, ComputeKnownBits_UADDO_CARRY) {
}
// Piggy-backing on the AArch64 tests to verify SelectionDAG::computeKnownBits.
-// Attempt to FREEZE the MOV/MVN nodes to show that they can still be analysed.
+// Attempt to FREEZE the MOV nodes to show that they can still be analysed.
TEST_F(AArch64SelectionDAGTest, ComputeKnownBits_MOVI) {
SDLoc Loc;
- auto IntSca32VT = MVT::i32;
- auto Int8Vec8VT = MVT::v8i8;
+ auto IntSca64VT = MVT::i64;
auto Int16Vec8VT = MVT::v16i8;
- auto Int4Vec16VT = MVT::v4i16;
- auto Int8Vec16VT = MVT::v8i16;
- auto Int2Vec32VT = MVT::v2i32;
- auto Int4Vec32VT = MVT::v4i32;
auto IntVec64VT = MVT::v1i64;
auto Int2Vec64VT = MVT::v2i64;
- auto N165 = DAG->getConstant(0x000000A5, Loc, IntSca32VT);
KnownBits Known;
- auto OpMOVIedit64 = DAG->getNode(AArch64ISD::MOVIedit, Loc, IntVec64VT, N165);
- Known = DAG->computeKnownBits(OpMOVIedit64);
+ auto N165 = DAG->getConstant(0xFF00FF0000FF00FF, Loc, IntSca64VT);
+ auto OpMOVI64 = DAG->getNode(AArch64ISD::MOVI, Loc, IntVec64VT, N165);
+ Known = DAG->computeKnownBits(OpMOVI64);
EXPECT_EQ(Known.Zero, APInt(64, 0x00FF00FFFF00FF00));
EXPECT_EQ(Known.One, APInt(64, 0xFF00FF0000FF00FF));
- auto OpMOVIedit128 =
- DAG->getNode(AArch64ISD::MOVIedit, Loc, Int2Vec64VT, N165);
- Known = DAG->computeKnownBits(OpMOVIedit128);
+ auto OpMOVI128 = DAG->getNode(AArch64ISD::MOVI, Loc, Int2Vec64VT, N165);
+ Known = DAG->computeKnownBits(OpMOVI128);
EXPECT_EQ(Known.Zero, APInt(64, 0x00FF00FFFF00FF00));
EXPECT_EQ(Known.One, APInt(64, 0xFF00FF0000FF00FF));
- auto FrMOVIedit128 = DAG->getFreeze(OpMOVIedit128);
- Known = DAG->computeKnownBits(FrMOVIedit128);
+ auto FrMOVI128 = DAG->getFreeze(OpMOVI128);
+ Known = DAG->computeKnownBits(FrMOVI128);
EXPECT_EQ(Known.Zero, APInt(64, 0x00FF00FFFF00FF00));
EXPECT_EQ(Known.One, APInt(64, 0xFF00FF0000FF00FF));
- auto N264 = DAG->getConstant(264, Loc, IntSca32VT);
- auto OpMOVImsl64 =
- DAG->getNode(AArch64ISD::MOVImsl, Loc, Int2Vec32VT, N165, N264);
- Known = DAG->computeKnownBits(OpMOVImsl64);
- EXPECT_EQ(Known.Zero, APInt(32, 0xFFFF5A00));
- EXPECT_EQ(Known.One, APInt(32, 0x0000A5FF));
-
- auto N272 = DAG->getConstant(272, Loc, IntSca32VT);
- auto OpMOVImsl128 =
- DAG->getNode(AArch64ISD::MOVImsl, Loc, Int4Vec32VT, N165, N272);
- Known = DAG->computeKnownBits(OpMOVImsl128);
- EXPECT_EQ(Known.Zero, APInt(32, 0xFF5A0000));
- EXPECT_EQ(Known.One, APInt(32, 0x00A5FFFF));
-
- auto FrMOVImsl128 = DAG->getFreeze(OpMOVImsl128);
- Known = DAG->computeKnownBits(FrMOVImsl128);
- EXPECT_EQ(Known.Zero, APInt(32, 0xFF5A0000));
- EXPECT_EQ(Known.One, APInt(32, 0x00A5FFFF));
-
- auto OpMVNImsl64 =
- DAG->getNode(AArch64ISD::MVNImsl, Loc, Int2Vec32VT, N165, N272);
- Known = DAG->computeKnownBits(OpMVNImsl64);
- EXPECT_EQ(Known.Zero, APInt(32, 0x00A5FFFF));
- EXPECT_EQ(Known.One, APInt(32, 0xFF5A0000));
-
- auto OpMVNImsl128 =
- DAG->getNode(AArch64ISD::MVNImsl, Loc, Int4Vec32VT, N165, N264);
- Known = DAG->computeKnownBits(OpMVNImsl128);
- EXPECT_EQ(Known.Zero, APInt(32, 0x0000A5FF));
- EXPECT_EQ(Known.One, APInt(32, 0xFFFF5A00));
-
- auto FrMVNImsl128 = DAG->getFreeze(OpMVNImsl128);
- Known = DAG->computeKnownBits(FrMVNImsl128);
- EXPECT_EQ(Known.Zero, APInt(32, 0x0000A5FF));
- EXPECT_EQ(Known.One, APInt(32, 0xFFFF5A00));
-
- auto N0 = DAG->getConstant(0, Loc, IntSca32VT);
- auto OpMOVIshift2Vec32 =
- DAG->getNode(AArch64ISD::MOVIshift, Loc, Int2Vec32VT, N165, N0);
- Known = DAG->computeKnownBits(OpMOVIshift2Vec32);
- EXPECT_EQ(Known.Zero, APInt(32, 0xFFFFFF5A));
- EXPECT_EQ(Known.One, APInt(32, 0x000000A5));
-
- auto N24 = DAG->getConstant(24, Loc, IntSca32VT);
- auto OpMOVIshift4Vec32 =
- DAG->getNode(AArch64ISD::MOVIshift, Loc, Int4Vec32VT, N165, N24);
- Known = DAG->computeKnownBits(OpMOVIshift4Vec32);
- EXPECT_EQ(Known.Zero, APInt(32, 0x5AFFFFFF));
- EXPECT_EQ(Known.One, APInt(32, 0xA5000000));
-
- auto FrMOVIshift4Vec32 = DAG->getFreeze(OpMOVIshift4Vec32);
- Known = DAG->computeKnownBits(FrMOVIshift4Vec32);
- EXPECT_EQ(Known.Zero, APInt(32, 0x5AFFFFFF));
- EXPECT_EQ(Known.One, APInt(32, 0xA5000000));
-
- auto OpMVNIshift2Vec32 =
- DAG->getNode(AArch64ISD::MVNIshift, Loc, Int2Vec32VT, N165, N24);
- Known = DAG->computeKnownBits(OpMVNIshift2Vec32);
- EXPECT_EQ(Known.Zero, APInt(32, 0xA5000000));
- EXPECT_EQ(Known.One, APInt(32, 0x5AFFFFFF));
-
- auto OpMVNIshift4Vec32 =
- DAG->getNode(AArch64ISD::MVNIshift, Loc, Int4Vec32VT, N165, N0);
- Known = DAG->computeKnownBits(OpMVNIshift4Vec32);
- EXPECT_EQ(Known.Zero, APInt(32, 0x000000A5));
- EXPECT_EQ(Known.One, APInt(32, 0xFFFFFF5A));
-
- auto FrMVNIshift4Vec32 = DAG->getFreeze(OpMVNIshift4Vec32);
- Known = DAG->computeKnownBits(FrMVNIshift4Vec32);
- EXPECT_EQ(Known.Zero, APInt(32, 0x000000A5));
- EXPECT_EQ(Known.One, APInt(32, 0xFFFFFF5A));
-
- auto N8 = DAG->getConstant(8, Loc, IntSca32VT);
- auto OpMOVIshift4Vec16 =
- DAG->getNode(AArch64ISD::MOVIshift, Loc, Int4Vec16VT, N165, N0);
- Known = DAG->computeKnownBits(OpMOVIshift4Vec16);
- EXPECT_EQ(Known.Zero, APInt(16, 0xFF5A));
- EXPECT_EQ(Known.One, APInt(16, 0x00A5));
-
- auto OpMOVIshift8Vec16 =
- DAG->getNode(AArch64ISD::MOVIshift, Loc, Int8Vec16VT, N165, N8);
- Known = DAG->computeKnownBits(OpMOVIshift8Vec16);
- EXPECT_EQ(Known.Zero, APInt(16, 0x5AFF));
- EXPECT_EQ(Known.One, APInt(16, 0xA500));
-
- auto FrMOVIshift8Vec16 = DAG->getFreeze(OpMOVIshift8Vec16);
- Known = DAG->computeKnownBits(FrMOVIshift8Vec16);
- EXPECT_EQ(Known.Zero, APInt(16, 0x5AFF));
- EXPECT_EQ(Known.One, APInt(16, 0xA500));
-
- auto OpMVNIshift4Vec16 =
- DAG->getNode(AArch64ISD::MVNIshift, Loc, Int4Vec16VT, N165, N8);
- Known = DAG->computeKnownBits(OpMVNIshift4Vec16);
- EXPECT_EQ(Known.Zero, APInt(16, 0xA500));
- EXPECT_EQ(Known.One, APInt(16, 0x5AFF));
-
- auto OpMVNIshift8Vec16 =
- DAG->getNode(AArch64ISD::MVNIshift, Loc, Int8Vec16VT, N165, N0);
- Known = DAG->computeKnownBits(OpMVNIshift8Vec16);
- EXPECT_EQ(Known.Zero, APInt(16, 0x00A5));
- EXPECT_EQ(Known.One, APInt(16, 0xFF5A));
-
- auto FrMVNIshift8Vec16 = DAG->getFreeze(OpMVNIshift8Vec16);
- Known = DAG->computeKnownBits(FrMVNIshift8Vec16);
- EXPECT_EQ(Known.Zero, APInt(16, 0x00A5));
- EXPECT_EQ(Known.One, APInt(16, 0xFF5A));
-
- auto OpMOVI8Vec8 = DAG->getNode(AArch64ISD::MOVI, Loc, Int8Vec8VT, N165);
- Known = DAG->computeKnownBits(OpMOVI8Vec8);
- EXPECT_EQ(Known.Zero, APInt(8, 0x5A));
- EXPECT_EQ(Known.One, APInt(8, 0xA5));
-
- auto OpMOVI16Vec8 = DAG->getNode(AArch64ISD::MOVI, Loc, Int16Vec8VT, N165);
- Known = DAG->computeKnownBits(OpMOVI16Vec8);
- EXPECT_EQ(Known.Zero, APInt(8, 0x5A));
- EXPECT_EQ(Known.One, APInt(8, 0xA5));
-
- auto FrMOVI16Vec8 = DAG->getFreeze(OpMOVI16Vec8);
- Known = DAG->computeKnownBits(FrMOVI16Vec8);
- EXPECT_EQ(Known.Zero, APInt(8, 0x5A));
- EXPECT_EQ(Known.One, APInt(8, 0xA5));
+ auto OpMOVI8 = DAG->getNode(AArch64ISD::MOVI, Loc, Int16Vec8VT, N165);
+ Known = DAG->computeKnownBits(OpMOVI8);
+ EXPECT_EQ(Known.Zero, APInt(8, 0x00));
+ EXPECT_EQ(Known.One, APInt(8, 0x00));
+
+ auto OneLow = DAG->getConstant(0x1133557799bbddff, Loc, IntSca64VT);
+ auto OpMOVI8OneLow = DAG->getNode(AArch64ISD::MOVI, Loc, Int16Vec8VT, OneLow);
+ Known = DAG->computeKnownBits(OpMOVI8OneLow);
+ EXPECT_EQ(Known.Zero, APInt(8, 0x00));
+ EXPECT_EQ(Known.One, APInt(8, 0x11));
}
// Piggy-backing on the AArch64 tests to verify SelectionDAG::computeKnownBits.
>From 29785d53a7f63b75868f46329a33d2b48295e73f Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Fri, 20 Mar 2026 17:04:34 +0000
Subject: [PATCH 2/3] Rebase and address comments / windows build failure.
---
llvm/lib/Target/AArch64/AArch64ExpandImm.cpp | 80 +++++++++----------
.../Target/AArch64/AArch64ISelDAGToDAG.cpp | 49 ++----------
.../Target/AArch64/AArch64ISelLowering.cpp | 7 +-
3 files changed, 48 insertions(+), 88 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp b/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp
index 9f89efebea9e1..818b207391e77 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp
@@ -736,106 +736,106 @@ bool AArch64_IMM::expandVectorMOVImm(
return true;
}
if (AArch64_AM::isAdvSIMDModImmType10(Value)) {
- Insn.push_back({Is64Bit ? AArch64::MOVID : AArch64::MOVIv2d_ns,
- AArch64_AM::encodeAdvSIMDModImmType10(Value), 0});
+ unsigned Opc = Is64Bit ? AArch64::MOVID : AArch64::MOVIv2d_ns;
+ Insn.push_back({Opc, AArch64_AM::encodeAdvSIMDModImmType10(Value), 0});
return true;
}
if (AArch64_AM::isAdvSIMDModImmType1(Value)) {
- Insn.push_back({Is64Bit ? AArch64::MOVIv2i32 : AArch64::MOVIv4i32,
- AArch64_AM::encodeAdvSIMDModImmType1(Value), 0});
+ unsigned Opc = Is64Bit ? AArch64::MOVIv2i32 : AArch64::MOVIv4i32;
+ Insn.push_back({Opc, AArch64_AM::encodeAdvSIMDModImmType1(Value), 0});
return true;
}
if (AArch64_AM::isAdvSIMDModImmType2(Value)) {
- Insn.push_back({Is64Bit ? AArch64::MOVIv2i32 : AArch64::MOVIv4i32,
- AArch64_AM::encodeAdvSIMDModImmType2(Value), 8});
+ unsigned Opc = Is64Bit ? AArch64::MOVIv2i32 : AArch64::MOVIv4i32;
+ Insn.push_back({Opc, AArch64_AM::encodeAdvSIMDModImmType2(Value), 8});
return true;
}
if (AArch64_AM::isAdvSIMDModImmType3(Value)) {
- Insn.push_back({Is64Bit ? AArch64::MOVIv2i32 : AArch64::MOVIv4i32,
- AArch64_AM::encodeAdvSIMDModImmType3(Value), 16});
+ unsigned Opc = Is64Bit ? AArch64::MOVIv2i32 : AArch64::MOVIv4i32;
+ Insn.push_back({Opc, AArch64_AM::encodeAdvSIMDModImmType3(Value), 16});
return true;
}
if (AArch64_AM::isAdvSIMDModImmType4(Value)) {
- Insn.push_back({Is64Bit ? AArch64::MOVIv2i32 : AArch64::MOVIv4i32,
- AArch64_AM::encodeAdvSIMDModImmType4(Value), 24});
+ unsigned Opc = Is64Bit ? AArch64::MOVIv2i32 : AArch64::MOVIv4i32;
+ Insn.push_back({Opc, AArch64_AM::encodeAdvSIMDModImmType4(Value), 24});
return true;
}
if (AArch64_AM::isAdvSIMDModImmType5(Value)) {
- Insn.push_back({Is64Bit ? AArch64::MOVIv4i16 : AArch64::MOVIv8i16,
- AArch64_AM::encodeAdvSIMDModImmType5(Value), 0});
+ unsigned Opc = Is64Bit ? AArch64::MOVIv4i16 : AArch64::MOVIv8i16;
+ Insn.push_back({Opc, AArch64_AM::encodeAdvSIMDModImmType5(Value), 0});
return true;
}
if (AArch64_AM::isAdvSIMDModImmType6(Value)) {
- Insn.push_back({Is64Bit ? AArch64::MOVIv4i16 : AArch64::MOVIv8i16,
- AArch64_AM::encodeAdvSIMDModImmType6(Value), 8});
+ unsigned Opc = Is64Bit ? AArch64::MOVIv4i16 : AArch64::MOVIv8i16;
+ Insn.push_back({Opc, AArch64_AM::encodeAdvSIMDModImmType6(Value), 8});
return true;
}
if (AArch64_AM::isAdvSIMDModImmType7(Value)) {
- Insn.push_back({Is64Bit ? AArch64::MOVIv2s_msl : AArch64::MOVIv4s_msl,
- AArch64_AM::encodeAdvSIMDModImmType7(Value), 264});
+ unsigned Opc = Is64Bit ? AArch64::MOVIv2s_msl : AArch64::MOVIv4s_msl;
+ Insn.push_back({Opc, AArch64_AM::encodeAdvSIMDModImmType7(Value), 264});
return true;
}
if (AArch64_AM::isAdvSIMDModImmType8(Value)) {
- Insn.push_back({Is64Bit ? AArch64::MOVIv2s_msl : AArch64::MOVIv4s_msl,
- AArch64_AM::encodeAdvSIMDModImmType8(Value), 272});
+ unsigned Opc = Is64Bit ? AArch64::MOVIv2s_msl : AArch64::MOVIv4s_msl;
+ Insn.push_back({Opc, AArch64_AM::encodeAdvSIMDModImmType8(Value), 272});
return true;
}
if (AArch64_AM::isAdvSIMDModImmType9(Value)) {
- Insn.push_back({Is64Bit ? AArch64::MOVIv8b_ns : AArch64::MOVIv16b_ns,
- AArch64_AM::encodeAdvSIMDModImmType9(Value), 0});
+ unsigned Opc = Is64Bit ? AArch64::MOVIv8b_ns : AArch64::MOVIv16b_ns;
+ Insn.push_back({Opc, AArch64_AM::encodeAdvSIMDModImmType9(Value), 0});
return true;
}
if (AArch64_AM::isAdvSIMDModImmType11(Value)) {
- Insn.push_back({Is64Bit ? AArch64::FMOVv2f32_ns : AArch64::FMOVv4f32_ns,
- AArch64_AM::encodeAdvSIMDModImmType11(Value), 0});
+ unsigned Opc = Is64Bit ? AArch64::FMOVv2f32_ns : AArch64::FMOVv4f32_ns;
+ Insn.push_back({Opc, AArch64_AM::encodeAdvSIMDModImmType11(Value), 0});
return true;
}
if (AArch64_AM::isAdvSIMDModImmType12(Value)) {
- Insn.push_back({Is64Bit ? AArch64::FMOVDi : AArch64::FMOVv2f64_ns,
- AArch64_AM::encodeAdvSIMDModImmType12(Value), 0});
+ unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVv2f64_ns;
+ Insn.push_back({Opc, AArch64_AM::encodeAdvSIMDModImmType12(Value), 0});
return true;
}
APInt NotImm = ~Imm;
Value = NotImm.trunc(64).getZExtValue();
if (AArch64_AM::isAdvSIMDModImmType1(Value)) {
- Insn.push_back({Is64Bit ? AArch64::MVNIv2i32 : AArch64::MVNIv4i32,
- AArch64_AM::encodeAdvSIMDModImmType1(Value), 0});
+ unsigned Opc = Is64Bit ? AArch64::MVNIv2i32 : AArch64::MVNIv4i32;
+ Insn.push_back({Opc, AArch64_AM::encodeAdvSIMDModImmType1(Value), 0});
return true;
}
if (AArch64_AM::isAdvSIMDModImmType2(Value)) {
- Insn.push_back({Is64Bit ? AArch64::MVNIv2i32 : AArch64::MVNIv4i32,
- AArch64_AM::encodeAdvSIMDModImmType2(Value), 8});
+ unsigned Opc = Is64Bit ? AArch64::MVNIv2i32 : AArch64::MVNIv4i32;
+ Insn.push_back({Opc, AArch64_AM::encodeAdvSIMDModImmType2(Value), 8});
return true;
}
if (AArch64_AM::isAdvSIMDModImmType3(Value)) {
- Insn.push_back({Is64Bit ? AArch64::MVNIv2i32 : AArch64::MVNIv4i32,
- AArch64_AM::encodeAdvSIMDModImmType3(Value), 16});
+ unsigned Opc = Is64Bit ? AArch64::MVNIv2i32 : AArch64::MVNIv4i32;
+ Insn.push_back({Opc, AArch64_AM::encodeAdvSIMDModImmType3(Value), 16});
return true;
}
if (AArch64_AM::isAdvSIMDModImmType4(Value)) {
- Insn.push_back({Is64Bit ? AArch64::MVNIv2i32 : AArch64::MVNIv4i32,
- AArch64_AM::encodeAdvSIMDModImmType4(Value), 24});
+ unsigned Opc = Is64Bit ? AArch64::MVNIv2i32 : AArch64::MVNIv4i32;
+ Insn.push_back({Opc, AArch64_AM::encodeAdvSIMDModImmType4(Value), 24});
return true;
}
if (AArch64_AM::isAdvSIMDModImmType5(Value)) {
- Insn.push_back({Is64Bit ? AArch64::MVNIv4i16 : AArch64::MVNIv8i16,
- AArch64_AM::encodeAdvSIMDModImmType5(Value), 0});
+ unsigned Opc = Is64Bit ? AArch64::MVNIv4i16 : AArch64::MVNIv8i16;
+ Insn.push_back({Opc, AArch64_AM::encodeAdvSIMDModImmType5(Value), 0});
return true;
}
if (AArch64_AM::isAdvSIMDModImmType6(Value)) {
- Insn.push_back({Is64Bit ? AArch64::MVNIv4i16 : AArch64::MVNIv8i16,
- AArch64_AM::encodeAdvSIMDModImmType6(Value), 8});
+ unsigned Opc = Is64Bit ? AArch64::MVNIv4i16 : AArch64::MVNIv8i16;
+ Insn.push_back({Opc, AArch64_AM::encodeAdvSIMDModImmType6(Value), 8});
return true;
}
if (AArch64_AM::isAdvSIMDModImmType7(Value)) {
- Insn.push_back({Is64Bit ? AArch64::MVNIv2s_msl : AArch64::MVNIv4s_msl,
- AArch64_AM::encodeAdvSIMDModImmType7(Value), 264});
+ unsigned Opc = Is64Bit ? AArch64::MVNIv2s_msl : AArch64::MVNIv4s_msl;
+ Insn.push_back({Opc, AArch64_AM::encodeAdvSIMDModImmType7(Value), 264});
return true;
}
if (AArch64_AM::isAdvSIMDModImmType8(Value)) {
- Insn.push_back({Is64Bit ? AArch64::MVNIv2s_msl : AArch64::MVNIv4s_msl,
- AArch64_AM::encodeAdvSIMDModImmType8(Value), 272});
+ unsigned Opc = Is64Bit ? AArch64::MVNIv2s_msl : AArch64::MVNIv4s_msl;
+ Insn.push_back({Opc, AArch64_AM::encodeAdvSIMDModImmType8(Value), 272});
return true;
}
}
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index c4e007e802d5a..c283d69a3d938 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -607,51 +607,12 @@ static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected) {
}
#endif
-static APInt DecodeFMOVImm(uint64_t Imm, unsigned RegWidth) {
- assert(RegWidth == 32 || RegWidth == 64);
- if (RegWidth == 32)
- return APInt(RegWidth,
- uint32_t(AArch64_AM::decodeAdvSIMDModImmType11(Imm)));
- return APInt(RegWidth, AArch64_AM::decodeAdvSIMDModImmType12(Imm));
-}
-
-// Decodes the raw integer splat value from a NEON splat operation.
-static std::optional<APInt> DecodeNEONSplat(SDValue N) {
- assert(N.getValueType().isInteger() && "Only integers are supported");
- if (N->getOpcode() == AArch64ISD::NVCAST)
- N = N->getOperand(0);
- unsigned SplatWidth = N.getScalarValueSizeInBits();
- if (N.getOpcode() == AArch64ISD::FMOV)
- return DecodeFMOVImm(N.getConstantOperandVal(0), SplatWidth);
- if (N->getOpcode() == AArch64ISD::MOVI)
- return APInt(SplatWidth, N.getConstantOperandVal(0));
- if (N->getOpcode() == AArch64ISD::MOVIshift)
- return APInt(SplatWidth, N.getConstantOperandVal(0)
- << N.getConstantOperandVal(1));
- if (N->getOpcode() == AArch64ISD::MVNIshift)
- return ~APInt(SplatWidth, N.getConstantOperandVal(0)
- << N.getConstantOperandVal(1));
- if (N->getOpcode() == AArch64ISD::MOVIedit)
- return APInt(SplatWidth, AArch64_AM::decodeAdvSIMDModImmType10(
- N.getConstantOperandVal(0)));
- if (N->getOpcode() == AArch64ISD::DUP)
- if (auto *Const = dyn_cast<ConstantSDNode>(N->getOperand(0)))
- return Const->getAPIntValue().trunc(SplatWidth);
- // TODO: Recognize more splat-like NEON operations. See ConstantBuildVector
- // in AArch64ISelLowering.
- return std::nullopt;
-}
-
-// If \p N is a NEON splat operation (movi, fmov, etc), return the splat value
-// matching the element size of N.
+// Decodes the integer splat value from a NEON splat operation.
static std::optional<APInt> GetNEONSplatValue(SDValue N) {
- unsigned SplatWidth = N.getScalarValueSizeInBits();
- if (std::optional<APInt> SplatVal = DecodeNEONSplat(N)) {
- if (SplatVal->getBitWidth() <= SplatWidth)
- return APInt::getSplat(SplatWidth, *SplatVal);
- if (SplatVal->isSplat(SplatWidth))
- return SplatVal->trunc(SplatWidth);
- }
+ unsigned RegWidth = N.getScalarValueSizeInBits();
+ if (N.getOpcode() == AArch64ISD::MOVI &&
+ N.getConstantOperandAPInt(0).isSplat(RegWidth))
+ return N.getConstantOperandAPInt(0).trunc(RegWidth);
return std::nullopt;
}
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 8a1a9da8cbaa8..fe6bbf1e0e0bd 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -13269,7 +13269,7 @@ bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
const APInt ImmInt = Imm.bitcastToAPInt();
if (!IsLegal && ImmInt.getBitWidth() <= 128) {
- // Try duplicating it to all lanes and see if we can usea vector movi.
+ // Try duplicating it to all lanes and see if we can use a vector movi.
APInt DefBits =
ImmInt.getBitWidth() == 128 ? ImmInt : APInt::getSplat(64, ImmInt);
SmallVector<AArch64_IMM::ImmInsnModel> Insn;
@@ -33086,9 +33086,8 @@ bool AArch64TargetLowering::canCreateUndefOrPoisonForTargetNode(
bool AArch64TargetLowering::isTargetCanonicalConstantNode(SDValue Op) const {
return Op.getOpcode() == AArch64ISD::DUP ||
Op.getOpcode() == AArch64ISD::MOVI ||
- // Ignoring fneg(movi(0)), because if it is folded to FPConstant(-0.0),
- // ISel will select fmov(mov i64 0x8000000000000000), resulting in a
- // fmov from fpr to gpr, which is more expensive than fneg(movi(0))
+ // ConstantBuildVector / TryWithFNeg may represent a negated constant
+ // as fneg(movi).
(Op.getOpcode() == ISD::FNEG &&
Op.getOperand(0).getOpcode() == AArch64ISD::MOVI) ||
(Op.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
>From 4201ee04cf253e4affd94228ad90204e6b74e00e Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Wed, 1 Apr 2026 14:19:17 +0100
Subject: [PATCH 3/3] Rebase and use optional for operands to simplify
 instruction generation
---
llvm/lib/Target/AArch64/AArch64ExpandImm.cpp | 16 +++++---
llvm/lib/Target/AArch64/AArch64ExpandImm.h | 4 +-
.../AArch64/AArch64ExpandPseudoInsts.cpp | 35 +++++++++---------
.../Target/AArch64/AArch64ISelDAGToDAG.cpp | 23 ++++++------
.../GISel/AArch64InstructionSelector.cpp | 37 +++++++++----------
5 files changed, 58 insertions(+), 57 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp b/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp
index 818b207391e77..da5ab84be76c7 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp
@@ -732,12 +732,13 @@ bool AArch64_IMM::expandVectorMOVImm(
if (ST->isNeonAvailable() && Imm.getHiBits(64) == Imm.getLoBits(64)) {
uint64_t Value = Imm.trunc(64).getZExtValue();
if (Value == 0) {
- Insn.push_back({AArch64::FMOVD0, 0, 0});
+ Insn.push_back({AArch64::FMOVD0, std::nullopt, std::nullopt});
return true;
}
if (AArch64_AM::isAdvSIMDModImmType10(Value)) {
unsigned Opc = Is64Bit ? AArch64::MOVID : AArch64::MOVIv2d_ns;
- Insn.push_back({Opc, AArch64_AM::encodeAdvSIMDModImmType10(Value), 0});
+ Insn.push_back(
+ {Opc, AArch64_AM::encodeAdvSIMDModImmType10(Value), std::nullopt});
return true;
}
if (AArch64_AM::isAdvSIMDModImmType1(Value)) {
@@ -782,17 +783,20 @@ bool AArch64_IMM::expandVectorMOVImm(
}
if (AArch64_AM::isAdvSIMDModImmType9(Value)) {
unsigned Opc = Is64Bit ? AArch64::MOVIv8b_ns : AArch64::MOVIv16b_ns;
- Insn.push_back({Opc, AArch64_AM::encodeAdvSIMDModImmType9(Value), 0});
+ Insn.push_back(
+ {Opc, AArch64_AM::encodeAdvSIMDModImmType9(Value), std::nullopt});
return true;
}
if (AArch64_AM::isAdvSIMDModImmType11(Value)) {
unsigned Opc = Is64Bit ? AArch64::FMOVv2f32_ns : AArch64::FMOVv4f32_ns;
- Insn.push_back({Opc, AArch64_AM::encodeAdvSIMDModImmType11(Value), 0});
+ Insn.push_back(
+ {Opc, AArch64_AM::encodeAdvSIMDModImmType11(Value), std::nullopt});
return true;
}
if (AArch64_AM::isAdvSIMDModImmType12(Value)) {
unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVv2f64_ns;
- Insn.push_back({Opc, AArch64_AM::encodeAdvSIMDModImmType12(Value), 0});
+ Insn.push_back(
+ {Opc, AArch64_AM::encodeAdvSIMDModImmType12(Value), std::nullopt});
return true;
}
@@ -852,7 +856,7 @@ bool AArch64_IMM::expandVectorMOVImm(
}
uint64_t Encoding;
if (AArch64_AM::isSVELogicalImm(64, Val64.getZExtValue(), Encoding)) {
- Insn.push_back({AArch64::DUPM_ZI, Encoding, 0});
+ Insn.push_back({AArch64::DUPM_ZI, Encoding, std::nullopt});
return true;
}
}
diff --git a/llvm/lib/Target/AArch64/AArch64ExpandImm.h b/llvm/lib/Target/AArch64/AArch64ExpandImm.h
index d50cf3723cfc6..73390454e4184 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandImm.h
+++ b/llvm/lib/Target/AArch64/AArch64ExpandImm.h
@@ -24,8 +24,8 @@ namespace AArch64_IMM {
struct ImmInsnModel {
unsigned Opcode;
- uint64_t Op1;
- uint64_t Op2;
+ std::optional<uint32_t> Op1;
+ std::optional<uint32_t> Op2;
};
void expandMOVImm(uint64_t Imm, unsigned BitSize,
diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index 4ab8adeb2c9bc..d528742da8c11 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -177,7 +177,7 @@ bool AArch64ExpandPseudoImpl::expandMOVImm(MachineBasicBlock &MBB,
MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
.add(MI.getOperand(0))
.addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
- .addImm(I->Op2));
+ .addImm(*I->Op2));
} else {
Register DstReg = MI.getOperand(0).getReg();
bool DstIsDead = MI.getOperand(0).isDead();
@@ -187,7 +187,7 @@ bool AArch64ExpandPseudoImpl::expandMOVImm(MachineBasicBlock &MBB,
getDeadRegState(DstIsDead && LastItem) |
RenamableState)
.addReg(DstReg)
- .addImm(I->Op2));
+ .addImm(*I->Op2));
}
break;
case AArch64::EONXrs:
@@ -203,32 +203,33 @@ bool AArch64ExpandPseudoImpl::expandMOVImm(MachineBasicBlock &MBB,
RenamableState)
.addReg(DstReg)
.addReg(DstReg)
- .addImm(I->Op2));
+ .addImm(*I->Op2));
} break;
case AArch64::MOVNWi:
case AArch64::MOVNXi:
case AArch64::MOVZWi:
case AArch64::MOVZXi: {
bool DstIsDead = MI.getOperand(0).isDead();
- MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
- .addReg(DstReg, RegState::Define |
- getDeadRegState(DstIsDead && LastItem) |
- RenamableState)
- .addImm(I->Op1)
- .addImm(I->Op2));
+ MIBS.push_back(
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
+ .addReg(DstReg, RegState::Define |
+ getDeadRegState(DstIsDead && LastItem) |
+ RenamableState)
+ .addImm(*I->Op1)
+ .addImm(*I->Op2));
} break;
case AArch64::MOVKWi:
case AArch64::MOVKXi: {
Register DstReg = MI.getOperand(0).getReg();
bool DstIsDead = MI.getOperand(0).isDead();
- MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
- .addReg(DstReg,
- RegState::Define |
- getDeadRegState(DstIsDead && LastItem) |
- RenamableState)
- .addReg(DstReg)
- .addImm(I->Op1)
- .addImm(I->Op2));
+ MIBS.push_back(
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
+ .addReg(DstReg, RegState::Define |
+ getDeadRegState(DstIsDead && LastItem) |
+ RenamableState)
+ .addReg(DstReg)
+ .addImm(*I->Op1)
+ .addImm(*I->Op2));
} break;
}
}
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index c283d69a3d938..e97263db19575 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -5056,10 +5056,6 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
case AArch64::FMOVv4f32_ns:
case AArch64::FMOVDi:
case AArch64::FMOVv2f64_ns:
- Src = CurDAG->getMachineNode(
- Insn.Opcode, DL, FVT,
- CurDAG->getTargetConstant(Insn.Op1, DL, MVT::i64));
- break;
case AArch64::MOVIv2i32:
case AArch64::MOVIv4i32:
case AArch64::MOVIv4i16:
@@ -5071,16 +5067,19 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
case AArch64::MVNIv4i16:
case AArch64::MVNIv8i16:
case AArch64::MVNIv2s_msl:
- case AArch64::MVNIv4s_msl:
- Src = CurDAG->getMachineNode(
- Insn.Opcode, DL, FVT,
- CurDAG->getTargetConstant(Insn.Op1, DL, MVT::i64),
- CurDAG->getTargetConstant(Insn.Op2, DL, MVT::i64));
+ case AArch64::MVNIv4s_msl: {
+ SmallVector<SDValue> Ops;
+ if (Insn.Op1)
+ Ops.push_back(CurDAG->getTargetConstant(*Insn.Op1, DL, MVT::i64));
+ if (Insn.Op2)
+ Ops.push_back(CurDAG->getTargetConstant(*Insn.Op2, DL, MVT::i64));
+ Src = CurDAG->getMachineNode(Insn.Opcode, DL, FVT, Ops);
break;
+ }
case AArch64::DUPM_ZI:
Src = CurDAG->getMachineNode(
Insn.Opcode, DL, MVT::nxv2f64,
- CurDAG->getTargetConstant(Insn.Op1, DL, MVT::i64));
+ CurDAG->getTargetConstant(*Insn.Op1, DL, MVT::i64));
Src = CurDAG
->getTargetExtractSubreg(AArch64::zsub, DL, FVT,
SDValue(Src, 0))
@@ -5089,8 +5088,8 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
case AArch64::DUP_ZI_D:
Src = CurDAG->getMachineNode(
Insn.Opcode, DL, MVT::nxv2f64,
- CurDAG->getTargetConstant(Insn.Op1, DL, MVT::i64),
- CurDAG->getTargetConstant(Insn.Op2, DL, MVT::i64));
+ CurDAG->getTargetConstant(*Insn.Op1, DL, MVT::i64),
+ CurDAG->getTargetConstant(*Insn.Op2, DL, MVT::i64));
Src = CurDAG
->getTargetExtractSubreg(AArch64::zsub, DL, FVT,
SDValue(Src, 0))
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 97546adf94a8a..716789c404cad 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -5515,19 +5515,6 @@ AArch64InstructionSelector::emitConstantMOVVector(DstOp Dst, APInt Imm,
}
return &*Mov;
}
- case AArch64::MOVID:
- case AArch64::MOVIv2d_ns:
- case AArch64::MOVIv8b_ns:
- case AArch64::MOVIv16b_ns:
- case AArch64::FMOVv2f32_ns:
- case AArch64::FMOVv4f32_ns:
- case AArch64::FMOVDi:
- case AArch64::FMOVv2f64_ns: {
- auto Mov =
- MIRBuilder.buildInstr(Insn.Opcode, {Dst}, {}).addImm(Insn.Op1);
- constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
- return &*Mov;
- }
case AArch64::MOVIv2i32:
case AArch64::MOVIv4i32:
case AArch64::MOVIv4i16:
@@ -5539,17 +5526,27 @@ AArch64InstructionSelector::emitConstantMOVVector(DstOp Dst, APInt Imm,
case AArch64::MVNIv4i16:
case AArch64::MVNIv8i16:
case AArch64::MVNIv2s_msl:
- case AArch64::MVNIv4s_msl: {
- auto Mov = MIRBuilder.buildInstr(Insn.Opcode, {Dst}, {})
- .addImm(Insn.Op1)
- .addImm(Insn.Op2);
+ case AArch64::MVNIv4s_msl:
+ case AArch64::MOVID:
+ case AArch64::MOVIv2d_ns:
+ case AArch64::MOVIv8b_ns:
+ case AArch64::MOVIv16b_ns:
+ case AArch64::FMOVv2f32_ns:
+ case AArch64::FMOVv4f32_ns:
+ case AArch64::FMOVDi:
+ case AArch64::FMOVv2f64_ns: {
+ auto Mov = MIRBuilder.buildInstr(Insn.Opcode, {Dst}, {});
+ if (Insn.Op1)
+ Mov.addImm(*Insn.Op1);
+ if (Insn.Op2)
+ Mov.addImm(*Insn.Op2);
constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
return &*Mov;
}
case AArch64::DUPM_ZI: {
auto Mov =
MIRBuilder.buildInstr(Insn.Opcode, {&AArch64::ZPRRegClass}, {})
- .addImm(Insn.Op1);
+ .addImm(*Insn.Op1);
constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
Mov = MIB.buildInstr(TargetOpcode::COPY, {Dst}, {})
.addReg(Mov.getReg(0), {},
@@ -5563,8 +5560,8 @@ AArch64InstructionSelector::emitConstantMOVVector(DstOp Dst, APInt Imm,
case AArch64::DUP_ZI_D: {
auto Mov =
MIRBuilder.buildInstr(Insn.Opcode, {&AArch64::ZPRRegClass}, {})
- .addImm(Insn.Op1)
- .addImm(Insn.Op2);
+ .addImm(*Insn.Op1)
+ .addImm(*Insn.Op2);
constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
Mov = MIB.buildInstr(TargetOpcode::COPY, {Dst}, {})
.addReg(Mov.getReg(0), {},
More information about the llvm-commits
mailing list