[llvm] [AArch64][GlobalISel] Use GPR for illegal fconstants and extend < 32 bit GPR constants to 32 bits (PR #178692)
Ryan Cowan via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 19 03:23:53 PST 2026
https://github.com/HolyMolyCowMan updated https://github.com/llvm/llvm-project/pull/178692
>From 0ce53dba32f4562872d85ed2c67d032fcfb0265d Mon Sep 17 00:00:00 2001
From: Ryan Cowan <ryan.cowan at arm.com>
Date: Wed, 28 Jan 2026 14:56:54 +0000
Subject: [PATCH 01/10] [AArch64][GlobalISel] Use GPR for illegal fconstants
and extend < 32 bit GPR constants to 32 bits
---
.../Target/AArch64/AArch64ISelLowering.cpp | 2 +-
.../AArch64/GISel/AArch64GlobalISelUtils.cpp | 4 +-
.../GISel/AArch64InstructionSelector.cpp | 34 +++--
.../AArch64/GISel/AArch64RegisterBankInfo.cpp | 76 ++++++++++
.../GlobalISel/regbankselect-build-vector.mir | 45 ++++--
.../test/CodeGen/AArch64/arm64-fp-imm-size.ll | 138 +++++++++++++-----
llvm/test/CodeGen/AArch64/arm64-fp-imm.ll | 63 +++++---
llvm/test/CodeGen/AArch64/arm64-vhadd.ll | 8 +-
llvm/test/CodeGen/AArch64/f16-instructions.ll | 8 +-
llvm/test/CodeGen/AArch64/fcvt-fixed.ll | 96 ++++++------
llvm/test/CodeGen/AArch64/fpow.ll | 34 +++--
.../AArch64/neon-compare-instructions.ll | 3 +-
llvm/test/CodeGen/AArch64/rem-by-const.ll | 23 +--
llvm/test/CodeGen/AArch64/select_const.ll | 119 ++++++++-------
llvm/test/CodeGen/AArch64/vecreduce-fadd.ll | 5 +-
15 files changed, 441 insertions(+), 217 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 88836d6e167b8..a0153aacba922 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -13134,7 +13134,7 @@ bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
// however the mov+fmov sequence is always better because of the reduced
// cache pressure. The timings are still the same if you consider
// movw+movk+fmov vs. adrp+ldr (it's one instruction longer, but the
- // movw+movk is fused). So we limit up to 2 instrdduction at most.
+ // movw+movk is fused). So we limit up to 4 instructions at most.
SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
AArch64_IMM::expandMOVImm(ImmInt.getZExtValue(), VT.getSizeInBits(), Insn);
assert(Insn.size() <= 4 &&
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp b/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp
index d51466c623347..baf98ca8e4e3c 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp
@@ -23,8 +23,8 @@ AArch64GISelUtils::getAArch64VectorSplat(const MachineInstr &MI,
if (MI.getOpcode() != AArch64::G_DUP)
return std::nullopt;
Register Src = MI.getOperand(1).getReg();
- if (auto ValAndVReg =
- getAnyConstantVRegValWithLookThrough(MI.getOperand(1).getReg(), MRI))
+ if (auto ValAndVReg = getAnyConstantVRegValWithLookThrough(
+ MI.getOperand(1).getReg(), MRI, true, true))
return RegOrConstant(ValAndVReg->Value.getSExtValue());
return RegOrConstant(Src);
}
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 38e58481e2f71..bae425f1f2ee7 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -2349,7 +2349,8 @@ bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
// Before selecting a DUP instruction, check if it is better selected as a
// MOV or load from a constant pool.
Register Src = I.getOperand(1).getReg();
- auto ValAndVReg = getAnyConstantVRegValWithLookThrough(Src, MRI);
+ auto ValAndVReg =
+ getAnyConstantVRegValWithLookThrough(Src, MRI, true, true);
if (!ValAndVReg)
return false;
LLVMContext &Ctx = MF.getFunction().getContext();
@@ -5813,18 +5814,25 @@ bool AArch64InstructionSelector::tryOptConstantBuildVec(
// generate a constant pool load instead of a vector insert sequence.
SmallVector<Constant *, 16> Csts;
for (unsigned Idx = 1; Idx < I.getNumOperands(); ++Idx) {
- // Try to find G_CONSTANT or G_FCONSTANT
- auto *OpMI =
- getOpcodeDef(TargetOpcode::G_CONSTANT, I.getOperand(Idx).getReg(), MRI);
- if (OpMI)
- Csts.emplace_back(
- const_cast<ConstantInt *>(OpMI->getOperand(1).getCImm()));
- else if ((OpMI = getOpcodeDef(TargetOpcode::G_FCONSTANT,
- I.getOperand(Idx).getReg(), MRI)))
- Csts.emplace_back(
- const_cast<ConstantFP *>(OpMI->getOperand(1).getFPImm()));
- else
- return false;
+ Register OpReg = I.getOperand(Idx).getReg();
+ if (auto AnyConst =
+ getAnyConstantVRegValWithLookThrough(OpReg, MRI, true, true)) {
+ MachineInstr *DefMI = MRI.getVRegDef(AnyConst->VReg);
+
+ if (DefMI->getOpcode() == TargetOpcode::G_CONSTANT) {
+ Csts.emplace_back(
+ ConstantInt::get(MIB.getMF().getFunction().getContext(),
+ std::move(AnyConst->Value)));
+ continue;
+ }
+
+ if (DefMI->getOpcode() == TargetOpcode::G_FCONSTANT) {
+ Csts.emplace_back(
+ const_cast<ConstantFP *>(DefMI->getOperand(1).getFPImm()));
+ continue;
+ }
+ }
+ return false;
}
Constant *CV = ConstantVector::get(Csts);
if (!emitConstantVector(I.getOperand(0).getReg(), CV, MIB, MRI))
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
index f8b5739d1d13a..ceabe776f768b 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -30,6 +30,7 @@
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Threading.h"
@@ -358,12 +359,71 @@ AArch64RegisterBankInfo::getInstrAlternativeMappings(
return RegisterBankInfo::getInstrAlternativeMappings(MI);
}
+static bool isLegalFPImm(const MachineInstr &MI, const MachineRegisterInfo &MRI,
+ const AArch64Subtarget &STI) {
+ assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT);
+ Register Dst = MI.getOperand(0).getReg();
+ LLT Ty = MRI.getType(Dst);
+ if (!Ty.isScalar())
+ return false;
+
+ unsigned Bits = Ty.getSizeInBits();
+ if (Bits != 16 && Bits != 32 && Bits != 64)
+ return false;
+
+ EVT VT = EVT::getFloatingPointVT(Bits);
+ bool OptForSize = MI.getMF()->getFunction().hasOptSize() ||
+ MI.getMF()->getFunction().hasMinSize();
+ const TargetLowering *TLI = STI.getTargetLowering();
+ return TLI->isFPImmLegal(MI.getOperand(1).getFPImm()->getValueAPF(), VT,
+ OptForSize);
+}
+
void AArch64RegisterBankInfo::applyMappingImpl(
MachineIRBuilder &Builder, const OperandsMapper &OpdMapper) const {
MachineInstr &MI = OpdMapper.getMI();
MachineRegisterInfo &MRI = OpdMapper.getMRI();
switch (MI.getOpcode()) {
+ case TargetOpcode::G_CONSTANT: {
+ Register Dst = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(Dst);
+ if (MRI.getRegBank(Dst) == &AArch64::GPRRegBank && DstTy.isScalar() &&
+ DstTy.getSizeInBits() < 32) {
+ Builder.setInsertPt(*MI.getParent(), std::next(MI.getIterator()));
+ Register ExtReg = MRI.createGenericVirtualRegister(LLT::scalar(32));
+ Builder.buildTrunc(Dst, ExtReg);
+
+ auto Val = MI.getOperand(1).getCImm()->getValue().zext(32);
+ LLVMContext &Ctx = Builder.getMF().getFunction().getContext();
+ MI.getOperand(1).setCImm(ConstantInt::get(Ctx, Val));
+ MI.getOperand(0).setReg(ExtReg);
+ MRI.setRegBank(ExtReg, AArch64::GPRRegBank);
+
+ for (MachineInstr &UseMI :
+ make_early_inc_range(MRI.use_nodbg_instructions(Dst))) {
+ if (UseMI.getOpcode() != AArch64::G_DUP)
+ continue;
+ for (MachineOperand &Op : UseMI.operands()) {
+ if (Op.isReg() && Op.getReg() == Dst)
+ Op.setReg(ExtReg);
+ }
+ }
+ }
+ return applyDefaultMapping(OpdMapper);
+ }
+ case TargetOpcode::G_FCONSTANT: {
+ Register Dst = MI.getOperand(0).getReg();
+ if (MRI.getRegBank(Dst) == &AArch64::GPRRegBank) {
+ const APFloat &Imm = MI.getOperand(1).getFPImm()->getValueAPF();
+ Builder.setInsertPt(*MI.getParent(), MI.getIterator());
+ Builder.buildConstant(Dst, Imm.bitcastToAPInt());
+ MI.eraseFromParent();
+ return;
+ }
+ return applyDefaultMapping(OpdMapper);
+ }
+
case TargetOpcode::G_STORE: {
Register Dst = MI.getOperand(0).getReg();
LLT Ty = MRI.getType(Dst);
@@ -855,6 +915,22 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
// Some of the floating-point instructions have mixed GPR and FPR operands:
// fine-tune the computed mapping.
switch (Opc) {
+ case TargetOpcode::G_CONSTANT: {
+ Register Dst = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(Dst);
+ if (DstTy.isScalar() && DstTy.getSizeInBits() < 32)
+ MappingID = CustomMappingID;
+ break;
+ }
+ case TargetOpcode::G_FCONSTANT: {
+ if (!isLegalFPImm(MI, MRI, STI) &&
+ MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits() != 128) {
+ // Materialize in GPR and rely on later bank copies for FP uses.
+ MappingID = CustomMappingID;
+ OpRegBankIdx = {PMI_FirstGPR};
+ }
+ break;
+ }
case AArch64::G_DUP: {
Register ScalarReg = MI.getOperand(1).getReg();
LLT ScalarTy = MRI.getType(ScalarReg);
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/regbankselect-build-vector.mir b/llvm/test/CodeGen/AArch64/GlobalISel/regbankselect-build-vector.mir
index 015949ed8de95..55f316facfca3 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/regbankselect-build-vector.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/regbankselect-build-vector.mir
@@ -44,16 +44,41 @@ body: |
; They're all constant, so we can select it via a constant-pool load if needed
; and this form is more amenable to selection by patterns (without x-bank copies).
; CHECK-LABEL: name: g_constant_operands_on_gpr
- ; CHECK: [[C:%[0-9]+]]:gpr(s8) = G_CONSTANT i8 4
- ; CHECK-NEXT: [[C1:%[0-9]+]]:gpr(s8) = G_CONSTANT i8 10
- ; CHECK-NEXT: [[C2:%[0-9]+]]:gpr(s8) = G_CONSTANT i8 3
- ; CHECK-NEXT: [[C3:%[0-9]+]]:gpr(s8) = G_CONSTANT i8 11
- ; CHECK-NEXT: [[C4:%[0-9]+]]:gpr(s8) = G_CONSTANT i8 15
- ; CHECK-NEXT: [[C5:%[0-9]+]]:gpr(s8) = G_CONSTANT i8 44
- ; CHECK-NEXT: [[C6:%[0-9]+]]:gpr(s8) = G_CONSTANT i8 22
- ; CHECK-NEXT: [[C7:%[0-9]+]]:gpr(s8) = G_CONSTANT i8 19
- ; CHECK-NEXT: [[C8:%[0-9]+]]:gpr(s8) = G_CONSTANT i8 55
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:fpr(<16 x s8>) = G_BUILD_VECTOR [[C]](s8), [[C1]](s8), [[C2]](s8), [[C3]](s8), [[C4]](s8), [[C]](s8), [[C1]](s8), [[C5]](s8), [[C6]](s8), [[C4]](s8), [[C]](s8), [[C7]](s8), [[C2]](s8), [[C3]](s8), [[C4]](s8), [[C8]](s8)
+ ; CHECK: [[C:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 4
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:gpr(s8) = G_TRUNC [[C]](s32)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 10
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:gpr(s8) = G_TRUNC [[C1]](s32)
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 3
+ ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:gpr(s8) = G_TRUNC [[C2]](s32)
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 11
+ ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:gpr(s8) = G_TRUNC [[C3]](s32)
+ ; CHECK-NEXT: [[C4:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 15
+ ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:gpr(s8) = G_TRUNC [[C4]](s32)
+ ; CHECK-NEXT: [[C5:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 44
+ ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:gpr(s8) = G_TRUNC [[C5]](s32)
+ ; CHECK-NEXT: [[C6:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 22
+ ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:gpr(s8) = G_TRUNC [[C6]](s32)
+ ; CHECK-NEXT: [[C7:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 19
+ ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:gpr(s8) = G_TRUNC [[C7]](s32)
+ ; CHECK-NEXT: [[C8:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 55
+ ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:gpr(s8) = G_TRUNC [[C8]](s32)
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr(s8) = COPY [[TRUNC]](s8)
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr(s8) = COPY [[TRUNC1]](s8)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:fpr(s8) = COPY [[TRUNC2]](s8)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:fpr(s8) = COPY [[TRUNC3]](s8)
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:fpr(s8) = COPY [[TRUNC4]](s8)
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:fpr(s8) = COPY [[TRUNC]](s8)
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:fpr(s8) = COPY [[TRUNC1]](s8)
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:fpr(s8) = COPY [[TRUNC5]](s8)
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:fpr(s8) = COPY [[TRUNC6]](s8)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:fpr(s8) = COPY [[TRUNC4]](s8)
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:fpr(s8) = COPY [[TRUNC]](s8)
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:fpr(s8) = COPY [[TRUNC7]](s8)
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:fpr(s8) = COPY [[TRUNC2]](s8)
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:fpr(s8) = COPY [[TRUNC3]](s8)
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:fpr(s8) = COPY [[TRUNC4]](s8)
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:fpr(s8) = COPY [[TRUNC8]](s8)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:fpr(<16 x s8>) = G_BUILD_VECTOR [[COPY]](s8), [[COPY1]](s8), [[COPY2]](s8), [[COPY3]](s8), [[COPY4]](s8), [[COPY5]](s8), [[COPY6]](s8), [[COPY7]](s8), [[COPY8]](s8), [[COPY9]](s8), [[COPY10]](s8), [[COPY11]](s8), [[COPY12]](s8), [[COPY13]](s8), [[COPY14]](s8), [[COPY15]](s8)
; CHECK-NEXT: $q0 = COPY [[BUILD_VECTOR]](<16 x s8>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%1:_(s8) = G_CONSTANT i8 4
diff --git a/llvm/test/CodeGen/AArch64/arm64-fp-imm-size.ll b/llvm/test/CodeGen/AArch64/arm64-fp-imm-size.ll
index cfb7c60f5a8b0..db219c926c0f2 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fp-imm-size.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fp-imm-size.ll
@@ -1,60 +1,126 @@
-; RUN: llc < %s -mtriple=arm64-apple-darwin | FileCheck %s
-; RUN: llc < %s -mtriple=arm64-apple-darwin -global-isel | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -mtriple=arm64-apple-darwin | FileCheck --check-prefixes=CHECK,CHECK-SD %s
+; RUN: llc < %s -mtriple=arm64-apple-darwin -global-isel | FileCheck --check-prefixes=CHECK,CHECK-GI %s
-; CHECK: literal8
-; CHECK: .quad 0x400921fb54442d18
define double @foo() optsize {
-; CHECK: _foo:
-; CHECK: adrp x[[REG:[0-9]+]], lCPI0_0@PAGE
-; CHECK: ldr d0, [x[[REG]], lCPI0_0@PAGEOFF]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: foo:
+; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: Lloh0:
+; CHECK-SD-NEXT: adrp x8, lCPI0_0@PAGE
+; CHECK-SD-NEXT: Lloh1:
+; CHECK-SD-NEXT: ldr d0, [x8, lCPI0_0@PAGEOFF]
+; CHECK-SD-NEXT: ret
+; CHECK-SD-NEXT: .loh AdrpLdr Lloh0, Lloh1
+;
+; CHECK-GI-LABEL: foo:
+; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: mov x8, #11544 ; =0x2d18
+; CHECK-GI-NEXT: movk x8, #21572, lsl #16
+; CHECK-GI-NEXT: movk x8, #8699, lsl #32
+; CHECK-GI-NEXT: movk x8, #16393, lsl #48
+; CHECK-GI-NEXT: fmov d0, x8
+; CHECK-GI-NEXT: ret
ret double 0x400921FB54442D18
}
-; CHECK: literal8
-; CHECK: .quad 0x0000001fffffffc
define double @foo2() optsize {
-; CHECK: _foo2:
-; CHECK: adrp x[[REG:[0-9]+]], lCPI1_0@PAGE
-; CHECK: ldr d0, [x[[REG]], lCPI1_0@PAGEOFF]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: foo2:
+; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: Lloh2:
+; CHECK-SD-NEXT: adrp x8, lCPI1_0@PAGE
+; CHECK-SD-NEXT: Lloh3:
+; CHECK-SD-NEXT: ldr d0, [x8, lCPI1_0@PAGEOFF]
+; CHECK-SD-NEXT: ret
+; CHECK-SD-NEXT: .loh AdrpLdr Lloh2, Lloh3
+;
+; CHECK-GI-LABEL: foo2:
+; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: mov x8, #137438887936 ; =0x1fffff0000
+; CHECK-GI-NEXT: movk x8, #65473
+; CHECK-GI-NEXT: fmov d0, x8
+; CHECK-GI-NEXT: ret
ret double 0x1FFFFFFFC1
}
define float @bar() optsize {
-; CHECK: _bar:
-; CHECK: adrp x[[REG:[0-9]+]], lCPI2_0@PAGE
-; CHECK: ldr s0, [x[[REG]], lCPI2_0@PAGEOFF]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: bar:
+; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: Lloh4:
+; CHECK-SD-NEXT: adrp x8, lCPI2_0@PAGE
+; CHECK-SD-NEXT: Lloh5:
+; CHECK-SD-NEXT: ldr s0, [x8, lCPI2_0@PAGEOFF]
+; CHECK-SD-NEXT: ret
+; CHECK-SD-NEXT: .loh AdrpLdr Lloh4, Lloh5
+;
+; CHECK-GI-LABEL: bar:
+; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: mov w8, #4059 ; =0xfdb
+; CHECK-GI-NEXT: movk w8, #16457, lsl #16
+; CHECK-GI-NEXT: fmov s0, w8
+; CHECK-GI-NEXT: ret
ret float 0x400921FB60000000
}
-; CHECK: literal16
-; CHECK: .quad 0
-; CHECK: .quad 0
define fp128 @baz() optsize {
-; CHECK: _baz:
-; CHECK: adrp x[[REG:[0-9]+]], lCPI3_0@PAGE
-; CHECK: ldr q0, [x[[REG]], lCPI3_0@PAGEOFF]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: baz:
+; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: Lloh6:
+; CHECK-SD-NEXT: adrp x8, lCPI3_0@PAGE
+; CHECK-SD-NEXT: Lloh7:
+; CHECK-SD-NEXT: ldr q0, [x8, lCPI3_0@PAGEOFF]
+; CHECK-SD-NEXT: ret
+; CHECK-SD-NEXT: .loh AdrpLdr Lloh6, Lloh7
+;
+; CHECK-GI-LABEL: baz:
+; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: Lloh0:
+; CHECK-GI-NEXT: adrp x8, lCPI3_0@PAGE
+; CHECK-GI-NEXT: Lloh1:
+; CHECK-GI-NEXT: ldr q0, [x8, lCPI3_0@PAGEOFF]
+; CHECK-GI-NEXT: ret
+; CHECK-GI-NEXT: .loh AdrpLdr Lloh0, Lloh1
ret fp128 0xL00000000000000000000000000000000
}
-; CHECK: literal8
-; CHECK: .quad 0x0000001fffffffd
define double @foo2_pgso() !prof !14 {
-; CHECK: _foo2_pgso:
-; CHECK: adrp x[[REG:[0-9]+]], lCPI4_0@PAGE
-; CHECK: ldr d0, [x[[REG]], lCPI4_0@PAGEOFF]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: foo2_pgso:
+; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: Lloh8:
+; CHECK-SD-NEXT: adrp x8, lCPI4_0@PAGE
+; CHECK-SD-NEXT: Lloh9:
+; CHECK-SD-NEXT: ldr d0, [x8, lCPI4_0@PAGEOFF]
+; CHECK-SD-NEXT: ret
+; CHECK-SD-NEXT: .loh AdrpLdr Lloh8, Lloh9
+;
+; CHECK-GI-LABEL: foo2_pgso:
+; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: Lloh2:
+; CHECK-GI-NEXT: adrp x8, lCPI4_0@PAGE
+; CHECK-GI-NEXT: Lloh3:
+; CHECK-GI-NEXT: ldr d0, [x8, lCPI4_0@PAGEOFF]
+; CHECK-GI-NEXT: ret
+; CHECK-GI-NEXT: .loh AdrpLdr Lloh2, Lloh3
ret double 0x1FFFFFFFd1
}
define float @bar_pgso() !prof !14 {
-; CHECK: _bar_pgso:
-; CHECK: adrp x[[REG:[0-9]+]], lCPI5_0@PAGE
-; CHECK: ldr s0, [x[[REG]], lCPI5_0@PAGEOFF]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: bar_pgso:
+; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: Lloh10:
+; CHECK-SD-NEXT: adrp x8, lCPI5_0@PAGE
+; CHECK-SD-NEXT: Lloh11:
+; CHECK-SD-NEXT: ldr s0, [x8, lCPI5_0@PAGEOFF]
+; CHECK-SD-NEXT: ret
+; CHECK-SD-NEXT: .loh AdrpLdr Lloh10, Lloh11
+;
+; CHECK-GI-LABEL: bar_pgso:
+; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: Lloh4:
+; CHECK-GI-NEXT: adrp x8, lCPI5_0@PAGE
+; CHECK-GI-NEXT: Lloh5:
+; CHECK-GI-NEXT: ldr s0, [x8, lCPI5_0@PAGEOFF]
+; CHECK-GI-NEXT: ret
+; CHECK-GI-NEXT: .loh AdrpLdr Lloh4, Lloh5
ret float 0x400921FB80000000
}
@@ -74,3 +140,5 @@ define float @bar_pgso() !prof !14 {
!12 = !{i32 999000, i64 100, i32 1}
!13 = !{i32 999999, i64 1, i32 2}
!14 = !{!"function_entry_count", i64 0}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}
diff --git a/llvm/test/CodeGen/AArch64/arm64-fp-imm.ll b/llvm/test/CodeGen/AArch64/arm64-fp-imm.ll
index 61eb67486ae3d..f541715235e03 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fp-imm.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fp-imm.ll
@@ -1,32 +1,55 @@
-; RUN: llc < %s -mtriple=arm64-apple-darwin | FileCheck %s
-; RUN: llc < %s -mtriple=arm64-apple-darwin -global-isel | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -mtriple=arm64-apple-darwin | FileCheck --check-prefixes=CHECK,CHECK-SD %s
+; RUN: llc < %s -mtriple=arm64-apple-darwin -global-isel | FileCheck --check-prefixes=CHECK,CHECK-GI %s
-; CHECK: literal8
-; CHECK: .quad 0x400921fb54442d18
define double @foo() {
-; CHECK: _foo:
-; CHECK: adrp x[[REG:[0-9]+]], lCPI0_0@PAGE
-; CHECK: ldr d0, [x[[REG]], lCPI0_0@PAGEOFF]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: foo:
+; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: Lloh0:
+; CHECK-SD-NEXT: adrp x8, lCPI0_0@PAGE
+; CHECK-SD-NEXT: Lloh1:
+; CHECK-SD-NEXT: ldr d0, [x8, lCPI0_0@PAGEOFF]
+; CHECK-SD-NEXT: ret
+; CHECK-SD-NEXT: .loh AdrpLdr Lloh0, Lloh1
+;
+; CHECK-GI-LABEL: foo:
+; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: mov x8, #11544 ; =0x2d18
+; CHECK-GI-NEXT: movk x8, #21572, lsl #16
+; CHECK-GI-NEXT: movk x8, #8699, lsl #32
+; CHECK-GI-NEXT: movk x8, #16393, lsl #48
+; CHECK-GI-NEXT: fmov d0, x8
+; CHECK-GI-NEXT: ret
ret double 0x400921FB54442D18
}
define float @bar() {
-; CHECK: _bar:
-; CHECK: mov [[REG:w[0-9]+]], #4059
-; CHECK: movk [[REG]], #16457, lsl #16
-; CHECK: fmov s0, [[REG]]
-; CHECK-NEXT: ret
+; CHECK-LABEL: bar:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: mov w8, #4059 ; =0xfdb
+; CHECK-NEXT: movk w8, #16457, lsl #16
+; CHECK-NEXT: fmov s0, w8
+; CHECK-NEXT: ret
ret float 0x400921FB60000000
}
-; CHECK: literal16
-; CHECK: .quad 0
-; CHECK: .quad 0
define fp128 @baz() {
-; CHECK: _baz:
-; CHECK: adrp x[[REG:[0-9]+]], lCPI2_0@PAGE
-; CHECK: ldr q0, [x[[REG]], lCPI2_0@PAGEOFF]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: baz:
+; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: Lloh2:
+; CHECK-SD-NEXT: adrp x8, lCPI2_0@PAGE
+; CHECK-SD-NEXT: Lloh3:
+; CHECK-SD-NEXT: ldr q0, [x8, lCPI2_0@PAGEOFF]
+; CHECK-SD-NEXT: ret
+; CHECK-SD-NEXT: .loh AdrpLdr Lloh2, Lloh3
+;
+; CHECK-GI-LABEL: baz:
+; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: Lloh0:
+; CHECK-GI-NEXT: adrp x8, lCPI2_0@PAGE
+; CHECK-GI-NEXT: Lloh1:
+; CHECK-GI-NEXT: ldr q0, [x8, lCPI2_0@PAGEOFF]
+; CHECK-GI-NEXT: ret
+; CHECK-GI-NEXT: .loh AdrpLdr Lloh0, Lloh1
ret fp128 0xL00000000000000000000000000000000
}
diff --git a/llvm/test/CodeGen/AArch64/arm64-vhadd.ll b/llvm/test/CodeGen/AArch64/arm64-vhadd.ll
index 09ea9eeb03914..4723867bc99f0 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vhadd.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vhadd.ll
@@ -1437,7 +1437,7 @@ define <2 x i16> @rhadd8x2_sext_asr(<2 x i8> %src1, <2 x i8> %src2) {
; CHECK-GI-NEXT: shl.2s v1, v1, #24
; CHECK-GI-NEXT: shl.2s v0, v0, #24
; CHECK-GI-NEXT: mov w8, #1 // =0x1
-; CHECK-GI-NEXT: dup.2s v2, w8
+; CHECK-GI-NEXT: movi.2s v2, #1
; CHECK-GI-NEXT: sshr.2s v1, v1, #24
; CHECK-GI-NEXT: ssra.2s v1, v0, #24
; CHECK-GI-NEXT: fmov s0, w8
@@ -1472,7 +1472,7 @@ define <2 x i16> @rhadd8x2_zext_asr(<2 x i8> %src1, <2 x i8> %src2) {
; CHECK-GI-NEXT: mov w8, #1 // =0x1
; CHECK-GI-NEXT: and.8b v0, v0, v2
; CHECK-GI-NEXT: and.8b v1, v1, v2
-; CHECK-GI-NEXT: dup.2s v2, w8
+; CHECK-GI-NEXT: movi.2s v2, #1
; CHECK-GI-NEXT: add.2s v0, v0, v1
; CHECK-GI-NEXT: fmov s1, w8
; CHECK-GI-NEXT: add.2s v0, v0, v2
@@ -1510,7 +1510,7 @@ define <2 x i16> @rhadd8x2_sext_lsr(<2 x i8> %src1, <2 x i8> %src2) {
; CHECK-GI-NEXT: shl.2s v1, v1, #24
; CHECK-GI-NEXT: shl.2s v0, v0, #24
; CHECK-GI-NEXT: mov w8, #1 // =0x1
-; CHECK-GI-NEXT: dup.2s v2, w8
+; CHECK-GI-NEXT: movi.2s v2, #1
; CHECK-GI-NEXT: sshr.2s v1, v1, #24
; CHECK-GI-NEXT: ssra.2s v1, v0, #24
; CHECK-GI-NEXT: fmov s0, w8
@@ -1545,7 +1545,7 @@ define <2 x i16> @rhadd8x2_zext_lsr(<2 x i8> %src1, <2 x i8> %src2) {
; CHECK-GI-NEXT: mov w8, #1 // =0x1
; CHECK-GI-NEXT: and.8b v0, v0, v2
; CHECK-GI-NEXT: and.8b v1, v1, v2
-; CHECK-GI-NEXT: dup.2s v2, w8
+; CHECK-GI-NEXT: movi.2s v2, #1
; CHECK-GI-NEXT: add.2s v0, v0, v1
; CHECK-GI-NEXT: fmov s1, w8
; CHECK-GI-NEXT: add.2s v0, v0, v2
diff --git a/llvm/test/CodeGen/AArch64/f16-instructions.ll b/llvm/test/CodeGen/AArch64/f16-instructions.ll
index f6d701b518699..7101d38c47766 100644
--- a/llvm/test/CodeGen/AArch64/f16-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/f16-instructions.ll
@@ -785,14 +785,12 @@ define void @test_fccmp(half %in, ptr %out) {
; CHECK-CVT-GI-NEXT: // kill: def $h0 killed $h0 def $s0
; CHECK-CVT-GI-NEXT: fcvt s1, h0
; CHECK-CVT-GI-NEXT: fmov s2, #5.00000000
-; CHECK-CVT-GI-NEXT: adrp x8, .LCPI29_0
+; CHECK-CVT-GI-NEXT: mov w8, #17664 // =0x4500
; CHECK-CVT-GI-NEXT: fmov s3, #8.00000000
+; CHECK-CVT-GI-NEXT: fmov w9, s0
; CHECK-CVT-GI-NEXT: fcmp s1, s2
-; CHECK-CVT-GI-NEXT: ldr h2, [x8, :lo12:.LCPI29_0]
-; CHECK-CVT-GI-NEXT: fmov w8, s0
-; CHECK-CVT-GI-NEXT: fmov w9, s2
; CHECK-CVT-GI-NEXT: fccmp s1, s3, #4, mi
-; CHECK-CVT-GI-NEXT: csel w8, w8, w9, gt
+; CHECK-CVT-GI-NEXT: csel w8, w9, w8, gt
; CHECK-CVT-GI-NEXT: strh w8, [x0]
; CHECK-CVT-GI-NEXT: ret
;
diff --git a/llvm/test/CodeGen/AArch64/fcvt-fixed.ll b/llvm/test/CodeGen/AArch64/fcvt-fixed.ll
index 743d1604388de..22e2694063330 100644
--- a/llvm/test/CodeGen/AArch64/fcvt-fixed.ll
+++ b/llvm/test/CodeGen/AArch64/fcvt-fixed.ll
@@ -166,8 +166,8 @@ define i32 @fcvtzs_f16_i32_7(half %flt) {
;
; CHECK-GI-FP16-LABEL: fcvtzs_f16_i32_7:
; CHECK-GI-FP16: // %bb.0:
-; CHECK-GI-FP16-NEXT: adrp x8, .LCPI8_0
-; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI8_0]
+; CHECK-GI-FP16-NEXT: mov w8, #22528 // =0x5800
+; CHECK-GI-FP16-NEXT: fmov s1, w8
; CHECK-GI-FP16-NEXT: fmul h0, h0, h1
; CHECK-GI-FP16-NEXT: fcvtzs w0, h0
; CHECK-GI-FP16-NEXT: ret
@@ -194,8 +194,8 @@ define i32 @fcvtzs_f16_i32_15(half %flt) {
;
; CHECK-GI-FP16-LABEL: fcvtzs_f16_i32_15:
; CHECK-GI-FP16: // %bb.0:
-; CHECK-GI-FP16-NEXT: adrp x8, .LCPI9_0
-; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI9_0]
+; CHECK-GI-FP16-NEXT: mov w8, #30720 // =0x7800
+; CHECK-GI-FP16-NEXT: fmov s1, w8
; CHECK-GI-FP16-NEXT: fmul h0, h0, h1
; CHECK-GI-FP16-NEXT: fcvtzs w0, h0
; CHECK-GI-FP16-NEXT: ret
@@ -222,8 +222,8 @@ define i64 @fcvtzs_f16_i64_7(half %flt) {
;
; CHECK-GI-FP16-LABEL: fcvtzs_f16_i64_7:
; CHECK-GI-FP16: // %bb.0:
-; CHECK-GI-FP16-NEXT: adrp x8, .LCPI10_0
-; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI10_0]
+; CHECK-GI-FP16-NEXT: mov w8, #22528 // =0x5800
+; CHECK-GI-FP16-NEXT: fmov s1, w8
; CHECK-GI-FP16-NEXT: fmul h0, h0, h1
; CHECK-GI-FP16-NEXT: fcvtzs x0, h0
; CHECK-GI-FP16-NEXT: ret
@@ -250,8 +250,8 @@ define i64 @fcvtzs_f16_i64_15(half %flt) {
;
; CHECK-GI-FP16-LABEL: fcvtzs_f16_i64_15:
; CHECK-GI-FP16: // %bb.0:
-; CHECK-GI-FP16-NEXT: adrp x8, .LCPI11_0
-; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI11_0]
+; CHECK-GI-FP16-NEXT: mov w8, #30720 // =0x7800
+; CHECK-GI-FP16-NEXT: fmov s1, w8
; CHECK-GI-FP16-NEXT: fmul h0, h0, h1
; CHECK-GI-FP16-NEXT: fcvtzs x0, h0
; CHECK-GI-FP16-NEXT: ret
@@ -422,8 +422,8 @@ define i32 @fcvtzu_f16_i32_7(half %flt) {
;
; CHECK-GI-FP16-LABEL: fcvtzu_f16_i32_7:
; CHECK-GI-FP16: // %bb.0:
-; CHECK-GI-FP16-NEXT: adrp x8, .LCPI20_0
-; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI20_0]
+; CHECK-GI-FP16-NEXT: mov w8, #22528 // =0x5800
+; CHECK-GI-FP16-NEXT: fmov s1, w8
; CHECK-GI-FP16-NEXT: fmul h0, h0, h1
; CHECK-GI-FP16-NEXT: fcvtzu w0, h0
; CHECK-GI-FP16-NEXT: ret
@@ -450,8 +450,8 @@ define i32 @fcvtzu_f16_i32_15(half %flt) {
;
; CHECK-GI-FP16-LABEL: fcvtzu_f16_i32_15:
; CHECK-GI-FP16: // %bb.0:
-; CHECK-GI-FP16-NEXT: adrp x8, .LCPI21_0
-; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI21_0]
+; CHECK-GI-FP16-NEXT: mov w8, #30720 // =0x7800
+; CHECK-GI-FP16-NEXT: fmov s1, w8
; CHECK-GI-FP16-NEXT: fmul h0, h0, h1
; CHECK-GI-FP16-NEXT: fcvtzu w0, h0
; CHECK-GI-FP16-NEXT: ret
@@ -478,8 +478,8 @@ define i64 @fcvtzu_f16_i64_7(half %flt) {
;
; CHECK-GI-FP16-LABEL: fcvtzu_f16_i64_7:
; CHECK-GI-FP16: // %bb.0:
-; CHECK-GI-FP16-NEXT: adrp x8, .LCPI22_0
-; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI22_0]
+; CHECK-GI-FP16-NEXT: mov w8, #22528 // =0x5800
+; CHECK-GI-FP16-NEXT: fmov s1, w8
; CHECK-GI-FP16-NEXT: fmul h0, h0, h1
; CHECK-GI-FP16-NEXT: fcvtzu x0, h0
; CHECK-GI-FP16-NEXT: ret
@@ -506,8 +506,8 @@ define i64 @fcvtzu_f16_i64_15(half %flt) {
;
; CHECK-GI-FP16-LABEL: fcvtzu_f16_i64_15:
; CHECK-GI-FP16: // %bb.0:
-; CHECK-GI-FP16-NEXT: adrp x8, .LCPI23_0
-; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI23_0]
+; CHECK-GI-FP16-NEXT: mov w8, #30720 // =0x7800
+; CHECK-GI-FP16-NEXT: fmov s1, w8
; CHECK-GI-FP16-NEXT: fmul h0, h0, h1
; CHECK-GI-FP16-NEXT: fcvtzu x0, h0
; CHECK-GI-FP16-NEXT: ret
@@ -689,8 +689,8 @@ define half @scvtf_f16_i32_7(i32 %int) {
; CHECK-GI-FP16-LABEL: scvtf_f16_i32_7:
; CHECK-GI-FP16: // %bb.0:
; CHECK-GI-FP16-NEXT: scvtf h0, w0
-; CHECK-GI-FP16-NEXT: adrp x8, .LCPI32_0
-; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI32_0]
+; CHECK-GI-FP16-NEXT: mov w8, #22528 // =0x5800
+; CHECK-GI-FP16-NEXT: fmov s1, w8
; CHECK-GI-FP16-NEXT: fdiv h0, h0, h1
; CHECK-GI-FP16-NEXT: ret
%cvt = sitofp i32 %int to half
@@ -727,8 +727,8 @@ define half @scvtf_f16_i32_15(i32 %int) {
; CHECK-GI-FP16-LABEL: scvtf_f16_i32_15:
; CHECK-GI-FP16: // %bb.0:
; CHECK-GI-FP16-NEXT: scvtf h0, w0
-; CHECK-GI-FP16-NEXT: adrp x8, .LCPI33_0
-; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI33_0]
+; CHECK-GI-FP16-NEXT: mov w8, #30720 // =0x7800
+; CHECK-GI-FP16-NEXT: fmov s1, w8
; CHECK-GI-FP16-NEXT: fdiv h0, h0, h1
; CHECK-GI-FP16-NEXT: ret
%cvt = sitofp i32 %int to half
@@ -765,8 +765,8 @@ define half @scvtf_f16_i64_7(i64 %long) {
; CHECK-GI-FP16-LABEL: scvtf_f16_i64_7:
; CHECK-GI-FP16: // %bb.0:
; CHECK-GI-FP16-NEXT: scvtf h0, x0
-; CHECK-GI-FP16-NEXT: adrp x8, .LCPI34_0
-; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI34_0]
+; CHECK-GI-FP16-NEXT: mov w8, #22528 // =0x5800
+; CHECK-GI-FP16-NEXT: fmov s1, w8
; CHECK-GI-FP16-NEXT: fdiv h0, h0, h1
; CHECK-GI-FP16-NEXT: ret
%cvt = sitofp i64 %long to half
@@ -803,8 +803,8 @@ define half @scvtf_f16_i64_15(i64 %long) {
; CHECK-GI-FP16-LABEL: scvtf_f16_i64_15:
; CHECK-GI-FP16: // %bb.0:
; CHECK-GI-FP16-NEXT: scvtf h0, x0
-; CHECK-GI-FP16-NEXT: adrp x8, .LCPI35_0
-; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI35_0]
+; CHECK-GI-FP16-NEXT: mov w8, #30720 // =0x7800
+; CHECK-GI-FP16-NEXT: fmov s1, w8
; CHECK-GI-FP16-NEXT: fdiv h0, h0, h1
; CHECK-GI-FP16-NEXT: ret
%cvt = sitofp i64 %long to half
@@ -985,8 +985,8 @@ define half @ucvtf_f16_i32_7(i32 %int) {
; CHECK-GI-FP16-LABEL: ucvtf_f16_i32_7:
; CHECK-GI-FP16: // %bb.0:
; CHECK-GI-FP16-NEXT: ucvtf h0, w0
-; CHECK-GI-FP16-NEXT: adrp x8, .LCPI44_0
-; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI44_0]
+; CHECK-GI-FP16-NEXT: mov w8, #22528 // =0x5800
+; CHECK-GI-FP16-NEXT: fmov s1, w8
; CHECK-GI-FP16-NEXT: fdiv h0, h0, h1
; CHECK-GI-FP16-NEXT: ret
%cvt = uitofp i32 %int to half
@@ -1023,8 +1023,8 @@ define half @ucvtf_f16_i32_15(i32 %int) {
; CHECK-GI-FP16-LABEL: ucvtf_f16_i32_15:
; CHECK-GI-FP16: // %bb.0:
; CHECK-GI-FP16-NEXT: ucvtf h0, w0
-; CHECK-GI-FP16-NEXT: adrp x8, .LCPI45_0
-; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI45_0]
+; CHECK-GI-FP16-NEXT: mov w8, #30720 // =0x7800
+; CHECK-GI-FP16-NEXT: fmov s1, w8
; CHECK-GI-FP16-NEXT: fdiv h0, h0, h1
; CHECK-GI-FP16-NEXT: ret
%cvt = uitofp i32 %int to half
@@ -1061,8 +1061,8 @@ define half @ucvtf_f16_i64_7(i64 %long) {
; CHECK-GI-FP16-LABEL: ucvtf_f16_i64_7:
; CHECK-GI-FP16: // %bb.0:
; CHECK-GI-FP16-NEXT: ucvtf h0, x0
-; CHECK-GI-FP16-NEXT: adrp x8, .LCPI46_0
-; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI46_0]
+; CHECK-GI-FP16-NEXT: mov w8, #22528 // =0x5800
+; CHECK-GI-FP16-NEXT: fmov s1, w8
; CHECK-GI-FP16-NEXT: fdiv h0, h0, h1
; CHECK-GI-FP16-NEXT: ret
%cvt = uitofp i64 %long to half
@@ -1099,8 +1099,8 @@ define half @ucvtf_f16_i64_15(i64 %long) {
; CHECK-GI-FP16-LABEL: ucvtf_f16_i64_15:
; CHECK-GI-FP16: // %bb.0:
; CHECK-GI-FP16-NEXT: ucvtf h0, x0
-; CHECK-GI-FP16-NEXT: adrp x8, .LCPI47_0
-; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI47_0]
+; CHECK-GI-FP16-NEXT: mov w8, #30720 // =0x7800
+; CHECK-GI-FP16-NEXT: fmov s1, w8
; CHECK-GI-FP16-NEXT: fdiv h0, h0, h1
; CHECK-GI-FP16-NEXT: ret
%cvt = uitofp i64 %long to half
@@ -1261,8 +1261,8 @@ define i32 @fcvtzs_sat_f16_i32_7(half %dbl) {
;
; CHECK-GI-FP16-LABEL: fcvtzs_sat_f16_i32_7:
; CHECK-GI-FP16: // %bb.0:
-; CHECK-GI-FP16-NEXT: adrp x8, .LCPI55_0
-; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI55_0]
+; CHECK-GI-FP16-NEXT: mov w8, #22528 // =0x5800
+; CHECK-GI-FP16-NEXT: fmov s1, w8
; CHECK-GI-FP16-NEXT: fmul h0, h0, h1
; CHECK-GI-FP16-NEXT: fcvtzs w0, h0
; CHECK-GI-FP16-NEXT: ret
@@ -1289,8 +1289,8 @@ define i32 @fcvtzs_sat_f16_i32_15(half %dbl) {
;
; CHECK-GI-FP16-LABEL: fcvtzs_sat_f16_i32_15:
; CHECK-GI-FP16: // %bb.0:
-; CHECK-GI-FP16-NEXT: adrp x8, .LCPI56_0
-; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI56_0]
+; CHECK-GI-FP16-NEXT: mov w8, #30720 // =0x7800
+; CHECK-GI-FP16-NEXT: fmov s1, w8
; CHECK-GI-FP16-NEXT: fmul h0, h0, h1
; CHECK-GI-FP16-NEXT: fcvtzs w0, h0
; CHECK-GI-FP16-NEXT: ret
@@ -1317,8 +1317,8 @@ define i64 @fcvtzs_sat_f16_i64_7(half %dbl) {
;
; CHECK-GI-FP16-LABEL: fcvtzs_sat_f16_i64_7:
; CHECK-GI-FP16: // %bb.0:
-; CHECK-GI-FP16-NEXT: adrp x8, .LCPI57_0
-; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI57_0]
+; CHECK-GI-FP16-NEXT: mov w8, #22528 // =0x5800
+; CHECK-GI-FP16-NEXT: fmov s1, w8
; CHECK-GI-FP16-NEXT: fmul h0, h0, h1
; CHECK-GI-FP16-NEXT: fcvtzs x0, h0
; CHECK-GI-FP16-NEXT: ret
@@ -1345,8 +1345,8 @@ define i64 @fcvtzs_sat_f16_i64_15(half %dbl) {
;
; CHECK-GI-FP16-LABEL: fcvtzs_sat_f16_i64_15:
; CHECK-GI-FP16: // %bb.0:
-; CHECK-GI-FP16-NEXT: adrp x8, .LCPI58_0
-; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI58_0]
+; CHECK-GI-FP16-NEXT: mov w8, #30720 // =0x7800
+; CHECK-GI-FP16-NEXT: fmov s1, w8
; CHECK-GI-FP16-NEXT: fmul h0, h0, h1
; CHECK-GI-FP16-NEXT: fcvtzs x0, h0
; CHECK-GI-FP16-NEXT: ret
@@ -1507,8 +1507,8 @@ define i32 @fcvtzu_sat_f16_i32_7(half %dbl) {
;
; CHECK-GI-FP16-LABEL: fcvtzu_sat_f16_i32_7:
; CHECK-GI-FP16: // %bb.0:
-; CHECK-GI-FP16-NEXT: adrp x8, .LCPI66_0
-; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI66_0]
+; CHECK-GI-FP16-NEXT: mov w8, #22528 // =0x5800
+; CHECK-GI-FP16-NEXT: fmov s1, w8
; CHECK-GI-FP16-NEXT: fmul h0, h0, h1
; CHECK-GI-FP16-NEXT: fcvtzu w0, h0
; CHECK-GI-FP16-NEXT: ret
@@ -1535,8 +1535,8 @@ define i32 @fcvtzu_sat_f16_i32_15(half %dbl) {
;
; CHECK-GI-FP16-LABEL: fcvtzu_sat_f16_i32_15:
; CHECK-GI-FP16: // %bb.0:
-; CHECK-GI-FP16-NEXT: adrp x8, .LCPI67_0
-; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI67_0]
+; CHECK-GI-FP16-NEXT: mov w8, #30720 // =0x7800
+; CHECK-GI-FP16-NEXT: fmov s1, w8
; CHECK-GI-FP16-NEXT: fmul h0, h0, h1
; CHECK-GI-FP16-NEXT: fcvtzu w0, h0
; CHECK-GI-FP16-NEXT: ret
@@ -1563,8 +1563,8 @@ define i64 @fcvtzu_sat_f16_i64_7(half %dbl) {
;
; CHECK-GI-FP16-LABEL: fcvtzu_sat_f16_i64_7:
; CHECK-GI-FP16: // %bb.0:
-; CHECK-GI-FP16-NEXT: adrp x8, .LCPI68_0
-; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI68_0]
+; CHECK-GI-FP16-NEXT: mov w8, #22528 // =0x5800
+; CHECK-GI-FP16-NEXT: fmov s1, w8
; CHECK-GI-FP16-NEXT: fmul h0, h0, h1
; CHECK-GI-FP16-NEXT: fcvtzu x0, h0
; CHECK-GI-FP16-NEXT: ret
@@ -1591,8 +1591,8 @@ define i64 @fcvtzu_sat_f16_i64_15(half %dbl) {
;
; CHECK-GI-FP16-LABEL: fcvtzu_sat_f16_i64_15:
; CHECK-GI-FP16: // %bb.0:
-; CHECK-GI-FP16-NEXT: adrp x8, .LCPI69_0
-; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI69_0]
+; CHECK-GI-FP16-NEXT: mov w8, #30720 // =0x7800
+; CHECK-GI-FP16-NEXT: fmov s1, w8
; CHECK-GI-FP16-NEXT: fmul h0, h0, h1
; CHECK-GI-FP16-NEXT: fcvtzu x0, h0
; CHECK-GI-FP16-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/fpow.ll b/llvm/test/CodeGen/AArch64/fpow.ll
index 3e9e1da87cd75..005352ab3486b 100644
--- a/llvm/test/CodeGen/AArch64/fpow.ll
+++ b/llvm/test/CodeGen/AArch64/fpow.ll
@@ -47,16 +47,30 @@ entry:
}
define <1 x double> @pow_v1f64(<1 x double> %x) {
-; CHECK-LABEL: pow_v1f64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: .cfi_offset w30, -16
-; CHECK-NEXT: adrp x8, .LCPI4_0
-; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI4_0]
-; CHECK-NEXT: bl pow
-; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: pow_v1f64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
+; CHECK-SD-NEXT: .cfi_offset w30, -16
+; CHECK-SD-NEXT: adrp x8, .LCPI4_0
+; CHECK-SD-NEXT: ldr d1, [x8, :lo12:.LCPI4_0]
+; CHECK-SD-NEXT: bl pow
+; CHECK-SD-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: pow_v1f64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: .cfi_offset w30, -16
+; CHECK-GI-NEXT: mov x8, #34079 // =0x851f
+; CHECK-GI-NEXT: movk x8, #20971, lsl #16
+; CHECK-GI-NEXT: movk x8, #7864, lsl #32
+; CHECK-GI-NEXT: movk x8, #16393, lsl #48
+; CHECK-GI-NEXT: fmov d1, x8
+; CHECK-GI-NEXT: bl pow
+; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-GI-NEXT: ret
%c = call <1 x double> @llvm.pow.v1f64(<1 x double> %x, <1 x double> <double 3.140000e+00>)
ret <1 x double> %c
}
diff --git a/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll b/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll
index 8c8fde7934b89..be2efccd2432a 100644
--- a/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll
@@ -2503,8 +2503,7 @@ define <4 x i32> @fcmal4xfloat(<4 x float> %A, <4 x float> %B) {
;
; CHECK-GI-LABEL: fcmal4xfloat:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: mov w8, #1 // =0x1
-; CHECK-GI-NEXT: dup v0.2s, w8
+; CHECK-GI-NEXT: movi v0.2s, #1
; CHECK-GI-NEXT: mov v0.d[1], v0.d[0]
; CHECK-GI-NEXT: shl v0.4s, v0.4s, #31
; CHECK-GI-NEXT: cmlt v0.4s, v0.4s, #0
diff --git a/llvm/test/CodeGen/AArch64/rem-by-const.ll b/llvm/test/CodeGen/AArch64/rem-by-const.ll
index 927d46612f443..b6e06003c7c72 100644
--- a/llvm/test/CodeGen/AArch64/rem-by-const.ll
+++ b/llvm/test/CodeGen/AArch64/rem-by-const.ll
@@ -929,7 +929,7 @@ define <4 x i8> @sv4i8_7(<4 x i8> %d, <4 x i8> %e) {
; CHECK-GI-NEXT: mov v3.b[3], w8
; CHECK-GI-NEXT: uzp1 v1.8b, v2.8b, v0.8b
; CHECK-GI-NEXT: neg v2.8b, v3.8b
-; CHECK-GI-NEXT: dup v3.4h, w9
+; CHECK-GI-NEXT: movi v3.4h, #7
; CHECK-GI-NEXT: sshl v1.8b, v1.8b, v2.8b
; CHECK-GI-NEXT: neg v2.8b, v4.8b
; CHECK-GI-NEXT: ushl v2.8b, v1.8b, v2.8b
@@ -980,10 +980,9 @@ define <4 x i8> @sv4i8_100(<4 x i8> %d, <4 x i8> %e) {
; CHECK-GI-NEXT: mov v3.b[2], w8
; CHECK-GI-NEXT: sshr v1.4h, v1.4h, #8
; CHECK-GI-NEXT: mov v3.b[3], w8
-; CHECK-GI-NEXT: mov w8, #100 // =0x64
; CHECK-GI-NEXT: uzp1 v1.8b, v1.8b, v0.8b
; CHECK-GI-NEXT: neg v2.8b, v3.8b
-; CHECK-GI-NEXT: dup v3.4h, w8
+; CHECK-GI-NEXT: movi v3.4h, #100
; CHECK-GI-NEXT: sshl v1.8b, v1.8b, v2.8b
; CHECK-GI-NEXT: neg v2.8b, v4.8b
; CHECK-GI-NEXT: ushl v2.8b, v1.8b, v2.8b
@@ -1414,12 +1413,11 @@ define <4 x i8> @uv4i8_7(<4 x i8> %d, <4 x i8> %e) {
; CHECK-GI-NEXT: ushl v2.8b, v2.8b, v3.8b
; CHECK-GI-NEXT: ushll v2.8h, v2.8b, #0
; CHECK-GI-NEXT: mov v4.b[3], w8
-; CHECK-GI-NEXT: mov w8, #7 // =0x7
; CHECK-GI-NEXT: usra v2.4h, v1.4h, #8
; CHECK-GI-NEXT: uzp1 v1.8b, v2.8b, v0.8b
; CHECK-GI-NEXT: neg v2.8b, v4.8b
; CHECK-GI-NEXT: ushl v1.8b, v1.8b, v2.8b
-; CHECK-GI-NEXT: dup v2.4h, w8
+; CHECK-GI-NEXT: movi v2.4h, #7
; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
; CHECK-GI-NEXT: mls v0.4h, v1.4h, v2.4h
; CHECK-GI-NEXT: ret
@@ -1457,11 +1455,10 @@ define <4 x i8> @uv4i8_100(<4 x i8> %d, <4 x i8> %e) {
; CHECK-GI-NEXT: mov v3.b[2], w8
; CHECK-GI-NEXT: ushr v1.4h, v1.4h, #8
; CHECK-GI-NEXT: mov v3.b[3], w8
-; CHECK-GI-NEXT: mov w8, #100 // =0x64
; CHECK-GI-NEXT: uzp1 v1.8b, v1.8b, v0.8b
; CHECK-GI-NEXT: neg v2.8b, v3.8b
; CHECK-GI-NEXT: ushl v1.8b, v1.8b, v2.8b
-; CHECK-GI-NEXT: dup v2.4h, w8
+; CHECK-GI-NEXT: movi v2.4h, #100
; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
; CHECK-GI-NEXT: mls v0.4h, v1.4h, v2.4h
; CHECK-GI-NEXT: ret
@@ -1595,10 +1592,9 @@ define <2 x i16> @sv2i16_7(<2 x i16> %d, <2 x i16> %e) {
; CHECK-GI-NEXT: uzp1 v1.4h, v1.4h, v0.4h
; CHECK-GI-NEXT: mov v3.h[1], w8
; CHECK-GI-NEXT: neg v2.4h, v2.4h
-; CHECK-GI-NEXT: mov w8, #7 // =0x7
; CHECK-GI-NEXT: sshl v1.4h, v1.4h, v2.4h
; CHECK-GI-NEXT: neg v2.4h, v3.4h
-; CHECK-GI-NEXT: dup v3.2s, w8
+; CHECK-GI-NEXT: movi v3.2s, #7
; CHECK-GI-NEXT: ushl v2.4h, v1.4h, v2.4h
; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0
; CHECK-GI-NEXT: ushll v2.4s, v2.4h, #0
@@ -1644,10 +1640,9 @@ define <2 x i16> @sv2i16_100(<2 x i16> %d, <2 x i16> %e) {
; CHECK-GI-NEXT: uzp1 v1.4h, v1.4h, v0.4h
; CHECK-GI-NEXT: mov v3.h[1], w8
; CHECK-GI-NEXT: neg v2.4h, v2.4h
-; CHECK-GI-NEXT: mov w8, #100 // =0x64
; CHECK-GI-NEXT: sshl v1.4h, v1.4h, v2.4h
; CHECK-GI-NEXT: neg v2.4h, v3.4h
-; CHECK-GI-NEXT: dup v3.2s, w8
+; CHECK-GI-NEXT: movi v3.2s, #100
; CHECK-GI-NEXT: ushl v2.4h, v1.4h, v2.4h
; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0
; CHECK-GI-NEXT: ushll v2.4s, v2.4h, #0
@@ -1937,12 +1932,11 @@ define <2 x i16> @uv2i16_7(<2 x i16> %d, <2 x i16> %e) {
; CHECK-GI-NEXT: fmov s3, w8
; CHECK-GI-NEXT: ushll v2.4s, v2.4h, #0
; CHECK-GI-NEXT: mov v3.h[1], w8
-; CHECK-GI-NEXT: mov w8, #7 // =0x7
; CHECK-GI-NEXT: usra v2.2s, v1.2s, #16
; CHECK-GI-NEXT: uzp1 v1.4h, v2.4h, v0.4h
; CHECK-GI-NEXT: neg v2.4h, v3.4h
; CHECK-GI-NEXT: ushl v1.4h, v1.4h, v2.4h
-; CHECK-GI-NEXT: dup v2.2s, w8
+; CHECK-GI-NEXT: movi v2.2s, #7
; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0
; CHECK-GI-NEXT: mls v0.2s, v1.2s, v2.2s
; CHECK-GI-NEXT: ret
@@ -1982,12 +1976,11 @@ define <2 x i16> @uv2i16_100(<2 x i16> %d, <2 x i16> %e) {
; CHECK-GI-NEXT: mul v1.2s, v1.2s, v2.2s
; CHECK-GI-NEXT: fmov s2, w8
; CHECK-GI-NEXT: mov v2.h[1], w8
-; CHECK-GI-NEXT: mov w8, #100 // =0x64
; CHECK-GI-NEXT: ushr v1.2s, v1.2s, #16
; CHECK-GI-NEXT: uzp1 v1.4h, v1.4h, v0.4h
; CHECK-GI-NEXT: neg v2.4h, v2.4h
; CHECK-GI-NEXT: ushl v1.4h, v1.4h, v2.4h
-; CHECK-GI-NEXT: dup v2.2s, w8
+; CHECK-GI-NEXT: movi v2.2s, #100
; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0
; CHECK-GI-NEXT: mls v0.2s, v1.2s, v2.2s
; CHECK-GI-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/select_const.ll b/llvm/test/CodeGen/AArch64/select_const.ll
index 0a73aed803415..2b2b1c1dd2222 100644
--- a/llvm/test/CodeGen/AArch64/select_const.ll
+++ b/llvm/test/CodeGen/AArch64/select_const.ll
@@ -769,14 +769,16 @@ define double @sel_constants_fadd_constant(i1 %cond) {
;
; CHECK-GI-LABEL: sel_constants_fadd_constant:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: mov x9, #7378697629483820646 // =0x6666666666666666
-; CHECK-GI-NEXT: adrp x8, .LCPI42_0
-; CHECK-GI-NEXT: movk x9, #16444, lsl #48
-; CHECK-GI-NEXT: ldr d0, [x8, :lo12:.LCPI42_0]
-; CHECK-GI-NEXT: and w8, w0, #0x1
+; CHECK-GI-NEXT: mov x9, #-7378697629483820647 // =0x9999999999999999
+; CHECK-GI-NEXT: mov x8, #7378697629483820646 // =0x6666666666666666
+; CHECK-GI-NEXT: and w10, w0, #0x1
+; CHECK-GI-NEXT: movk x9, #39320
+; CHECK-GI-NEXT: movk x8, #16444, lsl #48
+; CHECK-GI-NEXT: tst w10, #0x1
+; CHECK-GI-NEXT: movk x9, #16369, lsl #48
+; CHECK-GI-NEXT: fmov d0, x8
; CHECK-GI-NEXT: fmov d1, x9
-; CHECK-GI-NEXT: tst w8, #0x1
-; CHECK-GI-NEXT: fcsel d0, d0, d1, ne
+; CHECK-GI-NEXT: fcsel d0, d1, d0, ne
; CHECK-GI-NEXT: ret
%sel = select i1 %cond, double -4.0, double 23.3
%bo = fadd double %sel, 5.1
@@ -797,14 +799,16 @@ define double @sel_constants_fsub_constant(i1 %cond) {
;
; CHECK-GI-LABEL: sel_constants_fsub_constant:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI43_0
-; CHECK-GI-NEXT: and w9, w0, #0x1
-; CHECK-GI-NEXT: ldr d0, [x8, :lo12:.LCPI43_0]
; CHECK-GI-NEXT: mov x8, #3689348814741910323 // =0x3333333333333333
-; CHECK-GI-NEXT: tst w9, #0x1
-; CHECK-GI-NEXT: movk x8, #49186, lsl #48
+; CHECK-GI-NEXT: mov x9, #3689348814741910323 // =0x3333333333333333
+; CHECK-GI-NEXT: and w10, w0, #0x1
+; CHECK-GI-NEXT: movk x8, #13108
+; CHECK-GI-NEXT: movk x9, #49186, lsl #48
+; CHECK-GI-NEXT: tst w10, #0x1
+; CHECK-GI-NEXT: movk x8, #16434, lsl #48
+; CHECK-GI-NEXT: fmov d0, x9
; CHECK-GI-NEXT: fmov d1, x8
-; CHECK-GI-NEXT: fcsel d0, d1, d0, ne
+; CHECK-GI-NEXT: fcsel d0, d0, d1, ne
; CHECK-GI-NEXT: ret
%sel = select i1 %cond, double -4.0, double 23.3
%bo = fsub double %sel, 5.1
@@ -825,14 +829,16 @@ define double @fsub_constant_sel_constants(i1 %cond) {
;
; CHECK-GI-LABEL: fsub_constant_sel_constants:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI44_0
-; CHECK-GI-NEXT: and w9, w0, #0x1
-; CHECK-GI-NEXT: ldr d0, [x8, :lo12:.LCPI44_0]
; CHECK-GI-NEXT: mov x8, #3689348814741910323 // =0x3333333333333333
-; CHECK-GI-NEXT: tst w9, #0x1
-; CHECK-GI-NEXT: movk x8, #16418, lsl #48
+; CHECK-GI-NEXT: mov x9, #3689348814741910323 // =0x3333333333333333
+; CHECK-GI-NEXT: and w10, w0, #0x1
+; CHECK-GI-NEXT: movk x8, #13108
+; CHECK-GI-NEXT: movk x9, #16418, lsl #48
+; CHECK-GI-NEXT: tst w10, #0x1
+; CHECK-GI-NEXT: movk x8, #49202, lsl #48
+; CHECK-GI-NEXT: fmov d0, x9
; CHECK-GI-NEXT: fmov d1, x8
-; CHECK-GI-NEXT: fcsel d0, d1, d0, ne
+; CHECK-GI-NEXT: fcsel d0, d0, d1, ne
; CHECK-GI-NEXT: ret
%sel = select i1 %cond, double -4.0, double 23.3
%bo = fsub double 5.1, %sel
@@ -853,14 +859,17 @@ define double @sel_constants_fmul_constant(i1 %cond) {
;
; CHECK-GI-LABEL: sel_constants_fmul_constant:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI45_0
-; CHECK-GI-NEXT: and w9, w0, #0x1
-; CHECK-GI-NEXT: ldr d0, [x8, :lo12:.LCPI45_0]
-; CHECK-GI-NEXT: mov x8, #7378697629483820646 // =0x6666666666666666
-; CHECK-GI-NEXT: tst w9, #0x1
-; CHECK-GI-NEXT: movk x8, #49204, lsl #48
+; CHECK-GI-NEXT: mov x8, #60293 // =0xeb85
+; CHECK-GI-NEXT: mov x9, #7378697629483820646 // =0x6666666666666666
+; CHECK-GI-NEXT: and w10, w0, #0x1
+; CHECK-GI-NEXT: movk x8, #47185, lsl #16
+; CHECK-GI-NEXT: movk x9, #49204, lsl #48
+; CHECK-GI-NEXT: tst w10, #0x1
+; CHECK-GI-NEXT: movk x8, #46366, lsl #32
+; CHECK-GI-NEXT: fmov d0, x9
+; CHECK-GI-NEXT: movk x8, #16477, lsl #48
; CHECK-GI-NEXT: fmov d1, x8
-; CHECK-GI-NEXT: fcsel d0, d1, d0, ne
+; CHECK-GI-NEXT: fcsel d0, d0, d1, ne
; CHECK-GI-NEXT: ret
%sel = select i1 %cond, double -4.0, double 23.3
%bo = fmul double %sel, 5.1
@@ -880,13 +889,18 @@ define double @sel_constants_fdiv_constant(i1 %cond) {
;
; CHECK-GI-LABEL: sel_constants_fdiv_constant:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI46_1
-; CHECK-GI-NEXT: adrp x9, .LCPI46_0
-; CHECK-GI-NEXT: ldr d0, [x8, :lo12:.LCPI46_1]
-; CHECK-GI-NEXT: ldr d1, [x9, :lo12:.LCPI46_0]
+; CHECK-GI-NEXT: mov x9, #17991 // =0x4647
+; CHECK-GI-NEXT: mov x10, #6426 // =0x191a
; CHECK-GI-NEXT: and w8, w0, #0x1
+; CHECK-GI-NEXT: movk x9, #17990, lsl #16
+; CHECK-GI-NEXT: movk x10, #6425, lsl #16
; CHECK-GI-NEXT: tst w8, #0x1
-; CHECK-GI-NEXT: fcsel d0, d1, d0, ne
+; CHECK-GI-NEXT: movk x9, #17990, lsl #32
+; CHECK-GI-NEXT: movk x10, #6425, lsl #32
+; CHECK-GI-NEXT: movk x9, #16402, lsl #48
+; CHECK-GI-NEXT: movk x10, #49129, lsl #48
+; CHECK-GI-NEXT: csel x8, x10, x9, ne
+; CHECK-GI-NEXT: fmov d0, x8
; CHECK-GI-NEXT: ret
%sel = select i1 %cond, double -4.0, double 23.3
%bo = fdiv double %sel, 5.1
@@ -907,14 +921,17 @@ define double @fdiv_constant_sel_constants(i1 %cond) {
;
; CHECK-GI-LABEL: fdiv_constant_sel_constants:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI47_0
-; CHECK-GI-NEXT: and w9, w0, #0x1
-; CHECK-GI-NEXT: ldr d0, [x8, :lo12:.LCPI47_0]
-; CHECK-GI-NEXT: mov x8, #7378697629483820646 // =0x6666666666666666
-; CHECK-GI-NEXT: tst w9, #0x1
-; CHECK-GI-NEXT: movk x8, #49140, lsl #48
+; CHECK-GI-NEXT: mov x8, #9000 // =0x2328
+; CHECK-GI-NEXT: mov x9, #7378697629483820646 // =0x6666666666666666
+; CHECK-GI-NEXT: and w10, w0, #0x1
+; CHECK-GI-NEXT: movk x8, #5344, lsl #16
+; CHECK-GI-NEXT: movk x9, #49140, lsl #48
+; CHECK-GI-NEXT: tst w10, #0x1
+; CHECK-GI-NEXT: movk x8, #1125, lsl #32
+; CHECK-GI-NEXT: fmov d0, x9
+; CHECK-GI-NEXT: movk x8, #16332, lsl #48
; CHECK-GI-NEXT: fmov d1, x8
-; CHECK-GI-NEXT: fcsel d0, d1, d0, ne
+; CHECK-GI-NEXT: fcsel d0, d0, d1, ne
; CHECK-GI-NEXT: ret
%sel = select i1 %cond, double -4.0, double 23.3
%bo = fdiv double 5.1, %sel
@@ -933,11 +950,13 @@ define double @sel_constants_frem_constant(i1 %cond) {
;
; CHECK-GI-LABEL: sel_constants_frem_constant:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI48_0
+; CHECK-GI-NEXT: mov x8, #3689348814741910323 // =0x3333333333333333
; CHECK-GI-NEXT: fmov d0, #-4.00000000
-; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI48_0]
-; CHECK-GI-NEXT: and w8, w0, #0x1
-; CHECK-GI-NEXT: tst w8, #0x1
+; CHECK-GI-NEXT: and w9, w0, #0x1
+; CHECK-GI-NEXT: movk x8, #13112
+; CHECK-GI-NEXT: tst w9, #0x1
+; CHECK-GI-NEXT: movk x8, #16391, lsl #48
+; CHECK-GI-NEXT: fmov d1, x8
; CHECK-GI-NEXT: fcsel d0, d0, d1, ne
; CHECK-GI-NEXT: ret
%sel = select i1 %cond, double -4.0, double 23.3
@@ -959,14 +978,16 @@ define double @frem_constant_sel_constants(i1 %cond) {
;
; CHECK-GI-LABEL: frem_constant_sel_constants:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI49_0
-; CHECK-GI-NEXT: and w9, w0, #0x1
-; CHECK-GI-NEXT: ldr d0, [x8, :lo12:.LCPI49_0]
-; CHECK-GI-NEXT: mov x8, #7378697629483820646 // =0x6666666666666666
-; CHECK-GI-NEXT: tst w9, #0x1
-; CHECK-GI-NEXT: movk x8, #16404, lsl #48
+; CHECK-GI-NEXT: mov x8, #-7378697629483820647 // =0x9999999999999999
+; CHECK-GI-NEXT: mov x9, #7378697629483820646 // =0x6666666666666666
+; CHECK-GI-NEXT: and w10, w0, #0x1
+; CHECK-GI-NEXT: movk x8, #39320
+; CHECK-GI-NEXT: movk x9, #16404, lsl #48
+; CHECK-GI-NEXT: tst w10, #0x1
+; CHECK-GI-NEXT: movk x8, #16369, lsl #48
+; CHECK-GI-NEXT: fmov d0, x9
; CHECK-GI-NEXT: fmov d1, x8
-; CHECK-GI-NEXT: fcsel d0, d0, d1, ne
+; CHECK-GI-NEXT: fcsel d0, d1, d0, ne
; CHECK-GI-NEXT: ret
%sel = select i1 %cond, double -4.0, double 23.3
%bo = frem double 5.1, %sel
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll b/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll
index 40925da0557ec..9436d1561675b 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll
@@ -82,10 +82,9 @@ define half @add_v3HalfH(<3 x half> %bin.rdx) {
;
; CHECK-GI-FP16-LABEL: add_v3HalfH:
; CHECK-GI-FP16: // %bb.0:
-; CHECK-GI-FP16-NEXT: adrp x8, .LCPI2_0
; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI2_0]
-; CHECK-GI-FP16-NEXT: mov v0.h[3], v1.h[0]
+; CHECK-GI-FP16-NEXT: mov w8, #32768 // =0x8000
+; CHECK-GI-FP16-NEXT: mov v0.h[3], w8
; CHECK-GI-FP16-NEXT: faddp v0.4h, v0.4h, v0.4h
; CHECK-GI-FP16-NEXT: faddp h0, v0.2h
; CHECK-GI-FP16-NEXT: ret
>From b005ea66082a86588379d78593241f17103f0b8d Mon Sep 17 00:00:00 2001
From: Ryan Cowan <ryan.cowan at arm.com>
Date: Thu, 29 Jan 2026 11:29:12 +0000
Subject: [PATCH 02/10] Create 32 bit g_constants only & add GISel equivalents
for bitcast_fpimm_to_{i32,i64}
---
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 5 +
.../GISel/AArch64InstructionSelector.cpp | 169 ++++++++++--------
.../AArch64/GISel/AArch64RegisterBankInfo.cpp | 10 +-
.../GlobalISel/preselect-process-phis.mir | 2 +-
.../AArch64/GlobalISel/select-constant.mir | 2 +-
.../CodeGen/AArch64/GlobalISel/select-dup.mir | 2 +-
6 files changed, 111 insertions(+), 79 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index bd67f97bc0e03..4b3317448f2cc 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -2683,6 +2683,11 @@ return CurDAG->getTargetConstant(
N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i64);
}]>;
+def gi_bitcast_fpimm_to_i32 : GICustomOperandRenderer<"renderBitcastFPImm">,
+ GISDNodeXFormEquiv<bitcast_fpimm_to_i32>;
+def gi_bitcast_fpimm_to_i64 : GICustomOperandRenderer<"renderBitcastFPImm">,
+ GISDNodeXFormEquiv<bitcast_fpimm_to_i64>;
+
def : Pat<(f32 fpimm:$in),
(COPY_TO_REGCLASS (MOVi32imm (bitcast_fpimm_to_i32 f32:$in)), FPR32)>;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index bae425f1f2ee7..2d3cae9b85cde 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -496,6 +496,8 @@ class AArch64InstructionSelector : public InstructionSelector {
int OpIdx = -1) const;
void renderFPImm64(MachineInstrBuilder &MIB, const MachineInstr &MI,
int OpIdx = -1) const;
+ void renderBitcastFPImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
+ int OpIdx = -1) const;
void renderFPImm32SIMDModImmType4(MachineInstrBuilder &MIB,
const MachineInstr &MI,
int OpIdx = -1) const;
@@ -2678,56 +2680,58 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
case TargetOpcode::G_CONSTANT: {
const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
- const LLT s8 = LLT::scalar(8);
- const LLT s16 = LLT::scalar(16);
- const LLT s32 = LLT::scalar(32);
- const LLT s64 = LLT::scalar(64);
- const LLT s128 = LLT::scalar(128);
- const LLT p0 = LLT::pointer(0, 64);
+ // const LLT s8 = LLT::scalar(8);
+ // const LLT s16 = LLT::scalar(16);
+ // const LLT s32 = LLT::scalar(32);
+ // const LLT s64 = LLT::scalar(64);
+ // const LLT s128 = LLT::scalar(128);
+ // const LLT p0 = LLT::pointer(0, 64);
const Register DefReg = I.getOperand(0).getReg();
const LLT DefTy = MRI.getType(DefReg);
const unsigned DefSize = DefTy.getSizeInBits();
const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
- // FIXME: Redundant check, but even less readable when factored out.
- if (isFP) {
- if (Ty != s16 && Ty != s32 && Ty != s64 && Ty != s128) {
- LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
- << " constant, expected: " << s16 << " or " << s32
- << " or " << s64 << " or " << s128 << '\n');
- return false;
- }
-
- if (RB.getID() != AArch64::FPRRegBankID) {
- LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
- << " constant on bank: " << RB
- << ", expected: FPR\n");
- return false;
- }
-
- // The case when we have 0.0 is covered by tablegen. Reject it here so we
- // can be sure tablegen works correctly and isn't rescued by this code.
- // 0.0 is not covered by tablegen for FP128. So we will handle this
- // scenario in the code here.
- if (DefSize != 128 && I.getOperand(1).getFPImm()->isExactlyValue(0.0))
- return false;
- } else {
- // s32 and s64 are covered by tablegen.
- if (Ty != p0 && Ty != s8 && Ty != s16) {
- LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
- << " constant, expected: " << s32 << ", " << s64
- << ", or " << p0 << '\n');
- return false;
- }
-
- if (RB.getID() != AArch64::GPRRegBankID) {
- LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
- << " constant on bank: " << RB
- << ", expected: GPR\n");
- return false;
- }
- }
+ // // FIXME: Redundant check, but even less readable when factored out.
+ // if (isFP) {
+ // if (Ty != s16 && Ty != s32 && Ty != s64 && Ty != s128) {
+ // LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
+ // << " constant, expected: " << s16 << " or " <<
+ // s32
+ // << " or " << s64 << " or " << s128 << '\n');
+ // return false;
+ // }
+
+ // if (RB.getID() != AArch64::FPRRegBankID) {
+ // LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
+ // << " constant on bank: " << RB
+ // << ", expected: FPR\n");
+ // return false;
+ // }
+
+ // The case when we have 0.0 is covered by tablegen. Reject it here so we
+ // can be sure tablegen works correctly and isn't rescued by this code.
+ // 0.0 is not covered by tablegen for FP128. So we will handle this
+ // scenario in the code here.
+ // if (isFP && DefSize != 128 &&
+ // I.getOperand(1).getFPImm()->isExactlyValue(0.0))
+ // return false;
+ // } else {
+ // // s32 and s64 are covered by tablegen.
+ // if (Ty != p0 && Ty != s8 && Ty != s16) {
+ // LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
+ // << " constant, expected: " << s32 << ", " << s64
+ // << ", or " << p0 << '\n');
+ // return false;
+ // }
+
+ // if (RB.getID() != AArch64::GPRRegBankID) {
+ // LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
+ // << " constant on bank: " << RB
+ // << ", expected: GPR\n");
+ // return false;
+ // }
+ // }
if (isFP) {
const TargetRegisterClass &FPRRC = *getRegClassForTypeOnBank(DefTy, RB);
@@ -2759,39 +2763,44 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
return RBI.constrainGenericRegister(DefReg, FPRRC, MRI);
}
}
+ }
- assert((DefSize == 32 || DefSize == 64) && "Unexpected const def size");
- // Either emit a FMOV, or emit a copy to emit a normal mov.
- const Register DefGPRReg = MRI.createVirtualRegister(
- DefSize == 32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
- MachineOperand &RegOp = I.getOperand(0);
- RegOp.setReg(DefGPRReg);
- MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
- MIB.buildCopy({DefReg}, {DefGPRReg});
-
- if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
- LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
- return false;
- }
-
- MachineOperand &ImmOp = I.getOperand(1);
- // FIXME: Is going through int64_t always correct?
- ImmOp.ChangeToImmediate(
- ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
- } else if (I.getOperand(1).isCImm()) {
- uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
- I.getOperand(1).ChangeToImmediate(Val);
- } else if (I.getOperand(1).isImm()) {
- uint64_t Val = I.getOperand(1).getImm();
- I.getOperand(1).ChangeToImmediate(Val);
- }
-
- const unsigned MovOpc =
- DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
- I.setDesc(TII.get(MovOpc));
- constrainSelectedInstRegOperands(I, TII, TRI, RBI);
- return true;
+ return false;
}
+
+ // assert((DefSize == 32 || DefSize == 64) && "Unexpected const def
+ // size");
+ // // Either emit a FMOV, or emit a copy to emit a normal mov.
+ // const Register DefGPRReg = MRI.createVirtualRegister(
+ // DefSize == 32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
+ // MachineOperand &RegOp = I.getOperand(0);
+ // RegOp.setReg(DefGPRReg);
+ // MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
+ // MIB.buildCopy({DefReg}, {DefGPRReg});
+
+ // if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
+ // LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def
+ // operand\n"); return false;
+ // }
+
+ // MachineOperand &ImmOp = I.getOperand(1);
+ // // FIXME: Is going through int64_t always correct?
+ // ImmOp.ChangeToImmediate(
+ // ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
+ // } else if (I.getOperand(1).isCImm()) {
+ // uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
+ // I.getOperand(1).ChangeToImmediate(Val);
+ // } else if (I.getOperand(1).isImm()) {
+ // uint64_t Val = I.getOperand(1).getImm();
+ // I.getOperand(1).ChangeToImmediate(Val);
+ // }
+
+ // const unsigned MovOpc =
+ // DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
+ // I.setDesc(TII.get(MovOpc));
+ // constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+ // return true;
+ // }
case TargetOpcode::G_EXTRACT: {
Register DstReg = I.getOperand(0).getReg();
Register SrcReg = I.getOperand(1).getReg();
@@ -7966,6 +7975,16 @@ void AArch64InstructionSelector::renderFPImm64(MachineInstrBuilder &MIB,
AArch64_AM::getFP64Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
}
+void AArch64InstructionSelector::renderBitcastFPImm(MachineInstrBuilder &MIB,
+ const MachineInstr &MI,
+ int OpIdx) const {
+ assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
+ "Expected G_FCONSTANT");
+ const APInt Bits =
+ MI.getOperand(1).getFPImm()->getValueAPF().bitcastToAPInt();
+ MIB.addImm(Bits.getZExtValue());
+}
+
void AArch64InstructionSelector::renderFPImm32SIMDModImmType4(
MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
index ceabe776f768b..6cb645bc0a81b 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -416,8 +416,16 @@ void AArch64RegisterBankInfo::applyMappingImpl(
Register Dst = MI.getOperand(0).getReg();
if (MRI.getRegBank(Dst) == &AArch64::GPRRegBank) {
const APFloat &Imm = MI.getOperand(1).getFPImm()->getValueAPF();
+ APInt Bits = Imm.bitcastToAPInt();
Builder.setInsertPt(*MI.getParent(), MI.getIterator());
- Builder.buildConstant(Dst, Imm.bitcastToAPInt());
+ if (Bits.getBitWidth() < 32) {
+ Register ExtReg = MRI.createGenericVirtualRegister(LLT::scalar(32));
+ Builder.buildConstant(ExtReg, Bits.zext(32));
+ Builder.buildTrunc(Dst, ExtReg);
+ MRI.setRegBank(ExtReg, AArch64::GPRRegBank);
+ } else {
+ Builder.buildConstant(Dst, Bits);
+ }
MI.eraseFromParent();
return;
}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/preselect-process-phis.mir b/llvm/test/CodeGen/AArch64/GlobalISel/preselect-process-phis.mir
index 7620c729d580e..dff78cb695369 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/preselect-process-phis.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/preselect-process-phis.mir
@@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -verify-machineinstrs -mtriple aarch64--- -run-pass=instruction-select -global-isel-abort=1 %s -o - | FileCheck %s
+# RUN: llc -verify-machineinstrs -mtriple aarch64--- --run-pass=regbankselect --run-pass=instruction-select -global-isel-abort=1 %s -o - | FileCheck %s
---
name: test_loop_phi_fpr_to_gpr
alignment: 4
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-constant.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-constant.mir
index 72e691bf520ea..a1a5b525c4538 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-constant.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-constant.mir
@@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+# RUN: llc -mtriple=aarch64-- -run-pass=regbankselect -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
--- |
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-dup.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-dup.mir
index cf2bab78fe5a6..60a9c6e0c1490 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-dup.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-dup.mir
@@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=aarch64 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+# RUN: llc -mtriple=aarch64 -run-pass=regbankselect -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
#
# GPR variants should not use INSERT_SUBREG. FPR variants (DUP<ty>lane) should.
>From 1d9a539581d48f8220e6a972c53288a000e27699 Mon Sep 17 00:00:00 2001
From: Ryan Cowan <ryan.cowan at arm.com>
Date: Thu, 29 Jan 2026 14:16:06 +0000
Subject: [PATCH 03/10] Add handling for pointers
---
.../GISel/AArch64InstructionSelector.cpp | 109 ++++--------------
.../GlobalISel/preselect-process-phis.mir | 10 +-
.../CodeGen/AArch64/GlobalISel/select-dup.mir | 2 +-
.../GlobalISel/select-fp16-fconstant.mir | 6 +-
.../CodeGen/AArch64/GlobalISel/select-imm.mir | 33 +++---
.../test/CodeGen/AArch64/arm64-fp-imm-size.ll | 16 +--
6 files changed, 59 insertions(+), 117 deletions(-)
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 2d3cae9b85cde..976645afe6b00 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -2680,59 +2680,11 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
case TargetOpcode::G_CONSTANT: {
const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
- // const LLT s8 = LLT::scalar(8);
- // const LLT s16 = LLT::scalar(16);
- // const LLT s32 = LLT::scalar(32);
- // const LLT s64 = LLT::scalar(64);
- // const LLT s128 = LLT::scalar(128);
- // const LLT p0 = LLT::pointer(0, 64);
-
const Register DefReg = I.getOperand(0).getReg();
const LLT DefTy = MRI.getType(DefReg);
const unsigned DefSize = DefTy.getSizeInBits();
const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
- // // FIXME: Redundant check, but even less readable when factored out.
- // if (isFP) {
- // if (Ty != s16 && Ty != s32 && Ty != s64 && Ty != s128) {
- // LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
- // << " constant, expected: " << s16 << " or " <<
- // s32
- // << " or " << s64 << " or " << s128 << '\n');
- // return false;
- // }
-
- // if (RB.getID() != AArch64::FPRRegBankID) {
- // LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
- // << " constant on bank: " << RB
- // << ", expected: FPR\n");
- // return false;
- // }
-
- // The case when we have 0.0 is covered by tablegen. Reject it here so we
- // can be sure tablegen works correctly and isn't rescued by this code.
- // 0.0 is not covered by tablegen for FP128. So we will handle this
- // scenario in the code here.
- // if (isFP && DefSize != 128 &&
- // I.getOperand(1).getFPImm()->isExactlyValue(0.0))
- // return false;
- // } else {
- // // s32 and s64 are covered by tablegen.
- // if (Ty != p0 && Ty != s8 && Ty != s16) {
- // LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
- // << " constant, expected: " << s32 << ", " << s64
- // << ", or " << p0 << '\n');
- // return false;
- // }
-
- // if (RB.getID() != AArch64::GPRRegBankID) {
- // LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
- // << " constant on bank: " << RB
- // << ", expected: GPR\n");
- // return false;
- // }
- // }
-
if (isFP) {
const TargetRegisterClass &FPRRC = *getRegClassForTypeOnBank(DefTy, RB);
// For 16, 64, and 128b values, emit a constant pool load.
@@ -2763,44 +2715,33 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
return RBI.constrainGenericRegister(DefReg, FPRRC, MRI);
}
}
- }
+ }
- return false;
- }
+ if (DefTy.isPointer()) {
+ if (DefSize != 64)
+ return false;
- // assert((DefSize == 32 || DefSize == 64) && "Unexpected const def
- // size");
- // // Either emit a FMOV, or emit a copy to emit a normal mov.
- // const Register DefGPRReg = MRI.createVirtualRegister(
- // DefSize == 32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
- // MachineOperand &RegOp = I.getOperand(0);
- // RegOp.setReg(DefGPRReg);
- // MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
- // MIB.buildCopy({DefReg}, {DefGPRReg});
-
- // if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
- // LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def
- // operand\n"); return false;
- // }
-
- // MachineOperand &ImmOp = I.getOperand(1);
- // // FIXME: Is going through int64_t always correct?
- // ImmOp.ChangeToImmediate(
- // ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
- // } else if (I.getOperand(1).isCImm()) {
- // uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
- // I.getOperand(1).ChangeToImmediate(Val);
- // } else if (I.getOperand(1).isImm()) {
- // uint64_t Val = I.getOperand(1).getImm();
- // I.getOperand(1).ChangeToImmediate(Val);
- // }
-
- // const unsigned MovOpc =
- // DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
- // I.setDesc(TII.get(MovOpc));
- // constrainSelectedInstRegOperands(I, TII, TRI, RBI);
- // return true;
- // }
+ uint64_t Val = 0;
+ if (I.getOperand(1).isCImm())
+ Val = I.getOperand(1).getCImm()->getZExtValue();
+ else if (I.getOperand(1).isImm())
+ Val = I.getOperand(1).getImm();
+ else
+ return false;
+
+ Register TmpReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
+ auto Mov = MIB.buildInstr(AArch64::MOVi64imm, {TmpReg}, {}).addImm(Val);
+ MIB.buildCopy({DefReg}, {TmpReg});
+ I.eraseFromParent();
+
+ const TargetRegisterClass &RC = *getRegClassForTypeOnBank(DefTy, RB);
+ if (!RBI.constrainGenericRegister(DefReg, RC, MRI))
+ return false;
+ return constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
+ }
+
+ return false;
+ }
case TargetOpcode::G_EXTRACT: {
Register DstReg = I.getOperand(0).getReg();
Register SrcReg = I.getOperand(1).getReg();
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/preselect-process-phis.mir b/llvm/test/CodeGen/AArch64/GlobalISel/preselect-process-phis.mir
index dff78cb695369..5feaa5a9ed63a 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/preselect-process-phis.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/preselect-process-phis.mir
@@ -30,7 +30,7 @@ body: |
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[PHI:%[0-9]+]]:gpr32 = PHI [[CSELWr]], %bb.1, %8, %bb.2
+ ; CHECK-NEXT: [[PHI:%[0-9]+]]:gpr32 = PHI [[CSELWr]], %bb.1, %9, %bb.2
; CHECK-NEXT: [[FCVTHSr:%[0-9]+]]:fpr16 = nofpexcept FCVTHSr [[COPY]], implicit $fpcr
; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:fpr32 = SUBREG_TO_REG [[FCVTHSr]], %subreg.hsub
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32all = COPY [[SUBREG_TO_REG]]
@@ -154,12 +154,14 @@ body: |
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.4(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: %fpr:fpr16 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:gpr32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:fpr32 = COPY [[DEF]]
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:fpr16 = COPY [[COPY2]].hsub
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4:
; CHECK-NEXT: successors: %bb.5(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: %fp_phi:fpr16 = PHI %fpr, %bb.3, [[COPY1]], %bb.2
+ ; CHECK-NEXT: %fp_phi:fpr16 = PHI [[COPY3]], %bb.3, [[COPY1]], %bb.2
; CHECK-NEXT: %gp_phi1:gpr32 = PHI %gpr_1, %bb.3, %gpr_2, %bb.2
; CHECK-NEXT: %gp_phi2:gpr32 = PHI %gpr_1, %bb.3, %gpr_2, %bb.2
; CHECK-NEXT: %gp_phi3:gpr32 = PHI %gpr_1, %bb.3, %gpr_2, %bb.2
@@ -167,7 +169,7 @@ body: |
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr32all = COPY [[SUBREG_TO_REG]]
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.5:
- ; CHECK-NEXT: %use_fp_phi:gpr32 = PHI %gpr_1, %bb.0, [[COPY2]], %bb.4
+ ; CHECK-NEXT: %use_fp_phi:gpr32 = PHI %gpr_1, %bb.0, [[COPY4]], %bb.4
; CHECK-NEXT: %use_gp_phi1:gpr32 = PHI %gpr_1, %bb.0, %gp_phi1, %bb.4
; CHECK-NEXT: %use_gp_phi2:gpr32 = PHI %gpr_1, %bb.0, %gp_phi2, %bb.4
; CHECK-NEXT: %use_gp_phi3:gpr32 = PHI %gpr_1, %bb.0, %gp_phi3, %bb.4
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-dup.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-dup.mir
index 60a9c6e0c1490..f6822e3dde31a 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-dup.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-dup.mir
@@ -444,7 +444,7 @@ body: |
; CHECK-LABEL: name: cst_v2p0
; CHECK: liveins: $w0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: %cst:gpr64 = MOVi64imm 3
+ ; CHECK-NEXT: [[MOVi64imm:%[0-9]+]]:gpr64 = MOVi64imm 3
; CHECK-NEXT: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0
; CHECK-NEXT: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) %const.0 :: (load (s128) from constant-pool)
; CHECK-NEXT: $q0 = COPY [[LDRQui]]
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-fp16-fconstant.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-fp16-fconstant.mir
index 5b6726d6e5bf3..0966ea4c33b6a 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-fp16-fconstant.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-fp16-fconstant.mir
@@ -39,9 +39,9 @@ tracksRegLiveness: true
body: |
bb.0:
; CHECK-LABEL: name: constant_pool_load
- ; CHECK: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0
- ; CHECK-NEXT: [[LDRHui:%[0-9]+]]:fpr16 = LDRHui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) %const.0 :: (load (s16) from constant-pool)
- ; CHECK-NEXT: $h0 = COPY [[LDRHui]]
+ ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 11
+ ; CHECK-NEXT: [[FMOVWHr:%[0-9]+]]:fpr16 = FMOVWHr [[MOVi32imm]]
+ ; CHECK-NEXT: $h0 = COPY [[FMOVWHr]]
; CHECK-NEXT: RET_ReallyLR implicit $h0
%0:fpr(s16) = G_FCONSTANT half 0xH000B
$h0 = COPY %0(s16)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-imm.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-imm.mir
index ce7dc4feb187c..e5226eb930021 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-imm.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-imm.mir
@@ -90,9 +90,9 @@ body: |
; CHECK: liveins: $d0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
- ; CHECK-NEXT: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0
- ; CHECK-NEXT: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) %const.0 :: (load (s64) from constant-pool)
- ; CHECK-NEXT: [[FADDDrr:%[0-9]+]]:fpr64 = nofpexcept FADDDrr [[COPY]], [[LDRDui]], implicit $fpcr
+ ; CHECK-NEXT: [[MOVi64imm:%[0-9]+]]:gpr64 = MOVi64imm 4607173411600762667
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY [[MOVi64imm]]
+ ; CHECK-NEXT: [[FADDDrr:%[0-9]+]]:fpr64 = nofpexcept FADDDrr [[COPY]], [[COPY1]], implicit $fpcr
; CHECK-NEXT: $d0 = COPY [[FADDDrr]]
; CHECK-NEXT: RET_ReallyLR implicit $d0
;
@@ -100,8 +100,9 @@ body: |
; CHECK-TINY: liveins: $d0
; CHECK-TINY-NEXT: {{ $}}
; CHECK-TINY-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
- ; CHECK-TINY-NEXT: [[LDRDl:%[0-9]+]]:fpr64 = LDRDl %const.0 :: (load (s64) from constant-pool)
- ; CHECK-TINY-NEXT: [[FADDDrr:%[0-9]+]]:fpr64 = nofpexcept FADDDrr [[COPY]], [[LDRDl]], implicit $fpcr
+ ; CHECK-TINY-NEXT: [[MOVi64imm:%[0-9]+]]:gpr64 = MOVi64imm 4607173411600762667
+ ; CHECK-TINY-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY [[MOVi64imm]]
+ ; CHECK-TINY-NEXT: [[FADDDrr:%[0-9]+]]:fpr64 = nofpexcept FADDDrr [[COPY]], [[COPY1]], implicit $fpcr
; CHECK-TINY-NEXT: $d0 = COPY [[FADDDrr]]
; CHECK-TINY-NEXT: RET_ReallyLR implicit $d0
%0:fpr(s64) = COPY $d0
@@ -126,9 +127,9 @@ body: |
; CHECK: liveins: $s0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr32 = COPY $s0
- ; CHECK-NEXT: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0
- ; CHECK-NEXT: [[LDRSui:%[0-9]+]]:fpr32 = LDRSui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) %const.0 :: (load (s32) from constant-pool)
- ; CHECK-NEXT: [[FADDSrr:%[0-9]+]]:fpr32 = nofpexcept FADDSrr [[COPY]], [[LDRSui]], implicit $fpcr
+ ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1054421999
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr32 = COPY [[MOVi32imm]]
+ ; CHECK-NEXT: [[FADDSrr:%[0-9]+]]:fpr32 = nofpexcept FADDSrr [[COPY]], [[COPY1]], implicit $fpcr
; CHECK-NEXT: $s0 = COPY [[FADDSrr]]
; CHECK-NEXT: RET_ReallyLR implicit $s0
;
@@ -136,8 +137,9 @@ body: |
; CHECK-TINY: liveins: $s0
; CHECK-TINY-NEXT: {{ $}}
; CHECK-TINY-NEXT: [[COPY:%[0-9]+]]:fpr32 = COPY $s0
- ; CHECK-TINY-NEXT: [[LDRSl:%[0-9]+]]:fpr32 = LDRSl %const.0 :: (load (s32) from constant-pool)
- ; CHECK-TINY-NEXT: [[FADDSrr:%[0-9]+]]:fpr32 = nofpexcept FADDSrr [[COPY]], [[LDRSl]], implicit $fpcr
+ ; CHECK-TINY-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1054421999
+ ; CHECK-TINY-NEXT: [[COPY1:%[0-9]+]]:fpr32 = COPY [[MOVi32imm]]
+ ; CHECK-TINY-NEXT: [[FADDSrr:%[0-9]+]]:fpr32 = nofpexcept FADDSrr [[COPY]], [[COPY1]], implicit $fpcr
; CHECK-TINY-NEXT: $s0 = COPY [[FADDSrr]]
; CHECK-TINY-NEXT: RET_ReallyLR implicit $s0
%0:fpr(s32) = COPY $s0
@@ -162,9 +164,9 @@ body: |
; CHECK: liveins: $s0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr32 = COPY $s0
- ; CHECK-NEXT: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0
- ; CHECK-NEXT: [[LDRSui:%[0-9]+]]:fpr32 = LDRSui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) %const.0 :: (load (s32) from constant-pool)
- ; CHECK-NEXT: [[FADDSrr:%[0-9]+]]:fpr32 = nofpexcept FADDSrr [[COPY]], [[LDRSui]], implicit $fpcr
+ ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1054421999
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr32 = COPY [[MOVi32imm]]
+ ; CHECK-NEXT: [[FADDSrr:%[0-9]+]]:fpr32 = nofpexcept FADDSrr [[COPY]], [[COPY1]], implicit $fpcr
; CHECK-NEXT: $s0 = COPY [[FADDSrr]]
; CHECK-NEXT: RET_ReallyLR implicit $s0
;
@@ -172,8 +174,9 @@ body: |
; CHECK-TINY: liveins: $s0
; CHECK-TINY-NEXT: {{ $}}
; CHECK-TINY-NEXT: [[COPY:%[0-9]+]]:fpr32 = COPY $s0
- ; CHECK-TINY-NEXT: [[LDRSl:%[0-9]+]]:fpr32 = LDRSl %const.0 :: (load (s32) from constant-pool)
- ; CHECK-TINY-NEXT: [[FADDSrr:%[0-9]+]]:fpr32 = nofpexcept FADDSrr [[COPY]], [[LDRSl]], implicit $fpcr
+ ; CHECK-TINY-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1054421999
+ ; CHECK-TINY-NEXT: [[COPY1:%[0-9]+]]:fpr32 = COPY [[MOVi32imm]]
+ ; CHECK-TINY-NEXT: [[FADDSrr:%[0-9]+]]:fpr32 = nofpexcept FADDSrr [[COPY]], [[COPY1]], implicit $fpcr
; CHECK-TINY-NEXT: $s0 = COPY [[FADDSrr]]
; CHECK-TINY-NEXT: RET_ReallyLR implicit $s0
%0:fpr(s32) = COPY $s0
diff --git a/llvm/test/CodeGen/AArch64/arm64-fp-imm-size.ll b/llvm/test/CodeGen/AArch64/arm64-fp-imm-size.ll
index db219c926c0f2..78c45c9758e7d 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fp-imm-size.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fp-imm-size.ll
@@ -94,12 +94,10 @@ define double @foo2_pgso() !prof !14 {
;
; CHECK-GI-LABEL: foo2_pgso:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: Lloh2:
-; CHECK-GI-NEXT: adrp x8, lCPI4_0@PAGE
-; CHECK-GI-NEXT: Lloh3:
-; CHECK-GI-NEXT: ldr d0, [x8, lCPI4_0@PAGEOFF]
+; CHECK-GI-NEXT: mov x8, #137438887936 ; =0x1fffff0000
+; CHECK-GI-NEXT: movk x8, #65489
+; CHECK-GI-NEXT: fmov d0, x8
; CHECK-GI-NEXT: ret
-; CHECK-GI-NEXT: .loh AdrpLdr Lloh2, Lloh3
ret double 0x1FFFFFFFd1
}
@@ -115,12 +113,10 @@ define float @bar_pgso() !prof !14 {
;
; CHECK-GI-LABEL: bar_pgso:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: Lloh4:
-; CHECK-GI-NEXT: adrp x8, lCPI5_0@PAGE
-; CHECK-GI-NEXT: Lloh5:
-; CHECK-GI-NEXT: ldr s0, [x8, lCPI5_0@PAGEOFF]
+; CHECK-GI-NEXT: mov w8, #4060 ; =0xfdc
+; CHECK-GI-NEXT: movk w8, #16457, lsl #16
+; CHECK-GI-NEXT: fmov s0, w8
; CHECK-GI-NEXT: ret
-; CHECK-GI-NEXT: .loh AdrpLdr Lloh4, Lloh5
ret float 0x400921FB80000000
}
>From cf80dc372965675e464864b44d6152e48c455529 Mon Sep 17 00:00:00 2001
From: Ryan Cowan <ryan.cowan at arm.com>
Date: Fri, 30 Jan 2026 11:21:44 +0000
Subject: [PATCH 04/10] Convert pointers to scalars of the correct size
---
.../GISel/AArch64InstructionSelector.cpp | 35 +++++++------------
.../AArch64/GlobalISel/inttoptr_add.ll | 6 ++--
.../CodeGen/AArch64/GlobalISel/select-dup.mir | 4 ++-
.../CodeGen/AArch64/neon-extadd-extract.ll | 2 +-
4 files changed, 19 insertions(+), 28 deletions(-)
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 976645afe6b00..62caae40c11d6 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -2127,6 +2127,18 @@ bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
MachineRegisterInfo &MRI = MF.getRegInfo();
switch (I.getOpcode()) {
+ case TargetOpcode::G_CONSTANT: {
+ Register DefReg = I.getOperand(0).getReg();
+ const LLT DefTy = MRI.getType(DefReg);
+ if (!DefTy.isPointer())
+ return false;
+ const unsigned PtrSize = DefTy.getSizeInBits();
+ if (PtrSize != 32 && PtrSize != 64)
+ return false;
+ // Convert pointer typed constants to integers so TableGen can select.
+ MRI.setType(DefReg, LLT::scalar(PtrSize));
+ return true;
+ }
case TargetOpcode::G_STORE: {
bool Changed = contractCrossBankCopyIntoStore(I, MRI);
MachineOperand &SrcOp = I.getOperand(0);
@@ -2717,29 +2729,6 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
}
}
- if (DefTy.isPointer()) {
- if (DefSize != 64)
- return false;
-
- uint64_t Val = 0;
- if (I.getOperand(1).isCImm())
- Val = I.getOperand(1).getCImm()->getZExtValue();
- else if (I.getOperand(1).isImm())
- Val = I.getOperand(1).getImm();
- else
- return false;
-
- Register TmpReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
- auto Mov = MIB.buildInstr(AArch64::MOVi64imm, {TmpReg}, {}).addImm(Val);
- MIB.buildCopy({DefReg}, {TmpReg});
- I.eraseFromParent();
-
- const TargetRegisterClass &RC = *getRegClassForTypeOnBank(DefTy, RB);
- if (!RBI.constrainGenericRegister(DefReg, RC, MRI))
- return false;
- return constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
- }
-
return false;
}
case TargetOpcode::G_EXTRACT: {
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/inttoptr_add.ll b/llvm/test/CodeGen/AArch64/GlobalISel/inttoptr_add.ll
index 38b9558f426f2..21402fade53dd 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/inttoptr_add.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/inttoptr_add.ll
@@ -4,9 +4,9 @@
define dso_local void @fn() {
; CHECK-LABEL: fn:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov x8, #4132
-; CHECK-NEXT: mov w9, #1
-; CHECK-NEXT: movk x8, #65489, lsl #16
+; CHECK-NEXT: mov w8, #4132 // =0x1024
+; CHECK-NEXT: mov w9, #1 // =0x1
+; CHECK-NEXT: movk w8, #65489, lsl #16
; CHECK-NEXT: str w9, [x8]
; CHECK-NEXT: ret
entry:
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-dup.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-dup.mir
index f6822e3dde31a..c5bea027403a3 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-dup.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-dup.mir
@@ -444,7 +444,9 @@ body: |
; CHECK-LABEL: name: cst_v2p0
; CHECK: liveins: $w0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[MOVi64imm:%[0-9]+]]:gpr64 = MOVi64imm 3
+ ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 3
+ ; CHECK-NEXT: %cst:gpr64all = SUBREG_TO_REG 0, [[MOVi32imm]], %subreg.sub_32
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY %cst
; CHECK-NEXT: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0
; CHECK-NEXT: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) %const.0 :: (load (s128) from constant-pool)
; CHECK-NEXT: $q0 = COPY [[LDRQui]]
diff --git a/llvm/test/CodeGen/AArch64/neon-extadd-extract.ll b/llvm/test/CodeGen/AArch64/neon-extadd-extract.ll
index 5753798e87512..b35511dd4ab69 100644
--- a/llvm/test/CodeGen/AArch64/neon-extadd-extract.ll
+++ b/llvm/test/CodeGen/AArch64/neon-extadd-extract.ll
@@ -786,7 +786,7 @@ define <2 x i8> @extract_scalable_vec() vscale_range(1,16) "target-features"="+s
; CHECK-GI-LABEL: extract_scalable_vec:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mov x8, xzr
-; CHECK-GI-NEXT: mov x9, #1 // =0x1
+; CHECK-GI-NEXT: mov w9, #1 // =0x1
; CHECK-GI-NEXT: ld1 { v0.b }[0], [x8]
; CHECK-GI-NEXT: ldr b1, [x9]
; CHECK-GI-NEXT: adrp x8, .LCPI36_0
>From f145e47d888e8e3855db87ee81f52622322d0a41 Mon Sep 17 00:00:00 2001
From: Ryan Cowan <ryan.cowan at arm.com>
Date: Mon, 2 Feb 2026 14:45:23 +0000
Subject: [PATCH 05/10] Respond to review comments
---
.../AArch64/GISel/AArch64GlobalISelUtils.cpp | 2 +-
.../GISel/AArch64InstructionSelector.cpp | 12 ++++++------
.../AArch64/GISel/AArch64RegisterBankInfo.cpp | 7 ++++---
.../GlobalISel/preselect-process-phis.mir | 2 +-
.../AArch64/GlobalISel/select-constant.mir | 2 +-
.../CodeGen/AArch64/GlobalISel/select-dup.mir | 2 +-
.../CodeGen/AArch64/literal_pools_float.ll | 18 +++++++++++++-----
7 files changed, 27 insertions(+), 18 deletions(-)
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp b/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp
index baf98ca8e4e3c..74cb5e9bb0729 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp
@@ -24,7 +24,7 @@ AArch64GISelUtils::getAArch64VectorSplat(const MachineInstr &MI,
return std::nullopt;
Register Src = MI.getOperand(1).getReg();
if (auto ValAndVReg = getAnyConstantVRegValWithLookThrough(
- MI.getOperand(1).getReg(), MRI, true, true))
+ Src, MRI, /*LookThroughInstrs=*/true, /*LookThroughAnyExt=*/true))
return RegOrConstant(ValAndVReg->Value.getSExtValue());
return RegOrConstant(Src);
}
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 62caae40c11d6..771936ad5f9ea 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -2363,8 +2363,8 @@ bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
// Before selecting a DUP instruction, check if it is better selected as a
// MOV or load from a constant pool.
Register Src = I.getOperand(1).getReg();
- auto ValAndVReg =
- getAnyConstantVRegValWithLookThrough(Src, MRI, true, true);
+ auto ValAndVReg = getAnyConstantVRegValWithLookThrough(
+ Src, MRI, /*LookThroughInstrs=*/true, /*LookThroughAnyExt=*/true);
if (!ValAndVReg)
return false;
LLVMContext &Ctx = MF.getFunction().getContext();
@@ -2688,8 +2688,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
return true;
}
- case TargetOpcode::G_FCONSTANT:
- case TargetOpcode::G_CONSTANT: {
+ case TargetOpcode::G_FCONSTANT: {
const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
const Register DefReg = I.getOperand(0).getReg();
@@ -5754,8 +5753,9 @@ bool AArch64InstructionSelector::tryOptConstantBuildVec(
SmallVector<Constant *, 16> Csts;
for (unsigned Idx = 1; Idx < I.getNumOperands(); ++Idx) {
Register OpReg = I.getOperand(Idx).getReg();
- if (auto AnyConst =
- getAnyConstantVRegValWithLookThrough(OpReg, MRI, true, true)) {
+ if (auto AnyConst = getAnyConstantVRegValWithLookThrough(
+ OpReg, MRI, /*LookThroughInstrs=*/true,
+ /*LookThroughAnyExt=*/true)) {
MachineInstr *DefMI = MRI.getVRegDef(AnyConst->VReg);
if (DefMI->getOpcode() == TargetOpcode::G_CONSTANT) {
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
index 6cb645bc0a81b..cbaf6cb964203 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "AArch64RegisterBankInfo.h"
+#include "AArch64ISelLowering.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
@@ -374,7 +375,7 @@ static bool isLegalFPImm(const MachineInstr &MI, const MachineRegisterInfo &MRI,
EVT VT = EVT::getFloatingPointVT(Bits);
bool OptForSize = MI.getMF()->getFunction().hasOptSize() ||
MI.getMF()->getFunction().hasMinSize();
- const TargetLowering *TLI = STI.getTargetLowering();
+ const AArch64TargetLowering *TLI = STI.getTargetLowering();
return TLI->isFPImmLegal(MI.getOperand(1).getFPImm()->getValueAPF(), VT,
OptForSize);
}
@@ -931,8 +932,8 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
break;
}
case TargetOpcode::G_FCONSTANT: {
- if (!isLegalFPImm(MI, MRI, STI) &&
- MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits() != 128) {
+ LLT ImmTy = MRI.getType(MI.getOperand(0).getReg());
+ if (ImmTy.getScalarSizeInBits() != 128 && !isLegalFPImm(MI, MRI, STI)) {
// Materialize in GPR and rely on later bank copies for FP uses.
MappingID = CustomMappingID;
OpRegBankIdx = {PMI_FirstGPR};
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/preselect-process-phis.mir b/llvm/test/CodeGen/AArch64/GlobalISel/preselect-process-phis.mir
index 5feaa5a9ed63a..0a1e2570ece07 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/preselect-process-phis.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/preselect-process-phis.mir
@@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -verify-machineinstrs -mtriple aarch64--- --run-pass=regbankselect --run-pass=instruction-select -global-isel-abort=1 %s -o - | FileCheck %s
+# RUN: llc -verify-machineinstrs -mtriple aarch64--- --run-pass=regbankselect,instruction-select -global-isel-abort=1 %s -o - | FileCheck %s
---
name: test_loop_phi_fpr_to_gpr
alignment: 4
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-constant.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-constant.mir
index a1a5b525c4538..8ce47079d9e7e 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-constant.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-constant.mir
@@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=aarch64-- -run-pass=regbankselect -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+# RUN: llc -mtriple=aarch64-- -run-pass=regbankselect,instruction-select -verify-machineinstrs %s -o - | FileCheck %s
--- |
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-dup.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-dup.mir
index c5bea027403a3..83c323bcc51ea 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-dup.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-dup.mir
@@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=aarch64 -run-pass=regbankselect -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+# RUN: llc -mtriple=aarch64 -run-pass=regbankselect,instruction-select -verify-machineinstrs %s -o - | FileCheck %s
#
# GPR variants should not use INSERT_SUBREG. FPR variants (DUP<ty>lane) should.
diff --git a/llvm/test/CodeGen/AArch64/literal_pools_float.ll b/llvm/test/CodeGen/AArch64/literal_pools_float.ll
index 4debc8c87d57e..bf59223c6de8d 100644
--- a/llvm/test/CodeGen/AArch64/literal_pools_float.ll
+++ b/llvm/test/CodeGen/AArch64/literal_pools_float.ll
@@ -99,19 +99,26 @@ define dso_local float @float_ret_optnone() optnone noinline {
define dso_local double @double_ret_optnone() optnone noinline {
; CHECK-LABEL: double_ret_optnone:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI2_0
-; CHECK-NEXT: ldr d0, [x8, :lo12:.LCPI2_0]
+; CHECK-NEXT: mov x8, #-7378697629483820647 // =0x9999999999999999
+; CHECK-NEXT: movk x8, #39322
+; CHECK-NEXT: movk x8, #16313, lsl #48
+; CHECK-NEXT: fmov d0, x8
; CHECK-NEXT: ret
;
; CHECK-LARGE-LABEL: double_ret_optnone:
; CHECK-LARGE: // %bb.0:
-; CHECK-LARGE-NEXT: adrp x8, .LCPI2_0
-; CHECK-LARGE-NEXT: ldr d0, [x8, :lo12:.LCPI2_0]
+; CHECK-LARGE-NEXT: mov x8, #-7378697629483820647 // =0x9999999999999999
+; CHECK-LARGE-NEXT: movk x8, #39322
+; CHECK-LARGE-NEXT: movk x8, #16313, lsl #48
+; CHECK-LARGE-NEXT: fmov d0, x8
; CHECK-LARGE-NEXT: ret
;
; CHECK-TINY-LABEL: double_ret_optnone:
; CHECK-TINY: // %bb.0:
-; CHECK-TINY-NEXT: ldr d0, .LCPI2_0
+; CHECK-TINY-NEXT: mov x8, #-7378697629483820647 // =0x9999999999999999
+; CHECK-TINY-NEXT: movk x8, #39322
+; CHECK-TINY-NEXT: movk x8, #16313, lsl #48
+; CHECK-TINY-NEXT: fmov d0, x8
; CHECK-TINY-NEXT: ret
;
; CHECK-NOFP-LABEL: double_ret_optnone:
@@ -136,3 +143,4 @@ define dso_local double @double_ret_optnone() optnone noinline {
; CHECK-NOFP-TINY-NEXT: ret
ret double 0.1
}
+
>From 5957cebc128320d3aa16b940712c5db658874b7c Mon Sep 17 00:00:00 2001
From: Ryan Cowan <ryan.cowan at arm.com>
Date: Mon, 2 Feb 2026 14:58:02 +0000
Subject: [PATCH 06/10] Remove isFP check
---
.../GISel/AArch64InstructionSelector.cpp | 58 +++++++++----------
1 file changed, 27 insertions(+), 31 deletions(-)
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 771936ad5f9ea..01ba826216564 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -2689,43 +2689,39 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
}
case TargetOpcode::G_FCONSTANT: {
- const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
-
const Register DefReg = I.getOperand(0).getReg();
const LLT DefTy = MRI.getType(DefReg);
const unsigned DefSize = DefTy.getSizeInBits();
const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
- if (isFP) {
- const TargetRegisterClass &FPRRC = *getRegClassForTypeOnBank(DefTy, RB);
- // For 16, 64, and 128b values, emit a constant pool load.
- switch (DefSize) {
- default:
- llvm_unreachable("Unexpected destination size for G_FCONSTANT?");
- case 32:
- case 64: {
- bool OptForSize = shouldOptForSize(&MF);
- const auto &TLI = MF.getSubtarget().getTargetLowering();
- // If TLI says that this fpimm is illegal, then we'll expand to a
- // constant pool load.
- if (TLI->isFPImmLegal(I.getOperand(1).getFPImm()->getValueAPF(),
- EVT::getFloatingPointVT(DefSize), OptForSize))
- break;
- [[fallthrough]];
- }
- case 16:
- case 128: {
- auto *FPImm = I.getOperand(1).getFPImm();
- auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
- if (!LoadMI) {
- LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n");
- return false;
- }
- MIB.buildCopy({DefReg}, {LoadMI->getOperand(0).getReg()});
- I.eraseFromParent();
- return RBI.constrainGenericRegister(DefReg, FPRRC, MRI);
- }
+ const TargetRegisterClass &FPRRC = *getRegClassForTypeOnBank(DefTy, RB);
+ // For 16, 64, and 128b values, emit a constant pool load.
+ switch (DefSize) {
+ default:
+ llvm_unreachable("Unexpected destination size for G_FCONSTANT?");
+ case 32:
+ case 64: {
+ bool OptForSize = shouldOptForSize(&MF);
+ const auto &TLI = MF.getSubtarget().getTargetLowering();
+ // If TLI says that this fpimm is illegal, then we'll expand to a
+ // constant pool load.
+ if (TLI->isFPImmLegal(I.getOperand(1).getFPImm()->getValueAPF(),
+ EVT::getFloatingPointVT(DefSize), OptForSize))
+ break;
+ [[fallthrough]];
+ }
+ case 16:
+ case 128: {
+ auto *FPImm = I.getOperand(1).getFPImm();
+ auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
+ if (!LoadMI) {
+ LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n");
+ return false;
}
+ MIB.buildCopy({DefReg}, {LoadMI->getOperand(0).getReg()});
+ I.eraseFromParent();
+ return RBI.constrainGenericRegister(DefReg, FPRRC, MRI);
+ }
}
return false;
>From 72770ca599277f051049e82a6ec193aadd0bc1ac Mon Sep 17 00:00:00 2001
From: Ryan Cowan <ryan.cowan at arm.com>
Date: Thu, 5 Feb 2026 10:26:20 +0000
Subject: [PATCH 07/10] Address review comments
---
.../AArch64/GISel/AArch64RegisterBankInfo.cpp | 84 ++++++++-----------
1 file changed, 37 insertions(+), 47 deletions(-)
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
index cbaf6cb964203..b3a1398547702 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -12,7 +12,6 @@
//===----------------------------------------------------------------------===//
#include "AArch64RegisterBankInfo.h"
-#include "AArch64ISelLowering.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
@@ -389,50 +388,48 @@ void AArch64RegisterBankInfo::applyMappingImpl(
case TargetOpcode::G_CONSTANT: {
Register Dst = MI.getOperand(0).getReg();
LLT DstTy = MRI.getType(Dst);
- if (MRI.getRegBank(Dst) == &AArch64::GPRRegBank && DstTy.isScalar() &&
- DstTy.getSizeInBits() < 32) {
- Builder.setInsertPt(*MI.getParent(), std::next(MI.getIterator()));
- Register ExtReg = MRI.createGenericVirtualRegister(LLT::scalar(32));
- Builder.buildTrunc(Dst, ExtReg);
-
- auto Val = MI.getOperand(1).getCImm()->getValue().zext(32);
- LLVMContext &Ctx = Builder.getMF().getFunction().getContext();
- MI.getOperand(1).setCImm(ConstantInt::get(Ctx, Val));
- MI.getOperand(0).setReg(ExtReg);
- MRI.setRegBank(ExtReg, AArch64::GPRRegBank);
-
- for (MachineInstr &UseMI :
- make_early_inc_range(MRI.use_nodbg_instructions(Dst))) {
- if (UseMI.getOpcode() != AArch64::G_DUP)
- continue;
- for (MachineOperand &Op : UseMI.operands()) {
- if (Op.isReg() && Op.getReg() == Dst)
- Op.setReg(ExtReg);
- }
+ assert(MRI.getRegBank(Dst) == &AArch64::GPRRegBank && DstTy.isScalar() &&
+ DstTy.getSizeInBits() < 32 &&
+ "Expected a scalar smaller than 32 bits on a GPR.");
+ Builder.setInsertPt(*MI.getParent(), std::next(MI.getIterator()));
+ Register ExtReg = MRI.createGenericVirtualRegister(LLT::scalar(32));
+ Builder.buildTrunc(Dst, ExtReg);
+
+ auto Val = MI.getOperand(1).getCImm()->getValue().zext(32);
+ LLVMContext &Ctx = Builder.getMF().getFunction().getContext();
+ MI.getOperand(1).setCImm(ConstantInt::get(Ctx, Val));
+ MI.getOperand(0).setReg(ExtReg);
+ MRI.setRegBank(ExtReg, AArch64::GPRRegBank);
+
+ for (MachineInstr &UseMI :
+ make_early_inc_range(MRI.use_nodbg_instructions(Dst))) {
+ if (UseMI.getOpcode() != AArch64::G_DUP)
+ continue;
+ for (MachineOperand &Op : UseMI.operands()) {
+ if (Op.isReg() && Op.getReg() == Dst)
+ Op.setReg(ExtReg);
}
}
return applyDefaultMapping(OpdMapper);
}
case TargetOpcode::G_FCONSTANT: {
Register Dst = MI.getOperand(0).getReg();
- if (MRI.getRegBank(Dst) == &AArch64::GPRRegBank) {
- const APFloat &Imm = MI.getOperand(1).getFPImm()->getValueAPF();
- APInt Bits = Imm.bitcastToAPInt();
- Builder.setInsertPt(*MI.getParent(), MI.getIterator());
- if (Bits.getBitWidth() < 32) {
- Register ExtReg = MRI.createGenericVirtualRegister(LLT::scalar(32));
- Builder.buildConstant(ExtReg, Bits.zext(32));
- Builder.buildTrunc(Dst, ExtReg);
- MRI.setRegBank(ExtReg, AArch64::GPRRegBank);
- } else {
- Builder.buildConstant(Dst, Bits);
- }
- MI.eraseFromParent();
- return;
+ assert(MRI.getRegBank(Dst) == &AArch64::GPRRegBank &&
+ "Expected Dst to be on a GPR.");
+ const APFloat &Imm = MI.getOperand(1).getFPImm()->getValueAPF();
+ APInt Bits = Imm.bitcastToAPInt();
+ Builder.setInsertPt(*MI.getParent(), MI.getIterator());
+ if (Bits.getBitWidth() < 32) {
+ Register ExtReg = MRI.createGenericVirtualRegister(LLT::scalar(32));
+ Builder.buildConstant(ExtReg, Bits.zext(32));
+ Builder.buildTrunc(Dst, ExtReg);
+ MRI.setRegBank(ExtReg, AArch64::GPRRegBank);
+ } else {
+ Builder.buildConstant(Dst, Bits);
}
- return applyDefaultMapping(OpdMapper);
+ MI.eraseFromParent();
+ return;
}
-
case TargetOpcode::G_STORE: {
Register Dst = MI.getOperand(0).getReg();
LLT Ty = MRI.getType(Dst);
@@ -479,16 +476,9 @@ void AArch64RegisterBankInfo::applyMappingImpl(
"Expected sources smaller than 32-bits");
Builder.setInsertPt(*MI.getParent(), MI.getIterator());
- Register ConstReg;
- auto ConstMI = MRI.getVRegDef(MI.getOperand(1).getReg());
- if (ConstMI->getOpcode() == TargetOpcode::G_CONSTANT) {
- auto CstVal = ConstMI->getOperand(1).getCImm()->getValue();
- ConstReg =
- Builder.buildConstant(LLT::scalar(32), CstVal.sext(32)).getReg(0);
- } else {
- ConstReg = Builder.buildAnyExt(LLT::scalar(32), MI.getOperand(1).getReg())
- .getReg(0);
- }
+ Register ConstReg =
+ Builder.buildAnyExt(LLT::scalar(32), MI.getOperand(1).getReg())
+ .getReg(0);
MRI.setRegBank(ConstReg, getRegBank(AArch64::GPRRegBankID));
MI.getOperand(1).setReg(ConstReg);
return applyDefaultMapping(OpdMapper);
>From 77a345c2e0c43d31e76b95c69c4d22e0015b6a82 Mon Sep 17 00:00:00 2001
From: Ryan Cowan <ryan.cowan at arm.com>
Date: Thu, 5 Feb 2026 10:50:47 +0000
Subject: [PATCH 08/10] Update tests post rebase
---
llvm/test/CodeGen/AArch64/GlobalISel/preselect-process-phis.mir | 2 +-
llvm/test/CodeGen/AArch64/GlobalISel/select-dup.mir | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/preselect-process-phis.mir b/llvm/test/CodeGen/AArch64/GlobalISel/preselect-process-phis.mir
index 0a1e2570ece07..f31226c8d72ad 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/preselect-process-phis.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/preselect-process-phis.mir
@@ -166,7 +166,7 @@ body: |
; CHECK-NEXT: %gp_phi2:gpr32 = PHI %gpr_1, %bb.3, %gpr_2, %bb.2
; CHECK-NEXT: %gp_phi3:gpr32 = PHI %gpr_1, %bb.3, %gpr_2, %bb.2
; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:fpr32 = SUBREG_TO_REG %fp_phi, %subreg.hsub
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr32all = COPY [[SUBREG_TO_REG]]
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32all = COPY [[SUBREG_TO_REG]]
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.5:
; CHECK-NEXT: %use_fp_phi:gpr32 = PHI %gpr_1, %bb.0, [[COPY4]], %bb.4
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-dup.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-dup.mir
index 83c323bcc51ea..2007ff68e73c8 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-dup.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-dup.mir
@@ -445,7 +445,7 @@ body: |
; CHECK: liveins: $w0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 3
- ; CHECK-NEXT: %cst:gpr64all = SUBREG_TO_REG 0, [[MOVi32imm]], %subreg.sub_32
+ ; CHECK-NEXT: %cst:gpr64all = SUBREG_TO_REG [[MOVi32imm]], %subreg.sub_32
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY %cst
; CHECK-NEXT: [[ADRP:%[0-9]+]]:gpr64common = ADRP target-flags(aarch64-page) %const.0
; CHECK-NEXT: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[ADRP]], target-flags(aarch64-pageoff, aarch64-nc) %const.0 :: (load (s128) from constant-pool)
>From 3f10350c2923bc1ddb5ebdd19e73c0c1bd72e85c Mon Sep 17 00:00:00 2001
From: Ryan Cowan <ryan.cowan at arm.com>
Date: Tue, 17 Feb 2026 13:32:05 +0000
Subject: [PATCH 09/10] Use shouldOptimizeForSize & remove auto
---
.../Target/AArch64/GISel/AArch64RegisterBankInfo.cpp | 11 ++++++-----
1 file changed, 6 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
index b3a1398547702..b14023840d986 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -15,6 +15,7 @@
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "llvm/ADT/APInt.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
@@ -25,6 +26,7 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineSizeOpts.h"
#include "llvm/CodeGen/RegisterBank.h"
#include "llvm/CodeGen/RegisterBankInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
@@ -372,11 +374,10 @@ static bool isLegalFPImm(const MachineInstr &MI, const MachineRegisterInfo &MRI,
return false;
EVT VT = EVT::getFloatingPointVT(Bits);
- bool OptForSize = MI.getMF()->getFunction().hasOptSize() ||
- MI.getMF()->getFunction().hasMinSize();
const AArch64TargetLowering *TLI = STI.getTargetLowering();
- return TLI->isFPImmLegal(MI.getOperand(1).getFPImm()->getValueAPF(), VT,
- OptForSize);
+ return TLI->isFPImmLegal(
+ MI.getOperand(1).getFPImm()->getValueAPF(), VT,
+ shouldOptimizeForSize(&MI.getMF()->getFunction(), nullptr, nullptr));
}
void AArch64RegisterBankInfo::applyMappingImpl(
@@ -395,7 +396,7 @@ void AArch64RegisterBankInfo::applyMappingImpl(
Register ExtReg = MRI.createGenericVirtualRegister(LLT::scalar(32));
Builder.buildTrunc(Dst, ExtReg);
- auto Val = MI.getOperand(1).getCImm()->getValue().zext(32);
+ APInt Val = MI.getOperand(1).getCImm()->getValue().zext(32);
LLVMContext &Ctx = Builder.getMF().getFunction().getContext();
MI.getOperand(1).setCImm(ConstantInt::get(Ctx, Val));
MI.getOperand(0).setReg(ExtReg);
>From 88eb84dff1503394195af5aca4412d11a8057390 Mon Sep 17 00:00:00 2001
From: Ryan Cowan <ryan.cowan at arm.com>
Date: Thu, 19 Feb 2026 11:21:31 +0000
Subject: [PATCH 10/10] Check if fpimm would be legal as an advSIMD mov & make
use of value prior to truncation instead of extending
---
.../Target/AArch64/AArch64ISelLowering.cpp | 8 +-
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 2 +
.../AArch64/GISel/AArch64RegisterBankInfo.cpp | 60 ++-
.../GlobalISel/preselect-process-phis.mir | 14 +-
llvm/test/CodeGen/AArch64/aarch64-mops.ll | 416 +++++++-----------
.../test/CodeGen/AArch64/arm64-fp-imm-size.ll | 6 +-
llvm/test/CodeGen/AArch64/arm64-fp-imm.ll | 6 +-
llvm/test/CodeGen/AArch64/arm64-fp128.ll | 24 +-
llvm/test/CodeGen/AArch64/dup.ll | 81 ++--
llvm/test/CodeGen/AArch64/fpimm.ll | 5 +-
.../test/CodeGen/AArch64/fptoui-sat-scalar.ll | 3 +-
.../test/CodeGen/AArch64/fptoui-sat-vector.ll | 131 +++---
12 files changed, 349 insertions(+), 407 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index c3639cff17f17..62cbd6a895792 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -13116,9 +13116,13 @@ bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
// FIXME: We should be able to handle f128 as well with a clever lowering.
const APInt ImmInt = Imm.bitcastToAPInt();
if (VT == MVT::f64)
- IsLegal = AArch64_AM::getFP64Imm(ImmInt) != -1 || Imm.isPosZero();
+ IsLegal = AArch64_AM::getFP64Imm(ImmInt) != -1 ||
+ AArch64_AM::isAdvSIMDModImmType12(ImmInt.getZExtValue()) ||
+ Imm.isPosZero();
else if (VT == MVT::f32)
- IsLegal = AArch64_AM::getFP32Imm(ImmInt) != -1 || Imm.isPosZero();
+ IsLegal = AArch64_AM::getFP32Imm(ImmInt) != -1 ||
+ AArch64_AM::isAdvSIMDModImmType11(ImmInt.getZExtValue()) ||
+ Imm.isPosZero();
else if (VT == MVT::f16 || VT == MVT::bf16)
IsLegal =
(Subtarget->hasFullFP16() && AArch64_AM::getFP16Imm(ImmInt) != -1) ||
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index d234445d6c0d4..e8f9d58864d7d 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -8654,6 +8654,8 @@ def MOVIv2d_ns : SIMDModifiedImmVectorNoShift<1, 1, 0, 0b1110, V128,
[(set (v2i64 V128:$Rd), (AArch64movi_edit imm0_255:$imm8))]>;
let Predicates = [HasNEON] in {
+def : Pat<(f128 fpimm0), (f128 (MOVIv2d_ns (i32 0)))>;
+
def : Pat<(v2i64 immAllZerosV), (MOVIv2d_ns (i32 0))>;
def : Pat<(v4i32 immAllZerosV), (MOVIv2d_ns (i32 0))>;
def : Pat<(v8i16 immAllZerosV), (MOVIv2d_ns (i32 0))>;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
index b14023840d986..0b6b2746add8a 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -380,6 +380,42 @@ static bool isLegalFPImm(const MachineInstr &MI, const MachineRegisterInfo &MRI,
shouldOptimizeForSize(&MI.getMF()->getFunction(), nullptr, nullptr));
}
+// Some of the cases in applyMappingImpl any-extend small scalar operands to 32
+// bits. Such an operand may be a G_TRUNC of a 32-bit G_CONSTANT (a constant
+// that was widened to 32 bits and then truncated). In that case, do not insert
+// a G_ANYEXT; use the 32-bit G_CONSTANT directly and erase the G_TRUNC if it
+// has no remaining uses.
+static bool foldTruncOfI32Constant(MachineInstr &MI, unsigned OpIdx,
+ MachineRegisterInfo &MRI,
+ const AArch64RegisterBankInfo &RBI) {
+ MachineOperand &Op = MI.getOperand(OpIdx);
+ if (!Op.isReg())
+ return false;
+
+ Register ScalarReg = Op.getReg();
+ MachineInstr *TruncMI = MRI.getVRegDef(ScalarReg);
+ if (!TruncMI || TruncMI->getOpcode() != TargetOpcode::G_TRUNC)
+ return false;
+
+ Register TruncSrc = TruncMI->getOperand(1).getReg();
+ MachineInstr *SrcDef = MRI.getVRegDef(TruncSrc);
+ if (!SrcDef || SrcDef->getOpcode() != TargetOpcode::G_CONSTANT)
+ return false;
+
+ LLT TruncSrcTy = MRI.getType(TruncSrc);
+ if (!TruncSrcTy.isScalar() || TruncSrcTy.getSizeInBits() != 32)
+ return false;
+
+ // Avoid truncating and then re-extending the constant; this helps selection.
+ Op.setReg(TruncSrc);
+ MRI.setRegBank(TruncSrc, RBI.getRegBank(AArch64::GPRRegBankID));
+
+ if (MRI.use_empty(ScalarReg))
+ TruncMI->eraseFromParent();
+
+ return true;
+}
+
void AArch64RegisterBankInfo::applyMappingImpl(
MachineIRBuilder &Builder, const OperandsMapper &OpdMapper) const {
MachineInstr &MI = OpdMapper.getMI();
@@ -402,15 +438,6 @@ void AArch64RegisterBankInfo::applyMappingImpl(
MI.getOperand(0).setReg(ExtReg);
MRI.setRegBank(ExtReg, AArch64::GPRRegBank);
- for (MachineInstr &UseMI :
- make_early_inc_range(MRI.use_nodbg_instructions(Dst))) {
- if (UseMI.getOpcode() != AArch64::G_DUP)
- continue;
- for (MachineOperand &Op : UseMI.operands()) {
- if (Op.isReg() && Op.getReg() == Dst)
- Op.setReg(ExtReg);
- }
- }
return applyDefaultMapping(OpdMapper);
}
case TargetOpcode::G_FCONSTANT: {
@@ -434,6 +461,10 @@ void AArch64RegisterBankInfo::applyMappingImpl(
case TargetOpcode::G_STORE: {
Register Dst = MI.getOperand(0).getReg();
LLT Ty = MRI.getType(Dst);
+
+ if (foldTruncOfI32Constant(MI, 0, MRI, *this))
+ return applyDefaultMapping(OpdMapper);
+
if (MRI.getRegBank(Dst) == &AArch64::GPRRegBank && Ty.isScalar() &&
Ty.getSizeInBits() < 32) {
Builder.setInsertPt(*MI.getParent(), MI.getIterator());
@@ -464,6 +495,9 @@ void AArch64RegisterBankInfo::applyMappingImpl(
"Don't know how to handle that ID");
return applyDefaultMapping(OpdMapper);
case TargetOpcode::G_INSERT_VECTOR_ELT: {
+ if (foldTruncOfI32Constant(MI, 2, MRI, *this))
+ return applyDefaultMapping(OpdMapper);
+
// Extend smaller gpr operands to 32 bit.
Builder.setInsertPt(*MI.getParent(), MI.getIterator());
auto Ext = Builder.buildAnyExt(LLT::scalar(32), MI.getOperand(2).getReg());
@@ -472,9 +506,10 @@ void AArch64RegisterBankInfo::applyMappingImpl(
return applyDefaultMapping(OpdMapper);
}
case AArch64::G_DUP: {
- // Extend smaller gpr to 32-bits
- assert(MRI.getType(MI.getOperand(1).getReg()).getSizeInBits() < 32 &&
- "Expected sources smaller than 32-bits");
+ if (foldTruncOfI32Constant(MI, 1, MRI, *this))
+ return applyDefaultMapping(OpdMapper);
+
+ // Extend smaller gpr to 32-bits.
Builder.setInsertPt(*MI.getParent(), MI.getIterator());
Register ConstReg =
@@ -482,6 +517,7 @@ void AArch64RegisterBankInfo::applyMappingImpl(
.getReg(0);
MRI.setRegBank(ConstReg, getRegBank(AArch64::GPRRegBankID));
MI.getOperand(1).setReg(ConstReg);
+
return applyDefaultMapping(OpdMapper);
}
default:
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/preselect-process-phis.mir b/llvm/test/CodeGen/AArch64/GlobalISel/preselect-process-phis.mir
index f31226c8d72ad..4bc3b5de3ffae 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/preselect-process-phis.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/preselect-process-phis.mir
@@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -verify-machineinstrs -mtriple aarch64--- --run-pass=regbankselect,instruction-select -global-isel-abort=1 %s -o - | FileCheck %s
+# RUN: llc -verify-machineinstrs -mtriple aarch64--- --run-pass=instruction-select -global-isel-abort=1 %s -o - | FileCheck %s
---
name: test_loop_phi_fpr_to_gpr
alignment: 4
@@ -30,7 +30,7 @@ body: |
; CHECK-NEXT: bb.2:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[PHI:%[0-9]+]]:gpr32 = PHI [[CSELWr]], %bb.1, %9, %bb.2
+ ; CHECK-NEXT: [[PHI:%[0-9]+]]:gpr32 = PHI [[CSELWr]], %bb.1, %8, %bb.2
; CHECK-NEXT: [[FCVTHSr:%[0-9]+]]:fpr16 = nofpexcept FCVTHSr [[COPY]], implicit $fpcr
; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:fpr32 = SUBREG_TO_REG [[FCVTHSr]], %subreg.hsub
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32all = COPY [[SUBREG_TO_REG]]
@@ -154,22 +154,20 @@ body: |
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.4(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:gpr32 = IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:fpr32 = COPY [[DEF]]
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:fpr16 = COPY [[COPY2]].hsub
+ ; CHECK-NEXT: %fpr:fpr16 = IMPLICIT_DEF
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4:
; CHECK-NEXT: successors: %bb.5(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: %fp_phi:fpr16 = PHI [[COPY3]], %bb.3, [[COPY1]], %bb.2
+ ; CHECK-NEXT: %fp_phi:fpr16 = PHI %fpr, %bb.3, [[COPY1]], %bb.2
; CHECK-NEXT: %gp_phi1:gpr32 = PHI %gpr_1, %bb.3, %gpr_2, %bb.2
; CHECK-NEXT: %gp_phi2:gpr32 = PHI %gpr_1, %bb.3, %gpr_2, %bb.2
; CHECK-NEXT: %gp_phi3:gpr32 = PHI %gpr_1, %bb.3, %gpr_2, %bb.2
; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:fpr32 = SUBREG_TO_REG %fp_phi, %subreg.hsub
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32all = COPY [[SUBREG_TO_REG]]
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr32all = COPY [[SUBREG_TO_REG]]
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.5:
- ; CHECK-NEXT: %use_fp_phi:gpr32 = PHI %gpr_1, %bb.0, [[COPY4]], %bb.4
+ ; CHECK-NEXT: %use_fp_phi:gpr32 = PHI %gpr_1, %bb.0, [[COPY2]], %bb.4
; CHECK-NEXT: %use_gp_phi1:gpr32 = PHI %gpr_1, %bb.0, %gp_phi1, %bb.4
; CHECK-NEXT: %use_gp_phi2:gpr32 = PHI %gpr_1, %bb.0, %gp_phi2, %bb.4
; CHECK-NEXT: %use_gp_phi3:gpr32 = PHI %gpr_1, %bb.0, %gp_phi3, %bb.4
diff --git a/llvm/test/CodeGen/AArch64/aarch64-mops.ll b/llvm/test/CodeGen/AArch64/aarch64-mops.ll
index d082c583faeae..cefe25e322529 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-mops.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-mops.ll
@@ -60,31 +60,17 @@ entry:
}
define void @memset_10_zeroval(ptr %dst) {
-; GISel-WITHOUT-MOPS-O0-LABEL: memset_10_zeroval:
-; GISel-WITHOUT-MOPS-O0: // %bb.0: // %entry
-; GISel-WITHOUT-MOPS-O0-NEXT: str xzr, [x0]
-; GISel-WITHOUT-MOPS-O0-NEXT: mov w8, wzr
-; GISel-WITHOUT-MOPS-O0-NEXT: strh w8, [x0, #8]
-; GISel-WITHOUT-MOPS-O0-NEXT: ret
-;
-; GISel-WITHOUT-MOPS-O3-LABEL: memset_10_zeroval:
-; GISel-WITHOUT-MOPS-O3: // %bb.0: // %entry
-; GISel-WITHOUT-MOPS-O3-NEXT: str xzr, [x0]
-; GISel-WITHOUT-MOPS-O3-NEXT: strh wzr, [x0, #8]
-; GISel-WITHOUT-MOPS-O3-NEXT: ret
-;
-; GISel-MOPS-O0-LABEL: memset_10_zeroval:
-; GISel-MOPS-O0: // %bb.0: // %entry
-; GISel-MOPS-O0-NEXT: str xzr, [x0]
-; GISel-MOPS-O0-NEXT: mov w8, wzr
-; GISel-MOPS-O0-NEXT: strh w8, [x0, #8]
-; GISel-MOPS-O0-NEXT: ret
+; GISel-WITHOUT-MOPS-LABEL: memset_10_zeroval:
+; GISel-WITHOUT-MOPS: // %bb.0: // %entry
+; GISel-WITHOUT-MOPS-NEXT: str xzr, [x0]
+; GISel-WITHOUT-MOPS-NEXT: strh wzr, [x0, #8]
+; GISel-WITHOUT-MOPS-NEXT: ret
;
-; GISel-MOPS-O3-LABEL: memset_10_zeroval:
-; GISel-MOPS-O3: // %bb.0: // %entry
-; GISel-MOPS-O3-NEXT: str xzr, [x0]
-; GISel-MOPS-O3-NEXT: strh wzr, [x0, #8]
-; GISel-MOPS-O3-NEXT: ret
+; GISel-MOPS-LABEL: memset_10_zeroval:
+; GISel-MOPS: // %bb.0: // %entry
+; GISel-MOPS-NEXT: str xzr, [x0]
+; GISel-MOPS-NEXT: strh wzr, [x0, #8]
+; GISel-MOPS-NEXT: ret
;
; SDAG-WITHOUT-MOPS-O2-LABEL: memset_10_zeroval:
; SDAG-WITHOUT-MOPS-O2: // %bb.0: // %entry
@@ -103,31 +89,17 @@ entry:
}
define void @memset_10_zeroval_volatile(ptr %dst) {
-; GISel-WITHOUT-MOPS-O0-LABEL: memset_10_zeroval_volatile:
-; GISel-WITHOUT-MOPS-O0: // %bb.0: // %entry
-; GISel-WITHOUT-MOPS-O0-NEXT: str xzr, [x0]
-; GISel-WITHOUT-MOPS-O0-NEXT: mov w8, wzr
-; GISel-WITHOUT-MOPS-O0-NEXT: strh w8, [x0, #8]
-; GISel-WITHOUT-MOPS-O0-NEXT: ret
-;
-; GISel-WITHOUT-MOPS-O3-LABEL: memset_10_zeroval_volatile:
-; GISel-WITHOUT-MOPS-O3: // %bb.0: // %entry
-; GISel-WITHOUT-MOPS-O3-NEXT: str xzr, [x0]
-; GISel-WITHOUT-MOPS-O3-NEXT: strh wzr, [x0, #8]
-; GISel-WITHOUT-MOPS-O3-NEXT: ret
-;
-; GISel-MOPS-O0-LABEL: memset_10_zeroval_volatile:
-; GISel-MOPS-O0: // %bb.0: // %entry
-; GISel-MOPS-O0-NEXT: str xzr, [x0]
-; GISel-MOPS-O0-NEXT: mov w8, wzr
-; GISel-MOPS-O0-NEXT: strh w8, [x0, #8]
-; GISel-MOPS-O0-NEXT: ret
+; GISel-WITHOUT-MOPS-LABEL: memset_10_zeroval_volatile:
+; GISel-WITHOUT-MOPS: // %bb.0: // %entry
+; GISel-WITHOUT-MOPS-NEXT: str xzr, [x0]
+; GISel-WITHOUT-MOPS-NEXT: strh wzr, [x0, #8]
+; GISel-WITHOUT-MOPS-NEXT: ret
;
-; GISel-MOPS-O3-LABEL: memset_10_zeroval_volatile:
-; GISel-MOPS-O3: // %bb.0: // %entry
-; GISel-MOPS-O3-NEXT: str xzr, [x0]
-; GISel-MOPS-O3-NEXT: strh wzr, [x0, #8]
-; GISel-MOPS-O3-NEXT: ret
+; GISel-MOPS-LABEL: memset_10_zeroval_volatile:
+; GISel-MOPS: // %bb.0: // %entry
+; GISel-MOPS-NEXT: str xzr, [x0]
+; GISel-MOPS-NEXT: strh wzr, [x0, #8]
+; GISel-MOPS-NEXT: ret
;
; SDAG-WITHOUT-MOPS-O2-LABEL: memset_10_zeroval_volatile:
; SDAG-WITHOUT-MOPS-O2: // %bb.0: // %entry
@@ -459,16 +431,16 @@ define void @memset_10(ptr %dst, i32 %value) {
;
; SDAG-WITHOUT-MOPS-O2-LABEL: memset_10:
; SDAG-WITHOUT-MOPS-O2: // %bb.0: // %entry
-; SDAG-WITHOUT-MOPS-O2-NEXT: dup v0.16b, w1
-; SDAG-WITHOUT-MOPS-O2-NEXT: str h0, [x0, #8]
-; SDAG-WITHOUT-MOPS-O2-NEXT: str d0, [x0]
+; SDAG-WITHOUT-MOPS-O2-NEXT: dup v0.16b, w1
+; SDAG-WITHOUT-MOPS-O2-NEXT: str h0, [x0, #8]
+; SDAG-WITHOUT-MOPS-O2-NEXT: str d0, [x0]
; SDAG-WITHOUT-MOPS-O2-NEXT: ret
;
; SDAG-MOPS-O2-LABEL: memset_10:
; SDAG-MOPS-O2: // %bb.0: // %entry
-; SDAG-MOPS-O2-NEXT: dup v0.16b, w1
-; SDAG-MOPS-O2-NEXT: str h0, [x0, #8]
-; SDAG-MOPS-O2-NEXT: str d0, [x0]
+; SDAG-MOPS-O2-NEXT: dup v0.16b, w1
+; SDAG-MOPS-O2-NEXT: str h0, [x0, #8]
+; SDAG-MOPS-O2-NEXT: str d0, [x0]
; SDAG-MOPS-O2-NEXT: ret
entry:
%value_trunc = trunc i32 %value to i8
@@ -519,16 +491,16 @@ define void @memset_10_volatile(ptr %dst, i32 %value) {
;
; SDAG-WITHOUT-MOPS-O2-LABEL: memset_10_volatile:
; SDAG-WITHOUT-MOPS-O2: // %bb.0: // %entry
-; SDAG-WITHOUT-MOPS-O2-NEXT: dup v0.16b, w1
-; SDAG-WITHOUT-MOPS-O2-NEXT: str h0, [x0, #8]
-; SDAG-WITHOUT-MOPS-O2-NEXT: str d0, [x0]
+; SDAG-WITHOUT-MOPS-O2-NEXT: dup v0.16b, w1
+; SDAG-WITHOUT-MOPS-O2-NEXT: str h0, [x0, #8]
+; SDAG-WITHOUT-MOPS-O2-NEXT: str d0, [x0]
; SDAG-WITHOUT-MOPS-O2-NEXT: ret
;
; SDAG-MOPS-O2-LABEL: memset_10_volatile:
; SDAG-MOPS-O2: // %bb.0: // %entry
-; SDAG-MOPS-O2-NEXT: dup v0.16b, w1
-; SDAG-MOPS-O2-NEXT: str h0, [x0, #8]
-; SDAG-MOPS-O2-NEXT: str d0, [x0]
+; SDAG-MOPS-O2-NEXT: dup v0.16b, w1
+; SDAG-MOPS-O2-NEXT: str h0, [x0, #8]
+; SDAG-MOPS-O2-NEXT: str d0, [x0]
; SDAG-MOPS-O2-NEXT: ret
entry:
%value_trunc = trunc i32 %value to i8
@@ -1526,28 +1498,28 @@ define void @memcpy_inline_300(ptr %dst, ptr %src, i32 %value) {
;
; SDAG-WITHOUT-MOPS-O2-LABEL: memcpy_inline_300:
; SDAG-WITHOUT-MOPS-O2: // %bb.0: // %entry
-; SDAG-WITHOUT-MOPS-O2-NEXT: ldp q1, q0, [x1, #32]
-; SDAG-WITHOUT-MOPS-O2-NEXT: add x8, x1, #284
-; SDAG-WITHOUT-MOPS-O2-NEXT: ldp q2, q3, [x1]
-; SDAG-WITHOUT-MOPS-O2-NEXT: stp q1, q0, [x0, #32]
-; SDAG-WITHOUT-MOPS-O2-NEXT: stp q2, q3, [x0]
-; SDAG-WITHOUT-MOPS-O2-NEXT: ldp q1, q0, [x1, #96]
-; SDAG-WITHOUT-MOPS-O2-NEXT: ldp q2, q3, [x1, #64]
-; SDAG-WITHOUT-MOPS-O2-NEXT: stp q1, q0, [x0, #96]
-; SDAG-WITHOUT-MOPS-O2-NEXT: stp q2, q3, [x0, #64]
-; SDAG-WITHOUT-MOPS-O2-NEXT: ldp q1, q0, [x1, #160]
-; SDAG-WITHOUT-MOPS-O2-NEXT: ldp q2, q3, [x1, #128]
-; SDAG-WITHOUT-MOPS-O2-NEXT: stp q1, q0, [x0, #160]
-; SDAG-WITHOUT-MOPS-O2-NEXT: stp q2, q3, [x0, #128]
-; SDAG-WITHOUT-MOPS-O2-NEXT: ldp q1, q0, [x1, #224]
-; SDAG-WITHOUT-MOPS-O2-NEXT: ldp q2, q3, [x1, #192]
-; SDAG-WITHOUT-MOPS-O2-NEXT: stp q1, q0, [x0, #224]
-; SDAG-WITHOUT-MOPS-O2-NEXT: stp q2, q3, [x0, #192]
-; SDAG-WITHOUT-MOPS-O2-NEXT: ldp q2, q1, [x1, #256]
-; SDAG-WITHOUT-MOPS-O2-NEXT: ldr q0, [x8]
-; SDAG-WITHOUT-MOPS-O2-NEXT: add x8, x0, #284
-; SDAG-WITHOUT-MOPS-O2-NEXT: str q0, [x8]
-; SDAG-WITHOUT-MOPS-O2-NEXT: stp q2, q1, [x0, #256]
+; SDAG-WITHOUT-MOPS-O2-NEXT: ldp q1, q0, [x1, #32]
+; SDAG-WITHOUT-MOPS-O2-NEXT: add x8, x1, #284
+; SDAG-WITHOUT-MOPS-O2-NEXT: ldp q2, q3, [x1]
+; SDAG-WITHOUT-MOPS-O2-NEXT: stp q1, q0, [x0, #32]
+; SDAG-WITHOUT-MOPS-O2-NEXT: stp q2, q3, [x0]
+; SDAG-WITHOUT-MOPS-O2-NEXT: ldp q1, q0, [x1, #96]
+; SDAG-WITHOUT-MOPS-O2-NEXT: ldp q2, q3, [x1, #64]
+; SDAG-WITHOUT-MOPS-O2-NEXT: stp q1, q0, [x0, #96]
+; SDAG-WITHOUT-MOPS-O2-NEXT: stp q2, q3, [x0, #64]
+; SDAG-WITHOUT-MOPS-O2-NEXT: ldp q1, q0, [x1, #160]
+; SDAG-WITHOUT-MOPS-O2-NEXT: ldp q2, q3, [x1, #128]
+; SDAG-WITHOUT-MOPS-O2-NEXT: stp q1, q0, [x0, #160]
+; SDAG-WITHOUT-MOPS-O2-NEXT: stp q2, q3, [x0, #128]
+; SDAG-WITHOUT-MOPS-O2-NEXT: ldp q1, q0, [x1, #224]
+; SDAG-WITHOUT-MOPS-O2-NEXT: ldp q2, q3, [x1, #192]
+; SDAG-WITHOUT-MOPS-O2-NEXT: stp q1, q0, [x0, #224]
+; SDAG-WITHOUT-MOPS-O2-NEXT: stp q2, q3, [x0, #192]
+; SDAG-WITHOUT-MOPS-O2-NEXT: ldp q2, q1, [x1, #256]
+; SDAG-WITHOUT-MOPS-O2-NEXT: ldr q0, [x8]
+; SDAG-WITHOUT-MOPS-O2-NEXT: add x8, x0, #284
+; SDAG-WITHOUT-MOPS-O2-NEXT: str q0, [x8]
+; SDAG-WITHOUT-MOPS-O2-NEXT: stp q2, q1, [x0, #256]
; SDAG-WITHOUT-MOPS-O2-NEXT: ret
;
; SDAG-MOPS-O2-LABEL: memcpy_inline_300:
@@ -1708,80 +1680,52 @@ entry:
}
define void @memcpy_inline_65(ptr %dst, ptr %src, i32 %value) {
-; GISel-WITHOUT-MOPS-O0-LABEL: memcpy_inline_65:
-; GISel-WITHOUT-MOPS-O0: // %bb.0: // %entry
-; GISel-WITHOUT-MOPS-O0-NEXT: ldr q0, [x1]
-; GISel-WITHOUT-MOPS-O0-NEXT: str q0, [x0]
-; GISel-WITHOUT-MOPS-O0-NEXT: ldr q0, [x1, #16]
-; GISel-WITHOUT-MOPS-O0-NEXT: str q0, [x0, #16]
-; GISel-WITHOUT-MOPS-O0-NEXT: ldr q0, [x1, #32]
-; GISel-WITHOUT-MOPS-O0-NEXT: str q0, [x0, #32]
-; GISel-WITHOUT-MOPS-O0-NEXT: ldr q0, [x1, #48]
-; GISel-WITHOUT-MOPS-O0-NEXT: str q0, [x0, #48]
-; GISel-WITHOUT-MOPS-O0-NEXT: ldrb w8, [x1, #64]
-; GISel-WITHOUT-MOPS-O0-NEXT: strb w8, [x0, #64]
-; GISel-WITHOUT-MOPS-O0-NEXT: ret
-;
-; GISel-WITHOUT-MOPS-O3-LABEL: memcpy_inline_65:
-; GISel-WITHOUT-MOPS-O3: // %bb.0: // %entry
-; GISel-WITHOUT-MOPS-O3-NEXT: ldr q0, [x1]
-; GISel-WITHOUT-MOPS-O3-NEXT: str q0, [x0]
-; GISel-WITHOUT-MOPS-O3-NEXT: ldr q0, [x1, #16]
-; GISel-WITHOUT-MOPS-O3-NEXT: str q0, [x0, #16]
-; GISel-WITHOUT-MOPS-O3-NEXT: ldr q0, [x1, #32]
-; GISel-WITHOUT-MOPS-O3-NEXT: str q0, [x0, #32]
-; GISel-WITHOUT-MOPS-O3-NEXT: ldr q0, [x1, #48]
-; GISel-WITHOUT-MOPS-O3-NEXT: str q0, [x0, #48]
-; GISel-WITHOUT-MOPS-O3-NEXT: ldrb w8, [x1, #64]
-; GISel-WITHOUT-MOPS-O3-NEXT: strb w8, [x0, #64]
-; GISel-WITHOUT-MOPS-O3-NEXT: ret
-;
-; GISel-MOPS-O0-LABEL: memcpy_inline_65:
-; GISel-MOPS-O0: // %bb.0: // %entry
-; GISel-MOPS-O0-NEXT: ldr q0, [x1]
-; GISel-MOPS-O0-NEXT: str q0, [x0]
-; GISel-MOPS-O0-NEXT: ldr q0, [x1, #16]
-; GISel-MOPS-O0-NEXT: str q0, [x0, #16]
-; GISel-MOPS-O0-NEXT: ldr q0, [x1, #32]
-; GISel-MOPS-O0-NEXT: str q0, [x0, #32]
-; GISel-MOPS-O0-NEXT: ldr q0, [x1, #48]
-; GISel-MOPS-O0-NEXT: str q0, [x0, #48]
-; GISel-MOPS-O0-NEXT: ldrb w8, [x1, #64]
-; GISel-MOPS-O0-NEXT: strb w8, [x0, #64]
-; GISel-MOPS-O0-NEXT: ret
+; GISel-WITHOUT-MOPS-LABEL: memcpy_inline_65:
+; GISel-WITHOUT-MOPS: // %bb.0: // %entry
+; GISel-WITHOUT-MOPS-NEXT: ldr q0, [x1]
+; GISel-WITHOUT-MOPS-NEXT: str q0, [x0]
+; GISel-WITHOUT-MOPS-NEXT: ldr q0, [x1, #16]
+; GISel-WITHOUT-MOPS-NEXT: str q0, [x0, #16]
+; GISel-WITHOUT-MOPS-NEXT: ldr q0, [x1, #32]
+; GISel-WITHOUT-MOPS-NEXT: str q0, [x0, #32]
+; GISel-WITHOUT-MOPS-NEXT: ldr q0, [x1, #48]
+; GISel-WITHOUT-MOPS-NEXT: str q0, [x0, #48]
+; GISel-WITHOUT-MOPS-NEXT: ldrb w8, [x1, #64]
+; GISel-WITHOUT-MOPS-NEXT: strb w8, [x0, #64]
+; GISel-WITHOUT-MOPS-NEXT: ret
;
-; GISel-MOPS-O3-LABEL: memcpy_inline_65:
-; GISel-MOPS-O3: // %bb.0: // %entry
-; GISel-MOPS-O3-NEXT: ldr q0, [x1]
-; GISel-MOPS-O3-NEXT: str q0, [x0]
-; GISel-MOPS-O3-NEXT: ldr q0, [x1, #16]
-; GISel-MOPS-O3-NEXT: str q0, [x0, #16]
-; GISel-MOPS-O3-NEXT: ldr q0, [x1, #32]
-; GISel-MOPS-O3-NEXT: str q0, [x0, #32]
-; GISel-MOPS-O3-NEXT: ldr q0, [x1, #48]
-; GISel-MOPS-O3-NEXT: str q0, [x0, #48]
-; GISel-MOPS-O3-NEXT: ldrb w8, [x1, #64]
-; GISel-MOPS-O3-NEXT: strb w8, [x0, #64]
-; GISel-MOPS-O3-NEXT: ret
+; GISel-MOPS-LABEL: memcpy_inline_65:
+; GISel-MOPS: // %bb.0: // %entry
+; GISel-MOPS-NEXT: ldr q0, [x1]
+; GISel-MOPS-NEXT: str q0, [x0]
+; GISel-MOPS-NEXT: ldr q0, [x1, #16]
+; GISel-MOPS-NEXT: str q0, [x0, #16]
+; GISel-MOPS-NEXT: ldr q0, [x1, #32]
+; GISel-MOPS-NEXT: str q0, [x0, #32]
+; GISel-MOPS-NEXT: ldr q0, [x1, #48]
+; GISel-MOPS-NEXT: str q0, [x0, #48]
+; GISel-MOPS-NEXT: ldrb w8, [x1, #64]
+; GISel-MOPS-NEXT: strb w8, [x0, #64]
+; GISel-MOPS-NEXT: ret
;
; SDAG-WITHOUT-MOPS-O2-LABEL: memcpy_inline_65:
; SDAG-WITHOUT-MOPS-O2: // %bb.0: // %entry
-; SDAG-WITHOUT-MOPS-O2-NEXT: ldp q1, q0, [x1, #32]
-; SDAG-WITHOUT-MOPS-O2-NEXT: ldp q2, q3, [x1]
-; SDAG-WITHOUT-MOPS-O2-NEXT: stp q1, q0, [x0, #32]
-; SDAG-WITHOUT-MOPS-O2-NEXT: stp q2, q3, [x0]
-; SDAG-WITHOUT-MOPS-O2-NEXT: ldrb w8, [x1, #64]
-; SDAG-WITHOUT-MOPS-O2-NEXT: strb w8, [x0, #64]
+; SDAG-WITHOUT-MOPS-O2-NEXT: ldp q1, q0, [x1, #32]
+; SDAG-WITHOUT-MOPS-O2-NEXT: ldp q2, q3, [x1]
+; SDAG-WITHOUT-MOPS-O2-NEXT: stp q1, q0, [x0, #32]
+; SDAG-WITHOUT-MOPS-O2-NEXT: stp q2, q3, [x0]
+; SDAG-WITHOUT-MOPS-O2-NEXT: ldrb w8, [x1, #64]
+; SDAG-WITHOUT-MOPS-O2-NEXT: strb w8, [x0, #64]
; SDAG-WITHOUT-MOPS-O2-NEXT: ret
;
; SDAG-MOPS-O2-LABEL: memcpy_inline_65:
; SDAG-MOPS-O2: // %bb.0: // %entry
-; SDAG-MOPS-O2-NEXT: ldp q1, q0, [x1, #32]
-; SDAG-MOPS-O2-NEXT: ldp q2, q3, [x1]
-; SDAG-MOPS-O2-NEXT: stp q1, q0, [x0, #32]
-; SDAG-MOPS-O2-NEXT: stp q2, q3, [x0]
-; SDAG-MOPS-O2-NEXT: ldrb w8, [x1, #64]
-; SDAG-MOPS-O2-NEXT: strb w8, [x0, #64]
+; SDAG-MOPS-O2-NEXT: ldp q1, q0, [x1, #32]
+; SDAG-MOPS-O2-NEXT: ldp q2, q3, [x1]
+; SDAG-MOPS-O2-NEXT: stp q1, q0, [x0, #32]
+; SDAG-MOPS-O2-NEXT: stp q2, q3, [x0]
+; SDAG-MOPS-O2-NEXT: ldrb w8, [x1, #64]
+; SDAG-MOPS-O2-NEXT: strb w8, [x0, #64]
; SDAG-MOPS-O2-NEXT: ret
entry:
call void @llvm.memcpy.inline.p0.p0.i64(ptr align 1 %dst, ptr align 1 %src, i64 65, i1 false)
@@ -1789,68 +1733,44 @@ entry:
}
define void @memcpy_inline_64(ptr %dst, ptr %src, i32 %value) {
-; GISel-WITHOUT-MOPS-O0-LABEL: memcpy_inline_64:
-; GISel-WITHOUT-MOPS-O0: // %bb.0: // %entry
-; GISel-WITHOUT-MOPS-O0-NEXT: ldr q0, [x1]
-; GISel-WITHOUT-MOPS-O0-NEXT: str q0, [x0]
-; GISel-WITHOUT-MOPS-O0-NEXT: ldr q0, [x1, #16]
-; GISel-WITHOUT-MOPS-O0-NEXT: str q0, [x0, #16]
-; GISel-WITHOUT-MOPS-O0-NEXT: ldr q0, [x1, #32]
-; GISel-WITHOUT-MOPS-O0-NEXT: str q0, [x0, #32]
-; GISel-WITHOUT-MOPS-O0-NEXT: ldr q0, [x1, #48]
-; GISel-WITHOUT-MOPS-O0-NEXT: str q0, [x0, #48]
-; GISel-WITHOUT-MOPS-O0-NEXT: ret
-;
-; GISel-WITHOUT-MOPS-O3-LABEL: memcpy_inline_64:
-; GISel-WITHOUT-MOPS-O3: // %bb.0: // %entry
-; GISel-WITHOUT-MOPS-O3-NEXT: ldr q0, [x1]
-; GISel-WITHOUT-MOPS-O3-NEXT: str q0, [x0]
-; GISel-WITHOUT-MOPS-O3-NEXT: ldr q0, [x1, #16]
-; GISel-WITHOUT-MOPS-O3-NEXT: str q0, [x0, #16]
-; GISel-WITHOUT-MOPS-O3-NEXT: ldr q0, [x1, #32]
-; GISel-WITHOUT-MOPS-O3-NEXT: str q0, [x0, #32]
-; GISel-WITHOUT-MOPS-O3-NEXT: ldr q0, [x1, #48]
-; GISel-WITHOUT-MOPS-O3-NEXT: str q0, [x0, #48]
-; GISel-WITHOUT-MOPS-O3-NEXT: ret
-;
-; GISel-MOPS-O0-LABEL: memcpy_inline_64:
-; GISel-MOPS-O0: // %bb.0: // %entry
-; GISel-MOPS-O0-NEXT: ldr q0, [x1]
-; GISel-MOPS-O0-NEXT: str q0, [x0]
-; GISel-MOPS-O0-NEXT: ldr q0, [x1, #16]
-; GISel-MOPS-O0-NEXT: str q0, [x0, #16]
-; GISel-MOPS-O0-NEXT: ldr q0, [x1, #32]
-; GISel-MOPS-O0-NEXT: str q0, [x0, #32]
-; GISel-MOPS-O0-NEXT: ldr q0, [x1, #48]
-; GISel-MOPS-O0-NEXT: str q0, [x0, #48]
-; GISel-MOPS-O0-NEXT: ret
+; GISel-WITHOUT-MOPS-LABEL: memcpy_inline_64:
+; GISel-WITHOUT-MOPS: // %bb.0: // %entry
+; GISel-WITHOUT-MOPS-NEXT: ldr q0, [x1]
+; GISel-WITHOUT-MOPS-NEXT: str q0, [x0]
+; GISel-WITHOUT-MOPS-NEXT: ldr q0, [x1, #16]
+; GISel-WITHOUT-MOPS-NEXT: str q0, [x0, #16]
+; GISel-WITHOUT-MOPS-NEXT: ldr q0, [x1, #32]
+; GISel-WITHOUT-MOPS-NEXT: str q0, [x0, #32]
+; GISel-WITHOUT-MOPS-NEXT: ldr q0, [x1, #48]
+; GISel-WITHOUT-MOPS-NEXT: str q0, [x0, #48]
+; GISel-WITHOUT-MOPS-NEXT: ret
;
-; GISel-MOPS-O3-LABEL: memcpy_inline_64:
-; GISel-MOPS-O3: // %bb.0: // %entry
-; GISel-MOPS-O3-NEXT: ldr q0, [x1]
-; GISel-MOPS-O3-NEXT: str q0, [x0]
-; GISel-MOPS-O3-NEXT: ldr q0, [x1, #16]
-; GISel-MOPS-O3-NEXT: str q0, [x0, #16]
-; GISel-MOPS-O3-NEXT: ldr q0, [x1, #32]
-; GISel-MOPS-O3-NEXT: str q0, [x0, #32]
-; GISel-MOPS-O3-NEXT: ldr q0, [x1, #48]
-; GISel-MOPS-O3-NEXT: str q0, [x0, #48]
-; GISel-MOPS-O3-NEXT: ret
+; GISel-MOPS-LABEL: memcpy_inline_64:
+; GISel-MOPS: // %bb.0: // %entry
+; GISel-MOPS-NEXT: ldr q0, [x1]
+; GISel-MOPS-NEXT: str q0, [x0]
+; GISel-MOPS-NEXT: ldr q0, [x1, #16]
+; GISel-MOPS-NEXT: str q0, [x0, #16]
+; GISel-MOPS-NEXT: ldr q0, [x1, #32]
+; GISel-MOPS-NEXT: str q0, [x0, #32]
+; GISel-MOPS-NEXT: ldr q0, [x1, #48]
+; GISel-MOPS-NEXT: str q0, [x0, #48]
+; GISel-MOPS-NEXT: ret
;
; SDAG-WITHOUT-MOPS-O2-LABEL: memcpy_inline_64:
; SDAG-WITHOUT-MOPS-O2: // %bb.0: // %entry
-; SDAG-WITHOUT-MOPS-O2-NEXT: ldp q1, q0, [x1, #32]
-; SDAG-WITHOUT-MOPS-O2-NEXT: ldp q2, q3, [x1]
-; SDAG-WITHOUT-MOPS-O2-NEXT: stp q1, q0, [x0, #32]
-; SDAG-WITHOUT-MOPS-O2-NEXT: stp q2, q3, [x0]
+; SDAG-WITHOUT-MOPS-O2-NEXT: ldp q1, q0, [x1, #32]
+; SDAG-WITHOUT-MOPS-O2-NEXT: ldp q2, q3, [x1]
+; SDAG-WITHOUT-MOPS-O2-NEXT: stp q1, q0, [x0, #32]
+; SDAG-WITHOUT-MOPS-O2-NEXT: stp q2, q3, [x0]
; SDAG-WITHOUT-MOPS-O2-NEXT: ret
;
; SDAG-MOPS-O2-LABEL: memcpy_inline_64:
; SDAG-MOPS-O2: // %bb.0: // %entry
-; SDAG-MOPS-O2-NEXT: ldp q1, q0, [x1, #32]
-; SDAG-MOPS-O2-NEXT: ldp q2, q3, [x1]
-; SDAG-MOPS-O2-NEXT: stp q1, q0, [x0, #32]
-; SDAG-MOPS-O2-NEXT: stp q2, q3, [x0]
+; SDAG-MOPS-O2-NEXT: ldp q1, q0, [x1, #32]
+; SDAG-MOPS-O2-NEXT: ldp q2, q3, [x1]
+; SDAG-MOPS-O2-NEXT: stp q1, q0, [x0, #32]
+; SDAG-MOPS-O2-NEXT: stp q2, q3, [x0]
; SDAG-MOPS-O2-NEXT: ret
entry:
call void @llvm.memcpy.inline.p0.p0.i64(ptr align 1 %dst, ptr align 1 %src, i64 64, i1 false)
@@ -1858,72 +1778,48 @@ entry:
}
define void @memcpy_inline_63(ptr %dst, ptr %src, i32 %value) {
-; GISel-WITHOUT-MOPS-O0-LABEL: memcpy_inline_63:
-; GISel-WITHOUT-MOPS-O0: // %bb.0: // %entry
-; GISel-WITHOUT-MOPS-O0-NEXT: ldr q0, [x1]
-; GISel-WITHOUT-MOPS-O0-NEXT: str q0, [x0]
-; GISel-WITHOUT-MOPS-O0-NEXT: ldr q0, [x1, #16]
-; GISel-WITHOUT-MOPS-O0-NEXT: str q0, [x0, #16]
-; GISel-WITHOUT-MOPS-O0-NEXT: ldr q0, [x1, #32]
-; GISel-WITHOUT-MOPS-O0-NEXT: str q0, [x0, #32]
-; GISel-WITHOUT-MOPS-O0-NEXT: ldur q0, [x1, #47]
-; GISel-WITHOUT-MOPS-O0-NEXT: stur q0, [x0, #47]
-; GISel-WITHOUT-MOPS-O0-NEXT: ret
-;
-; GISel-WITHOUT-MOPS-O3-LABEL: memcpy_inline_63:
-; GISel-WITHOUT-MOPS-O3: // %bb.0: // %entry
-; GISel-WITHOUT-MOPS-O3-NEXT: ldr q0, [x1]
-; GISel-WITHOUT-MOPS-O3-NEXT: str q0, [x0]
-; GISel-WITHOUT-MOPS-O3-NEXT: ldr q0, [x1, #16]
-; GISel-WITHOUT-MOPS-O3-NEXT: str q0, [x0, #16]
-; GISel-WITHOUT-MOPS-O3-NEXT: ldr q0, [x1, #32]
-; GISel-WITHOUT-MOPS-O3-NEXT: str q0, [x0, #32]
-; GISel-WITHOUT-MOPS-O3-NEXT: ldur q0, [x1, #47]
-; GISel-WITHOUT-MOPS-O3-NEXT: stur q0, [x0, #47]
-; GISel-WITHOUT-MOPS-O3-NEXT: ret
-;
-; GISel-MOPS-O0-LABEL: memcpy_inline_63:
-; GISel-MOPS-O0: // %bb.0: // %entry
-; GISel-MOPS-O0-NEXT: ldr q0, [x1]
-; GISel-MOPS-O0-NEXT: str q0, [x0]
-; GISel-MOPS-O0-NEXT: ldr q0, [x1, #16]
-; GISel-MOPS-O0-NEXT: str q0, [x0, #16]
-; GISel-MOPS-O0-NEXT: ldr q0, [x1, #32]
-; GISel-MOPS-O0-NEXT: str q0, [x0, #32]
-; GISel-MOPS-O0-NEXT: ldur q0, [x1, #47]
-; GISel-MOPS-O0-NEXT: stur q0, [x0, #47]
-; GISel-MOPS-O0-NEXT: ret
+; GISel-WITHOUT-MOPS-LABEL: memcpy_inline_63:
+; GISel-WITHOUT-MOPS: // %bb.0: // %entry
+; GISel-WITHOUT-MOPS-NEXT: ldr q0, [x1]
+; GISel-WITHOUT-MOPS-NEXT: str q0, [x0]
+; GISel-WITHOUT-MOPS-NEXT: ldr q0, [x1, #16]
+; GISel-WITHOUT-MOPS-NEXT: str q0, [x0, #16]
+; GISel-WITHOUT-MOPS-NEXT: ldr q0, [x1, #32]
+; GISel-WITHOUT-MOPS-NEXT: str q0, [x0, #32]
+; GISel-WITHOUT-MOPS-NEXT: ldur q0, [x1, #47]
+; GISel-WITHOUT-MOPS-NEXT: stur q0, [x0, #47]
+; GISel-WITHOUT-MOPS-NEXT: ret
;
-; GISel-MOPS-O3-LABEL: memcpy_inline_63:
-; GISel-MOPS-O3: // %bb.0: // %entry
-; GISel-MOPS-O3-NEXT: ldr q0, [x1]
-; GISel-MOPS-O3-NEXT: str q0, [x0]
-; GISel-MOPS-O3-NEXT: ldr q0, [x1, #16]
-; GISel-MOPS-O3-NEXT: str q0, [x0, #16]
-; GISel-MOPS-O3-NEXT: ldr q0, [x1, #32]
-; GISel-MOPS-O3-NEXT: str q0, [x0, #32]
-; GISel-MOPS-O3-NEXT: ldur q0, [x1, #47]
-; GISel-MOPS-O3-NEXT: stur q0, [x0, #47]
-; GISel-MOPS-O3-NEXT: ret
+; GISel-MOPS-LABEL: memcpy_inline_63:
+; GISel-MOPS: // %bb.0: // %entry
+; GISel-MOPS-NEXT: ldr q0, [x1]
+; GISel-MOPS-NEXT: str q0, [x0]
+; GISel-MOPS-NEXT: ldr q0, [x1, #16]
+; GISel-MOPS-NEXT: str q0, [x0, #16]
+; GISel-MOPS-NEXT: ldr q0, [x1, #32]
+; GISel-MOPS-NEXT: str q0, [x0, #32]
+; GISel-MOPS-NEXT: ldur q0, [x1, #47]
+; GISel-MOPS-NEXT: stur q0, [x0, #47]
+; GISel-MOPS-NEXT: ret
;
; SDAG-WITHOUT-MOPS-O2-LABEL: memcpy_inline_63:
; SDAG-WITHOUT-MOPS-O2: // %bb.0: // %entry
-; SDAG-WITHOUT-MOPS-O2-NEXT: ldp q3, q1, [x1, #16]
-; SDAG-WITHOUT-MOPS-O2-NEXT: ldur q0, [x1, #47]
-; SDAG-WITHOUT-MOPS-O2-NEXT: ldr q2, [x1]
-; SDAG-WITHOUT-MOPS-O2-NEXT: stur q0, [x0, #47]
-; SDAG-WITHOUT-MOPS-O2-NEXT: stp q3, q1, [x0, #16]
-; SDAG-WITHOUT-MOPS-O2-NEXT: str q2, [x0]
+; SDAG-WITHOUT-MOPS-O2-NEXT: ldp q3, q1, [x1, #16]
+; SDAG-WITHOUT-MOPS-O2-NEXT: ldur q0, [x1, #47]
+; SDAG-WITHOUT-MOPS-O2-NEXT: ldr q2, [x1]
+; SDAG-WITHOUT-MOPS-O2-NEXT: stur q0, [x0, #47]
+; SDAG-WITHOUT-MOPS-O2-NEXT: stp q3, q1, [x0, #16]
+; SDAG-WITHOUT-MOPS-O2-NEXT: str q2, [x0]
; SDAG-WITHOUT-MOPS-O2-NEXT: ret
;
; SDAG-MOPS-O2-LABEL: memcpy_inline_63:
; SDAG-MOPS-O2: // %bb.0: // %entry
-; SDAG-MOPS-O2-NEXT: ldp q3, q1, [x1, #16]
-; SDAG-MOPS-O2-NEXT: ldur q0, [x1, #47]
-; SDAG-MOPS-O2-NEXT: ldr q2, [x1]
-; SDAG-MOPS-O2-NEXT: stur q0, [x0, #47]
-; SDAG-MOPS-O2-NEXT: stp q3, q1, [x0, #16]
-; SDAG-MOPS-O2-NEXT: str q2, [x0]
+; SDAG-MOPS-O2-NEXT: ldp q3, q1, [x1, #16]
+; SDAG-MOPS-O2-NEXT: ldur q0, [x1, #47]
+; SDAG-MOPS-O2-NEXT: ldr q2, [x1]
+; SDAG-MOPS-O2-NEXT: stur q0, [x0, #47]
+; SDAG-MOPS-O2-NEXT: stp q3, q1, [x0, #16]
+; SDAG-MOPS-O2-NEXT: str q2, [x0]
; SDAG-MOPS-O2-NEXT: ret
entry:
call void @llvm.memcpy.inline.p0.p0.i64(ptr align 1 %dst, ptr align 1 %src, i64 63, i1 false)
diff --git a/llvm/test/CodeGen/AArch64/arm64-fp-imm-size.ll b/llvm/test/CodeGen/AArch64/arm64-fp-imm-size.ll
index 78c45c9758e7d..bb34c560a217c 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fp-imm-size.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fp-imm-size.ll
@@ -73,12 +73,8 @@ define fp128 @baz() optsize {
;
; CHECK-GI-LABEL: baz:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: Lloh0:
-; CHECK-GI-NEXT: adrp x8, lCPI3_0@PAGE
-; CHECK-GI-NEXT: Lloh1:
-; CHECK-GI-NEXT: ldr q0, [x8, lCPI3_0@PAGEOFF]
+; CHECK-GI-NEXT: movi.2d v0, #0000000000000000
; CHECK-GI-NEXT: ret
-; CHECK-GI-NEXT: .loh AdrpLdr Lloh0, Lloh1
ret fp128 0xL00000000000000000000000000000000
}
diff --git a/llvm/test/CodeGen/AArch64/arm64-fp-imm.ll b/llvm/test/CodeGen/AArch64/arm64-fp-imm.ll
index f541715235e03..fa910c12b95bc 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fp-imm.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fp-imm.ll
@@ -45,11 +45,7 @@ define fp128 @baz() {
;
; CHECK-GI-LABEL: baz:
; CHECK-GI: ; %bb.0:
-; CHECK-GI-NEXT: Lloh0:
-; CHECK-GI-NEXT: adrp x8, lCPI2_0@PAGE
-; CHECK-GI-NEXT: Lloh1:
-; CHECK-GI-NEXT: ldr q0, [x8, lCPI2_0@PAGEOFF]
+; CHECK-GI-NEXT: movi.2d v0, #0000000000000000
; CHECK-GI-NEXT: ret
-; CHECK-GI-NEXT: .loh AdrpLdr Lloh0, Lloh1
ret fp128 0xL00000000000000000000000000000000
}
diff --git a/llvm/test/CodeGen/AArch64/arm64-fp128.ll b/llvm/test/CodeGen/AArch64/arm64-fp128.ll
index 498dce138febf..c4f91c66fb9a6 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fp128.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fp128.ll
@@ -1470,25 +1470,23 @@ define <2 x fp128> @vec_neg_sub(<2 x fp128> %in) {
;
; CHECK-GI-LABEL: vec_neg_sub:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: sub sp, sp, #64
-; CHECK-GI-NEXT: str x30, [sp, #48] // 8-byte Spill
-; CHECK-GI-NEXT: .cfi_def_cfa_offset 64
+; CHECK-GI-NEXT: sub sp, sp, #48
+; CHECK-GI-NEXT: str x30, [sp, #32] // 8-byte Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 48
; CHECK-GI-NEXT: .cfi_offset w30, -16
; CHECK-GI-NEXT: mov v2.16b, v0.16b
-; CHECK-GI-NEXT: adrp x8, .LCPI47_0
-; CHECK-GI-NEXT: str q1, [sp, #32] // 16-byte Spill
-; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI47_0]
+; CHECK-GI-NEXT: movi v0.2d, #0000000000000000
+; CHECK-GI-NEXT: str q1, [sp, #16] // 16-byte Spill
; CHECK-GI-NEXT: mov v1.16b, v2.16b
-; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill
; CHECK-GI-NEXT: bl __subtf3
-; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Spill
-; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Reload
-; CHECK-GI-NEXT: ldr q1, [sp, #32] // 16-byte Reload
+; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill
+; CHECK-GI-NEXT: movi v0.2d, #0000000000000000
+; CHECK-GI-NEXT: ldr q1, [sp, #16] // 16-byte Reload
; CHECK-GI-NEXT: bl __subtf3
; CHECK-GI-NEXT: mov v1.16b, v0.16b
-; CHECK-GI-NEXT: ldr q0, [sp, #16] // 16-byte Reload
-; CHECK-GI-NEXT: ldr x30, [sp, #48] // 8-byte Reload
-; CHECK-GI-NEXT: add sp, sp, #64
+; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Reload
+; CHECK-GI-NEXT: ldr x30, [sp, #32] // 8-byte Reload
+; CHECK-GI-NEXT: add sp, sp, #48
; CHECK-GI-NEXT: ret
%ret = fsub <2 x fp128> zeroinitializer, %in
ret <2 x fp128> %ret
diff --git a/llvm/test/CodeGen/AArch64/dup.ll b/llvm/test/CodeGen/AArch64/dup.ll
index fa30815fdec30..9897a89dbfbd0 100644
--- a/llvm/test/CodeGen/AArch64/dup.ll
+++ b/llvm/test/CodeGen/AArch64/dup.ll
@@ -2564,14 +2564,22 @@ entry:
}
define <2 x fp128> @loaddup_str_v2fp128(ptr %p) {
-; CHECK-LABEL: loaddup_str_v2fp128:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ldr q0, [x0]
-; CHECK-NEXT: adrp x8, .LCPI155_0
-; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI155_0]
-; CHECK-NEXT: mov v1.16b, v0.16b
-; CHECK-NEXT: str q2, [x0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: loaddup_str_v2fp128:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ldr q0, [x0]
+; CHECK-SD-NEXT: adrp x8, .LCPI155_0
+; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI155_0]
+; CHECK-SD-NEXT: mov v1.16b, v0.16b
+; CHECK-SD-NEXT: str q2, [x0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: loaddup_str_v2fp128:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: ldr q0, [x0]
+; CHECK-GI-NEXT: movi v2.2d, #0000000000000000
+; CHECK-GI-NEXT: mov v1.16b, v0.16b
+; CHECK-GI-NEXT: str q2, [x0]
+; CHECK-GI-NEXT: ret
entry:
%a = load fp128, ptr %p
%b = insertelement <2 x fp128> poison, fp128 %a, i64 0
@@ -2618,15 +2626,24 @@ entry:
}
define <3 x fp128> @loaddup_str_v3fp128(ptr %p) {
-; CHECK-LABEL: loaddup_str_v3fp128:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ldr q0, [x0]
-; CHECK-NEXT: adrp x8, .LCPI159_0
-; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI159_0]
-; CHECK-NEXT: mov v1.16b, v0.16b
-; CHECK-NEXT: mov v2.16b, v0.16b
-; CHECK-NEXT: str q3, [x0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: loaddup_str_v3fp128:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ldr q0, [x0]
+; CHECK-SD-NEXT: adrp x8, .LCPI159_0
+; CHECK-SD-NEXT: ldr q3, [x8, :lo12:.LCPI159_0]
+; CHECK-SD-NEXT: mov v1.16b, v0.16b
+; CHECK-SD-NEXT: mov v2.16b, v0.16b
+; CHECK-SD-NEXT: str q3, [x0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: loaddup_str_v3fp128:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: ldr q0, [x0]
+; CHECK-GI-NEXT: movi v3.2d, #0000000000000000
+; CHECK-GI-NEXT: mov v1.16b, v0.16b
+; CHECK-GI-NEXT: mov v2.16b, v0.16b
+; CHECK-GI-NEXT: str q3, [x0]
+; CHECK-GI-NEXT: ret
entry:
%a = load fp128, ptr %p
%b = insertelement <3 x fp128> poison, fp128 %a, i64 0
@@ -2676,16 +2693,26 @@ entry:
}
define <4 x fp128> @loaddup_str_v4fp128(ptr %p) {
-; CHECK-LABEL: loaddup_str_v4fp128:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ldr q0, [x0]
-; CHECK-NEXT: adrp x8, .LCPI163_0
-; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI163_0]
-; CHECK-NEXT: mov v1.16b, v0.16b
-; CHECK-NEXT: mov v2.16b, v0.16b
-; CHECK-NEXT: mov v3.16b, v0.16b
-; CHECK-NEXT: str q4, [x0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: loaddup_str_v4fp128:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ldr q0, [x0]
+; CHECK-SD-NEXT: adrp x8, .LCPI163_0
+; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI163_0]
+; CHECK-SD-NEXT: mov v1.16b, v0.16b
+; CHECK-SD-NEXT: mov v2.16b, v0.16b
+; CHECK-SD-NEXT: mov v3.16b, v0.16b
+; CHECK-SD-NEXT: str q4, [x0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: loaddup_str_v4fp128:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: ldr q0, [x0]
+; CHECK-GI-NEXT: movi v4.2d, #0000000000000000
+; CHECK-GI-NEXT: mov v1.16b, v0.16b
+; CHECK-GI-NEXT: mov v2.16b, v0.16b
+; CHECK-GI-NEXT: mov v3.16b, v0.16b
+; CHECK-GI-NEXT: str q4, [x0]
+; CHECK-GI-NEXT: ret
entry:
%a = load fp128, ptr %p
%b = insertelement <4 x fp128> poison, fp128 %a, i64 0
diff --git a/llvm/test/CodeGen/AArch64/fpimm.ll b/llvm/test/CodeGen/AArch64/fpimm.ll
index e2944243338f5..5dabf7ffda4dc 100644
--- a/llvm/test/CodeGen/AArch64/fpimm.ll
+++ b/llvm/test/CodeGen/AArch64/fpimm.ll
@@ -1,7 +1,7 @@
; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=aarch64-apple-darwin -code-model=large -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LARGE
; RUN: llc -mtriple=aarch64 -code-model=tiny -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=aarch64-linux-gnu -global-isel -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -global-isel -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GI
@varf32 = global float 0.0
@varf64 = global double 0.0
@@ -15,8 +15,9 @@ define void @check_float() {
; CHECK-DAG: fmov {{s[0-9]+}}, #8.5
%newval2 = fadd float %val, 128.0
- store volatile float %newval2, ptr @varf32
; CHECK-DAG: movi [[REG:v[0-9s]+]].2s, #67, lsl #24
+; GI-DAG: fmov [[REG:s[0-9s]+]], #8.5
+ store volatile float %newval2, ptr @varf32
; CHECK: ret
ret void
diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll
index 0ad09d416ce68..7a5fe0f4222bc 100644
--- a/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll
+++ b/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll
@@ -790,9 +790,8 @@ define i32 @test_unsigned_f128_i32(fp128 %f) {
; CHECK-GI-NEXT: .cfi_offset w19, -8
; CHECK-GI-NEXT: .cfi_offset w20, -16
; CHECK-GI-NEXT: .cfi_offset w30, -32
-; CHECK-GI-NEXT: adrp x8, .LCPI30_1
+; CHECK-GI-NEXT: movi v1.2d, #0000000000000000
; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill
-; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI30_1]
; CHECK-GI-NEXT: bl __gttf2
; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Reload
; CHECK-GI-NEXT: cmp w0, #0
diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
index ecca1165753bf..f044f5184ab32 100644
--- a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
@@ -480,9 +480,8 @@ define <1 x i32> @test_unsigned_v1f128_v1i32(<1 x fp128> %f) {
; CHECK-GI-NEXT: .cfi_offset w19, -8
; CHECK-GI-NEXT: .cfi_offset w20, -16
; CHECK-GI-NEXT: .cfi_offset w30, -32
-; CHECK-GI-NEXT: adrp x8, .LCPI14_1
+; CHECK-GI-NEXT: movi v1.2d, #0000000000000000
; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill
-; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI14_1]
; CHECK-GI-NEXT: bl __gttf2
; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Reload
; CHECK-GI-NEXT: cmp w0, #0
@@ -564,21 +563,18 @@ define <2 x i32> @test_unsigned_v2f128_v2i32(<2 x fp128> %f) {
;
; CHECK-GI-LABEL: test_unsigned_v2f128_v2i32:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: sub sp, sp, #96
-; CHECK-GI-NEXT: str x30, [sp, #48] // 8-byte Spill
-; CHECK-GI-NEXT: stp x22, x21, [sp, #64] // 16-byte Folded Spill
-; CHECK-GI-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill
-; CHECK-GI-NEXT: .cfi_def_cfa_offset 96
+; CHECK-GI-NEXT: sub sp, sp, #80
+; CHECK-GI-NEXT: str x30, [sp, #32] // 8-byte Spill
+; CHECK-GI-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill
+; CHECK-GI-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 80
; CHECK-GI-NEXT: .cfi_offset w19, -8
; CHECK-GI-NEXT: .cfi_offset w20, -16
; CHECK-GI-NEXT: .cfi_offset w21, -24
; CHECK-GI-NEXT: .cfi_offset w22, -32
; CHECK-GI-NEXT: .cfi_offset w30, -48
-; CHECK-GI-NEXT: adrp x8, .LCPI15_1
-; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill
-; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI15_1]
-; CHECK-GI-NEXT: stp q2, q1, [sp, #16] // 32-byte Folded Spill
-; CHECK-GI-NEXT: mov v1.16b, v2.16b
+; CHECK-GI-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill
+; CHECK-GI-NEXT: movi v1.2d, #0000000000000000
; CHECK-GI-NEXT: bl __gttf2
; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Reload
; CHECK-GI-NEXT: cmp w0, #0
@@ -600,12 +596,12 @@ define <2 x i32> @test_unsigned_v2f128_v2i32(<2 x fp128> %f) {
; CHECK-GI-NEXT: csel x8, x20, x21, mi
; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: bl __fixunstfsi
-; CHECK-GI-NEXT: ldp q1, q0, [sp, #16] // 32-byte Folded Reload
+; CHECK-GI-NEXT: movi v1.2d, #0000000000000000
+; CHECK-GI-NEXT: ldr q0, [sp, #16] // 16-byte Reload
; CHECK-GI-NEXT: mov w19, w0
; CHECK-GI-NEXT: bl __gttf2
-; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Reload
+; CHECK-GI-NEXT: ldp q1, q0, [sp] // 32-byte Folded Reload
; CHECK-GI-NEXT: cmp w0, #0
-; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Reload
; CHECK-GI-NEXT: fmov x8, d0
; CHECK-GI-NEXT: csel x20, x8, xzr, gt
; CHECK-GI-NEXT: mov x8, v0.d[1]
@@ -620,12 +616,12 @@ define <2 x i32> @test_unsigned_v2f128_v2i32(<2 x fp128> %f) {
; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: bl __fixunstfsi
; CHECK-GI-NEXT: fmov s0, w19
-; CHECK-GI-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload
-; CHECK-GI-NEXT: ldp x22, x21, [sp, #64] // 16-byte Folded Reload
-; CHECK-GI-NEXT: ldr x30, [sp, #48] // 8-byte Reload
+; CHECK-GI-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-GI-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload
+; CHECK-GI-NEXT: ldr x30, [sp, #32] // 8-byte Reload
; CHECK-GI-NEXT: mov v0.s[1], w0
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-GI-NEXT: add sp, sp, #96
+; CHECK-GI-NEXT: add sp, sp, #80
; CHECK-GI-NEXT: ret
%x = call <2 x i32> @llvm.fptoui.sat.v2f128.v2i32(<2 x fp128> %f)
ret <2 x i32> %x
@@ -696,21 +692,20 @@ define <3 x i32> @test_unsigned_v3f128_v3i32(<3 x fp128> %f) {
;
; CHECK-GI-LABEL: test_unsigned_v3f128_v3i32:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: sub sp, sp, #112
-; CHECK-GI-NEXT: stp x30, x23, [sp, #64] // 16-byte Folded Spill
-; CHECK-GI-NEXT: stp x22, x21, [sp, #80] // 16-byte Folded Spill
-; CHECK-GI-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill
-; CHECK-GI-NEXT: .cfi_def_cfa_offset 112
+; CHECK-GI-NEXT: sub sp, sp, #96
+; CHECK-GI-NEXT: stp x30, x23, [sp, #48] // 16-byte Folded Spill
+; CHECK-GI-NEXT: stp x22, x21, [sp, #64] // 16-byte Folded Spill
+; CHECK-GI-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 96
; CHECK-GI-NEXT: .cfi_offset w19, -8
; CHECK-GI-NEXT: .cfi_offset w20, -16
; CHECK-GI-NEXT: .cfi_offset w21, -24
; CHECK-GI-NEXT: .cfi_offset w22, -32
; CHECK-GI-NEXT: .cfi_offset w23, -40
; CHECK-GI-NEXT: .cfi_offset w30, -48
-; CHECK-GI-NEXT: adrp x8, .LCPI16_1
; CHECK-GI-NEXT: stp q1, q0, [sp] // 32-byte Folded Spill
-; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI16_1]
-; CHECK-GI-NEXT: stp q1, q2, [sp, #32] // 32-byte Folded Spill
+; CHECK-GI-NEXT: movi v1.2d, #0000000000000000
+; CHECK-GI-NEXT: str q2, [sp, #32] // 16-byte Spill
; CHECK-GI-NEXT: bl __gttf2
; CHECK-GI-NEXT: ldr q0, [sp, #16] // 16-byte Reload
; CHECK-GI-NEXT: cmp w0, #0
@@ -732,8 +727,8 @@ define <3 x i32> @test_unsigned_v3f128_v3i32(<3 x fp128> %f) {
; CHECK-GI-NEXT: csel x8, x20, x21, mi
; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: bl __fixunstfsi
+; CHECK-GI-NEXT: movi v1.2d, #0000000000000000
; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Reload
-; CHECK-GI-NEXT: ldr q1, [sp, #32] // 16-byte Reload
; CHECK-GI-NEXT: mov w19, w0
; CHECK-GI-NEXT: bl __gttf2
; CHECK-GI-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload
@@ -751,12 +746,12 @@ define <3 x i32> @test_unsigned_v3f128_v3i32(<3 x fp128> %f) {
; CHECK-GI-NEXT: csel x8, x22, x21, mi
; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: bl __fixunstfsi
-; CHECK-GI-NEXT: ldp q1, q0, [sp, #32] // 32-byte Folded Reload
+; CHECK-GI-NEXT: movi v1.2d, #0000000000000000
+; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Reload
; CHECK-GI-NEXT: mov w20, w0
; CHECK-GI-NEXT: bl __gttf2
-; CHECK-GI-NEXT: ldr q0, [sp, #48] // 16-byte Reload
+; CHECK-GI-NEXT: ldp q1, q0, [sp, #16] // 32-byte Folded Reload
; CHECK-GI-NEXT: cmp w0, #0
-; CHECK-GI-NEXT: ldr q1, [sp, #16] // 16-byte Reload
; CHECK-GI-NEXT: fmov x8, d0
; CHECK-GI-NEXT: csel x22, x8, xzr, gt
; CHECK-GI-NEXT: mov x8, v0.d[1]
@@ -771,12 +766,12 @@ define <3 x i32> @test_unsigned_v3f128_v3i32(<3 x fp128> %f) {
; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: bl __fixunstfsi
; CHECK-GI-NEXT: fmov s0, w19
-; CHECK-GI-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload
-; CHECK-GI-NEXT: ldp x30, x23, [sp, #64] // 16-byte Folded Reload
+; CHECK-GI-NEXT: ldp x22, x21, [sp, #64] // 16-byte Folded Reload
+; CHECK-GI-NEXT: ldp x30, x23, [sp, #48] // 16-byte Folded Reload
; CHECK-GI-NEXT: mov v0.s[1], w20
-; CHECK-GI-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload
+; CHECK-GI-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload
; CHECK-GI-NEXT: mov v0.s[2], w0
-; CHECK-GI-NEXT: add sp, sp, #112
+; CHECK-GI-NEXT: add sp, sp, #96
; CHECK-GI-NEXT: ret
%x = call <3 x i32> @llvm.fptoui.sat.v3f128.v3i32(<3 x fp128> %f)
ret <3 x i32> %x
@@ -865,12 +860,12 @@ define <4 x i32> @test_unsigned_v4f128_v4i32(<4 x fp128> %f) {
;
; CHECK-GI-LABEL: test_unsigned_v4f128_v4i32:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: sub sp, sp, #144
-; CHECK-GI-NEXT: str x30, [sp, #80] // 8-byte Spill
-; CHECK-GI-NEXT: stp x24, x23, [sp, #96] // 16-byte Folded Spill
-; CHECK-GI-NEXT: stp x22, x21, [sp, #112] // 16-byte Folded Spill
-; CHECK-GI-NEXT: stp x20, x19, [sp, #128] // 16-byte Folded Spill
-; CHECK-GI-NEXT: .cfi_def_cfa_offset 144
+; CHECK-GI-NEXT: sub sp, sp, #128
+; CHECK-GI-NEXT: str x30, [sp, #64] // 8-byte Spill
+; CHECK-GI-NEXT: stp x24, x23, [sp, #80] // 16-byte Folded Spill
+; CHECK-GI-NEXT: stp x22, x21, [sp, #96] // 16-byte Folded Spill
+; CHECK-GI-NEXT: stp x20, x19, [sp, #112] // 16-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 128
; CHECK-GI-NEXT: .cfi_offset w19, -8
; CHECK-GI-NEXT: .cfi_offset w20, -16
; CHECK-GI-NEXT: .cfi_offset w21, -24
@@ -878,12 +873,9 @@ define <4 x i32> @test_unsigned_v4f128_v4i32(<4 x fp128> %f) {
; CHECK-GI-NEXT: .cfi_offset w23, -40
; CHECK-GI-NEXT: .cfi_offset w24, -48
; CHECK-GI-NEXT: .cfi_offset w30, -64
-; CHECK-GI-NEXT: adrp x8, .LCPI17_1
; CHECK-GI-NEXT: stp q1, q2, [sp] // 32-byte Folded Spill
-; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI17_1]
-; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Spill
-; CHECK-GI-NEXT: str q3, [sp, #32] // 16-byte Spill
-; CHECK-GI-NEXT: str q1, [sp, #64] // 16-byte Spill
+; CHECK-GI-NEXT: movi v1.2d, #0000000000000000
+; CHECK-GI-NEXT: stp q3, q0, [sp, #32] // 32-byte Folded Spill
; CHECK-GI-NEXT: bl __gttf2
; CHECK-GI-NEXT: ldr q0, [sp, #48] // 16-byte Reload
; CHECK-GI-NEXT: cmp w0, #0
@@ -905,8 +897,8 @@ define <4 x i32> @test_unsigned_v4f128_v4i32(<4 x fp128> %f) {
; CHECK-GI-NEXT: csel x8, x20, x22, mi
; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: bl __fixunstfsi
+; CHECK-GI-NEXT: movi v1.2d, #0000000000000000
; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Reload
-; CHECK-GI-NEXT: ldr q1, [sp, #64] // 16-byte Reload
; CHECK-GI-NEXT: mov w19, w0
; CHECK-GI-NEXT: bl __gttf2
; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Reload
@@ -925,8 +917,8 @@ define <4 x i32> @test_unsigned_v4f128_v4i32(<4 x fp128> %f) {
; CHECK-GI-NEXT: csel x8, x21, x22, mi
; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: bl __fixunstfsi
+; CHECK-GI-NEXT: movi v1.2d, #0000000000000000
; CHECK-GI-NEXT: ldr q0, [sp, #16] // 16-byte Reload
-; CHECK-GI-NEXT: ldr q1, [sp, #64] // 16-byte Reload
; CHECK-GI-NEXT: mov w20, w0
; CHECK-GI-NEXT: bl __gttf2
; CHECK-GI-NEXT: ldr q0, [sp, #16] // 16-byte Reload
@@ -945,8 +937,8 @@ define <4 x i32> @test_unsigned_v4f128_v4i32(<4 x fp128> %f) {
; CHECK-GI-NEXT: csel x8, x23, x22, mi
; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: bl __fixunstfsi
+; CHECK-GI-NEXT: movi v1.2d, #0000000000000000
; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Reload
-; CHECK-GI-NEXT: ldr q1, [sp, #64] // 16-byte Reload
; CHECK-GI-NEXT: mov w21, w0
; CHECK-GI-NEXT: bl __gttf2
; CHECK-GI-NEXT: ldp q0, q1, [sp, #32] // 32-byte Folded Reload
@@ -965,14 +957,14 @@ define <4 x i32> @test_unsigned_v4f128_v4i32(<4 x fp128> %f) {
; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: bl __fixunstfsi
; CHECK-GI-NEXT: fmov s0, w19
-; CHECK-GI-NEXT: ldp x24, x23, [sp, #96] // 16-byte Folded Reload
-; CHECK-GI-NEXT: ldr x30, [sp, #80] // 8-byte Reload
+; CHECK-GI-NEXT: ldp x24, x23, [sp, #80] // 16-byte Folded Reload
+; CHECK-GI-NEXT: ldr x30, [sp, #64] // 8-byte Reload
; CHECK-GI-NEXT: mov v0.s[1], w20
-; CHECK-GI-NEXT: ldp x20, x19, [sp, #128] // 16-byte Folded Reload
+; CHECK-GI-NEXT: ldp x20, x19, [sp, #112] // 16-byte Folded Reload
; CHECK-GI-NEXT: mov v0.s[2], w21
-; CHECK-GI-NEXT: ldp x22, x21, [sp, #112] // 16-byte Folded Reload
+; CHECK-GI-NEXT: ldp x22, x21, [sp, #96] // 16-byte Folded Reload
; CHECK-GI-NEXT: mov v0.s[3], w0
-; CHECK-GI-NEXT: add sp, sp, #144
+; CHECK-GI-NEXT: add sp, sp, #128
; CHECK-GI-NEXT: ret
%x = call <4 x i32> @llvm.fptoui.sat.v4f128.v4i32(<4 x fp128> %f)
ret <4 x i32> %x
@@ -4139,22 +4131,19 @@ define <2 x i64> @test_signed_v2f128_v2i64(<2 x fp128> %f) {
;
; CHECK-GI-LABEL: test_signed_v2f128_v2i64:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: sub sp, sp, #96
-; CHECK-GI-NEXT: stp x30, x23, [sp, #48] // 16-byte Folded Spill
-; CHECK-GI-NEXT: stp x22, x21, [sp, #64] // 16-byte Folded Spill
-; CHECK-GI-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill
-; CHECK-GI-NEXT: .cfi_def_cfa_offset 96
+; CHECK-GI-NEXT: sub sp, sp, #80
+; CHECK-GI-NEXT: stp x30, x23, [sp, #32] // 16-byte Folded Spill
+; CHECK-GI-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill
+; CHECK-GI-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 80
; CHECK-GI-NEXT: .cfi_offset w19, -8
; CHECK-GI-NEXT: .cfi_offset w20, -16
; CHECK-GI-NEXT: .cfi_offset w21, -24
; CHECK-GI-NEXT: .cfi_offset w22, -32
; CHECK-GI-NEXT: .cfi_offset w23, -40
; CHECK-GI-NEXT: .cfi_offset w30, -48
-; CHECK-GI-NEXT: adrp x8, .LCPI86_1
-; CHECK-GI-NEXT: str q0, [sp] // 16-byte Spill
-; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI86_1]
-; CHECK-GI-NEXT: stp q2, q1, [sp, #16] // 32-byte Folded Spill
-; CHECK-GI-NEXT: mov v1.16b, v2.16b
+; CHECK-GI-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill
+; CHECK-GI-NEXT: movi v1.2d, #0000000000000000
; CHECK-GI-NEXT: bl __gttf2
; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Reload
; CHECK-GI-NEXT: cmp w0, #0
@@ -4176,12 +4165,12 @@ define <2 x i64> @test_signed_v2f128_v2i64(<2 x fp128> %f) {
; CHECK-GI-NEXT: csel x8, x20, x22, mi
; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: bl __fixunstfdi
-; CHECK-GI-NEXT: ldp q1, q0, [sp, #16] // 32-byte Folded Reload
+; CHECK-GI-NEXT: movi v1.2d, #0000000000000000
+; CHECK-GI-NEXT: ldr q0, [sp, #16] // 16-byte Reload
; CHECK-GI-NEXT: mov x19, x0
; CHECK-GI-NEXT: bl __gttf2
-; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Reload
+; CHECK-GI-NEXT: ldp q1, q0, [sp] // 32-byte Folded Reload
; CHECK-GI-NEXT: cmp w0, #0
-; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Reload
; CHECK-GI-NEXT: fmov x8, d0
; CHECK-GI-NEXT: csel x20, x8, xzr, gt
; CHECK-GI-NEXT: mov x8, v0.d[1]
@@ -4196,11 +4185,11 @@ define <2 x i64> @test_signed_v2f128_v2i64(<2 x fp128> %f) {
; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: bl __fixunstfdi
; CHECK-GI-NEXT: fmov d0, x19
-; CHECK-GI-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload
-; CHECK-GI-NEXT: ldp x22, x21, [sp, #64] // 16-byte Folded Reload
-; CHECK-GI-NEXT: ldp x30, x23, [sp, #48] // 16-byte Folded Reload
+; CHECK-GI-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-GI-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload
+; CHECK-GI-NEXT: ldp x30, x23, [sp, #32] // 16-byte Folded Reload
; CHECK-GI-NEXT: mov v0.d[1], x0
-; CHECK-GI-NEXT: add sp, sp, #96
+; CHECK-GI-NEXT: add sp, sp, #80
; CHECK-GI-NEXT: ret
%x = call <2 x i64> @llvm.fptoui.sat.v2f128.v2i64(<2 x fp128> %f)
ret <2 x i64> %x
More information about the llvm-commits
mailing list