[llvm] cf65afb - [AArch64][GISel] Extend lowering for fp round intrinsics.
David Green via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 17 08:25:40 PDT 2023
Author: David Green
Date: 2023-08-17T16:25:32+01:00
New Revision: cf65afbf93a8cd61ed797a5d23513b8d46ddabb2
URL: https://github.com/llvm/llvm-project/commit/cf65afbf93a8cd61ed797a5d23513b8d46ddabb2
DIFF: https://github.com/llvm/llvm-project/commit/cf65afbf93a8cd61ed797a5d23513b8d46ddabb2.diff
LOG: [AArch64][GISel] Extend lowering for fp round intrinsics.
This extends the lowering of ceil, floor, nearbyint, rint, round, roundeven and
trunc. They are all very similar, so they can reuse the same legalization info.
selectIntrinsicTrunc and selectIntrinsicRound can be removed, as
G_INTRINSIC_TRUNC and G_INTRINSIC_ROUND can be selected via tablegen patterns,
and G_INTRINSIC_ROUNDEVEN is marked as a gisel equivalent of froundeven.
Otherwise this reuses the existing code, filling it out to handle more types.
Differential Revision: https://reviews.llvm.org/D157679
Added:
llvm/test/CodeGen/AArch64/GlobalISel/legalize-intrinsic-roundeven.mir
Modified:
llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
llvm/test/CodeGen/AArch64/GlobalISel/legalize-ceil.mir
llvm/test/CodeGen/AArch64/GlobalISel/legalize-frint.mir
llvm/test/CodeGen/AArch64/GlobalISel/legalize-intrinsic-round.mir
llvm/test/CodeGen/AArch64/GlobalISel/legalize-intrinsic-trunc.mir
llvm/test/CodeGen/AArch64/GlobalISel/legalize-nearbyint.mir
llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
llvm/test/CodeGen/AArch64/fcvt.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
index c306be0940577a..5ebd3ab9129f48 100644
--- a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
+++ b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
@@ -144,6 +144,7 @@ def : GINodeEquiv<G_FRINT, frint>;
def : GINodeEquiv<G_FNEARBYINT, fnearbyint>;
def : GINodeEquiv<G_INTRINSIC_TRUNC, ftrunc>;
def : GINodeEquiv<G_INTRINSIC_ROUND, fround>;
+def : GINodeEquiv<G_INTRINSIC_ROUNDEVEN, froundeven>;
def : GINodeEquiv<G_INTRINSIC_LRINT, lrint>;
def : GINodeEquiv<G_FCOPYSIGN, fcopysign>;
def : GINodeEquiv<G_SMIN, smin>;
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 1ab571fd666389..49dfc681e8c1ed 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -4855,6 +4855,13 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
case TargetOpcode::G_FNEG:
case TargetOpcode::G_FABS:
case TargetOpcode::G_FSQRT:
+ case TargetOpcode::G_FCEIL:
+ case TargetOpcode::G_FFLOOR:
+ case TargetOpcode::G_FNEARBYINT:
+ case TargetOpcode::G_FRINT:
+ case TargetOpcode::G_INTRINSIC_ROUND:
+ case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
+ case TargetOpcode::G_INTRINSIC_TRUNC:
case TargetOpcode::G_BSWAP:
case TargetOpcode::G_FCANONICALIZE:
case TargetOpcode::G_SEXT_INREG:
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index fad757a4b2889f..acd429e96fa41b 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -190,8 +190,6 @@ class AArch64InstructionSelector : public InstructionSelector {
MachineRegisterInfo &MRI);
bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI);
- bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const;
- bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const;
bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI);
@@ -3494,10 +3492,6 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
return false;
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
- case TargetOpcode::G_INTRINSIC_TRUNC:
- return selectIntrinsicTrunc(I, MRI);
- case TargetOpcode::G_INTRINSIC_ROUND:
- return selectIntrinsicRound(I, MRI);
case TargetOpcode::G_BUILD_VECTOR:
return selectBuildVector(I, MRI);
case TargetOpcode::G_MERGE_VALUES:
@@ -3696,116 +3690,6 @@ bool AArch64InstructionSelector::selectTLSGlobalValue(
return true;
}
-bool AArch64InstructionSelector::selectIntrinsicTrunc(
- MachineInstr &I, MachineRegisterInfo &MRI) const {
- const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
-
- // Select the correct opcode.
- unsigned Opc = 0;
- if (!SrcTy.isVector()) {
- switch (SrcTy.getSizeInBits()) {
- default:
- case 16:
- Opc = AArch64::FRINTZHr;
- break;
- case 32:
- Opc = AArch64::FRINTZSr;
- break;
- case 64:
- Opc = AArch64::FRINTZDr;
- break;
- }
- } else {
- unsigned NumElts = SrcTy.getNumElements();
- switch (SrcTy.getElementType().getSizeInBits()) {
- default:
- break;
- case 16:
- if (NumElts == 4)
- Opc = AArch64::FRINTZv4f16;
- else if (NumElts == 8)
- Opc = AArch64::FRINTZv8f16;
- break;
- case 32:
- if (NumElts == 2)
- Opc = AArch64::FRINTZv2f32;
- else if (NumElts == 4)
- Opc = AArch64::FRINTZv4f32;
- break;
- case 64:
- if (NumElts == 2)
- Opc = AArch64::FRINTZv2f64;
- break;
- }
- }
-
- if (!Opc) {
- // Didn't get an opcode above, bail.
- LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n");
- return false;
- }
-
- // Legalization would have set us up perfectly for this; we just need to
- // set the opcode and move on.
- I.setDesc(TII.get(Opc));
- return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
-}
-
-bool AArch64InstructionSelector::selectIntrinsicRound(
- MachineInstr &I, MachineRegisterInfo &MRI) const {
- const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
-
- // Select the correct opcode.
- unsigned Opc = 0;
- if (!SrcTy.isVector()) {
- switch (SrcTy.getSizeInBits()) {
- default:
- case 16:
- Opc = AArch64::FRINTAHr;
- break;
- case 32:
- Opc = AArch64::FRINTASr;
- break;
- case 64:
- Opc = AArch64::FRINTADr;
- break;
- }
- } else {
- unsigned NumElts = SrcTy.getNumElements();
- switch (SrcTy.getElementType().getSizeInBits()) {
- default:
- break;
- case 16:
- if (NumElts == 4)
- Opc = AArch64::FRINTAv4f16;
- else if (NumElts == 8)
- Opc = AArch64::FRINTAv8f16;
- break;
- case 32:
- if (NumElts == 2)
- Opc = AArch64::FRINTAv2f32;
- else if (NumElts == 4)
- Opc = AArch64::FRINTAv4f32;
- break;
- case 64:
- if (NumElts == 2)
- Opc = AArch64::FRINTAv2f64;
- break;
- }
- }
-
- if (!Opc) {
- // Didn't get an opcode above, bail.
- LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n");
- return false;
- }
-
- // Legalization would have set us up perfectly for this; we just need to
- // set the opcode and move on.
- I.setDesc(TII.get(Opc));
- return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
-}
-
bool AArch64InstructionSelector::selectVectorICmp(
MachineInstr &I, MachineRegisterInfo &MRI) {
Register DstReg = I.getOperand(0).getReg();
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 3e6ee6655f43c7..b17f12d82b7939 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -243,9 +243,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
getActionDefinitionsBuilder(G_FREM).libcallFor({s32, s64});
- getActionDefinitionsBuilder({G_FCEIL, G_FFLOOR, G_FRINT, G_FMA,
- G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND,
- G_FNEARBYINT, G_INTRINSIC_LRINT})
+ getActionDefinitionsBuilder({G_FMA, G_INTRINSIC_LRINT})
// If we don't have full FP16 support, then scalarize the elements of
// vectors containing fp16 types.
.fewerElementsIf(
@@ -936,8 +934,10 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
// TODO: Vector types.
getActionDefinitionsBuilder({G_SADDSAT, G_SSUBSAT}).lowerIf(isScalar(0));
- getActionDefinitionsBuilder(
- {G_FABS, G_FSQRT, G_FMAXNUM, G_FMINNUM, G_FMAXIMUM, G_FMINIMUM})
+ getActionDefinitionsBuilder({G_FABS, G_FSQRT, G_FMAXNUM, G_FMINNUM, G_FMAXIMUM,
+ G_FMINIMUM, G_FCEIL, G_FFLOOR, G_FRINT,
+ G_FNEARBYINT, G_INTRINSIC_TRUNC,
+ G_INTRINSIC_ROUND, G_INTRINSIC_ROUNDEVEN})
.legalFor({MinFPScalar, s32, s64, v2s32, v4s32, v2s64})
.legalIf([=](const LegalityQuery &Query) {
const auto &Ty = Query.Types[0];
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
index 0abb9f4d4c78ee..46e9436d4b94b0 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -431,6 +431,7 @@ static bool isPreISelGenericFloatingPointOpcode(unsigned Opc) {
case TargetOpcode::G_FRINT:
case TargetOpcode::G_INTRINSIC_TRUNC:
case TargetOpcode::G_INTRINSIC_ROUND:
+ case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
case TargetOpcode::G_FMAXNUM:
case TargetOpcode::G_FMINNUM:
case TargetOpcode::G_FMAXIMUM:
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ceil.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ceil.mir
index 7d6ef45a972f77..820377e805b398 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ceil.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ceil.mir
@@ -25,33 +25,15 @@ body: |
; CHECK: liveins: $q0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<8 x s16>)
- ; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV]](s16)
- ; CHECK-NEXT: [[FCEIL:%[0-9]+]]:_(s32) = G_FCEIL [[FPEXT]]
- ; CHECK-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FCEIL]](s32)
- ; CHECK-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV1]](s16)
- ; CHECK-NEXT: [[FCEIL1:%[0-9]+]]:_(s32) = G_FCEIL [[FPEXT1]]
- ; CHECK-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FCEIL1]](s32)
- ; CHECK-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[UV2]](s16)
- ; CHECK-NEXT: [[FCEIL2:%[0-9]+]]:_(s32) = G_FCEIL [[FPEXT2]]
- ; CHECK-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FCEIL2]](s32)
- ; CHECK-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[UV3]](s16)
- ; CHECK-NEXT: [[FCEIL3:%[0-9]+]]:_(s32) = G_FCEIL [[FPEXT3]]
- ; CHECK-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FCEIL3]](s32)
- ; CHECK-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[UV4]](s16)
- ; CHECK-NEXT: [[FCEIL4:%[0-9]+]]:_(s32) = G_FCEIL [[FPEXT4]]
- ; CHECK-NEXT: [[FPTRUNC4:%[0-9]+]]:_(s16) = G_FPTRUNC [[FCEIL4]](s32)
- ; CHECK-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[UV5]](s16)
- ; CHECK-NEXT: [[FCEIL5:%[0-9]+]]:_(s32) = G_FCEIL [[FPEXT5]]
- ; CHECK-NEXT: [[FPTRUNC5:%[0-9]+]]:_(s16) = G_FPTRUNC [[FCEIL5]](s32)
- ; CHECK-NEXT: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[UV6]](s16)
- ; CHECK-NEXT: [[FCEIL6:%[0-9]+]]:_(s32) = G_FCEIL [[FPEXT6]]
- ; CHECK-NEXT: [[FPTRUNC6:%[0-9]+]]:_(s16) = G_FPTRUNC [[FCEIL6]](s32)
- ; CHECK-NEXT: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[UV7]](s16)
- ; CHECK-NEXT: [[FCEIL7:%[0-9]+]]:_(s32) = G_FCEIL [[FPEXT7]]
- ; CHECK-NEXT: [[FPTRUNC7:%[0-9]+]]:_(s16) = G_FPTRUNC [[FCEIL7]](s32)
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16), [[FPTRUNC2]](s16), [[FPTRUNC3]](s16), [[FPTRUNC4]](s16), [[FPTRUNC5]](s16), [[FPTRUNC6]](s16), [[FPTRUNC7]](s16)
- ; CHECK-NEXT: $q0 = COPY [[BUILD_VECTOR]](<8 x s16>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<4 x s16>), [[UV1:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[COPY]](<8 x s16>)
+ ; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[UV]](<4 x s16>)
+ ; CHECK-NEXT: [[FPEXT1:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[UV1]](<4 x s16>)
+ ; CHECK-NEXT: [[FCEIL:%[0-9]+]]:_(<4 x s32>) = G_FCEIL [[FPEXT]]
+ ; CHECK-NEXT: [[FCEIL1:%[0-9]+]]:_(<4 x s32>) = G_FCEIL [[FPEXT1]]
+ ; CHECK-NEXT: [[FPTRUNC:%[0-9]+]]:_(<4 x s16>) = G_FPTRUNC [[FCEIL]](<4 x s32>)
+ ; CHECK-NEXT: [[FPTRUNC1:%[0-9]+]]:_(<4 x s16>) = G_FPTRUNC [[FCEIL1]](<4 x s32>)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[FPTRUNC]](<4 x s16>), [[FPTRUNC1]](<4 x s16>)
+ ; CHECK-NEXT: $q0 = COPY [[CONCAT_VECTORS]](<8 x s16>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%0:_(<8 x s16>) = COPY $q0
%1:_(<8 x s16>) = G_FCEIL %0
@@ -73,21 +55,10 @@ body: |
; CHECK: liveins: $d0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
- ; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV]](s16)
- ; CHECK-NEXT: [[FCEIL:%[0-9]+]]:_(s32) = G_FCEIL [[FPEXT]]
- ; CHECK-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FCEIL]](s32)
- ; CHECK-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV1]](s16)
- ; CHECK-NEXT: [[FCEIL1:%[0-9]+]]:_(s32) = G_FCEIL [[FPEXT1]]
- ; CHECK-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FCEIL1]](s32)
- ; CHECK-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[UV2]](s16)
- ; CHECK-NEXT: [[FCEIL2:%[0-9]+]]:_(s32) = G_FCEIL [[FPEXT2]]
- ; CHECK-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FCEIL2]](s32)
- ; CHECK-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[UV3]](s16)
- ; CHECK-NEXT: [[FCEIL3:%[0-9]+]]:_(s32) = G_FCEIL [[FPEXT3]]
- ; CHECK-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FCEIL3]](s32)
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16), [[FPTRUNC2]](s16), [[FPTRUNC3]](s16)
- ; CHECK-NEXT: $d0 = COPY [[BUILD_VECTOR]](<4 x s16>)
+ ; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[COPY]](<4 x s16>)
+ ; CHECK-NEXT: [[FCEIL:%[0-9]+]]:_(<4 x s32>) = G_FCEIL [[FPEXT]]
+ ; CHECK-NEXT: [[FPTRUNC:%[0-9]+]]:_(<4 x s16>) = G_FPTRUNC [[FCEIL]](<4 x s32>)
+ ; CHECK-NEXT: $d0 = COPY [[FPTRUNC]](<4 x s16>)
; CHECK-NEXT: RET_ReallyLR implicit $d0
%0:_(<4 x s16>) = COPY $d0
%1:_(<4 x s16>) = G_FCEIL %0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-frint.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-frint.mir
index a923e9af61a7a0..3aa60935512c18 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-frint.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-frint.mir
@@ -149,21 +149,10 @@ body: |
; NOFP16: liveins: $d0
; NOFP16-NEXT: {{ $}}
; NOFP16-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0
- ; NOFP16-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
- ; NOFP16-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV]](s16)
- ; NOFP16-NEXT: [[FRINT:%[0-9]+]]:_(s32) = G_FRINT [[FPEXT]]
- ; NOFP16-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FRINT]](s32)
- ; NOFP16-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV1]](s16)
- ; NOFP16-NEXT: [[FRINT1:%[0-9]+]]:_(s32) = G_FRINT [[FPEXT1]]
- ; NOFP16-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FRINT1]](s32)
- ; NOFP16-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[UV2]](s16)
- ; NOFP16-NEXT: [[FRINT2:%[0-9]+]]:_(s32) = G_FRINT [[FPEXT2]]
- ; NOFP16-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FRINT2]](s32)
- ; NOFP16-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[UV3]](s16)
- ; NOFP16-NEXT: [[FRINT3:%[0-9]+]]:_(s32) = G_FRINT [[FPEXT3]]
- ; NOFP16-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FRINT3]](s32)
- ; NOFP16-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16), [[FPTRUNC2]](s16), [[FPTRUNC3]](s16)
- ; NOFP16-NEXT: $d0 = COPY [[BUILD_VECTOR]](<4 x s16>)
+ ; NOFP16-NEXT: [[FPEXT:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[COPY]](<4 x s16>)
+ ; NOFP16-NEXT: [[FRINT:%[0-9]+]]:_(<4 x s32>) = G_FRINT [[FPEXT]]
+ ; NOFP16-NEXT: [[FPTRUNC:%[0-9]+]]:_(<4 x s16>) = G_FPTRUNC [[FRINT]](<4 x s32>)
+ ; NOFP16-NEXT: $d0 = COPY [[FPTRUNC]](<4 x s16>)
; NOFP16-NEXT: RET_ReallyLR implicit $d0
;
; FP16-LABEL: name: test_v4f16.rint
@@ -192,33 +181,15 @@ body: |
; NOFP16: liveins: $q0
; NOFP16-NEXT: {{ $}}
; NOFP16-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0
- ; NOFP16-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<8 x s16>)
- ; NOFP16-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV]](s16)
- ; NOFP16-NEXT: [[FRINT:%[0-9]+]]:_(s32) = G_FRINT [[FPEXT]]
- ; NOFP16-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FRINT]](s32)
- ; NOFP16-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV1]](s16)
- ; NOFP16-NEXT: [[FRINT1:%[0-9]+]]:_(s32) = G_FRINT [[FPEXT1]]
- ; NOFP16-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FRINT1]](s32)
- ; NOFP16-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[UV2]](s16)
- ; NOFP16-NEXT: [[FRINT2:%[0-9]+]]:_(s32) = G_FRINT [[FPEXT2]]
- ; NOFP16-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FRINT2]](s32)
- ; NOFP16-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[UV3]](s16)
- ; NOFP16-NEXT: [[FRINT3:%[0-9]+]]:_(s32) = G_FRINT [[FPEXT3]]
- ; NOFP16-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FRINT3]](s32)
- ; NOFP16-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[UV4]](s16)
- ; NOFP16-NEXT: [[FRINT4:%[0-9]+]]:_(s32) = G_FRINT [[FPEXT4]]
- ; NOFP16-NEXT: [[FPTRUNC4:%[0-9]+]]:_(s16) = G_FPTRUNC [[FRINT4]](s32)
- ; NOFP16-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[UV5]](s16)
- ; NOFP16-NEXT: [[FRINT5:%[0-9]+]]:_(s32) = G_FRINT [[FPEXT5]]
- ; NOFP16-NEXT: [[FPTRUNC5:%[0-9]+]]:_(s16) = G_FPTRUNC [[FRINT5]](s32)
- ; NOFP16-NEXT: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[UV6]](s16)
- ; NOFP16-NEXT: [[FRINT6:%[0-9]+]]:_(s32) = G_FRINT [[FPEXT6]]
- ; NOFP16-NEXT: [[FPTRUNC6:%[0-9]+]]:_(s16) = G_FPTRUNC [[FRINT6]](s32)
- ; NOFP16-NEXT: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[UV7]](s16)
- ; NOFP16-NEXT: [[FRINT7:%[0-9]+]]:_(s32) = G_FRINT [[FPEXT7]]
- ; NOFP16-NEXT: [[FPTRUNC7:%[0-9]+]]:_(s16) = G_FPTRUNC [[FRINT7]](s32)
- ; NOFP16-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16), [[FPTRUNC2]](s16), [[FPTRUNC3]](s16), [[FPTRUNC4]](s16), [[FPTRUNC5]](s16), [[FPTRUNC6]](s16), [[FPTRUNC7]](s16)
- ; NOFP16-NEXT: $q0 = COPY [[BUILD_VECTOR]](<8 x s16>)
+ ; NOFP16-NEXT: [[UV:%[0-9]+]]:_(<4 x s16>), [[UV1:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[COPY]](<8 x s16>)
+ ; NOFP16-NEXT: [[FPEXT:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[UV]](<4 x s16>)
+ ; NOFP16-NEXT: [[FPEXT1:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[UV1]](<4 x s16>)
+ ; NOFP16-NEXT: [[FRINT:%[0-9]+]]:_(<4 x s32>) = G_FRINT [[FPEXT]]
+ ; NOFP16-NEXT: [[FRINT1:%[0-9]+]]:_(<4 x s32>) = G_FRINT [[FPEXT1]]
+ ; NOFP16-NEXT: [[FPTRUNC:%[0-9]+]]:_(<4 x s16>) = G_FPTRUNC [[FRINT]](<4 x s32>)
+ ; NOFP16-NEXT: [[FPTRUNC1:%[0-9]+]]:_(<4 x s16>) = G_FPTRUNC [[FRINT1]](<4 x s32>)
+ ; NOFP16-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[FPTRUNC]](<4 x s16>), [[FPTRUNC1]](<4 x s16>)
+ ; NOFP16-NEXT: $q0 = COPY [[CONCAT_VECTORS]](<8 x s16>)
; NOFP16-NEXT: RET_ReallyLR implicit $q0
;
; FP16-LABEL: name: test_v8f16.rint
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-intrinsic-round.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-intrinsic-round.mir
index 7b8ccdb5e54509..a04c577aa5dbe1 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-intrinsic-round.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-intrinsic-round.mir
@@ -109,33 +109,15 @@ body: |
; NO-FP16: liveins: $q0
; NO-FP16-NEXT: {{ $}}
; NO-FP16-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0
- ; NO-FP16-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<8 x s16>)
- ; NO-FP16-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV]](s16)
- ; NO-FP16-NEXT: [[INTRINSIC_ROUND:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUND [[FPEXT]]
- ; NO-FP16-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_ROUND]](s32)
- ; NO-FP16-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV1]](s16)
- ; NO-FP16-NEXT: [[INTRINSIC_ROUND1:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUND [[FPEXT1]]
- ; NO-FP16-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_ROUND1]](s32)
- ; NO-FP16-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[UV2]](s16)
- ; NO-FP16-NEXT: [[INTRINSIC_ROUND2:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUND [[FPEXT2]]
- ; NO-FP16-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_ROUND2]](s32)
- ; NO-FP16-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[UV3]](s16)
- ; NO-FP16-NEXT: [[INTRINSIC_ROUND3:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUND [[FPEXT3]]
- ; NO-FP16-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_ROUND3]](s32)
- ; NO-FP16-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[UV4]](s16)
- ; NO-FP16-NEXT: [[INTRINSIC_ROUND4:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUND [[FPEXT4]]
- ; NO-FP16-NEXT: [[FPTRUNC4:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_ROUND4]](s32)
- ; NO-FP16-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[UV5]](s16)
- ; NO-FP16-NEXT: [[INTRINSIC_ROUND5:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUND [[FPEXT5]]
- ; NO-FP16-NEXT: [[FPTRUNC5:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_ROUND5]](s32)
- ; NO-FP16-NEXT: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[UV6]](s16)
- ; NO-FP16-NEXT: [[INTRINSIC_ROUND6:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUND [[FPEXT6]]
- ; NO-FP16-NEXT: [[FPTRUNC6:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_ROUND6]](s32)
- ; NO-FP16-NEXT: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[UV7]](s16)
- ; NO-FP16-NEXT: [[INTRINSIC_ROUND7:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUND [[FPEXT7]]
- ; NO-FP16-NEXT: [[FPTRUNC7:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_ROUND7]](s32)
- ; NO-FP16-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16), [[FPTRUNC2]](s16), [[FPTRUNC3]](s16), [[FPTRUNC4]](s16), [[FPTRUNC5]](s16), [[FPTRUNC6]](s16), [[FPTRUNC7]](s16)
- ; NO-FP16-NEXT: $q0 = COPY [[BUILD_VECTOR]](<8 x s16>)
+ ; NO-FP16-NEXT: [[UV:%[0-9]+]]:_(<4 x s16>), [[UV1:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[COPY]](<8 x s16>)
+ ; NO-FP16-NEXT: [[FPEXT:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[UV]](<4 x s16>)
+ ; NO-FP16-NEXT: [[FPEXT1:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[UV1]](<4 x s16>)
+ ; NO-FP16-NEXT: [[INTRINSIC_ROUND:%[0-9]+]]:_(<4 x s32>) = G_INTRINSIC_ROUND [[FPEXT]]
+ ; NO-FP16-NEXT: [[INTRINSIC_ROUND1:%[0-9]+]]:_(<4 x s32>) = G_INTRINSIC_ROUND [[FPEXT1]]
+ ; NO-FP16-NEXT: [[FPTRUNC:%[0-9]+]]:_(<4 x s16>) = G_FPTRUNC [[INTRINSIC_ROUND]](<4 x s32>)
+ ; NO-FP16-NEXT: [[FPTRUNC1:%[0-9]+]]:_(<4 x s16>) = G_FPTRUNC [[INTRINSIC_ROUND1]](<4 x s32>)
+ ; NO-FP16-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[FPTRUNC]](<4 x s16>), [[FPTRUNC1]](<4 x s16>)
+ ; NO-FP16-NEXT: $q0 = COPY [[CONCAT_VECTORS]](<8 x s16>)
; NO-FP16-NEXT: RET_ReallyLR implicit $q0
;
; FP16-LABEL: name: test_v8f16.round
@@ -167,21 +149,10 @@ body: |
; NO-FP16: liveins: $d0
; NO-FP16-NEXT: {{ $}}
; NO-FP16-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0
- ; NO-FP16-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
- ; NO-FP16-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV]](s16)
- ; NO-FP16-NEXT: [[INTRINSIC_ROUND:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUND [[FPEXT]]
- ; NO-FP16-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_ROUND]](s32)
- ; NO-FP16-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV1]](s16)
- ; NO-FP16-NEXT: [[INTRINSIC_ROUND1:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUND [[FPEXT1]]
- ; NO-FP16-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_ROUND1]](s32)
- ; NO-FP16-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[UV2]](s16)
- ; NO-FP16-NEXT: [[INTRINSIC_ROUND2:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUND [[FPEXT2]]
- ; NO-FP16-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_ROUND2]](s32)
- ; NO-FP16-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[UV3]](s16)
- ; NO-FP16-NEXT: [[INTRINSIC_ROUND3:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUND [[FPEXT3]]
- ; NO-FP16-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_ROUND3]](s32)
- ; NO-FP16-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16), [[FPTRUNC2]](s16), [[FPTRUNC3]](s16)
- ; NO-FP16-NEXT: $d0 = COPY [[BUILD_VECTOR]](<4 x s16>)
+ ; NO-FP16-NEXT: [[FPEXT:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[COPY]](<4 x s16>)
+ ; NO-FP16-NEXT: [[INTRINSIC_ROUND:%[0-9]+]]:_(<4 x s32>) = G_INTRINSIC_ROUND [[FPEXT]]
+ ; NO-FP16-NEXT: [[FPTRUNC:%[0-9]+]]:_(<4 x s16>) = G_FPTRUNC [[INTRINSIC_ROUND]](<4 x s32>)
+ ; NO-FP16-NEXT: $d0 = COPY [[FPTRUNC]](<4 x s16>)
; NO-FP16-NEXT: RET_ReallyLR implicit $d0
;
; FP16-LABEL: name: test_v4f16.round
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-intrinsic-roundeven.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-intrinsic-roundeven.mir
new file mode 100644
index 00000000000000..14486d0ffc77f6
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-intrinsic-roundeven.mir
@@ -0,0 +1,349 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+
+# RUN:llc %s -verify-machineinstrs -mtriple=aarch64-unknown-unknown -run-pass=legalizer -mattr=-fullfp16 -o - | FileCheck %s --check-prefix=NO-FP16
+# RUN:llc %s -verify-machineinstrs -mtriple=aarch64-unknown-unknown -run-pass=legalizer -mattr=+fullfp16 -o - | FileCheck %s --check-prefix=FP16
+
+...
+---
+name: test_f16.roundeven
+alignment: 4
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $h0
+
+ ; NO-FP16-LABEL: name: test_f16.roundeven
+ ; NO-FP16: liveins: $h0
+ ; NO-FP16-NEXT: {{ $}}
+ ; NO-FP16-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $h0
+ ; NO-FP16-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[COPY]](s16)
+ ; NO-FP16-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUNDEVEN [[FPEXT]]
+ ; NO-FP16-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_ROUNDEVEN]](s32)
+ ; NO-FP16-NEXT: $h0 = COPY [[FPTRUNC]](s16)
+ ; NO-FP16-NEXT: RET_ReallyLR implicit $h0
+ ;
+ ; FP16-LABEL: name: test_f16.roundeven
+ ; FP16: liveins: $h0
+ ; FP16-NEXT: {{ $}}
+ ; FP16-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $h0
+ ; FP16-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(s16) = G_INTRINSIC_ROUNDEVEN [[COPY]]
+ ; FP16-NEXT: $h0 = COPY [[INTRINSIC_ROUNDEVEN]](s16)
+ ; FP16-NEXT: RET_ReallyLR implicit $h0
+ %0:_(s16) = COPY $h0
+ %1:_(s16) = G_INTRINSIC_ROUNDEVEN %0
+ $h0 = COPY %1(s16)
+ RET_ReallyLR implicit $h0
+
+...
+---
+name: test_f32.roundeven
+alignment: 4
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $s0
+
+ ; NO-FP16-LABEL: name: test_f32.roundeven
+ ; NO-FP16: liveins: $s0
+ ; NO-FP16-NEXT: {{ $}}
+ ; NO-FP16-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+ ; NO-FP16-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUNDEVEN [[COPY]]
+ ; NO-FP16-NEXT: $s0 = COPY [[INTRINSIC_ROUNDEVEN]](s32)
+ ; NO-FP16-NEXT: RET_ReallyLR implicit $s0
+ ;
+ ; FP16-LABEL: name: test_f32.roundeven
+ ; FP16: liveins: $s0
+ ; FP16-NEXT: {{ $}}
+ ; FP16-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+ ; FP16-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(s32) = G_INTRINSIC_ROUNDEVEN [[COPY]]
+ ; FP16-NEXT: $s0 = COPY [[INTRINSIC_ROUNDEVEN]](s32)
+ ; FP16-NEXT: RET_ReallyLR implicit $s0
+ %0:_(s32) = COPY $s0
+ %1:_(s32) = G_INTRINSIC_ROUNDEVEN %0
+ $s0 = COPY %1(s32)
+ RET_ReallyLR implicit $s0
+
+...
+---
+name: test_f64.roundeven
+alignment: 4
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $d0
+
+ ; NO-FP16-LABEL: name: test_f64.roundeven
+ ; NO-FP16: liveins: $d0
+ ; NO-FP16-NEXT: {{ $}}
+ ; NO-FP16-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d0
+ ; NO-FP16-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(s64) = G_INTRINSIC_ROUNDEVEN [[COPY]]
+ ; NO-FP16-NEXT: $d0 = COPY [[INTRINSIC_ROUNDEVEN]](s64)
+ ; NO-FP16-NEXT: RET_ReallyLR implicit $d0
+ ;
+ ; FP16-LABEL: name: test_f64.roundeven
+ ; FP16: liveins: $d0
+ ; FP16-NEXT: {{ $}}
+ ; FP16-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d0
+ ; FP16-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(s64) = G_INTRINSIC_ROUNDEVEN [[COPY]]
+ ; FP16-NEXT: $d0 = COPY [[INTRINSIC_ROUNDEVEN]](s64)
+ ; FP16-NEXT: RET_ReallyLR implicit $d0
+ %0:_(s64) = COPY $d0
+ %1:_(s64) = G_INTRINSIC_ROUNDEVEN %0
+ $d0 = COPY %1(s64)
+ RET_ReallyLR implicit $d0
+
+...
+---
+name: test_v8f16.roundeven
+alignment: 4
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $q0
+
+ ; NO-FP16-LABEL: name: test_v8f16.roundeven
+ ; NO-FP16: liveins: $q0
+ ; NO-FP16-NEXT: {{ $}}
+ ; NO-FP16-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0
+ ; NO-FP16-NEXT: [[UV:%[0-9]+]]:_(<4 x s16>), [[UV1:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[COPY]](<8 x s16>)
+ ; NO-FP16-NEXT: [[FPEXT:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[UV]](<4 x s16>)
+ ; NO-FP16-NEXT: [[FPEXT1:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[UV1]](<4 x s16>)
+ ; NO-FP16-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(<4 x s32>) = G_INTRINSIC_ROUNDEVEN [[FPEXT]]
+ ; NO-FP16-NEXT: [[INTRINSIC_ROUNDEVEN1:%[0-9]+]]:_(<4 x s32>) = G_INTRINSIC_ROUNDEVEN [[FPEXT1]]
+ ; NO-FP16-NEXT: [[FPTRUNC:%[0-9]+]]:_(<4 x s16>) = G_FPTRUNC [[INTRINSIC_ROUNDEVEN]](<4 x s32>)
+ ; NO-FP16-NEXT: [[FPTRUNC1:%[0-9]+]]:_(<4 x s16>) = G_FPTRUNC [[INTRINSIC_ROUNDEVEN1]](<4 x s32>)
+ ; NO-FP16-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[FPTRUNC]](<4 x s16>), [[FPTRUNC1]](<4 x s16>)
+ ; NO-FP16-NEXT: $q0 = COPY [[CONCAT_VECTORS]](<8 x s16>)
+ ; NO-FP16-NEXT: RET_ReallyLR implicit $q0
+ ;
+ ; FP16-LABEL: name: test_v8f16.roundeven
+ ; FP16: liveins: $q0
+ ; FP16-NEXT: {{ $}}
+ ; FP16-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0
+ ; FP16-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(<8 x s16>) = G_INTRINSIC_ROUNDEVEN [[COPY]]
+ ; FP16-NEXT: $q0 = COPY [[INTRINSIC_ROUNDEVEN]](<8 x s16>)
+ ; FP16-NEXT: RET_ReallyLR implicit $q0
+ %0:_(<8 x s16>) = COPY $q0
+ %1:_(<8 x s16>) = G_INTRINSIC_ROUNDEVEN %0
+ $q0 = COPY %1(<8 x s16>)
+ RET_ReallyLR implicit $q0
+
+...
+---
+name: test_v4f16.roundeven
+alignment: 4
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $d0
+
+ ; NO-FP16-LABEL: name: test_v4f16.roundeven
+ ; NO-FP16: liveins: $d0
+ ; NO-FP16-NEXT: {{ $}}
+ ; NO-FP16-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0
+ ; NO-FP16-NEXT: [[FPEXT:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[COPY]](<4 x s16>)
+ ; NO-FP16-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(<4 x s32>) = G_INTRINSIC_ROUNDEVEN [[FPEXT]]
+ ; NO-FP16-NEXT: [[FPTRUNC:%[0-9]+]]:_(<4 x s16>) = G_FPTRUNC [[INTRINSIC_ROUNDEVEN]](<4 x s32>)
+ ; NO-FP16-NEXT: $d0 = COPY [[FPTRUNC]](<4 x s16>)
+ ; NO-FP16-NEXT: RET_ReallyLR implicit $d0
+ ;
+ ; FP16-LABEL: name: test_v4f16.roundeven
+ ; FP16: liveins: $d0
+ ; FP16-NEXT: {{ $}}
+ ; FP16-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0
+ ; FP16-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(<4 x s16>) = G_INTRINSIC_ROUNDEVEN [[COPY]]
+ ; FP16-NEXT: $d0 = COPY [[INTRINSIC_ROUNDEVEN]](<4 x s16>)
+ ; FP16-NEXT: RET_ReallyLR implicit $d0
+ %0:_(<4 x s16>) = COPY $d0
+ %1:_(<4 x s16>) = G_INTRINSIC_ROUNDEVEN %0
+ $d0 = COPY %1(<4 x s16>)
+ RET_ReallyLR implicit $d0
+
+...
+---
+name: test_v2f32.roundeven
+alignment: 4
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $d0
+
+ ; NO-FP16-LABEL: name: test_v2f32.roundeven
+ ; NO-FP16: liveins: $d0
+ ; NO-FP16-NEXT: {{ $}}
+ ; NO-FP16-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+ ; NO-FP16-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(<2 x s32>) = G_INTRINSIC_ROUNDEVEN [[COPY]]
+ ; NO-FP16-NEXT: $d0 = COPY [[INTRINSIC_ROUNDEVEN]](<2 x s32>)
+ ; NO-FP16-NEXT: RET_ReallyLR implicit $d0
+ ;
+ ; FP16-LABEL: name: test_v2f32.roundeven
+ ; FP16: liveins: $d0
+ ; FP16-NEXT: {{ $}}
+ ; FP16-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+ ; FP16-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(<2 x s32>) = G_INTRINSIC_ROUNDEVEN [[COPY]]
+ ; FP16-NEXT: $d0 = COPY [[INTRINSIC_ROUNDEVEN]](<2 x s32>)
+ ; FP16-NEXT: RET_ReallyLR implicit $d0
+ %0:_(<2 x s32>) = COPY $d0
+ %1:_(<2 x s32>) = G_INTRINSIC_ROUNDEVEN %0
+ $d0 = COPY %1(<2 x s32>)
+ RET_ReallyLR implicit $d0
+
+...
+---
+name: test_v4f32.roundeven
+alignment: 4
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $q0
+
+ ; NO-FP16-LABEL: name: test_v4f32.roundeven
+ ; NO-FP16: liveins: $q0
+ ; NO-FP16-NEXT: {{ $}}
+ ; NO-FP16-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+ ; NO-FP16-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(<4 x s32>) = G_INTRINSIC_ROUNDEVEN [[COPY]]
+ ; NO-FP16-NEXT: $q0 = COPY [[INTRINSIC_ROUNDEVEN]](<4 x s32>)
+ ; NO-FP16-NEXT: RET_ReallyLR implicit $q0
+ ;
+ ; FP16-LABEL: name: test_v4f32.roundeven
+ ; FP16: liveins: $q0
+ ; FP16-NEXT: {{ $}}
+ ; FP16-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+ ; FP16-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(<4 x s32>) = G_INTRINSIC_ROUNDEVEN [[COPY]]
+ ; FP16-NEXT: $q0 = COPY [[INTRINSIC_ROUNDEVEN]](<4 x s32>)
+ ; FP16-NEXT: RET_ReallyLR implicit $q0
+ %0:_(<4 x s32>) = COPY $q0
+ %1:_(<4 x s32>) = G_INTRINSIC_ROUNDEVEN %0
+ $q0 = COPY %1(<4 x s32>)
+ RET_ReallyLR implicit $q0
+
+...
+---
+name: test_v2f64.roundeven
+alignment: 4
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $q0
+
+ ; NO-FP16-LABEL: name: test_v2f64.roundeven
+ ; NO-FP16: liveins: $q0
+ ; NO-FP16-NEXT: {{ $}}
+ ; NO-FP16-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
+ ; NO-FP16-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(<2 x s64>) = G_INTRINSIC_ROUNDEVEN [[COPY]]
+ ; NO-FP16-NEXT: $q0 = COPY [[INTRINSIC_ROUNDEVEN]](<2 x s64>)
+ ; NO-FP16-NEXT: RET_ReallyLR implicit $q0
+ ;
+ ; FP16-LABEL: name: test_v2f64.roundeven
+ ; FP16: liveins: $q0
+ ; FP16-NEXT: {{ $}}
+ ; FP16-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
+ ; FP16-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(<2 x s64>) = G_INTRINSIC_ROUNDEVEN [[COPY]]
+ ; FP16-NEXT: $q0 = COPY [[INTRINSIC_ROUNDEVEN]](<2 x s64>)
+ ; FP16-NEXT: RET_ReallyLR implicit $q0
+ %0:_(<2 x s64>) = COPY $q0
+ %1:_(<2 x s64>) = G_INTRINSIC_ROUNDEVEN %0
+ $q0 = COPY %1(<2 x s64>)
+ RET_ReallyLR implicit $q0
+
+...
+---
+name: test_v4f64.roundeven
+alignment: 4
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $q0, $q1
+
+ ; NO-FP16-LABEL: name: test_v4f64.roundeven
+ ; NO-FP16: liveins: $q0, $q1
+ ; NO-FP16-NEXT: {{ $}}
+ ; NO-FP16-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
+ ; NO-FP16-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
+ ; NO-FP16-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(<2 x s64>) = G_INTRINSIC_ROUNDEVEN [[COPY]]
+ ; NO-FP16-NEXT: [[INTRINSIC_ROUNDEVEN1:%[0-9]+]]:_(<2 x s64>) = G_INTRINSIC_ROUNDEVEN [[COPY1]]
+ ; NO-FP16-NEXT: $q0 = COPY [[INTRINSIC_ROUNDEVEN]](<2 x s64>)
+ ; NO-FP16-NEXT: $q1 = COPY [[INTRINSIC_ROUNDEVEN1]](<2 x s64>)
+ ; NO-FP16-NEXT: RET_ReallyLR implicit $q0
+ ;
+ ; FP16-LABEL: name: test_v4f64.roundeven
+ ; FP16: liveins: $q0, $q1
+ ; FP16-NEXT: {{ $}}
+ ; FP16-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
+ ; FP16-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
+ ; FP16-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(<2 x s64>) = G_INTRINSIC_ROUNDEVEN [[COPY]]
+ ; FP16-NEXT: [[INTRINSIC_ROUNDEVEN1:%[0-9]+]]:_(<2 x s64>) = G_INTRINSIC_ROUNDEVEN [[COPY1]]
+ ; FP16-NEXT: $q0 = COPY [[INTRINSIC_ROUNDEVEN]](<2 x s64>)
+ ; FP16-NEXT: $q1 = COPY [[INTRINSIC_ROUNDEVEN1]](<2 x s64>)
+ ; FP16-NEXT: RET_ReallyLR implicit $q0
+ %0:_(<2 x s64>) = COPY $q0
+ %1:_(<2 x s64>) = COPY $q1
+ %2:_(<4 x s64>) = G_CONCAT_VECTORS %0, %1
+ %3:_(<4 x s64>) = G_INTRINSIC_ROUNDEVEN %2
+ %4:_(<2 x s64>), %5:_(<2 x s64>) = G_UNMERGE_VALUES %3
+ $q0 = COPY %4(<2 x s64>)
+ $q1 = COPY %5(<2 x s64>)
+ RET_ReallyLR implicit $q0
+
+...
+---
+name: test_v2f16.roundeven
+alignment: 4
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $s0, $s1
+
+ ; NO-FP16-LABEL: name: test_v2f16.roundeven
+ ; NO-FP16: liveins: $s0, $s1
+ ; NO-FP16-NEXT: {{ $}}
+ ; NO-FP16-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $s0
+ ; NO-FP16-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>)
+ ; NO-FP16-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; NO-FP16-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[DEF]](s16), [[DEF]](s16)
+ ; NO-FP16-NEXT: [[FPEXT:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[BUILD_VECTOR]](<4 x s16>)
+ ; NO-FP16-NEXT: [[UV2:%[0-9]+]]:_(<2 x s32>), [[UV3:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[FPEXT]](<4 x s32>)
+ ; NO-FP16-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(<2 x s32>) = G_INTRINSIC_ROUNDEVEN [[UV2]]
+ ; NO-FP16-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INTRINSIC_ROUNDEVEN]](<2 x s32>)
+ ; NO-FP16-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; NO-FP16-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[DEF1]](s32), [[DEF1]](s32)
+ ; NO-FP16-NEXT: [[FPTRUNC:%[0-9]+]]:_(<4 x s16>) = G_FPTRUNC [[BUILD_VECTOR1]](<4 x s32>)
+ ; NO-FP16-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FPTRUNC]](<4 x s16>)
+ ; NO-FP16-NEXT: $s0 = COPY [[UV6]](<2 x s16>)
+ ; NO-FP16-NEXT: RET_ReallyLR implicit $s0
+ ;
+ ; FP16-LABEL: name: test_v2f16.roundeven
+ ; FP16: liveins: $s0, $s1
+ ; FP16-NEXT: {{ $}}
+ ; FP16-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $s0
+ ; FP16-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>)
+ ; FP16-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; FP16-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[DEF]](s16), [[DEF]](s16)
+ ; FP16-NEXT: [[INTRINSIC_ROUNDEVEN:%[0-9]+]]:_(<4 x s16>) = G_INTRINSIC_ROUNDEVEN [[BUILD_VECTOR]]
+ ; FP16-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INTRINSIC_ROUNDEVEN]](<4 x s16>)
+ ; FP16-NEXT: $s0 = COPY [[UV2]](<2 x s16>)
+ ; FP16-NEXT: RET_ReallyLR implicit $s0
+ %0:_(<2 x s16>) = COPY $s0
+ %1:_(<2 x s16>) = G_INTRINSIC_ROUNDEVEN %0
+ $s0 = COPY %1(<2 x s16>)
+ RET_ReallyLR implicit $s0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-intrinsic-trunc.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-intrinsic-trunc.mir
index 3a506a3f13a4dc..629a2f88dbc409 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-intrinsic-trunc.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-intrinsic-trunc.mir
@@ -46,21 +46,10 @@ body: |
; NO-FP16: liveins: $d0
; NO-FP16-NEXT: {{ $}}
; NO-FP16-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0
- ; NO-FP16-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
- ; NO-FP16-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV]](s16)
- ; NO-FP16-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT]]
- ; NO-FP16-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC]](s32)
- ; NO-FP16-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV1]](s16)
- ; NO-FP16-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT1]]
- ; NO-FP16-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC1]](s32)
- ; NO-FP16-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[UV2]](s16)
- ; NO-FP16-NEXT: [[INTRINSIC_TRUNC2:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT2]]
- ; NO-FP16-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC2]](s32)
- ; NO-FP16-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[UV3]](s16)
- ; NO-FP16-NEXT: [[INTRINSIC_TRUNC3:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT3]]
- ; NO-FP16-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC3]](s32)
- ; NO-FP16-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16), [[FPTRUNC2]](s16), [[FPTRUNC3]](s16)
- ; NO-FP16-NEXT: $d0 = COPY [[BUILD_VECTOR]](<4 x s16>)
+ ; NO-FP16-NEXT: [[FPEXT:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[COPY]](<4 x s16>)
+ ; NO-FP16-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(<4 x s32>) = G_INTRINSIC_TRUNC [[FPEXT]]
+ ; NO-FP16-NEXT: [[FPTRUNC:%[0-9]+]]:_(<4 x s16>) = G_FPTRUNC [[INTRINSIC_TRUNC]](<4 x s32>)
+ ; NO-FP16-NEXT: $d0 = COPY [[FPTRUNC]](<4 x s16>)
; NO-FP16-NEXT: RET_ReallyLR implicit $d0
;
; FP16-LABEL: name: test_v4f16.intrinsic_trunc
@@ -89,33 +78,15 @@ body: |
; NO-FP16: liveins: $q0
; NO-FP16-NEXT: {{ $}}
; NO-FP16-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0
- ; NO-FP16-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<8 x s16>)
- ; NO-FP16-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV]](s16)
- ; NO-FP16-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT]]
- ; NO-FP16-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC]](s32)
- ; NO-FP16-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV1]](s16)
- ; NO-FP16-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT1]]
- ; NO-FP16-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC1]](s32)
- ; NO-FP16-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[UV2]](s16)
- ; NO-FP16-NEXT: [[INTRINSIC_TRUNC2:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT2]]
- ; NO-FP16-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC2]](s32)
- ; NO-FP16-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[UV3]](s16)
- ; NO-FP16-NEXT: [[INTRINSIC_TRUNC3:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT3]]
- ; NO-FP16-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC3]](s32)
- ; NO-FP16-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[UV4]](s16)
- ; NO-FP16-NEXT: [[INTRINSIC_TRUNC4:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT4]]
- ; NO-FP16-NEXT: [[FPTRUNC4:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC4]](s32)
- ; NO-FP16-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[UV5]](s16)
- ; NO-FP16-NEXT: [[INTRINSIC_TRUNC5:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT5]]
- ; NO-FP16-NEXT: [[FPTRUNC5:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC5]](s32)
- ; NO-FP16-NEXT: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[UV6]](s16)
- ; NO-FP16-NEXT: [[INTRINSIC_TRUNC6:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT6]]
- ; NO-FP16-NEXT: [[FPTRUNC6:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC6]](s32)
- ; NO-FP16-NEXT: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[UV7]](s16)
- ; NO-FP16-NEXT: [[INTRINSIC_TRUNC7:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT7]]
- ; NO-FP16-NEXT: [[FPTRUNC7:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC7]](s32)
- ; NO-FP16-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16), [[FPTRUNC2]](s16), [[FPTRUNC3]](s16), [[FPTRUNC4]](s16), [[FPTRUNC5]](s16), [[FPTRUNC6]](s16), [[FPTRUNC7]](s16)
- ; NO-FP16-NEXT: $q0 = COPY [[BUILD_VECTOR]](<8 x s16>)
+ ; NO-FP16-NEXT: [[UV:%[0-9]+]]:_(<4 x s16>), [[UV1:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[COPY]](<8 x s16>)
+ ; NO-FP16-NEXT: [[FPEXT:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[UV]](<4 x s16>)
+ ; NO-FP16-NEXT: [[FPEXT1:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[UV1]](<4 x s16>)
+ ; NO-FP16-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(<4 x s32>) = G_INTRINSIC_TRUNC [[FPEXT]]
+ ; NO-FP16-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(<4 x s32>) = G_INTRINSIC_TRUNC [[FPEXT1]]
+ ; NO-FP16-NEXT: [[FPTRUNC:%[0-9]+]]:_(<4 x s16>) = G_FPTRUNC [[INTRINSIC_TRUNC]](<4 x s32>)
+ ; NO-FP16-NEXT: [[FPTRUNC1:%[0-9]+]]:_(<4 x s16>) = G_FPTRUNC [[INTRINSIC_TRUNC1]](<4 x s32>)
+ ; NO-FP16-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[FPTRUNC]](<4 x s16>), [[FPTRUNC1]](<4 x s16>)
+ ; NO-FP16-NEXT: $q0 = COPY [[CONCAT_VECTORS]](<8 x s16>)
; NO-FP16-NEXT: RET_ReallyLR implicit $q0
;
; FP16-LABEL: name: test_v8f16.intrinsic_trunc
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-nearbyint.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-nearbyint.mir
index 3eac322e138fa6..88bff66e493ee2 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-nearbyint.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-nearbyint.mir
@@ -24,21 +24,10 @@ body: |
; NO-FP16: liveins: $d0
; NO-FP16-NEXT: {{ $}}
; NO-FP16-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0
- ; NO-FP16-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
- ; NO-FP16-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV]](s16)
- ; NO-FP16-NEXT: [[FNEARBYINT:%[0-9]+]]:_(s32) = G_FNEARBYINT [[FPEXT]]
- ; NO-FP16-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FNEARBYINT]](s32)
- ; NO-FP16-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV1]](s16)
- ; NO-FP16-NEXT: [[FNEARBYINT1:%[0-9]+]]:_(s32) = G_FNEARBYINT [[FPEXT1]]
- ; NO-FP16-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FNEARBYINT1]](s32)
- ; NO-FP16-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[UV2]](s16)
- ; NO-FP16-NEXT: [[FNEARBYINT2:%[0-9]+]]:_(s32) = G_FNEARBYINT [[FPEXT2]]
- ; NO-FP16-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FNEARBYINT2]](s32)
- ; NO-FP16-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[UV3]](s16)
- ; NO-FP16-NEXT: [[FNEARBYINT3:%[0-9]+]]:_(s32) = G_FNEARBYINT [[FPEXT3]]
- ; NO-FP16-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FNEARBYINT3]](s32)
- ; NO-FP16-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16), [[FPTRUNC2]](s16), [[FPTRUNC3]](s16)
- ; NO-FP16-NEXT: $d0 = COPY [[BUILD_VECTOR]](<4 x s16>)
+ ; NO-FP16-NEXT: [[FPEXT:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[COPY]](<4 x s16>)
+ ; NO-FP16-NEXT: [[FNEARBYINT:%[0-9]+]]:_(<4 x s32>) = G_FNEARBYINT [[FPEXT]]
+ ; NO-FP16-NEXT: [[FPTRUNC:%[0-9]+]]:_(<4 x s16>) = G_FPTRUNC [[FNEARBYINT]](<4 x s32>)
+ ; NO-FP16-NEXT: $d0 = COPY [[FPTRUNC]](<4 x s16>)
; NO-FP16-NEXT: RET_ReallyLR implicit $d0
%0:_(<4 x s16>) = COPY $d0
%1:_(<4 x s16>) = G_FNEARBYINT %0
@@ -67,33 +56,15 @@ body: |
; NO-FP16: liveins: $q0
; NO-FP16-NEXT: {{ $}}
; NO-FP16-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0
- ; NO-FP16-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<8 x s16>)
- ; NO-FP16-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV]](s16)
- ; NO-FP16-NEXT: [[FNEARBYINT:%[0-9]+]]:_(s32) = G_FNEARBYINT [[FPEXT]]
- ; NO-FP16-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FNEARBYINT]](s32)
- ; NO-FP16-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV1]](s16)
- ; NO-FP16-NEXT: [[FNEARBYINT1:%[0-9]+]]:_(s32) = G_FNEARBYINT [[FPEXT1]]
- ; NO-FP16-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FNEARBYINT1]](s32)
- ; NO-FP16-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[UV2]](s16)
- ; NO-FP16-NEXT: [[FNEARBYINT2:%[0-9]+]]:_(s32) = G_FNEARBYINT [[FPEXT2]]
- ; NO-FP16-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FNEARBYINT2]](s32)
- ; NO-FP16-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[UV3]](s16)
- ; NO-FP16-NEXT: [[FNEARBYINT3:%[0-9]+]]:_(s32) = G_FNEARBYINT [[FPEXT3]]
- ; NO-FP16-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FNEARBYINT3]](s32)
- ; NO-FP16-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[UV4]](s16)
- ; NO-FP16-NEXT: [[FNEARBYINT4:%[0-9]+]]:_(s32) = G_FNEARBYINT [[FPEXT4]]
- ; NO-FP16-NEXT: [[FPTRUNC4:%[0-9]+]]:_(s16) = G_FPTRUNC [[FNEARBYINT4]](s32)
- ; NO-FP16-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[UV5]](s16)
- ; NO-FP16-NEXT: [[FNEARBYINT5:%[0-9]+]]:_(s32) = G_FNEARBYINT [[FPEXT5]]
- ; NO-FP16-NEXT: [[FPTRUNC5:%[0-9]+]]:_(s16) = G_FPTRUNC [[FNEARBYINT5]](s32)
- ; NO-FP16-NEXT: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[UV6]](s16)
- ; NO-FP16-NEXT: [[FNEARBYINT6:%[0-9]+]]:_(s32) = G_FNEARBYINT [[FPEXT6]]
- ; NO-FP16-NEXT: [[FPTRUNC6:%[0-9]+]]:_(s16) = G_FPTRUNC [[FNEARBYINT6]](s32)
- ; NO-FP16-NEXT: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[UV7]](s16)
- ; NO-FP16-NEXT: [[FNEARBYINT7:%[0-9]+]]:_(s32) = G_FNEARBYINT [[FPEXT7]]
- ; NO-FP16-NEXT: [[FPTRUNC7:%[0-9]+]]:_(s16) = G_FPTRUNC [[FNEARBYINT7]](s32)
- ; NO-FP16-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16), [[FPTRUNC2]](s16), [[FPTRUNC3]](s16), [[FPTRUNC4]](s16), [[FPTRUNC5]](s16), [[FPTRUNC6]](s16), [[FPTRUNC7]](s16)
- ; NO-FP16-NEXT: $q0 = COPY [[BUILD_VECTOR]](<8 x s16>)
+ ; NO-FP16-NEXT: [[UV:%[0-9]+]]:_(<4 x s16>), [[UV1:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[COPY]](<8 x s16>)
+ ; NO-FP16-NEXT: [[FPEXT:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[UV]](<4 x s16>)
+ ; NO-FP16-NEXT: [[FPEXT1:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[UV1]](<4 x s16>)
+ ; NO-FP16-NEXT: [[FNEARBYINT:%[0-9]+]]:_(<4 x s32>) = G_FNEARBYINT [[FPEXT]]
+ ; NO-FP16-NEXT: [[FNEARBYINT1:%[0-9]+]]:_(<4 x s32>) = G_FNEARBYINT [[FPEXT1]]
+ ; NO-FP16-NEXT: [[FPTRUNC:%[0-9]+]]:_(<4 x s16>) = G_FPTRUNC [[FNEARBYINT]](<4 x s32>)
+ ; NO-FP16-NEXT: [[FPTRUNC1:%[0-9]+]]:_(<4 x s16>) = G_FPTRUNC [[FNEARBYINT1]](<4 x s32>)
+ ; NO-FP16-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[FPTRUNC]](<4 x s16>), [[FPTRUNC1]](<4 x s16>)
+ ; NO-FP16-NEXT: $q0 = COPY [[CONCAT_VECTORS]](<8 x s16>)
; NO-FP16-NEXT: RET_ReallyLR implicit $q0
%0:_(<8 x s16>) = COPY $q0
%1:_(<8 x s16>) = G_FNEARBYINT %0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
index 7ef8212c5ffbc0..d1c0140e591573 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
@@ -158,9 +158,9 @@
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: G_INTRINSIC_ROUNDEVEN (opcode {{[0-9]+}}): 1 type index, 0 imm indices
-# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
-# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
-
+# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
+# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: G_READCYCLECOUNTER (opcode {{[0-9]+}}): 1 type index, 0 imm indices
# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
@@ -442,7 +442,6 @@
# DEBUG-NEXT: .. the first uncovered type index: 1, OK
# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
# DEBUG-NEXT: G_FMA (opcode {{[0-9]+}}): 1 type index, 0 imm indices
-# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: G_FMAD (opcode {{[0-9]+}}): 1 type index, 0 imm indices
@@ -614,6 +613,7 @@
# DEBUG-NEXT: .. the first uncovered type index: 1, OK
# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
# DEBUG-NEXT: G_FCEIL (opcode {{[0-9]+}}): 1 type index, 0 imm indices
+# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: G_FCOS (opcode {{[0-9]+}}): 1 type index, 0 imm indices
diff --git a/llvm/test/CodeGen/AArch64/fcvt.ll b/llvm/test/CodeGen/AArch64/fcvt.ll
index aaa9d657c7913e..28817ecec5177a 100644
--- a/llvm/test/CodeGen/AArch64/fcvt.ll
+++ b/llvm/test/CodeGen/AArch64/fcvt.ll
@@ -1,59 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=aarch64-none-eabi -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-NOFP16
; RUN: llc -mtriple=aarch64-none-eabi -mattr=+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-FP16
-; RUN: llc -mtriple=aarch64-none-eabi -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-NOFP16
-; RUN: llc -mtriple=aarch64-none-eabi -mattr=+fullfp16 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-FP16
-
-; CHECK-GI: warning: Instruction selection used fallback path for ceil_v3f64
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for ceil_v4f64
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for ceil_v3f32
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for ceil_v8f32
-; CHECK-GI-FP16-NEXT: warning: Instruction selection used fallback path for ceil_v7f16
-; CHECK-GI-FP16-NEXT: warning: Instruction selection used fallback path for ceil_v16f16
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for floor_v3f64
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for floor_v4f64
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for floor_v3f32
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for floor_v8f32
-; CHECK-GI-FP16-NEXT: warning: Instruction selection used fallback path for floor_v7f16
-; CHECK-GI-FP16-NEXT: warning: Instruction selection used fallback path for floor_v16f16
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for nearbyint_v3f64
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for nearbyint_v4f64
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for nearbyint_v3f32
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for nearbyint_v8f32
-; CHECK-GI-FP16-NEXT: warning: Instruction selection used fallback path for nearbyint_v7f16
-; CHECK-GI-FP16-NEXT: warning: Instruction selection used fallback path for nearbyint_v16f16
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for roundeven_f64
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for roundeven_f32
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for roundeven_f16
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for roundeven_v2f64
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for roundeven_v3f64
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for roundeven_v4f64
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for roundeven_v2f32
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for roundeven_v3f32
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for roundeven_v4f32
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for roundeven_v8f32
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for roundeven_v7f16
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for roundeven_v4f16
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for roundeven_v8f16
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for roundeven_v16f16
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for rint_v3f64
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for rint_v4f64
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for rint_v3f32
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for rint_v8f32
-; CHECK-GI-FP16-NEXT: warning: Instruction selection used fallback path for rint_v7f16
-; CHECK-GI-FP16-NEXT: warning: Instruction selection used fallback path for rint_v16f16
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for round_v3f64
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for round_v4f64
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for round_v3f32
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for round_v8f32
-; CHECK-GI-FP16-NEXT: warning: Instruction selection used fallback path for round_v7f16
-; CHECK-GI-FP16-NEXT: warning: Instruction selection used fallback path for round_v16f16
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for trunc_v3f64
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for trunc_v4f64
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for trunc_v3f32
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for trunc_v8f32
-; CHECK-GI-FP16-NEXT: warning: Instruction selection used fallback path for trunc_v7f16
-; CHECK-GI-FP16-NEXT: warning: Instruction selection used fallback path for trunc_v16f16
+; RUN: llc -mtriple=aarch64-none-eabi -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-NOFP16
+; RUN: llc -mtriple=aarch64-none-eabi -mattr=+fullfp16 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-FP16
define double @ceil_f64(double %a) {
; CHECK-LABEL: ceil_f64:
@@ -115,19 +64,30 @@ entry:
}
define <3 x double> @ceil_v3f64(<3 x double> %a) {
-; CHECK-LABEL: ceil_v3f64:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT: mov v0.d[1], v1.d[0]
-; CHECK-NEXT: frintp v2.2d, v2.2d
-; CHECK-NEXT: // kill: def $d2 killed $d2 killed $q2
-; CHECK-NEXT: frintp v0.2d, v0.2d
-; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q1
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: ceil_v3f64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-SD-NEXT: frintp v2.2d, v2.2d
+; CHECK-SD-NEXT: // kill: def $d2 killed $d2 killed $q2
+; CHECK-SD-NEXT: frintp v0.2d, v0.2d
+; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: ceil_v3f64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT: frintp d2, d2
+; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT: frintp v0.2d, v0.2d
+; CHECK-GI-NEXT: mov d1, v0.d[1]
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT: ret
entry:
%c = call <3 x double> @llvm.ceil.v3f64(<3 x double> %a)
ret <3 x double> %c
@@ -155,10 +115,25 @@ entry:
}
define <3 x float> @ceil_v3f32(<3 x float> %a) {
-; CHECK-LABEL: ceil_v3f32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: frintp v0.4s, v0.4s
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: ceil_v3f32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: frintp v0.4s, v0.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: ceil_v3f32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov s1, v0.s[1]
+; CHECK-GI-NEXT: mov s2, v0.s[2]
+; CHECK-GI-NEXT: mov v0.s[1], v1.s[0]
+; CHECK-GI-NEXT: mov v0.s[2], v2.s[0]
+; CHECK-GI-NEXT: mov v0.s[3], v0.s[0]
+; CHECK-GI-NEXT: frintp v0.4s, v0.4s
+; CHECK-GI-NEXT: mov s1, v0.s[1]
+; CHECK-GI-NEXT: mov s2, v0.s[2]
+; CHECK-GI-NEXT: mov v0.s[1], v1.s[0]
+; CHECK-GI-NEXT: mov v0.s[2], v2.s[0]
+; CHECK-GI-NEXT: mov v0.s[3], v0.s[0]
+; CHECK-GI-NEXT: ret
entry:
%c = call <3 x float> @llvm.ceil.v3f32(<3 x float> %a)
ret <3 x float> %c
@@ -236,46 +211,71 @@ define <7 x half> @ceil_v7f16(<7 x half> %a) {
;
; CHECK-GI-NOFP16-LABEL: ceil_v7f16:
; CHECK-GI-NOFP16: // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1]
-; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[2]
-; CHECK-GI-NOFP16-NEXT: fcvt s3, h0
-; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[3]
-; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[4]
-; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
-; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
-; CHECK-GI-NOFP16-NEXT: frintp s3, s3
-; CHECK-GI-NOFP16-NEXT: fcvt s4, h4
-; CHECK-GI-NOFP16-NEXT: fcvt s6, h6
-; CHECK-GI-NOFP16-NEXT: frintp s5, s1
-; CHECK-GI-NOFP16-NEXT: frintp s2, s2
-; CHECK-GI-NOFP16-NEXT: fcvt h1, s3
-; CHECK-GI-NOFP16-NEXT: frintp s4, s4
-; CHECK-GI-NOFP16-NEXT: fcvt h3, s5
-; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[5]
-; CHECK-GI-NOFP16-NEXT: fcvt h2, s2
-; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[6]
-; CHECK-GI-NOFP16-NEXT: fcvt h4, s4
-; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v3.h[0]
-; CHECK-GI-NOFP16-NEXT: frintp s3, s6
-; CHECK-GI-NOFP16-NEXT: fcvt s5, h5
-; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
-; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v2.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvt h2, s3
-; CHECK-GI-NOFP16-NEXT: frintp s3, s5
-; CHECK-GI-NOFP16-NEXT: frintp s0, s0
-; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v4.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvt h3, s3
-; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
-; CHECK-GI-NOFP16-NEXT: mov v1.h[4], v2.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v1.h[5], v3.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v1.h[6], v0.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v1.h[7], v0.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v0.16b, v1.16b
+; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[4]
+; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[5]
+; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[6]
+; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v2.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v3.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v0.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h
+; CHECK-GI-NOFP16-NEXT: mov s2, v1.s[1]
+; CHECK-GI-NOFP16-NEXT: mov s3, v1.s[2]
+; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v2.s[0]
+; CHECK-GI-NOFP16-NEXT: mov v1.s[2], v3.s[0]
+; CHECK-GI-NOFP16-NEXT: mov v1.s[3], v0.s[0]
+; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT: frintp v1.4s, v1.4s
+; CHECK-GI-NOFP16-NEXT: frintp v0.4s, v0.4s
+; CHECK-GI-NOFP16-NEXT: mov s2, v1.s[1]
+; CHECK-GI-NOFP16-NEXT: mov s3, v1.s[2]
+; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s
+; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v2.s[0]
+; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1]
+; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[2]
+; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[3]
+; CHECK-GI-NOFP16-NEXT: mov v1.s[2], v3.s[0]
+; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v2.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v1.s[3], v0.s[0]
+; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v4.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s
+; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v5.h[0]
+; CHECK-GI-NOFP16-NEXT: mov h2, v1.h[1]
+; CHECK-GI-NOFP16-NEXT: mov v0.h[4], v1.h[0]
+; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[2]
+; CHECK-GI-NOFP16-NEXT: mov v0.h[5], v2.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v0.h[6], v1.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v0.h[7], v0.h[0]
; CHECK-GI-NOFP16-NEXT: ret
;
; CHECK-GI-FP16-LABEL: ceil_v7f16:
; CHECK-GI-FP16: // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
+; CHECK-GI-FP16-NEXT: mov h2, v0.h[2]
+; CHECK-GI-FP16-NEXT: mov h3, v0.h[3]
+; CHECK-GI-FP16-NEXT: mov h4, v0.h[4]
+; CHECK-GI-FP16-NEXT: mov h5, v0.h[5]
+; CHECK-GI-FP16-NEXT: mov h6, v0.h[6]
+; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[2], v2.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[3], v3.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[4], v4.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[5], v5.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[6], v6.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[7], v0.h[0]
; CHECK-GI-FP16-NEXT: frintp v0.8h, v0.8h
+; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
+; CHECK-GI-FP16-NEXT: mov h2, v0.h[2]
+; CHECK-GI-FP16-NEXT: mov h3, v0.h[3]
+; CHECK-GI-FP16-NEXT: mov h4, v0.h[4]
+; CHECK-GI-FP16-NEXT: mov h5, v0.h[5]
+; CHECK-GI-FP16-NEXT: mov h6, v0.h[6]
+; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[2], v2.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[3], v3.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[4], v4.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[5], v5.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[6], v6.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[7], v0.h[0]
; CHECK-GI-FP16-NEXT: ret
entry:
%c = call <7 x half> @llvm.ceil.v7f16(<7 x half> %a)
@@ -314,26 +314,9 @@ define <4 x half> @ceil_v4f16(<4 x half> %a) {
;
; CHECK-GI-NOFP16-LABEL: ceil_v4f16:
; CHECK-GI-NOFP16: // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1]
-; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[2]
-; CHECK-GI-NOFP16-NEXT: fcvt s3, h0
-; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[3]
-; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
-; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
-; CHECK-GI-NOFP16-NEXT: frintp s3, s3
-; CHECK-GI-NOFP16-NEXT: fcvt s4, h0
-; CHECK-GI-NOFP16-NEXT: frintp s1, s1
-; CHECK-GI-NOFP16-NEXT: frintp s2, s2
-; CHECK-GI-NOFP16-NEXT: fcvt h0, s3
-; CHECK-GI-NOFP16-NEXT: frintp s3, s4
-; CHECK-GI-NOFP16-NEXT: fcvt h1, s1
-; CHECK-GI-NOFP16-NEXT: fcvt h2, s2
-; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvt h1, s3
-; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v2.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v1.h[0]
-; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT: frintp v0.4s, v0.4s
+; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s
; CHECK-GI-NOFP16-NEXT: ret
;
; CHECK-GI-FP16-LABEL: ceil_v4f16:
@@ -396,45 +379,12 @@ define <8 x half> @ceil_v8f16(<8 x half> %a) {
;
; CHECK-GI-NOFP16-LABEL: ceil_v8f16:
; CHECK-GI-NOFP16: // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1]
-; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[2]
-; CHECK-GI-NOFP16-NEXT: fcvt s3, h0
-; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[3]
-; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[4]
-; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
-; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
-; CHECK-GI-NOFP16-NEXT: frintp s3, s3
-; CHECK-GI-NOFP16-NEXT: fcvt s4, h4
-; CHECK-GI-NOFP16-NEXT: fcvt s6, h6
-; CHECK-GI-NOFP16-NEXT: frintp s5, s1
-; CHECK-GI-NOFP16-NEXT: frintp s2, s2
-; CHECK-GI-NOFP16-NEXT: fcvt h1, s3
-; CHECK-GI-NOFP16-NEXT: frintp s4, s4
-; CHECK-GI-NOFP16-NEXT: frintp s6, s6
-; CHECK-GI-NOFP16-NEXT: fcvt h3, s5
-; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[5]
-; CHECK-GI-NOFP16-NEXT: fcvt h2, s2
-; CHECK-GI-NOFP16-NEXT: fcvt h4, s4
-; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v3.h[0]
-; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[6]
-; CHECK-GI-NOFP16-NEXT: fcvt s5, h5
-; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[7]
-; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v2.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvt s3, h3
-; CHECK-GI-NOFP16-NEXT: frintp s2, s5
-; CHECK-GI-NOFP16-NEXT: fcvt h5, s6
-; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
-; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v4.h[0]
-; CHECK-GI-NOFP16-NEXT: frintp s3, s3
-; CHECK-GI-NOFP16-NEXT: fcvt h2, s2
-; CHECK-GI-NOFP16-NEXT: frintp s0, s0
-; CHECK-GI-NOFP16-NEXT: mov v1.h[4], v5.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvt h3, s3
-; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
-; CHECK-GI-NOFP16-NEXT: mov v1.h[5], v2.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v1.h[6], v3.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v1.h[7], v0.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v0.16b, v1.16b
+; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-GI-NOFP16-NEXT: frintp v1.4s, v1.4s
+; CHECK-GI-NOFP16-NEXT: frintp v2.4s, v0.4s
+; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v1.4s
+; CHECK-GI-NOFP16-NEXT: fcvtn2 v0.8h, v2.4s
; CHECK-GI-NOFP16-NEXT: ret
;
; CHECK-GI-FP16-LABEL: ceil_v8f16:
@@ -537,84 +487,18 @@ define <16 x half> @ceil_v16f16(<16 x half> %a) {
;
; CHECK-GI-NOFP16-LABEL: ceil_v16f16:
; CHECK-GI-NOFP16: // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1]
-; CHECK-GI-NOFP16-NEXT: mov h3, v1.h[1]
-; CHECK-GI-NOFP16-NEXT: fcvt s4, h0
-; CHECK-GI-NOFP16-NEXT: fcvt s5, h1
-; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[2]
-; CHECK-GI-NOFP16-NEXT: mov h7, v1.h[2]
-; CHECK-GI-NOFP16-NEXT: mov h18, v0.h[3]
-; CHECK-GI-NOFP16-NEXT: mov h19, v1.h[3]
-; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
-; CHECK-GI-NOFP16-NEXT: fcvt s3, h3
-; CHECK-GI-NOFP16-NEXT: frintp s4, s4
-; CHECK-GI-NOFP16-NEXT: frintp s5, s5
-; CHECK-GI-NOFP16-NEXT: fcvt s6, h6
-; CHECK-GI-NOFP16-NEXT: fcvt s7, h7
-; CHECK-GI-NOFP16-NEXT: frintp s16, s2
-; CHECK-GI-NOFP16-NEXT: frintp s17, s3
-; CHECK-GI-NOFP16-NEXT: fcvt h2, s4
-; CHECK-GI-NOFP16-NEXT: fcvt h3, s5
-; CHECK-GI-NOFP16-NEXT: frintp s6, s6
-; CHECK-GI-NOFP16-NEXT: frintp s7, s7
-; CHECK-GI-NOFP16-NEXT: fcvt h4, s16
-; CHECK-GI-NOFP16-NEXT: fcvt h5, s17
-; CHECK-GI-NOFP16-NEXT: fcvt s16, h18
-; CHECK-GI-NOFP16-NEXT: fcvt s17, h19
-; CHECK-GI-NOFP16-NEXT: mov h18, v1.h[4]
-; CHECK-GI-NOFP16-NEXT: mov v2.h[1], v4.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvt h4, s6
-; CHECK-GI-NOFP16-NEXT: mov v3.h[1], v5.h[0]
-; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[4]
-; CHECK-GI-NOFP16-NEXT: fcvt h6, s7
-; CHECK-GI-NOFP16-NEXT: frintp s16, s16
-; CHECK-GI-NOFP16-NEXT: frintp s17, s17
-; CHECK-GI-NOFP16-NEXT: mov h7, v0.h[5]
-; CHECK-GI-NOFP16-NEXT: mov v2.h[2], v4.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvt s18, h18
-; CHECK-GI-NOFP16-NEXT: fcvt s5, h5
-; CHECK-GI-NOFP16-NEXT: mov v3.h[2], v6.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvt h4, s16
-; CHECK-GI-NOFP16-NEXT: fcvt h16, s17
-; CHECK-GI-NOFP16-NEXT: mov h17, v1.h[5]
-; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[6]
-; CHECK-GI-NOFP16-NEXT: fcvt s7, h7
-; CHECK-GI-NOFP16-NEXT: frintp s5, s5
-; CHECK-GI-NOFP16-NEXT: mov v2.h[3], v4.h[0]
-; CHECK-GI-NOFP16-NEXT: mov h4, v1.h[6]
-; CHECK-GI-NOFP16-NEXT: mov v3.h[3], v16.h[0]
-; CHECK-GI-NOFP16-NEXT: frintp s16, s18
-; CHECK-GI-NOFP16-NEXT: fcvt s17, h17
-; CHECK-GI-NOFP16-NEXT: frintp s7, s7
-; CHECK-GI-NOFP16-NEXT: fcvt s6, h6
-; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[7]
-; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[7]
-; CHECK-GI-NOFP16-NEXT: fcvt s4, h4
-; CHECK-GI-NOFP16-NEXT: fcvt h5, s5
-; CHECK-GI-NOFP16-NEXT: fcvt h16, s16
-; CHECK-GI-NOFP16-NEXT: frintp s17, s17
-; CHECK-GI-NOFP16-NEXT: fcvt h7, s7
-; CHECK-GI-NOFP16-NEXT: frintp s6, s6
-; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
-; CHECK-GI-NOFP16-NEXT: frintp s4, s4
-; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
-; CHECK-GI-NOFP16-NEXT: mov v2.h[4], v5.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v3.h[4], v16.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvt h5, s17
-; CHECK-GI-NOFP16-NEXT: fcvt h6, s6
-; CHECK-GI-NOFP16-NEXT: frintp s0, s0
-; CHECK-GI-NOFP16-NEXT: fcvt h4, s4
-; CHECK-GI-NOFP16-NEXT: frintp s1, s1
-; CHECK-GI-NOFP16-NEXT: mov v2.h[5], v7.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v3.h[5], v5.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
-; CHECK-GI-NOFP16-NEXT: fcvt h1, s1
-; CHECK-GI-NOFP16-NEXT: mov v2.h[6], v6.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v3.h[6], v4.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v2.h[7], v0.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v3.h[7], v1.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v0.16b, v2.16b
-; CHECK-GI-NOFP16-NEXT: mov v1.16b, v3.16b
+; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v1.4h
+; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-GI-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
+; CHECK-GI-NOFP16-NEXT: frintp v2.4s, v2.4s
+; CHECK-GI-NOFP16-NEXT: frintp v3.4s, v3.4s
+; CHECK-GI-NOFP16-NEXT: frintp v4.4s, v0.4s
+; CHECK-GI-NOFP16-NEXT: frintp v5.4s, v1.4s
+; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v2.4s
+; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v3.4s
+; CHECK-GI-NOFP16-NEXT: fcvtn2 v0.8h, v4.4s
+; CHECK-GI-NOFP16-NEXT: fcvtn2 v1.8h, v5.4s
; CHECK-GI-NOFP16-NEXT: ret
;
; CHECK-GI-FP16-LABEL: ceil_v16f16:
@@ -687,19 +571,30 @@ entry:
}
define <3 x double> @floor_v3f64(<3 x double> %a) {
-; CHECK-LABEL: floor_v3f64:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT: mov v0.d[1], v1.d[0]
-; CHECK-NEXT: frintm v2.2d, v2.2d
-; CHECK-NEXT: // kill: def $d2 killed $d2 killed $q2
-; CHECK-NEXT: frintm v0.2d, v0.2d
-; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q1
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: floor_v3f64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-SD-NEXT: frintm v2.2d, v2.2d
+; CHECK-SD-NEXT: // kill: def $d2 killed $d2 killed $q2
+; CHECK-SD-NEXT: frintm v0.2d, v0.2d
+; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: floor_v3f64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT: frintm d2, d2
+; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT: frintm v0.2d, v0.2d
+; CHECK-GI-NEXT: mov d1, v0.d[1]
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT: ret
entry:
%c = call <3 x double> @llvm.floor.v3f64(<3 x double> %a)
ret <3 x double> %c
@@ -727,10 +622,25 @@ entry:
}
define <3 x float> @floor_v3f32(<3 x float> %a) {
-; CHECK-LABEL: floor_v3f32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: frintm v0.4s, v0.4s
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: floor_v3f32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: frintm v0.4s, v0.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: floor_v3f32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov s1, v0.s[1]
+; CHECK-GI-NEXT: mov s2, v0.s[2]
+; CHECK-GI-NEXT: mov v0.s[1], v1.s[0]
+; CHECK-GI-NEXT: mov v0.s[2], v2.s[0]
+; CHECK-GI-NEXT: mov v0.s[3], v0.s[0]
+; CHECK-GI-NEXT: frintm v0.4s, v0.4s
+; CHECK-GI-NEXT: mov s1, v0.s[1]
+; CHECK-GI-NEXT: mov s2, v0.s[2]
+; CHECK-GI-NEXT: mov v0.s[1], v1.s[0]
+; CHECK-GI-NEXT: mov v0.s[2], v2.s[0]
+; CHECK-GI-NEXT: mov v0.s[3], v0.s[0]
+; CHECK-GI-NEXT: ret
entry:
%c = call <3 x float> @llvm.floor.v3f32(<3 x float> %a)
ret <3 x float> %c
@@ -808,46 +718,71 @@ define <7 x half> @floor_v7f16(<7 x half> %a) {
;
; CHECK-GI-NOFP16-LABEL: floor_v7f16:
; CHECK-GI-NOFP16: // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1]
-; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[2]
-; CHECK-GI-NOFP16-NEXT: fcvt s3, h0
-; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[3]
-; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[4]
-; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
-; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
-; CHECK-GI-NOFP16-NEXT: frintm s3, s3
-; CHECK-GI-NOFP16-NEXT: fcvt s4, h4
-; CHECK-GI-NOFP16-NEXT: fcvt s6, h6
-; CHECK-GI-NOFP16-NEXT: frintm s5, s1
-; CHECK-GI-NOFP16-NEXT: frintm s2, s2
-; CHECK-GI-NOFP16-NEXT: fcvt h1, s3
-; CHECK-GI-NOFP16-NEXT: frintm s4, s4
-; CHECK-GI-NOFP16-NEXT: fcvt h3, s5
-; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[5]
-; CHECK-GI-NOFP16-NEXT: fcvt h2, s2
-; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[6]
-; CHECK-GI-NOFP16-NEXT: fcvt h4, s4
-; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v3.h[0]
-; CHECK-GI-NOFP16-NEXT: frintm s3, s6
-; CHECK-GI-NOFP16-NEXT: fcvt s5, h5
-; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
-; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v2.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvt h2, s3
-; CHECK-GI-NOFP16-NEXT: frintm s3, s5
-; CHECK-GI-NOFP16-NEXT: frintm s0, s0
-; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v4.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvt h3, s3
-; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
-; CHECK-GI-NOFP16-NEXT: mov v1.h[4], v2.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v1.h[5], v3.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v1.h[6], v0.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v1.h[7], v0.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v0.16b, v1.16b
+; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[4]
+; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[5]
+; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[6]
+; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v2.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v3.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v0.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h
+; CHECK-GI-NOFP16-NEXT: mov s2, v1.s[1]
+; CHECK-GI-NOFP16-NEXT: mov s3, v1.s[2]
+; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v2.s[0]
+; CHECK-GI-NOFP16-NEXT: mov v1.s[2], v3.s[0]
+; CHECK-GI-NOFP16-NEXT: mov v1.s[3], v0.s[0]
+; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT: frintm v1.4s, v1.4s
+; CHECK-GI-NOFP16-NEXT: frintm v0.4s, v0.4s
+; CHECK-GI-NOFP16-NEXT: mov s2, v1.s[1]
+; CHECK-GI-NOFP16-NEXT: mov s3, v1.s[2]
+; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s
+; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v2.s[0]
+; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1]
+; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[2]
+; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[3]
+; CHECK-GI-NOFP16-NEXT: mov v1.s[2], v3.s[0]
+; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v2.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v1.s[3], v0.s[0]
+; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v4.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s
+; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v5.h[0]
+; CHECK-GI-NOFP16-NEXT: mov h2, v1.h[1]
+; CHECK-GI-NOFP16-NEXT: mov v0.h[4], v1.h[0]
+; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[2]
+; CHECK-GI-NOFP16-NEXT: mov v0.h[5], v2.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v0.h[6], v1.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v0.h[7], v0.h[0]
; CHECK-GI-NOFP16-NEXT: ret
;
; CHECK-GI-FP16-LABEL: floor_v7f16:
; CHECK-GI-FP16: // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
+; CHECK-GI-FP16-NEXT: mov h2, v0.h[2]
+; CHECK-GI-FP16-NEXT: mov h3, v0.h[3]
+; CHECK-GI-FP16-NEXT: mov h4, v0.h[4]
+; CHECK-GI-FP16-NEXT: mov h5, v0.h[5]
+; CHECK-GI-FP16-NEXT: mov h6, v0.h[6]
+; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[2], v2.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[3], v3.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[4], v4.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[5], v5.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[6], v6.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[7], v0.h[0]
; CHECK-GI-FP16-NEXT: frintm v0.8h, v0.8h
+; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
+; CHECK-GI-FP16-NEXT: mov h2, v0.h[2]
+; CHECK-GI-FP16-NEXT: mov h3, v0.h[3]
+; CHECK-GI-FP16-NEXT: mov h4, v0.h[4]
+; CHECK-GI-FP16-NEXT: mov h5, v0.h[5]
+; CHECK-GI-FP16-NEXT: mov h6, v0.h[6]
+; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[2], v2.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[3], v3.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[4], v4.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[5], v5.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[6], v6.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[7], v0.h[0]
; CHECK-GI-FP16-NEXT: ret
entry:
%c = call <7 x half> @llvm.floor.v7f16(<7 x half> %a)
@@ -886,26 +821,9 @@ define <4 x half> @floor_v4f16(<4 x half> %a) {
;
; CHECK-GI-NOFP16-LABEL: floor_v4f16:
; CHECK-GI-NOFP16: // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1]
-; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[2]
-; CHECK-GI-NOFP16-NEXT: fcvt s3, h0
-; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[3]
-; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
-; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
-; CHECK-GI-NOFP16-NEXT: frintm s3, s3
-; CHECK-GI-NOFP16-NEXT: fcvt s4, h0
-; CHECK-GI-NOFP16-NEXT: frintm s1, s1
-; CHECK-GI-NOFP16-NEXT: frintm s2, s2
-; CHECK-GI-NOFP16-NEXT: fcvt h0, s3
-; CHECK-GI-NOFP16-NEXT: frintm s3, s4
-; CHECK-GI-NOFP16-NEXT: fcvt h1, s1
-; CHECK-GI-NOFP16-NEXT: fcvt h2, s2
-; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvt h1, s3
-; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v2.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v1.h[0]
-; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT: frintm v0.4s, v0.4s
+; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s
; CHECK-GI-NOFP16-NEXT: ret
;
; CHECK-GI-FP16-LABEL: floor_v4f16:
@@ -968,45 +886,12 @@ define <8 x half> @floor_v8f16(<8 x half> %a) {
;
; CHECK-GI-NOFP16-LABEL: floor_v8f16:
; CHECK-GI-NOFP16: // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1]
-; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[2]
-; CHECK-GI-NOFP16-NEXT: fcvt s3, h0
-; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[3]
-; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[4]
-; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
-; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
-; CHECK-GI-NOFP16-NEXT: frintm s3, s3
-; CHECK-GI-NOFP16-NEXT: fcvt s4, h4
-; CHECK-GI-NOFP16-NEXT: fcvt s6, h6
-; CHECK-GI-NOFP16-NEXT: frintm s5, s1
-; CHECK-GI-NOFP16-NEXT: frintm s2, s2
-; CHECK-GI-NOFP16-NEXT: fcvt h1, s3
-; CHECK-GI-NOFP16-NEXT: frintm s4, s4
-; CHECK-GI-NOFP16-NEXT: frintm s6, s6
-; CHECK-GI-NOFP16-NEXT: fcvt h3, s5
-; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[5]
-; CHECK-GI-NOFP16-NEXT: fcvt h2, s2
-; CHECK-GI-NOFP16-NEXT: fcvt h4, s4
-; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v3.h[0]
-; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[6]
-; CHECK-GI-NOFP16-NEXT: fcvt s5, h5
-; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[7]
-; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v2.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvt s3, h3
-; CHECK-GI-NOFP16-NEXT: frintm s2, s5
-; CHECK-GI-NOFP16-NEXT: fcvt h5, s6
-; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
-; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v4.h[0]
-; CHECK-GI-NOFP16-NEXT: frintm s3, s3
-; CHECK-GI-NOFP16-NEXT: fcvt h2, s2
-; CHECK-GI-NOFP16-NEXT: frintm s0, s0
-; CHECK-GI-NOFP16-NEXT: mov v1.h[4], v5.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvt h3, s3
-; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
-; CHECK-GI-NOFP16-NEXT: mov v1.h[5], v2.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v1.h[6], v3.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v1.h[7], v0.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v0.16b, v1.16b
+; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-GI-NOFP16-NEXT: frintm v1.4s, v1.4s
+; CHECK-GI-NOFP16-NEXT: frintm v2.4s, v0.4s
+; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v1.4s
+; CHECK-GI-NOFP16-NEXT: fcvtn2 v0.8h, v2.4s
; CHECK-GI-NOFP16-NEXT: ret
;
; CHECK-GI-FP16-LABEL: floor_v8f16:
@@ -1109,84 +994,18 @@ define <16 x half> @floor_v16f16(<16 x half> %a) {
;
; CHECK-GI-NOFP16-LABEL: floor_v16f16:
; CHECK-GI-NOFP16: // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1]
-; CHECK-GI-NOFP16-NEXT: mov h3, v1.h[1]
-; CHECK-GI-NOFP16-NEXT: fcvt s4, h0
-; CHECK-GI-NOFP16-NEXT: fcvt s5, h1
-; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[2]
-; CHECK-GI-NOFP16-NEXT: mov h7, v1.h[2]
-; CHECK-GI-NOFP16-NEXT: mov h18, v0.h[3]
-; CHECK-GI-NOFP16-NEXT: mov h19, v1.h[3]
-; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
-; CHECK-GI-NOFP16-NEXT: fcvt s3, h3
-; CHECK-GI-NOFP16-NEXT: frintm s4, s4
-; CHECK-GI-NOFP16-NEXT: frintm s5, s5
-; CHECK-GI-NOFP16-NEXT: fcvt s6, h6
-; CHECK-GI-NOFP16-NEXT: fcvt s7, h7
-; CHECK-GI-NOFP16-NEXT: frintm s16, s2
-; CHECK-GI-NOFP16-NEXT: frintm s17, s3
-; CHECK-GI-NOFP16-NEXT: fcvt h2, s4
-; CHECK-GI-NOFP16-NEXT: fcvt h3, s5
-; CHECK-GI-NOFP16-NEXT: frintm s6, s6
-; CHECK-GI-NOFP16-NEXT: frintm s7, s7
-; CHECK-GI-NOFP16-NEXT: fcvt h4, s16
-; CHECK-GI-NOFP16-NEXT: fcvt h5, s17
-; CHECK-GI-NOFP16-NEXT: fcvt s16, h18
-; CHECK-GI-NOFP16-NEXT: fcvt s17, h19
-; CHECK-GI-NOFP16-NEXT: mov h18, v1.h[4]
-; CHECK-GI-NOFP16-NEXT: mov v2.h[1], v4.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvt h4, s6
-; CHECK-GI-NOFP16-NEXT: mov v3.h[1], v5.h[0]
-; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[4]
-; CHECK-GI-NOFP16-NEXT: fcvt h6, s7
-; CHECK-GI-NOFP16-NEXT: frintm s16, s16
-; CHECK-GI-NOFP16-NEXT: frintm s17, s17
-; CHECK-GI-NOFP16-NEXT: mov h7, v0.h[5]
-; CHECK-GI-NOFP16-NEXT: mov v2.h[2], v4.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvt s18, h18
-; CHECK-GI-NOFP16-NEXT: fcvt s5, h5
-; CHECK-GI-NOFP16-NEXT: mov v3.h[2], v6.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvt h4, s16
-; CHECK-GI-NOFP16-NEXT: fcvt h16, s17
-; CHECK-GI-NOFP16-NEXT: mov h17, v1.h[5]
-; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[6]
-; CHECK-GI-NOFP16-NEXT: fcvt s7, h7
-; CHECK-GI-NOFP16-NEXT: frintm s5, s5
-; CHECK-GI-NOFP16-NEXT: mov v2.h[3], v4.h[0]
-; CHECK-GI-NOFP16-NEXT: mov h4, v1.h[6]
-; CHECK-GI-NOFP16-NEXT: mov v3.h[3], v16.h[0]
-; CHECK-GI-NOFP16-NEXT: frintm s16, s18
-; CHECK-GI-NOFP16-NEXT: fcvt s17, h17
-; CHECK-GI-NOFP16-NEXT: frintm s7, s7
-; CHECK-GI-NOFP16-NEXT: fcvt s6, h6
-; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[7]
-; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[7]
-; CHECK-GI-NOFP16-NEXT: fcvt s4, h4
-; CHECK-GI-NOFP16-NEXT: fcvt h5, s5
-; CHECK-GI-NOFP16-NEXT: fcvt h16, s16
-; CHECK-GI-NOFP16-NEXT: frintm s17, s17
-; CHECK-GI-NOFP16-NEXT: fcvt h7, s7
-; CHECK-GI-NOFP16-NEXT: frintm s6, s6
-; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
-; CHECK-GI-NOFP16-NEXT: frintm s4, s4
-; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
-; CHECK-GI-NOFP16-NEXT: mov v2.h[4], v5.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v3.h[4], v16.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvt h5, s17
-; CHECK-GI-NOFP16-NEXT: fcvt h6, s6
-; CHECK-GI-NOFP16-NEXT: frintm s0, s0
-; CHECK-GI-NOFP16-NEXT: fcvt h4, s4
-; CHECK-GI-NOFP16-NEXT: frintm s1, s1
-; CHECK-GI-NOFP16-NEXT: mov v2.h[5], v7.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v3.h[5], v5.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
-; CHECK-GI-NOFP16-NEXT: fcvt h1, s1
-; CHECK-GI-NOFP16-NEXT: mov v2.h[6], v6.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v3.h[6], v4.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v2.h[7], v0.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v3.h[7], v1.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v0.16b, v2.16b
-; CHECK-GI-NOFP16-NEXT: mov v1.16b, v3.16b
+; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v1.4h
+; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-GI-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
+; CHECK-GI-NOFP16-NEXT: frintm v2.4s, v2.4s
+; CHECK-GI-NOFP16-NEXT: frintm v3.4s, v3.4s
+; CHECK-GI-NOFP16-NEXT: frintm v4.4s, v0.4s
+; CHECK-GI-NOFP16-NEXT: frintm v5.4s, v1.4s
+; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v2.4s
+; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v3.4s
+; CHECK-GI-NOFP16-NEXT: fcvtn2 v0.8h, v4.4s
+; CHECK-GI-NOFP16-NEXT: fcvtn2 v1.8h, v5.4s
; CHECK-GI-NOFP16-NEXT: ret
;
; CHECK-GI-FP16-LABEL: floor_v16f16:
@@ -1259,19 +1078,30 @@ entry:
}
define <3 x double> @nearbyint_v3f64(<3 x double> %a) {
-; CHECK-LABEL: nearbyint_v3f64:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT: mov v0.d[1], v1.d[0]
-; CHECK-NEXT: frinti v2.2d, v2.2d
-; CHECK-NEXT: // kill: def $d2 killed $d2 killed $q2
-; CHECK-NEXT: frinti v0.2d, v0.2d
-; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q1
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: nearbyint_v3f64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-SD-NEXT: frinti v2.2d, v2.2d
+; CHECK-SD-NEXT: // kill: def $d2 killed $d2 killed $q2
+; CHECK-SD-NEXT: frinti v0.2d, v0.2d
+; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: nearbyint_v3f64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT: frinti d2, d2
+; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT: frinti v0.2d, v0.2d
+; CHECK-GI-NEXT: mov d1, v0.d[1]
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT: ret
entry:
%c = call <3 x double> @llvm.nearbyint.v3f64(<3 x double> %a)
ret <3 x double> %c
@@ -1299,10 +1129,25 @@ entry:
}
define <3 x float> @nearbyint_v3f32(<3 x float> %a) {
-; CHECK-LABEL: nearbyint_v3f32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: frinti v0.4s, v0.4s
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: nearbyint_v3f32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: frinti v0.4s, v0.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: nearbyint_v3f32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov s1, v0.s[1]
+; CHECK-GI-NEXT: mov s2, v0.s[2]
+; CHECK-GI-NEXT: mov v0.s[1], v1.s[0]
+; CHECK-GI-NEXT: mov v0.s[2], v2.s[0]
+; CHECK-GI-NEXT: mov v0.s[3], v0.s[0]
+; CHECK-GI-NEXT: frinti v0.4s, v0.4s
+; CHECK-GI-NEXT: mov s1, v0.s[1]
+; CHECK-GI-NEXT: mov s2, v0.s[2]
+; CHECK-GI-NEXT: mov v0.s[1], v1.s[0]
+; CHECK-GI-NEXT: mov v0.s[2], v2.s[0]
+; CHECK-GI-NEXT: mov v0.s[3], v0.s[0]
+; CHECK-GI-NEXT: ret
entry:
%c = call <3 x float> @llvm.nearbyint.v3f32(<3 x float> %a)
ret <3 x float> %c
@@ -1380,46 +1225,71 @@ define <7 x half> @nearbyint_v7f16(<7 x half> %a) {
;
; CHECK-GI-NOFP16-LABEL: nearbyint_v7f16:
; CHECK-GI-NOFP16: // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1]
-; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[2]
-; CHECK-GI-NOFP16-NEXT: fcvt s3, h0
-; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[3]
-; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[4]
-; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
-; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
-; CHECK-GI-NOFP16-NEXT: frinti s3, s3
-; CHECK-GI-NOFP16-NEXT: fcvt s4, h4
-; CHECK-GI-NOFP16-NEXT: fcvt s6, h6
-; CHECK-GI-NOFP16-NEXT: frinti s5, s1
-; CHECK-GI-NOFP16-NEXT: frinti s2, s2
-; CHECK-GI-NOFP16-NEXT: fcvt h1, s3
-; CHECK-GI-NOFP16-NEXT: frinti s4, s4
-; CHECK-GI-NOFP16-NEXT: fcvt h3, s5
-; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[5]
-; CHECK-GI-NOFP16-NEXT: fcvt h2, s2
-; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[6]
-; CHECK-GI-NOFP16-NEXT: fcvt h4, s4
-; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v3.h[0]
-; CHECK-GI-NOFP16-NEXT: frinti s3, s6
-; CHECK-GI-NOFP16-NEXT: fcvt s5, h5
-; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
-; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v2.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvt h2, s3
-; CHECK-GI-NOFP16-NEXT: frinti s3, s5
-; CHECK-GI-NOFP16-NEXT: frinti s0, s0
-; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v4.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvt h3, s3
-; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
-; CHECK-GI-NOFP16-NEXT: mov v1.h[4], v2.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v1.h[5], v3.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v1.h[6], v0.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v1.h[7], v0.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v0.16b, v1.16b
+; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[4]
+; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[5]
+; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[6]
+; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v2.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v3.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v0.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h
+; CHECK-GI-NOFP16-NEXT: mov s2, v1.s[1]
+; CHECK-GI-NOFP16-NEXT: mov s3, v1.s[2]
+; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v2.s[0]
+; CHECK-GI-NOFP16-NEXT: mov v1.s[2], v3.s[0]
+; CHECK-GI-NOFP16-NEXT: mov v1.s[3], v0.s[0]
+; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT: frinti v1.4s, v1.4s
+; CHECK-GI-NOFP16-NEXT: frinti v0.4s, v0.4s
+; CHECK-GI-NOFP16-NEXT: mov s2, v1.s[1]
+; CHECK-GI-NOFP16-NEXT: mov s3, v1.s[2]
+; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s
+; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v2.s[0]
+; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1]
+; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[2]
+; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[3]
+; CHECK-GI-NOFP16-NEXT: mov v1.s[2], v3.s[0]
+; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v2.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v1.s[3], v0.s[0]
+; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v4.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s
+; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v5.h[0]
+; CHECK-GI-NOFP16-NEXT: mov h2, v1.h[1]
+; CHECK-GI-NOFP16-NEXT: mov v0.h[4], v1.h[0]
+; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[2]
+; CHECK-GI-NOFP16-NEXT: mov v0.h[5], v2.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v0.h[6], v1.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v0.h[7], v0.h[0]
; CHECK-GI-NOFP16-NEXT: ret
;
; CHECK-GI-FP16-LABEL: nearbyint_v7f16:
; CHECK-GI-FP16: // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
+; CHECK-GI-FP16-NEXT: mov h2, v0.h[2]
+; CHECK-GI-FP16-NEXT: mov h3, v0.h[3]
+; CHECK-GI-FP16-NEXT: mov h4, v0.h[4]
+; CHECK-GI-FP16-NEXT: mov h5, v0.h[5]
+; CHECK-GI-FP16-NEXT: mov h6, v0.h[6]
+; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[2], v2.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[3], v3.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[4], v4.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[5], v5.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[6], v6.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[7], v0.h[0]
; CHECK-GI-FP16-NEXT: frinti v0.8h, v0.8h
+; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
+; CHECK-GI-FP16-NEXT: mov h2, v0.h[2]
+; CHECK-GI-FP16-NEXT: mov h3, v0.h[3]
+; CHECK-GI-FP16-NEXT: mov h4, v0.h[4]
+; CHECK-GI-FP16-NEXT: mov h5, v0.h[5]
+; CHECK-GI-FP16-NEXT: mov h6, v0.h[6]
+; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[2], v2.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[3], v3.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[4], v4.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[5], v5.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[6], v6.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[7], v0.h[0]
; CHECK-GI-FP16-NEXT: ret
entry:
%c = call <7 x half> @llvm.nearbyint.v7f16(<7 x half> %a)
@@ -1458,26 +1328,9 @@ define <4 x half> @nearbyint_v4f16(<4 x half> %a) {
;
; CHECK-GI-NOFP16-LABEL: nearbyint_v4f16:
; CHECK-GI-NOFP16: // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1]
-; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[2]
-; CHECK-GI-NOFP16-NEXT: fcvt s3, h0
-; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[3]
-; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
-; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
-; CHECK-GI-NOFP16-NEXT: frinti s3, s3
-; CHECK-GI-NOFP16-NEXT: fcvt s4, h0
-; CHECK-GI-NOFP16-NEXT: frinti s1, s1
-; CHECK-GI-NOFP16-NEXT: frinti s2, s2
-; CHECK-GI-NOFP16-NEXT: fcvt h0, s3
-; CHECK-GI-NOFP16-NEXT: frinti s3, s4
-; CHECK-GI-NOFP16-NEXT: fcvt h1, s1
-; CHECK-GI-NOFP16-NEXT: fcvt h2, s2
-; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvt h1, s3
-; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v2.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v1.h[0]
-; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT: frinti v0.4s, v0.4s
+; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s
; CHECK-GI-NOFP16-NEXT: ret
;
; CHECK-GI-FP16-LABEL: nearbyint_v4f16:
@@ -1540,45 +1393,12 @@ define <8 x half> @nearbyint_v8f16(<8 x half> %a) {
;
; CHECK-GI-NOFP16-LABEL: nearbyint_v8f16:
; CHECK-GI-NOFP16: // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1]
-; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[2]
-; CHECK-GI-NOFP16-NEXT: fcvt s3, h0
-; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[3]
-; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[4]
-; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
-; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
-; CHECK-GI-NOFP16-NEXT: frinti s3, s3
-; CHECK-GI-NOFP16-NEXT: fcvt s4, h4
-; CHECK-GI-NOFP16-NEXT: fcvt s6, h6
-; CHECK-GI-NOFP16-NEXT: frinti s5, s1
-; CHECK-GI-NOFP16-NEXT: frinti s2, s2
-; CHECK-GI-NOFP16-NEXT: fcvt h1, s3
-; CHECK-GI-NOFP16-NEXT: frinti s4, s4
-; CHECK-GI-NOFP16-NEXT: frinti s6, s6
-; CHECK-GI-NOFP16-NEXT: fcvt h3, s5
-; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[5]
-; CHECK-GI-NOFP16-NEXT: fcvt h2, s2
-; CHECK-GI-NOFP16-NEXT: fcvt h4, s4
-; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v3.h[0]
-; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[6]
-; CHECK-GI-NOFP16-NEXT: fcvt s5, h5
-; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[7]
-; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v2.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvt s3, h3
-; CHECK-GI-NOFP16-NEXT: frinti s2, s5
-; CHECK-GI-NOFP16-NEXT: fcvt h5, s6
-; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
-; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v4.h[0]
-; CHECK-GI-NOFP16-NEXT: frinti s3, s3
-; CHECK-GI-NOFP16-NEXT: fcvt h2, s2
-; CHECK-GI-NOFP16-NEXT: frinti s0, s0
-; CHECK-GI-NOFP16-NEXT: mov v1.h[4], v5.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvt h3, s3
-; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
-; CHECK-GI-NOFP16-NEXT: mov v1.h[5], v2.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v1.h[6], v3.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v1.h[7], v0.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v0.16b, v1.16b
+; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-GI-NOFP16-NEXT: frinti v1.4s, v1.4s
+; CHECK-GI-NOFP16-NEXT: frinti v2.4s, v0.4s
+; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v1.4s
+; CHECK-GI-NOFP16-NEXT: fcvtn2 v0.8h, v2.4s
; CHECK-GI-NOFP16-NEXT: ret
;
; CHECK-GI-FP16-LABEL: nearbyint_v8f16:
@@ -1681,84 +1501,18 @@ define <16 x half> @nearbyint_v16f16(<16 x half> %a) {
;
; CHECK-GI-NOFP16-LABEL: nearbyint_v16f16:
; CHECK-GI-NOFP16: // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1]
-; CHECK-GI-NOFP16-NEXT: mov h3, v1.h[1]
-; CHECK-GI-NOFP16-NEXT: fcvt s4, h0
-; CHECK-GI-NOFP16-NEXT: fcvt s5, h1
-; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[2]
-; CHECK-GI-NOFP16-NEXT: mov h7, v1.h[2]
-; CHECK-GI-NOFP16-NEXT: mov h18, v0.h[3]
-; CHECK-GI-NOFP16-NEXT: mov h19, v1.h[3]
-; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
-; CHECK-GI-NOFP16-NEXT: fcvt s3, h3
-; CHECK-GI-NOFP16-NEXT: frinti s4, s4
-; CHECK-GI-NOFP16-NEXT: frinti s5, s5
-; CHECK-GI-NOFP16-NEXT: fcvt s6, h6
-; CHECK-GI-NOFP16-NEXT: fcvt s7, h7
-; CHECK-GI-NOFP16-NEXT: frinti s16, s2
-; CHECK-GI-NOFP16-NEXT: frinti s17, s3
-; CHECK-GI-NOFP16-NEXT: fcvt h2, s4
-; CHECK-GI-NOFP16-NEXT: fcvt h3, s5
-; CHECK-GI-NOFP16-NEXT: frinti s6, s6
-; CHECK-GI-NOFP16-NEXT: frinti s7, s7
-; CHECK-GI-NOFP16-NEXT: fcvt h4, s16
-; CHECK-GI-NOFP16-NEXT: fcvt h5, s17
-; CHECK-GI-NOFP16-NEXT: fcvt s16, h18
-; CHECK-GI-NOFP16-NEXT: fcvt s17, h19
-; CHECK-GI-NOFP16-NEXT: mov h18, v1.h[4]
-; CHECK-GI-NOFP16-NEXT: mov v2.h[1], v4.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvt h4, s6
-; CHECK-GI-NOFP16-NEXT: mov v3.h[1], v5.h[0]
-; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[4]
-; CHECK-GI-NOFP16-NEXT: fcvt h6, s7
-; CHECK-GI-NOFP16-NEXT: frinti s16, s16
-; CHECK-GI-NOFP16-NEXT: frinti s17, s17
-; CHECK-GI-NOFP16-NEXT: mov h7, v0.h[5]
-; CHECK-GI-NOFP16-NEXT: mov v2.h[2], v4.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvt s18, h18
-; CHECK-GI-NOFP16-NEXT: fcvt s5, h5
-; CHECK-GI-NOFP16-NEXT: mov v3.h[2], v6.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvt h4, s16
-; CHECK-GI-NOFP16-NEXT: fcvt h16, s17
-; CHECK-GI-NOFP16-NEXT: mov h17, v1.h[5]
-; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[6]
-; CHECK-GI-NOFP16-NEXT: fcvt s7, h7
-; CHECK-GI-NOFP16-NEXT: frinti s5, s5
-; CHECK-GI-NOFP16-NEXT: mov v2.h[3], v4.h[0]
-; CHECK-GI-NOFP16-NEXT: mov h4, v1.h[6]
-; CHECK-GI-NOFP16-NEXT: mov v3.h[3], v16.h[0]
-; CHECK-GI-NOFP16-NEXT: frinti s16, s18
-; CHECK-GI-NOFP16-NEXT: fcvt s17, h17
-; CHECK-GI-NOFP16-NEXT: frinti s7, s7
-; CHECK-GI-NOFP16-NEXT: fcvt s6, h6
-; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[7]
-; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[7]
-; CHECK-GI-NOFP16-NEXT: fcvt s4, h4
-; CHECK-GI-NOFP16-NEXT: fcvt h5, s5
-; CHECK-GI-NOFP16-NEXT: fcvt h16, s16
-; CHECK-GI-NOFP16-NEXT: frinti s17, s17
-; CHECK-GI-NOFP16-NEXT: fcvt h7, s7
-; CHECK-GI-NOFP16-NEXT: frinti s6, s6
-; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
-; CHECK-GI-NOFP16-NEXT: frinti s4, s4
-; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
-; CHECK-GI-NOFP16-NEXT: mov v2.h[4], v5.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v3.h[4], v16.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvt h5, s17
-; CHECK-GI-NOFP16-NEXT: fcvt h6, s6
-; CHECK-GI-NOFP16-NEXT: frinti s0, s0
-; CHECK-GI-NOFP16-NEXT: fcvt h4, s4
-; CHECK-GI-NOFP16-NEXT: frinti s1, s1
-; CHECK-GI-NOFP16-NEXT: mov v2.h[5], v7.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v3.h[5], v5.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
-; CHECK-GI-NOFP16-NEXT: fcvt h1, s1
-; CHECK-GI-NOFP16-NEXT: mov v2.h[6], v6.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v3.h[6], v4.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v2.h[7], v0.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v3.h[7], v1.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v0.16b, v2.16b
-; CHECK-GI-NOFP16-NEXT: mov v1.16b, v3.16b
+; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v1.4h
+; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-GI-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
+; CHECK-GI-NOFP16-NEXT: frinti v2.4s, v2.4s
+; CHECK-GI-NOFP16-NEXT: frinti v3.4s, v3.4s
+; CHECK-GI-NOFP16-NEXT: frinti v4.4s, v0.4s
+; CHECK-GI-NOFP16-NEXT: frinti v5.4s, v1.4s
+; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v2.4s
+; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v3.4s
+; CHECK-GI-NOFP16-NEXT: fcvtn2 v0.8h, v4.4s
+; CHECK-GI-NOFP16-NEXT: fcvtn2 v1.8h, v5.4s
; CHECK-GI-NOFP16-NEXT: ret
;
; CHECK-GI-FP16-LABEL: nearbyint_v16f16:
@@ -1831,19 +1585,30 @@ entry:
}
define <3 x double> @roundeven_v3f64(<3 x double> %a) {
-; CHECK-LABEL: roundeven_v3f64:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT: mov v0.d[1], v1.d[0]
-; CHECK-NEXT: frintn v2.2d, v2.2d
-; CHECK-NEXT: // kill: def $d2 killed $d2 killed $q2
-; CHECK-NEXT: frintn v0.2d, v0.2d
-; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q1
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: roundeven_v3f64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-SD-NEXT: frintn v2.2d, v2.2d
+; CHECK-SD-NEXT: // kill: def $d2 killed $d2 killed $q2
+; CHECK-SD-NEXT: frintn v0.2d, v0.2d
+; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: roundeven_v3f64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT: frintn d2, d2
+; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT: frintn v0.2d, v0.2d
+; CHECK-GI-NEXT: mov d1, v0.d[1]
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT: ret
entry:
%c = call <3 x double> @llvm.roundeven.v3f64(<3 x double> %a)
ret <3 x double> %c
@@ -1871,10 +1636,25 @@ entry:
}
define <3 x float> @roundeven_v3f32(<3 x float> %a) {
-; CHECK-LABEL: roundeven_v3f32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: frintn v0.4s, v0.4s
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: roundeven_v3f32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: frintn v0.4s, v0.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: roundeven_v3f32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov s1, v0.s[1]
+; CHECK-GI-NEXT: mov s2, v0.s[2]
+; CHECK-GI-NEXT: mov v0.s[1], v1.s[0]
+; CHECK-GI-NEXT: mov v0.s[2], v2.s[0]
+; CHECK-GI-NEXT: mov v0.s[3], v0.s[0]
+; CHECK-GI-NEXT: frintn v0.4s, v0.4s
+; CHECK-GI-NEXT: mov s1, v0.s[1]
+; CHECK-GI-NEXT: mov s2, v0.s[2]
+; CHECK-GI-NEXT: mov v0.s[1], v1.s[0]
+; CHECK-GI-NEXT: mov v0.s[2], v2.s[0]
+; CHECK-GI-NEXT: mov v0.s[3], v0.s[0]
+; CHECK-GI-NEXT: ret
entry:
%c = call <3 x float> @llvm.roundeven.v3f32(<3 x float> %a)
ret <3 x float> %c
@@ -1952,50 +1732,71 @@ define <7 x half> @roundeven_v7f16(<7 x half> %a) {
;
; CHECK-GI-NOFP16-LABEL: roundeven_v7f16:
; CHECK-GI-NOFP16: // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1]
-; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[2]
-; CHECK-GI-NOFP16-NEXT: fcvt s3, h0
-; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[3]
-; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
-; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
-; CHECK-GI-NOFP16-NEXT: frintn s3, s3
-; CHECK-GI-NOFP16-NEXT: fcvt s4, h4
-; CHECK-GI-NOFP16-NEXT: frintn s5, s1
-; CHECK-GI-NOFP16-NEXT: frintn s2, s2
-; CHECK-GI-NOFP16-NEXT: fcvt h1, s3
-; CHECK-GI-NOFP16-NEXT: frintn s4, s4
-; CHECK-GI-NOFP16-NEXT: fcvt h3, s5
-; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[4]
-; CHECK-GI-NOFP16-NEXT: fcvt h2, s2
-; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v3.h[0]
-; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[5]
-; CHECK-GI-NOFP16-NEXT: fcvt s5, h5
-; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v2.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvt h2, s4
-; CHECK-GI-NOFP16-NEXT: fcvt s3, h3
-; CHECK-GI-NOFP16-NEXT: frintn s4, s5
-; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v2.h[0]
-; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[6]
-; CHECK-GI-NOFP16-NEXT: frintn s3, s3
-; CHECK-GI-NOFP16-NEXT: fcvt h4, s4
-; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[7]
-; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
-; CHECK-GI-NOFP16-NEXT: fcvt h3, s3
-; CHECK-GI-NOFP16-NEXT: mov v1.h[4], v4.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
-; CHECK-GI-NOFP16-NEXT: frintn s2, s2
-; CHECK-GI-NOFP16-NEXT: mov v1.h[5], v3.h[0]
-; CHECK-GI-NOFP16-NEXT: frintn s0, s0
-; CHECK-GI-NOFP16-NEXT: fcvt h2, s2
-; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
-; CHECK-GI-NOFP16-NEXT: mov v1.h[6], v2.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v1.h[7], v0.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v0.16b, v1.16b
+; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[4]
+; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[5]
+; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[6]
+; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v2.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v3.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v0.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h
+; CHECK-GI-NOFP16-NEXT: mov s2, v1.s[1]
+; CHECK-GI-NOFP16-NEXT: mov s3, v1.s[2]
+; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v2.s[0]
+; CHECK-GI-NOFP16-NEXT: mov v1.s[2], v3.s[0]
+; CHECK-GI-NOFP16-NEXT: mov v1.s[3], v0.s[0]
+; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT: frintn v1.4s, v1.4s
+; CHECK-GI-NOFP16-NEXT: frintn v0.4s, v0.4s
+; CHECK-GI-NOFP16-NEXT: mov s2, v1.s[1]
+; CHECK-GI-NOFP16-NEXT: mov s3, v1.s[2]
+; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s
+; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v2.s[0]
+; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1]
+; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[2]
+; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[3]
+; CHECK-GI-NOFP16-NEXT: mov v1.s[2], v3.s[0]
+; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v2.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v1.s[3], v0.s[0]
+; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v4.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s
+; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v5.h[0]
+; CHECK-GI-NOFP16-NEXT: mov h2, v1.h[1]
+; CHECK-GI-NOFP16-NEXT: mov v0.h[4], v1.h[0]
+; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[2]
+; CHECK-GI-NOFP16-NEXT: mov v0.h[5], v2.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v0.h[6], v1.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v0.h[7], v0.h[0]
; CHECK-GI-NOFP16-NEXT: ret
;
; CHECK-GI-FP16-LABEL: roundeven_v7f16:
; CHECK-GI-FP16: // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
+; CHECK-GI-FP16-NEXT: mov h2, v0.h[2]
+; CHECK-GI-FP16-NEXT: mov h3, v0.h[3]
+; CHECK-GI-FP16-NEXT: mov h4, v0.h[4]
+; CHECK-GI-FP16-NEXT: mov h5, v0.h[5]
+; CHECK-GI-FP16-NEXT: mov h6, v0.h[6]
+; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[2], v2.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[3], v3.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[4], v4.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[5], v5.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[6], v6.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[7], v0.h[0]
; CHECK-GI-FP16-NEXT: frintn v0.8h, v0.8h
+; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
+; CHECK-GI-FP16-NEXT: mov h2, v0.h[2]
+; CHECK-GI-FP16-NEXT: mov h3, v0.h[3]
+; CHECK-GI-FP16-NEXT: mov h4, v0.h[4]
+; CHECK-GI-FP16-NEXT: mov h5, v0.h[5]
+; CHECK-GI-FP16-NEXT: mov h6, v0.h[6]
+; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[2], v2.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[3], v3.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[4], v4.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[5], v5.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[6], v6.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[7], v0.h[0]
; CHECK-GI-FP16-NEXT: ret
entry:
%c = call <7 x half> @llvm.roundeven.v7f16(<7 x half> %a)
@@ -2034,26 +1835,9 @@ define <4 x half> @roundeven_v4f16(<4 x half> %a) {
;
; CHECK-GI-NOFP16-LABEL: roundeven_v4f16:
; CHECK-GI-NOFP16: // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1]
-; CHECK-GI-NOFP16-NEXT: fcvt s2, h0
-; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[2]
-; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[3]
-; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
-; CHECK-GI-NOFP16-NEXT: frintn s2, s2
-; CHECK-GI-NOFP16-NEXT: fcvt s3, h3
-; CHECK-GI-NOFP16-NEXT: frintn s1, s1
-; CHECK-GI-NOFP16-NEXT: fcvt h0, s2
-; CHECK-GI-NOFP16-NEXT: fcvt s2, h4
-; CHECK-GI-NOFP16-NEXT: frintn s3, s3
-; CHECK-GI-NOFP16-NEXT: fcvt h1, s1
-; CHECK-GI-NOFP16-NEXT: frintn s2, s2
-; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvt h1, s3
-; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v1.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvt h1, s2
-; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v1.h[0]
-; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT: frintn v0.4s, v0.4s
+; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s
; CHECK-GI-NOFP16-NEXT: ret
;
; CHECK-GI-FP16-LABEL: roundeven_v4f16:
@@ -2116,45 +1900,12 @@ define <8 x half> @roundeven_v8f16(<8 x half> %a) {
;
; CHECK-GI-NOFP16-LABEL: roundeven_v8f16:
; CHECK-GI-NOFP16: // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1]
-; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[2]
-; CHECK-GI-NOFP16-NEXT: fcvt s3, h0
-; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[3]
-; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
-; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
-; CHECK-GI-NOFP16-NEXT: frintn s3, s3
-; CHECK-GI-NOFP16-NEXT: fcvt s4, h4
-; CHECK-GI-NOFP16-NEXT: frintn s5, s1
-; CHECK-GI-NOFP16-NEXT: frintn s2, s2
-; CHECK-GI-NOFP16-NEXT: fcvt h1, s3
-; CHECK-GI-NOFP16-NEXT: frintn s4, s4
-; CHECK-GI-NOFP16-NEXT: fcvt h3, s5
-; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[4]
-; CHECK-GI-NOFP16-NEXT: fcvt h2, s2
-; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v3.h[0]
-; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[5]
-; CHECK-GI-NOFP16-NEXT: fcvt s5, h5
-; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v2.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvt h2, s4
-; CHECK-GI-NOFP16-NEXT: fcvt s3, h3
-; CHECK-GI-NOFP16-NEXT: frintn s4, s5
-; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v2.h[0]
-; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[6]
-; CHECK-GI-NOFP16-NEXT: frintn s3, s3
-; CHECK-GI-NOFP16-NEXT: fcvt h4, s4
-; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[7]
-; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
-; CHECK-GI-NOFP16-NEXT: fcvt h3, s3
-; CHECK-GI-NOFP16-NEXT: mov v1.h[4], v4.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
-; CHECK-GI-NOFP16-NEXT: frintn s2, s2
-; CHECK-GI-NOFP16-NEXT: mov v1.h[5], v3.h[0]
-; CHECK-GI-NOFP16-NEXT: frintn s0, s0
-; CHECK-GI-NOFP16-NEXT: fcvt h2, s2
-; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
-; CHECK-GI-NOFP16-NEXT: mov v1.h[6], v2.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v1.h[7], v0.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v0.16b, v1.16b
+; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-GI-NOFP16-NEXT: frintn v1.4s, v1.4s
+; CHECK-GI-NOFP16-NEXT: frintn v2.4s, v0.4s
+; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v1.4s
+; CHECK-GI-NOFP16-NEXT: fcvtn2 v0.8h, v2.4s
; CHECK-GI-NOFP16-NEXT: ret
;
; CHECK-GI-FP16-LABEL: roundeven_v8f16:
@@ -2257,84 +2008,18 @@ define <16 x half> @roundeven_v16f16(<16 x half> %a) {
;
; CHECK-GI-NOFP16-LABEL: roundeven_v16f16:
; CHECK-GI-NOFP16: // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1]
-; CHECK-GI-NOFP16-NEXT: mov h3, v1.h[1]
-; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[2]
-; CHECK-GI-NOFP16-NEXT: fcvt s5, h0
-; CHECK-GI-NOFP16-NEXT: fcvt s6, h1
-; CHECK-GI-NOFP16-NEXT: mov h7, v1.h[2]
-; CHECK-GI-NOFP16-NEXT: mov h18, v0.h[3]
-; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
-; CHECK-GI-NOFP16-NEXT: fcvt s3, h3
-; CHECK-GI-NOFP16-NEXT: fcvt s4, h4
-; CHECK-GI-NOFP16-NEXT: frintn s5, s5
-; CHECK-GI-NOFP16-NEXT: frintn s6, s6
-; CHECK-GI-NOFP16-NEXT: fcvt s7, h7
-; CHECK-GI-NOFP16-NEXT: frintn s16, s2
-; CHECK-GI-NOFP16-NEXT: frintn s17, s3
-; CHECK-GI-NOFP16-NEXT: frintn s4, s4
-; CHECK-GI-NOFP16-NEXT: fcvt h2, s5
-; CHECK-GI-NOFP16-NEXT: fcvt h3, s6
-; CHECK-GI-NOFP16-NEXT: frintn s7, s7
-; CHECK-GI-NOFP16-NEXT: fcvt h5, s16
-; CHECK-GI-NOFP16-NEXT: fcvt h6, s17
-; CHECK-GI-NOFP16-NEXT: mov h16, v1.h[3]
-; CHECK-GI-NOFP16-NEXT: fcvt s17, h18
-; CHECK-GI-NOFP16-NEXT: fcvt h4, s4
-; CHECK-GI-NOFP16-NEXT: fcvt h7, s7
-; CHECK-GI-NOFP16-NEXT: mov v2.h[1], v5.h[0]
-; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[4]
-; CHECK-GI-NOFP16-NEXT: mov v3.h[1], v6.h[0]
-; CHECK-GI-NOFP16-NEXT: mov h6, v1.h[4]
-; CHECK-GI-NOFP16-NEXT: fcvt s16, h16
-; CHECK-GI-NOFP16-NEXT: frintn s17, s17
-; CHECK-GI-NOFP16-NEXT: mov v2.h[2], v4.h[0]
-; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[5]
-; CHECK-GI-NOFP16-NEXT: fcvt s5, h5
-; CHECK-GI-NOFP16-NEXT: fcvt s6, h6
-; CHECK-GI-NOFP16-NEXT: frintn s16, s16
-; CHECK-GI-NOFP16-NEXT: mov v3.h[2], v7.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvt h7, s17
-; CHECK-GI-NOFP16-NEXT: fcvt s4, h4
-; CHECK-GI-NOFP16-NEXT: frintn s5, s5
-; CHECK-GI-NOFP16-NEXT: frintn s6, s6
-; CHECK-GI-NOFP16-NEXT: fcvt h16, s16
-; CHECK-GI-NOFP16-NEXT: mov v2.h[3], v7.h[0]
-; CHECK-GI-NOFP16-NEXT: mov h7, v1.h[5]
-; CHECK-GI-NOFP16-NEXT: frintn s4, s4
-; CHECK-GI-NOFP16-NEXT: fcvt h5, s5
-; CHECK-GI-NOFP16-NEXT: fcvt h6, s6
-; CHECK-GI-NOFP16-NEXT: mov v3.h[3], v16.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvt h4, s4
-; CHECK-GI-NOFP16-NEXT: mov v2.h[4], v5.h[0]
-; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[6]
-; CHECK-GI-NOFP16-NEXT: mov v3.h[4], v6.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvt s6, h7
-; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[7]
-; CHECK-GI-NOFP16-NEXT: mov v2.h[5], v4.h[0]
-; CHECK-GI-NOFP16-NEXT: mov h4, v1.h[6]
-; CHECK-GI-NOFP16-NEXT: fcvt s5, h5
-; CHECK-GI-NOFP16-NEXT: frintn s6, s6
-; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[7]
-; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
-; CHECK-GI-NOFP16-NEXT: fcvt s4, h4
-; CHECK-GI-NOFP16-NEXT: frintn s5, s5
-; CHECK-GI-NOFP16-NEXT: fcvt h6, s6
-; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
-; CHECK-GI-NOFP16-NEXT: frintn s0, s0
-; CHECK-GI-NOFP16-NEXT: frintn s4, s4
-; CHECK-GI-NOFP16-NEXT: fcvt h5, s5
-; CHECK-GI-NOFP16-NEXT: mov v3.h[5], v6.h[0]
-; CHECK-GI-NOFP16-NEXT: frintn s1, s1
-; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
-; CHECK-GI-NOFP16-NEXT: fcvt h4, s4
-; CHECK-GI-NOFP16-NEXT: mov v2.h[6], v5.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvt h1, s1
-; CHECK-GI-NOFP16-NEXT: mov v3.h[6], v4.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v2.h[7], v0.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v3.h[7], v1.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v0.16b, v2.16b
-; CHECK-GI-NOFP16-NEXT: mov v1.16b, v3.16b
+; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v1.4h
+; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-GI-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
+; CHECK-GI-NOFP16-NEXT: frintn v2.4s, v2.4s
+; CHECK-GI-NOFP16-NEXT: frintn v3.4s, v3.4s
+; CHECK-GI-NOFP16-NEXT: frintn v4.4s, v0.4s
+; CHECK-GI-NOFP16-NEXT: frintn v5.4s, v1.4s
+; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v2.4s
+; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v3.4s
+; CHECK-GI-NOFP16-NEXT: fcvtn2 v0.8h, v4.4s
+; CHECK-GI-NOFP16-NEXT: fcvtn2 v1.8h, v5.4s
; CHECK-GI-NOFP16-NEXT: ret
;
; CHECK-GI-FP16-LABEL: roundeven_v16f16:
@@ -2407,19 +2092,30 @@ entry:
}
define <3 x double> @rint_v3f64(<3 x double> %a) {
-; CHECK-LABEL: rint_v3f64:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT: mov v0.d[1], v1.d[0]
-; CHECK-NEXT: frintx v2.2d, v2.2d
-; CHECK-NEXT: // kill: def $d2 killed $d2 killed $q2
-; CHECK-NEXT: frintx v0.2d, v0.2d
-; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q1
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: rint_v3f64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-SD-NEXT: frintx v2.2d, v2.2d
+; CHECK-SD-NEXT: // kill: def $d2 killed $d2 killed $q2
+; CHECK-SD-NEXT: frintx v0.2d, v0.2d
+; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: rint_v3f64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT: frintx d2, d2
+; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT: frintx v0.2d, v0.2d
+; CHECK-GI-NEXT: mov d1, v0.d[1]
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT: ret
entry:
%c = call <3 x double> @llvm.rint.v3f64(<3 x double> %a)
ret <3 x double> %c
@@ -2447,10 +2143,25 @@ entry:
}
define <3 x float> @rint_v3f32(<3 x float> %a) {
-; CHECK-LABEL: rint_v3f32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: frintx v0.4s, v0.4s
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: rint_v3f32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: frintx v0.4s, v0.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: rint_v3f32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov s1, v0.s[1]
+; CHECK-GI-NEXT: mov s2, v0.s[2]
+; CHECK-GI-NEXT: mov v0.s[1], v1.s[0]
+; CHECK-GI-NEXT: mov v0.s[2], v2.s[0]
+; CHECK-GI-NEXT: mov v0.s[3], v0.s[0]
+; CHECK-GI-NEXT: frintx v0.4s, v0.4s
+; CHECK-GI-NEXT: mov s1, v0.s[1]
+; CHECK-GI-NEXT: mov s2, v0.s[2]
+; CHECK-GI-NEXT: mov v0.s[1], v1.s[0]
+; CHECK-GI-NEXT: mov v0.s[2], v2.s[0]
+; CHECK-GI-NEXT: mov v0.s[3], v0.s[0]
+; CHECK-GI-NEXT: ret
entry:
%c = call <3 x float> @llvm.rint.v3f32(<3 x float> %a)
ret <3 x float> %c
@@ -2528,46 +2239,71 @@ define <7 x half> @rint_v7f16(<7 x half> %a) {
;
; CHECK-GI-NOFP16-LABEL: rint_v7f16:
; CHECK-GI-NOFP16: // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1]
-; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[2]
-; CHECK-GI-NOFP16-NEXT: fcvt s3, h0
-; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[3]
-; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[4]
-; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
-; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
-; CHECK-GI-NOFP16-NEXT: frintx s3, s3
-; CHECK-GI-NOFP16-NEXT: fcvt s4, h4
-; CHECK-GI-NOFP16-NEXT: fcvt s6, h6
-; CHECK-GI-NOFP16-NEXT: frintx s5, s1
-; CHECK-GI-NOFP16-NEXT: frintx s2, s2
-; CHECK-GI-NOFP16-NEXT: fcvt h1, s3
-; CHECK-GI-NOFP16-NEXT: frintx s4, s4
-; CHECK-GI-NOFP16-NEXT: fcvt h3, s5
-; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[5]
-; CHECK-GI-NOFP16-NEXT: fcvt h2, s2
-; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[6]
-; CHECK-GI-NOFP16-NEXT: fcvt h4, s4
-; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v3.h[0]
-; CHECK-GI-NOFP16-NEXT: frintx s3, s6
-; CHECK-GI-NOFP16-NEXT: fcvt s5, h5
-; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
-; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v2.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvt h2, s3
-; CHECK-GI-NOFP16-NEXT: frintx s3, s5
-; CHECK-GI-NOFP16-NEXT: frintx s0, s0
-; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v4.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvt h3, s3
-; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
-; CHECK-GI-NOFP16-NEXT: mov v1.h[4], v2.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v1.h[5], v3.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v1.h[6], v0.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v1.h[7], v0.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v0.16b, v1.16b
+; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[4]
+; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[5]
+; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[6]
+; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v2.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v3.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v0.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h
+; CHECK-GI-NOFP16-NEXT: mov s2, v1.s[1]
+; CHECK-GI-NOFP16-NEXT: mov s3, v1.s[2]
+; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v2.s[0]
+; CHECK-GI-NOFP16-NEXT: mov v1.s[2], v3.s[0]
+; CHECK-GI-NOFP16-NEXT: mov v1.s[3], v0.s[0]
+; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT: frintx v1.4s, v1.4s
+; CHECK-GI-NOFP16-NEXT: frintx v0.4s, v0.4s
+; CHECK-GI-NOFP16-NEXT: mov s2, v1.s[1]
+; CHECK-GI-NOFP16-NEXT: mov s3, v1.s[2]
+; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s
+; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v2.s[0]
+; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1]
+; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[2]
+; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[3]
+; CHECK-GI-NOFP16-NEXT: mov v1.s[2], v3.s[0]
+; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v2.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v1.s[3], v0.s[0]
+; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v4.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s
+; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v5.h[0]
+; CHECK-GI-NOFP16-NEXT: mov h2, v1.h[1]
+; CHECK-GI-NOFP16-NEXT: mov v0.h[4], v1.h[0]
+; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[2]
+; CHECK-GI-NOFP16-NEXT: mov v0.h[5], v2.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v0.h[6], v1.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v0.h[7], v0.h[0]
; CHECK-GI-NOFP16-NEXT: ret
;
; CHECK-GI-FP16-LABEL: rint_v7f16:
; CHECK-GI-FP16: // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
+; CHECK-GI-FP16-NEXT: mov h2, v0.h[2]
+; CHECK-GI-FP16-NEXT: mov h3, v0.h[3]
+; CHECK-GI-FP16-NEXT: mov h4, v0.h[4]
+; CHECK-GI-FP16-NEXT: mov h5, v0.h[5]
+; CHECK-GI-FP16-NEXT: mov h6, v0.h[6]
+; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[2], v2.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[3], v3.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[4], v4.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[5], v5.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[6], v6.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[7], v0.h[0]
; CHECK-GI-FP16-NEXT: frintx v0.8h, v0.8h
+; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
+; CHECK-GI-FP16-NEXT: mov h2, v0.h[2]
+; CHECK-GI-FP16-NEXT: mov h3, v0.h[3]
+; CHECK-GI-FP16-NEXT: mov h4, v0.h[4]
+; CHECK-GI-FP16-NEXT: mov h5, v0.h[5]
+; CHECK-GI-FP16-NEXT: mov h6, v0.h[6]
+; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[2], v2.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[3], v3.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[4], v4.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[5], v5.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[6], v6.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[7], v0.h[0]
; CHECK-GI-FP16-NEXT: ret
entry:
%c = call <7 x half> @llvm.rint.v7f16(<7 x half> %a)
@@ -2606,26 +2342,9 @@ define <4 x half> @rint_v4f16(<4 x half> %a) {
;
; CHECK-GI-NOFP16-LABEL: rint_v4f16:
; CHECK-GI-NOFP16: // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1]
-; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[2]
-; CHECK-GI-NOFP16-NEXT: fcvt s3, h0
-; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[3]
-; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
-; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
-; CHECK-GI-NOFP16-NEXT: frintx s3, s3
-; CHECK-GI-NOFP16-NEXT: fcvt s4, h0
-; CHECK-GI-NOFP16-NEXT: frintx s1, s1
-; CHECK-GI-NOFP16-NEXT: frintx s2, s2
-; CHECK-GI-NOFP16-NEXT: fcvt h0, s3
-; CHECK-GI-NOFP16-NEXT: frintx s3, s4
-; CHECK-GI-NOFP16-NEXT: fcvt h1, s1
-; CHECK-GI-NOFP16-NEXT: fcvt h2, s2
-; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvt h1, s3
-; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v2.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v1.h[0]
-; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT: frintx v0.4s, v0.4s
+; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s
; CHECK-GI-NOFP16-NEXT: ret
;
; CHECK-GI-FP16-LABEL: rint_v4f16:
@@ -2688,45 +2407,12 @@ define <8 x half> @rint_v8f16(<8 x half> %a) {
;
; CHECK-GI-NOFP16-LABEL: rint_v8f16:
; CHECK-GI-NOFP16: // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1]
-; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[2]
-; CHECK-GI-NOFP16-NEXT: fcvt s3, h0
-; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[3]
-; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[4]
-; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
-; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
-; CHECK-GI-NOFP16-NEXT: frintx s3, s3
-; CHECK-GI-NOFP16-NEXT: fcvt s4, h4
-; CHECK-GI-NOFP16-NEXT: fcvt s6, h6
-; CHECK-GI-NOFP16-NEXT: frintx s5, s1
-; CHECK-GI-NOFP16-NEXT: frintx s2, s2
-; CHECK-GI-NOFP16-NEXT: fcvt h1, s3
-; CHECK-GI-NOFP16-NEXT: frintx s4, s4
-; CHECK-GI-NOFP16-NEXT: frintx s6, s6
-; CHECK-GI-NOFP16-NEXT: fcvt h3, s5
-; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[5]
-; CHECK-GI-NOFP16-NEXT: fcvt h2, s2
-; CHECK-GI-NOFP16-NEXT: fcvt h4, s4
-; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v3.h[0]
-; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[6]
-; CHECK-GI-NOFP16-NEXT: fcvt s5, h5
-; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[7]
-; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v2.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvt s3, h3
-; CHECK-GI-NOFP16-NEXT: frintx s2, s5
-; CHECK-GI-NOFP16-NEXT: fcvt h5, s6
-; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
-; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v4.h[0]
-; CHECK-GI-NOFP16-NEXT: frintx s3, s3
-; CHECK-GI-NOFP16-NEXT: fcvt h2, s2
-; CHECK-GI-NOFP16-NEXT: frintx s0, s0
-; CHECK-GI-NOFP16-NEXT: mov v1.h[4], v5.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvt h3, s3
-; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
-; CHECK-GI-NOFP16-NEXT: mov v1.h[5], v2.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v1.h[6], v3.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v1.h[7], v0.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v0.16b, v1.16b
+; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-GI-NOFP16-NEXT: frintx v1.4s, v1.4s
+; CHECK-GI-NOFP16-NEXT: frintx v2.4s, v0.4s
+; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v1.4s
+; CHECK-GI-NOFP16-NEXT: fcvtn2 v0.8h, v2.4s
; CHECK-GI-NOFP16-NEXT: ret
;
; CHECK-GI-FP16-LABEL: rint_v8f16:
@@ -2829,84 +2515,18 @@ define <16 x half> @rint_v16f16(<16 x half> %a) {
;
; CHECK-GI-NOFP16-LABEL: rint_v16f16:
; CHECK-GI-NOFP16: // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1]
-; CHECK-GI-NOFP16-NEXT: mov h3, v1.h[1]
-; CHECK-GI-NOFP16-NEXT: fcvt s4, h0
-; CHECK-GI-NOFP16-NEXT: fcvt s5, h1
-; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[2]
-; CHECK-GI-NOFP16-NEXT: mov h7, v1.h[2]
-; CHECK-GI-NOFP16-NEXT: mov h18, v0.h[3]
-; CHECK-GI-NOFP16-NEXT: mov h19, v1.h[3]
-; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
-; CHECK-GI-NOFP16-NEXT: fcvt s3, h3
-; CHECK-GI-NOFP16-NEXT: frintx s4, s4
-; CHECK-GI-NOFP16-NEXT: frintx s5, s5
-; CHECK-GI-NOFP16-NEXT: fcvt s6, h6
-; CHECK-GI-NOFP16-NEXT: fcvt s7, h7
-; CHECK-GI-NOFP16-NEXT: frintx s16, s2
-; CHECK-GI-NOFP16-NEXT: frintx s17, s3
-; CHECK-GI-NOFP16-NEXT: fcvt h2, s4
-; CHECK-GI-NOFP16-NEXT: fcvt h3, s5
-; CHECK-GI-NOFP16-NEXT: frintx s6, s6
-; CHECK-GI-NOFP16-NEXT: frintx s7, s7
-; CHECK-GI-NOFP16-NEXT: fcvt h4, s16
-; CHECK-GI-NOFP16-NEXT: fcvt h5, s17
-; CHECK-GI-NOFP16-NEXT: fcvt s16, h18
-; CHECK-GI-NOFP16-NEXT: fcvt s17, h19
-; CHECK-GI-NOFP16-NEXT: mov h18, v1.h[4]
-; CHECK-GI-NOFP16-NEXT: mov v2.h[1], v4.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvt h4, s6
-; CHECK-GI-NOFP16-NEXT: mov v3.h[1], v5.h[0]
-; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[4]
-; CHECK-GI-NOFP16-NEXT: fcvt h6, s7
-; CHECK-GI-NOFP16-NEXT: frintx s16, s16
-; CHECK-GI-NOFP16-NEXT: frintx s17, s17
-; CHECK-GI-NOFP16-NEXT: mov h7, v0.h[5]
-; CHECK-GI-NOFP16-NEXT: mov v2.h[2], v4.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvt s18, h18
-; CHECK-GI-NOFP16-NEXT: fcvt s5, h5
-; CHECK-GI-NOFP16-NEXT: mov v3.h[2], v6.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvt h4, s16
-; CHECK-GI-NOFP16-NEXT: fcvt h16, s17
-; CHECK-GI-NOFP16-NEXT: mov h17, v1.h[5]
-; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[6]
-; CHECK-GI-NOFP16-NEXT: fcvt s7, h7
-; CHECK-GI-NOFP16-NEXT: frintx s5, s5
-; CHECK-GI-NOFP16-NEXT: mov v2.h[3], v4.h[0]
-; CHECK-GI-NOFP16-NEXT: mov h4, v1.h[6]
-; CHECK-GI-NOFP16-NEXT: mov v3.h[3], v16.h[0]
-; CHECK-GI-NOFP16-NEXT: frintx s16, s18
-; CHECK-GI-NOFP16-NEXT: fcvt s17, h17
-; CHECK-GI-NOFP16-NEXT: frintx s7, s7
-; CHECK-GI-NOFP16-NEXT: fcvt s6, h6
-; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[7]
-; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[7]
-; CHECK-GI-NOFP16-NEXT: fcvt s4, h4
-; CHECK-GI-NOFP16-NEXT: fcvt h5, s5
-; CHECK-GI-NOFP16-NEXT: fcvt h16, s16
-; CHECK-GI-NOFP16-NEXT: frintx s17, s17
-; CHECK-GI-NOFP16-NEXT: fcvt h7, s7
-; CHECK-GI-NOFP16-NEXT: frintx s6, s6
-; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
-; CHECK-GI-NOFP16-NEXT: frintx s4, s4
-; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
-; CHECK-GI-NOFP16-NEXT: mov v2.h[4], v5.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v3.h[4], v16.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvt h5, s17
-; CHECK-GI-NOFP16-NEXT: fcvt h6, s6
-; CHECK-GI-NOFP16-NEXT: frintx s0, s0
-; CHECK-GI-NOFP16-NEXT: fcvt h4, s4
-; CHECK-GI-NOFP16-NEXT: frintx s1, s1
-; CHECK-GI-NOFP16-NEXT: mov v2.h[5], v7.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v3.h[5], v5.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
-; CHECK-GI-NOFP16-NEXT: fcvt h1, s1
-; CHECK-GI-NOFP16-NEXT: mov v2.h[6], v6.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v3.h[6], v4.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v2.h[7], v0.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v3.h[7], v1.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v0.16b, v2.16b
-; CHECK-GI-NOFP16-NEXT: mov v1.16b, v3.16b
+; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v1.4h
+; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-GI-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
+; CHECK-GI-NOFP16-NEXT: frintx v2.4s, v2.4s
+; CHECK-GI-NOFP16-NEXT: frintx v3.4s, v3.4s
+; CHECK-GI-NOFP16-NEXT: frintx v4.4s, v0.4s
+; CHECK-GI-NOFP16-NEXT: frintx v5.4s, v1.4s
+; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v2.4s
+; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v3.4s
+; CHECK-GI-NOFP16-NEXT: fcvtn2 v0.8h, v4.4s
+; CHECK-GI-NOFP16-NEXT: fcvtn2 v1.8h, v5.4s
; CHECK-GI-NOFP16-NEXT: ret
;
; CHECK-GI-FP16-LABEL: rint_v16f16:
@@ -2979,19 +2599,30 @@ entry:
}
define <3 x double> @round_v3f64(<3 x double> %a) {
-; CHECK-LABEL: round_v3f64:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT: mov v0.d[1], v1.d[0]
-; CHECK-NEXT: frinta v2.2d, v2.2d
-; CHECK-NEXT: // kill: def $d2 killed $d2 killed $q2
-; CHECK-NEXT: frinta v0.2d, v0.2d
-; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q1
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: round_v3f64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-SD-NEXT: frinta v2.2d, v2.2d
+; CHECK-SD-NEXT: // kill: def $d2 killed $d2 killed $q2
+; CHECK-SD-NEXT: frinta v0.2d, v0.2d
+; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: round_v3f64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT: frinta d2, d2
+; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT: frinta v0.2d, v0.2d
+; CHECK-GI-NEXT: mov d1, v0.d[1]
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT: ret
entry:
%c = call <3 x double> @llvm.round.v3f64(<3 x double> %a)
ret <3 x double> %c
@@ -3019,10 +2650,25 @@ entry:
}
define <3 x float> @round_v3f32(<3 x float> %a) {
-; CHECK-LABEL: round_v3f32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: frinta v0.4s, v0.4s
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: round_v3f32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: frinta v0.4s, v0.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: round_v3f32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov s1, v0.s[1]
+; CHECK-GI-NEXT: mov s2, v0.s[2]
+; CHECK-GI-NEXT: mov v0.s[1], v1.s[0]
+; CHECK-GI-NEXT: mov v0.s[2], v2.s[0]
+; CHECK-GI-NEXT: mov v0.s[3], v0.s[0]
+; CHECK-GI-NEXT: frinta v0.4s, v0.4s
+; CHECK-GI-NEXT: mov s1, v0.s[1]
+; CHECK-GI-NEXT: mov s2, v0.s[2]
+; CHECK-GI-NEXT: mov v0.s[1], v1.s[0]
+; CHECK-GI-NEXT: mov v0.s[2], v2.s[0]
+; CHECK-GI-NEXT: mov v0.s[3], v0.s[0]
+; CHECK-GI-NEXT: ret
entry:
%c = call <3 x float> @llvm.round.v3f32(<3 x float> %a)
ret <3 x float> %c
@@ -3100,46 +2746,71 @@ define <7 x half> @round_v7f16(<7 x half> %a) {
;
; CHECK-GI-NOFP16-LABEL: round_v7f16:
; CHECK-GI-NOFP16: // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1]
-; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[2]
-; CHECK-GI-NOFP16-NEXT: fcvt s3, h0
-; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[3]
-; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[4]
-; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
-; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
-; CHECK-GI-NOFP16-NEXT: frinta s3, s3
-; CHECK-GI-NOFP16-NEXT: fcvt s4, h4
-; CHECK-GI-NOFP16-NEXT: fcvt s6, h6
-; CHECK-GI-NOFP16-NEXT: frinta s5, s1
-; CHECK-GI-NOFP16-NEXT: frinta s2, s2
-; CHECK-GI-NOFP16-NEXT: fcvt h1, s3
-; CHECK-GI-NOFP16-NEXT: frinta s4, s4
-; CHECK-GI-NOFP16-NEXT: fcvt h3, s5
-; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[5]
-; CHECK-GI-NOFP16-NEXT: fcvt h2, s2
-; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[6]
-; CHECK-GI-NOFP16-NEXT: fcvt h4, s4
-; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v3.h[0]
-; CHECK-GI-NOFP16-NEXT: frinta s3, s6
-; CHECK-GI-NOFP16-NEXT: fcvt s5, h5
-; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
-; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v2.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvt h2, s3
-; CHECK-GI-NOFP16-NEXT: frinta s3, s5
-; CHECK-GI-NOFP16-NEXT: frinta s0, s0
-; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v4.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvt h3, s3
-; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
-; CHECK-GI-NOFP16-NEXT: mov v1.h[4], v2.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v1.h[5], v3.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v1.h[6], v0.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v1.h[7], v0.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v0.16b, v1.16b
+; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[4]
+; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[5]
+; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[6]
+; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v2.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v3.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v0.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h
+; CHECK-GI-NOFP16-NEXT: mov s2, v1.s[1]
+; CHECK-GI-NOFP16-NEXT: mov s3, v1.s[2]
+; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v2.s[0]
+; CHECK-GI-NOFP16-NEXT: mov v1.s[2], v3.s[0]
+; CHECK-GI-NOFP16-NEXT: mov v1.s[3], v0.s[0]
+; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT: frinta v1.4s, v1.4s
+; CHECK-GI-NOFP16-NEXT: frinta v0.4s, v0.4s
+; CHECK-GI-NOFP16-NEXT: mov s2, v1.s[1]
+; CHECK-GI-NOFP16-NEXT: mov s3, v1.s[2]
+; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s
+; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v2.s[0]
+; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1]
+; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[2]
+; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[3]
+; CHECK-GI-NOFP16-NEXT: mov v1.s[2], v3.s[0]
+; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v2.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v1.s[3], v0.s[0]
+; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v4.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s
+; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v5.h[0]
+; CHECK-GI-NOFP16-NEXT: mov h2, v1.h[1]
+; CHECK-GI-NOFP16-NEXT: mov v0.h[4], v1.h[0]
+; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[2]
+; CHECK-GI-NOFP16-NEXT: mov v0.h[5], v2.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v0.h[6], v1.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v0.h[7], v0.h[0]
; CHECK-GI-NOFP16-NEXT: ret
;
; CHECK-GI-FP16-LABEL: round_v7f16:
; CHECK-GI-FP16: // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
+; CHECK-GI-FP16-NEXT: mov h2, v0.h[2]
+; CHECK-GI-FP16-NEXT: mov h3, v0.h[3]
+; CHECK-GI-FP16-NEXT: mov h4, v0.h[4]
+; CHECK-GI-FP16-NEXT: mov h5, v0.h[5]
+; CHECK-GI-FP16-NEXT: mov h6, v0.h[6]
+; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[2], v2.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[3], v3.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[4], v4.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[5], v5.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[6], v6.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[7], v0.h[0]
; CHECK-GI-FP16-NEXT: frinta v0.8h, v0.8h
+; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
+; CHECK-GI-FP16-NEXT: mov h2, v0.h[2]
+; CHECK-GI-FP16-NEXT: mov h3, v0.h[3]
+; CHECK-GI-FP16-NEXT: mov h4, v0.h[4]
+; CHECK-GI-FP16-NEXT: mov h5, v0.h[5]
+; CHECK-GI-FP16-NEXT: mov h6, v0.h[6]
+; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[2], v2.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[3], v3.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[4], v4.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[5], v5.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[6], v6.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[7], v0.h[0]
; CHECK-GI-FP16-NEXT: ret
entry:
%c = call <7 x half> @llvm.round.v7f16(<7 x half> %a)
@@ -3178,26 +2849,9 @@ define <4 x half> @round_v4f16(<4 x half> %a) {
;
; CHECK-GI-NOFP16-LABEL: round_v4f16:
; CHECK-GI-NOFP16: // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1]
-; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[2]
-; CHECK-GI-NOFP16-NEXT: fcvt s3, h0
-; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[3]
-; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
-; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
-; CHECK-GI-NOFP16-NEXT: frinta s3, s3
-; CHECK-GI-NOFP16-NEXT: fcvt s4, h0
-; CHECK-GI-NOFP16-NEXT: frinta s1, s1
-; CHECK-GI-NOFP16-NEXT: frinta s2, s2
-; CHECK-GI-NOFP16-NEXT: fcvt h0, s3
-; CHECK-GI-NOFP16-NEXT: frinta s3, s4
-; CHECK-GI-NOFP16-NEXT: fcvt h1, s1
-; CHECK-GI-NOFP16-NEXT: fcvt h2, s2
-; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvt h1, s3
-; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v2.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v1.h[0]
-; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT: frinta v0.4s, v0.4s
+; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s
; CHECK-GI-NOFP16-NEXT: ret
;
; CHECK-GI-FP16-LABEL: round_v4f16:
@@ -3260,45 +2914,12 @@ define <8 x half> @round_v8f16(<8 x half> %a) {
;
; CHECK-GI-NOFP16-LABEL: round_v8f16:
; CHECK-GI-NOFP16: // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1]
-; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[2]
-; CHECK-GI-NOFP16-NEXT: fcvt s3, h0
-; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[3]
-; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[4]
-; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
-; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
-; CHECK-GI-NOFP16-NEXT: frinta s3, s3
-; CHECK-GI-NOFP16-NEXT: fcvt s4, h4
-; CHECK-GI-NOFP16-NEXT: fcvt s6, h6
-; CHECK-GI-NOFP16-NEXT: frinta s5, s1
-; CHECK-GI-NOFP16-NEXT: frinta s2, s2
-; CHECK-GI-NOFP16-NEXT: fcvt h1, s3
-; CHECK-GI-NOFP16-NEXT: frinta s4, s4
-; CHECK-GI-NOFP16-NEXT: frinta s6, s6
-; CHECK-GI-NOFP16-NEXT: fcvt h3, s5
-; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[5]
-; CHECK-GI-NOFP16-NEXT: fcvt h2, s2
-; CHECK-GI-NOFP16-NEXT: fcvt h4, s4
-; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v3.h[0]
-; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[6]
-; CHECK-GI-NOFP16-NEXT: fcvt s5, h5
-; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[7]
-; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v2.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvt s3, h3
-; CHECK-GI-NOFP16-NEXT: frinta s2, s5
-; CHECK-GI-NOFP16-NEXT: fcvt h5, s6
-; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
-; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v4.h[0]
-; CHECK-GI-NOFP16-NEXT: frinta s3, s3
-; CHECK-GI-NOFP16-NEXT: fcvt h2, s2
-; CHECK-GI-NOFP16-NEXT: frinta s0, s0
-; CHECK-GI-NOFP16-NEXT: mov v1.h[4], v5.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvt h3, s3
-; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
-; CHECK-GI-NOFP16-NEXT: mov v1.h[5], v2.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v1.h[6], v3.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v1.h[7], v0.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v0.16b, v1.16b
+; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-GI-NOFP16-NEXT: frinta v1.4s, v1.4s
+; CHECK-GI-NOFP16-NEXT: frinta v2.4s, v0.4s
+; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v1.4s
+; CHECK-GI-NOFP16-NEXT: fcvtn2 v0.8h, v2.4s
; CHECK-GI-NOFP16-NEXT: ret
;
; CHECK-GI-FP16-LABEL: round_v8f16:
@@ -3401,84 +3022,18 @@ define <16 x half> @round_v16f16(<16 x half> %a) {
;
; CHECK-GI-NOFP16-LABEL: round_v16f16:
; CHECK-GI-NOFP16: // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1]
-; CHECK-GI-NOFP16-NEXT: mov h3, v1.h[1]
-; CHECK-GI-NOFP16-NEXT: fcvt s4, h0
-; CHECK-GI-NOFP16-NEXT: fcvt s5, h1
-; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[2]
-; CHECK-GI-NOFP16-NEXT: mov h7, v1.h[2]
-; CHECK-GI-NOFP16-NEXT: mov h18, v0.h[3]
-; CHECK-GI-NOFP16-NEXT: mov h19, v1.h[3]
-; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
-; CHECK-GI-NOFP16-NEXT: fcvt s3, h3
-; CHECK-GI-NOFP16-NEXT: frinta s4, s4
-; CHECK-GI-NOFP16-NEXT: frinta s5, s5
-; CHECK-GI-NOFP16-NEXT: fcvt s6, h6
-; CHECK-GI-NOFP16-NEXT: fcvt s7, h7
-; CHECK-GI-NOFP16-NEXT: frinta s16, s2
-; CHECK-GI-NOFP16-NEXT: frinta s17, s3
-; CHECK-GI-NOFP16-NEXT: fcvt h2, s4
-; CHECK-GI-NOFP16-NEXT: fcvt h3, s5
-; CHECK-GI-NOFP16-NEXT: frinta s6, s6
-; CHECK-GI-NOFP16-NEXT: frinta s7, s7
-; CHECK-GI-NOFP16-NEXT: fcvt h4, s16
-; CHECK-GI-NOFP16-NEXT: fcvt h5, s17
-; CHECK-GI-NOFP16-NEXT: fcvt s16, h18
-; CHECK-GI-NOFP16-NEXT: fcvt s17, h19
-; CHECK-GI-NOFP16-NEXT: mov h18, v1.h[4]
-; CHECK-GI-NOFP16-NEXT: mov v2.h[1], v4.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvt h4, s6
-; CHECK-GI-NOFP16-NEXT: mov v3.h[1], v5.h[0]
-; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[4]
-; CHECK-GI-NOFP16-NEXT: fcvt h6, s7
-; CHECK-GI-NOFP16-NEXT: frinta s16, s16
-; CHECK-GI-NOFP16-NEXT: frinta s17, s17
-; CHECK-GI-NOFP16-NEXT: mov h7, v0.h[5]
-; CHECK-GI-NOFP16-NEXT: mov v2.h[2], v4.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvt s18, h18
-; CHECK-GI-NOFP16-NEXT: fcvt s5, h5
-; CHECK-GI-NOFP16-NEXT: mov v3.h[2], v6.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvt h4, s16
-; CHECK-GI-NOFP16-NEXT: fcvt h16, s17
-; CHECK-GI-NOFP16-NEXT: mov h17, v1.h[5]
-; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[6]
-; CHECK-GI-NOFP16-NEXT: fcvt s7, h7
-; CHECK-GI-NOFP16-NEXT: frinta s5, s5
-; CHECK-GI-NOFP16-NEXT: mov v2.h[3], v4.h[0]
-; CHECK-GI-NOFP16-NEXT: mov h4, v1.h[6]
-; CHECK-GI-NOFP16-NEXT: mov v3.h[3], v16.h[0]
-; CHECK-GI-NOFP16-NEXT: frinta s16, s18
-; CHECK-GI-NOFP16-NEXT: fcvt s17, h17
-; CHECK-GI-NOFP16-NEXT: frinta s7, s7
-; CHECK-GI-NOFP16-NEXT: fcvt s6, h6
-; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[7]
-; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[7]
-; CHECK-GI-NOFP16-NEXT: fcvt s4, h4
-; CHECK-GI-NOFP16-NEXT: fcvt h5, s5
-; CHECK-GI-NOFP16-NEXT: fcvt h16, s16
-; CHECK-GI-NOFP16-NEXT: frinta s17, s17
-; CHECK-GI-NOFP16-NEXT: fcvt h7, s7
-; CHECK-GI-NOFP16-NEXT: frinta s6, s6
-; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
-; CHECK-GI-NOFP16-NEXT: frinta s4, s4
-; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
-; CHECK-GI-NOFP16-NEXT: mov v2.h[4], v5.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v3.h[4], v16.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvt h5, s17
-; CHECK-GI-NOFP16-NEXT: fcvt h6, s6
-; CHECK-GI-NOFP16-NEXT: frinta s0, s0
-; CHECK-GI-NOFP16-NEXT: fcvt h4, s4
-; CHECK-GI-NOFP16-NEXT: frinta s1, s1
-; CHECK-GI-NOFP16-NEXT: mov v2.h[5], v7.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v3.h[5], v5.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
-; CHECK-GI-NOFP16-NEXT: fcvt h1, s1
-; CHECK-GI-NOFP16-NEXT: mov v2.h[6], v6.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v3.h[6], v4.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v2.h[7], v0.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v3.h[7], v1.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v0.16b, v2.16b
-; CHECK-GI-NOFP16-NEXT: mov v1.16b, v3.16b
+; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v1.4h
+; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-GI-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
+; CHECK-GI-NOFP16-NEXT: frinta v2.4s, v2.4s
+; CHECK-GI-NOFP16-NEXT: frinta v3.4s, v3.4s
+; CHECK-GI-NOFP16-NEXT: frinta v4.4s, v0.4s
+; CHECK-GI-NOFP16-NEXT: frinta v5.4s, v1.4s
+; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v2.4s
+; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v3.4s
+; CHECK-GI-NOFP16-NEXT: fcvtn2 v0.8h, v4.4s
+; CHECK-GI-NOFP16-NEXT: fcvtn2 v1.8h, v5.4s
; CHECK-GI-NOFP16-NEXT: ret
;
; CHECK-GI-FP16-LABEL: round_v16f16:
@@ -3551,19 +3106,30 @@ entry:
}
define <3 x double> @trunc_v3f64(<3 x double> %a) {
-; CHECK-LABEL: trunc_v3f64:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT: mov v0.d[1], v1.d[0]
-; CHECK-NEXT: frintz v2.2d, v2.2d
-; CHECK-NEXT: // kill: def $d2 killed $d2 killed $q2
-; CHECK-NEXT: frintz v0.2d, v0.2d
-; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q1
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: trunc_v3f64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-SD-NEXT: frintz v2.2d, v2.2d
+; CHECK-SD-NEXT: // kill: def $d2 killed $d2 killed $q2
+; CHECK-SD-NEXT: frintz v0.2d, v0.2d
+; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: trunc_v3f64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT: frintz d2, d2
+; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT: frintz v0.2d, v0.2d
+; CHECK-GI-NEXT: mov d1, v0.d[1]
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT: ret
entry:
%c = call <3 x double> @llvm.trunc.v3f64(<3 x double> %a)
ret <3 x double> %c
@@ -3591,10 +3157,25 @@ entry:
}
define <3 x float> @trunc_v3f32(<3 x float> %a) {
-; CHECK-LABEL: trunc_v3f32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: frintz v0.4s, v0.4s
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: trunc_v3f32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: frintz v0.4s, v0.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: trunc_v3f32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov s1, v0.s[1]
+; CHECK-GI-NEXT: mov s2, v0.s[2]
+; CHECK-GI-NEXT: mov v0.s[1], v1.s[0]
+; CHECK-GI-NEXT: mov v0.s[2], v2.s[0]
+; CHECK-GI-NEXT: mov v0.s[3], v0.s[0]
+; CHECK-GI-NEXT: frintz v0.4s, v0.4s
+; CHECK-GI-NEXT: mov s1, v0.s[1]
+; CHECK-GI-NEXT: mov s2, v0.s[2]
+; CHECK-GI-NEXT: mov v0.s[1], v1.s[0]
+; CHECK-GI-NEXT: mov v0.s[2], v2.s[0]
+; CHECK-GI-NEXT: mov v0.s[3], v0.s[0]
+; CHECK-GI-NEXT: ret
entry:
%c = call <3 x float> @llvm.trunc.v3f32(<3 x float> %a)
ret <3 x float> %c
@@ -3672,46 +3253,71 @@ define <7 x half> @trunc_v7f16(<7 x half> %a) {
;
; CHECK-GI-NOFP16-LABEL: trunc_v7f16:
; CHECK-GI-NOFP16: // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1]
-; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[2]
-; CHECK-GI-NOFP16-NEXT: fcvt s3, h0
-; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[3]
-; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[4]
-; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
-; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
-; CHECK-GI-NOFP16-NEXT: frintz s3, s3
-; CHECK-GI-NOFP16-NEXT: fcvt s4, h4
-; CHECK-GI-NOFP16-NEXT: fcvt s6, h6
-; CHECK-GI-NOFP16-NEXT: frintz s5, s1
-; CHECK-GI-NOFP16-NEXT: frintz s2, s2
-; CHECK-GI-NOFP16-NEXT: fcvt h1, s3
-; CHECK-GI-NOFP16-NEXT: frintz s4, s4
-; CHECK-GI-NOFP16-NEXT: fcvt h3, s5
-; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[5]
-; CHECK-GI-NOFP16-NEXT: fcvt h2, s2
-; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[6]
-; CHECK-GI-NOFP16-NEXT: fcvt h4, s4
-; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v3.h[0]
-; CHECK-GI-NOFP16-NEXT: frintz s3, s6
-; CHECK-GI-NOFP16-NEXT: fcvt s5, h5
-; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
-; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v2.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvt h2, s3
-; CHECK-GI-NOFP16-NEXT: frintz s3, s5
-; CHECK-GI-NOFP16-NEXT: frintz s0, s0
-; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v4.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvt h3, s3
-; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
-; CHECK-GI-NOFP16-NEXT: mov v1.h[4], v2.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v1.h[5], v3.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v1.h[6], v0.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v1.h[7], v0.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v0.16b, v1.16b
+; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[4]
+; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[5]
+; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[6]
+; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v2.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v3.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v0.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h
+; CHECK-GI-NOFP16-NEXT: mov s2, v1.s[1]
+; CHECK-GI-NOFP16-NEXT: mov s3, v1.s[2]
+; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v2.s[0]
+; CHECK-GI-NOFP16-NEXT: mov v1.s[2], v3.s[0]
+; CHECK-GI-NOFP16-NEXT: mov v1.s[3], v0.s[0]
+; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT: frintz v1.4s, v1.4s
+; CHECK-GI-NOFP16-NEXT: frintz v0.4s, v0.4s
+; CHECK-GI-NOFP16-NEXT: mov s2, v1.s[1]
+; CHECK-GI-NOFP16-NEXT: mov s3, v1.s[2]
+; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s
+; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v2.s[0]
+; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1]
+; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[2]
+; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[3]
+; CHECK-GI-NOFP16-NEXT: mov v1.s[2], v3.s[0]
+; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v2.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v1.s[3], v0.s[0]
+; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v4.h[0]
+; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s
+; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v5.h[0]
+; CHECK-GI-NOFP16-NEXT: mov h2, v1.h[1]
+; CHECK-GI-NOFP16-NEXT: mov v0.h[4], v1.h[0]
+; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[2]
+; CHECK-GI-NOFP16-NEXT: mov v0.h[5], v2.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v0.h[6], v1.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v0.h[7], v0.h[0]
; CHECK-GI-NOFP16-NEXT: ret
;
; CHECK-GI-FP16-LABEL: trunc_v7f16:
; CHECK-GI-FP16: // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
+; CHECK-GI-FP16-NEXT: mov h2, v0.h[2]
+; CHECK-GI-FP16-NEXT: mov h3, v0.h[3]
+; CHECK-GI-FP16-NEXT: mov h4, v0.h[4]
+; CHECK-GI-FP16-NEXT: mov h5, v0.h[5]
+; CHECK-GI-FP16-NEXT: mov h6, v0.h[6]
+; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[2], v2.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[3], v3.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[4], v4.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[5], v5.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[6], v6.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[7], v0.h[0]
; CHECK-GI-FP16-NEXT: frintz v0.8h, v0.8h
+; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
+; CHECK-GI-FP16-NEXT: mov h2, v0.h[2]
+; CHECK-GI-FP16-NEXT: mov h3, v0.h[3]
+; CHECK-GI-FP16-NEXT: mov h4, v0.h[4]
+; CHECK-GI-FP16-NEXT: mov h5, v0.h[5]
+; CHECK-GI-FP16-NEXT: mov h6, v0.h[6]
+; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[2], v2.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[3], v3.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[4], v4.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[5], v5.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[6], v6.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[7], v0.h[0]
; CHECK-GI-FP16-NEXT: ret
entry:
%c = call <7 x half> @llvm.trunc.v7f16(<7 x half> %a)
@@ -3750,26 +3356,9 @@ define <4 x half> @trunc_v4f16(<4 x half> %a) {
;
; CHECK-GI-NOFP16-LABEL: trunc_v4f16:
; CHECK-GI-NOFP16: // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1]
-; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[2]
-; CHECK-GI-NOFP16-NEXT: fcvt s3, h0
-; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[3]
-; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
-; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
-; CHECK-GI-NOFP16-NEXT: frintz s3, s3
-; CHECK-GI-NOFP16-NEXT: fcvt s4, h0
-; CHECK-GI-NOFP16-NEXT: frintz s1, s1
-; CHECK-GI-NOFP16-NEXT: frintz s2, s2
-; CHECK-GI-NOFP16-NEXT: fcvt h0, s3
-; CHECK-GI-NOFP16-NEXT: frintz s3, s4
-; CHECK-GI-NOFP16-NEXT: fcvt h1, s1
-; CHECK-GI-NOFP16-NEXT: fcvt h2, s2
-; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvt h1, s3
-; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v2.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v1.h[0]
-; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT: frintz v0.4s, v0.4s
+; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s
; CHECK-GI-NOFP16-NEXT: ret
;
; CHECK-GI-FP16-LABEL: trunc_v4f16:
@@ -3832,45 +3421,12 @@ define <8 x half> @trunc_v8f16(<8 x half> %a) {
;
; CHECK-GI-NOFP16-LABEL: trunc_v8f16:
; CHECK-GI-NOFP16: // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1]
-; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[2]
-; CHECK-GI-NOFP16-NEXT: fcvt s3, h0
-; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[3]
-; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[4]
-; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
-; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
-; CHECK-GI-NOFP16-NEXT: frintz s3, s3
-; CHECK-GI-NOFP16-NEXT: fcvt s4, h4
-; CHECK-GI-NOFP16-NEXT: fcvt s6, h6
-; CHECK-GI-NOFP16-NEXT: frintz s5, s1
-; CHECK-GI-NOFP16-NEXT: frintz s2, s2
-; CHECK-GI-NOFP16-NEXT: fcvt h1, s3
-; CHECK-GI-NOFP16-NEXT: frintz s4, s4
-; CHECK-GI-NOFP16-NEXT: frintz s6, s6
-; CHECK-GI-NOFP16-NEXT: fcvt h3, s5
-; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[5]
-; CHECK-GI-NOFP16-NEXT: fcvt h2, s2
-; CHECK-GI-NOFP16-NEXT: fcvt h4, s4
-; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v3.h[0]
-; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[6]
-; CHECK-GI-NOFP16-NEXT: fcvt s5, h5
-; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[7]
-; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v2.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvt s3, h3
-; CHECK-GI-NOFP16-NEXT: frintz s2, s5
-; CHECK-GI-NOFP16-NEXT: fcvt h5, s6
-; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
-; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v4.h[0]
-; CHECK-GI-NOFP16-NEXT: frintz s3, s3
-; CHECK-GI-NOFP16-NEXT: fcvt h2, s2
-; CHECK-GI-NOFP16-NEXT: frintz s0, s0
-; CHECK-GI-NOFP16-NEXT: mov v1.h[4], v5.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvt h3, s3
-; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
-; CHECK-GI-NOFP16-NEXT: mov v1.h[5], v2.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v1.h[6], v3.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v1.h[7], v0.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v0.16b, v1.16b
+; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-GI-NOFP16-NEXT: frintz v1.4s, v1.4s
+; CHECK-GI-NOFP16-NEXT: frintz v2.4s, v0.4s
+; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v1.4s
+; CHECK-GI-NOFP16-NEXT: fcvtn2 v0.8h, v2.4s
; CHECK-GI-NOFP16-NEXT: ret
;
; CHECK-GI-FP16-LABEL: trunc_v8f16:
@@ -3973,84 +3529,18 @@ define <16 x half> @trunc_v16f16(<16 x half> %a) {
;
; CHECK-GI-NOFP16-LABEL: trunc_v16f16:
; CHECK-GI-NOFP16: // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1]
-; CHECK-GI-NOFP16-NEXT: mov h3, v1.h[1]
-; CHECK-GI-NOFP16-NEXT: fcvt s4, h0
-; CHECK-GI-NOFP16-NEXT: fcvt s5, h1
-; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[2]
-; CHECK-GI-NOFP16-NEXT: mov h7, v1.h[2]
-; CHECK-GI-NOFP16-NEXT: mov h18, v0.h[3]
-; CHECK-GI-NOFP16-NEXT: mov h19, v1.h[3]
-; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
-; CHECK-GI-NOFP16-NEXT: fcvt s3, h3
-; CHECK-GI-NOFP16-NEXT: frintz s4, s4
-; CHECK-GI-NOFP16-NEXT: frintz s5, s5
-; CHECK-GI-NOFP16-NEXT: fcvt s6, h6
-; CHECK-GI-NOFP16-NEXT: fcvt s7, h7
-; CHECK-GI-NOFP16-NEXT: frintz s16, s2
-; CHECK-GI-NOFP16-NEXT: frintz s17, s3
-; CHECK-GI-NOFP16-NEXT: fcvt h2, s4
-; CHECK-GI-NOFP16-NEXT: fcvt h3, s5
-; CHECK-GI-NOFP16-NEXT: frintz s6, s6
-; CHECK-GI-NOFP16-NEXT: frintz s7, s7
-; CHECK-GI-NOFP16-NEXT: fcvt h4, s16
-; CHECK-GI-NOFP16-NEXT: fcvt h5, s17
-; CHECK-GI-NOFP16-NEXT: fcvt s16, h18
-; CHECK-GI-NOFP16-NEXT: fcvt s17, h19
-; CHECK-GI-NOFP16-NEXT: mov h18, v1.h[4]
-; CHECK-GI-NOFP16-NEXT: mov v2.h[1], v4.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvt h4, s6
-; CHECK-GI-NOFP16-NEXT: mov v3.h[1], v5.h[0]
-; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[4]
-; CHECK-GI-NOFP16-NEXT: fcvt h6, s7
-; CHECK-GI-NOFP16-NEXT: frintz s16, s16
-; CHECK-GI-NOFP16-NEXT: frintz s17, s17
-; CHECK-GI-NOFP16-NEXT: mov h7, v0.h[5]
-; CHECK-GI-NOFP16-NEXT: mov v2.h[2], v4.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvt s18, h18
-; CHECK-GI-NOFP16-NEXT: fcvt s5, h5
-; CHECK-GI-NOFP16-NEXT: mov v3.h[2], v6.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvt h4, s16
-; CHECK-GI-NOFP16-NEXT: fcvt h16, s17
-; CHECK-GI-NOFP16-NEXT: mov h17, v1.h[5]
-; CHECK-GI-NOFP16-NEXT: mov h6, v0.h[6]
-; CHECK-GI-NOFP16-NEXT: fcvt s7, h7
-; CHECK-GI-NOFP16-NEXT: frintz s5, s5
-; CHECK-GI-NOFP16-NEXT: mov v2.h[3], v4.h[0]
-; CHECK-GI-NOFP16-NEXT: mov h4, v1.h[6]
-; CHECK-GI-NOFP16-NEXT: mov v3.h[3], v16.h[0]
-; CHECK-GI-NOFP16-NEXT: frintz s16, s18
-; CHECK-GI-NOFP16-NEXT: fcvt s17, h17
-; CHECK-GI-NOFP16-NEXT: frintz s7, s7
-; CHECK-GI-NOFP16-NEXT: fcvt s6, h6
-; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[7]
-; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[7]
-; CHECK-GI-NOFP16-NEXT: fcvt s4, h4
-; CHECK-GI-NOFP16-NEXT: fcvt h5, s5
-; CHECK-GI-NOFP16-NEXT: fcvt h16, s16
-; CHECK-GI-NOFP16-NEXT: frintz s17, s17
-; CHECK-GI-NOFP16-NEXT: fcvt h7, s7
-; CHECK-GI-NOFP16-NEXT: frintz s6, s6
-; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
-; CHECK-GI-NOFP16-NEXT: frintz s4, s4
-; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
-; CHECK-GI-NOFP16-NEXT: mov v2.h[4], v5.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v3.h[4], v16.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvt h5, s17
-; CHECK-GI-NOFP16-NEXT: fcvt h6, s6
-; CHECK-GI-NOFP16-NEXT: frintz s0, s0
-; CHECK-GI-NOFP16-NEXT: fcvt h4, s4
-; CHECK-GI-NOFP16-NEXT: frintz s1, s1
-; CHECK-GI-NOFP16-NEXT: mov v2.h[5], v7.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v3.h[5], v5.h[0]
-; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
-; CHECK-GI-NOFP16-NEXT: fcvt h1, s1
-; CHECK-GI-NOFP16-NEXT: mov v2.h[6], v6.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v3.h[6], v4.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v2.h[7], v0.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v3.h[7], v1.h[0]
-; CHECK-GI-NOFP16-NEXT: mov v0.16b, v2.16b
-; CHECK-GI-NOFP16-NEXT: mov v1.16b, v3.16b
+; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v1.4h
+; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
+; CHECK-GI-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
+; CHECK-GI-NOFP16-NEXT: frintz v2.4s, v2.4s
+; CHECK-GI-NOFP16-NEXT: frintz v3.4s, v3.4s
+; CHECK-GI-NOFP16-NEXT: frintz v4.4s, v0.4s
+; CHECK-GI-NOFP16-NEXT: frintz v5.4s, v1.4s
+; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v2.4s
+; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v3.4s
+; CHECK-GI-NOFP16-NEXT: fcvtn2 v0.8h, v4.4s
+; CHECK-GI-NOFP16-NEXT: fcvtn2 v1.8h, v5.4s
; CHECK-GI-NOFP16-NEXT: ret
;
; CHECK-GI-FP16-LABEL: trunc_v16f16:
@@ -4161,7 +3651,3 @@ declare half @llvm.rint.f16(half)
declare half @llvm.round.f16(half)
declare half @llvm.roundeven.f16(half)
declare half @llvm.trunc.f16(half)
-
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; CHECK-GI: {{.*}}
-; CHECK-SD: {{.*}}
More information about the llvm-commits
mailing list