[llvm] [IR] Add `llvm.sincos` intrinsic (PR #109825)
Benjamin Maxwell via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 22 07:34:30 PDT 2024
https://github.com/MacDue updated https://github.com/llvm/llvm-project/pull/109825
>From bd6ef1ab588962e06ca295ddf788eb85e751d657 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Tue, 24 Sep 2024 16:01:49 +0000
Subject: [PATCH 1/7] [IR] Add `llvm.sincos` intrinsic
This adds the `llvm.sincos` intrinsic, legalization, and lowering.
The `llvm.sincos` intrinsic takes a floating-point value and returns
both the sine and cosine (as a struct).
```
declare { float, float } @llvm.sincos.f32(float %Val)
declare { double, double } @llvm.sincos.f64(double %Val)
declare { x86_fp80, x86_fp80 } @llvm.sincos.f80(x86_fp80 %Val)
declare { fp128, fp128 } @llvm.sincos.f128(fp128 %Val)
declare { ppc_fp128, ppc_fp128 } @llvm.sincos.ppcf128(ppc_fp128 %Val)
declare { <4 x float>, <4 x float> } @llvm.sincos.v4f32(<4 x float> %Val)
```
The lowering is built on top of the existing FSINCOS ISD node, with
additional type legalization to allow for f16, f128, and vector values.
---
llvm/docs/LangRef.rst | 45 ++
llvm/include/llvm/CodeGen/BasicTTIImpl.h | 3 +
.../CodeGen/GlobalISel/MachineIRBuilder.h | 7 +
llvm/include/llvm/IR/Intrinsics.td | 2 +
llvm/include/llvm/Support/TargetOpcodes.def | 3 +
llvm/include/llvm/Target/GenericOpcodes.td | 7 +
llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 7 +
llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 10 +
.../SelectionDAG/LegalizeFloatTypes.cpp | 80 +++
llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 9 +-
.../SelectionDAG/LegalizeVectorOps.cpp | 1 +
.../SelectionDAG/LegalizeVectorTypes.cpp | 30 +-
.../SelectionDAG/SelectionDAGBuilder.cpp | 15 +-
llvm/lib/CodeGen/TargetLoweringBase.cpp | 5 +-
.../lib/Target/ARM/ARMTargetTransformInfo.cpp | 1 +
.../GlobalISel/legalizer-info-validation.mir | 3 +
llvm/test/CodeGen/AArch64/llvm.sincos.ll | 516 ++++++++++++++++++
llvm/test/CodeGen/ARM/llvm.sincos.ll | 464 ++++++++++++++++
18 files changed, 1197 insertions(+), 11 deletions(-)
create mode 100644 llvm/test/CodeGen/AArch64/llvm.sincos.ll
create mode 100644 llvm/test/CodeGen/ARM/llvm.sincos.ll
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index b83675c6ed97aa..7dfa394c8b81b4 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -15512,6 +15512,8 @@ Semantics:
This function returns the first value raised to the second power with an
unspecified sequence of rounding operations.
+.. _t_llvm_sin:
+
'``llvm.sin.*``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -15549,6 +15551,8 @@ trapping or setting ``errno``.
When specified with the fast-math-flag 'afn', the result may be approximated
using a less accurate calculation.
+.. _t_llvm_cos:
+
'``llvm.cos.*``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -15882,6 +15886,47 @@ trapping or setting ``errno``.
When specified with the fast-math-flag 'afn', the result may be approximated
using a less accurate calculation.
+
+'``llvm.sincos.*``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.sincos`` on any
+floating-point or vector of floating-point type. Not all targets support
+all types however.
+
+::
+
+ declare { float, float } @llvm.sincos.f32(float %Val)
+ declare { double, double } @llvm.sincos.f64(double %Val)
+ declare { x86_fp80, x86_fp80 } @llvm.sincos.f80(x86_fp80 %Val)
+ declare { fp128, fp128 } @llvm.sincos.f128(fp128 %Val)
+ declare { ppc_fp128, ppc_fp128 } @llvm.sincos.ppcf128(ppc_fp128 %Val)
+ declare { <4 x float>, <4 x float> } @llvm.sincos.v4f32(<4 x float> %Val)
+
+Overview:
+"""""""""
+
+The '``llvm.sincos.*``' intrinsics return the sine and cosine of the operand.
+
+Arguments:
+""""""""""
+
+The argument is a :ref:`floating-point <t_floating>` value or
+:ref:`vector <t_vector>` of floating-point values. The intrinsic returns two
+values matching the argument type in a struct.
+
+Semantics:
+""""""""""
+
+This intrinsic is equivalent to calling both :ref:`llvm.sin <t_llvm_sin>`
+and :ref:`llvm.cos <t_llvm_cos>` on the argument.
+
+The first result is the sine of the argument and the second result is the cosine
+of the argument.
+
'``llvm.pow.*``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index db3b5cddd7c1c3..b0316e67654dbc 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -1986,6 +1986,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
case Intrinsic::cos:
ISD = ISD::FCOS;
break;
+ case Intrinsic::sincos:
+ ISD = ISD::FSINCOS;
+ break;
case Intrinsic::tan:
ISD = ISD::FTAN;
break;
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
index 9b993482c8cc07..ab3025e4923cd0 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
@@ -2009,6 +2009,13 @@ class MachineIRBuilder {
return buildInstr(TargetOpcode::G_FFREXP, {Fract, Exp}, {Src}, Flags);
}
+ /// Build and insert \p Sin, \p Cos = G_FSINCOS \p Src
+ MachineInstrBuilder
+ buildFSincos(const DstOp &Sin, const DstOp &Cos, const SrcOp &Src,
+ std::optional<unsigned> Flags = std::nullopt) {
+ return buildInstr(TargetOpcode::G_FSINCOS, {Sin, Cos}, {Src}, Flags);
+ }
+
/// Build and insert \p Res = G_FCOPYSIGN \p Op0, \p Op1
MachineInstrBuilder buildFCopysign(const DstOp &Dst, const SrcOp &Src0,
const SrcOp &Src1) {
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 94e53f372127da..e91758ed34eb38 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -1050,6 +1050,8 @@ let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrWillReturn] in {
def int_nearbyint : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
def int_round : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
def int_roundeven : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
+ def int_sincos : DefaultAttrsIntrinsic<[LLVMMatchType<0>, LLVMMatchType<0>],
+ [llvm_anyfloat_ty]>;
// Truncate a floating point number with a specific rounding mode
def int_fptrunc_round : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ],
diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def
index 3556a253d875fe..0c4c6ccd5c568e 100644
--- a/llvm/include/llvm/Support/TargetOpcodes.def
+++ b/llvm/include/llvm/Support/TargetOpcodes.def
@@ -809,6 +809,9 @@ HANDLE_TARGET_OPCODE(G_FCOS)
/// Floating point sine.
HANDLE_TARGET_OPCODE(G_FSIN)
+/// Floating point combined sine and cosine.
+HANDLE_TARGET_OPCODE(G_FSINCOS)
+
/// Floating point tangent.
HANDLE_TARGET_OPCODE(G_FTAN)
diff --git a/llvm/include/llvm/Target/GenericOpcodes.td b/llvm/include/llvm/Target/GenericOpcodes.td
index 8b8bc9a0e9cf56..62bb9789afe5d2 100644
--- a/llvm/include/llvm/Target/GenericOpcodes.td
+++ b/llvm/include/llvm/Target/GenericOpcodes.td
@@ -1020,6 +1020,13 @@ def G_FSIN : GenericInstruction {
let hasSideEffects = false;
}
+// Floating point combined sine and cosine.
+def G_FSINCOS : GenericInstruction {
+ let OutOperandList = (outs type0:$dst1, type0:$dst2);
+ let InOperandList = (ins type0:$src1);
+ let hasSideEffects = false;
+}
+
// Floating point tangent of a value.
def G_FTAN : GenericInstruction {
let OutOperandList = (outs type0:$dst);
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 563a8264413452..5381dce58f9e65 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -2343,6 +2343,13 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
MachineInstr::copyFlagsFromInstruction(CI));
return true;
}
+ case Intrinsic::sincos: {
+ ArrayRef<Register> VRegs = getOrCreateVRegs(CI);
+ MIRBuilder.buildFSincos(VRegs[0], VRegs[1],
+ getOrCreateVReg(*CI.getArgOperand(0)),
+ MachineInstr::copyFlagsFromInstruction(CI));
+ return true;
+ }
case Intrinsic::fptosi_sat:
MIRBuilder.buildFPTOSI_SAT(getOrCreateVReg(CI),
getOrCreateVReg(*CI.getArgOperand(0)));
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index e0a03383358b76..3534c1ca941a9e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -5586,6 +5586,16 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
Results.push_back(Tmp2.getValue(1));
break;
}
+ case ISD::FSINCOS: {
+ Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
+ Tmp2 = DAG.getNode(ISD::FSINCOS, dl, {NVT, NVT}, Tmp1);
+
+ for (unsigned ResNum = 0; ResNum < Node->getNumValues(); ResNum++)
+ Results.push_back(
+ DAG.getNode(ISD::FP_ROUND, dl, OVT, Tmp2.getValue(ResNum),
+ DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)));
+ break;
+ }
case ISD::FFLOOR:
case ISD::FCEIL:
case ISD::FRINT:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 73c258f0f6f18c..9009c8ab4fc9a4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -129,6 +129,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
case ISD::FLDEXP:
case ISD::STRICT_FLDEXP: R = SoftenFloatRes_ExpOp(N); break;
case ISD::FFREXP: R = SoftenFloatRes_FFREXP(N); break;
+ case ISD::FSINCOS: R = SoftenFloatRes_FSINCOS(N); break;
case ISD::STRICT_FREM:
case ISD::FREM: R = SoftenFloatRes_FREM(N); break;
case ISD::STRICT_FRINT:
@@ -774,6 +775,45 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FFREXP(SDNode *N) {
return ReturnVal;
}
+SDValue DAGTypeLegalizer::SoftenFloatRes_FSINCOS(SDNode *N) {
+ assert(!N->isStrictFPOpcode() && "strictfp not implemented for fsincos");
+ EVT VT = N->getValueType(0);
+ RTLIB::Libcall LC = RTLIB::getFSINCOS(VT);
+
+ if (!TLI.getLibcallName(LC))
+ return SDValue();
+
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDValue StackSlotSin = DAG.CreateStackTemporary(NVT);
+ SDValue StackSlotCos = DAG.CreateStackTemporary(NVT);
+
+ SDLoc DL(N);
+
+ TargetLowering::MakeLibCallOptions CallOptions;
+ std::array Ops{GetSoftenedFloat(N->getOperand(0)), StackSlotSin,
+ StackSlotCos};
+ std::array OpsVT{VT, StackSlotSin.getValueType(),
+ StackSlotCos.getValueType()};
+
+ // TODO: setTypeListBeforeSoften can't properly express multiple return types,
+ // but since both returns have the same type for sincos it should be okay.
+ CallOptions.setTypeListBeforeSoften({OpsVT}, VT, true);
+
+ auto [ReturnVal, Chain] = TLI.makeLibCall(DAG, LC, NVT, Ops, CallOptions, DL,
+ /*Chain=*/SDValue());
+ unsigned ResNo = 0;
+ for (SDValue OutPtr : {StackSlotSin, StackSlotCos}) {
+ int FrameIdx = cast<FrameIndexSDNode>(OutPtr)->getIndex();
+ auto PtrInfo =
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
+
+ SDValue LoadExp = DAG.getLoad(NVT, DL, Chain, OutPtr, PtrInfo);
+ SetSoftenedFloat(SDValue(N, ResNo++), LoadExp);
+ }
+
+ return SDValue();
+}
+
SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) {
return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
RTLIB::REM_F32,
@@ -2704,6 +2744,10 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) {
case ISD::FLDEXP: R = PromoteFloatRes_ExpOp(N); break;
case ISD::FFREXP: R = PromoteFloatRes_FFREXP(N); break;
+ case ISD::FSINCOS:
+ R = PromoteFloatRes_FSINCOS(N);
+ break;
+
case ISD::FP_ROUND: R = PromoteFloatRes_FP_ROUND(N); break;
case ISD::STRICT_FP_ROUND:
R = PromoteFloatRes_STRICT_FP_ROUND(N);
@@ -2899,6 +2943,18 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_FFREXP(SDNode *N) {
return Res;
}
+SDValue DAGTypeLegalizer::PromoteFloatRes_FSINCOS(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDValue Op = GetPromotedFloat(N->getOperand(0));
+ SDValue Res = DAG.getNode(N->getOpcode(), SDLoc(N), {NVT, NVT}, Op);
+
+ for (unsigned ResNum = 0; ResNum < N->getNumValues(); ResNum++)
+ SetPromotedFloat(SDValue(N, ResNum), Res.getValue(ResNum));
+
+ return SDValue();
+}
+
// Explicit operation to reduce precision. Reduce the value to half precision
// and promote it back to the legal type.
SDValue DAGTypeLegalizer::PromoteFloatRes_FP_ROUND(SDNode *N) {
@@ -3148,6 +3204,10 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) {
case ISD::FFREXP: R = SoftPromoteHalfRes_FFREXP(N); break;
+ case ISD::FSINCOS:
+ R = SoftPromoteHalfRes_FSINCOS(N);
+ break;
+
case ISD::LOAD: R = SoftPromoteHalfRes_LOAD(N); break;
case ISD::ATOMIC_LOAD:
R = SoftPromoteHalfRes_ATOMIC_LOAD(N);
@@ -3304,6 +3364,26 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FFREXP(SDNode *N) {
return DAG.getNode(GetPromotionOpcode(NVT, OVT), dl, MVT::i16, Res);
}
+SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FSINCOS(SDNode *N) {
+ EVT OVT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT);
+ SDValue Op = GetSoftPromotedHalf(N->getOperand(0));
+ SDLoc dl(N);
+
+ // Promote to the larger FP type.
+ Op = DAG.getNode(GetPromotionOpcode(OVT, NVT), dl, NVT, Op);
+ SDValue Res = DAG.getNode(N->getOpcode(), dl, DAG.getVTList(NVT, NVT), Op);
+
+ // Convert back to FP16 as an integer.
+ ISD::NodeType Truncate = GetPromotionOpcode(NVT, OVT);
+ for (unsigned ResNum = 0; ResNum < N->getNumValues(); ResNum++) {
+ SDValue Trunc = DAG.getNode(Truncate, dl, MVT::i16, Res.getValue(ResNum));
+ SetSoftPromotedHalf(SDValue(N, ResNum), Trunc);
+ }
+
+ return SDValue();
+}
+
SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FP_ROUND(SDNode *N) {
EVT RVT = N->getValueType(0);
bool IsStrict = N->isStrictFPOpcode();
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 868da25ca8cb47..33befb8d4ac0e2 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -597,6 +597,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
SDValue SoftenFloatRes_FPOW(SDNode *N);
SDValue SoftenFloatRes_ExpOp(SDNode *N);
SDValue SoftenFloatRes_FFREXP(SDNode *N);
+ SDValue SoftenFloatRes_FSINCOS(SDNode *N);
SDValue SoftenFloatRes_FREEZE(SDNode *N);
SDValue SoftenFloatRes_FREM(SDNode *N);
SDValue SoftenFloatRes_FRINT(SDNode *N);
@@ -744,6 +745,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
SDValue PromoteFloatRes_FMAD(SDNode *N);
SDValue PromoteFloatRes_ExpOp(SDNode *N);
SDValue PromoteFloatRes_FFREXP(SDNode *N);
+ SDValue PromoteFloatRes_FSINCOS(SDNode *N);
SDValue PromoteFloatRes_FP_ROUND(SDNode *N);
SDValue PromoteFloatRes_STRICT_FP_ROUND(SDNode *N);
SDValue PromoteFloatRes_LOAD(SDNode *N);
@@ -792,6 +794,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
SDValue SoftPromoteHalfRes_FMAD(SDNode *N);
SDValue SoftPromoteHalfRes_ExpOp(SDNode *N);
SDValue SoftPromoteHalfRes_FFREXP(SDNode *N);
+ SDValue SoftPromoteHalfRes_FSINCOS(SDNode *N);
SDValue SoftPromoteHalfRes_FP_ROUND(SDNode *N);
SDValue SoftPromoteHalfRes_LOAD(SDNode *N);
SDValue SoftPromoteHalfRes_ATOMIC_LOAD(SDNode *N);
@@ -863,7 +866,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
SDValue ScalarizeVecRes_IS_FPCLASS(SDNode *N);
SDValue ScalarizeVecRes_FIX(SDNode *N);
- SDValue ScalarizeVecRes_FFREXP(SDNode *N, unsigned ResNo);
+ SDValue ScalarizeVecRes_UnaryOpWithTwoResults(SDNode *N, unsigned ResNo);
// Vector Operand Scalarization: <1 x ty> -> ty.
bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo);
@@ -917,7 +920,8 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
void SplitVecRes_CMP(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_ADDRSPACECAST(SDNode *N, SDValue &Lo, SDValue &Hi);
- void SplitVecRes_FFREXP(SDNode *N, unsigned ResNo, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_UnaryOpWithTwoResults(SDNode *N, unsigned ResNo, SDValue &Lo,
+ SDValue &Hi);
void SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_ExtVecInRegOp(SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -1068,6 +1072,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
SDValue WidenVecRes_ExpOp(SDNode *N);
SDValue WidenVecRes_Unary(SDNode *N);
SDValue WidenVecRes_InregOp(SDNode *N);
+ SDValue WidenVecRes_FSINCOS(SDNode *N);
// Widen Vector Operand.
bool WidenVectorOperand(SDNode *N, unsigned OpNo);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index a8042fc3e7a69a..c80da28b3dc34d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -452,6 +452,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::UMULO:
case ISD::FCANONICALIZE:
case ISD::FFREXP:
+ case ISD::FSINCOS:
case ISD::SADDSAT:
case ISD::UADDSAT:
case ISD::SSUBSAT:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 50e2a923699c8a..65c9bb64b3cc14 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -130,7 +130,8 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
R = ScalarizeVecRes_ADDRSPACECAST(N);
break;
case ISD::FFREXP:
- R = ScalarizeVecRes_FFREXP(N, ResNo);
+ case ISD::FSINCOS:
+ R = ScalarizeVecRes_UnaryOpWithTwoResults(N, ResNo);
break;
case ISD::ADD:
case ISD::AND:
@@ -276,7 +277,9 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_FIX(SDNode *N) {
Op2, N->getFlags());
}
-SDValue DAGTypeLegalizer::ScalarizeVecRes_FFREXP(SDNode *N, unsigned ResNo) {
+SDValue
+DAGTypeLegalizer::ScalarizeVecRes_UnaryOpWithTwoResults(SDNode *N,
+ unsigned ResNo) {
assert(N->getValueType(0).getVectorNumElements() == 1 &&
"Unexpected vector type!");
SDValue Elt = GetScalarizedVector(N->getOperand(0));
@@ -1253,7 +1256,8 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
SplitVecRes_ADDRSPACECAST(N, Lo, Hi);
break;
case ISD::FFREXP:
- SplitVecRes_FFREXP(N, ResNo, Lo, Hi);
+ case ISD::FSINCOS:
+ SplitVecRes_UnaryOpWithTwoResults(N, ResNo, Lo, Hi);
break;
case ISD::ANY_EXTEND:
@@ -2615,8 +2619,10 @@ void DAGTypeLegalizer::SplitVecRes_ADDRSPACECAST(SDNode *N, SDValue &Lo,
Hi = DAG.getAddrSpaceCast(dl, HiVT, Hi, SrcAS, DestAS);
}
-void DAGTypeLegalizer::SplitVecRes_FFREXP(SDNode *N, unsigned ResNo,
- SDValue &Lo, SDValue &Hi) {
+void DAGTypeLegalizer::SplitVecRes_UnaryOpWithTwoResults(SDNode *N,
+ unsigned ResNo,
+ SDValue &Lo,
+ SDValue &Hi) {
SDLoc dl(N);
auto [LoVT, HiVT] = DAG.GetSplitDestVTs(N->getValueType(0));
auto [LoVT1, HiVT1] = DAG.GetSplitDestVTs(N->getValueType(1));
@@ -4752,6 +4758,14 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::VP_FSHR:
Res = WidenVecRes_Ternary(N);
break;
+ case ISD::FSINCOS: {
+ if (!unrollExpandedOp())
+ Res = WidenVecRes_FSINCOS(N);
+ for (unsigned ResNum = 0; ResNum < N->getNumValues(); ResNum++)
+ SetWidenedVector(SDValue(N, ResNum), Res.getValue(ResNum));
+ Res = SDValue();
+ break;
+ }
}
// If Res is null, the sub-method took care of registering the result.
@@ -5500,6 +5514,12 @@ SDValue DAGTypeLegalizer::WidenVecRes_InregOp(SDNode *N) {
WidenVT, WidenLHS, DAG.getValueType(ExtVT));
}
+SDValue DAGTypeLegalizer::WidenVecRes_FSINCOS(SDNode *N) {
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ return DAG.getNode(N->getOpcode(), SDLoc(N), {WidenVT, WidenVT}, InOp);
+}
+
SDValue DAGTypeLegalizer::WidenVecRes_MERGE_VALUES(SDNode *N, unsigned ResNo) {
SDValue WidenVec = DisintegrateMERGE_VALUES(N, ResNo);
return GetWidenedVector(WidenVec);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 8450553743074c..8affa0eaaca788 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -6936,12 +6936,23 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)), Flags));
return;
+ case Intrinsic::sincos:
case Intrinsic::frexp: {
+ unsigned Opcode;
+ switch (Intrinsic) {
+ default:
+ llvm_unreachable("unexpected intrinsic");
+ case Intrinsic::sincos:
+ Opcode = ISD::FSINCOS;
+ break;
+ case Intrinsic::frexp:
+ Opcode = ISD::FFREXP;
+ break;
+ }
SmallVector<EVT, 2> ValueVTs;
ComputeValueVTs(TLI, DAG.getDataLayout(), I.getType(), ValueVTs);
SDVTList VTs = DAG.getVTList(ValueVTs);
- setValue(&I,
- DAG.getNode(ISD::FFREXP, sdl, VTs, getValue(I.getArgOperand(0))));
+ setValue(&I, DAG.getNode(Opcode, sdl, VTs, getValue(I.getArgOperand(0))));
return;
}
case Intrinsic::arithmetic_fence: {
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 7a28f7892cbf31..56ae58030328be 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -773,8 +773,9 @@ void TargetLoweringBase::initActions() {
setOperationAction({ISD::BITREVERSE, ISD::PARITY}, VT, Expand);
// These library functions default to expand.
- setOperationAction({ISD::FROUND, ISD::FPOWI, ISD::FLDEXP, ISD::FFREXP}, VT,
- Expand);
+ setOperationAction(
+ {ISD::FROUND, ISD::FPOWI, ISD::FLDEXP, ISD::FFREXP, ISD::FSINCOS}, VT,
+ Expand);
// These operations default to expand for vector types.
if (VT.isVector())
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 835ae98efb852d..9dcf8259e3293f 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -2059,6 +2059,7 @@ bool ARMTTIImpl::isLoweredToCall(const Function *F) {
case Intrinsic::powi:
case Intrinsic::sin:
case Intrinsic::cos:
+ case Intrinsic::sincos:
case Intrinsic::pow:
case Intrinsic::log:
case Intrinsic::log10:
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
index 4d096b7231c7c4..4eea1beed82d94 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
@@ -701,6 +701,9 @@
# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
# DEBUG-NEXT: .. the first uncovered type index: 1, OK
# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
+# DEBUG-NEXT: G_FSINCOS (opcode {{[0-9]+}}): 1 type index, 0 imm indices
+# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: G_FTAN (opcode {{[0-9]+}}): 1 type index, 0 imm indices
# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
# DEBUG-NEXT: .. the first uncovered type index: 1, OK
diff --git a/llvm/test/CodeGen/AArch64/llvm.sincos.ll b/llvm/test/CodeGen/AArch64/llvm.sincos.ll
new file mode 100644
index 00000000000000..841b0b2d665345
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/llvm.sincos.ll
@@ -0,0 +1,516 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=aarch64-gnu-linux < %s | FileCheck -check-prefixes=CHECK %s
+
+define { half, half } @test_sincos_f16(half %a) {
+; CHECK-LABEL: test_sincos_f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: add x0, sp, #12
+; CHECK-NEXT: add x1, sp, #8
+; CHECK-NEXT: bl sincosf
+; CHECK-NEXT: ldp s1, s0, [sp, #8]
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: fcvt h1, s1
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %result = call { half, half } @llvm.sincos.f16(half %a)
+ ret { half, half } %result
+}
+
+define half @test_sincos_f16_only_use_sin(half %a) {
+; CHECK-LABEL: test_sincos_f16_only_use_sin:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: add x0, sp, #12
+; CHECK-NEXT: add x1, sp, #8
+; CHECK-NEXT: bl sincosf
+; CHECK-NEXT: ldr s0, [sp, #12]
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %result = call { half, half } @llvm.sincos.f16(half %a)
+ %result.0 = extractvalue { half, half } %result, 0
+ ret half %result.0
+}
+
+define half @test_sincos_f16_only_use_cos(half %a) {
+; CHECK-LABEL: test_sincos_f16_only_use_cos:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: add x0, sp, #12
+; CHECK-NEXT: add x1, sp, #8
+; CHECK-NEXT: bl sincosf
+; CHECK-NEXT: ldr s0, [sp, #8]
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %result = call { half, half } @llvm.sincos.f16(half %a)
+ %result.1 = extractvalue { half, half } %result, 1
+ ret half %result.1
+}
+
+define { <2 x half>, <2 x half> } @test_sincos_v2f16(<2 x half> %a) {
+; CHECK-LABEL: test_sincos_v2f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #64
+; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: mov h1, v0.h[1]
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: add x0, sp, #36
+; CHECK-NEXT: add x1, sp, #32
+; CHECK-NEXT: fcvt s0, h1
+; CHECK-NEXT: bl sincosf
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: add x0, sp, #28
+; CHECK-NEXT: add x1, sp, #24
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: bl sincosf
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: add x0, sp, #44
+; CHECK-NEXT: add x1, sp, #40
+; CHECK-NEXT: mov h0, v0.h[2]
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: bl sincosf
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: add x0, sp, #60
+; CHECK-NEXT: add x1, sp, #56
+; CHECK-NEXT: mov h0, v0.h[3]
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: bl sincosf
+; CHECK-NEXT: ldp s2, s0, [sp, #32]
+; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
+; CHECK-NEXT: ldp s3, s1, [sp, #24]
+; CHECK-NEXT: fcvt h4, s0
+; CHECK-NEXT: fcvt h2, s2
+; CHECK-NEXT: fcvt h0, s1
+; CHECK-NEXT: fcvt h1, s3
+; CHECK-NEXT: ldp s5, s3, [sp, #40]
+; CHECK-NEXT: fcvt h3, s3
+; CHECK-NEXT: mov v0.h[1], v4.h[0]
+; CHECK-NEXT: fcvt h4, s5
+; CHECK-NEXT: mov v1.h[1], v2.h[0]
+; CHECK-NEXT: ldp s5, s2, [sp, #56]
+; CHECK-NEXT: mov v0.h[2], v3.h[0]
+; CHECK-NEXT: fcvt h2, s2
+; CHECK-NEXT: fcvt h3, s5
+; CHECK-NEXT: mov v1.h[2], v4.h[0]
+; CHECK-NEXT: mov v0.h[3], v2.h[0]
+; CHECK-NEXT: mov v1.h[3], v3.h[0]
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q1
+; CHECK-NEXT: add sp, sp, #64
+; CHECK-NEXT: ret
+ %result = call { <2 x half>, <2 x half> } @llvm.sincos.v2f16(<2 x half> %a)
+ ret { <2 x half>, <2 x half> } %result
+}
+
+define <2 x half> @test_sincos_v2f16_only_use_sin(<2 x half> %a) {
+; CHECK-LABEL: test_sincos_v2f16_only_use_sin:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #64
+; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: mov h1, v0.h[1]
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: add x0, sp, #36
+; CHECK-NEXT: add x1, sp, #32
+; CHECK-NEXT: fcvt s0, h1
+; CHECK-NEXT: bl sincosf
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: add x0, sp, #28
+; CHECK-NEXT: add x1, sp, #24
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: bl sincosf
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: add x0, sp, #44
+; CHECK-NEXT: add x1, sp, #40
+; CHECK-NEXT: mov h0, v0.h[2]
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: bl sincosf
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: add x0, sp, #60
+; CHECK-NEXT: add x1, sp, #56
+; CHECK-NEXT: mov h0, v0.h[3]
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: bl sincosf
+; CHECK-NEXT: ldr s0, [sp, #36]
+; CHECK-NEXT: ldr s1, [sp, #28]
+; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
+; CHECK-NEXT: fcvt h2, s0
+; CHECK-NEXT: fcvt h0, s1
+; CHECK-NEXT: ldr s1, [sp, #44]
+; CHECK-NEXT: fcvt h1, s1
+; CHECK-NEXT: mov v0.h[1], v2.h[0]
+; CHECK-NEXT: ldr s2, [sp, #60]
+; CHECK-NEXT: mov v0.h[2], v1.h[0]
+; CHECK-NEXT: fcvt h1, s2
+; CHECK-NEXT: mov v0.h[3], v1.h[0]
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: add sp, sp, #64
+; CHECK-NEXT: ret
+ %result = call { <2 x half>, <2 x half> } @llvm.sincos.v2f16(<2 x half> %a)
+ %result.0 = extractvalue { <2 x half>, <2 x half> } %result, 0
+ ret <2 x half> %result.0
+}
+
+define <2 x half> @test_sincos_v2f16_only_use_cos(<2 x half> %a) {
+; CHECK-LABEL: test_sincos_v2f16_only_use_cos:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #64
+; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: mov h1, v0.h[1]
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: add x0, sp, #36
+; CHECK-NEXT: add x1, sp, #32
+; CHECK-NEXT: fcvt s0, h1
+; CHECK-NEXT: bl sincosf
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: add x0, sp, #28
+; CHECK-NEXT: add x1, sp, #24
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: bl sincosf
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: add x0, sp, #44
+; CHECK-NEXT: add x1, sp, #40
+; CHECK-NEXT: mov h0, v0.h[2]
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: bl sincosf
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: add x0, sp, #60
+; CHECK-NEXT: add x1, sp, #56
+; CHECK-NEXT: mov h0, v0.h[3]
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: bl sincosf
+; CHECK-NEXT: ldr s0, [sp, #32]
+; CHECK-NEXT: ldr s1, [sp, #24]
+; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
+; CHECK-NEXT: fcvt h2, s0
+; CHECK-NEXT: fcvt h0, s1
+; CHECK-NEXT: ldr s1, [sp, #40]
+; CHECK-NEXT: fcvt h1, s1
+; CHECK-NEXT: mov v0.h[1], v2.h[0]
+; CHECK-NEXT: ldr s2, [sp, #56]
+; CHECK-NEXT: mov v0.h[2], v1.h[0]
+; CHECK-NEXT: fcvt h1, s2
+; CHECK-NEXT: mov v0.h[3], v1.h[0]
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: add sp, sp, #64
+; CHECK-NEXT: ret
+ %result = call { <2 x half>, <2 x half> } @llvm.sincos.v2f16(<2 x half> %a)
+ %result.1 = extractvalue { <2 x half>, <2 x half> } %result, 1
+ ret <2 x half> %result.1
+}
+
+define { float, float } @test_sincos_f32(float %a) {
+; CHECK-LABEL: test_sincos_f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: add x0, sp, #12
+; CHECK-NEXT: add x1, sp, #8
+; CHECK-NEXT: bl sincosf
+; CHECK-NEXT: ldp s1, s0, [sp, #8]
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %result = call { float, float } @llvm.sincos.f32(float %a)
+ ret { float, float } %result
+}
+
+define float @test_sincos_f32_only_use_sin(float %a) {
+; CHECK-LABEL: test_sincos_f32_only_use_sin:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: add x0, sp, #12
+; CHECK-NEXT: add x1, sp, #8
+; CHECK-NEXT: bl sincosf
+; CHECK-NEXT: ldr s0, [sp, #12]
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %result = call { float, float } @llvm.sincos.f32(float %a)
+ %result.0 = extractvalue { float, float } %result, 0
+ ret float %result.0
+}
+
+define float @test_sincos_f32_only_use_cos(float %a) {
+; CHECK-LABEL: test_sincos_f32_only_use_cos:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: add x0, sp, #12
+; CHECK-NEXT: add x1, sp, #8
+; CHECK-NEXT: bl sincosf
+; CHECK-NEXT: ldr s0, [sp, #8]
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %result = call { float, float } @llvm.sincos.f32(float %a)
+ %result.1 = extractvalue { float, float } %result, 1
+ ret float %result.1
+}
+
+define { <2 x float>, <2 x float> } @test_sincos_v2f32(<2 x float> %a) {
+; CHECK-LABEL: test_sincos_v2f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #64
+; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: .cfi_offset w19, -8
+; CHECK-NEXT: .cfi_offset w20, -16
+; CHECK-NEXT: .cfi_offset w30, -32
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: add x0, sp, #44
+; CHECK-NEXT: add x1, sp, #40
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
+; CHECK-NEXT: bl sincosf
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: add x0, sp, #28
+; CHECK-NEXT: add x1, sp, #24
+; CHECK-NEXT: add x19, sp, #28
+; CHECK-NEXT: add x20, sp, #24
+; CHECK-NEXT: mov s0, v0.s[1]
+; CHECK-NEXT: bl sincosf
+; CHECK-NEXT: ldp s1, s0, [sp, #40]
+; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-NEXT: ld1 { v0.s }[1], [x19]
+; CHECK-NEXT: ld1 { v1.s }[1], [x20]
+; CHECK-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q1
+; CHECK-NEXT: add sp, sp, #64
+; CHECK-NEXT: ret
+ %result = call { <2 x float>, <2 x float> } @llvm.sincos.v2f32(<2 x float> %a)
+ ret { <2 x float>, <2 x float> } %result
+}
+
+define <2 x float> @test_sincos_v2f32_only_use_sin(<2 x float> %a) {
+; CHECK-LABEL: test_sincos_v2f32_only_use_sin:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #48
+; CHECK-NEXT: stp x30, x19, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: .cfi_offset w19, -8
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: add x0, sp, #28
+; CHECK-NEXT: add x1, sp, #24
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
+; CHECK-NEXT: bl sincosf
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: add x0, sp, #20
+; CHECK-NEXT: add x1, sp, #16
+; CHECK-NEXT: add x19, sp, #20
+; CHECK-NEXT: mov s0, v0.s[1]
+; CHECK-NEXT: bl sincosf
+; CHECK-NEXT: ldr s0, [sp, #28]
+; CHECK-NEXT: ld1 { v0.s }[1], [x19]
+; CHECK-NEXT: ldp x30, x19, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: add sp, sp, #48
+; CHECK-NEXT: ret
+ %result = call { <2 x float>, <2 x float> } @llvm.sincos.v2f32(<2 x float> %a)
+ %result.0 = extractvalue { <2 x float>, <2 x float> } %result, 0
+ ret <2 x float> %result.0
+}
+
+define <2 x float> @test_sincos_v2f32_only_use_cos(<2 x float> %a) {
+; CHECK-LABEL: test_sincos_v2f32_only_use_cos:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #48
+; CHECK-NEXT: stp x30, x19, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: .cfi_offset w19, -8
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: add x0, sp, #28
+; CHECK-NEXT: add x1, sp, #24
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
+; CHECK-NEXT: bl sincosf
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: add x0, sp, #20
+; CHECK-NEXT: add x1, sp, #16
+; CHECK-NEXT: add x19, sp, #16
+; CHECK-NEXT: mov s0, v0.s[1]
+; CHECK-NEXT: bl sincosf
+; CHECK-NEXT: ldr s0, [sp, #24]
+; CHECK-NEXT: ld1 { v0.s }[1], [x19]
+; CHECK-NEXT: ldp x30, x19, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: add sp, sp, #48
+; CHECK-NEXT: ret
+ %result = call { <2 x float>, <2 x float> } @llvm.sincos.v2f32(<2 x float> %a)
+ %result.1 = extractvalue { <2 x float>, <2 x float> } %result, 1
+ ret <2 x float> %result.1
+}
+
+define { double, double } @test_sincos_f64(double %a) {
+; CHECK-LABEL: test_sincos_f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #32
+; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: add x0, sp, #24
+; CHECK-NEXT: add x1, sp, #8
+; CHECK-NEXT: bl sincos
+; CHECK-NEXT: ldr d0, [sp, #24]
+; CHECK-NEXT: ldr d1, [sp, #8]
+; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #32
+; CHECK-NEXT: ret
+ %result = call { double, double } @llvm.sincos.f64(double %a)
+ ret { double, double } %result
+}
+
+define double @test_sincos_f64_only_use_sin(double %a) {
+; CHECK-LABEL: test_sincos_f64_only_use_sin:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #32
+; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: add x0, sp, #24
+; CHECK-NEXT: add x1, sp, #8
+; CHECK-NEXT: bl sincos
+; CHECK-NEXT: ldr d0, [sp, #24]
+; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #32
+; CHECK-NEXT: ret
+ %result = call { double, double } @llvm.sincos.f64(double %a)
+ %result.0 = extractvalue { double, double } %result, 0
+ ret double %result.0
+}
+
+define double @test_sincos_f64_only_use_cos(double %a) {
+; CHECK-LABEL: test_sincos_f64_only_use_cos:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #32
+; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: add x0, sp, #24
+; CHECK-NEXT: add x1, sp, #8
+; CHECK-NEXT: bl sincos
+; CHECK-NEXT: ldr d0, [sp, #8]
+; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #32
+; CHECK-NEXT: ret
+ %result = call { double, double } @llvm.sincos.f64(double %a)
+ %result.1 = extractvalue { double, double } %result, 1
+ ret double %result.1
+}
+
+define { <2 x double>, <2 x double> } @test_sincos_v2f64(<2 x double> %a) {
+; CHECK-LABEL: test_sincos_v2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #80
+; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
+; CHECK-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 80
+; CHECK-NEXT: .cfi_offset w19, -8
+; CHECK-NEXT: .cfi_offset w20, -16
+; CHECK-NEXT: .cfi_offset w30, -32
+; CHECK-NEXT: add x0, sp, #56
+; CHECK-NEXT: add x1, sp, #40
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: bl sincos
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: add x0, sp, #32
+; CHECK-NEXT: add x1, sp, #24
+; CHECK-NEXT: add x19, sp, #32
+; CHECK-NEXT: add x20, sp, #24
+; CHECK-NEXT: mov d0, v0.d[1]
+; CHECK-NEXT: bl sincos
+; CHECK-NEXT: ldr d0, [sp, #56]
+; CHECK-NEXT: ldr d1, [sp, #40]
+; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
+; CHECK-NEXT: ld1 { v0.d }[1], [x19]
+; CHECK-NEXT: ld1 { v1.d }[1], [x20]
+; CHECK-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #80
+; CHECK-NEXT: ret
+ %result = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> %a)
+ ret { <2 x double>, <2 x double> } %result
+}
+
+define <2 x double> @test_sincos_v2f64_only_use_sin(<2 x double> %a) {
+; CHECK-LABEL: test_sincos_v2f64_only_use_sin:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #64
+; CHECK-NEXT: stp x30, x19, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: .cfi_offset w19, -8
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: add x0, sp, #40
+; CHECK-NEXT: add x1, sp, #32
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: bl sincos
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: add x0, sp, #24
+; CHECK-NEXT: add x1, sp, #16
+; CHECK-NEXT: add x19, sp, #24
+; CHECK-NEXT: mov d0, v0.d[1]
+; CHECK-NEXT: bl sincos
+; CHECK-NEXT: ldr d0, [sp, #40]
+; CHECK-NEXT: ld1 { v0.d }[1], [x19]
+; CHECK-NEXT: ldp x30, x19, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #64
+; CHECK-NEXT: ret
+ %result = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> %a)
+ %result.0 = extractvalue { <2 x double>, <2 x double> } %result, 0
+ ret <2 x double> %result.0
+}
+
+define <2 x double> @test_sincos_v2f64_only_use_cos(<2 x double> %a) {
+; CHECK-LABEL: test_sincos_v2f64_only_use_cos:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #64
+; CHECK-NEXT: stp x30, x19, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: .cfi_offset w19, -8
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: add x0, sp, #40
+; CHECK-NEXT: add x1, sp, #32
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: bl sincos
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: add x0, sp, #24
+; CHECK-NEXT: add x1, sp, #16
+; CHECK-NEXT: add x19, sp, #16
+; CHECK-NEXT: mov d0, v0.d[1]
+; CHECK-NEXT: bl sincos
+; CHECK-NEXT: ldr d0, [sp, #32]
+; CHECK-NEXT: ld1 { v0.d }[1], [x19]
+; CHECK-NEXT: ldp x30, x19, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #64
+; CHECK-NEXT: ret
+ %result = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> %a)
+ %result.1 = extractvalue { <2 x double>, <2 x double> } %result, 1
+ ret <2 x double> %result.1
+}
diff --git a/llvm/test/CodeGen/ARM/llvm.sincos.ll b/llvm/test/CodeGen/ARM/llvm.sincos.ll
new file mode 100644
index 00000000000000..b08a642fd37178
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/llvm.sincos.ll
@@ -0,0 +1,464 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=thumbv7-gnu-linux < %s | FileCheck -check-prefixes=CHECK %s
+
+define { half, half } @test_sincos_f16(half %a) {
+; CHECK-LABEL: test_sincos_f16:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: push {r4, lr}
+; CHECK-NEXT: sub sp, #8
+; CHECK-NEXT: bl __gnu_h2f_ieee
+; CHECK-NEXT: add r1, sp, #4
+; CHECK-NEXT: mov r2, sp
+; CHECK-NEXT: bl sincosf
+; CHECK-NEXT: ldr r0, [sp, #4]
+; CHECK-NEXT: bl __gnu_f2h_ieee
+; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: ldr r0, [sp]
+; CHECK-NEXT: bl __gnu_f2h_ieee
+; CHECK-NEXT: mov r1, r0
+; CHECK-NEXT: mov r0, r4
+; CHECK-NEXT: add sp, #8
+; CHECK-NEXT: pop {r4, pc}
+ %result = call { half, half } @llvm.sincos.f16(half %a)
+ ret { half, half } %result
+}
+
+define half @test_sincos_f16_only_use_sin(half %a) {
+; CHECK-LABEL: test_sincos_f16_only_use_sin:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: sub sp, #8
+; CHECK-NEXT: bl __gnu_h2f_ieee
+; CHECK-NEXT: add r1, sp, #4
+; CHECK-NEXT: mov r2, sp
+; CHECK-NEXT: bl sincosf
+; CHECK-NEXT: ldr r0, [sp, #4]
+; CHECK-NEXT: bl __gnu_f2h_ieee
+; CHECK-NEXT: add sp, #8
+; CHECK-NEXT: pop {r7, pc}
+ %result = call { half, half } @llvm.sincos.f16(half %a)
+ %result.0 = extractvalue { half, half } %result, 0
+ ret half %result.0
+}
+
+define half @test_sincos_f16_only_use_cos(half %a) {
+; CHECK-LABEL: test_sincos_f16_only_use_cos:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: sub sp, #8
+; CHECK-NEXT: bl __gnu_h2f_ieee
+; CHECK-NEXT: add r1, sp, #4
+; CHECK-NEXT: mov r2, sp
+; CHECK-NEXT: bl sincosf
+; CHECK-NEXT: ldr r0, [sp]
+; CHECK-NEXT: bl __gnu_f2h_ieee
+; CHECK-NEXT: add sp, #8
+; CHECK-NEXT: pop {r7, pc}
+ %result = call { half, half } @llvm.sincos.f16(half %a)
+ %result.1 = extractvalue { half, half } %result, 1
+ ret half %result.1
+}
+
+define { <2 x half>, <2 x half> } @test_sincos_v2f16(<2 x half> %a) {
+; CHECK-LABEL: test_sincos_v2f16:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: push {r4, lr}
+; CHECK-NEXT: vpush {d8}
+; CHECK-NEXT: sub sp, #24
+; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: mov r0, r1
+; CHECK-NEXT: bl __gnu_h2f_ieee
+; CHECK-NEXT: add r1, sp, #12
+; CHECK-NEXT: add r2, sp, #8
+; CHECK-NEXT: bl sincosf
+; CHECK-NEXT: mov r0, r4
+; CHECK-NEXT: bl __gnu_h2f_ieee
+; CHECK-NEXT: add r1, sp, #4
+; CHECK-NEXT: mov r2, sp
+; CHECK-NEXT: bl sincosf
+; CHECK-NEXT: ldr r0, [sp, #12]
+; CHECK-NEXT: bl __gnu_f2h_ieee
+; CHECK-NEXT: ldr r1, [sp, #4]
+; CHECK-NEXT: strh.w r0, [sp, #22]
+; CHECK-NEXT: mov r0, r1
+; CHECK-NEXT: bl __gnu_f2h_ieee
+; CHECK-NEXT: strh.w r0, [sp, #20]
+; CHECK-NEXT: add r0, sp, #20
+; CHECK-NEXT: vld1.32 {d8[0]}, [r0:32]
+; CHECK-NEXT: ldr r0, [sp, #8]
+; CHECK-NEXT: bl __gnu_f2h_ieee
+; CHECK-NEXT: ldr r1, [sp]
+; CHECK-NEXT: strh.w r0, [sp, #18]
+; CHECK-NEXT: mov r0, r1
+; CHECK-NEXT: bl __gnu_f2h_ieee
+; CHECK-NEXT: strh.w r0, [sp, #16]
+; CHECK-NEXT: add r0, sp, #16
+; CHECK-NEXT: vmovl.u16 q9, d8
+; CHECK-NEXT: vld1.32 {d16[0]}, [r0:32]
+; CHECK-NEXT: vmovl.u16 q8, d16
+; CHECK-NEXT: vmov.32 r0, d18[0]
+; CHECK-NEXT: vmov.32 r1, d18[1]
+; CHECK-NEXT: vmov.32 r2, d16[0]
+; CHECK-NEXT: vmov.32 r3, d16[1]
+; CHECK-NEXT: add sp, #24
+; CHECK-NEXT: vpop {d8}
+; CHECK-NEXT: pop {r4, pc}
+ %result = call { <2 x half>, <2 x half> } @llvm.sincos.v2f16(<2 x half> %a)
+ ret { <2 x half>, <2 x half> } %result
+}
+
+define <2 x half> @test_sincos_v2f16_only_use_sin(<2 x half> %a) {
+; CHECK-LABEL: test_sincos_v2f16_only_use_sin:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: push {r4, lr}
+; CHECK-NEXT: sub sp, #24
+; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: mov r0, r1
+; CHECK-NEXT: bl __gnu_h2f_ieee
+; CHECK-NEXT: add r1, sp, #16
+; CHECK-NEXT: add r2, sp, #12
+; CHECK-NEXT: bl sincosf
+; CHECK-NEXT: mov r0, r4
+; CHECK-NEXT: bl __gnu_h2f_ieee
+; CHECK-NEXT: add r1, sp, #8
+; CHECK-NEXT: add r2, sp, #4
+; CHECK-NEXT: bl sincosf
+; CHECK-NEXT: ldr r0, [sp, #16]
+; CHECK-NEXT: bl __gnu_f2h_ieee
+; CHECK-NEXT: ldr r1, [sp, #8]
+; CHECK-NEXT: strh.w r0, [sp, #22]
+; CHECK-NEXT: mov r0, r1
+; CHECK-NEXT: bl __gnu_f2h_ieee
+; CHECK-NEXT: strh.w r0, [sp, #20]
+; CHECK-NEXT: add r0, sp, #20
+; CHECK-NEXT: vld1.32 {d16[0]}, [r0:32]
+; CHECK-NEXT: vmovl.u16 q8, d16
+; CHECK-NEXT: vmov.32 r0, d16[0]
+; CHECK-NEXT: vmov.32 r1, d16[1]
+; CHECK-NEXT: add sp, #24
+; CHECK-NEXT: pop {r4, pc}
+ %result = call { <2 x half>, <2 x half> } @llvm.sincos.v2f16(<2 x half> %a)
+ %result.0 = extractvalue { <2 x half>, <2 x half> } %result, 0
+ ret <2 x half> %result.0
+}
+
+define <2 x half> @test_sincos_v2f16_only_use_cos(<2 x half> %a) {
+; CHECK-LABEL: test_sincos_v2f16_only_use_cos:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: push {r4, lr}
+; CHECK-NEXT: sub sp, #24
+; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: mov r0, r1
+; CHECK-NEXT: bl __gnu_h2f_ieee
+; CHECK-NEXT: add r1, sp, #16
+; CHECK-NEXT: add r2, sp, #12
+; CHECK-NEXT: bl sincosf
+; CHECK-NEXT: mov r0, r4
+; CHECK-NEXT: bl __gnu_h2f_ieee
+; CHECK-NEXT: add r1, sp, #8
+; CHECK-NEXT: add r2, sp, #4
+; CHECK-NEXT: bl sincosf
+; CHECK-NEXT: ldr r0, [sp, #12]
+; CHECK-NEXT: bl __gnu_f2h_ieee
+; CHECK-NEXT: ldr r1, [sp, #4]
+; CHECK-NEXT: strh.w r0, [sp, #22]
+; CHECK-NEXT: mov r0, r1
+; CHECK-NEXT: bl __gnu_f2h_ieee
+; CHECK-NEXT: strh.w r0, [sp, #20]
+; CHECK-NEXT: add r0, sp, #20
+; CHECK-NEXT: vld1.32 {d16[0]}, [r0:32]
+; CHECK-NEXT: vmovl.u16 q8, d16
+; CHECK-NEXT: vmov.32 r0, d16[0]
+; CHECK-NEXT: vmov.32 r1, d16[1]
+; CHECK-NEXT: add sp, #24
+; CHECK-NEXT: pop {r4, pc}
+ %result = call { <2 x half>, <2 x half> } @llvm.sincos.v2f16(<2 x half> %a)
+ %result.1 = extractvalue { <2 x half>, <2 x half> } %result, 1
+ ret <2 x half> %result.1
+}
+
+define { float, float } @test_sincos_f32(float %a) {
+; CHECK-LABEL: test_sincos_f32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: sub sp, #8
+; CHECK-NEXT: add r1, sp, #4
+; CHECK-NEXT: mov r2, sp
+; CHECK-NEXT: bl sincosf
+; CHECK-NEXT: ldrd r1, r0, [sp], #8
+; CHECK-NEXT: pop {r7, pc}
+ %result = call { float, float } @llvm.sincos.f32(float %a)
+ ret { float, float } %result
+}
+
+define float @test_sincos_f32_only_use_sin(float %a) {
+; CHECK-LABEL: test_sincos_f32_only_use_sin:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: sub sp, #8
+; CHECK-NEXT: add r1, sp, #4
+; CHECK-NEXT: mov r2, sp
+; CHECK-NEXT: bl sincosf
+; CHECK-NEXT: ldr r0, [sp, #4]
+; CHECK-NEXT: add sp, #8
+; CHECK-NEXT: pop {r7, pc}
+ %result = call { float, float } @llvm.sincos.f32(float %a)
+ %result.0 = extractvalue { float, float } %result, 0
+ ret float %result.0
+}
+
+define float @test_sincos_f32_only_use_cos(float %a) {
+; CHECK-LABEL: test_sincos_f32_only_use_cos:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: sub sp, #8
+; CHECK-NEXT: add r1, sp, #4
+; CHECK-NEXT: mov r2, sp
+; CHECK-NEXT: bl sincosf
+; CHECK-NEXT: ldr r0, [sp], #8
+; CHECK-NEXT: pop {r7, pc}
+ %result = call { float, float } @llvm.sincos.f32(float %a)
+ %result.1 = extractvalue { float, float } %result, 1
+ ret float %result.1
+}
+
+define { <2 x float>, <2 x float> } @test_sincos_v2f32(<2 x float> %a) {
+; CHECK-LABEL: test_sincos_v2f32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: vpush {d8}
+; CHECK-NEXT: sub sp, #16
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: add r1, sp, #4
+; CHECK-NEXT: mov r2, sp
+; CHECK-NEXT: vmov r0, s17
+; CHECK-NEXT: bl sincosf
+; CHECK-NEXT: vmov r0, s16
+; CHECK-NEXT: add r1, sp, #12
+; CHECK-NEXT: add r2, sp, #8
+; CHECK-NEXT: bl sincosf
+; CHECK-NEXT: vldr s1, [sp, #4]
+; CHECK-NEXT: vldr s3, [sp]
+; CHECK-NEXT: vldr s0, [sp, #12]
+; CHECK-NEXT: vldr s2, [sp, #8]
+; CHECK-NEXT: vmov r0, r1, d0
+; CHECK-NEXT: vmov r2, r3, d1
+; CHECK-NEXT: add sp, #16
+; CHECK-NEXT: vpop {d8}
+; CHECK-NEXT: pop {r7, pc}
+ %result = call { <2 x float>, <2 x float> } @llvm.sincos.v2f32(<2 x float> %a)
+ ret { <2 x float>, <2 x float> } %result
+}
+
+define <2 x float> @test_sincos_v2f32_only_use_sin(<2 x float> %a) {
+; CHECK-LABEL: test_sincos_v2f32_only_use_sin:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: vpush {d8}
+; CHECK-NEXT: sub sp, #16
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: add r1, sp, #4
+; CHECK-NEXT: mov r2, sp
+; CHECK-NEXT: vmov r0, s17
+; CHECK-NEXT: bl sincosf
+; CHECK-NEXT: vmov r0, s16
+; CHECK-NEXT: add r1, sp, #12
+; CHECK-NEXT: add r2, sp, #8
+; CHECK-NEXT: bl sincosf
+; CHECK-NEXT: vldr s1, [sp, #4]
+; CHECK-NEXT: vldr s0, [sp, #12]
+; CHECK-NEXT: vmov r0, r1, d0
+; CHECK-NEXT: add sp, #16
+; CHECK-NEXT: vpop {d8}
+; CHECK-NEXT: pop {r7, pc}
+ %result = call { <2 x float>, <2 x float> } @llvm.sincos.v2f32(<2 x float> %a)
+ %result.0 = extractvalue { <2 x float>, <2 x float> } %result, 0
+ ret <2 x float> %result.0
+}
+
+define <2 x float> @test_sincos_v2f32_only_use_cos(<2 x float> %a) {
+; CHECK-LABEL: test_sincos_v2f32_only_use_cos:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: vpush {d8}
+; CHECK-NEXT: sub sp, #16
+; CHECK-NEXT: vmov d8, r0, r1
+; CHECK-NEXT: add r1, sp, #4
+; CHECK-NEXT: mov r2, sp
+; CHECK-NEXT: vmov r0, s17
+; CHECK-NEXT: bl sincosf
+; CHECK-NEXT: vmov r0, s16
+; CHECK-NEXT: add r1, sp, #12
+; CHECK-NEXT: add r2, sp, #8
+; CHECK-NEXT: bl sincosf
+; CHECK-NEXT: vldr s1, [sp]
+; CHECK-NEXT: vldr s0, [sp, #8]
+; CHECK-NEXT: vmov r0, r1, d0
+; CHECK-NEXT: add sp, #16
+; CHECK-NEXT: vpop {d8}
+; CHECK-NEXT: pop {r7, pc}
+ %result = call { <2 x float>, <2 x float> } @llvm.sincos.v2f32(<2 x float> %a)
+ %result.1 = extractvalue { <2 x float>, <2 x float> } %result, 1
+ ret <2 x float> %result.1
+}
+
+define { double, double } @test_sincos_f64(double %a) {
+; CHECK-LABEL: test_sincos_f64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: sub sp, #16
+; CHECK-NEXT: add r2, sp, #8
+; CHECK-NEXT: mov r3, sp
+; CHECK-NEXT: bl sincos
+; CHECK-NEXT: ldrd r0, r1, [sp, #8]
+; CHECK-NEXT: ldrd r2, r3, [sp], #16
+; CHECK-NEXT: pop {r7, pc}
+ %result = call { double, double } @llvm.sincos.f64(double %a)
+ ret { double, double } %result
+}
+
+define double @test_sincos_f64_only_use_sin(double %a) {
+; CHECK-LABEL: test_sincos_f64_only_use_sin:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: sub sp, #16
+; CHECK-NEXT: add r2, sp, #8
+; CHECK-NEXT: mov r3, sp
+; CHECK-NEXT: bl sincos
+; CHECK-NEXT: ldrd r0, r1, [sp, #8]
+; CHECK-NEXT: add sp, #16
+; CHECK-NEXT: pop {r7, pc}
+ %result = call { double, double } @llvm.sincos.f64(double %a)
+ %result.0 = extractvalue { double, double } %result, 0
+ ret double %result.0
+}
+
+define double @test_sincos_f64_only_use_cos(double %a) {
+; CHECK-LABEL: test_sincos_f64_only_use_cos:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: sub sp, #16
+; CHECK-NEXT: add r2, sp, #8
+; CHECK-NEXT: mov r3, sp
+; CHECK-NEXT: bl sincos
+; CHECK-NEXT: ldrd r0, r1, [sp], #16
+; CHECK-NEXT: pop {r7, pc}
+ %result = call { double, double } @llvm.sincos.f64(double %a)
+ %result.1 = extractvalue { double, double } %result, 1
+ ret double %result.1
+}
+
+define { <2 x double>, <2 x double> } @test_sincos_v2f64(<2 x double> %a) {
+; CHECK-LABEL: test_sincos_v2f64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: push {r4, lr}
+; CHECK-NEXT: sub sp, #32
+; CHECK-NEXT: mov r1, r3
+; CHECK-NEXT: mov r12, r2
+; CHECK-NEXT: add r2, sp, #24
+; CHECK-NEXT: add r3, sp, #16
+; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: mov r0, r12
+; CHECK-NEXT: bl sincos
+; CHECK-NEXT: ldrd r0, r1, [sp, #40]
+; CHECK-NEXT: add r2, sp, #8
+; CHECK-NEXT: mov r3, sp
+; CHECK-NEXT: bl sincos
+; CHECK-NEXT: vldr d19, [sp, #8]
+; CHECK-NEXT: vldr d18, [sp, #24]
+; CHECK-NEXT: vldr d17, [sp]
+; CHECK-NEXT: vldr d16, [sp, #16]
+; CHECK-NEXT: vst1.64 {d18, d19}, [r4]!
+; CHECK-NEXT: vst1.64 {d16, d17}, [r4]
+; CHECK-NEXT: add sp, #32
+; CHECK-NEXT: pop {r4, pc}
+ %result = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> %a)
+ ret { <2 x double>, <2 x double> } %result
+}
+
+define <2 x double> @test_sincos_v2f64_only_use_sin(<2 x double> %a) {
+; CHECK-LABEL: test_sincos_v2f64_only_use_sin:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: push {r4, r5, r6, r7, lr}
+; CHECK-NEXT: sub sp, #36
+; CHECK-NEXT: mov r6, r3
+; CHECK-NEXT: mov r4, r2
+; CHECK-NEXT: add r2, sp, #24
+; CHECK-NEXT: add r3, sp, #16
+; CHECK-NEXT: mov r7, r1
+; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: mov r0, r4
+; CHECK-NEXT: mov r1, r6
+; CHECK-NEXT: bl sincos
+; CHECK-NEXT: add r2, sp, #8
+; CHECK-NEXT: mov r3, sp
+; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: bl sincos
+; CHECK-NEXT: vldr d16, [sp, #24]
+; CHECK-NEXT: ldrd r0, r1, [sp, #8]
+; CHECK-NEXT: vmov r2, r3, d16
+; CHECK-NEXT: add sp, #36
+; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
+ %result = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> %a)
+ %result.0 = extractvalue { <2 x double>, <2 x double> } %result, 0
+ ret <2 x double> %result.0
+}
+
+define <2 x double> @test_sincos_v2f64_only_use_cos(<2 x double> %a) {
+; CHECK-LABEL: test_sincos_v2f64_only_use_cos:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: push {r4, r5, r6, r7, lr}
+; CHECK-NEXT: sub sp, #36
+; CHECK-NEXT: mov r6, r3
+; CHECK-NEXT: mov r4, r2
+; CHECK-NEXT: add r2, sp, #24
+; CHECK-NEXT: add r3, sp, #16
+; CHECK-NEXT: mov r7, r1
+; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: mov r0, r4
+; CHECK-NEXT: mov r1, r6
+; CHECK-NEXT: bl sincos
+; CHECK-NEXT: add r2, sp, #8
+; CHECK-NEXT: mov r3, sp
+; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: bl sincos
+; CHECK-NEXT: vldr d16, [sp, #16]
+; CHECK-NEXT: vmov r2, r3, d16
+; CHECK-NEXT: ldrd r0, r1, [sp], #36
+; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
+ %result = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> %a)
+ %result.1 = extractvalue { <2 x double>, <2 x double> } %result, 1
+ ret <2 x double> %result.1
+}
+
+define { fp128, fp128 } @test_sincos_f128(fp128 %a) {
+; CHECK-LABEL: test_sincos_f128:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: push {r4, r5, r7, lr}
+; CHECK-NEXT: sub sp, #40
+; CHECK-NEXT: mov r12, r3
+; CHECK-NEXT: ldr r3, [sp, #56]
+; CHECK-NEXT: add.w lr, sp, #8
+; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: add r0, sp, #24
+; CHECK-NEXT: strd r0, lr, [sp]
+; CHECK-NEXT: mov r0, r1
+; CHECK-NEXT: mov r1, r2
+; CHECK-NEXT: mov r2, r12
+; CHECK-NEXT: bl sincosl
+; CHECK-NEXT: ldrd r2, r3, [sp, #16]
+; CHECK-NEXT: ldrd r12, r1, [sp, #8]
+; CHECK-NEXT: str r3, [r4, #28]
+; CHECK-NEXT: ldrd r3, r5, [sp, #32]
+; CHECK-NEXT: ldrd lr, r0, [sp, #24]
+; CHECK-NEXT: strd r1, r2, [r4, #20]
+; CHECK-NEXT: add.w r1, r4, #8
+; CHECK-NEXT: stm.w r1, {r3, r5, r12}
+; CHECK-NEXT: strd lr, r0, [r4]
+; CHECK-NEXT: add sp, #40
+; CHECK-NEXT: pop {r4, r5, r7, pc}
+ %result = call { fp128, fp128 } @llvm.sincos.f128(fp128 %a)
+ ret { fp128, fp128 } %result
+}
>From 0f22f684f4e7c5825d3f44c85654597ec8605b7b Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Wed, 25 Sep 2024 11:48:57 +0000
Subject: [PATCH 2/7] Fixups
---
llvm/docs/GlobalISel/GenericOpcode.rst | 4 +-
llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 4 +-
.../AArch64/GlobalISel/irtranslator-sincos.ll | 106 ++++++++++++++++++
3 files changed, 110 insertions(+), 4 deletions(-)
create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-sincos.ll
diff --git a/llvm/docs/GlobalISel/GenericOpcode.rst b/llvm/docs/GlobalISel/GenericOpcode.rst
index 1c4e00b956bc4f..8920530dc3f1a1 100644
--- a/llvm/docs/GlobalISel/GenericOpcode.rst
+++ b/llvm/docs/GlobalISel/GenericOpcode.rst
@@ -633,8 +633,8 @@ G_FCEIL, G_FSQRT, G_FFLOOR, G_FRINT, G_FNEARBYINT
These correspond to the standard C functions of the same name.
-G_FCOS, G_FSIN, G_FTAN, G_FACOS, G_FASIN, G_FATAN, G_FATAN2, G_FCOSH, G_FSINH, G_FTANH
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+G_FCOS, G_FSIN, G_FSINCOS, G_FTAN, G_FACOS, G_FASIN, G_FATAN, G_FATAN2, G_FCOSH, G_FSINH, G_FTANH
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
These correspond to the standard C trigonometry functions of the same name.
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 3534c1ca941a9e..673a3b7affc53a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -5589,11 +5589,11 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
case ISD::FSINCOS: {
Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
Tmp2 = DAG.getNode(ISD::FSINCOS, dl, {NVT, NVT}, Tmp1);
+ Tmp3 = DAG.getIntPtrConstant(0, dl, /*isTarget=*/true);
for (unsigned ResNum = 0; ResNum < Node->getNumValues(); ResNum++)
Results.push_back(
- DAG.getNode(ISD::FP_ROUND, dl, OVT, Tmp2.getValue(ResNum),
- DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)));
+ DAG.getNode(ISD::FP_ROUND, dl, OVT, Tmp2.getValue(ResNum), Tmp3));
break;
}
case ISD::FFLOOR:
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-sincos.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-sincos.ll
new file mode 100644
index 00000000000000..3eeddae35b62b4
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-sincos.ll
@@ -0,0 +1,106 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+; RUN: llc -O0 -mtriple=aarch64-linux-gnu -global-isel -stop-after=irtranslator %s -o - | FileCheck %s
+
+define { half, half } @test_sincos_f16(half %a) {
+ ; CHECK-LABEL: name: test_sincos_f16
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK-NEXT: liveins: $h0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $h0
+ ; CHECK-NEXT: [[FSINCOS:%[0-9]+]]:_(s16), [[FSINCOS1:%[0-9]+]]:_ = G_FSINCOS [[COPY]]
+ ; CHECK-NEXT: $h0 = COPY [[FSINCOS]](s16)
+ ; CHECK-NEXT: $h1 = COPY [[FSINCOS1]](s16)
+ ; CHECK-NEXT: RET_ReallyLR implicit $h0, implicit $h1
+ %result = call { half, half } @llvm.sincos.f16(half %a)
+ ret { half, half } %result
+}
+
+define { <2 x half>, <2 x half> } @test_sincos_v2f16(<2 x half> %a) {
+ ; CHECK-LABEL: name: test_sincos_v2f16
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK-NEXT: liveins: $d0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
+ ; CHECK-NEXT: [[FSINCOS:%[0-9]+]]:_(<2 x s16>), [[FSINCOS1:%[0-9]+]]:_ = G_FSINCOS [[UV]]
+ ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[FSINCOS]](<2 x s16>)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV2]](s16), [[UV3]](s16), [[DEF]](s16), [[DEF]](s16)
+ ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[FSINCOS1]](<2 x s16>)
+ ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV4]](s16), [[UV5]](s16), [[DEF]](s16), [[DEF]](s16)
+ ; CHECK-NEXT: $d0 = COPY [[BUILD_VECTOR]](<4 x s16>)
+ ; CHECK-NEXT: $d1 = COPY [[BUILD_VECTOR1]](<4 x s16>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $d0, implicit $d1
+ %result = call { <2 x half>, <2 x half> } @llvm.sincos.v2f16(<2 x half> %a)
+ ret { <2 x half>, <2 x half> } %result
+}
+
+define { float, float } @test_sincos_f32(float %a) {
+ ; CHECK-LABEL: name: test_sincos_f32
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK-NEXT: liveins: $s0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+ ; CHECK-NEXT: [[FSINCOS:%[0-9]+]]:_(s32), [[FSINCOS1:%[0-9]+]]:_ = G_FSINCOS [[COPY]]
+ ; CHECK-NEXT: $s0 = COPY [[FSINCOS]](s32)
+ ; CHECK-NEXT: $s1 = COPY [[FSINCOS1]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $s0, implicit $s1
+ %result = call { float, float } @llvm.sincos.f32(float %a)
+ ret { float, float } %result
+}
+
+define { <2 x float>, <2 x float> } @test_sincos_v2f32(<2 x float> %a) {
+ ; CHECK-LABEL: name: test_sincos_v2f32
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK-NEXT: liveins: $d0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+ ; CHECK-NEXT: [[FSINCOS:%[0-9]+]]:_(<2 x s32>), [[FSINCOS1:%[0-9]+]]:_ = G_FSINCOS [[COPY]]
+ ; CHECK-NEXT: $d0 = COPY [[FSINCOS]](<2 x s32>)
+ ; CHECK-NEXT: $d1 = COPY [[FSINCOS1]](<2 x s32>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $d0, implicit $d1
+ %result = call { <2 x float>, <2 x float> } @llvm.sincos.v2f32(<2 x float> %a)
+ ret { <2 x float>, <2 x float> } %result
+}
+
+define { double, double } @test_sincos_f64(double %a) {
+ ; CHECK-LABEL: name: test_sincos_f64
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK-NEXT: liveins: $d0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d0
+ ; CHECK-NEXT: [[FSINCOS:%[0-9]+]]:_(s64), [[FSINCOS1:%[0-9]+]]:_ = G_FSINCOS [[COPY]]
+ ; CHECK-NEXT: $d0 = COPY [[FSINCOS]](s64)
+ ; CHECK-NEXT: $d1 = COPY [[FSINCOS1]](s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $d0, implicit $d1
+ %result = call { double, double } @llvm.sincos.f64(double %a)
+ ret { double, double } %result
+}
+
+define { <2 x double>, <2 x double> } @test_sincos_v2f64(<2 x double> %a) {
+ ; CHECK-LABEL: name: test_sincos_v2f64
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK-NEXT: liveins: $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
+ ; CHECK-NEXT: [[FSINCOS:%[0-9]+]]:_(<2 x s64>), [[FSINCOS1:%[0-9]+]]:_ = G_FSINCOS [[COPY]]
+ ; CHECK-NEXT: $q0 = COPY [[FSINCOS]](<2 x s64>)
+ ; CHECK-NEXT: $q1 = COPY [[FSINCOS1]](<2 x s64>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0, implicit $q1
+ %result = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> %a)
+ ret { <2 x double>, <2 x double> } %result
+}
+
+define { fp128, fp128 } @test_sincos_f128(fp128 %a) {
+ ; CHECK-LABEL: name: test_sincos_f128
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK-NEXT: liveins: $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $q0
+ ; CHECK-NEXT: [[FSINCOS:%[0-9]+]]:_(s128), [[FSINCOS1:%[0-9]+]]:_ = G_FSINCOS [[COPY]]
+ ; CHECK-NEXT: $q0 = COPY [[FSINCOS]](s128)
+ ; CHECK-NEXT: $q1 = COPY [[FSINCOS1]](s128)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0, implicit $q1
+ %result = call { fp128, fp128 } @llvm.sincos.f128(fp128 %a)
+ ret { fp128, fp128 } %result
+}
>From 6a120eb2d4ac3c8f93ebd9b4dca3defde67453d4 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Wed, 2 Oct 2024 09:21:00 +0000
Subject: [PATCH 3/7] Add note about afn
---
llvm/docs/LangRef.rst | 3 +++
1 file changed, 3 insertions(+)
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 7dfa394c8b81b4..268905881128f2 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -15927,6 +15927,9 @@ and :ref:`llvm.cos <t_llvm_cos>` on the argument.
The first result is the sine of the argument and the second result is the cosine
of the argument.
+When specified with the fast-math-flag 'afn', the result may be approximated
+using a less accurate calculation.
+
'``llvm.pow.*``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^
>From 2f036f2038dc203cffcce6206cf346be0f6a50fe Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Fri, 11 Oct 2024 16:56:36 +0000
Subject: [PATCH 4/7] Fixups
- Propagate node flags (e.g. afn) when building and promoting FSINCOS
- Expand FSINCOS into separate FSIN/FCOS nodes when no sincos libcall is available
- More tests
---
llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 15 +-
.../SelectionDAG/SelectionDAGBuilder.cpp | 3 +-
.../AArch64/GlobalISel/irtranslator-sincos.ll | 14 +
llvm/test/CodeGen/AArch64/llvm.sincos.ll | 420 ++++++++++++++++++
4 files changed, 449 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 673a3b7affc53a..47a9ae12248ccb 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -3714,6 +3714,17 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
}
break;
}
+ case ISD::FSINCOS: {
+ if (isSinCosLibcallAvailable(Node, TLI))
+ break;
+ EVT VT = Node->getValueType(0);
+ SDValue Op = Node->getOperand(0);
+ SDNodeFlags Flags = Node->getFlags();
+ Tmp1 = DAG.getNode(ISD::FSIN, dl, VT, Op, Flags);
+ Tmp2 = DAG.getNode(ISD::FCOS, dl, VT, Op, Flags);
+ Results.append({Tmp1, Tmp2});
+ break;
+ }
case ISD::FMAD:
llvm_unreachable("Illegal fmad should never be formed");
@@ -5588,9 +5599,9 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
}
case ISD::FSINCOS: {
Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
- Tmp2 = DAG.getNode(ISD::FSINCOS, dl, {NVT, NVT}, Tmp1);
+ Tmp2 = DAG.getNode(ISD::FSINCOS, dl, DAG.getVTList(NVT, NVT), Tmp1,
+ Node->getFlags());
Tmp3 = DAG.getIntPtrConstant(0, dl, /*isTarget=*/true);
-
for (unsigned ResNum = 0; ResNum < Node->getNumValues(); ResNum++)
Results.push_back(
DAG.getNode(ISD::FP_ROUND, dl, OVT, Tmp2.getValue(ResNum), Tmp3));
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 8affa0eaaca788..203e80e36b46d9 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -6952,7 +6952,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
SmallVector<EVT, 2> ValueVTs;
ComputeValueVTs(TLI, DAG.getDataLayout(), I.getType(), ValueVTs);
SDVTList VTs = DAG.getVTList(ValueVTs);
- setValue(&I, DAG.getNode(Opcode, sdl, VTs, getValue(I.getArgOperand(0))));
+ setValue(
+ &I, DAG.getNode(Opcode, sdl, VTs, getValue(I.getArgOperand(0)), Flags));
return;
}
case Intrinsic::arithmetic_fence: {
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-sincos.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-sincos.ll
index 3eeddae35b62b4..69cd6ce87b5c6b 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-sincos.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-sincos.ll
@@ -104,3 +104,17 @@ define { fp128, fp128 } @test_sincos_f128(fp128 %a) {
%result = call { fp128, fp128 } @llvm.sincos.f16(fp128 %a)
ret { fp128, fp128 } %result
}
+
+define { float, float } @test_sincos_f32_afn(float %a) {
+ ; CHECK-LABEL: name: test_sincos_f32_afn
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK-NEXT: liveins: $s0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+ ; CHECK-NEXT: [[FSINCOS:%[0-9]+]]:_(s32), [[FSINCOS1:%[0-9]+]]:_ = afn G_FSINCOS [[COPY]]
+ ; CHECK-NEXT: $s0 = COPY [[FSINCOS]](s32)
+ ; CHECK-NEXT: $s1 = COPY [[FSINCOS1]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $s0, implicit $s1
+ %result = call afn { float, float } @llvm.sincos.f32(float %a)
+ ret { float, float } %result
+}
diff --git a/llvm/test/CodeGen/AArch64/llvm.sincos.ll b/llvm/test/CodeGen/AArch64/llvm.sincos.ll
index 841b0b2d665345..2e3c02877dfcfa 100644
--- a/llvm/test/CodeGen/AArch64/llvm.sincos.ll
+++ b/llvm/test/CodeGen/AArch64/llvm.sincos.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=aarch64-gnu-linux < %s | FileCheck -check-prefixes=CHECK %s
+; RUN: llc -mtriple=aarch64-none-linux < %s | FileCheck -check-prefixes=NO-LIBCALL %s
define { half, half } @test_sincos_f16(half %a) {
; CHECK-LABEL: test_sincos_f16:
@@ -16,6 +17,27 @@ define { half, half } @test_sincos_f16(half %a) {
; CHECK-NEXT: fcvt h1, s1
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
+;
+; NO-LIBCALL-LABEL: test_sincos_f16:
+; NO-LIBCALL: // %bb.0:
+; NO-LIBCALL-NEXT: stp d9, d8, [sp, #-32]! // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; NO-LIBCALL-NEXT: .cfi_def_cfa_offset 32
+; NO-LIBCALL-NEXT: .cfi_offset w30, -16
+; NO-LIBCALL-NEXT: .cfi_offset b8, -24
+; NO-LIBCALL-NEXT: .cfi_offset b9, -32
+; NO-LIBCALL-NEXT: fcvt s8, h0
+; NO-LIBCALL-NEXT: fmov s0, s8
+; NO-LIBCALL-NEXT: bl sinf
+; NO-LIBCALL-NEXT: fcvt h9, s0
+; NO-LIBCALL-NEXT: fmov s0, s8
+; NO-LIBCALL-NEXT: bl cosf
+; NO-LIBCALL-NEXT: fmov s1, s0
+; NO-LIBCALL-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; NO-LIBCALL-NEXT: fmov s0, s9
+; NO-LIBCALL-NEXT: fcvt h1, s1
+; NO-LIBCALL-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: ret
%result = call { half, half } @llvm.sincos.f16(half %a)
ret { half, half } %result
}
@@ -34,6 +56,17 @@ define half @test_sincos_f16_only_use_sin(half %a) {
; CHECK-NEXT: fcvt h0, s0
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
+;
+; NO-LIBCALL-LABEL: test_sincos_f16_only_use_sin:
+; NO-LIBCALL: // %bb.0:
+; NO-LIBCALL-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; NO-LIBCALL-NEXT: .cfi_def_cfa_offset 16
+; NO-LIBCALL-NEXT: .cfi_offset w30, -16
+; NO-LIBCALL-NEXT: fcvt s0, h0
+; NO-LIBCALL-NEXT: bl sinf
+; NO-LIBCALL-NEXT: fcvt h0, s0
+; NO-LIBCALL-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; NO-LIBCALL-NEXT: ret
%result = call { half, half } @llvm.sincos.f16(half %a)
%result.0 = extractvalue { half, half } %result, 0
ret half %result.0
@@ -53,6 +86,17 @@ define half @test_sincos_f16_only_use_cos(half %a) {
; CHECK-NEXT: fcvt h0, s0
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
+;
+; NO-LIBCALL-LABEL: test_sincos_f16_only_use_cos:
+; NO-LIBCALL: // %bb.0:
+; NO-LIBCALL-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; NO-LIBCALL-NEXT: .cfi_def_cfa_offset 16
+; NO-LIBCALL-NEXT: .cfi_offset w30, -16
+; NO-LIBCALL-NEXT: fcvt s0, h0
+; NO-LIBCALL-NEXT: bl cosf
+; NO-LIBCALL-NEXT: fcvt h0, s0
+; NO-LIBCALL-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; NO-LIBCALL-NEXT: ret
%result = call { half, half } @llvm.sincos.f16(half %a)
%result.1 = extractvalue { half, half } %result, 1
ret half %result.1
@@ -112,6 +156,83 @@ define { <2 x half>, <2 x half> } @test_sincos_v2f16(<2 x half> %a) {
; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q1
; CHECK-NEXT: add sp, sp, #64
; CHECK-NEXT: ret
+;
+; NO-LIBCALL-LABEL: test_sincos_v2f16:
+; NO-LIBCALL: // %bb.0:
+; NO-LIBCALL-NEXT: sub sp, sp, #80
+; NO-LIBCALL-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
+; NO-LIBCALL-NEXT: .cfi_def_cfa_offset 80
+; NO-LIBCALL-NEXT: .cfi_offset w30, -16
+; NO-LIBCALL-NEXT: .cfi_offset b8, -24
+; NO-LIBCALL-NEXT: .cfi_offset b9, -32
+; NO-LIBCALL-NEXT: .cfi_offset b10, -40
+; NO-LIBCALL-NEXT: .cfi_offset b11, -48
+; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 def $q0
+; NO-LIBCALL-NEXT: mov h1, v0.h[1]
+; NO-LIBCALL-NEXT: str q0, [sp] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: fcvt s8, h1
+; NO-LIBCALL-NEXT: fmov s0, s8
+; NO-LIBCALL-NEXT: bl sinf
+; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: fcvt h0, s0
+; NO-LIBCALL-NEXT: fcvt s9, h1
+; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: fmov s0, s9
+; NO-LIBCALL-NEXT: bl sinf
+; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: fcvt h0, s0
+; NO-LIBCALL-NEXT: mov h1, v1.h[2]
+; NO-LIBCALL-NEXT: fcvt s10, h1
+; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: mov v0.h[1], v1.h[0]
+; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: fmov s0, s10
+; NO-LIBCALL-NEXT: bl sinf
+; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: fcvt h0, s0
+; NO-LIBCALL-NEXT: mov h1, v1.h[3]
+; NO-LIBCALL-NEXT: fcvt s11, h1
+; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: mov v1.h[2], v0.h[0]
+; NO-LIBCALL-NEXT: fmov s0, s11
+; NO-LIBCALL-NEXT: str q1, [sp, #16] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: bl sinf
+; NO-LIBCALL-NEXT: fcvt h0, s0
+; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: mov v1.h[3], v0.h[0]
+; NO-LIBCALL-NEXT: fmov s0, s8
+; NO-LIBCALL-NEXT: str q1, [sp, #16] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: bl cosf
+; NO-LIBCALL-NEXT: fcvt h0, s0
+; NO-LIBCALL-NEXT: str q0, [sp] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: fmov s0, s9
+; NO-LIBCALL-NEXT: bl cosf
+; NO-LIBCALL-NEXT: fcvt h0, s0
+; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: mov v0.h[1], v1.h[0]
+; NO-LIBCALL-NEXT: str q0, [sp] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: fmov s0, s10
+; NO-LIBCALL-NEXT: bl cosf
+; NO-LIBCALL-NEXT: fcvt h0, s0
+; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: mov v1.h[2], v0.h[0]
+; NO-LIBCALL-NEXT: fmov s0, s11
+; NO-LIBCALL-NEXT: str q1, [sp] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: bl cosf
+; NO-LIBCALL-NEXT: fmov s1, s0
+; NO-LIBCALL-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
+; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 killed $q0
+; NO-LIBCALL-NEXT: fcvt h2, s1
+; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: mov v1.h[3], v2.h[0]
+; NO-LIBCALL-NEXT: // kill: def $d1 killed $d1 killed $q1
+; NO-LIBCALL-NEXT: add sp, sp, #80
+; NO-LIBCALL-NEXT: ret
%result = call { <2 x half>, <2 x half> } @llvm.sincos.v2f16(<2 x half> %a)
ret { <2 x half>, <2 x half> } %result
}
@@ -162,6 +283,47 @@ define <2 x half> @test_sincos_v2f16_only_use_sin(<2 x half> %a) {
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: add sp, sp, #64
; CHECK-NEXT: ret
+;
+; NO-LIBCALL-LABEL: test_sincos_v2f16_only_use_sin:
+; NO-LIBCALL: // %bb.0:
+; NO-LIBCALL-NEXT: sub sp, sp, #48
+; NO-LIBCALL-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
+; NO-LIBCALL-NEXT: .cfi_def_cfa_offset 48
+; NO-LIBCALL-NEXT: .cfi_offset w30, -16
+; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 def $q0
+; NO-LIBCALL-NEXT: mov h1, v0.h[1]
+; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: fcvt s0, h1
+; NO-LIBCALL-NEXT: bl sinf
+; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: fcvt h0, s0
+; NO-LIBCALL-NEXT: fcvt s1, h1
+; NO-LIBCALL-NEXT: str q0, [sp] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: fmov s0, s1
+; NO-LIBCALL-NEXT: bl sinf
+; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: fcvt h2, s0
+; NO-LIBCALL-NEXT: mov h1, v1.h[2]
+; NO-LIBCALL-NEXT: fcvt s0, h1
+; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: mov v2.h[1], v1.h[0]
+; NO-LIBCALL-NEXT: str q2, [sp] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: bl sinf
+; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: fcvt h2, s0
+; NO-LIBCALL-NEXT: mov h1, v1.h[3]
+; NO-LIBCALL-NEXT: fcvt s0, h1
+; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: mov v1.h[2], v2.h[0]
+; NO-LIBCALL-NEXT: str q1, [sp] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: bl sinf
+; NO-LIBCALL-NEXT: fcvt h1, s0
+; NO-LIBCALL-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
+; NO-LIBCALL-NEXT: mov v0.h[3], v1.h[0]
+; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 killed $q0
+; NO-LIBCALL-NEXT: add sp, sp, #48
+; NO-LIBCALL-NEXT: ret
%result = call { <2 x half>, <2 x half> } @llvm.sincos.v2f16(<2 x half> %a)
%result.0 = extractvalue { <2 x half>, <2 x half> } %result, 0
ret <2 x half> %result.0
@@ -213,6 +375,47 @@ define <2 x half> @test_sincos_v2f16_only_use_cos(<2 x half> %a) {
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: add sp, sp, #64
; CHECK-NEXT: ret
+;
+; NO-LIBCALL-LABEL: test_sincos_v2f16_only_use_cos:
+; NO-LIBCALL: // %bb.0:
+; NO-LIBCALL-NEXT: sub sp, sp, #48
+; NO-LIBCALL-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
+; NO-LIBCALL-NEXT: .cfi_def_cfa_offset 48
+; NO-LIBCALL-NEXT: .cfi_offset w30, -16
+; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 def $q0
+; NO-LIBCALL-NEXT: mov h1, v0.h[1]
+; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: fcvt s0, h1
+; NO-LIBCALL-NEXT: bl cosf
+; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: fcvt h0, s0
+; NO-LIBCALL-NEXT: fcvt s1, h1
+; NO-LIBCALL-NEXT: str q0, [sp] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: fmov s0, s1
+; NO-LIBCALL-NEXT: bl cosf
+; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: fcvt h2, s0
+; NO-LIBCALL-NEXT: mov h1, v1.h[2]
+; NO-LIBCALL-NEXT: fcvt s0, h1
+; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: mov v2.h[1], v1.h[0]
+; NO-LIBCALL-NEXT: str q2, [sp] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: bl cosf
+; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: fcvt h2, s0
+; NO-LIBCALL-NEXT: mov h1, v1.h[3]
+; NO-LIBCALL-NEXT: fcvt s0, h1
+; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: mov v1.h[2], v2.h[0]
+; NO-LIBCALL-NEXT: str q1, [sp] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: bl cosf
+; NO-LIBCALL-NEXT: fcvt h1, s0
+; NO-LIBCALL-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
+; NO-LIBCALL-NEXT: mov v0.h[3], v1.h[0]
+; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 killed $q0
+; NO-LIBCALL-NEXT: add sp, sp, #48
+; NO-LIBCALL-NEXT: ret
%result = call { <2 x half>, <2 x half> } @llvm.sincos.v2f16(<2 x half> %a)
%result.1 = extractvalue { <2 x half>, <2 x half> } %result, 1
ret <2 x half> %result.1
@@ -230,6 +433,25 @@ define { float, float } @test_sincos_f32(float %a) {
; CHECK-NEXT: ldp s1, s0, [sp, #8]
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
+;
+; NO-LIBCALL-LABEL: test_sincos_f32:
+; NO-LIBCALL: // %bb.0:
+; NO-LIBCALL-NEXT: stp d9, d8, [sp, #-32]! // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; NO-LIBCALL-NEXT: .cfi_def_cfa_offset 32
+; NO-LIBCALL-NEXT: .cfi_offset w30, -16
+; NO-LIBCALL-NEXT: .cfi_offset b8, -24
+; NO-LIBCALL-NEXT: .cfi_offset b9, -32
+; NO-LIBCALL-NEXT: fmov s8, s0
+; NO-LIBCALL-NEXT: bl sinf
+; NO-LIBCALL-NEXT: fmov s9, s0
+; NO-LIBCALL-NEXT: fmov s0, s8
+; NO-LIBCALL-NEXT: bl cosf
+; NO-LIBCALL-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; NO-LIBCALL-NEXT: fmov s1, s0
+; NO-LIBCALL-NEXT: fmov s0, s9
+; NO-LIBCALL-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: ret
%result = call { float, float } @llvm.sincos.f32(float %a)
ret { float, float } %result
}
@@ -246,6 +468,10 @@ define float @test_sincos_f32_only_use_sin(float %a) {
; CHECK-NEXT: ldr s0, [sp, #12]
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
+;
+; NO-LIBCALL-LABEL: test_sincos_f32_only_use_sin:
+; NO-LIBCALL: // %bb.0:
+; NO-LIBCALL-NEXT: b sinf
%result = call { float, float } @llvm.sincos.f32(float %a)
%result.0 = extractvalue { float, float } %result, 0
ret float %result.0
@@ -263,6 +489,10 @@ define float @test_sincos_f32_only_use_cos(float %a) {
; CHECK-NEXT: ldr s0, [sp, #8]
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
+;
+; NO-LIBCALL-LABEL: test_sincos_f32_only_use_cos:
+; NO-LIBCALL: // %bb.0:
+; NO-LIBCALL-NEXT: b cosf
%result = call { float, float } @llvm.sincos.f32(float %a)
%result.1 = extractvalue { float, float } %result, 1
ret float %result.1
@@ -300,6 +530,45 @@ define { <2 x float>, <2 x float> } @test_sincos_v2f32(<2 x float> %a) {
; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q1
; CHECK-NEXT: add sp, sp, #64
; CHECK-NEXT: ret
+;
+; NO-LIBCALL-LABEL: test_sincos_v2f32:
+; NO-LIBCALL: // %bb.0:
+; NO-LIBCALL-NEXT: sub sp, sp, #64
+; NO-LIBCALL-NEXT: str d8, [sp, #48] // 8-byte Folded Spill
+; NO-LIBCALL-NEXT: str x30, [sp, #56] // 8-byte Folded Spill
+; NO-LIBCALL-NEXT: .cfi_def_cfa_offset 64
+; NO-LIBCALL-NEXT: .cfi_offset w30, -8
+; NO-LIBCALL-NEXT: .cfi_offset b8, -16
+; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 def $q0
+; NO-LIBCALL-NEXT: mov s8, v0.s[1]
+; NO-LIBCALL-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: fmov s0, s8
+; NO-LIBCALL-NEXT: bl sinf
+; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 def $q0
+; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 killed $q0
+; NO-LIBCALL-NEXT: bl sinf
+; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 def $q0
+; NO-LIBCALL-NEXT: mov v0.s[1], v1.s[0]
+; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: fmov s0, s8
+; NO-LIBCALL-NEXT: bl cosf
+; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 def $q0
+; NO-LIBCALL-NEXT: str q0, [sp] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 killed $q0
+; NO-LIBCALL-NEXT: bl cosf
+; NO-LIBCALL-NEXT: fmov s1, s0
+; NO-LIBCALL-NEXT: ldp q2, q0, [sp] // 32-byte Folded Reload
+; NO-LIBCALL-NEXT: ldr x30, [sp, #56] // 8-byte Folded Reload
+; NO-LIBCALL-NEXT: ldr d8, [sp, #48] // 8-byte Folded Reload
+; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 killed $q0
+; NO-LIBCALL-NEXT: mov v1.s[1], v2.s[0]
+; NO-LIBCALL-NEXT: // kill: def $d1 killed $d1 killed $q1
+; NO-LIBCALL-NEXT: add sp, sp, #64
+; NO-LIBCALL-NEXT: ret
%result = call { <2 x float>, <2 x float> } @llvm.sincos.v2f32(<2 x float> %a)
ret { <2 x float>, <2 x float> } %result
}
@@ -330,6 +599,29 @@ define <2 x float> @test_sincos_v2f32_only_use_sin(<2 x float> %a) {
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: add sp, sp, #48
; CHECK-NEXT: ret
+;
+; NO-LIBCALL-LABEL: test_sincos_v2f32_only_use_sin:
+; NO-LIBCALL: // %bb.0:
+; NO-LIBCALL-NEXT: sub sp, sp, #48
+; NO-LIBCALL-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
+; NO-LIBCALL-NEXT: .cfi_def_cfa_offset 48
+; NO-LIBCALL-NEXT: .cfi_offset w30, -16
+; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 def $q0
+; NO-LIBCALL-NEXT: str q0, [sp] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: mov s0, v0.s[1]
+; NO-LIBCALL-NEXT: bl sinf
+; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 def $q0
+; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 killed $q0
+; NO-LIBCALL-NEXT: bl sinf
+; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 def $q0
+; NO-LIBCALL-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
+; NO-LIBCALL-NEXT: mov v0.s[1], v1.s[0]
+; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 killed $q0
+; NO-LIBCALL-NEXT: add sp, sp, #48
+; NO-LIBCALL-NEXT: ret
%result = call { <2 x float>, <2 x float> } @llvm.sincos.v2f32(<2 x float> %a)
%result.0 = extractvalue { <2 x float>, <2 x float> } %result, 0
ret <2 x float> %result.0
@@ -361,6 +653,29 @@ define <2 x float> @test_sincos_v2f32_only_use_cos(<2 x float> %a) {
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: add sp, sp, #48
; CHECK-NEXT: ret
+;
+; NO-LIBCALL-LABEL: test_sincos_v2f32_only_use_cos:
+; NO-LIBCALL: // %bb.0:
+; NO-LIBCALL-NEXT: sub sp, sp, #48
+; NO-LIBCALL-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
+; NO-LIBCALL-NEXT: .cfi_def_cfa_offset 48
+; NO-LIBCALL-NEXT: .cfi_offset w30, -16
+; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 def $q0
+; NO-LIBCALL-NEXT: str q0, [sp] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: mov s0, v0.s[1]
+; NO-LIBCALL-NEXT: bl cosf
+; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 def $q0
+; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 killed $q0
+; NO-LIBCALL-NEXT: bl cosf
+; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 def $q0
+; NO-LIBCALL-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
+; NO-LIBCALL-NEXT: mov v0.s[1], v1.s[0]
+; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 killed $q0
+; NO-LIBCALL-NEXT: add sp, sp, #48
+; NO-LIBCALL-NEXT: ret
%result = call { <2 x float>, <2 x float> } @llvm.sincos.v2f32(<2 x float> %a)
%result.1 = extractvalue { <2 x float>, <2 x float> } %result, 1
ret <2 x float> %result.1
@@ -381,6 +696,25 @@ define { double, double } @test_sincos_f64(double %a) {
; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
; CHECK-NEXT: add sp, sp, #32
; CHECK-NEXT: ret
+;
+; NO-LIBCALL-LABEL: test_sincos_f64:
+; NO-LIBCALL: // %bb.0:
+; NO-LIBCALL-NEXT: stp d9, d8, [sp, #-32]! // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; NO-LIBCALL-NEXT: .cfi_def_cfa_offset 32
+; NO-LIBCALL-NEXT: .cfi_offset w30, -16
+; NO-LIBCALL-NEXT: .cfi_offset b8, -24
+; NO-LIBCALL-NEXT: .cfi_offset b9, -32
+; NO-LIBCALL-NEXT: fmov d8, d0
+; NO-LIBCALL-NEXT: bl sin
+; NO-LIBCALL-NEXT: fmov d9, d0
+; NO-LIBCALL-NEXT: fmov d0, d8
+; NO-LIBCALL-NEXT: bl cos
+; NO-LIBCALL-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; NO-LIBCALL-NEXT: fmov d1, d0
+; NO-LIBCALL-NEXT: fmov d0, d9
+; NO-LIBCALL-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: ret
%result = call { double, double } @llvm.sincos.f64(double %a)
ret { double, double } %result
}
@@ -399,6 +733,10 @@ define double @test_sincos_f64_only_use_sin(double %a) {
; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
; CHECK-NEXT: add sp, sp, #32
; CHECK-NEXT: ret
+;
+; NO-LIBCALL-LABEL: test_sincos_f64_only_use_sin:
+; NO-LIBCALL: // %bb.0:
+; NO-LIBCALL-NEXT: b sin
%result = call { double, double } @llvm.sincos.f64(double %a)
%result.0 = extractvalue { double, double } %result, 0
ret double %result.0
@@ -418,6 +756,10 @@ define double @test_sincos_f64_only_use_cos(double %a) {
; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
; CHECK-NEXT: add sp, sp, #32
; CHECK-NEXT: ret
+;
+; NO-LIBCALL-LABEL: test_sincos_f64_only_use_cos:
+; NO-LIBCALL: // %bb.0:
+; NO-LIBCALL-NEXT: b cos
%result = call { double, double } @llvm.sincos.f64(double %a)
%result.1 = extractvalue { double, double } %result, 1
ret double %result.1
@@ -453,6 +795,42 @@ define { <2 x double>, <2 x double> } @test_sincos_v2f64(<2 x double> %a) {
; CHECK-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: add sp, sp, #80
; CHECK-NEXT: ret
+;
+; NO-LIBCALL-LABEL: test_sincos_v2f64:
+; NO-LIBCALL: // %bb.0:
+; NO-LIBCALL-NEXT: sub sp, sp, #64
+; NO-LIBCALL-NEXT: str d8, [sp, #48] // 8-byte Folded Spill
+; NO-LIBCALL-NEXT: str x30, [sp, #56] // 8-byte Folded Spill
+; NO-LIBCALL-NEXT: .cfi_def_cfa_offset 64
+; NO-LIBCALL-NEXT: .cfi_offset w30, -8
+; NO-LIBCALL-NEXT: .cfi_offset b8, -16
+; NO-LIBCALL-NEXT: mov d8, v0.d[1]
+; NO-LIBCALL-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: fmov d0, d8
+; NO-LIBCALL-NEXT: bl sin
+; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 def $q0
+; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 killed $q0
+; NO-LIBCALL-NEXT: bl sin
+; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 def $q0
+; NO-LIBCALL-NEXT: mov v0.d[1], v1.d[0]
+; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: fmov d0, d8
+; NO-LIBCALL-NEXT: bl cos
+; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 def $q0
+; NO-LIBCALL-NEXT: str q0, [sp] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 killed $q0
+; NO-LIBCALL-NEXT: bl cos
+; NO-LIBCALL-NEXT: fmov d1, d0
+; NO-LIBCALL-NEXT: ldp q2, q0, [sp] // 32-byte Folded Reload
+; NO-LIBCALL-NEXT: ldr x30, [sp, #56] // 8-byte Folded Reload
+; NO-LIBCALL-NEXT: ldr d8, [sp, #48] // 8-byte Folded Reload
+; NO-LIBCALL-NEXT: mov v1.d[1], v2.d[0]
+; NO-LIBCALL-NEXT: add sp, sp, #64
+; NO-LIBCALL-NEXT: ret
%result = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> %a)
ret { <2 x double>, <2 x double> } %result
}
@@ -481,6 +859,27 @@ define <2 x double> @test_sincos_v2f64_only_use_sin(<2 x double> %a) {
; CHECK-NEXT: ldp x30, x19, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: add sp, sp, #64
; CHECK-NEXT: ret
+;
+; NO-LIBCALL-LABEL: test_sincos_v2f64_only_use_sin:
+; NO-LIBCALL: // %bb.0:
+; NO-LIBCALL-NEXT: sub sp, sp, #48
+; NO-LIBCALL-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
+; NO-LIBCALL-NEXT: .cfi_def_cfa_offset 48
+; NO-LIBCALL-NEXT: .cfi_offset w30, -16
+; NO-LIBCALL-NEXT: str q0, [sp] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: mov d0, v0.d[1]
+; NO-LIBCALL-NEXT: bl sin
+; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 def $q0
+; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 killed $q0
+; NO-LIBCALL-NEXT: bl sin
+; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 def $q0
+; NO-LIBCALL-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
+; NO-LIBCALL-NEXT: mov v0.d[1], v1.d[0]
+; NO-LIBCALL-NEXT: add sp, sp, #48
+; NO-LIBCALL-NEXT: ret
%result = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> %a)
%result.0 = extractvalue { <2 x double>, <2 x double> } %result, 0
ret <2 x double> %result.0
@@ -510,6 +909,27 @@ define <2 x double> @test_sincos_v2f64_only_use_cos(<2 x double> %a) {
; CHECK-NEXT: ldp x30, x19, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: add sp, sp, #64
; CHECK-NEXT: ret
+;
+; NO-LIBCALL-LABEL: test_sincos_v2f64_only_use_cos:
+; NO-LIBCALL: // %bb.0:
+; NO-LIBCALL-NEXT: sub sp, sp, #48
+; NO-LIBCALL-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
+; NO-LIBCALL-NEXT: .cfi_def_cfa_offset 48
+; NO-LIBCALL-NEXT: .cfi_offset w30, -16
+; NO-LIBCALL-NEXT: str q0, [sp] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: mov d0, v0.d[1]
+; NO-LIBCALL-NEXT: bl cos
+; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 def $q0
+; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 killed $q0
+; NO-LIBCALL-NEXT: bl cos
+; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 def $q0
+; NO-LIBCALL-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
+; NO-LIBCALL-NEXT: mov v0.d[1], v1.d[0]
+; NO-LIBCALL-NEXT: add sp, sp, #48
+; NO-LIBCALL-NEXT: ret
%result = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> %a)
%result.1 = extractvalue { <2 x double>, <2 x double> } %result, 1
ret <2 x double> %result.1
>From d44e4d90ecbc4b6561469ff29c9fb3762f92b2ef Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Fri, 11 Oct 2024 19:40:57 +0000
Subject: [PATCH 5/7] Add SDAG flags test
---
llvm/test/CodeGen/AArch64/llvm.sincos-fmf.ll | 16 ++++++++++++++++
1 file changed, 16 insertions(+)
create mode 100644 llvm/test/CodeGen/AArch64/llvm.sincos-fmf.ll
diff --git a/llvm/test/CodeGen/AArch64/llvm.sincos-fmf.ll b/llvm/test/CodeGen/AArch64/llvm.sincos-fmf.ll
new file mode 100644
index 00000000000000..456b7f98974a9e
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/llvm.sincos-fmf.ll
@@ -0,0 +1,16 @@
+; RUN: llc -mtriple=aarch64-gnu-linux -debug-only=isel -o /dev/null < %s 2>&1 | FileCheck %s
+
+; REQUIRES: asserts
+
+define { float, float } @test_sincos_f32_afn(float %a) {
+; CHECK-LABEL: Initial selection DAG: %bb.0 'test_sincos_f32_afn:'
+; CHECK-NEXT: SelectionDAG has 9 nodes:
+; CHECK-NEXT: t0: ch,glue = EntryToken
+; CHECK-NEXT: t2: f32,ch = CopyFromReg t0, Register:f32 %0
+; CHECK-NEXT: t3: f32,f32 = fsincos afn t2
+; CHECK-NEXT: t5: ch,glue = CopyToReg t0, Register:f32 $s0, t3
+; CHECK-NEXT: t7: ch,glue = CopyToReg t5, Register:f32 $s1, t3:1, t5:1
+; CHECK-NEXT: t8: ch = AArch64ISD::RET_GLUE t7, Register:f32 $s0, Register:f32 $s1, t7:1
+ %result = call afn { float, float } @llvm.sincos.f32(float %a)
+ ret { float, float } %result
+}
>From 5a1da2573d4a07ea37368ac5e0e98f7d23574673 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Fri, 18 Oct 2024 15:15:56 +0000
Subject: [PATCH 6/7] Fixups
- Test <3 x float> case and fix unrolling
- Fix LangRef nit
- Remove redundant tests
---
llvm/docs/LangRef.rst | 6 +-
.../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 11 +-
llvm/test/CodeGen/AArch64/llvm.sincos.ll | 561 +++---------------
llvm/test/CodeGen/ARM/llvm.sincos.ll | 241 --------
4 files changed, 101 insertions(+), 718 deletions(-)
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 268905881128f2..076350af7ace0d 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -15914,9 +15914,9 @@ The '``llvm.sincos.*``' intrinsics returns the sine and cosine of the operand.
Arguments:
""""""""""
-The argument is a :ref:`floating-point <t_floating>` or :ref:`vector <t_vector>`
-of floating-point values. Returns two values matching the argument type in a
-struct.
+The argument is a :ref:`floating-point <t_floating>` value or
+:ref:`vector <t_vector>` of floating-point values. Returns two values matching
+the argument type in a struct.
Semantics:
""""""""""
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 55cebc28e49275..3fcf957adc5f0f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -12534,8 +12534,15 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
Scalars1.push_back(EltOp.getValue(1));
}
- SDValue Vec0 = getBuildVector(VT, dl, Scalars0);
- SDValue Vec1 = getBuildVector(VT1, dl, Scalars1);
+ for (; i < ResNE; ++i) {
+ Scalars0.push_back(getUNDEF(EltVT));
+ Scalars1.push_back(getUNDEF(EltVT1));
+ }
+
+ EVT VecVT = EVT::getVectorVT(*getContext(), EltVT, ResNE);
+ EVT VecVT1 = EVT::getVectorVT(*getContext(), EltVT1, ResNE);
+ SDValue Vec0 = getBuildVector(VecVT, dl, Scalars0);
+ SDValue Vec1 = getBuildVector(VecVT1, dl, Scalars1);
return getMergeValues({Vec0, Vec1}, dl);
}
diff --git a/llvm/test/CodeGen/AArch64/llvm.sincos.ll b/llvm/test/CodeGen/AArch64/llvm.sincos.ll
index 2e3c02877dfcfa..c5efc796e7a3c4 100644
--- a/llvm/test/CodeGen/AArch64/llvm.sincos.ll
+++ b/llvm/test/CodeGen/AArch64/llvm.sincos.ll
@@ -237,190 +237,6 @@ define { <2 x half>, <2 x half> } @test_sincos_v2f16(<2 x half> %a) {
ret { <2 x half>, <2 x half> } %result
}
-define <2 x half> @test_sincos_v2f16_only_use_sin(<2 x half> %a) {
-; CHECK-LABEL: test_sincos_v2f16_only_use_sin:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #64
-; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 64
-; CHECK-NEXT: .cfi_offset w30, -16
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: mov h1, v0.h[1]
-; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT: add x0, sp, #36
-; CHECK-NEXT: add x1, sp, #32
-; CHECK-NEXT: fcvt s0, h1
-; CHECK-NEXT: bl sincosf
-; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT: add x0, sp, #28
-; CHECK-NEXT: add x1, sp, #24
-; CHECK-NEXT: fcvt s0, h0
-; CHECK-NEXT: bl sincosf
-; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT: add x0, sp, #44
-; CHECK-NEXT: add x1, sp, #40
-; CHECK-NEXT: mov h0, v0.h[2]
-; CHECK-NEXT: fcvt s0, h0
-; CHECK-NEXT: bl sincosf
-; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT: add x0, sp, #60
-; CHECK-NEXT: add x1, sp, #56
-; CHECK-NEXT: mov h0, v0.h[3]
-; CHECK-NEXT: fcvt s0, h0
-; CHECK-NEXT: bl sincosf
-; CHECK-NEXT: ldr s0, [sp, #36]
-; CHECK-NEXT: ldr s1, [sp, #28]
-; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
-; CHECK-NEXT: fcvt h2, s0
-; CHECK-NEXT: fcvt h0, s1
-; CHECK-NEXT: ldr s1, [sp, #44]
-; CHECK-NEXT: fcvt h1, s1
-; CHECK-NEXT: mov v0.h[1], v2.h[0]
-; CHECK-NEXT: ldr s2, [sp, #60]
-; CHECK-NEXT: mov v0.h[2], v1.h[0]
-; CHECK-NEXT: fcvt h1, s2
-; CHECK-NEXT: mov v0.h[3], v1.h[0]
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT: add sp, sp, #64
-; CHECK-NEXT: ret
-;
-; NO-LIBCALL-LABEL: test_sincos_v2f16_only_use_sin:
-; NO-LIBCALL: // %bb.0:
-; NO-LIBCALL-NEXT: sub sp, sp, #48
-; NO-LIBCALL-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
-; NO-LIBCALL-NEXT: .cfi_def_cfa_offset 48
-; NO-LIBCALL-NEXT: .cfi_offset w30, -16
-; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 def $q0
-; NO-LIBCALL-NEXT: mov h1, v0.h[1]
-; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
-; NO-LIBCALL-NEXT: fcvt s0, h1
-; NO-LIBCALL-NEXT: bl sinf
-; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
-; NO-LIBCALL-NEXT: fcvt h0, s0
-; NO-LIBCALL-NEXT: fcvt s1, h1
-; NO-LIBCALL-NEXT: str q0, [sp] // 16-byte Folded Spill
-; NO-LIBCALL-NEXT: fmov s0, s1
-; NO-LIBCALL-NEXT: bl sinf
-; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
-; NO-LIBCALL-NEXT: fcvt h2, s0
-; NO-LIBCALL-NEXT: mov h1, v1.h[2]
-; NO-LIBCALL-NEXT: fcvt s0, h1
-; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Folded Reload
-; NO-LIBCALL-NEXT: mov v2.h[1], v1.h[0]
-; NO-LIBCALL-NEXT: str q2, [sp] // 16-byte Folded Spill
-; NO-LIBCALL-NEXT: bl sinf
-; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
-; NO-LIBCALL-NEXT: fcvt h2, s0
-; NO-LIBCALL-NEXT: mov h1, v1.h[3]
-; NO-LIBCALL-NEXT: fcvt s0, h1
-; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Folded Reload
-; NO-LIBCALL-NEXT: mov v1.h[2], v2.h[0]
-; NO-LIBCALL-NEXT: str q1, [sp] // 16-byte Folded Spill
-; NO-LIBCALL-NEXT: bl sinf
-; NO-LIBCALL-NEXT: fcvt h1, s0
-; NO-LIBCALL-NEXT: ldr q0, [sp] // 16-byte Folded Reload
-; NO-LIBCALL-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
-; NO-LIBCALL-NEXT: mov v0.h[3], v1.h[0]
-; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 killed $q0
-; NO-LIBCALL-NEXT: add sp, sp, #48
-; NO-LIBCALL-NEXT: ret
- %result = call { <2 x half>, <2 x half> } @llvm.sincos.v2f16(<2 x half> %a)
- %result.0 = extractvalue { <2 x half>, <2 x half> } %result, 0
- ret <2 x half> %result.0
-}
-
-define <2 x half> @test_sincos_v2f16_only_use_cos(<2 x half> %a) {
-; CHECK-LABEL: test_sincos_v2f16_only_use_cos:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #64
-; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 64
-; CHECK-NEXT: .cfi_offset w30, -16
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: mov h1, v0.h[1]
-; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT: add x0, sp, #36
-; CHECK-NEXT: add x1, sp, #32
-; CHECK-NEXT: fcvt s0, h1
-; CHECK-NEXT: bl sincosf
-; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT: add x0, sp, #28
-; CHECK-NEXT: add x1, sp, #24
-; CHECK-NEXT: fcvt s0, h0
-; CHECK-NEXT: bl sincosf
-; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT: add x0, sp, #44
-; CHECK-NEXT: add x1, sp, #40
-; CHECK-NEXT: mov h0, v0.h[2]
-; CHECK-NEXT: fcvt s0, h0
-; CHECK-NEXT: bl sincosf
-; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT: add x0, sp, #60
-; CHECK-NEXT: add x1, sp, #56
-; CHECK-NEXT: mov h0, v0.h[3]
-; CHECK-NEXT: fcvt s0, h0
-; CHECK-NEXT: bl sincosf
-; CHECK-NEXT: ldr s0, [sp, #32]
-; CHECK-NEXT: ldr s1, [sp, #24]
-; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
-; CHECK-NEXT: fcvt h2, s0
-; CHECK-NEXT: fcvt h0, s1
-; CHECK-NEXT: ldr s1, [sp, #40]
-; CHECK-NEXT: fcvt h1, s1
-; CHECK-NEXT: mov v0.h[1], v2.h[0]
-; CHECK-NEXT: ldr s2, [sp, #56]
-; CHECK-NEXT: mov v0.h[2], v1.h[0]
-; CHECK-NEXT: fcvt h1, s2
-; CHECK-NEXT: mov v0.h[3], v1.h[0]
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT: add sp, sp, #64
-; CHECK-NEXT: ret
-;
-; NO-LIBCALL-LABEL: test_sincos_v2f16_only_use_cos:
-; NO-LIBCALL: // %bb.0:
-; NO-LIBCALL-NEXT: sub sp, sp, #48
-; NO-LIBCALL-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
-; NO-LIBCALL-NEXT: .cfi_def_cfa_offset 48
-; NO-LIBCALL-NEXT: .cfi_offset w30, -16
-; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 def $q0
-; NO-LIBCALL-NEXT: mov h1, v0.h[1]
-; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
-; NO-LIBCALL-NEXT: fcvt s0, h1
-; NO-LIBCALL-NEXT: bl cosf
-; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
-; NO-LIBCALL-NEXT: fcvt h0, s0
-; NO-LIBCALL-NEXT: fcvt s1, h1
-; NO-LIBCALL-NEXT: str q0, [sp] // 16-byte Folded Spill
-; NO-LIBCALL-NEXT: fmov s0, s1
-; NO-LIBCALL-NEXT: bl cosf
-; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
-; NO-LIBCALL-NEXT: fcvt h2, s0
-; NO-LIBCALL-NEXT: mov h1, v1.h[2]
-; NO-LIBCALL-NEXT: fcvt s0, h1
-; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Folded Reload
-; NO-LIBCALL-NEXT: mov v2.h[1], v1.h[0]
-; NO-LIBCALL-NEXT: str q2, [sp] // 16-byte Folded Spill
-; NO-LIBCALL-NEXT: bl cosf
-; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
-; NO-LIBCALL-NEXT: fcvt h2, s0
-; NO-LIBCALL-NEXT: mov h1, v1.h[3]
-; NO-LIBCALL-NEXT: fcvt s0, h1
-; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Folded Reload
-; NO-LIBCALL-NEXT: mov v1.h[2], v2.h[0]
-; NO-LIBCALL-NEXT: str q1, [sp] // 16-byte Folded Spill
-; NO-LIBCALL-NEXT: bl cosf
-; NO-LIBCALL-NEXT: fcvt h1, s0
-; NO-LIBCALL-NEXT: ldr q0, [sp] // 16-byte Folded Reload
-; NO-LIBCALL-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
-; NO-LIBCALL-NEXT: mov v0.h[3], v1.h[0]
-; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 killed $q0
-; NO-LIBCALL-NEXT: add sp, sp, #48
-; NO-LIBCALL-NEXT: ret
- %result = call { <2 x half>, <2 x half> } @llvm.sincos.v2f16(<2 x half> %a)
- %result.1 = extractvalue { <2 x half>, <2 x half> } %result, 1
- ret <2 x half> %result.1
-}
-
define { float, float } @test_sincos_f32(float %a) {
; CHECK-LABEL: test_sincos_f32:
; CHECK: // %bb.0:
@@ -456,46 +272,101 @@ define { float, float } @test_sincos_f32(float %a) {
ret { float, float } %result
}
-define float @test_sincos_f32_only_use_sin(float %a) {
-; CHECK-LABEL: test_sincos_f32_only_use_sin:
+define { <3 x float>, <3 x float> } @test_sincos_v3f32(<3 x float> %a) {
+; CHECK-LABEL: test_sincos_v3f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: .cfi_offset w30, -16
-; CHECK-NEXT: add x0, sp, #12
-; CHECK-NEXT: add x1, sp, #8
+; CHECK-NEXT: sub sp, sp, #80
+; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 80
+; CHECK-NEXT: .cfi_offset w19, -8
+; CHECK-NEXT: .cfi_offset w20, -16
+; CHECK-NEXT: .cfi_offset w21, -24
+; CHECK-NEXT: .cfi_offset w22, -32
+; CHECK-NEXT: .cfi_offset w30, -48
+; CHECK-NEXT: add x0, sp, #20
+; CHECK-NEXT: add x1, sp, #16
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
; CHECK-NEXT: bl sincosf
-; CHECK-NEXT: ldr s0, [sp, #12]
-; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT: ret
-;
-; NO-LIBCALL-LABEL: test_sincos_f32_only_use_sin:
-; NO-LIBCALL: // %bb.0:
-; NO-LIBCALL-NEXT: b sinf
- %result = call { float, float } @llvm.sincos.f32(float %a)
- %result.0 = extractvalue { float, float } %result, 0
- ret float %result.0
-}
-
-define float @test_sincos_f32_only_use_cos(float %a) {
-; CHECK-LABEL: test_sincos_f32_only_use_cos:
-; CHECK: // %bb.0:
-; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: .cfi_offset w30, -16
-; CHECK-NEXT: add x0, sp, #12
-; CHECK-NEXT: add x1, sp, #8
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: add x0, sp, #28
+; CHECK-NEXT: add x1, sp, #24
+; CHECK-NEXT: add x19, sp, #28
+; CHECK-NEXT: add x20, sp, #24
+; CHECK-NEXT: mov s0, v0.s[1]
; CHECK-NEXT: bl sincosf
-; CHECK-NEXT: ldr s0, [sp, #8]
-; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: add x0, sp, #44
+; CHECK-NEXT: add x1, sp, #40
+; CHECK-NEXT: add x21, sp, #44
+; CHECK-NEXT: add x22, sp, #40
+; CHECK-NEXT: mov s0, v0.s[2]
+; CHECK-NEXT: bl sincosf
+; CHECK-NEXT: ldp s1, s0, [sp, #16]
+; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-NEXT: ld1 { v0.s }[1], [x19]
+; CHECK-NEXT: ld1 { v1.s }[1], [x20]
+; CHECK-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ld1 { v0.s }[2], [x21]
+; CHECK-NEXT: ld1 { v1.s }[2], [x22]
+; CHECK-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #80
; CHECK-NEXT: ret
;
-; NO-LIBCALL-LABEL: test_sincos_f32_only_use_cos:
+; NO-LIBCALL-LABEL: test_sincos_v3f32:
; NO-LIBCALL: // %bb.0:
-; NO-LIBCALL-NEXT: b cosf
- %result = call { float, float } @llvm.sincos.f32(float %a)
- %result.1 = extractvalue { float, float } %result, 1
- ret float %result.1
+; NO-LIBCALL-NEXT: sub sp, sp, #80
+; NO-LIBCALL-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
+; NO-LIBCALL-NEXT: .cfi_def_cfa_offset 80
+; NO-LIBCALL-NEXT: .cfi_offset w30, -16
+; NO-LIBCALL-NEXT: .cfi_offset b8, -24
+; NO-LIBCALL-NEXT: .cfi_offset b9, -32
+; NO-LIBCALL-NEXT: mov s8, v0.s[1]
+; NO-LIBCALL-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: fmov s0, s8
+; NO-LIBCALL-NEXT: bl sinf
+; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 def $q0
+; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 killed $q0
+; NO-LIBCALL-NEXT: bl sinf
+; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 def $q0
+; NO-LIBCALL-NEXT: mov v0.s[1], v1.s[0]
+; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: mov s9, v0.s[2]
+; NO-LIBCALL-NEXT: fmov s0, s9
+; NO-LIBCALL-NEXT: bl sinf
+; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 def $q0
+; NO-LIBCALL-NEXT: mov v1.s[2], v0.s[0]
+; NO-LIBCALL-NEXT: fmov s0, s8
+; NO-LIBCALL-NEXT: str q1, [sp, #16] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: bl cosf
+; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 def $q0
+; NO-LIBCALL-NEXT: str q0, [sp] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 killed $q0
+; NO-LIBCALL-NEXT: bl cosf
+; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 def $q0
+; NO-LIBCALL-NEXT: mov v0.s[1], v1.s[0]
+; NO-LIBCALL-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: fmov s0, s9
+; NO-LIBCALL-NEXT: bl cosf
+; NO-LIBCALL-NEXT: fmov s2, s0
+; NO-LIBCALL-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload
+; NO-LIBCALL-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
+; NO-LIBCALL-NEXT: mov v1.s[2], v2.s[0]
+; NO-LIBCALL-NEXT: add sp, sp, #80
+; NO-LIBCALL-NEXT: ret
+ %result = call { <3 x float>, <3 x float> } @llvm.sincos.v3f32(<3 x float> %a)
+ ret { <3 x float>, <3 x float> } %result
}
define { <2 x float>, <2 x float> } @test_sincos_v2f32(<2 x float> %a) {
@@ -573,114 +444,6 @@ define { <2 x float>, <2 x float> } @test_sincos_v2f32(<2 x float> %a) {
ret { <2 x float>, <2 x float> } %result
}
-define <2 x float> @test_sincos_v2f32_only_use_sin(<2 x float> %a) {
-; CHECK-LABEL: test_sincos_v2f32_only_use_sin:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #48
-; CHECK-NEXT: stp x30, x19, [sp, #32] // 16-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 48
-; CHECK-NEXT: .cfi_offset w19, -8
-; CHECK-NEXT: .cfi_offset w30, -16
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: add x0, sp, #28
-; CHECK-NEXT: add x1, sp, #24
-; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
-; CHECK-NEXT: bl sincosf
-; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT: add x0, sp, #20
-; CHECK-NEXT: add x1, sp, #16
-; CHECK-NEXT: add x19, sp, #20
-; CHECK-NEXT: mov s0, v0.s[1]
-; CHECK-NEXT: bl sincosf
-; CHECK-NEXT: ldr s0, [sp, #28]
-; CHECK-NEXT: ld1 { v0.s }[1], [x19]
-; CHECK-NEXT: ldp x30, x19, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT: add sp, sp, #48
-; CHECK-NEXT: ret
-;
-; NO-LIBCALL-LABEL: test_sincos_v2f32_only_use_sin:
-; NO-LIBCALL: // %bb.0:
-; NO-LIBCALL-NEXT: sub sp, sp, #48
-; NO-LIBCALL-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
-; NO-LIBCALL-NEXT: .cfi_def_cfa_offset 48
-; NO-LIBCALL-NEXT: .cfi_offset w30, -16
-; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 def $q0
-; NO-LIBCALL-NEXT: str q0, [sp] // 16-byte Folded Spill
-; NO-LIBCALL-NEXT: mov s0, v0.s[1]
-; NO-LIBCALL-NEXT: bl sinf
-; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 def $q0
-; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
-; NO-LIBCALL-NEXT: ldr q0, [sp] // 16-byte Folded Reload
-; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 killed $q0
-; NO-LIBCALL-NEXT: bl sinf
-; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
-; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 def $q0
-; NO-LIBCALL-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
-; NO-LIBCALL-NEXT: mov v0.s[1], v1.s[0]
-; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 killed $q0
-; NO-LIBCALL-NEXT: add sp, sp, #48
-; NO-LIBCALL-NEXT: ret
- %result = call { <2 x float>, <2 x float> } @llvm.sincos.v2f32(<2 x float> %a)
- %result.0 = extractvalue { <2 x float>, <2 x float> } %result, 0
- ret <2 x float> %result.0
-}
-
-define <2 x float> @test_sincos_v2f32_only_use_cos(<2 x float> %a) {
-; CHECK-LABEL: test_sincos_v2f32_only_use_cos:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #48
-; CHECK-NEXT: stp x30, x19, [sp, #32] // 16-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 48
-; CHECK-NEXT: .cfi_offset w19, -8
-; CHECK-NEXT: .cfi_offset w30, -16
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: add x0, sp, #28
-; CHECK-NEXT: add x1, sp, #24
-; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
-; CHECK-NEXT: bl sincosf
-; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT: add x0, sp, #20
-; CHECK-NEXT: add x1, sp, #16
-; CHECK-NEXT: add x19, sp, #16
-; CHECK-NEXT: mov s0, v0.s[1]
-; CHECK-NEXT: bl sincosf
-; CHECK-NEXT: ldr s0, [sp, #24]
-; CHECK-NEXT: ld1 { v0.s }[1], [x19]
-; CHECK-NEXT: ldp x30, x19, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT: add sp, sp, #48
-; CHECK-NEXT: ret
-;
-; NO-LIBCALL-LABEL: test_sincos_v2f32_only_use_cos:
-; NO-LIBCALL: // %bb.0:
-; NO-LIBCALL-NEXT: sub sp, sp, #48
-; NO-LIBCALL-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
-; NO-LIBCALL-NEXT: .cfi_def_cfa_offset 48
-; NO-LIBCALL-NEXT: .cfi_offset w30, -16
-; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 def $q0
-; NO-LIBCALL-NEXT: str q0, [sp] // 16-byte Folded Spill
-; NO-LIBCALL-NEXT: mov s0, v0.s[1]
-; NO-LIBCALL-NEXT: bl cosf
-; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 def $q0
-; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
-; NO-LIBCALL-NEXT: ldr q0, [sp] // 16-byte Folded Reload
-; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 killed $q0
-; NO-LIBCALL-NEXT: bl cosf
-; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
-; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 def $q0
-; NO-LIBCALL-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
-; NO-LIBCALL-NEXT: mov v0.s[1], v1.s[0]
-; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 killed $q0
-; NO-LIBCALL-NEXT: add sp, sp, #48
-; NO-LIBCALL-NEXT: ret
- %result = call { <2 x float>, <2 x float> } @llvm.sincos.v2f32(<2 x float> %a)
- %result.1 = extractvalue { <2 x float>, <2 x float> } %result, 1
- ret <2 x float> %result.1
-}
-
define { double, double } @test_sincos_f64(double %a) {
; CHECK-LABEL: test_sincos_f64:
; CHECK: // %bb.0:
@@ -719,52 +482,6 @@ define { double, double } @test_sincos_f64(double %a) {
ret { double, double } %result
}
-define double @test_sincos_f64_only_use_sin(double %a) {
-; CHECK-LABEL: test_sincos_f64_only_use_sin:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #32
-; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: .cfi_offset w30, -16
-; CHECK-NEXT: add x0, sp, #24
-; CHECK-NEXT: add x1, sp, #8
-; CHECK-NEXT: bl sincos
-; CHECK-NEXT: ldr d0, [sp, #24]
-; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
-; CHECK-NEXT: add sp, sp, #32
-; CHECK-NEXT: ret
-;
-; NO-LIBCALL-LABEL: test_sincos_f64_only_use_sin:
-; NO-LIBCALL: // %bb.0:
-; NO-LIBCALL-NEXT: b sin
- %result = call { double, double } @llvm.sincos.f64(double %a)
- %result.0 = extractvalue { double, double } %result, 0
- ret double %result.0
-}
-
-define double @test_sincos_f64_only_use_cos(double %a) {
-; CHECK-LABEL: test_sincos_f64_only_use_cos:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #32
-; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: .cfi_offset w30, -16
-; CHECK-NEXT: add x0, sp, #24
-; CHECK-NEXT: add x1, sp, #8
-; CHECK-NEXT: bl sincos
-; CHECK-NEXT: ldr d0, [sp, #8]
-; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
-; CHECK-NEXT: add sp, sp, #32
-; CHECK-NEXT: ret
-;
-; NO-LIBCALL-LABEL: test_sincos_f64_only_use_cos:
-; NO-LIBCALL: // %bb.0:
-; NO-LIBCALL-NEXT: b cos
- %result = call { double, double } @llvm.sincos.f64(double %a)
- %result.1 = extractvalue { double, double } %result, 1
- ret double %result.1
-}
-
define { <2 x double>, <2 x double> } @test_sincos_v2f64(<2 x double> %a) {
; CHECK-LABEL: test_sincos_v2f64:
; CHECK: // %bb.0:
@@ -834,103 +551,3 @@ define { <2 x double>, <2 x double> } @test_sincos_v2f64(<2 x double> %a) {
%result = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> %a)
ret { <2 x double>, <2 x double> } %result
}
-
-define <2 x double> @test_sincos_v2f64_only_use_sin(<2 x double> %a) {
-; CHECK-LABEL: test_sincos_v2f64_only_use_sin:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #64
-; CHECK-NEXT: stp x30, x19, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 64
-; CHECK-NEXT: .cfi_offset w19, -8
-; CHECK-NEXT: .cfi_offset w30, -16
-; CHECK-NEXT: add x0, sp, #40
-; CHECK-NEXT: add x1, sp, #32
-; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT: bl sincos
-; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT: add x0, sp, #24
-; CHECK-NEXT: add x1, sp, #16
-; CHECK-NEXT: add x19, sp, #24
-; CHECK-NEXT: mov d0, v0.d[1]
-; CHECK-NEXT: bl sincos
-; CHECK-NEXT: ldr d0, [sp, #40]
-; CHECK-NEXT: ld1 { v0.d }[1], [x19]
-; CHECK-NEXT: ldp x30, x19, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT: add sp, sp, #64
-; CHECK-NEXT: ret
-;
-; NO-LIBCALL-LABEL: test_sincos_v2f64_only_use_sin:
-; NO-LIBCALL: // %bb.0:
-; NO-LIBCALL-NEXT: sub sp, sp, #48
-; NO-LIBCALL-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
-; NO-LIBCALL-NEXT: .cfi_def_cfa_offset 48
-; NO-LIBCALL-NEXT: .cfi_offset w30, -16
-; NO-LIBCALL-NEXT: str q0, [sp] // 16-byte Folded Spill
-; NO-LIBCALL-NEXT: mov d0, v0.d[1]
-; NO-LIBCALL-NEXT: bl sin
-; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 def $q0
-; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
-; NO-LIBCALL-NEXT: ldr q0, [sp] // 16-byte Folded Reload
-; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 killed $q0
-; NO-LIBCALL-NEXT: bl sin
-; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
-; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 def $q0
-; NO-LIBCALL-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
-; NO-LIBCALL-NEXT: mov v0.d[1], v1.d[0]
-; NO-LIBCALL-NEXT: add sp, sp, #48
-; NO-LIBCALL-NEXT: ret
- %result = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> %a)
- %result.0 = extractvalue { <2 x double>, <2 x double> } %result, 0
- ret <2 x double> %result.0
-}
-
-define <2 x double> @test_sincos_v2f64_only_use_cos(<2 x double> %a) {
-; CHECK-LABEL: test_sincos_v2f64_only_use_cos:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #64
-; CHECK-NEXT: stp x30, x19, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 64
-; CHECK-NEXT: .cfi_offset w19, -8
-; CHECK-NEXT: .cfi_offset w30, -16
-; CHECK-NEXT: add x0, sp, #40
-; CHECK-NEXT: add x1, sp, #32
-; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT: bl sincos
-; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT: add x0, sp, #24
-; CHECK-NEXT: add x1, sp, #16
-; CHECK-NEXT: add x19, sp, #16
-; CHECK-NEXT: mov d0, v0.d[1]
-; CHECK-NEXT: bl sincos
-; CHECK-NEXT: ldr d0, [sp, #32]
-; CHECK-NEXT: ld1 { v0.d }[1], [x19]
-; CHECK-NEXT: ldp x30, x19, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT: add sp, sp, #64
-; CHECK-NEXT: ret
-;
-; NO-LIBCALL-LABEL: test_sincos_v2f64_only_use_cos:
-; NO-LIBCALL: // %bb.0:
-; NO-LIBCALL-NEXT: sub sp, sp, #48
-; NO-LIBCALL-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
-; NO-LIBCALL-NEXT: .cfi_def_cfa_offset 48
-; NO-LIBCALL-NEXT: .cfi_offset w30, -16
-; NO-LIBCALL-NEXT: str q0, [sp] // 16-byte Folded Spill
-; NO-LIBCALL-NEXT: mov d0, v0.d[1]
-; NO-LIBCALL-NEXT: bl cos
-; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 def $q0
-; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
-; NO-LIBCALL-NEXT: ldr q0, [sp] // 16-byte Folded Reload
-; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 killed $q0
-; NO-LIBCALL-NEXT: bl cos
-; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
-; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 def $q0
-; NO-LIBCALL-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
-; NO-LIBCALL-NEXT: mov v0.d[1], v1.d[0]
-; NO-LIBCALL-NEXT: add sp, sp, #48
-; NO-LIBCALL-NEXT: ret
- %result = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> %a)
- %result.1 = extractvalue { <2 x double>, <2 x double> } %result, 1
- ret <2 x double> %result.1
-}
diff --git a/llvm/test/CodeGen/ARM/llvm.sincos.ll b/llvm/test/CodeGen/ARM/llvm.sincos.ll
index b08a642fd37178..9628405df6bcb9 100644
--- a/llvm/test/CodeGen/ARM/llvm.sincos.ll
+++ b/llvm/test/CodeGen/ARM/llvm.sincos.ll
@@ -107,76 +107,6 @@ define { <2 x half>, <2 x half> } @test_sincos_v2f16(<2 x half> %a) {
ret { <2 x half>, <2 x half> } %result
}
-define <2 x half> @test_sincos_v2f16_only_use_sin(<2 x half> %a) {
-; CHECK-LABEL: test_sincos_v2f16_only_use_sin:
-; CHECK: @ %bb.0:
-; CHECK-NEXT: push {r4, lr}
-; CHECK-NEXT: sub sp, #24
-; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: mov r0, r1
-; CHECK-NEXT: bl __gnu_h2f_ieee
-; CHECK-NEXT: add r1, sp, #16
-; CHECK-NEXT: add r2, sp, #12
-; CHECK-NEXT: bl sincosf
-; CHECK-NEXT: mov r0, r4
-; CHECK-NEXT: bl __gnu_h2f_ieee
-; CHECK-NEXT: add r1, sp, #8
-; CHECK-NEXT: add r2, sp, #4
-; CHECK-NEXT: bl sincosf
-; CHECK-NEXT: ldr r0, [sp, #16]
-; CHECK-NEXT: bl __gnu_f2h_ieee
-; CHECK-NEXT: ldr r1, [sp, #8]
-; CHECK-NEXT: strh.w r0, [sp, #22]
-; CHECK-NEXT: mov r0, r1
-; CHECK-NEXT: bl __gnu_f2h_ieee
-; CHECK-NEXT: strh.w r0, [sp, #20]
-; CHECK-NEXT: add r0, sp, #20
-; CHECK-NEXT: vld1.32 {d16[0]}, [r0:32]
-; CHECK-NEXT: vmovl.u16 q8, d16
-; CHECK-NEXT: vmov.32 r0, d16[0]
-; CHECK-NEXT: vmov.32 r1, d16[1]
-; CHECK-NEXT: add sp, #24
-; CHECK-NEXT: pop {r4, pc}
- %result = call { <2 x half>, <2 x half> } @llvm.sincos.v2f16(<2 x half> %a)
- %result.0 = extractvalue { <2 x half>, <2 x half> } %result, 0
- ret <2 x half> %result.0
-}
-
-define <2 x half> @test_sincos_v2f16_only_use_cos(<2 x half> %a) {
-; CHECK-LABEL: test_sincos_v2f16_only_use_cos:
-; CHECK: @ %bb.0:
-; CHECK-NEXT: push {r4, lr}
-; CHECK-NEXT: sub sp, #24
-; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: mov r0, r1
-; CHECK-NEXT: bl __gnu_h2f_ieee
-; CHECK-NEXT: add r1, sp, #16
-; CHECK-NEXT: add r2, sp, #12
-; CHECK-NEXT: bl sincosf
-; CHECK-NEXT: mov r0, r4
-; CHECK-NEXT: bl __gnu_h2f_ieee
-; CHECK-NEXT: add r1, sp, #8
-; CHECK-NEXT: add r2, sp, #4
-; CHECK-NEXT: bl sincosf
-; CHECK-NEXT: ldr r0, [sp, #12]
-; CHECK-NEXT: bl __gnu_f2h_ieee
-; CHECK-NEXT: ldr r1, [sp, #4]
-; CHECK-NEXT: strh.w r0, [sp, #22]
-; CHECK-NEXT: mov r0, r1
-; CHECK-NEXT: bl __gnu_f2h_ieee
-; CHECK-NEXT: strh.w r0, [sp, #20]
-; CHECK-NEXT: add r0, sp, #20
-; CHECK-NEXT: vld1.32 {d16[0]}, [r0:32]
-; CHECK-NEXT: vmovl.u16 q8, d16
-; CHECK-NEXT: vmov.32 r0, d16[0]
-; CHECK-NEXT: vmov.32 r1, d16[1]
-; CHECK-NEXT: add sp, #24
-; CHECK-NEXT: pop {r4, pc}
- %result = call { <2 x half>, <2 x half> } @llvm.sincos.v2f16(<2 x half> %a)
- %result.1 = extractvalue { <2 x half>, <2 x half> } %result, 1
- ret <2 x half> %result.1
-}
-
define { float, float } @test_sincos_f32(float %a) {
; CHECK-LABEL: test_sincos_f32:
; CHECK: @ %bb.0:
@@ -191,37 +121,6 @@ define { float, float } @test_sincos_f32(float %a) {
ret { float, float } %result
}
-define float @test_sincos_f32_only_use_sin(float %a) {
-; CHECK-LABEL: test_sincos_f32_only_use_sin:
-; CHECK: @ %bb.0:
-; CHECK-NEXT: push {r7, lr}
-; CHECK-NEXT: sub sp, #8
-; CHECK-NEXT: add r1, sp, #4
-; CHECK-NEXT: mov r2, sp
-; CHECK-NEXT: bl sincosf
-; CHECK-NEXT: ldr r0, [sp, #4]
-; CHECK-NEXT: add sp, #8
-; CHECK-NEXT: pop {r7, pc}
- %result = call { float, float } @llvm.sincos.f32(float %a)
- %result.0 = extractvalue { float, float } %result, 0
- ret float %result.0
-}
-
-define float @test_sincos_f32_only_use_cos(float %a) {
-; CHECK-LABEL: test_sincos_f32_only_use_cos:
-; CHECK: @ %bb.0:
-; CHECK-NEXT: push {r7, lr}
-; CHECK-NEXT: sub sp, #8
-; CHECK-NEXT: add r1, sp, #4
-; CHECK-NEXT: mov r2, sp
-; CHECK-NEXT: bl sincosf
-; CHECK-NEXT: ldr r0, [sp], #8
-; CHECK-NEXT: pop {r7, pc}
- %result = call { float, float } @llvm.sincos.f32(float %a)
- %result.1 = extractvalue { float, float } %result, 1
- ret float %result.1
-}
-
define { <2 x float>, <2 x float> } @test_sincos_v2f32(<2 x float> %a) {
; CHECK-LABEL: test_sincos_v2f32:
; CHECK: @ %bb.0:
@@ -250,58 +149,6 @@ define { <2 x float>, <2 x float> } @test_sincos_v2f32(<2 x float> %a) {
ret { <2 x float>, <2 x float> } %result
}
-define <2 x float> @test_sincos_v2f32_only_use_sin(<2 x float> %a) {
-; CHECK-LABEL: test_sincos_v2f32_only_use_sin:
-; CHECK: @ %bb.0:
-; CHECK-NEXT: push {r7, lr}
-; CHECK-NEXT: vpush {d8}
-; CHECK-NEXT: sub sp, #16
-; CHECK-NEXT: vmov d8, r0, r1
-; CHECK-NEXT: add r1, sp, #4
-; CHECK-NEXT: mov r2, sp
-; CHECK-NEXT: vmov r0, s17
-; CHECK-NEXT: bl sincosf
-; CHECK-NEXT: vmov r0, s16
-; CHECK-NEXT: add r1, sp, #12
-; CHECK-NEXT: add r2, sp, #8
-; CHECK-NEXT: bl sincosf
-; CHECK-NEXT: vldr s1, [sp, #4]
-; CHECK-NEXT: vldr s0, [sp, #12]
-; CHECK-NEXT: vmov r0, r1, d0
-; CHECK-NEXT: add sp, #16
-; CHECK-NEXT: vpop {d8}
-; CHECK-NEXT: pop {r7, pc}
- %result = call { <2 x float>, <2 x float> } @llvm.sincos.v2f32(<2 x float> %a)
- %result.0 = extractvalue { <2 x float>, <2 x float> } %result, 0
- ret <2 x float> %result.0
-}
-
-define <2 x float> @test_sincos_v2f32_only_use_cos(<2 x float> %a) {
-; CHECK-LABEL: test_sincos_v2f32_only_use_cos:
-; CHECK: @ %bb.0:
-; CHECK-NEXT: push {r7, lr}
-; CHECK-NEXT: vpush {d8}
-; CHECK-NEXT: sub sp, #16
-; CHECK-NEXT: vmov d8, r0, r1
-; CHECK-NEXT: add r1, sp, #4
-; CHECK-NEXT: mov r2, sp
-; CHECK-NEXT: vmov r0, s17
-; CHECK-NEXT: bl sincosf
-; CHECK-NEXT: vmov r0, s16
-; CHECK-NEXT: add r1, sp, #12
-; CHECK-NEXT: add r2, sp, #8
-; CHECK-NEXT: bl sincosf
-; CHECK-NEXT: vldr s1, [sp]
-; CHECK-NEXT: vldr s0, [sp, #8]
-; CHECK-NEXT: vmov r0, r1, d0
-; CHECK-NEXT: add sp, #16
-; CHECK-NEXT: vpop {d8}
-; CHECK-NEXT: pop {r7, pc}
- %result = call { <2 x float>, <2 x float> } @llvm.sincos.v2f32(<2 x float> %a)
- %result.1 = extractvalue { <2 x float>, <2 x float> } %result, 1
- ret <2 x float> %result.1
-}
-
define { double, double } @test_sincos_f64(double %a) {
; CHECK-LABEL: test_sincos_f64:
; CHECK: @ %bb.0:
@@ -317,37 +164,6 @@ define { double, double } @test_sincos_f64(double %a) {
ret { double, double } %result
}
-define double @test_sincos_f64_only_use_sin(double %a) {
-; CHECK-LABEL: test_sincos_f64_only_use_sin:
-; CHECK: @ %bb.0:
-; CHECK-NEXT: push {r7, lr}
-; CHECK-NEXT: sub sp, #16
-; CHECK-NEXT: add r2, sp, #8
-; CHECK-NEXT: mov r3, sp
-; CHECK-NEXT: bl sincos
-; CHECK-NEXT: ldrd r0, r1, [sp, #8]
-; CHECK-NEXT: add sp, #16
-; CHECK-NEXT: pop {r7, pc}
- %result = call { double, double } @llvm.sincos.f64(double %a)
- %result.0 = extractvalue { double, double } %result, 0
- ret double %result.0
-}
-
-define double @test_sincos_f64_only_use_cos(double %a) {
-; CHECK-LABEL: test_sincos_f64_only_use_cos:
-; CHECK: @ %bb.0:
-; CHECK-NEXT: push {r7, lr}
-; CHECK-NEXT: sub sp, #16
-; CHECK-NEXT: add r2, sp, #8
-; CHECK-NEXT: mov r3, sp
-; CHECK-NEXT: bl sincos
-; CHECK-NEXT: ldrd r0, r1, [sp], #16
-; CHECK-NEXT: pop {r7, pc}
- %result = call { double, double } @llvm.sincos.f64(double %a)
- %result.1 = extractvalue { double, double } %result, 1
- ret double %result.1
-}
-
define { <2 x double>, <2 x double> } @test_sincos_v2f64(<2 x double> %a) {
; CHECK-LABEL: test_sincos_v2f64:
; CHECK: @ %bb.0:
@@ -376,63 +192,6 @@ define { <2 x double>, <2 x double> } @test_sincos_v2f64(<2 x double> %a) {
ret { <2 x double>, <2 x double> } %result
}
-define <2 x double> @test_sincos_v2f64_only_use_sin(<2 x double> %a) {
-; CHECK-LABEL: test_sincos_v2f64_only_use_sin:
-; CHECK: @ %bb.0:
-; CHECK-NEXT: push {r4, r5, r6, r7, lr}
-; CHECK-NEXT: sub sp, #36
-; CHECK-NEXT: mov r6, r3
-; CHECK-NEXT: mov r4, r2
-; CHECK-NEXT: add r2, sp, #24
-; CHECK-NEXT: add r3, sp, #16
-; CHECK-NEXT: mov r7, r1
-; CHECK-NEXT: mov r5, r0
-; CHECK-NEXT: mov r0, r4
-; CHECK-NEXT: mov r1, r6
-; CHECK-NEXT: bl sincos
-; CHECK-NEXT: add r2, sp, #8
-; CHECK-NEXT: mov r3, sp
-; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: mov r1, r7
-; CHECK-NEXT: bl sincos
-; CHECK-NEXT: vldr d16, [sp, #24]
-; CHECK-NEXT: ldrd r0, r1, [sp, #8]
-; CHECK-NEXT: vmov r2, r3, d16
-; CHECK-NEXT: add sp, #36
-; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
- %result = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> %a)
- %result.0 = extractvalue { <2 x double>, <2 x double> } %result, 0
- ret <2 x double> %result.0
-}
-
-define <2 x double> @test_sincos_v2f64_only_use_cos(<2 x double> %a) {
-; CHECK-LABEL: test_sincos_v2f64_only_use_cos:
-; CHECK: @ %bb.0:
-; CHECK-NEXT: push {r4, r5, r6, r7, lr}
-; CHECK-NEXT: sub sp, #36
-; CHECK-NEXT: mov r6, r3
-; CHECK-NEXT: mov r4, r2
-; CHECK-NEXT: add r2, sp, #24
-; CHECK-NEXT: add r3, sp, #16
-; CHECK-NEXT: mov r7, r1
-; CHECK-NEXT: mov r5, r0
-; CHECK-NEXT: mov r0, r4
-; CHECK-NEXT: mov r1, r6
-; CHECK-NEXT: bl sincos
-; CHECK-NEXT: add r2, sp, #8
-; CHECK-NEXT: mov r3, sp
-; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: mov r1, r7
-; CHECK-NEXT: bl sincos
-; CHECK-NEXT: vldr d16, [sp, #16]
-; CHECK-NEXT: vmov r2, r3, d16
-; CHECK-NEXT: ldrd r0, r1, [sp], #36
-; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
- %result = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> %a)
- %result.1 = extractvalue { <2 x double>, <2 x double> } %result, 1
- ret <2 x double> %result.1
-}
-
define { fp128, fp128 } @test_sincos_f128(fp128 %a) {
; CHECK-LABEL: test_sincos_f128:
; CHECK: @ %bb.0:
>From d8ca29309b2f48bdfee65935cf6972b83c3ab472 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Mon, 21 Oct 2024 11:34:09 +0000
Subject: [PATCH 7/7] Add and use `WidenVecRes_UnaryOpWithTwoResults` for
SINCOS + FREXP
---
llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 4 +-
.../SelectionDAG/LegalizeVectorTypes.cpp | 46 ++++++--
llvm/test/CodeGen/AArch64/llvm.frexp.ll | 101 ++++++++++++++++++
3 files changed, 143 insertions(+), 8 deletions(-)
create mode 100644 llvm/test/CodeGen/AArch64/llvm.frexp.ll
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 33befb8d4ac0e2..c7e0bd86795f6d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -1072,7 +1072,9 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
SDValue WidenVecRes_ExpOp(SDNode *N);
SDValue WidenVecRes_Unary(SDNode *N);
SDValue WidenVecRes_InregOp(SDNode *N);
- SDValue WidenVecRes_FSINCOS(SDNode *N);
+ SDValue WidenVecRes_UnaryOpWithTwoResults(SDNode *N, unsigned ResNo);
+ void ReplaceOtherWidenResult(SDNode *N, SDNode *WidenNode,
+ unsigned WidenResNo);
// Widen Vector Operand.
bool WidenVectorOperand(SDNode *N, unsigned OpNo);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 65c9bb64b3cc14..18480327217e14 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -4435,6 +4435,22 @@ SDValue DAGTypeLegalizer::SplitVecOp_VECTOR_HISTOGRAM(SDNode *N) {
// Result Vector Widening
//===----------------------------------------------------------------------===//
+void DAGTypeLegalizer::ReplaceOtherWidenResult(SDNode *N, SDNode *WidenNode,
+ unsigned WidenResNo) {
+ assert(N->getNumValues() == 2 && "expected node with two results");
+ unsigned OtherNo = 1 - WidenResNo;
+ EVT OtherVT = N->getValueType(OtherNo);
+ if (getTypeAction(OtherVT) == TargetLowering::TypeWidenVector) {
+ SetWidenedVector(SDValue(N, OtherNo), SDValue(WidenNode, OtherNo));
+ } else {
+ SDLoc DL(N);
+ SDValue OtherVal = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, OtherVT,
+ SDValue(WidenNode, OtherNo),
+ DAG.getVectorIdxConstant(0, DL));
+ ReplaceValueWith(SDValue(N, OtherNo), OtherVal);
+ }
+}
+
void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
LLVM_DEBUG(dbgs() << "Widen node result " << ResNo << ": "; N->dump(&DAG));
@@ -4454,6 +4470,8 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
if (!TLI.isOperationLegalOrCustomOrPromote(N->getOpcode(), WideVecVT) &&
TLI.isOperationExpand(N->getOpcode(), VT.getScalarType())) {
Res = DAG.UnrollVectorOp(N, WideVecVT.getVectorNumElements());
+ if (N->getNumValues() == 2)
+ ReplaceOtherWidenResult(N, Res.getNode(), ResNo);
return true;
}
return false;
@@ -4758,12 +4776,10 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::VP_FSHR:
Res = WidenVecRes_Ternary(N);
break;
+ case ISD::FFREXP:
case ISD::FSINCOS: {
if (!unrollExpandedOp())
- Res = WidenVecRes_FSINCOS(N);
- for (unsigned ResNum = 0; ResNum < N->getNumValues(); ResNum++)
- SetWidenedVector(SDValue(N, ResNum), Res.getValue(ResNum));
- Res = SDValue();
+ Res = WidenVecRes_UnaryOpWithTwoResults(N, ResNo);
break;
}
}
@@ -5514,10 +5530,26 @@ SDValue DAGTypeLegalizer::WidenVecRes_InregOp(SDNode *N) {
WidenVT, WidenLHS, DAG.getValueType(ExtVT));
}
-SDValue DAGTypeLegalizer::WidenVecRes_FSINCOS(SDNode *N) {
- EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+SDValue DAGTypeLegalizer::WidenVecRes_UnaryOpWithTwoResults(SDNode *N,
+ unsigned ResNo) {
+ LLVMContext &Ctx = *DAG.getContext();
SDValue InOp = GetWidenedVector(N->getOperand(0));
- return DAG.getNode(N->getOpcode(), SDLoc(N), {WidenVT, WidenVT}, InOp);
+
+ EVT WidenVT = TLI.getTypeToTransformTo(Ctx, N->getValueType(ResNo));
+ ElementCount WidenEC = WidenVT.getVectorElementCount();
+
+ EVT VT0 = N->getValueType(0);
+ EVT VT1 = N->getValueType(1);
+
+ EVT WidenVT0 = EVT::getVectorVT(Ctx, VT0.getVectorElementType(), WidenEC);
+ EVT WidenVT1 = EVT::getVectorVT(Ctx, VT1.getVectorElementType(), WidenEC);
+
+ SDNode *WidenNode =
+ DAG.getNode(N->getOpcode(), SDLoc(N), {WidenVT0, WidenVT1}, InOp)
+ .getNode();
+
+ ReplaceOtherWidenResult(N, WidenNode, ResNo);
+ return SDValue(WidenNode, ResNo);
}
SDValue DAGTypeLegalizer::WidenVecRes_MERGE_VALUES(SDNode *N, unsigned ResNo) {
diff --git a/llvm/test/CodeGen/AArch64/llvm.frexp.ll b/llvm/test/CodeGen/AArch64/llvm.frexp.ll
new file mode 100644
index 00000000000000..e4cb8ed6eaf90f
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/llvm.frexp.ll
@@ -0,0 +1,101 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=aarch64-gnu-linux < %s | FileCheck -check-prefixes=CHECK %s
+
+define { <2 x half>, <2 x i32> } @test_frexp_v2f16_v2i32(<2 x half> %a) {
+; CHECK-LABEL: test_frexp_v2f16_v2i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #64
+; CHECK-NEXT: stp x30, x19, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: .cfi_offset w19, -8
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: mov h1, v0.h[1]
+; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: add x0, sp, #36
+; CHECK-NEXT: add x19, sp, #36
+; CHECK-NEXT: fcvt s0, h1
+; CHECK-NEXT: bl frexpf
+; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: add x0, sp, #32
+; CHECK-NEXT: fcvt s1, h1
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: fmov s0, s1
+; CHECK-NEXT: bl frexpf
+; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: fcvt h2, s0
+; CHECK-NEXT: add x0, sp, #40
+; CHECK-NEXT: mov h1, v1.h[2]
+; CHECK-NEXT: fcvt s0, h1
+; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: mov v2.h[1], v1.h[0]
+; CHECK-NEXT: str q2, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: bl frexpf
+; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: fcvt h2, s0
+; CHECK-NEXT: add x0, sp, #44
+; CHECK-NEXT: mov h1, v1.h[3]
+; CHECK-NEXT: fcvt s0, h1
+; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: mov v1.h[2], v2.h[0]
+; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: bl frexpf
+; CHECK-NEXT: fcvt h2, s0
+; CHECK-NEXT: ldr s1, [sp, #32]
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: ld1 { v1.s }[1], [x19]
+; CHECK-NEXT: ldp x30, x19, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q1
+; CHECK-NEXT: mov v0.h[3], v2.h[0]
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: add sp, sp, #64
+; CHECK-NEXT: ret
+ %result = call { <2 x half>, <2 x i32> } @llvm.frexp.v2f16.v2i32(<2 x half> %a)
+ ret { <2 x half>, <2 x i32> } %result
+}
+
+define { <3 x float>, <3 x i32> } @test_frexp_v3f16_v3i32(<3 x float> %a) {
+; CHECK-LABEL: test_frexp_v3f16_v3i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #80
+; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
+; CHECK-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 80
+; CHECK-NEXT: .cfi_offset w19, -8
+; CHECK-NEXT: .cfi_offset w20, -16
+; CHECK-NEXT: .cfi_offset w30, -32
+; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: mov s0, v0.s[1]
+; CHECK-NEXT: add x0, sp, #56
+; CHECK-NEXT: add x19, sp, #56
+; CHECK-NEXT: bl frexpf
+; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: add x0, sp, #44
+; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
+; CHECK-NEXT: bl frexpf
+; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-NEXT: add x0, sp, #60
+; CHECK-NEXT: add x20, sp, #60
+; CHECK-NEXT: mov v0.s[1], v1.s[0]
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: mov s0, v0.s[2]
+; CHECK-NEXT: bl frexpf
+; CHECK-NEXT: ldr s1, [sp, #44]
+; CHECK-NEXT: ldr q2, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
+; CHECK-NEXT: ld1 { v1.s }[1], [x19]
+; CHECK-NEXT: mov v2.s[2], v0.s[0]
+; CHECK-NEXT: ld1 { v1.s }[2], [x20]
+; CHECK-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: mov v0.16b, v2.16b
+; CHECK-NEXT: add sp, sp, #80
+; CHECK-NEXT: ret
+ %result = call { <3 x float>, <3 x i32> } @llvm.frexp.v3float.v3i32(<3 x float> %a)
+ ret { <3 x float>, <3 x i32> } %result
+}
More information about the llvm-commits
mailing list