[llvm] 701223a - [IR] Add llvm.sincospi intrinsic (#125873)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 11 01:01:34 PST 2025
Author: Benjamin Maxwell
Date: 2025-02-11T09:01:30Z
New Revision: 701223ac20a45d23b9b01c8a514294eb16219d79
URL: https://github.com/llvm/llvm-project/commit/701223ac20a45d23b9b01c8a514294eb16219d79
DIFF: https://github.com/llvm/llvm-project/commit/701223ac20a45d23b9b01c8a514294eb16219d79.diff
LOG: [IR] Add llvm.sincospi intrinsic (#125873)
This adds the `llvm.sincospi` intrinsic, legalization, and lowering
(mostly reusing the lowering for sincos and frexp).
The `llvm.sincospi` intrinsic takes a floating-point value and returns
both the sine and cosine of the value multiplied by pi. It computes the
result more accurately than the naive approach of doing the
multiplication ahead of time, especially for large input values.
```
declare { float, float } @llvm.sincospi.f32(float %Val)
declare { double, double } @llvm.sincospi.f64(double %Val)
declare { x86_fp80, x86_fp80 } @llvm.sincospi.f80(x86_fp80 %Val)
declare { fp128, fp128 } @llvm.sincospi.f128(fp128 %Val)
declare { ppc_fp128, ppc_fp128 } @llvm.sincospi.ppcf128(ppc_fp128 %Val)
declare { <4 x float>, <4 x float> } @llvm.sincospi.v4f32(<4 x float> %Val)
```
Currently, the default lowering of this intrinsic relies on the
`sincospi[f|l]` functions being available in the target's runtime (e.g.
libc).
Added:
llvm/test/CodeGen/AArch64/llvm.sincospi.ll
llvm/test/CodeGen/AArch64/veclib-llvm.sincospi.ll
Modified:
llvm/docs/LangRef.rst
llvm/include/llvm/CodeGen/BasicTTIImpl.h
llvm/include/llvm/CodeGen/ISDOpcodes.h
llvm/include/llvm/CodeGen/RuntimeLibcallUtil.h
llvm/include/llvm/IR/Intrinsics.td
llvm/include/llvm/IR/RuntimeLibcalls.def
llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
llvm/lib/CodeGen/TargetLoweringBase.cpp
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Removed:
################################################################################
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 5cdb19fa03fc71f..2d72e548ec82a37 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -16126,6 +16126,52 @@ of the argument.
When specified with the fast-math-flag 'afn', the result may be approximated
using a less accurate calculation.
+'``llvm.sincospi.*``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.sincospi`` on any
+floating-point or vector of floating-point type. Not all targets support
+all types however.
+
+::
+
+ declare { float, float } @llvm.sincospi.f32(float %Val)
+ declare { double, double } @llvm.sincospi.f64(double %Val)
+ declare { x86_fp80, x86_fp80 } @llvm.sincospi.f80(x86_fp80 %Val)
+ declare { fp128, fp128 } @llvm.sincospi.f128(fp128 %Val)
+ declare { ppc_fp128, ppc_fp128 } @llvm.sincospi.ppcf128(ppc_fp128 %Val)
+ declare { <4 x float>, <4 x float> } @llvm.sincospi.v4f32(<4 x float> %Val)
+
+Overview:
+"""""""""
+
+The '``llvm.sincospi.*``' intrinsic returns the sine and cosine of pi*operand.
+
+Arguments:
+""""""""""
+
+The argument is a :ref:`floating-point <t_floating>` value or
+:ref:`vector <t_vector>` of floating-point values. Returns two values matching
+the argument type in a struct.
+
+Semantics:
+""""""""""
+
+This is equivalent to the ``llvm.sincos.*`` intrinsic where the argument has been
+multiplied by pi; however, it computes the result more accurately, especially
+for large input values.
+
+.. note::
+
+ Currently, the default lowering of this intrinsic relies on the ``sincospi[f|l]``
+ functions being available in the target's runtime (e.g. libc).
+
+When specified with the fast-math-flag 'afn', the result may be approximated
+using a less accurate calculation.
+
'``llvm.modf.*``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 8468992ed4b7a3d..339b83637fa8f1c 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -2101,6 +2101,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
case Intrinsic::sincos:
ISD = ISD::FSINCOS;
break;
+ case Intrinsic::sincospi:
+ ISD = ISD::FSINCOSPI;
+ break;
case Intrinsic::modf:
ISD = ISD::FMODF;
break;
diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index 046d9befd0e9158..68ed812222dfdf8 100644
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -1058,6 +1058,10 @@ enum NodeType {
/// FSINCOS - Compute both fsin and fcos as a single operation.
FSINCOS,
+ /// FSINCOSPI - Compute both the sine and cosine times pi more accurately
+ /// than FSINCOS(pi*x), especially for large x.
+ FSINCOSPI,
+
/// FMODF - Decomposes the operand into integral and fractional parts, each
/// having the same type and sign as the operand.
FMODF,
diff --git a/llvm/include/llvm/CodeGen/RuntimeLibcallUtil.h b/llvm/include/llvm/CodeGen/RuntimeLibcallUtil.h
index 59313520e0d831c..34d783ae3f513e9 100644
--- a/llvm/include/llvm/CodeGen/RuntimeLibcallUtil.h
+++ b/llvm/include/llvm/CodeGen/RuntimeLibcallUtil.h
@@ -66,6 +66,10 @@ Libcall getFREXP(EVT RetVT);
/// UNKNOWN_LIBCALL if there is none.
Libcall getFSINCOS(EVT RetVT);
+/// getSINCOSPI - Return the SINCOSPI_* value for the given types, or
+/// UNKNOWN_LIBCALL if there is none.
+Libcall getSINCOSPI(EVT RetVT);
+
/// getMODF - Return the MODF_* value for the given types, or
/// UNKNOWN_LIBCALL if there is none.
Libcall getMODF(EVT RetVT);
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 9671c81ab3d32d4..14ecae41ff08f92 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -1075,6 +1075,8 @@ let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrWillReturn] in {
def int_roundeven : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
def int_sincos : DefaultAttrsIntrinsic<[LLVMMatchType<0>, LLVMMatchType<0>],
[llvm_anyfloat_ty]>;
+ def int_sincospi : DefaultAttrsIntrinsic<[LLVMMatchType<0>, LLVMMatchType<0>],
+ [llvm_anyfloat_ty]>;
def int_modf : DefaultAttrsIntrinsic<[LLVMMatchType<0>, LLVMMatchType<0>],
[llvm_anyfloat_ty]>;
diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.def b/llvm/include/llvm/IR/RuntimeLibcalls.def
index dc69b1ae19769ea..a7963543c4350de 100644
--- a/llvm/include/llvm/IR/RuntimeLibcalls.def
+++ b/llvm/include/llvm/IR/RuntimeLibcalls.def
@@ -354,6 +354,11 @@ HANDLE_LIBCALL(FREXP_F64, "frexp")
HANDLE_LIBCALL(FREXP_F80, "frexpl")
HANDLE_LIBCALL(FREXP_F128, "frexpl")
HANDLE_LIBCALL(FREXP_PPCF128, "frexpl")
+HANDLE_LIBCALL(SINCOSPI_F32, "sincospif")
+HANDLE_LIBCALL(SINCOSPI_F64, "sincospi")
+HANDLE_LIBCALL(SINCOSPI_F80, "sincospil")
+HANDLE_LIBCALL(SINCOSPI_F128, "sincospil")
+HANDLE_LIBCALL(SINCOSPI_PPCF128, "sincospil")
HANDLE_LIBCALL(MODF_F32, "modff")
HANDLE_LIBCALL(MODF_F64, "modf")
HANDLE_LIBCALL(MODF_F80, "modfl")
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index f61928a66eb3cff..66d7f57b93fb773 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -4520,11 +4520,15 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
ExpandFPLibCall(Node, RTLIB::TANH_F32, RTLIB::TANH_F64, RTLIB::TANH_F80,
RTLIB::TANH_F128, RTLIB::TANH_PPCF128, Results);
break;
- case ISD::FSINCOS: {
- RTLIB::Libcall LC = RTLIB::getFSINCOS(Node->getValueType(0));
+ case ISD::FSINCOS:
+ case ISD::FSINCOSPI: {
+ EVT VT = Node->getValueType(0);
+ RTLIB::Libcall LC = Node->getOpcode() == ISD::FSINCOS
+ ? RTLIB::getFSINCOS(VT)
+ : RTLIB::getSINCOSPI(VT);
bool Expanded = DAG.expandMultipleResultFPLibCall(LC, Node, Results);
if (!Expanded)
- llvm_unreachable("Expected scalar FSINCOS to expand to libcall!");
+ llvm_unreachable("Expected scalar FSINCOS[PI] to expand to libcall!");
break;
}
case ISD::FLOG:
@@ -5507,7 +5511,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
break;
}
case ISD::FMODF:
- case ISD::FSINCOS: {
+ case ISD::FSINCOS:
+ case ISD::FSINCOSPI: {
Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
Tmp2 = DAG.getNode(Node->getOpcode(), dl, DAG.getVTList(NVT, NVT), Tmp1,
Node->getFlags());
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 2a4eed1ed527a81..6dcc2464f61f267 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -2768,6 +2768,7 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) {
case ISD::FMODF:
case ISD::FSINCOS:
+ case ISD::FSINCOSPI:
R = PromoteFloatRes_UnaryWithTwoFPResults(N);
break;
case ISD::FP_ROUND: R = PromoteFloatRes_FP_ROUND(N); break;
@@ -3230,6 +3231,7 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) {
case ISD::FMODF:
case ISD::FSINCOS:
+ case ISD::FSINCOSPI:
R = SoftPromoteHalfRes_UnaryWithTwoFPResults(N);
break;
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 416da1bb7bfcf01..111b08aeab185a6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -456,6 +456,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::FFREXP:
case ISD::FMODF:
case ISD::FSINCOS:
+ case ISD::FSINCOSPI:
case ISD::SADDSAT:
case ISD::UADDSAT:
case ISD::SSUBSAT:
@@ -1217,9 +1218,12 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
return;
break;
- case ISD::FSINCOS: {
- RTLIB::Libcall LC =
- RTLIB::getFSINCOS(Node->getValueType(0).getVectorElementType());
+ case ISD::FSINCOS:
+ case ISD::FSINCOSPI: {
+ EVT VT = Node->getValueType(0).getVectorElementType();
+ RTLIB::Libcall LC = Node->getOpcode() == ISD::FSINCOS
+ ? RTLIB::getFSINCOS(VT)
+ : RTLIB::getSINCOSPI(VT);
if (DAG.expandMultipleResultFPLibCall(LC, Node, Results))
return;
break;
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index da793a34879b8e9..915ee2d1103320d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -136,6 +136,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FMODF:
case ISD::FFREXP:
case ISD::FSINCOS:
+ case ISD::FSINCOSPI:
R = ScalarizeVecRes_UnaryOpWithTwoResults(N, ResNo);
break;
case ISD::ADD:
@@ -1265,6 +1266,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FMODF:
case ISD::FFREXP:
case ISD::FSINCOS:
+ case ISD::FSINCOSPI:
SplitVecRes_UnaryOpWithTwoResults(N, ResNo, Lo, Hi);
break;
@@ -4815,7 +4817,8 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
break;
case ISD::FMODF:
case ISD::FFREXP:
- case ISD::FSINCOS: {
+ case ISD::FSINCOS:
+ case ISD::FSINCOSPI: {
if (!unrollExpandedOp())
Res = WidenVecRes_UnaryOpWithTwoResults(N, ResNo);
break;
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 7178f6398bede50..5a5596a542f72bb 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -6978,6 +6978,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
return;
case Intrinsic::modf:
case Intrinsic::sincos:
+ case Intrinsic::sincospi:
case Intrinsic::frexp: {
unsigned Opcode;
switch (Intrinsic) {
@@ -6986,6 +6987,9 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
case Intrinsic::sincos:
Opcode = ISD::FSINCOS;
break;
+ case Intrinsic::sincospi:
+ Opcode = ISD::FSINCOSPI;
+ break;
case Intrinsic::modf:
Opcode = ISD::FMODF;
break;
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 7ab8beab1bda2eb..8de537173e52c8a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -216,6 +216,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::FCOS: return "fcos";
case ISD::STRICT_FCOS: return "strict_fcos";
case ISD::FSINCOS: return "fsincos";
+ case ISD::FSINCOSPI: return "fsincospi";
case ISD::FMODF: return "fmodf";
case ISD::FTAN: return "ftan";
case ISD::STRICT_FTAN: return "strict_ftan";
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 1f39ec205c51794..d9a19dfceb6d315 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -407,6 +407,11 @@ RTLIB::Libcall RTLIB::getFSINCOS(EVT RetVT) {
SINCOS_PPCF128);
}
+RTLIB::Libcall RTLIB::getSINCOSPI(EVT RetVT) {
+ return getFPLibCall(RetVT, SINCOSPI_F32, SINCOSPI_F64, SINCOSPI_F80,
+ SINCOSPI_F128, SINCOSPI_PPCF128);
+}
+
RTLIB::Libcall RTLIB::getMODF(EVT RetVT) {
return getFPLibCall(RetVT, MODF_F32, MODF_F64, MODF_F80, MODF_F128,
MODF_PPCF128);
@@ -781,7 +786,7 @@ void TargetLoweringBase::initActions() {
// These library functions default to expand.
setOperationAction({ISD::FROUND, ISD::FPOWI, ISD::FLDEXP, ISD::FFREXP,
- ISD::FSINCOS, ISD::FMODF},
+ ISD::FSINCOS, ISD::FSINCOSPI, ISD::FMODF},
VT, Expand);
// These operations default to expand for vector types.
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 34464d317beafe4..0d1608a97bfd300 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -735,19 +735,20 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FCOPYSIGN, MVT::bf16, Promote);
}
- for (auto Op : {ISD::FREM, ISD::FPOW, ISD::FPOWI,
- ISD::FCOS, ISD::FSIN, ISD::FSINCOS,
- ISD::FMODF, ISD::FACOS, ISD::FASIN,
- ISD::FATAN, ISD::FATAN2, ISD::FCOSH,
- ISD::FSINH, ISD::FTANH, ISD::FTAN,
- ISD::FEXP, ISD::FEXP2, ISD::FEXP10,
- ISD::FLOG, ISD::FLOG2, ISD::FLOG10,
- ISD::STRICT_FREM, ISD::STRICT_FPOW, ISD::STRICT_FPOWI,
- ISD::STRICT_FCOS, ISD::STRICT_FSIN, ISD::STRICT_FACOS,
- ISD::STRICT_FASIN, ISD::STRICT_FATAN, ISD::STRICT_FATAN2,
- ISD::STRICT_FCOSH, ISD::STRICT_FSINH, ISD::STRICT_FTANH,
- ISD::STRICT_FEXP, ISD::STRICT_FEXP2, ISD::STRICT_FLOG,
- ISD::STRICT_FLOG2, ISD::STRICT_FLOG10, ISD::STRICT_FTAN}) {
+ for (auto Op : {ISD::FREM, ISD::FPOW, ISD::FPOWI,
+ ISD::FCOS, ISD::FSIN, ISD::FSINCOS,
+ ISD::FSINCOSPI, ISD::FMODF, ISD::FACOS,
+ ISD::FASIN, ISD::FATAN, ISD::FATAN2,
+ ISD::FCOSH, ISD::FSINH, ISD::FTANH,
+ ISD::FTAN, ISD::FEXP, ISD::FEXP2,
+ ISD::FEXP10, ISD::FLOG, ISD::FLOG2,
+ ISD::FLOG10, ISD::STRICT_FREM, ISD::STRICT_FPOW,
+ ISD::STRICT_FPOWI, ISD::STRICT_FCOS, ISD::STRICT_FSIN,
+ ISD::STRICT_FACOS, ISD::STRICT_FASIN, ISD::STRICT_FATAN,
+ ISD::STRICT_FATAN2, ISD::STRICT_FCOSH, ISD::STRICT_FSINH,
+ ISD::STRICT_FTANH, ISD::STRICT_FEXP, ISD::STRICT_FEXP2,
+ ISD::STRICT_FLOG, ISD::STRICT_FLOG2, ISD::STRICT_FLOG10,
+ ISD::STRICT_FTAN}) {
setOperationAction(Op, MVT::f16, Promote);
setOperationAction(Op, MVT::v4f16, Expand);
setOperationAction(Op, MVT::v8f16, Expand);
@@ -1208,7 +1209,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
ISD::FSIN, ISD::FCOS, ISD::FTAN,
ISD::FASIN, ISD::FACOS, ISD::FATAN,
ISD::FSINH, ISD::FCOSH, ISD::FTANH,
- ISD::FPOW, ISD::FLOG, ISD::FLOG2,
+ ISD::FPOW, ISD::FLOG, ISD::FLOG2,
ISD::FLOG10, ISD::FEXP, ISD::FEXP2,
ISD::FEXP10, ISD::FRINT, ISD::FROUND,
ISD::FROUNDEVEN, ISD::FTRUNC, ISD::FMINNUM,
@@ -1217,7 +1218,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
ISD::STRICT_FDIV, ISD::STRICT_FMA, ISD::STRICT_FCEIL,
ISD::STRICT_FFLOOR, ISD::STRICT_FSQRT, ISD::STRICT_FRINT,
- ISD::STRICT_FNEARBYINT, ISD::STRICT_FROUND, ISD::STRICT_FTRUNC,
+ ISD::STRICT_FNEARBYINT, ISD::STRICT_FROUND, ISD::STRICT_FTRUNC,
ISD::STRICT_FROUNDEVEN, ISD::STRICT_FMINNUM, ISD::STRICT_FMAXNUM,
ISD::STRICT_FMINIMUM, ISD::STRICT_FMAXIMUM})
setOperationAction(Op, MVT::v1f64, Expand);
diff --git a/llvm/test/CodeGen/AArch64/llvm.sincospi.ll b/llvm/test/CodeGen/AArch64/llvm.sincospi.ll
new file mode 100644
index 000000000000000..d1d7d92adc05ae3
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/llvm.sincospi.ll
@@ -0,0 +1,268 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=aarch64-gnu-linux < %s | FileCheck -check-prefixes=CHECK %s
+
+define { half, half } @test_sincospi_f16(half %a) {
+; CHECK-LABEL: test_sincospi_f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: add x0, sp, #12
+; CHECK-NEXT: add x1, sp, #8
+; CHECK-NEXT: bl sincospif
+; CHECK-NEXT: ldp s1, s0, [sp, #8]
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: fcvt h1, s1
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %result = call { half, half } @llvm.sincospi.f16(half %a)
+ ret { half, half } %result
+}
+
+define half @test_sincospi_f16_only_use_sin(half %a) {
+; CHECK-LABEL: test_sincospi_f16_only_use_sin:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: add x0, sp, #12
+; CHECK-NEXT: add x1, sp, #8
+; CHECK-NEXT: bl sincospif
+; CHECK-NEXT: ldr s0, [sp, #12]
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %result = call { half, half } @llvm.sincospi.f16(half %a)
+ %result.0 = extractvalue { half, half } %result, 0
+ ret half %result.0
+}
+
+define half @test_sincospi_f16_only_use_cos(half %a) {
+; CHECK-LABEL: test_sincospi_f16_only_use_cos:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: add x0, sp, #12
+; CHECK-NEXT: add x1, sp, #8
+; CHECK-NEXT: bl sincospif
+; CHECK-NEXT: ldr s0, [sp, #8]
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %result = call { half, half } @llvm.sincospi.f16(half %a)
+ %result.1 = extractvalue { half, half } %result, 1
+ ret half %result.1
+}
+
+define { <2 x half>, <2 x half> } @test_sincospi_v2f16(<2 x half> %a) {
+; CHECK-LABEL: test_sincospi_v2f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #64
+; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: mov h1, v0.h[1]
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: add x0, sp, #36
+; CHECK-NEXT: add x1, sp, #32
+; CHECK-NEXT: fcvt s0, h1
+; CHECK-NEXT: bl sincospif
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: add x0, sp, #28
+; CHECK-NEXT: add x1, sp, #24
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: bl sincospif
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: add x0, sp, #44
+; CHECK-NEXT: add x1, sp, #40
+; CHECK-NEXT: mov h0, v0.h[2]
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: bl sincospif
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: add x0, sp, #60
+; CHECK-NEXT: add x1, sp, #56
+; CHECK-NEXT: mov h0, v0.h[3]
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: bl sincospif
+; CHECK-NEXT: ldp s2, s0, [sp, #32]
+; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
+; CHECK-NEXT: ldp s3, s1, [sp, #24]
+; CHECK-NEXT: fcvt h4, s0
+; CHECK-NEXT: fcvt h2, s2
+; CHECK-NEXT: fcvt h0, s1
+; CHECK-NEXT: fcvt h1, s3
+; CHECK-NEXT: ldp s5, s3, [sp, #40]
+; CHECK-NEXT: fcvt h3, s3
+; CHECK-NEXT: mov v0.h[1], v4.h[0]
+; CHECK-NEXT: fcvt h4, s5
+; CHECK-NEXT: mov v1.h[1], v2.h[0]
+; CHECK-NEXT: ldp s5, s2, [sp, #56]
+; CHECK-NEXT: mov v0.h[2], v3.h[0]
+; CHECK-NEXT: fcvt h2, s2
+; CHECK-NEXT: fcvt h3, s5
+; CHECK-NEXT: mov v1.h[2], v4.h[0]
+; CHECK-NEXT: mov v0.h[3], v2.h[0]
+; CHECK-NEXT: mov v1.h[3], v3.h[0]
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q1
+; CHECK-NEXT: add sp, sp, #64
+; CHECK-NEXT: ret
+ %result = call { <2 x half>, <2 x half> } @llvm.sincospi.v2f16(<2 x half> %a)
+ ret { <2 x half>, <2 x half> } %result
+}
+
+define { float, float } @test_sincospi_f32(float %a) {
+; CHECK-LABEL: test_sincospi_f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: add x0, sp, #12
+; CHECK-NEXT: add x1, sp, #8
+; CHECK-NEXT: bl sincospif
+; CHECK-NEXT: ldp s1, s0, [sp, #8]
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %result = call { float, float } @llvm.sincospi.f32(float %a)
+ ret { float, float } %result
+}
+
+define { <3 x float>, <3 x float> } @test_sincospi_v3f32(<3 x float> %a) {
+; CHECK-LABEL: test_sincospi_v3f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #80
+; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 80
+; CHECK-NEXT: .cfi_offset w19, -8
+; CHECK-NEXT: .cfi_offset w20, -16
+; CHECK-NEXT: .cfi_offset w21, -24
+; CHECK-NEXT: .cfi_offset w22, -32
+; CHECK-NEXT: .cfi_offset w30, -48
+; CHECK-NEXT: add x0, sp, #20
+; CHECK-NEXT: add x1, sp, #16
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
+; CHECK-NEXT: bl sincospif
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: add x0, sp, #28
+; CHECK-NEXT: add x1, sp, #24
+; CHECK-NEXT: add x19, sp, #28
+; CHECK-NEXT: add x20, sp, #24
+; CHECK-NEXT: mov s0, v0.s[1]
+; CHECK-NEXT: bl sincospif
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: add x0, sp, #44
+; CHECK-NEXT: add x1, sp, #40
+; CHECK-NEXT: add x21, sp, #44
+; CHECK-NEXT: add x22, sp, #40
+; CHECK-NEXT: mov s0, v0.s[2]
+; CHECK-NEXT: bl sincospif
+; CHECK-NEXT: ldp s1, s0, [sp, #16]
+; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-NEXT: ld1 { v0.s }[1], [x19]
+; CHECK-NEXT: ld1 { v1.s }[1], [x20]
+; CHECK-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ld1 { v0.s }[2], [x21]
+; CHECK-NEXT: ld1 { v1.s }[2], [x22]
+; CHECK-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #80
+; CHECK-NEXT: ret
+ %result = call { <3 x float>, <3 x float> } @llvm.sincospi.v3f32(<3 x float> %a)
+ ret { <3 x float>, <3 x float> } %result
+}
+
+define { <2 x float>, <2 x float> } @test_sincospi_v2f32(<2 x float> %a) {
+; CHECK-LABEL: test_sincospi_v2f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #64
+; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: .cfi_offset w19, -8
+; CHECK-NEXT: .cfi_offset w20, -16
+; CHECK-NEXT: .cfi_offset w30, -32
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: add x0, sp, #44
+; CHECK-NEXT: add x1, sp, #40
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
+; CHECK-NEXT: bl sincospif
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: add x0, sp, #28
+; CHECK-NEXT: add x1, sp, #24
+; CHECK-NEXT: add x19, sp, #28
+; CHECK-NEXT: add x20, sp, #24
+; CHECK-NEXT: mov s0, v0.s[1]
+; CHECK-NEXT: bl sincospif
+; CHECK-NEXT: ldp s1, s0, [sp, #40]
+; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-NEXT: ld1 { v0.s }[1], [x19]
+; CHECK-NEXT: ld1 { v1.s }[1], [x20]
+; CHECK-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q1
+; CHECK-NEXT: add sp, sp, #64
+; CHECK-NEXT: ret
+ %result = call { <2 x float>, <2 x float> } @llvm.sincospi.v2f32(<2 x float> %a)
+ ret { <2 x float>, <2 x float> } %result
+}
+
+define { double, double } @test_sincospi_f64(double %a) {
+; CHECK-LABEL: test_sincospi_f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #32
+; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: add x0, sp, #24
+; CHECK-NEXT: add x1, sp, #8
+; CHECK-NEXT: bl sincospi
+; CHECK-NEXT: ldr d0, [sp, #24]
+; CHECK-NEXT: ldr d1, [sp, #8]
+; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #32
+; CHECK-NEXT: ret
+ %result = call { double, double } @llvm.sincospi.f64(double %a)
+ ret { double, double } %result
+}
+
+define { <2 x double>, <2 x double> } @test_sincospi_v2f64(<2 x double> %a) {
+; CHECK-LABEL: test_sincospi_v2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #80
+; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
+; CHECK-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 80
+; CHECK-NEXT: .cfi_offset w19, -8
+; CHECK-NEXT: .cfi_offset w20, -16
+; CHECK-NEXT: .cfi_offset w30, -32
+; CHECK-NEXT: add x0, sp, #56
+; CHECK-NEXT: add x1, sp, #40
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: bl sincospi
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: add x0, sp, #32
+; CHECK-NEXT: add x1, sp, #24
+; CHECK-NEXT: add x19, sp, #32
+; CHECK-NEXT: add x20, sp, #24
+; CHECK-NEXT: mov d0, v0.d[1]
+; CHECK-NEXT: bl sincospi
+; CHECK-NEXT: ldr d0, [sp, #56]
+; CHECK-NEXT: ldr d1, [sp, #40]
+; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
+; CHECK-NEXT: ld1 { v0.d }[1], [x19]
+; CHECK-NEXT: ld1 { v1.d }[1], [x20]
+; CHECK-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #80
+; CHECK-NEXT: ret
+ %result = call { <2 x double>, <2 x double> } @llvm.sincospi.v2f64(<2 x double> %a)
+ ret { <2 x double>, <2 x double> } %result
+}
diff --git a/llvm/test/CodeGen/AArch64/veclib-llvm.sincospi.ll b/llvm/test/CodeGen/AArch64/veclib-llvm.sincospi.ll
new file mode 100644
index 000000000000000..fad865d20f7df83
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/veclib-llvm.sincospi.ll
@@ -0,0 +1,109 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64-gnu-linux -mattr=+neon,+sve -vector-library=sleefgnuabi < %s | FileCheck %s -check-prefix=SLEEF
+; RUN: llc -mtriple=aarch64-gnu-linux -mattr=+neon,+sve -vector-library=ArmPL < %s | FileCheck %s -check-prefix=ARMPL
+
+define void @test_sincospi_v4f32(<4 x float> %x, ptr noalias %out_sin, ptr noalias %out_cos) {
+; SLEEF-LABEL: test_sincospi_v4f32:
+; SLEEF: // %bb.0:
+; SLEEF-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; SLEEF-NEXT: .cfi_def_cfa_offset 16
+; SLEEF-NEXT: .cfi_offset w30, -16
+; SLEEF-NEXT: bl _ZGVnN4vl4l4_sincospif
+; SLEEF-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; SLEEF-NEXT: ret
+;
+; ARMPL-LABEL: test_sincospi_v4f32:
+; ARMPL: // %bb.0:
+; ARMPL-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; ARMPL-NEXT: .cfi_def_cfa_offset 16
+; ARMPL-NEXT: .cfi_offset w30, -16
+; ARMPL-NEXT: bl armpl_vsincospiq_f32
+; ARMPL-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; ARMPL-NEXT: ret
+ %result = call { <4 x float>, <4 x float> } @llvm.sincospi.v4f32(<4 x float> %x)
+ %result.0 = extractvalue { <4 x float>, <4 x float> } %result, 0
+ %result.1 = extractvalue { <4 x float>, <4 x float> } %result, 1
+ store <4 x float> %result.0, ptr %out_sin, align 4
+ store <4 x float> %result.1, ptr %out_cos, align 4
+ ret void
+}
+
+define void @test_sincospi_v2f64(<2 x double> %x, ptr noalias %out_sin, ptr noalias %out_cos) {
+; SLEEF-LABEL: test_sincospi_v2f64:
+; SLEEF: // %bb.0:
+; SLEEF-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; SLEEF-NEXT: .cfi_def_cfa_offset 16
+; SLEEF-NEXT: .cfi_offset w30, -16
+; SLEEF-NEXT: bl _ZGVnN2vl8l8_sincospi
+; SLEEF-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; SLEEF-NEXT: ret
+;
+; ARMPL-LABEL: test_sincospi_v2f64:
+; ARMPL: // %bb.0:
+; ARMPL-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; ARMPL-NEXT: .cfi_def_cfa_offset 16
+; ARMPL-NEXT: .cfi_offset w30, -16
+; ARMPL-NEXT: bl armpl_vsincospiq_f64
+; ARMPL-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; ARMPL-NEXT: ret
+ %result = call { <2 x double>, <2 x double> } @llvm.sincospi.v2f64(<2 x double> %x)
+ %result.0 = extractvalue { <2 x double>, <2 x double> } %result, 0
+ %result.1 = extractvalue { <2 x double>, <2 x double> } %result, 1
+ store <2 x double> %result.0, ptr %out_sin, align 8
+ store <2 x double> %result.1, ptr %out_cos, align 8
+ ret void
+}
+
+define void @test_sincospi_nxv4f32(<vscale x 4 x float> %x, ptr noalias %out_sin, ptr noalias %out_cos) {
+; SLEEF-LABEL: test_sincospi_nxv4f32:
+; SLEEF: // %bb.0:
+; SLEEF-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; SLEEF-NEXT: .cfi_def_cfa_offset 16
+; SLEEF-NEXT: .cfi_offset w30, -16
+; SLEEF-NEXT: bl _ZGVsNxvl4l4_sincospif
+; SLEEF-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; SLEEF-NEXT: ret
+;
+; ARMPL-LABEL: test_sincospi_nxv4f32:
+; ARMPL: // %bb.0:
+; ARMPL-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; ARMPL-NEXT: .cfi_def_cfa_offset 16
+; ARMPL-NEXT: .cfi_offset w30, -16
+; ARMPL-NEXT: ptrue p0.s
+; ARMPL-NEXT: bl armpl_svsincospi_f32_x
+; ARMPL-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; ARMPL-NEXT: ret
+ %result = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.sincospi.nxv4f32(<vscale x 4 x float> %x)
+ %result.0 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } %result, 0
+ %result.1 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } %result, 1
+ store <vscale x 4 x float> %result.0, ptr %out_sin, align 4
+ store <vscale x 4 x float> %result.1, ptr %out_cos, align 4
+ ret void
+}
+
+define void @test_sincospi_nxv2f64(<vscale x 2 x double> %x, ptr noalias %out_sin, ptr noalias %out_cos) {
+; SLEEF-LABEL: test_sincospi_nxv2f64:
+; SLEEF: // %bb.0:
+; SLEEF-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; SLEEF-NEXT: .cfi_def_cfa_offset 16
+; SLEEF-NEXT: .cfi_offset w30, -16
+; SLEEF-NEXT: bl _ZGVsNxvl8l8_sincospi
+; SLEEF-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; SLEEF-NEXT: ret
+;
+; ARMPL-LABEL: test_sincospi_nxv2f64:
+; ARMPL: // %bb.0:
+; ARMPL-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; ARMPL-NEXT: .cfi_def_cfa_offset 16
+; ARMPL-NEXT: .cfi_offset w30, -16
+; ARMPL-NEXT: ptrue p0.d
+; ARMPL-NEXT: bl armpl_svsincospi_f64_x
+; ARMPL-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; ARMPL-NEXT: ret
+ %result = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.sincospi.nxv2f64(<vscale x 2 x double> %x)
+ %result.0 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %result, 0
+ %result.1 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %result, 1
+ store <vscale x 2 x double> %result.0, ptr %out_sin, align 8
+ store <vscale x 2 x double> %result.1, ptr %out_cos, align 8
+ ret void
+}
More information about the llvm-commits
mailing list