[llvm] [X86][CodeGen] Add base atan2 intrinsic lowering (p4) (PR #110760)
Tex Riddell via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 11 15:34:50 PDT 2024
https://github.com/tex3d updated https://github.com/llvm/llvm-project/pull/110760
>From 9b3ffe09a150522bc2857997f654e6d17cebad7a Mon Sep 17 00:00:00 2001
From: Tex Riddell <texr at microsoft.com>
Date: Sat, 14 Sep 2024 18:14:20 -0700
Subject: [PATCH 1/2] [X86][CodeGen] Add base atan2 intrinsic lowering (p4)
This change is part of this proposal: https://discourse.llvm.org/t/rfc-all-the-math-intrinsics/78294
Based on example PR #96222 and fix PR #101268, with some differences due to 2-arg intrinsic and intermediate refactor (RuntimeLibCalls.cpp).
- Add llvm.experimental.constrained.atan2 - Intrinsics.td, ConstrainedOps.def, LangRef.rst
- Add to ISDOpcodes.h and TargetSelectionDAG.td, connect to intrinsic in BasicTTIImpl.h, and LibFunc_ in SelectionDAGBuilder.cpp, and map generic op in SelectionDAGCompat.td
- Update LegalizeDAG.cpp, LegalizeFloatTypes.cpp, LegalizeVectorOps.cpp, and LegalizeVectorTypes.cpp
- Update isKnownNeverNaN in SelectionDAG.cpp
- Update SelectionDAGDumper.cpp
- Update libcalls - RuntimeLibcalls.def, RuntimeLibcalls.cpp, LegalizerHelper.cpp
- Update isKnownNeverNaN for generic opcode in GlobalISel/Utils.cpp
- TargetLoweringBase.cpp - Expand for vectors, promote f16
- X86ISelLowering.cpp - Expand f80, promote f32 to f64 for MSVC
---
llvm/include/llvm/CodeGen/BasicTTIImpl.h | 3 +
llvm/include/llvm/CodeGen/ISDOpcodes.h | 3 +
llvm/include/llvm/IR/ConstrainedOps.def | 1 +
llvm/include/llvm/IR/Intrinsics.td | 5 +
llvm/include/llvm/IR/RuntimeLibcalls.def | 5 +
.../Target/GlobalISel/SelectionDAGCompat.td | 1 +
.../include/llvm/Target/TargetSelectionDAG.td | 6 +
.../CodeGen/GlobalISel/LegalizerHelper.cpp | 4 +
llvm/lib/CodeGen/GlobalISel/Utils.cpp | 2 +
llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 7 +
.../SelectionDAG/LegalizeFloatTypes.cpp | 22 ++
llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 2 +
.../SelectionDAG/LegalizeVectorOps.cpp | 1 +
.../SelectionDAG/LegalizeVectorTypes.cpp | 3 +
.../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 1 +
.../SelectionDAG/SelectionDAGBuilder.cpp | 12 +
.../SelectionDAG/SelectionDAGDumper.cpp | 2 +
llvm/lib/CodeGen/TargetLoweringBase.cpp | 7 +-
llvm/lib/IR/RuntimeLibcalls.cpp | 1 +
llvm/lib/Target/X86/X86ISelLowering.cpp | 2 +
llvm/test/Assembler/fp-intrinsics-attr.ll | 8 +
llvm/test/CodeGen/X86/fp-intrinsics.ll | 61 ++++-
.../test/CodeGen/X86/fp128-libcalls-strict.ll | 51 ++++
llvm/test/CodeGen/X86/fp80-strict-libcalls.ll | 36 +++
llvm/test/CodeGen/X86/llvm.atan2.ll | 80 ++++++
.../X86/vector-constrained-fp-intrinsics.ll | 258 ++++++++++++++++++
llvm/test/Feature/fp-intrinsics.ll | 15 +-
27 files changed, 594 insertions(+), 5 deletions(-)
create mode 100644 llvm/test/CodeGen/X86/llvm.atan2.ll
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 57d1fa33c8482c..db3b5cddd7c1c3 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -1998,6 +1998,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
case Intrinsic::atan:
ISD = ISD::FATAN;
break;
+ case Intrinsic::atan2:
+ ISD = ISD::FATAN2;
+ break;
case Intrinsic::sinh:
ISD = ISD::FSINH;
break;
diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index da43f5be10ff3b..0b6d155b6d161e 100644
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -425,6 +425,7 @@ enum NodeType {
STRICT_FASIN,
STRICT_FACOS,
STRICT_FATAN,
+ STRICT_FATAN2,
STRICT_FSINH,
STRICT_FCOSH,
STRICT_FTANH,
@@ -994,6 +995,8 @@ enum NodeType {
FPOWI,
/// FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
FLDEXP,
+ /// FATAN2 - atan2, inspired by libm.
+ FATAN2,
/// FFREXP - frexp, extract fractional and exponent component of a
/// floating-point value. Returns the two components as separate return
diff --git a/llvm/include/llvm/IR/ConstrainedOps.def b/llvm/include/llvm/IR/ConstrainedOps.def
index 56304c377b8393..30a82bf633d575 100644
--- a/llvm/include/llvm/IR/ConstrainedOps.def
+++ b/llvm/include/llvm/IR/ConstrainedOps.def
@@ -72,6 +72,7 @@ CMP_INSTRUCTION(FCmp, 2, 0, experimental_constrained_fcmps, FSETCCS
DAG_FUNCTION(acos, 1, 1, experimental_constrained_acos, FACOS)
DAG_FUNCTION(asin, 1, 1, experimental_constrained_asin, FASIN)
DAG_FUNCTION(atan, 1, 1, experimental_constrained_atan, FATAN)
+DAG_FUNCTION(atan2, 2, 1, experimental_constrained_atan2, FATAN2)
DAG_FUNCTION(ceil, 1, 0, experimental_constrained_ceil, FCEIL)
DAG_FUNCTION(cos, 1, 1, experimental_constrained_cos, FCOS)
DAG_FUNCTION(cosh, 1, 1, experimental_constrained_cosh, FCOSH)
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 8a0721cf23f538..94e53f372127da 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -1235,6 +1235,11 @@ let IntrProperties = [IntrInaccessibleMemOnly, IntrWillReturn, IntrStrictFP] in
[ LLVMMatchType<0>,
llvm_metadata_ty,
llvm_metadata_ty ]>;
+ def int_experimental_constrained_atan2 : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ],
+ [ LLVMMatchType<0>,
+ LLVMMatchType<0>,
+ llvm_metadata_ty,
+ llvm_metadata_ty ]>;
def int_experimental_constrained_sin : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ],
[ LLVMMatchType<0>,
llvm_metadata_ty,
diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.def b/llvm/include/llvm/IR/RuntimeLibcalls.def
index 69cf43140ad4bd..4aab658a86690c 100644
--- a/llvm/include/llvm/IR/RuntimeLibcalls.def
+++ b/llvm/include/llvm/IR/RuntimeLibcalls.def
@@ -232,6 +232,11 @@ HANDLE_LIBCALL(ATAN_F64, "atan")
HANDLE_LIBCALL(ATAN_F80, "atanl")
HANDLE_LIBCALL(ATAN_F128,"atanl")
HANDLE_LIBCALL(ATAN_PPCF128, "atanl")
+HANDLE_LIBCALL(ATAN2_F32, "atan2f")
+HANDLE_LIBCALL(ATAN2_F64, "atan2")
+HANDLE_LIBCALL(ATAN2_F80, "atan2l")
+HANDLE_LIBCALL(ATAN2_F128,"atan2l")
+HANDLE_LIBCALL(ATAN2_PPCF128, "atan2l")
HANDLE_LIBCALL(SINCOS_F32, nullptr)
HANDLE_LIBCALL(SINCOS_F64, nullptr)
HANDLE_LIBCALL(SINCOS_F80, nullptr)
diff --git a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
index d9121cf166e5aa..83bf3c335cac89 100644
--- a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
+++ b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
@@ -154,6 +154,7 @@ def : GINodeEquiv<G_FTAN, ftan>;
def : GINodeEquiv<G_FACOS, facos>;
def : GINodeEquiv<G_FASIN, fasin>;
def : GINodeEquiv<G_FATAN, fatan>;
+def : GINodeEquiv<G_FATAN2, fatan2>;
def : GINodeEquiv<G_FCOSH, fcosh>;
def : GINodeEquiv<G_FSINH, fsinh>;
def : GINodeEquiv<G_FTANH, ftanh>;
diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td
index adf8a75f620225..fa516fc9b10175 100644
--- a/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -534,6 +534,7 @@ def ftan : SDNode<"ISD::FTAN" , SDTFPUnaryOp>;
def fasin : SDNode<"ISD::FASIN" , SDTFPUnaryOp>;
def facos : SDNode<"ISD::FACOS" , SDTFPUnaryOp>;
def fatan : SDNode<"ISD::FATAN" , SDTFPUnaryOp>;
+def fatan2 : SDNode<"ISD::FATAN2" , SDTFPBinOp>;
def fsinh : SDNode<"ISD::FSINH" , SDTFPUnaryOp>;
def fcosh : SDNode<"ISD::FCOSH" , SDTFPUnaryOp>;
def ftanh : SDNode<"ISD::FTANH" , SDTFPUnaryOp>;
@@ -602,6 +603,8 @@ def strict_facos : SDNode<"ISD::STRICT_FACOS",
SDTFPUnaryOp, [SDNPHasChain]>;
def strict_fatan : SDNode<"ISD::STRICT_FATAN",
SDTFPUnaryOp, [SDNPHasChain]>;
+def strict_fatan2 : SDNode<"ISD::STRICT_FATAN2",
+ SDTFPBinOp, [SDNPHasChain]>;
def strict_fsinh : SDNode<"ISD::STRICT_FSINH",
SDTFPUnaryOp, [SDNPHasChain]>;
def strict_fcosh : SDNode<"ISD::STRICT_FCOSH",
@@ -1588,6 +1591,9 @@ def any_facos : PatFrags<(ops node:$src),
def any_fatan : PatFrags<(ops node:$src),
[(strict_fatan node:$src),
(fatan node:$src)]>;
+def any_fatan2 : PatFrags<(ops node:$src1, node:$src2),
+ [(strict_fatan2 node:$src1, node:$src2),
+ (fatan2 node:$src1, node:$src2)]>;
def any_fsinh : PatFrags<(ops node:$src),
[(strict_fsinh node:$src),
(fsinh node:$src)]>;
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 3b2fd95076c465..d5222641342ac1 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -457,6 +457,8 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
RTLIBCASE(ACOS_F);
case TargetOpcode::G_FATAN:
RTLIBCASE(ATAN_F);
+ case TargetOpcode::G_FATAN2:
+ RTLIBCASE(ATAN2_F);
case TargetOpcode::G_FSINH:
RTLIBCASE(SINH_F);
case TargetOpcode::G_FCOSH:
@@ -1202,6 +1204,7 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
case TargetOpcode::G_FACOS:
case TargetOpcode::G_FASIN:
case TargetOpcode::G_FATAN:
+ case TargetOpcode::G_FATAN2:
case TargetOpcode::G_FCOSH:
case TargetOpcode::G_FSINH:
case TargetOpcode::G_FTANH:
@@ -3122,6 +3125,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
case TargetOpcode::G_FACOS:
case TargetOpcode::G_FASIN:
case TargetOpcode::G_FATAN:
+ case TargetOpcode::G_FATAN2:
case TargetOpcode::G_FCOSH:
case TargetOpcode::G_FSINH:
case TargetOpcode::G_FTANH:
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index 9574464207d99f..722ceea29c951c 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -828,6 +828,7 @@ bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI,
case TargetOpcode::G_FACOS:
case TargetOpcode::G_FASIN:
case TargetOpcode::G_FATAN:
+ case TargetOpcode::G_FATAN2:
case TargetOpcode::G_FCOSH:
case TargetOpcode::G_FSINH:
case TargetOpcode::G_FTANH:
@@ -1715,6 +1716,7 @@ bool llvm::isPreISelGenericFloatingPointOpcode(unsigned Opc) {
case TargetOpcode::G_FACOS:
case TargetOpcode::G_FASIN:
case TargetOpcode::G_FATAN:
+ case TargetOpcode::G_FATAN2:
case TargetOpcode::G_FCOSH:
case TargetOpcode::G_FSINH:
case TargetOpcode::G_FTANH:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index ea22b4670d6f1f..e0a03383358b76 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -4600,6 +4600,11 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
ExpandFPLibCall(Node, RTLIB::ATAN_F32, RTLIB::ATAN_F64, RTLIB::ATAN_F80,
RTLIB::ATAN_F128, RTLIB::ATAN_PPCF128, Results);
break;
+ case ISD::FATAN2:
+ case ISD::STRICT_FATAN2:
+ ExpandFPLibCall(Node, RTLIB::ATAN2_F32, RTLIB::ATAN2_F64, RTLIB::ATAN2_F80,
+ RTLIB::ATAN2_F128, RTLIB::ATAN2_PPCF128, Results);
+ break;
case ISD::FSINH:
case ISD::STRICT_FSINH:
ExpandFPLibCall(Node, RTLIB::SINH_F32, RTLIB::SINH_F64, RTLIB::SINH_F80,
@@ -5486,6 +5491,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
case ISD::FMINIMUMNUM:
case ISD::FMAXIMUMNUM:
case ISD::FPOW:
+ case ISD::FATAN2:
Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1));
Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2,
@@ -5502,6 +5508,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
case ISD::STRICT_FMAXNUM:
case ISD::STRICT_FREM:
case ISD::STRICT_FPOW:
+ case ISD::STRICT_FATAN2:
Tmp1 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other},
{Node->getOperand(0), Node->getOperand(1)});
Tmp2 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other},
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 2c81c829e75cbb..73c258f0f6f18c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -84,6 +84,8 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
case ISD::FASIN: R = SoftenFloatRes_FASIN(N); break;
case ISD::STRICT_FATAN:
case ISD::FATAN: R = SoftenFloatRes_FATAN(N); break;
+ case ISD::STRICT_FATAN2:
+ case ISD::FATAN2: R = SoftenFloatRes_FATAN2(N); break;
case ISD::FCBRT: R = SoftenFloatRes_FCBRT(N); break;
case ISD::STRICT_FCEIL:
case ISD::FCEIL: R = SoftenFloatRes_FCEIL(N); break;
@@ -366,6 +368,13 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FATAN(SDNode *N) {
RTLIB::ATAN_F80, RTLIB::ATAN_F128, RTLIB::ATAN_PPCF128));
}
+SDValue DAGTypeLegalizer::SoftenFloatRes_FATAN2(SDNode *N) {
+ return SoftenFloatRes_Binary(
+ N,
+ GetFPLibCall(N->getValueType(0), RTLIB::ATAN2_F32, RTLIB::ATAN2_F64,
+ RTLIB::ATAN2_F80, RTLIB::ATAN2_F128, RTLIB::ATAN2_PPCF128));
+}
+
SDValue DAGTypeLegalizer::SoftenFloatRes_FCBRT(SDNode *N) {
return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
RTLIB::CBRT_F32,
@@ -1430,6 +1439,8 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
case ISD::FASIN: ExpandFloatRes_FASIN(N, Lo, Hi); break;
case ISD::STRICT_FATAN:
case ISD::FATAN: ExpandFloatRes_FATAN(N, Lo, Hi); break;
+ case ISD::STRICT_FATAN2:
+ case ISD::FATAN2: ExpandFloatRes_FATAN2(N, Lo, Hi); break;
case ISD::FCBRT: ExpandFloatRes_FCBRT(N, Lo, Hi); break;
case ISD::STRICT_FCEIL:
case ISD::FCEIL: ExpandFloatRes_FCEIL(N, Lo, Hi); break;
@@ -1631,6 +1642,15 @@ void DAGTypeLegalizer::ExpandFloatRes_FATAN(SDNode *N, SDValue &Lo,
Lo, Hi);
}
+void DAGTypeLegalizer::ExpandFloatRes_FATAN2(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ ExpandFloatRes_Binary(N,
+ GetFPLibCall(N->getValueType(0), RTLIB::ATAN2_F32,
+ RTLIB::ATAN2_F64, RTLIB::ATAN2_F80,
+ RTLIB::ATAN2_F128, RTLIB::ATAN2_PPCF128),
+ Lo, Hi);
+}
+
void DAGTypeLegalizer::ExpandFloatRes_FCBRT(SDNode *N, SDValue &Lo,
SDValue &Hi) {
ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), RTLIB::CBRT_F32,
@@ -2673,6 +2693,7 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) {
case ISD::FMINNUM_IEEE:
case ISD::FMUL:
case ISD::FPOW:
+ case ISD::FATAN2:
case ISD::FREM:
case ISD::FSUB: R = PromoteFloatRes_BinOp(N); break;
@@ -3115,6 +3136,7 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) {
case ISD::FMINNUM:
case ISD::FMUL:
case ISD::FPOW:
+ case ISD::FATAN2:
case ISD::FREM:
case ISD::FSUB: R = SoftPromoteHalfRes_BinOp(N); break;
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index d14516ef3e2fbb..868da25ca8cb47 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -567,6 +567,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
SDValue SoftenFloatRes_FACOS(SDNode *N);
SDValue SoftenFloatRes_FASIN(SDNode *N);
SDValue SoftenFloatRes_FATAN(SDNode *N);
+ SDValue SoftenFloatRes_FATAN2(SDNode *N);
SDValue SoftenFloatRes_FMINNUM(SDNode *N);
SDValue SoftenFloatRes_FMAXNUM(SDNode *N);
SDValue SoftenFloatRes_FMINIMUMNUM(SDNode *N);
@@ -661,6 +662,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
void ExpandFloatRes_FACOS (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FASIN (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FATAN (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FATAN2 (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FMINNUM (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FMAXNUM (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FMINIMUMNUM(SDNode *N, SDValue &Lo, SDValue &Hi);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index ffecca78a2252c..a8042fc3e7a69a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -410,6 +410,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::FASIN:
case ISD::FACOS:
case ISD::FATAN:
+ case ISD::FATAN2:
case ISD::FSINH:
case ISD::FCOSH:
case ISD::FTANH:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 9674de77388386..de73ab24053b95 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -164,6 +164,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::USHLSAT:
case ISD::FPOW:
+ case ISD::FATAN2:
case ISD::FREM:
case ISD::FSUB:
case ISD::MUL:
@@ -1293,6 +1294,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::UDIV: case ISD::VP_UDIV:
case ISD::FDIV: case ISD::VP_FDIV:
case ISD::FPOW:
+ case ISD::FATAN2:
case ISD::AND: case ISD::VP_AND:
case ISD::OR: case ISD::VP_OR:
case ISD::XOR: case ISD::VP_XOR:
@@ -4581,6 +4583,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
break;
case ISD::FPOW:
+ case ISD::FATAN2:
case ISD::FREM:
if (unrollExpandedOp())
break;
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 498debf0955980..9b18400bd38da8 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5460,6 +5460,7 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const
case ISD::FASIN:
case ISD::FACOS:
case ISD::FATAN:
+ case ISD::FATAN2:
case ISD::FSINH:
case ISD::FCOSH:
case ISD::FTANH:
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index a981e9cc79289a..40ffebb83b9ff8 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -6866,6 +6866,12 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
getValue(I.getArgOperand(0)), Flags));
return;
}
+ case Intrinsic::atan2:
+ setValue(&I, DAG.getNode(ISD::FATAN2, sdl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)), Flags));
+ return;
case Intrinsic::lround:
case Intrinsic::llround:
case Intrinsic::lrint:
@@ -9358,6 +9364,12 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
if (visitUnaryFloatCall(I, ISD::FATAN))
return;
break;
+ case LibFunc_atan2:
+ case LibFunc_atan2f:
+ case LibFunc_atan2l:
+ if (visitBinaryFloatCall(I, ISD::FATAN2))
+ return;
+ break;
case LibFunc_sinh:
case LibFunc_sinhf:
case LibFunc_sinhl:
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 56fc538172f9fc..703efb70089742 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -227,6 +227,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::STRICT_FACOS: return "strict_facos";
case ISD::FATAN: return "fatan";
case ISD::STRICT_FATAN: return "strict_fatan";
+ case ISD::FATAN2: return "fatan2";
+ case ISD::STRICT_FATAN2: return "strict_fatan2";
case ISD::FSINH: return "fsinh";
case ISD::STRICT_FSINH: return "strict_fsinh";
case ISD::FCOSH: return "fcosh";
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 1f49d60c970593..7a28f7892cbf31 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -783,7 +783,7 @@ void TargetLoweringBase::initActions() {
ISD::SIGN_EXTEND_VECTOR_INREG, ISD::ZERO_EXTEND_VECTOR_INREG,
ISD::SPLAT_VECTOR, ISD::LRINT, ISD::LLRINT, ISD::LROUND,
ISD::LLROUND, ISD::FTAN, ISD::FACOS, ISD::FASIN, ISD::FATAN,
- ISD::FCOSH, ISD::FSINH, ISD::FTANH},
+ ISD::FCOSH, ISD::FSINH, ISD::FTANH, ISD::FATAN2},
VT, Expand);
// Constrained floating-point operations default to expand.
@@ -842,7 +842,8 @@ void TargetLoweringBase::initActions() {
ISD::FEXP, ISD::FEXP2, ISD::FEXP10, ISD::FFLOOR,
ISD::FNEARBYINT, ISD::FCEIL, ISD::FRINT, ISD::FTRUNC,
ISD::FROUNDEVEN, ISD::FTAN, ISD::FACOS, ISD::FASIN,
- ISD::FATAN, ISD::FCOSH, ISD::FSINH, ISD::FTANH},
+ ISD::FATAN, ISD::FCOSH, ISD::FSINH, ISD::FTANH,
+ ISD::FATAN2},
{MVT::f32, MVT::f64, MVT::f128}, Expand);
// FIXME: Query RuntimeLibCalls to make the decision.
@@ -850,7 +851,7 @@ void TargetLoweringBase::initActions() {
{MVT::f32, MVT::f64, MVT::f128}, LibCall);
setOperationAction({ISD::FTAN, ISD::FACOS, ISD::FASIN, ISD::FATAN, ISD::FCOSH,
- ISD::FSINH, ISD::FTANH},
+ ISD::FSINH, ISD::FTANH, ISD::FATAN2},
MVT::f16, Promote);
// Default ISD::TRAP to expand (which turns it into abort).
setOperationAction(ISD::TRAP, MVT::Other, Expand);
diff --git a/llvm/lib/IR/RuntimeLibcalls.cpp b/llvm/lib/IR/RuntimeLibcalls.cpp
index d806f8093459ee..06167559a77697 100644
--- a/llvm/lib/IR/RuntimeLibcalls.cpp
+++ b/llvm/lib/IR/RuntimeLibcalls.cpp
@@ -49,6 +49,7 @@ void RuntimeLibcallsInfo::initLibcalls(const Triple &TT) {
setLibcallName(RTLIB::ASIN_F128, "asinf128");
setLibcallName(RTLIB::ACOS_F128, "acosf128");
setLibcallName(RTLIB::ATAN_F128, "atanf128");
+ setLibcallName(RTLIB::ATAN2_F128, "atan2f128");
setLibcallName(RTLIB::SINH_F128, "sinhf128");
setLibcallName(RTLIB::COSH_F128, "coshf128");
setLibcallName(RTLIB::TANH_F128, "tanhf128");
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index de88db22279797..1c59929ad8cfc4 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -858,6 +858,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FASIN , MVT::f80, Expand);
setOperationAction(ISD::FACOS , MVT::f80, Expand);
setOperationAction(ISD::FATAN , MVT::f80, Expand);
+ setOperationAction(ISD::FATAN2 , MVT::f80, Expand);
setOperationAction(ISD::FSINH , MVT::f80, Expand);
setOperationAction(ISD::FCOSH , MVT::f80, Expand);
setOperationAction(ISD::FTANH , MVT::f80, Expand);
@@ -2562,6 +2563,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
{ISD::FACOS, ISD::STRICT_FACOS,
ISD::FASIN, ISD::STRICT_FASIN,
ISD::FATAN, ISD::STRICT_FATAN,
+ ISD::FATAN2, ISD::STRICT_FATAN2,
ISD::FCEIL, ISD::STRICT_FCEIL,
ISD::FCOS, ISD::STRICT_FCOS,
ISD::FCOSH, ISD::STRICT_FCOSH,
diff --git a/llvm/test/Assembler/fp-intrinsics-attr.ll b/llvm/test/Assembler/fp-intrinsics-attr.ll
index da6507f051766c..5b9a44710763e4 100644
--- a/llvm/test/Assembler/fp-intrinsics-attr.ll
+++ b/llvm/test/Assembler/fp-intrinsics-attr.ll
@@ -105,6 +105,11 @@ define void @func(double %a, double %b, double %c, i32 %i) strictfp {
metadata !"round.dynamic",
metadata !"fpexcept.strict")
+ %atan2 = call double @llvm.experimental.constrained.atan2.f64(
+ double %a, double %b,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+
%cosh = call double @llvm.experimental.constrained.cosh.f64(
double %a,
metadata !"round.dynamic",
@@ -291,6 +296,9 @@ declare double @llvm.experimental.constrained.acos.f64(double, metadata, metadat
declare double @llvm.experimental.constrained.atan.f64(double, metadata, metadata)
; CHECK: @llvm.experimental.constrained.atan.f64({{.*}}) #[[ATTR1]]
+declare double @llvm.experimental.constrained.atan2.f64(double, double, metadata, metadata)
+; CHECK: @llvm.experimental.constrained.atan2.f64({{.*}}) #[[ATTR1]]
+
declare double @llvm.experimental.constrained.sinh.f64(double, metadata, metadata)
; CHECK: @llvm.experimental.constrained.sinh.f64({{.*}}) #[[ATTR1]]
diff --git a/llvm/test/CodeGen/X86/fp-intrinsics.ll b/llvm/test/CodeGen/X86/fp-intrinsics.ll
index bb87252e0b9b08..3a8facff83fd55 100644
--- a/llvm/test/CodeGen/X86/fp-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/fp-intrinsics.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --function fatan2 --version 2
; RUN: llc -O3 -mtriple=i686-pc-linux -mattr=+cmov < %s | FileCheck %s --check-prefix=X87
; RUN: llc -O3 -mtriple=i686-pc-linux -mattr=sse2 < %s | FileCheck %s --check-prefix=X86-SSE
; RUN: llc -O3 -mtriple=x86_64-pc-linux < %s | FileCheck %s --check-prefix=SSE
@@ -2962,6 +2962,64 @@ entry:
ret double %result
}
+; Verify that atan2(42.1, 3.0) isn't simplified when the rounding mode is unknown.
+define double @fatan2() #0 {
+; X87-LABEL: fatan2:
+; X87: # %bb.0: # %entry
+; X87-NEXT: subl $28, %esp
+; X87-NEXT: .cfi_def_cfa_offset 32
+; X87-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}
+; X87-NEXT: fstpl {{[0-9]+}}(%esp)
+; X87-NEXT: fldl {{\.?LCPI[0-9]+_[0-9]+}}
+; X87-NEXT: fstpl (%esp)
+; X87-NEXT: wait
+; X87-NEXT: calll atan2
+; X87-NEXT: addl $28, %esp
+; X87-NEXT: .cfi_def_cfa_offset 4
+; X87-NEXT: retl
+;
+; X86-SSE-LABEL: fatan2:
+; X86-SSE: # %bb.0: # %entry
+; X86-SSE-NEXT: subl $28, %esp
+; X86-SSE-NEXT: .cfi_def_cfa_offset 32
+; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = [3.0E+0,0.0E+0]
+; X86-SSE-NEXT: movsd %xmm0, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; X86-SSE-NEXT: movsd %xmm0, (%esp)
+; X86-SSE-NEXT: calll atan2
+; X86-SSE-NEXT: addl $28, %esp
+; X86-SSE-NEXT: .cfi_def_cfa_offset 4
+; X86-SSE-NEXT: retl
+;
+; SSE-LABEL: fatan2:
+; SSE: # %bb.0: # %entry
+; SSE-NEXT: pushq %rax
+; SSE-NEXT: .cfi_def_cfa_offset 16
+; SSE-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; SSE-NEXT: movsd {{.*#+}} xmm1 = [3.0E+0,0.0E+0]
+; SSE-NEXT: callq atan2 at PLT
+; SSE-NEXT: popq %rax
+; SSE-NEXT: .cfi_def_cfa_offset 8
+; SSE-NEXT: retq
+;
+; AVX-LABEL: fatan2:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: pushq %rax
+; AVX-NEXT: .cfi_def_cfa_offset 16
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [3.0E+0,0.0E+0]
+; AVX-NEXT: callq atan2 at PLT
+; AVX-NEXT: popq %rax
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
+entry:
+ %result = call double @llvm.experimental.constrained.atan2.f64(double 42.1,
+ double 3.0,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret double %result
+}
+
; Verify that cosh(42.0) isn't simplified when the rounding mode is unknown.
define double @fcosh() #0 {
; X87-LABEL: fcosh:
@@ -3132,6 +3190,7 @@ declare double @llvm.experimental.constrained.tan.f64(double, metadata, metadata
declare double @llvm.experimental.constrained.asin.f64(double, metadata, metadata)
declare double @llvm.experimental.constrained.acos.f64(double, metadata, metadata)
declare double @llvm.experimental.constrained.atan.f64(double, metadata, metadata)
+declare double @llvm.experimental.constrained.atan2.f64(double, double, metadata, metadata)
declare double @llvm.experimental.constrained.sinh.f64(double, metadata, metadata)
declare double @llvm.experimental.constrained.cosh.f64(double, metadata, metadata)
declare double @llvm.experimental.constrained.tanh.f64(double, metadata, metadata)
diff --git a/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll b/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll
index 9e84dfa5c41ae6..ffaa9f6297ed8c 100644
--- a/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll
+++ b/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll
@@ -1247,6 +1247,50 @@ entry:
ret fp128 %atan
}
+define fp128 @atan2(fp128 %x, fp128 %y) nounwind strictfp {
+; ANDROID-LABEL: atan2:
+; ANDROID: # %bb.0: # %entry
+; ANDROID-NEXT: pushq %rax
+; ANDROID-NEXT: callq atan2l at PLT
+; ANDROID-NEXT: popq %rax
+; ANDROID-NEXT: retq
+;
+; GNU-LABEL: atan2:
+; GNU: # %bb.0: # %entry
+; GNU-NEXT: pushq %rax
+; GNU-NEXT: callq atan2f128 at PLT
+; GNU-NEXT: popq %rax
+; GNU-NEXT: retq
+;
+; X86-LABEL: atan2:
+; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %esi
+; X86-NEXT: subl $24, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: subl $12, %esp
+; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl %eax
+; X86-NEXT: calll atan2l
+; X86-NEXT: addl $44, %esp
+; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: movaps %xmm0, (%esi)
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: addl $24, %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: retl $4
+entry:
+ %atan2 = call fp128 @llvm.experimental.constrained.atan2.f128(fp128 %x, fp128 %y, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+ ret fp128 %atan2
+}
+
define fp128 @tan(fp128 %x) nounwind strictfp {
; ANDROID-LABEL: tan:
; ANDROID: # %bb.0: # %entry
@@ -1926,7 +1970,9 @@ declare fp128 @llvm.experimental.constrained.fdiv.f128(fp128, fp128, metadata, m
declare fp128 @llvm.experimental.constrained.fma.f128(fp128, fp128, fp128, metadata, metadata)
declare fp128 @llvm.experimental.constrained.frem.f128(fp128, fp128, metadata, metadata)
declare fp128 @llvm.experimental.constrained.ceil.f128(fp128, metadata)
+declare fp128 @llvm.experimental.constrained.acos.f128(fp128, metadata, metadata)
declare fp128 @llvm.experimental.constrained.cos.f128(fp128, metadata, metadata)
+declare fp128 @llvm.experimental.constrained.cosh.f128(fp128, metadata, metadata)
declare fp128 @llvm.experimental.constrained.exp.f128(fp128, metadata, metadata)
declare fp128 @llvm.experimental.constrained.exp2.f128(fp128, metadata, metadata)
declare fp128 @llvm.experimental.constrained.floor.f128(fp128, metadata)
@@ -1941,9 +1987,14 @@ declare fp128 @llvm.experimental.constrained.powi.f128(fp128, i32, metadata, met
declare fp128 @llvm.experimental.constrained.rint.f128(fp128, metadata, metadata)
declare fp128 @llvm.experimental.constrained.round.f128(fp128, metadata)
declare fp128 @llvm.experimental.constrained.roundeven.f128(fp128, metadata)
+declare fp128 @llvm.experimental.constrained.asin.f128(fp128, metadata, metadata)
declare fp128 @llvm.experimental.constrained.sin.f128(fp128, metadata, metadata)
+declare fp128 @llvm.experimental.constrained.sinh.f128(fp128, metadata, metadata)
declare fp128 @llvm.experimental.constrained.sqrt.f128(fp128, metadata, metadata)
+declare fp128 @llvm.experimental.constrained.atan.f128(fp128, metadata, metadata)
+declare fp128 @llvm.experimental.constrained.atan2.f128(fp128, fp128, metadata, metadata)
declare fp128 @llvm.experimental.constrained.tan.f128(fp128, metadata, metadata)
+declare fp128 @llvm.experimental.constrained.tanh.f128(fp128, metadata, metadata)
declare fp128 @llvm.experimental.constrained.trunc.f128(fp128, metadata)
declare i32 @llvm.experimental.constrained.lrint.i32.f128(fp128, metadata, metadata)
declare i64 @llvm.experimental.constrained.llrint.i64.f128(fp128, metadata, metadata)
diff --git a/llvm/test/CodeGen/X86/fp80-strict-libcalls.ll b/llvm/test/CodeGen/X86/fp80-strict-libcalls.ll
index c14e99f3acb34e..8bbc6247dbafd6 100644
--- a/llvm/test/CodeGen/X86/fp80-strict-libcalls.ll
+++ b/llvm/test/CodeGen/X86/fp80-strict-libcalls.ll
@@ -629,6 +629,35 @@ entry:
ret x86_fp80 %atan
}
+define x86_fp80 @atan2(x86_fp80 %x, x86_fp80 %y) nounwind strictfp {
+; X86-LABEL: atan2:
+; X86: # %bb.0: # %entry
+; X86-NEXT: subl $24, %esp
+; X86-NEXT: fldt {{[0-9]+}}(%esp)
+; X86-NEXT: fldt {{[0-9]+}}(%esp)
+; X86-NEXT: fstpt {{[0-9]+}}(%esp)
+; X86-NEXT: fstpt (%esp)
+; X86-NEXT: wait
+; X86-NEXT: calll atan2l
+; X86-NEXT: addl $24, %esp
+; X86-NEXT: retl
+;
+; X64-LABEL: atan2:
+; X64: # %bb.0: # %entry
+; X64-NEXT: subq $40, %rsp
+; X64-NEXT: fldt {{[0-9]+}}(%rsp)
+; X64-NEXT: fldt {{[0-9]+}}(%rsp)
+; X64-NEXT: fstpt {{[0-9]+}}(%rsp)
+; X64-NEXT: fstpt (%rsp)
+; X64-NEXT: wait
+; X64-NEXT: callq atan2l at PLT
+; X64-NEXT: addq $40, %rsp
+; X64-NEXT: retq
+entry:
+ %atan2 = call x86_fp80 @llvm.experimental.constrained.atan2.f80(x86_fp80 %x, x86_fp80 %y, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+ ret x86_fp80 %atan2
+}
+
define x86_fp80 @tan(x86_fp80 %x) nounwind strictfp {
; X86-LABEL: tan:
; X86: # %bb.0: # %entry
@@ -809,7 +838,9 @@ attributes #0 = { strictfp }
declare x86_fp80 @llvm.experimental.constrained.fma.f80(x86_fp80, x86_fp80, x86_fp80, metadata, metadata)
declare x86_fp80 @llvm.experimental.constrained.frem.f80(x86_fp80, x86_fp80, metadata, metadata)
declare x86_fp80 @llvm.experimental.constrained.ceil.f80(x86_fp80, metadata)
+declare x86_fp80 @llvm.experimental.constrained.acos.f80(x86_fp80, metadata, metadata)
declare x86_fp80 @llvm.experimental.constrained.cos.f80(x86_fp80, metadata, metadata)
+declare x86_fp80 @llvm.experimental.constrained.cosh.f80(x86_fp80, metadata, metadata)
declare x86_fp80 @llvm.experimental.constrained.exp.f80(x86_fp80, metadata, metadata)
declare x86_fp80 @llvm.experimental.constrained.exp2.f80(x86_fp80, metadata, metadata)
declare x86_fp80 @llvm.experimental.constrained.floor.f80(x86_fp80, metadata)
@@ -824,8 +855,13 @@ declare x86_fp80 @llvm.experimental.constrained.powi.f80(x86_fp80, i32, metadata
declare x86_fp80 @llvm.experimental.constrained.rint.f80(x86_fp80, metadata, metadata)
declare x86_fp80 @llvm.experimental.constrained.round.f80(x86_fp80, metadata)
declare x86_fp80 @llvm.experimental.constrained.roundeven.f80(x86_fp80, metadata)
+declare x86_fp80 @llvm.experimental.constrained.asin.f80(x86_fp80, metadata, metadata)
declare x86_fp80 @llvm.experimental.constrained.sin.f80(x86_fp80, metadata, metadata)
+declare x86_fp80 @llvm.experimental.constrained.sinh.f80(x86_fp80, metadata, metadata)
+declare x86_fp80 @llvm.experimental.constrained.atan.f80(x86_fp80, metadata, metadata)
+declare x86_fp80 @llvm.experimental.constrained.atan2.f80(x86_fp80, x86_fp80, metadata, metadata)
declare x86_fp80 @llvm.experimental.constrained.tan.f80(x86_fp80, metadata, metadata)
+declare x86_fp80 @llvm.experimental.constrained.tanh.f80(x86_fp80, metadata, metadata)
declare x86_fp80 @llvm.experimental.constrained.trunc.f80(x86_fp80, metadata)
declare i32 @llvm.experimental.constrained.lrint.i32.f80(x86_fp80, metadata, metadata)
declare i64 @llvm.experimental.constrained.llrint.i64.f80(x86_fp80, metadata, metadata)
diff --git a/llvm/test/CodeGen/X86/llvm.atan2.ll b/llvm/test/CodeGen/X86/llvm.atan2.ll
new file mode 100644
index 00000000000000..ef2e4be36203be
--- /dev/null
+++ b/llvm/test/CodeGen/X86/llvm.atan2.ll
@@ -0,0 +1,80 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
+
+define half @use_atan2f16(half %a, half %b) nounwind {
+; CHECK-LABEL: use_atan2f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: callq __extendhfsf2 at PLT
+; CHECK-NEXT: movss %xmm0, (%rsp) # 4-byte Spill
+; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT: callq __extendhfsf2 at PLT
+; CHECK-NEXT: movss (%rsp), %xmm1 # 4-byte Reload
+; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero
+; CHECK-NEXT: callq atan2f at PLT
+; CHECK-NEXT: callq __truncsfhf2 at PLT
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: retq
+ %x = call half @llvm.atan2.f16(half %a, half %b)
+ ret half %x
+}
+
+define float @use_atan2f32(float %a, float %b) nounwind {
+; CHECK-LABEL: use_atan2f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: jmp atan2f at PLT # TAILCALL
+ %x = call float @llvm.atan2.f32(float %a, float %b)
+ ret float %x
+}
+
+define double @use_atan2f64(double %a, double %b) nounwind {
+; CHECK-LABEL: use_atan2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: jmp atan2 at PLT # TAILCALL
+ %x = call double @llvm.atan2.f64(double %a, double %b)
+ ret double %x
+}
+
+define x86_fp80 @use_atan2f80(x86_fp80 %a, x86_fp80 %b) nounwind {
+; CHECK-LABEL: use_atan2f80:
+; CHECK: # %bb.0:
+; CHECK-NEXT: subq $40, %rsp
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fstpt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fstpt (%rsp)
+; CHECK-NEXT: callq atan2l at PLT
+; CHECK-NEXT: addq $40, %rsp
+; CHECK-NEXT: retq
+ %x = call x86_fp80 @llvm.atan2.f80(x86_fp80 %a, x86_fp80 %b)
+ ret x86_fp80 %x
+}
+
+define fp128 @use_atan2fp128(fp128 %a, fp128 %b) nounwind {
+; CHECK-LABEL: use_atan2fp128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: jmp atan2f128 at PLT # TAILCALL
+ %x = call fp128 @llvm.atan2.f128(fp128 %a, fp128 %b)
+ ret fp128 %x
+}
+
+define ppc_fp128 @use_atan2ppc_fp128(ppc_fp128 %a, ppc_fp128 %b) nounwind {
+; CHECK-LABEL: use_atan2ppc_fp128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: callq atan2l at PLT
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: retq
+ %x = call ppc_fp128 @llvm.atan2.ppcf128(ppc_fp128 %a, ppc_fp128 %b)
+ ret ppc_fp128 %x
+}
+
+declare half @llvm.atan2.f16(half, half)
+declare float @llvm.atan2.f32(float, float)
+declare double @llvm.atan2.f64(double, double)
+declare x86_fp80 @llvm.atan2.f80(x86_fp80, x86_fp80)
+declare fp128 @llvm.atan2.f128(fp128, fp128)
+declare ppc_fp128 @llvm.atan2.ppcf128(ppc_fp128, ppc_fp128)
diff --git a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
index b486014678466e..21dfdc3c2abe49 100644
--- a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
@@ -8672,6 +8672,263 @@ entry:
ret <4 x double> %atan
}
+define <1 x float> @constrained_vector_atan2_v1f32() #0 {
+; CHECK-LABEL: constrained_vector_atan2_v1f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: movss {{.*#+}} xmm1 = [2.3E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: callq atan2f at PLT
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_atan2_v1f32:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: pushq %rax
+; AVX-NEXT: .cfi_def_cfa_offset 16
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: vmovss {{.*#+}} xmm1 = [2.3E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: callq atan2f at PLT
+; AVX-NEXT: popq %rax
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
+entry:
+ %atan2 = call <1 x float> @llvm.experimental.constrained.atan2.v1f32(
+ <1 x float> <float 42.0>,
+ <1 x float> <float 23.0>,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <1 x float> %atan2
+}
+
+define <2 x double> @constrained_vector_atan2_v2f64() #0 {
+; CHECK-LABEL: constrained_vector_atan2_v2f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; CHECK-NEXT: movsd {{.*#+}} xmm1 = [2.3100000000000001E+1,0.0E+0]
+; CHECK-NEXT: callq atan2 at PLT
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; CHECK-NEXT: movsd {{.*#+}} xmm1 = [2.3E+1,0.0E+0]
+; CHECK-NEXT: callq atan2 at PLT
+; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_atan2_v2f64:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: subq $24, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 32
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [2.3100000000000001E+1,0.0E+0]
+; AVX-NEXT: callq atan2 at PLT
+; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [2.3E+1,0.0E+0]
+; AVX-NEXT: callq atan2 at PLT
+; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX-NEXT: addq $24, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
+entry:
+ %atan2 = call <2 x double> @llvm.experimental.constrained.atan2.v2f64(
+ <2 x double> <double 42.0, double 42.1>,
+ <2 x double> <double 23.0, double 23.1>,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <2 x double> %atan2
+}
+
+define <3 x float> @constrained_vector_atan2_v3f32() #0 {
+; CHECK-LABEL: constrained_vector_atan2_v3f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: subq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: movss {{.*#+}} xmm1 = [2.5E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: callq atan2f at PLT
+; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: movss {{.*#+}} xmm1 = [2.3E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: callq atan2f at PLT
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: movss {{.*#+}} xmm1 = [2.4E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: callq atan2f at PLT
+; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
+; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: addq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_atan2_v3f32:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: subq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 48
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: vmovss {{.*#+}} xmm1 = [2.5E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: callq atan2f at PLT
+; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: vmovss {{.*#+}} xmm1 = [2.3E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: callq atan2f at PLT
+; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: vmovss {{.*#+}} xmm1 = [2.4E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: callq atan2f at PLT
+; AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
+; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
+; AVX-NEXT: vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3]
+; AVX-NEXT: addq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
+entry:
+ %atan2 = call <3 x float> @llvm.experimental.constrained.atan2.v3f32(
+ <3 x float> <float 42.0, float 43.0, float 44.0>,
+ <3 x float> <float 23.0, float 24.0, float 25.0>,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <3 x float> %atan2
+}
+
+define <3 x double> @constrained_vector_atan2_v3f64() #0 {
+; CHECK-LABEL: constrained_vector_atan2_v3f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; CHECK-NEXT: movsd {{.*#+}} xmm1 = [2.3100000000000001E+1,0.0E+0]
+; CHECK-NEXT: callq atan2 at PLT
+; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; CHECK-NEXT: movsd {{.*#+}} xmm1 = [2.3E+1,0.0E+0]
+; CHECK-NEXT: callq atan2 at PLT
+; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
+; CHECK-NEXT: movsd {{.*#+}} xmm1 = [2.3199999999999999E+1,0.0E+0]
+; CHECK-NEXT: callq atan2 at PLT
+; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
+; CHECK-NEXT: wait
+; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload
+; CHECK-NEXT: # xmm0 = mem[0],zero
+; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
+; CHECK-NEXT: # xmm1 = mem[0],zero
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_atan2_v3f64:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: subq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 48
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [2.3100000000000001E+1,0.0E+0]
+; AVX-NEXT: callq atan2 at PLT
+; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [2.3E+1,0.0E+0]
+; AVX-NEXT: callq atan2 at PLT
+; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
+; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [2.3199999999999999E+1,0.0E+0]
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: callq atan2 at PLT
+; AVX-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload
+; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX-NEXT: addq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
+entry:
+ %atan2 = call <3 x double> @llvm.experimental.constrained.atan2.v3f64(
+ <3 x double> <double 42.0, double 42.1, double 42.2>,
+ <3 x double> <double 23.0, double 23.1, double 23.2>,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <3 x double> %atan2
+}
+
+define <4 x double> @constrained_vector_atan2_v4f64() #0 {
+; CHECK-LABEL: constrained_vector_atan2_v4f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: subq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; CHECK-NEXT: movsd {{.*#+}} xmm1 = [2.3100000000000001E+1,0.0E+0]
+; CHECK-NEXT: callq atan2 at PLT
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; CHECK-NEXT: movsd {{.*#+}} xmm1 = [2.3E+1,0.0E+0]
+; CHECK-NEXT: callq atan2 at PLT
+; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0]
+; CHECK-NEXT: movsd {{.*#+}} xmm1 = [2.3300000000000001E+1,0.0E+0]
+; CHECK-NEXT: callq atan2 at PLT
+; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
+; CHECK-NEXT: movsd {{.*#+}} xmm1 = [2.3199999999999999E+1,0.0E+0]
+; CHECK-NEXT: callq atan2 at PLT
+; CHECK-NEXT: movaps %xmm0, %xmm1
+; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: addq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_atan2_v4f64:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: subq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 48
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0]
+; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [2.3300000000000001E+1,0.0E+0]
+; AVX-NEXT: callq atan2 at PLT
+; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
+; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [2.3199999999999999E+1,0.0E+0]
+; AVX-NEXT: callq atan2 at PLT
+; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [2.3100000000000001E+1,0.0E+0]
+; AVX-NEXT: callq atan2 at PLT
+; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [2.3E+1,0.0E+0]
+; AVX-NEXT: callq atan2 at PLT
+; AVX-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
+; AVX-NEXT: addq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
+entry:
+ %atan2 = call <4 x double> @llvm.experimental.constrained.atan2.v4f64(
+ <4 x double> <double 42.0, double 42.1,
+ double 42.2, double 42.3>,
+ <4 x double> <double 23.0, double 23.1,
+ double 23.2, double 23.3>,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <4 x double> %atan2
+}
+
define <1 x float> @constrained_vector_cosh_v1f32() #0 {
; CHECK-LABEL: constrained_vector_cosh_v1f32:
; CHECK: # %bb.0: # %entry
@@ -9546,6 +9803,7 @@ declare <4 x double> @llvm.experimental.constrained.tan.v4f64(<4 x double>, meta
declare <4 x double> @llvm.experimental.constrained.asin.v4f64(<4 x double>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.acos.v4f64(<4 x double>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.atan.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.atan2.v4f64(<4 x double>, <4 x double>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.sinh.v4f64(<4 x double>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.cosh.v4f64(<4 x double>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.tanh.v4f64(<4 x double>, metadata, metadata)
diff --git a/llvm/test/Feature/fp-intrinsics.ll b/llvm/test/Feature/fp-intrinsics.ll
index 80f8b15abfaabe..ada22c39abc9e7 100644
--- a/llvm/test/Feature/fp-intrinsics.ll
+++ b/llvm/test/Feature/fp-intrinsics.ll
@@ -184,7 +184,7 @@ entry:
ret double %result
}
-; Verify that atan(42.0) isn't simplified when the rounding mode is unknown.
+; Verify that atan(42.0, 23.0) isn't simplified when the rounding mode is unknown.
; CHECK-LABEL: fatan
; CHECK: call double @llvm.experimental.constrained.atan
define double @fatan() #0 {
@@ -195,6 +195,19 @@ entry:
ret double %result
}
+; Verify that atan2(42.0) isn't simplified when the rounding mode is unknown.
+; CHECK-LABEL: fatan2
+; CHECK: call double @llvm.experimental.constrained.atan2
+define double @fatan2() #0 {
+entry:
+ %result = call double @llvm.experimental.constrained.atan2.f64(
+ double 42.0,
+ double 23.0,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret double %result
+}
+
; Verify that cosh(42.0) isn't simplified when the rounding mode is unknown.
; CHECK-LABEL: fcosh
; CHECK: call double @llvm.experimental.constrained.cosh
>From de9269f0f4592040502944e9360af3300cc1a3c4 Mon Sep 17 00:00:00 2001
From: Tex Riddell <texr at microsoft.com>
Date: Thu, 10 Oct 2024 15:32:39 -0700
Subject: [PATCH 2/2] Remove GlobalISel changes not needed yet
---
llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td | 1 -
llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 4 ----
llvm/lib/CodeGen/GlobalISel/Utils.cpp | 2 --
3 files changed, 7 deletions(-)
diff --git a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
index 83bf3c335cac89..d9121cf166e5aa 100644
--- a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
+++ b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
@@ -154,7 +154,6 @@ def : GINodeEquiv<G_FTAN, ftan>;
def : GINodeEquiv<G_FACOS, facos>;
def : GINodeEquiv<G_FASIN, fasin>;
def : GINodeEquiv<G_FATAN, fatan>;
-def : GINodeEquiv<G_FATAN2, fatan2>;
def : GINodeEquiv<G_FCOSH, fcosh>;
def : GINodeEquiv<G_FSINH, fsinh>;
def : GINodeEquiv<G_FTANH, ftanh>;
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index d5222641342ac1..3b2fd95076c465 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -457,8 +457,6 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
RTLIBCASE(ACOS_F);
case TargetOpcode::G_FATAN:
RTLIBCASE(ATAN_F);
- case TargetOpcode::G_FATAN2:
- RTLIBCASE(ATAN2_F);
case TargetOpcode::G_FSINH:
RTLIBCASE(SINH_F);
case TargetOpcode::G_FCOSH:
@@ -1204,7 +1202,6 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
case TargetOpcode::G_FACOS:
case TargetOpcode::G_FASIN:
case TargetOpcode::G_FATAN:
- case TargetOpcode::G_FATAN2:
case TargetOpcode::G_FCOSH:
case TargetOpcode::G_FSINH:
case TargetOpcode::G_FTANH:
@@ -3125,7 +3122,6 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
case TargetOpcode::G_FACOS:
case TargetOpcode::G_FASIN:
case TargetOpcode::G_FATAN:
- case TargetOpcode::G_FATAN2:
case TargetOpcode::G_FCOSH:
case TargetOpcode::G_FSINH:
case TargetOpcode::G_FTANH:
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index 722ceea29c951c..9574464207d99f 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -828,7 +828,6 @@ bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI,
case TargetOpcode::G_FACOS:
case TargetOpcode::G_FASIN:
case TargetOpcode::G_FATAN:
- case TargetOpcode::G_FATAN2:
case TargetOpcode::G_FCOSH:
case TargetOpcode::G_FSINH:
case TargetOpcode::G_FTANH:
@@ -1716,7 +1715,6 @@ bool llvm::isPreISelGenericFloatingPointOpcode(unsigned Opc) {
case TargetOpcode::G_FACOS:
case TargetOpcode::G_FASIN:
case TargetOpcode::G_FATAN:
- case TargetOpcode::G_FATAN2:
case TargetOpcode::G_FCOSH:
case TargetOpcode::G_FSINH:
case TargetOpcode::G_FTANH:
More information about the llvm-commits
mailing list