[llvm] 1d87433 - [x86] Add tan intrinsic part 4 (#90503)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 5 12:01:38 PDT 2024
Author: Farzon Lotfi
Date: 2024-06-05T15:01:33-04:00
New Revision: 1d8743359360d3d960d599e03e62537dc108aae2
URL: https://github.com/llvm/llvm-project/commit/1d8743359360d3d960d599e03e62537dc108aae2
DIFF: https://github.com/llvm/llvm-project/commit/1d8743359360d3d960d599e03e62537dc108aae2.diff
LOG: [x86] Add tan intrinsic part 4 (#90503)
This change implements part of #87367's investigation into supporting
IEEE math operations as intrinsics, which was discussed in this RFC:
https://discourse.llvm.org/t/rfc-all-the-math-intrinsics/78294
Much of this change follows how G_FSIN and G_FCOS are handled; a short
usage sketch of the new intrinsic appears after the change list below.
Changes:
- `llvm/docs/GlobalISel/GenericOpcode.rst` - Document the `G_FTAN`
opcode
- `llvm/docs/LangRef.rst` - Document the tan intrinsic
- `llvm/include/llvm/Analysis/VecFuncs.def` - Map the `llvm.tan`
intrinsic to the same vector library functions as the tan/tanf libcalls
- `llvm/include/llvm/CodeGen/BasicTTIImpl.h` - Map the tan intrinsic to
`ISD::FTAN`
- `llvm/include/llvm/CodeGen/ISDOpcodes.h` - Define ISD opcodes for
`FTAN` and `STRICT_FTAN`
- `llvm/include/llvm/IR/Intrinsics.td` - Create the tan intrinsic
- `llvm/include/llvm/IR/RuntimeLibcalls.def` - Define tan libcall
mappings
- `llvm/include/llvm/Target/GenericOpcodes.td` - Define the `G_FTAN`
opcode
- `llvm/include/llvm/Support/TargetOpcodes.def` - Create a `G_FTAN`
opcode handler
- `llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td` - Map
`G_FTAN` to `ftan`
- `llvm/include/llvm/Target/TargetSelectionDAG.td` - Define `ftan`,
`strict_ftan`, and `any_ftan` and map them to the ISD opcodes for `FTAN`
and `STRICT_FTAN`
- `llvm/lib/Analysis/VectorUtils.cpp` - Mark the tan intrinsic as
trivially vectorizable
- `llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp` - Map the tan intrinsic
to the `G_FTAN` opcode
- `llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp` - Add `G_FTAN` to
the list of floating-point math operations and associate it with the
`TAN_F` runtime libcall
- `llvm/lib/CodeGen/GlobalISel/Utils.cpp` - Extend the common
floating-point opcode handling (`isKnownNeverNaN`,
`isPreISelGenericFloatingPointOpcode`) to cover `G_FTAN`
- `llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp` - Add the libcall
expansion for `FTAN` and `STRICT_FTAN`, and handle both opcodes in
`PromoteNode`
- `llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp` - Add `FTAN`
and `STRICT_FTAN` handling to the float-type legalizer (softening,
expansion, and promotion)
- `llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h` - Define
`SoftenFloatRes_FTAN` and `ExpandFloatRes_FTAN`.
- `llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp` - Handle `FTAN`
when legalizing vector operations
- `llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp` - Handle
`FTAN` when scalarizing, splitting, and widening vector results
- `llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp` - Teach
`isKnownNeverNaN` about `FTAN`, matching the existing `FSIN`/`FCOS`
handling
- `llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp` - Map
`LibFunc_tan`, `LibFunc_tanf`, and `LibFunc_tanl` to `ISD::FTAN`. Map
`Intrinsic::tan` to `ISD::FTAN` and add SelectionDAG handling for
`Intrinsic::tan`
- `llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp` - Define `ftan`
and `strict_ftan` names for the equivalent ISD opcodes.
- `llvm/lib/CodeGen/TargetLoweringBase.cpp` - Define the `TAN_F128`
libcall name and a default target lowering action for `ISD::FTAN`
- `llvm/lib/Target/X86/X86ISelLowering.cpp` - Add x86_64 lowering for
the tan intrinsic

Resolves https://github.com/llvm/llvm-project/issues/70082
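
For reference, here is a minimal usage sketch of the new intrinsic. It
mirrors the added llvm/test/CodeGen/X86/llvm.tan.ll test; the function
name below is illustrative only and is not part of the patch:

  declare float @llvm.tan.f32(float)

  define float @example_tan(float %x) nounwind {
    ; SelectionDAGBuilder maps this call to ISD::FTAN; on x86_64 the
    ; scalar f32 form is lowered to a call to the tanf libcall.
    %r = call float @llvm.tan.f32(float %x)
    ret float %r
  }

Vector forms (e.g. <4 x float>) either map to a vector math library
entry such as __svml_tanf4 when one is enabled, or are unrolled into
scalar libcalls, as exercised by the updated tests below.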
Added:
llvm/test/CodeGen/X86/llvm.tan.ll
Modified:
llvm/include/llvm/Analysis/VecFuncs.def
llvm/include/llvm/CodeGen/BasicTTIImpl.h
llvm/include/llvm/CodeGen/ISDOpcodes.h
llvm/include/llvm/IR/RuntimeLibcalls.def
llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
llvm/include/llvm/Target/TargetSelectionDAG.td
llvm/lib/Analysis/VectorUtils.cpp
llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
llvm/lib/CodeGen/GlobalISel/Utils.cpp
llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
llvm/lib/CodeGen/TargetLoweringBase.cpp
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/vec-libcalls.ll
llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls.ll
llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls-VF2-VF8.ll
llvm/test/Transforms/LoopVectorize/X86/svml-calls.ll
llvm/test/Transforms/LoopVectorize/X86/veclib-calls.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/Analysis/VecFuncs.def b/llvm/include/llvm/Analysis/VecFuncs.def
index e12eb7095b908..402189769c20c 100644
--- a/llvm/include/llvm/Analysis/VecFuncs.def
+++ b/llvm/include/llvm/Analysis/VecFuncs.def
@@ -49,6 +49,7 @@ TLI_DEFINE_VECFUNC("llvm.sin.f32", "vsinf", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("cosf", "vcosf", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("llvm.cos.f32", "vcosf", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("tanf", "vtanf", FIXED(4), "_ZGV_LLVM_N4v")
+TLI_DEFINE_VECFUNC("llvm.tan.f32", "vtanf", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("asinf", "vasinf", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("acosf", "vacosf", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("atanf", "vatanf", FIXED(4), "_ZGV_LLVM_N4v")
@@ -142,6 +143,18 @@ TLI_DEFINE_VECFUNC("llvm.cos.f64", "_ZGVdN4v_cos", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("llvm.cos.f32", "_ZGVbN4v_cosf", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("llvm.cos.f32", "_ZGVdN8v_cosf", FIXED(8), "_ZGV_LLVM_N8v")
+TLI_DEFINE_VECFUNC("tan", "_ZGVbN2v_tan", FIXED(2), "_ZGV_LLVM_N2v")
+TLI_DEFINE_VECFUNC("tan", "_ZGVdN4v_tan", FIXED(4), "_ZGV_LLVM_N4v")
+
+TLI_DEFINE_VECFUNC("tanf", "_ZGVbN4v_tanf", FIXED(4), "_ZGV_LLVM_N4v")
+TLI_DEFINE_VECFUNC("tanf", "_ZGVdN8v_tanf", FIXED(8), "_ZGV_LLVM_N8v")
+
+TLI_DEFINE_VECFUNC("llvm.tan.f64", "_ZGVbN2v_tan", FIXED(2), "_ZGV_LLVM_N2v")
+TLI_DEFINE_VECFUNC("llvm.tan.f64", "_ZGVdN4v_tan", FIXED(4), "_ZGV_LLVM_N4v")
+
+TLI_DEFINE_VECFUNC("llvm.tan.f32", "_ZGVbN4v_tanf", FIXED(4), "_ZGV_LLVM_N4v")
+TLI_DEFINE_VECFUNC("llvm.tan.f32", "_ZGVdN8v_tanf", FIXED(8), "_ZGV_LLVM_N8v")
+
TLI_DEFINE_VECFUNC("pow", "_ZGVbN2vv_pow", FIXED(2), "_ZGV_LLVM_N2vv")
TLI_DEFINE_VECFUNC("pow", "_ZGVdN4vv_pow", FIXED(4), "_ZGV_LLVM_N4vv")
@@ -303,6 +316,22 @@ TLI_DEFINE_VECFUNC("llvm.cos.f32", "__svml_cosf4", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("llvm.cos.f32", "__svml_cosf8", FIXED(8), "_ZGV_LLVM_N8v")
TLI_DEFINE_VECFUNC("llvm.cos.f32", "__svml_cosf16", FIXED(16), "_ZGV_LLVM_N16v")
+TLI_DEFINE_VECFUNC("tan", "__svml_tan2", FIXED(2), "_ZGV_LLVM_N2v")
+TLI_DEFINE_VECFUNC("tan", "__svml_tan4", FIXED(4), "_ZGV_LLVM_N4v")
+TLI_DEFINE_VECFUNC("tan", "__svml_tan8", FIXED(8), "_ZGV_LLVM_N8v")
+
+TLI_DEFINE_VECFUNC("tanf", "__svml_tanf4", FIXED(4), "_ZGV_LLVM_N4v")
+TLI_DEFINE_VECFUNC("tanf", "__svml_tanf8", FIXED(8), "_ZGV_LLVM_N8v")
+TLI_DEFINE_VECFUNC("tanf", "__svml_tanf16", FIXED(16), "_ZGV_LLVM_N16v")
+
+TLI_DEFINE_VECFUNC("llvm.tan.f64", "__svml_tan2", FIXED(2), "_ZGV_LLVM_N2v")
+TLI_DEFINE_VECFUNC("llvm.tan.f64", "__svml_tan4", FIXED(4), "_ZGV_LLVM_N4v")
+TLI_DEFINE_VECFUNC("llvm.tan.f64", "__svml_tan8", FIXED(8), "_ZGV_LLVM_N8v")
+
+TLI_DEFINE_VECFUNC("llvm.tan.f32", "__svml_tanf4", FIXED(4), "_ZGV_LLVM_N4v")
+TLI_DEFINE_VECFUNC("llvm.tan.f32", "__svml_tanf8", FIXED(8), "_ZGV_LLVM_N8v")
+TLI_DEFINE_VECFUNC("llvm.tan.f32", "__svml_tanf16", FIXED(16), "_ZGV_LLVM_N16v")
+
TLI_DEFINE_VECFUNC("pow", "__svml_pow2", FIXED(2), "_ZGV_LLVM_N2vv")
TLI_DEFINE_VECFUNC("pow", "__svml_pow4", FIXED(4), "_ZGV_LLVM_N4vv")
TLI_DEFINE_VECFUNC("pow", "__svml_pow8", FIXED(8), "_ZGV_LLVM_N8vv")
@@ -1237,6 +1266,13 @@ TLI_DEFINE_VECFUNC("tanf", "amd_vrs4_tanf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("tanf", "amd_vrs8_tanf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
TLI_DEFINE_VECFUNC("tanf", "amd_vrs16_tanf", FIXED(16), NOMASK, "_ZGV_LLVM_N16v")
+TLI_DEFINE_VECFUNC("llvm.tan.f32", "amd_vrs16_tanf", FIXED(16), NOMASK, "_ZGV_LLVM_N16v")
+TLI_DEFINE_VECFUNC("llvm.tan.f32", "amd_vrs8_tanf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
+TLI_DEFINE_VECFUNC("llvm.tan.f32", "amd_vrs4_tanf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
+TLI_DEFINE_VECFUNC("llvm.tan.f64", "amd_vrd8_tan", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
+TLI_DEFINE_VECFUNC("llvm.tan.f64", "amd_vrd4_tan", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
+TLI_DEFINE_VECFUNC("llvm.tan.f64", "amd_vrd2_tan", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
+
TLI_DEFINE_VECFUNC("asin", "amd_vrd8_asin", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
TLI_DEFINE_VECFUNC("asinf", "amd_vrs4_asinf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("asinf", "amd_vrs8_asinf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index ef4e0fd29768f..9f8d3ded9b3c1 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -1977,6 +1977,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
case Intrinsic::cos:
ISD = ISD::FCOS;
break;
+ case Intrinsic::tan:
+ ISD = ISD::FTAN;
+ break;
case Intrinsic::exp:
ISD = ISD::FEXP;
break;
diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index 0f87e062e2da6..c8c86ed5eef29 100644
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -415,6 +415,7 @@ enum NodeType {
STRICT_FLDEXP,
STRICT_FSIN,
STRICT_FCOS,
+ STRICT_FTAN,
STRICT_FEXP,
STRICT_FEXP2,
STRICT_FLOG,
@@ -934,6 +935,7 @@ enum NodeType {
FCBRT,
FSIN,
FCOS,
+ FTAN,
FPOW,
FPOWI,
/// FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.def b/llvm/include/llvm/IR/RuntimeLibcalls.def
index a5a72884a6bf5..e900bcdbcdd04 100644
--- a/llvm/include/llvm/IR/RuntimeLibcalls.def
+++ b/llvm/include/llvm/IR/RuntimeLibcalls.def
@@ -197,6 +197,11 @@ HANDLE_LIBCALL(COS_F64, "cos")
HANDLE_LIBCALL(COS_F80, "cosl")
HANDLE_LIBCALL(COS_F128, "cosl")
HANDLE_LIBCALL(COS_PPCF128, "cosl")
+HANDLE_LIBCALL(TAN_F32, "tanf")
+HANDLE_LIBCALL(TAN_F64, "tan")
+HANDLE_LIBCALL(TAN_F80, "tanl")
+HANDLE_LIBCALL(TAN_F128,"tanl")
+HANDLE_LIBCALL(TAN_PPCF128, "tanl")
HANDLE_LIBCALL(SINCOS_F32, nullptr)
HANDLE_LIBCALL(SINCOS_F64, nullptr)
HANDLE_LIBCALL(SINCOS_F80, nullptr)
diff --git a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
index 8fa0e4b86d6dc..560d3b434d07d 100644
--- a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
+++ b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
@@ -148,6 +148,7 @@ def : GINodeEquiv<G_BUILD_VECTOR, build_vector>;
def : GINodeEquiv<G_FCEIL, fceil>;
def : GINodeEquiv<G_FCOS, fcos>;
def : GINodeEquiv<G_FSIN, fsin>;
+def : GINodeEquiv<G_FTAN, ftan>;
def : GINodeEquiv<G_FABS, fabs>;
def : GINodeEquiv<G_FSQRT, fsqrt>;
def : GINodeEquiv<G_FFLOOR, ffloor>;
diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td
index 1c95a60909846..15e02eb49271d 100644
--- a/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -509,6 +509,7 @@ def fneg : SDNode<"ISD::FNEG" , SDTFPUnaryOp>;
def fsqrt : SDNode<"ISD::FSQRT" , SDTFPUnaryOp>;
def fsin : SDNode<"ISD::FSIN" , SDTFPUnaryOp>;
def fcos : SDNode<"ISD::FCOS" , SDTFPUnaryOp>;
+def ftan : SDNode<"ISD::FTAN" , SDTFPUnaryOp>;
def fexp2 : SDNode<"ISD::FEXP2" , SDTFPUnaryOp>;
def fexp10 : SDNode<"ISD::FEXP10" , SDTFPUnaryOp>;
def fpow : SDNode<"ISD::FPOW" , SDTFPBinOp>;
@@ -562,6 +563,8 @@ def strict_fsin : SDNode<"ISD::STRICT_FSIN",
SDTFPUnaryOp, [SDNPHasChain]>;
def strict_fcos : SDNode<"ISD::STRICT_FCOS",
SDTFPUnaryOp, [SDNPHasChain]>;
+def strict_ftan : SDNode<"ISD::STRICT_FTAN",
+ SDTFPUnaryOp, [SDNPHasChain]>;
def strict_fexp2 : SDNode<"ISD::STRICT_FEXP2",
SDTFPUnaryOp, [SDNPHasChain]>;
def strict_fpow : SDNode<"ISD::STRICT_FPOW",
@@ -1517,6 +1520,9 @@ def any_fsin : PatFrags<(ops node:$src),
def any_fcos : PatFrags<(ops node:$src),
[(strict_fcos node:$src),
(fcos node:$src)]>;
+def any_ftan : PatFrags<(ops node:$src),
+ [(strict_ftan node:$src),
+ (ftan node:$src)]>;
def any_fexp2 : PatFrags<(ops node:$src),
[(strict_fexp2 node:$src),
(fexp2 node:$src)]>;
diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp
index 917094267d05a..30728ed587509 100644
--- a/llvm/lib/Analysis/VectorUtils.cpp
+++ b/llvm/lib/Analysis/VectorUtils.cpp
@@ -68,6 +68,7 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
case Intrinsic::sqrt: // Begin floating-point.
case Intrinsic::sin:
case Intrinsic::cos:
+ case Intrinsic::tan:
case Intrinsic::exp:
case Intrinsic::exp2:
case Intrinsic::log:
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 6f0cae2edab17..9830b521797c1 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -449,6 +449,8 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
RTLIBCASE(SIN_F);
case TargetOpcode::G_FCOS:
RTLIBCASE(COS_F);
+ case TargetOpcode::G_FTAN:
+ RTLIBCASE(TAN_F);
case TargetOpcode::G_FLOG10:
RTLIBCASE(LOG10_F);
case TargetOpcode::G_FLOG:
@@ -1037,6 +1039,7 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
case TargetOpcode::G_FREM:
case TargetOpcode::G_FCOS:
case TargetOpcode::G_FSIN:
+ case TargetOpcode::G_FTAN:
case TargetOpcode::G_FLOG10:
case TargetOpcode::G_FLOG:
case TargetOpcode::G_FLOG2:
@@ -2893,6 +2896,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
case TargetOpcode::G_FFLOOR:
case TargetOpcode::G_FCOS:
case TargetOpcode::G_FSIN:
+ case TargetOpcode::G_FTAN:
case TargetOpcode::G_FLOG10:
case TargetOpcode::G_FLOG:
case TargetOpcode::G_FLOG2:
@@ -4659,6 +4663,7 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
case G_INTRINSIC_TRUNC:
case G_FCOS:
case G_FSIN:
+ case G_FTAN:
case G_FSQRT:
case G_BSWAP:
case G_BITREVERSE:
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index e8438be94b3cd..129e6963aef33 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -833,6 +833,7 @@ bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI,
case TargetOpcode::G_FREM:
case TargetOpcode::G_FSIN:
case TargetOpcode::G_FCOS:
+ case TargetOpcode::G_FTAN:
case TargetOpcode::G_FMA:
case TargetOpcode::G_FMAD:
if (SNaN)
@@ -1713,6 +1714,7 @@ bool llvm::isPreISelGenericFloatingPointOpcode(unsigned Opc) {
case TargetOpcode::G_FREM:
case TargetOpcode::G_FRINT:
case TargetOpcode::G_FSIN:
+ case TargetOpcode::G_FTAN:
case TargetOpcode::G_FSQRT:
case TargetOpcode::G_FSUB:
case TargetOpcode::G_INTRINSIC_ROUND:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 8cd2bb60d81f2..27c45cab2e0d0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -4514,6 +4514,11 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
RTLIB::COS_F80, RTLIB::COS_F128,
RTLIB::COS_PPCF128, Results);
break;
+ case ISD::FTAN:
+ case ISD::STRICT_FTAN:
+ ExpandFPLibCall(Node, RTLIB::TAN_F32, RTLIB::TAN_F64, RTLIB::TAN_F80,
+ RTLIB::TAN_F128, RTLIB::TAN_PPCF128, Results);
+ break;
case ISD::FSINCOS:
// Expand into sincos libcall.
ExpandSinCosLibCall(Node, Results);
@@ -5468,6 +5473,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
case ISD::FSQRT:
case ISD::FSIN:
case ISD::FCOS:
+ case ISD::FTAN:
case ISD::FLOG:
case ISD::FLOG2:
case ISD::FLOG10:
@@ -5492,6 +5498,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
case ISD::STRICT_FSQRT:
case ISD::STRICT_FSIN:
case ISD::STRICT_FCOS:
+ case ISD::STRICT_FTAN:
case ISD::STRICT_FLOG:
case ISD::STRICT_FLOG2:
case ISD::STRICT_FLOG10:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index fb1424f75e097..aa116c9de5d8c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -131,6 +131,8 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
case ISD::FSQRT: R = SoftenFloatRes_FSQRT(N); break;
case ISD::STRICT_FSUB:
case ISD::FSUB: R = SoftenFloatRes_FSUB(N); break;
+ case ISD::STRICT_FTAN:
+ case ISD::FTAN: R = SoftenFloatRes_FTAN(N); break;
case ISD::STRICT_FTRUNC:
case ISD::FTRUNC: R = SoftenFloatRes_FTRUNC(N); break;
case ISD::LOAD: R = SoftenFloatRes_LOAD(N); break;
@@ -774,6 +776,12 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) {
RTLIB::SUB_PPCF128));
}
+SDValue DAGTypeLegalizer::SoftenFloatRes_FTAN(SDNode *N) {
+ return SoftenFloatRes_Unary(
+ N, GetFPLibCall(N->getValueType(0), RTLIB::TAN_F32, RTLIB::TAN_F64,
+ RTLIB::TAN_F80, RTLIB::TAN_F128, RTLIB::TAN_PPCF128));
+}
+
SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) {
return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
RTLIB::TRUNC_F32,
@@ -1330,7 +1338,7 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
#endif
report_fatal_error("Do not know how to expand the result of this "
"operator!");
-
+ // clang-format off
case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break;
case ISD::SELECT: SplitRes_Select(N, Lo, Hi); break;
case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break;
@@ -1399,6 +1407,8 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
case ISD::FSQRT: ExpandFloatRes_FSQRT(N, Lo, Hi); break;
case ISD::STRICT_FSUB:
case ISD::FSUB: ExpandFloatRes_FSUB(N, Lo, Hi); break;
+ case ISD::STRICT_FTAN:
+ case ISD::FTAN: ExpandFloatRes_FTAN(N, Lo, Hi); break;
case ISD::STRICT_FTRUNC:
case ISD::FTRUNC: ExpandFloatRes_FTRUNC(N, Lo, Hi); break;
case ISD::LOAD: ExpandFloatRes_LOAD(N, Lo, Hi); break;
@@ -1408,6 +1418,7 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
case ISD::UINT_TO_FP: ExpandFloatRes_XINT_TO_FP(N, Lo, Hi); break;
case ISD::STRICT_FREM:
case ISD::FREM: ExpandFloatRes_FREM(N, Lo, Hi); break;
+ // clang-format on
}
// If Lo/Hi is null, the sub-method took care of registering results etc.
@@ -1768,6 +1779,15 @@ void DAGTypeLegalizer::ExpandFloatRes_FSUB(SDNode *N, SDValue &Lo,
RTLIB::SUB_PPCF128), Lo, Hi);
}
+void DAGTypeLegalizer::ExpandFloatRes_FTAN(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ ExpandFloatRes_Unary(N,
+ GetFPLibCall(N->getValueType(0), RTLIB::TAN_F32,
+ RTLIB::TAN_F64, RTLIB::TAN_F80,
+ RTLIB::TAN_F128, RTLIB::TAN_PPCF128),
+ Lo, Hi);
+}
+
void DAGTypeLegalizer::ExpandFloatRes_FTRUNC(SDNode *N,
SDValue &Lo, SDValue &Hi) {
ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
@@ -2479,6 +2499,7 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) {
case ISD::FSIN:
case ISD::FSQRT:
case ISD::FTRUNC:
+ case ISD::FTAN:
case ISD::FCANONICALIZE: R = PromoteFloatRes_UnaryOp(N); break;
// Binary FP Operations
@@ -2914,6 +2935,7 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) {
case ISD::FSIN:
case ISD::FSQRT:
case ISD::FTRUNC:
+ case ISD::FTAN:
case ISD::FCANONICALIZE: R = SoftPromoteHalfRes_UnaryOp(N); break;
// Binary FP Operations
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index bec9cb49b5864..2350b562a0343 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -586,6 +586,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
SDValue SoftenFloatRes_FSIN(SDNode *N);
SDValue SoftenFloatRes_FSQRT(SDNode *N);
SDValue SoftenFloatRes_FSUB(SDNode *N);
+ SDValue SoftenFloatRes_FTAN(SDNode *N);
SDValue SoftenFloatRes_FTRUNC(SDNode *N);
SDValue SoftenFloatRes_LOAD(SDNode *N);
SDValue SoftenFloatRes_ATOMIC_LOAD(SDNode *N);
@@ -635,6 +636,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_Binary(SDNode *N, RTLIB::Libcall LC,
SDValue &Lo, SDValue &Hi);
+ // clang-format off
void ExpandFloatRes_FABS (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FMINNUM (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FMAXNUM (SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -667,9 +669,11 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
void ExpandFloatRes_FSIN (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FSQRT (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FSUB (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FTAN (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FTRUNC (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_LOAD (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, SDValue &Hi);
+ // clang-format on
// Float Operand Expansion.
bool ExpandFloatOperand(SDNode *N, unsigned OpNo);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 6acbc044d6731..5c5347b7f314e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -397,6 +397,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::FSQRT:
case ISD::FSIN:
case ISD::FCOS:
+ case ISD::FTAN:
case ISD::FLDEXP:
case ISD::FPOWI:
case ISD::FPOW:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 361416edb554c..92ce3b17ed6c9 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -108,6 +108,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FROUNDEVEN:
case ISD::FSIN:
case ISD::FSQRT:
+ case ISD::FTAN:
case ISD::FTRUNC:
case ISD::SIGN_EXTEND:
case ISD::SINT_TO_FP:
@@ -1140,6 +1141,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::VP_FROUNDEVEN:
case ISD::FSIN:
case ISD::FSQRT: case ISD::VP_SQRT:
+ case ISD::FTAN:
case ISD::FTRUNC:
case ISD::VP_FROUNDTOZERO:
case ISD::SINT_TO_FP:
@@ -4400,6 +4402,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FROUNDEVEN:
case ISD::FSIN:
case ISD::FSQRT:
+ case ISD::FTAN:
case ISD::FTRUNC:
if (unrollExpandedOp())
break;
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 4a6a431696b52..09cdec8adb275 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5375,6 +5375,7 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const
case ISD::FREM:
case ISD::FSIN:
case ISD::FCOS:
+ case ISD::FTAN:
case ISD::FMA:
case ISD::FMAD: {
if (SNaN)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index ba76456b5836a..64d6fd458c62f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -6742,6 +6742,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
case Intrinsic::fabs:
case Intrinsic::sin:
case Intrinsic::cos:
+ case Intrinsic::tan:
case Intrinsic::exp10:
case Intrinsic::floor:
case Intrinsic::ceil:
@@ -6759,6 +6760,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
case Intrinsic::fabs: Opcode = ISD::FABS; break;
case Intrinsic::sin: Opcode = ISD::FSIN; break;
case Intrinsic::cos: Opcode = ISD::FCOS; break;
+ case Intrinsic::tan: Opcode = ISD::FTAN; break;
case Intrinsic::exp10: Opcode = ISD::FEXP10; break;
case Intrinsic::floor: Opcode = ISD::FFLOOR; break;
case Intrinsic::ceil: Opcode = ISD::FCEIL; break;
@@ -9160,6 +9162,12 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
if (visitUnaryFloatCall(I, ISD::FCOS))
return;
break;
+ case LibFunc_tan:
+ case LibFunc_tanf:
+ case LibFunc_tanl:
+ if (visitUnaryFloatCall(I, ISD::FTAN))
+ return;
+ break;
case LibFunc_sqrt:
case LibFunc_sqrtf:
case LibFunc_sqrtl:
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 2198c2354483c..52da24b594513 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -210,6 +210,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::FCOS: return "fcos";
case ISD::STRICT_FCOS: return "strict_fcos";
case ISD::FSINCOS: return "fsincos";
+ case ISD::FTAN: return "ftan";
+ case ISD::STRICT_FTAN: return "strict_ftan";
case ISD::FTRUNC: return "ftrunc";
case ISD::STRICT_FTRUNC: return "strict_ftrunc";
case ISD::FFLOOR: return "ffloor";
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 3aec7049e0cc8..8240a1fd7e2ff 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -141,6 +141,7 @@ void TargetLoweringBase::InitLibcalls(const Triple &TT) {
setLibcallName(RTLIB::EXP10_F128, "exp10f128");
setLibcallName(RTLIB::SIN_F128, "sinf128");
setLibcallName(RTLIB::COS_F128, "cosf128");
+ setLibcallName(RTLIB::TAN_F128, "tanf128");
setLibcallName(RTLIB::SINCOS_F128, "sincosf128");
setLibcallName(RTLIB::POW_F128, "powf128");
setLibcallName(RTLIB::POW_FINITE_F128, "__powf128_finite");
@@ -1015,7 +1016,8 @@ void TargetLoweringBase::initActions() {
setOperationAction({ISD::FCBRT, ISD::FLOG, ISD::FLOG2, ISD::FLOG10, ISD::FEXP,
ISD::FEXP2, ISD::FEXP10, ISD::FFLOOR, ISD::FNEARBYINT,
ISD::FCEIL, ISD::FRINT, ISD::FTRUNC, ISD::LROUND,
- ISD::LLROUND, ISD::LRINT, ISD::LLRINT, ISD::FROUNDEVEN},
+ ISD::LLROUND, ISD::LRINT, ISD::LLRINT, ISD::FROUNDEVEN,
+ ISD::FTAN},
{MVT::f32, MVT::f64, MVT::f128}, Expand);
// Default ISD::TRAP to expand (which turns it into abort).
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 7d30de15f84d2..af1e45d25aac4 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -615,6 +615,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FSIN, VT, Action);
setOperationAction(ISD::FCOS, VT, Action);
setOperationAction(ISD::FSINCOS, VT, Action);
+ setOperationAction(ISD::FTAN, VT, Action);
setOperationAction(ISD::FSQRT, VT, Action);
setOperationAction(ISD::FPOW, VT, Action);
setOperationAction(ISD::FLOG, VT, Action);
@@ -833,9 +834,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
// Always expand sin/cos functions even though x87 has an instruction.
+ // clang-format off
setOperationAction(ISD::FSIN , MVT::f80, Expand);
setOperationAction(ISD::FCOS , MVT::f80, Expand);
setOperationAction(ISD::FSINCOS, MVT::f80, Expand);
+ setOperationAction(ISD::FTAN , MVT::f80, Expand);
+ // clang-format on
setOperationAction(ISD::FFLOOR, MVT::f80, Expand);
setOperationAction(ISD::FCEIL, MVT::f80, Expand);
@@ -888,11 +892,15 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FNEG, MVT::f128, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::f128, Custom);
+ // clang-format off
setOperationAction(ISD::FSIN, MVT::f128, LibCall);
setOperationAction(ISD::STRICT_FSIN, MVT::f128, LibCall);
setOperationAction(ISD::FCOS, MVT::f128, LibCall);
setOperationAction(ISD::STRICT_FCOS, MVT::f128, LibCall);
setOperationAction(ISD::FSINCOS, MVT::f128, LibCall);
+ setOperationAction(ISD::FTAN, MVT::f128, LibCall);
+ setOperationAction(ISD::STRICT_FTAN, MVT::f128, LibCall);
+ // clang-format on
// No STRICT_FSINCOS
setOperationAction(ISD::FSQRT, MVT::f128, LibCall);
setOperationAction(ISD::STRICT_FSQRT, MVT::f128, LibCall);
@@ -944,9 +952,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
for (auto VT : { MVT::v8f16, MVT::v16f16, MVT::v32f16,
MVT::v4f32, MVT::v8f32, MVT::v16f32,
MVT::v2f64, MVT::v4f64, MVT::v8f64 }) {
+ // clang-format off
setOperationAction(ISD::FSIN, VT, Expand);
setOperationAction(ISD::FSINCOS, VT, Expand);
setOperationAction(ISD::FCOS, VT, Expand);
+ setOperationAction(ISD::FTAN, VT, Expand);
setOperationAction(ISD::FREM, VT, Expand);
setOperationAction(ISD::FCOPYSIGN, VT, Expand);
setOperationAction(ISD::FPOW, VT, Expand);
@@ -956,6 +966,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FEXP, VT, Expand);
setOperationAction(ISD::FEXP2, VT, Expand);
setOperationAction(ISD::FEXP10, VT, Expand);
+ // clang-format on
}
// First set operation action for all vector types to either promote
@@ -2473,7 +2484,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// function casting to f64 and calling `fmod`.
if (Subtarget.is32Bit() &&
(Subtarget.isTargetWindowsMSVC() || Subtarget.isTargetWindowsItanium()))
- for (ISD::NodeType Op :
+ // clang-format off
+ for (ISD::NodeType Op :
{ISD::FCEIL, ISD::STRICT_FCEIL,
ISD::FCOS, ISD::STRICT_FCOS,
ISD::FEXP, ISD::STRICT_FEXP,
@@ -2482,9 +2494,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
ISD::FLOG, ISD::STRICT_FLOG,
ISD::FLOG10, ISD::STRICT_FLOG10,
ISD::FPOW, ISD::STRICT_FPOW,
- ISD::FSIN, ISD::STRICT_FSIN})
+ ISD::FSIN, ISD::STRICT_FSIN,
+ ISD::FTAN, ISD::STRICT_FTAN})
if (isOperationExpand(Op, MVT::f32))
setOperationAction(Op, MVT::f32, Promote);
+ // clang-format on
// We have target-specific dag combine patterns for the following nodes:
setTargetDAGCombine({ISD::VECTOR_SHUFFLE,
diff --git a/llvm/test/CodeGen/X86/llvm.tan.ll b/llvm/test/CodeGen/X86/llvm.tan.ll
new file mode 100644
index 0000000000000..24b30038687f2
--- /dev/null
+++ b/llvm/test/CodeGen/X86/llvm.tan.ll
@@ -0,0 +1,70 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
+
+define half @use_tanf16(half %a) nounwind {
+; CHECK-LABEL: use_tanf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: callq __extendhfsf2@PLT
+; CHECK-NEXT: callq tanf@PLT
+; CHECK-NEXT: callq __truncsfhf2@PLT
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: retq
+ %x = call half @llvm.tan.f16(half %a)
+ ret half %x
+}
+
+define float @use_tanf32(float %a) nounwind {
+; CHECK-LABEL: use_tanf32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: jmp tanf@PLT # TAILCALL
+ %x = call float @llvm.tan.f32(float %a)
+ ret float %x
+}
+
+define double @use_tanf64(double %a) nounwind {
+; CHECK-LABEL: use_tanf64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: jmp tan@PLT # TAILCALL
+ %x = call double @llvm.tan.f64(double %a)
+ ret double %x
+}
+
+define x86_fp80 @use_tanf80(x86_fp80 %a) nounwind {
+; CHECK-LABEL: use_tanf80:
+; CHECK: # %bb.0:
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: fldt 32(%rsp)
+; CHECK-NEXT: fstpt (%rsp)
+; CHECK-NEXT: callq tanl@PLT
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: retq
+ %x = call x86_fp80 @llvm.tan.f80(x86_fp80 %a)
+ ret x86_fp80 %x
+}
+
+define fp128 @use_tanfp128(fp128 %a) nounwind {
+; CHECK-LABEL: use_tanfp128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: jmp tanf128@PLT # TAILCALL
+ %x = call fp128 @llvm.tan.f128(fp128 %a)
+ ret fp128 %x
+}
+
+define ppc_fp128 @use_tanppc_fp128(ppc_fp128 %a) nounwind {
+; CHECK-LABEL: use_tanppc_fp128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: callq tanl@PLT
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: retq
+ %x = call ppc_fp128 @llvm.tan.ppcf128(ppc_fp128 %a)
+ ret ppc_fp128 %x
+}
+
+declare half @llvm.tan.f16(half)
+declare float @llvm.tan.f32(float)
+declare double @llvm.tan.f64(double)
+declare x86_fp80 @llvm.tan.f80(x86_fp80)
+declare fp128 @llvm.tan.f128(fp128)
+declare ppc_fp128 @llvm.tan.ppcf128(ppc_fp128)
diff --git a/llvm/test/CodeGen/X86/vec-libcalls.ll b/llvm/test/CodeGen/X86/vec-libcalls.ll
index 3a1315446d7a2..6857101d3d75b 100644
--- a/llvm/test/CodeGen/X86/vec-libcalls.ll
+++ b/llvm/test/CodeGen/X86/vec-libcalls.ll
@@ -17,6 +17,14 @@ declare <5 x float> @llvm.sin.v5f32(<5 x float>)
declare <6 x float> @llvm.sin.v6f32(<6 x float>)
declare <3 x double> @llvm.sin.v3f64(<3 x double>)
+declare <1 x float> @llvm.tan.v1f32(<1 x float>)
+declare <2 x float> @llvm.tan.v2f32(<2 x float>)
+declare <3 x float> @llvm.tan.v3f32(<3 x float>)
+declare <4 x float> @llvm.tan.v4f32(<4 x float>)
+declare <5 x float> @llvm.tan.v5f32(<5 x float>)
+declare <6 x float> @llvm.tan.v6f32(<6 x float>)
+declare <3 x double> @llvm.tan.v3f64(<3 x double>)
+
; Verify that all of the potential libcall candidates are handled.
; Some of these have custom lowering, so those cases won't have
; libcalls.
@@ -230,6 +238,200 @@ define <3 x double> @sin_v3f64(<3 x double> %x) nounwind {
ret <3 x double> %r
}
+define <1 x float> @tan_v1f32(<1 x float> %x) nounwind {
+; CHECK-LABEL: tan_v1f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: callq tanf@PLT
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: retq
+ %r = call <1 x float> @llvm.tan.v1f32(<1 x float> %x)
+ ret <1 x float> %r
+}
+
+define <2 x float> @tan_v2f32(<2 x float> %x) nounwind {
+; CHECK-LABEL: tan_v2f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: subq $40, %rsp
+; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: callq tanf@PLT
+; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
+; CHECK-NEXT: callq tanf@PLT
+; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
+; CHECK-NEXT: addq $40, %rsp
+; CHECK-NEXT: retq
+ %r = call <2 x float> @llvm.tan.v2f32(<2 x float> %x)
+ ret <2 x float> %r
+}
+
+define <3 x float> @tan_v3f32(<3 x float> %x) nounwind {
+; CHECK-LABEL: tan_v3f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: subq $40, %rsp
+; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: callq tanf@PLT
+; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
+; CHECK-NEXT: callq tanf@PLT
+; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
+; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[1,0]
+; CHECK-NEXT: callq tanf@PLT
+; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
+; CHECK-NEXT: addq $40, %rsp
+; CHECK-NEXT: retq
+ %r = call <3 x float> @llvm.tan.v3f32(<3 x float> %x)
+ ret <3 x float> %r
+}
+
+define <4 x float> @tan_v4f32(<4 x float> %x) nounwind {
+; CHECK-LABEL: tan_v4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: subq $40, %rsp
+; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: callq tanf@PLT
+; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
+; CHECK-NEXT: callq tanf@PLT
+; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
+; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[1,0]
+; CHECK-NEXT: callq tanf@PLT
+; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
+; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[3,3,3,3]
+; CHECK-NEXT: callq tanf@PLT
+; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
+; CHECK-NEXT: addq $40, %rsp
+; CHECK-NEXT: retq
+ %r = call <4 x float> @llvm.tan.v4f32(<4 x float> %x)
+ ret <4 x float> %r
+}
+
+define <5 x float> @tan_v5f32(<5 x float> %x) nounwind {
+; CHECK-LABEL: tan_v5f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: subq $72, %rsp
+; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: callq tanf@PLT
+; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
+; CHECK-NEXT: callq tanf@PLT
+; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
+; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[1,0]
+; CHECK-NEXT: callq tanf@PLT
+; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
+; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[3,3,3,3]
+; CHECK-NEXT: callq tanf@PLT
+; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
+; CHECK-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill
+; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: callq tanf@PLT
+; CHECK-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload
+; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; CHECK-NEXT: addq $72, %rsp
+; CHECK-NEXT: retq
+ %r = call <5 x float> @llvm.tan.v5f32(<5 x float> %x)
+ ret <5 x float> %r
+}
+
+define <6 x float> @tan_v6f32(<6 x float> %x) nounwind {
+; CHECK-LABEL: tan_v6f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: subq $72, %rsp
+; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
+; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: callq tanf@PLT
+; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
+; CHECK-NEXT: callq tanf@PLT
+; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
+; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
+; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: callq tanf@PLT
+; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
+; CHECK-NEXT: callq tanf@PLT
+; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
+; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[1,0]
+; CHECK-NEXT: callq tanf@PLT
+; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
+; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[3,3,3,3]
+; CHECK-NEXT: callq tanf@PLT
+; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
+; CHECK-NEXT: vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
+; CHECK-NEXT: addq $72, %rsp
+; CHECK-NEXT: retq
+ %r = call <6 x float> @llvm.tan.v6f32(<6 x float> %x)
+ ret <6 x float> %r
+}
+
+define <3 x double> @tan_v3f64(<3 x double> %x) nounwind {
+; CHECK-LABEL: tan_v3f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: subq $72, %rsp
+; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: callq tan@PLT
+; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = mem[1,0]
+; CHECK-NEXT: callq tan@PLT
+; CHECK-NEXT: vmovapd (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; CHECK-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill
+; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: callq tan@PLT
+; CHECK-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload
+; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; CHECK-NEXT: addq $72, %rsp
+; CHECK-NEXT: retq
+ %r = call <3 x double> @llvm.tan.v3f64(<3 x double> %x)
+ ret <3 x double> %r
+}
+
define <2 x float> @fabs_v2f32(<2 x float> %x) nounwind {
; CHECK-LABEL: fabs_v2f32:
; CHECK: # %bb.0:
diff --git a/llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls.ll b/llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls.ll
index 8d2820a245d95..1627292732b6a 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls.ll
@@ -15,6 +15,11 @@ declare float @cosf(float) #0
declare double @llvm.cos.f64(double) #0
declare float @llvm.cos.f32(float) #0
+declare double @tan(double) #0
+declare float @tanf(float) #0
+declare double @llvm.tan.f64(double) #0
+declare float @llvm.tan.f32(float) #0
+
declare double @pow(double, double) #0
declare float @powf(float, float) #0
declare double @llvm.pow.f64(double, double) #0
@@ -264,6 +269,114 @@ for.end:
ret void
}
+define void @tan_f64(ptr nocapture %varray) {
+; CHECK-LABEL: @tan_f64(
+; CHECK: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_tan(<4 x double> [[TMP4:%.*]])
+; CHECK: ret void
+;
+; CHECK-AVX512-VF8-LABEL: @tan_f64(
+; CHECK-AVX512-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_tan(<8 x double> [[TMP4:%.*]])
+; CHECK-AVX512-VF8: ret void
+;
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %tmp = trunc i64 %iv to i32
+ %conv = sitofp i32 %tmp to double
+ %call = tail call double @tan(double %conv)
+ %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+ store double %call, ptr %arrayidx, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
+
+define void @tan_f32(ptr nocapture %varray) {
+; CHECK-LABEL: @tan_f32(
+; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_tanf(<4 x float> [[TMP4:%.*]])
+; CHECK: ret void
+;
+; CHECK-AVX512-VF16-LABEL: @tan_f32(
+; CHECK-AVX512-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_tanf(<16 x float> [[TMP4:%.*]])
+; CHECK-AVX512-VF16: ret void
+;
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %tmp = trunc i64 %iv to i32
+ %conv = sitofp i32 %tmp to float
+ %call = tail call float @tanf(float %conv)
+ %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
+ store float %call, ptr %arrayidx, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
+
+define void @tan_f64_intrinsic(ptr nocapture %varray) {
+; CHECK-LABEL: @tan_f64_intrinsic(
+; CHECK: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_tan(<4 x double> [[TMP4:%.*]])
+; CHECK: ret void
+;
+; CHECK-AVX512-VF8-LABEL: @tan_f64_intrinsic(
+; CHECK-AVX512-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_tan(<8 x double> [[TMP4:%.*]])
+; CHECK-AVX512-VF8: ret void
+;
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %tmp = trunc i64 %iv to i32
+ %conv = sitofp i32 %tmp to double
+ %call = tail call double @llvm.tan.f64(double %conv)
+ %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+ store double %call, ptr %arrayidx, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
+
+define void @tan_f32_intrinsic(ptr nocapture %varray) {
+; CHECK-LABEL: @tan_f32_intrinsic(
+; CHECK: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_tanf(<4 x float> [[TMP4:%.*]])
+; CHECK: ret void
+;
+; CHECK-AVX512-VF16-LABEL: @tan_f32_intrinsic(
+; CHECK-AVX512-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_tanf(<16 x float> [[TMP4:%.*]])
+; CHECK-AVX512-VF16: ret void
+;
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %tmp = trunc i64 %iv to i32
+ %conv = sitofp i32 %tmp to float
+ %call = tail call float @llvm.tan.f32(float %conv)
+ %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
+ store float %call, ptr %arrayidx, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
+
define void @pow_f64(ptr nocapture %varray, ptr nocapture readonly %exp) {
; CHECK-LABEL: @pow_f64(
; CHECK: [[TMP8:%.*]] = call <4 x double> @amd_vrd4_pow(<4 x double> [[TMP4:%.*]], <4 x double> [[WIDE_LOAD:%.*]])
diff --git a/llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls-VF2-VF8.ll b/llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls-VF2-VF8.ll
index 038852f55f455..67a2cf2b80e70 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls-VF2-VF8.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls-VF2-VF8.ll
@@ -356,6 +356,117 @@ for.end: ; preds = %for.body
!132 = !{!"llvm.loop.vectorize.width", i32 8}
!133 = !{!"llvm.loop.vectorize.enable", i1 true}
+define void @tan_f64(ptr nocapture %varray) {
+; CHECK-LABEL: @tan_f64(
+; CHECK-LABEL: vector.body
+; CHECK: [[TMP5:%.*]] = call <2 x double> @_ZGVbN2v_tan(<2 x double> [[TMP4:%.*]])
+;
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %tmp = trunc i64 %iv to i32
+ %conv = sitofp i32 %tmp to double
+ %call = tail call double @tan(double %conv)
+ %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+ store double %call, ptr %arrayidx, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !1
+
+for.end:
+ ret void
+}
+
+!141 = distinct !{!141, !142, !143}
+!142 = !{!"llvm.loop.vectorize.width", i32 2}
+!143 = !{!"llvm.loop.vectorize.enable", i1 true}
+
+
+define void @tan_f32(ptr nocapture %varray) {
+; CHECK-LABEL: @tan_f32(
+; CHECK-LABEL: vector.body
+; CHECK: [[TMP5:%.*]] = call <8 x float> @_ZGVdN8v_tanf(<8 x float> [[TMP4:%.*]])
+;
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %tmp = trunc i64 %iv to i32
+ %conv = sitofp i32 %tmp to float
+ %call = tail call float @tanf(float %conv)
+ %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
+ store float %call, ptr %arrayidx, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !21
+
+for.end:
+ ret void
+}
+
+!151 = distinct !{!151, !152, !153}
+!152 = !{!"llvm.loop.vectorize.width", i32 8}
+!153 = !{!"llvm.loop.vectorize.enable", i1 true}
+
+define void @tan_f64_intrinsic(ptr nocapture %varray) {
+; CHECK-LABEL: @tan_f64_intrinsic(
+; CHECK-LABEL: vector.body
+; CHECK: [[TMP5:%.*]] = call <2 x double> @_ZGVbN2v_tan(<2 x double> [[TMP4:%.*]])
+;
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %tmp = trunc i64 %iv to i32
+ %conv = sitofp i32 %tmp to double
+ %call = tail call double @llvm.tan.f64(double %conv)
+ %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+ store double %call, ptr %arrayidx, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !31
+
+for.end:
+ ret void
+}
+
+!161 = distinct !{!161, !162, !163}
+!162 = !{!"llvm.loop.vectorize.width", i32 2}
+!163 = !{!"llvm.loop.vectorize.enable", i1 true}
+
+define void @tan_f32_intrinsic(ptr nocapture %varray) {
+; CHECK-LABEL: @tan_f32_intrinsic(
+; CHECK-LABEL: vector.body
+; CHECK: [[TMP5:%.*]] = call <8 x float> @_ZGVdN8v_tanf(<8 x float> [[TMP4:%.*]])
+;
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %tmp = trunc i64 %iv to i32
+ %conv = sitofp i32 %tmp to float
+ %call = tail call float @llvm.tan.f32(float %conv)
+ %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
+ store float %call, ptr %arrayidx, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !41
+
+for.end:
+ ret void
+}
+
+
+
+!171 = distinct !{!171, !172, !173}
+!172 = !{!"llvm.loop.vectorize.width", i32 8}
+!173 = !{!"llvm.loop.vectorize.enable", i1 true}
+
attributes #0 = { nounwind readnone }
declare double @sin(double) #0
@@ -366,6 +477,10 @@ declare double @cos(double) #0
declare float @cosf(float) #0
declare double @llvm.cos.f64(double) #0
declare float @llvm.cos.f32(float) #0
+declare double @tan(double) #0
+declare float @tanf(float) #0
+declare double @llvm.tan.f64(double) #0
+declare float @llvm.tan.f32(float) #0
declare float @expf(float) #0
declare float @powf(float, float) #0
declare float @llvm.exp.f32(float) #0
diff --git a/llvm/test/Transforms/LoopVectorize/X86/svml-calls.ll b/llvm/test/Transforms/LoopVectorize/X86/svml-calls.ll
index 005557d7445ca..2e78e3632feab 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/svml-calls.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/svml-calls.ll
@@ -230,6 +230,52 @@ for.end:
ret void
}
+define void @tan_f64_intrinsic(ptr nocapture %varray) {
+; CHECK-LABEL: @tan_f64_intrinsic(
+; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_tan4(<4 x double> [[TMP4:%.*]])
+; CHECK: ret void
+;
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %tmp = trunc i64 %iv to i32
+ %conv = sitofp i32 %tmp to double
+ %call = tail call double @llvm.tan.f64(double %conv)
+ %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+ store double %call, ptr %arrayidx, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
+
+define void @tan_f32_intrinsic(ptr nocapture %varray) {
+; CHECK-LABEL: @tan_f32_intrinsic(
+; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_tanf4(<4 x float> [[TMP4:%.*]])
+; CHECK: ret void
+;
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %tmp = trunc i64 %iv to i32
+ %conv = sitofp i32 %tmp to float
+ %call = tail call float @llvm.tan.f32(float %conv)
+ %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
+ store float %call, ptr %arrayidx, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
+
define void @pow_f64(ptr nocapture %varray, ptr nocapture readonly %exp) {
; CHECK-LABEL: @pow_f64(
; CHECK: [[TMP8:%.*]] = call <4 x double> @__svml_pow4(<4 x double> [[TMP4:%.*]], <4 x double> [[WIDE_LOAD:%.*]])
diff --git a/llvm/test/Transforms/LoopVectorize/X86/veclib-calls.ll b/llvm/test/Transforms/LoopVectorize/X86/veclib-calls.ll
index 2e78a96a44b74..27038f3a24b66 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/veclib-calls.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/veclib-calls.ll
@@ -406,6 +406,31 @@ for.end: ; preds = %for.body, %entry
ret void
}
+;CHECK-LABEL: @tan_f32_intrinsic(
+;CHECK: vtanf{{.*}}<4 x float>
+;CHECK: ret void
+declare float @llvm.tan.f32(float) nounwind readnone
+define void @tan_f32_intrinsic(i32 %n, ptr noalias %y, ptr noalias %x) nounwind uwtable {
+entry:
+ %cmp6 = icmp sgt i32 %n, 0
+ br i1 %cmp6, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds float, ptr %y, i64 %indvars.iv
+ %0 = load float, ptr %arrayidx, align 4
+ %call = tail call float @llvm.tan.f32(float %0) nounwind readnone
+ %arrayidx2 = getelementptr inbounds float, ptr %x, i64 %indvars.iv
+ store float %call, ptr %arrayidx2, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
;CHECK-LABEL: @asin_f32(
;CHECK: vasinf{{.*}}<4 x float>
;CHECK: ret void