[clang] [llvm] [SPIRV] Add tan intrinsic part 3 (PR #90278)

Fri Apr 26 20:56:32 PDT 2024

https://github.com/farzonl updated https://github.com/llvm/llvm-project/pull/90278

>From cc60de7a9300717559c4d511af553e142d368284 Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <farzonlotfi at microsoft.com>
Date: Wed, 24 Apr 2024 17:34:04 -0400
Subject: [PATCH 1/7] start of tan intrinsic

---
 clang/docs/LanguageExtensions.rst             |  3 +-
 clang/include/clang/Basic/Builtins.td         |  6 ++
 clang/lib/CodeGen/CGBuiltin.cpp               |  4 +-
 clang/lib/Headers/hlsl/hlsl_intrinsics.h      | 23 ++++++++
 clang/lib/Sema/SemaChecking.cpp               |  2 +
 .../test/CodeGen/builtins-elementwise-math.c  | 16 ++++++
 .../CodeGen/strictfp-elementwise-bulitins.cpp | 10 ++++
 clang/test/CodeGenHLSL/builtins/tan.hlsl      | 56 +++++++++++++++++++
 clang/test/Sema/aarch64-sve-vector-trig-ops.c |  6 ++
 clang/test/Sema/builtins-elementwise-math.c   | 21 +++++++
 clang/test/Sema/riscv-rvv-vector-trig-ops.c   |  6 ++
 .../SemaCXX/builtins-elementwise-math.cpp     |  7 +++
 llvm/docs/GlobalISel/GenericOpcode.rst        |  2 +-
 llvm/docs/LangRef.rst                         | 37 ++++++++++++
 llvm/include/llvm/Analysis/VecFuncs.def       |  1 +
 llvm/include/llvm/CodeGen/BasicTTIImpl.h      |  3 +
 llvm/include/llvm/CodeGen/ISDOpcodes.h        |  2 +
 llvm/include/llvm/IR/Intrinsics.td            |  1 +
 llvm/include/llvm/IR/RuntimeLibcalls.def      |  5 ++
 llvm/include/llvm/Support/TargetOpcodes.def   |  3 +
 llvm/include/llvm/Target/GenericOpcodes.td    |  7 +++
 .../Target/GlobalISel/SelectionDAGCompat.td   |  1 +
 .../include/llvm/Target/TargetSelectionDAG.td |  6 ++
 llvm/lib/Analysis/VectorUtils.cpp             |  1 +
 llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp  |  2 +
 .../CodeGen/GlobalISel/LegalizerHelper.cpp    |  5 ++
 llvm/lib/CodeGen/GlobalISel/Utils.cpp         |  2 +
 llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp |  8 +++
 .../SelectionDAG/LegalizeFloatTypes.cpp       | 23 ++++++++
 llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h |  2 +
 .../SelectionDAG/LegalizeVectorOps.cpp        |  1 +
 .../SelectionDAG/LegalizeVectorTypes.cpp      |  3 +
 .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp |  1 +
 .../SelectionDAG/SelectionDAGBuilder.cpp      |  8 +++
 .../SelectionDAG/SelectionDAGDumper.cpp       |  2 +
 llvm/lib/CodeGen/TargetLoweringBase.cpp       |  3 +-
 .../Target/SPIRV/SPIRVInstructionSelector.cpp |  2 +
 llvm/lib/Target/X86/X86ISelLowering.cpp       |  1 +
 38 files changed, 288 insertions(+), 4 deletions(-)
 create mode 100644 clang/test/CodeGenHLSL/builtins/tan.hlsl

diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index f18b946efd4bfa..305d6da366d25a 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -656,6 +656,7 @@ Unless specified otherwise operation(±0) = ±0 and operation(±infinity) = ±in
  T __builtin_elementwise_ceil(T x)           return the smallest integral value greater than or equal to x    floating point types
  T __builtin_elementwise_sin(T x)            return the sine of x interpreted as an angle in radians          floating point types
  T __builtin_elementwise_cos(T x)            return the cosine of x interpreted as an angle in radians        floating point types
+ T __builtin_elementwise_tan(T x)            return the tangent of x interpreted as an angle in radians       floating point types
  T __builtin_elementwise_floor(T x)          return the largest integral value less than or equal to x        floating point types
  T __builtin_elementwise_log(T x)            return the natural logarithm of x                                floating point types
  T __builtin_elementwise_log2(T x)           return the base 2 logarithm of x                                 floating point types
@@ -664,7 +665,7 @@ Unless specified otherwise operation(±0) = ±0 and operation(±infinity) = ±in
  T __builtin_elementwise_bitreverse(T x)     return the integer represented after reversing the bits of x     integer types
  T __builtin_elementwise_exp(T x)            returns the base-e exponential, e^x, of the specified value      floating point types
  T __builtin_elementwise_exp2(T x)           returns the base-2 exponential, 2^x, of the specified value      floating point types
-
+ 
  T __builtin_elementwise_sqrt(T x)           return the square root of a floating-point number                floating point types
  T __builtin_elementwise_roundeven(T x)      round x to the nearest integer value in floating point format,   floating point types
                                              rounding halfway cases to even (that is, to the nearest value
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index de721a87b3341d..11982af3fa609b 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -1326,6 +1326,12 @@ def ElementwiseSqrt : Builtin {
   let Prototype = "void(...)";
 }
 
+def ElementwiseTan : Builtin {
+  let Spellings = ["__builtin_elementwise_tan"];
+  let Attributes = [NoThrow, Const, CustomTypeChecking];
+  let Prototype = "void(...)";
+}
+
 def ElementwiseTrunc : Builtin {
   let Spellings = ["__builtin_elementwise_trunc"];
   let Attributes = [NoThrow, Const, CustomTypeChecking];
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index d08ab539148914..f941e93b0ced2b 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -3821,7 +3821,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
   case Builtin::BI__builtin_elementwise_sin:
     return RValue::get(
         emitUnaryBuiltin(*this, E, llvm::Intrinsic::sin, "elt.sin"));
-
+  case Builtin::BI__builtin_elementwise_tan:
+    return RValue::get(
+        emitUnaryBuiltin(*this, E, llvm::Intrinsic::tan, "elt.tan"));
   case Builtin::BI__builtin_elementwise_trunc:
     return RValue::get(
         emitUnaryBuiltin(*this, E, llvm::Intrinsic::trunc, "elt.trunc"));
diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
index 06409c6fc77417..3390f0962f67d0 100644
--- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h
+++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
@@ -1441,6 +1441,29 @@ float3 sqrt(float3);
 _HLSL_BUILTIN_ALIAS(__builtin_elementwise_sqrt)
 float4 sqrt(float4);
 
+//===----------------------------------------------------------------------===//
+// tan builtins
+//===----------------------------------------------------------------------===//
+#ifdef __HLSL_ENABLE_16_BIT
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_tan)
+half tan(half);
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_tan)
+half2 tan(half2);
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_tan)
+half3 tan(half3);
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_tan)
+half4 tan(half4);
+#endif
+
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_tan)
+float tan(float);
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_tan)
+float2 tan(float2);
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_tan)
+float3 tan(float3);
+_HLSL_BUILTIN_ALIAS(__builtin_elementwise_tan)
+float4 tan(float4);
+
 //===----------------------------------------------------------------------===//
 // trunc builtins
 //===----------------------------------------------------------------------===//
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index e33113ab9c4c1d..fcc901392e1161 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -3049,6 +3049,7 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
   case Builtin::BI__builtin_elementwise_nearbyint:
   case Builtin::BI__builtin_elementwise_sin:
   case Builtin::BI__builtin_elementwise_sqrt:
+  case Builtin::BI__builtin_elementwise_tan:
   case Builtin::BI__builtin_elementwise_trunc:
   case Builtin::BI__builtin_elementwise_canonicalize: {
     if (PrepareBuiltinElementwiseMathOneArgCall(TheCall))
@@ -5664,6 +5665,7 @@ bool Sema::CheckHLSLBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
   case Builtin::BI__builtin_elementwise_roundeven:
   case Builtin::BI__builtin_elementwise_sin:
   case Builtin::BI__builtin_elementwise_sqrt:
+  case Builtin::BI__builtin_elementwise_tan:
   case Builtin::BI__builtin_elementwise_trunc: {
     if (CheckFloatOrHalfRepresentations(this, TheCall))
       return true;
diff --git a/clang/test/CodeGen/builtins-elementwise-math.c b/clang/test/CodeGen/builtins-elementwise-math.c
index 1c667e5bff1eab..1b5466abd347d7 100644
--- a/clang/test/CodeGen/builtins-elementwise-math.c
+++ b/clang/test/CodeGen/builtins-elementwise-math.c
@@ -604,6 +604,22 @@ void test_builtin_elementwise_sqrt(float f1, float f2, double d1, double d2,
   vf2 = __builtin_elementwise_sqrt(vf1);
 }
 
+void test_builtin_elementwise_tan(float f1, float f2, double d1, double d2,
+                                  float4 vf1, float4 vf2) {
+  // CHECK-LABEL: define void @test_builtin_elementwise_tan(
+  // CHECK:      [[F1:%.+]] = load float, ptr %f1.addr, align 4
+  // CHECK-NEXT:  call float @llvm.tan.f32(float [[F1]])
+  f2 = __builtin_elementwise_tan(f1);
+
+  // CHECK:      [[D1:%.+]] = load double, ptr %d1.addr, align 8
+  // CHECK-NEXT: call double @llvm.tan.f64(double [[D1]])
+  d2 = __builtin_elementwise_tan(d1);
+
+  // CHECK:      [[VF1:%.+]] = load <4 x float>, ptr %vf1.addr, align 16
+  // CHECK-NEXT: call <4 x float> @llvm.tan.v4f32(<4 x float> [[VF1]])
+  vf2 = __builtin_elementwise_tan(vf1);
+}
+
 void test_builtin_elementwise_trunc(float f1, float f2, double d1, double d2,
                                     float4 vf1, float4 vf2) {
   // CHECK-LABEL: define void @test_builtin_elementwise_trunc(
diff --git a/clang/test/CodeGen/strictfp-elementwise-bulitins.cpp b/clang/test/CodeGen/strictfp-elementwise-bulitins.cpp
index fdf865ebbe8911..c72d5949916911 100644
--- a/clang/test/CodeGen/strictfp-elementwise-bulitins.cpp
+++ b/clang/test/CodeGen/strictfp-elementwise-bulitins.cpp
@@ -187,6 +187,16 @@ float4 strict_elementwise_sqrt(float4 a) {
   return __builtin_elementwise_sqrt(a);
 }
 
+// CHECK-LABEL: define dso_local noundef <4 x float> @_Z22strict_elementwise_tanDv4_f
+// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[ELT_TAN:%.*]] = tail call <4 x float> @llvm.tan.v4f32(<4 x float> [[A]]) #[[ATTR4]]
+// CHECK-NEXT:    ret <4 x float> [[ELT_TAN]]
+//
+float4 strict_elementwise_tan(float4 a) {
+  return __builtin_elementwise_tan(a);
+}
+
 // CHECK-LABEL: define dso_local noundef <4 x float> @_Z24strict_elementwise_truncDv4_f
 // CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
 // CHECK-NEXT:  entry:
diff --git a/clang/test/CodeGenHLSL/builtins/tan.hlsl b/clang/test/CodeGenHLSL/builtins/tan.hlsl
new file mode 100644
index 00000000000000..88206eee146b0a
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/tan.hlsl
@@ -0,0 +1,56 @@
+// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s -fnative-half-type \
+// RUN:   -emit-llvm -disable-llvm-passes -O3 -o - | FileCheck %s
+// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
+// RUN:   -D__HLSL_ENABLE_16_BIT -o - | FileCheck %s --check-prefix=NO_HALF
+
+// CHECK: define noundef half @
+// CHECK: call half @llvm.tan.f16(
+// NO_HALF: define noundef float @"?test_tan_half@@YA$halff@$halff@@Z"(
+// NO_HALF: call float @llvm.tan.f32(
+half test_tan_half ( half p0 ) {
+  return tan ( p0 );
+}
+// CHECK: define noundef <2 x half> @
+// CHECK: call <2 x half> @llvm.tan.v2f16
+// NO_HALF: define noundef <2 x float> @"?test_tan_float2@@YAT?$__vector at M$01 at __clang@@T12@@Z"(
+// NO_HALF: call <2 x float> @llvm.tan.v2f32(
+half2 test_tan_half2 ( half2 p0 ) {
+  return tan ( p0 );
+}
+// CHECK: define noundef <3 x half> @
+// CHECK: call <3 x half> @llvm.tan.v3f16
+// NO_HALF: define noundef <3 x float> @"?test_tan_float3@@YAT?$__vector at M$02 at __clang@@T12@@Z"(
+// NO_HALF: call <3 x float> @llvm.tan.v3f32(
+half3 test_tan_half3 ( half3 p0 ) {
+  return tan ( p0 );
+}
+// CHECK: define noundef <4 x half> @
+// CHECK: call <4 x half> @llvm.tan.v4f16
+// NO_HALF: define noundef <4 x float> @"?test_tan_float4@@YAT?$__vector at M$03 at __clang@@T12@@Z"(
+// NO_HALF: call <4 x float> @llvm.tan.v4f32(
+half4 test_tan_half4 ( half4 p0 ) {
+  return tan ( p0 );
+}
+
+// CHECK: define noundef float @
+// CHECK: call float @llvm.tan.f32(
+float test_tan_float ( float p0 ) {
+  return tan ( p0 );
+}
+// CHECK: define noundef <2 x float> @
+// CHECK: call <2 x float> @llvm.tan.v2f32
+float2 test_tan_float2 ( float2 p0 ) {
+  return tan ( p0 );
+}
+// CHECK: define noundef <3 x float> @
+// CHECK: call <3 x float> @llvm.tan.v3f32
+float3 test_tan_float3 ( float3 p0 ) {
+  return tan ( p0 );
+}
+// CHECK: define noundef <4 x float> @
+// CHECK: call <4 x float> @llvm.tan.v4f32
+float4 test_tan_float4 ( float4 p0 ) {
+  return tan ( p0 );
+}
diff --git a/clang/test/Sema/aarch64-sve-vector-trig-ops.c b/clang/test/Sema/aarch64-sve-vector-trig-ops.c
index 7ca941f578c70d..5039599d43ad81 100644
--- a/clang/test/Sema/aarch64-sve-vector-trig-ops.c
+++ b/clang/test/Sema/aarch64-sve-vector-trig-ops.c
@@ -16,3 +16,9 @@ svfloat32_t test_cos_vv_i8mf8(svfloat32_t v) {
   return __builtin_elementwise_cos(v);
   // expected-error at -1 {{1st argument must be a vector, integer or floating point type}}
 }
+
+svfloat32_t test_tan_vv_i8mf8(svfloat32_t v) {
+
+  return __builtin_elementwise_tan(v);
+  // expected-error at -1 {{1st argument must be a vector, integer or floating point type}}
+}
diff --git a/clang/test/Sema/builtins-elementwise-math.c b/clang/test/Sema/builtins-elementwise-math.c
index 2e05337273ee41..2e4319d158e7a0 100644
--- a/clang/test/Sema/builtins-elementwise-math.c
+++ b/clang/test/Sema/builtins-elementwise-math.c
@@ -626,6 +626,27 @@ void test_builtin_elementwise_sqrt(int i, float f, double d, float4 v, int3 iv,
   // expected-error at -1 {{1st argument must be a floating point type (was 'unsigned4' (vector of 4 'unsigned int' values))}}
 }
 
+void test_builtin_elementwise_tan(int i, float f, double d, float4 v, int3 iv, unsigned u, unsigned4 uv) {
+
+  struct Foo s = __builtin_elementwise_tan(f);
+  // expected-error at -1 {{initializing 'struct Foo' with an expression of incompatible type 'float'}}
+
+  i = __builtin_elementwise_tan();
+  // expected-error at -1 {{too few arguments to function call, expected 1, have 0}}
+
+  i = __builtin_elementwise_tan(i);
+  // expected-error at -1 {{1st argument must be a floating point type (was 'int')}}
+
+  i = __builtin_elementwise_tan(f, f);
+  // expected-error at -1 {{too many arguments to function call, expected 1, have 2}}
+
+  u = __builtin_elementwise_tan(u);
+  // expected-error at -1 {{1st argument must be a floating point type (was 'unsigned int')}}
+
+  uv = __builtin_elementwise_tan(uv);
+  // expected-error at -1 {{1st argument must be a floating point type (was 'unsigned4' (vector of 4 'unsigned int' values))}}
+}
+
 void test_builtin_elementwise_trunc(int i, float f, double d, float4 v, int3 iv, unsigned u, unsigned4 uv) {
 
   struct Foo s = __builtin_elementwise_trunc(f);
diff --git a/clang/test/Sema/riscv-rvv-vector-trig-ops.c b/clang/test/Sema/riscv-rvv-vector-trig-ops.c
index a457e484860602..ee4c596c431841 100644
--- a/clang/test/Sema/riscv-rvv-vector-trig-ops.c
+++ b/clang/test/Sema/riscv-rvv-vector-trig-ops.c
@@ -17,3 +17,9 @@ vfloat32mf2_t test_cos_vv_i8mf8(vfloat32mf2_t v) {
   return __builtin_elementwise_cos(v);
   // expected-error at -1 {{1st argument must be a vector, integer or floating point type}}
 }
+
+vfloat32mf2_t test_tan_vv_i8mf8(vfloat32mf2_t v) {
+
+  return __builtin_elementwise_tan(v);
+  // expected-error at -1 {{1st argument must be a vector, integer or floating point type}}
+}
diff --git a/clang/test/SemaCXX/builtins-elementwise-math.cpp b/clang/test/SemaCXX/builtins-elementwise-math.cpp
index 44a44ab055e997..499f2795ddb272 100644
--- a/clang/test/SemaCXX/builtins-elementwise-math.cpp
+++ b/clang/test/SemaCXX/builtins-elementwise-math.cpp
@@ -111,6 +111,13 @@ void test_builtin_elementwise_sin() {
   static_assert(!is_const<decltype(__builtin_elementwise_sin(b))>::value);
 }
 
+void test_builtin_elementwise_tan() {
+  const float a = 42.0;
+  float b = 42.3;
+  static_assert(!is_const<decltype(__builtin_elementwise_tan(a))>::value);
+  static_assert(!is_const<decltype(__builtin_elementwise_tan(b))>::value);
+}
+
 void test_builtin_elementwise_sqrt() {
   const float a = 42.0;
   float b = 42.3;
diff --git a/llvm/docs/GlobalISel/GenericOpcode.rst b/llvm/docs/GlobalISel/GenericOpcode.rst
index 492d30280f4776..462bda522145e3 100644
--- a/llvm/docs/GlobalISel/GenericOpcode.rst
+++ b/llvm/docs/GlobalISel/GenericOpcode.rst
@@ -592,7 +592,7 @@ G_FLOG, G_FLOG2, G_FLOG10
 
 Calculate the base-e, base-2, or base-10 respectively.
 
-G_FCEIL, G_FCOS, G_FSIN, G_FSQRT, G_FFLOOR, G_FRINT, G_FNEARBYINT
+G_FCEIL, G_FCOS, G_FSIN, G_FTAN, G_FSQRT, G_FFLOOR, G_FRINT, G_FNEARBYINT
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 These correspond to the standard C functions of the same name.
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index f169ab941c457b..144733202f9e0e 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -15270,6 +15270,43 @@ trapping or setting ``errno``.
 When specified with the fast-math-flag 'afn', the result may be approximated
 using a less accurate calculation.
 
+'``llvm.tan.*``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.tan`` on any
+floating-point or vector of floating-point type. Not all targets support
+all types however.
+
+::
+
+      declare float     @llvm.tan.f32(float  %Val)
+      declare double    @llvm.tan.f64(double %Val)
+      declare x86_fp80  @llvm.tan.f80(x86_fp80  %Val)
+      declare fp128     @llvm.tan.f128(fp128 %Val)
+      declare ppc_fp128 @llvm.tan.ppcf128(ppc_fp128  %Val)
+
+Overview:
+"""""""""
+
+The '``llvm.tan.*``' intrinsics return the tangent of the operand.
+
+Arguments:
+""""""""""
+
+The argument and return value are floating-point numbers of the same type.
+
+Semantics:
+""""""""""
+
+Return the same value as a corresponding libm '``tan``' function but without
+trapping or setting ``errno``.
+
+When specified with the fast-math-flag 'afn', the result may be approximated
+using a less accurate calculation.
+
 '``llvm.pow.*``' Intrinsic
 ^^^^^^^^^^^^^^^^^^^^^^^^^^
 
diff --git a/llvm/include/llvm/Analysis/VecFuncs.def b/llvm/include/llvm/Analysis/VecFuncs.def
index 10f1333cf8885c..a8cb3c13e0b6e5 100644
--- a/llvm/include/llvm/Analysis/VecFuncs.def
+++ b/llvm/include/llvm/Analysis/VecFuncs.def
@@ -54,6 +54,7 @@ TLI_DEFINE_VECFUNC("llvm.sin.f32", "vsinf", FIXED(4), "_ZGV_LLVM_N4v")
 TLI_DEFINE_VECFUNC("cosf", "vcosf", FIXED(4), "_ZGV_LLVM_N4v")
 TLI_DEFINE_VECFUNC("llvm.cos.f32", "vcosf", FIXED(4), "_ZGV_LLVM_N4v")
 TLI_DEFINE_VECFUNC("tanf", "vtanf", FIXED(4), "_ZGV_LLVM_N4v")
+TLI_DEFINE_VECFUNC("llvm.tan.f32", "vtanf", FIXED(4))
 TLI_DEFINE_VECFUNC("asinf", "vasinf", FIXED(4), "_ZGV_LLVM_N4v")
 TLI_DEFINE_VECFUNC("acosf", "vacosf", FIXED(4), "_ZGV_LLVM_N4v")
 TLI_DEFINE_VECFUNC("atanf", "vatanf", FIXED(4), "_ZGV_LLVM_N4v")
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 4a3a03dc5ad488..740d6ef2b9f016 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -1924,6 +1924,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
     case Intrinsic::cos:
       ISD = ISD::FCOS;
       break;
+    case Intrinsic::tan:
+      ISD = ISD::FTAN;
+      break;
     case Intrinsic::exp:
       ISD = ISD::FEXP;
       break;
diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index 6429947958ee91..c38c83f5b80dfa 100644
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -415,6 +415,7 @@ enum NodeType {
   STRICT_FLDEXP,
   STRICT_FSIN,
   STRICT_FCOS,
+  STRICT_FTAN,
   STRICT_FEXP,
   STRICT_FEXP2,
   STRICT_FLOG,
@@ -934,6 +935,7 @@ enum NodeType {
   FCBRT,
   FSIN,
   FCOS,
+  FTAN,
   FPOW,
   FPOWI,
   /// FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index a14e9dedef8c9e..bf0ca1687a34d4 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -1025,6 +1025,7 @@ let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrWillReturn] in {
   def int_powi : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_anyint_ty]>;
   def int_sin  : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
   def int_cos  : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
+  def int_tan  : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
   def int_pow  : DefaultAttrsIntrinsic<[llvm_anyfloat_ty],
                            [LLVMMatchType<0>, LLVMMatchType<0>]>;
   def int_log  : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.def b/llvm/include/llvm/IR/RuntimeLibcalls.def
index 5e082769fa974c..f4f93bf7ba24c5 100644
--- a/llvm/include/llvm/IR/RuntimeLibcalls.def
+++ b/llvm/include/llvm/IR/RuntimeLibcalls.def
@@ -197,6 +197,11 @@ HANDLE_LIBCALL(COS_F64, "cos")
 HANDLE_LIBCALL(COS_F80, "cosl")
 HANDLE_LIBCALL(COS_F128, "cosl")
 HANDLE_LIBCALL(COS_PPCF128, "cosl")
+HANDLE_LIBCALL(TAN_F32, "tanf")
+HANDLE_LIBCALL(TAN_F64, "tan")
+HANDLE_LIBCALL(TAN_F80, "tanl")
+HANDLE_LIBCALL(TAN_F128,"tanl")
+HANDLE_LIBCALL(TAN_PPCF128, "tanl")
 HANDLE_LIBCALL(SINCOS_F32, nullptr)
 HANDLE_LIBCALL(SINCOS_F64, nullptr)
 HANDLE_LIBCALL(SINCOS_F80, nullptr)
diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def
index cb98f96af522f7..559a588c251482 100644
--- a/llvm/include/llvm/Support/TargetOpcodes.def
+++ b/llvm/include/llvm/Support/TargetOpcodes.def
@@ -781,6 +781,9 @@ HANDLE_TARGET_OPCODE(G_FCOS)
 /// Floating point sine.
 HANDLE_TARGET_OPCODE(G_FSIN)
 
+/// Floating point Tangent.
+HANDLE_TARGET_OPCODE(G_FTAN)
+
 /// Floating point square root.
 HANDLE_TARGET_OPCODE(G_FSQRT)
 
diff --git a/llvm/include/llvm/Target/GenericOpcodes.td b/llvm/include/llvm/Target/GenericOpcodes.td
index 8380d2738d164b..c40498e5542154 100644
--- a/llvm/include/llvm/Target/GenericOpcodes.td
+++ b/llvm/include/llvm/Target/GenericOpcodes.td
@@ -988,6 +988,13 @@ def G_FSIN : GenericInstruction {
   let hasSideEffects = false;
 }
 
+// Floating point tangent of a value.
+def G_FTAN : GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$src1);
+  let hasSideEffects = false;
+}
+
 // Floating point square root of a value.
 // This returns NaN for negative nonzero values.
 // NOTE: Unlike libm sqrt(), this never sets errno. In all other respects it's
diff --git a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
index 8fa0e4b86d6dc9..560d3b434d07d5 100644
--- a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
+++ b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
@@ -148,6 +148,7 @@ def : GINodeEquiv<G_BUILD_VECTOR, build_vector>;
 def : GINodeEquiv<G_FCEIL, fceil>;
 def : GINodeEquiv<G_FCOS, fcos>;
 def : GINodeEquiv<G_FSIN, fsin>;
+def : GINodeEquiv<G_FTAN, ftan>;
 def : GINodeEquiv<G_FABS, fabs>;
 def : GINodeEquiv<G_FSQRT, fsqrt>;
 def : GINodeEquiv<G_FFLOOR, ffloor>;
diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td
index 1684b424e3b442..bf2413c09e4c9a 100644
--- a/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -509,6 +509,7 @@ def fneg       : SDNode<"ISD::FNEG"       , SDTFPUnaryOp>;
 def fsqrt      : SDNode<"ISD::FSQRT"      , SDTFPUnaryOp>;
 def fsin       : SDNode<"ISD::FSIN"       , SDTFPUnaryOp>;
 def fcos       : SDNode<"ISD::FCOS"       , SDTFPUnaryOp>;
+def ftan       : SDNode<"ISD::FTAN"       , SDTFPUnaryOp>;
 def fexp2      : SDNode<"ISD::FEXP2"      , SDTFPUnaryOp>;
 def fexp10     : SDNode<"ISD::FEXP10"     , SDTFPUnaryOp>;
 def fpow       : SDNode<"ISD::FPOW"       , SDTFPBinOp>;
@@ -562,6 +563,8 @@ def strict_fsin       : SDNode<"ISD::STRICT_FSIN",
                                SDTFPUnaryOp, [SDNPHasChain]>;
 def strict_fcos       : SDNode<"ISD::STRICT_FCOS",
                                SDTFPUnaryOp, [SDNPHasChain]>;
+def strict_ftan       : SDNode<"ISD::STRICT_FTAN",
+                               SDTFPUnaryOp, [SDNPHasChain]>;
 def strict_fexp2      : SDNode<"ISD::STRICT_FEXP2",
                                SDTFPUnaryOp, [SDNPHasChain]>;
 def strict_fpow       : SDNode<"ISD::STRICT_FPOW",
@@ -1514,6 +1517,9 @@ def any_fsin       : PatFrags<(ops node:$src),
 def any_fcos       : PatFrags<(ops node:$src),
                               [(strict_fcos node:$src),
                                (fcos node:$src)]>;
+def any_ftan       : PatFrags<(ops node:$src),
+                              [(strict_ftan node:$src),
+                               (ftan node:$src)]>;
 def any_fexp2      : PatFrags<(ops node:$src),
                               [(strict_fexp2 node:$src),
                                (fexp2 node:$src)]>;
diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp
index 917094267d05ae..30728ed5875090 100644
--- a/llvm/lib/Analysis/VectorUtils.cpp
+++ b/llvm/lib/Analysis/VectorUtils.cpp
@@ -68,6 +68,7 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
   case Intrinsic::sqrt: // Begin floating-point.
   case Intrinsic::sin:
   case Intrinsic::cos:
+  case Intrinsic::tan:
   case Intrinsic::exp:
   case Intrinsic::exp2:
   case Intrinsic::log:
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index d7b0c9aa166735..3dadab8501c900 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -1945,6 +1945,8 @@ unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) {
       return TargetOpcode::G_FSIN;
     case Intrinsic::sqrt:
       return TargetOpcode::G_FSQRT;
+    case Intrinsic::tan:
+      return TargetOpcode::G_FTAN;
     case Intrinsic::trunc:
       return TargetOpcode::G_INTRINSIC_TRUNC;
     case Intrinsic::readcyclecounter:
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 6a76ad7f5db749..ca2fb24b5b85c4 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -448,6 +448,8 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
     RTLIBCASE(SIN_F);
   case TargetOpcode::G_FCOS:
     RTLIBCASE(COS_F);
+  case TargetOpcode::G_FTAN:
+    RTLIBCASE(TAN_F);
   case TargetOpcode::G_FLOG10:
     RTLIBCASE(LOG10_F);
   case TargetOpcode::G_FLOG:
@@ -1036,6 +1038,7 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
   case TargetOpcode::G_FREM:
   case TargetOpcode::G_FCOS:
   case TargetOpcode::G_FSIN:
+  case TargetOpcode::G_FTAN:
   case TargetOpcode::G_FLOG10:
   case TargetOpcode::G_FLOG:
   case TargetOpcode::G_FLOG2:
@@ -2890,6 +2893,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
   case TargetOpcode::G_FFLOOR:
   case TargetOpcode::G_FCOS:
   case TargetOpcode::G_FSIN:
+  case TargetOpcode::G_FTAN:
   case TargetOpcode::G_FLOG10:
   case TargetOpcode::G_FLOG:
   case TargetOpcode::G_FLOG2:
@@ -4656,6 +4660,7 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
   case G_INTRINSIC_TRUNC:
   case G_FCOS:
   case G_FSIN:
+  case G_FTAN:
   case G_FSQRT:
   case G_BSWAP:
   case G_BITREVERSE:
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index ae43e9ccf6112d..b314970caec07d 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -819,6 +819,7 @@ bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI,
   case TargetOpcode::G_FREM:
   case TargetOpcode::G_FSIN:
   case TargetOpcode::G_FCOS:
+  case TargetOpcode::G_FTAN:
   case TargetOpcode::G_FMA:
   case TargetOpcode::G_FMAD:
     if (SNaN)
@@ -1699,6 +1700,7 @@ bool llvm::isPreISelGenericFloatingPointOpcode(unsigned Opc) {
   case TargetOpcode::G_FREM:
   case TargetOpcode::G_FRINT:
   case TargetOpcode::G_FSIN:
+  case TargetOpcode::G_FTAN:
   case TargetOpcode::G_FSQRT:
   case TargetOpcode::G_FSUB:
   case TargetOpcode::G_INTRINSIC_ROUND:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index c381870ae5f41b..61a07771789f00 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -4423,6 +4423,12 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
                     RTLIB::COS_F80, RTLIB::COS_F128,
                     RTLIB::COS_PPCF128, Results);
     break;
+  case ISD::FTAN:
+  case ISD::STRICT_FTAN:
+    ExpandFPLibCall(Node, RTLIB::TAN_F32, RTLIB::TAN_F64,
+                    RTLIB::TAN_F80, RTLIB::TAN_F128,
+                    RTLIB::TAN_PPCF128, Results);
+    break;
   case ISD::FSINCOS:
     // Expand into sincos libcall.
     ExpandSinCosLibCall(Node, Results);
@@ -5367,6 +5373,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
   case ISD::FSQRT:
   case ISD::FSIN:
   case ISD::FCOS:
+  case ISD::FTAN:
   case ISD::FLOG:
   case ISD::FLOG2:
   case ISD::FLOG10:
@@ -5391,6 +5398,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
   case ISD::STRICT_FSQRT:
   case ISD::STRICT_FSIN:
   case ISD::STRICT_FCOS:
+  case ISD::STRICT_FTAN:
   case ISD::STRICT_FLOG:
   case ISD::STRICT_FLOG2:
   case ISD::STRICT_FLOG10:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index abe5be76382556..6f338584739f4b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -131,6 +131,8 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
     case ISD::FSQRT:       R = SoftenFloatRes_FSQRT(N); break;
     case ISD::STRICT_FSUB:
     case ISD::FSUB:        R = SoftenFloatRes_FSUB(N); break;
+    case ISD::STRICT_FTAN:
+    case ISD::FTAN:        R = SoftenFloatRes_FTAN(N); break;
     case ISD::STRICT_FTRUNC:
     case ISD::FTRUNC:      R = SoftenFloatRes_FTRUNC(N); break;
     case ISD::LOAD:        R = SoftenFloatRes_LOAD(N); break;
@@ -773,6 +775,15 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) {
                                                RTLIB::SUB_PPCF128));
 }
 
+SDValue DAGTypeLegalizer::SoftenFloatRes_FTAN(SDNode *N) {
+  return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+                                              RTLIB::TAN_F32,
+                                              RTLIB::TAN_F64,
+                                              RTLIB::TAN_F80,
+                                              RTLIB::TAN_F128,
+                                              RTLIB::TAN_PPCF128));
+}
+
 SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) {
   return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
                                               RTLIB::TRUNC_F32,
@@ -1361,6 +1372,8 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
   case ISD::FSQRT:      ExpandFloatRes_FSQRT(N, Lo, Hi); break;
   case ISD::STRICT_FSUB:
   case ISD::FSUB:       ExpandFloatRes_FSUB(N, Lo, Hi); break;
+  case ISD::STRICT_FTAN:
+  case ISD::FTAN:       ExpandFloatRes_FTAN(N, Lo, Hi); break;
   case ISD::STRICT_FTRUNC:
   case ISD::FTRUNC:     ExpandFloatRes_FTRUNC(N, Lo, Hi); break;
   case ISD::LOAD:       ExpandFloatRes_LOAD(N, Lo, Hi); break;
@@ -1730,6 +1743,14 @@ void DAGTypeLegalizer::ExpandFloatRes_FSUB(SDNode *N, SDValue &Lo,
                                         RTLIB::SUB_PPCF128), Lo, Hi);
 }
 
+void DAGTypeLegalizer::ExpandFloatRes_FTAN(SDNode *N,
+                                           SDValue &Lo, SDValue &Hi) {
+  ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
+                                       RTLIB::TAN_F32, RTLIB::TAN_F64,
+                                       RTLIB::TAN_F80, RTLIB::TAN_F128,
+                                       RTLIB::TAN_PPCF128), Lo, Hi);
+}
+
 void DAGTypeLegalizer::ExpandFloatRes_FTRUNC(SDNode *N,
                                              SDValue &Lo, SDValue &Hi) {
   ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
@@ -2423,6 +2444,7 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) {
     case ISD::FSIN:
     case ISD::FSQRT:
     case ISD::FTRUNC:
+    case ISD::FTAN:
     case ISD::FCANONICALIZE: R = PromoteFloatRes_UnaryOp(N); break;
 
     // Binary FP Operations
@@ -2854,6 +2876,7 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) {
   case ISD::FSIN:
   case ISD::FSQRT:
   case ISD::FTRUNC:
+  case ISD::FTAN:
   case ISD::FCANONICALIZE: R = SoftPromoteHalfRes_UnaryOp(N); break;
 
   // Binary FP Operations
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 4a2c7b355eb528..bd6edf75a91143 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -566,6 +566,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   SDValue SoftenFloatRes_FSIN(SDNode *N);
   SDValue SoftenFloatRes_FSQRT(SDNode *N);
   SDValue SoftenFloatRes_FSUB(SDNode *N);
+  SDValue SoftenFloatRes_FTAN(SDNode *N);
   SDValue SoftenFloatRes_FTRUNC(SDNode *N);
   SDValue SoftenFloatRes_LOAD(SDNode *N);
   SDValue SoftenFloatRes_SELECT(SDNode *N);
@@ -645,6 +646,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   void ExpandFloatRes_FSIN      (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandFloatRes_FSQRT     (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandFloatRes_FSUB      (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandFloatRes_FTAN      (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandFloatRes_FTRUNC    (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandFloatRes_LOAD      (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, SDValue &Hi);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 1de43a4f60e3a2..c5ee042a0cf53b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -402,6 +402,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
   case ISD::FSQRT:
   case ISD::FSIN:
   case ISD::FCOS:
+  case ISD::FTAN:
   case ISD::FLDEXP:
   case ISD::FPOWI:
   case ISD::FPOW:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 985c9f16ab97cd..77ccaefc4cb268 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -107,6 +107,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
   case ISD::FROUNDEVEN:
   case ISD::FSIN:
   case ISD::FSQRT:
+  case ISD::FTAN:
   case ISD::FTRUNC:
   case ISD::SIGN_EXTEND:
   case ISD::SINT_TO_FP:
@@ -1111,6 +1112,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
   case ISD::VP_FROUNDEVEN:
   case ISD::FSIN:
   case ISD::FSQRT: case ISD::VP_SQRT:
+  case ISD::FTAN:
   case ISD::FTRUNC:
   case ISD::VP_FROUNDTOZERO:
   case ISD::SINT_TO_FP:
@@ -4308,6 +4310,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
   case ISD::FROUNDEVEN:
   case ISD::FSIN:
   case ISD::FSQRT:
+  case ISD::FTAN:
   case ISD::FTRUNC:
     if (unrollExpandedOp())
       break;
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index dde10fd4b8c8af..be829e000d73f8 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5255,6 +5255,7 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const
   case ISD::FREM:
   case ISD::FSIN:
   case ISD::FCOS:
+  case ISD::FTAN:
   case ISD::FMA:
   case ISD::FMAD: {
     if (SNaN)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 0db484a5e06bcd..46bd8e54108d69 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -6690,6 +6690,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
   case Intrinsic::fabs:
   case Intrinsic::sin:
   case Intrinsic::cos:
+  case Intrinsic::tan:
   case Intrinsic::exp10:
   case Intrinsic::floor:
   case Intrinsic::ceil:
@@ -6706,6 +6707,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
     case Intrinsic::fabs:      Opcode = ISD::FABS;       break;
     case Intrinsic::sin:       Opcode = ISD::FSIN;       break;
     case Intrinsic::cos:       Opcode = ISD::FCOS;       break;
+    case Intrinsic::tan:       Opcode = ISD::FTAN;       break;
     case Intrinsic::exp10:     Opcode = ISD::FEXP10;     break;
     case Intrinsic::floor:     Opcode = ISD::FFLOOR;     break;
     case Intrinsic::ceil:      Opcode = ISD::FCEIL;      break;
@@ -9081,6 +9083,12 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
         if (visitUnaryFloatCall(I, ISD::FCOS))
           return;
         break;
+      case LibFunc_tan:
+      case LibFunc_tanf:
+      case LibFunc_tanl:
+        if (visitUnaryFloatCall(I, ISD::FTAN))
+          return;
+        break;
       case LibFunc_sqrt:
       case LibFunc_sqrtf:
       case LibFunc_sqrtl:
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 4ad4a938ca97f2..7ead23537cb691 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -210,6 +210,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
   case ISD::FCOS:                       return "fcos";
   case ISD::STRICT_FCOS:                return "strict_fcos";
   case ISD::FSINCOS:                    return "fsincos";
+  case ISD::FTAN:                       return "ftan";
+  case ISD::STRICT_FTAN:                return "strict_ftan";
   case ISD::FTRUNC:                     return "ftrunc";
   case ISD::STRICT_FTRUNC:              return "strict_ftrunc";
   case ISD::FFLOOR:                     return "ffloor";
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 6e7b67ded23c84..29a9f2e1a26420 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -141,6 +141,7 @@ void TargetLoweringBase::InitLibcalls(const Triple &TT) {
     setLibcallName(RTLIB::EXP10_F128, "exp10f128");
     setLibcallName(RTLIB::SIN_F128, "sinf128");
     setLibcallName(RTLIB::COS_F128, "cosf128");
+    setLibcallName(RTLIB::TAN_F128, "tanf128");
     setLibcallName(RTLIB::SINCOS_F128, "sincosf128");
     setLibcallName(RTLIB::POW_F128, "powf128");
     setLibcallName(RTLIB::POW_FINITE_F128, "__powf128_finite");
@@ -987,7 +988,7 @@ void TargetLoweringBase::initActions() {
   setOperationAction({ISD::FCBRT, ISD::FLOG, ISD::FLOG2, ISD::FLOG10, ISD::FEXP,
                       ISD::FEXP2, ISD::FEXP10, ISD::FFLOOR, ISD::FNEARBYINT,
                       ISD::FCEIL, ISD::FRINT, ISD::FTRUNC, ISD::LROUND,
-                      ISD::LLROUND, ISD::LRINT, ISD::LLRINT, ISD::FROUNDEVEN},
+                      ISD::LLROUND, ISD::LRINT, ISD::LLRINT, ISD::FROUNDEVEN, ISD::FTAN},
                      {MVT::f32, MVT::f64, MVT::f128}, Expand);
 
   // Default ISD::TRAP to expand (which turns it into abort).
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index 9994a966c82c31..2051cdc7e01ff8 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -467,6 +467,8 @@ bool SPIRVInstructionSelector::spvSelect(Register ResVReg,
     return selectExtInst(ResVReg, ResType, I, CL::cos, GL::Cos);
   case TargetOpcode::G_FSIN:
     return selectExtInst(ResVReg, ResType, I, CL::sin, GL::Sin);
+  case TargetOpcode::G_FTAN:
+    return selectExtInst(ResVReg, ResType, I, CL::tan, GL::Tan);
 
   case TargetOpcode::G_FSQRT:
     return selectExtInst(ResVReg, ResType, I, CL::sqrt, GL::Sqrt);
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index f0cec6224e84e4..a01c7602af76e0 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -598,6 +598,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::FSIN, VT, Action);
     setOperationAction(ISD::FCOS, VT, Action);
     setOperationAction(ISD::FSINCOS, VT, Action);
+    setOperationAction(ISD::FTAN, VT, Action);
     setOperationAction(ISD::FSQRT, VT, Action);
     setOperationAction(ISD::FPOW, VT, Action);
     setOperationAction(ISD::FLOG, VT, Action);

>From c04b4f455fa60dda0f1c5d543a38070a345c9b88 Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <farzonlotfi at microsoft.com>
Date: Wed, 24 Apr 2024 23:28:06 -0400
Subject: [PATCH 2/7] copy over X86ISelLowering and x86 test cases from
 @junaire's https://reviews.llvm.org/D146905

---
 llvm/include/llvm/Analysis/VecFuncs.def |   2 +-
 llvm/lib/Target/X86/X86ISelLowering.cpp |  11 +-
 llvm/test/CodeGen/X86/llvm.tan.ll       |  60 +++++++
 llvm/test/CodeGen/X86/vec-libcalls.ll   | 202 ++++++++++++++++++++++++
 4 files changed, 273 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/CodeGen/X86/llvm.tan.ll

diff --git a/llvm/include/llvm/Analysis/VecFuncs.def b/llvm/include/llvm/Analysis/VecFuncs.def
index a8cb3c13e0b6e5..de94621791f54f 100644
--- a/llvm/include/llvm/Analysis/VecFuncs.def
+++ b/llvm/include/llvm/Analysis/VecFuncs.def
@@ -54,7 +54,7 @@ TLI_DEFINE_VECFUNC("llvm.sin.f32", "vsinf", FIXED(4), "_ZGV_LLVM_N4v")
 TLI_DEFINE_VECFUNC("cosf", "vcosf", FIXED(4), "_ZGV_LLVM_N4v")
 TLI_DEFINE_VECFUNC("llvm.cos.f32", "vcosf", FIXED(4), "_ZGV_LLVM_N4v")
 TLI_DEFINE_VECFUNC("tanf", "vtanf", FIXED(4), "_ZGV_LLVM_N4v")
-TLI_DEFINE_VECFUNC("llvm.tan.f32", "vtanf", FIXED(4))
+TLI_DEFINE_VECFUNC("llvm.tan.f32", "vtanf", FIXED(4), "_ZGV_LLVM_N4v")
 TLI_DEFINE_VECFUNC("asinf", "vasinf", FIXED(4), "_ZGV_LLVM_N4v")
 TLI_DEFINE_VECFUNC("acosf", "vacosf", FIXED(4), "_ZGV_LLVM_N4v")
 TLI_DEFINE_VECFUNC("atanf", "vatanf", FIXED(4), "_ZGV_LLVM_N4v")
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index a01c7602af76e0..f41c6ccf22b4f5 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -655,6 +655,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
       setOperationAction(ISD::FSIN   , VT, Expand);
       setOperationAction(ISD::FCOS   , VT, Expand);
       setOperationAction(ISD::FSINCOS, VT, Expand);
+      setOperationAction(ISD::FTAN   , VT, Expand);
     }
 
     // Half type will be promoted by default.
@@ -730,12 +731,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::FSIN   , MVT::f32, Expand);
     setOperationAction(ISD::FCOS   , MVT::f32, Expand);
     setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
+    setOperationAction(ISD::FTAN   , MVT::f32, Expand);
 
     if (UseX87) {
       // Always expand sin/cos functions even though x87 has an instruction.
       setOperationAction(ISD::FSIN, MVT::f64, Expand);
       setOperationAction(ISD::FCOS, MVT::f64, Expand);
       setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
+      setOperationAction(ISD::FTAN, MVT::f64, Expand);
     }
   } else if (UseX87) {
     // f32 and f64 in x87.
@@ -751,6 +754,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
       setOperationAction(ISD::FSIN   , VT, Expand);
       setOperationAction(ISD::FCOS   , VT, Expand);
       setOperationAction(ISD::FSINCOS, VT, Expand);
+      setOperationAction(ISD::FTAN   , VT, Expand);
     }
   }
 
@@ -820,6 +824,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::FSIN   , MVT::f80, Expand);
     setOperationAction(ISD::FCOS   , MVT::f80, Expand);
     setOperationAction(ISD::FSINCOS, MVT::f80, Expand);
+    setOperationAction(ISD::FTAN   , MVT::f80, Expand);
 
     setOperationAction(ISD::FFLOOR, MVT::f80, Expand);
     setOperationAction(ISD::FCEIL,  MVT::f80, Expand);
@@ -877,6 +882,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::FCOS,         MVT::f128, LibCall);
     setOperationAction(ISD::STRICT_FCOS,  MVT::f128, LibCall);
     setOperationAction(ISD::FSINCOS,      MVT::f128, LibCall);
+    setOperationAction(ISD::FTAN,         MVT::f128, LibCall);
+    setOperationAction(ISD::STRICT_FTAN,  MVT::f128, LibCall);
     // No STRICT_FSINCOS
     setOperationAction(ISD::FSQRT,        MVT::f128, LibCall);
     setOperationAction(ISD::STRICT_FSQRT, MVT::f128, LibCall);
@@ -931,6 +938,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::FSIN,      VT, Expand);
     setOperationAction(ISD::FSINCOS,   VT, Expand);
     setOperationAction(ISD::FCOS,      VT, Expand);
+    setOperationAction(ISD::FTAN,      VT, Expand);
     setOperationAction(ISD::FREM,      VT, Expand);
     setOperationAction(ISD::FCOPYSIGN, VT, Expand);
     setOperationAction(ISD::FPOW,      VT, Expand);
@@ -2458,7 +2466,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
           ISD::FLOG,   ISD::STRICT_FLOG,
           ISD::FLOG10, ISD::STRICT_FLOG10,
           ISD::FPOW,   ISD::STRICT_FPOW,
-          ISD::FSIN,   ISD::STRICT_FSIN})
+          ISD::FSIN,   ISD::STRICT_FSIN,
+          ISD::FTAN,   ISD::STRICT_FTAN})
       if (isOperationExpand(Op, MVT::f32))
         setOperationAction(Op, MVT::f32, Promote);
 
diff --git a/llvm/test/CodeGen/X86/llvm.tan.ll b/llvm/test/CodeGen/X86/llvm.tan.ll
new file mode 100644
index 00000000000000..7aec95d19dbf08
--- /dev/null
+++ b/llvm/test/CodeGen/X86/llvm.tan.ll
@@ -0,0 +1,60 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
+
+define half @use_tanf16(half %a) {
+; CHECK-LABEL: use_tanf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    callq __extendhfsf2 at PLT
+; CHECK-NEXT:    callq tanf at PLT
+; CHECK-NEXT:    callq __truncsfhf2 at PLT
+; CHECK-NEXT:    popq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+  %x = call half @llvm.tan.f16(half %a)
+  ret half %x
+}
+
+define float @use_tanf32(float %a) {
+; CHECK-LABEL: use_tanf32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    jmp tanf at PLT # TAILCALL
+  %x = call float @llvm.tan.f32(float %a)
+  ret float %x
+}
+
+define double @use_tanf64(double %a) {
+; CHECK-LABEL: use_tanf64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    jmp tan at PLT # TAILCALL
+  %x = call double @llvm.tan.f64(double %a)
+  ret double %x
+}
+
+define fp128 @use_tanfp128(fp128 %a) {
+; CHECK-LABEL: use_tanfp128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    jmp tanl at PLT # TAILCALL
+  %x = call fp128 @llvm.tan.f128(fp128 %a)
+  ret fp128 %x
+}
+
+define ppc_fp128 @use_tanppc_fp128(ppc_fp128 %a) {
+; CHECK-LABEL: use_tanppc_fp128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    callq tanl at PLT
+; CHECK-NEXT:    popq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+  %x = call ppc_fp128 @llvm.tan.ppcf128(ppc_fp128 %a)
+  ret ppc_fp128 %x
+}
+
+declare half @llvm.tan.f16(half)
+declare float @llvm.tan.f32(float)
+declare double @llvm.tan.f64(double)
+declare fp128 @llvm.tan.f128(fp128)
+declare ppc_fp128 @llvm.tan.ppcf128(ppc_fp128)
diff --git a/llvm/test/CodeGen/X86/vec-libcalls.ll b/llvm/test/CodeGen/X86/vec-libcalls.ll
index 3a1315446d7a2c..6857101d3d75bb 100644
--- a/llvm/test/CodeGen/X86/vec-libcalls.ll
+++ b/llvm/test/CodeGen/X86/vec-libcalls.ll
@@ -17,6 +17,14 @@ declare <5 x float> @llvm.sin.v5f32(<5 x float>)
 declare <6 x float> @llvm.sin.v6f32(<6 x float>)
 declare <3 x double> @llvm.sin.v3f64(<3 x double>)
 
+declare <1 x float> @llvm.tan.v1f32(<1 x float>)
+declare <2 x float> @llvm.tan.v2f32(<2 x float>)
+declare <3 x float> @llvm.tan.v3f32(<3 x float>)
+declare <4 x float> @llvm.tan.v4f32(<4 x float>)
+declare <5 x float> @llvm.tan.v5f32(<5 x float>)
+declare <6 x float> @llvm.tan.v6f32(<6 x float>)
+declare <3 x double> @llvm.tan.v3f64(<3 x double>)
+
 ; Verify that all of the potential libcall candidates are handled.
 ; Some of these have custom lowering, so those cases won't have
 ; libcalls.
@@ -230,6 +238,200 @@ define <3 x double> @sin_v3f64(<3 x double> %x) nounwind {
   ret <3 x double> %r
 }
 
+define <1 x float> @tan_v1f32(<1 x float> %x) nounwind {
+; CHECK-LABEL: tan_v1f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    callq tanf at PLT
+; CHECK-NEXT:    popq %rax
+; CHECK-NEXT:    retq
+  %r = call <1 x float> @llvm.tan.v1f32(<1 x float> %x)
+  ret <1 x float> %r
+}
+
+define <2 x float> @tan_v2f32(<2 x float> %x) nounwind {
+; CHECK-LABEL: tan_v2f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    subq $40, %rsp
+; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT:    callq tanf at PLT
+; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT:    vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
+; CHECK-NEXT:    callq tanf at PLT
+; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
+; CHECK-NEXT:    addq $40, %rsp
+; CHECK-NEXT:    retq
+  %r = call <2 x float> @llvm.tan.v2f32(<2 x float> %x)
+  ret <2 x float> %r
+}
+
+define <3 x float> @tan_v3f32(<3 x float> %x) nounwind {
+; CHECK-LABEL: tan_v3f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    subq $40, %rsp
+; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT:    callq tanf at PLT
+; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT:    vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
+; CHECK-NEXT:    callq tanf at PLT
+; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
+; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT:    vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT:    # xmm0 = mem[1,0]
+; CHECK-NEXT:    callq tanf at PLT
+; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
+; CHECK-NEXT:    addq $40, %rsp
+; CHECK-NEXT:    retq
+  %r = call <3 x float> @llvm.tan.v3f32(<3 x float> %x)
+  ret <3 x float> %r
+}
+
+define <4 x float> @tan_v4f32(<4 x float> %x) nounwind {
+; CHECK-LABEL: tan_v4f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    subq $40, %rsp
+; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT:    callq tanf at PLT
+; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT:    vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
+; CHECK-NEXT:    callq tanf at PLT
+; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
+; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT:    vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT:    # xmm0 = mem[1,0]
+; CHECK-NEXT:    callq tanf at PLT
+; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
+; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT:    vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT:    # xmm0 = mem[3,3,3,3]
+; CHECK-NEXT:    callq tanf at PLT
+; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
+; CHECK-NEXT:    addq $40, %rsp
+; CHECK-NEXT:    retq
+  %r = call <4 x float> @llvm.tan.v4f32(<4 x float> %x)
+  ret <4 x float> %r
+}
+
+define <5 x float> @tan_v5f32(<5 x float> %x) nounwind {
+; CHECK-LABEL: tan_v5f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    subq $72, %rsp
+; CHECK-NEXT:    vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    callq tanf at PLT
+; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT:    vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
+; CHECK-NEXT:    callq tanf at PLT
+; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
+; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT:    vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT:    # xmm0 = mem[1,0]
+; CHECK-NEXT:    callq tanf at PLT
+; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
+; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT:    vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT:    # xmm0 = mem[3,3,3,3]
+; CHECK-NEXT:    callq tanf at PLT
+; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
+; CHECK-NEXT:    vmovups %ymm0, (%rsp) # 32-byte Spill
+; CHECK-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
+; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    callq tanf at PLT
+; CHECK-NEXT:    vmovups (%rsp), %ymm1 # 32-byte Reload
+; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; CHECK-NEXT:    addq $72, %rsp
+; CHECK-NEXT:    retq
+  %r = call <5 x float> @llvm.tan.v5f32(<5 x float> %x)
+  ret <5 x float> %r
+}
+
+define <6 x float> @tan_v6f32(<6 x float> %x) nounwind {
+; CHECK-LABEL: tan_v6f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    subq $72, %rsp
+; CHECK-NEXT:    vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    callq tanf at PLT
+; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT:    vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
+; CHECK-NEXT:    callq tanf at PLT
+; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
+; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
+; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    callq tanf at PLT
+; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT:    vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
+; CHECK-NEXT:    callq tanf at PLT
+; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
+; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT:    vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT:    # xmm0 = mem[1,0]
+; CHECK-NEXT:    callq tanf at PLT
+; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
+; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT:    vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT:    # xmm0 = mem[3,3,3,3]
+; CHECK-NEXT:    callq tanf at PLT
+; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
+; CHECK-NEXT:    vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
+; CHECK-NEXT:    addq $72, %rsp
+; CHECK-NEXT:    retq
+  %r = call <6 x float> @llvm.tan.v6f32(<6 x float> %x)
+  ret <6 x float> %r
+}
+
+define <3 x double> @tan_v3f64(<3 x double> %x) nounwind {
+; CHECK-LABEL: tan_v3f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    subq $72, %rsp
+; CHECK-NEXT:    vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    callq tan at PLT
+; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT:    vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT:    # xmm0 = mem[1,0]
+; CHECK-NEXT:    callq tan at PLT
+; CHECK-NEXT:    vmovapd (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; CHECK-NEXT:    vmovupd %ymm0, (%rsp) # 32-byte Spill
+; CHECK-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
+; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    callq tan at PLT
+; CHECK-NEXT:    vmovups (%rsp), %ymm1 # 32-byte Reload
+; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; CHECK-NEXT:    addq $72, %rsp
+; CHECK-NEXT:    retq
+  %r = call <3 x double> @llvm.tan.v3f64(<3 x double> %x)
+  ret <3 x double> %r
+}
+
 define <2 x float> @fabs_v2f32(<2 x float> %x) nounwind {
 ; CHECK-LABEL: fabs_v2f32:
 ; CHECK:       # %bb.0:

>From 2979cb6212ef05b751b8e9d234a6c5701ac61c56 Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <farzonlotfi at microsoft.com>
Date: Wed, 24 Apr 2024 23:44:34 -0400
Subject: [PATCH 3/7] fix test case

---
 llvm/test/CodeGen/X86/llvm.tan.ll | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/test/CodeGen/X86/llvm.tan.ll b/llvm/test/CodeGen/X86/llvm.tan.ll
index 7aec95d19dbf08..407b8a5e20297c 100644
--- a/llvm/test/CodeGen/X86/llvm.tan.ll
+++ b/llvm/test/CodeGen/X86/llvm.tan.ll
@@ -35,7 +35,7 @@ define double @use_tanf64(double %a) {
 define fp128 @use_tanfp128(fp128 %a) {
 ; CHECK-LABEL: use_tanfp128:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    jmp tanl at PLT # TAILCALL
+; CHECK-NEXT:    jmp  tanf128 at PLT # TAILCALL
   %x = call fp128 @llvm.tan.f128(fp128 %a)
   ret fp128 %x
 }

>From b1d33574ade98ae03427bbc2b97ef02f2aeea668 Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <farzonlotfi at microsoft.com>
Date: Thu, 25 Apr 2024 01:43:06 -0400
Subject: [PATCH 4/7] Add hlsl spirv and dxil backend tests.

---
 clang/docs/LanguageExtensions.rst             |  2 +-
 clang/test/CodeGenHLSL/builtins/tan.hlsl      | 57 ++++++++++---------
 .../BuiltIns/half-float-only-errors.hlsl      |  1 +
 llvm/lib/Target/DirectX/DXIL.td               |  3 +
 llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp  |  1 +
 llvm/test/CodeGen/DirectX/tan.ll              | 20 +++++++
 llvm/test/CodeGen/DirectX/tan_error.ll        | 10 ++++
 .../test/CodeGen/SPIRV/hlsl-intrinsics/tan.ll | 45 +++++++++++++++
 8 files changed, 111 insertions(+), 28 deletions(-)
 create mode 100644 llvm/test/CodeGen/DirectX/tan.ll
 create mode 100644 llvm/test/CodeGen/DirectX/tan_error.ll
 create mode 100644 llvm/test/CodeGen/SPIRV/hlsl-intrinsics/tan.ll

diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index 305d6da366d25a..aa61e239d6984c 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -665,7 +665,7 @@ Unless specified otherwise operation(±0) = ±0 and operation(±infinity) = ±in
  T __builtin_elementwise_bitreverse(T x)     return the integer represented after reversing the bits of x     integer types
  T __builtin_elementwise_exp(T x)            returns the base-e exponential, e^x, of the specified value      floating point types
  T __builtin_elementwise_exp2(T x)           returns the base-2 exponential, 2^x, of the specified value      floating point types
- 
+
  T __builtin_elementwise_sqrt(T x)           return the square root of a floating-point number                floating point types
  T __builtin_elementwise_roundeven(T x)      round x to the nearest integer value in floating point format,   floating point types
                                              rounding halfway cases to even (that is, to the nearest value
diff --git a/clang/test/CodeGenHLSL/builtins/tan.hlsl b/clang/test/CodeGenHLSL/builtins/tan.hlsl
index 88206eee146b0a..aa542fac226d0b 100644
--- a/clang/test/CodeGenHLSL/builtins/tan.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/tan.hlsl
@@ -1,55 +1,58 @@
-// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
 // RUN:   dxil-pc-shadermodel6.3-library %s -fnative-half-type \
-// RUN:   -emit-llvm -disable-llvm-passes -O3 -o - | FileCheck %s
-// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \
-// RUN:   dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
-// RUN:   -D__HLSL_ENABLE_16_BIT -o - | FileCheck %s --check-prefix=NO_HALF
-
-// CHECK: define noundef half @
-// CHECK: call half @llvm.tan.f16(
-// NO_HALF: define noundef float @"?test_tan_half@@YA$halff@$halff@@Z"(
-// NO_HALF: call float @llvm.tan.f32(
+// RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ 
+// RUN:   --check-prefixes=CHECK,NATIVE_HALF
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
+// RUN:   spirv-unknown-vulkan-compute %s -emit-llvm -disable-llvm-passes \
+// RUN:   -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF
+
+// CHECK-LABEL: test_tan_half
+// NATIVE_HALF: call half @llvm.tan.f16
+// NO_HALF: call float @llvm.tan.f32
 half test_tan_half ( half p0 ) {
   return tan ( p0 );
 }
-// CHECK: define noundef <2 x half> @
-// CHECK: call <2 x half> @llvm.tan.v2f16
-// NO_HALF: define noundef <2 x float> @"?test_tan_float2@@YAT?$__vector at M$01 at __clang@@T12@@Z"(
-// NO_HALF: call <2 x float> @llvm.tan.v2f32(
+
+// CHECK-LABEL: test_tan_half2
+// NATIVE_HALF: call <2 x half> @llvm.tan.v2f16
+// NO_HALF: call <2 x float> @llvm.tan.v2f32
 half2 test_tan_half2 ( half2 p0 ) {
   return tan ( p0 );
 }
-// CHECK: define noundef <3 x half> @
-// CHECK: call <3 x half> @llvm.tan.v3f16
-// NO_HALF: define noundef <3 x float> @"?test_tan_float3@@YAT?$__vector at M$02 at __clang@@T12@@Z"(
-// NO_HALF: call <3 x float> @llvm.tan.v3f32(
+
+// CHECK-LABEL: test_tan_half3
+// NATIVE_HALF: call <3 x half> @llvm.tan.v3f16
+// NO_HALF: call <3 x float> @llvm.tan.v3f32
 half3 test_tan_half3 ( half3 p0 ) {
   return tan ( p0 );
 }
-// CHECK: define noundef <4 x half> @
-// CHECK: call <4 x half> @llvm.tan.v4f16
-// NO_HALF: define noundef <4 x float> @"?test_tan_float4@@YAT?$__vector at M$03 at __clang@@T12@@Z"(
-// NO_HALF: call <4 x float> @llvm.tan.v4f32(
+
+// CHECK-LABEL: test_tan_half4
+// NATIVE_HALF: call <4 x half> @llvm.tan.v4f16
+// NO_HALF: call <4 x float> @llvm.tan.v4f32
 half4 test_tan_half4 ( half4 p0 ) {
   return tan ( p0 );
 }
 
-// CHECK: define noundef float @
-// CHECK: call float @llvm.tan.f32(
+// CHECK-LABEL: test_tan_float
+// CHECK: call float @llvm.tan.f32
 float test_tan_float ( float p0 ) {
   return tan ( p0 );
 }
-// CHECK: define noundef <2 x float> @
+
+// CHECK-LABEL: test_tan_float2
 // CHECK: call <2 x float> @llvm.tan.v2f32
 float2 test_tan_float2 ( float2 p0 ) {
   return tan ( p0 );
 }
-// CHECK: define noundef <3 x float> @
+
+// CHECK-LABEL: test_tan_float3
 // CHECK: call <3 x float> @llvm.tan.v3f32
 float3 test_tan_float3 ( float3 p0 ) {
   return tan ( p0 );
 }
-// CHECK: define noundef <4 x float> @
+
+// CHECK-LABEL: test_tan_float4
 // CHECK: call <4 x float> @llvm.tan.v4f32
 float4 test_tan_float4 ( float4 p0 ) {
   return tan ( p0 );
diff --git a/clang/test/SemaHLSL/BuiltIns/half-float-only-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/half-float-only-errors.hlsl
index ef0928f8fef0d6..4089188134d321 100644
--- a/clang/test/SemaHLSL/BuiltIns/half-float-only-errors.hlsl
+++ b/clang/test/SemaHLSL/BuiltIns/half-float-only-errors.hlsl
@@ -9,6 +9,7 @@
 // RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify -DTEST_FUNC=__builtin_elementwise_sin
 // RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify -DTEST_FUNC=__builtin_elementwise_sqrt
 // RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify -DTEST_FUNC=__builtin_elementwise_roundeven
+// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify -DTEST_FUNC=__builtin_elementwise_tan
 // RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify -DTEST_FUNC=__builtin_elementwise_trunc
 
 double2 test_double_builtin(double2 p0) {
diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index cd388ed3e3191b..24a0c8524230c5 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -266,6 +266,9 @@ def Cos  : DXILOpMapping<12, unary, int_cos,
 def Sin  : DXILOpMapping<13, unary, int_sin,
                          "Returns sine(theta) for theta in radians.",
                          [llvm_halforfloat_ty, LLVMMatchType<0>]>;
+def Tan  : DXILOpMapping<14, unary, int_tan,
+                         "Returns tangent(theta) for theta in radians.",
+                         [llvm_halforfloat_ty, LLVMMatchType<0>]>;
 def Exp2 : DXILOpMapping<21, unary, int_exp2,
                          "Returns the base 2 exponential, or 2**x, of the specified value."
                          "exp2(x) = 2**x.",
diff --git a/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp b/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp
index 4b871bdd5d0758..e7b35555293a3e 100644
--- a/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp
@@ -277,6 +277,7 @@ SPIRVLegalizerInfo::SPIRVLegalizerInfo(const SPIRVSubtarget &ST) {
                                G_FCEIL,
                                G_FCOS,
                                G_FSIN,
+                               G_FTAN,
                                G_FSQRT,
                                G_FFLOOR,
                                G_FRINT,
diff --git a/llvm/test/CodeGen/DirectX/tan.ll b/llvm/test/CodeGen/DirectX/tan.ll
new file mode 100644
index 00000000000000..567ab02d40f918
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/tan.ll
@@ -0,0 +1,20 @@
+; RUN: opt -S -dxil-op-lower < %s | FileCheck %s
+
+; Make sure dxil operation function calls for tan are generated for float and half.
+
+define noundef float @tan_float(float noundef %a) #0 {
+entry:
+; CHECK:call float @dx.op.unary.f32(i32 14, float %{{.*}})
+  %elt.tan = call float @llvm.tan.f32(float %a)
+  ret float %elt.tan
+}
+
+define noundef half @tan_half(half noundef %a) #0 {
+entry:
+; CHECK:call half @dx.op.unary.f16(i32 14, half %{{.*}})
+  %elt.tan = call half @llvm.tan.f16(half %a)
+  ret half %elt.tan
+}
+
+declare half @llvm.tan.f16(half)
+declare float @llvm.tan.f32(float)
diff --git a/llvm/test/CodeGen/DirectX/tan_error.ll b/llvm/test/CodeGen/DirectX/tan_error.ll
new file mode 100644
index 00000000000000..c870c36f54925d
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/tan_error.ll
@@ -0,0 +1,10 @@
+; RUN: not opt -S -dxil-op-lower %s 2>&1 | FileCheck %s
+
+; DXIL operation tan does not support double overload type
+; CHECK: LLVM ERROR: Invalid Overload
+
+define noundef double @tan_double(double noundef %a) #0 {
+entry:
+  %1 = call double @llvm.tan.f64(double %a)
+  ret double %1
+}
diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/tan.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/tan.ll
new file mode 100644
index 00000000000000..7bdce99dbfaa7e
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/tan.ll
@@ -0,0 +1,45 @@
+; RUN: llc -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %}
+
+; CHECK-DAG: %[[#op_ext_glsl:]] = OpExtInstImport "GLSL.std.450"
+; CHECK-DAG: %[[#float_32:]] = OpTypeFloat 32
+; CHECK-DAG: %[[#float_16:]] = OpTypeFloat 16
+; CHECK-DAG: %[[#vec4_float_32:]] = OpTypeVector %[[#float_32]] 4
+; CHECK-DAG: %[[#vec4_float_16:]] = OpTypeVector %[[#float_16]] 4
+
+define noundef float @tan_float(float noundef %a) {
+entry:
+; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]]
+; CHECK: %[[#]] = OpExtInst %[[#float_32]] %[[#op_ext_glsl]] Tan %[[#arg0]]
+  %elt.tan = call float @llvm.tan.f32(float %a)
+  ret float %elt.tan
+}
+
+define noundef half @tan_half(half noundef %a) {
+entry:
+; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]]
+; CHECK: %[[#]] = OpExtInst %[[#float_16]] %[[#op_ext_glsl]] Tan %[[#arg0]]
+  %elt.tan = call half @llvm.tan.f16(half %a)
+  ret half %elt.tan
+}
+
+define noundef <4 x float> @tan_float4(<4 x float> noundef %a) {
+entry:
+  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#]] = OpExtInst %[[#vec4_float_32]] %[[#op_ext_glsl]] Tan %[[#arg0]]
+  %elt.tan = call <4 x float> @llvm.tan.v4f32(<4 x float> %a)
+  ret <4 x float> %elt.tan
+}
+
+define noundef <4 x half> @tan_half4(<4 x half> noundef %a) {
+entry:
+  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]]
+  ; CHECK: %[[#]] = OpExtInst %[[#vec4_float_16]] %[[#op_ext_glsl]] Tan %[[#arg0]]
+  %elt.tan = call <4 x half> @llvm.tan.v4f16(<4 x half> %a)
+  ret <4 x half> %elt.tan
+}
+
+declare half @llvm.tan.f16(half)
+declare float @llvm.tan.f32(float)
+declare <4 x half> @llvm.tan.v4f16(<4 x half>)
+declare <4 x float> @llvm.tan.v4f32(<4 x float>)

>From fddcb0d36362ce644c95fe1d9dad5db48963b26b Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <farzonlotfi at microsoft.com>
Date: Thu, 25 Apr 2024 12:19:56 -0400
Subject: [PATCH 5/7] run clang format

---
 llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp |  5 ++--
 .../SelectionDAG/LegalizeFloatTypes.cpp       | 26 ++++++++---------
 llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h |  2 +-
 .../SelectionDAG/SelectionDAGBuilder.cpp      |  4 ++-
 llvm/lib/CodeGen/TargetLoweringBase.cpp       |  3 +-
 llvm/lib/Target/X86/X86ISelLowering.cpp       | 29 ++++++++-----------
 6 files changed, 33 insertions(+), 36 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 61a07771789f00..3d8b282479efec 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -4425,9 +4425,8 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
     break;
   case ISD::FTAN:
   case ISD::STRICT_FTAN:
-    ExpandFPLibCall(Node, RTLIB::TAN_F32, RTLIB::TAN_F64,
-                    RTLIB::TAN_F80, RTLIB::TAN_F128,
-                    RTLIB::TAN_PPCF128, Results);
+    ExpandFPLibCall(Node, RTLIB::TAN_F32, RTLIB::TAN_F64, RTLIB::TAN_F80,
+                    RTLIB::TAN_F128, RTLIB::TAN_PPCF128, Results);
     break;
   case ISD::FSINCOS:
     // Expand into sincos libcall.
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 6f338584739f4b..2fa2bdafd46802 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -776,12 +776,9 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) {
 }
 
 SDValue DAGTypeLegalizer::SoftenFloatRes_FTAN(SDNode *N) {
-  return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
-                                              RTLIB::TAN_F32,
-                                              RTLIB::TAN_F64,
-                                              RTLIB::TAN_F80,
-                                              RTLIB::TAN_F128,
-                                              RTLIB::TAN_PPCF128));
+  return SoftenFloatRes_Unary(
+      N, GetFPLibCall(N->getValueType(0), RTLIB::TAN_F32, RTLIB::TAN_F64,
+                      RTLIB::TAN_F80, RTLIB::TAN_F128, RTLIB::TAN_PPCF128));
 }
 
 SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) {
@@ -1373,7 +1370,9 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
   case ISD::STRICT_FSUB:
   case ISD::FSUB:       ExpandFloatRes_FSUB(N, Lo, Hi); break;
   case ISD::STRICT_FTAN:
-  case ISD::FTAN:       ExpandFloatRes_FTAN(N, Lo, Hi); break;
+  case ISD::FTAN:
+    ExpandFloatRes_FTAN(N, Lo, Hi);
+    break;
   case ISD::STRICT_FTRUNC:
   case ISD::FTRUNC:     ExpandFloatRes_FTRUNC(N, Lo, Hi); break;
   case ISD::LOAD:       ExpandFloatRes_LOAD(N, Lo, Hi); break;
@@ -1743,12 +1742,13 @@ void DAGTypeLegalizer::ExpandFloatRes_FSUB(SDNode *N, SDValue &Lo,
                                         RTLIB::SUB_PPCF128), Lo, Hi);
 }
 
-void DAGTypeLegalizer::ExpandFloatRes_FTAN(SDNode *N,
-                                           SDValue &Lo, SDValue &Hi) {
-  ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
-                                       RTLIB::TAN_F32, RTLIB::TAN_F64,
-                                       RTLIB::TAN_F80, RTLIB::TAN_F128,
-                                       RTLIB::TAN_PPCF128), Lo, Hi);
+void DAGTypeLegalizer::ExpandFloatRes_FTAN(SDNode *N, SDValue &Lo,
+                                           SDValue &Hi) {
+  ExpandFloatRes_Unary(N,
+                       GetFPLibCall(N->getValueType(0), RTLIB::TAN_F32,
+                                    RTLIB::TAN_F64, RTLIB::TAN_F80,
+                                    RTLIB::TAN_F128, RTLIB::TAN_PPCF128),
+                       Lo, Hi);
 }
 
 void DAGTypeLegalizer::ExpandFloatRes_FTRUNC(SDNode *N,
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index bd6edf75a91143..3d29e98524a07a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -646,7 +646,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   void ExpandFloatRes_FSIN      (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandFloatRes_FSQRT     (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandFloatRes_FSUB      (SDNode *N, SDValue &Lo, SDValue &Hi);
-  void ExpandFloatRes_FTAN      (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandFloatRes_FTAN(SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandFloatRes_FTRUNC    (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandFloatRes_LOAD      (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, SDValue &Hi);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 46bd8e54108d69..3b7fcd56182735 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -6707,7 +6707,9 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
     case Intrinsic::fabs:      Opcode = ISD::FABS;       break;
     case Intrinsic::sin:       Opcode = ISD::FSIN;       break;
     case Intrinsic::cos:       Opcode = ISD::FCOS;       break;
-    case Intrinsic::tan:       Opcode = ISD::FTAN;       break;
+    case Intrinsic::tan:
+      Opcode = ISD::FTAN;
+      break;
     case Intrinsic::exp10:     Opcode = ISD::FEXP10;     break;
     case Intrinsic::floor:     Opcode = ISD::FFLOOR;     break;
     case Intrinsic::ceil:      Opcode = ISD::FCEIL;      break;
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 29a9f2e1a26420..eb766345cd3b39 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -988,7 +988,8 @@ void TargetLoweringBase::initActions() {
   setOperationAction({ISD::FCBRT, ISD::FLOG, ISD::FLOG2, ISD::FLOG10, ISD::FEXP,
                       ISD::FEXP2, ISD::FEXP10, ISD::FFLOOR, ISD::FNEARBYINT,
                       ISD::FCEIL, ISD::FRINT, ISD::FTRUNC, ISD::LROUND,
-                      ISD::LLROUND, ISD::LRINT, ISD::LLRINT, ISD::FROUNDEVEN, ISD::FTAN},
+                      ISD::LLROUND, ISD::LRINT, ISD::LLRINT, ISD::FROUNDEVEN,
+                      ISD::FTAN},
                      {MVT::f32, MVT::f64, MVT::f128}, Expand);
 
   // Default ISD::TRAP to expand (which turns it into abort).
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index f41c6ccf22b4f5..da4ed85dfa64d6 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -655,7 +655,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
       setOperationAction(ISD::FSIN   , VT, Expand);
       setOperationAction(ISD::FCOS   , VT, Expand);
       setOperationAction(ISD::FSINCOS, VT, Expand);
-      setOperationAction(ISD::FTAN   , VT, Expand);
+      setOperationAction(ISD::FTAN, VT, Expand);
     }
 
     // Half type will be promoted by default.
@@ -731,7 +731,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::FSIN   , MVT::f32, Expand);
     setOperationAction(ISD::FCOS   , MVT::f32, Expand);
     setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
-    setOperationAction(ISD::FTAN   , MVT::f32, Expand);
+    setOperationAction(ISD::FTAN, MVT::f32, Expand);
 
     if (UseX87) {
       // Always expand sin/cos functions even though x87 has an instruction.
@@ -754,7 +754,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
       setOperationAction(ISD::FSIN   , VT, Expand);
       setOperationAction(ISD::FCOS   , VT, Expand);
       setOperationAction(ISD::FSINCOS, VT, Expand);
-      setOperationAction(ISD::FTAN   , VT, Expand);
+      setOperationAction(ISD::FTAN, VT, Expand);
     }
   }
 
@@ -824,7 +824,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::FSIN   , MVT::f80, Expand);
     setOperationAction(ISD::FCOS   , MVT::f80, Expand);
     setOperationAction(ISD::FSINCOS, MVT::f80, Expand);
-    setOperationAction(ISD::FTAN   , MVT::f80, Expand);
+    setOperationAction(ISD::FTAN, MVT::f80, Expand);
 
     setOperationAction(ISD::FFLOOR, MVT::f80, Expand);
     setOperationAction(ISD::FCEIL,  MVT::f80, Expand);
@@ -882,8 +882,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::FCOS,         MVT::f128, LibCall);
     setOperationAction(ISD::STRICT_FCOS,  MVT::f128, LibCall);
     setOperationAction(ISD::FSINCOS,      MVT::f128, LibCall);
-    setOperationAction(ISD::FTAN,         MVT::f128, LibCall);
-    setOperationAction(ISD::STRICT_FTAN,  MVT::f128, LibCall);
+    setOperationAction(ISD::FTAN, MVT::f128, LibCall);
+    setOperationAction(ISD::STRICT_FTAN, MVT::f128, LibCall);
     // No STRICT_FSINCOS
     setOperationAction(ISD::FSQRT,        MVT::f128, LibCall);
     setOperationAction(ISD::STRICT_FSQRT, MVT::f128, LibCall);
@@ -938,7 +938,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::FSIN,      VT, Expand);
     setOperationAction(ISD::FSINCOS,   VT, Expand);
     setOperationAction(ISD::FCOS,      VT, Expand);
-    setOperationAction(ISD::FTAN,      VT, Expand);
+    setOperationAction(ISD::FTAN, VT, Expand);
     setOperationAction(ISD::FREM,      VT, Expand);
     setOperationAction(ISD::FCOPYSIGN, VT, Expand);
     setOperationAction(ISD::FPOW,      VT, Expand);
@@ -2458,16 +2458,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
   if (Subtarget.is32Bit() &&
       (Subtarget.isTargetWindowsMSVC() || Subtarget.isTargetWindowsItanium()))
     for (ISD::NodeType Op :
-         {ISD::FCEIL,  ISD::STRICT_FCEIL,
-          ISD::FCOS,   ISD::STRICT_FCOS,
-          ISD::FEXP,   ISD::STRICT_FEXP,
-          ISD::FFLOOR, ISD::STRICT_FFLOOR,
-          ISD::FREM,   ISD::STRICT_FREM,
-          ISD::FLOG,   ISD::STRICT_FLOG,
-          ISD::FLOG10, ISD::STRICT_FLOG10,
-          ISD::FPOW,   ISD::STRICT_FPOW,
-          ISD::FSIN,   ISD::STRICT_FSIN,
-          ISD::FTAN,   ISD::STRICT_FTAN})
+         {ISD::FCEIL,  ISD::STRICT_FCEIL,  ISD::FCOS,   ISD::STRICT_FCOS,
+          ISD::FEXP,   ISD::STRICT_FEXP,   ISD::FFLOOR, ISD::STRICT_FFLOOR,
+          ISD::FREM,   ISD::STRICT_FREM,   ISD::FLOG,   ISD::STRICT_FLOG,
+          ISD::FLOG10, ISD::STRICT_FLOG10, ISD::FPOW,   ISD::STRICT_FPOW,
+          ISD::FSIN,   ISD::STRICT_FSIN,   ISD::FTAN,   ISD::STRICT_FTAN})
       if (isOperationExpand(Op, MVT::f32))
         setOperationAction(Op, MVT::f32, Promote);
 

>From a13ed2bfefdfd26fdbc8a558fc25fe6f626b3664 Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <farzonlotfi at microsoft.com>
Date: Thu, 25 Apr 2024 15:21:01 -0400
Subject: [PATCH 6/7] fix unit test and documentation

---
 llvm/docs/GlobalISel/GenericOpcode.rst                         | 2 +-
 .../CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir   | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/llvm/docs/GlobalISel/GenericOpcode.rst b/llvm/docs/GlobalISel/GenericOpcode.rst
index 462bda522145e3..52dc039df7779e 100644
--- a/llvm/docs/GlobalISel/GenericOpcode.rst
+++ b/llvm/docs/GlobalISel/GenericOpcode.rst
@@ -593,7 +593,7 @@ G_FLOG, G_FLOG2, G_FLOG10
 Calculate the base-e, base-2, or base-10 respectively.
 
 G_FCEIL, G_FCOS, G_FSIN, G_FTAN, G_FSQRT, G_FFLOOR, G_FRINT, G_FNEARBYINT
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 These correspond to the standard C functions of the same name.
 
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
index 20133158e4fa9a..d71111b57efe51 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
@@ -674,6 +674,9 @@
 # DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
 # DEBUG-NEXT: .. the first uncovered type index: 1, OK
 # DEBUG-NEXT: .. the first uncovered imm index: 0, OK
+# DEBUG-NEXT: G_FTAN (opcode {{[0-9]+}}): 1 type index, 0 imm indices
+# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
 # DEBUG-NEXT: G_FSQRT (opcode {{[0-9]+}}): 1 type index, 0 imm indices
 # DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
 # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected

>From 7f91cde1db218cb31eb82857a175d642a4190603 Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <farzonlotfi at microsoft.com>
Date: Fri, 26 Apr 2024 17:13:04 -0400
Subject: [PATCH 7/7] remove everything not needed by spirv

---
 clang/docs/LanguageExtensions.rst             |   1 -
 clang/include/clang/Basic/Builtins.td         |   6 -
 clang/lib/CodeGen/CGBuiltin.cpp               |   4 +-
 clang/lib/Headers/hlsl/hlsl_intrinsics.h      |  23 --
 clang/lib/Sema/SemaChecking.cpp               |   2 -
 .../test/CodeGen/builtins-elementwise-math.c  |  16 --
 .../CodeGen/strictfp-elementwise-bulitins.cpp |  10 -
 clang/test/CodeGenHLSL/builtins/tan.hlsl      |  59 -----
 clang/test/Sema/aarch64-sve-vector-trig-ops.c |   6 -
 clang/test/Sema/builtins-elementwise-math.c   |  21 --
 clang/test/Sema/riscv-rvv-vector-trig-ops.c   |   6 -
 .../SemaCXX/builtins-elementwise-math.cpp     |   7 -
 .../BuiltIns/half-float-only-errors.hlsl      |   1 -
 llvm/docs/LangRef.rst                         |  37 ----
 llvm/include/llvm/Analysis/VecFuncs.def       |   1 -
 llvm/include/llvm/CodeGen/BasicTTIImpl.h      |   3 -
 llvm/include/llvm/CodeGen/ISDOpcodes.h        |   2 -
 llvm/include/llvm/IR/RuntimeLibcalls.def      |   5 -
 .../Target/GlobalISel/SelectionDAGCompat.td   |   1 -
 .../include/llvm/Target/TargetSelectionDAG.td |   6 -
 llvm/lib/Analysis/VectorUtils.cpp             |   1 -
 .../CodeGen/GlobalISel/LegalizerHelper.cpp    |   5 -
 llvm/lib/CodeGen/GlobalISel/Utils.cpp         |   2 -
 llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp |   7 -
 .../SelectionDAG/LegalizeFloatTypes.cpp       |  23 --
 llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h |   2 -
 .../SelectionDAG/LegalizeVectorOps.cpp        |   1 -
 .../SelectionDAG/LegalizeVectorTypes.cpp      |   3 -
 .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp |   1 -
 .../SelectionDAG/SelectionDAGBuilder.cpp      |  10 -
 .../SelectionDAG/SelectionDAGDumper.cpp       |   2 -
 llvm/lib/CodeGen/TargetLoweringBase.cpp       |   4 +-
 llvm/lib/Target/DirectX/DXIL.td               |   3 -
 llvm/lib/Target/X86/X86ISelLowering.cpp       |  23 +-
 llvm/test/CodeGen/DirectX/tan.ll              |  20 --
 llvm/test/CodeGen/DirectX/tan_error.ll        |  10 -
 llvm/test/CodeGen/X86/llvm.tan.ll             |  60 ------
 llvm/test/CodeGen/X86/vec-libcalls.ll         | 202 ------------------
 38 files changed, 11 insertions(+), 585 deletions(-)
 delete mode 100644 clang/test/CodeGenHLSL/builtins/tan.hlsl
 delete mode 100644 llvm/test/CodeGen/DirectX/tan.ll
 delete mode 100644 llvm/test/CodeGen/DirectX/tan_error.ll
 delete mode 100644 llvm/test/CodeGen/X86/llvm.tan.ll

diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index aa61e239d6984c..f18b946efd4bfa 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -656,7 +656,6 @@ Unless specified otherwise operation(±0) = ±0 and operation(±infinity) = ±in
  T __builtin_elementwise_ceil(T x)           return the smallest integral value greater than or equal to x    floating point types
  T __builtin_elementwise_sin(T x)            return the sine of x interpreted as an angle in radians          floating point types
  T __builtin_elementwise_cos(T x)            return the cosine of x interpreted as an angle in radians        floating point types
- T __builtin_elementwise_tan(T x)            return the tangent of x interpreted as an angle in radians       floating point types
  T __builtin_elementwise_floor(T x)          return the largest integral value less than or equal to x        floating point types
  T __builtin_elementwise_log(T x)            return the natural logarithm of x                                floating point types
  T __builtin_elementwise_log2(T x)           return the base 2 logarithm of x                                 floating point types
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index 11982af3fa609b..de721a87b3341d 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -1326,12 +1326,6 @@ def ElementwiseSqrt : Builtin {
   let Prototype = "void(...)";
 }
 
-def ElementwiseTan : Builtin {
-  let Spellings = ["__builtin_elementwise_tan"];
-  let Attributes = [NoThrow, Const, CustomTypeChecking];
-  let Prototype = "void(...)";
-}
-
 def ElementwiseTrunc : Builtin {
   let Spellings = ["__builtin_elementwise_trunc"];
   let Attributes = [NoThrow, Const, CustomTypeChecking];
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index f941e93b0ced2b..d08ab539148914 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -3821,9 +3821,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
   case Builtin::BI__builtin_elementwise_sin:
     return RValue::get(
         emitUnaryBuiltin(*this, E, llvm::Intrinsic::sin, "elt.sin"));
-  case Builtin::BI__builtin_elementwise_tan:
-    return RValue::get(
-        emitUnaryBuiltin(*this, E, llvm::Intrinsic::tan, "elt.tan"));
+
   case Builtin::BI__builtin_elementwise_trunc:
     return RValue::get(
         emitUnaryBuiltin(*this, E, llvm::Intrinsic::trunc, "elt.trunc"));
diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
index 3390f0962f67d0..06409c6fc77417 100644
--- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h
+++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
@@ -1441,29 +1441,6 @@ float3 sqrt(float3);
 _HLSL_BUILTIN_ALIAS(__builtin_elementwise_sqrt)
 float4 sqrt(float4);
 
-//===----------------------------------------------------------------------===//
-// tan builtins
-//===----------------------------------------------------------------------===//
-#ifdef __HLSL_ENABLE_16_BIT
-_HLSL_BUILTIN_ALIAS(__builtin_elementwise_tan)
-half tan(half);
-_HLSL_BUILTIN_ALIAS(__builtin_elementwise_tan)
-half2 tan(half2);
-_HLSL_BUILTIN_ALIAS(__builtin_elementwise_tan)
-half3 tan(half3);
-_HLSL_BUILTIN_ALIAS(__builtin_elementwise_tan)
-half4 tan(half4);
-#endif
-
-_HLSL_BUILTIN_ALIAS(__builtin_elementwise_tan)
-float tan(float);
-_HLSL_BUILTIN_ALIAS(__builtin_elementwise_tan)
-float2 tan(float2);
-_HLSL_BUILTIN_ALIAS(__builtin_elementwise_tan)
-float3 tan(float3);
-_HLSL_BUILTIN_ALIAS(__builtin_elementwise_tan)
-float4 tan(float4);
-
 //===----------------------------------------------------------------------===//
 // trunc builtins
 //===----------------------------------------------------------------------===//
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index fcc901392e1161..e33113ab9c4c1d 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -3049,7 +3049,6 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
   case Builtin::BI__builtin_elementwise_nearbyint:
   case Builtin::BI__builtin_elementwise_sin:
   case Builtin::BI__builtin_elementwise_sqrt:
-  case Builtin::BI__builtin_elementwise_tan:
   case Builtin::BI__builtin_elementwise_trunc:
   case Builtin::BI__builtin_elementwise_canonicalize: {
     if (PrepareBuiltinElementwiseMathOneArgCall(TheCall))
@@ -5665,7 +5664,6 @@ bool Sema::CheckHLSLBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
   case Builtin::BI__builtin_elementwise_roundeven:
   case Builtin::BI__builtin_elementwise_sin:
   case Builtin::BI__builtin_elementwise_sqrt:
-  case Builtin::BI__builtin_elementwise_tan:
   case Builtin::BI__builtin_elementwise_trunc: {
     if (CheckFloatOrHalfRepresentations(this, TheCall))
       return true;
diff --git a/clang/test/CodeGen/builtins-elementwise-math.c b/clang/test/CodeGen/builtins-elementwise-math.c
index 1b5466abd347d7..1c667e5bff1eab 100644
--- a/clang/test/CodeGen/builtins-elementwise-math.c
+++ b/clang/test/CodeGen/builtins-elementwise-math.c
@@ -604,22 +604,6 @@ void test_builtin_elementwise_sqrt(float f1, float f2, double d1, double d2,
   vf2 = __builtin_elementwise_sqrt(vf1);
 }
 
-void test_builtin_elementwise_tan(float f1, float f2, double d1, double d2,
-                                  float4 vf1, float4 vf2) {
-  // CHECK-LABEL: define void @test_builtin_elementwise_tan(
-  // CHECK:      [[F1:%.+]] = load float, ptr %f1.addr, align 4
-  // CHECK-NEXT:  call float @llvm.tan.f32(float [[F1]])
-  f2 = __builtin_elementwise_tan(f1);
-
-  // CHECK:      [[D1:%.+]] = load double, ptr %d1.addr, align 8
-  // CHECK-NEXT: call double @llvm.tan.f64(double [[D1]])
-  d2 = __builtin_elementwise_tan(d1);
-
-  // CHECK:      [[VF1:%.+]] = load <4 x float>, ptr %vf1.addr, align 16
-  // CHECK-NEXT: call <4 x float> @llvm.tan.v4f32(<4 x float> [[VF1]])
-  vf2 = __builtin_elementwise_tan(vf1);
-}
-
 void test_builtin_elementwise_trunc(float f1, float f2, double d1, double d2,
                                     float4 vf1, float4 vf2) {
   // CHECK-LABEL: define void @test_builtin_elementwise_trunc(
diff --git a/clang/test/CodeGen/strictfp-elementwise-bulitins.cpp b/clang/test/CodeGen/strictfp-elementwise-bulitins.cpp
index c72d5949916911..fdf865ebbe8911 100644
--- a/clang/test/CodeGen/strictfp-elementwise-bulitins.cpp
+++ b/clang/test/CodeGen/strictfp-elementwise-bulitins.cpp
@@ -187,16 +187,6 @@ float4 strict_elementwise_sqrt(float4 a) {
   return __builtin_elementwise_sqrt(a);
 }
 
-// CHECK-LABEL: define dso_local noundef <4 x float> @_Z22strict_elementwise_tanDv4_f
-// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[ELT_TAN:%.*]] = tail call <4 x float> @llvm.tan.v4f32(<4 x float> [[A]]) #[[ATTR4]]
-// CHECK-NEXT:    ret <4 x float> [[ELT_TAN]]
-//
-float4 strict_elementwise_tan(float4 a) {
-  return __builtin_elementwise_tan(a);
-}
-
 // CHECK-LABEL: define dso_local noundef <4 x float> @_Z24strict_elementwise_truncDv4_f
 // CHECK-SAME: (<4 x float> noundef [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
 // CHECK-NEXT:  entry:
diff --git a/clang/test/CodeGenHLSL/builtins/tan.hlsl b/clang/test/CodeGenHLSL/builtins/tan.hlsl
deleted file mode 100644
index aa542fac226d0b..00000000000000
--- a/clang/test/CodeGenHLSL/builtins/tan.hlsl
+++ /dev/null
@@ -1,59 +0,0 @@
-// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
-// RUN:   dxil-pc-shadermodel6.3-library %s -fnative-half-type \
-// RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ 
-// RUN:   --check-prefixes=CHECK,NATIVE_HALF
-// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
-// RUN:   spirv-unknown-vulkan-compute %s -emit-llvm -disable-llvm-passes \
-// RUN:   -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF
-
-// CHECK-LABEL: test_tan_half
-// NATIVE_HALF: call half @llvm.tan.f16
-// NO_HALF: call float @llvm.tan.f32
-half test_tan_half ( half p0 ) {
-  return tan ( p0 );
-}
-
-// CHECK-LABEL: test_tan_half2
-// NATIVE_HALF: call <2 x half> @llvm.tan.v2f16
-// NO_HALF: call <2 x float> @llvm.tan.v2f32
-half2 test_tan_half2 ( half2 p0 ) {
-  return tan ( p0 );
-}
-
-// CHECK-LABEL: test_tan_half3
-// NATIVE_HALF: call <3 x half> @llvm.tan.v3f16
-// NO_HALF: call <3 x float> @llvm.tan.v3f32
-half3 test_tan_half3 ( half3 p0 ) {
-  return tan ( p0 );
-}
-
-// CHECK-LABEL: test_tan_half4
-// NATIVE_HALF: call <4 x half> @llvm.tan.v4f16
-// NO_HALF: call <4 x float> @llvm.tan.v4f32
-half4 test_tan_half4 ( half4 p0 ) {
-  return tan ( p0 );
-}
-
-// CHECK-LABEL: test_tan_float
-// CHECK: call float @llvm.tan.f32
-float test_tan_float ( float p0 ) {
-  return tan ( p0 );
-}
-
-// CHECK-LABEL: test_tan_float2
-// CHECK: call <2 x float> @llvm.tan.v2f32
-float2 test_tan_float2 ( float2 p0 ) {
-  return tan ( p0 );
-}
-
-// CHECK-LABEL: test_tan_float3
-// CHECK: call <3 x float> @llvm.tan.v3f32
-float3 test_tan_float3 ( float3 p0 ) {
-  return tan ( p0 );
-}
-
-// CHECK-LABEL: test_tan_float4
-// CHECK: call <4 x float> @llvm.tan.v4f32
-float4 test_tan_float4 ( float4 p0 ) {
-  return tan ( p0 );
-}
diff --git a/clang/test/Sema/aarch64-sve-vector-trig-ops.c b/clang/test/Sema/aarch64-sve-vector-trig-ops.c
index 5039599d43ad81..7ca941f578c70d 100644
--- a/clang/test/Sema/aarch64-sve-vector-trig-ops.c
+++ b/clang/test/Sema/aarch64-sve-vector-trig-ops.c
@@ -16,9 +16,3 @@ svfloat32_t test_cos_vv_i8mf8(svfloat32_t v) {
   return __builtin_elementwise_cos(v);
   // expected-error at -1 {{1st argument must be a vector, integer or floating point type}}
 }
-
-svfloat32_t test_tan_vv_i8mf8(svfloat32_t v) {
-
-  return __builtin_elementwise_tan(v);
-  // expected-error at -1 {{1st argument must be a vector, integer or floating point type}}
-}
diff --git a/clang/test/Sema/builtins-elementwise-math.c b/clang/test/Sema/builtins-elementwise-math.c
index 2e4319d158e7a0..2e05337273ee41 100644
--- a/clang/test/Sema/builtins-elementwise-math.c
+++ b/clang/test/Sema/builtins-elementwise-math.c
@@ -626,27 +626,6 @@ void test_builtin_elementwise_sqrt(int i, float f, double d, float4 v, int3 iv,
   // expected-error at -1 {{1st argument must be a floating point type (was 'unsigned4' (vector of 4 'unsigned int' values))}}
 }
 
-void test_builtin_elementwise_tan(int i, float f, double d, float4 v, int3 iv, unsigned u, unsigned4 uv) {
-
-  struct Foo s = __builtin_elementwise_tan(f);
-  // expected-error at -1 {{initializing 'struct Foo' with an expression of incompatible type 'float'}}
-
-  i = __builtin_elementwise_tan();
-  // expected-error at -1 {{too few arguments to function call, expected 1, have 0}}
-
-  i = __builtin_elementwise_tan(i);
-  // expected-error at -1 {{1st argument must be a floating point type (was 'int')}}
-
-  i = __builtin_elementwise_tan(f, f);
-  // expected-error at -1 {{too many arguments to function call, expected 1, have 2}}
-
-  u = __builtin_elementwise_tan(u);
-  // expected-error at -1 {{1st argument must be a floating point type (was 'unsigned int')}}
-
-  uv = __builtin_elementwise_tan(uv);
-  // expected-error at -1 {{1st argument must be a floating point type (was 'unsigned4' (vector of 4 'unsigned int' values))}}
-}
-
 void test_builtin_elementwise_trunc(int i, float f, double d, float4 v, int3 iv, unsigned u, unsigned4 uv) {
 
   struct Foo s = __builtin_elementwise_trunc(f);
diff --git a/clang/test/Sema/riscv-rvv-vector-trig-ops.c b/clang/test/Sema/riscv-rvv-vector-trig-ops.c
index ee4c596c431841..a457e484860602 100644
--- a/clang/test/Sema/riscv-rvv-vector-trig-ops.c
+++ b/clang/test/Sema/riscv-rvv-vector-trig-ops.c
@@ -17,9 +17,3 @@ vfloat32mf2_t test_cos_vv_i8mf8(vfloat32mf2_t v) {
   return __builtin_elementwise_cos(v);
   // expected-error at -1 {{1st argument must be a vector, integer or floating point type}}
 }
-
-vfloat32mf2_t test_tan_vv_i8mf8(vfloat32mf2_t v) {
-
-  return __builtin_elementwise_tan(v);
-  // expected-error at -1 {{1st argument must be a vector, integer or floating point type}}
-}
diff --git a/clang/test/SemaCXX/builtins-elementwise-math.cpp b/clang/test/SemaCXX/builtins-elementwise-math.cpp
index 499f2795ddb272..44a44ab055e997 100644
--- a/clang/test/SemaCXX/builtins-elementwise-math.cpp
+++ b/clang/test/SemaCXX/builtins-elementwise-math.cpp
@@ -111,13 +111,6 @@ void test_builtin_elementwise_sin() {
   static_assert(!is_const<decltype(__builtin_elementwise_sin(b))>::value);
 }
 
-void test_builtin_elementwise_tan() {
-  const float a = 42.0;
-  float b = 42.3;
-  static_assert(!is_const<decltype(__builtin_elementwise_tan(a))>::value);
-  static_assert(!is_const<decltype(__builtin_elementwise_tan(b))>::value);
-}
-
 void test_builtin_elementwise_sqrt() {
   const float a = 42.0;
   float b = 42.3;
diff --git a/clang/test/SemaHLSL/BuiltIns/half-float-only-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/half-float-only-errors.hlsl
index 4089188134d321..ef0928f8fef0d6 100644
--- a/clang/test/SemaHLSL/BuiltIns/half-float-only-errors.hlsl
+++ b/clang/test/SemaHLSL/BuiltIns/half-float-only-errors.hlsl
@@ -9,7 +9,6 @@
 // RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify -DTEST_FUNC=__builtin_elementwise_sin
 // RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify -DTEST_FUNC=__builtin_elementwise_sqrt
 // RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify -DTEST_FUNC=__builtin_elementwise_roundeven
-// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify -DTEST_FUNC=__builtin_elementwise_tan
 // RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify -DTEST_FUNC=__builtin_elementwise_trunc
 
 double2 test_double_builtin(double2 p0) {
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 144733202f9e0e..f169ab941c457b 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -15270,43 +15270,6 @@ trapping or setting ``errno``.
 When specified with the fast-math-flag 'afn', the result may be approximated
 using a less accurate calculation.
 
-'``llvm.tan.*``' Intrinsic
-^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-Syntax:
-"""""""
-
-This is an overloaded intrinsic. You can use ``llvm.tan`` on any
-floating-point or vector of floating-point type. Not all targets support
-all types however.
-
-::
-
-      declare float     @llvm.tan.f32(float  %Val)
-      declare double    @llvm.tan.f64(double %Val)
-      declare x86_fp80  @llvm.tan.f80(x86_fp80  %Val)
-      declare fp128     @llvm.tan.f128(fp128 %Val)
-      declare ppc_fp128 @llvm.tan.ppcf128(ppc_fp128  %Val)
-
-Overview:
-"""""""""
-
-The '``llvm.tan.*``' intrinsics return the tangent of the operand.
-
-Arguments:
-""""""""""
-
-The argument and return value are floating-point numbers of the same type.
-
-Semantics:
-""""""""""
-
-Return the same value as a corresponding libm '``tan``' function but without
-trapping or setting ``errno``.
-
-When specified with the fast-math-flag 'afn', the result may be approximated
-using a less accurate calculation.
-
 '``llvm.pow.*``' Intrinsic
 ^^^^^^^^^^^^^^^^^^^^^^^^^^
 
diff --git a/llvm/include/llvm/Analysis/VecFuncs.def b/llvm/include/llvm/Analysis/VecFuncs.def
index de94621791f54f..10f1333cf8885c 100644
--- a/llvm/include/llvm/Analysis/VecFuncs.def
+++ b/llvm/include/llvm/Analysis/VecFuncs.def
@@ -54,7 +54,6 @@ TLI_DEFINE_VECFUNC("llvm.sin.f32", "vsinf", FIXED(4), "_ZGV_LLVM_N4v")
 TLI_DEFINE_VECFUNC("cosf", "vcosf", FIXED(4), "_ZGV_LLVM_N4v")
 TLI_DEFINE_VECFUNC("llvm.cos.f32", "vcosf", FIXED(4), "_ZGV_LLVM_N4v")
 TLI_DEFINE_VECFUNC("tanf", "vtanf", FIXED(4), "_ZGV_LLVM_N4v")
-TLI_DEFINE_VECFUNC("llvm.tan.f32", "vtanf", FIXED(4), "_ZGV_LLVM_N4v")
 TLI_DEFINE_VECFUNC("asinf", "vasinf", FIXED(4), "_ZGV_LLVM_N4v")
 TLI_DEFINE_VECFUNC("acosf", "vacosf", FIXED(4), "_ZGV_LLVM_N4v")
 TLI_DEFINE_VECFUNC("atanf", "vatanf", FIXED(4), "_ZGV_LLVM_N4v")
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 740d6ef2b9f016..4a3a03dc5ad488 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -1924,9 +1924,6 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
     case Intrinsic::cos:
       ISD = ISD::FCOS;
       break;
-    case Intrinsic::tan:
-      ISD = ISD::FTAN;
-      break;
     case Intrinsic::exp:
       ISD = ISD::FEXP;
       break;
diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index c38c83f5b80dfa..6429947958ee91 100644
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -415,7 +415,6 @@ enum NodeType {
   STRICT_FLDEXP,
   STRICT_FSIN,
   STRICT_FCOS,
-  STRICT_FTAN,
   STRICT_FEXP,
   STRICT_FEXP2,
   STRICT_FLOG,
@@ -935,7 +934,6 @@ enum NodeType {
   FCBRT,
   FSIN,
   FCOS,
-  FTAN,
   FPOW,
   FPOWI,
   /// FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.def b/llvm/include/llvm/IR/RuntimeLibcalls.def
index f4f93bf7ba24c5..5e082769fa974c 100644
--- a/llvm/include/llvm/IR/RuntimeLibcalls.def
+++ b/llvm/include/llvm/IR/RuntimeLibcalls.def
@@ -197,11 +197,6 @@ HANDLE_LIBCALL(COS_F64, "cos")
 HANDLE_LIBCALL(COS_F80, "cosl")
 HANDLE_LIBCALL(COS_F128, "cosl")
 HANDLE_LIBCALL(COS_PPCF128, "cosl")
-HANDLE_LIBCALL(TAN_F32, "tanf")
-HANDLE_LIBCALL(TAN_F64, "tan")
-HANDLE_LIBCALL(TAN_F80, "tanl")
-HANDLE_LIBCALL(TAN_F128,"tanl")
-HANDLE_LIBCALL(TAN_PPCF128, "tanl")
 HANDLE_LIBCALL(SINCOS_F32, nullptr)
 HANDLE_LIBCALL(SINCOS_F64, nullptr)
 HANDLE_LIBCALL(SINCOS_F80, nullptr)
diff --git a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
index 560d3b434d07d5..8fa0e4b86d6dc9 100644
--- a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
+++ b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
@@ -148,7 +148,6 @@ def : GINodeEquiv<G_BUILD_VECTOR, build_vector>;
 def : GINodeEquiv<G_FCEIL, fceil>;
 def : GINodeEquiv<G_FCOS, fcos>;
 def : GINodeEquiv<G_FSIN, fsin>;
-def : GINodeEquiv<G_FTAN, ftan>;
 def : GINodeEquiv<G_FABS, fabs>;
 def : GINodeEquiv<G_FSQRT, fsqrt>;
 def : GINodeEquiv<G_FFLOOR, ffloor>;
diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td
index bf2413c09e4c9a..1684b424e3b442 100644
--- a/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -509,7 +509,6 @@ def fneg       : SDNode<"ISD::FNEG"       , SDTFPUnaryOp>;
 def fsqrt      : SDNode<"ISD::FSQRT"      , SDTFPUnaryOp>;
 def fsin       : SDNode<"ISD::FSIN"       , SDTFPUnaryOp>;
 def fcos       : SDNode<"ISD::FCOS"       , SDTFPUnaryOp>;
-def ftan       : SDNode<"ISD::FTAN"       , SDTFPUnaryOp>;
 def fexp2      : SDNode<"ISD::FEXP2"      , SDTFPUnaryOp>;
 def fexp10     : SDNode<"ISD::FEXP10"     , SDTFPUnaryOp>;
 def fpow       : SDNode<"ISD::FPOW"       , SDTFPBinOp>;
@@ -563,8 +562,6 @@ def strict_fsin       : SDNode<"ISD::STRICT_FSIN",
                                SDTFPUnaryOp, [SDNPHasChain]>;
 def strict_fcos       : SDNode<"ISD::STRICT_FCOS",
                                SDTFPUnaryOp, [SDNPHasChain]>;
-def strict_ftan       : SDNode<"ISD::STRICT_FTAN",
-                               SDTFPUnaryOp, [SDNPHasChain]>;
 def strict_fexp2      : SDNode<"ISD::STRICT_FEXP2",
                                SDTFPUnaryOp, [SDNPHasChain]>;
 def strict_fpow       : SDNode<"ISD::STRICT_FPOW",
@@ -1517,9 +1514,6 @@ def any_fsin       : PatFrags<(ops node:$src),
 def any_fcos       : PatFrags<(ops node:$src),
                               [(strict_fcos node:$src),
                                (fcos node:$src)]>;
-def any_ftan       : PatFrags<(ops node:$src),
-                              [(strict_ftan node:$src),
-                               (ftan node:$src)]>;
 def any_fexp2      : PatFrags<(ops node:$src),
                               [(strict_fexp2 node:$src),
                                (fexp2 node:$src)]>;
diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp
index 30728ed5875090..917094267d05ae 100644
--- a/llvm/lib/Analysis/VectorUtils.cpp
+++ b/llvm/lib/Analysis/VectorUtils.cpp
@@ -68,7 +68,6 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
   case Intrinsic::sqrt: // Begin floating-point.
   case Intrinsic::sin:
   case Intrinsic::cos:
-  case Intrinsic::tan:
   case Intrinsic::exp:
   case Intrinsic::exp2:
   case Intrinsic::log:
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index ca2fb24b5b85c4..6a76ad7f5db749 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -448,8 +448,6 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
     RTLIBCASE(SIN_F);
   case TargetOpcode::G_FCOS:
     RTLIBCASE(COS_F);
-  case TargetOpcode::G_FTAN:
-    RTLIBCASE(TAN_F);
   case TargetOpcode::G_FLOG10:
     RTLIBCASE(LOG10_F);
   case TargetOpcode::G_FLOG:
@@ -1038,7 +1036,6 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
   case TargetOpcode::G_FREM:
   case TargetOpcode::G_FCOS:
   case TargetOpcode::G_FSIN:
-  case TargetOpcode::G_FTAN:
   case TargetOpcode::G_FLOG10:
   case TargetOpcode::G_FLOG:
   case TargetOpcode::G_FLOG2:
@@ -2893,7 +2890,6 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
   case TargetOpcode::G_FFLOOR:
   case TargetOpcode::G_FCOS:
   case TargetOpcode::G_FSIN:
-  case TargetOpcode::G_FTAN:
   case TargetOpcode::G_FLOG10:
   case TargetOpcode::G_FLOG:
   case TargetOpcode::G_FLOG2:
@@ -4660,7 +4656,6 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
   case G_INTRINSIC_TRUNC:
   case G_FCOS:
   case G_FSIN:
-  case G_FTAN:
   case G_FSQRT:
   case G_BSWAP:
   case G_BITREVERSE:
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index b314970caec07d..ae43e9ccf6112d 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -819,7 +819,6 @@ bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI,
   case TargetOpcode::G_FREM:
   case TargetOpcode::G_FSIN:
   case TargetOpcode::G_FCOS:
-  case TargetOpcode::G_FTAN:
   case TargetOpcode::G_FMA:
   case TargetOpcode::G_FMAD:
     if (SNaN)
@@ -1700,7 +1699,6 @@ bool llvm::isPreISelGenericFloatingPointOpcode(unsigned Opc) {
   case TargetOpcode::G_FREM:
   case TargetOpcode::G_FRINT:
   case TargetOpcode::G_FSIN:
-  case TargetOpcode::G_FTAN:
   case TargetOpcode::G_FSQRT:
   case TargetOpcode::G_FSUB:
   case TargetOpcode::G_INTRINSIC_ROUND:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 3d8b282479efec..c381870ae5f41b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -4423,11 +4423,6 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
                     RTLIB::COS_F80, RTLIB::COS_F128,
                     RTLIB::COS_PPCF128, Results);
     break;
-  case ISD::FTAN:
-  case ISD::STRICT_FTAN:
-    ExpandFPLibCall(Node, RTLIB::TAN_F32, RTLIB::TAN_F64, RTLIB::TAN_F80,
-                    RTLIB::TAN_F128, RTLIB::TAN_PPCF128, Results);
-    break;
   case ISD::FSINCOS:
     // Expand into sincos libcall.
     ExpandSinCosLibCall(Node, Results);
@@ -5372,7 +5367,6 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
   case ISD::FSQRT:
   case ISD::FSIN:
   case ISD::FCOS:
-  case ISD::FTAN:
   case ISD::FLOG:
   case ISD::FLOG2:
   case ISD::FLOG10:
@@ -5397,7 +5391,6 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
   case ISD::STRICT_FSQRT:
   case ISD::STRICT_FSIN:
   case ISD::STRICT_FCOS:
-  case ISD::STRICT_FTAN:
   case ISD::STRICT_FLOG:
   case ISD::STRICT_FLOG2:
   case ISD::STRICT_FLOG10:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 2fa2bdafd46802..abe5be76382556 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -131,8 +131,6 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
     case ISD::FSQRT:       R = SoftenFloatRes_FSQRT(N); break;
     case ISD::STRICT_FSUB:
     case ISD::FSUB:        R = SoftenFloatRes_FSUB(N); break;
-    case ISD::STRICT_FTAN:
-    case ISD::FTAN:        R = SoftenFloatRes_FTAN(N); break;
     case ISD::STRICT_FTRUNC:
     case ISD::FTRUNC:      R = SoftenFloatRes_FTRUNC(N); break;
     case ISD::LOAD:        R = SoftenFloatRes_LOAD(N); break;
@@ -775,12 +773,6 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) {
                                                RTLIB::SUB_PPCF128));
 }
 
-SDValue DAGTypeLegalizer::SoftenFloatRes_FTAN(SDNode *N) {
-  return SoftenFloatRes_Unary(
-      N, GetFPLibCall(N->getValueType(0), RTLIB::TAN_F32, RTLIB::TAN_F64,
-                      RTLIB::TAN_F80, RTLIB::TAN_F128, RTLIB::TAN_PPCF128));
-}
-
 SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) {
   return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
                                               RTLIB::TRUNC_F32,
@@ -1369,10 +1361,6 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
   case ISD::FSQRT:      ExpandFloatRes_FSQRT(N, Lo, Hi); break;
   case ISD::STRICT_FSUB:
   case ISD::FSUB:       ExpandFloatRes_FSUB(N, Lo, Hi); break;
-  case ISD::STRICT_FTAN:
-  case ISD::FTAN:
-    ExpandFloatRes_FTAN(N, Lo, Hi);
-    break;
   case ISD::STRICT_FTRUNC:
   case ISD::FTRUNC:     ExpandFloatRes_FTRUNC(N, Lo, Hi); break;
   case ISD::LOAD:       ExpandFloatRes_LOAD(N, Lo, Hi); break;
@@ -1742,15 +1730,6 @@ void DAGTypeLegalizer::ExpandFloatRes_FSUB(SDNode *N, SDValue &Lo,
                                         RTLIB::SUB_PPCF128), Lo, Hi);
 }
 
-void DAGTypeLegalizer::ExpandFloatRes_FTAN(SDNode *N, SDValue &Lo,
-                                           SDValue &Hi) {
-  ExpandFloatRes_Unary(N,
-                       GetFPLibCall(N->getValueType(0), RTLIB::TAN_F32,
-                                    RTLIB::TAN_F64, RTLIB::TAN_F80,
-                                    RTLIB::TAN_F128, RTLIB::TAN_PPCF128),
-                       Lo, Hi);
-}
-
 void DAGTypeLegalizer::ExpandFloatRes_FTRUNC(SDNode *N,
                                              SDValue &Lo, SDValue &Hi) {
   ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
@@ -2444,7 +2423,6 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) {
     case ISD::FSIN:
     case ISD::FSQRT:
     case ISD::FTRUNC:
-    case ISD::FTAN:
     case ISD::FCANONICALIZE: R = PromoteFloatRes_UnaryOp(N); break;
 
     // Binary FP Operations
@@ -2876,7 +2854,6 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) {
   case ISD::FSIN:
   case ISD::FSQRT:
   case ISD::FTRUNC:
-  case ISD::FTAN:
   case ISD::FCANONICALIZE: R = SoftPromoteHalfRes_UnaryOp(N); break;
 
   // Binary FP Operations
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 3d29e98524a07a..4a2c7b355eb528 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -566,7 +566,6 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   SDValue SoftenFloatRes_FSIN(SDNode *N);
   SDValue SoftenFloatRes_FSQRT(SDNode *N);
   SDValue SoftenFloatRes_FSUB(SDNode *N);
-  SDValue SoftenFloatRes_FTAN(SDNode *N);
   SDValue SoftenFloatRes_FTRUNC(SDNode *N);
   SDValue SoftenFloatRes_LOAD(SDNode *N);
   SDValue SoftenFloatRes_SELECT(SDNode *N);
@@ -646,7 +645,6 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   void ExpandFloatRes_FSIN      (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandFloatRes_FSQRT     (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandFloatRes_FSUB      (SDNode *N, SDValue &Lo, SDValue &Hi);
-  void ExpandFloatRes_FTAN(SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandFloatRes_FTRUNC    (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandFloatRes_LOAD      (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, SDValue &Hi);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index c5ee042a0cf53b..1de43a4f60e3a2 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -402,7 +402,6 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
   case ISD::FSQRT:
   case ISD::FSIN:
   case ISD::FCOS:
-  case ISD::FTAN:
   case ISD::FLDEXP:
   case ISD::FPOWI:
   case ISD::FPOW:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 77ccaefc4cb268..985c9f16ab97cd 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -107,7 +107,6 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
   case ISD::FROUNDEVEN:
   case ISD::FSIN:
   case ISD::FSQRT:
-  case ISD::FTAN:
   case ISD::FTRUNC:
   case ISD::SIGN_EXTEND:
   case ISD::SINT_TO_FP:
@@ -1112,7 +1111,6 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
   case ISD::VP_FROUNDEVEN:
   case ISD::FSIN:
   case ISD::FSQRT: case ISD::VP_SQRT:
-  case ISD::FTAN:
   case ISD::FTRUNC:
   case ISD::VP_FROUNDTOZERO:
   case ISD::SINT_TO_FP:
@@ -4310,7 +4308,6 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
   case ISD::FROUNDEVEN:
   case ISD::FSIN:
   case ISD::FSQRT:
-  case ISD::FTAN:
   case ISD::FTRUNC:
     if (unrollExpandedOp())
       break;
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index be829e000d73f8..dde10fd4b8c8af 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5255,7 +5255,6 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const
   case ISD::FREM:
   case ISD::FSIN:
   case ISD::FCOS:
-  case ISD::FTAN:
   case ISD::FMA:
   case ISD::FMAD: {
     if (SNaN)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 3b7fcd56182735..0db484a5e06bcd 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -6690,7 +6690,6 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
   case Intrinsic::fabs:
   case Intrinsic::sin:
   case Intrinsic::cos:
-  case Intrinsic::tan:
   case Intrinsic::exp10:
   case Intrinsic::floor:
   case Intrinsic::ceil:
@@ -6707,9 +6706,6 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
     case Intrinsic::fabs:      Opcode = ISD::FABS;       break;
     case Intrinsic::sin:       Opcode = ISD::FSIN;       break;
     case Intrinsic::cos:       Opcode = ISD::FCOS;       break;
-    case Intrinsic::tan:
-      Opcode = ISD::FTAN;
-      break;
     case Intrinsic::exp10:     Opcode = ISD::FEXP10;     break;
     case Intrinsic::floor:     Opcode = ISD::FFLOOR;     break;
     case Intrinsic::ceil:      Opcode = ISD::FCEIL;      break;
@@ -9085,12 +9081,6 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
         if (visitUnaryFloatCall(I, ISD::FCOS))
           return;
         break;
-      case LibFunc_tan:
-      case LibFunc_tanf:
-      case LibFunc_tanl:
-        if (visitUnaryFloatCall(I, ISD::FTAN))
-          return;
-        break;
       case LibFunc_sqrt:
       case LibFunc_sqrtf:
       case LibFunc_sqrtl:
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 7ead23537cb691..4ad4a938ca97f2 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -210,8 +210,6 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
   case ISD::FCOS:                       return "fcos";
   case ISD::STRICT_FCOS:                return "strict_fcos";
   case ISD::FSINCOS:                    return "fsincos";
-  case ISD::FTAN:                       return "ftan";
-  case ISD::STRICT_FTAN:                return "strict_ftan";
   case ISD::FTRUNC:                     return "ftrunc";
   case ISD::STRICT_FTRUNC:              return "strict_ftrunc";
   case ISD::FFLOOR:                     return "ffloor";
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index eb766345cd3b39..6e7b67ded23c84 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -141,7 +141,6 @@ void TargetLoweringBase::InitLibcalls(const Triple &TT) {
     setLibcallName(RTLIB::EXP10_F128, "exp10f128");
     setLibcallName(RTLIB::SIN_F128, "sinf128");
     setLibcallName(RTLIB::COS_F128, "cosf128");
-    setLibcallName(RTLIB::TAN_F128, "tanf128");
     setLibcallName(RTLIB::SINCOS_F128, "sincosf128");
     setLibcallName(RTLIB::POW_F128, "powf128");
     setLibcallName(RTLIB::POW_FINITE_F128, "__powf128_finite");
@@ -988,8 +987,7 @@ void TargetLoweringBase::initActions() {
   setOperationAction({ISD::FCBRT, ISD::FLOG, ISD::FLOG2, ISD::FLOG10, ISD::FEXP,
                       ISD::FEXP2, ISD::FEXP10, ISD::FFLOOR, ISD::FNEARBYINT,
                       ISD::FCEIL, ISD::FRINT, ISD::FTRUNC, ISD::LROUND,
-                      ISD::LLROUND, ISD::LRINT, ISD::LLRINT, ISD::FROUNDEVEN,
-                      ISD::FTAN},
+                      ISD::LLROUND, ISD::LRINT, ISD::LLRINT, ISD::FROUNDEVEN},
                      {MVT::f32, MVT::f64, MVT::f128}, Expand);
 
   // Default ISD::TRAP to expand (which turns it into abort).
diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index 24a0c8524230c5..cd388ed3e3191b 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -266,9 +266,6 @@ def Cos  : DXILOpMapping<12, unary, int_cos,
 def Sin  : DXILOpMapping<13, unary, int_sin,
                          "Returns sine(theta) for theta in radians.",
                          [llvm_halforfloat_ty, LLVMMatchType<0>]>;
-def Tan  : DXILOpMapping<14, unary, int_tan,
-                         "Returns tangent(theta) for theta in radians.",
-                         [llvm_halforfloat_ty, LLVMMatchType<0>]>;
 def Exp2 : DXILOpMapping<21, unary, int_exp2,
                          "Returns the base 2 exponential, or 2**x, of the specified value."
                          "exp2(x) = 2**x.",
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index da4ed85dfa64d6..f0cec6224e84e4 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -598,7 +598,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::FSIN, VT, Action);
     setOperationAction(ISD::FCOS, VT, Action);
     setOperationAction(ISD::FSINCOS, VT, Action);
-    setOperationAction(ISD::FTAN, VT, Action);
     setOperationAction(ISD::FSQRT, VT, Action);
     setOperationAction(ISD::FPOW, VT, Action);
     setOperationAction(ISD::FLOG, VT, Action);
@@ -655,7 +654,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
       setOperationAction(ISD::FSIN   , VT, Expand);
       setOperationAction(ISD::FCOS   , VT, Expand);
       setOperationAction(ISD::FSINCOS, VT, Expand);
-      setOperationAction(ISD::FTAN, VT, Expand);
     }
 
     // Half type will be promoted by default.
@@ -731,14 +729,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::FSIN   , MVT::f32, Expand);
     setOperationAction(ISD::FCOS   , MVT::f32, Expand);
     setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
-    setOperationAction(ISD::FTAN, MVT::f32, Expand);
 
     if (UseX87) {
       // Always expand sin/cos functions even though x87 has an instruction.
       setOperationAction(ISD::FSIN, MVT::f64, Expand);
       setOperationAction(ISD::FCOS, MVT::f64, Expand);
       setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
-      setOperationAction(ISD::FTAN, MVT::f64, Expand);
     }
   } else if (UseX87) {
     // f32 and f64 in x87.
@@ -754,7 +750,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
       setOperationAction(ISD::FSIN   , VT, Expand);
       setOperationAction(ISD::FCOS   , VT, Expand);
       setOperationAction(ISD::FSINCOS, VT, Expand);
-      setOperationAction(ISD::FTAN, VT, Expand);
     }
   }
 
@@ -824,7 +819,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::FSIN   , MVT::f80, Expand);
     setOperationAction(ISD::FCOS   , MVT::f80, Expand);
     setOperationAction(ISD::FSINCOS, MVT::f80, Expand);
-    setOperationAction(ISD::FTAN, MVT::f80, Expand);
 
     setOperationAction(ISD::FFLOOR, MVT::f80, Expand);
     setOperationAction(ISD::FCEIL,  MVT::f80, Expand);
@@ -882,8 +876,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::FCOS,         MVT::f128, LibCall);
     setOperationAction(ISD::STRICT_FCOS,  MVT::f128, LibCall);
     setOperationAction(ISD::FSINCOS,      MVT::f128, LibCall);
-    setOperationAction(ISD::FTAN, MVT::f128, LibCall);
-    setOperationAction(ISD::STRICT_FTAN, MVT::f128, LibCall);
     // No STRICT_FSINCOS
     setOperationAction(ISD::FSQRT,        MVT::f128, LibCall);
     setOperationAction(ISD::STRICT_FSQRT, MVT::f128, LibCall);
@@ -938,7 +930,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::FSIN,      VT, Expand);
     setOperationAction(ISD::FSINCOS,   VT, Expand);
     setOperationAction(ISD::FCOS,      VT, Expand);
-    setOperationAction(ISD::FTAN, VT, Expand);
     setOperationAction(ISD::FREM,      VT, Expand);
     setOperationAction(ISD::FCOPYSIGN, VT, Expand);
     setOperationAction(ISD::FPOW,      VT, Expand);
@@ -2458,11 +2449,15 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
   if (Subtarget.is32Bit() &&
       (Subtarget.isTargetWindowsMSVC() || Subtarget.isTargetWindowsItanium()))
     for (ISD::NodeType Op :
-         {ISD::FCEIL,  ISD::STRICT_FCEIL,  ISD::FCOS,   ISD::STRICT_FCOS,
-          ISD::FEXP,   ISD::STRICT_FEXP,   ISD::FFLOOR, ISD::STRICT_FFLOOR,
-          ISD::FREM,   ISD::STRICT_FREM,   ISD::FLOG,   ISD::STRICT_FLOG,
-          ISD::FLOG10, ISD::STRICT_FLOG10, ISD::FPOW,   ISD::STRICT_FPOW,
-          ISD::FSIN,   ISD::STRICT_FSIN,   ISD::FTAN,   ISD::STRICT_FTAN})
+         {ISD::FCEIL,  ISD::STRICT_FCEIL,
+          ISD::FCOS,   ISD::STRICT_FCOS,
+          ISD::FEXP,   ISD::STRICT_FEXP,
+          ISD::FFLOOR, ISD::STRICT_FFLOOR,
+          ISD::FREM,   ISD::STRICT_FREM,
+          ISD::FLOG,   ISD::STRICT_FLOG,
+          ISD::FLOG10, ISD::STRICT_FLOG10,
+          ISD::FPOW,   ISD::STRICT_FPOW,
+          ISD::FSIN,   ISD::STRICT_FSIN})
       if (isOperationExpand(Op, MVT::f32))
         setOperationAction(Op, MVT::f32, Promote);
 
diff --git a/llvm/test/CodeGen/DirectX/tan.ll b/llvm/test/CodeGen/DirectX/tan.ll
deleted file mode 100644
index 567ab02d40f918..00000000000000
--- a/llvm/test/CodeGen/DirectX/tan.ll
+++ /dev/null
@@ -1,20 +0,0 @@
-; RUN: opt -S -dxil-op-lower < %s | FileCheck %s
-
-; Make sure dxil operation function calls for tan are generated for float and half.
-
-define noundef float @tan_float(float noundef %a) #0 {
-entry:
-; CHECK:call float @dx.op.unary.f32(i32 14, float %{{.*}})
-  %elt.tan = call float @llvm.tan.f32(float %a)
-  ret float %elt.tan
-}
-
-define noundef half @tan_half(half noundef %a) #0 {
-entry:
-; CHECK:call half @dx.op.unary.f16(i32 14, half %{{.*}})
-  %elt.tan = call half @llvm.tan.f16(half %a)
-  ret half %elt.tan
-}
-
-declare half @llvm.tan.f16(half)
-declare float @llvm.tan.f32(float)
diff --git a/llvm/test/CodeGen/DirectX/tan_error.ll b/llvm/test/CodeGen/DirectX/tan_error.ll
deleted file mode 100644
index c870c36f54925d..00000000000000
--- a/llvm/test/CodeGen/DirectX/tan_error.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; RUN: not opt -S -dxil-op-lower %s 2>&1 | FileCheck %s
-
-; DXIL operation tan does not support double overload type
-; CHECK: LLVM ERROR: Invalid Overload
-
-define noundef double @tan_double(double noundef %a) #0 {
-entry:
-  %1 = call double @llvm.tan.f64(double %a)
-  ret double %1
-}
diff --git a/llvm/test/CodeGen/X86/llvm.tan.ll b/llvm/test/CodeGen/X86/llvm.tan.ll
deleted file mode 100644
index 407b8a5e20297c..00000000000000
--- a/llvm/test/CodeGen/X86/llvm.tan.ll
+++ /dev/null
@@ -1,60 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
-; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
-
-define half @use_tanf16(half %a) {
-; CHECK-LABEL: use_tanf16:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    pushq %rax
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    callq __extendhfsf2 at PLT
-; CHECK-NEXT:    callq tanf at PLT
-; CHECK-NEXT:    callq __truncsfhf2 at PLT
-; CHECK-NEXT:    popq %rax
-; CHECK-NEXT:    .cfi_def_cfa_offset 8
-; CHECK-NEXT:    retq
-  %x = call half @llvm.tan.f16(half %a)
-  ret half %x
-}
-
-define float @use_tanf32(float %a) {
-; CHECK-LABEL: use_tanf32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    jmp tanf at PLT # TAILCALL
-  %x = call float @llvm.tan.f32(float %a)
-  ret float %x
-}
-
-define double @use_tanf64(double %a) {
-; CHECK-LABEL: use_tanf64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    jmp tan at PLT # TAILCALL
-  %x = call double @llvm.tan.f64(double %a)
-  ret double %x
-}
-
-define fp128 @use_tanfp128(fp128 %a) {
-; CHECK-LABEL: use_tanfp128:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    jmp  tanf128 at PLT # TAILCALL
-  %x = call fp128 @llvm.tan.f128(fp128 %a)
-  ret fp128 %x
-}
-
-define ppc_fp128 @use_tanppc_fp128(ppc_fp128 %a) {
-; CHECK-LABEL: use_tanppc_fp128:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    pushq %rax
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    callq tanl at PLT
-; CHECK-NEXT:    popq %rax
-; CHECK-NEXT:    .cfi_def_cfa_offset 8
-; CHECK-NEXT:    retq
-  %x = call ppc_fp128 @llvm.tan.ppcf128(ppc_fp128 %a)
-  ret ppc_fp128 %x
-}
-
-declare half @llvm.tan.f16(half)
-declare float @llvm.tan.f32(float)
-declare double @llvm.tan.f64(double)
-declare fp128 @llvm.tan.f128(fp128)
-declare ppc_fp128 @llvm.tan.ppcf128(ppc_fp128)
diff --git a/llvm/test/CodeGen/X86/vec-libcalls.ll b/llvm/test/CodeGen/X86/vec-libcalls.ll
index 6857101d3d75bb..3a1315446d7a2c 100644
--- a/llvm/test/CodeGen/X86/vec-libcalls.ll
+++ b/llvm/test/CodeGen/X86/vec-libcalls.ll
@@ -17,14 +17,6 @@ declare <5 x float> @llvm.sin.v5f32(<5 x float>)
 declare <6 x float> @llvm.sin.v6f32(<6 x float>)
 declare <3 x double> @llvm.sin.v3f64(<3 x double>)
 
-declare <1 x float> @llvm.tan.v1f32(<1 x float>)
-declare <2 x float> @llvm.tan.v2f32(<2 x float>)
-declare <3 x float> @llvm.tan.v3f32(<3 x float>)
-declare <4 x float> @llvm.tan.v4f32(<4 x float>)
-declare <5 x float> @llvm.tan.v5f32(<5 x float>)
-declare <6 x float> @llvm.tan.v6f32(<6 x float>)
-declare <3 x double> @llvm.tan.v3f64(<3 x double>)
-
 ; Verify that all of the potential libcall candidates are handled.
 ; Some of these have custom lowering, so those cases won't have
 ; libcalls.
@@ -238,200 +230,6 @@ define <3 x double> @sin_v3f64(<3 x double> %x) nounwind {
   ret <3 x double> %r
 }
 
-define <1 x float> @tan_v1f32(<1 x float> %x) nounwind {
-; CHECK-LABEL: tan_v1f32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    pushq %rax
-; CHECK-NEXT:    callq tanf at PLT
-; CHECK-NEXT:    popq %rax
-; CHECK-NEXT:    retq
-  %r = call <1 x float> @llvm.tan.v1f32(<1 x float> %x)
-  ret <1 x float> %r
-}
-
-define <2 x float> @tan_v2f32(<2 x float> %x) nounwind {
-; CHECK-LABEL: tan_v2f32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    subq $40, %rsp
-; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT:    callq tanf at PLT
-; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; CHECK-NEXT:    vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
-; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
-; CHECK-NEXT:    callq tanf at PLT
-; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
-; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
-; CHECK-NEXT:    addq $40, %rsp
-; CHECK-NEXT:    retq
-  %r = call <2 x float> @llvm.tan.v2f32(<2 x float> %x)
-  ret <2 x float> %r
-}
-
-define <3 x float> @tan_v3f32(<3 x float> %x) nounwind {
-; CHECK-LABEL: tan_v3f32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    subq $40, %rsp
-; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; CHECK-NEXT:    callq tanf at PLT
-; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT:    vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
-; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
-; CHECK-NEXT:    callq tanf at PLT
-; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
-; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
-; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT:    vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
-; CHECK-NEXT:    # xmm0 = mem[1,0]
-; CHECK-NEXT:    callq tanf at PLT
-; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
-; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
-; CHECK-NEXT:    addq $40, %rsp
-; CHECK-NEXT:    retq
-  %r = call <3 x float> @llvm.tan.v3f32(<3 x float> %x)
-  ret <3 x float> %r
-}
-
-define <4 x float> @tan_v4f32(<4 x float> %x) nounwind {
-; CHECK-LABEL: tan_v4f32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    subq $40, %rsp
-; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; CHECK-NEXT:    callq tanf at PLT
-; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT:    vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
-; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
-; CHECK-NEXT:    callq tanf at PLT
-; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
-; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
-; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT:    vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
-; CHECK-NEXT:    # xmm0 = mem[1,0]
-; CHECK-NEXT:    callq tanf at PLT
-; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
-; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
-; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT:    vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
-; CHECK-NEXT:    # xmm0 = mem[3,3,3,3]
-; CHECK-NEXT:    callq tanf at PLT
-; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
-; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
-; CHECK-NEXT:    addq $40, %rsp
-; CHECK-NEXT:    retq
-  %r = call <4 x float> @llvm.tan.v4f32(<4 x float> %x)
-  ret <4 x float> %r
-}
-
-define <5 x float> @tan_v5f32(<5 x float> %x) nounwind {
-; CHECK-LABEL: tan_v5f32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    subq $72, %rsp
-; CHECK-NEXT:    vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
-; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
-; CHECK-NEXT:    vzeroupper
-; CHECK-NEXT:    callq tanf at PLT
-; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT:    vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
-; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
-; CHECK-NEXT:    callq tanf at PLT
-; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
-; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
-; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT:    vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
-; CHECK-NEXT:    # xmm0 = mem[1,0]
-; CHECK-NEXT:    callq tanf at PLT
-; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
-; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
-; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT:    vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
-; CHECK-NEXT:    # xmm0 = mem[3,3,3,3]
-; CHECK-NEXT:    callq tanf at PLT
-; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
-; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
-; CHECK-NEXT:    vmovups %ymm0, (%rsp) # 32-byte Spill
-; CHECK-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
-; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
-; CHECK-NEXT:    vzeroupper
-; CHECK-NEXT:    callq tanf at PLT
-; CHECK-NEXT:    vmovups (%rsp), %ymm1 # 32-byte Reload
-; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; CHECK-NEXT:    addq $72, %rsp
-; CHECK-NEXT:    retq
-  %r = call <5 x float> @llvm.tan.v5f32(<5 x float> %x)
-  ret <5 x float> %r
-}
-
-define <6 x float> @tan_v6f32(<6 x float> %x) nounwind {
-; CHECK-LABEL: tan_v6f32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    subq $72, %rsp
-; CHECK-NEXT:    vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
-; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
-; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT:    vzeroupper
-; CHECK-NEXT:    callq tanf at PLT
-; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; CHECK-NEXT:    vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
-; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
-; CHECK-NEXT:    callq tanf at PLT
-; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
-; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
-; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; CHECK-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
-; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
-; CHECK-NEXT:    vzeroupper
-; CHECK-NEXT:    callq tanf at PLT
-; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT:    vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
-; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
-; CHECK-NEXT:    callq tanf at PLT
-; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
-; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
-; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT:    vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
-; CHECK-NEXT:    # xmm0 = mem[1,0]
-; CHECK-NEXT:    callq tanf at PLT
-; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
-; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
-; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT:    vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
-; CHECK-NEXT:    # xmm0 = mem[3,3,3,3]
-; CHECK-NEXT:    callq tanf at PLT
-; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
-; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
-; CHECK-NEXT:    vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
-; CHECK-NEXT:    addq $72, %rsp
-; CHECK-NEXT:    retq
-  %r = call <6 x float> @llvm.tan.v6f32(<6 x float> %x)
-  ret <6 x float> %r
-}
-
-define <3 x double> @tan_v3f64(<3 x double> %x) nounwind {
-; CHECK-LABEL: tan_v3f64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    subq $72, %rsp
-; CHECK-NEXT:    vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
-; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
-; CHECK-NEXT:    vzeroupper
-; CHECK-NEXT:    callq tan at PLT
-; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT:    vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
-; CHECK-NEXT:    # xmm0 = mem[1,0]
-; CHECK-NEXT:    callq tan at PLT
-; CHECK-NEXT:    vmovapd (%rsp), %xmm1 # 16-byte Reload
-; CHECK-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; CHECK-NEXT:    vmovupd %ymm0, (%rsp) # 32-byte Spill
-; CHECK-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
-; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
-; CHECK-NEXT:    vzeroupper
-; CHECK-NEXT:    callq tan at PLT
-; CHECK-NEXT:    vmovups (%rsp), %ymm1 # 32-byte Reload
-; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; CHECK-NEXT:    addq $72, %rsp
-; CHECK-NEXT:    retq
-  %r = call <3 x double> @llvm.tan.v3f64(<3 x double> %x)
-  ret <3 x double> %r
-}
-
 define <2 x float> @fabs_v2f32(<2 x float> %x) nounwind {
 ; CHECK-LABEL: fabs_v2f32:
 ; CHECK:       # %bb.0: