[llvm] [X86][CodeGen] Add base atan2 intrinsic lowering (p4) (PR #110760)
Tex Riddell via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 10 15:42:08 PDT 2024
https://github.com/tex3d updated https://github.com/llvm/llvm-project/pull/110760
>From 8227c918a8f99ac081e630fb2a1bfa14be6e82c6 Mon Sep 17 00:00:00 2001
From: Tex Riddell <texr at microsoft.com>
Date: Thu, 3 Oct 2024 20:46:41 -0700
Subject: [PATCH 1/9] Fix scalar overload name constructed by
ReplaceWithVeclib.cpp
ReplaceWithVeclib.cpp would construct the overload name using all the arguments of the intrinsic, but the overload name should only be constructed from the arguments for which isVectorIntrinsicWithOverloadTypeAtArg returns true, including the return type (index -1).
Fixes #111093
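For illustration, a minimal IR sketch of the naming problem (using llvm.pow, the case from the ArmPL tests): llvm.pow is overloaded only on its return type, so the scalar name the pass must look up is llvm.pow.f64, not llvm.pow.f64.f64:

  declare <2 x double> @llvm.pow.v2f64(<2 x double>, <2 x double>)
  ; Correct scalar counterpart for the TLI lookup: a single overload suffix.
  declare double @llvm.pow.f64(double, double)
  ; Before this fix the pass constructed the nonexistent name
  ; llvm.pow.f64.f64 and therefore found no veclib mapping.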
---
llvm/lib/CodeGen/ReplaceWithVeclib.cpp | 16 +++++++++++++++-
.../AArch64/replace-with-veclib-armpl.ll | 17 +++++------------
2 files changed, 20 insertions(+), 13 deletions(-)
diff --git a/llvm/lib/CodeGen/ReplaceWithVeclib.cpp b/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
index 9fbb7b461364b1..551210db85713a 100644
--- a/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
+++ b/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
@@ -108,8 +108,22 @@ static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI,
// all vector operands match the previously found EC.
SmallVector<Type *, 8> ScalarArgTypes;
Intrinsic::ID IID = II->getIntrinsicID();
+
+ // OloadTys collects types used in scalar intrinsic overload name.
+ SmallVector<Type *, 3> OloadTys;
+ if (VTy && isVectorIntrinsicWithOverloadTypeAtArg(IID, -1))
+ OloadTys.push_back(VTy->getElementType());
+
for (auto Arg : enumerate(II->args())) {
auto *ArgTy = Arg.value()->getType();
+ // Gather type if it is used in the overload name.
+ if (isVectorIntrinsicWithOverloadTypeAtArg(IID, Arg.index())) {
+ if (!isVectorIntrinsicWithScalarOpAtArg(IID, Arg.index()) && isa<VectorType>(ArgTy))
+ OloadTys.push_back(cast<VectorType>(ArgTy)->getElementType());
+ else
+ OloadTys.push_back(ArgTy);
+ }
+
if (isVectorIntrinsicWithScalarOpAtArg(IID, Arg.index())) {
ScalarArgTypes.push_back(ArgTy);
} else if (auto *VectorArgTy = dyn_cast<VectorType>(ArgTy)) {
@@ -129,7 +143,7 @@ static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI,
// using scalar argument types.
std::string ScalarName =
Intrinsic::isOverloaded(IID)
- ? Intrinsic::getName(IID, ScalarArgTypes, II->getModule())
+ ? Intrinsic::getName(IID, OloadTys, II->getModule())
: Intrinsic::getName(IID).str();
// Try to find the mapping for the scalar version of this intrinsic and the
diff --git a/llvm/test/CodeGen/AArch64/replace-with-veclib-armpl.ll b/llvm/test/CodeGen/AArch64/replace-with-veclib-armpl.ll
index f7e95008b71237..7b173bda561553 100644
--- a/llvm/test/CodeGen/AArch64/replace-with-veclib-armpl.ll
+++ b/llvm/test/CodeGen/AArch64/replace-with-veclib-armpl.ll
@@ -15,7 +15,7 @@ declare <vscale x 2 x double> @llvm.cos.nxv2f64(<vscale x 2 x double>)
declare <vscale x 4 x float> @llvm.cos.nxv4f32(<vscale x 4 x float>)
;.
-; CHECK: @llvm.compiler.used = appending global [60 x ptr] [ptr @armpl_vcosq_f64, ptr @armpl_vcosq_f32, ptr @armpl_svcos_f64_x, ptr @armpl_svcos_f32_x, ptr @armpl_vexpq_f64, ptr @armpl_vexpq_f32, ptr @armpl_svexp_f64_x, ptr @armpl_svexp_f32_x, ptr @armpl_vexp10q_f64, ptr @armpl_vexp10q_f32, ptr @armpl_svexp10_f64_x, ptr @armpl_svexp10_f32_x, ptr @armpl_vexp2q_f64, ptr @armpl_vexp2q_f32, ptr @armpl_svexp2_f64_x, ptr @armpl_svexp2_f32_x, ptr @armpl_vlogq_f64, ptr @armpl_vlogq_f32, ptr @armpl_svlog_f64_x, ptr @armpl_svlog_f32_x, ptr @armpl_vlog10q_f64, ptr @armpl_vlog10q_f32, ptr @armpl_svlog10_f64_x, ptr @armpl_svlog10_f32_x, ptr @armpl_vlog2q_f64, ptr @armpl_vlog2q_f32, ptr @armpl_svlog2_f64_x, ptr @armpl_svlog2_f32_x, ptr @armpl_vsinq_f64, ptr @armpl_vsinq_f32, ptr @armpl_svsin_f64_x, ptr @armpl_svsin_f32_x, ptr @armpl_vtanq_f64, ptr @armpl_vtanq_f32, ptr @armpl_svtan_f64_x, ptr @armpl_svtan_f32_x, ptr @armpl_vacosq_f64, ptr @armpl_vacosq_f32, ptr @armpl_svacos_f64_x, ptr @armpl_svacos_f32_x, ptr @armpl_vasinq_f64, ptr @armpl_vasinq_f32, ptr @armpl_svasin_f64_x, ptr @armpl_svasin_f32_x, ptr @armpl_vatanq_f64, ptr @armpl_vatanq_f32, ptr @armpl_svatan_f64_x, ptr @armpl_svatan_f32_x, ptr @armpl_vcoshq_f64, ptr @armpl_vcoshq_f32, ptr @armpl_svcosh_f64_x, ptr @armpl_svcosh_f32_x, ptr @armpl_vsinhq_f64, ptr @armpl_vsinhq_f32, ptr @armpl_svsinh_f64_x, ptr @armpl_svsinh_f32_x, ptr @armpl_vtanhq_f64, ptr @armpl_vtanhq_f32, ptr @armpl_svtanh_f64_x, ptr @armpl_svtanh_f32_x], section "llvm.metadata"
+; CHECK: @llvm.compiler.used = appending global [64 x ptr] [ptr @armpl_vcosq_f64, ptr @armpl_vcosq_f32, ptr @armpl_svcos_f64_x, ptr @armpl_svcos_f32_x, ptr @armpl_vexpq_f64, ptr @armpl_vexpq_f32, ptr @armpl_svexp_f64_x, ptr @armpl_svexp_f32_x, ptr @armpl_vexp10q_f64, ptr @armpl_vexp10q_f32, ptr @armpl_svexp10_f64_x, ptr @armpl_svexp10_f32_x, ptr @armpl_vexp2q_f64, ptr @armpl_vexp2q_f32, ptr @armpl_svexp2_f64_x, ptr @armpl_svexp2_f32_x, ptr @armpl_vlogq_f64, ptr @armpl_vlogq_f32, ptr @armpl_svlog_f64_x, ptr @armpl_svlog_f32_x, ptr @armpl_vlog10q_f64, ptr @armpl_vlog10q_f32, ptr @armpl_svlog10_f64_x, ptr @armpl_svlog10_f32_x, ptr @armpl_vlog2q_f64, ptr @armpl_vlog2q_f32, ptr @armpl_svlog2_f64_x, ptr @armpl_svlog2_f32_x, ptr @armpl_vpowq_f64, ptr @armpl_vpowq_f32, ptr @armpl_svpow_f64_x, ptr @armpl_svpow_f32_x, ptr @armpl_vsinq_f64, ptr @armpl_vsinq_f32, ptr @armpl_svsin_f64_x, ptr @armpl_svsin_f32_x, ptr @armpl_vtanq_f64, ptr @armpl_vtanq_f32, ptr @armpl_svtan_f64_x, ptr @armpl_svtan_f32_x, ptr @armpl_vacosq_f64, ptr @armpl_vacosq_f32, ptr @armpl_svacos_f64_x, ptr @armpl_svacos_f32_x, ptr @armpl_vasinq_f64, ptr @armpl_vasinq_f32, ptr @armpl_svasin_f64_x, ptr @armpl_svasin_f32_x, ptr @armpl_vatanq_f64, ptr @armpl_vatanq_f32, ptr @armpl_svatan_f64_x, ptr @armpl_svatan_f32_x, ptr @armpl_vcoshq_f64, ptr @armpl_vcoshq_f32, ptr @armpl_svcosh_f64_x, ptr @armpl_svcosh_f32_x, ptr @armpl_vsinhq_f64, ptr @armpl_vsinhq_f32, ptr @armpl_svsinh_f64_x, ptr @armpl_svsinh_f32_x, ptr @armpl_vtanhq_f64, ptr @armpl_vtanhq_f32, ptr @armpl_svtanh_f64_x, ptr @armpl_svtanh_f32_x], section "llvm.metadata"
;.
define <2 x double> @llvm_cos_f64(<2 x double> %in) {
@@ -333,17 +333,10 @@ declare <4 x float> @llvm.pow.v4f32(<4 x float>, <4 x float>)
declare <vscale x 2 x double> @llvm.pow.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
declare <vscale x 4 x float> @llvm.pow.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
-;
-; There is a bug in the replace-with-veclib pass, and for intrinsics which take
-; more than one arguments, but has just one overloaded type, it incorrectly
-; reconstructs the scalar name, for pow specifically it is searching for:
-; llvm.pow.f64.f64 and llvm.pow.f32.f32
-;
-
define <2 x double> @llvm_pow_f64(<2 x double> %in, <2 x double> %power) {
; CHECK-LABEL: define <2 x double> @llvm_pow_f64
; CHECK-SAME: (<2 x double> [[IN:%.*]], <2 x double> [[POWER:%.*]]) {
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @llvm.pow.v2f64(<2 x double> [[IN]], <2 x double> [[POWER]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @armpl_vpowq_f64(<2 x double> [[IN]], <2 x double> [[POWER]])
; CHECK-NEXT: ret <2 x double> [[TMP1]]
;
%1 = call fast <2 x double> @llvm.pow.v2f64(<2 x double> %in, <2 x double> %power)
@@ -353,7 +346,7 @@ define <2 x double> @llvm_pow_f64(<2 x double> %in, <2 x double> %power) {
define <4 x float> @llvm_pow_f32(<4 x float> %in, <4 x float> %power) {
; CHECK-LABEL: define <4 x float> @llvm_pow_f32
; CHECK-SAME: (<4 x float> [[IN:%.*]], <4 x float> [[POWER:%.*]]) {
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.pow.v4f32(<4 x float> [[IN]], <4 x float> [[POWER]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @armpl_vpowq_f32(<4 x float> [[IN]], <4 x float> [[POWER]])
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
%1 = call fast <4 x float> @llvm.pow.v4f32(<4 x float> %in, <4 x float> %power)
@@ -363,7 +356,7 @@ define <4 x float> @llvm_pow_f32(<4 x float> %in, <4 x float> %power) {
define <vscale x 2 x double> @llvm_pow_vscale_f64(<vscale x 2 x double> %in, <vscale x 2 x double> %power) #0 {
; CHECK-LABEL: define <vscale x 2 x double> @llvm_pow_vscale_f64
; CHECK-SAME: (<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x double> [[POWER:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.pow.nxv2f64(<vscale x 2 x double> [[IN]], <vscale x 2 x double> [[POWER]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svpow_f64_x(<vscale x 2 x double> [[IN]], <vscale x 2 x double> [[POWER]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
;
%1 = call fast <vscale x 2 x double> @llvm.pow.nxv2f64(<vscale x 2 x double> %in, <vscale x 2 x double> %power)
@@ -373,7 +366,7 @@ define <vscale x 2 x double> @llvm_pow_vscale_f64(<vscale x 2 x double> %in, <vs
define <vscale x 4 x float> @llvm_pow_vscale_f32(<vscale x 4 x float> %in, <vscale x 4 x float> %power) #0 {
; CHECK-LABEL: define <vscale x 4 x float> @llvm_pow_vscale_f32
; CHECK-SAME: (<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x float> [[POWER:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @llvm.pow.nxv4f32(<vscale x 4 x float> [[IN]], <vscale x 4 x float> [[POWER]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svpow_f32_x(<vscale x 4 x float> [[IN]], <vscale x 4 x float> [[POWER]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
;
%1 = call fast <vscale x 4 x float> @llvm.pow.nxv4f32(<vscale x 4 x float> %in, <vscale x 4 x float> %power)
>From f0557de28123377baff92fc12d712244451b3708 Mon Sep 17 00:00:00 2001
From: Tex Riddell <texr at microsoft.com>
Date: Mon, 7 Oct 2024 11:48:16 -0700
Subject: [PATCH 2/9] Simplify logic a bit, handle scalar return overload type.
---
llvm/lib/CodeGen/ReplaceWithVeclib.cpp | 25 +++++++++----------------
1 file changed, 9 insertions(+), 16 deletions(-)
diff --git a/llvm/lib/CodeGen/ReplaceWithVeclib.cpp b/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
index 551210db85713a..740712d17fe68a 100644
--- a/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
+++ b/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
@@ -104,6 +104,7 @@ static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI,
// a void type.
auto *VTy = dyn_cast<VectorType>(II->getType());
ElementCount EC(VTy ? VTy->getElementCount() : ElementCount::getFixed(0));
+ Type *ScalarRetTy = II->getType()->getScalarType();
// Compute the argument types of the corresponding scalar call and check that
// all vector operands match the previously found EC.
SmallVector<Type *, 8> ScalarArgTypes;
@@ -111,30 +112,23 @@ static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI,
// OloadTys collects types used in scalar intrinsic overload name.
SmallVector<Type *, 3> OloadTys;
- if (VTy && isVectorIntrinsicWithOverloadTypeAtArg(IID, -1))
- OloadTys.push_back(VTy->getElementType());
+ if (isVectorIntrinsicWithOverloadTypeAtArg(IID, -1))
+ OloadTys.push_back(ScalarRetTy);
for (auto Arg : enumerate(II->args())) {
auto *ArgTy = Arg.value()->getType();
- // Gather type if it is used in the overload name.
- if (isVectorIntrinsicWithOverloadTypeAtArg(IID, Arg.index())) {
- if (!isVectorIntrinsicWithScalarOpAtArg(IID, Arg.index()) && isa<VectorType>(ArgTy))
- OloadTys.push_back(cast<VectorType>(ArgTy)->getElementType());
- else
- OloadTys.push_back(ArgTy);
- }
-
- if (isVectorIntrinsicWithScalarOpAtArg(IID, Arg.index())) {
- ScalarArgTypes.push_back(ArgTy);
- } else if (auto *VectorArgTy = dyn_cast<VectorType>(ArgTy)) {
- ScalarArgTypes.push_back(VectorArgTy->getElementType());
+ auto *ScalarArgTy = ArgTy->getScalarType();
+ ScalarArgTypes.push_back(ScalarArgTy);
+ if (isVectorIntrinsicWithOverloadTypeAtArg(IID, Arg.index()))
+ OloadTys.push_back(ScalarArgTy);
+ if (auto *VectorArgTy = dyn_cast<VectorType>(ArgTy)) {
// When return type is void, set EC to the first vector argument, and
// disallow vector arguments with different ECs.
if (EC.isZero())
EC = VectorArgTy->getElementCount();
else if (EC != VectorArgTy->getElementCount())
return false;
- } else
+ } else if (!isVectorIntrinsicWithScalarOpAtArg(IID, Arg.index()))
// Exit when it is supposed to be a vector argument but it isn't.
return false;
}
@@ -160,7 +154,6 @@ static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI,
// Replace the call to the intrinsic with a call to the vector library
// function.
- Type *ScalarRetTy = II->getType()->getScalarType();
FunctionType *ScalarFTy =
FunctionType::get(ScalarRetTy, ScalarArgTypes, /*isVarArg*/ false);
const std::string MangledName = VD->getVectorFunctionABIVariantString();
>From 2c7028519c6b141838b0b013d9e4ba74d9cca1cd Mon Sep 17 00:00:00 2001
From: Tex Riddell <texr at microsoft.com>
Date: Mon, 7 Oct 2024 14:30:22 -0700
Subject: [PATCH 3/9] Skip not_intrinsic; skip void return for OloadTys; update
tests
---
llvm/lib/CodeGen/ReplaceWithVeclib.cpp | 7 +++++--
.../CodeGen/AArch64/replace-with-veclib-sleef-scalable.ll | 6 +++---
llvm/test/CodeGen/AArch64/replace-with-veclib-sleef.ll | 6 +++---
3 files changed, 11 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/CodeGen/ReplaceWithVeclib.cpp b/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
index 740712d17fe68a..3e359123d51d71 100644
--- a/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
+++ b/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
@@ -100,6 +100,9 @@ static void replaceWithTLIFunction(IntrinsicInst *II, VFInfo &Info,
static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI,
IntrinsicInst *II) {
assert(II != nullptr && "Intrinsic cannot be null");
+ Intrinsic::ID IID = II->getIntrinsicID();
+ if (IID == Intrinsic::not_intrinsic)
+ return false;
// At the moment VFABI assumes the return type is always widened unless it is
// a void type.
auto *VTy = dyn_cast<VectorType>(II->getType());
@@ -108,11 +111,11 @@ static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI,
// Compute the argument types of the corresponding scalar call and check that
// all vector operands match the previously found EC.
SmallVector<Type *, 8> ScalarArgTypes;
- Intrinsic::ID IID = II->getIntrinsicID();
// OloadTys collects types used in scalar intrinsic overload name.
SmallVector<Type *, 3> OloadTys;
- if (isVectorIntrinsicWithOverloadTypeAtArg(IID, -1))
+ if (!ScalarRetTy->isVoidTy() &&
+ isVectorIntrinsicWithOverloadTypeAtArg(IID, -1))
OloadTys.push_back(ScalarRetTy);
for (auto Arg : enumerate(II->args())) {
diff --git a/llvm/test/CodeGen/AArch64/replace-with-veclib-sleef-scalable.ll b/llvm/test/CodeGen/AArch64/replace-with-veclib-sleef-scalable.ll
index f3f27344ad80e3..155587026b464b 100644
--- a/llvm/test/CodeGen/AArch64/replace-with-veclib-sleef-scalable.ll
+++ b/llvm/test/CodeGen/AArch64/replace-with-veclib-sleef-scalable.ll
@@ -4,7 +4,7 @@
target triple = "aarch64-unknown-linux-gnu"
;.
-; CHECK: @llvm.compiler.used = appending global [30 x ptr] [ptr @_ZGVsMxv_cos, ptr @_ZGVsMxv_cosf, ptr @_ZGVsMxv_exp, ptr @_ZGVsMxv_expf, ptr @_ZGVsMxv_exp10, ptr @_ZGVsMxv_exp10f, ptr @_ZGVsMxv_exp2, ptr @_ZGVsMxv_exp2f, ptr @_ZGVsMxv_log, ptr @_ZGVsMxv_logf, ptr @_ZGVsMxv_log10, ptr @_ZGVsMxv_log10f, ptr @_ZGVsMxv_log2, ptr @_ZGVsMxv_log2f, ptr @_ZGVsMxv_sin, ptr @_ZGVsMxv_sinf, ptr @_ZGVsMxv_tan, ptr @_ZGVsMxv_tanf, ptr @_ZGVsMxv_acos, ptr @_ZGVsMxv_acosf, ptr @_ZGVsMxv_asin, ptr @_ZGVsMxv_asinf, ptr @_ZGVsMxv_atan, ptr @_ZGVsMxv_atanf, ptr @_ZGVsMxv_cosh, ptr @_ZGVsMxv_coshf, ptr @_ZGVsMxv_sinh, ptr @_ZGVsMxv_sinhf, ptr @_ZGVsMxv_tanh, ptr @_ZGVsMxv_tanhf], section "llvm.metadata"
+; CHECK: @llvm.compiler.used = appending global [32 x ptr] [ptr @_ZGVsMxv_cos, ptr @_ZGVsMxv_cosf, ptr @_ZGVsMxv_exp, ptr @_ZGVsMxv_expf, ptr @_ZGVsMxv_exp10, ptr @_ZGVsMxv_exp10f, ptr @_ZGVsMxv_exp2, ptr @_ZGVsMxv_exp2f, ptr @_ZGVsMxv_log, ptr @_ZGVsMxv_logf, ptr @_ZGVsMxv_log10, ptr @_ZGVsMxv_log10f, ptr @_ZGVsMxv_log2, ptr @_ZGVsMxv_log2f, ptr @_ZGVsMxvv_pow, ptr @_ZGVsMxvv_powf, ptr @_ZGVsMxv_sin, ptr @_ZGVsMxv_sinf, ptr @_ZGVsMxv_tan, ptr @_ZGVsMxv_tanf, ptr @_ZGVsMxv_acos, ptr @_ZGVsMxv_acosf, ptr @_ZGVsMxv_asin, ptr @_ZGVsMxv_asinf, ptr @_ZGVsMxv_atan, ptr @_ZGVsMxv_atanf, ptr @_ZGVsMxv_cosh, ptr @_ZGVsMxv_coshf, ptr @_ZGVsMxv_sinh, ptr @_ZGVsMxv_sinhf, ptr @_ZGVsMxv_tanh, ptr @_ZGVsMxv_tanhf], section "llvm.metadata"
;.
define <vscale x 2 x double> @llvm_ceil_vscale_f64(<vscale x 2 x double> %in) {
; CHECK-LABEL: @llvm_ceil_vscale_f64(
@@ -278,7 +278,7 @@ define <vscale x 4 x float> @llvm_nearbyint_vscale_f32(<vscale x 4 x float> %in)
define <vscale x 2 x double> @llvm_pow_vscale_f64(<vscale x 2 x double> %in, <vscale x 2 x double> %pow) {
; CHECK-LABEL: @llvm_pow_vscale_f64(
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.pow.nxv2f64(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x double> [[POW:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxvv_pow(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x double> [[POW:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
;
%1 = call fast <vscale x 2 x double> @llvm.pow.nxv2f64(<vscale x 2 x double> %in, <vscale x 2 x double> %pow)
@@ -287,7 +287,7 @@ define <vscale x 2 x double> @llvm_pow_vscale_f64(<vscale x 2 x double> %in, <vs
define <vscale x 4 x float> @llvm_pow_vscale_f32(<vscale x 4 x float> %in, <vscale x 4 x float> %pow) {
; CHECK-LABEL: @llvm_pow_vscale_f32(
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @llvm.pow.nxv4f32(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x float> [[POW:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxvv_powf(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x float> [[POW:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
;
%1 = call fast <vscale x 4 x float> @llvm.pow.nxv4f32(<vscale x 4 x float> %in, <vscale x 4 x float> %pow)
diff --git a/llvm/test/CodeGen/AArch64/replace-with-veclib-sleef.ll b/llvm/test/CodeGen/AArch64/replace-with-veclib-sleef.ll
index 59c2f94e167633..b89bf3d6f2ca97 100644
--- a/llvm/test/CodeGen/AArch64/replace-with-veclib-sleef.ll
+++ b/llvm/test/CodeGen/AArch64/replace-with-veclib-sleef.ll
@@ -4,7 +4,7 @@
target triple = "aarch64-unknown-linux-gnu"
;.
-; CHECK: @llvm.compiler.used = appending global [30 x ptr] [ptr @_ZGVnN2v_cos, ptr @_ZGVnN4v_cosf, ptr @_ZGVnN2v_exp, ptr @_ZGVnN4v_expf, ptr @_ZGVnN2v_exp10, ptr @_ZGVnN4v_exp10f, ptr @_ZGVnN2v_exp2, ptr @_ZGVnN4v_exp2f, ptr @_ZGVnN2v_log, ptr @_ZGVnN4v_logf, ptr @_ZGVnN2v_log10, ptr @_ZGVnN4v_log10f, ptr @_ZGVnN2v_log2, ptr @_ZGVnN4v_log2f, ptr @_ZGVnN2v_sin, ptr @_ZGVnN4v_sinf, ptr @_ZGVnN2v_tan, ptr @_ZGVnN4v_tanf, ptr @_ZGVnN2v_acos, ptr @_ZGVnN4v_acosf, ptr @_ZGVnN2v_asin, ptr @_ZGVnN4v_asinf, ptr @_ZGVnN2v_atan, ptr @_ZGVnN4v_atanf, ptr @_ZGVnN2v_cosh, ptr @_ZGVnN4v_coshf, ptr @_ZGVnN2v_sinh, ptr @_ZGVnN4v_sinhf, ptr @_ZGVnN2v_tanh, ptr @_ZGVnN4v_tanhf], section "llvm.metadata"
+; CHECK: @llvm.compiler.used = appending global [32 x ptr] [ptr @_ZGVnN2v_cos, ptr @_ZGVnN4v_cosf, ptr @_ZGVnN2v_exp, ptr @_ZGVnN4v_expf, ptr @_ZGVnN2v_exp10, ptr @_ZGVnN4v_exp10f, ptr @_ZGVnN2v_exp2, ptr @_ZGVnN4v_exp2f, ptr @_ZGVnN2v_log, ptr @_ZGVnN4v_logf, ptr @_ZGVnN2v_log10, ptr @_ZGVnN4v_log10f, ptr @_ZGVnN2v_log2, ptr @_ZGVnN4v_log2f, ptr @_ZGVnN2vv_pow, ptr @_ZGVnN4vv_powf, ptr @_ZGVnN2v_sin, ptr @_ZGVnN4v_sinf, ptr @_ZGVnN2v_tan, ptr @_ZGVnN4v_tanf, ptr @_ZGVnN2v_acos, ptr @_ZGVnN4v_acosf, ptr @_ZGVnN2v_asin, ptr @_ZGVnN4v_asinf, ptr @_ZGVnN2v_atan, ptr @_ZGVnN4v_atanf, ptr @_ZGVnN2v_cosh, ptr @_ZGVnN4v_coshf, ptr @_ZGVnN2v_sinh, ptr @_ZGVnN4v_sinhf, ptr @_ZGVnN2v_tanh, ptr @_ZGVnN4v_tanhf], section "llvm.metadata"
;.
define <2 x double> @llvm_ceil_f64(<2 x double> %in) {
; CHECK-LABEL: @llvm_ceil_f64(
@@ -278,7 +278,7 @@ define <4 x float> @llvm_nearbyint_f32(<4 x float> %in) {
define <2 x double> @llvm_pow_f64(<2 x double> %in, <2 x double> %pow) {
; CHECK-LABEL: @llvm_pow_f64(
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @llvm.pow.v2f64(<2 x double> [[IN:%.*]], <2 x double> [[POW:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2vv_pow(<2 x double> [[IN:%.*]], <2 x double> [[POW:%.*]])
; CHECK-NEXT: ret <2 x double> [[TMP1]]
;
%1 = call fast <2 x double> @llvm.pow.v2f64(<2 x double> %in, <2 x double> %pow)
@@ -287,7 +287,7 @@ define <2 x double> @llvm_pow_f64(<2 x double> %in, <2 x double> %pow) {
define <4 x float> @llvm_pow_f32(<4 x float> %in, <4 x float> %pow) {
; CHECK-LABEL: @llvm_pow_f32(
-; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.pow.v4f32(<4 x float> [[IN:%.*]], <4 x float> [[POW:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4vv_powf(<4 x float> [[IN:%.*]], <4 x float> [[POW:%.*]])
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
%1 = call fast <4 x float> @llvm.pow.v4f32(<4 x float> %in, <4 x float> %pow)
>From 5e92c3d8b39178dc0d8286465268305d1ab79a9d Mon Sep 17 00:00:00 2001
From: Tex Riddell <texr at microsoft.com>
Date: Tue, 8 Oct 2024 18:37:02 -0700
Subject: [PATCH 4/9] Rework logic to preserve "scalar" operand/return type,
even if vector
If isVectorIntrinsicWithScalarOpAtArg() returns true for an argument or the return, we should not modify its type or use its element count, even when it is a vector. This lets an operand be identified as one that does not vectorize/scalarize with the width of the intrinsic but keeps its type unchanged, even if that type is a vector.
The initial ElementCount is taken from the return type only when the return is a vector and isVectorIntrinsicWithScalarOpAtArg returns false for it.
This also restores the control flow of the loop to something closer to the original.
Remove the inconsistent comment about the VFABI return-type assumption, since the accessors tell us how to handle the return type for each operation.
Fix formatting.
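To illustrate the preserved-operand case with a concrete intrinsic (a property of llvm.powi, cited here only as an example): isVectorIntrinsicWithScalarOpAtArg reports the exponent operand, so it keeps its i32 type at every vector width instead of taking on the intrinsic's element count:

  declare <4 x float> @llvm.powi.v4f32.i32(<4 x float>, i32)
  ; Scalarized counterpart: the exponent is not per-lane and stays i32.
  declare float @llvm.powi.f32.i32(float, i32)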
---
llvm/lib/CodeGen/ReplaceWithVeclib.cpp | 34 ++++++++++++++++----------
1 file changed, 21 insertions(+), 13 deletions(-)
diff --git a/llvm/lib/CodeGen/ReplaceWithVeclib.cpp b/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
index 3e359123d51d71..f14f165a98715d 100644
--- a/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
+++ b/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
@@ -103,35 +103,43 @@ static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI,
Intrinsic::ID IID = II->getIntrinsicID();
if (IID == Intrinsic::not_intrinsic)
return false;
- // At the moment VFABI assumes the return type is always widened unless it is
- // a void type.
- auto *VTy = dyn_cast<VectorType>(II->getType());
- ElementCount EC(VTy ? VTy->getElementCount() : ElementCount::getFixed(0));
- Type *ScalarRetTy = II->getType()->getScalarType();
+
+ // RetIsScalar: Return type is not widened.
+ bool RetIsScalar = isVectorIntrinsicWithScalarOpAtArg(IID, -1);
+ Type *RetTy = II->getType();
+ Type *ScalarRetTy = RetTy->getScalarType();
+
// Compute the argument types of the corresponding scalar call and check that
// all vector operands match the previously found EC.
SmallVector<Type *, 8> ScalarArgTypes;
+ auto *VTy = dyn_cast<VectorType>(RetTy);
+ ElementCount EC(!RetIsScalar && VTy ? VTy->getElementCount()
+ : ElementCount::getFixed(0));
// OloadTys collects types used in scalar intrinsic overload name.
SmallVector<Type *, 3> OloadTys;
- if (!ScalarRetTy->isVoidTy() &&
- isVectorIntrinsicWithOverloadTypeAtArg(IID, -1))
- OloadTys.push_back(ScalarRetTy);
+ if (!RetTy->isVoidTy() && isVectorIntrinsicWithOverloadTypeAtArg(IID, -1))
+ OloadTys.push_back(RetIsScalar ? RetTy : ScalarRetTy);
for (auto Arg : enumerate(II->args())) {
auto *ArgTy = Arg.value()->getType();
- auto *ScalarArgTy = ArgTy->getScalarType();
- ScalarArgTypes.push_back(ScalarArgTy);
- if (isVectorIntrinsicWithOverloadTypeAtArg(IID, Arg.index()))
+ bool IsOloadTy = isVectorIntrinsicWithOverloadTypeAtArg(IID, Arg.index());
+ if (isVectorIntrinsicWithScalarOpAtArg(IID, Arg.index())) {
+ ScalarArgTypes.push_back(ArgTy);
+ if (IsOloadTy)
+ OloadTys.push_back(ArgTy);
+ } else if (auto *VectorArgTy = dyn_cast<VectorType>(ArgTy)) {
+ auto *ScalarArgTy = VectorArgTy->getElementType();
+ ScalarArgTypes.push_back(ScalarArgTy);
+ if (IsOloadTy)
OloadTys.push_back(ScalarArgTy);
- if (auto *VectorArgTy = dyn_cast<VectorType>(ArgTy)) {
// When return type is void, set EC to the first vector argument, and
// disallow vector arguments with different ECs.
if (EC.isZero())
EC = VectorArgTy->getElementCount();
else if (EC != VectorArgTy->getElementCount())
return false;
- } else if (!isVectorIntrinsicWithScalarOpAtArg(IID, Arg.index()))
+ } else
// Exit when it is supposed to be a vector argument but it isn't.
return false;
}
>From 9adc0469e5443740553025434a2bb840d4dea5bd Mon Sep 17 00:00:00 2001
From: Tex Riddell <texr at microsoft.com>
Date: Wed, 9 Oct 2024 09:57:14 -0700
Subject: [PATCH 5/9] fix formatting
---
llvm/lib/CodeGen/ReplaceWithVeclib.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/CodeGen/ReplaceWithVeclib.cpp b/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
index f14f165a98715d..e835a8651cac05 100644
--- a/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
+++ b/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
@@ -114,7 +114,7 @@ static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI,
SmallVector<Type *, 8> ScalarArgTypes;
auto *VTy = dyn_cast<VectorType>(RetTy);
ElementCount EC(!RetIsScalar && VTy ? VTy->getElementCount()
- : ElementCount::getFixed(0));
+ : ElementCount::getFixed(0));
// OloadTys collects types used in scalar intrinsic overload name.
SmallVector<Type *, 3> OloadTys;
>From 6a07812555f39b5e2c0885b756cc0364c63c7d62 Mon Sep 17 00:00:00 2001
From: Tex Riddell <texr at microsoft.com>
Date: Wed, 9 Oct 2024 15:22:52 -0700
Subject: [PATCH 6/9] Revert change to use isVectorIntrinsicWithScalarOpAtArg
for return type
---
llvm/lib/CodeGen/ReplaceWithVeclib.cpp | 17 +++++++----------
1 file changed, 7 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/CodeGen/ReplaceWithVeclib.cpp b/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
index e835a8651cac05..2e63e289910934 100644
--- a/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
+++ b/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
@@ -103,24 +103,21 @@ static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI,
Intrinsic::ID IID = II->getIntrinsicID();
if (IID == Intrinsic::not_intrinsic)
return false;
-
- // RetIsScalar: Return type is not widened.
- bool RetIsScalar = isVectorIntrinsicWithScalarOpAtArg(IID, -1);
Type *RetTy = II->getType();
Type *ScalarRetTy = RetTy->getScalarType();
-
- // Compute the argument types of the corresponding scalar call and check that
- // all vector operands match the previously found EC.
- SmallVector<Type *, 8> ScalarArgTypes;
+ // At the moment VFABI assumes the return type is always widened unless it is
+ // a void type.
auto *VTy = dyn_cast<VectorType>(RetTy);
- ElementCount EC(!RetIsScalar && VTy ? VTy->getElementCount()
- : ElementCount::getFixed(0));
+ ElementCount EC(VTy ? VTy->getElementCount() : ElementCount::getFixed(0));
// OloadTys collects types used in scalar intrinsic overload name.
SmallVector<Type *, 3> OloadTys;
if (!RetTy->isVoidTy() && isVectorIntrinsicWithOverloadTypeAtArg(IID, -1))
- OloadTys.push_back(RetIsScalar ? RetTy : ScalarRetTy);
+ OloadTys.push_back(ScalarRetTy);
+ // Compute the argument types of the corresponding scalar call and check that
+ // all vector operands match the previously found EC.
+ SmallVector<Type *, 8> ScalarArgTypes;
for (auto Arg : enumerate(II->args())) {
auto *ArgTy = Arg.value()->getType();
bool IsOloadTy = isVectorIntrinsicWithOverloadTypeAtArg(IID, Arg.index());
>From 5a7e2c3d80b7f025790214732742004dc3eb404f Mon Sep 17 00:00:00 2001
From: Tex Riddell <texr at microsoft.com>
Date: Thu, 10 Oct 2024 11:15:26 -0700
Subject: [PATCH 7/9] Move intrinsic ID check to loop
---
llvm/lib/CodeGen/ReplaceWithVeclib.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/CodeGen/ReplaceWithVeclib.cpp b/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
index 2e63e289910934..7f3c5cf6cb4436 100644
--- a/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
+++ b/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
@@ -101,8 +101,6 @@ static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI,
IntrinsicInst *II) {
assert(II != nullptr && "Intrinsic cannot be null");
Intrinsic::ID IID = II->getIntrinsicID();
- if (IID == Intrinsic::not_intrinsic)
- return false;
Type *RetTy = II->getType();
Type *ScalarRetTy = RetTy->getScalarType();
// At the moment VFABI assumes the return type is always widened unless it is
@@ -208,6 +206,8 @@ static bool runImpl(const TargetLibraryInfo &TLI, Function &F) {
for (auto &I : instructions(F)) {
// Process only intrinsic calls that return void or a vector.
if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
+ if (II->getIntrinsicID() == Intrinsic::not_intrinsic)
+ continue;
if (!II->getType()->isVectorTy() && !II->getType()->isVoidTy())
continue;
>From 0618ccdc50ac796aa98b7ba40d5f8a28b4fb6625 Mon Sep 17 00:00:00 2001
From: Tex Riddell <texr at microsoft.com>
Date: Sat, 14 Sep 2024 18:14:20 -0700
Subject: [PATCH 8/9] [X86][CodeGen] Add base atan2 intrinsic lowering (p4)
This change is part of this proposal: https://discourse.llvm.org/t/rfc-all-the-math-intrinsics/78294
Based on example PR #96222 and fix PR #101268, with some differences due to the two-argument intrinsic and an intermediate refactor (RuntimeLibcalls.cpp). A short IR sketch of the new intrinsics follows the change list below.
- Add llvm.experimental.constrained.atan2 - Intrinsics.td, ConstrainedOps.def, LangRef.rst
- Add FATAN2/STRICT_FATAN2 to ISDOpcodes.h and TargetSelectionDAG.td; connect to the intrinsic in BasicTTIImpl.h and to the LibFunc_ entries in SelectionDAGBuilder.cpp; map the generic opcode in SelectionDAGCompat.td
- Update LegalizeDAG.cpp, LegalizeFloatTypes.cpp, LegalizeVectorOps.cpp, and LegalizeVectorTypes.cpp
- Update isKnownNeverNaN in SelectionDAG.cpp
- Update SelectionDAGDumper.cpp
- Update libcalls - RuntimeLibcalls.def, RuntimeLibcalls.cpp, LegalizerHelper.cpp
- Update isKnownNeverNaN for generic opcode in GlobalISel/Utils.cpp
- TargetLoweringBase.cpp - Expand for vectors, promote f16
- X86ISelLowering.cpp - Expand f80, promote f32 to f64 for MSVC
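A minimal IR sketch of what this lowers (intrinsic signatures as in Intrinsics.td; the function names are illustrative):

  declare double @llvm.atan2.f64(double, double)
  declare double @llvm.experimental.constrained.atan2.f64(double, double, metadata, metadata)

  define double @plain(double %y, double %x) {
    %r = call double @llvm.atan2.f64(double %y, double %x)
    ret double %r
  }

  define double @strict(double %y, double %x) strictfp {
    %r = call double @llvm.experimental.constrained.atan2.f64(double %y, double %x,
                 metadata !"round.dynamic", metadata !"fpexcept.strict") #0
    ret double %r
  }

  attributes #0 = { strictfp }

On targets without a native lowering, both forms expand to the atan2/atan2f/atan2l (or atan2f128) libcalls added in RuntimeLibcalls.def.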
---
llvm/include/llvm/CodeGen/BasicTTIImpl.h | 3 +
llvm/include/llvm/CodeGen/ISDOpcodes.h | 3 +
llvm/include/llvm/IR/ConstrainedOps.def | 1 +
llvm/include/llvm/IR/Intrinsics.td | 5 +
llvm/include/llvm/IR/RuntimeLibcalls.def | 5 +
.../Target/GlobalISel/SelectionDAGCompat.td | 1 +
.../include/llvm/Target/TargetSelectionDAG.td | 6 +
.../CodeGen/GlobalISel/LegalizerHelper.cpp | 4 +
llvm/lib/CodeGen/GlobalISel/Utils.cpp | 2 +
llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 7 +
.../SelectionDAG/LegalizeFloatTypes.cpp | 22 ++
llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 2 +
.../SelectionDAG/LegalizeVectorOps.cpp | 1 +
.../SelectionDAG/LegalizeVectorTypes.cpp | 3 +
.../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 1 +
.../SelectionDAG/SelectionDAGBuilder.cpp | 12 +
.../SelectionDAG/SelectionDAGDumper.cpp | 2 +
llvm/lib/CodeGen/TargetLoweringBase.cpp | 7 +-
llvm/lib/IR/RuntimeLibcalls.cpp | 1 +
llvm/lib/Target/X86/X86ISelLowering.cpp | 2 +
llvm/test/Assembler/fp-intrinsics-attr.ll | 8 +
llvm/test/CodeGen/X86/fp-intrinsics.ll | 61 ++++-
.../test/CodeGen/X86/fp128-libcalls-strict.ll | 51 ++++
llvm/test/CodeGen/X86/fp80-strict-libcalls.ll | 36 +++
llvm/test/CodeGen/X86/llvm.atan2.ll | 80 ++++++
.../X86/vector-constrained-fp-intrinsics.ll | 258 ++++++++++++++++++
llvm/test/Feature/fp-intrinsics.ll | 15 +-
27 files changed, 594 insertions(+), 5 deletions(-)
create mode 100644 llvm/test/CodeGen/X86/llvm.atan2.ll
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index c36a346c1b2e05..9d875e093d6424 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -1993,6 +1993,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
case Intrinsic::atan:
ISD = ISD::FATAN;
break;
+ case Intrinsic::atan2:
+ ISD = ISD::FATAN2;
+ break;
case Intrinsic::sinh:
ISD = ISD::FSINH;
break;
diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index da43f5be10ff3b..0b6d155b6d161e 100644
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -425,6 +425,7 @@ enum NodeType {
STRICT_FASIN,
STRICT_FACOS,
STRICT_FATAN,
+ STRICT_FATAN2,
STRICT_FSINH,
STRICT_FCOSH,
STRICT_FTANH,
@@ -994,6 +995,8 @@ enum NodeType {
FPOWI,
/// FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
FLDEXP,
+ /// FATAN2 - atan2, inspired by libm.
+ FATAN2,
/// FFREXP - frexp, extract fractional and exponent component of a
/// floating-point value. Returns the two components as separate return
diff --git a/llvm/include/llvm/IR/ConstrainedOps.def b/llvm/include/llvm/IR/ConstrainedOps.def
index 56304c377b8393..30a82bf633d575 100644
--- a/llvm/include/llvm/IR/ConstrainedOps.def
+++ b/llvm/include/llvm/IR/ConstrainedOps.def
@@ -72,6 +72,7 @@ CMP_INSTRUCTION(FCmp, 2, 0, experimental_constrained_fcmps, FSETCCS
DAG_FUNCTION(acos, 1, 1, experimental_constrained_acos, FACOS)
DAG_FUNCTION(asin, 1, 1, experimental_constrained_asin, FASIN)
DAG_FUNCTION(atan, 1, 1, experimental_constrained_atan, FATAN)
+DAG_FUNCTION(atan2, 2, 1, experimental_constrained_atan2, FATAN2)
DAG_FUNCTION(ceil, 1, 0, experimental_constrained_ceil, FCEIL)
DAG_FUNCTION(cos, 1, 1, experimental_constrained_cos, FCOS)
DAG_FUNCTION(cosh, 1, 1, experimental_constrained_cosh, FCOSH)
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 079ac61adef6e0..d946da4e4622dd 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -1226,6 +1226,11 @@ let IntrProperties = [IntrInaccessibleMemOnly, IntrWillReturn, IntrStrictFP] in
[ LLVMMatchType<0>,
llvm_metadata_ty,
llvm_metadata_ty ]>;
+ def int_experimental_constrained_atan2 : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ],
+ [ LLVMMatchType<0>,
+ LLVMMatchType<0>,
+ llvm_metadata_ty,
+ llvm_metadata_ty ]>;
def int_experimental_constrained_sin : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ],
[ LLVMMatchType<0>,
llvm_metadata_ty,
diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.def b/llvm/include/llvm/IR/RuntimeLibcalls.def
index 69cf43140ad4bd..4aab658a86690c 100644
--- a/llvm/include/llvm/IR/RuntimeLibcalls.def
+++ b/llvm/include/llvm/IR/RuntimeLibcalls.def
@@ -232,6 +232,11 @@ HANDLE_LIBCALL(ATAN_F64, "atan")
HANDLE_LIBCALL(ATAN_F80, "atanl")
HANDLE_LIBCALL(ATAN_F128,"atanl")
HANDLE_LIBCALL(ATAN_PPCF128, "atanl")
+HANDLE_LIBCALL(ATAN2_F32, "atan2f")
+HANDLE_LIBCALL(ATAN2_F64, "atan2")
+HANDLE_LIBCALL(ATAN2_F80, "atan2l")
+HANDLE_LIBCALL(ATAN2_F128,"atan2l")
+HANDLE_LIBCALL(ATAN2_PPCF128, "atan2l")
HANDLE_LIBCALL(SINCOS_F32, nullptr)
HANDLE_LIBCALL(SINCOS_F64, nullptr)
HANDLE_LIBCALL(SINCOS_F80, nullptr)
diff --git a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
index d9121cf166e5aa..83bf3c335cac89 100644
--- a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
+++ b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
@@ -154,6 +154,7 @@ def : GINodeEquiv<G_FTAN, ftan>;
def : GINodeEquiv<G_FACOS, facos>;
def : GINodeEquiv<G_FASIN, fasin>;
def : GINodeEquiv<G_FATAN, fatan>;
+def : GINodeEquiv<G_FATAN2, fatan2>;
def : GINodeEquiv<G_FCOSH, fcosh>;
def : GINodeEquiv<G_FSINH, fsinh>;
def : GINodeEquiv<G_FTANH, ftanh>;
diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td
index adf8a75f620225..fa516fc9b10175 100644
--- a/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -534,6 +534,7 @@ def ftan : SDNode<"ISD::FTAN" , SDTFPUnaryOp>;
def fasin : SDNode<"ISD::FASIN" , SDTFPUnaryOp>;
def facos : SDNode<"ISD::FACOS" , SDTFPUnaryOp>;
def fatan : SDNode<"ISD::FATAN" , SDTFPUnaryOp>;
+def fatan2 : SDNode<"ISD::FATAN2" , SDTFPBinOp>;
def fsinh : SDNode<"ISD::FSINH" , SDTFPUnaryOp>;
def fcosh : SDNode<"ISD::FCOSH" , SDTFPUnaryOp>;
def ftanh : SDNode<"ISD::FTANH" , SDTFPUnaryOp>;
@@ -602,6 +603,8 @@ def strict_facos : SDNode<"ISD::STRICT_FACOS",
SDTFPUnaryOp, [SDNPHasChain]>;
def strict_fatan : SDNode<"ISD::STRICT_FATAN",
SDTFPUnaryOp, [SDNPHasChain]>;
+def strict_fatan2 : SDNode<"ISD::STRICT_FATAN2",
+ SDTFPBinOp, [SDNPHasChain]>;
def strict_fsinh : SDNode<"ISD::STRICT_FSINH",
SDTFPUnaryOp, [SDNPHasChain]>;
def strict_fcosh : SDNode<"ISD::STRICT_FCOSH",
@@ -1588,6 +1591,9 @@ def any_facos : PatFrags<(ops node:$src),
def any_fatan : PatFrags<(ops node:$src),
[(strict_fatan node:$src),
(fatan node:$src)]>;
+def any_fatan2 : PatFrags<(ops node:$src1, node:$src2),
+ [(strict_fatan2 node:$src1, node:$src2),
+ (fatan2 node:$src1, node:$src2)]>;
def any_fsinh : PatFrags<(ops node:$src),
[(strict_fsinh node:$src),
(fsinh node:$src)]>;
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 3b2fd95076c465..d5222641342ac1 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -457,6 +457,8 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
RTLIBCASE(ACOS_F);
case TargetOpcode::G_FATAN:
RTLIBCASE(ATAN_F);
+ case TargetOpcode::G_FATAN2:
+ RTLIBCASE(ATAN2_F);
case TargetOpcode::G_FSINH:
RTLIBCASE(SINH_F);
case TargetOpcode::G_FCOSH:
@@ -1202,6 +1204,7 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
case TargetOpcode::G_FACOS:
case TargetOpcode::G_FASIN:
case TargetOpcode::G_FATAN:
+ case TargetOpcode::G_FATAN2:
case TargetOpcode::G_FCOSH:
case TargetOpcode::G_FSINH:
case TargetOpcode::G_FTANH:
@@ -3122,6 +3125,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
case TargetOpcode::G_FACOS:
case TargetOpcode::G_FASIN:
case TargetOpcode::G_FATAN:
+ case TargetOpcode::G_FATAN2:
case TargetOpcode::G_FCOSH:
case TargetOpcode::G_FSINH:
case TargetOpcode::G_FTANH:
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index 9574464207d99f..722ceea29c951c 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -828,6 +828,7 @@ bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI,
case TargetOpcode::G_FACOS:
case TargetOpcode::G_FASIN:
case TargetOpcode::G_FATAN:
+ case TargetOpcode::G_FATAN2:
case TargetOpcode::G_FCOSH:
case TargetOpcode::G_FSINH:
case TargetOpcode::G_FTANH:
@@ -1715,6 +1716,7 @@ bool llvm::isPreISelGenericFloatingPointOpcode(unsigned Opc) {
case TargetOpcode::G_FACOS:
case TargetOpcode::G_FASIN:
case TargetOpcode::G_FATAN:
+ case TargetOpcode::G_FATAN2:
case TargetOpcode::G_FCOSH:
case TargetOpcode::G_FSINH:
case TargetOpcode::G_FTANH:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 04eb891f719d28..88106112d433ca 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -4599,6 +4599,11 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
ExpandFPLibCall(Node, RTLIB::ATAN_F32, RTLIB::ATAN_F64, RTLIB::ATAN_F80,
RTLIB::ATAN_F128, RTLIB::ATAN_PPCF128, Results);
break;
+ case ISD::FATAN2:
+ case ISD::STRICT_FATAN2:
+ ExpandFPLibCall(Node, RTLIB::ATAN2_F32, RTLIB::ATAN2_F64, RTLIB::ATAN2_F80,
+ RTLIB::ATAN2_F128, RTLIB::ATAN2_PPCF128, Results);
+ break;
case ISD::FSINH:
case ISD::STRICT_FSINH:
ExpandFPLibCall(Node, RTLIB::SINH_F32, RTLIB::SINH_F64, RTLIB::SINH_F80,
@@ -5485,6 +5490,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
case ISD::FMINIMUMNUM:
case ISD::FMAXIMUMNUM:
case ISD::FPOW:
+ case ISD::FATAN2:
Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1));
Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2,
@@ -5501,6 +5507,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
case ISD::STRICT_FMAXNUM:
case ISD::STRICT_FREM:
case ISD::STRICT_FPOW:
+ case ISD::STRICT_FATAN2:
Tmp1 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other},
{Node->getOperand(0), Node->getOperand(1)});
Tmp2 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other},
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 2c81c829e75cbb..73c258f0f6f18c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -84,6 +84,8 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
case ISD::FASIN: R = SoftenFloatRes_FASIN(N); break;
case ISD::STRICT_FATAN:
case ISD::FATAN: R = SoftenFloatRes_FATAN(N); break;
+ case ISD::STRICT_FATAN2:
+ case ISD::FATAN2: R = SoftenFloatRes_FATAN2(N); break;
case ISD::FCBRT: R = SoftenFloatRes_FCBRT(N); break;
case ISD::STRICT_FCEIL:
case ISD::FCEIL: R = SoftenFloatRes_FCEIL(N); break;
@@ -366,6 +368,13 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FATAN(SDNode *N) {
RTLIB::ATAN_F80, RTLIB::ATAN_F128, RTLIB::ATAN_PPCF128));
}
+SDValue DAGTypeLegalizer::SoftenFloatRes_FATAN2(SDNode *N) {
+ return SoftenFloatRes_Binary(
+ N,
+ GetFPLibCall(N->getValueType(0), RTLIB::ATAN2_F32, RTLIB::ATAN2_F64,
+ RTLIB::ATAN2_F80, RTLIB::ATAN2_F128, RTLIB::ATAN2_PPCF128));
+}
+
SDValue DAGTypeLegalizer::SoftenFloatRes_FCBRT(SDNode *N) {
return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0),
RTLIB::CBRT_F32,
@@ -1430,6 +1439,8 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
case ISD::FASIN: ExpandFloatRes_FASIN(N, Lo, Hi); break;
case ISD::STRICT_FATAN:
case ISD::FATAN: ExpandFloatRes_FATAN(N, Lo, Hi); break;
+ case ISD::STRICT_FATAN2:
+ case ISD::FATAN2: ExpandFloatRes_FATAN2(N, Lo, Hi); break;
case ISD::FCBRT: ExpandFloatRes_FCBRT(N, Lo, Hi); break;
case ISD::STRICT_FCEIL:
case ISD::FCEIL: ExpandFloatRes_FCEIL(N, Lo, Hi); break;
@@ -1631,6 +1642,15 @@ void DAGTypeLegalizer::ExpandFloatRes_FATAN(SDNode *N, SDValue &Lo,
Lo, Hi);
}
+void DAGTypeLegalizer::ExpandFloatRes_FATAN2(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ ExpandFloatRes_Binary(N,
+ GetFPLibCall(N->getValueType(0), RTLIB::ATAN2_F32,
+ RTLIB::ATAN2_F64, RTLIB::ATAN2_F80,
+ RTLIB::ATAN2_F128, RTLIB::ATAN2_PPCF128),
+ Lo, Hi);
+}
+
void DAGTypeLegalizer::ExpandFloatRes_FCBRT(SDNode *N, SDValue &Lo,
SDValue &Hi) {
ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), RTLIB::CBRT_F32,
@@ -2673,6 +2693,7 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) {
case ISD::FMINNUM_IEEE:
case ISD::FMUL:
case ISD::FPOW:
+ case ISD::FATAN2:
case ISD::FREM:
case ISD::FSUB: R = PromoteFloatRes_BinOp(N); break;
@@ -3115,6 +3136,7 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) {
case ISD::FMINNUM:
case ISD::FMUL:
case ISD::FPOW:
+ case ISD::FATAN2:
case ISD::FREM:
case ISD::FSUB: R = SoftPromoteHalfRes_BinOp(N); break;
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index d14516ef3e2fbb..868da25ca8cb47 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -567,6 +567,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
SDValue SoftenFloatRes_FACOS(SDNode *N);
SDValue SoftenFloatRes_FASIN(SDNode *N);
SDValue SoftenFloatRes_FATAN(SDNode *N);
+ SDValue SoftenFloatRes_FATAN2(SDNode *N);
SDValue SoftenFloatRes_FMINNUM(SDNode *N);
SDValue SoftenFloatRes_FMAXNUM(SDNode *N);
SDValue SoftenFloatRes_FMINIMUMNUM(SDNode *N);
@@ -661,6 +662,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
void ExpandFloatRes_FACOS (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FASIN (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FATAN (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FATAN2 (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FMINNUM (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FMAXNUM (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FMINIMUMNUM(SDNode *N, SDValue &Lo, SDValue &Hi);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 0adf3cfb34c949..00b0c63378241f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -410,6 +410,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::FASIN:
case ISD::FACOS:
case ISD::FATAN:
+ case ISD::FATAN2:
case ISD::FSINH:
case ISD::FCOSH:
case ISD::FTANH:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 0a22f06271984e..2e70e22af3a62e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -164,6 +164,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::USHLSAT:
case ISD::FPOW:
+ case ISD::FATAN2:
case ISD::FREM:
case ISD::FSUB:
case ISD::MUL:
@@ -1291,6 +1292,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::UDIV: case ISD::VP_UDIV:
case ISD::FDIV: case ISD::VP_FDIV:
case ISD::FPOW:
+ case ISD::FATAN2:
case ISD::AND: case ISD::VP_AND:
case ISD::OR: case ISD::VP_OR:
case ISD::XOR: case ISD::VP_XOR:
@@ -4579,6 +4581,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
break;
case ISD::FPOW:
+ case ISD::FATAN2:
case ISD::FREM:
if (unrollExpandedOp())
break;
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 64414e2d44e65a..ddce4d0caa0297 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5460,6 +5460,7 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const
case ISD::FASIN:
case ISD::FACOS:
case ISD::FATAN:
+ case ISD::FATAN2:
case ISD::FSINH:
case ISD::FCOSH:
case ISD::FTANH:
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 25213f587116d5..91ce9ced5e1a18 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -6850,6 +6850,12 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
getValue(I.getArgOperand(0)), Flags));
return;
}
+ case Intrinsic::atan2:
+ setValue(&I, DAG.getNode(ISD::FATAN2, sdl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)), Flags));
+ return;
case Intrinsic::lround:
case Intrinsic::llround:
case Intrinsic::lrint:
@@ -9342,6 +9348,12 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
if (visitUnaryFloatCall(I, ISD::FATAN))
return;
break;
+ case LibFunc_atan2:
+ case LibFunc_atan2f:
+ case LibFunc_atan2l:
+ if (visitBinaryFloatCall(I, ISD::FATAN2))
+ return;
+ break;
case LibFunc_sinh:
case LibFunc_sinhf:
case LibFunc_sinhl:
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 56fc538172f9fc..703efb70089742 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -227,6 +227,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::STRICT_FACOS: return "strict_facos";
case ISD::FATAN: return "fatan";
case ISD::STRICT_FATAN: return "strict_fatan";
+ case ISD::FATAN2: return "fatan2";
+ case ISD::STRICT_FATAN2: return "strict_fatan2";
case ISD::FSINH: return "fsinh";
case ISD::STRICT_FSINH: return "strict_fsinh";
case ISD::FCOSH: return "fcosh";
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 1f49d60c970593..7a28f7892cbf31 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -783,7 +783,7 @@ void TargetLoweringBase::initActions() {
ISD::SIGN_EXTEND_VECTOR_INREG, ISD::ZERO_EXTEND_VECTOR_INREG,
ISD::SPLAT_VECTOR, ISD::LRINT, ISD::LLRINT, ISD::LROUND,
ISD::LLROUND, ISD::FTAN, ISD::FACOS, ISD::FASIN, ISD::FATAN,
- ISD::FCOSH, ISD::FSINH, ISD::FTANH},
+ ISD::FCOSH, ISD::FSINH, ISD::FTANH, ISD::FATAN2},
VT, Expand);
// Constrained floating-point operations default to expand.
@@ -842,7 +842,8 @@ void TargetLoweringBase::initActions() {
ISD::FEXP, ISD::FEXP2, ISD::FEXP10, ISD::FFLOOR,
ISD::FNEARBYINT, ISD::FCEIL, ISD::FRINT, ISD::FTRUNC,
ISD::FROUNDEVEN, ISD::FTAN, ISD::FACOS, ISD::FASIN,
- ISD::FATAN, ISD::FCOSH, ISD::FSINH, ISD::FTANH},
+ ISD::FATAN, ISD::FCOSH, ISD::FSINH, ISD::FTANH,
+ ISD::FATAN2},
{MVT::f32, MVT::f64, MVT::f128}, Expand);
// FIXME: Query RuntimeLibCalls to make the decision.
@@ -850,7 +851,7 @@ void TargetLoweringBase::initActions() {
{MVT::f32, MVT::f64, MVT::f128}, LibCall);
setOperationAction({ISD::FTAN, ISD::FACOS, ISD::FASIN, ISD::FATAN, ISD::FCOSH,
- ISD::FSINH, ISD::FTANH},
+ ISD::FSINH, ISD::FTANH, ISD::FATAN2},
MVT::f16, Promote);
// Default ISD::TRAP to expand (which turns it into abort).
setOperationAction(ISD::TRAP, MVT::Other, Expand);
diff --git a/llvm/lib/IR/RuntimeLibcalls.cpp b/llvm/lib/IR/RuntimeLibcalls.cpp
index d806f8093459ee..06167559a77697 100644
--- a/llvm/lib/IR/RuntimeLibcalls.cpp
+++ b/llvm/lib/IR/RuntimeLibcalls.cpp
@@ -49,6 +49,7 @@ void RuntimeLibcallsInfo::initLibcalls(const Triple &TT) {
setLibcallName(RTLIB::ASIN_F128, "asinf128");
setLibcallName(RTLIB::ACOS_F128, "acosf128");
setLibcallName(RTLIB::ATAN_F128, "atanf128");
+ setLibcallName(RTLIB::ATAN2_F128, "atan2f128");
setLibcallName(RTLIB::SINH_F128, "sinhf128");
setLibcallName(RTLIB::COSH_F128, "coshf128");
setLibcallName(RTLIB::TANH_F128, "tanhf128");
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 73f7f52846f625..1b453d49105519 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -858,6 +858,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FASIN , MVT::f80, Expand);
setOperationAction(ISD::FACOS , MVT::f80, Expand);
setOperationAction(ISD::FATAN , MVT::f80, Expand);
+ setOperationAction(ISD::FATAN2 , MVT::f80, Expand);
setOperationAction(ISD::FSINH , MVT::f80, Expand);
setOperationAction(ISD::FCOSH , MVT::f80, Expand);
setOperationAction(ISD::FTANH , MVT::f80, Expand);
@@ -2562,6 +2563,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
{ISD::FACOS, ISD::STRICT_FACOS,
ISD::FASIN, ISD::STRICT_FASIN,
ISD::FATAN, ISD::STRICT_FATAN,
+ ISD::FATAN2, ISD::STRICT_FATAN2,
ISD::FCEIL, ISD::STRICT_FCEIL,
ISD::FCOS, ISD::STRICT_FCOS,
ISD::FCOSH, ISD::STRICT_FCOSH,
diff --git a/llvm/test/Assembler/fp-intrinsics-attr.ll b/llvm/test/Assembler/fp-intrinsics-attr.ll
index da6507f051766c..5b9a44710763e4 100644
--- a/llvm/test/Assembler/fp-intrinsics-attr.ll
+++ b/llvm/test/Assembler/fp-intrinsics-attr.ll
@@ -105,6 +105,11 @@ define void @func(double %a, double %b, double %c, i32 %i) strictfp {
metadata !"round.dynamic",
metadata !"fpexcept.strict")
+ %atan2 = call double @llvm.experimental.constrained.atan2.f64(
+ double %a, double %b,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+
%cosh = call double @llvm.experimental.constrained.cosh.f64(
double %a,
metadata !"round.dynamic",
@@ -291,6 +296,9 @@ declare double @llvm.experimental.constrained.acos.f64(double, metadata, metadat
declare double @llvm.experimental.constrained.atan.f64(double, metadata, metadata)
; CHECK: @llvm.experimental.constrained.atan.f64({{.*}}) #[[ATTR1]]
+declare double @llvm.experimental.constrained.atan2.f64(double, double, metadata, metadata)
+; CHECK: @llvm.experimental.constrained.atan2.f64({{.*}}) #[[ATTR1]]
+
declare double @llvm.experimental.constrained.sinh.f64(double, metadata, metadata)
; CHECK: @llvm.experimental.constrained.sinh.f64({{.*}}) #[[ATTR1]]
diff --git a/llvm/test/CodeGen/X86/fp-intrinsics.ll b/llvm/test/CodeGen/X86/fp-intrinsics.ll
index bb87252e0b9b08..3a8facff83fd55 100644
--- a/llvm/test/CodeGen/X86/fp-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/fp-intrinsics.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --function fatan2 --version 2
; RUN: llc -O3 -mtriple=i686-pc-linux -mattr=+cmov < %s | FileCheck %s --check-prefix=X87
; RUN: llc -O3 -mtriple=i686-pc-linux -mattr=sse2 < %s | FileCheck %s --check-prefix=X86-SSE
; RUN: llc -O3 -mtriple=x86_64-pc-linux < %s | FileCheck %s --check-prefix=SSE
@@ -2962,6 +2962,64 @@ entry:
ret double %result
}
+; Verify that atan2(42.1, 3.0) isn't simplified when the rounding mode is unknown.
+define double @fatan2() #0 {
+; X87-LABEL: fatan2:
+; X87: # %bb.0: # %entry
+; X87-NEXT: subl $28, %esp
+; X87-NEXT: .cfi_def_cfa_offset 32
+; X87-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}
+; X87-NEXT: fstpl {{[0-9]+}}(%esp)
+; X87-NEXT: fldl {{\.?LCPI[0-9]+_[0-9]+}}
+; X87-NEXT: fstpl (%esp)
+; X87-NEXT: wait
+; X87-NEXT: calll atan2
+; X87-NEXT: addl $28, %esp
+; X87-NEXT: .cfi_def_cfa_offset 4
+; X87-NEXT: retl
+;
+; X86-SSE-LABEL: fatan2:
+; X86-SSE: # %bb.0: # %entry
+; X86-SSE-NEXT: subl $28, %esp
+; X86-SSE-NEXT: .cfi_def_cfa_offset 32
+; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = [3.0E+0,0.0E+0]
+; X86-SSE-NEXT: movsd %xmm0, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; X86-SSE-NEXT: movsd %xmm0, (%esp)
+; X86-SSE-NEXT: calll atan2
+; X86-SSE-NEXT: addl $28, %esp
+; X86-SSE-NEXT: .cfi_def_cfa_offset 4
+; X86-SSE-NEXT: retl
+;
+; SSE-LABEL: fatan2:
+; SSE: # %bb.0: # %entry
+; SSE-NEXT: pushq %rax
+; SSE-NEXT: .cfi_def_cfa_offset 16
+; SSE-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; SSE-NEXT: movsd {{.*#+}} xmm1 = [3.0E+0,0.0E+0]
+; SSE-NEXT: callq atan2@PLT
+; SSE-NEXT: popq %rax
+; SSE-NEXT: .cfi_def_cfa_offset 8
+; SSE-NEXT: retq
+;
+; AVX-LABEL: fatan2:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: pushq %rax
+; AVX-NEXT: .cfi_def_cfa_offset 16
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [3.0E+0,0.0E+0]
+; AVX-NEXT: callq atan2@PLT
+; AVX-NEXT: popq %rax
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
+entry:
+ %result = call double @llvm.experimental.constrained.atan2.f64(double 42.1,
+ double 3.0,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret double %result
+}
+
; Verify that cosh(42.0) isn't simplified when the rounding mode is unknown.
define double @fcosh() #0 {
; X87-LABEL: fcosh:
@@ -3132,6 +3190,7 @@ declare double @llvm.experimental.constrained.tan.f64(double, metadata, metadata
declare double @llvm.experimental.constrained.asin.f64(double, metadata, metadata)
declare double @llvm.experimental.constrained.acos.f64(double, metadata, metadata)
declare double @llvm.experimental.constrained.atan.f64(double, metadata, metadata)
+declare double @llvm.experimental.constrained.atan2.f64(double, double, metadata, metadata)
declare double @llvm.experimental.constrained.sinh.f64(double, metadata, metadata)
declare double @llvm.experimental.constrained.cosh.f64(double, metadata, metadata)
declare double @llvm.experimental.constrained.tanh.f64(double, metadata, metadata)
diff --git a/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll b/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll
index 9e84dfa5c41ae6..ffaa9f6297ed8c 100644
--- a/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll
+++ b/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll
@@ -1247,6 +1247,50 @@ entry:
ret fp128 %atan
}
+define fp128 @atan2(fp128 %x, fp128 %y) nounwind strictfp {
+; ANDROID-LABEL: atan2:
+; ANDROID: # %bb.0: # %entry
+; ANDROID-NEXT: pushq %rax
+; ANDROID-NEXT: callq atan2l@PLT
+; ANDROID-NEXT: popq %rax
+; ANDROID-NEXT: retq
+;
+; GNU-LABEL: atan2:
+; GNU: # %bb.0: # %entry
+; GNU-NEXT: pushq %rax
+; GNU-NEXT: callq atan2f128@PLT
+; GNU-NEXT: popq %rax
+; GNU-NEXT: retq
+;
+; X86-LABEL: atan2:
+; X86: # %bb.0: # %entry
+; X86-NEXT: pushl %esi
+; X86-NEXT: subl $24, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: subl $12, %esp
+; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl %eax
+; X86-NEXT: calll atan2l
+; X86-NEXT: addl $44, %esp
+; X86-NEXT: movaps (%esp), %xmm0
+; X86-NEXT: movaps %xmm0, (%esi)
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: addl $24, %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: retl $4
+entry:
+ %atan2 = call fp128 @llvm.experimental.constrained.atan2.f128(fp128 %x, fp128 %y, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+ ret fp128 %atan2
+}
+
define fp128 @tan(fp128 %x) nounwind strictfp {
; ANDROID-LABEL: tan:
; ANDROID: # %bb.0: # %entry
@@ -1926,7 +1970,9 @@ declare fp128 @llvm.experimental.constrained.fdiv.f128(fp128, fp128, metadata, m
declare fp128 @llvm.experimental.constrained.fma.f128(fp128, fp128, fp128, metadata, metadata)
declare fp128 @llvm.experimental.constrained.frem.f128(fp128, fp128, metadata, metadata)
declare fp128 @llvm.experimental.constrained.ceil.f128(fp128, metadata)
+declare fp128 @llvm.experimental.constrained.acos.f128(fp128, metadata, metadata)
declare fp128 @llvm.experimental.constrained.cos.f128(fp128, metadata, metadata)
+declare fp128 @llvm.experimental.constrained.cosh.f128(fp128, metadata, metadata)
declare fp128 @llvm.experimental.constrained.exp.f128(fp128, metadata, metadata)
declare fp128 @llvm.experimental.constrained.exp2.f128(fp128, metadata, metadata)
declare fp128 @llvm.experimental.constrained.floor.f128(fp128, metadata)
@@ -1941,9 +1987,14 @@ declare fp128 @llvm.experimental.constrained.powi.f128(fp128, i32, metadata, met
declare fp128 @llvm.experimental.constrained.rint.f128(fp128, metadata, metadata)
declare fp128 @llvm.experimental.constrained.round.f128(fp128, metadata)
declare fp128 @llvm.experimental.constrained.roundeven.f128(fp128, metadata)
+declare fp128 @llvm.experimental.constrained.asin.f128(fp128, metadata, metadata)
declare fp128 @llvm.experimental.constrained.sin.f128(fp128, metadata, metadata)
+declare fp128 @llvm.experimental.constrained.sinh.f128(fp128, metadata, metadata)
declare fp128 @llvm.experimental.constrained.sqrt.f128(fp128, metadata, metadata)
+declare fp128 @llvm.experimental.constrained.atan.f128(fp128, metadata, metadata)
+declare fp128 @llvm.experimental.constrained.atan2.f128(fp128, fp128, metadata, metadata)
declare fp128 @llvm.experimental.constrained.tan.f128(fp128, metadata, metadata)
+declare fp128 @llvm.experimental.constrained.tanh.f128(fp128, metadata, metadata)
declare fp128 @llvm.experimental.constrained.trunc.f128(fp128, metadata)
declare i32 @llvm.experimental.constrained.lrint.i32.f128(fp128, metadata, metadata)
declare i64 @llvm.experimental.constrained.llrint.i64.f128(fp128, metadata, metadata)
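
The fp128 case has the same shape, but which libcall is selected depends on the target environment, matching the GNU (atan2f128) and ANDROID (atan2l) checks above. A sketch, again with an illustrative function name:

define fp128 @strict_atan2_f128(fp128 %y, fp128 %x) strictfp {
entry:
  ; fp128 has no native x86 support, so this is always a libcall: GNU
  ; environments provide atan2f128, the Android configuration uses atan2l.
  %r = call fp128 @llvm.experimental.constrained.atan2.f128(
           fp128 %y, fp128 %x,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict") strictfp
  ret fp128 %r
}

declare fp128 @llvm.experimental.constrained.atan2.f128(fp128, fp128, metadata, metadata)
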
diff --git a/llvm/test/CodeGen/X86/fp80-strict-libcalls.ll b/llvm/test/CodeGen/X86/fp80-strict-libcalls.ll
index c14e99f3acb34e..8bbc6247dbafd6 100644
--- a/llvm/test/CodeGen/X86/fp80-strict-libcalls.ll
+++ b/llvm/test/CodeGen/X86/fp80-strict-libcalls.ll
@@ -629,6 +629,35 @@ entry:
ret x86_fp80 %atan
}
+define x86_fp80 @atan2(x86_fp80 %x, x86_fp80 %y) nounwind strictfp {
+; X86-LABEL: atan2:
+; X86: # %bb.0: # %entry
+; X86-NEXT: subl $24, %esp
+; X86-NEXT: fldt {{[0-9]+}}(%esp)
+; X86-NEXT: fldt {{[0-9]+}}(%esp)
+; X86-NEXT: fstpt {{[0-9]+}}(%esp)
+; X86-NEXT: fstpt (%esp)
+; X86-NEXT: wait
+; X86-NEXT: calll atan2l
+; X86-NEXT: addl $24, %esp
+; X86-NEXT: retl
+;
+; X64-LABEL: atan2:
+; X64: # %bb.0: # %entry
+; X64-NEXT: subq $40, %rsp
+; X64-NEXT: fldt {{[0-9]+}}(%rsp)
+; X64-NEXT: fldt {{[0-9]+}}(%rsp)
+; X64-NEXT: fstpt {{[0-9]+}}(%rsp)
+; X64-NEXT: fstpt (%rsp)
+; X64-NEXT: wait
+; X64-NEXT: callq atan2l@PLT
+; X64-NEXT: addq $40, %rsp
+; X64-NEXT: retq
+entry:
+ %atan2 = call x86_fp80 @llvm.experimental.constrained.atan2.f80(x86_fp80 %x, x86_fp80 %y, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+ ret x86_fp80 %atan2
+}
+
define x86_fp80 @tan(x86_fp80 %x) nounwind strictfp {
; X86-LABEL: tan:
; X86: # %bb.0: # %entry
@@ -809,7 +838,9 @@ attributes #0 = { strictfp }
declare x86_fp80 @llvm.experimental.constrained.fma.f80(x86_fp80, x86_fp80, x86_fp80, metadata, metadata)
declare x86_fp80 @llvm.experimental.constrained.frem.f80(x86_fp80, x86_fp80, metadata, metadata)
declare x86_fp80 @llvm.experimental.constrained.ceil.f80(x86_fp80, metadata)
+declare x86_fp80 @llvm.experimental.constrained.acos.f80(x86_fp80, metadata, metadata)
declare x86_fp80 @llvm.experimental.constrained.cos.f80(x86_fp80, metadata, metadata)
+declare x86_fp80 @llvm.experimental.constrained.cosh.f80(x86_fp80, metadata, metadata)
declare x86_fp80 @llvm.experimental.constrained.exp.f80(x86_fp80, metadata, metadata)
declare x86_fp80 @llvm.experimental.constrained.exp2.f80(x86_fp80, metadata, metadata)
declare x86_fp80 @llvm.experimental.constrained.floor.f80(x86_fp80, metadata)
@@ -824,8 +855,13 @@ declare x86_fp80 @llvm.experimental.constrained.powi.f80(x86_fp80, i32, metadata
declare x86_fp80 @llvm.experimental.constrained.rint.f80(x86_fp80, metadata, metadata)
declare x86_fp80 @llvm.experimental.constrained.round.f80(x86_fp80, metadata)
declare x86_fp80 @llvm.experimental.constrained.roundeven.f80(x86_fp80, metadata)
+declare x86_fp80 @llvm.experimental.constrained.asin.f80(x86_fp80, metadata, metadata)
declare x86_fp80 @llvm.experimental.constrained.sin.f80(x86_fp80, metadata, metadata)
+declare x86_fp80 @llvm.experimental.constrained.sinh.f80(x86_fp80, metadata, metadata)
+declare x86_fp80 @llvm.experimental.constrained.atan.f80(x86_fp80, metadata, metadata)
+declare x86_fp80 @llvm.experimental.constrained.atan2.f80(x86_fp80, x86_fp80, metadata, metadata)
declare x86_fp80 @llvm.experimental.constrained.tan.f80(x86_fp80, metadata, metadata)
+declare x86_fp80 @llvm.experimental.constrained.tanh.f80(x86_fp80, metadata, metadata)
declare x86_fp80 @llvm.experimental.constrained.trunc.f80(x86_fp80, metadata)
declare i32 @llvm.experimental.constrained.lrint.i32.f80(x86_fp80, metadata, metadata)
declare i64 @llvm.experimental.constrained.llrint.i64.f80(x86_fp80, metadata, metadata)
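
Likewise for x86_fp80, where the fldt/fstpt sequences above move both operands through the x87 stack into the outgoing argument area before calling atan2l. A sketch under the same assumptions:

define x86_fp80 @strict_atan2_f80(x86_fp80 %y, x86_fp80 %x) strictfp {
entry:
  ; No hardware atan2: both 80-bit operands are spilled with fstpt and
  ; the atan2l libcall does the work.
  %r = call x86_fp80 @llvm.experimental.constrained.atan2.f80(
           x86_fp80 %y, x86_fp80 %x,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict") strictfp
  ret x86_fp80 %r
}

declare x86_fp80 @llvm.experimental.constrained.atan2.f80(x86_fp80, x86_fp80, metadata, metadata)
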
diff --git a/llvm/test/CodeGen/X86/llvm.atan2.ll b/llvm/test/CodeGen/X86/llvm.atan2.ll
new file mode 100644
index 00000000000000..ef2e4be36203be
--- /dev/null
+++ b/llvm/test/CodeGen/X86/llvm.atan2.ll
@@ -0,0 +1,80 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
+
+define half @use_atan2f16(half %a, half %b) nounwind {
+; CHECK-LABEL: use_atan2f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: callq __extendhfsf2@PLT
+; CHECK-NEXT: movss %xmm0, (%rsp) # 4-byte Spill
+; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT: callq __extendhfsf2@PLT
+; CHECK-NEXT: movss (%rsp), %xmm1 # 4-byte Reload
+; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero
+; CHECK-NEXT: callq atan2f@PLT
+; CHECK-NEXT: callq __truncsfhf2@PLT
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: retq
+ %x = call half @llvm.atan2.f16(half %a, half %b)
+ ret half %x
+}
+
+define float @use_atan2f32(float %a, float %b) nounwind {
+; CHECK-LABEL: use_atan2f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: jmp atan2f@PLT # TAILCALL
+ %x = call float @llvm.atan2.f32(float %a, float %b)
+ ret float %x
+}
+
+define double @use_atan2f64(double %a, double %b) nounwind {
+; CHECK-LABEL: use_atan2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: jmp atan2@PLT # TAILCALL
+ %x = call double @llvm.atan2.f64(double %a, double %b)
+ ret double %x
+}
+
+define x86_fp80 @use_atan2f80(x86_fp80 %a, x86_fp80 %b) nounwind {
+; CHECK-LABEL: use_atan2f80:
+; CHECK: # %bb.0:
+; CHECK-NEXT: subq $40, %rsp
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fstpt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fstpt (%rsp)
+; CHECK-NEXT: callq atan2l@PLT
+; CHECK-NEXT: addq $40, %rsp
+; CHECK-NEXT: retq
+ %x = call x86_fp80 @llvm.atan2.f80(x86_fp80 %a, x86_fp80 %b)
+ ret x86_fp80 %x
+}
+
+define fp128 @use_atan2fp128(fp128 %a, fp128 %b) nounwind {
+; CHECK-LABEL: use_atan2fp128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: jmp atan2f128@PLT # TAILCALL
+ %x = call fp128 @llvm.atan2.f128(fp128 %a, fp128 %b)
+ ret fp128 %x
+}
+
+define ppc_fp128 @use_atan2ppc_fp128(ppc_fp128 %a, ppc_fp128 %b) nounwind {
+; CHECK-LABEL: use_atan2ppc_fp128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: callq atan2l@PLT
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: retq
+ %x = call ppc_fp128 @llvm.atan2.ppcf128(ppc_fp128 %a, ppc_fp128 %b)
+ ret ppc_fp128 %x
+}
+
+declare half @llvm.atan2.f16(half, half)
+declare float @llvm.atan2.f32(float, float)
+declare double @llvm.atan2.f64(double, double)
+declare x86_fp80 @llvm.atan2.f80(x86_fp80, x86_fp80)
+declare fp128 @llvm.atan2.f128(fp128, fp128)
+declare ppc_fp128 @llvm.atan2.ppcf128(ppc_fp128, ppc_fp128)
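
Unlike the constrained tests, llvm.atan2.ll exercises the plain intrinsic, which carries no rounding or exception metadata. In the default FP environment the scalar f32 case can be emitted as a bare tail jump, as the TAILCALL check above shows; a minimal sketch:

define float @plain_atan2f(float %y, float %x) nounwind {
  ; Arguments are already in xmm0/xmm1 under the SysV calling convention,
  ; so this lowers to jmp atan2f@PLT with no frame setup.
  %r = tail call float @llvm.atan2.f32(float %y, float %x)
  ret float %r
}

declare float @llvm.atan2.f32(float, float)
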
diff --git a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
index b486014678466e..21dfdc3c2abe49 100644
--- a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
@@ -8672,6 +8672,263 @@ entry:
ret <4 x double> %atan
}
+define <1 x float> @constrained_vector_atan2_v1f32() #0 {
+; CHECK-LABEL: constrained_vector_atan2_v1f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: movss {{.*#+}} xmm1 = [2.3E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: callq atan2f@PLT
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_atan2_v1f32:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: pushq %rax
+; AVX-NEXT: .cfi_def_cfa_offset 16
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: vmovss {{.*#+}} xmm1 = [2.3E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: callq atan2f@PLT
+; AVX-NEXT: popq %rax
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
+entry:
+ %atan2 = call <1 x float> @llvm.experimental.constrained.atan2.v1f32(
+ <1 x float> <float 42.0>,
+ <1 x float> <float 23.0>,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <1 x float> %atan2
+}
+
+define <2 x double> @constrained_vector_atan2_v2f64() #0 {
+; CHECK-LABEL: constrained_vector_atan2_v2f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; CHECK-NEXT: movsd {{.*#+}} xmm1 = [2.3100000000000001E+1,0.0E+0]
+; CHECK-NEXT: callq atan2@PLT
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; CHECK-NEXT: movsd {{.*#+}} xmm1 = [2.3E+1,0.0E+0]
+; CHECK-NEXT: callq atan2@PLT
+; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_atan2_v2f64:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: subq $24, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 32
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [2.3100000000000001E+1,0.0E+0]
+; AVX-NEXT: callq atan2@PLT
+; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [2.3E+1,0.0E+0]
+; AVX-NEXT: callq atan2@PLT
+; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX-NEXT: addq $24, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
+entry:
+ %atan2 = call <2 x double> @llvm.experimental.constrained.atan2.v2f64(
+ <2 x double> <double 42.0, double 42.1>,
+ <2 x double> <double 23.0, double 23.1>,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <2 x double> %atan2
+}
+
+define <3 x float> @constrained_vector_atan2_v3f32() #0 {
+; CHECK-LABEL: constrained_vector_atan2_v3f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: subq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: movss {{.*#+}} xmm1 = [2.5E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: callq atan2f@PLT
+; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: movss {{.*#+}} xmm1 = [2.3E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: callq atan2f@PLT
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: movss {{.*#+}} xmm1 = [2.4E+1,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: callq atan2f@PLT
+; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
+; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: addq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_atan2_v3f32:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: subq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 48
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: vmovss {{.*#+}} xmm1 = [2.5E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: callq atan2f@PLT
+; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: vmovss {{.*#+}} xmm1 = [2.3E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: callq atan2f@PLT
+; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: vmovss {{.*#+}} xmm1 = [2.4E+1,0.0E+0,0.0E+0,0.0E+0]
+; AVX-NEXT: callq atan2f@PLT
+; AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
+; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
+; AVX-NEXT: vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3]
+; AVX-NEXT: addq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
+entry:
+ %atan2 = call <3 x float> @llvm.experimental.constrained.atan2.v3f32(
+ <3 x float> <float 42.0, float 43.0, float 44.0>,
+ <3 x float> <float 23.0, float 24.0, float 25.0>,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <3 x float> %atan2
+}
+
+define <3 x double> @constrained_vector_atan2_v3f64() #0 {
+; CHECK-LABEL: constrained_vector_atan2_v3f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; CHECK-NEXT: movsd {{.*#+}} xmm1 = [2.3100000000000001E+1,0.0E+0]
+; CHECK-NEXT: callq atan2@PLT
+; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; CHECK-NEXT: movsd {{.*#+}} xmm1 = [2.3E+1,0.0E+0]
+; CHECK-NEXT: callq atan2@PLT
+; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
+; CHECK-NEXT: movsd {{.*#+}} xmm1 = [2.3199999999999999E+1,0.0E+0]
+; CHECK-NEXT: callq atan2@PLT
+; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
+; CHECK-NEXT: wait
+; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload
+; CHECK-NEXT: # xmm0 = mem[0],zero
+; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
+; CHECK-NEXT: # xmm1 = mem[0],zero
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_atan2_v3f64:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: subq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 48
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [2.3100000000000001E+1,0.0E+0]
+; AVX-NEXT: callq atan2@PLT
+; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [2.3E+1,0.0E+0]
+; AVX-NEXT: callq atan2@PLT
+; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
+; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [2.3199999999999999E+1,0.0E+0]
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: callq atan2@PLT
+; AVX-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload
+; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX-NEXT: addq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
+entry:
+ %atan2 = call <3 x double> @llvm.experimental.constrained.atan2.v3f64(
+ <3 x double> <double 42.0, double 42.1, double 42.2>,
+ <3 x double> <double 23.0, double 23.1, double 23.2>,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <3 x double> %atan2
+}
+
+define <4 x double> @constrained_vector_atan2_v4f64() #0 {
+; CHECK-LABEL: constrained_vector_atan2_v4f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: subq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; CHECK-NEXT: movsd {{.*#+}} xmm1 = [2.3100000000000001E+1,0.0E+0]
+; CHECK-NEXT: callq atan2@PLT
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; CHECK-NEXT: movsd {{.*#+}} xmm1 = [2.3E+1,0.0E+0]
+; CHECK-NEXT: callq atan2@PLT
+; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0]
+; CHECK-NEXT: movsd {{.*#+}} xmm1 = [2.3300000000000001E+1,0.0E+0]
+; CHECK-NEXT: callq atan2@PLT
+; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
+; CHECK-NEXT: movsd {{.*#+}} xmm1 = [2.3199999999999999E+1,0.0E+0]
+; CHECK-NEXT: callq atan2@PLT
+; CHECK-NEXT: movaps %xmm0, %xmm1
+; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
+; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: addq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+;
+; AVX-LABEL: constrained_vector_atan2_v4f64:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: subq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 48
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0]
+; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [2.3300000000000001E+1,0.0E+0]
+; AVX-NEXT: callq atan2@PLT
+; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
+; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [2.3199999999999999E+1,0.0E+0]
+; AVX-NEXT: callq atan2@PLT
+; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
+; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [2.3100000000000001E+1,0.0E+0]
+; AVX-NEXT: callq atan2@PLT
+; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
+; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [2.3E+1,0.0E+0]
+; AVX-NEXT: callq atan2@PLT
+; AVX-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
+; AVX-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
+; AVX-NEXT: addq $40, %rsp
+; AVX-NEXT: .cfi_def_cfa_offset 8
+; AVX-NEXT: retq
+entry:
+ %atan2 = call <4 x double> @llvm.experimental.constrained.atan2.v4f64(
+ <4 x double> <double 42.0, double 42.1,
+ double 42.2, double 42.3>,
+ <4 x double> <double 23.0, double 23.1,
+ double 23.2, double 23.3>,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret <4 x double> %atan2
+}
+
define <1 x float> @constrained_vector_cosh_v1f32() #0 {
; CHECK-LABEL: constrained_vector_cosh_v1f32:
; CHECK: # %bb.0: # %entry
@@ -9546,6 +9803,7 @@ declare <4 x double> @llvm.experimental.constrained.tan.v4f64(<4 x double>, meta
declare <4 x double> @llvm.experimental.constrained.asin.v4f64(<4 x double>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.acos.v4f64(<4 x double>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.atan.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.atan2.v4f64(<4 x double>, <4 x double>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.sinh.v4f64(<4 x double>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.cosh.v4f64(<4 x double>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.tanh.v4f64(<4 x double>, metadata, metadata)
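
There is no vector atan2 libcall in the default lowering, so each vector test above is scalarized: one atan2/atan2f call per element, with unpck/insert shuffles rebuilding the result vector. The v2f64 input IR looks like this (a sketch with variable operands in place of the test's constants):

define <2 x double> @strict_atan2_v2f64(<2 x double> %y, <2 x double> %x) strictfp {
entry:
  ; Type legalization splits this into two scalar atan2 libcalls whose
  ; results are recombined with unpcklpd, as in the checks above.
  %r = call <2 x double> @llvm.experimental.constrained.atan2.v2f64(
           <2 x double> %y, <2 x double> %x,
           metadata !"round.dynamic",
           metadata !"fpexcept.strict") strictfp
  ret <2 x double> %r
}

declare <2 x double> @llvm.experimental.constrained.atan2.v2f64(<2 x double>, <2 x double>, metadata, metadata)
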
diff --git a/llvm/test/Feature/fp-intrinsics.ll b/llvm/test/Feature/fp-intrinsics.ll
index 80f8b15abfaabe..ada22c39abc9e7 100644
--- a/llvm/test/Feature/fp-intrinsics.ll
+++ b/llvm/test/Feature/fp-intrinsics.ll
@@ -184,7 +184,7 @@ entry:
ret double %result
}
; Verify that atan(42.0) isn't simplified when the rounding mode is unknown.
; CHECK-LABEL: fatan
; CHECK: call double @llvm.experimental.constrained.atan
define double @fatan() #0 {
@@ -195,6 +195,19 @@ entry:
ret double %result
}
+; Verify that atan2(42.0, 23.0) isn't simplified when the rounding mode is unknown.
+; CHECK-LABEL: fatan2
+; CHECK: call double @llvm.experimental.constrained.atan2
+define double @fatan2() #0 {
+entry:
+ %result = call double @llvm.experimental.constrained.atan2.f64(
+ double 42.0,
+ double 23.0,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict") #0
+ ret double %result
+}
+
; Verify that cosh(42.0) isn't simplified when the rounding mode is unknown.
; CHECK-LABEL: fcosh
; CHECK: call double @llvm.experimental.constrained.cosh
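
The Feature test checks the inverse property of the CodeGen tests: the optimizer must leave the constrained call intact. The unconstrained intrinsic makes the contrast clear; with no environment metadata, the call in this sketch may legally be folded to a constant (whether it actually folds is up to the constant folder):

define double @default_env_atan2() {
  ; No rounding or exception constraints, so LLVM is free to evaluate
  ; atan2(42.0, 23.0) at compile time.
  %r = call double @llvm.atan2.f64(double 42.0, double 23.0)
  ret double %r
}

declare double @llvm.atan2.f64(double, double)
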
>From 7a87c0326faad84cdd0419fd173e3b4d3613ec9a Mon Sep 17 00:00:00 2001
From: Tex Riddell <texr at microsoft.com>
Date: Thu, 10 Oct 2024 15:32:39 -0700
Subject: [PATCH 9/9] Remove GlobalISel changes not needed yet
---
llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td | 1 -
llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 4 ----
llvm/lib/CodeGen/GlobalISel/Utils.cpp | 2 --
3 files changed, 7 deletions(-)
diff --git a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
index 83bf3c335cac89..d9121cf166e5aa 100644
--- a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
+++ b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
@@ -154,7 +154,6 @@ def : GINodeEquiv<G_FTAN, ftan>;
def : GINodeEquiv<G_FACOS, facos>;
def : GINodeEquiv<G_FASIN, fasin>;
def : GINodeEquiv<G_FATAN, fatan>;
-def : GINodeEquiv<G_FATAN2, fatan2>;
def : GINodeEquiv<G_FCOSH, fcosh>;
def : GINodeEquiv<G_FSINH, fsinh>;
def : GINodeEquiv<G_FTANH, ftanh>;
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index d5222641342ac1..3b2fd95076c465 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -457,8 +457,6 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
RTLIBCASE(ACOS_F);
case TargetOpcode::G_FATAN:
RTLIBCASE(ATAN_F);
- case TargetOpcode::G_FATAN2:
- RTLIBCASE(ATAN2_F);
case TargetOpcode::G_FSINH:
RTLIBCASE(SINH_F);
case TargetOpcode::G_FCOSH:
@@ -1204,7 +1202,6 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
case TargetOpcode::G_FACOS:
case TargetOpcode::G_FASIN:
case TargetOpcode::G_FATAN:
- case TargetOpcode::G_FATAN2:
case TargetOpcode::G_FCOSH:
case TargetOpcode::G_FSINH:
case TargetOpcode::G_FTANH:
@@ -3125,7 +3122,6 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
case TargetOpcode::G_FACOS:
case TargetOpcode::G_FASIN:
case TargetOpcode::G_FATAN:
- case TargetOpcode::G_FATAN2:
case TargetOpcode::G_FCOSH:
case TargetOpcode::G_FSINH:
case TargetOpcode::G_FTANH:
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index 722ceea29c951c..9574464207d99f 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -828,7 +828,6 @@ bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI,
case TargetOpcode::G_FACOS:
case TargetOpcode::G_FASIN:
case TargetOpcode::G_FATAN:
- case TargetOpcode::G_FATAN2:
case TargetOpcode::G_FCOSH:
case TargetOpcode::G_FSINH:
case TargetOpcode::G_FTANH:
@@ -1716,7 +1715,6 @@ bool llvm::isPreISelGenericFloatingPointOpcode(unsigned Opc) {
case TargetOpcode::G_FACOS:
case TargetOpcode::G_FASIN:
case TargetOpcode::G_FATAN:
- case TargetOpcode::G_FATAN2:
case TargetOpcode::G_FCOSH:
case TargetOpcode::G_FSINH:
case TargetOpcode::G_FTANH: