[clang] [llvm] Change `fp128` lowering to use `f128` functions by default (PR #76558)

Sun Jul 13 22:45:32 PDT 2025

https://github.com/tgross35 updated https://github.com/llvm/llvm-project/pull/76558

>From 7f5dd712c8d5653d63386723507b3dfc3ac490ac Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross at umich.edu>
Date: Sat, 12 Jul 2025 04:20:51 -0400
Subject: [PATCH 1/7] [RuntimeLibcalls] Use a multiclass for all libm impls

`LibmLongDoubleLibCall` currently handles generating definitions for
the three long double variants, but `F32` and `F64` always get a
hand-written definition. Simplify this by replacing
`LibmLongDoubleLibCall` with `LibmLibcallImpls`, which also expands
`F32` and `F64`.

As part of this, `LibmLibcallImpls` can take a function name with an `X`
placeholder, to be replaced with the float type suffix. This allows the
multiclass to also be used for libcalls with the suffix in the middle
rather than strictly at the end.
---
 llvm/include/llvm/IR/RuntimeLibcalls.td | 283 ++++++------------------
 1 file changed, 64 insertions(+), 219 deletions(-)

diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.td b/llvm/include/llvm/IR/RuntimeLibcalls.td
index 9781bef8e37b6..e0b29bd52a87e 100644
--- a/llvm/include/llvm/IR/RuntimeLibcalls.td
+++ b/llvm/include/llvm/IR/RuntimeLibcalls.td
@@ -363,17 +363,26 @@ def MIPS16_RET_DF : RuntimeLibcall;
 def MIPS16_RET_SC : RuntimeLibcall;
 def MIPS16_RET_SF : RuntimeLibcall;
 
-multiclass LibmLongDoubleLibCall<string libcall_basename = !toupper(NAME),
-                                 string rtbasename = NAME> {
+// Produce libcall impls for all float types. If provided, `rtbasename` should
+// contain an `X` that will be replaced with the `f`/`l`/`fX` suffix (if not
+// provided, it is appended to the def name).
+multiclass LibmLibcallImpls<string libcall_basename = !toupper(NAME),
+                            string rtbasename = !strconcat(NAME, "X")> {
+  def NAME#"f"
+      : RuntimeLibcallImpl<!cast<RuntimeLibcall>(libcall_basename#"_F32"),
+                           !subst("X", "f", rtbasename)>;
+  def NAME#""
+      : RuntimeLibcallImpl<!cast<RuntimeLibcall>(libcall_basename#"_F64"),
+                           !subst("X", "", rtbasename)>;
   def NAME#"_f128"
       : RuntimeLibcallImpl<!cast<RuntimeLibcall>(libcall_basename#"_F128"),
-                           !strconcat(rtbasename, "l")>;
+                           !subst("X", "l", rtbasename)>;
   def NAME#"_ppcf128"
       : RuntimeLibcallImpl<!cast<RuntimeLibcall>(libcall_basename#"_PPCF128"),
-                           !strconcat(rtbasename, "l")>;
+                           !subst("X", "l", rtbasename)>;
   def NAME#"_f80"
       : RuntimeLibcallImpl<!cast<RuntimeLibcall>(libcall_basename#"_F80"),
-                           !strconcat(rtbasename, "l")>;
+                           !subst("X", "l", rtbasename)>;
 }
 
 // AArch64 calls
@@ -701,217 +710,55 @@ def __clear_cache : RuntimeLibcallImpl<CLEAR_CACHE>;
 // libm
 //--------------------------------------------------------------------
 
-def fmodf : RuntimeLibcallImpl<REM_F32>;
-def fmod : RuntimeLibcallImpl<REM_F64>;
-def fmodl_f128 : RuntimeLibcallImpl<REM_F128, "fmodl">;
-def fmodl_f80 : RuntimeLibcallImpl<REM_F80, "fmodl">;
-def fmodl_ppc128 : RuntimeLibcallImpl<REM_PPCF128, "fmodl">;
-
-def fmaf : RuntimeLibcallImpl<FMA_F32>;
-def fma : RuntimeLibcallImpl<FMA_F64>;
-defm fma : LibmLongDoubleLibCall;
-
-def sqrtf : RuntimeLibcallImpl<SQRT_F32>;
-def sqrt : RuntimeLibcallImpl<SQRT_F64>;
-defm sqrt : LibmLongDoubleLibCall;
-
-def cbrtf : RuntimeLibcallImpl<CBRT_F32>;
-def cbrt : RuntimeLibcallImpl<CBRT_F64>;
-defm cbrt : LibmLongDoubleLibCall;
-
-def logf : RuntimeLibcallImpl<LOG_F32>;
-def log : RuntimeLibcallImpl<LOG_F64>;
-defm log : LibmLongDoubleLibCall;
-
-def __logf_finite : RuntimeLibcallImpl<LOG_FINITE_F32>;
-def __log_finite : RuntimeLibcallImpl<LOG_FINITE_F64>;
-def __logl_finite_f80 : RuntimeLibcallImpl<LOG_FINITE_F80, "__logl_finite">;
-def __logl_finite_f128 : RuntimeLibcallImpl<LOG_FINITE_F128, "__logl_finite">;
-def __logl_finite_ppcf128 : RuntimeLibcallImpl<LOG_FINITE_PPCF128, "__logl_finite">;
-
-def log2f : RuntimeLibcallImpl<LOG2_F32>;
-def log2 : RuntimeLibcallImpl<LOG2_F64>;
-defm log2 : LibmLongDoubleLibCall;
-
-def __log2f_finite : RuntimeLibcallImpl<LOG2_FINITE_F32>;
-def __log2_finite : RuntimeLibcallImpl<LOG2_FINITE_F64>;
-def __log2l_finite_f80 : RuntimeLibcallImpl<LOG2_FINITE_F80, "__log2l_finite">;
-def __log2l_finite_f128 : RuntimeLibcallImpl<LOG2_FINITE_F128, "__log2l_finite">;
-def __log2l_finite_ppcf128 : RuntimeLibcallImpl<LOG2_FINITE_PPCF128, "__log2l_finite">;
-
-def log10f : RuntimeLibcallImpl<LOG10_F32>;
-def log10 : RuntimeLibcallImpl<LOG10_F64>;
-defm log10 : LibmLongDoubleLibCall;
-
-def __log10f_finite : RuntimeLibcallImpl<LOG10_FINITE_F32>;
-def __log10_finite : RuntimeLibcallImpl<LOG10_FINITE_F64>;
-def __log10l_finite_f80 : RuntimeLibcallImpl<LOG10_FINITE_F80, "__log10l_finite">;
-def __log10l_finite_f128 : RuntimeLibcallImpl<LOG10_FINITE_F128, "__log10l_finite">;
-def __log10l_finite_ppcf128 : RuntimeLibcallImpl<LOG10_FINITE_PPCF128, "__log10l_finite">;
-
-def expf : RuntimeLibcallImpl<EXP_F32>;
-def exp : RuntimeLibcallImpl<EXP_F64>;
-defm exp : LibmLongDoubleLibCall<"EXP", "exp">;
-
-def __expf_finite : RuntimeLibcallImpl<EXP_FINITE_F32>;
-def __exp_finite : RuntimeLibcallImpl<EXP_FINITE_F64>;
-def __expl_finite_f80 : RuntimeLibcallImpl<EXP_FINITE_F80, "__expl_finite">;
-def __expl_finite_f128 : RuntimeLibcallImpl<EXP_FINITE_F128, "__expl_finite">;
-def __expl_finite_ppcf128 : RuntimeLibcallImpl<EXP_FINITE_PPCF128, "__expl_finite">;
-
-def exp2f : RuntimeLibcallImpl<EXP2_F32>;
-def exp2 : RuntimeLibcallImpl<EXP2_F64>;
-defm exp2 : LibmLongDoubleLibCall<"EXP2", "exp2">;
-
-def __exp2f_finite : RuntimeLibcallImpl<EXP2_FINITE_F32>;
-def __exp2_finite : RuntimeLibcallImpl<EXP2_FINITE_F64>;
-def __exp2l_finite_f80 : RuntimeLibcallImpl<EXP2_FINITE_F80, "__exp2l_finite">;
-def __exp2l_finite_f128 : RuntimeLibcallImpl<EXP2_FINITE_F128, "__exp2l_finite">;
-def __exp2l_finite_ppcf128 : RuntimeLibcallImpl<EXP2_FINITE_PPCF128, "__exp2l_finite">;
-
-def exp10f : RuntimeLibcallImpl<EXP10_F32>;
-def exp10 : RuntimeLibcallImpl<EXP10_F64>;
-def exp10l_f80 : RuntimeLibcallImpl<EXP10_F80, "exp10l">;
-def exp10l_f128 : RuntimeLibcallImpl<EXP10_F128, "exp10l">;
-def exp10l_ppcf128 : RuntimeLibcallImpl<EXP10_PPCF128, "exp10l">;
-
-def sinf : RuntimeLibcallImpl<SIN_F32>;
-def sin : RuntimeLibcallImpl<SIN_F64>;
-defm sin : LibmLongDoubleLibCall;
-
-def cosf : RuntimeLibcallImpl<COS_F32>;
-def cos : RuntimeLibcallImpl<COS_F64>;
-defm cos : LibmLongDoubleLibCall;
-
-def tanf : RuntimeLibcallImpl<TAN_F32>;
-def tan : RuntimeLibcallImpl<TAN_F64>;
-defm tan : LibmLongDoubleLibCall;
-
-def sinhf : RuntimeLibcallImpl<SINH_F32>;
-def sinh : RuntimeLibcallImpl<SINH_F64>;
-defm sinh : LibmLongDoubleLibCall;
-
-def coshf : RuntimeLibcallImpl<COSH_F32>;
-def cosh : RuntimeLibcallImpl<COSH_F64>;
-defm cosh : LibmLongDoubleLibCall;
-
-def tanhf : RuntimeLibcallImpl<TANH_F32>;
-def tanh : RuntimeLibcallImpl<TANH_F64>;
-defm tanh : LibmLongDoubleLibCall;
-
-def asinf : RuntimeLibcallImpl<ASIN_F32>;
-def asin : RuntimeLibcallImpl<ASIN_F64>;
-defm asin : LibmLongDoubleLibCall;
-
-def acosf : RuntimeLibcallImpl<ACOS_F32>;
-def acos : RuntimeLibcallImpl<ACOS_F64>;
-defm acos : LibmLongDoubleLibCall;
-
-def atanf : RuntimeLibcallImpl<ATAN_F32>;
-def atan : RuntimeLibcallImpl<ATAN_F64>;
-defm atan : LibmLongDoubleLibCall;
-
-def atan2f : RuntimeLibcallImpl<ATAN2_F32>;
-def atan2 : RuntimeLibcallImpl<ATAN2_F64>;
-defm atan2 : LibmLongDoubleLibCall;
-
-def powf : RuntimeLibcallImpl<POW_F32>;
-def pow : RuntimeLibcallImpl<POW_F64>;
-defm pow : LibmLongDoubleLibCall;
-
-def __powf_finite : RuntimeLibcallImpl<POW_FINITE_F32>;
-def __pow_finite : RuntimeLibcallImpl<POW_FINITE_F64>;
-def __powl_finite_f80 : RuntimeLibcallImpl<POW_FINITE_F80, "__powl_finite">;
-def __powl_finite_f128 : RuntimeLibcallImpl<POW_FINITE_F128, "__powl_finite">;
-def __powl_finite_ppcf128 : RuntimeLibcallImpl<POW_FINITE_PPCF128, "__powl_finite">;
-
-def ceilf : RuntimeLibcallImpl<CEIL_F32>;
-def ceil : RuntimeLibcallImpl<CEIL_F64>;
-defm ceil : LibmLongDoubleLibCall;
-
-def truncf : RuntimeLibcallImpl<TRUNC_F32>;
-def trunc : RuntimeLibcallImpl<TRUNC_F64>;
-defm trunc : LibmLongDoubleLibCall;
-
-def rintf : RuntimeLibcallImpl<RINT_F32>;
-def rint : RuntimeLibcallImpl<RINT_F64>;
-defm rint : LibmLongDoubleLibCall;
-
-def nearbyintf : RuntimeLibcallImpl<NEARBYINT_F32>;
-def nearbyint : RuntimeLibcallImpl<NEARBYINT_F64>;
-defm nearbyint : LibmLongDoubleLibCall;
-
-def roundf : RuntimeLibcallImpl<ROUND_F32>;
-def round : RuntimeLibcallImpl<ROUND_F64>;
-defm round : LibmLongDoubleLibCall;
-
-def roundevenf : RuntimeLibcallImpl<ROUNDEVEN_F32>;
-def roundeven : RuntimeLibcallImpl<ROUNDEVEN_F64>;
-defm roundeven : LibmLongDoubleLibCall;
-
-def floorf : RuntimeLibcallImpl<FLOOR_F32>;
-def floor : RuntimeLibcallImpl<FLOOR_F64>;
-defm floor : LibmLongDoubleLibCall;
-
-def copysignf : RuntimeLibcallImpl<COPYSIGN_F32>;
-def copysign : RuntimeLibcallImpl<COPYSIGN_F64>;
-defm copysign : LibmLongDoubleLibCall;
-
-def fminf : RuntimeLibcallImpl<FMIN_F32>;
-def fmin : RuntimeLibcallImpl<FMIN_F64>;
-defm fmin : LibmLongDoubleLibCall;
-
-def fmaxf : RuntimeLibcallImpl<FMAX_F32>;
-def fmax : RuntimeLibcallImpl<FMAX_F64>;
-defm fmax : LibmLongDoubleLibCall;
-
-def fminimumf : RuntimeLibcallImpl<FMINIMUM_F32>;
-def fminimum : RuntimeLibcallImpl<FMINIMUM_F64>;
-defm fminimum : LibmLongDoubleLibCall;
-
-def fmaximumf : RuntimeLibcallImpl<FMAXIMUM_F32>;
-def fmaximum : RuntimeLibcallImpl<FMAXIMUM_F64>;
-defm fmaximum : LibmLongDoubleLibCall;
-
-def fminimum_numf : RuntimeLibcallImpl<FMINIMUM_NUM_F32>;
-def fminimum_num : RuntimeLibcallImpl<FMINIMUM_NUM_F64>;
-defm fminimum_num : LibmLongDoubleLibCall;
-
-def fmaximum_numf : RuntimeLibcallImpl<FMAXIMUM_NUM_F32>;
-def fmaximum_num : RuntimeLibcallImpl<FMAXIMUM_NUM_F64>;
-defm fmaximum_num : LibmLongDoubleLibCall;
-
-def lroundf : RuntimeLibcallImpl<LROUND_F32>;
-def lround : RuntimeLibcallImpl<LROUND_F64>;
-defm lround : LibmLongDoubleLibCall;
-
-def llroundf : RuntimeLibcallImpl<LLROUND_F32>;
-def llround : RuntimeLibcallImpl<LLROUND_F64>;
-defm llround : LibmLongDoubleLibCall;
-
-def lrintf : RuntimeLibcallImpl<LRINT_F32>;
-def lrint : RuntimeLibcallImpl<LRINT_F64>;
-defm lrint : LibmLongDoubleLibCall;
-
-def llrintf : RuntimeLibcallImpl<LLRINT_F32>;
-def llrint : RuntimeLibcallImpl<LLRINT_F64>;
-defm llrint : LibmLongDoubleLibCall;
-
-def ldexpf : RuntimeLibcallImpl<LDEXP_F32>;
-def ldexp : RuntimeLibcallImpl<LDEXP_F64>;
-defm ldexp : LibmLongDoubleLibCall;
-
-def frexpf : RuntimeLibcallImpl<FREXP_F32>;
-def frexp : RuntimeLibcallImpl<FREXP_F64>;
-defm frexp : LibmLongDoubleLibCall;
-
-def sincospif : RuntimeLibcallImpl<SINCOSPI_F32>;
-def sincospi : RuntimeLibcallImpl<SINCOSPI_F64>;
-defm sincospi : LibmLongDoubleLibCall;
-
-def modff : RuntimeLibcallImpl<MODF_F32>;
-def modf : RuntimeLibcallImpl<MODF_F64>;
-defm modf : LibmLongDoubleLibCall;
+defm fmod : LibmLibcallImpls<"REM">;
+defm fma : LibmLibcallImpls;
+defm sqrt : LibmLibcallImpls;
+defm cbrt : LibmLibcallImpls;
+defm log : LibmLibcallImpls;
+defm __log_finite : LibmLibcallImpls<"LOG_FINITE", "__logX_finite">;
+defm log2 : LibmLibcallImpls;
+defm __log2_finite : LibmLibcallImpls<"LOG2_FINITE", "__log2X_finite">;
+defm log10 : LibmLibcallImpls;
+defm __log10_finite : LibmLibcallImpls<"LOG10_FINITE", "__log10X_finite">;
+defm exp : LibmLibcallImpls;
+defm __exp_finite : LibmLibcallImpls<"EXP_FINITE", "__expX_finite">;
+defm exp2 : LibmLibcallImpls;
+defm __exp2_finite : LibmLibcallImpls<"EXP2_FINITE", "__exp2X_finite">;
+defm exp10 : LibmLibcallImpls;
+defm sin : LibmLibcallImpls;
+defm cos : LibmLibcallImpls;
+defm tan : LibmLibcallImpls;
+defm sinh : LibmLibcallImpls;
+defm cosh : LibmLibcallImpls;
+defm tanh : LibmLibcallImpls;
+defm asin : LibmLibcallImpls;
+defm acos : LibmLibcallImpls;
+defm atan : LibmLibcallImpls;
+defm atan2 : LibmLibcallImpls;
+defm pow : LibmLibcallImpls;
+defm __pow_finite : LibmLibcallImpls<"POW_FINITE", "__powX_finite">;
+defm ceil : LibmLibcallImpls;
+defm trunc : LibmLibcallImpls;
+defm rint : LibmLibcallImpls;
+defm nearbyint : LibmLibcallImpls;
+defm round : LibmLibcallImpls;
+defm roundeven : LibmLibcallImpls;
+defm floor : LibmLibcallImpls;
+defm copysign : LibmLibcallImpls;
+defm fmin : LibmLibcallImpls;
+defm fmax : LibmLibcallImpls;
+defm fminimum : LibmLibcallImpls;
+defm fmaximum : LibmLibcallImpls;
+defm fminimum_num : LibmLibcallImpls;
+defm fmaximum_num : LibmLibcallImpls;
+defm lround : LibmLibcallImpls;
+defm llround : LibmLibcallImpls;
+defm lrint : LibmLibcallImpls;
+defm llrint : LibmLibcallImpls;
+defm ldexp : LibmLibcallImpls;
+defm frexp : LibmLibcallImpls;
+defm sincospi : LibmLibcallImpls;
+defm modf : LibmLibcallImpls;
 
 // Floating point environment
 def fegetenv : RuntimeLibcallImpl<FEGETENV>;
@@ -947,9 +794,7 @@ def __exp10 : RuntimeLibcallImpl<EXP10_F64>;
 def __sincosf_stret : RuntimeLibcallImpl<SINCOS_STRET_F32>;
 def __sincos_stret : RuntimeLibcallImpl<SINCOS_STRET_F64>;
 
-def sincosf : RuntimeLibcallImpl<SINCOS_F32>;
-def sincos : RuntimeLibcallImpl<SINCOS_F64>;
-defm sincos : LibmLongDoubleLibCall;
+defm sincos : LibmLibcallImpls;
 
 def bzero : RuntimeLibcallImpl<BZERO>;
 def __bzero : RuntimeLibcallImpl<BZERO>;

>From f766252e4075694efd40f1fa6502b50f5f14da57 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross at umich.edu>
Date: Fri, 11 Aug 2023 22:16:01 -0400
Subject: [PATCH 2/7] [IR] Add a test for `f128` libm libcall lowering (NFC)

`f128` intrinsic functions from libm sometimes lower to `long double`
library calls when they instead need to be `f128` versions. Add a
generic test demonstrating current behavior.
---
 .../CodeGen/Generic/fp128-math-libcalls.ll    | 337 ++++++++++++++++++
 1 file changed, 337 insertions(+)
 create mode 100644 llvm/test/CodeGen/Generic/fp128-math-libcalls.ll

diff --git a/llvm/test/CodeGen/Generic/fp128-math-libcalls.ll b/llvm/test/CodeGen/Generic/fp128-math-libcalls.ll
new file mode 100644
index 0000000000000..159b3bda6b794
--- /dev/null
+++ b/llvm/test/CodeGen/Generic/fp128-math-libcalls.ll
@@ -0,0 +1,337 @@
+; Verify that fp128 intrinsics only lower to `long double` calls (e.g. sinl) on
+; platforms where f128 and `long double` have the same layout. Otherwise, lower
+; to f128 versions (e.g. sinf128).
+;
+; Targets include:
+; * aarch64 (long double == f128, should use ld syms)
+; * arm (long double == f64, should use f128 syms)
+; * s390x (long double == f128, should use ld syms, some hardware support)
+; * x86, x64 (80-bit long double, should use f128 syms)
+; * gnu (has f128 symbols on all platforms so we can use those)
+; * musl (no f128 symbols available)
+; * Windows and MacOS (no f128 symbols, long double == f64)
+
+; FIXME(#44744): arm32, x86-{32,64} musl targets, MacOS, and Windows don't have
+; f128 long double. They should be passing with CHECK-F128 rather than
+; CHECK-USELD.
+
+; RUN: %if aarch64-registered-target %{ llc < %s -mtriple=aarch64-unknown-linux-gnu    -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-USELD %}
+; RUN: %if aarch64-registered-target %{ llc < %s -mtriple=aarch64-unknown-linux-musl   -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-USELD %}
+; RUN: %if aarch64-registered-target %{ llc < %s -mtriple=aarch64-unknown-none         -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-USELD %}
+; RUN: %if aarch64-registered-target %{ llc < %s -mtriple=arm64-apple-macosx           -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-USELD %}
+; RUN: %if arm-registered-target     %{ llc < %s -mtriple=arm-none-eabi                -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-USELD %}
+; RUN: %if arm-registered-target     %{ llc < %s -mtriple=arm-unknown-linux-gnueabi    -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-USELD %}
+; RUN: %if powerpc-registered-target %{ llc < %s -mtriple=powerpc-unknown-linux-gnu    -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-F128  %}
+; RUN: %if powerpc-registered-target %{ llc < %s -mtriple=powerpc64-unknown-linux-gnu  -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-F128  %}
+; RUN: %if powerpc-registered-target %{ llc < %s -mtriple=powerpc64-unknown-linux-musl -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-F128  %}
+; RUN: %if riscv-registered-target   %{ llc < %s -mtriple=riscv32-unknown-linux-gnu    -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-USELD %}
+; RUN: %if systemz-registered-target %{ llc < %s -mtriple=s390x-unknown-linux-gnu      -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-S390X %}
+; RUN: %if x86-registered-target     %{ llc < %s -mtriple=i686-unknown-linux-gnu       -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-F128  %}
+; RUN: %if x86-registered-target     %{ llc < %s -mtriple=i686-unknown-linux-musl      -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-USELD %}
+; RUN: %if x86-registered-target     %{ llc < %s -mtriple=x86_64-unknown-linux-gnu     -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-F128  %}
+; RUN: %if x86-registered-target     %{ llc < %s -mtriple=x86_64-unknown-linux-musl    -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-USELD %}
+;
+; FIXME(#144006): Windows-MSVC should also be run but has a ldexp selection
+; failure.
+; %if x86-registered-target     %{ llc < %s -mtriple=x86_64-pc-windows-msvc       -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-F128  %}
+
+define fp128 @test_acos(fp128 %a) {
+; CHECK-ALL-LABEL:  test_acos:
+; CHECK-F128:       acosf128
+; CHECK-USELD:      acosl
+; CHECK-S390X:      acosl
+start:
+  %0 = tail call fp128 @llvm.acos.f128(fp128 %a)
+  ret fp128 %0
+}
+
+define fp128 @test_asin(fp128 %a) {
+; CHECK-ALL-LABEL:  test_asin:
+; CHECK-F128:       asinf128
+; CHECK-USELD:      asinl
+; CHECK-S390X:      asinl
+start:
+  %0 = tail call fp128 @llvm.asin.f128(fp128 %a)
+  ret fp128 %0
+}
+
+define fp128 @test_atan(fp128 %a) {
+; CHECK-ALL-LABEL:      test_atan:
+; CHECK-F128:       atanf128
+; CHECK-USELD:      atanl
+; CHECK-S390X:      atanl
+start:
+  %0 = tail call fp128 @llvm.atan.f128(fp128 %a)
+  ret fp128 %0
+}
+
+define fp128 @test_ceil(fp128 %a) {
+; CHECK-ALL-LABEL:      test_ceil:
+; CHECK-F128:       ceilf128
+; CHECK-USELD:      ceill
+; CHECK-S390X:      ceill
+start:
+  %0 = tail call fp128 @llvm.ceil.f128(fp128 %a)
+  ret fp128 %0
+}
+
+define fp128 @test_copysign(fp128 %a, fp128 %b) {
+; copysign should always get lowered to assembly.
+; CHECK-ALL-LABEL:      test_copysign:
+; CHECK-ALL-NOT:        copysignf128
+; CHECK-ALL-NOT:        copysignl
+start:
+  %0 = tail call fp128 @llvm.copysign.f128(fp128 %a, fp128 %b)
+  ret fp128 %0
+}
+
+define fp128 @test_cos(fp128 %a) {
+; CHECK-ALL-LABEL:  test_cos:
+; CHECK-F128:       cosf128
+; CHECK-USELD:      cosl
+; CHECK-S390X:      cosl
+start:
+  %0 = tail call fp128 @llvm.cos.f128(fp128 %a)
+  ret fp128 %0
+}
+
+define fp128 @test_exp10(fp128 %a) {
+; CHECK-ALL-LABEL:  test_exp10:
+; CHECK-F128:       exp10f128
+; CHECK-USELD:      exp10l
+; CHECK-S390X:      exp10l
+start:
+  %0 = tail call fp128 @llvm.exp10.f128(fp128 %a)
+  ret fp128 %0
+}
+
+define fp128 @test_exp2(fp128 %a) {
+; CHECK-ALL-LABEL:  test_exp2:
+; CHECK-F128:       exp2f128
+; CHECK-USELD:      exp2l
+; CHECK-S390X:      exp2l
+start:
+  %0 = tail call fp128 @llvm.exp2.f128(fp128 %a)
+  ret fp128 %0
+}
+
+
+define fp128 @test_exp(fp128 %a) {
+; CHECK-ALL-LABEL:  test_exp:
+; CHECK-F128:       expf128
+; CHECK-USELD:      expl
+; CHECK-S390X:      expl
+start:
+  %0 = tail call fp128 @llvm.exp.f128(fp128 %a)
+  ret fp128 %0
+}
+
+define fp128 @test_fabs(fp128 %a) {
+; fabs should always get lowered to assembly.
+; CHECK-ALL-LABEL:  test_fabs:
+; CHECK-ALL-NOT:    fabsf128
+; CHECK-ALL-NOT:    fabsl
+start:
+  %0 = tail call fp128 @llvm.fabs.f128(fp128 %a)
+  ret fp128 %0
+}
+
+define fp128 @test_floor(fp128 %a) {
+; CHECK-ALL-LABEL:  test_floor:
+; CHECK-F128:       floorf128
+; CHECK-USELD:      floorl
+; CHECK-S390X:      floorl
+start:
+  %0 = tail call fp128 @llvm.floor.f128(fp128 %a)
+  ret fp128 %0
+}
+
+define fp128 @test_fma(fp128 %a, fp128 %b, fp128 %c) {
+; CHECK-ALL-LABEL:  test_fma:
+; CHECK-F128:       fmaf128
+; CHECK-USELD:      fmal
+; CHECK-S390X:      fmal
+start:
+  %0 = tail call fp128 @llvm.fma.f128(fp128 %a, fp128 %b, fp128 %c)
+  ret fp128 %0
+}
+
+define { fp128, i32 } @test_frexp(fp128 %a) {
+; CHECK-ALL-LABEL:  test_frexp:
+; CHECK-F128:       frexpf128
+; CHECK-USELD:      frexpl
+; CHECK-S390X:      frexpl
+start:
+  %0 = tail call { fp128, i32 } @llvm.frexp.f128(fp128 %a)
+  ret { fp128, i32 } %0
+}
+
+define fp128 @test_ldexp(fp128 %a, i32 %b) {
+; CHECK-ALL-LABEL:  test_ldexp:
+; CHECK-F128:       ldexpf128
+; CHECK-USELD:      ldexpl
+; CHECK-S390X:      ldexpl
+start:
+  %0 = tail call fp128 @llvm.ldexp.f128(fp128 %a, i32 %b)
+  ret fp128 %0
+}
+
+define i64 @test_llrint(fp128 %a) {
+; CHECK-ALL-LABEL:  test_llrint:
+; CHECK-F128:       llrintf128
+; CHECK-USELD:      llrintl
+; CHECK-S390X:      llrintl
+start:
+  %0 = tail call i64 @llvm.llrint.f128(fp128 %a)
+  ret i64 %0
+}
+
+define i64 @test_llround(fp128 %a) {
+; CHECK-ALL-LABEL:  test_llround:
+; CHECK-F128:       llroundf128
+; CHECK-USELD:      llroundl
+; CHECK-S390X:      llroundl
+start:
+  %0 = tail call i64 @llvm.llround.i64.f128(fp128 %a)
+  ret i64 %0
+}
+
+define fp128 @test_log10(fp128 %a) {
+; CHECK-ALL-LABEL:  test_log10:
+; CHECK-F128:       log10f128
+; CHECK-USELD:      log10l
+; CHECK-S390X:      log10l
+start:
+  %0 = tail call fp128 @llvm.log10.f128(fp128 %a)
+  ret fp128 %0
+}
+
+define fp128 @test_log2(fp128 %a) {
+; CHECK-ALL-LABEL:  test_log2:
+; CHECK-F128:       log2f128
+; CHECK-USELD:      log2l
+; CHECK-S390X:      log2l
+start:
+  %0 = tail call fp128 @llvm.log2.f128(fp128 %a)
+  ret fp128 %0
+}
+
+define fp128 @test_log(fp128 %a) {
+; CHECK-ALL-LABEL:  test_log:
+; CHECK-F128:       logf128
+; CHECK-USELD:      logl
+; CHECK-S390X:      logl
+start:
+  %0 = tail call fp128 @llvm.log.f128(fp128 %a)
+  ret fp128 %0
+}
+
+define i64 @test_lrint(fp128 %a) {
+; CHECK-ALL-LABEL:  test_lrint:
+; CHECK-F128:       lrintf128
+; CHECK-USELD:      lrintl
+; CHECK-S390X:      lrintl
+start:
+  %0 = tail call i64 @llvm.lrint.f128(fp128 %a)
+  ret i64 %0
+}
+
+define i64 @test_lround(fp128 %a) {
+; CHECK-ALL-LABEL:  test_lround:
+; CHECK-F128:       lroundf128
+; CHECK-USELD:      lroundl
+; CHECK-S390X:      lroundl
+start:
+  %0 = tail call i64 @llvm.lround.i64.f128(fp128 %a)
+  ret i64 %0
+}
+
+define fp128 @test_nearbyint(fp128 %a) {
+; CHECK-ALL-LABEL:  test_nearbyint:
+; CHECK-F128:       nearbyintf128
+; CHECK-USELD:      nearbyintl
+; CHECK-S390X:      nearbyintl
+start:
+  %0 = tail call fp128 @llvm.nearbyint.f128(fp128 %a)
+  ret fp128 %0
+}
+
+define fp128 @test_pow(fp128 %a, fp128 %b) {
+; CHECK-ALL-LABEL:  test_pow:
+; CHECK-F128:       powf128
+; CHECK-USELD:      powl
+; CHECK-S390X:      powl
+start:
+  %0 = tail call fp128 @llvm.pow.f128(fp128 %a, fp128 %b)
+  ret fp128 %0
+}
+
+define fp128 @test_rint(fp128 %a) {
+; CHECK-ALL-LABEL:  test_rint:
+; CHECK-F128:       rintf128
+; CHECK-USELD:      rintl
+; CHECK-S390X:      fixbr {{%.*}}, 0, {{%.*}}
+start:
+  %0 = tail call fp128 @llvm.rint.f128(fp128 %a)
+  ret fp128 %0
+}
+
+define fp128 @test_roundeven(fp128 %a) {
+; CHECK-ALL-LABEL:  test_roundeven:
+; CHECK-F128:       roundevenf128
+; CHECK-USELD:      roundevenl
+; CHECK-S390X:      roundevenl
+start:
+  %0 = tail call fp128 @llvm.roundeven.f128(fp128 %a)
+  ret fp128 %0
+}
+
+define fp128 @test_round(fp128 %a) {
+; CHECK-ALL-LABEL:  test_round:
+; CHECK-F128:       roundf128
+; CHECK-USELD:      roundl
+; CHECK-S390X:      roundl
+start:
+  %0 = tail call fp128 @llvm.round.f128(fp128 %a)
+  ret fp128 %0
+}
+
+define fp128 @test_sin(fp128 %a) {
+; CHECK-ALL-LABEL:  test_sin:
+; CHECK-F128:       sinf128
+; CHECK-USELD:      sinl
+; CHECK-S390X:      sinl
+start:
+  %0 = tail call fp128 @llvm.sin.f128(fp128 %a)
+  ret fp128 %0
+}
+
+define fp128 @test_sqrt(fp128 %a) {
+; CHECK-ALL-LABEL:  test_sqrt:
+; CHECK-F128:       sqrtf128
+; CHECK-USELD:      sqrtl
+; CHECK-S390X:      sqxbr {{%.*}}, {{%.*}}
+start:
+  %0 = tail call fp128 @llvm.sqrt.f128(fp128 %a)
+  ret fp128 %0
+}
+
+define fp128 @test_tan(fp128 %a) {
+; CHECK-ALL-LABEL:  test_tan:
+; CHECK-F128:       tanf128
+; CHECK-USELD:      tanl
+; CHECK-S390X:      tanl
+start:
+  %0 = tail call fp128 @llvm.tan.f128(fp128 %a)
+  ret fp128 %0
+}
+
+define fp128 @test_trunc(fp128 %a) {
+; CHECK-ALL-LABEL:  test_trunc:
+; CHECK-F128:       truncf128
+; CHECK-USELD:      truncl
+; CHECK-S390X:      truncl
+start:
+  %0 = tail call fp128 @llvm.trunc.f128(fp128 %a)
+  ret fp128 %0
+}

>From fac24c7f5bc4dbd9e8b9d4824ba637fc057726fd Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross at umich.edu>
Date: Thu, 28 Dec 2023 04:01:22 -0500
Subject: [PATCH 3/7] [IR] Change `fp128` lowering to use `f128` functions by
 default

LLVM currently emits calls to `*l` (`long double`) libm symbols for
`fp128` intrinsics. This works on platforms where `long double` and
`_Float128` are the same type, but is incorrect on many platforms.

Change RuntimeLibcalls such that `*f128` libcalls are used by default,
which is always safe and correct but may not be available. On platforms
where it is likely that `sqrtf128` and similar are not available, keep
the current behavior of lowering to `*l` symbols if `long double` is
`binary128`.

The logic for whether f128 is `long double` is based on the platforms in
Clang that set `LongDoubleFormat` to `llvm::APFloat::IEEEquad`.

Fixes https://github.com/llvm/llvm-project/issues/44744
---
 llvm/include/llvm/CodeGen/TargetLowering.h    |   3 +-
 llvm/include/llvm/IR/RuntimeLibcalls.td       | 160 ++++++++++--------
 llvm/include/llvm/TargetParser/Triple.h       |  18 +-
 .../CodeGen/SelectionDAG/TargetLowering.cpp   |   7 +-
 llvm/lib/IR/RuntimeLibcalls.cpp               | 107 ++++++------
 llvm/lib/Target/Mips/MipsCCState.cpp          |  15 +-
 llvm/lib/TargetParser/Triple.cpp              |  37 ++++
 .../test/CodeGen/AArch64/illegal-float-ops.ll |  58 ++++---
 llvm/test/CodeGen/AArch64/sincos-expansion.ll |   2 +-
 .../vecreduce-fmax-legalization-nan.ll        |   2 +-
 llvm/test/CodeGen/ARM/ldexp.ll                |   2 +-
 llvm/test/CodeGen/ARM/llvm.sincos.ll          |   2 +-
 .../CodeGen/Generic/fp128-math-libcalls.ll    |  14 +-
 llvm/test/CodeGen/Mips/cconv/fmaxl_call.ll    |   2 +-
 llvm/test/CodeGen/Mips/cconv/roundl-call.ll   |   4 +-
 llvm/test/CodeGen/Mips/llrint-conv.ll         |   4 +-
 llvm/test/CodeGen/Mips/llround-conv.ll        |   4 +-
 llvm/test/CodeGen/Mips/lrint-conv.ll          |   4 +-
 llvm/test/CodeGen/Mips/lround-conv.ll         |   4 +-
 llvm/test/CodeGen/PowerPC/f128-arith.ll       |  56 +++---
 .../test/CodeGen/SystemZ/atomicrmw-fmax-03.ll |   2 +-
 .../test/CodeGen/SystemZ/atomicrmw-fmin-03.ll |   2 +-
 llvm/test/CodeGen/SystemZ/fp-libcall.ll       |  20 +--
 llvm/test/CodeGen/SystemZ/fp-mul-13.ll        |   2 +-
 llvm/test/CodeGen/SystemZ/fp-round-01.ll      |  12 +-
 llvm/test/CodeGen/SystemZ/fp-sincos-01.ll     |   2 +-
 llvm/test/CodeGen/SystemZ/fp-strict-mul-13.ll |   2 +-
 .../CodeGen/SystemZ/fp-strict-round-01.ll     |  12 +-
 .../test/CodeGen/X86/fp128-libcalls-strict.ll | 128 +++++++-------
 llvm/test/CodeGen/X86/fp128-libcalls.ll       |  80 ++++-----
 30 files changed, 435 insertions(+), 332 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index a248eb7444b20..ad6944fe7aa90 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -3564,7 +3564,8 @@ class LLVM_ABI TargetLoweringBase {
     return Libcalls.getLibcallImpl(Call);
   }
 
-  /// Get the libcall routine name for the specified libcall.
+  /// Get the libcall routine name for the specified libcall if implemented,
+  /// otherwise NULL.
   const char *getLibcallName(RTLIB::Libcall Call) const {
     return Libcalls.getLibcallName(Call);
   }
diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.td b/llvm/include/llvm/IR/RuntimeLibcalls.td
index e0b29bd52a87e..32925b85ae4a3 100644
--- a/llvm/include/llvm/IR/RuntimeLibcalls.td
+++ b/llvm/include/llvm/IR/RuntimeLibcalls.td
@@ -22,6 +22,8 @@ def isOSWindows : RuntimeLibcallPredicate<"TT.isOSWindows()">;
 def darwinHasSinCosStret : RuntimeLibcallPredicate<"darwinHasSinCosStret(TT)">;
 def darwinHasExp10 : RuntimeLibcallPredicate<"darwinHasExp10(TT)">;
 def hasSinCos : RuntimeLibcallPredicate<"hasSinCos(TT)">;
+def f128LibmShouldUseLongDouble
+    : RuntimeLibcallPredicate<"f128LibmShouldUseLongDouble(TT)">;
 
 //--------------------------------------------------------------------
 // Declare all kinds of used libcalls
@@ -363,6 +365,9 @@ def MIPS16_RET_DF : RuntimeLibcall;
 def MIPS16_RET_SC : RuntimeLibcall;
 def MIPS16_RET_SF : RuntimeLibcall;
 
+// Create libcall impls for `long double` and `_Float128`. See also `_ld128`
+// impls defined at `LibmF128AsLongDoubleLibcalls`.
+
 // Produce libcall impls for all float types. If provided, `rtbasename` should
 // contain an `X` that will be replaced with the `f`/`l`/`fX` suffix (if not
 // provided, it is appended to the def name).
@@ -376,7 +381,7 @@ multiclass LibmLibcallImpls<string libcall_basename = !toupper(NAME),
                            !subst("X", "", rtbasename)>;
   def NAME#"_f128"
       : RuntimeLibcallImpl<!cast<RuntimeLibcall>(libcall_basename#"_F128"),
-                           !subst("X", "l", rtbasename)>;
+                           !subst("X", "f128", rtbasename)>;
   def NAME#"_ppcf128"
       : RuntimeLibcallImpl<!cast<RuntimeLibcall>(libcall_basename#"_PPCF128"),
                            !subst("X", "l", rtbasename)>;
@@ -385,6 +390,14 @@ multiclass LibmLibcallImpls<string libcall_basename = !toupper(NAME),
                            !subst("X", "l", rtbasename)>;
 }
 
+multiclass LibmF128AsLongDoubleImpls<string libcall_basename = !toupper(NAME),
+                                     string rtbasename =
+                                         !strconcat(NAME, "X")> {
+  def NAME#"_ld128"
+      : RuntimeLibcallImpl<!cast<RuntimeLibcall>(libcall_basename#"_F128"),
+                           !subst("X", "l", rtbasename)>;
+}
+
 // AArch64 calls
 def SC_MEMCPY : RuntimeLibcall;
 def SC_MEMMOVE : RuntimeLibcall;
@@ -806,60 +819,65 @@ def __riscv_flush_icache : RuntimeLibcallImpl<RISCV_FLUSH_ICACHE>;
 // F128 libm Runtime Libcalls
 //===----------------------------------------------------------------------===//
 
-defset list<RuntimeLibcallImpl> LibmF128Libcalls = {
-  def logf128 : RuntimeLibcallImpl<LOG_F128>;
-  def log2f128 : RuntimeLibcallImpl<LOG2_F128>;
-  def log10f128 : RuntimeLibcallImpl<LOG10_F128>;
-  def expf128 : RuntimeLibcallImpl<EXP_F128>;
-  def exp2f128 : RuntimeLibcallImpl<EXP2_F128>;
-  def exp10f128 : RuntimeLibcallImpl<EXP10_F128>;
-  def sinf128 : RuntimeLibcallImpl<SIN_F128>;
-  def cosf128 : RuntimeLibcallImpl<COS_F128>;
-  def tanf128 : RuntimeLibcallImpl<TAN_F128>;
-  def tanhf128 : RuntimeLibcallImpl<TANH_F128>;
-  def sincosf128 : RuntimeLibcallImpl<SINCOS_F128>;
-  def powf128 : RuntimeLibcallImpl<POW_F128>;
-  def fminf128 : RuntimeLibcallImpl<FMIN_F128>;
-  def fmaxf128 : RuntimeLibcallImpl<FMAX_F128>;
-  def fmodf128 : RuntimeLibcallImpl<REM_F128>;
-  def sqrtf128 : RuntimeLibcallImpl<SQRT_F128>;
-  def ceilf128 : RuntimeLibcallImpl<CEIL_F128>;
-  def floorf128 : RuntimeLibcallImpl<FLOOR_F128>;
-  def truncf128 : RuntimeLibcallImpl<TRUNC_F128>;
-  def roundf128 : RuntimeLibcallImpl<ROUND_F128>;
-  def lroundf128 : RuntimeLibcallImpl<LROUND_F128>;
-  def llroundf128 : RuntimeLibcallImpl<LLROUND_F128>;
-  def rintf128 : RuntimeLibcallImpl<RINT_F128>;
-  def lrintf128 : RuntimeLibcallImpl<LRINT_F128>;
-  def llrintf128 : RuntimeLibcallImpl<LLRINT_F128>;
-  def nearbyintf128 : RuntimeLibcallImpl<NEARBYINT_F128>;
-  def fmaf128 : RuntimeLibcallImpl<FMA_F128>;
-  def frexpf128 : RuntimeLibcallImpl<FREXP_F128>;
-  def cbrtf128 : RuntimeLibcallImpl<CBRT_F128>;
-  def fminimumf128 : RuntimeLibcallImpl<FMINIMUM_F128>;
-  def fmaximumf128 : RuntimeLibcallImpl<FMAXIMUM_F128>;
-  def fminimum_numf128 : RuntimeLibcallImpl<FMINIMUM_NUM_F128>;
-  def fmaximum_numf128 : RuntimeLibcallImpl<FMAXIMUM_NUM_F128>;
-  def asinf128 : RuntimeLibcallImpl<ASIN_F128>;
-  def acosf128 : RuntimeLibcallImpl<ACOS_F128>;
-  def atanf128 : RuntimeLibcallImpl<ATAN_F128>;
-  def atan2f128 : RuntimeLibcallImpl<ATAN2_F128>;
-  def ldexpf128 : RuntimeLibcallImpl<LDEXP_F128>;
-  def roundevenf128 : RuntimeLibcallImpl<ROUNDEVEN_F128>;
-  def modff128 : RuntimeLibcallImpl<MODF_F128>;
-  def sinhf128 : RuntimeLibcallImpl<SINH_F128>;
-  def coshf128 : RuntimeLibcallImpl<COSH_F128>;
-  def copysignf128 : RuntimeLibcallImpl<COPYSIGN_F128>;
+// Impls for treating `fp128` as `long double`
+defset list<RuntimeLibcallImpl> LibmF128AsLongDoubleLibcalls = {
+  defm log : LibmF128AsLongDoubleImpls;
+  defm log2 : LibmF128AsLongDoubleImpls;
+  defm log10 : LibmF128AsLongDoubleImpls;
+  defm exp : LibmF128AsLongDoubleImpls;
+  defm exp2 : LibmF128AsLongDoubleImpls;
+  defm exp10 : LibmF128AsLongDoubleImpls;
+  defm sin : LibmF128AsLongDoubleImpls;
+  defm cos : LibmF128AsLongDoubleImpls;
+  defm tan : LibmF128AsLongDoubleImpls;
+  defm tanh : LibmF128AsLongDoubleImpls;
+  defm sincos : LibmF128AsLongDoubleImpls;
+  defm pow : LibmF128AsLongDoubleImpls;
+  defm fmin : LibmF128AsLongDoubleImpls;
+  defm fmax : LibmF128AsLongDoubleImpls;
+  defm fmod : LibmF128AsLongDoubleImpls<"REM">;
+  defm sqrt : LibmF128AsLongDoubleImpls;
+  defm ceil : LibmF128AsLongDoubleImpls;
+  defm floor : LibmF128AsLongDoubleImpls;
+  defm trunc : LibmF128AsLongDoubleImpls;
+  defm round : LibmF128AsLongDoubleImpls;
+  defm lround : LibmF128AsLongDoubleImpls;
+  defm llround : LibmF128AsLongDoubleImpls;
+  defm rint : LibmF128AsLongDoubleImpls;
+  defm lrint : LibmF128AsLongDoubleImpls;
+  defm llrint : LibmF128AsLongDoubleImpls;
+  defm nearbyint : LibmF128AsLongDoubleImpls;
+  defm fma : LibmF128AsLongDoubleImpls;
+  defm frexp : LibmF128AsLongDoubleImpls;
+  defm cbrt : LibmF128AsLongDoubleImpls;
+  defm fminimum : LibmF128AsLongDoubleImpls;
+  defm fmaximum : LibmF128AsLongDoubleImpls;
+  defm fminimum_num : LibmF128AsLongDoubleImpls;
+  defm fmaximum_num : LibmF128AsLongDoubleImpls;
+  defm asin : LibmF128AsLongDoubleImpls;
+  defm acos : LibmF128AsLongDoubleImpls;
+  defm atan : LibmF128AsLongDoubleImpls;
+  defm atan2 : LibmF128AsLongDoubleImpls;
+  defm ldexp : LibmF128AsLongDoubleImpls;
+  defm roundeven : LibmF128AsLongDoubleImpls;
+  defm modf : LibmF128AsLongDoubleImpls;
+  defm sinh : LibmF128AsLongDoubleImpls;
+  defm cosh : LibmF128AsLongDoubleImpls;
+  defm copysign : LibmF128AsLongDoubleImpls;
 }
 
-defset list<RuntimeLibcallImpl> LibmF128FiniteLibcalls = {
-  def __logf128_finite : RuntimeLibcallImpl<LOG_FINITE_F128>;
-  def __log2f128_finite : RuntimeLibcallImpl<LOG2_FINITE_F128>;
-  def __log10f128_finite : RuntimeLibcallImpl<LOG10_FINITE_F128>;
-  def __expf128_finite : RuntimeLibcallImpl<EXP_FINITE_F128>;
-  def __exp2f128_finite : RuntimeLibcallImpl<EXP2_FINITE_F128>;
-  def __exp10f128_finite : RuntimeLibcallImpl<EXP10_FINITE_F128>;
-  def __powf128_finite : RuntimeLibcallImpl<POW_FINITE_F128>;
+defset list<RuntimeLibcallImpl> LibmF128AsLongDoubleFiniteLibcalls = {
+  defm __log_finite : LibmF128AsLongDoubleImpls<"LOG_FINITE", "__logX_finite">;
+  defm __log2_finite
+      : LibmF128AsLongDoubleImpls<"LOG2_FINITE", "__log2X_finite">;
+  defm __log10_finite
+      : LibmF128AsLongDoubleImpls<"LOG10_FINITE", "__log10X_finite">;
+  defm __exp_finite : LibmF128AsLongDoubleImpls<"EXP_FINITE", "__expX_finite">;
+  defm __exp2_finite
+      : LibmF128AsLongDoubleImpls<"EXP2_FINITE", "__exp2X_finite">;
+  defm __exp10_finite
+      : LibmF128AsLongDoubleImpls<"EXP10_FINITE", "__exp10X_finite">;
+  defm __pow_finite : LibmF128AsLongDoubleImpls<"POW_FINITE", "__powX_finite">;
 }
 
 //===----------------------------------------------------------------------===//
@@ -893,10 +911,6 @@ defvar DefaultRuntimeLibcallImpls_ppcf128 =
     !filter(entry, AllDefaultRuntimeLibcallImpls,
             !match(!cast<string>(entry.Provides), "PPCF128"));
 
-defvar DefaultRuntimeLibcallImpls_f128 =
-    !filter(entry, AllDefaultRuntimeLibcallImpls,
-            !match(!cast<string>(entry.Provides), "_F128"));
-
 defvar DefaultRuntimeLibcallImpls =
 !listremove(
   !listremove(
@@ -1690,7 +1704,7 @@ def NVPTXSystemLibrary : SystemRuntimeLibrary<isNVPTX, (add)>;
 //===----------------------------------------------------------------------===//
 
 // For IEEE quad-precision libcall names, PPC uses "kf" instead of "tf".
-defset list<RuntimeLibcallImpl> PPCRuntimeLibcalls = {
+defset list<RuntimeLibcallImpl> PPCOverriddenRuntimeLibcalls = {
   def __addkf3 : RuntimeLibcallImpl<ADD_F128>;
   def __subkf3 : RuntimeLibcallImpl<SUB_F128>;
   def __mulkf3 : RuntimeLibcallImpl<MUL_F128>;
@@ -1734,7 +1748,17 @@ defset list<RuntimeLibcallImpl> PPC32AIXCallList = {
   def ___bzero : RuntimeLibcallImpl<BZERO>;
 }
 
-defvar PPCOverrides = !foreach(entry, PPCRuntimeLibcalls, entry.Provides);
+// List of overridden libcalls as strings, `["ADD_F128", "SUB_F128", ...]`
+defvar PPCOverriddenNames = !foreach(entry, PPCOverriddenRuntimeLibcalls,
+                                     !cast<string>(entry.Provides));
+
+// Default libcall impls that provide a libcall in `PPCOverriddenNames`
+defvar PPCNonOverriddenImpls = !filter(
+    default_entry, DefaultRuntimeLibcallImpls,
+    // `!contains` does not exist; `not->empty->filter` is a hacky substitute
+    !not(!empty(
+        !filter(overridden, PPCOverriddenNames,
+                !eq(overridden, !cast<string>(default_entry.Provides))))));
 
 def isPPC : RuntimeLibcallPredicate<"TT.isPPC()">;
 def isPPC32 : RuntimeLibcallPredicate<"TT.isPPC32()">;
@@ -1747,20 +1771,22 @@ def isPPC64_AIX : RuntimeLibcallPredicate<"(TT.isPPC64() && TT.isOSAIX())">;
 def AIX32Calls : LibcallImpls<(add PPC32AIXCallList), isPPC32_AIX>;
 def AIX64Calls : LibcallImpls<(add PPC64AIXCallList), isPPC64_AIX>;
 
+// Replace overridden values, adjust mem* symbols, add ppc_f128<->f128
+// conversions.
+defvar PPCDefaultRuntimeLibcallImpls = (add
+    (sub DefaultRuntimeLibcallImpls, PPCNonOverriddenImpls, memcpy),
+    PPCOverriddenRuntimeLibcalls, __extendkftf2, __trunctfkf2,
+    DefaultRuntimeLibcallImpls_ppcf128, AIX32Calls, AIX64Calls);
+
 // FIXME: Current emission behavior with multiple implementations is
 // janky. We need to filter out the conflicting cases with different
 // f128 names, and then add the overrides. We should switch to
 // explicitly adding subsets of the default calls.
 def PPCSystemLibrary
     : SystemRuntimeLibrary<isPPC,
-      (add PPCRuntimeLibcalls,
-           (sub DefaultRuntimeLibcallImpls, memcpy,
-                DefaultRuntimeLibcallImpls_f128),
-           __extendkftf2, __trunctfkf2,
-           DefaultRuntimeLibcallImpls_ppcf128,
-           LibmF128Libcalls, AIX32Calls, AIX64Calls,
-           AvailableIf<memcpy, isNotAIX>,
-           LibcallImpls<(add Int128RTLibcalls), isPPC64>)>;
+                           (add PPCDefaultRuntimeLibcallImpls,
+                               AvailableIf<memcpy, isNotAIX>,
+                               LibcallImpls<(add Int128RTLibcalls), isPPC64>)>;
 
 //===----------------------------------------------------------------------===//
 // RISCV Runtime Libcalls
diff --git a/llvm/include/llvm/TargetParser/Triple.h b/llvm/include/llvm/TargetParser/Triple.h
index 657f4230379e8..f75773ed95708 100644
--- a/llvm/include/llvm/TargetParser/Triple.h
+++ b/llvm/include/llvm/TargetParser/Triple.h
@@ -266,13 +266,13 @@ class Triple {
     EABIHF,
     Android,
     Musl,
-    MuslABIN32,
-    MuslABI64,
-    MuslEABI,
-    MuslEABIHF,
-    MuslF32,
-    MuslSF,
-    MuslX32,
+    MuslABIN32, ///< MIPS N32 ABI
+    MuslABI64,  ///< MIPS N64 ABI
+    MuslEABI,   ///< Arm32 EABI
+    MuslEABIHF, ///< Arm32 EABI + HF
+    MuslF32,    ///< LoongArch ILP32F/LP64F
+    MuslSF,     ///< LoongArch ILP32S/LP64S
+    MuslX32,    ///< Musl using 32-bit ABI on x86_64
     LLVM,
 
     MSVC,
@@ -1274,6 +1274,10 @@ class Triple {
   /// or an invalid version tuple if this triple doesn't have one.
   LLVM_ABI VersionTuple getMinimumSupportedOSVersion() const;
 
+  /// Return true if `_Float128` libcalls should lower to e.g. `sqrtl` (`long
+  /// double`) rather than the default `sqrtf128`.
+  bool f128LibmShouldUseLongDouble() const;
+
   /// @}
   /// @name Static helpers for IDs.
   /// @{
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index e0597988e8907..7ffa7d02f38f4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -182,8 +182,11 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
   }
 
   const char *LibcallName = getLibcallName(LC);
-  if (LC == RTLIB::UNKNOWN_LIBCALL || !LibcallName)
-    reportFatalInternalError("unsupported library call operation");
+  if (LC == RTLIB::UNKNOWN_LIBCALL || !LibcallName) {
+    reportFatalInternalError("unsupported library call operation: "
+                             "libcall " +
+                             Twine(LC));
+  }
 
   SDValue Callee =
       DAG.getExternalSymbol(LibcallName, getPointerTy(DAG.getDataLayout()));
diff --git a/llvm/lib/IR/RuntimeLibcalls.cpp b/llvm/lib/IR/RuntimeLibcalls.cpp
index 45c4bd12658a6..1896078851307 100644
--- a/llvm/lib/IR/RuntimeLibcalls.cpp
+++ b/llvm/lib/IR/RuntimeLibcalls.cpp
@@ -62,59 +62,60 @@ static void setARMLibcallNames(RuntimeLibcallsInfo &Info, const Triple &TT,
     Info.setLibcallImplCallingConv(Impl, CallingConv::ARM_AAPCS);
 }
 
+/// Set all libm libcalls for _Float128 to `long double` (`*l`) symbols.
 static void setLongDoubleIsF128Libm(RuntimeLibcallsInfo &Info,
                                     bool FiniteOnlyFuncs = false) {
-  Info.setLibcallImpl(RTLIB::REM_F128, RTLIB::fmodf128);
-  Info.setLibcallImpl(RTLIB::FMA_F128, RTLIB::fmaf128);
-  Info.setLibcallImpl(RTLIB::SQRT_F128, RTLIB::sqrtf128);
-  Info.setLibcallImpl(RTLIB::CBRT_F128, RTLIB::cbrtf128);
-  Info.setLibcallImpl(RTLIB::LOG_F128, RTLIB::logf128);
-  Info.setLibcallImpl(RTLIB::LOG2_F128, RTLIB::log2f128);
-  Info.setLibcallImpl(RTLIB::LOG10_F128, RTLIB::log10f128);
-  Info.setLibcallImpl(RTLIB::EXP_F128, RTLIB::expf128);
-  Info.setLibcallImpl(RTLIB::EXP2_F128, RTLIB::exp2f128);
-  Info.setLibcallImpl(RTLIB::EXP10_F128, RTLIB::exp10f128);
-  Info.setLibcallImpl(RTLIB::SIN_F128, RTLIB::sinf128);
-  Info.setLibcallImpl(RTLIB::COS_F128, RTLIB::cosf128);
-  Info.setLibcallImpl(RTLIB::TAN_F128, RTLIB::tanf128);
-  Info.setLibcallImpl(RTLIB::SINCOS_F128, RTLIB::sincosf128);
-  Info.setLibcallImpl(RTLIB::ASIN_F128, RTLIB::asinf128);
-  Info.setLibcallImpl(RTLIB::ACOS_F128, RTLIB::acosf128);
-  Info.setLibcallImpl(RTLIB::ATAN_F128, RTLIB::atanf128);
-  Info.setLibcallImpl(RTLIB::ATAN2_F128, RTLIB::atan2f128);
-  Info.setLibcallImpl(RTLIB::SINH_F128, RTLIB::sinhf128);
-  Info.setLibcallImpl(RTLIB::COSH_F128, RTLIB::coshf128);
-  Info.setLibcallImpl(RTLIB::TANH_F128, RTLIB::tanhf128);
-  Info.setLibcallImpl(RTLIB::POW_F128, RTLIB::powf128);
-  Info.setLibcallImpl(RTLIB::CEIL_F128, RTLIB::ceilf128);
-  Info.setLibcallImpl(RTLIB::TRUNC_F128, RTLIB::truncf128);
-  Info.setLibcallImpl(RTLIB::RINT_F128, RTLIB::rintf128);
-  Info.setLibcallImpl(RTLIB::NEARBYINT_F128, RTLIB::nearbyintf128);
-  Info.setLibcallImpl(RTLIB::ROUND_F128, RTLIB::roundf128);
-  Info.setLibcallImpl(RTLIB::ROUNDEVEN_F128, RTLIB::roundevenf128);
-  Info.setLibcallImpl(RTLIB::FLOOR_F128, RTLIB::floorf128);
-  Info.setLibcallImpl(RTLIB::COPYSIGN_F128, RTLIB::copysignf128);
-  Info.setLibcallImpl(RTLIB::FMIN_F128, RTLIB::fminf128);
-  Info.setLibcallImpl(RTLIB::FMAX_F128, RTLIB::fmaxf128);
-  Info.setLibcallImpl(RTLIB::FMINIMUM_F128, RTLIB::fminimumf128);
-  Info.setLibcallImpl(RTLIB::FMAXIMUM_F128, RTLIB::fmaximumf128);
-  Info.setLibcallImpl(RTLIB::FMINIMUM_NUM_F128, RTLIB::fminimum_numf128);
-  Info.setLibcallImpl(RTLIB::FMAXIMUM_NUM_F128, RTLIB::fmaximum_numf128);
-  Info.setLibcallImpl(RTLIB::LROUND_F128, RTLIB::lroundf128);
-  Info.setLibcallImpl(RTLIB::LLROUND_F128, RTLIB::llroundf128);
-  Info.setLibcallImpl(RTLIB::LRINT_F128, RTLIB::lrintf128);
-  Info.setLibcallImpl(RTLIB::LLRINT_F128, RTLIB::llrintf128);
-  Info.setLibcallImpl(RTLIB::LDEXP_F128, RTLIB::ldexpf128);
-  Info.setLibcallImpl(RTLIB::FREXP_F128, RTLIB::frexpf128);
-  Info.setLibcallImpl(RTLIB::MODF_F128, RTLIB::modff128);
+  Info.setLibcallImpl(RTLIB::REM_F128, RTLIB::fmod_ld128);
+  Info.setLibcallImpl(RTLIB::FMA_F128, RTLIB::fma_ld128);
+  Info.setLibcallImpl(RTLIB::SQRT_F128, RTLIB::sqrt_ld128);
+  Info.setLibcallImpl(RTLIB::CBRT_F128, RTLIB::cbrt_ld128);
+  Info.setLibcallImpl(RTLIB::LOG_F128, RTLIB::log_ld128);
+  Info.setLibcallImpl(RTLIB::LOG2_F128, RTLIB::log2_ld128);
+  Info.setLibcallImpl(RTLIB::LOG10_F128, RTLIB::log10_ld128);
+  Info.setLibcallImpl(RTLIB::EXP_F128, RTLIB::exp_ld128);
+  Info.setLibcallImpl(RTLIB::EXP2_F128, RTLIB::exp2_ld128);
+  Info.setLibcallImpl(RTLIB::EXP10_F128, RTLIB::exp10_ld128);
+  Info.setLibcallImpl(RTLIB::SIN_F128, RTLIB::sin_ld128);
+  Info.setLibcallImpl(RTLIB::COS_F128, RTLIB::cos_ld128);
+  Info.setLibcallImpl(RTLIB::TAN_F128, RTLIB::tan_ld128);
+  Info.setLibcallImpl(RTLIB::SINCOS_F128, RTLIB::sincos_ld128);
+  Info.setLibcallImpl(RTLIB::ASIN_F128, RTLIB::asin_ld128);
+  Info.setLibcallImpl(RTLIB::ACOS_F128, RTLIB::acos_ld128);
+  Info.setLibcallImpl(RTLIB::ATAN_F128, RTLIB::atan_ld128);
+  Info.setLibcallImpl(RTLIB::ATAN2_F128, RTLIB::atan2_ld128);
+  Info.setLibcallImpl(RTLIB::SINH_F128, RTLIB::sinh_ld128);
+  Info.setLibcallImpl(RTLIB::COSH_F128, RTLIB::cosh_ld128);
+  Info.setLibcallImpl(RTLIB::TANH_F128, RTLIB::tanh_ld128);
+  Info.setLibcallImpl(RTLIB::POW_F128, RTLIB::pow_ld128);
+  Info.setLibcallImpl(RTLIB::CEIL_F128, RTLIB::ceil_ld128);
+  Info.setLibcallImpl(RTLIB::TRUNC_F128, RTLIB::trunc_ld128);
+  Info.setLibcallImpl(RTLIB::RINT_F128, RTLIB::rint_ld128);
+  Info.setLibcallImpl(RTLIB::NEARBYINT_F128, RTLIB::nearbyint_ld128);
+  Info.setLibcallImpl(RTLIB::ROUND_F128, RTLIB::round_ld128);
+  Info.setLibcallImpl(RTLIB::ROUNDEVEN_F128, RTLIB::roundeven_ld128);
+  Info.setLibcallImpl(RTLIB::FLOOR_F128, RTLIB::floor_ld128);
+  Info.setLibcallImpl(RTLIB::COPYSIGN_F128, RTLIB::copysign_ld128);
+  Info.setLibcallImpl(RTLIB::FMIN_F128, RTLIB::fmin_ld128);
+  Info.setLibcallImpl(RTLIB::FMAX_F128, RTLIB::fmax_ld128);
+  Info.setLibcallImpl(RTLIB::FMINIMUM_F128, RTLIB::fminimum_ld128);
+  Info.setLibcallImpl(RTLIB::FMAXIMUM_F128, RTLIB::fmaximum_ld128);
+  Info.setLibcallImpl(RTLIB::FMINIMUM_NUM_F128, RTLIB::fminimum_num_ld128);
+  Info.setLibcallImpl(RTLIB::FMAXIMUM_NUM_F128, RTLIB::fmaximum_num_ld128);
+  Info.setLibcallImpl(RTLIB::LROUND_F128, RTLIB::lround_ld128);
+  Info.setLibcallImpl(RTLIB::LLROUND_F128, RTLIB::llround_ld128);
+  Info.setLibcallImpl(RTLIB::LRINT_F128, RTLIB::lrint_ld128);
+  Info.setLibcallImpl(RTLIB::LLRINT_F128, RTLIB::llrint_ld128);
+  Info.setLibcallImpl(RTLIB::LDEXP_F128, RTLIB::ldexp_ld128);
+  Info.setLibcallImpl(RTLIB::FREXP_F128, RTLIB::frexp_ld128);
+  Info.setLibcallImpl(RTLIB::MODF_F128, RTLIB::modf_ld128);
 
   if (FiniteOnlyFuncs) {
-    Info.setLibcallImpl(RTLIB::LOG_FINITE_F128, RTLIB::__logf128_finite);
-    Info.setLibcallImpl(RTLIB::LOG2_FINITE_F128, RTLIB::__log2f128_finite);
-    Info.setLibcallImpl(RTLIB::LOG10_FINITE_F128, RTLIB::__log10f128_finite);
-    Info.setLibcallImpl(RTLIB::EXP_FINITE_F128, RTLIB::__expf128_finite);
-    Info.setLibcallImpl(RTLIB::EXP2_FINITE_F128, RTLIB::__exp2f128_finite);
-    Info.setLibcallImpl(RTLIB::POW_FINITE_F128, RTLIB::__powf128_finite);
+    Info.setLibcallImpl(RTLIB::LOG_FINITE_F128, RTLIB::__log_finite_ld128);
+    Info.setLibcallImpl(RTLIB::LOG2_FINITE_F128, RTLIB::__log2_finite_ld128);
+    Info.setLibcallImpl(RTLIB::LOG10_FINITE_F128, RTLIB::__log10_finite_ld128);
+    Info.setLibcallImpl(RTLIB::EXP_FINITE_F128, RTLIB::__exp_finite_ld128);
+    Info.setLibcallImpl(RTLIB::EXP2_FINITE_F128, RTLIB::__exp2_finite_ld128);
+    Info.setLibcallImpl(RTLIB::POW_FINITE_F128, RTLIB::__pow_finite_ld128);
   } else {
     Info.setLibcallImpl(RTLIB::LOG_FINITE_F128, RTLIB::Unsupported);
     Info.setLibcallImpl(RTLIB::LOG2_FINITE_F128, RTLIB::Unsupported);
@@ -139,8 +140,12 @@ void RuntimeLibcallsInfo::initLibcalls(const Triple &TT,
                                        EABI EABIVersion, StringRef ABIName) {
   setTargetRuntimeLibcallSets(TT, FloatABI);
 
-  // Use the f128 variants of math functions on x86
-  if (TT.isX86() && TT.isGNUEnvironment())
+  // By default fp128 libcalls get lowered to `*f128` symbols, which is
+  // safest because the symbols are only ever for binary128 on all platforms.
+  // Unfortunately many platforms only have the `*l` (`long double`) symbols,
+  // which vary by architecture and compilation flags, so we have to use them
+  // sometimes.
+  if (TT.f128LibmShouldUseLongDouble())
     setLongDoubleIsF128Libm(*this, /*FiniteOnlyFuncs=*/true);
 
   if (TT.isX86() || TT.isVE() || TT.isARM() || TT.isThumb()) {
diff --git a/llvm/lib/Target/Mips/MipsCCState.cpp b/llvm/lib/Target/Mips/MipsCCState.cpp
index 9e8cd2ea2fd43..9111d142cea39 100644
--- a/llvm/lib/Target/Mips/MipsCCState.cpp
+++ b/llvm/lib/Target/Mips/MipsCCState.cpp
@@ -21,11 +21,16 @@ bool MipsCCState::isF128SoftLibCall(const char *CallSym) {
       "__floatuntitf", "__getf2",      "__gttf2",       "__letf2",
       "__lttf2",       "__multf3",     "__netf2",       "__powitf2",
       "__subtf3",      "__trunctfdf2", "__trunctfsf2",  "__unordtf2",
-      "ceill",         "copysignl",    "cosl",          "exp2l",
-      "expl",          "floorl",       "fmal",          "fmaxl",
-      "fmodl",         "log10l",       "log2l",         "logl",
-      "nearbyintl",    "powl",         "rintl",         "roundl",
-      "sinl",          "sqrtl",        "truncl"};
+      "ceilf128",      "ceill",        "copysignf128",  "copysignl",
+      "cosf128",       "cosl",         "exp2f128",      "exp2l",
+      "expf128",       "expl",         "floorf128",     "floorl",
+      "fmaf128",       "fmal",         "fmaxf128",      "fmaxl",
+      "fmodf128",      "fmodl",        "log10f128",     "log10l",
+      "log2f128",      "log2l",        "logf128",       "logl",
+      "nearbyintf128", "nearbyintl",   "powf128",       "powl",
+      "rintf128",      "rintl",        "roundf128",     "roundl",
+      "sinf128",       "sinl",         "sqrtf128",      "sqrtl",
+      "truncf128",     "truncl"};
 
   // Check that LibCalls is sorted alphabetically.
   auto Comp = [](const char *S1, const char *S2) { return strcmp(S1, S2) < 0; };
diff --git a/llvm/lib/TargetParser/Triple.cpp b/llvm/lib/TargetParser/Triple.cpp
index 0584c941d2e6e..f80d072160c62 100644
--- a/llvm/lib/TargetParser/Triple.cpp
+++ b/llvm/lib/TargetParser/Triple.cpp
@@ -2306,6 +2306,43 @@ ExceptionHandling Triple::getDefaultExceptionHandling() const {
   return ExceptionHandling::None;
 }
 
+bool Triple::f128LibmShouldUseLongDouble() const {
+  // Always prefer to lower to `*f128` symbols when they are likely to be
+  // available, to avoid any inaccuracies or problems from libc config.
+  //
+  // Note that the logic should be kept in sync with Clang's LongDoubleFormat.
+
+  // Windows and Apple never use binary128 as `long double`.
+  if (isOSWindows() || isOSDarwin())
+    return false;
+
+  // PowerPC has a complicated `long double` situation so `*f128` is always
+  // used.
+  if (isPPC())
+    return false;
+
+  // Most 64-bit architectures use binary128, a few are binary128 on both
+  // 64- and 32-bit.
+  if (isAArch64() || isLoongArch() || isRISCV() || isSPARC() || isSystemZ() ||
+      isVE() || isWasm())
+    return true;
+
+  // MIPS64 is usually f128, except on FreeBSD-like operating systems. MIPS32
+  // is f128 with N32 but f64 with the O32 ABI. Triple doesn't know about ABI
+  // here, so allow MIPS32 to hit the safer `ld !== f128` default.
+  if (isMIPS64() && !(isOSFreeBSD() || isOSKFreeBSD() || isOSDragonFly()))
+    return true;
+
+  // Android and Ohos use binary128 on x86_64.
+  if (getArch() == Triple::x86_64 && (isAndroid() || isOHOSFamily()))
+    return true;
+
+  // By default, make the safe assumption that `long double !== f128`. This
+  // also catches x86 (`long double` is x87 `f80`); PowerPC (`long double` is
+  // `f64` or PPC double-double) is already handled above.
+  return false;
+}
+
 // HLSL triple environment orders are relied on in the front end
 static_assert(Triple::Vertex - Triple::Pixel == 1,
               "incorrect HLSL stage order");
diff --git a/llvm/test/CodeGen/AArch64/illegal-float-ops.ll b/llvm/test/CodeGen/AArch64/illegal-float-ops.ll
index 5ec66b784c037..9f0b7ed74e1c5 100644
--- a/llvm/test/CodeGen/AArch64/illegal-float-ops.ll
+++ b/llvm/test/CodeGen/AArch64/illegal-float-ops.ll
@@ -1,5 +1,5 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s
-; RUN: llc -mtriple=aarch64-linux-android -verify-machineinstrs -o - %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-GNU
+; RUN: llc -mtriple=aarch64-linux-android -verify-machineinstrs -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-ANDROID
 
 @varfloat = global float 0.0
 @vardouble = global double 0.0
@@ -22,7 +22,8 @@ define void @test_cos(float %float, double %double, fp128 %fp128) {
 
    %cosfp128 = call fp128 @llvm.cos.f128(fp128 %fp128)
    store fp128 %cosfp128, ptr @varfp128
-; CHECK: bl cosl
+; CHECK-GNU: bl cosf128
+; CHECK-ANDROID: bl cosl
 
   ret void
 }
@@ -44,7 +45,8 @@ define void @test_exp(float %float, double %double, fp128 %fp128) {
 
    %expfp128 = call fp128 @llvm.exp.f128(fp128 %fp128)
    store fp128 %expfp128, ptr @varfp128
-; CHECK: bl expl
+; CHECK-GNU: bl expf128
+; CHECK-ANDROID: bl expl
 
   ret void
 }
@@ -66,7 +68,8 @@ define void @test_exp2(float %float, double %double, fp128 %fp128) {
 
    %exp2fp128 = call fp128 @llvm.exp2.f128(fp128 %fp128)
    store fp128 %exp2fp128, ptr @varfp128
-; CHECK: bl exp2l
+; CHECK-GNU: bl exp2f128
+; CHECK-ANDROID: bl exp2l
   ret void
 
 }
@@ -88,7 +91,8 @@ define void @test_log(float %float, double %double, fp128 %fp128) {
 
    %logfp128 = call fp128 @llvm.log.f128(fp128 %fp128)
    store fp128 %logfp128, ptr @varfp128
-; CHECK: bl logl
+; CHECK-GNU: bl logf128
+; CHECK-ANDROID: bl logl
 
   ret void
 }
@@ -110,7 +114,8 @@ define void @test_log2(float %float, double %double, fp128 %fp128) {
 
    %log2fp128 = call fp128 @llvm.log2.f128(fp128 %fp128)
    store fp128 %log2fp128, ptr @varfp128
-; CHECK: bl log2l
+; CHECK-GNU: bl log2f128
+; CHECK-ANDROID: bl log2l
   ret void
 
 }
@@ -132,7 +137,8 @@ define void @test_log10(float %float, double %double, fp128 %fp128) {
 
    %log10fp128 = call fp128 @llvm.log10.f128(fp128 %fp128)
    store fp128 %log10fp128, ptr @varfp128
-; CHECK: bl log10l
+; CHECK-GNU: bl log10f128
+; CHECK-ANDROID: bl log10l
 
   ret void
 }
@@ -154,7 +160,8 @@ define void @test_sin(float %float, double %double, fp128 %fp128) {
 
    %sinfp128 = call fp128 @llvm.sin.f128(fp128 %fp128)
    store fp128 %sinfp128, ptr @varfp128
-; CHECK: bl sinl
+; CHECK-GNU: bl sinf128
+; CHECK-ANDROID: bl sinl
   ret void
 
 }
@@ -176,7 +183,8 @@ define void @test_tan(float %float, double %double, fp128 %fp128) {
 
    %tanfp128 = call fp128 @llvm.tan.f128(fp128 %fp128)
    store fp128 %tanfp128, ptr @varfp128
-; CHECK: bl tanl
+; CHECK-GNU: bl tanf128
+; CHECK-ANDROID: bl tanl
   ret void
 }
 
@@ -197,7 +205,8 @@ define void @test_acos(float %float, double %double, fp128 %fp128) {
 
    %acosfp128 = call fp128 @llvm.acos.f128(fp128 %fp128)
    store fp128 %acosfp128, ptr @varfp128
-; CHECK: bl acosl
+; CHECK-GNU: bl acosf128
+; CHECK-ANDROID: bl acosl
   ret void
 }
 
@@ -218,7 +227,8 @@ define void @test_asin(float %float, double %double, fp128 %fp128) {
 
    %asinfp128 = call fp128 @llvm.asin.f128(fp128 %fp128)
    store fp128 %asinfp128, ptr @varfp128
-; CHECK: bl asinl
+; CHECK-GNU: bl asinf128
+; CHECK-ANDROID: bl asinl
   ret void
 }
 
@@ -239,7 +249,8 @@ define void @test_atan(float %float, double %double, fp128 %fp128) {
 
    %atanfp128 = call fp128 @llvm.atan.f128(fp128 %fp128)
    store fp128 %atanfp128, ptr @varfp128
-; CHECK: bl atanl
+; CHECK-GNU: bl atanf128
+; CHECK-ANDROID: bl atanl
   ret void
 }
 
@@ -260,7 +271,8 @@ define void @test_atan2(float %float1, double %double1, fp128 %fp1281, float %fl
 
    %atan2fp128 = call fp128 @llvm.atan2.f128(fp128 %fp1281, fp128 %fp1282)
    store fp128 %atan2fp128, ptr @varfp128
-; CHECK: bl atan2l
+; CHECK-GNU: bl atan2f128
+; CHECK-ANDROID: bl atan2l
   ret void
 }
 
@@ -281,7 +293,8 @@ define void @test_cosh(float %float, double %double, fp128 %fp128) {
 
    %coshfp128 = call fp128 @llvm.cosh.f128(fp128 %fp128)
    store fp128 %coshfp128, ptr @varfp128
-; CHECK: bl coshl
+; CHECK-GNU: bl coshf128
+; CHECK-ANDROID: bl coshl
   ret void
 }
 
@@ -302,7 +315,8 @@ define void @test_sinh(float %float, double %double, fp128 %fp128) {
 
    %sinhfp128 = call fp128 @llvm.sinh.f128(fp128 %fp128)
    store fp128 %sinhfp128, ptr @varfp128
-; CHECK: bl sinhl
+; CHECK-GNU: bl sinhf128
+; CHECK-ANDROID: bl sinhl
   ret void
 }
 
@@ -323,7 +337,8 @@ define void @test_tanh(float %float, double %double, fp128 %fp128) {
 
    %tanhfp128 = call fp128 @llvm.tanh.f128(fp128 %fp128)
    store fp128 %tanhfp128, ptr @varfp128
-; CHECK: bl tanhl
+; CHECK-GNU: bl tanhf128
+; CHECK-ANDROID: bl tanhl
   ret void
 }
 
@@ -344,7 +359,8 @@ define void @test_pow(float %float, double %double, fp128 %fp128) {
 
    %powfp128 = call fp128 @llvm.pow.f128(fp128 %fp128, fp128 %fp128)
    store fp128 %powfp128, ptr @varfp128
-; CHECK: bl powl
+; CHECK-GNU: bl powf128
+; CHECK-ANDROID: bl powl
 
   ret void
 }
@@ -384,7 +400,8 @@ define void @test_frem(float %float, double %double, fp128 %fp128) {
 
   %fremfp128 = frem fp128 %fp128, %fp128
   store fp128 %fremfp128, ptr @varfp128
-; CHECK: bl fmodl
+; CHECK-GNU: bl fmodf128
+; CHECK-ANDROID: bl fmodl
 
   ret void
 }
@@ -396,7 +413,8 @@ define void @test_fma(fp128 %fp128) {
 
   %fmafp128 = call fp128 @llvm.fma.f128(fp128 %fp128, fp128 %fp128, fp128 %fp128)
   store fp128 %fmafp128, ptr @varfp128
-; CHECK: bl fmal
+; CHECK-GNU: bl fmaf128
+; CHECK-ANDROID: bl fmal
 
   ret void
 }
diff --git a/llvm/test/CodeGen/AArch64/sincos-expansion.ll b/llvm/test/CodeGen/AArch64/sincos-expansion.ll
index 41ee40378b4fc..05e753aca0836 100644
--- a/llvm/test/CodeGen/AArch64/sincos-expansion.ll
+++ b/llvm/test/CodeGen/AArch64/sincos-expansion.ll
@@ -43,7 +43,7 @@ define fp128 @test_sincos_f128(fp128 %f) {
   %sin = call fp128 @sinl(fp128 %f) readnone
   %cos = call fp128 @cosl(fp128 %f) readnone
   %val = fadd fp128 %sin, %cos
-; CHECK: bl sincosl
+; CHECK: bl sincosf128
   ret fp128 %val
 }
 
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization-nan.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization-nan.ll
index 1d295a30a994b..644bab3698926 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization-nan.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization-nan.ll
@@ -583,7 +583,7 @@ define float @test_v3f32_ninf(<3 x float> %a) nounwind {
 define fp128 @test_v2f128(<2 x fp128> %a) nounwind {
 ; CHECK-LABEL: test_v2f128:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    b fmaxl
+; CHECK-NEXT:    b fmaxf128
   %b = call fp128 @llvm.vector.reduce.fmax.v2f128(<2 x fp128> %a)
   ret fp128 %b
 }
diff --git a/llvm/test/CodeGen/ARM/ldexp.ll b/llvm/test/CodeGen/ARM/ldexp.ll
index cdf91eb902e05..2661a1e270798 100644
--- a/llvm/test/CodeGen/ARM/ldexp.ll
+++ b/llvm/test/CodeGen/ARM/ldexp.ll
@@ -38,7 +38,7 @@ entry:
 declare float @ldexpf(float, i32) memory(none)
 
 define fp128 @testExpl(fp128 %val, i32 %a) {
-; LINUX:    bl ldexpl
+; LINUX:    bl ldexpf128
 ; WINDOWS:    b.w ldexpl
 entry:
   %call = tail call fp128 @ldexpl(fp128 %val, i32 %a)
diff --git a/llvm/test/CodeGen/ARM/llvm.sincos.ll b/llvm/test/CodeGen/ARM/llvm.sincos.ll
index 9628405df6bcb..da1ad5ecbe008 100644
--- a/llvm/test/CodeGen/ARM/llvm.sincos.ll
+++ b/llvm/test/CodeGen/ARM/llvm.sincos.ll
@@ -206,7 +206,7 @@ define { fp128, fp128 } @test_sincos_f128(fp128 %a) {
 ; CHECK-NEXT:    mov r0, r1
 ; CHECK-NEXT:    mov r1, r2
 ; CHECK-NEXT:    mov r2, r12
-; CHECK-NEXT:    bl sincosl
+; CHECK-NEXT:    bl sincosf128
 ; CHECK-NEXT:    ldrd r2, r3, [sp, #16]
 ; CHECK-NEXT:    ldrd r12, r1, [sp, #8]
 ; CHECK-NEXT:    str r3, [r4, #28]
diff --git a/llvm/test/CodeGen/Generic/fp128-math-libcalls.ll b/llvm/test/CodeGen/Generic/fp128-math-libcalls.ll
index 159b3bda6b794..b116c26d3ccbb 100644
--- a/llvm/test/CodeGen/Generic/fp128-math-libcalls.ll
+++ b/llvm/test/CodeGen/Generic/fp128-math-libcalls.ll
@@ -11,25 +11,21 @@
 ; * musl (no f128 symbols available)
 ; * Windows and MacOS (no f128 symbols, long double == f64)
 
-; FIXME(#44744): arm32, x86-{32,64} musl targets, MacOS, and Windows don't have
-; f128 long double. They should be passing with CHECK-F128 rather than
-; CHECK-USELD.
-
 ; RUN: %if aarch64-registered-target %{ llc < %s -mtriple=aarch64-unknown-linux-gnu    -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-USELD %}
 ; RUN: %if aarch64-registered-target %{ llc < %s -mtriple=aarch64-unknown-linux-musl   -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-USELD %}
 ; RUN: %if aarch64-registered-target %{ llc < %s -mtriple=aarch64-unknown-none         -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-USELD %}
-; RUN: %if aarch64-registered-target %{ llc < %s -mtriple=arm64-apple-macosx           -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-USELD %}
-; RUN: %if arm-registered-target     %{ llc < %s -mtriple=arm-none-eabi                -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-USELD %}
-; RUN: %if arm-registered-target     %{ llc < %s -mtriple=arm-unknown-linux-gnueabi    -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-USELD %}
+; RUN: %if aarch64-registered-target %{ llc < %s -mtriple=arm64-apple-macosx           -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-F128  %}
+; RUN: %if arm-registered-target     %{ llc < %s -mtriple=arm-none-eabi                -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-F128  %}
+; RUN: %if arm-registered-target     %{ llc < %s -mtriple=arm-unknown-linux-gnueabi    -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-F128  %}
 ; RUN: %if powerpc-registered-target %{ llc < %s -mtriple=powerpc-unknown-linux-gnu    -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-F128  %}
 ; RUN: %if powerpc-registered-target %{ llc < %s -mtriple=powerpc64-unknown-linux-gnu  -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-F128  %}
 ; RUN: %if powerpc-registered-target %{ llc < %s -mtriple=powerpc64-unknown-linux-musl -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-F128  %}
 ; RUN: %if riscv-registered-target   %{ llc < %s -mtriple=riscv32-unknown-linux-gnu    -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-USELD %}
 ; RUN: %if systemz-registered-target %{ llc < %s -mtriple=s390x-unknown-linux-gnu      -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-S390X %}
 ; RUN: %if x86-registered-target     %{ llc < %s -mtriple=i686-unknown-linux-gnu       -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-F128  %}
-; RUN: %if x86-registered-target     %{ llc < %s -mtriple=i686-unknown-linux-musl      -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-USELD %}
+; RUN: %if x86-registered-target     %{ llc < %s -mtriple=i686-unknown-linux-musl      -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-F128  %}
 ; RUN: %if x86-registered-target     %{ llc < %s -mtriple=x86_64-unknown-linux-gnu     -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-F128  %}
-; RUN: %if x86-registered-target     %{ llc < %s -mtriple=x86_64-unknown-linux-musl    -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-USELD %}
+; RUN: %if x86-registered-target     %{ llc < %s -mtriple=x86_64-unknown-linux-musl    -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-F128  %}
 ;
 ; FIXME(#144006): Windows-MSVC should also be run but has a ldexp selection
 ; failure.
diff --git a/llvm/test/CodeGen/Mips/cconv/fmaxl_call.ll b/llvm/test/CodeGen/Mips/cconv/fmaxl_call.ll
index 0e3078edae45d..a2dfede48226c 100644
--- a/llvm/test/CodeGen/Mips/cconv/fmaxl_call.ll
+++ b/llvm/test/CodeGen/Mips/cconv/fmaxl_call.ll
@@ -8,7 +8,7 @@ define fp128 @call_fmaxl(fp128 %a, fp128 %b) {
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    sd $ra, 8($sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    .cfi_offset 31, -8
-; CHECK-NEXT:    jal fmaxl
+; CHECK-NEXT:    jal fmaxf128
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    mov.d $f12, $f0
 ; CHECK-NEXT:    jal f
diff --git a/llvm/test/CodeGen/Mips/cconv/roundl-call.ll b/llvm/test/CodeGen/Mips/cconv/roundl-call.ll
index 98bda89546e43..895055bac0ae2 100644
--- a/llvm/test/CodeGen/Mips/cconv/roundl-call.ll
+++ b/llvm/test/CodeGen/Mips/cconv/roundl-call.ll
@@ -25,8 +25,8 @@
 define void @roundl_call(fp128 %value) {
 entry:
 ; ALL-LABEL: roundl_call:
-; N32:          lw      $25, %call16(roundl)($gp)
-; N64:          ld      $25, %call16(roundl)($gp)
+; N32:          lw      $25, %call16(roundf128)($gp)
+; N64:          ld      $25, %call16(roundf128)($gp)
 
 ; SOFT-FLOAT:   sd      $4, 8(${{[0-9]+}})
 ; SOFT-FLOAT:   sd      $2, 0(${{[0-9]+}})
diff --git a/llvm/test/CodeGen/Mips/llrint-conv.ll b/llvm/test/CodeGen/Mips/llrint-conv.ll
index dcb4e5657e80b..6848a8f7e733f 100644
--- a/llvm/test/CodeGen/Mips/llrint-conv.ll
+++ b/llvm/test/CodeGen/Mips/llrint-conv.ll
@@ -36,7 +36,7 @@ entry:
 
 define signext i32 @testmswl(fp128 %x) {
 ; CHECK-LABEL: testmswl:
-; CHECK:       jal     llrintl
+; CHECK:       jal     llrintf128
 entry:
   %0 = tail call i64 @llvm.llrint.f128(fp128 %x)
   %conv = trunc i64 %0 to i32
@@ -45,7 +45,7 @@ entry:
 
 define i64 @testmsll(fp128 %x) {
 ; CHECK-LABEL: testmsll:
-; CHECK:       jal     llrintl
+; CHECK:       jal     llrintf128
 entry:
   %0 = tail call i64 @llvm.llrint.f128(fp128 %x)
   ret i64 %0
diff --git a/llvm/test/CodeGen/Mips/llround-conv.ll b/llvm/test/CodeGen/Mips/llround-conv.ll
index 77482a8663319..9ad97ccbda389 100644
--- a/llvm/test/CodeGen/Mips/llround-conv.ll
+++ b/llvm/test/CodeGen/Mips/llround-conv.ll
@@ -36,7 +36,7 @@ entry:
 
 define signext i32 @testmswl(fp128 %x) {
 ; CHECK-LABEL: testmswl:
-; CHECK:       jal     llroundl
+; CHECK:       jal     llroundf128
 entry:
   %0 = tail call i64 @llvm.llround.f128(fp128 %x)
   %conv = trunc i64 %0 to i32
@@ -45,7 +45,7 @@ entry:
 
 define i64 @testmsll(fp128 %x) {
 ; CHECK-LABEL: testmsll:
-; CHECK:       jal     llroundl
+; CHECK:       jal     llroundf128
 entry:
   %0 = tail call i64 @llvm.llround.f128(fp128 %x)
   ret i64 %0
diff --git a/llvm/test/CodeGen/Mips/lrint-conv.ll b/llvm/test/CodeGen/Mips/lrint-conv.ll
index bd3f7b3babe10..98748289d4047 100644
--- a/llvm/test/CodeGen/Mips/lrint-conv.ll
+++ b/llvm/test/CodeGen/Mips/lrint-conv.ll
@@ -36,7 +36,7 @@ entry:
 
 define signext i32 @testmswl(fp128 %x) {
 ; CHECK-LABEL: testmswl:
-; CHECK:       jal     lrintl
+; CHECK:       jal     lrintf128
 entry:
   %0 = tail call i64 @llvm.lrint.i64.f128(fp128 %x)
   %conv = trunc i64 %0 to i32
@@ -45,7 +45,7 @@ entry:
 
 define signext i64 @testmsll(fp128 %x) {
 ; CHECK-LABEL: testmsll:
-; CHECK:       jal     lrintl
+; CHECK:       jal     lrintf128
 entry:
   %0 = tail call i64 @llvm.lrint.i64.f128(fp128 %x)
   ret i64 %0
diff --git a/llvm/test/CodeGen/Mips/lround-conv.ll b/llvm/test/CodeGen/Mips/lround-conv.ll
index acca565c0fad2..f8f5228433694 100644
--- a/llvm/test/CodeGen/Mips/lround-conv.ll
+++ b/llvm/test/CodeGen/Mips/lround-conv.ll
@@ -36,7 +36,7 @@ entry:
 
 define signext i32 @testmswl(fp128 %x) {
 ; CHECK-LABEL: testmswl:
-; CHECK:       jal     lroundl
+; CHECK:       jal     lroundf128
 entry:
   %0 = tail call i64 @llvm.lround.i64.f128(fp128 %x)
   %conv = trunc i64 %0 to i32
@@ -45,7 +45,7 @@ entry:
 
 define signext i64 @testmsll(fp128 %x) {
 ; CHECK-LABEL: testmsll:
-; CHECK:       jal     lroundl
+; CHECK:       jal     lroundf128
 entry:
   %0 = tail call i64 @llvm.lround.i64.f128(fp128 %x)
   ret i64 %0
diff --git a/llvm/test/CodeGen/PowerPC/f128-arith.ll b/llvm/test/CodeGen/PowerPC/f128-arith.ll
index f9c953d483ff2..4eb66fd04a945 100644
--- a/llvm/test/CodeGen/PowerPC/f128-arith.ll
+++ b/llvm/test/CodeGen/PowerPC/f128-arith.ll
@@ -425,14 +425,19 @@ define fp128 @qp_sincos(ptr nocapture readonly %a) nounwind {
 ; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    stdu r1, -64(r1)
 ; CHECK-NEXT:    std r0, 80(r1)
-; CHECK-NEXT:    addi r5, r1, 48
-; CHECK-NEXT:    addi r6, r1, 32
-; CHECK-NEXT:    lxv v2, 0(r3)
-; CHECK-NEXT:    bl sincosf128
+; CHECK-NEXT:    stxv v31, 48(r1) # 16-byte Folded Spill
+; CHECK-NEXT:    stxv v30, 32(r1) # 16-byte Folded Spill
+; CHECK-NEXT:    lxv v31, 0(r3)
+; CHECK-NEXT:    vmr v2, v31
+; CHECK-NEXT:    bl cosf128
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    lxv v2, 48(r1)
-; CHECK-NEXT:    lxv v3, 32(r1)
-; CHECK-NEXT:    xsmulqp v2, v3, v2
+; CHECK-NEXT:    vmr v30, v2
+; CHECK-NEXT:    vmr v2, v31
+; CHECK-NEXT:    bl sinf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    xsmulqp v2, v30, v2
+; CHECK-NEXT:    lxv v31, 48(r1) # 16-byte Folded Reload
+; CHECK-NEXT:    lxv v30, 32(r1) # 16-byte Folded Reload
 ; CHECK-NEXT:    addi r1, r1, 64
 ; CHECK-NEXT:    ld r0, 16(r1)
 ; CHECK-NEXT:    mtlr r0
@@ -441,28 +446,31 @@ define fp128 @qp_sincos(ptr nocapture readonly %a) nounwind {
 ; CHECK-P8-LABEL: qp_sincos:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    mflr r0
-; CHECK-P8-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-P8-NEXT:    stdu r1, -96(r1)
-; CHECK-P8-NEXT:    std r0, 112(r1)
-; CHECK-P8-NEXT:    addi r30, r1, 48
-; CHECK-P8-NEXT:    addi r29, r1, 32
+; CHECK-P8-NEXT:    stdu r1, -80(r1)
+; CHECK-P8-NEXT:    std r0, 96(r1)
+; CHECK-P8-NEXT:    li r4, 48
 ; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
-; CHECK-P8-NEXT:    mr r5, r30
-; CHECK-P8-NEXT:    mr r6, r29
-; CHECK-P8-NEXT:    xxswapd v2, vs0
-; CHECK-P8-NEXT:    bl sincosf128
+; CHECK-P8-NEXT:    stxvd2x v30, r1, r4 # 16-byte Folded Spill
+; CHECK-P8-NEXT:    li r4, 64
+; CHECK-P8-NEXT:    stxvd2x v31, r1, r4 # 16-byte Folded Spill
+; CHECK-P8-NEXT:    xxswapd v31, vs0
+; CHECK-P8-NEXT:    vmr v2, v31
+; CHECK-P8-NEXT:    bl cosf128
 ; CHECK-P8-NEXT:    nop
-; CHECK-P8-NEXT:    lxvd2x vs0, 0, r29
-; CHECK-P8-NEXT:    xxswapd v2, vs0
-; CHECK-P8-NEXT:    lxvd2x vs0, 0, r30
-; CHECK-P8-NEXT:    xxswapd v3, vs0
+; CHECK-P8-NEXT:    vmr v30, v2
+; CHECK-P8-NEXT:    vmr v2, v31
+; CHECK-P8-NEXT:    bl sinf128
+; CHECK-P8-NEXT:    nop
+; CHECK-P8-NEXT:    vmr v3, v2
+; CHECK-P8-NEXT:    vmr v2, v30
 ; CHECK-P8-NEXT:    bl __mulkf3
 ; CHECK-P8-NEXT:    nop
-; CHECK-P8-NEXT:    addi r1, r1, 96
+; CHECK-P8-NEXT:    li r3, 64
+; CHECK-P8-NEXT:    lxvd2x v31, r1, r3 # 16-byte Folded Reload
+; CHECK-P8-NEXT:    li r3, 48
+; CHECK-P8-NEXT:    lxvd2x v30, r1, r3 # 16-byte Folded Reload
+; CHECK-P8-NEXT:    addi r1, r1, 80
 ; CHECK-P8-NEXT:    ld r0, 16(r1)
-; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
-; CHECK-P8-NEXT:    ld r29, -24(r1) # 8-byte Folded Reload
 ; CHECK-P8-NEXT:    mtlr r0
 ; CHECK-P8-NEXT:    blr
 entry:
diff --git a/llvm/test/CodeGen/SystemZ/atomicrmw-fmax-03.ll b/llvm/test/CodeGen/SystemZ/atomicrmw-fmax-03.ll
index 21e7c6e586dfe..83f66d0ef15f4 100644
--- a/llvm/test/CodeGen/SystemZ/atomicrmw-fmax-03.ll
+++ b/llvm/test/CodeGen/SystemZ/atomicrmw-fmax-03.ll
@@ -19,7 +19,7 @@ define void @f1(ptr %ret, ptr %src, ptr %b) {
 ; CHECK: la %r4, 160(%r15)
 ; CHECK: std [[FSL]], 176(%r15)
 ; CHECK: std [[FSH]], 184(%r15)
-; CHECK: brasl %r14, fmaxl@PLT
+; CHECK: brasl %r14, fmaxf128@PLT
 ; CHECK: lg [[RH:%r[0-9]+]], 200(%r15)
 ; CHECK: lg [[RL:%r[0-9]+]], 192(%r15)
 ; CHECK: lgdr [[RSH:%r[0-9]+]], [[FSH]]
diff --git a/llvm/test/CodeGen/SystemZ/atomicrmw-fmin-03.ll b/llvm/test/CodeGen/SystemZ/atomicrmw-fmin-03.ll
index 1c6f8e20aa4f8..c48ba3ff661f3 100644
--- a/llvm/test/CodeGen/SystemZ/atomicrmw-fmin-03.ll
+++ b/llvm/test/CodeGen/SystemZ/atomicrmw-fmin-03.ll
@@ -19,7 +19,7 @@ define void @f1(ptr %ret, ptr %src, ptr %b) {
 ; CHECK: la %r4, 160(%r15)
 ; CHECK: std [[FSL]], 176(%r15)
 ; CHECK: std [[FSH]], 184(%r15)
-; CHECK: brasl %r14, fminl@PLT
+; CHECK: brasl %r14, fminf128@PLT
 ; CHECK: lg [[RH:%r[0-9]+]], 200(%r15)
 ; CHECK: lg [[RL:%r[0-9]+]], 192(%r15)
 ; CHECK: lgdr [[RSH:%r[0-9]+]], [[FSH]]
diff --git a/llvm/test/CodeGen/SystemZ/fp-libcall.ll b/llvm/test/CodeGen/SystemZ/fp-libcall.ll
index 5069b9b257b80..51f91315b284d 100644
--- a/llvm/test/CodeGen/SystemZ/fp-libcall.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-libcall.ll
@@ -39,7 +39,7 @@ define double @f5(double %x, double %y) {
 
 define fp128 @f6(fp128 %x, fp128 %y) {
 ; CHECK-LABEL: f6:
-; CHECK: brasl %r14, powl@PLT
+; CHECK: brasl %r14, powf128@PLT
   %tmp = call fp128 @llvm.pow.f128(fp128 %x, fp128 %y)
   ret fp128 %tmp
 }
@@ -60,7 +60,7 @@ define double @f8(double %x) {
 
 define fp128 @f9(fp128 %x) {
 ; CHECK-LABEL: f9:
-; CHECK: brasl %r14, sinl@PLT
+; CHECK: brasl %r14, sinf128@PLT
   %tmp = call fp128 @llvm.sin.f128(fp128 %x)
   ret fp128 %tmp
 }
@@ -81,7 +81,7 @@ define double @f11(double %x) {
 
 define fp128 @f12(fp128 %x) {
 ; CHECK-LABEL: f12:
-; CHECK: brasl %r14, cosl@PLT
+; CHECK: brasl %r14, cosf128@PLT
   %tmp = call fp128 @llvm.cos.f128(fp128 %x)
   ret fp128 %tmp
 }
@@ -102,7 +102,7 @@ define double @f14(double %x) {
 
 define fp128 @f15(fp128 %x) {
 ; CHECK-LABEL: f15:
-; CHECK: brasl %r14, expl@PLT
+; CHECK: brasl %r14, expf128@PLT
   %tmp = call fp128 @llvm.exp.f128(fp128 %x)
   ret fp128 %tmp
 }
@@ -123,7 +123,7 @@ define double @f17(double %x) {
 
 define fp128 @f18(fp128 %x) {
 ; CHECK-LABEL: f18:
-; CHECK: brasl %r14, exp2l@PLT
+; CHECK: brasl %r14, exp2f128@PLT
   %tmp = call fp128 @llvm.exp2.f128(fp128 %x)
   ret fp128 %tmp
 }
@@ -144,7 +144,7 @@ define double @f20(double %x) {
 
 define fp128 @f21(fp128 %x) {
 ; CHECK-LABEL: f21:
-; CHECK: brasl %r14, logl@PLT
+; CHECK: brasl %r14, logf128@PLT
   %tmp = call fp128 @llvm.log.f128(fp128 %x)
   ret fp128 %tmp
 }
@@ -165,7 +165,7 @@ define double @f23(double %x) {
 
 define fp128 @f24(fp128 %x) {
 ; CHECK-LABEL: f24:
-; CHECK: brasl %r14, log2l@PLT
+; CHECK: brasl %r14, log2f128@PLT
   %tmp = call fp128 @llvm.log2.f128(fp128 %x)
   ret fp128 %tmp
 }
@@ -186,7 +186,7 @@ define double @f26(double %x) {
 
 define fp128 @f27(fp128 %x) {
 ; CHECK-LABEL: f27:
-; CHECK: brasl %r14, log10l@PLT
+; CHECK: brasl %r14, log10f128@PLT
   %tmp = call fp128 @llvm.log10.f128(fp128 %x)
   ret fp128 %tmp
 }
@@ -207,7 +207,7 @@ define double @f29(double %x, double %y) {
 
 define fp128 @f30(fp128 %x, fp128 %y) {
 ; CHECK-LABEL: f30:
-; CHECK: brasl %r14, fminl@PLT
+; CHECK: brasl %r14, fminf128@PLT
   %tmp = call fp128 @llvm.minnum.f128(fp128 %x, fp128 %y)
   ret fp128 %tmp
 }
@@ -238,7 +238,7 @@ define double @f32(double %x, double %y) {
 
 define fp128 @f33(fp128 %x, fp128 %y) {
 ; CHECK-LABEL: f33:
-; CHECK: brasl %r14, fmaxl@PLT
+; CHECK: brasl %r14, fmaxf128@PLT
   %tmp = call fp128 @llvm.maxnum.f128(fp128 %x, fp128 %y)
   ret fp128 %tmp
 }
diff --git a/llvm/test/CodeGen/SystemZ/fp-mul-13.ll b/llvm/test/CodeGen/SystemZ/fp-mul-13.ll
index e6870fb9a81db..70cd966fbb1b4 100644
--- a/llvm/test/CodeGen/SystemZ/fp-mul-13.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-mul-13.ll
@@ -4,7 +4,7 @@ declare fp128 @llvm.fma.f128(fp128 %f1, fp128 %f2, fp128 %f3)
 
 define void @f1(ptr %ptr1, ptr %ptr2, ptr %ptr3, ptr %dst) {
 ; CHECK-LABEL: f1:
-; CHECK: brasl %r14, fmal
+; CHECK: brasl %r14, fmaf128
 ; CHECK: br %r14
   %f1 = load fp128, ptr %ptr1
   %f2 = load fp128, ptr %ptr2
diff --git a/llvm/test/CodeGen/SystemZ/fp-round-01.ll b/llvm/test/CodeGen/SystemZ/fp-round-01.ll
index 21b354c7a83c4..4a3ea23966d80 100644
--- a/llvm/test/CodeGen/SystemZ/fp-round-01.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-round-01.ll
@@ -70,7 +70,7 @@ define double @f5(double %f) {
 declare fp128 @llvm.nearbyint.f128(fp128 %f)
 define void @f6(ptr %ptr) {
 ; CHECK-LABEL: f6:
-; CHECK: brasl %r14, nearbyintl@PLT
+; CHECK: brasl %r14, nearbyintf128@PLT
 ; CHECK: br %r14
   %src = load fp128, ptr %ptr
   %res = call fp128 @llvm.nearbyint.f128(fp128 %src)
@@ -114,7 +114,7 @@ define double @f8(double %f) {
 declare fp128 @llvm.floor.f128(fp128 %f)
 define void @f9(ptr %ptr) {
 ; CHECK-LABEL: f9:
-; CHECK: brasl %r14, floorl@PLT
+; CHECK: brasl %r14, floorf128@PLT
 ; CHECK: br %r14
   %src = load fp128, ptr %ptr
   %res = call fp128 @llvm.floor.f128(fp128 %src)
@@ -158,7 +158,7 @@ define double @f11(double %f) {
 declare fp128 @llvm.ceil.f128(fp128 %f)
 define void @f12(ptr %ptr) {
 ; CHECK-LABEL: f12:
-; CHECK: brasl %r14, ceill@PLT
+; CHECK: brasl %r14, ceilf128@PLT
 ; CHECK: br %r14
   %src = load fp128, ptr %ptr
   %res = call fp128 @llvm.ceil.f128(fp128 %src)
@@ -190,7 +190,7 @@ define double @f14(double %f) {
 declare fp128 @llvm.trunc.f128(fp128 %f)
 define void @f15(ptr %ptr) {
 ; CHECK-LABEL: f15:
-; CHECK: brasl %r14, truncl@PLT
+; CHECK: brasl %r14, truncf128@PLT
 ; CHECK: br %r14
   %src = load fp128, ptr %ptr
   %res = call fp128 @llvm.trunc.f128(fp128 %src)
@@ -234,7 +234,7 @@ define double @f17(double %f) {
 declare fp128 @llvm.round.f128(fp128 %f)
 define void @f18(ptr %ptr) {
 ; CHECK-LABEL: f18:
-; CHECK: brasl %r14, roundl@PLT
+; CHECK: brasl %r14, roundf128@PLT
 ; CHECK: br %r14
   %src = load fp128, ptr %ptr
   %res = call fp128 @llvm.round.f128(fp128 %src)
@@ -266,7 +266,7 @@ define double @f20(double %f) {
 declare fp128 @llvm.roundeven.f128(fp128 %f)
 define void @f21(ptr %ptr) {
 ; CHECK-LABEL: f21:
-; CHECK: brasl %r14, roundevenl@PLT
+; CHECK: brasl %r14, roundevenf128@PLT
 ; CHECK: br %r14
   %src = load fp128, ptr %ptr
   %res = call fp128 @llvm.roundeven.f128(fp128 %src)
diff --git a/llvm/test/CodeGen/SystemZ/fp-sincos-01.ll b/llvm/test/CodeGen/SystemZ/fp-sincos-01.ll
index 4a38d7afba2c9..03da3e4b8c5ac 100644
--- a/llvm/test/CodeGen/SystemZ/fp-sincos-01.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-sincos-01.ll
@@ -51,7 +51,7 @@ define double @f2_errno(double %x) {
 
 define fp128 @f3(fp128 %x) {
 ; CHECK-OPT-LABEL: f3:
-; CHECK-OPT: brasl %r14, sincosl@PLT
+; CHECK-OPT: brasl %r14, sincosf128@PLT
 ; CHECK-OPT: axbr
   %tmp1 = call fp128 @sinl(fp128 %x) readnone
   %tmp2 = call fp128 @cosl(fp128 %x) readnone
diff --git a/llvm/test/CodeGen/SystemZ/fp-strict-mul-13.ll b/llvm/test/CodeGen/SystemZ/fp-strict-mul-13.ll
index 4247c97533746..44ef22add70f6 100644
--- a/llvm/test/CodeGen/SystemZ/fp-strict-mul-13.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-strict-mul-13.ll
@@ -4,7 +4,7 @@ declare fp128 @llvm.experimental.constrained.fma.f128(fp128 %f1, fp128 %f2, fp12
 
 define void @f1(ptr %ptr1, ptr %ptr2, ptr %ptr3, ptr %dst) #0 {
 ; CHECK-LABEL: f1:
-; CHECK: brasl %r14, fmal
+; CHECK: brasl %r14, fmaf128
 ; CHECK: br %r14
   %f1 = load fp128, ptr %ptr1
   %f2 = load fp128, ptr %ptr2
diff --git a/llvm/test/CodeGen/SystemZ/fp-strict-round-01.ll b/llvm/test/CodeGen/SystemZ/fp-strict-round-01.ll
index 95a5fa1af832b..5c825161541a1 100644
--- a/llvm/test/CodeGen/SystemZ/fp-strict-round-01.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-strict-round-01.ll
@@ -88,7 +88,7 @@ define double @f5(double %f) #0 {
 declare fp128 @llvm.experimental.constrained.nearbyint.f128(fp128, metadata, metadata)
 define void @f6(ptr %ptr) #0 {
 ; CHECK-LABEL: f6:
-; CHECK: brasl %r14, nearbyintl@PLT
+; CHECK: brasl %r14, nearbyintf128@PLT
 ; CHECK: br %r14
   %src = load fp128, ptr %ptr
   %res = call fp128 @llvm.experimental.constrained.nearbyint.f128(
@@ -141,7 +141,7 @@ define double @f8(double %f) #0 {
 declare fp128 @llvm.experimental.constrained.floor.f128(fp128, metadata)
 define void @f9(ptr %ptr) #0 {
 ; CHECK-LABEL: f9:
-; CHECK: brasl %r14, floorl@PLT
+; CHECK: brasl %r14, floorf128@PLT
 ; CHECK: br %r14
   %src = load fp128, ptr %ptr
   %res = call fp128 @llvm.experimental.constrained.floor.f128(
@@ -193,7 +193,7 @@ define double @f11(double %f) #0 {
 declare fp128 @llvm.experimental.constrained.ceil.f128(fp128, metadata)
 define void @f12(ptr %ptr) #0 {
 ; CHECK-LABEL: f12:
-; CHECK: brasl %r14, ceill@PLT
+; CHECK: brasl %r14, ceilf128@PLT
 ; CHECK: br %r14
   %src = load fp128, ptr %ptr
   %res = call fp128 @llvm.experimental.constrained.ceil.f128(
@@ -245,7 +245,7 @@ define double @f14(double %f) #0 {
 declare fp128 @llvm.experimental.constrained.trunc.f128(fp128, metadata)
 define void @f15(ptr %ptr) #0 {
 ; CHECK-LABEL: f15:
-; CHECK: brasl %r14, truncl@PLT
+; CHECK: brasl %r14, truncf128@PLT
 ; CHECK: br %r14
   %src = load fp128, ptr %ptr
   %res = call fp128 @llvm.experimental.constrained.trunc.f128(
@@ -297,7 +297,7 @@ define double @f17(double %f) #0 {
 declare fp128 @llvm.experimental.constrained.round.f128(fp128, metadata)
 define void @f18(ptr %ptr) #0 {
 ; CHECK-LABEL: f18:
-; CHECK: brasl %r14, roundl@PLT
+; CHECK: brasl %r14, roundf128@PLT
 ; CHECK: br %r14
   %src = load fp128, ptr %ptr
   %res = call fp128 @llvm.experimental.constrained.round.f128(
@@ -335,7 +335,7 @@ define double @f20(double %f) #0 {
 declare fp128 @llvm.experimental.constrained.roundeven.f128(fp128, metadata)
 define void @f21(ptr %ptr) #0 {
 ; CHECK-LABEL: f21:
-; CHECK: brasl %r14, roundevenl@PLT
+; CHECK: brasl %r14, roundevenf128@PLT
 ; CHECK: br %r14
   %src = load fp128, ptr %ptr
   %res = call fp128 @llvm.experimental.constrained.roundeven.f128(
diff --git a/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll b/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll
index a7eea04181f60..aa3df9bbcbc81 100644
--- a/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll
+++ b/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll
@@ -473,7 +473,7 @@ define fp128 @fma(fp128 %x, fp128 %y, fp128 %z) nounwind strictfp {
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %r8
-; WIN-NEXT:    callq fmal
+; WIN-NEXT:    callq fmaf128
 ; WIN-NEXT:    addq $88, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -500,7 +500,7 @@ define fp128 @fma(fp128 %x, fp128 %y, fp128 %z) nounwind strictfp {
 ; WIN-X86-NEXT:    pushl 16(%ebp)
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _fmal
+; WIN-X86-NEXT:    calll _fmaf128
 ; WIN-X86-NEXT:    addl $52, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -570,7 +570,7 @@ define fp128 @frem(fp128 %x, fp128 %y) nounwind strictfp {
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
-; WIN-NEXT:    callq fmodl
+; WIN-NEXT:    callq fmodf128
 ; WIN-NEXT:    addq $72, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -593,7 +593,7 @@ define fp128 @frem(fp128 %x, fp128 %y) nounwind strictfp {
 ; WIN-X86-NEXT:    pushl 16(%ebp)
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _fmodl
+; WIN-X86-NEXT:    calll _fmodf128
 ; WIN-X86-NEXT:    addl $36, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -656,7 +656,7 @@ define fp128 @ceil(fp128 %x) nounwind strictfp {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq ceill
+; WIN-NEXT:    callq ceilf128
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -675,7 +675,7 @@ define fp128 @ceil(fp128 %x) nounwind strictfp {
 ; WIN-X86-NEXT:    pushl 16(%ebp)
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _ceill
+; WIN-X86-NEXT:    calll _ceilf128
 ; WIN-X86-NEXT:    addl $20, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -738,7 +738,7 @@ define fp128 @acos(fp128 %x) nounwind strictfp {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq acosl
+; WIN-NEXT:    callq acosf128
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -757,7 +757,7 @@ define fp128 @acos(fp128 %x) nounwind strictfp {
 ; WIN-X86-NEXT:    pushl 16(%ebp)
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _acosl
+; WIN-X86-NEXT:    calll _acosf128
 ; WIN-X86-NEXT:    addl $20, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -820,7 +820,7 @@ define fp128 @cos(fp128 %x) nounwind strictfp {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq cosl
+; WIN-NEXT:    callq cosf128
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -839,7 +839,7 @@ define fp128 @cos(fp128 %x) nounwind strictfp {
 ; WIN-X86-NEXT:    pushl 16(%ebp)
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _cosl
+; WIN-X86-NEXT:    calll _cosf128
 ; WIN-X86-NEXT:    addl $20, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -902,7 +902,7 @@ define fp128 @cosh(fp128 %x) nounwind strictfp {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq coshl
+; WIN-NEXT:    callq coshf128
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -921,7 +921,7 @@ define fp128 @cosh(fp128 %x) nounwind strictfp {
 ; WIN-X86-NEXT:    pushl 16(%ebp)
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _coshl
+; WIN-X86-NEXT:    calll _coshf128
 ; WIN-X86-NEXT:    addl $20, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -984,7 +984,7 @@ define fp128 @exp(fp128 %x) nounwind strictfp {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq expl
+; WIN-NEXT:    callq expf128
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -1003,7 +1003,7 @@ define fp128 @exp(fp128 %x) nounwind strictfp {
 ; WIN-X86-NEXT:    pushl 16(%ebp)
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _expl
+; WIN-X86-NEXT:    calll _expf128
 ; WIN-X86-NEXT:    addl $20, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -1066,7 +1066,7 @@ define fp128 @exp2(fp128 %x) nounwind strictfp {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq exp2l
+; WIN-NEXT:    callq exp2f128
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -1085,7 +1085,7 @@ define fp128 @exp2(fp128 %x) nounwind strictfp {
 ; WIN-X86-NEXT:    pushl 16(%ebp)
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _exp2l
+; WIN-X86-NEXT:    calll _exp2f128
 ; WIN-X86-NEXT:    addl $20, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -1148,7 +1148,7 @@ define fp128 @floor(fp128 %x) nounwind strictfp {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq floorl
+; WIN-NEXT:    callq floorf128
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -1167,7 +1167,7 @@ define fp128 @floor(fp128 %x) nounwind strictfp {
 ; WIN-X86-NEXT:    pushl 16(%ebp)
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _floorl
+; WIN-X86-NEXT:    calll _floorf128
 ; WIN-X86-NEXT:    addl $20, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -1230,7 +1230,7 @@ define fp128 @log(fp128 %x) nounwind strictfp {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq logl
+; WIN-NEXT:    callq logf128
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -1249,7 +1249,7 @@ define fp128 @log(fp128 %x) nounwind strictfp {
 ; WIN-X86-NEXT:    pushl 16(%ebp)
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _logl
+; WIN-X86-NEXT:    calll _logf128
 ; WIN-X86-NEXT:    addl $20, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -1312,7 +1312,7 @@ define fp128 @log10(fp128 %x) nounwind strictfp {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq log10l
+; WIN-NEXT:    callq log10f128
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -1331,7 +1331,7 @@ define fp128 @log10(fp128 %x) nounwind strictfp {
 ; WIN-X86-NEXT:    pushl 16(%ebp)
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _log10l
+; WIN-X86-NEXT:    calll _log10f128
 ; WIN-X86-NEXT:    addl $20, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -1394,7 +1394,7 @@ define fp128 @log2(fp128 %x) nounwind strictfp {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq log2l
+; WIN-NEXT:    callq log2f128
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -1413,7 +1413,7 @@ define fp128 @log2(fp128 %x) nounwind strictfp {
 ; WIN-X86-NEXT:    pushl 16(%ebp)
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _log2l
+; WIN-X86-NEXT:    calll _log2f128
 ; WIN-X86-NEXT:    addl $20, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -1483,7 +1483,7 @@ define fp128 @maxnum(fp128 %x, fp128 %y) nounwind strictfp {
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
-; WIN-NEXT:    callq fmaxl
+; WIN-NEXT:    callq fmaxf128
 ; WIN-NEXT:    addq $72, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -1506,7 +1506,7 @@ define fp128 @maxnum(fp128 %x, fp128 %y) nounwind strictfp {
 ; WIN-X86-NEXT:    pushl 16(%ebp)
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _fmaxl
+; WIN-X86-NEXT:    calll _fmaxf128
 ; WIN-X86-NEXT:    addl $36, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -1576,7 +1576,7 @@ define fp128 @minnum(fp128 %x, fp128 %y) nounwind strictfp {
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
-; WIN-NEXT:    callq fminl
+; WIN-NEXT:    callq fminf128
 ; WIN-NEXT:    addq $72, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -1599,7 +1599,7 @@ define fp128 @minnum(fp128 %x, fp128 %y) nounwind strictfp {
 ; WIN-X86-NEXT:    pushl 16(%ebp)
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _fminl
+; WIN-X86-NEXT:    calll _fminf128
 ; WIN-X86-NEXT:    addl $36, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -1662,7 +1662,7 @@ define fp128 @nearbyint(fp128 %x) nounwind strictfp {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq nearbyintl
+; WIN-NEXT:    callq nearbyintf128
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -1681,7 +1681,7 @@ define fp128 @nearbyint(fp128 %x) nounwind strictfp {
 ; WIN-X86-NEXT:    pushl 16(%ebp)
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _nearbyintl
+; WIN-X86-NEXT:    calll _nearbyintf128
 ; WIN-X86-NEXT:    addl $20, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -1751,7 +1751,7 @@ define fp128 @pow(fp128 %x, fp128 %y) nounwind strictfp {
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
-; WIN-NEXT:    callq powl
+; WIN-NEXT:    callq powf128
 ; WIN-NEXT:    addq $72, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -1774,7 +1774,7 @@ define fp128 @pow(fp128 %x, fp128 %y) nounwind strictfp {
 ; WIN-X86-NEXT:    pushl 16(%ebp)
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _powl
+; WIN-X86-NEXT:    calll _powf128
 ; WIN-X86-NEXT:    addl $36, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -1928,7 +1928,7 @@ define fp128 @rint(fp128 %x) nounwind strictfp {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq rintl
+; WIN-NEXT:    callq rintf128
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -1947,7 +1947,7 @@ define fp128 @rint(fp128 %x) nounwind strictfp {
 ; WIN-X86-NEXT:    pushl 16(%ebp)
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _rintl
+; WIN-X86-NEXT:    calll _rintf128
 ; WIN-X86-NEXT:    addl $20, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -2010,7 +2010,7 @@ define fp128 @round(fp128 %x) nounwind strictfp {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq roundl
+; WIN-NEXT:    callq roundf128
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -2029,7 +2029,7 @@ define fp128 @round(fp128 %x) nounwind strictfp {
 ; WIN-X86-NEXT:    pushl 16(%ebp)
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _roundl
+; WIN-X86-NEXT:    calll _roundf128
 ; WIN-X86-NEXT:    addl $20, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -2092,7 +2092,7 @@ define fp128 @roundeven(fp128 %x) nounwind strictfp {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq roundevenl
+; WIN-NEXT:    callq roundevenf128
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -2111,7 +2111,7 @@ define fp128 @roundeven(fp128 %x) nounwind strictfp {
 ; WIN-X86-NEXT:    pushl 16(%ebp)
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _roundevenl
+; WIN-X86-NEXT:    calll _roundevenf128
 ; WIN-X86-NEXT:    addl $20, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -2174,7 +2174,7 @@ define fp128 @asin(fp128 %x) nounwind strictfp {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq asinl
+; WIN-NEXT:    callq asinf128
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -2193,7 +2193,7 @@ define fp128 @asin(fp128 %x) nounwind strictfp {
 ; WIN-X86-NEXT:    pushl 16(%ebp)
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _asinl
+; WIN-X86-NEXT:    calll _asinf128
 ; WIN-X86-NEXT:    addl $20, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -2256,7 +2256,7 @@ define fp128 @sin(fp128 %x) nounwind strictfp {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq sinl
+; WIN-NEXT:    callq sinf128
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -2275,7 +2275,7 @@ define fp128 @sin(fp128 %x) nounwind strictfp {
 ; WIN-X86-NEXT:    pushl 16(%ebp)
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _sinl
+; WIN-X86-NEXT:    calll _sinf128
 ; WIN-X86-NEXT:    addl $20, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -2338,7 +2338,7 @@ define fp128 @sinh(fp128 %x) nounwind strictfp {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq sinhl
+; WIN-NEXT:    callq sinhf128
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -2357,7 +2357,7 @@ define fp128 @sinh(fp128 %x) nounwind strictfp {
 ; WIN-X86-NEXT:    pushl 16(%ebp)
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _sinhl
+; WIN-X86-NEXT:    calll _sinhf128
 ; WIN-X86-NEXT:    addl $20, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -2420,7 +2420,7 @@ define fp128 @sqrt(fp128 %x) nounwind strictfp {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq sqrtl
+; WIN-NEXT:    callq sqrtf128
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -2439,7 +2439,7 @@ define fp128 @sqrt(fp128 %x) nounwind strictfp {
 ; WIN-X86-NEXT:    pushl 16(%ebp)
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _sqrtl
+; WIN-X86-NEXT:    calll _sqrtf128
 ; WIN-X86-NEXT:    addl $20, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -2502,7 +2502,7 @@ define fp128 @atan(fp128 %x) nounwind strictfp {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq atanl
+; WIN-NEXT:    callq atanf128
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -2521,7 +2521,7 @@ define fp128 @atan(fp128 %x) nounwind strictfp {
 ; WIN-X86-NEXT:    pushl 16(%ebp)
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _atanl
+; WIN-X86-NEXT:    calll _atanf128
 ; WIN-X86-NEXT:    addl $20, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -2591,7 +2591,7 @@ define fp128 @atan2(fp128 %x, fp128 %y) nounwind strictfp {
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
-; WIN-NEXT:    callq atan2l
+; WIN-NEXT:    callq atan2f128
 ; WIN-NEXT:    addq $72, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -2614,7 +2614,7 @@ define fp128 @atan2(fp128 %x, fp128 %y) nounwind strictfp {
 ; WIN-X86-NEXT:    pushl 16(%ebp)
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _atan2l
+; WIN-X86-NEXT:    calll _atan2f128
 ; WIN-X86-NEXT:    addl $36, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -2677,7 +2677,7 @@ define fp128 @tan(fp128 %x) nounwind strictfp {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq tanl
+; WIN-NEXT:    callq tanf128
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -2696,7 +2696,7 @@ define fp128 @tan(fp128 %x) nounwind strictfp {
 ; WIN-X86-NEXT:    pushl 16(%ebp)
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _tanl
+; WIN-X86-NEXT:    calll _tanf128
 ; WIN-X86-NEXT:    addl $20, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -2759,7 +2759,7 @@ define fp128 @tanh(fp128 %x) nounwind strictfp {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq tanhl
+; WIN-NEXT:    callq tanhf128
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -2778,7 +2778,7 @@ define fp128 @tanh(fp128 %x) nounwind strictfp {
 ; WIN-X86-NEXT:    pushl 16(%ebp)
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _tanhl
+; WIN-X86-NEXT:    calll _tanhf128
 ; WIN-X86-NEXT:    addl $20, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -2841,7 +2841,7 @@ define fp128 @trunc(fp128 %x) nounwind strictfp {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq truncl
+; WIN-NEXT:    callq truncf128
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -2860,7 +2860,7 @@ define fp128 @trunc(fp128 %x) nounwind strictfp {
 ; WIN-X86-NEXT:    pushl 16(%ebp)
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _truncl
+; WIN-X86-NEXT:    calll _truncf128
 ; WIN-X86-NEXT:    addl $20, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -2913,7 +2913,7 @@ define i32 @lrint(fp128 %x) nounwind strictfp {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq lrintl
+; WIN-NEXT:    callq lrintf128
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -2923,7 +2923,7 @@ define i32 @lrint(fp128 %x) nounwind strictfp {
 ; WIN-X86-NEXT:    pushl {{[0-9]+}}(%esp)
 ; WIN-X86-NEXT:    pushl {{[0-9]+}}(%esp)
 ; WIN-X86-NEXT:    pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT:    calll _lrintl
+; WIN-X86-NEXT:    calll _lrintf128
 ; WIN-X86-NEXT:    addl $16, %esp
 ; WIN-X86-NEXT:    retl
 entry:
@@ -2963,7 +2963,7 @@ define i64 @llrint(fp128 %x) nounwind strictfp {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq llrintl
+; WIN-NEXT:    callq llrintf128
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -2973,7 +2973,7 @@ define i64 @llrint(fp128 %x) nounwind strictfp {
 ; WIN-X86-NEXT:    pushl {{[0-9]+}}(%esp)
 ; WIN-X86-NEXT:    pushl {{[0-9]+}}(%esp)
 ; WIN-X86-NEXT:    pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT:    calll _llrintl
+; WIN-X86-NEXT:    calll _llrintf128
 ; WIN-X86-NEXT:    addl $16, %esp
 ; WIN-X86-NEXT:    retl
 entry:
@@ -3013,7 +3013,7 @@ define i32 @lround(fp128 %x) nounwind strictfp {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq lroundl
+; WIN-NEXT:    callq lroundf128
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -3023,7 +3023,7 @@ define i32 @lround(fp128 %x) nounwind strictfp {
 ; WIN-X86-NEXT:    pushl {{[0-9]+}}(%esp)
 ; WIN-X86-NEXT:    pushl {{[0-9]+}}(%esp)
 ; WIN-X86-NEXT:    pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT:    calll _lroundl
+; WIN-X86-NEXT:    calll _lroundf128
 ; WIN-X86-NEXT:    addl $16, %esp
 ; WIN-X86-NEXT:    retl
 entry:
@@ -3063,7 +3063,7 @@ define i64 @llround(fp128 %x) nounwind strictfp {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq llroundl
+; WIN-NEXT:    callq llroundf128
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -3073,7 +3073,7 @@ define i64 @llround(fp128 %x) nounwind strictfp {
 ; WIN-X86-NEXT:    pushl {{[0-9]+}}(%esp)
 ; WIN-X86-NEXT:    pushl {{[0-9]+}}(%esp)
 ; WIN-X86-NEXT:    pushl {{[0-9]+}}(%esp)
-; WIN-X86-NEXT:    calll _llroundl
+; WIN-X86-NEXT:    calll _llroundf128
 ; WIN-X86-NEXT:    addl $16, %esp
 ; WIN-X86-NEXT:    retl
 entry:
diff --git a/llvm/test/CodeGen/X86/fp128-libcalls.ll b/llvm/test/CodeGen/X86/fp128-libcalls.ll
index f727a79078627..ebab9292a2411 100644
--- a/llvm/test/CodeGen/X86/fp128-libcalls.ll
+++ b/llvm/test/CodeGen/X86/fp128-libcalls.ll
@@ -857,7 +857,7 @@ define dso_local void @Test128Rem(fp128 %d1, fp128 %d2) nounwind {
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
-; WIN-NEXT:    callq fmodl
+; WIN-NEXT:    callq fmodf128
 ; WIN-NEXT:    movaps %xmm0, vf128(%rip)
 ; WIN-NEXT:    addq $72, %rsp
 ; WIN-NEXT:    retq
@@ -879,7 +879,7 @@ define dso_local void @Test128Rem(fp128 %d1, fp128 %d2) nounwind {
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl 8(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _fmodl
+; WIN-X86-NEXT:    calll _fmodf128
 ; WIN-X86-NEXT:    addl $36, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -949,7 +949,7 @@ define dso_local void @Test128_1Rem(fp128 %d1) nounwind {
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
-; WIN-NEXT:    callq fmodl
+; WIN-NEXT:    callq fmodf128
 ; WIN-NEXT:    movaps %xmm0, vf128(%rip)
 ; WIN-NEXT:    addq $72, %rsp
 ; WIN-NEXT:    retq
@@ -971,7 +971,7 @@ define dso_local void @Test128_1Rem(fp128 %d1) nounwind {
 ; WIN-X86-NEXT:    pushl _vf128+4
 ; WIN-X86-NEXT:    pushl _vf128
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _fmodl
+; WIN-X86-NEXT:    calll _fmodf128
 ; WIN-X86-NEXT:    addl $36, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -1031,7 +1031,7 @@ define dso_local void @Test128Sqrt(fp128 %d1) nounwind {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq sqrtl
+; WIN-NEXT:    callq sqrtf128
 ; WIN-NEXT:    movaps %xmm0, vf128(%rip)
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
@@ -1049,7 +1049,7 @@ define dso_local void @Test128Sqrt(fp128 %d1) nounwind {
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl 8(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _sqrtl
+; WIN-X86-NEXT:    calll _sqrtf128
 ; WIN-X86-NEXT:    addl $20, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -1109,7 +1109,7 @@ define dso_local void @Test128Sin(fp128 %d1) nounwind {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq sinl
+; WIN-NEXT:    callq sinf128
 ; WIN-NEXT:    movaps %xmm0, vf128(%rip)
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
@@ -1127,7 +1127,7 @@ define dso_local void @Test128Sin(fp128 %d1) nounwind {
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl 8(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _sinl
+; WIN-X86-NEXT:    calll _sinf128
 ; WIN-X86-NEXT:    addl $20, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -1187,7 +1187,7 @@ define dso_local void @Test128Cos(fp128 %d1) nounwind {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq cosl
+; WIN-NEXT:    callq cosf128
 ; WIN-NEXT:    movaps %xmm0, vf128(%rip)
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
@@ -1205,7 +1205,7 @@ define dso_local void @Test128Cos(fp128 %d1) nounwind {
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl 8(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _cosl
+; WIN-X86-NEXT:    calll _cosf128
 ; WIN-X86-NEXT:    addl $20, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -1265,7 +1265,7 @@ define dso_local void @Test128Ceil(fp128 %d1) nounwind {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq ceill
+; WIN-NEXT:    callq ceilf128
 ; WIN-NEXT:    movaps %xmm0, vf128(%rip)
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
@@ -1283,7 +1283,7 @@ define dso_local void @Test128Ceil(fp128 %d1) nounwind {
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl 8(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _ceill
+; WIN-X86-NEXT:    calll _ceilf128
 ; WIN-X86-NEXT:    addl $20, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -1343,7 +1343,7 @@ define dso_local void @Test128Floor(fp128 %d1) nounwind {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq floorl
+; WIN-NEXT:    callq floorf128
 ; WIN-NEXT:    movaps %xmm0, vf128(%rip)
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
@@ -1361,7 +1361,7 @@ define dso_local void @Test128Floor(fp128 %d1) nounwind {
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl 8(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _floorl
+; WIN-X86-NEXT:    calll _floorf128
 ; WIN-X86-NEXT:    addl $20, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -1421,7 +1421,7 @@ define dso_local void @Test128Trunc(fp128 %d1) nounwind {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq truncl
+; WIN-NEXT:    callq truncf128
 ; WIN-NEXT:    movaps %xmm0, vf128(%rip)
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
@@ -1439,7 +1439,7 @@ define dso_local void @Test128Trunc(fp128 %d1) nounwind {
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl 8(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _truncl
+; WIN-X86-NEXT:    calll _truncf128
 ; WIN-X86-NEXT:    addl $20, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -1499,7 +1499,7 @@ define dso_local void @Test128Nearbyint(fp128 %d1) nounwind {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq nearbyintl
+; WIN-NEXT:    callq nearbyintf128
 ; WIN-NEXT:    movaps %xmm0, vf128(%rip)
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
@@ -1517,7 +1517,7 @@ define dso_local void @Test128Nearbyint(fp128 %d1) nounwind {
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl 8(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _nearbyintl
+; WIN-X86-NEXT:    calll _nearbyintf128
 ; WIN-X86-NEXT:    addl $20, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -1577,7 +1577,7 @@ define dso_local void @Test128Rint(fp128 %d1) nounwind {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq rintl
+; WIN-NEXT:    callq rintf128
 ; WIN-NEXT:    movaps %xmm0, vf128(%rip)
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
@@ -1595,7 +1595,7 @@ define dso_local void @Test128Rint(fp128 %d1) nounwind {
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl 8(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _rintl
+; WIN-X86-NEXT:    calll _rintf128
 ; WIN-X86-NEXT:    addl $20, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -1655,7 +1655,7 @@ define dso_local void @Test128Round(fp128 %d1) nounwind {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq roundl
+; WIN-NEXT:    callq roundf128
 ; WIN-NEXT:    movaps %xmm0, vf128(%rip)
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
@@ -1673,7 +1673,7 @@ define dso_local void @Test128Round(fp128 %d1) nounwind {
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl 8(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _roundl
+; WIN-X86-NEXT:    calll _roundf128
 ; WIN-X86-NEXT:    addl $20, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -1744,7 +1744,7 @@ define fp128 @Test128FMA(fp128 %a, fp128 %b, fp128 %c) nounwind {
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %r8
-; WIN-NEXT:    callq fmal
+; WIN-NEXT:    callq fmaf128
 ; WIN-NEXT:    addq $88, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -1771,7 +1771,7 @@ define fp128 @Test128FMA(fp128 %a, fp128 %b, fp128 %c) nounwind {
 ; WIN-X86-NEXT:    pushl 16(%ebp)
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _fmal
+; WIN-X86-NEXT:    calll _fmaf128
 ; WIN-X86-NEXT:    addl $52, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -1829,7 +1829,7 @@ define fp128 @Test128Acos(fp128 %a) nounwind {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq acosl
+; WIN-NEXT:    callq acosf128
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -1848,7 +1848,7 @@ define fp128 @Test128Acos(fp128 %a) nounwind {
 ; WIN-X86-NEXT:    pushl 16(%ebp)
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _acosl
+; WIN-X86-NEXT:    calll _acosf128
 ; WIN-X86-NEXT:    addl $20, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -1904,7 +1904,7 @@ define fp128 @Test128Asin(fp128 %a) nounwind {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq asinl
+; WIN-NEXT:    callq asinf128
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -1923,7 +1923,7 @@ define fp128 @Test128Asin(fp128 %a) nounwind {
 ; WIN-X86-NEXT:    pushl 16(%ebp)
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _asinl
+; WIN-X86-NEXT:    calll _asinf128
 ; WIN-X86-NEXT:    addl $20, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -1979,7 +1979,7 @@ define fp128 @Test128Atan(fp128 %a) nounwind {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq atanl
+; WIN-NEXT:    callq atanf128
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -1998,7 +1998,7 @@ define fp128 @Test128Atan(fp128 %a) nounwind {
 ; WIN-X86-NEXT:    pushl 16(%ebp)
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _atanl
+; WIN-X86-NEXT:    calll _atanf128
 ; WIN-X86-NEXT:    addl $20, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -2061,7 +2061,7 @@ define fp128 @Test128Atan2(fp128 %a, fp128 %b) nounwind {
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
-; WIN-NEXT:    callq atan2l
+; WIN-NEXT:    callq atan2f128
 ; WIN-NEXT:    addq $72, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -2084,7 +2084,7 @@ define fp128 @Test128Atan2(fp128 %a, fp128 %b) nounwind {
 ; WIN-X86-NEXT:    pushl 16(%ebp)
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _atan2l
+; WIN-X86-NEXT:    calll _atan2f128
 ; WIN-X86-NEXT:    addl $36, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -2140,7 +2140,7 @@ define fp128 @Test128Cosh(fp128 %a) nounwind {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq coshl
+; WIN-NEXT:    callq coshf128
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -2159,7 +2159,7 @@ define fp128 @Test128Cosh(fp128 %a) nounwind {
 ; WIN-X86-NEXT:    pushl 16(%ebp)
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _coshl
+; WIN-X86-NEXT:    calll _coshf128
 ; WIN-X86-NEXT:    addl $20, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -2215,7 +2215,7 @@ define fp128 @Test128Sinh(fp128 %a) nounwind {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq sinhl
+; WIN-NEXT:    callq sinhf128
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -2234,7 +2234,7 @@ define fp128 @Test128Sinh(fp128 %a) nounwind {
 ; WIN-X86-NEXT:    pushl 16(%ebp)
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _sinhl
+; WIN-X86-NEXT:    calll _sinhf128
 ; WIN-X86-NEXT:    addl $20, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -2290,7 +2290,7 @@ define fp128 @Test128Tan(fp128 %a) nounwind {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq tanl
+; WIN-NEXT:    callq tanf128
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -2309,7 +2309,7 @@ define fp128 @Test128Tan(fp128 %a) nounwind {
 ; WIN-X86-NEXT:    pushl 16(%ebp)
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _tanl
+; WIN-X86-NEXT:    calll _tanf128
 ; WIN-X86-NEXT:    addl $20, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
@@ -2365,7 +2365,7 @@ define fp128 @Test128Tanh(fp128 %a) nounwind {
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
-; WIN-NEXT:    callq tanhl
+; WIN-NEXT:    callq tanhf128
 ; WIN-NEXT:    addq $56, %rsp
 ; WIN-NEXT:    retq
 ;
@@ -2384,7 +2384,7 @@ define fp128 @Test128Tanh(fp128 %a) nounwind {
 ; WIN-X86-NEXT:    pushl 16(%ebp)
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl %eax
-; WIN-X86-NEXT:    calll _tanhl
+; WIN-X86-NEXT:    calll _tanhf128
 ; WIN-X86-NEXT:    addl $20, %esp
 ; WIN-X86-NEXT:    movl (%esp), %eax
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx

>From 89b88f3dfc8846b8924a98d22a73f657b55a74fe Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross at umich.edu>
Date: Sun, 13 Jul 2025 06:20:08 -0400
Subject: [PATCH 4/7] Update tests

---
 .../CodeGen/SelectionDAG/TargetLowering.cpp   |  3 +-
 llvm/lib/IR/RuntimeLibcalls.cpp               |  9 ++-
 .../test/CodeGen/AArch64/illegal-float-ops.ll | 58 +++++++------------
 llvm/test/CodeGen/AArch64/sincos-expansion.ll |  2 +-
 .../vecreduce-fmax-legalization-nan.ll        |  2 +-
 llvm/test/CodeGen/Mips/cconv/fmaxl_call.ll    |  2 +-
 llvm/test/CodeGen/Mips/cconv/roundl-call.ll   |  4 +-
 llvm/test/CodeGen/Mips/llrint-conv.ll         |  4 +-
 llvm/test/CodeGen/Mips/llround-conv.ll        |  4 +-
 llvm/test/CodeGen/Mips/lrint-conv.ll          |  4 +-
 llvm/test/CodeGen/Mips/lround-conv.ll         |  4 +-
 .../test/CodeGen/SystemZ/atomicrmw-fmax-03.ll |  2 +-
 .../test/CodeGen/SystemZ/atomicrmw-fmin-03.ll |  2 +-
 llvm/test/CodeGen/SystemZ/fp-libcall.ll       | 20 +++----
 llvm/test/CodeGen/SystemZ/fp-mul-13.ll        |  2 +-
 llvm/test/CodeGen/SystemZ/fp-round-01.ll      | 12 ++--
 llvm/test/CodeGen/SystemZ/fp-sincos-01.ll     |  2 +-
 llvm/test/CodeGen/SystemZ/fp-strict-mul-13.ll |  2 +-
 .../CodeGen/SystemZ/fp-strict-round-01.ll     | 12 ++--
 llvm/test/CodeGen/X86/fp128-libcalls.ll       |  4 +-
 20 files changed, 70 insertions(+), 84 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 7ffa7d02f38f4..95d0ef2a7ab26 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -183,8 +183,7 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
 
   const char *LibcallName = getLibcallName(LC);
   if (LC == RTLIB::UNKNOWN_LIBCALL || !LibcallName) {
-    reportFatalInternalError("unsupported library call operation: "
-                             "libcall " +
+    reportFatalInternalError("unsupported library call operation: libcall " +
                              Twine(LC));
   }
 
diff --git a/llvm/lib/IR/RuntimeLibcalls.cpp b/llvm/lib/IR/RuntimeLibcalls.cpp
index 1896078851307..82bda3244f4e6 100644
--- a/llvm/lib/IR/RuntimeLibcalls.cpp
+++ b/llvm/lib/IR/RuntimeLibcalls.cpp
@@ -138,6 +138,7 @@ void RuntimeLibcallsInfo::initLibcalls(const Triple &TT,
                                        ExceptionHandling ExceptionModel,
                                        FloatABI::ABIType FloatABI,
                                        EABI EABIVersion, StringRef ABIName) {
+  LibcallImpl SinCosF128Impl = RTLIB::Unsupported;
   setTargetRuntimeLibcallSets(TT, FloatABI);
 
   // By default fp128 libcalls get lowered to `*f128` symbols, which is
@@ -145,8 +146,12 @@ void RuntimeLibcallsInfo::initLibcalls(const Triple &TT,
   // Unfortunately many platforms only have the `*l` (`long double`) symbols,
   // which vary by architecture and compilation flags, so we have to use them
   // sometimes.
-  if (TT.f128LibmShouldUseLongDouble())
+  if (TT.f128LibmShouldUseLongDouble()) {
     setLongDoubleIsF128Libm(*this, /*FiniteOnlyFuncs=*/true);
+    SinCosF128Impl = RTLIB::sincos_ld128;
+  } else {
+    SinCosF128Impl = RTLIB::sincos_f128;
+  }
 
   if (TT.isX86() || TT.isVE() || TT.isARM() || TT.isThumb()) {
     if (ExceptionModel == ExceptionHandling::SjLj)
@@ -191,7 +196,7 @@ void RuntimeLibcallsInfo::initLibcalls(const Triple &TT,
     setLibcallImpl(RTLIB::SINCOS_F32, RTLIB::sincosf);
     setLibcallImpl(RTLIB::SINCOS_F64, RTLIB::sincos);
     setLibcallImpl(RTLIB::SINCOS_F80, RTLIB::sincos_f80);
-    setLibcallImpl(RTLIB::SINCOS_F128, RTLIB::sincos_f128);
+    setLibcallImpl(RTLIB::SINCOS_F128, SinCosF128Impl);
     setLibcallImpl(RTLIB::SINCOS_PPCF128, RTLIB::sincos_ppcf128);
   }
 
diff --git a/llvm/test/CodeGen/AArch64/illegal-float-ops.ll b/llvm/test/CodeGen/AArch64/illegal-float-ops.ll
index 9f0b7ed74e1c5..5ec66b784c037 100644
--- a/llvm/test/CodeGen/AArch64/illegal-float-ops.ll
+++ b/llvm/test/CodeGen/AArch64/illegal-float-ops.ll
@@ -1,5 +1,5 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-GNU
-; RUN: llc -mtriple=aarch64-linux-android -verify-machineinstrs -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-ANDROID
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-android -verify-machineinstrs -o - %s | FileCheck %s
 
 @varfloat = global float 0.0
 @vardouble = global double 0.0
@@ -22,8 +22,7 @@ define void @test_cos(float %float, double %double, fp128 %fp128) {
 
    %cosfp128 = call fp128 @llvm.cos.f128(fp128 %fp128)
    store fp128 %cosfp128, ptr @varfp128
-; CHECK-GNU: bl cosf128
-; CHECK-ANDROID: bl cosl
+; CHECK: bl cosl
 
   ret void
 }
@@ -45,8 +44,7 @@ define void @test_exp(float %float, double %double, fp128 %fp128) {
 
    %expfp128 = call fp128 @llvm.exp.f128(fp128 %fp128)
    store fp128 %expfp128, ptr @varfp128
-; CHECK-GNU: bl expf128
-; CHECK-ANDROID: bl expl
+; CHECK: bl expl
 
   ret void
 }
@@ -68,8 +66,7 @@ define void @test_exp2(float %float, double %double, fp128 %fp128) {
 
    %exp2fp128 = call fp128 @llvm.exp2.f128(fp128 %fp128)
    store fp128 %exp2fp128, ptr @varfp128
-; CHECK-GNU: bl exp2f128
-; CHECK-ANDROID: bl exp2l
+; CHECK: bl exp2l
   ret void
 
 }
@@ -91,8 +88,7 @@ define void @test_log(float %float, double %double, fp128 %fp128) {
 
    %logfp128 = call fp128 @llvm.log.f128(fp128 %fp128)
    store fp128 %logfp128, ptr @varfp128
-; CHECK-GNU: bl logf128
-; CHECK-ANDROID: bl logl
+; CHECK: bl logl
 
   ret void
 }
@@ -114,8 +110,7 @@ define void @test_log2(float %float, double %double, fp128 %fp128) {
 
    %log2fp128 = call fp128 @llvm.log2.f128(fp128 %fp128)
    store fp128 %log2fp128, ptr @varfp128
-; CHECK-GNU: bl log2f128
-; CHECK-ANDROID: bl log2l
+; CHECK: bl log2l
   ret void
 
 }
@@ -137,8 +132,7 @@ define void @test_log10(float %float, double %double, fp128 %fp128) {
 
    %log10fp128 = call fp128 @llvm.log10.f128(fp128 %fp128)
    store fp128 %log10fp128, ptr @varfp128
-; CHECK-GNU: bl log10f128
-; CHECK-ANDROID: bl log10l
+; CHECK: bl log10l
 
   ret void
 }
@@ -160,8 +154,7 @@ define void @test_sin(float %float, double %double, fp128 %fp128) {
 
    %sinfp128 = call fp128 @llvm.sin.f128(fp128 %fp128)
    store fp128 %sinfp128, ptr @varfp128
-; CHECK-GNU: bl sinf128
-; CHECK-ANDROID: bl sinl
+; CHECK: bl sinl
   ret void
 
 }
@@ -183,8 +176,7 @@ define void @test_tan(float %float, double %double, fp128 %fp128) {
 
    %tanfp128 = call fp128 @llvm.tan.f128(fp128 %fp128)
    store fp128 %tanfp128, ptr @varfp128
-; CHECK-GNU: bl tanf128
-; CHECK-ANDROID: bl tanl
+; CHECK: bl tanl
   ret void
 }
 
@@ -205,8 +197,7 @@ define void @test_acos(float %float, double %double, fp128 %fp128) {
 
    %acosfp128 = call fp128 @llvm.acos.f128(fp128 %fp128)
    store fp128 %acosfp128, ptr @varfp128
-; CHECK-GNU: bl acosf128
-; CHECK-ANDROID: bl acosl
+; CHECK: bl acosl
   ret void
 }
 
@@ -227,8 +218,7 @@ define void @test_asin(float %float, double %double, fp128 %fp128) {
 
    %asinfp128 = call fp128 @llvm.asin.f128(fp128 %fp128)
    store fp128 %asinfp128, ptr @varfp128
-; CHECK-GNU: bl asinf128
-; CHECK-ANDROID: bl asinl
+; CHECK: bl asinl
   ret void
 }
 
@@ -249,8 +239,7 @@ define void @test_atan(float %float, double %double, fp128 %fp128) {
 
    %atanfp128 = call fp128 @llvm.atan.f128(fp128 %fp128)
    store fp128 %atanfp128, ptr @varfp128
-; CHECK-GNU: bl atanf128
-; CHECK-ANDROID: bl atanl
+; CHECK: bl atanl
   ret void
 }
 
@@ -271,8 +260,7 @@ define void @test_atan2(float %float1, double %double1, fp128 %fp1281, float %fl
 
    %atan2fp128 = call fp128 @llvm.atan2.f128(fp128 %fp1281, fp128 %fp1282)
    store fp128 %atan2fp128, ptr @varfp128
-; CHECK-GNU: bl atan2f128
-; CHECK-ANDROID: bl atan2l
+; CHECK: bl atan2l
   ret void
 }
 
@@ -293,8 +281,7 @@ define void @test_cosh(float %float, double %double, fp128 %fp128) {
 
    %coshfp128 = call fp128 @llvm.cosh.f128(fp128 %fp128)
    store fp128 %coshfp128, ptr @varfp128
-; CHECK-GNU: bl coshf128
-; CHECK-ANDROID: bl coshl
+; CHECK: bl coshl
   ret void
 }
 
@@ -315,8 +302,7 @@ define void @test_sinh(float %float, double %double, fp128 %fp128) {
 
    %sinhfp128 = call fp128 @llvm.sinh.f128(fp128 %fp128)
    store fp128 %sinhfp128, ptr @varfp128
-; CHECK-GNU: bl sinhf128
-; CHECK-ANDROID: bl sinhl
+; CHECK: bl sinhl
   ret void
 }
 
@@ -337,8 +323,7 @@ define void @test_tanh(float %float, double %double, fp128 %fp128) {
 
    %tanhfp128 = call fp128 @llvm.tanh.f128(fp128 %fp128)
    store fp128 %tanhfp128, ptr @varfp128
-; CHECK-GNU: bl tanhf128
-; CHECK-ANDROID: bl tanhl
+; CHECK: bl tanhl
   ret void
 }
 
@@ -359,8 +344,7 @@ define void @test_pow(float %float, double %double, fp128 %fp128) {
 
    %powfp128 = call fp128 @llvm.pow.f128(fp128 %fp128, fp128 %fp128)
    store fp128 %powfp128, ptr @varfp128
-; CHECK-GNU: bl powf128
-; CHECK-ANDROID: bl powl
+; CHECK: bl powl
 
   ret void
 }
@@ -400,8 +384,7 @@ define void @test_frem(float %float, double %double, fp128 %fp128) {
 
   %fremfp128 = frem fp128 %fp128, %fp128
   store fp128 %fremfp128, ptr @varfp128
-; CHECK-GNU: bl fmodf128
-; CHECK-ANDROID: bl fmodl
+; CHECK: bl fmodl
 
   ret void
 }
@@ -413,8 +396,7 @@ define void @test_fma(fp128 %fp128) {
 
   %fmafp128 = call fp128 @llvm.fma.f128(fp128 %fp128, fp128 %fp128, fp128 %fp128)
   store fp128 %fmafp128, ptr @varfp128
-; CHECK-GNU: bl fmaf128
-; CHECK-ANDROID: bl fmal
+; CHECK: bl fmal
 
   ret void
 }
diff --git a/llvm/test/CodeGen/AArch64/sincos-expansion.ll b/llvm/test/CodeGen/AArch64/sincos-expansion.ll
index 05e753aca0836..41ee40378b4fc 100644
--- a/llvm/test/CodeGen/AArch64/sincos-expansion.ll
+++ b/llvm/test/CodeGen/AArch64/sincos-expansion.ll
@@ -43,7 +43,7 @@ define fp128 @test_sincos_f128(fp128 %f) {
   %sin = call fp128 @sinl(fp128 %f) readnone
   %cos = call fp128 @cosl(fp128 %f) readnone
   %val = fadd fp128 %sin, %cos
-; CHECK: bl sincosf128
+; CHECK: bl sincosl
   ret fp128 %val
 }
 
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization-nan.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization-nan.ll
index 644bab3698926..1d295a30a994b 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization-nan.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization-nan.ll
@@ -583,7 +583,7 @@ define float @test_v3f32_ninf(<3 x float> %a) nounwind {
 define fp128 @test_v2f128(<2 x fp128> %a) nounwind {
 ; CHECK-LABEL: test_v2f128:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    b fmaxf128
+; CHECK-NEXT:    b fmaxl
   %b = call fp128 @llvm.vector.reduce.fmax.v2f128(<2 x fp128> %a)
   ret fp128 %b
 }
diff --git a/llvm/test/CodeGen/Mips/cconv/fmaxl_call.ll b/llvm/test/CodeGen/Mips/cconv/fmaxl_call.ll
index a2dfede48226c..0e3078edae45d 100644
--- a/llvm/test/CodeGen/Mips/cconv/fmaxl_call.ll
+++ b/llvm/test/CodeGen/Mips/cconv/fmaxl_call.ll
@@ -8,7 +8,7 @@ define fp128 @call_fmaxl(fp128 %a, fp128 %b) {
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    sd $ra, 8($sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    .cfi_offset 31, -8
-; CHECK-NEXT:    jal fmaxf128
+; CHECK-NEXT:    jal fmaxl
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    mov.d $f12, $f0
 ; CHECK-NEXT:    jal f
diff --git a/llvm/test/CodeGen/Mips/cconv/roundl-call.ll b/llvm/test/CodeGen/Mips/cconv/roundl-call.ll
index 895055bac0ae2..98bda89546e43 100644
--- a/llvm/test/CodeGen/Mips/cconv/roundl-call.ll
+++ b/llvm/test/CodeGen/Mips/cconv/roundl-call.ll
@@ -25,8 +25,8 @@
 define void @roundl_call(fp128 %value) {
 entry:
 ; ALL-LABEL: roundl_call:
-; N32:          lw      $25, %call16(roundf128)($gp)
-; N64:          ld      $25, %call16(roundf128)($gp)
+; N32:          lw      $25, %call16(roundl)($gp)
+; N64:          ld      $25, %call16(roundl)($gp)
 
 ; SOFT-FLOAT:   sd      $4, 8(${{[0-9]+}})
 ; SOFT-FLOAT:   sd      $2, 0(${{[0-9]+}})
diff --git a/llvm/test/CodeGen/Mips/llrint-conv.ll b/llvm/test/CodeGen/Mips/llrint-conv.ll
index 6848a8f7e733f..dcb4e5657e80b 100644
--- a/llvm/test/CodeGen/Mips/llrint-conv.ll
+++ b/llvm/test/CodeGen/Mips/llrint-conv.ll
@@ -36,7 +36,7 @@ entry:
 
 define signext i32 @testmswl(fp128 %x) {
 ; CHECK-LABEL: testmswl:
-; CHECK:       jal     llrintf128
+; CHECK:       jal     llrintl
 entry:
   %0 = tail call i64 @llvm.llrint.f128(fp128 %x)
   %conv = trunc i64 %0 to i32
@@ -45,7 +45,7 @@ entry:
 
 define i64 @testmsll(fp128 %x) {
 ; CHECK-LABEL: testmsll:
-; CHECK:       jal     llrintf128
+; CHECK:       jal     llrintl
 entry:
   %0 = tail call i64 @llvm.llrint.f128(fp128 %x)
   ret i64 %0
diff --git a/llvm/test/CodeGen/Mips/llround-conv.ll b/llvm/test/CodeGen/Mips/llround-conv.ll
index 9ad97ccbda389..77482a8663319 100644
--- a/llvm/test/CodeGen/Mips/llround-conv.ll
+++ b/llvm/test/CodeGen/Mips/llround-conv.ll
@@ -36,7 +36,7 @@ entry:
 
 define signext i32 @testmswl(fp128 %x) {
 ; CHECK-LABEL: testmswl:
-; CHECK:       jal     llroundf128
+; CHECK:       jal     llroundl
 entry:
   %0 = tail call i64 @llvm.llround.f128(fp128 %x)
   %conv = trunc i64 %0 to i32
@@ -45,7 +45,7 @@ entry:
 
 define i64 @testmsll(fp128 %x) {
 ; CHECK-LABEL: testmsll:
-; CHECK:       jal     llroundf128
+; CHECK:       jal     llroundl
 entry:
   %0 = tail call i64 @llvm.llround.f128(fp128 %x)
   ret i64 %0
diff --git a/llvm/test/CodeGen/Mips/lrint-conv.ll b/llvm/test/CodeGen/Mips/lrint-conv.ll
index 98748289d4047..bd3f7b3babe10 100644
--- a/llvm/test/CodeGen/Mips/lrint-conv.ll
+++ b/llvm/test/CodeGen/Mips/lrint-conv.ll
@@ -36,7 +36,7 @@ entry:
 
 define signext i32 @testmswl(fp128 %x) {
 ; CHECK-LABEL: testmswl:
-; CHECK:       jal     lrintf128
+; CHECK:       jal     lrintl
 entry:
   %0 = tail call i64 @llvm.lrint.i64.f128(fp128 %x)
   %conv = trunc i64 %0 to i32
@@ -45,7 +45,7 @@ entry:
 
 define signext i64 @testmsll(fp128 %x) {
 ; CHECK-LABEL: testmsll:
-; CHECK:       jal     lrintf128
+; CHECK:       jal     lrintl
 entry:
   %0 = tail call i64 @llvm.lrint.i64.f128(fp128 %x)
   ret i64 %0
diff --git a/llvm/test/CodeGen/Mips/lround-conv.ll b/llvm/test/CodeGen/Mips/lround-conv.ll
index f8f5228433694..acca565c0fad2 100644
--- a/llvm/test/CodeGen/Mips/lround-conv.ll
+++ b/llvm/test/CodeGen/Mips/lround-conv.ll
@@ -36,7 +36,7 @@ entry:
 
 define signext i32 @testmswl(fp128 %x) {
 ; CHECK-LABEL: testmswl:
-; CHECK:       jal     lroundf128
+; CHECK:       jal     lroundl
 entry:
   %0 = tail call i64 @llvm.lround.i64.f128(fp128 %x)
   %conv = trunc i64 %0 to i32
@@ -45,7 +45,7 @@ entry:
 
 define signext i64 @testmsll(fp128 %x) {
 ; CHECK-LABEL: testmsll:
-; CHECK:       jal     lroundf128
+; CHECK:       jal     lroundl
 entry:
   %0 = tail call i64 @llvm.lround.i64.f128(fp128 %x)
   ret i64 %0
diff --git a/llvm/test/CodeGen/SystemZ/atomicrmw-fmax-03.ll b/llvm/test/CodeGen/SystemZ/atomicrmw-fmax-03.ll
index 83f66d0ef15f4..21e7c6e586dfe 100644
--- a/llvm/test/CodeGen/SystemZ/atomicrmw-fmax-03.ll
+++ b/llvm/test/CodeGen/SystemZ/atomicrmw-fmax-03.ll
@@ -19,7 +19,7 @@ define void @f1(ptr %ret, ptr %src, ptr %b) {
 ; CHECK: la %r4, 160(%r15)
 ; CHECK: std [[FSL]], 176(%r15)
 ; CHECK: std [[FSH]], 184(%r15)
-; CHECK: brasl %r14, fmaxf128@PLT
+; CHECK: brasl %r14, fmaxl@PLT
 ; CHECK: lg [[RH:%r[0-9]+]], 200(%r15)
 ; CHECK: lg [[RL:%r[0-9]+]], 192(%r15)
 ; CHECK: lgdr [[RSH:%r[0-9]+]], [[FSH]]
diff --git a/llvm/test/CodeGen/SystemZ/atomicrmw-fmin-03.ll b/llvm/test/CodeGen/SystemZ/atomicrmw-fmin-03.ll
index c48ba3ff661f3..1c6f8e20aa4f8 100644
--- a/llvm/test/CodeGen/SystemZ/atomicrmw-fmin-03.ll
+++ b/llvm/test/CodeGen/SystemZ/atomicrmw-fmin-03.ll
@@ -19,7 +19,7 @@ define void @f1(ptr %ret, ptr %src, ptr %b) {
 ; CHECK: la %r4, 160(%r15)
 ; CHECK: std [[FSL]], 176(%r15)
 ; CHECK: std [[FSH]], 184(%r15)
-; CHECK: brasl %r14, fminf128@PLT
+; CHECK: brasl %r14, fminl@PLT
 ; CHECK: lg [[RH:%r[0-9]+]], 200(%r15)
 ; CHECK: lg [[RL:%r[0-9]+]], 192(%r15)
 ; CHECK: lgdr [[RSH:%r[0-9]+]], [[FSH]]
diff --git a/llvm/test/CodeGen/SystemZ/fp-libcall.ll b/llvm/test/CodeGen/SystemZ/fp-libcall.ll
index 51f91315b284d..5069b9b257b80 100644
--- a/llvm/test/CodeGen/SystemZ/fp-libcall.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-libcall.ll
@@ -39,7 +39,7 @@ define double @f5(double %x, double %y) {
 
 define fp128 @f6(fp128 %x, fp128 %y) {
 ; CHECK-LABEL: f6:
-; CHECK: brasl %r14, powf128@PLT
+; CHECK: brasl %r14, powl@PLT
   %tmp = call fp128 @llvm.pow.f128(fp128 %x, fp128 %y)
   ret fp128 %tmp
 }
@@ -60,7 +60,7 @@ define double @f8(double %x) {
 
 define fp128 @f9(fp128 %x) {
 ; CHECK-LABEL: f9:
-; CHECK: brasl %r14, sinf128@PLT
+; CHECK: brasl %r14, sinl@PLT
   %tmp = call fp128 @llvm.sin.f128(fp128 %x)
   ret fp128 %tmp
 }
@@ -81,7 +81,7 @@ define double @f11(double %x) {
 
 define fp128 @f12(fp128 %x) {
 ; CHECK-LABEL: f12:
-; CHECK: brasl %r14, cosf128@PLT
+; CHECK: brasl %r14, cosl@PLT
   %tmp = call fp128 @llvm.cos.f128(fp128 %x)
   ret fp128 %tmp
 }
@@ -102,7 +102,7 @@ define double @f14(double %x) {
 
 define fp128 @f15(fp128 %x) {
 ; CHECK-LABEL: f15:
-; CHECK: brasl %r14, expf128@PLT
+; CHECK: brasl %r14, expl@PLT
   %tmp = call fp128 @llvm.exp.f128(fp128 %x)
   ret fp128 %tmp
 }
@@ -123,7 +123,7 @@ define double @f17(double %x) {
 
 define fp128 @f18(fp128 %x) {
 ; CHECK-LABEL: f18:
-; CHECK: brasl %r14, exp2f128@PLT
+; CHECK: brasl %r14, exp2l@PLT
   %tmp = call fp128 @llvm.exp2.f128(fp128 %x)
   ret fp128 %tmp
 }
@@ -144,7 +144,7 @@ define double @f20(double %x) {
 
 define fp128 @f21(fp128 %x) {
 ; CHECK-LABEL: f21:
-; CHECK: brasl %r14, logf128@PLT
+; CHECK: brasl %r14, logl@PLT
   %tmp = call fp128 @llvm.log.f128(fp128 %x)
   ret fp128 %tmp
 }
@@ -165,7 +165,7 @@ define double @f23(double %x) {
 
 define fp128 @f24(fp128 %x) {
 ; CHECK-LABEL: f24:
-; CHECK: brasl %r14, log2f128@PLT
+; CHECK: brasl %r14, log2l@PLT
   %tmp = call fp128 @llvm.log2.f128(fp128 %x)
   ret fp128 %tmp
 }
@@ -186,7 +186,7 @@ define double @f26(double %x) {
 
 define fp128 @f27(fp128 %x) {
 ; CHECK-LABEL: f27:
-; CHECK: brasl %r14, log10f128@PLT
+; CHECK: brasl %r14, log10l@PLT
   %tmp = call fp128 @llvm.log10.f128(fp128 %x)
   ret fp128 %tmp
 }
@@ -207,7 +207,7 @@ define double @f29(double %x, double %y) {
 
 define fp128 @f30(fp128 %x, fp128 %y) {
 ; CHECK-LABEL: f30:
-; CHECK: brasl %r14, fminf128@PLT
+; CHECK: brasl %r14, fminl@PLT
   %tmp = call fp128 @llvm.minnum.f128(fp128 %x, fp128 %y)
   ret fp128 %tmp
 }
@@ -238,7 +238,7 @@ define double @f32(double %x, double %y) {
 
 define fp128 @f33(fp128 %x, fp128 %y) {
 ; CHECK-LABEL: f33:
-; CHECK: brasl %r14, fmaxf128@PLT
+; CHECK: brasl %r14, fmaxl@PLT
   %tmp = call fp128 @llvm.maxnum.f128(fp128 %x, fp128 %y)
   ret fp128 %tmp
 }
diff --git a/llvm/test/CodeGen/SystemZ/fp-mul-13.ll b/llvm/test/CodeGen/SystemZ/fp-mul-13.ll
index 70cd966fbb1b4..e6870fb9a81db 100644
--- a/llvm/test/CodeGen/SystemZ/fp-mul-13.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-mul-13.ll
@@ -4,7 +4,7 @@ declare fp128 @llvm.fma.f128(fp128 %f1, fp128 %f2, fp128 %f3)
 
 define void @f1(ptr %ptr1, ptr %ptr2, ptr %ptr3, ptr %dst) {
 ; CHECK-LABEL: f1:
-; CHECK: brasl %r14, fmaf128
+; CHECK: brasl %r14, fmal
 ; CHECK: br %r14
   %f1 = load fp128, ptr %ptr1
   %f2 = load fp128, ptr %ptr2
diff --git a/llvm/test/CodeGen/SystemZ/fp-round-01.ll b/llvm/test/CodeGen/SystemZ/fp-round-01.ll
index 4a3ea23966d80..21b354c7a83c4 100644
--- a/llvm/test/CodeGen/SystemZ/fp-round-01.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-round-01.ll
@@ -70,7 +70,7 @@ define double @f5(double %f) {
 declare fp128 @llvm.nearbyint.f128(fp128 %f)
 define void @f6(ptr %ptr) {
 ; CHECK-LABEL: f6:
-; CHECK: brasl %r14, nearbyintf128@PLT
+; CHECK: brasl %r14, nearbyintl@PLT
 ; CHECK: br %r14
   %src = load fp128, ptr %ptr
   %res = call fp128 @llvm.nearbyint.f128(fp128 %src)
@@ -114,7 +114,7 @@ define double @f8(double %f) {
 declare fp128 @llvm.floor.f128(fp128 %f)
 define void @f9(ptr %ptr) {
 ; CHECK-LABEL: f9:
-; CHECK: brasl %r14, floorf128@PLT
+; CHECK: brasl %r14, floorl@PLT
 ; CHECK: br %r14
   %src = load fp128, ptr %ptr
   %res = call fp128 @llvm.floor.f128(fp128 %src)
@@ -158,7 +158,7 @@ define double @f11(double %f) {
 declare fp128 @llvm.ceil.f128(fp128 %f)
 define void @f12(ptr %ptr) {
 ; CHECK-LABEL: f12:
-; CHECK: brasl %r14, ceilf128@PLT
+; CHECK: brasl %r14, ceill@PLT
 ; CHECK: br %r14
   %src = load fp128, ptr %ptr
   %res = call fp128 @llvm.ceil.f128(fp128 %src)
@@ -190,7 +190,7 @@ define double @f14(double %f) {
 declare fp128 @llvm.trunc.f128(fp128 %f)
 define void @f15(ptr %ptr) {
 ; CHECK-LABEL: f15:
-; CHECK: brasl %r14, truncf128@PLT
+; CHECK: brasl %r14, truncl@PLT
 ; CHECK: br %r14
   %src = load fp128, ptr %ptr
   %res = call fp128 @llvm.trunc.f128(fp128 %src)
@@ -234,7 +234,7 @@ define double @f17(double %f) {
 declare fp128 @llvm.round.f128(fp128 %f)
 define void @f18(ptr %ptr) {
 ; CHECK-LABEL: f18:
-; CHECK: brasl %r14, roundf128@PLT
+; CHECK: brasl %r14, roundl@PLT
 ; CHECK: br %r14
   %src = load fp128, ptr %ptr
   %res = call fp128 @llvm.round.f128(fp128 %src)
@@ -266,7 +266,7 @@ define double @f20(double %f) {
 declare fp128 @llvm.roundeven.f128(fp128 %f)
 define void @f21(ptr %ptr) {
 ; CHECK-LABEL: f21:
-; CHECK: brasl %r14, roundevenf128@PLT
+; CHECK: brasl %r14, roundevenl@PLT
 ; CHECK: br %r14
   %src = load fp128, ptr %ptr
   %res = call fp128 @llvm.roundeven.f128(fp128 %src)
diff --git a/llvm/test/CodeGen/SystemZ/fp-sincos-01.ll b/llvm/test/CodeGen/SystemZ/fp-sincos-01.ll
index 03da3e4b8c5ac..4a38d7afba2c9 100644
--- a/llvm/test/CodeGen/SystemZ/fp-sincos-01.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-sincos-01.ll
@@ -51,7 +51,7 @@ define double @f2_errno(double %x) {
 
 define fp128 @f3(fp128 %x) {
 ; CHECK-OPT-LABEL: f3:
-; CHECK-OPT: brasl %r14, sincosf128@PLT
+; CHECK-OPT: brasl %r14, sincosl@PLT
 ; CHECK-OPT: axbr
   %tmp1 = call fp128 @sinl(fp128 %x) readnone
   %tmp2 = call fp128 @cosl(fp128 %x) readnone
diff --git a/llvm/test/CodeGen/SystemZ/fp-strict-mul-13.ll b/llvm/test/CodeGen/SystemZ/fp-strict-mul-13.ll
index 44ef22add70f6..4247c97533746 100644
--- a/llvm/test/CodeGen/SystemZ/fp-strict-mul-13.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-strict-mul-13.ll
@@ -4,7 +4,7 @@ declare fp128 @llvm.experimental.constrained.fma.f128(fp128 %f1, fp128 %f2, fp12
 
 define void @f1(ptr %ptr1, ptr %ptr2, ptr %ptr3, ptr %dst) #0 {
 ; CHECK-LABEL: f1:
-; CHECK: brasl %r14, fmaf128
+; CHECK: brasl %r14, fmal
 ; CHECK: br %r14
   %f1 = load fp128, ptr %ptr1
   %f2 = load fp128, ptr %ptr2
diff --git a/llvm/test/CodeGen/SystemZ/fp-strict-round-01.ll b/llvm/test/CodeGen/SystemZ/fp-strict-round-01.ll
index 5c825161541a1..95a5fa1af832b 100644
--- a/llvm/test/CodeGen/SystemZ/fp-strict-round-01.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-strict-round-01.ll
@@ -88,7 +88,7 @@ define double @f5(double %f) #0 {
 declare fp128 @llvm.experimental.constrained.nearbyint.f128(fp128, metadata, metadata)
 define void @f6(ptr %ptr) #0 {
 ; CHECK-LABEL: f6:
-; CHECK: brasl %r14, nearbyintf128@PLT
+; CHECK: brasl %r14, nearbyintl@PLT
 ; CHECK: br %r14
   %src = load fp128, ptr %ptr
   %res = call fp128 @llvm.experimental.constrained.nearbyint.f128(
@@ -141,7 +141,7 @@ define double @f8(double %f) #0 {
 declare fp128 @llvm.experimental.constrained.floor.f128(fp128, metadata)
 define void @f9(ptr %ptr) #0 {
 ; CHECK-LABEL: f9:
-; CHECK: brasl %r14, floorf128@PLT
+; CHECK: brasl %r14, floorl@PLT
 ; CHECK: br %r14
   %src = load fp128, ptr %ptr
   %res = call fp128 @llvm.experimental.constrained.floor.f128(
@@ -193,7 +193,7 @@ define double @f11(double %f) #0 {
 declare fp128 @llvm.experimental.constrained.ceil.f128(fp128, metadata)
 define void @f12(ptr %ptr) #0 {
 ; CHECK-LABEL: f12:
-; CHECK: brasl %r14, ceilf128@PLT
+; CHECK: brasl %r14, ceill@PLT
 ; CHECK: br %r14
   %src = load fp128, ptr %ptr
   %res = call fp128 @llvm.experimental.constrained.ceil.f128(
@@ -245,7 +245,7 @@ define double @f14(double %f) #0 {
 declare fp128 @llvm.experimental.constrained.trunc.f128(fp128, metadata)
 define void @f15(ptr %ptr) #0 {
 ; CHECK-LABEL: f15:
-; CHECK: brasl %r14, truncf128@PLT
+; CHECK: brasl %r14, truncl@PLT
 ; CHECK: br %r14
   %src = load fp128, ptr %ptr
   %res = call fp128 @llvm.experimental.constrained.trunc.f128(
@@ -297,7 +297,7 @@ define double @f17(double %f) #0 {
 declare fp128 @llvm.experimental.constrained.round.f128(fp128, metadata)
 define void @f18(ptr %ptr) #0 {
 ; CHECK-LABEL: f18:
-; CHECK: brasl %r14, roundf128@PLT
+; CHECK: brasl %r14, roundl@PLT
 ; CHECK: br %r14
   %src = load fp128, ptr %ptr
   %res = call fp128 @llvm.experimental.constrained.round.f128(
@@ -335,7 +335,7 @@ define double @f20(double %f) #0 {
 declare fp128 @llvm.experimental.constrained.roundeven.f128(fp128, metadata)
 define void @f21(ptr %ptr) #0 {
 ; CHECK-LABEL: f21:
-; CHECK: brasl %r14, roundevenf128@PLT
+; CHECK: brasl %r14, roundevenl@PLT
 ; CHECK: br %r14
   %src = load fp128, ptr %ptr
   %res = call fp128 @llvm.experimental.constrained.roundeven.f128(
diff --git a/llvm/test/CodeGen/X86/fp128-libcalls.ll b/llvm/test/CodeGen/X86/fp128-libcalls.ll
index ebab9292a2411..369a36120009d 100644
--- a/llvm/test/CodeGen/X86/fp128-libcalls.ll
+++ b/llvm/test/CodeGen/X86/fp128-libcalls.ll
@@ -2455,7 +2455,7 @@ define { fp128, fp128 } @Test128Modf(fp128 %a) nounwind {
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
-; WIN-NEXT:    callq modfl
+; WIN-NEXT:    callq modff128
 ; WIN-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1
 ; WIN-NEXT:    addq $72, %rsp
 ; WIN-NEXT:    retq
@@ -2478,7 +2478,7 @@ define { fp128, fp128 } @Test128Modf(fp128 %a) nounwind {
 ; WIN-X86-NEXT:    pushl 16(%ebp)
 ; WIN-X86-NEXT:    pushl 12(%ebp)
 ; WIN-X86-NEXT:    pushl %ecx
-; WIN-X86-NEXT:    calll _modfl
+; WIN-X86-NEXT:    calll _modff128
 ; WIN-X86-NEXT:    addl $24, %esp
 ; WIN-X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; WIN-X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill

>From 40c166fdf7af357e42f1065cf46dbeb28f110483 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross at umich.edu>
Date: Mon, 14 Jul 2025 01:01:49 -0400
Subject: [PATCH 5/7] update hexagon test

---
 llvm/test/CodeGen/Hexagon/llvm.sincos.ll | 30 ++++++++++++------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/llvm/test/CodeGen/Hexagon/llvm.sincos.ll b/llvm/test/CodeGen/Hexagon/llvm.sincos.ll
index f02ac2ca8480f..095c53cce003a 100644
--- a/llvm/test/CodeGen/Hexagon/llvm.sincos.ll
+++ b/llvm/test/CodeGen/Hexagon/llvm.sincos.ll
@@ -1108,11 +1108,11 @@ define { fp128, fp128 } @test_sincos_f128(fp128 %a) #0 {
 ; BASE-NEXT:     memd(r29+#40) = r19:18
 ; BASE-NEXT:    } // 8-byte Folded Spill
 ; BASE-NEXT:    {
-; BASE-NEXT:     call sinl
+; BASE-NEXT:     call sinf128
 ; BASE-NEXT:     r19:18 = combine(r3,r2)
 ; BASE-NEXT:    }
 ; BASE-NEXT:    {
-; BASE-NEXT:     call cosl
+; BASE-NEXT:     call cosf128
 ; BASE-NEXT:     r0 = add(r29,#16)
 ; BASE-NEXT:     r3:2 = combine(r19,r18)
 ; BASE-NEXT:     r5:4 = combine(r17,r16)
@@ -1156,7 +1156,7 @@ define { fp128, fp128 } @test_sincos_f128(fp128 %a) #0 {
 ; GNU-NEXT:     memw(r29+#4) = r7.new
 ; GNU-NEXT:    }
 ; GNU-NEXT:    {
-; GNU-NEXT:     call sincosl
+; GNU-NEXT:     call sincosf128
 ; GNU-NEXT:     memw(r29+#0) = r6
 ; GNU-NEXT:    }
 ; GNU-NEXT:    {
@@ -1194,11 +1194,11 @@ define { fp128, fp128 } @test_sincos_f128(fp128 %a) #0 {
 ; MUSL-NEXT:     memd(r29+#40) = r19:18
 ; MUSL-NEXT:    } // 8-byte Folded Spill
 ; MUSL-NEXT:    {
-; MUSL-NEXT:     call sinl
+; MUSL-NEXT:     call sinf128
 ; MUSL-NEXT:     r19:18 = combine(r3,r2)
 ; MUSL-NEXT:    }
 ; MUSL-NEXT:    {
-; MUSL-NEXT:     call cosl
+; MUSL-NEXT:     call cosf128
 ; MUSL-NEXT:     r0 = add(r29,#16)
 ; MUSL-NEXT:     r3:2 = combine(r19,r18)
 ; MUSL-NEXT:     r5:4 = combine(r17,r16)
@@ -1251,24 +1251,24 @@ define { <2 x fp128>, <2 x fp128> } @test_sincos_v2f128(<2 x fp128> %a) #0 {
 ; BASE-NEXT:     memd(r29+#64) = r25:24
 ; BASE-NEXT:    } // 8-byte Folded Spill
 ; BASE-NEXT:    {
-; BASE-NEXT:     call sinl
+; BASE-NEXT:     call sinf128
 ; BASE-NEXT:     r23:22 = memd(r29+#112)
 ; BASE-NEXT:     r25:24 = memd(r29+#120)
 ; BASE-NEXT:    }
 ; BASE-NEXT:    {
-; BASE-NEXT:     call sinl
+; BASE-NEXT:     call sinf128
 ; BASE-NEXT:     r0 = add(r29,#0)
 ; BASE-NEXT:     r3:2 = combine(r23,r22)
 ; BASE-NEXT:     r5:4 = combine(r25,r24)
 ; BASE-NEXT:    }
 ; BASE-NEXT:    {
-; BASE-NEXT:     call cosl
+; BASE-NEXT:     call cosf128
 ; BASE-NEXT:     r0 = add(r29,#48)
 ; BASE-NEXT:     r3:2 = combine(r19,r18)
 ; BASE-NEXT:     r5:4 = combine(r17,r16)
 ; BASE-NEXT:    }
 ; BASE-NEXT:    {
-; BASE-NEXT:     call cosl
+; BASE-NEXT:     call cosf128
 ; BASE-NEXT:     r0 = add(r29,#16)
 ; BASE-NEXT:     r3:2 = combine(r23,r22)
 ; BASE-NEXT:     r5:4 = combine(r25,r24)
@@ -1338,7 +1338,7 @@ define { <2 x fp128>, <2 x fp128> } @test_sincos_v2f128(<2 x fp128> %a) #0 {
 ; GNU-NEXT:     memw(r17+#0) = r6.new
 ; GNU-NEXT:    }
 ; GNU-NEXT:    {
-; GNU-NEXT:     call sincosl
+; GNU-NEXT:     call sincosf128
 ; GNU-NEXT:     r21:20 = memd(r29+#144)
 ; GNU-NEXT:     memw(r17+#4) = r1
 ; GNU-NEXT:    }
@@ -1349,7 +1349,7 @@ define { <2 x fp128>, <2 x fp128> } @test_sincos_v2f128(<2 x fp128> %a) #0 {
 ; GNU-NEXT:     memw(r17+#4) = r0.new
 ; GNU-NEXT:    }
 ; GNU-NEXT:    {
-; GNU-NEXT:     call sincosl
+; GNU-NEXT:     call sincosf128
 ; GNU-NEXT:     r0 = add(r29,#40)
 ; GNU-NEXT:     r1 = add(r29,#8)
 ; GNU-NEXT:     memw(r17+#0) = r1.new
@@ -1414,24 +1414,24 @@ define { <2 x fp128>, <2 x fp128> } @test_sincos_v2f128(<2 x fp128> %a) #0 {
 ; MUSL-NEXT:     memd(r29+#64) = r25:24
 ; MUSL-NEXT:    } // 8-byte Folded Spill
 ; MUSL-NEXT:    {
-; MUSL-NEXT:     call sinl
+; MUSL-NEXT:     call sinf128
 ; MUSL-NEXT:     r23:22 = memd(r29+#112)
 ; MUSL-NEXT:     r25:24 = memd(r29+#120)
 ; MUSL-NEXT:    }
 ; MUSL-NEXT:    {
-; MUSL-NEXT:     call sinl
+; MUSL-NEXT:     call sinf128
 ; MUSL-NEXT:     r0 = add(r29,#0)
 ; MUSL-NEXT:     r3:2 = combine(r23,r22)
 ; MUSL-NEXT:     r5:4 = combine(r25,r24)
 ; MUSL-NEXT:    }
 ; MUSL-NEXT:    {
-; MUSL-NEXT:     call cosl
+; MUSL-NEXT:     call cosf128
 ; MUSL-NEXT:     r0 = add(r29,#48)
 ; MUSL-NEXT:     r3:2 = combine(r19,r18)
 ; MUSL-NEXT:     r5:4 = combine(r17,r16)
 ; MUSL-NEXT:    }
 ; MUSL-NEXT:    {
-; MUSL-NEXT:     call cosl
+; MUSL-NEXT:     call cosf128
 ; MUSL-NEXT:     r0 = add(r29,#16)
 ; MUSL-NEXT:     r3:2 = combine(r23,r22)
 ; MUSL-NEXT:     r5:4 = combine(r25,r24)

>From 68c943fc457745698034671e225b0339a3eecdcd Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross at umich.edu>
Date: Mon, 14 Jul 2025 01:19:26 -0400
Subject: [PATCH 6/7] Add a check against Clang

---
 clang/lib/CodeGen/CodeGenModule.cpp | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index c8866f15745c2..2d68cb7638076 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -394,6 +394,16 @@ static void checkDataLayoutConsistency(const TargetInfo &Target,
     Check("__ibm128", llvm::Type::getPPC_FP128Ty(Context), Target.Ibm128Align);
 
   Check("void*", llvm::PointerType::getUnqual(Context), Target.PointerAlign);
+
+  if (Triple.f128LibmShouldUseLongDouble() &&
+      &Target.getLongDoubleFormat() != &llvm::APFloat::IEEEquad()) {
+    llvm::errs()
+        << "For target " << Triple.str()
+        << " LLVM wants to use `long double` symbols for `_Float128` libm call "
+           "lowering, but clang specifies `long double` as "
+        << Target.getLongDoubleFormat() << "\n";
+    abort();
+  }
 #endif
 }
 

>From 25999960fe18a98c81f0926ca5a8d5a1108cd286 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross at umich.edu>
Date: Mon, 14 Jul 2025 01:45:19 -0400
Subject: [PATCH 7/7] update wasm test

---
 llvm/test/CodeGen/WebAssembly/llvm.sincos.ll | 120 +++++++++----------
 1 file changed, 60 insertions(+), 60 deletions(-)

diff --git a/llvm/test/CodeGen/WebAssembly/llvm.sincos.ll b/llvm/test/CodeGen/WebAssembly/llvm.sincos.ll
index 3c10b09525573..87499e63bff2f 100644
--- a/llvm/test/CodeGen/WebAssembly/llvm.sincos.ll
+++ b/llvm/test/CodeGen/WebAssembly/llvm.sincos.ll
@@ -384,38 +384,38 @@ define { fp128, fp128 } @test_sincos_f128(fp128 %a) #0 {
 ; WASM32-NEXT:    .local i32
 ; WASM32-NEXT:  # %bb.0:
 ; WASM32-NEXT:    global.get __stack_pointer
-; WASM32-NEXT:    i32.const 32
+; WASM32-NEXT:    i32.const 48
 ; WASM32-NEXT:    i32.sub
 ; WASM32-NEXT:    local.tee 3
 ; WASM32-NEXT:    global.set __stack_pointer
 ; WASM32-NEXT:    local.get 3
 ; WASM32-NEXT:    local.get 1
 ; WASM32-NEXT:    local.get 2
-; WASM32-NEXT:    call cosl
+; WASM32-NEXT:    local.get 3
+; WASM32-NEXT:    i32.const 32
+; WASM32-NEXT:    i32.add
 ; WASM32-NEXT:    local.get 3
 ; WASM32-NEXT:    i32.const 16
 ; WASM32-NEXT:    i32.add
-; WASM32-NEXT:    local.get 1
-; WASM32-NEXT:    local.get 2
-; WASM32-NEXT:    call sinl
+; WASM32-NEXT:    call sincosl
 ; WASM32-NEXT:    local.get 0
 ; WASM32-NEXT:    local.get 3
-; WASM32-NEXT:    i64.load 8
+; WASM32-NEXT:    i64.load 24
 ; WASM32-NEXT:    i64.store 24
 ; WASM32-NEXT:    local.get 0
 ; WASM32-NEXT:    local.get 3
-; WASM32-NEXT:    i64.load 0
+; WASM32-NEXT:    i64.load 16
 ; WASM32-NEXT:    i64.store 16
 ; WASM32-NEXT:    local.get 0
 ; WASM32-NEXT:    local.get 3
-; WASM32-NEXT:    i64.load 24
+; WASM32-NEXT:    i64.load 40
 ; WASM32-NEXT:    i64.store 8
 ; WASM32-NEXT:    local.get 0
 ; WASM32-NEXT:    local.get 3
-; WASM32-NEXT:    i64.load 16
+; WASM32-NEXT:    i64.load 32
 ; WASM32-NEXT:    i64.store 0
 ; WASM32-NEXT:    local.get 3
-; WASM32-NEXT:    i32.const 32
+; WASM32-NEXT:    i32.const 48
 ; WASM32-NEXT:    i32.add
 ; WASM32-NEXT:    global.set __stack_pointer
 ; WASM32-NEXT:    # fallthrough-return
@@ -425,38 +425,38 @@ define { fp128, fp128 } @test_sincos_f128(fp128 %a) #0 {
 ; WASM64-NEXT:    .local i64
 ; WASM64-NEXT:  # %bb.0:
 ; WASM64-NEXT:    global.get __stack_pointer
-; WASM64-NEXT:    i64.const 32
+; WASM64-NEXT:    i64.const 48
 ; WASM64-NEXT:    i64.sub
 ; WASM64-NEXT:    local.tee 3
 ; WASM64-NEXT:    global.set __stack_pointer
 ; WASM64-NEXT:    local.get 3
 ; WASM64-NEXT:    local.get 1
 ; WASM64-NEXT:    local.get 2
-; WASM64-NEXT:    call cosl
+; WASM64-NEXT:    local.get 3
+; WASM64-NEXT:    i64.const 32
+; WASM64-NEXT:    i64.add
 ; WASM64-NEXT:    local.get 3
 ; WASM64-NEXT:    i64.const 16
 ; WASM64-NEXT:    i64.add
-; WASM64-NEXT:    local.get 1
-; WASM64-NEXT:    local.get 2
-; WASM64-NEXT:    call sinl
+; WASM64-NEXT:    call sincosl
 ; WASM64-NEXT:    local.get 0
 ; WASM64-NEXT:    local.get 3
-; WASM64-NEXT:    i64.load 8
+; WASM64-NEXT:    i64.load 24
 ; WASM64-NEXT:    i64.store 24
 ; WASM64-NEXT:    local.get 0
 ; WASM64-NEXT:    local.get 3
-; WASM64-NEXT:    i64.load 0
+; WASM64-NEXT:    i64.load 16
 ; WASM64-NEXT:    i64.store 16
 ; WASM64-NEXT:    local.get 0
 ; WASM64-NEXT:    local.get 3
-; WASM64-NEXT:    i64.load 24
+; WASM64-NEXT:    i64.load 40
 ; WASM64-NEXT:    i64.store 8
 ; WASM64-NEXT:    local.get 0
 ; WASM64-NEXT:    local.get 3
-; WASM64-NEXT:    i64.load 16
+; WASM64-NEXT:    i64.load 32
 ; WASM64-NEXT:    i64.store 0
 ; WASM64-NEXT:    local.get 3
-; WASM64-NEXT:    i64.const 32
+; WASM64-NEXT:    i64.const 48
 ; WASM64-NEXT:    i64.add
 ; WASM64-NEXT:    global.set __stack_pointer
 ; WASM64-NEXT:    # fallthrough-return
@@ -470,66 +470,66 @@ define { <2 x fp128>, <2 x fp128> } @test_sincos_v2f128(<2 x fp128> %a) #0 {
 ; WASM32-NEXT:    .local i32
 ; WASM32-NEXT:  # %bb.0:
 ; WASM32-NEXT:    global.get __stack_pointer
-; WASM32-NEXT:    i32.const 64
+; WASM32-NEXT:    i32.const 96
 ; WASM32-NEXT:    i32.sub
 ; WASM32-NEXT:    local.tee 5
 ; WASM32-NEXT:    global.set __stack_pointer
 ; WASM32-NEXT:    local.get 5
-; WASM32-NEXT:    i32.const 32
+; WASM32-NEXT:    i32.const 48
 ; WASM32-NEXT:    i32.add
 ; WASM32-NEXT:    local.get 3
 ; WASM32-NEXT:    local.get 4
-; WASM32-NEXT:    call cosl
+; WASM32-NEXT:    local.get 5
+; WASM32-NEXT:    i32.const 80
+; WASM32-NEXT:    i32.add
+; WASM32-NEXT:    local.get 5
+; WASM32-NEXT:    i32.const 64
+; WASM32-NEXT:    i32.add
+; WASM32-NEXT:    call sincosl
 ; WASM32-NEXT:    local.get 5
 ; WASM32-NEXT:    local.get 1
 ; WASM32-NEXT:    local.get 2
-; WASM32-NEXT:    call cosl
 ; WASM32-NEXT:    local.get 5
-; WASM32-NEXT:    i32.const 48
+; WASM32-NEXT:    i32.const 32
 ; WASM32-NEXT:    i32.add
-; WASM32-NEXT:    local.get 3
-; WASM32-NEXT:    local.get 4
-; WASM32-NEXT:    call sinl
 ; WASM32-NEXT:    local.get 5
 ; WASM32-NEXT:    i32.const 16
 ; WASM32-NEXT:    i32.add
-; WASM32-NEXT:    local.get 1
-; WASM32-NEXT:    local.get 2
-; WASM32-NEXT:    call sinl
+; WASM32-NEXT:    call sincosl
 ; WASM32-NEXT:    local.get 0
 ; WASM32-NEXT:    local.get 5
-; WASM32-NEXT:    i64.load 40
+; WASM32-NEXT:    i64.load 72
 ; WASM32-NEXT:    i64.store 56
 ; WASM32-NEXT:    local.get 0
 ; WASM32-NEXT:    local.get 5
-; WASM32-NEXT:    i64.load 32
+; WASM32-NEXT:    i64.load 64
 ; WASM32-NEXT:    i64.store 48
 ; WASM32-NEXT:    local.get 0
 ; WASM32-NEXT:    local.get 5
-; WASM32-NEXT:    i64.load 8
+; WASM32-NEXT:    i64.load 24
 ; WASM32-NEXT:    i64.store 40
 ; WASM32-NEXT:    local.get 0
 ; WASM32-NEXT:    local.get 5
-; WASM32-NEXT:    i64.load 0
+; WASM32-NEXT:    i64.load 16
 ; WASM32-NEXT:    i64.store 32
 ; WASM32-NEXT:    local.get 0
 ; WASM32-NEXT:    local.get 5
-; WASM32-NEXT:    i64.load 56
+; WASM32-NEXT:    i64.load 88
 ; WASM32-NEXT:    i64.store 24
 ; WASM32-NEXT:    local.get 0
 ; WASM32-NEXT:    local.get 5
-; WASM32-NEXT:    i64.load 48
+; WASM32-NEXT:    i64.load 80
 ; WASM32-NEXT:    i64.store 16
 ; WASM32-NEXT:    local.get 0
 ; WASM32-NEXT:    local.get 5
-; WASM32-NEXT:    i64.load 24
+; WASM32-NEXT:    i64.load 40
 ; WASM32-NEXT:    i64.store 8
 ; WASM32-NEXT:    local.get 0
 ; WASM32-NEXT:    local.get 5
-; WASM32-NEXT:    i64.load 16
+; WASM32-NEXT:    i64.load 32
 ; WASM32-NEXT:    i64.store 0
 ; WASM32-NEXT:    local.get 5
-; WASM32-NEXT:    i32.const 64
+; WASM32-NEXT:    i32.const 96
 ; WASM32-NEXT:    i32.add
 ; WASM32-NEXT:    global.set __stack_pointer
 ; WASM32-NEXT:    # fallthrough-return
@@ -539,66 +539,66 @@ define { <2 x fp128>, <2 x fp128> } @test_sincos_v2f128(<2 x fp128> %a) #0 {
 ; WASM64-NEXT:    .local i64
 ; WASM64-NEXT:  # %bb.0:
 ; WASM64-NEXT:    global.get __stack_pointer
-; WASM64-NEXT:    i64.const 64
+; WASM64-NEXT:    i64.const 96
 ; WASM64-NEXT:    i64.sub
 ; WASM64-NEXT:    local.tee 5
 ; WASM64-NEXT:    global.set __stack_pointer
 ; WASM64-NEXT:    local.get 5
-; WASM64-NEXT:    i64.const 32
+; WASM64-NEXT:    i64.const 48
 ; WASM64-NEXT:    i64.add
 ; WASM64-NEXT:    local.get 3
 ; WASM64-NEXT:    local.get 4
-; WASM64-NEXT:    call cosl
+; WASM64-NEXT:    local.get 5
+; WASM64-NEXT:    i64.const 80
+; WASM64-NEXT:    i64.add
+; WASM64-NEXT:    local.get 5
+; WASM64-NEXT:    i64.const 64
+; WASM64-NEXT:    i64.add
+; WASM64-NEXT:    call sincosl
 ; WASM64-NEXT:    local.get 5
 ; WASM64-NEXT:    local.get 1
 ; WASM64-NEXT:    local.get 2
-; WASM64-NEXT:    call cosl
 ; WASM64-NEXT:    local.get 5
-; WASM64-NEXT:    i64.const 48
+; WASM64-NEXT:    i64.const 32
 ; WASM64-NEXT:    i64.add
-; WASM64-NEXT:    local.get 3
-; WASM64-NEXT:    local.get 4
-; WASM64-NEXT:    call sinl
 ; WASM64-NEXT:    local.get 5
 ; WASM64-NEXT:    i64.const 16
 ; WASM64-NEXT:    i64.add
-; WASM64-NEXT:    local.get 1
-; WASM64-NEXT:    local.get 2
-; WASM64-NEXT:    call sinl
+; WASM64-NEXT:    call sincosl
 ; WASM64-NEXT:    local.get 0
 ; WASM64-NEXT:    local.get 5
-; WASM64-NEXT:    i64.load 40
+; WASM64-NEXT:    i64.load 72
 ; WASM64-NEXT:    i64.store 56
 ; WASM64-NEXT:    local.get 0
 ; WASM64-NEXT:    local.get 5
-; WASM64-NEXT:    i64.load 32
+; WASM64-NEXT:    i64.load 64
 ; WASM64-NEXT:    i64.store 48
 ; WASM64-NEXT:    local.get 0
 ; WASM64-NEXT:    local.get 5
-; WASM64-NEXT:    i64.load 8
+; WASM64-NEXT:    i64.load 24
 ; WASM64-NEXT:    i64.store 40
 ; WASM64-NEXT:    local.get 0
 ; WASM64-NEXT:    local.get 5
-; WASM64-NEXT:    i64.load 0
+; WASM64-NEXT:    i64.load 16
 ; WASM64-NEXT:    i64.store 32
 ; WASM64-NEXT:    local.get 0
 ; WASM64-NEXT:    local.get 5
-; WASM64-NEXT:    i64.load 56
+; WASM64-NEXT:    i64.load 88
 ; WASM64-NEXT:    i64.store 24
 ; WASM64-NEXT:    local.get 0
 ; WASM64-NEXT:    local.get 5
-; WASM64-NEXT:    i64.load 48
+; WASM64-NEXT:    i64.load 80
 ; WASM64-NEXT:    i64.store 16
 ; WASM64-NEXT:    local.get 0
 ; WASM64-NEXT:    local.get 5
-; WASM64-NEXT:    i64.load 24
+; WASM64-NEXT:    i64.load 40
 ; WASM64-NEXT:    i64.store 8
 ; WASM64-NEXT:    local.get 0
 ; WASM64-NEXT:    local.get 5
-; WASM64-NEXT:    i64.load 16
+; WASM64-NEXT:    i64.load 32
 ; WASM64-NEXT:    i64.store 0
 ; WASM64-NEXT:    local.get 5
-; WASM64-NEXT:    i64.const 64
+; WASM64-NEXT:    i64.const 96
 ; WASM64-NEXT:    i64.add
 ; WASM64-NEXT:    global.set __stack_pointer
 ; WASM64-NEXT:    # fallthrough-return