[libc-commits] [libc] [libc][NFC] Tighten up guard conditions for sqrt and polyeval (PR #93791)

Thu May 30 05:10:25 PDT 2024

https://github.com/gchatelet updated https://github.com/llvm/llvm-project/pull/93791

>From 47015ae8c504f3b7bcc35dd1597e1a298ef89579 Mon Sep 17 00:00:00 2001
From: Guillaume Chatelet <gchatelet at google.com>
Date: Thu, 30 May 2024 09:03:25 +0000
Subject: [PATCH 1/2] [libc][NFC] Tighten up guard conditions for sqrt and
 polyeval

---
 libc/src/__support/FPUtil/sqrt.h            | 3 ++-
 libc/src/__support/FPUtil/x86_64/PolyEval.h | 6 ++++--
 libc/src/__support/FPUtil/x86_64/sqrt.h     | 5 +++--
 3 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/libc/src/__support/FPUtil/sqrt.h b/libc/src/__support/FPUtil/sqrt.h
index 3ba1bdf687a3e..eb86ddfa89d8e 100644
--- a/libc/src/__support/FPUtil/sqrt.h
+++ b/libc/src/__support/FPUtil/sqrt.h
@@ -10,8 +10,9 @@
 #define LLVM_LIBC_SRC___SUPPORT_FPUTIL_SQRT_H
 
 #include "src/__support/macros/properties/architectures.h"
+#include "src/__support/macros/properties/cpu_features.h"
 
-#if defined(LIBC_TARGET_ARCH_IS_X86_64)
+#if defined(LIBC_TARGET_ARCH_IS_X86_64) && defined(LIBC_TARGET_CPU_HAS_SSE2)
 #include "x86_64/sqrt.h"
 #elif defined(LIBC_TARGET_ARCH_IS_AARCH64)
 #include "aarch64/sqrt.h"
diff --git a/libc/src/__support/FPUtil/x86_64/PolyEval.h b/libc/src/__support/FPUtil/x86_64/PolyEval.h
index 69fd776320799..713fa029021e2 100644
--- a/libc/src/__support/FPUtil/x86_64/PolyEval.h
+++ b/libc/src/__support/FPUtil/x86_64/PolyEval.h
@@ -11,9 +11,11 @@
 
 #include "src/__support/common.h"
 #include "src/__support/macros/properties/architectures.h"
+#include "src/__support/macros/properties/cpu_features.h"
 
-#if !defined(LIBC_TARGET_ARCH_IS_X86_64)
-#error "Invalid include"
+#if !(defined(LIBC_TARGET_ARCH_IS_X86_64) &&                                   \
+      defined(LIBC_TARGET_CPU_HAS_SSE2) && defined(LIBC_TARGET_CPU_HAS_FMA))
+#error "Missing FMA and SS2 support"
 #endif
 
 #include <immintrin.h>
diff --git a/libc/src/__support/FPUtil/x86_64/sqrt.h b/libc/src/__support/FPUtil/x86_64/sqrt.h
index 93ba8c0b33fdc..bfcc5e98834d6 100644
--- a/libc/src/__support/FPUtil/x86_64/sqrt.h
+++ b/libc/src/__support/FPUtil/x86_64/sqrt.h
@@ -11,9 +11,10 @@
 
 #include "src/__support/common.h"
 #include "src/__support/macros/properties/architectures.h"
+#include "src/__support/macros/properties/cpu_features.h"
 
-#if !defined(LIBC_TARGET_ARCH_IS_X86)
-#error "Invalid include"
+#if !(defined(LIBC_TARGET_ARCH_IS_X86_64) && defined(LIBC_TARGET_CPU_HAS_SSE2))
+#error "sqrtss / sqrtsd need SSE2"
 #endif
 
 #include "src/__support/FPUtil/generic/sqrt.h"

>From e135cb90c25876813d199a79d5eea2e9de1dac06 Mon Sep 17 00:00:00 2001
From: Guillaume Chatelet <gchatelet at google.com>
Date: Thu, 30 May 2024 12:09:32 +0000
Subject: [PATCH 2/2] Remove unused file

---
 libc/src/__support/FPUtil/x86_64/PolyEval.h | 87 ---------------------
 1 file changed, 87 deletions(-)
 delete mode 100644 libc/src/__support/FPUtil/x86_64/PolyEval.h

diff --git a/libc/src/__support/FPUtil/x86_64/PolyEval.h b/libc/src/__support/FPUtil/x86_64/PolyEval.h
deleted file mode 100644
index 713fa029021e2..0000000000000
--- a/libc/src/__support/FPUtil/x86_64/PolyEval.h
+++ /dev/null
@@ -1,87 +0,0 @@
-//===-- Optimized PolyEval implementations for x86_64 --------- C++ -----*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIBC_SRC___SUPPORT_FPUTIL_X86_64_POLYEVAL_H
-#define LLVM_LIBC_SRC___SUPPORT_FPUTIL_X86_64_POLYEVAL_H
-
-#include "src/__support/common.h"
-#include "src/__support/macros/properties/architectures.h"
-#include "src/__support/macros/properties/cpu_features.h"
-
-#if !(defined(LIBC_TARGET_ARCH_IS_X86_64) &&                                   \
-      defined(LIBC_TARGET_CPU_HAS_SSE2) && defined(LIBC_TARGET_CPU_HAS_FMA))
-#error "Missing FMA and SS2 support"
-#endif
-
-#include <immintrin.h>
-
-namespace LIBC_NAMESPACE {
-namespace fputil {
-
-// Cubic polynomials:
-//   polyeval(x, a0, a1, a2, a3) = a3*x^3 + a2*x^2 + a1*x + a0
-template <>
-LIBC_INLINE float polyeval(float x, float a0, float a1, float a2, float a3) {
-  __m128 xmm = _mm_set1_ps(x);                 // NOLINT
-  __m128 a13 = _mm_set_ps(0.0f, x, a3, a1);    // NOLINT
-  __m128 a02 = _mm_set_ps(0.0f, 0.0f, a2, a0); // NOLINT
-  // r = (0, x^2, a3*x + a2, a1*x + a0)
-  __m128 r = _mm_fmadd_ps(a13, xmm, a02); // NOLINT
-  // result = (a3*x + a2) * x^2 + (a1*x + a0)
-  return fma(r[2], r[1], r[0]);
-}
-
-template <>
-LIBC_INLINE double polyeval(double x, double a0, double a1, double a2,
-                            double a3) {
-  __m256d xmm = _mm256_set1_pd(x);               // NOLINT
-  __m256d a13 = _mm256_set_pd(0.0, x, a3, a1);   // NOLINT
-  __m256d a02 = _mm256_set_pd(0.0, 0.0, a2, a0); // NOLINT
-  // r = (0, x^2, a3*x + a2, a1*x + a0)
-  __m256d r = _mm256_fmadd_pd(a13, xmm, a02); // NOLINT
-  // result = (a3*x + a2) * x^2 + (a1*x + a0)
-  return fma(r[2], r[1], r[0]);
-}
-
-// Quintic polynomials:
-//   polyeval(x, a0, a1, a2, a3, a4, a5) = a5*x^5 + a4*x^4 + a3*x^3 + a2*x^2 +
-//                                         + a1*x + a0
-template <>
-LIBC_INLINE float polyeval(float x, float a0, float a1, float a2, float a3,
-                           float a4, float a5) {
-  __m128 xmm = _mm_set1_ps(x);                 // NOLINT
-  __m128 a25 = _mm_set_ps(0.0f, x, a5, a2);    // NOLINT
-  __m128 a14 = _mm_set_ps(0.0f, 0.0f, a4, a1); // NOLINT
-  __m128 a03 = _mm_set_ps(0.0f, 0.0f, a3, a0); // NOLINT
-  // r1 = (0, x^2, a5*x + a4, a2*x + a1)
-  __m128 r1 = _mm_fmadd_ps(a25, xmm, a14); // NOLINT
-  // r2 = (0, x^3, (a5*x + a4)*x + a3, (a2*x + a1)*x + a0
-  __m128 r2 = _mm_fmadd_ps(r1, xmm, a03); // NOLINT
-  // result = ((a5*x + a4)*x + a3) * x^3 + ((a2*x + a1)*x + a0)
-  return fma(r2[2], r2[1], r2[0]);
-}
-
-template <>
-LIBC_INLINE double polyeval(double x, double a0, double a1, double a2,
-                            double a3, double a4, double a5) {
-  __m256d xmm = _mm256_set1_pd(x);               // NOLINT
-  __m256d a25 = _mm256_set_pd(0.0, x, a5, a2);   // NOLINT
-  __m256d a14 = _mm256_set_pd(0.0, 0.0, a4, a1); // NOLINT
-  __m256d a03 = _mm256_set_pd(0.0, 0.0, a3, a0); // NOLINT
-  // r1 = (0, x^2, a5*x + a4, a2*x + a1)
-  __m256d r1 = _mm256_fmadd_pd(a25, xmm, a14); // NOLINT
-  // r2 = (0, x^3, (a5*x + a4)*x + a3, (a2*x + a1)*x + a0
-  __m256d r2 = _mm256_fmadd_pd(r1, xmm, a03); // NOLINT
-  // result = ((a5*x + a4)*x + a3) * x^3 + ((a2*x + a1)*x + a0)
-  return fma(r2[2], r2[1], r2[0]);
-}
-
-} // namespace fputil
-} // namespace LIBC_NAMESPACE
-
-#endif // LLVM_LIBC_SRC___SUPPORT_FPUTIL_X86_64_POLYEVAL_H