[libc] [llvm] [libc] Use correct instruction for arm32 sqrt inline asm. (PR #134968)

via llvm-commits llvm-commits at lists.llvm.org
Tue Apr 8 20:47:03 PDT 2025


https://github.com/lntue created https://github.com/llvm/llvm-project/pull/134968

https://godbolt.org/z/3jT7jdrs9

>From 58917e120218d1e473b72600349e94566c6af4a1 Mon Sep 17 00:00:00 2001
From: Tue Ly <lntue at google.com>
Date: Tue, 8 Apr 2025 23:44:56 -0400
Subject: [PATCH] [libc] Use correct instruction for arm32 sqrt inline asm.

---
 libc/src/__support/FPUtil/aarch64/sqrt.h      | 43 +++++++++++++++++++
 libc/src/__support/FPUtil/arm/sqrt.h          |  6 +--
 libc/src/__support/FPUtil/sqrt.h              |  4 +-
 .../llvm-project-overlay/libc/BUILD.bazel     |  2 +-
 4 files changed, 50 insertions(+), 5 deletions(-)
 create mode 100644 libc/src/__support/FPUtil/aarch64/sqrt.h

diff --git a/libc/src/__support/FPUtil/aarch64/sqrt.h b/libc/src/__support/FPUtil/aarch64/sqrt.h
new file mode 100644
index 0000000000000..2bda8e895b375
--- /dev/null
+++ b/libc/src/__support/FPUtil/aarch64/sqrt.h
@@ -0,0 +1,43 @@
+//===-- Square root of IEEE 754 floating point numbers ----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_FPUTIL_AARCH64_SQRT_H
+#define LLVM_LIBC_SRC___SUPPORT_FPUTIL_AARCH64_SQRT_H
+
+#include "src/__support/common.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/properties/architectures.h"
+#include "src/__support/macros/properties/cpu_features.h"
+
+#if !defined(LIBC_TARGET_ARCH_IS_AARCH64)
+#error "Invalid include"
+#endif
+
+namespace LIBC_NAMESPACE_DECL {
+namespace fputil {
+
+#ifdef LIBC_TARGET_CPU_HAS_FPU_FLOAT
+template <> LIBC_INLINE float sqrt<float>(float x) {
+  float y;
+  asm("fsqrt %s0, %s1\n\t" : "=w"(y) : "w"(x));
+  return y;
+}
+#endif // LIBC_TARGET_CPU_HAS_FPU_FLOAT
+
+#ifdef LIBC_TARGET_CPU_HAS_FPU_DOUBLE
+template <> LIBC_INLINE double sqrt<double>(double x) {
+  double y;
+  asm("fsqrt %d0, %d1\n\t" : "=w"(y) : "w"(x));
+  return y;
+}
+#endif // LIBC_TARGET_CPU_HAS_FPU_DOUBLE
+
+} // namespace fputil
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC___SUPPORT_FPUTIL_AARCH64_SQRT_H
diff --git a/libc/src/__support/FPUtil/arm/sqrt.h b/libc/src/__support/FPUtil/arm/sqrt.h
index 39ac5395f869e..497dbc504b79f 100644
--- a/libc/src/__support/FPUtil/arm/sqrt.h
+++ b/libc/src/__support/FPUtil/arm/sqrt.h
@@ -14,7 +14,7 @@
 #include "src/__support/macros/properties/architectures.h"
 #include "src/__support/macros/properties/cpu_features.h"
 
-#if !defined(LIBC_TARGET_ARCH_IS_ANY_ARM)
+#if !defined(LIBC_TARGET_ARCH_IS_ARM)
 #error "Invalid include"
 #endif
 
@@ -24,7 +24,7 @@ namespace fputil {
 #ifdef LIBC_TARGET_CPU_HAS_FPU_FLOAT
 template <> LIBC_INLINE float sqrt<float>(float x) {
   float y;
-  asm("fsqrt %s0, %s1\n\t" : "=w"(y) : "w"(x));
+  asm("vsqrt %0, %1\n\t" : "=w"(y) : "w"(x));
   return y;
 }
 #endif // LIBC_TARGET_CPU_HAS_FPU_FLOAT
@@ -32,7 +32,7 @@ template <> LIBC_INLINE float sqrt<float>(float x) {
 #ifdef LIBC_TARGET_CPU_HAS_FPU_DOUBLE
 template <> LIBC_INLINE double sqrt<double>(double x) {
   double y;
-  asm("fsqrt %d0, %d1\n\t" : "=w"(y) : "w"(x));
+  asm("vsqrt %0, %1\n\t" : "=w"(y) : "w"(x));
   return y;
 }
 #endif // LIBC_TARGET_CPU_HAS_FPU_DOUBLE
diff --git a/libc/src/__support/FPUtil/sqrt.h b/libc/src/__support/FPUtil/sqrt.h
index 9b151c4c5e1b3..89da44ff2970f 100644
--- a/libc/src/__support/FPUtil/sqrt.h
+++ b/libc/src/__support/FPUtil/sqrt.h
@@ -42,7 +42,9 @@ template <> LIBC_INLINE double sqrt<double>(double x) {
 // Use inline assembly when __builtin_elementwise_sqrt is not available.
 #if defined(LIBC_TARGET_CPU_HAS_SSE2)
 #include "x86_64/sqrt.h"
-#elif defined(LIBC_TARGET_ARCH_IS_ANY_ARM)
+#elif defined(LIBC_TARGET_ARCH_IS_AARCH64)
+#include "aarch64/sqrt.h"
+#elif defined(LIBC_TARGET_ARCH_IS_ARM)
 #include "arm/sqrt.h"
 #elif defined(LIBC_TARGET_ARCH_IS_ANY_RISCV)
 #include "riscv/sqrt.h"
diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
index dcb4d53f9dad0..4412f2beaffcd 100644
--- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
@@ -1179,7 +1179,7 @@ sqrt_hdrs = selects.with_or({
         "src/__support/FPUtil/x86_64/sqrt.h",
     ],
     PLATFORM_CPU_ARM64: sqrt_common_hdrs + [
-        "src/__support/FPUtil/arm/sqrt.h",
+        "src/__support/FPUtil/aarch64/sqrt.h",
     ],
 })
 



More information about the llvm-commits mailing list