[libc-commits] [libc] [libc][stdfix] Add integer square root with fixed point output functions. (PR #83959)

Mon Mar 4 21:22:47 PST 2024

https://github.com/lntue created https://github.com/llvm/llvm-project/pull/83959

Fix https://github.com/llvm/llvm-project/issues/83924.

>From 8f74d732db924f828ea9c2f4968f4c90573e2f97 Mon Sep 17 00:00:00 2001
From: Tue Ly <lntue at google.com>
Date: Tue, 5 Mar 2024 00:18:47 -0500
Subject: [PATCH] [libc][stdfix] Add integer square root with fixed point
 output functions.

---
 libc/config/baremetal/arm/entrypoints.txt     |   2 +
 libc/config/baremetal/riscv/entrypoints.txt   |   2 +
 libc/config/linux/x86_64/entrypoints.txt      |   2 +
 libc/spec/stdc_ext.td                         |   3 +
 libc/src/__support/fixed_point/CMakeLists.txt |   1 +
 libc/src/__support/fixed_point/fx_rep.h       |  24 ++
 libc/src/__support/fixed_point/sqrt.h         | 207 ++++++++++++++----
 libc/src/stdfix/CMakeLists.txt                |  26 +++
 libc/src/stdfix/uhksqrtus.cpp                 |  23 ++
 libc/src/stdfix/uhksqrtus.h                   |  20 ++
 libc/src/stdfix/uksqrtui.cpp                  |  23 ++
 libc/src/stdfix/uksqrtui.h                    |  20 ++
 libc/test/src/stdfix/CMakeLists.txt           |  40 ++++
 libc/test/src/stdfix/ISqrtTest.h              |  66 ++++++
 libc/test/src/stdfix/uhksqrtus_test.cpp       |  20 ++
 libc/test/src/stdfix/uksqrtui_test.cpp        |  20 ++
 16 files changed, 460 insertions(+), 39 deletions(-)
 create mode 100644 libc/src/stdfix/uhksqrtus.cpp
 create mode 100644 libc/src/stdfix/uhksqrtus.h
 create mode 100644 libc/src/stdfix/uksqrtui.cpp
 create mode 100644 libc/src/stdfix/uksqrtui.h
 create mode 100644 libc/test/src/stdfix/ISqrtTest.h
 create mode 100644 libc/test/src/stdfix/uhksqrtus_test.cpp
 create mode 100644 libc/test/src/stdfix/uksqrtui_test.cpp

diff --git a/libc/config/baremetal/arm/entrypoints.txt b/libc/config/baremetal/arm/entrypoints.txt
index c9887b6e855a6d..99796ad5edf5d5 100644
--- a/libc/config/baremetal/arm/entrypoints.txt
+++ b/libc/config/baremetal/arm/entrypoints.txt
@@ -306,6 +306,8 @@ if(LIBC_COMPILER_HAS_FIXED_POINT)
     libc.src.stdfix.sqrtur
     # libc.src.stdfix.sqrtulk
     libc.src.stdfix.sqrtulr
+    libc.src.stdfix.uhksqrtus
+    libc.src.stdfix.uksqrtui
   )
 endif()
 
diff --git a/libc/config/baremetal/riscv/entrypoints.txt b/libc/config/baremetal/riscv/entrypoints.txt
index c9887b6e855a6d..99796ad5edf5d5 100644
--- a/libc/config/baremetal/riscv/entrypoints.txt
+++ b/libc/config/baremetal/riscv/entrypoints.txt
@@ -306,6 +306,8 @@ if(LIBC_COMPILER_HAS_FIXED_POINT)
     libc.src.stdfix.sqrtur
     # libc.src.stdfix.sqrtulk
     libc.src.stdfix.sqrtulr
+    libc.src.stdfix.uhksqrtus
+    libc.src.stdfix.uksqrtui
   )
 endif()
 
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index bc10512d942fa7..4ae2f7fc69ca20 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -490,6 +490,8 @@ if(LIBC_COMPILER_HAS_FIXED_POINT)
     libc.src.stdfix.sqrtur
     # libc.src.stdfix.sqrtulk
     libc.src.stdfix.sqrtulr
+    libc.src.stdfix.uhksqrtus
+    libc.src.stdfix.uksqrtui
   )
 endif()
 
diff --git a/libc/spec/stdc_ext.td b/libc/spec/stdc_ext.td
index be1e6d4ba2fcdc..4e433e222ce47c 100644
--- a/libc/spec/stdc_ext.td
+++ b/libc/spec/stdc_ext.td
@@ -55,6 +55,9 @@ def StdcExt : StandardSpec<"stdc_ext"> {
           GuardedFunctionSpec<"sqrtuhk", RetValSpec<UnsignedShortAccumType>, [ArgSpec<UnsignedShortAccumType>], "LIBC_COMPILER_HAS_FIXED_POINT">,
           GuardedFunctionSpec<"sqrtuk", RetValSpec<UnsignedAccumType>, [ArgSpec<UnsignedAccumType>], "LIBC_COMPILER_HAS_FIXED_POINT">,
           GuardedFunctionSpec<"sqrtulk", RetValSpec<UnsignedLongAccumType>, [ArgSpec<UnsignedLongAccumType>], "LIBC_COMPILER_HAS_FIXED_POINT">,
+
+          GuardedFunctionSpec<"uhksqrtus", RetValSpec<UnsignedShortAccumType>, [ArgSpec<UnsignedShortType>], "LIBC_COMPILER_HAS_FIXED_POINT">,
+          GuardedFunctionSpec<"uksqrtui", RetValSpec<UnsignedAccumType>, [ArgSpec<UnsignedIntType>], "LIBC_COMPILER_HAS_FIXED_POINT">,
       ]
   >;
 
diff --git a/libc/src/__support/fixed_point/CMakeLists.txt b/libc/src/__support/fixed_point/CMakeLists.txt
index 0ed118f2408849..3b744081765e4f 100644
--- a/libc/src/__support/fixed_point/CMakeLists.txt
+++ b/libc/src/__support/fixed_point/CMakeLists.txt
@@ -32,5 +32,6 @@ add_header_library(
     libc.src.__support.macros.attributes
     libc.src.__support.macros.optimization
     libc.src.__support.CPP.bit
+    libc.src.__support.CPP.limits
     libc.src.__support.CPP.type_traits
 )
diff --git a/libc/src/__support/fixed_point/fx_rep.h b/libc/src/__support/fixed_point/fx_rep.h
index f8593a93684cbc..042cd2b20714c6 100644
--- a/libc/src/__support/fixed_point/fx_rep.h
+++ b/libc/src/__support/fixed_point/fx_rep.h
@@ -48,6 +48,8 @@ template <> struct FXRep<short fract> {
   LIBC_INLINE static constexpr Type MAX() { return SFRACT_MIN; }
   LIBC_INLINE static constexpr Type ZERO() { return 0.0HR; }
   LIBC_INLINE static constexpr Type EPS() { return SFRACT_EPSILON; }
+  LIBC_INLINE static constexpr Type ONE_HALF() { return 0.5HR; }
+  LIBC_INLINE static constexpr Type ONE_FOURTH() { return 0.25HR; }
 
   using StorageType = typename internal::Storage<TOTAL_LEN>::Type;
   using CompType = cpp::make_signed_t<StorageType>;
@@ -66,6 +68,8 @@ template <> struct FXRep<unsigned short fract> {
   LIBC_INLINE static constexpr Type MAX() { return USFRACT_MIN; }
   LIBC_INLINE static constexpr Type ZERO() { return 0.0UHR; }
   LIBC_INLINE static constexpr Type EPS() { return USFRACT_EPSILON; }
+  LIBC_INLINE static constexpr Type ONE_HALF() { return 0.5UHR; }
+  LIBC_INLINE static constexpr Type ONE_FOURTH() { return 0.25UHR; }
 
   using StorageType = typename internal::Storage<TOTAL_LEN>::Type;
   using CompType = cpp::make_unsigned_t<StorageType>;
@@ -84,6 +88,8 @@ template <> struct FXRep<fract> {
   LIBC_INLINE static constexpr Type MAX() { return FRACT_MIN; }
   LIBC_INLINE static constexpr Type ZERO() { return 0.0R; }
   LIBC_INLINE static constexpr Type EPS() { return FRACT_EPSILON; }
+  LIBC_INLINE static constexpr Type ONE_HALF() { return 0.5R; }
+  LIBC_INLINE static constexpr Type ONE_FOURTH() { return 0.25R; }
 
   using StorageType = typename internal::Storage<TOTAL_LEN>::Type;
   using CompType = cpp::make_signed_t<StorageType>;
@@ -102,6 +108,8 @@ template <> struct FXRep<unsigned fract> {
   LIBC_INLINE static constexpr Type MAX() { return UFRACT_MIN; }
   LIBC_INLINE static constexpr Type ZERO() { return 0.0UR; }
   LIBC_INLINE static constexpr Type EPS() { return UFRACT_EPSILON; }
+  LIBC_INLINE static constexpr Type ONE_HALF() { return 0.5UR; }
+  LIBC_INLINE static constexpr Type ONE_FOURTH() { return 0.25UR; }
 
   using StorageType = typename internal::Storage<TOTAL_LEN>::Type;
   using CompType = cpp::make_unsigned_t<StorageType>;
@@ -120,6 +128,8 @@ template <> struct FXRep<long fract> {
   LIBC_INLINE static constexpr Type MAX() { return LFRACT_MIN; }
   LIBC_INLINE static constexpr Type ZERO() { return 0.0LR; }
   LIBC_INLINE static constexpr Type EPS() { return LFRACT_EPSILON; }
+  LIBC_INLINE static constexpr Type ONE_HALF() { return 0.5LR; }
+  LIBC_INLINE static constexpr Type ONE_FOURTH() { return 0.25LR; }
 
   using StorageType = typename internal::Storage<TOTAL_LEN>::Type;
   using CompType = cpp::make_signed_t<StorageType>;
@@ -138,6 +148,8 @@ template <> struct FXRep<unsigned long fract> {
   LIBC_INLINE static constexpr Type MAX() { return ULFRACT_MIN; }
   LIBC_INLINE static constexpr Type ZERO() { return 0.0ULR; }
   LIBC_INLINE static constexpr Type EPS() { return ULFRACT_EPSILON; }
+  LIBC_INLINE static constexpr Type ONE_HALF() { return 0.5ULR; }
+  LIBC_INLINE static constexpr Type ONE_FOURTH() { return 0.25ULR; }
 
   using StorageType = typename internal::Storage<TOTAL_LEN>::Type;
   using CompType = cpp::make_unsigned_t<StorageType>;
@@ -156,6 +168,8 @@ template <> struct FXRep<short accum> {
   LIBC_INLINE static constexpr Type MAX() { return SACCUM_MIN; }
   LIBC_INLINE static constexpr Type ZERO() { return 0.0HK; }
   LIBC_INLINE static constexpr Type EPS() { return SACCUM_EPSILON; }
+  LIBC_INLINE static constexpr Type ONE_HALF() { return 0.5HK; }
+  LIBC_INLINE static constexpr Type ONE_FOURTH() { return 0.25HK; }
 
   using StorageType = typename internal::Storage<TOTAL_LEN>::Type;
   using CompType = cpp::make_signed_t<StorageType>;
@@ -174,6 +188,8 @@ template <> struct FXRep<unsigned short accum> {
   LIBC_INLINE static constexpr Type MAX() { return USACCUM_MIN; }
   LIBC_INLINE static constexpr Type ZERO() { return 0.0UHK; }
   LIBC_INLINE static constexpr Type EPS() { return USACCUM_EPSILON; }
+  LIBC_INLINE static constexpr Type ONE_HALF() { return 0.5UHK; }
+  LIBC_INLINE static constexpr Type ONE_FOURTH() { return 0.25UHK; }
 
   using StorageType = typename internal::Storage<TOTAL_LEN>::Type;
   using CompType = cpp::make_unsigned_t<StorageType>;
@@ -192,6 +208,8 @@ template <> struct FXRep<accum> {
   LIBC_INLINE static constexpr Type MAX() { return ACCUM_MIN; }
   LIBC_INLINE static constexpr Type ZERO() { return 0.0K; }
   LIBC_INLINE static constexpr Type EPS() { return ACCUM_EPSILON; }
+  LIBC_INLINE static constexpr Type ONE_HALF() { return 0.5K; }
+  LIBC_INLINE static constexpr Type ONE_FOURTH() { return 0.25K; }
 
   using StorageType = typename internal::Storage<TOTAL_LEN>::Type;
   using CompType = cpp::make_signed_t<StorageType>;
@@ -210,6 +228,8 @@ template <> struct FXRep<unsigned accum> {
   LIBC_INLINE static constexpr Type MAX() { return UACCUM_MIN; }
   LIBC_INLINE static constexpr Type ZERO() { return 0.0UK; }
   LIBC_INLINE static constexpr Type EPS() { return UACCUM_EPSILON; }
+  LIBC_INLINE static constexpr Type ONE_HALF() { return 0.5UK; }
+  LIBC_INLINE static constexpr Type ONE_FOURTH() { return 0.25UK; }
 
   using StorageType = typename internal::Storage<TOTAL_LEN>::Type;
   using CompType = cpp::make_unsigned_t<StorageType>;
@@ -228,6 +248,8 @@ template <> struct FXRep<long accum> {
   LIBC_INLINE static constexpr Type MAX() { return LACCUM_MIN; }
   LIBC_INLINE static constexpr Type ZERO() { return 0.0LK; }
   LIBC_INLINE static constexpr Type EPS() { return LACCUM_EPSILON; }
+  LIBC_INLINE static constexpr Type ONE_HALF() { return 0.5LK; }
+  LIBC_INLINE static constexpr Type ONE_FOURTH() { return 0.25LK; }
 
   using StorageType = typename internal::Storage<TOTAL_LEN>::Type;
   using CompType = cpp::make_signed_t<StorageType>;
@@ -246,6 +268,8 @@ template <> struct FXRep<unsigned long accum> {
   LIBC_INLINE static constexpr Type MAX() { return ULACCUM_MIN; }
   LIBC_INLINE static constexpr Type ZERO() { return 0.0ULK; }
   LIBC_INLINE static constexpr Type EPS() { return ULACCUM_EPSILON; }
+  LIBC_INLINE static constexpr Type ONE_HALF() { return 0.5ULK; }
+  LIBC_INLINE static constexpr Type ONE_FOURTH() { return 0.25ULK; }
 
   using StorageType = typename internal::Storage<TOTAL_LEN>::Type;
   using CompType = cpp::make_unsigned_t<StorageType>;
diff --git a/libc/src/__support/fixed_point/sqrt.h b/libc/src/__support/fixed_point/sqrt.h
index d8df294b18a1a8..4ec016ceab0028 100644
--- a/libc/src/__support/fixed_point/sqrt.h
+++ b/libc/src/__support/fixed_point/sqrt.h
@@ -11,6 +11,7 @@
 
 #include "include/llvm-libc-macros/stdfix-macros.h"
 #include "src/__support/CPP/bit.h"
+#include "src/__support/CPP/limits.h" // CHAR_BIT
 #include "src/__support/CPP/type_traits.h"
 #include "src/__support/macros/attributes.h"   // LIBC_INLINE
 #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
@@ -28,16 +29,73 @@ template <typename T> struct SqrtConfig;
 template <> struct SqrtConfig<unsigned short fract> {
   using Type = unsigned short fract;
   static constexpr int EXTRA_STEPS = 0;
+
+  // Linear approximation for the initial values, with errors bounded by:
+  //   max(1.5 * 2^-11, eps)
+  // Generated with Sollya:
+  // > for i from 4 to 15 do {
+  //     P = fpminimax(sqrt(x), 1, [|8, 8|], [i * 2^-4, (i + 1)*2^-4],
+  //                   fixed, absolute);
+  //     print("{", coeff(P, 1), "uhr,", coeff(P, 0), "uhr},");
+  //   };
+  static constexpr Type FIRST_APPROX[12][2] = {
+      {0x1.e8p-1uhr, 0x1.0cp-2uhr}, {0x1.bap-1uhr, 0x1.28p-2uhr},
+      {0x1.94p-1uhr, 0x1.44p-2uhr}, {0x1.74p-1uhr, 0x1.6p-2uhr},
+      {0x1.6p-1uhr, 0x1.74p-2uhr},  {0x1.4ep-1uhr, 0x1.88p-2uhr},
+      {0x1.3ep-1uhr, 0x1.9cp-2uhr}, {0x1.32p-1uhr, 0x1.acp-2uhr},
+      {0x1.22p-1uhr, 0x1.c4p-2uhr}, {0x1.18p-1uhr, 0x1.d4p-2uhr},
+      {0x1.08p-1uhr, 0x1.fp-2uhr},  {0x1.04p-1uhr, 0x1.f8p-2uhr},
+  };
 };
 
 template <> struct SqrtConfig<unsigned fract> {
   using Type = unsigned fract;
   static constexpr int EXTRA_STEPS = 1;
+
+  // Linear approximation for the initial values, with errors bounded by:
+  //   max(1.5 * 2^-11, eps)
+  // Generated with Sollya:
+  // > for i from 4 to 15 do {
+  //     P = fpminimax(sqrt(x), 1, [|16, 16|], [i * 2^-4, (i + 1)*2^-4],
+  //                   fixed, absolute);
+  //     print("{", coeff(P, 1), "ur,", coeff(P, 0), "ur},");
+  //   };
+  static constexpr Type FIRST_APPROX[12][2] = {
+      {0x1.e378p-1ur, 0x1.0ebp-2ur},  {0x1.b512p-1ur, 0x1.2b94p-2ur},
+      {0x1.91fp-1ur, 0x1.45dcp-2ur},  {0x1.7622p-1ur, 0x1.5e24p-2ur},
+      {0x1.5f5ap-1ur, 0x1.74e4p-2ur}, {0x1.4c58p-1ur, 0x1.8a4p-2ur},
+      {0x1.3c1ep-1ur, 0x1.9e84p-2ur}, {0x1.2e0cp-1ur, 0x1.b1d8p-2ur},
+      {0x1.21aap-1ur, 0x1.c468p-2ur}, {0x1.16bap-1ur, 0x1.d62cp-2ur},
+      {0x1.0cfp-1ur, 0x1.e74cp-2ur},  {0x1.0418p-1ur, 0x1.f7ep-2ur},
+  };
 };
 
 template <> struct SqrtConfig<unsigned long fract> {
   using Type = unsigned long fract;
   static constexpr int EXTRA_STEPS = 2;
+
+  // Linear approximation for the initial values, with errors bounded by:
+  //   max(1.5 * 2^-11, eps)
+  // Generated with Sollya:
+  // > for i from 4 to 15 do {
+  //     P = fpminimax(sqrt(x), 1, [|32, 32|], [i * 2^-4, (i + 1)*2^-4],
+  //                   fixed, absolute);
+  //     print("{", coeff(P, 1), "ulr,", coeff(P, 0), "ulr},");
+  //   };
+  static constexpr Type FIRST_APPROX[12][2] = {
+      {0x1.e3779b98p-1ulr, 0x1.0eaff788p-2ulr},
+      {0x1.b5167872p-1ulr, 0x1.2b908ad4p-2ulr},
+      {0x1.91f195cap-1ulr, 0x1.45da800cp-2ulr},
+      {0x1.761ebcb4p-1ulr, 0x1.5e27004cp-2ulr},
+      {0x1.5f619986p-1ulr, 0x1.74db933cp-2ulr},
+      {0x1.4c583adep-1ulr, 0x1.8a3fbfccp-2ulr},
+      {0x1.3c1a591cp-1ulr, 0x1.9e88373cp-2ulr},
+      {0x1.2e08545ap-1ulr, 0x1.b1dd2534p-2ulr},
+      {0x1.21b05c0ap-1ulr, 0x1.c45e023p-2ulr},
+      {0x1.16becd02p-1ulr, 0x1.d624031p-2ulr},
+      {0x1.0cf49fep-1ulr, 0x1.e743b844p-2ulr},
+      {0x1.04214e9cp-1ulr, 0x1.f7ce2c3cp-2ulr},
+  };
 };
 
 template <>
@@ -46,46 +104,38 @@ struct SqrtConfig<unsigned short accum> : SqrtConfig<unsigned fract> {};
 template <>
 struct SqrtConfig<unsigned accum> : SqrtConfig<unsigned long fract> {};
 
-// TODO: unsigned long accum type is 64-bit, and will need 64-bit fract type.
-// Probably we will use DyadicFloat<64> for intermediate computations instead.
-
-// Linear approximation for the initial values, with errors bounded by:
-//   max(1.5 * 2^-11, eps)
-// Generated with Sollya:
-// > for i from 4 to 15 do {
-//     P = fpminimax(sqrt(x), 1, [|8, 8|], [i * 2^-4, (i + 1)*2^-4],
-//                   fixed, absolute);
-//     print("{", coeff(P, 1), "uhr,", coeff(P, 0), "uhr},");
-//   };
-static constexpr unsigned short fract SQRT_FIRST_APPROX[12][2] = {
-    {0x1.e8p-1uhr, 0x1.0cp-2uhr}, {0x1.bap-1uhr, 0x1.28p-2uhr},
-    {0x1.94p-1uhr, 0x1.44p-2uhr}, {0x1.74p-1uhr, 0x1.6p-2uhr},
-    {0x1.6p-1uhr, 0x1.74p-2uhr},  {0x1.4ep-1uhr, 0x1.88p-2uhr},
-    {0x1.3ep-1uhr, 0x1.9cp-2uhr}, {0x1.32p-1uhr, 0x1.acp-2uhr},
-    {0x1.22p-1uhr, 0x1.c4p-2uhr}, {0x1.18p-1uhr, 0x1.d4p-2uhr},
-    {0x1.08p-1uhr, 0x1.fp-2uhr},  {0x1.04p-1uhr, 0x1.f8p-2uhr},
+// Integer square root
+template <> struct SqrtConfig<unsigned short> {
+  using OutType = unsigned short accum;
+  using FracType = unsigned fract;
+  // For fast-but-less-accurate version
+  using FastFracType = unsigned short fract;
+  using HalfType = unsigned char;
 };
 
-} // namespace internal
+template <> struct SqrtConfig<unsigned int> {
+  using OutType = unsigned accum;
+  using FracType = unsigned long fract;
+  // For fast-but-less-accurate version
+  using FastFracType = unsigned fract;
+  using HalfType = unsigned short;
+};
 
-template <typename T>
-LIBC_INLINE constexpr cpp::enable_if_t<cpp::is_fixed_point_v<T>, T> sqrt(T x) {
-  using BitType = typename FXRep<T>::StorageType;
-  BitType x_bit = cpp::bit_cast<BitType>(x);
+// TODO: unsigned long accum type is 64-bit, and will need 64-bit fract type.
+// Probably we will use DyadicFloat<64> for intermediate computations instead.
 
-  if (LIBC_UNLIKELY(x_bit == 0))
-    return FXRep<T>::ZERO();
+} // namespace internal
 
-  int leading_zeros = cpp::countl_zero(x_bit);
-  constexpr int STORAGE_LENGTH = sizeof(BitType) * CHAR_BIT;
-  constexpr int EXP_ADJUSTMENT = STORAGE_LENGTH - FXRep<T>::FRACTION_LEN - 1;
-  // x_exp is the real exponent of the leading bit of x.
-  int x_exp = EXP_ADJUSTMENT - leading_zeros;
-  int shift = EXP_ADJUSTMENT - 1 - (x_exp & (~1));
-  // Normalize.
-  x_bit <<= shift;
-  using FracType = typename internal::SqrtConfig<T>::Type;
-  FracType x_frac = cpp::bit_cast<FracType>(x_bit);
+// Core computation for sqrt with normalized inputs (0.25 <= x < 1).
+template <typename Config>
+LIBC_INLINE constexpr typename Config::Type
+sqrt_core(typename Config::Type x_frac) {
+  using FracType = typename Config::Type;
+  using FXRep = FXRep<FracType>;
+  using StorageType = typename FXRep::StorageType;
+  // Exact case:
+  if (x_frac == FXRep::ONE_FOURTH())
+    return FXRep::ONE_HALF();
 
   // Use use Newton method to approximate sqrt(a):
   //   x_{n + 1} = 1/2 (x_n + a / x_n)
@@ -96,9 +146,10 @@ LIBC_INLINE constexpr cpp::enable_if_t<cpp::is_fixed_point_v<T>, T> sqrt(T x) {
   // are between 0b0100 and 0b1111.  Hence the lookup table only needs 12
   // entries, and we can get the index by subtracting the leading 4 bits of
   // x_frac by 4 = 0b0100.
-  int index = (x_bit >> (STORAGE_LENGTH - 4)) - 4;
-  FracType a = static_cast<FracType>(internal::SQRT_FIRST_APPROX[index][0]);
-  FracType b = static_cast<FracType>(internal::SQRT_FIRST_APPROX[index][1]);
+  StorageType x_bit = cpp::bit_cast<StorageType>(x_frac);
+  int index = (static_cast<int>(x_bit >> (FXRep::TOTAL_LEN - 4))) - 4;
+  FracType a = Config::FIRST_APPROX[index][0];
+  FracType b = Config::FIRST_APPROX[index][1];
 
   // Initial approximation step.
   // Estimated error bounds: | r - sqrt(x_frac) | < max(1.5 * 2^-11, eps).
@@ -112,9 +163,34 @@ LIBC_INLINE constexpr cpp::enable_if_t<cpp::is_fixed_point_v<T>, T> sqrt(T x) {
   //   Blanchard, J. D. and Chamberland, M., "Newton's Method Without Division",
   //   The American Mathematical Monthly (2023).
   //   https://chamberland.math.grinnell.edu/papers/newton.pdf
-  for (int i = 0; i < internal::SqrtConfig<T>::EXTRA_STEPS; ++i)
+  for (int i = 0; i < Config::EXTRA_STEPS; ++i)
     r = (r >> 1) + (x_frac >> 1) / r;
 
+  return r;
+}
+
+template <typename T>
+LIBC_INLINE constexpr cpp::enable_if_t<cpp::is_fixed_point_v<T>, T> sqrt(T x) {
+  using BitType = typename FXRep<T>::StorageType;
+  BitType x_bit = cpp::bit_cast<BitType>(x);
+
+  if (LIBC_UNLIKELY(x_bit == 0))
+    return FXRep<T>::ZERO();
+
+  int leading_zeros = cpp::countl_zero(x_bit);
+  constexpr int STORAGE_LENGTH = sizeof(BitType) * CHAR_BIT;
+  constexpr int EXP_ADJUSTMENT = STORAGE_LENGTH - FXRep<T>::FRACTION_LEN - 1;
+  // x_exp is the real exponent of the leading bit of x.
+  int x_exp = EXP_ADJUSTMENT - leading_zeros;
+  int shift = EXP_ADJUSTMENT - 1 - (x_exp & (~1));
+  // Normalize.
+  x_bit <<= shift;
+  using FracType = typename internal::SqrtConfig<T>::Type;
+  FracType x_frac = cpp::bit_cast<FracType>(x_bit);
+
+  // Compute sqrt(x_frac) using Newton-method.
+  FracType r = sqrt_core<internal::SqrtConfig<T>>(x_frac);
+
   // Re-scaling
   r >>= EXP_ADJUSTMENT - (x_exp >> 1);
 
@@ -122,6 +198,59 @@ LIBC_INLINE constexpr cpp::enable_if_t<cpp::is_fixed_point_v<T>, T> sqrt(T x) {
   return cpp::bit_cast<T>(r);
 }
 
+// Integer square root - Accurate version:
+// Absolute errors < 2^(-fraction length).
+template <typename T>
+LIBC_INLINE constexpr typename internal::SqrtConfig<T>::OutType isqrt(T x) {
+  using OutType = typename internal::SqrtConfig<T>::OutType;
+  using FracType = typename internal::SqrtConfig<T>::FracType;
+
+  if (x == 0)
+    return FXRep<OutType>::ZERO();
+
+  // Normalize the leading bits to the first two bits.
+  // Shift and then Bit cast x to x_frac gives us:
+  //   x = 2^(FRACTION_LEN + 1 - shift) * x_frac;
+  int leading_zeros = cpp::countl_zero(x);
+  int shift = ((leading_zeros >> 1) << 1);
+  x <<= shift;
+  // Convert to frac type and compute square root.
+  FracType x_frac = cpp::bit_cast<FracType>(x);
+  FracType r = sqrt_core<internal::SqrtConfig<FracType>>(x_frac);
+  // To rescale back to the OutType (Accum)
+  r >>= (shift >> 1);
+
+  return cpp::bit_cast<OutType>(r);
+}
+
+// Integer square root - Fast but less accurate version:
+// Relative errors < 2^(-fraction length).
+template <typename T>
+LIBC_INLINE constexpr typename internal::SqrtConfig<T>::OutType
+isqrt_fast(T x) {
+  using OutType = typename internal::SqrtConfig<T>::OutType;
+  using FracType = typename internal::SqrtConfig<T>::FastFracType;
+  using StorageType = typename FXRep<FracType>::StorageType;
+
+  if (x == 0)
+    return FXRep<OutType>::ZERO();
+
+  // Normalize the leading bits to the first two bits.
+  // Shift and then Bit cast x to x_frac gives us:
+  //   x = 2^(FRACTION_LEN + 1 - shift) * x_frac;
+  int leading_zeros = cpp::countl_zero(x);
+  int shift = (leading_zeros & (~1));
+  x <<= shift;
+  // Convert to frac type and compute square root.
+  FracType x_frac = cpp::bit_cast<FracType>(
+      static_cast<StorageType>(x >> FXRep<FracType>::FRACTION_LEN));
+  OutType r =
+      static_cast<OutType>(sqrt_core<internal::SqrtConfig<FracType>>(x_frac));
+  // To rescale back to the OutType (Accum)
+  r <<= (FXRep<OutType>::INTEGRAL_LEN - (shift >> 1));
+  return cpp::bit_cast<OutType>(r);
+}
+
 } // namespace LIBC_NAMESPACE::fixed_point
 
 #endif // LIBC_COMPILER_HAS_FIXED_POINT
diff --git a/libc/src/stdfix/CMakeLists.txt b/libc/src/stdfix/CMakeLists.txt
index cb2134fe33cf9c..a5c37183fbbb7c 100644
--- a/libc/src/stdfix/CMakeLists.txt
+++ b/libc/src/stdfix/CMakeLists.txt
@@ -46,3 +46,29 @@ foreach(suffix IN ITEMS hr r lr hk k lk uhr ur ulr uhk uk ulk)
       libc.src.__support.fixed_point.fx_bits
   )
 endforeach()
+
+add_entrypoint_object(
+  uhksqrtus
+  HDRS
+    uhksqrtus.h
+  SRCS
+    uhksqrtus.cpp
+  COMPILE_OPTIONS
+    -O3
+    -ffixed-point
+  DEPENDS
+    libc.src.__support.fixed_point.sqrt
+)
+
+add_entrypoint_object(
+  uksqrtui
+  HDRS
+    uksqrtui.h
+  SRCS
+    uksqrtui.cpp
+  COMPILE_OPTIONS
+    -O3
+    -ffixed-point
+  DEPENDS
+    libc.src.__support.fixed_point.sqrt
+)
diff --git a/libc/src/stdfix/uhksqrtus.cpp b/libc/src/stdfix/uhksqrtus.cpp
new file mode 100644
index 00000000000000..335750ae902b28
--- /dev/null
+++ b/libc/src/stdfix/uhksqrtus.cpp
@@ -0,0 +1,23 @@
+//===-- Implementation of uhksqrtus function ------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "uhksqrtus.h"
+#include "src/__support/common.h"
+#include "src/__support/fixed_point/sqrt.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(unsigned short accum, uhksqrtus, (unsigned short x)) {
+#ifdef LIBC_FAST_MATH
+  return fixed_point::isqrt_fast(x);
+#else
+  return fixed_point::isqrt(x);
+#endif
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/stdfix/uhksqrtus.h b/libc/src/stdfix/uhksqrtus.h
new file mode 100644
index 00000000000000..c24846a8003037
--- /dev/null
+++ b/libc/src/stdfix/uhksqrtus.h
@@ -0,0 +1,20 @@
+//===-- Implementation header for uhksqrtus ---------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STDFIX_UHKSQRTUS_H
+#define LLVM_LIBC_SRC_STDFIX_UHKSQRTUS_H
+
+#include "include/llvm-libc-macros/stdfix-macros.h"
+
+namespace LIBC_NAMESPACE {
+
+unsigned short accum uhksqrtus(unsigned short x);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_STDFIX_UHKSQRTUS_H
diff --git a/libc/src/stdfix/uksqrtui.cpp b/libc/src/stdfix/uksqrtui.cpp
new file mode 100644
index 00000000000000..ee1ae1335027a2
--- /dev/null
+++ b/libc/src/stdfix/uksqrtui.cpp
@@ -0,0 +1,23 @@
+//===-- Implementation of uksqrtui function -------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "uksqrtui.h"
+#include "src/__support/common.h"
+#include "src/__support/fixed_point/sqrt.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(unsigned accum, uksqrtui, (unsigned int x)) {
+#ifdef LIBC_FAST_MATH
+  return fixed_point::isqrt_fast(x);
+#else
+  return fixed_point::isqrt(x);
+#endif
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/stdfix/uksqrtui.h b/libc/src/stdfix/uksqrtui.h
new file mode 100644
index 00000000000000..cd4ff41ea100a7
--- /dev/null
+++ b/libc/src/stdfix/uksqrtui.h
@@ -0,0 +1,20 @@
+//===-- Implementation header for uksqrtui ----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STDFIX_UKSQRTUI_H
+#define LLVM_LIBC_SRC_STDFIX_UKSQRTUI_H
+
+#include "include/llvm-libc-macros/stdfix-macros.h"
+
+namespace LIBC_NAMESPACE {
+
+unsigned accum uksqrtui(unsigned int x);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_STDFIX_UKSQRTUI_H
diff --git a/libc/test/src/stdfix/CMakeLists.txt b/libc/test/src/stdfix/CMakeLists.txt
index 4140b5b29f3b3c..49e904a62771c1 100644
--- a/libc/test/src/stdfix/CMakeLists.txt
+++ b/libc/test/src/stdfix/CMakeLists.txt
@@ -61,3 +61,43 @@ foreach(suffix IN ITEMS hr r lr hk k lk uhr ur ulr uhk uk ulk)
       libc.src.__support.fixed_point.fx_bits
   )
 endforeach()
+
+add_libc_test(
+  uhksqrtus_test
+  SUITE
+    libc-stdfix-tests
+  HDRS
+    ISqrtTest.h
+  SRCS
+    uhksqrtus_test.cpp
+  COMPILE_OPTIONS
+    -O3
+    -ffixed-point
+  DEPENDS
+    libc.src.stdfix.uhksqrtus
+    libc.src.__support.CPP.bit
+    libc.src.__support.fixed_point.fx_rep
+    libc.src.__support.fixed_point.sqrt
+    libc.src.__support.FPUtil.basic_operations
+    libc.src.__support.FPUtil.sqrt
+)
+
+add_libc_test(
+  uksqrtui_test
+  SUITE
+    libc-stdfix-tests
+  HDRS
+    ISqrtTest.h
+  SRCS
+    uksqrtui_test.cpp
+  COMPILE_OPTIONS
+    -O3
+    -ffixed-point
+  DEPENDS
+    libc.src.stdfix.uksqrtui
+    libc.src.__support.CPP.bit
+    libc.src.__support.fixed_point.fx_rep
+    libc.src.__support.fixed_point.sqrt
+    libc.src.__support.FPUtil.basic_operations
+    libc.src.__support.FPUtil.sqrt
+)
diff --git a/libc/test/src/stdfix/ISqrtTest.h b/libc/test/src/stdfix/ISqrtTest.h
new file mode 100644
index 00000000000000..bed57ba1ec6fdc
--- /dev/null
+++ b/libc/test/src/stdfix/ISqrtTest.h
@@ -0,0 +1,66 @@
+//===-- Utility class to test integer sqrt ----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "test/UnitTest/FPMatcher.h"
+#include "test/UnitTest/Test.h"
+
+#include "src/__support/CPP/bit.h"
+#include "src/__support/FPUtil/BasicOperations.h"
+#include "src/__support/FPUtil/sqrt.h"
+#include "src/__support/fixed_point/fx_rep.h"
+#include "src/__support/fixed_point/sqrt.h"
+
+template <typename T> class ISqrtTest : public LIBC_NAMESPACE::testing::Test {
+
+  using OutType =
+      typename LIBC_NAMESPACE::fixed_point::internal::SqrtConfig<T>::OutType;
+  using FXRep = LIBC_NAMESPACE::fixed_point::FXRep<OutType>;
+  static constexpr OutType zero = FXRep::ZERO();
+  static constexpr OutType one = static_cast<OutType>(1);
+  static constexpr OutType eps = FXRep::EPS();
+
+public:
+  typedef OutType (*SqrtFunc)(T);
+
+  void testSpecialNumbers(SqrtFunc func) {
+    EXPECT_EQ(zero, func(T(0)));
+
+    EXPECT_EQ(one, func(T(1)));
+    EXPECT_EQ(static_cast<OutType>(2.0), func(T(4)));
+    EXPECT_EQ(static_cast<OutType>(3.0), func(T(9)));
+    EXPECT_EQ(static_cast<OutType>(16.0), func(T(256)));
+
+    constexpr int COUNT = 255;
+    constexpr double ERR = 3.0 * static_cast<double>(eps);
+    double x_d = 0.0;
+    T x = 0;
+    for (int i = 0; i < COUNT; ++i) {
+      x_d += 1.0;
+      ++x;
+      double y_d = static_cast<double>(func(x));
+      double result = LIBC_NAMESPACE::fputil::sqrt(x_d);
+      double errors = LIBC_NAMESPACE::fputil::abs((y_d / result) - 1.0);
+      if (errors > ERR) {
+        // Print out the failure input and output.
+        EXPECT_EQ(x, T(0));
+        EXPECT_EQ(func(x), zero);
+        EXPECT_FP_EQ(result, 0.0);
+        EXPECT_FP_EQ(errors, 0.0);
+        EXPECT_FP_EQ(ERR, 0.0);
+      }
+      ASSERT_TRUE(errors <= ERR);
+    }
+  }
+};
+
+#define LIST_ISQRT_TESTS(Name, T, func)                                        \
+  using LlvmLibcISqrt##Name##Test = ISqrtTest<T>;                              \
+  TEST_F(LlvmLibcISqrt##Name##Test, SpecialNumbers) {                          \
+    testSpecialNumbers(&func);                                                 \
+  }                                                                            \
+  static_assert(true, "Require semicolon.")
diff --git a/libc/test/src/stdfix/uhksqrtus_test.cpp b/libc/test/src/stdfix/uhksqrtus_test.cpp
new file mode 100644
index 00000000000000..a33297413980de
--- /dev/null
+++ b/libc/test/src/stdfix/uhksqrtus_test.cpp
@@ -0,0 +1,20 @@
+//===-- Unittests for uhksqrtus -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "ISqrtTest.h"
+
+#include "src/__support/fixed_point/sqrt.h"
+#include "src/stdfix/uhksqrtus.h"
+
+unsigned short accum uhksqrtus_fast(unsigned short x) {
+  return LIBC_NAMESPACE::fixed_point::isqrt_fast(x);
+}
+
+LIST_ISQRT_TESTS(US, unsigned short, LIBC_NAMESPACE::uhksqrtus);
+
+LIST_ISQRT_TESTS(USFast, unsigned short, uhksqrtus_fast);
diff --git a/libc/test/src/stdfix/uksqrtui_test.cpp b/libc/test/src/stdfix/uksqrtui_test.cpp
new file mode 100644
index 00000000000000..0f4c057099daaf
--- /dev/null
+++ b/libc/test/src/stdfix/uksqrtui_test.cpp
@@ -0,0 +1,20 @@
+//===-- Unittests for uksqrtui --------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "ISqrtTest.h"
+
+#include "src/__support/fixed_point/sqrt.h"
+#include "src/stdfix/uksqrtui.h"
+
+unsigned accum uksqrtui_fast(unsigned int x) {
+  return LIBC_NAMESPACE::fixed_point::isqrt_fast(x);
+}
+
+LIST_ISQRT_TESTS(UI, unsigned int, LIBC_NAMESPACE::uksqrtui);
+
+LIST_ISQRT_TESTS(UIFast, unsigned int, uksqrtui_fast);