[libc-commits] [libc] Float128 arith operator (PR #187425)

Wed Mar 18 19:01:22 PDT 2026

llvmbot wrote:




@llvm/pr-subscribers-libc

Author: None (Emmaliu2006git)

<details>
<summary>Changes</summary>

This patch adds basic arithmetic operators for Float128.

Changes

- Implement +, -, *, / for Float128
- Add constructor taking a float128 value from the unified float128 interface

Tests

- Add tests for arithmetic behavior
- Cover zero and signed zero
- Cover special values (inf, NaN)

Notes

Each operator takes Float128 operands, bit-casts their stored 128-bit representations to float128, performs the computation on those values, and returns the result as float128.
This PR is based on #187421.

---
Full diff: https://github.com/llvm/llvm-project/pull/187425.diff


5 Files Affected:

- (modified) libc/include/llvm-libc-types/float128.h (+4) 
- (modified) libc/src/__support/FPUtil/cast.h (+5-1) 
- (added) libc/src/__support/FPUtil/float128.h (+63) 
- (modified) libc/test/src/__support/FPUtil/CMakeLists.txt (+14) 
- (added) libc/test/src/__support/FPUtil/float128_test.cpp (+185) 


``````````diff

diff --git a/libc/include/llvm-libc-types/float128.h b/libc/include/llvm-libc-types/float128.h
index 82ebb79f1f580..a7ae7d80f9d92 100644
--- a/libc/include/llvm-libc-types/float128.h
+++ b/libc/include/llvm-libc-types/float128.h
@@ -31,6 +31,10 @@ typedef __float128 float128;
 #elif (LDBL_MANT_DIG == 113)
 #define LIBC_TYPES_HAS_FLOAT128
 typedef long double float128;
+#else
+#define LIBC_TYPES_HAS_FLOAT128
+#include "src/__support/FPUtil/float128.h"
+typedef LIBC_NAMESPACE::fputil::Float128 float128;
 #endif
 
 #endif // LLVM_LIBC_TYPES_FLOAT128_H
diff --git a/libc/src/__support/FPUtil/cast.h b/libc/src/__support/FPUtil/cast.h
index 54c80e862523a..3b02d998e84fd 100644
--- a/libc/src/__support/FPUtil/cast.h
+++ b/libc/src/__support/FPUtil/cast.h
@@ -35,7 +35,11 @@ cast(InType x) {
 #if defined(LIBC_TYPES_HAS_FLOAT16) && !defined(__LIBC_USE_FLOAT16_CONVERSION)
                   || cpp::is_same_v<OutType, float16> ||
                   cpp::is_same_v<InType, float16>
-#endif
+#endif   
+#if defined(LIBC_TYPES_HAS_FLOAT128)
+                  || cpp::is_same_v<OutType, float128> ||
+                  cpp::is_same_v<InType, float128>
+#endif     
     ) {
       using InFPBits = FPBits<InType>;
       using InStorageType = typename InFPBits::StorageType;
diff --git a/libc/src/__support/FPUtil/float128.h b/libc/src/__support/FPUtil/float128.h
new file mode 100644
index 0000000000000..01d258caf1b6e
--- /dev/null
+++ b/libc/src/__support/FPUtil/float128.h
@@ -0,0 +1,63 @@
+//===-- Float128 software wrapper ----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a minimal software-backed Float128 wrapper type used when
+// the host compiler does not provide a native 128-bit floating-point type.
+// The wrapper stores the raw 128-bit representation and adds +, -, *, /.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_SUPPORT_FPUTIL_FLOAT128_H
+#define LLVM_LIBC_SRC_SUPPORT_FPUTIL_FLOAT128_H
+
+#include "src/__support/uint128.h"
+#include "src/__support/FPUtil/generic/add_sub.h"
+#include "src/__support/FPUtil/generic/div.h"
+#include "src/__support/FPUtil/generic/mul.h"
+#include "src/__support/FPUtil/cast.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace fputil {
+
+struct Float128 {
+  UInt128 bits = 0;
+
+  constexpr Float128() = default;
+  constexpr explicit Float128(UInt128 value) : bits(value) {}
+  constexpr explicit Float128(float128 v) : bits(cpp::bit_cast<UInt128>(v)) {} //add constructor
+  constexpr UInt128 get_bits() const { return bits;}
+
+  //basic arithmetic operators
+  constexpr LIBC_INLINE float128 operator+(const Float128 &other) const {
+    float128 a = cpp::bit_cast<float128>(bits);
+    float128 b = cpp::bit_cast<float128>(other.bits);
+    return a + b;
+  }
+
+  constexpr LIBC_INLINE float128 operator-(const Float128 &other) const {
+    float128 a = cpp::bit_cast<float128>(bits);
+    float128 b = cpp::bit_cast<float128>(other.bits);
+    return a - b;
+  }
+
+  constexpr LIBC_INLINE float128 operator*(const Float128 &other) const {
+    float128 a = cpp::bit_cast<float128>(bits);
+    float128 b = cpp::bit_cast<float128>(other.bits);
+    return a * b;
+  }
+
+  constexpr LIBC_INLINE float128 operator/(const Float128 &other) const {
+    float128 a = cpp::bit_cast<float128>(bits);
+    float128 b = cpp::bit_cast<float128>(other.bits);
+    return a / b;
+  }
+}; 
+} // namespace fputil
+}// namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_SUPPORT_FPUTIL_FLOAT128_H
diff --git a/libc/test/src/__support/FPUtil/CMakeLists.txt b/libc/test/src/__support/FPUtil/CMakeLists.txt
index 81db4ccae44c6..820102fa84238 100644
--- a/libc/test/src/__support/FPUtil/CMakeLists.txt
+++ b/libc/test/src/__support/FPUtil/CMakeLists.txt
@@ -39,6 +39,19 @@ add_fp_unittest(
     libc.src.__support.FPUtil.rounding_mode
 )
 
+add_libc_test(
+  float128_test
+  NEED_MPFR
+  SUITE
+    libc-fputil-tests
+  SRCS
+    float128_test.cpp
+  DEPENDS
+    libc.src.__support.FPUtil.cast
+    libc.src.__support.FPUtil.fp_bits
+    libc.src.__support.macros.properties.types
+)
+
 # TODO: Temporally disable bfloat16 test until MPCommon target is updated
 # https://github.com/llvm/llvm-project/pull/149678
 if(LLVM_LIBC_FULL_BUILD)
@@ -67,3 +80,4 @@ add_fp_unittest(
     libc.src.__support.FPUtil.comparison_operations
     libc.src.__support.macros.properties.types
 )
+
diff --git a/libc/test/src/__support/FPUtil/float128_test.cpp b/libc/test/src/__support/FPUtil/float128_test.cpp
new file mode 100644
index 0000000000000..614663588d54b
--- /dev/null
+++ b/libc/test/src/__support/FPUtil/float128_test.cpp
@@ -0,0 +1,185 @@
+//===----------------------------------------------------------------------===//
+// Float128 Tests
+//
+// These tests validate the basic integration of the float128 type with:
+//   - type traits (is_floating_point)
+//   - FPBits functionality
+//   - Float128 casting
+// The goal is to ensure that both the type alias (float128) and the fallback
+// implementation behave consistently with other floating-point types.
+//===----------------------------------------------------------------------===//
+#include "src/__support/FPUtil/cast.h"
+#include "src/__support/macros/properties/types.h"
+#include "test/UnitTest/Test.h"
+#include "src/__support/FPUtil/bfloat16.h"
+#include "src/__support/FPUtil/float128.h"
+
+using LIBC_NAMESPACE::fputil::cast;
+using LIBC_NAMESPACE::fputil::Float128;
+
+// Test: float128 is recognized as a floating-point type.
+TEST(LlvmLibcTypeTraitsTest, Float128IsFloatingPoint) {
+  using LIBC_NAMESPACE::cpp::is_floating_point_v;
+
+  EXPECT_TRUE(is_floating_point_v<float128>);
+}
+
+// Test: Basic FPBits usage with float128 default initialization.
+// Verifies zero initialization and basic classification APIs.
+TEST(LlvmLibcFPBitsTest, Float128BasicUsage) {
+  using LIBC_NAMESPACE::fputil::FPBits;
+
+  float128 x{}; // Default-initialized to zero
+  FPBits<float128> bits(x);
+
+  EXPECT_TRUE(bits.is_zero());
+  EXPECT_TRUE(bits.is_finite());
+  EXPECT_FALSE(bits.is_nan());
+  EXPECT_FALSE(bits.is_inf());
+}
+
+// Test: Construct FPBits<float128> directly from raw bits.
+// Verifies that zero bit pattern is interpreted correctly.
+TEST(LlvmLibcFPBitsTest, Float128FromBits) {
+  using LIBC_NAMESPACE::fputil::FPBits;
+  UInt128 raw = 0;
+  FPBits<float128> bits(raw);
+
+  EXPECT_TRUE(bits.is_zero());
+}
+
+// Test: Special values (infinity and NaN) for float128.
+// Ensures FPBits builders work correctly for float128.
+TEST(LlvmLibcFPBitsTest, Float128SpecialValues) {
+  using LIBC_NAMESPACE::fputil::FPBits;
+
+  auto inf = FPBits<float128>::inf();
+  EXPECT_TRUE(inf.is_inf());
+
+  auto nan = FPBits<float128>::quiet_nan();
+  EXPECT_TRUE(nan.is_nan());
+}
+
+//Test float to float128 casting
+TEST(LlvmLibcCastTest, FloatToFloat128ToFloat) {
+  float x = 1.25f;
+  float128 q = cast<float128>(x);
+  float y = cast<float>(q);
+  EXPECT_TRUE(x == y);
+}
+
+//Test double -> float128 -> double casting
+TEST(LlvmLibcCastTest, DoubleToFloat128ToDouble) {
+  double x = 1.5;
+  float128 q = cast<float128>(x);
+  double y = cast<double>(q);
+  EXPECT_TRUE(x == y);
+}
+
+//Test bfloat16 -> float128 casting
+TEST(LlvmLibcCastTest, bfloat16ToFloat128Tobfloat16) {
+  bfloat16 x = cast<bfloat16>(0.1);
+  float128 q = cast<float128>(x);
+  bfloat16 y = cast<bfloat16>(q);
+  EXPECT_TRUE(x == y);
+}
+
+TEST(LlvmLibcCastTest, RoundingBehavior) {
+  double x = 0.1;
+  float128 q = cast<float128>(x);
+  double y = cast<double>(q);
+  EXPECT_TRUE(x == y);
+}
+
+TEST(LlvmLibcCastTest, ZeroAndNegativeZero) {
+  using LIBC_NAMESPACE::fputil::cast;
+  double pos_zero = 0.0;
+  double neg_zero = -0.0;
+
+  float128 q1 = cast<float128>(pos_zero);
+  float128 q2 = cast<float128>(neg_zero);
+
+  double y1 = cast<double>(q1);
+  double y2 = cast<double>(q2);
+
+  EXPECT_TRUE(y1 == 0.0);
+  EXPECT_TRUE(y2 == 0.0);
+  EXPECT_TRUE(__builtin_signbit(y2) != 0);
+}
+
+TEST(LlvmLibcCastTest, SpecialValues) {
+  using LIBC_NAMESPACE::fputil::cast;
+
+  double inf = __builtin_inf();
+  double nan = __builtin_nan("");
+
+  float128 q_inf = cast<float128>(inf);
+  float128 q_nan = cast<float128>(nan);
+
+  double y_inf = cast<double>(q_inf);
+  double y_nan = cast<double>(q_nan);
+
+  EXPECT_TRUE(__builtin_isinf(y_inf) != 0);
+  EXPECT_TRUE(__builtin_isnan(y_nan) != 0);
+}
+
+//test operators
+TEST(LlvmLibcFloat128Test, BasicArithmetic) {
+  float128 a = cast<float128>(1.5);
+  float128 b = cast<float128>(2.0);
+
+  Float128 x(a);
+  Float128 y(b);
+
+  EXPECT_TRUE((x + y) == cast<float128>(3.5));
+  EXPECT_TRUE((x - y) == cast<float128>(-0.5));
+  EXPECT_TRUE((x * y) == cast<float128>(3.0));
+  EXPECT_TRUE((x / y) == cast<float128>(0.75));
+}
+
+TEST(LlvmLibcFloat128Test, ZeroBehavior) {
+  float128 pos_zero = cast<float128>(0.0);
+  float128 neg_zero = cast<float128>(-0.0);
+
+  Float128 x(pos_zero);
+  Float128 y(neg_zero);
+
+  float128 r1 = x + y;
+  float128 r2 = x - y;
+
+  EXPECT_TRUE(r1 == 0.0);
+  EXPECT_TRUE(r2 == 0.0);
+  EXPECT_TRUE(__builtin_signbit(r2) == 0);
+}
+
+TEST(LlvmLibcFloat128Test, SpecialValues) {
+  float128 inf = cast<float128>(__builtin_inf());
+  float128 nan = cast<float128>(__builtin_nan(""));
+
+  Float128 x(inf);
+  Float128 y(nan);
+
+  EXPECT_TRUE(__builtin_isinf(x + Float128(cast<float128>(1.0))) != 0);
+  EXPECT_TRUE(__builtin_isnan(y + Float128(cast<float128>(1.0))) != 0);
+}
+
+TEST(LlvmLibcFloat128Test, PrecisionSanity) {
+  float128 a = cast<float128>(0.1);
+  float128 b = cast<float128>(0.2);
+
+  Float128 x(a);
+  Float128 y(b);
+
+  float128 r = x + y;
+
+  EXPECT_TRUE(r == a + b);
+}
+
+TEST(LlvmLibcFloat128Test, RoundTripConsistency) {
+  float128 a = cast<float128>(1.25);
+
+  Float128 x(a);
+  float128 r = x + Float128(cast<float128>(0.0));
+
+  EXPECT_TRUE(r == a);
+}
\ No newline at end of file

``````````

</details>


https://github.com/llvm/llvm-project/pull/187425