[libc-commits] [libc] [libc][math] Improve fmul performance by using double-double arithmetic. (PR #107517)

Wed Sep 11 15:29:31 PDT 2024

https://github.com/Jobhdez updated https://github.com/llvm/llvm-project/pull/107517

>From 6bca4f7d3addd4f03b44f13c57e1ce9a582b9c2b Mon Sep 17 00:00:00 2001
From: Job Hernandez <hj93 at protonmail.com>
Date: Thu, 5 Sep 2024 21:26:49 -0700
Subject: [PATCH 01/12] add draft, one more test to go

---
 libc/src/math/generic/CMakeLists.txt |  4 +++-
 libc/src/math/generic/fmul.cpp       | 18 ++++++++++++++++--
 2 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index 350072f4b9649d..5a1ee3b8b83c77 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -2958,7 +2958,9 @@ add_entrypoint_object(
   HDRS
     ../fmul.h
   DEPENDS
-    libc.src.__support.FPUtil.generic.mul
+    libc.hdr.errno_macros
+    libc.hdr.fenv_macros
+    libc.src.__support.FPUtil.double_double
   COMPILE_OPTIONS
     -O3
 )
diff --git a/libc/src/math/generic/fmul.cpp b/libc/src/math/generic/fmul.cpp
index 64c27d6e2f9564..fe8f5b0bb7c45b 100644
--- a/libc/src/math/generic/fmul.cpp
+++ b/libc/src/math/generic/fmul.cpp
@@ -5,16 +5,30 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
-
+#include "hdr/errno_macros.h"
+#include "hdr/fenv_macros.h"
 #include "src/math/fmul.h"
 #include "src/__support/FPUtil/generic/mul.h"
 #include "src/__support/common.h"
 #include "src/__support/macros/config.h"
+#include "src/__support/FPUtil/double_double.h"
 
 namespace LIBC_NAMESPACE_DECL {
-
+  /*
 LLVM_LIBC_FUNCTION(float, fmul, (double x, double y)) {
   return fputil::generic::mul<float>(x, y);
 }
+  */
+LLVM_LIBC_FUNCTION(float, fmul, (double x, double y)) {
+  fputil::DoubleDouble prod = fputil::exact_mult(x, y);
+  if (LIBC_UNLIKELY(fputil::FPBits<double>(prod.hi).is_inf_or_nan() || fputil::FPBits<double>(prod.hi).is_zero()))
+    return static_cast<float>(prod.hi);
+  if (LIBC_UNLIKELY(fputil::FPBits<double>(prod.hi).is_inf() || fputil::FPBits<double>(prod.hi).is_zero())) {
+    fputil::set_errno_if_required(EDOM);
+    fputil::raise_except_if_required(FE_INVALID);
+    return fputil::FPBits<double>::quiet_nan().get_val();
+  }
+  return static_cast<float>(prod.hi + prod.lo);
+}
 
 } // namespace LIBC_NAMESPACE_DECL

>From db2fe1ef877461b168dcede8b4d3c44f95116139 Mon Sep 17 00:00:00 2001
From: Job Hernandez Lara <hj93 at protonmail.com>
Date: Fri, 6 Sep 2024 16:33:54 -0700
Subject: [PATCH 02/12] add darwin entrypoints

---
 libc/config/darwin/arm/entrypoints.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libc/config/darwin/arm/entrypoints.txt b/libc/config/darwin/arm/entrypoints.txt
index a012504daa5c54..e764eab64d2418 100644
--- a/libc/config/darwin/arm/entrypoints.txt
+++ b/libc/config/darwin/arm/entrypoints.txt
@@ -171,6 +171,7 @@ set(TARGET_LIBM_ENTRYPOINTS
     libc.src.math.fminl
     libc.src.math.fmod
     libc.src.math.fmodf
+    libc.src.math.fmul
     libc.src.math.frexp
     libc.src.math.frexpf
     libc.src.math.frexpl

>From c4edba3f76f15e704b95a5c3e188f16efeb35808 Mon Sep 17 00:00:00 2001
From: Job Hernandez Lara <hj93 at protonmail.com>
Date: Fri, 6 Sep 2024 16:35:42 -0700
Subject: [PATCH 03/12] format code

---
 libc/src/math/generic/fmul.cpp | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/libc/src/math/generic/fmul.cpp b/libc/src/math/generic/fmul.cpp
index fe8f5b0bb7c45b..c2536645b593bd 100644
--- a/libc/src/math/generic/fmul.cpp
+++ b/libc/src/math/generic/fmul.cpp
@@ -5,25 +5,27 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
+#include "src/math/fmul.h"
 #include "hdr/errno_macros.h"
 #include "hdr/fenv_macros.h"
-#include "src/math/fmul.h"
+#include "src/__support/FPUtil/double_double.h"
 #include "src/__support/FPUtil/generic/mul.h"
 #include "src/__support/common.h"
 #include "src/__support/macros/config.h"
-#include "src/__support/FPUtil/double_double.h"
 
 namespace LIBC_NAMESPACE_DECL {
-  /*
+/*
 LLVM_LIBC_FUNCTION(float, fmul, (double x, double y)) {
-  return fputil::generic::mul<float>(x, y);
+return fputil::generic::mul<float>(x, y);
 }
-  */
+*/
 LLVM_LIBC_FUNCTION(float, fmul, (double x, double y)) {
   fputil::DoubleDouble prod = fputil::exact_mult(x, y);
-  if (LIBC_UNLIKELY(fputil::FPBits<double>(prod.hi).is_inf_or_nan() || fputil::FPBits<double>(prod.hi).is_zero()))
+  if (LIBC_UNLIKELY(fputil::FPBits<double>(prod.hi).is_inf_or_nan() ||
+                    fputil::FPBits<double>(prod.hi).is_zero()))
     return static_cast<float>(prod.hi);
-  if (LIBC_UNLIKELY(fputil::FPBits<double>(prod.hi).is_inf() || fputil::FPBits<double>(prod.hi).is_zero())) {
+  if (LIBC_UNLIKELY(fputil::FPBits<double>(prod.hi).is_inf() ||
+                    fputil::FPBits<double>(prod.hi).is_zero())) {
     fputil::set_errno_if_required(EDOM);
     fputil::raise_except_if_required(FE_INVALID);
     return fputil::FPBits<double>::quiet_nan().get_val();

>From 9533e593f54d247bb068e3172dfdfab23280faf1 Mon Sep 17 00:00:00 2001
From: Job Hernandez Lara <hj93 at protonmail.com>
Date: Sat, 7 Sep 2024 20:14:50 -0700
Subject: [PATCH 04/12] add new case

---
 libc/config/darwin/arm/entrypoints.txt |  2 ++
 libc/src/math/generic/fmul.cpp         | 17 +++++++++++++++--
 libc/test/src/math/smoke/fmul_test.cpp | 17 +++++++++++++++++
 3 files changed, 34 insertions(+), 2 deletions(-)

diff --git a/libc/config/darwin/arm/entrypoints.txt b/libc/config/darwin/arm/entrypoints.txt
index e764eab64d2418..2d5dbeff485747 100644
--- a/libc/config/darwin/arm/entrypoints.txt
+++ b/libc/config/darwin/arm/entrypoints.txt
@@ -81,6 +81,8 @@ set(TARGET_LIBC_ENTRYPOINTS
     libc.src.stdlib.llabs
     libc.src.stdlib.lldiv
     libc.src.stdlib.qsort
+    libc.src.stdlib.rand
+    libc.src.stdlib.srand
     libc.src.stdlib.strtod
     libc.src.stdlib.strtof
     libc.src.stdlib.strtol
diff --git a/libc/src/math/generic/fmul.cpp b/libc/src/math/generic/fmul.cpp
index c2536645b593bd..28d96448cbb2df 100644
--- a/libc/src/math/generic/fmul.cpp
+++ b/libc/src/math/generic/fmul.cpp
@@ -21,8 +21,9 @@ return fputil::generic::mul<float>(x, y);
 */
 LLVM_LIBC_FUNCTION(float, fmul, (double x, double y)) {
   fputil::DoubleDouble prod = fputil::exact_mult(x, y);
-  if (LIBC_UNLIKELY(fputil::FPBits<double>(prod.hi).is_inf_or_nan() ||
-                    fputil::FPBits<double>(prod.hi).is_zero()))
+  fputil::FPBits<double> hi_bits(prod.hi), lo_bits(prod.lo);
+  
+  if (LIBC_UNLIKELY(hi_bits.is_inf_or_nan() || hi_bits.is_zero()))
     return static_cast<float>(prod.hi);
   if (LIBC_UNLIKELY(fputil::FPBits<double>(prod.hi).is_inf() ||
                     fputil::FPBits<double>(prod.hi).is_zero())) {
@@ -30,7 +31,19 @@ LLVM_LIBC_FUNCTION(float, fmul, (double x, double y)) {
     fputil::raise_except_if_required(FE_INVALID);
     return fputil::FPBits<double>::quiet_nan().get_val();
   }
+  if (prod.lo == 0.0)
+    return static_cast<float>(prod.hi);
+  
+  if (lo_bits.sign() != hi_bits.sign()) {
+     // Check if sticky bit of hi are all 0
+    constexpr uint64_t STICKY_MASK = 0xFFF'FFFF;  // Lower (52 - 23 - 1 = 28 bits)
+    uint64_t sticky_bits = (hi_bits.uintval() & STICKY_MASK);
+    uint64_t result_bits = (sticky_bits == 0) ? (hi_bits.uintval() - 1) : hi_bits.uintval();
+    double result = fputil::FPBits<double>(result_bits).get_val();
+    return static_cast<float>(result);
+}
   return static_cast<float>(prod.hi + prod.lo);
+    
 }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/test/src/math/smoke/fmul_test.cpp b/libc/test/src/math/smoke/fmul_test.cpp
index 3f6df66456bac5..e4165b23da1389 100644
--- a/libc/test/src/math/smoke/fmul_test.cpp
+++ b/libc/test/src/math/smoke/fmul_test.cpp
@@ -11,3 +11,20 @@
 #include "src/math/fmul.h"
 
 LIST_MUL_TESTS(float, double, LIBC_NAMESPACE::fmul)
+
+TEST_F(LlvmLibcMulTest, SpecialInputs) {
+  constexpr double INPUTS[][2] = {
+    {0x1.0100010002p8, 0x1.fffcp14},
+  };
+
+  constexpr float RESULTS[] = {
+    0x1.00fdfep+23f,
+  };
+
+  constexpr size_t N = sizeof(RESULTS) / sizeof(RESULTS[0]);
+
+  for (size_t i = 0; i < N; ++i) {
+    float result = LIBC_NAMESPACE::fmul(INPUTS[i][0], INPUTS[i][1]);
+    EXPECT_FP_EQ(RESULTS[i], result);
+  }
+}

>From 908eb18ee1ca94803f8e5980bc0375e94a2821c0 Mon Sep 17 00:00:00 2001
From: Job Hernandez Lara <hj93 at protonmail.com>
Date: Sat, 7 Sep 2024 20:15:16 -0700
Subject: [PATCH 05/12] format code

---
 libc/src/math/generic/fmul.cpp         | 15 ++++++++-------
 libc/test/src/math/smoke/fmul_test.cpp |  4 ++--
 2 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/libc/src/math/generic/fmul.cpp b/libc/src/math/generic/fmul.cpp
index 28d96448cbb2df..a9de90edd2a7ab 100644
--- a/libc/src/math/generic/fmul.cpp
+++ b/libc/src/math/generic/fmul.cpp
@@ -22,7 +22,7 @@ return fputil::generic::mul<float>(x, y);
 LLVM_LIBC_FUNCTION(float, fmul, (double x, double y)) {
   fputil::DoubleDouble prod = fputil::exact_mult(x, y);
   fputil::FPBits<double> hi_bits(prod.hi), lo_bits(prod.lo);
-  
+
   if (LIBC_UNLIKELY(hi_bits.is_inf_or_nan() || hi_bits.is_zero()))
     return static_cast<float>(prod.hi);
   if (LIBC_UNLIKELY(fputil::FPBits<double>(prod.hi).is_inf() ||
@@ -33,17 +33,18 @@ LLVM_LIBC_FUNCTION(float, fmul, (double x, double y)) {
   }
   if (prod.lo == 0.0)
     return static_cast<float>(prod.hi);
-  
+
   if (lo_bits.sign() != hi_bits.sign()) {
-     // Check if sticky bit of hi are all 0
-    constexpr uint64_t STICKY_MASK = 0xFFF'FFFF;  // Lower (52 - 23 - 1 = 28 bits)
+    // Check if sticky bit of hi are all 0
+    constexpr uint64_t STICKY_MASK =
+        0xFFF'FFFF; // Lower (52 - 23 - 1 = 28 bits)
     uint64_t sticky_bits = (hi_bits.uintval() & STICKY_MASK);
-    uint64_t result_bits = (sticky_bits == 0) ? (hi_bits.uintval() - 1) : hi_bits.uintval();
+    uint64_t result_bits =
+        (sticky_bits == 0) ? (hi_bits.uintval() - 1) : hi_bits.uintval();
     double result = fputil::FPBits<double>(result_bits).get_val();
     return static_cast<float>(result);
-}
+  }
   return static_cast<float>(prod.hi + prod.lo);
-    
 }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/test/src/math/smoke/fmul_test.cpp b/libc/test/src/math/smoke/fmul_test.cpp
index e4165b23da1389..48a5afd247bcfb 100644
--- a/libc/test/src/math/smoke/fmul_test.cpp
+++ b/libc/test/src/math/smoke/fmul_test.cpp
@@ -14,11 +14,11 @@ LIST_MUL_TESTS(float, double, LIBC_NAMESPACE::fmul)
 
 TEST_F(LlvmLibcMulTest, SpecialInputs) {
   constexpr double INPUTS[][2] = {
-    {0x1.0100010002p8, 0x1.fffcp14},
+      {0x1.0100010002p8, 0x1.fffcp14},
   };
 
   constexpr float RESULTS[] = {
-    0x1.00fdfep+23f,
+      0x1.00fdfep+23f,
   };
 
   constexpr size_t N = sizeof(RESULTS) / sizeof(RESULTS[0]);

>From 7f9a4bf35c0a252686ca09880da24bd613bc9c46 Mon Sep 17 00:00:00 2001
From: Job Hernandez Lara <hj93 at protonmail.com>
Date: Mon, 9 Sep 2024 19:30:08 -0700
Subject: [PATCH 06/12] run mpfr tests

---
 libc/test/src/math/fmul_test.cpp              | 27 +++++++++++++++++++
 .../math/performance_testing/CMakeLists.txt   |  2 ++
 .../math/performance_testing/fmul_perf.cpp    |  4 ++-
 3 files changed, 32 insertions(+), 1 deletion(-)

diff --git a/libc/test/src/math/fmul_test.cpp b/libc/test/src/math/fmul_test.cpp
index 3f6df66456bac5..c134c3fa299d85 100644
--- a/libc/test/src/math/fmul_test.cpp
+++ b/libc/test/src/math/fmul_test.cpp
@@ -10,4 +10,31 @@
 
 #include "src/math/fmul.h"
 
+#include "test/UnitTest/Test.h"
+#include "utils/MPFRWrapper/MPFRUtils.h"
+
 LIST_MUL_TESTS(float, double, LIBC_NAMESPACE::fmul)
+
+TEST_F(LlvmLibcMulTest, SpecialInputs) {
+  namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
+  double INPUTS[][2] = {
+      {0x1.0100010002p8, 0x1.fffcp14},
+  };
+
+  constexpr float RESULTS[] = {
+      0x1.00fdfep+23f,
+  };
+
+  
+  constexpr size_t N = sizeof(RESULTS) / sizeof(RESULTS[0]);
+
+  for (size_t i = 0; i < N; ++i) {
+    double a = INPUTS[i][0];
+
+    for (int j = 0; j < 180; ++j) {
+      a *= 0.5;
+      mpfr::BinaryInput<double> input{INPUTS[i][0], INPUTS[i][1]};
+      ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Mul, input, LIBC_NAMESPACE::fmul(INPUTS[i][0], INPUTS[i][1]), 0.5);
+    }
+  }
+}
diff --git a/libc/test/src/math/performance_testing/CMakeLists.txt b/libc/test/src/math/performance_testing/CMakeLists.txt
index ed1b03f3493c7d..60c074a248f72a 100644
--- a/libc/test/src/math/performance_testing/CMakeLists.txt
+++ b/libc/test/src/math/performance_testing/CMakeLists.txt
@@ -484,6 +484,8 @@ add_perf_binary(
   DEPENDS
     .binary_op_single_output_diff
     libc.src.math.fmul
+    libc.src.__support.FPUtil.generic.mul
+    libc.src.__support.FPUtil.fp_bits
   COMPILE_OPTIONS
     -fno-builtin
 )
diff --git a/libc/test/src/math/performance_testing/fmul_perf.cpp b/libc/test/src/math/performance_testing/fmul_perf.cpp
index a215405eb6aa5d..d6156f1519c507 100644
--- a/libc/test/src/math/performance_testing/fmul_perf.cpp
+++ b/libc/test/src/math/performance_testing/fmul_perf.cpp
@@ -7,12 +7,14 @@
 //===----------------------------------------------------------------------===//
 
 #include "BinaryOpSingleOutputPerf.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/generic/mul.h"
 #include "src/math/fmul.h"
 
 static constexpr size_t DOUBLE_ROUNDS = 40;
 
 float fmul_placeholder_binary(double x, double y) {
-  return static_cast<float>(x * y);
+  return LIBC_NAMESPACE::fputil::generic::mul<float>(x, y);
 }
 
 int main() {

>From 78a06b41edc80e87e8b400373f8c859b2cf351ee Mon Sep 17 00:00:00 2001
From: Job Hernandez Lara <hj93 at protonmail.com>
Date: Mon, 9 Sep 2024 19:30:34 -0700
Subject: [PATCH 07/12] format code

---
 libc/test/src/math/fmul_test.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/libc/test/src/math/fmul_test.cpp b/libc/test/src/math/fmul_test.cpp
index c134c3fa299d85..d1caca0c38de20 100644
--- a/libc/test/src/math/fmul_test.cpp
+++ b/libc/test/src/math/fmul_test.cpp
@@ -25,7 +25,6 @@ TEST_F(LlvmLibcMulTest, SpecialInputs) {
       0x1.00fdfep+23f,
   };
 
-  
   constexpr size_t N = sizeof(RESULTS) / sizeof(RESULTS[0]);
 
   for (size_t i = 0; i < N; ++i) {
@@ -34,7 +33,9 @@ TEST_F(LlvmLibcMulTest, SpecialInputs) {
     for (int j = 0; j < 180; ++j) {
       a *= 0.5;
       mpfr::BinaryInput<double> input{INPUTS[i][0], INPUTS[i][1]};
-      ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Mul, input, LIBC_NAMESPACE::fmul(INPUTS[i][0], INPUTS[i][1]), 0.5);
+      ASSERT_MPFR_MATCH_ALL_ROUNDING(
+          mpfr::Operation::Mul, input,
+          LIBC_NAMESPACE::fmul(INPUTS[i][0], INPUTS[i][1]), 0.5);
     }
   }
 }

>From 2fbc9b6ff15ff1cf656d08f7928fee4383d1d0d7 Mon Sep 17 00:00:00 2001
From: Job Hernandez Lara <hj93 at protonmail.com>
Date: Tue, 10 Sep 2024 09:08:23 -0700
Subject: [PATCH 08/12] handle all cases

Co-authored-by: Tue Ly <lntue at google.com>
---
 libc/src/math/generic/fmul.cpp         | 4 +++-
 libc/test/src/math/smoke/fmul_test.cpp | 3 +++
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/libc/src/math/generic/fmul.cpp b/libc/src/math/generic/fmul.cpp
index a9de90edd2a7ab..6d3b4c3016ff2b 100644
--- a/libc/src/math/generic/fmul.cpp
+++ b/libc/src/math/generic/fmul.cpp
@@ -44,7 +44,9 @@ LLVM_LIBC_FUNCTION(float, fmul, (double x, double y)) {
     double result = fputil::FPBits<double>(result_bits).get_val();
     return static_cast<float>(result);
   }
-  return static_cast<float>(prod.hi + prod.lo);
+
+  double result = fputil::FPBits<double>(hi_bits.uintval() | 1).get_val();
+  return static_cast<float>(result);
 }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/test/src/math/smoke/fmul_test.cpp b/libc/test/src/math/smoke/fmul_test.cpp
index 48a5afd247bcfb..dd0d89143f623d 100644
--- a/libc/test/src/math/smoke/fmul_test.cpp
+++ b/libc/test/src/math/smoke/fmul_test.cpp
@@ -15,10 +15,12 @@ LIST_MUL_TESTS(float, double, LIBC_NAMESPACE::fmul)
 TEST_F(LlvmLibcMulTest, SpecialInputs) {
   constexpr double INPUTS[][2] = {
       {0x1.0100010002p8, 0x1.fffcp14},
+      {0x1.000000b92144p-7, 0x1.62p7},
   };
 
   constexpr float RESULTS[] = {
       0x1.00fdfep+23f,
+      0x1.620002p0f,
   };
 
   constexpr size_t N = sizeof(RESULTS) / sizeof(RESULTS[0]);
@@ -28,3 +30,4 @@ TEST_F(LlvmLibcMulTest, SpecialInputs) {
     EXPECT_FP_EQ(RESULTS[i], result);
   }
 }
+    

>From 8060a9fdc4b10c0779894abc2262c157a12d82a7 Mon Sep 17 00:00:00 2001
From: Job Hernandez Lara <hj93 at protonmail.com>
Date: Tue, 10 Sep 2024 18:03:01 -0700
Subject: [PATCH 09/12] handle exceptions

---
 libc/src/math/generic/fmul.cpp | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/libc/src/math/generic/fmul.cpp b/libc/src/math/generic/fmul.cpp
index 6d3b4c3016ff2b..836779d0cad8ae 100644
--- a/libc/src/math/generic/fmul.cpp
+++ b/libc/src/math/generic/fmul.cpp
@@ -24,13 +24,9 @@ LLVM_LIBC_FUNCTION(float, fmul, (double x, double y)) {
   fputil::FPBits<double> hi_bits(prod.hi), lo_bits(prod.lo);
 
   if (LIBC_UNLIKELY(hi_bits.is_inf_or_nan() || hi_bits.is_zero()))
-    return static_cast<float>(prod.hi);
-  if (LIBC_UNLIKELY(fputil::FPBits<double>(prod.hi).is_inf() ||
-                    fputil::FPBits<double>(prod.hi).is_zero())) {
     fputil::set_errno_if_required(EDOM);
     fputil::raise_except_if_required(FE_INVALID);
-    return fputil::FPBits<double>::quiet_nan().get_val();
-  }
+    return static_cast<float>(prod.hi);
   if (prod.lo == 0.0)
     return static_cast<float>(prod.hi);
 

>From 403c7d3b3470ab6c3540d2975ceb203714004aa1 Mon Sep 17 00:00:00 2001
From: Job Hernandez Lara <hj93 at protonmail.com>
Date: Tue, 10 Sep 2024 18:03:27 -0700
Subject: [PATCH 10/12] format code

---
 libc/src/math/generic/fmul.cpp         | 22 +++++++++++-----------
 libc/test/src/math/smoke/fmul_test.cpp |  1 -
 2 files changed, 11 insertions(+), 12 deletions(-)

diff --git a/libc/src/math/generic/fmul.cpp b/libc/src/math/generic/fmul.cpp
index 836779d0cad8ae..a1e1c65e768fc5 100644
--- a/libc/src/math/generic/fmul.cpp
+++ b/libc/src/math/generic/fmul.cpp
@@ -27,18 +27,18 @@ LLVM_LIBC_FUNCTION(float, fmul, (double x, double y)) {
     fputil::set_errno_if_required(EDOM);
     fputil::raise_except_if_required(FE_INVALID);
     return static_cast<float>(prod.hi);
-  if (prod.lo == 0.0)
-    return static_cast<float>(prod.hi);
+    if (prod.lo == 0.0)
+      return static_cast<float>(prod.hi);
 
-  if (lo_bits.sign() != hi_bits.sign()) {
-    // Check if sticky bit of hi are all 0
-    constexpr uint64_t STICKY_MASK =
-        0xFFF'FFFF; // Lower (52 - 23 - 1 = 28 bits)
-    uint64_t sticky_bits = (hi_bits.uintval() & STICKY_MASK);
-    uint64_t result_bits =
-        (sticky_bits == 0) ? (hi_bits.uintval() - 1) : hi_bits.uintval();
-    double result = fputil::FPBits<double>(result_bits).get_val();
-    return static_cast<float>(result);
+    if (lo_bits.sign() != hi_bits.sign()) {
+      // Check if sticky bit of hi are all 0
+      constexpr uint64_t STICKY_MASK =
+          0xFFF'FFFF; // Lower (52 - 23 - 1 = 28 bits)
+      uint64_t sticky_bits = (hi_bits.uintval() & STICKY_MASK);
+      uint64_t result_bits =
+          (sticky_bits == 0) ? (hi_bits.uintval() - 1) : hi_bits.uintval();
+      double result = fputil::FPBits<double>(result_bits).get_val();
+      return static_cast<float>(result);
   }
 
   double result = fputil::FPBits<double>(hi_bits.uintval() | 1).get_val();
diff --git a/libc/test/src/math/smoke/fmul_test.cpp b/libc/test/src/math/smoke/fmul_test.cpp
index dd0d89143f623d..3fcf514bcd9f05 100644
--- a/libc/test/src/math/smoke/fmul_test.cpp
+++ b/libc/test/src/math/smoke/fmul_test.cpp
@@ -30,4 +30,3 @@ TEST_F(LlvmLibcMulTest, SpecialInputs) {
     EXPECT_FP_EQ(RESULTS[i], result);
   }
 }
-    

>From a0fc3daeb52af4c8771a4774bfab5bb06775e00f Mon Sep 17 00:00:00 2001
From: Job Hernandez <hj93 at protonmail.com>
Date: Tue, 10 Sep 2024 18:28:17 -0700
Subject: [PATCH 11/12] fix typo

---
 libc/src/math/generic/fmul.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/libc/src/math/generic/fmul.cpp b/libc/src/math/generic/fmul.cpp
index a1e1c65e768fc5..8b2315792a74c1 100644
--- a/libc/src/math/generic/fmul.cpp
+++ b/libc/src/math/generic/fmul.cpp
@@ -23,10 +23,11 @@ LLVM_LIBC_FUNCTION(float, fmul, (double x, double y)) {
   fputil::DoubleDouble prod = fputil::exact_mult(x, y);
   fputil::FPBits<double> hi_bits(prod.hi), lo_bits(prod.lo);
 
-  if (LIBC_UNLIKELY(hi_bits.is_inf_or_nan() || hi_bits.is_zero()))
+  if (LIBC_UNLIKELY(hi_bits.is_inf_or_nan() || hi_bits.is_zero())) {
     fputil::set_errno_if_required(EDOM);
     fputil::raise_except_if_required(FE_INVALID);
     return static_cast<float>(prod.hi);
+  }
     if (prod.lo == 0.0)
       return static_cast<float>(prod.hi);
 

>From 98f1b1ddee121a3808c5fbeae2dd2fb3f5d77dd4 Mon Sep 17 00:00:00 2001
From: Job Hernandez Lara <hj93 at protonmail.com>
Date: Wed, 11 Sep 2024 15:29:02 -0700
Subject: [PATCH 12/12] handle exceptions

---
 libc/src/math/generic/fmul.cpp | 89 ++++++++++++++++++++++++++++------
 1 file changed, 73 insertions(+), 16 deletions(-)

diff --git a/libc/src/math/generic/fmul.cpp b/libc/src/math/generic/fmul.cpp
index 8b2315792a74c1..c0974c6413d552 100644
--- a/libc/src/math/generic/fmul.cpp
+++ b/libc/src/math/generic/fmul.cpp
@@ -23,23 +23,80 @@ LLVM_LIBC_FUNCTION(float, fmul, (double x, double y)) {
   fputil::DoubleDouble prod = fputil::exact_mult(x, y);
   fputil::FPBits<double> hi_bits(prod.hi), lo_bits(prod.lo);
 
-  if (LIBC_UNLIKELY(hi_bits.is_inf_or_nan() || hi_bits.is_zero())) {
-    fputil::set_errno_if_required(EDOM);
-    fputil::raise_except_if_required(FE_INVALID);
-    return static_cast<float>(prod.hi);
+  float prod_hif = static_cast<float>(prod.hi);
+  fputil::FPBits<float> hif_bits(prod_hif);
+  using OutFPBits = fputil::FPBits<float>;
+  using OutStorageType = typename OutFPBits::StorageType;
+  using InFPBits = FPBits<double>;
+  using InStorageType = typename InFPBits::StorageType;
+
+  InFPBits x_bits(x);
+  InFPBits y_bits(y);
+
+  
+  Sign result_sign = x_bits.sign() == y_bits.sign() ? Sign::POS : Sign::NEG;
+
+  if (LIBC_UNLIKELY(x_bits.is_inf_or_nan() || y_bits.is_inf_or_nan() ||
+                    x_bits.is_zero() || y_bits.is_zero())) {
+    if (x_bits.is_nan() || y_bits.is_nan()) {
+      if (x_bits.is_signaling_nan() || y_bits.is_signaling_nan())
+        raise_except_if_required(FE_INVALID);
+
+      if (x_bits.is_quiet_nan()) {
+        InStorageType x_payload = x_bits.get_mantissa();
+        x_payload >>= InFPBits::FRACTION_LEN - OutFPBits::FRACTION_LEN;
+        return OutFPBits::quiet_nan(x_bits.sign(),
+                                    static_cast<OutStorageType>(x_payload))
+            .get_val();
+      }
+
+      if (y_bits.is_quiet_nan()) {
+        InStorageType y_payload = y_bits.get_mantissa();
+        y_payload >>= InFPBits::FRACTION_LEN - OutFPBits::FRACTION_LEN;
+        return OutFPBits::quiet_nan(y_bits.sign(),
+                                    static_cast<OutStorageType>(y_payload))
+            .get_val();
+      }
+
+      return OutFPBits::quiet_nan().get_val();
+    }
+
+    if (x_bits.is_inf()) {
+      if (y_bits.is_zero()) {
+        set_errno_if_required(EDOM);
+        raise_except_if_required(FE_INVALID);
+        return OutFPBits::quiet_nan().get_val();
+      }
+
+      return OutFPBits::inf(result_sign).get_val();
+    }
+
+    if (y_bits.is_inf()) {
+      if (x_bits.is_zero()) {
+        set_errno_if_required(EDOM);
+        raise_except_if_required(FE_INVALID);
+        return OutFPBits::quiet_nan().get_val();
+      }
+
+      return OutFPBits::inf(result_sign).get_val();
+    }
+
+    // Now either x or y is zero, and the other one is finite.
+    return OutFPBits::zero(result_sign).get_val();
   }
-    if (prod.lo == 0.0)
-      return static_cast<float>(prod.hi);
-
-    if (lo_bits.sign() != hi_bits.sign()) {
-      // Check if sticky bit of hi are all 0
-      constexpr uint64_t STICKY_MASK =
-          0xFFF'FFFF; // Lower (52 - 23 - 1 = 28 bits)
-      uint64_t sticky_bits = (hi_bits.uintval() & STICKY_MASK);
-      uint64_t result_bits =
-          (sticky_bits == 0) ? (hi_bits.uintval() - 1) : hi_bits.uintval();
-      double result = fputil::FPBits<double>(result_bits).get_val();
-      return static_cast<float>(result);
+  
+  if (prod.lo == 0.0)
+    return static_cast<float>(prod.hi);
+
+  if (lo_bits.sign() != hi_bits.sign()) {
+    // Check if the sticky bits of hi are all 0
+    constexpr uint64_t STICKY_MASK =
+      0xFFF'FFFF; // Lower (52 - 23 - 1 = 28 bits)
+    uint64_t sticky_bits = (hi_bits.uintval() & STICKY_MASK);
+    uint64_t result_bits =
+      (sticky_bits == 0) ? (hi_bits.uintval() - 1) : hi_bits.uintval();
+    double result = fputil::FPBits<double>(result_bits).get_val();
+    return static_cast<float>(result);
   }
 
   double result = fputil::FPBits<double>(hi_bits.uintval() | 1).get_val();