[libc-commits] [libc] ae3b59e - [libc] Use MPFR for strtofloat fuzzing

Mon May 22 11:05:00 PDT 2023

Author: Michael Jones
Date: 2023-05-22T11:04:53-07:00
New Revision: ae3b59e62398e8dc86dec647be58c5ee032447c1

URL: https://github.com/llvm/llvm-project/commit/ae3b59e62398e8dc86dec647be58c5ee032447c1
DIFF: https://github.com/llvm/llvm-project/commit/ae3b59e62398e8dc86dec647be58c5ee032447c1.diff

LOG: [libc] Use MPFR for strtofloat fuzzing

The previous string to float tests didn't check correctness, but due to
the atof differential test proving unreliable the strtofloat fuzz test
has been changed to use MPFR for correctness checking. Some minor bugs
have been found and fixed as well.

Reviewed By: lntue

Differential Revision: https://reviews.llvm.org/D150905

Added: 
    libc/utils/MPFRWrapper/mpfr_inc.h

Modified: 
    libc/cmake/modules/LLVMLibCTestRules.cmake
    libc/fuzzing/stdlib/CMakeLists.txt
    libc/fuzzing/stdlib/atof_differential_fuzz.cpp
    libc/fuzzing/stdlib/strtofloat_fuzz.cpp
    libc/src/__support/str_to_float.h
    libc/test/src/stdlib/strtod_test.cpp
    libc/test/src/stdlib/strtold_test.cpp
    libc/utils/MPFRWrapper/CMakeLists.txt
    libc/utils/MPFRWrapper/MPFRUtils.cpp
    utils/bazel/llvm-project-overlay/libc/utils/MPFRWrapper/BUILD.bazel

Removed: 
    


################################################################################
diff  --git a/libc/cmake/modules/LLVMLibCTestRules.cmake b/libc/cmake/modules/LLVMLibCTestRules.cmake
index ab1f46cfd2dc6..e1e0f24803a5c 100644

--- a/libc/cmake/modules/LLVMLibCTestRules.cmake
+++ b/libc/cmake/modules/LLVMLibCTestRules.cmake
@@ -323,7 +323,7 @@ endfunction(add_libc_long_running_testsuite)
 function(add_libc_fuzzer target_name)
   cmake_parse_arguments(
     "LIBC_FUZZER"
-    "" # No optional arguments
+    "NEED_MPFR" # Optional arguments
     "" # Single value arguments
     "SRCS;HDRS;DEPENDS;COMPILE_OPTIONS" # Multi-value arguments
     ${ARGN}
@@ -337,6 +337,16 @@ function(add_libc_fuzzer target_name)
                         "'add_entrypoint_object' targets.")
   endif()
 
+  list(APPEND LIBC_FUZZER_LINK_LIBRARIES "")
+  if(LIBC_FUZZER_NEED_MPFR)
+    if(NOT LIBC_TESTS_CAN_USE_MPFR)
+      message(VERBOSE "Fuzz test ${name} will be skipped as MPFR library is not available.")
+      return()
+    endif()
+    list(APPEND LIBC_FUZZER_LINK_LIBRARIES mpfr gmp)
+  endif()
+
+
   get_fq_target_name(${target_name} fq_target_name)
   get_fq_deps_list(fq_deps_list ${LIBC_FUZZER_DEPENDS})
   get_object_files_for_test(
@@ -372,7 +382,10 @@ function(add_libc_fuzzer target_name)
       ${LIBC_BUILD_DIR}/include
   )
 
-  target_link_libraries(${fq_target_name} PRIVATE ${link_object_files})
+  target_link_libraries(${fq_target_name} PRIVATE 
+    ${link_object_files} 
+    ${LIBC_FUZZER_LINK_LIBRARIES}
+  )
 
   set_target_properties(${fq_target_name}
       PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})

diff  --git a/libc/fuzzing/stdlib/CMakeLists.txt b/libc/fuzzing/stdlib/CMakeLists.txt
index 785efd82a1ed3..767d13fb84e33 100644
--- a/libc/fuzzing/stdlib/CMakeLists.txt
+++ b/libc/fuzzing/stdlib/CMakeLists.txt
@@ -14,12 +14,11 @@ add_libc_fuzzer(
     StringParserOutputDiff.h
   DEPENDS
     libc.src.stdlib.atof
-  COMPILE_OPTIONS
-    -DLLVM_LIBC_ATOF_DIF_FUZZ_SKIP_GLIBC_HEX_SUBNORMAL_ERR
 )
 
 add_libc_fuzzer(
   strtofloat_fuzz
+  NEED_MPFR
   SRCS
     strtofloat_fuzz.cpp
   DEPENDS

diff  --git a/libc/fuzzing/stdlib/atof_
diff erential_fuzz.cpp b/libc/fuzzing/stdlib/atof_
diff erential_fuzz.cpp
index 2f330e1af2e32..b368129960d3b 100644
--- a/libc/fuzzing/stdlib/atof_
diff erential_fuzz.cpp
+++ b/libc/fuzzing/stdlib/atof_
diff erential_fuzz.cpp
@@ -16,40 +16,6 @@
 
 #include "fuzzing/stdlib/StringParserOutputDiff.h"
 
-// TODO: Remove this once glibc fixes hex subnormal rounding. See
-// https://sourceware.org/bugzilla/show_bug.cgi?id=30220
-#ifdef LLVM_LIBC_ATOF_DIF_FUZZ_SKIP_GLIBC_HEX_SUBNORMAL_ERR
-#include <ctype.h>
-constexpr double MIN_NORMAL = 0x1p-1022;
-
-bool has_hex_prefix(const uint8_t *str) {
-  size_t index = 0;
-
-  // Skip over leading whitespace
-  while (isspace(str[index])) {
-    ++index;
-  }
-  // Skip over sign
-  if (str[index] == '-' || str[index] == '+') {
-    ++index;
-  }
-  return str[index] == '0' && (tolower(str[index + 1])) == 'x';
-}
-
-bool should_be_skipped(const uint8_t *str) {
-  double init_result = ::atof(reinterpret_cast<const char *>(str));
-  if (init_result < 0) {
-    init_result = -init_result;
-  }
-  if (init_result < MIN_NORMAL && init_result != 0) {
-    return has_hex_prefix(str);
-  }
-  return false;
-}
-#else
-bool should_be_skipped(const uint8_t *) { return false; }
-#endif // LLVM_LIBC_ATOF_DIF_FUZZ_SKIP_GLIBC_HEX_SUBNORMAL_ERR
-
 extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
   uint8_t *container = new uint8_t[size + 1];
   if (!container)
@@ -60,10 +26,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
     container[i] = data[i];
   container[size] = '\0'; // Add null terminator to container.
 
-  if (!should_be_skipped(container)) {
-    StringParserOutputDiff<double>(&__llvm_libc::atof, &::atof, container,
-                                   size);
-  }
+  StringParserOutputDiff<double>(&__llvm_libc::atof, &::atof, container, size);
   delete[] container;
   return 0;
 }

diff  --git a/libc/fuzzing/stdlib/strtofloat_fuzz.cpp b/libc/fuzzing/stdlib/strtofloat_fuzz.cpp
index 3366c5c64dda6..192aab8ffa8c8 100644
--- a/libc/fuzzing/stdlib/strtofloat_fuzz.cpp
+++ b/libc/fuzzing/stdlib/strtofloat_fuzz.cpp
@@ -13,50 +13,87 @@
 #include "src/stdlib/strtod.h"
 #include "src/stdlib/strtof.h"
 #include "src/stdlib/strtold.h"
+
 #include <math.h>
 #include <stddef.h>
 #include <stdint.h>
 
+#include "utils/MPFRWrapper/mpfr_inc.h"
+
 extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
   uint8_t *container = new uint8_t[size + 1];
   if (!container)
     __builtin_trap();
   size_t i;
 
-  for (i = 0; i < size; ++i)
-    container[i] = data[i];
+  for (i = 0; i < size; ++i) {
+    // MPFR's strtofr uses "@" as a base-independent exponent symbol
+    if (data[i] != '@')
+      container[i] = data[i];
+    else {
+      container[i] = '#';
+    }
+  }
   container[size] = '\0'; // Add null terminator to container.
 
   const char *str_ptr = reinterpret_cast<const char *>(container);
 
   char *out_ptr = nullptr;
 
-  // This fuzzer only checks that the algorithms didn't read beyond the end of
-  // the string in container. Combined with sanitizers, this will check that the
-  // code is not reading memory beyond what's expected. This test does not
-  // effectively check the correctness of the result.
+  mpfr_t result;
+  mpfr_init2(result, 256);
+  mpfr_t bin_result;
+  mpfr_init2(bin_result, 256);
+  mpfr_strtofr(result, str_ptr, &out_ptr, 0 /* base */, MPFR_RNDN);
+  ptr
diff _t result_strlen = out_ptr - str_ptr;
+  mpfr_strtofr(bin_result, str_ptr, &out_ptr, 2 /* base */, MPFR_RNDN);
+  ptr
diff _t bin_result_strlen = out_ptr - str_ptr;
+
+  long double bin_result_ld = mpfr_get_ld(bin_result, MPFR_RNDN);
+  long double result_ld = mpfr_get_ld(result, MPFR_RNDN);
+
+  // This detects if mpfr's strtofr selected a base of 2, which libc does not
+  // support. If a base 2 decoding is detected, it is replaced by a base 10
+  // decoding.
+  if ((bin_result_ld != 0.0 || bin_result_strlen == result_strlen) &&
+      bin_result_ld == result_ld) {
+    mpfr_strtofr(result, str_ptr, &out_ptr, 10 /* base */, MPFR_RNDN);
+    result_strlen = out_ptr - str_ptr;
+  }
+
   auto volatile atof_output = __llvm_libc::atof(str_ptr);
   auto volatile strtof_output = __llvm_libc::strtof(str_ptr, &out_ptr);
-  if (str_ptr + size < out_ptr)
+  ptr
diff _t strtof_strlen = out_ptr - str_ptr;
+  if (result_strlen != strtof_strlen)
     __builtin_trap();
   auto volatile strtod_output = __llvm_libc::strtod(str_ptr, &out_ptr);
-  if (str_ptr + size < out_ptr)
+  ptr
diff _t strtod_strlen = out_ptr - str_ptr;
+  if (result_strlen != strtod_strlen)
     __builtin_trap();
   auto volatile strtold_output = __llvm_libc::strtold(str_ptr, &out_ptr);
-  if (str_ptr + size < out_ptr)
+  ptr
diff _t strtold_strlen = out_ptr - str_ptr;
+  if (result_strlen != strtold_strlen)
     __builtin_trap();
 
-  // If any of the outputs are NaN
-  if (isnan(atof_output) || isnan(strtof_output) || isnan(strtod_output) ||
-      isnan(strtold_output)) {
-    // Then all the outputs should be NaN.
-    // This is a trivial check meant to silence the "unused variable" warnings.
-    if (!isnan(atof_output) || !isnan(strtof_output) || !isnan(strtod_output) ||
-        !isnan(strtold_output)) {
+  // If any result is NaN, all of them should be NaN. We can't use the usual
+  // comparisons because NaN != NaN.
+  if (isnan(result_ld)) {
+    if (!(isnan(atof_output) && isnan(strtof_output) && isnan(strtod_output) &&
+          isnan(strtold_output)))
+      __builtin_trap();
+  } else {
+    if (mpfr_get_d(result, MPFR_RNDN) != atof_output)
+      __builtin_trap();
+    if (mpfr_get_flt(result, MPFR_RNDN) != strtof_output)
+      __builtin_trap();
+    if (mpfr_get_d(result, MPFR_RNDN) != strtod_output)
+      __builtin_trap();
+    if (mpfr_get_ld(result, MPFR_RNDN) != strtold_output)
       __builtin_trap();
-    }
   }
 
+  mpfr_clear(result);
+  mpfr_clear(bin_result);
   delete[] container;
   return 0;
 }

diff  --git a/libc/src/__support/str_to_float.h b/libc/src/__support/str_to_float.h
index 0697cf0a68c32..b36c73fbe8ef6 100644
--- a/libc/src/__support/str_to_float.h
+++ b/libc/src/__support/str_to_float.h
@@ -265,10 +265,15 @@ eisel_lemire<long double>(ExpandedFloat<long double> init_num,
   UInt128 approx_upper = static_cast<UInt128>(high64(mantissa)) *
                          static_cast<UInt128>(power_of_ten[1]);
 
-  UInt128 approx_middle = static_cast<UInt128>(high64(mantissa)) *
-                              static_cast<UInt128>(power_of_ten[0]) +
-                          static_cast<UInt128>(low64(mantissa)) *
-                              static_cast<UInt128>(power_of_ten[1]);
+  UInt128 approx_middle_a = static_cast<UInt128>(high64(mantissa)) *
+                            static_cast<UInt128>(power_of_ten[0]);
+  UInt128 approx_middle_b = static_cast<UInt128>(low64(mantissa)) *
+                            static_cast<UInt128>(power_of_ten[1]);
+
+  UInt128 approx_middle = approx_middle_a + approx_middle_b;
+
+  // Handle overflow in the middle
+  approx_upper += (approx_middle < approx_middle_a) ? UInt128(1) << 64 : 0;
 
   UInt128 approx_lower = static_cast<UInt128>(low64(mantissa)) *
                          static_cast<UInt128>(power_of_ten[0]);
@@ -928,8 +933,11 @@ decimal_string_to_float(const char *__restrict src, const char DECIMAL_POINT,
     return output;
 
   if (tolower(src[index]) == EXPONENT_MARKER) {
-    if (src[index + 1] == '+' || src[index + 1] == '-' ||
-        isdigit(src[index + 1])) {
+    bool has_sign = false;
+    if (src[index + 1] == '+' || src[index + 1] == '-') {
+      has_sign = true;
+    }
+    if (isdigit(src[index + 1 + static_cast<size_t>(has_sign)])) {
       ++index;
       auto result = strtointeger<int32_t>(src + index, 10);
       if (result.has_error())
@@ -1036,8 +1044,11 @@ hexadecimal_string_to_float(const char *__restrict src,
   exponent *= 4;
 
   if (tolower(src[index]) == EXPONENT_MARKER) {
-    if (src[index + 1] == '+' || src[index + 1] == '-' ||
-        isdigit(src[index + 1])) {
+    bool has_sign = false;
+    if (src[index + 1] == '+' || src[index + 1] == '-') {
+      has_sign = true;
+    }
+    if (isdigit(src[index + 1 + static_cast<size_t>(has_sign)])) {
       ++index;
       auto result = strtointeger<int32_t>(src + index, 10);
       if (result.has_error())

diff  --git a/libc/test/src/stdlib/strtod_test.cpp b/libc/test/src/stdlib/strtod_test.cpp
index fee32cd5fab34..b2e73ba6e91d3 100644
--- a/libc/test/src/stdlib/strtod_test.cpp
+++ b/libc/test/src/stdlib/strtod_test.cpp
@@ -223,6 +223,11 @@ TEST_F(LlvmLibcStrToDTest, FuzzFailures) {
   // Same as above but for hex.
   run_test("0x0164810157p2047", 17, uint64_t(0x7ff0000000000000), ERANGE);
 
+  // This test ensures that only the correct number of characters is accepted.
+  // An exponent symbol followed by a sign isn't a valid exponent.
+  run_test("2e+", 1, uint64_t(0x4000000000000000));
+  run_test("0x2p+", 3, uint64_t(0x4000000000000000));
+
   // This bug was in the handling of very large exponents in the exponent
   // marker. Previously anything greater than 10,000 would be set to 10,000.
   // This caused incorrect behavior if there were more than 10,000 '0's in the

diff  --git a/libc/test/src/stdlib/strtold_test.cpp b/libc/test/src/stdlib/strtold_test.cpp
index 8db4b4e26b372..dfa1fb7e939b5 100644
--- a/libc/test/src/stdlib/strtold_test.cpp
+++ b/libc/test/src/stdlib/strtold_test.cpp
@@ -147,6 +147,16 @@ TEST_F(LlvmLibcStrToLDTest, Float64SpecificFailures) {
                             UInt128(0x8803000000000000)));
 }
 
+TEST_F(LlvmLibcStrToLDTest, Float80SpecificFailures) {
+  run_test("7777777777777777777777777777777777777777777777777777777777777777777"
+           "777777777777777777777777777777777",
+           100,
+           SELECT_CONST(uint64_t(0x54ac729b8fcaf734),
+                        (UInt128(0x414ae394dc) << 40) + UInt128(0x7e57b9a0c2),
+                        (UInt128(0x414ac729b8fcaf73) << 64) +
+                            UInt128(0x4184a3d793224129)));
+}
+
 TEST_F(LlvmLibcStrToLDTest, MaxSizeNumbers) {
   run_test("1.1897314953572317650e4932", 26,
            SELECT_CONST(uint64_t(0x7FF0000000000000),

diff  --git a/libc/utils/MPFRWrapper/CMakeLists.txt b/libc/utils/MPFRWrapper/CMakeLists.txt
index f1fcf26b6f0bd..e863867126131 100644
--- a/libc/utils/MPFRWrapper/CMakeLists.txt
+++ b/libc/utils/MPFRWrapper/CMakeLists.txt
@@ -2,6 +2,7 @@ if(LIBC_TESTS_CAN_USE_MPFR)
   add_library(libcMPFRWrapper
     MPFRUtils.cpp
     MPFRUtils.h
+    mpfr_inc.h
   )
   add_compile_options(
     -O3

diff  --git a/libc/utils/MPFRWrapper/MPFRUtils.cpp b/libc/utils/MPFRWrapper/MPFRUtils.cpp
index f19899c3098fd..d92510f2e51ce 100644
--- a/libc/utils/MPFRWrapper/MPFRUtils.cpp
+++ b/libc/utils/MPFRWrapper/MPFRUtils.cpp
@@ -19,16 +19,7 @@
 #include <memory>
 #include <stdint.h>
 
-#ifdef CUSTOM_MPFR_INCLUDER
-// Some downstream repos are monoliths carrying MPFR sources in their third
-// party directory. In such repos, including the MPFR header as
-// `#include <mpfr.h>` is either disallowed or not possible. If that is the
-// case, a file named `CustomMPFRIncluder.h` should be added through which the
-// MPFR header can be included in manner allowed in that repo.
-#include "CustomMPFRIncluder.h"
-#else
-#include <mpfr.h>
-#endif
+#include "mpfr_inc.h"
 
 template <typename T> using FPBits = __llvm_libc::fputil::FPBits<T>;
 

diff  --git a/libc/utils/MPFRWrapper/mpfr_inc.h b/libc/utils/MPFRWrapper/mpfr_inc.h
new file mode 100644
index 0000000000000..58fa7b25a9f21
--- /dev/null
+++ b/libc/utils/MPFRWrapper/mpfr_inc.h
@@ -0,0 +1,23 @@
+//===-- MPFRUtils.h ---------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_UTILS_MPFRWRAPPER_MPFR_INC_H
+#define LLVM_LIBC_UTILS_MPFRWRAPPER_MPFR_INC_H
+
+#ifdef CUSTOM_MPFR_INCLUDER
+// Some downstream repos are monoliths carrying MPFR sources in their third
+// party directory. In such repos, including the MPFR header as
+// `#include <mpfr.h>` is either disallowed or not possible. If that is the
+// case, a file named `CustomMPFRIncluder.h` should be added through which the
+// MPFR header can be included in manner allowed in that repo.
+#include "CustomMPFRIncluder.h"
+#else
+#include <mpfr.h>
+#endif
+
+#endif // LLVM_LIBC_UTILS_MPFRWRAPPER_MPFR_INC_H

diff  --git a/utils/bazel/llvm-project-overlay/libc/utils/MPFRWrapper/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/utils/MPFRWrapper/BUILD.bazel
index 893bb51aa0345..441bb047782b1 100644
--- a/utils/bazel/llvm-project-overlay/libc/utils/MPFRWrapper/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/utils/MPFRWrapper/BUILD.bazel
@@ -10,6 +10,7 @@ licenses(["notice"])
 
 cc_library(
     name = "mpfr_impl",
+    hdrs = ["mpfr_inc.h"],
     target_compatible_with = select({
         "//conditions:default": [],
         "//libc:mpfr_disable": ["@platforms//:incompatible"],