[libc-commits] [libc] [libc] Add simple long double to printf float fuzz (PR #68449)

Fri Oct 6 14:04:50 PDT 2023

https://github.com/michaelrj-google created https://github.com/llvm/llvm-project/pull/68449

Recent testing has uncovered some hard-to-find bugs in printf's long
double support. This patch adds an extra long double path to the fuzzer
with minimal extra effort. While a more thorough long double fuzzer
would be useful, it would need to handle the non-standard cases of
80-bit long doubles such as unnormal and pseudo-denormal numbers. For
that reason, a standalone long double fuzzer is left for future
development.

This patch also fixes the hex float converter so that a zero mantissa
keeps an exponent of 0, and adds a sprintf test checking that "%La" of
0.0L prints "0x0p+0".


From 169d3e1d36ff50b1ef6c68a575d8137ae63ea78f Mon Sep 17 00:00:00 2001
From: Michael Jones <michaelrj at google.com>
Date: Fri, 6 Oct 2023 14:00:48 -0700
Subject: [PATCH] [libc] Add simple long double to printf float fuzz

Recent testing has uncovered some hard-to-find bugs in printf's long
double support. This patch adds an extra long double path to the fuzzer
with minimal extra effort. While a more thorough long double fuzzer
would be useful, it would need to handle the non-standard cases of
80-bit long doubles such as unnormal and pseudo-denormal numbers. For
that reason, a standalone long double fuzzer is left for future
development.

This patch also fixes the hex float converter so that a zero mantissa
keeps an exponent of 0, and adds a sprintf test checking that "%La" of
0.0L prints "0x0p+0".
---
 libc/fuzzing/stdio/printf_float_conv_fuzz.cpp | 30 +++++++++++++++----
 .../stdio/printf_core/float_hex_converter.h   |  5 ++--
 libc/test/src/stdio/sprintf_test.cpp          |  3 ++
 3 files changed, 30 insertions(+), 8 deletions(-)

diff --git a/libc/fuzzing/stdio/printf_float_conv_fuzz.cpp b/libc/fuzzing/stdio/printf_float_conv_fuzz.cpp
index dd3902eebda6171..798e1a3866fddf3 100644
--- a/libc/fuzzing/stdio/printf_float_conv_fuzz.cpp
+++ b/libc/fuzzing/stdio/printf_float_conv_fuzz.cpp
@@ -29,6 +29,14 @@ inline bool simple_streq(char *first, char *second, int length) {
   return true;
 }
 
+inline int simple_strlen(const char *str) {
+  int i = 0;
+  for (; *str; ++str, ++i) {
+    ;
+  }
+  return i;
+}
+
 enum class TestResult {
   Success,
   BufferSizeFailed,
@@ -36,7 +44,8 @@ enum class TestResult {
   StringsNotEqual,
 };
 
-inline TestResult test_vals(const char *fmt, double num, int prec, int width) {
+template <typename F>
+inline TestResult test_vals(const char *fmt, F num, int prec, int width) {
   // Call snprintf on a nullptr to get the buffer size.
   int buffer_size = LIBC_NAMESPACE::snprintf(nullptr, 0, fmt, width, prec, num);
 
@@ -70,10 +79,7 @@ inline TestResult test_vals(const char *fmt, double num, int prec, int width) {
 }
 
 constexpr char const *fmt_arr[] = {
-    "%*.*f",
-    "%*.*e",
-    "%*.*g",
-    "%*.*a",
+    "%*.*f", "%*.*e", "%*.*g", "%*.*a", "%*.*Lf", "%*.*Le", "%*.*Lg", "%*.*La",
 };
 
 extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
@@ -100,6 +106,12 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
 
   num = LIBC_NAMESPACE::fputil::FPBits<double>(raw_num).get_val();
 
+  // While we could create a "ld_raw_num" from additional bytes, it's much
+  // easier to stick with simply casting num to long double. This avoids the
+  // issues around 80 bit long doubles, especially unnormal and pseudo-denormal
+  // numbers, which MPFR doesn't handle well.
+  long double ld_num = static_cast<long double>(num);
+
   if (width > MAX_SIZE) {
     width = MAX_SIZE;
   } else if (width < -MAX_SIZE) {
@@ -114,7 +126,13 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
 
   for (size_t cur_fmt = 0; cur_fmt < sizeof(fmt_arr) / sizeof(char *);
        ++cur_fmt) {
-    TestResult result = test_vals(fmt_arr[cur_fmt], num, prec, width);
+    int fmt_len = simple_strlen(fmt_arr[cur_fmt]);
+    TestResult result;
+    if (fmt_arr[cur_fmt][fmt_len - 2] == 'L') {
+      result = test_vals<long double>(fmt_arr[cur_fmt], ld_num, prec, width);
+    } else {
+      result = test_vals<double>(fmt_arr[cur_fmt], num, prec, width);
+    }
     if (result != TestResult::Success) {
       __builtin_trap();
     }
diff --git a/libc/src/stdio/printf_core/float_hex_converter.h b/libc/src/stdio/printf_core/float_hex_converter.h
index e264af9844bd244..6a980a74d4a6f2f 100644
--- a/libc/src/stdio/printf_core/float_hex_converter.h
+++ b/libc/src/stdio/printf_core/float_hex_converter.h
@@ -75,8 +75,9 @@ LIBC_INLINE int convert_float_hex_exp(Writer *writer,
 
   // This is to handle situations where the mantissa isn't an even number of hex
   // digits. This is primarily relevant for x86 80 bit long doubles, which have
-  // 63 bit mantissas.
-  if (mantissa_width % BITS_IN_HEX_DIGIT != 0) {
+  // 63 bit mantissas. In the case where the mantissa is 0, however, the
+  // exponent should stay as 0.
+  if (mantissa_width % BITS_IN_HEX_DIGIT != 0 && mantissa > 0) {
     exponent -= mantissa_width % BITS_IN_HEX_DIGIT;
   }
 
diff --git a/libc/test/src/stdio/sprintf_test.cpp b/libc/test/src/stdio/sprintf_test.cpp
index b7e8b7548588107..f3d5dd698cbea44 100644
--- a/libc/test/src/stdio/sprintf_test.cpp
+++ b/libc/test/src/stdio/sprintf_test.cpp
@@ -748,6 +748,9 @@ TEST_F(LlvmLibcSPrintfTest, FloatHexExpConv) {
   written = LIBC_NAMESPACE::sprintf(buff, "%.5a", nan);
   ASSERT_STREQ_LEN(written, buff, "nan");
 
+  written = LIBC_NAMESPACE::sprintf(buff, "%La", 0.0L);
+  ASSERT_STREQ_LEN(written, buff, "0x0p+0");
+
   written = LIBC_NAMESPACE::sprintf(buff, "%.1La", 0.1L);
 #if defined(SPECIAL_X86_LONG_DOUBLE)
   ASSERT_STREQ_LEN(written, buff, "0xc.dp-7");