[libc-commits] [libc] 652ecb2 - [libc] add printf hex conversion

Thu Jun 16 09:51:13 PDT 2022

Author: Michael Jones
Date: 2022-06-16T09:51:09-07:00
New Revision: 652ecb251ec9fa65bc506c8934271ef231872b53

URL: https://github.com/llvm/llvm-project/commit/652ecb251ec9fa65bc506c8934271ef231872b53
DIFF: https://github.com/llvm/llvm-project/commit/652ecb251ec9fa65bc506c8934271ef231872b53.diff

LOG: [libc] add printf hex conversion

The hex converter handles the %x and %X conversions.

Reviewed By: sivachandra

Differential Revision: https://reviews.llvm.org/D126082

Added: 
    libc/src/stdio/printf_core/converter_utils.h
    libc/src/stdio/printf_core/hex_converter.h

Modified: 
    libc/src/stdio/printf_core/CMakeLists.txt
    libc/src/stdio/printf_core/char_converter.h
    libc/src/stdio/printf_core/converter.cpp
    libc/src/stdio/printf_core/converter_atlas.h
    libc/src/stdio/printf_core/core_structs.h
    libc/src/stdio/printf_core/int_converter.h
    libc/src/stdio/printf_core/string_converter.h
    libc/test/src/stdio/printf_core/converter_test.cpp
    libc/test/src/stdio/sprintf_test.cpp

Removed: 
    


################################################################################
diff  --git a/libc/src/stdio/printf_core/CMakeLists.txt b/libc/src/stdio/printf_core/CMakeLists.txt
index 71f7cbbb467da..f39fa2965de0c 100644

--- a/libc/src/stdio/printf_core/CMakeLists.txt
+++ b/libc/src/stdio/printf_core/CMakeLists.txt
@@ -57,9 +57,11 @@ add_object_library(
   HDRS
     converter.h
     converter_atlas.h
+    converter_utils.h
     string_converter.h
     char_converter.h
     int_converter.h
+    hex_converter.h
   DEPENDS
     .writer
     .core_structs

diff  --git a/libc/src/stdio/printf_core/char_converter.h b/libc/src/stdio/printf_core/char_converter.h
index bc74156b14491..73af2a471be0b 100644
--- a/libc/src/stdio/printf_core/char_converter.h
+++ b/libc/src/stdio/printf_core/char_converter.h
@@ -9,6 +9,7 @@
 #ifndef LLVM_LIBC_SRC_STDIO_PRINTF_CORE_CHAR_CONVERTER_H
 #define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_CHAR_CONVERTER_H
 
+#include "src/stdio/printf_core/converter_utils.h"
 #include "src/stdio/printf_core/core_structs.h"
 #include "src/stdio/printf_core/writer.h"
 

diff  --git a/libc/src/stdio/printf_core/converter.cpp b/libc/src/stdio/printf_core/converter.cpp
index 90390424e826f..79e43d53cbedf 100644
--- a/libc/src/stdio/printf_core/converter.cpp
+++ b/libc/src/stdio/printf_core/converter.cpp
@@ -43,7 +43,7 @@ int convert(Writer *writer, const FormatSection &to_conv) {
     // return convert_oct(writer, to_conv);
   case 'x':
   case 'X':
-    // return convert_hex(writer, to_conv);
+    return convert_hex(writer, to_conv);
   // TODO(michaelrj): add a flag to disable float point values here
   case 'f':
   case 'F':

diff  --git a/libc/src/stdio/printf_core/converter_atlas.h b/libc/src/stdio/printf_core/converter_atlas.h
index f140ce1ab4c1d..144be3383334a 100644
--- a/libc/src/stdio/printf_core/converter_atlas.h
+++ b/libc/src/stdio/printf_core/converter_atlas.h
@@ -24,6 +24,7 @@
 
 // defines convert_oct
 // defines convert_hex
+#include "src/stdio/printf_core/hex_converter.h"
 
 // TODO(michaelrj): add a flag to disable float point values here
 // defines convert_float_decimal

diff  --git a/libc/src/stdio/printf_core/converter_utils.h b/libc/src/stdio/printf_core/converter_utils.h
new file mode 100644
index 0000000000000..85895f1ac5b8c
--- /dev/null
+++ b/libc/src/stdio/printf_core/converter_utils.h
@@ -0,0 +1,56 @@
+//===-- Shared Converter Utilities for printf -------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STDIO_PRINTF_CORE_CONVERTER_UTILS_H
+#define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_CONVERTER_UTILS_H
+
+#include "src/__support/CPP/Limits.h"
+#include "src/stdio/printf_core/core_structs.h"
+
+#include <inttypes.h>
+#include <stddef.h>
+
+namespace __llvm_libc {
+namespace printf_core {
+
+inline uintmax_t apply_length_modifier(uintmax_t num, LengthModifier lm) {
+  switch (lm) {
+  case LengthModifier::none:
+    return num & cpp::NumericLimits<unsigned int>::max();
+  case LengthModifier::l:
+    return num & cpp::NumericLimits<unsigned long>::max();
+  case LengthModifier::ll:
+  case LengthModifier::L:
+    return num & cpp::NumericLimits<unsigned long long>::max();
+  case LengthModifier::h:
+    return num & cpp::NumericLimits<unsigned short>::max();
+  case LengthModifier::hh:
+    return num & cpp::NumericLimits<unsigned char>::max();
+  case LengthModifier::z:
+    return num & cpp::NumericLimits<size_t>::max();
+  case LengthModifier::t:
+    // We don't have unsigned ptr
diff  so uintptr_t is used, since we need an
+    // unsigned type and ptr
diff  is usually the same size as a pointer.
+    static_assert(sizeof(ptr
diff _t) == sizeof(uintptr_t));
+    return num & cpp::NumericLimits<uintptr_t>::max();
+  case LengthModifier::j:
+    return num; // j is intmax, so no mask is necessary.
+  }
+}
+
+#define RET_IF_RESULT_NEGATIVE(func)                                           \
+  {                                                                            \
+    int result = (func);                                                       \
+    if (result < 0)                                                            \
+      return result;                                                           \
+  }
+
+} // namespace printf_core
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_STDIO_PRINTF_CORE_CONVERTER_UTILS_H

diff  --git a/libc/src/stdio/printf_core/core_structs.h b/libc/src/stdio/printf_core/core_structs.h
index 93c92f1fcf3d7..9ecc25bb3ea43 100644
--- a/libc/src/stdio/printf_core/core_structs.h
+++ b/libc/src/stdio/printf_core/core_structs.h
@@ -78,14 +78,6 @@ struct FormatSection {
     return true;
   }
 };
-
-#define RET_IF_RESULT_NEGATIVE(func)                                           \
-  {                                                                            \
-    int result = (func);                                                       \
-    if (result < 0)                                                            \
-      return result;                                                           \
-  }
-
 } // namespace printf_core
 } // namespace __llvm_libc
 

diff  --git a/libc/src/stdio/printf_core/hex_converter.h b/libc/src/stdio/printf_core/hex_converter.h
new file mode 100644
index 0000000000000..4c8239cefb63a
--- /dev/null
+++ b/libc/src/stdio/printf_core/hex_converter.h
@@ -0,0 +1,133 @@
+//===-- Hexadecimal Converter for printf ------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STDIO_PRINTF_CORE_HEX_CONVERTER_H
+#define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_HEX_CONVERTER_H
+
+#include "src/stdio/printf_core/converter_utils.h"
+#include "src/stdio/printf_core/core_structs.h"
+#include "src/stdio/printf_core/writer.h"
+
+#include <inttypes.h>
+#include <stddef.h>
+
+namespace __llvm_libc {
+namespace printf_core {
+
+int convert_hex(Writer *writer, const FormatSection &to_conv) {
+  // This approximates the number of digits it takes to represent a hexadecimal
+  // value of a certain number of bits. Each hex digit represents 4 bits, so the
+  // exact value is the number of bytes multiplied by 2.
+  static constexpr size_t BUFF_LEN = sizeof(uintmax_t) * 2;
+  uintmax_t num = to_conv.conv_val_raw;
+  char buffer[BUFF_LEN];
+
+  // All of the characters will be defined relative to variable a, which will be
+  // the appropriate case based on the name of the conversion.
+  char a;
+  if (to_conv.conv_name == 'x')
+    a = 'a';
+  else
+    a = 'A';
+
+  num = apply_length_modifier(num, to_conv.length_modifier);
+
+  // buff_cur can never reach 0, since the buffer is sized to always be able to
+  // contain the whole integer. This means that bounds checking it should be
+  // unnecessary.
+  size_t buff_cur = BUFF_LEN;
+  for (; num > 0 /* && buff_cur > 0 */; --buff_cur, num /= 16)
+    buffer[buff_cur - 1] =
+        ((num % 16) > 9) ? ((num % 16) - 10 + a) : ((num % 16) + '0');
+
+  size_t digits_written = BUFF_LEN - buff_cur;
+
+  // these are signed to prevent underflow due to negative values. The eventual
+  // values will always be non-negative.
+  int zeroes;
+  int spaces;
+
+  // prefix is "0x"
+  int prefix_len;
+  char prefix[2];
+  if ((to_conv.flags & FormatFlags::ALTERNATE_FORM) ==
+      FormatFlags::ALTERNATE_FORM) {
+    prefix_len = 2;
+    prefix[0] = '0';
+    prefix[1] = a + ('x' - 'a');
+  } else {
+    prefix_len = 0;
+    prefix[0] = 0;
+  }
+
+  // negative precision indicates that it was not specified.
+  if (to_conv.precision < 0) {
+    if ((to_conv.flags &
+         (FormatFlags::LEADING_ZEROES | FormatFlags::LEFT_JUSTIFIED)) ==
+        FormatFlags::LEADING_ZEROES) {
+      // if this conv has flag 0 but not - and no specified precision, it's
+      // padded with 0's instead of spaces identically to if precision =
+      // min_width - (2 if prefix). For example: ("%#04x", 15) -> "0x0f"
+      zeroes = to_conv.min_width - digits_written - prefix_len;
+      if (zeroes < 0)
+        zeroes = 0;
+      spaces = 0;
+    } else if (digits_written < 1) {
+      // if no precision is specified, precision defaults to 1. This means that
+      // if the integer passed to the conversion is 0, a 0 will be printed.
+      // Example: ("%3x", 0) -> "  0"
+      zeroes = 1;
+      spaces = to_conv.min_width - zeroes - prefix_len;
+    } else {
+      // If there are enough digits to pass over the precision, just write the
+      // number, padded by spaces.
+      zeroes = 0;
+      spaces = to_conv.min_width - digits_written - prefix_len;
+    }
+  } else {
+    // if precision was specified, possibly write zeroes, and possibly write
+    // spaces. Example: ("%5.4x", 0x10000) -> "10000"
+    // If the check for if zeroes is negative was not there, spaces would be
+    // incorrectly evaluated as 1.
+    zeroes = to_conv.precision - digits_written; // a negative value means 0
+    if (zeroes < 0)
+      zeroes = 0;
+    spaces = to_conv.min_width - zeroes - digits_written - prefix_len;
+  }
+  if (spaces < 0)
+    spaces = 0;
+
+  if ((to_conv.flags & FormatFlags::LEFT_JUSTIFIED) ==
+      FormatFlags::LEFT_JUSTIFIED) {
+    // if left justified it goes prefix zeroes digits spaces
+    if (prefix[0] != 0)
+      RET_IF_RESULT_NEGATIVE(writer->write(prefix, 2));
+    if (zeroes > 0)
+      RET_IF_RESULT_NEGATIVE(writer->write_chars('0', zeroes));
+    if (digits_written > 0)
+      RET_IF_RESULT_NEGATIVE(writer->write(buffer + buff_cur, digits_written));
+    if (spaces > 0)
+      RET_IF_RESULT_NEGATIVE(writer->write_chars(' ', spaces));
+  } else {
+    // else it goes spaces prefix zeroes digits
+    if (spaces > 0)
+      RET_IF_RESULT_NEGATIVE(writer->write_chars(' ', spaces));
+    if (prefix[0] != 0)
+      RET_IF_RESULT_NEGATIVE(writer->write(prefix, 2));
+    if (zeroes > 0)
+      RET_IF_RESULT_NEGATIVE(writer->write_chars('0', zeroes));
+    if (digits_written > 0)
+      RET_IF_RESULT_NEGATIVE(writer->write(buffer + buff_cur, digits_written));
+  }
+  return 0;
+}
+
+} // namespace printf_core
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_STDIO_PRINTF_CORE_HEX_CONVERTER_H

diff  --git a/libc/src/stdio/printf_core/int_converter.h b/libc/src/stdio/printf_core/int_converter.h
index ad2dad6b7284c..33234c0f372ac 100644
--- a/libc/src/stdio/printf_core/int_converter.h
+++ b/libc/src/stdio/printf_core/int_converter.h
@@ -9,7 +9,7 @@
 #ifndef LLVM_LIBC_SRC_STDIO_PRINTF_CORE_INT_CONVERTER_H
 #define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_INT_CONVERTER_H
 
-#include "src/__support/CPP/Limits.h"
+#include "src/stdio/printf_core/converter_utils.h"
 #include "src/stdio/printf_core/core_structs.h"
 #include "src/stdio/printf_core/writer.h"
 
@@ -52,37 +52,7 @@ int inline convert_int(Writer *writer, const FormatSection &to_conv) {
     }
   }
 
-  switch (to_conv.length_modifier) {
-  case LengthModifier::none:
-    num = num & cpp::NumericLimits<unsigned int>::max();
-    break;
-
-  case LengthModifier::l:
-    num = num & cpp::NumericLimits<unsigned long>::max();
-    break;
-  case LengthModifier::ll:
-  case LengthModifier::L:
-    num = num & cpp::NumericLimits<unsigned long long>::max();
-    break;
-  case LengthModifier::h:
-    num = num & cpp::NumericLimits<unsigned short>::max();
-    break;
-  case LengthModifier::hh:
-    num = num & cpp::NumericLimits<unsigned char>::max();
-    break;
-  case LengthModifier::z:
-    num = num & cpp::NumericLimits<size_t>::max();
-    break;
-  case LengthModifier::t:
-    // We don't have unsigned ptr
diff  so uintptr_t is used, since we need an
-    // unsigned type and ptr
diff  is usually the same size as a pointer.
-    static_assert(sizeof(ptr
diff _t) == sizeof(uintptr_t));
-    num = num & cpp::NumericLimits<uintptr_t>::max();
-    break;
-  case LengthModifier::j:
-    // j is intmax, so no mask is necessary.
-    break;
-  }
+  num = apply_length_modifier(num, to_conv.length_modifier);
 
   // buff_cur can never reach 0, since the buffer is sized to always be able to
   // contain the whole integer. This means that bounds checking it should be

diff  --git a/libc/src/stdio/printf_core/string_converter.h b/libc/src/stdio/printf_core/string_converter.h
index 888854d44853e..267310701e6c8 100644
--- a/libc/src/stdio/printf_core/string_converter.h
+++ b/libc/src/stdio/printf_core/string_converter.h
@@ -9,6 +9,7 @@
 #ifndef LLVM_LIBC_SRC_STDIO_PRINTF_CORE_STRING_CONVERTER_H
 #define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_STRING_CONVERTER_H
 
+#include "src/stdio/printf_core/converter_utils.h"
 #include "src/stdio/printf_core/core_structs.h"
 #include "src/stdio/printf_core/writer.h"
 

diff  --git a/libc/test/src/stdio/printf_core/converter_test.cpp b/libc/test/src/stdio/printf_core/converter_test.cpp
index 2e95d60d71a9e..d0025618ca314 100644
--- a/libc/test/src/stdio/printf_core/converter_test.cpp
+++ b/libc/test/src/stdio/printf_core/converter_test.cpp
@@ -195,3 +195,31 @@ TEST_F(LlvmLibcPrintfConverterTest, IntConversionSimple) {
   ASSERT_STREQ(str, "12345");
   ASSERT_EQ(writer.get_chars_written(), 5);
 }
+
+// This needs to be switched to the new testing layout, but that's still in the
+// int patch so I need to land that first. This is what I get for not keeping my
+// patches small and focused.
+TEST(LlvmLibcPrintfConverterTest, HexConversion) {
+  char str[20];
+  __llvm_libc::printf_core::StringWriter str_writer(str);
+  __llvm_libc::printf_core::Writer writer(
+      reinterpret_cast<void *>(&str_writer),
+      __llvm_libc::printf_core::write_to_string);
+
+  __llvm_libc::printf_core::FormatSection left_justified_conv;
+  left_justified_conv.has_conv = true;
+  left_justified_conv.raw_string = "%#018x";
+  left_justified_conv.raw_len = 6;
+  left_justified_conv.conv_name = 'x';
+  left_justified_conv.flags =
+      static_cast<__llvm_libc::printf_core::FormatFlags>(
+          __llvm_libc::printf_core::FormatFlags::ALTERNATE_FORM |
+          __llvm_libc::printf_core::FormatFlags::LEADING_ZEROES);
+  left_justified_conv.min_width = 18;
+  left_justified_conv.conv_val_raw = 0x123456ab;
+  __llvm_libc::printf_core::convert(&writer, left_justified_conv);
+
+  str_writer.terminate();
+  ASSERT_STREQ(str, "0x00000000123456ab");
+  ASSERT_EQ(writer.get_chars_written(), 18);
+}

diff  --git a/libc/test/src/stdio/sprintf_test.cpp b/libc/test/src/stdio/sprintf_test.cpp
index 38407de9b0f13..974674eb2ea44 100644
--- a/libc/test/src/stdio/sprintf_test.cpp
+++ b/libc/test/src/stdio/sprintf_test.cpp
@@ -221,6 +221,131 @@ TEST(LlvmLibcSPrintfTest, IntConv) {
   ASSERT_STREQ(buff, " 0127 68719476736 +002  ");
 }
 
+TEST(LlvmLibcSPrintfTest, HexConv) {
+  char buff[64];
+  int written;
+
+  // Basic Tests.
+
+  written = __llvm_libc::sprintf(buff, "%x", 0x123a);
+  EXPECT_EQ(written, 4);
+  ASSERT_STREQ(buff, "123a");
+
+  written = __llvm_libc::sprintf(buff, "%X", 0x456b);
+  EXPECT_EQ(written, 4);
+  ASSERT_STREQ(buff, "456B");
+
+  // Length Modifier Tests.
+
+  written = __llvm_libc::sprintf(buff, "%hhx", 0x10001);
+  EXPECT_EQ(written, 1);
+  ASSERT_STREQ(buff, "1");
+
+  written = __llvm_libc::sprintf(buff, "%llx", 0xffffffffffffffffull);
+  EXPECT_EQ(written, 16);
+  ASSERT_STREQ(buff, "ffffffffffffffff"); // ull max
+
+  written = __llvm_libc::sprintf(buff, "%tX", ~ptr
diff _t(0));
+  if (sizeof(ptr
diff _t) == 8) {
+    EXPECT_EQ(written, 16);
+    ASSERT_STREQ(buff, "FFFFFFFFFFFFFFFF");
+  } else if (sizeof(ptr
diff _t) == 4) {
+    EXPECT_EQ(written, 8);
+    ASSERT_STREQ(buff, "FFFFFFFF");
+  }
+
+  // Min Width Tests.
+
+  written = __llvm_libc::sprintf(buff, "%4x", 0x789);
+  EXPECT_EQ(written, 4);
+  ASSERT_STREQ(buff, " 789");
+
+  written = __llvm_libc::sprintf(buff, "%2X", 0x987);
+  EXPECT_EQ(written, 3);
+  ASSERT_STREQ(buff, "987");
+
+  // Precision Tests.
+
+  written = __llvm_libc::sprintf(buff, "%x", 0);
+  EXPECT_EQ(written, 1);
+  ASSERT_STREQ(buff, "0");
+
+  written = __llvm_libc::sprintf(buff, "%.0x", 0);
+  EXPECT_EQ(written, 0);
+  ASSERT_STREQ(buff, "");
+
+  written = __llvm_libc::sprintf(buff, "%.5x", 0x1F3);
+  EXPECT_EQ(written, 5);
+  ASSERT_STREQ(buff, "001f3");
+
+  written = __llvm_libc::sprintf(buff, "%.2x", 0x135);
+  EXPECT_EQ(written, 3);
+  ASSERT_STREQ(buff, "135");
+
+  // Flag Tests.
+
+  written = __llvm_libc::sprintf(buff, "%-5x", 0x246);
+  EXPECT_EQ(written, 5);
+  ASSERT_STREQ(buff, "246  ");
+
+  written = __llvm_libc::sprintf(buff, "%#x", 0xd3f);
+  EXPECT_EQ(written, 5);
+  ASSERT_STREQ(buff, "0xd3f");
+
+  written = __llvm_libc::sprintf(buff, "%#X", 0xE40);
+  EXPECT_EQ(written, 5);
+  ASSERT_STREQ(buff, "0XE40");
+
+  written = __llvm_libc::sprintf(buff, "%05x", 0x470);
+  EXPECT_EQ(written, 5);
+  ASSERT_STREQ(buff, "00470");
+
+  written = __llvm_libc::sprintf(buff, "%0#6x", 0x8c3);
+  EXPECT_EQ(written, 6);
+  ASSERT_STREQ(buff, "0x08c3");
+
+  written = __llvm_libc::sprintf(buff, "%-#6x", 0x5f0);
+  EXPECT_EQ(written, 6);
+  ASSERT_STREQ(buff, "0x5f0 ");
+
+  // Combined Tests.
+
+  written = __llvm_libc::sprintf(buff, "%#-07x", 0x703);
+  EXPECT_EQ(written, 7);
+  ASSERT_STREQ(buff, "0x703  ");
+
+  written = __llvm_libc::sprintf(buff, "%7.5x", 0x814);
+  EXPECT_EQ(written, 7);
+  ASSERT_STREQ(buff, "  00814");
+
+  written = __llvm_libc::sprintf(buff, "%#9.5X", 0x9d4);
+  EXPECT_EQ(written, 9);
+  ASSERT_STREQ(buff, "  0X009D4");
+
+  written = __llvm_libc::sprintf(buff, "%-7.5x", 0x260);
+  EXPECT_EQ(written, 7);
+  ASSERT_STREQ(buff, "00260  ");
+
+  written = __llvm_libc::sprintf(buff, "%5.4x", 0x10000);
+  EXPECT_EQ(written, 5);
+  ASSERT_STREQ(buff, "10000");
+
+  // Multiple Conversion Tests.
+
+  written = __llvm_libc::sprintf(buff, "%10X %-#10x", 0x45b, 0x789);
+  EXPECT_EQ(written, 21);
+  ASSERT_STREQ(buff, "       45B 0x789     ");
+
+  written = __llvm_libc::sprintf(buff, "%-5.4x%#.4x", 0x75, 0x25);
+  EXPECT_EQ(written, 11);
+  ASSERT_STREQ(buff, "0075 0x0025");
+
+  written = __llvm_libc::sprintf(buff, "%04hhX %#.5llx %-6.3zX", 256 + 0x7f,
+                                 0x1000000000ll, size_t(2));
+  EXPECT_EQ(written, 24);
+  ASSERT_STREQ(buff, "007F 0x1000000000 002   ");
+}
+
 #ifndef LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE
 TEST(LlvmLibcSPrintfTest, IndexModeParsing) {
   char buff[64];