[libc-commits] [libc] bf279f9 - [libc] Prevent printf index mode crashes

Wed Feb 22 16:27:30 PST 2023

Author: Michael Jones
Date: 2023-02-22T16:27:26-08:00
New Revision: bf279f903b3f0e952d06af87d679ca7ae65234e3

URL: https://github.com/llvm/llvm-project/commit/bf279f903b3f0e952d06af87d679ca7ae65234e3
DIFF: https://github.com/llvm/llvm-project/commit/bf279f903b3f0e952d06af87d679ca7ae65234e3.diff

LOG: [libc] Prevent printf index mode crashes

The posix standard defines an alternate mode for printf where the
conversions also have an index that describes which argument to select.
Due to how variadic arguments work in C, to reach the nth argument all
n-1 previous arguments must be read with their correct types. If the
format string does not specify the types for a continuous set of
arguments, then the arguments after the discontinuity cannot be safely
read. This patch causes all conversions requesting an argument that
comes after a gap be treated as raw (i.e. the conversion string is
printed literally).

Reviewed By: sivachandra

Differential Revision: https://reviews.llvm.org/D143782

Added: 
    

Modified: 
    libc/src/stdio/printf_core/core_structs.h
    libc/src/stdio/printf_core/parser.cpp
    libc/src/stdio/printf_core/parser.h
    libc/test/src/stdio/printf_core/parser_test.cpp

Removed: 
    


################################################################################
diff  --git a/libc/src/stdio/printf_core/core_structs.h b/libc/src/stdio/printf_core/core_structs.h
index 47d488cd8cf46..771d5a8cff437 100644

--- a/libc/src/stdio/printf_core/core_structs.h
+++ b/libc/src/stdio/printf_core/core_structs.h
@@ -77,7 +77,7 @@ struct FormatSection {
   }
 };
 
-enum PrimaryType : uint8_t { Integer = 0, Float = 1, Pointer = 2 };
+enum PrimaryType : uint8_t { Unknown = 0, Float = 1, Pointer = 2, Integer = 3 };
 
 // TypeDesc stores the information about a type that is relevant to printf in
 // a relatively compact manner.
@@ -91,7 +91,7 @@ struct TypeDesc {
 
 template <typename T> LIBC_INLINE constexpr TypeDesc type_desc_from_type() {
   if constexpr (cpp::is_same_v<T, void>) {
-    return TypeDesc{0, PrimaryType::Integer};
+    return TypeDesc{0, PrimaryType::Unknown};
   } else {
     constexpr bool isPointer = cpp::is_pointer_v<T>;
     constexpr bool isFloat = cpp::is_floating_point_v<T>;

diff  --git a/libc/src/stdio/printf_core/parser.cpp b/libc/src/stdio/printf_core/parser.cpp
index cef335292c3bd..85b38da3db8bf 100644
--- a/libc/src/stdio/printf_core/parser.cpp
+++ b/libc/src/stdio/printf_core/parser.cpp
@@ -13,7 +13,9 @@
 #include "src/__support/arg_list.h"
 
 #include "src/__support/CPP/bit.h"
+#include "src/__support/CPP/optional.h"
 #include "src/__support/CPP/string_view.h"
+#include "src/__support/CPP/type_traits.h"
 #include "src/__support/FPUtil/FPBits.h"
 #include "src/__support/ctype_utils.h"
 #include "src/__support/str_to_integer.h"
@@ -22,10 +24,29 @@
 namespace __llvm_libc {
 namespace printf_core {
 
+template <typename T> struct int_type_of {
+  using type = T;
+};
+template <> struct int_type_of<double> {
+  using type = fputil::FPBits<double>::UIntType;
+};
+template <> struct int_type_of<long double> {
+  using type = fputil::FPBits<long double>::UIntType;
+};
+template <typename T> using int_type_of_v = typename int_type_of<T>::type;
+
 #ifndef LIBC_COPT_PRINTF_DISABLE_INDEX_MODE
-#define GET_ARG_VAL_SIMPLEST(arg_type, index) get_arg_value<arg_type>(index)
+#define WRITE_ARG_VAL_SIMPLEST(dst, arg_type, index)                           \
+  {                                                                            \
+    auto temp = get_arg_value<arg_type>(index);                                \
+    if (!temp.has_value()) {                                                   \
+      section.has_conv = false;                                                \
+    }                                                                          \
+    dst = cpp::bit_cast<int_type_of_v<arg_type>>(temp.value());                \
+  }
 #else
-#define GET_ARG_VAL_SIMPLEST(arg_type, _) get_next_arg_value<arg_type>()
+#define WRITE_ARG_VAL_SIMPLEST(dst, arg_type, _)                               \
+  dst = cpp::bit_cast<int_type_of_v<arg_type>>(get_next_arg_value<arg_type>())
 #endif // LIBC_COPT_PRINTF_DISABLE_INDEX_MODE
 
 FormatSection Parser::get_next_section() {
@@ -49,7 +70,7 @@ FormatSection Parser::get_next_section() {
     if (str[cur_pos] == '*') {
       ++cur_pos;
 
-      section.min_width = GET_ARG_VAL_SIMPLEST(int, parse_index(&cur_pos));
+      WRITE_ARG_VAL_SIMPLEST(section.min_width, int, parse_index(&cur_pos));
     } else if (internal::isdigit(str[cur_pos])) {
       auto result = internal::strtointeger<int>(str + cur_pos, 10);
       section.min_width = result.value;
@@ -70,7 +91,7 @@ FormatSection Parser::get_next_section() {
       if (str[cur_pos] == '*') {
         ++cur_pos;
 
-        section.precision = GET_ARG_VAL_SIMPLEST(int, parse_index(&cur_pos));
+        WRITE_ARG_VAL_SIMPLEST(section.precision, int, parse_index(&cur_pos));
 
       } else if (internal::isdigit(str[cur_pos])) {
         auto result = internal::strtointeger<int>(str + cur_pos, 10);
@@ -87,7 +108,7 @@ FormatSection Parser::get_next_section() {
     case ('%'):
       break;
     case ('c'):
-      section.conv_val_raw = GET_ARG_VAL_SIMPLEST(int, conv_index);
+      WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, int, conv_index);
       break;
     case ('d'):
     case ('i'):
@@ -99,24 +120,28 @@ FormatSection Parser::get_next_section() {
       case (LengthModifier::hh):
       case (LengthModifier::h):
       case (LengthModifier::none):
-        section.conv_val_raw = GET_ARG_VAL_SIMPLEST(int, conv_index);
+        WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, int, conv_index);
         break;
       case (LengthModifier::l):
-        section.conv_val_raw = GET_ARG_VAL_SIMPLEST(long, conv_index);
+        WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, long, conv_index);
         break;
       case (LengthModifier::ll):
       case (LengthModifier::L): // This isn't in the standard, but is in other
                                 // libc implementations.
-        section.conv_val_raw = GET_ARG_VAL_SIMPLEST(long long, conv_index);
+
+        WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, long long, conv_index);
         break;
       case (LengthModifier::j):
-        section.conv_val_raw = GET_ARG_VAL_SIMPLEST(intmax_t, conv_index);
+
+        WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, intmax_t, conv_index);
         break;
       case (LengthModifier::z):
-        section.conv_val_raw = GET_ARG_VAL_SIMPLEST(size_t, conv_index);
+
+        WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, size_t, conv_index);
         break;
       case (LengthModifier::t):
-        section.conv_val_raw = GET_ARG_VAL_SIMPLEST(ptr
diff _t, conv_index);
+
+        WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, ptr
diff _t, conv_index);
         break;
       }
       break;
@@ -129,13 +154,11 @@ FormatSection Parser::get_next_section() {
     case ('A'):
     case ('g'):
     case ('G'):
-      if (lm != LengthModifier::L)
-        section.conv_val_raw =
-            cpp::bit_cast<uint64_t>(GET_ARG_VAL_SIMPLEST(double, conv_index));
-      else
-        section.conv_val_raw =
-            cpp::bit_cast<fputil::FPBits<long double>::UIntType>(
-                GET_ARG_VAL_SIMPLEST(long double, conv_index));
+      if (lm != LengthModifier::L) {
+        WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, double, conv_index);
+      } else {
+        WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, long double, conv_index);
+      }
       break;
 #endif // LIBC_COPT_PRINTF_DISABLE_FLOAT
 #ifndef LIBC_COPT_PRINTF_DISABLE_WRITE_INT
@@ -143,7 +166,7 @@ FormatSection Parser::get_next_section() {
 #endif // LIBC_COPT_PRINTF_DISABLE_WRITE_INT
     case ('p'):
     case ('s'):
-      section.conv_val_ptr = GET_ARG_VAL_SIMPLEST(void *, conv_index);
+      WRITE_ARG_VAL_SIMPLEST(section.conv_val_ptr, void *, conv_index);
       break;
     default:
       // if the conversion is undefined, change this to a raw section.
@@ -300,7 +323,8 @@ TypeDesc Parser::get_type_desc(size_t index) {
       // has been for skipping past this conversion properly to avoid
       // weirdness with %%.
       if (conv_index == 0) {
-        ++local_pos;
+        if (str[local_pos] != '\0')
+          ++local_pos;
         continue;
       }
 
@@ -380,12 +404,12 @@ TypeDesc Parser::get_type_desc(size_t index) {
       ++local_pos;
   }
 
-  // If there is no size for the requested index, then just guess that it's an
-  // int.
-  return type_desc_from_type<int>();
+  // If there is no size for the requested index, then it's unknown. Return
+  // void.
+  return type_desc_from_type<void>();
 }
 
-void Parser::args_to_index(size_t index) {
+bool Parser::args_to_index(size_t index) {
   if (args_index > index) {
     args_index = 1;
     args_cur = args_start;
@@ -399,6 +423,13 @@ void Parser::args_to_index(size_t index) {
     if (cur_type_desc == type_desc_from_type<void>())
       cur_type_desc = get_type_desc(args_index);
 
+    // A type of void represents the type being unknown. If the type for the
+    // requested index isn't in the desc_arr and isn't found by parsing the
+    // string, then then advancing to the requested index is impossible. In that
+    // case the function returns false.
+    if (cur_type_desc == type_desc_from_type<void>())
+      return false;
+
     if (cur_type_desc == type_desc_from_type<uint32_t>())
       args_cur.next_var<uint32_t>();
     else if (cur_type_desc == type_desc_from_type<uint64_t>())
@@ -418,6 +449,7 @@ void Parser::args_to_index(size_t index) {
 
     ++args_index;
   }
+  return true;
 }
 
 #endif // LIBC_COPT_PRINTF_DISABLE_INDEX_MODE

diff  --git a/libc/src/stdio/printf_core/parser.h b/libc/src/stdio/printf_core/parser.h
index 3aedbe0fac2be..50a1d9df4fcbd 100644
--- a/libc/src/stdio/printf_core/parser.h
+++ b/libc/src/stdio/printf_core/parser.h
@@ -9,6 +9,7 @@
 #ifndef LLVM_LIBC_SRC_STDIO_PRINTF_CORE_PARSER_H
 #define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_PARSER_H
 
+#include "src/__support/CPP/optional.h"
 #include "src/__support/CPP/type_traits.h"
 #include "src/__support/arg_list.h"
 #include "src/__support/common.h"
@@ -47,7 +48,7 @@ class Parser {
   // TypeDesc objects, which store the size as well as minimal type information.
   // This is necessary because some systems separate the floating point and
   // integer values in va_args.
-  TypeDesc desc_arr[DESC_ARR_LEN] = {{0, Integer}};
+  TypeDesc desc_arr[DESC_ARR_LEN] = {type_desc_from_type<void>()};
 
   // TODO: Look into object stores for optimization.
 
@@ -106,10 +107,18 @@ class Parser {
 
   // get_arg_value gets the value from the arg list at index (starting at 1).
   // This may require parsing the format string. An index of 0 is interpreted as
-  // the next value.
-  template <class T> LIBC_INLINE T get_arg_value(size_t index) {
-    if (!(index == 0 || index == args_index))
-      args_to_index(index);
+  // the next value. If the format string is not valid, it may have gaps in its
+  // indexes. Requesting the value for any index after a gap will fail, since
+  // the arg list must be read in order and with the correct types.
+  template <class T> LIBC_INLINE cpp::optional<T> get_arg_value(size_t index) {
+    if (!(index == 0 || index == args_index)) {
+      bool success = args_to_index(index);
+      if (!success) {
+        // If we can't get to this index, then the value of the arg can't be
+        // found.
+        return cpp::optional<T>();
+      }
+    }
 
     set_type_desc(index, type_desc_from_type<T>());
 
@@ -122,7 +131,7 @@ class Parser {
   // It moves cur_args to the index requested so the the appropriate value may
   // be read. This may involve parsing the format string, and is in the worst
   // case an O(n^2) operation.
-  void args_to_index(size_t index);
+  bool args_to_index(size_t index);
 
   // get_type_desc assumes that this format string uses index mode. It iterates
   // through the format string until it finds a format specifier that defines

diff  --git a/libc/test/src/stdio/printf_core/parser_test.cpp b/libc/test/src/stdio/printf_core/parser_test.cpp
index 522c51c574555..b0a1cfc5b8a00 100644
--- a/libc/test/src/stdio/printf_core/parser_test.cpp
+++ b/libc/test/src/stdio/printf_core/parser_test.cpp
@@ -474,4 +474,53 @@ TEST(LlvmLibcPrintfParserTest, IndexModeComplexParsing) {
   EXPECT_PFORMAT_EQ(expected9, format_arr[9]);
 }
 
+TEST(LlvmLibcPrintfParserTest, IndexModeGapCheck) {
+  __llvm_libc::printf_core::FormatSection format_arr[10];
+  const char *str = "%1$d%2$d%4$d";
+  int arg1 = 1;
+  int arg2 = 2;
+  int arg3 = 3;
+  int arg4 = 4;
+
+  evaluate(format_arr, str, arg1, arg2, arg3, arg4);
+
+  __llvm_libc::printf_core::FormatSection expected0, expected1, expected2;
+
+  expected0.has_conv = true;
+  expected0.raw_string = {str, 4};
+  expected0.conv_val_raw = arg1;
+  expected0.conv_name = 'd';
+
+  EXPECT_PFORMAT_EQ(expected0, format_arr[0]);
+
+  expected1.has_conv = true;
+  expected1.raw_string = {str + 4, 4};
+  expected1.conv_val_raw = arg2;
+  expected1.conv_name = 'd';
+
+  EXPECT_PFORMAT_EQ(expected1, format_arr[1]);
+
+  expected2.has_conv = false;
+  expected2.raw_string = {str + 8, 4};
+
+  EXPECT_PFORMAT_EQ(expected2, format_arr[2]);
+}
+
+TEST(LlvmLibcPrintfParserTest, IndexModeTrailingPercentCrash) {
+  __llvm_libc::printf_core::FormatSection format_arr[10];
+  const char *str = "%2$d%";
+  evaluate(format_arr, str, 1, 2);
+
+  __llvm_libc::printf_core::FormatSection expected0, expected1;
+  expected0.has_conv = false;
+
+  expected0.raw_string = {str, 4};
+  EXPECT_PFORMAT_EQ(expected0, format_arr[0]);
+
+  expected1.has_conv = false;
+
+  expected1.raw_string = {str + 4, 1};
+  EXPECT_PFORMAT_EQ(expected1, format_arr[1]);
+}
+
 #endif // LIBC_COPT_PRINTF_DISABLE_INDEX_MODE