[libc-commits] [libc] ce4d671 - [libc][NFC] make atoi undefined cases match std

Michael Jones via libc-commits libc-commits at lists.llvm.org
Tue Dec 20 10:21:30 PST 2022


Author: Michael Jones
Date: 2022-12-20T10:21:25-08:00
New Revision: ce4d6717f85eeddd0b374d2829f46548972c5d4e

URL: https://github.com/llvm/llvm-project/commit/ce4d6717f85eeddd0b374d2829f46548972c5d4e
DIFF: https://github.com/llvm/llvm-project/commit/ce4d6717f85eeddd0b374d2829f46548972c5d4e.diff

LOG: [libc][NFC] make atoi undefined cases match std

The standard describes atoi as:

"equivalent to atoi: (int)strtol(nptr, (char **)NULL, 10)"

Previously, our behavior was slightly different on numbers larger than
INT_MAX, but this patch changes it to just do the cast instead. Both of
these are valid since the standard says

"If the value of the result cannot be represented, the
behavior is undefined."

But matching existing behavior makes differential fuzzing easier.

Reviewed By: sivachandra

Differential Revision: https://reviews.llvm.org/D140350

Added: 
    

Modified: 
    libc/src/stdlib/atoi.cpp
    libc/test/src/stdlib/AtoiTest.h

Removed: 
    


################################################################################
diff  --git a/libc/src/stdlib/atoi.cpp b/libc/src/stdlib/atoi.cpp
index f61e365ac1053..d9bc866009a8e 100644
--- a/libc/src/stdlib/atoi.cpp
+++ b/libc/src/stdlib/atoi.cpp
@@ -13,11 +13,13 @@
 namespace __llvm_libc {
 
 LLVM_LIBC_FUNCTION(int, atoi, (const char *str)) {
-  auto result = internal::strtointeger<int>(str, 10);
+  // This is done because the standard specifies that atoi is identical to
+  // (int)(strtol).
+  auto result = internal::strtointeger<long>(str, 10);
   if (result.has_error())
     errno = result.error;
 
-  return result;
+  return static_cast<int>(result);
 }
 
 } // namespace __llvm_libc

diff  --git a/libc/test/src/stdlib/AtoiTest.h b/libc/test/src/stdlib/AtoiTest.h
index 4013800f42490..db48945e3cd93 100644
--- a/libc/test/src/stdlib/AtoiTest.h
+++ b/libc/test/src/stdlib/AtoiTest.h
@@ -6,10 +6,13 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "src/__support/CPP/type_traits.h"
 #include "utils/UnitTest/Test.h"
 
 #include <limits.h>
 
+using __llvm_libc::cpp::is_same_v;
+
 template <typename ReturnT> struct AtoTest : public __llvm_libc::testing::Test {
   using FunctionT = ReturnT (*)(const char *);
 
@@ -54,6 +57,26 @@ template <typename ReturnT> struct AtoTest : public __llvm_libc::testing::Test {
       const char *smallest_long_long = "-9223372036854775808";
       ASSERT_EQ(func(smallest_long_long), static_cast<ReturnT>(LLONG_MIN));
     }
+
+    // If this is atoi and the size of int is less than the size of long, then
+    // we parse as long and cast to int to match existing behavior. This only
+    // matters for cases where the result would be outside of the int range, and
+    // those cases are undefined, so we can choose whatever output value we
+    // want. In this case we have chosen to cast since that matches existing
+    // implementations and makes differential fuzzing easier, but no user should
+    // rely on this behavior.
+    if constexpr (is_same_v<ReturnT, int> && sizeof(ReturnT) < sizeof(long)) {
+
+      static_assert(sizeof(int) == 4);
+
+      const char *bigger_than_biggest_int = "2147483649";
+      ASSERT_EQ(func(bigger_than_biggest_int),
+                static_cast<ReturnT>(2147483649));
+
+      const char *smaller_than_smallest_int = "-2147483649";
+      ASSERT_EQ(func(smaller_than_smallest_int),
+                static_cast<ReturnT>(-2147483649));
+    }
   }
 
   void nonBaseTenWholeNumbers(FunctionT func) {


        


More information about the libc-commits mailing list