[llvm] r282164 - [Support] Add StringRef::consumeInteger.

Zachary Turner via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 22 08:05:19 PDT 2016


Author: zturner
Date: Thu Sep 22 10:05:19 2016
New Revision: 282164

URL: http://llvm.org/viewvc/llvm-project?rev=282164&view=rev
Log:
[Support] Add StringRef::consumeInteger.

StringRef::getAsInteger() exists and treats the entire string as
an integer of the specified radix, failing if any invalid characters
are encountered or the number overflows.

Sometimes you might have something like "123456foo" and you want
to get the number 123456 and leave the string "foo" remaining.
This is similar to what is possible with the standard C runtime
library functions (strtoul et al.) when an end pointer is
specified.

This patch adds consumeInteger(), which does exactly that.  It
consumes as much as possible until an invalid character is found,
and modifies the StringRef in place so that upon return only
the portion of the StringRef after the number remains.

Differential Revision: https://reviews.llvm.org/D24778

Modified:
    llvm/trunk/include/llvm/ADT/StringRef.h
    llvm/trunk/lib/Support/StringRef.cpp
    llvm/trunk/unittests/ADT/StringRefTest.cpp

Modified: llvm/trunk/include/llvm/ADT/StringRef.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/ADT/StringRef.h?rev=282164&r1=282163&r2=282164&view=diff
==============================================================================
--- llvm/trunk/include/llvm/ADT/StringRef.h (original)
+++ llvm/trunk/include/llvm/ADT/StringRef.h Thu Sep 22 10:05:19 2016
@@ -32,6 +32,10 @@ namespace llvm {
 
   bool getAsSignedInteger(StringRef Str, unsigned Radix, long long &Result);
 
+  bool consumeUnsignedInteger(StringRef &Str, unsigned Radix,
+                              unsigned long long &Result);
+  bool consumeSignedInteger(StringRef &Str, unsigned Radix, long long &Result);
+
   /// StringRef - Represent a constant reference to a string, i.e. a character
   /// array and a length, which need not be null terminated.
   ///
@@ -395,6 +399,37 @@ namespace llvm {
         return true;
       Result = ULLVal;
       return false;
+    }
+
+    /// Parse the current string as an integer of the specified radix.  If
+    /// \p Radix is specified as zero, this does radix autosensing using
+    /// extended C rules: 0 is octal, 0x is hex, 0b is binary.
+    ///
+    /// If the string does not begin with a number of the specified radix,
+    /// this returns true to signify the error. The string is considered
+    /// erroneous if empty or if it overflows T.
+    /// The portion of the string representing the discovered numeric value
+    /// is removed from the beginning of the string.
+    template <typename T>
+    typename std::enable_if<std::numeric_limits<T>::is_signed, bool>::type
+    consumeInteger(unsigned Radix, T &Result) {
+      long long LLVal;
+      if (consumeSignedInteger(*this, Radix, LLVal) ||
+          static_cast<long long>(static_cast<T>(LLVal)) != LLVal)
+        return true;
+      Result = LLVal;
+      return false;
+    }
+
+    template <typename T>
+    typename std::enable_if<!std::numeric_limits<T>::is_signed, bool>::type
+    consumeInteger(unsigned Radix, T &Result) {
+      unsigned long long ULLVal;
+      if (consumeUnsignedInteger(*this, Radix, ULLVal) ||
+          static_cast<long long>(static_cast<T>(ULLVal)) != ULLVal)
+        return true;
+      Result = ULLVal;
+      return false;
     }
 
     /// Parse the current string as an integer of the specified \p Radix, or of

Modified: llvm/trunk/lib/Support/StringRef.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/StringRef.cpp?rev=282164&r1=282163&r2=282164&view=diff
==============================================================================
--- llvm/trunk/lib/Support/StringRef.cpp (original)
+++ llvm/trunk/lib/Support/StringRef.cpp Thu Sep 22 10:05:19 2016
@@ -366,17 +366,16 @@ static unsigned GetAutoSenseRadix(String
     return 8;
   }
 
-  if (Str.startswith("0"))
+  if (Str[0] == '0' && Str.size() > 1 && ascii_isdigit(Str[1])) {
+    Str = Str.substr(1);
     return 8;
-  
+  }
+
   return 10;
 }
 
-
-/// GetAsUnsignedInteger - Workhorse method that converts a integer character
-/// sequence of radix up to 36 to an unsigned long long value.
-bool llvm::getAsUnsignedInteger(StringRef Str, unsigned Radix,
-                                unsigned long long &Result) {
+bool llvm::consumeUnsignedInteger(StringRef &Str, unsigned Radix,
+                                  unsigned long long &Result) {
   // Autosense radix if not specified.
   if (Radix == 0)
     Radix = GetAutoSenseRadix(Str);
@@ -385,44 +384,51 @@ bool llvm::getAsUnsignedInteger(StringRe
   if (Str.empty()) return true;
 
   // Parse all the bytes of the string given this radix.  Watch for overflow.
+  StringRef Str2 = Str;
   Result = 0;
-  while (!Str.empty()) {
+  while (!Str2.empty()) {
     unsigned CharVal;
-    if (Str[0] >= '0' && Str[0] <= '9')
-      CharVal = Str[0]-'0';
-    else if (Str[0] >= 'a' && Str[0] <= 'z')
-      CharVal = Str[0]-'a'+10;
-    else if (Str[0] >= 'A' && Str[0] <= 'Z')
-      CharVal = Str[0]-'A'+10;
+    if (Str2[0] >= '0' && Str2[0] <= '9')
+      CharVal = Str2[0] - '0';
+    else if (Str2[0] >= 'a' && Str2[0] <= 'z')
+      CharVal = Str2[0] - 'a' + 10;
+    else if (Str2[0] >= 'A' && Str2[0] <= 'Z')
+      CharVal = Str2[0] - 'A' + 10;
     else
-      return true;
+      break;
 
-    // If the parsed value is larger than the integer radix, the string is
-    // invalid.
+    // If the parsed value is larger than the integer radix, we cannot
+    // consume any more characters.
     if (CharVal >= Radix)
-      return true;
+      break;
 
     // Add in this character.
     unsigned long long PrevResult = Result;
-    Result = Result*Radix+CharVal;
+    Result = Result * Radix + CharVal;
 
     // Check for overflow by shifting back and seeing if bits were lost.
-    if (Result/Radix < PrevResult)
+    if (Result / Radix < PrevResult)
       return true;
 
-    Str = Str.substr(1);
+    Str2 = Str2.substr(1);
   }
 
+  // We consider the operation a failure if no characters were consumed
+  // successfully.
+  if (Str.size() == Str2.size())
+    return true;
+
+  Str = Str2;
   return false;
 }
 
-bool llvm::getAsSignedInteger(StringRef Str, unsigned Radix,
-                              long long &Result) {
+bool llvm::consumeSignedInteger(StringRef &Str, unsigned Radix,
+                                long long &Result) {
   unsigned long long ULLVal;
 
   // Handle positive strings first.
   if (Str.empty() || Str.front() != '-') {
-    if (getAsUnsignedInteger(Str, Radix, ULLVal) ||
+    if (consumeUnsignedInteger(Str, Radix, ULLVal) ||
         // Check for value so large it overflows a signed value.
         (long long)ULLVal < 0)
       return true;
@@ -431,17 +437,41 @@ bool llvm::getAsSignedInteger(StringRef
   }
 
   // Get the positive part of the value.
-  if (getAsUnsignedInteger(Str.substr(1), Radix, ULLVal) ||
+  StringRef Str2 = Str.drop_front(1);
+  if (consumeUnsignedInteger(Str2, Radix, ULLVal) ||
       // Reject values so large they'd overflow as negative signed, but allow
       // "-0".  This negates the unsigned so that the negative isn't undefined
       // on signed overflow.
       (long long)-ULLVal > 0)
     return true;
 
+  Str = Str2;
   Result = -ULLVal;
   return false;
 }
 
+/// GetAsUnsignedInteger - Workhorse method that converts a integer character
+/// sequence of radix up to 36 to an unsigned long long value.
+bool llvm::getAsUnsignedInteger(StringRef Str, unsigned Radix,
+                                unsigned long long &Result) {
+  if (consumeUnsignedInteger(Str, Radix, Result))
+    return true;
+
+  // For getAsUnsignedInteger, we require the whole string to be consumed or
+  // else we consider it a failure.
+  return !Str.empty();
+}
+
+bool llvm::getAsSignedInteger(StringRef Str, unsigned Radix,
+                              long long &Result) {
+  if (consumeSignedInteger(Str, Radix, Result))
+    return true;
+
+  // For getAsSignedInteger, we require the whole string to be consumed or else
+  // we consider it a failure.
+  return !Str.empty();
+}
+
 bool StringRef::getAsInteger(unsigned Radix, APInt &Result) const {
   StringRef Str = *this;
 

Modified: llvm/trunk/unittests/ADT/StringRefTest.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/unittests/ADT/StringRefTest.cpp?rev=282164&r1=282163&r2=282164&view=diff
==============================================================================
--- llvm/trunk/unittests/ADT/StringRefTest.cpp (original)
+++ llvm/trunk/unittests/ADT/StringRefTest.cpp Thu Sep 22 10:05:19 2016
@@ -590,6 +590,183 @@ TEST(StringRefTest, getAsUnsignedInteger
   }
 }
 
+struct ConsumeUnsignedPair {
+  const char *Str;
+  uint64_t Expected;
+  const char *Leftover;
+} ConsumeUnsigned[] = {
+    {"0", 0, ""},
+    {"255", 255, ""},
+    {"256", 256, ""},
+    {"65535", 65535, ""},
+    {"65536", 65536, ""},
+    {"4294967295", 4294967295ULL, ""},
+    {"4294967296", 4294967296ULL, ""},
+    {"255A376", 255, "A376"},
+    {"18446744073709551615", 18446744073709551615ULL, ""},
+    {"18446744073709551615ABC", 18446744073709551615ULL, "ABC"},
+    {"042", 34, ""},
+    {"0x42", 66, ""},
+    {"0x42-0x34", 66, "-0x34"},
+    {"0b101010", 42, ""},
+    {"0429F", 042, "9F"},            // Auto-sensed octal radix, invalid digit
+    {"0x42G12", 0x42, "G12"},        // Auto-sensed hex radix, invalid digit
+    {"0b10101020101", 42, "20101"}}; // Auto-sensed binary radix, invalid digit.
+
+struct ConsumeSignedPair {
+  const char *Str;
+  int64_t Expected;
+  const char *Leftover;
+} ConsumeSigned[] = {
+    {"0", 0, ""},
+    {"-0", 0, ""},
+    {"0-1", 0, "-1"},
+    {"-0-1", 0, "-1"},
+    {"127", 127, ""},
+    {"128", 128, ""},
+    {"127-1", 127, "-1"},
+    {"128-1", 128, "-1"},
+    {"-128", -128, ""},
+    {"-129", -129, ""},
+    {"-128-1", -128, "-1"},
+    {"-129-1", -129, "-1"},
+    {"32767", 32767, ""},
+    {"32768", 32768, ""},
+    {"32767-1", 32767, "-1"},
+    {"32768-1", 32768, "-1"},
+    {"-32768", -32768, ""},
+    {"-32769", -32769, ""},
+    {"-32768-1", -32768, "-1"},
+    {"-32769-1", -32769, "-1"},
+    {"2147483647", 2147483647LL, ""},
+    {"2147483648", 2147483648LL, ""},
+    {"2147483647-1", 2147483647LL, "-1"},
+    {"2147483648-1", 2147483648LL, "-1"},
+    {"-2147483648", -2147483648LL, ""},
+    {"-2147483649", -2147483649LL, ""},
+    {"-2147483648-1", -2147483648LL, "-1"},
+    {"-2147483649-1", -2147483649LL, "-1"},
+    {"-9223372036854775808", -(9223372036854775807LL) - 1, ""},
+    {"-9223372036854775808-1", -(9223372036854775807LL) - 1, "-1"},
+    {"042", 34, ""},
+    {"042-1", 34, "-1"},
+    {"0x42", 66, ""},
+    {"0x42-1", 66, "-1"},
+    {"0b101010", 42, ""},
+    {"0b101010-1", 42, "-1"},
+    {"-042", -34, ""},
+    {"-042-1", -34, "-1"},
+    {"-0x42", -66, ""},
+    {"-0x42-1", -66, "-1"},
+    {"-0b101010", -42, ""},
+    {"-0b101010-1", -42, "-1"}};
+
+TEST(StringRefTest, consumeIntegerUnsigned) {
+  uint8_t U8;
+  uint16_t U16;
+  uint32_t U32;
+  uint64_t U64;
+
+  for (size_t i = 0; i < array_lengthof(ConsumeUnsigned); ++i) {
+    StringRef Str = ConsumeUnsigned[i].Str;
+    bool U8Success = Str.consumeInteger(0, U8);
+    if (static_cast<uint8_t>(ConsumeUnsigned[i].Expected) ==
+        ConsumeUnsigned[i].Expected) {
+      ASSERT_FALSE(U8Success);
+      EXPECT_EQ(U8, ConsumeUnsigned[i].Expected);
+      EXPECT_EQ(Str, ConsumeUnsigned[i].Leftover);
+    } else {
+      ASSERT_TRUE(U8Success);
+    }
+
+    Str = ConsumeUnsigned[i].Str;
+    bool U16Success = Str.consumeInteger(0, U16);
+    if (static_cast<uint16_t>(ConsumeUnsigned[i].Expected) ==
+        ConsumeUnsigned[i].Expected) {
+      ASSERT_FALSE(U16Success);
+      EXPECT_EQ(U16, ConsumeUnsigned[i].Expected);
+      EXPECT_EQ(Str, ConsumeUnsigned[i].Leftover);
+    } else {
+      ASSERT_TRUE(U16Success);
+    }
+
+    Str = ConsumeUnsigned[i].Str;
+    bool U32Success = Str.consumeInteger(0, U32);
+    if (static_cast<uint32_t>(ConsumeUnsigned[i].Expected) ==
+        ConsumeUnsigned[i].Expected) {
+      ASSERT_FALSE(U32Success);
+      EXPECT_EQ(U32, ConsumeUnsigned[i].Expected);
+      EXPECT_EQ(Str, ConsumeUnsigned[i].Leftover);
+    } else {
+      ASSERT_TRUE(U32Success);
+    }
+
+    Str = ConsumeUnsigned[i].Str;
+    bool U64Success = Str.consumeInteger(0, U64);
+    if (static_cast<uint64_t>(ConsumeUnsigned[i].Expected) ==
+        ConsumeUnsigned[i].Expected) {
+      ASSERT_FALSE(U64Success);
+      EXPECT_EQ(U64, ConsumeUnsigned[i].Expected);
+      EXPECT_EQ(Str, ConsumeUnsigned[i].Leftover);
+    } else {
+      ASSERT_TRUE(U64Success);
+    }
+  }
+}
+
+TEST(StringRefTest, consumeIntegerSigned) {
+  int8_t S8;
+  int16_t S16;
+  int32_t S32;
+  int64_t S64;
+
+  for (size_t i = 0; i < array_lengthof(ConsumeSigned); ++i) {
+    StringRef Str = ConsumeSigned[i].Str;
+    bool S8Success = Str.consumeInteger(0, S8);
+    if (static_cast<int8_t>(ConsumeSigned[i].Expected) ==
+        ConsumeSigned[i].Expected) {
+      ASSERT_FALSE(S8Success);
+      EXPECT_EQ(S8, ConsumeSigned[i].Expected);
+      EXPECT_EQ(Str, ConsumeSigned[i].Leftover);
+    } else {
+      ASSERT_TRUE(S8Success);
+    }
+
+    Str = ConsumeSigned[i].Str;
+    bool S16Success = Str.consumeInteger(0, S16);
+    if (static_cast<int16_t>(ConsumeSigned[i].Expected) ==
+        ConsumeSigned[i].Expected) {
+      ASSERT_FALSE(S16Success);
+      EXPECT_EQ(S16, ConsumeSigned[i].Expected);
+      EXPECT_EQ(Str, ConsumeSigned[i].Leftover);
+    } else {
+      ASSERT_TRUE(S16Success);
+    }
+
+    Str = ConsumeSigned[i].Str;
+    bool S32Success = Str.consumeInteger(0, S32);
+    if (static_cast<int32_t>(ConsumeSigned[i].Expected) ==
+        ConsumeSigned[i].Expected) {
+      ASSERT_FALSE(S32Success);
+      EXPECT_EQ(S32, ConsumeSigned[i].Expected);
+      EXPECT_EQ(Str, ConsumeSigned[i].Leftover);
+    } else {
+      ASSERT_TRUE(S32Success);
+    }
+
+    Str = ConsumeSigned[i].Str;
+    bool S64Success = Str.consumeInteger(0, S64);
+    if (static_cast<int64_t>(ConsumeSigned[i].Expected) ==
+        ConsumeSigned[i].Expected) {
+      ASSERT_FALSE(S64Success);
+      EXPECT_EQ(S64, ConsumeSigned[i].Expected);
+      EXPECT_EQ(Str, ConsumeSigned[i].Leftover);
+    } else {
+      ASSERT_TRUE(S64Success);
+    }
+  }
+}
+
 static const char *join_input[] = { "a", "b", "c" };
 static const char join_result1[] = "a";
 static const char join_result2[] = "a:b:c";




More information about the llvm-commits mailing list