[PATCH] Fix 'leading zeros' caveat of StringRef::compare_numeric()

Sanne Wouda snnw at gruttepier.net
Tue Feb 3 13:11:41 PST 2015


Hi chandlerc,

compare_numeric() now ignores leading zeros of any numbers embedded in StringRef.  Previously, a longer number would always be considered larger than the shorter number. For example, "007" would be considered larger than "8". The behaviour is now more intuitive.

REPOSITORY
  rL LLVM

http://reviews.llvm.org/D7388

Files:
  lib/Support/StringRef.cpp
  unittests/ADT/StringRefTest.cpp

Index: lib/Support/StringRef.cpp
===================================================================
--- lib/Support/StringRef.cpp
+++ lib/Support/StringRef.cpp
@@ -71,33 +71,42 @@
 
 /// compare_numeric - Compare strings, handle embedded numbers.
 int StringRef::compare_numeric(StringRef RHS) const {
-  for (size_t I = 0, E = std::min(Length, RHS.Length); I != E; ++I) {
+  size_t IL, IR;
+  for (IL = 0, IR = 0; IL != Length && IR != RHS.Length; ++IL, ++IR) {
     // Check for sequences of digits.
-    if (ascii_isdigit(Data[I]) && ascii_isdigit(RHS.Data[I])) {
-      // The longer sequence of numbers is considered larger.
-      // This doesn't really handle prefixed zeros well.
-      size_t J;
-      for (J = I + 1; J != E + 1; ++J) {
-        bool ld = J < Length && ascii_isdigit(Data[J]);
-        bool rd = J < RHS.Length && ascii_isdigit(RHS.Data[J]);
+    if (ascii_isdigit(Data[IL]) && ascii_isdigit(RHS.Data[IR])) {
+      // Skip any leading zeros.
+      while (Data[IL] == '0' && IL + 1 != Length && ascii_isdigit(Data[IL + 1]))
+        ++IL;
+      while (RHS.Data[IR] == '0' && IR + 1 != RHS.Length &&
+             ascii_isdigit(RHS.Data[IR + 1]))
+        ++IR;
+
+      size_t JL, JR;
+      for (JL = IL + 1, JR = IR + 1; JL != Length + 1 && JR != RHS.Length + 1;
+           ++JL, ++JR) {
+        bool ld = JL < Length && ascii_isdigit(Data[JL]);
+        bool rd = JR < RHS.Length && ascii_isdigit(RHS.Data[JR]);
         if (ld != rd)
           return rd ? -1 : 1;
         if (!rd)
           break;
       }
-      // The two number sequences have the same length (J-I), just memcmp them.
-      if (int Res = compareMemory(Data + I, RHS.Data + I, J - I))
+      // The two number sequences (excluding leading zeros) have the same
+      // length, just memcmp them.
+      if (int Res = compareMemory(Data + IL, RHS.Data + IR, JL - IL))
         return Res < 0 ? -1 : 1;
       // Identical number sequences, continue search after the numbers.
-      I = J - 1;
+      IL = JL - 1;
+      IR = JR - 1;
       continue;
     }
-    if (Data[I] != RHS.Data[I])
-      return (unsigned char)Data[I] < (unsigned char)RHS.Data[I] ? -1 : 1;
+    if (Data[IL] != RHS.Data[IR])
+      return (unsigned char)Data[IL] < (unsigned char)RHS.Data[IR] ? -1 : 1;
   }
-  if (Length == RHS.Length)
+  if (Length - IL == RHS.Length - IR)
     return 0;
-  return Length < RHS.Length ? -1 : 1;
+  return Length - IL < RHS.Length - IR ? -1 : 1;
 }
 
 // Compute the edit distance between the two given strings.
Index: unittests/ADT/StringRefTest.cpp
===================================================================
--- unittests/ADT/StringRefTest.cpp
+++ unittests/ADT/StringRefTest.cpp
@@ -78,6 +78,10 @@
   EXPECT_EQ( 0, StringRef("10").compare_numeric("10"));
   EXPECT_EQ( 0, StringRef("10a").compare_numeric("10a"));
   EXPECT_EQ( 1, StringRef("2").compare_numeric("1"));
+  EXPECT_EQ(-1, StringRef("0a").compare_numeric("1"));
+  EXPECT_EQ( 1, StringRef("2").compare_numeric("01"));
+  EXPECT_EQ(-1, StringRef("2").compare_numeric("03"));
+  EXPECT_EQ( 0, StringRef("007 is 1337").compare_numeric("7 is 01337"));
   EXPECT_EQ( 0, StringRef("llvm_v1i64_ty").compare_numeric("llvm_v1i64_ty"));
   EXPECT_EQ( 1, StringRef("\xFF").compare_numeric("\1"));
   EXPECT_EQ( 1, StringRef("V16").compare_numeric("V1_q0"));

EMAIL PREFERENCES
  http://reviews.llvm.org/settings/panel/emailpreferences/
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D7388.19260.patch
Type: text/x-patch
Size: 3341 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20150203/38e8455f/attachment.bin>


More information about the llvm-commits mailing list