[PATCH] Fix 'leading zeros' caveat of StringRef::compare_numeric()
Sanne Wouda
snnw at gruttepier.net
Tue Feb 3 13:11:41 PST 2015
Hi chandlerc,
compare_numeric() now ignores leading zeros of any numbers embedded in StringRef. Previously, a longer number would always be considered larger than the shorter number. For example, "007" would be considered larger than "8". The behaviour is now more intuitive.
REPOSITORY
rL LLVM
http://reviews.llvm.org/D7388
Files:
lib/Support/StringRef.cpp
unittests/ADT/StringRefTest.cpp
Index: lib/Support/StringRef.cpp
===================================================================
--- lib/Support/StringRef.cpp
+++ lib/Support/StringRef.cpp
@@ -71,33 +71,42 @@
/// compare_numeric - Compare strings, handle embedded numbers.
int StringRef::compare_numeric(StringRef RHS) const {
- for (size_t I = 0, E = std::min(Length, RHS.Length); I != E; ++I) {
+ size_t IL, IR;
+ for (IL = 0, IR = 0; IL != Length && IR != RHS.Length; ++IL, ++IR) {
// Check for sequences of digits.
- if (ascii_isdigit(Data[I]) && ascii_isdigit(RHS.Data[I])) {
- // The longer sequence of numbers is considered larger.
- // This doesn't really handle prefixed zeros well.
- size_t J;
- for (J = I + 1; J != E + 1; ++J) {
- bool ld = J < Length && ascii_isdigit(Data[J]);
- bool rd = J < RHS.Length && ascii_isdigit(RHS.Data[J]);
+ if (ascii_isdigit(Data[IL]) && ascii_isdigit(RHS.Data[IR])) {
+ // Skip any leading zeros.
+ while (Data[IL] == '0' && IL + 1 != Length && ascii_isdigit(Data[IL + 1]))
+ ++IL;
+ while (RHS.Data[IR] == '0' && IR + 1 != RHS.Length &&
+ ascii_isdigit(RHS.Data[IR + 1]))
+ ++IR;
+
+ size_t JL, JR;
+ for (JL = IL + 1, JR = IR + 1; JL != Length + 1 && JR != RHS.Length + 1;
+ ++JL, ++JR) {
+ bool ld = JL < Length && ascii_isdigit(Data[JL]);
+ bool rd = JR < RHS.Length && ascii_isdigit(RHS.Data[JR]);
if (ld != rd)
return rd ? -1 : 1;
if (!rd)
break;
}
- // The two number sequences have the same length (J-I), just memcmp them.
- if (int Res = compareMemory(Data + I, RHS.Data + I, J - I))
+ // The two number sequences (excluding leading zeros) have the same
+ // length, just memcmp them.
+ if (int Res = compareMemory(Data + IL, RHS.Data + IR, JL - IL))
return Res < 0 ? -1 : 1;
// Identical number sequences, continue search after the numbers.
- I = J - 1;
+ IL = JL - 1;
+ IR = JR - 1;
continue;
}
- if (Data[I] != RHS.Data[I])
- return (unsigned char)Data[I] < (unsigned char)RHS.Data[I] ? -1 : 1;
+ if (Data[IL] != RHS.Data[IR])
+ return (unsigned char)Data[IL] < (unsigned char)RHS.Data[IR] ? -1 : 1;
}
- if (Length == RHS.Length)
+ if (Length - IL == RHS.Length - IR)
return 0;
- return Length < RHS.Length ? -1 : 1;
+ return Length - IL < RHS.Length - IR ? -1 : 1;
}
// Compute the edit distance between the two given strings.
Index: unittests/ADT/StringRefTest.cpp
===================================================================
--- unittests/ADT/StringRefTest.cpp
+++ unittests/ADT/StringRefTest.cpp
@@ -78,6 +78,10 @@
EXPECT_EQ( 0, StringRef("10").compare_numeric("10"));
EXPECT_EQ( 0, StringRef("10a").compare_numeric("10a"));
EXPECT_EQ( 1, StringRef("2").compare_numeric("1"));
+ EXPECT_EQ(-1, StringRef("0a").compare_numeric("1"));
+ EXPECT_EQ( 1, StringRef("2").compare_numeric("01"));
+ EXPECT_EQ(-1, StringRef("2").compare_numeric("03"));
+ EXPECT_EQ( 0, StringRef("007 is 1337").compare_numeric("7 is 01337"));
EXPECT_EQ( 0, StringRef("llvm_v1i64_ty").compare_numeric("llvm_v1i64_ty"));
EXPECT_EQ( 1, StringRef("\xFF").compare_numeric("\1"));
EXPECT_EQ( 1, StringRef("V16").compare_numeric("V1_q0"));
EMAIL PREFERENCES
http://reviews.llvm.org/settings/panel/emailpreferences/
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D7388.19260.patch
Type: text/x-patch
Size: 3341 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20150203/38e8455f/attachment.bin>
More information about the llvm-commits
mailing list