[llvm] r247249 - [ADT] Fix a confusing interface spec and some annoying peculiarities

Chandler Carruth via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 10 00:51:37 PDT 2015


Author: chandlerc
Date: Thu Sep 10 02:51:37 2015
New Revision: 247249

URL: http://llvm.org/viewvc/llvm-project?rev=247249&view=rev
Log:
[ADT] Fix a confusing interface spec and some annoying peculiarities
with the StringRef::split method when used with a MaxSplit argument
other than '-1' (which nobody really does today, but which should
actually work).

The spec claimed both to split up to MaxSplit times, but also to append
<= MaxSplit strings to the vector. One of these doesn't make sense.
Given the name "MaxSplit", let's go with it being a max over how many
*splits* occur, which means the max on how many strings get appended is
MaxSplit+1. I'm not actually sure the implementation correctly provided
this logic either, as it used a really opaque loop structure.

The implementation was also playing weird games with nullptr in the data
field to try to rely on a totally opaque hidden property of the split
method that returns a pair. Nasty IMO.

Replace all of this with what is (IMO) simpler code that doesn't use the
pair returning split method, and instead just finds each separator and
appends directly. I think this is a lot easier to read, and it most
definitely matches the spec. Added some tests that exercise the corner
cases around StringRef() and StringRef("") that all now pass.

I'll start using this in code in the next commit.

Modified:
    llvm/trunk/include/llvm/ADT/StringRef.h
    llvm/trunk/lib/Support/StringRef.cpp
    llvm/trunk/unittests/ADT/StringRefTest.cpp

Modified: llvm/trunk/include/llvm/ADT/StringRef.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/ADT/StringRef.h?rev=247249&r1=247248&r2=247249&view=diff
==============================================================================
--- llvm/trunk/include/llvm/ADT/StringRef.h (original)
+++ llvm/trunk/include/llvm/ADT/StringRef.h Thu Sep 10 02:51:37 2015
@@ -474,7 +474,7 @@ namespace llvm {
     /// Split into substrings around the occurrences of a separator string.
     ///
     /// Each substring is stored in \p A. If \p MaxSplit is >= 0, at most
-    /// \p MaxSplit splits are done and consequently <= \p MaxSplit
+    /// \p MaxSplit splits are done and consequently <= \p MaxSplit + 1
     /// elements are added to A.
     /// If \p KeepEmpty is false, empty strings are not added to \p A. They
     /// still count when considering \p MaxSplit
@@ -492,7 +492,7 @@ namespace llvm {
     /// Split into substrings around the occurrences of a separator character.
     ///
     /// Each substring is stored in \p A. If \p MaxSplit is >= 0, at most
-    /// \p MaxSplit splits are done and consequently <= \p MaxSplit
+    /// \p MaxSplit splits are done and consequently <= \p MaxSplit + 1
     /// elements are added to A.
     /// If \p KeepEmpty is false, empty strings are not added to \p A. They
     /// still count when considering \p MaxSplit

Modified: llvm/trunk/lib/Support/StringRef.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/StringRef.cpp?rev=247249&r1=247248&r2=247249&view=diff
==============================================================================
--- llvm/trunk/lib/Support/StringRef.cpp (original)
+++ llvm/trunk/lib/Support/StringRef.cpp Thu Sep 10 02:51:37 2015
@@ -274,44 +274,56 @@ StringRef::size_type StringRef::find_las
 }
 
 void StringRef::split(SmallVectorImpl<StringRef> &A,
-                      StringRef Separators, int MaxSplit,
+                      StringRef Separator, int MaxSplit,
                       bool KeepEmpty) const {
-  StringRef rest = *this;
+  StringRef S = *this;
 
-  // rest.data() is used to distinguish cases like "a," that splits into
-  // "a" + "" and "a" that splits into "a" + 0.
-  for (int splits = 0;
-       rest.data() != nullptr && (MaxSplit < 0 || splits < MaxSplit);
-       ++splits) {
-    std::pair<StringRef, StringRef> p = rest.split(Separators);
-
-    if (KeepEmpty || p.first.size() != 0)
-      A.push_back(p.first);
-    rest = p.second;
+  // Count down from MaxSplit. When MaxSplit is -1, this will just split
+  // "forever". This doesn't support splitting more than 2^31 times
+  // intentionally; if we ever want that we can make MaxSplit a 64-bit integer
+  // but that seems unlikely to be useful.
+  while (MaxSplit-- != 0) {
+    size_t Idx = S.find(Separator);
+    if (Idx == npos)
+      break;
+
+    // Push this split.
+    if (KeepEmpty || Idx > 0)
+      A.push_back(S.slice(0, Idx));
+
+    // Jump forward.
+    S = S.slice(Idx + Separator.size(), npos);
   }
-  // If we have a tail left, add it.
-  if (rest.data() != nullptr && (rest.size() != 0 || KeepEmpty))
-    A.push_back(rest);
+
+  // Push the tail.
+  if (KeepEmpty || !S.empty())
+    A.push_back(S);
 }
 
 void StringRef::split(SmallVectorImpl<StringRef> &A, char Separator,
                       int MaxSplit, bool KeepEmpty) const {
-  StringRef rest = *this;
+  StringRef S = *this;
 
-  // rest.data() is used to distinguish cases like "a," that splits into
-  // "a" + "" and "a" that splits into "a" + 0.
-  for (int splits = 0;
-       rest.data() != nullptr && (MaxSplit < 0 || splits < MaxSplit);
-       ++splits) {
-    std::pair<StringRef, StringRef> p = rest.split(Separator);
-
-    if (KeepEmpty || p.first.size() != 0)
-      A.push_back(p.first);
-    rest = p.second;
+  // Count down from MaxSplit. When MaxSplit is -1, this will just split
+  // "forever". This doesn't support splitting more than 2^31 times
+  // intentionally; if we ever want that we can make MaxSplit a 64-bit integer
+  // but that seems unlikely to be useful.
+  while (MaxSplit-- != 0) {
+    size_t Idx = S.find(Separator);
+    if (Idx == npos)
+      break;
+
+    // Push this split.
+    if (KeepEmpty || Idx > 0)
+      A.push_back(S.slice(0, Idx));
+
+    // Jump forward.
+    S = S.slice(Idx + 1, npos);
   }
-  // If we have a tail left, add it.
-  if (rest.data() != nullptr && (rest.size() != 0 || KeepEmpty))
-    A.push_back(rest);
+
+  // Push the tail.
+  if (KeepEmpty || !S.empty())
+    A.push_back(S);
 }
 
 //===----------------------------------------------------------------------===//

Modified: llvm/trunk/unittests/ADT/StringRefTest.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/unittests/ADT/StringRefTest.cpp?rev=247249&r1=247248&r2=247249&view=diff
==============================================================================
--- llvm/trunk/unittests/ADT/StringRefTest.cpp (original)
+++ llvm/trunk/unittests/ADT/StringRefTest.cpp Thu Sep 10 02:51:37 2015
@@ -230,6 +230,54 @@ TEST(StringRefTest, Split2) {
   expected.push_back("a"); expected.push_back("b"); expected.push_back("c");
   StringRef("a,,b,c").split(parts, ',', 3, false);
   EXPECT_TRUE(parts == expected);
+
+  expected.clear(); parts.clear();
+  expected.push_back("");
+  StringRef().split(parts, ",", 0, true);
+  EXPECT_TRUE(parts == expected);
+
+  expected.clear(); parts.clear();
+  expected.push_back(StringRef());
+  StringRef("").split(parts, ",", 0, true);
+  EXPECT_TRUE(parts == expected);
+
+  expected.clear(); parts.clear();
+  StringRef("").split(parts, ",", 0, false);
+  EXPECT_TRUE(parts == expected);
+  StringRef().split(parts, ",", 0, false);
+  EXPECT_TRUE(parts == expected);
+
+  expected.clear(); parts.clear();
+  expected.push_back("a");
+  expected.push_back("");
+  expected.push_back("b");
+  expected.push_back("c,d");
+  StringRef("a,,b,c,d").split(parts, ",", 3, true);
+  EXPECT_TRUE(parts == expected);
+
+  expected.clear(); parts.clear();
+  expected.push_back("");
+  StringRef().split(parts, ',', 0, true);
+  EXPECT_TRUE(parts == expected);
+
+  expected.clear(); parts.clear();
+  expected.push_back(StringRef());
+  StringRef("").split(parts, ',', 0, true);
+  EXPECT_TRUE(parts == expected);
+
+  expected.clear(); parts.clear();
+  StringRef("").split(parts, ',', 0, false);
+  EXPECT_TRUE(parts == expected);
+  StringRef().split(parts, ',', 0, false);
+  EXPECT_TRUE(parts == expected);
+
+  expected.clear(); parts.clear();
+  expected.push_back("a");
+  expected.push_back("");
+  expected.push_back("b");
+  expected.push_back("c,d");
+  StringRef("a,,b,c,d").split(parts, ',', 3, true);
+  EXPECT_TRUE(parts == expected);
 }
 
 TEST(StringRefTest, Trim) {




More information about the llvm-commits mailing list