[llvm] r247244 - [ADT] Add a single-character version of the small vector split routine
Chandler Carruth via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 9 23:07:03 PDT 2015
Author: chandlerc
Date: Thu Sep 10 01:07:03 2015
New Revision: 247244
URL: http://llvm.org/viewvc/llvm-project?rev=247244&view=rev
Log:
[ADT] Add a single-character version of the small vector split routine
on StringRef. Finding and splitting on a single character is
substantially faster than doing it on even a single-character StringRef
-- we immediately get to a *very* tuned memchr call this way.
Even nicer, we get to this even in a debug build, shaving 18% off the
runtime of TripleTest.Normalization, helping PR23676 some more.
Modified:
llvm/trunk/include/llvm/ADT/StringRef.h
llvm/trunk/lib/Support/StringRef.cpp
llvm/trunk/lib/Support/Triple.cpp
llvm/trunk/unittests/ADT/StringRefTest.cpp
Modified: llvm/trunk/include/llvm/ADT/StringRef.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/ADT/StringRef.h?rev=247244&r1=247243&r2=247244&view=diff
==============================================================================
--- llvm/trunk/include/llvm/ADT/StringRef.h (original)
+++ llvm/trunk/include/llvm/ADT/StringRef.h Thu Sep 10 01:07:03 2015
@@ -489,6 +489,23 @@ namespace llvm {
StringRef Separator, int MaxSplit = -1,
bool KeepEmpty = true) const;
+ /// Split into substrings around the occurrences of a separator character.
+ ///
+ /// Each substring is stored in \p A. If \p MaxSplit is >= 0, at most
+ /// \p MaxSplit splits are done and consequently <= \p MaxSplit + 1
+ /// elements are added to \p A.
+ /// If \p KeepEmpty is false, empty strings are not added to \p A. They
+ /// still count when considering \p MaxSplit.
+ /// A useful invariant is that
+ /// Separator.join(A) == *this if MaxSplit == -1 and KeepEmpty == true.
+ ///
+ /// \param A - Where to put the substrings.
+ /// \param Separator - The character to split on.
+ /// \param MaxSplit - The maximum number of times the string is split.
+ /// \param KeepEmpty - True if empty substrings should be added.
+ void split(SmallVectorImpl<StringRef> &A, char Separator, int MaxSplit = -1,
+ bool KeepEmpty = true) const;
+
/// Split into two substrings around the last occurrence of a separator
/// character.
///
Modified: llvm/trunk/lib/Support/StringRef.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/StringRef.cpp?rev=247244&r1=247243&r2=247244&view=diff
==============================================================================
--- llvm/trunk/lib/Support/StringRef.cpp (original)
+++ llvm/trunk/lib/Support/StringRef.cpp Thu Sep 10 01:07:03 2015
@@ -294,6 +294,26 @@ void StringRef::split(SmallVectorImpl<St
A.push_back(rest);
}
+void StringRef::split(SmallVectorImpl<StringRef> &A, char Separator,
+ int MaxSplit, bool KeepEmpty) const {
+ StringRef rest = *this;
+ // Peel off one piece per iteration until the split budget is spent.
+ // rest.data() tells "a," (splits into "a" + "", a non-null empty rest)
+ // apart from "a" (splits into "a" + a null rest, so nothing is left).
+ for (int splits = 0;
+ rest.data() != nullptr && (MaxSplit < 0 || splits < MaxSplit);
+ ++splits) {
+ std::pair<StringRef, StringRef> p = rest.split(Separator);
+
+ if (KeepEmpty || p.first.size() != 0)
+ A.push_back(p.first);
+ rest = p.second;
+ }
+ // Append whatever remains unsplit; an empty tail only when KeepEmpty.
+ if (rest.data() != nullptr && (rest.size() != 0 || KeepEmpty))
+ A.push_back(rest);
+}
+
//===----------------------------------------------------------------------===//
// Helpful Algorithms
//===----------------------------------------------------------------------===//
Modified: llvm/trunk/lib/Support/Triple.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/Triple.cpp?rev=247244&r1=247243&r2=247244&view=diff
==============================================================================
--- llvm/trunk/lib/Support/Triple.cpp (original)
+++ llvm/trunk/lib/Support/Triple.cpp Thu Sep 10 01:07:03 2015
@@ -624,7 +624,7 @@ std::string Triple::normalize(StringRef
// Parse into components.
SmallVector<StringRef, 4> Components;
- Str.split(Components, "-");
+ Str.split(Components, '-');
// If the first component corresponds to a known architecture, preferentially
// use it for the architecture. If the second component corresponds to a
Modified: llvm/trunk/unittests/ADT/StringRefTest.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/unittests/ADT/StringRefTest.cpp?rev=247244&r1=247243&r2=247244&view=diff
==============================================================================
--- llvm/trunk/unittests/ADT/StringRefTest.cpp (original)
+++ llvm/trunk/unittests/ADT/StringRefTest.cpp Thu Sep 10 01:07:03 2015
@@ -225,6 +225,11 @@ TEST(StringRefTest, Split2) {
expected.push_back("a"); expected.push_back("b"); expected.push_back("c");
StringRef("a,,b,c").split(parts, ",", 3, false);
EXPECT_TRUE(parts == expected);
+
+ expected.clear(); parts.clear();
+ expected.push_back("a"); expected.push_back("b"); expected.push_back("c");
+ StringRef("a,,b,c").split(parts, ',', 3, false);
+ EXPECT_TRUE(parts == expected);
}
TEST(StringRefTest, Trim) {
More information about the llvm-commits
mailing list