[llvm] cf8d19f - [ADT] Add methods to SmallString for efficient concatenation

Nathan James via llvm-commits llvm-commits at lists.llvm.org
Fri Oct 30 03:07:54 PDT 2020


Author: Nathan James
Date: 2020-10-30T10:07:40Z
New Revision: cf8d19f4fb2ca0eb6b7f8169d1d7ff68ba95d9f5

URL: https://github.com/llvm/llvm-project/commit/cf8d19f4fb2ca0eb6b7f8169d1d7ff68ba95d9f5
DIFF: https://github.com/llvm/llvm-project/commit/cf8d19f4fb2ca0eb6b7f8169d1d7ff68ba95d9f5.diff

LOG: [ADT] Add methods to SmallString for efficient concatenation

A common pattern when using SmallString is to repeatedly call append to build a larger string.
The issue here is that the optimizer can't see through this and often has to check that there is enough space in the storage for each string you try to append.
This results in lots of conditional branches and potentially multiple calls to grow needing to be emitted if the buffer wasn't large enough.
By taking an initializer_list of StringRefs, SmallString can preallocate the storage it needs for all of the StringRefs, so it needs to grow at most once, and then use a fast path of copying all the strings into its storage, knowing there is guaranteed to be enough capacity.
Using StringRefs also means you can append different string-like types in one go, as they will all be implicitly converted to a StringRef.

Reviewed By: dblaikie

Differential Revision: https://reviews.llvm.org/D90386

Added: 
    

Modified: 
    llvm/include/llvm/ADT/SmallString.h
    llvm/unittests/ADT/SmallStringTest.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/ADT/SmallString.h b/llvm/include/llvm/ADT/SmallString.h
index cd6f2173d04f..2fa47c7f4e23 100644
--- a/llvm/include/llvm/ADT/SmallString.h
+++ b/llvm/include/llvm/ADT/SmallString.h
@@ -30,6 +30,12 @@ class SmallString : public SmallVector<char, InternalLen> {
   /// Initialize from a StringRef.
   SmallString(StringRef S) : SmallVector<char, InternalLen>(S.begin(), S.end()) {}
 
+  /// Initialize by concatenating a list of StringRefs via append().
+  SmallString(std::initializer_list<StringRef> Refs)
+      : SmallVector<char, InternalLen>() {
+    this->append(Refs); // forwards the whole list in a single call
+  }
+
   /// Initialize with a range.
   template<typename ItTy>
   SmallString(ItTy S, ItTy E) : SmallVector<char, InternalLen>(S, E) {}
@@ -65,6 +71,12 @@ class SmallString : public SmallVector<char, InternalLen> {
     SmallVectorImpl<char>::append(RHS.begin(), RHS.end());
   }
 
+  /// Assign from a list of StringRefs: clear the contents, then append them.
+  void assign(std::initializer_list<StringRef> Refs) {
+    this->clear(); // NOTE(review): Refs viewing this string look unsafe - confirm
+    append(Refs);
+  }
+
   /// @}
   /// @name String Concatenation
   /// @{
@@ -89,6 +101,20 @@ class SmallString : public SmallVector<char, InternalLen> {
     SmallVectorImpl<char>::append(RHS.begin(), RHS.end());
   }
 
+  /// Append every StringRef in Refs, in list order, after one reserve() call.
+  void append(std::initializer_list<StringRef> Refs) {
+    size_t SizeNeeded = this->size(); // becomes current size + sum of Ref sizes
+    for (const StringRef &Ref : Refs)
+      SizeNeeded += Ref.size();
+    this->reserve(SizeNeeded); // NOTE(review): Refs into this string may dangle
+    auto CurEnd = this->end(); // write cursor, read only after reserve()
+    for (const StringRef &Ref : Refs) {
+      this->uninitialized_copy(Ref.begin(), Ref.end(), CurEnd);
+      CurEnd += Ref.size();
+    }
+    this->set_size(SizeNeeded); // size is updated once, after all copies
+  }
+
   /// @}
   /// @name String Comparison
   /// @{

diff  --git a/llvm/unittests/ADT/SmallStringTest.cpp b/llvm/unittests/ADT/SmallStringTest.cpp
index e78da9fe5289..3401a8266a6c 100644
--- a/llvm/unittests/ADT/SmallStringTest.cpp
+++ b/llvm/unittests/ADT/SmallStringTest.cpp
@@ -71,6 +71,12 @@ TEST_F(SmallStringTest, AssignSmallVector) {
   EXPECT_STREQ("abc", theString.c_str());
 }
 
+TEST_F(SmallStringTest, AssignStringRefs) { // assign() from a braced list
+  theString.assign({"abc", "def", "ghi"}); // three 3-character pieces
+  EXPECT_EQ(9u, theString.size());
+  EXPECT_STREQ("abcdefghi", theString.c_str()); // pieces joined in list order
+}
+
 TEST_F(SmallStringTest, AppendIterPair) {
   StringRef abc = "abc";
   theString.append(abc.begin(), abc.end());
@@ -96,6 +102,19 @@ TEST_F(SmallStringTest, AppendSmallVector) {
   EXPECT_STREQ("abcabc", theString.c_str());
 }
 
+TEST_F(SmallStringTest, AppendStringRefs) { // append() from braced lists
+  theString.append({"abc", "def", "ghi"}); // string literals only
+  EXPECT_EQ(9u, theString.size());
+  EXPECT_STREQ("abcdefghi", theString.c_str());
+  StringRef Jkl = "jkl"; // four different source types, one list below
+  std::string Mno = "mno";
+  SmallString<4> Pqr("pqr");
+  const char *Stu = "stu";
+  theString.append({Jkl, Mno, Pqr, Stu}); // mixed types in a single call
+  EXPECT_EQ(21u, theString.size()); // 9 existing + 4 * 3 appended
+  EXPECT_STREQ("abcdefghijklmnopqrstu", theString.c_str());
+}
+
 TEST_F(SmallStringTest, StringRefConversion) {
   StringRef abc = "abc";
   theString.assign(abc.begin(), abc.end());


        


More information about the llvm-commits mailing list