[clang] 05bea53 - [clang-format] [PR49298] Sort includes pass will sort inside raw strings

via cfe-commits cfe-commits at lists.llvm.org
Sun Dec 12 09:01:14 PST 2021


Author: mydeveloperday
Date: 2021-12-12T17:00:43Z
New Revision: 05bea533d1fc1bcd63ccb84df9d0edab9701ad70

URL: https://github.com/llvm/llvm-project/commit/05bea533d1fc1bcd63ccb84df9d0edab9701ad70
DIFF: https://github.com/llvm/llvm-project/commit/05bea533d1fc1bcd63ccb84df9d0edab9701ad70.diff

LOG: [clang-format] [PR49298] Sort includes pass will sort inside raw strings

https://github.com/llvm/llvm-project/issues/48642

clang-format does not respect raw string literals when sorting includes

```
const char *RawStr = R"(
#include <b.h>
#include <a.h>
)";
```

Running clang-format over this code with SortIncludes enabled transforms it to:

```
const char *RawStr = R"(
#include <a.h>
#include <b.h>
)";
```
The following code tries to minimize this impact during include sorting by treating R"( and )" as the equivalent of // clang-format off and // clang-format on.

Reviewed By: HazardyKnusperkeks, curdeius

Differential Revision: https://reviews.llvm.org/D115168

Fixes #48642

Added: 
    

Modified: 
    clang/lib/Format/Format.cpp
    clang/unittests/Format/SortIncludesTest.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp
index 17de1075aeaa4..8fb1fa4d618fc 100644
--- a/clang/lib/Format/Format.cpp
+++ b/clang/lib/Format/Format.cpp
@@ -2586,12 +2586,31 @@ tooling::Replacements sortCppIncludes(const FormatStyle &Style, StringRef Code,
   bool MainIncludeFound = false;
   bool FormattingOff = false;
 
+  llvm::Regex RawStringRegex(
+      "R\"(([\\[A-Za-z0-9_{}#<>%:;.?*+/^&\\$|~!=,'\\-]|])*)\\(");
+  SmallVector<StringRef, 2> RawStringMatches;
+  std::string RawStringTermination = ")\"";
+
   for (;;) {
     auto Pos = Code.find('\n', SearchFrom);
     StringRef Line =
         Code.substr(Prev, (Pos != StringRef::npos ? Pos : Code.size()) - Prev);
 
     StringRef Trimmed = Line.trim();
+
+    // #includes inside raw string literals need to be ignored.
+    // or we will sort the contents of the string.
+    // Skip past until we think we are at the rawstring literal close.
+    if (RawStringRegex.match(Trimmed, &RawStringMatches)) {
+      std::string CharSequence = RawStringMatches[1].str();
+      RawStringTermination = ")" + CharSequence + "\"";
+      FormattingOff = true;
+    }
+
+    if (Trimmed.contains(RawStringTermination)) {
+      FormattingOff = false;
+    }
+
     if (Trimmed == "// clang-format off" || Trimmed == "/* clang-format off */")
       FormattingOff = true;
     else if (Trimmed == "// clang-format on" ||

diff  --git a/clang/unittests/Format/SortIncludesTest.cpp b/clang/unittests/Format/SortIncludesTest.cpp
index 4efeb96124bf4..1d215af715382 100644
--- a/clang/unittests/Format/SortIncludesTest.cpp
+++ b/clang/unittests/Format/SortIncludesTest.cpp
@@ -1045,6 +1045,153 @@ TEST_F(SortIncludesTest, DisableFormatDisablesIncludeSorting) {
   EXPECT_EQ(Unsorted, sort(Unsorted, "input.cpp", 0));
 }
 
+TEST_F(SortIncludesTest, DisableRawStringLiteralSorting) {
+
+  EXPECT_EQ("const char *t = R\"(\n"
+            "#include <b.h>\n"
+            "#include <a.h>\n"
+            ")\";",
+            sort("const char *t = R\"(\n"
+                 "#include <b.h>\n"
+                 "#include <a.h>\n"
+                 ")\";",
+                 "test.cxx", 0));
+  EXPECT_EQ("const char *t = R\"x(\n"
+            "#include <b.h>\n"
+            "#include <a.h>\n"
+            ")x\";",
+            sort("const char *t = R\"x(\n"
+                 "#include <b.h>\n"
+                 "#include <a.h>\n"
+                 ")x\";",
+                 "test.cxx", 0));
+  EXPECT_EQ("const char *t = R\"xyz(\n"
+            "#include <b.h>\n"
+            "#include <a.h>\n"
+            ")xyz\";",
+            sort("const char *t = R\"xyz(\n"
+                 "#include <b.h>\n"
+                 "#include <a.h>\n"
+                 ")xyz\";",
+                 "test.cxx", 0));
+
+  EXPECT_EQ("#include <a.h>\n"
+            "#include <b.h>\n"
+            "const char *t = R\"(\n"
+            "#include <b.h>\n"
+            "#include <a.h>\n"
+            ")\";\n"
+            "#include <c.h>\n"
+            "#include <d.h>\n"
+            "const char *t = R\"x(\n"
+            "#include <f.h>\n"
+            "#include <e.h>\n"
+            ")x\";\n"
+            "#include <g.h>\n"
+            "#include <h.h>\n"
+            "const char *t = R\"xyz(\n"
+            "#include <j.h>\n"
+            "#include <i.h>\n"
+            ")xyz\";\n"
+            "#include <k.h>\n"
+            "#include <l.h>",
+            sort("#include <b.h>\n"
+                 "#include <a.h>\n"
+                 "const char *t = R\"(\n"
+                 "#include <b.h>\n"
+                 "#include <a.h>\n"
+                 ")\";\n"
+                 "#include <d.h>\n"
+                 "#include <c.h>\n"
+                 "const char *t = R\"x(\n"
+                 "#include <f.h>\n"
+                 "#include <e.h>\n"
+                 ")x\";\n"
+                 "#include <h.h>\n"
+                 "#include <g.h>\n"
+                 "const char *t = R\"xyz(\n"
+                 "#include <j.h>\n"
+                 "#include <i.h>\n"
+                 ")xyz\";\n"
+                 "#include <l.h>\n"
+                 "#include <k.h>",
+                 "test.cc", 4));
+
+  EXPECT_EQ("const char *t = R\"AMZ029amz(\n"
+            "#include <b.h>\n"
+            "#include <a.h>\n"
+            ")AMZ029amz\";",
+            sort("const char *t = R\"AMZ029amz(\n"
+                 "#include <b.h>\n"
+                 "#include <a.h>\n"
+                 ")AMZ029amz\";",
+                 "test.cxx", 0));
+
+  EXPECT_EQ("const char *t = R\"-AMZ029amz(\n"
+            "#include <b.h>\n"
+            "#include <a.h>\n"
+            ")-AMZ029amz\";",
+            sort("const char *t = R\"-AMZ029amz(\n"
+                 "#include <b.h>\n"
+                 "#include <a.h>\n"
+                 ")-AMZ029amz\";",
+                 "test.cxx", 0));
+
+  EXPECT_EQ("const char *t = R\"AMZ029amz-(\n"
+            "#include <b.h>\n"
+            "#include <a.h>\n"
+            ")AMZ029amz-\";",
+            sort("const char *t = R\"AMZ029amz-(\n"
+                 "#include <b.h>\n"
+                 "#include <a.h>\n"
+                 ")AMZ029amz-\";",
+                 "test.cxx", 0));
+
+  EXPECT_EQ("const char *t = R\"AM|029amz-(\n"
+            "#include <b.h>\n"
+            "#include <a.h>\n"
+            ")AM|029amz-\";",
+            sort("const char *t = R\"AM|029amz-(\n"
+                 "#include <b.h>\n"
+                 "#include <a.h>\n"
+                 ")AM|029amz-\";",
+                 "test.cxx", 0));
+
+  EXPECT_EQ("const char *t = R\"AM[029amz-(\n"
+            "#include <b.h>\n"
+            "#include <a.h>\n"
+            ")AM[029amz-\";",
+            sort("const char *t = R\"AM[029amz-(\n"
+                 "#include <b.h>\n"
+                 "#include <a.h>\n"
+                 ")AM[029amz-\";",
+                 "test.cxx", 0));
+
+  EXPECT_EQ("const char *t = R\"AM]029amz-(\n"
+            "#include <b.h>\n"
+            "#include <a.h>\n"
+            ")AM]029amz-\";",
+            sort("const char *t = R\"AM]029amz-(\n"
+                 "#include <b.h>\n"
+                 "#include <a.h>\n"
+                 ")AM]029amz-\";",
+                 "test.cxx", 0));
+
+#define X "AMZ029amz{}+!%*=_:;',.<>|/?#~-$"
+
+  EXPECT_EQ("const char *t = R\"" X "(\n"
+            "#include <b.h>\n"
+            "#include <a.h>\n"
+            ")" X "\";",
+            sort("const char *t = R\"" X "(\n"
+                 "#include <b.h>\n"
+                 "#include <a.h>\n"
+                 ")" X "\";",
+                 "test.cxx", 0));
+
+#undef X
+}
+
 } // end namespace
 } // end namespace format
 } // end namespace clang


        


More information about the cfe-commits mailing list