[clang] 9d3437f - [ADT] [NFC] Add StringRef::detectEOL

Chris Bieneman via cfe-commits cfe-commits at lists.llvm.org
Fri Jan 21 07:48:03 PST 2022


Author: Chris Bieneman
Date: 2022-01-21T09:47:02-06:00
New Revision: 9d3437fbf3419502351d41ff9e28f06b0c3f06e8

URL: https://github.com/llvm/llvm-project/commit/9d3437fbf3419502351d41ff9e28f06b0c3f06e8
DIFF: https://github.com/llvm/llvm-project/commit/9d3437fbf3419502351d41ff9e28f06b0c3f06e8.diff

LOG: [ADT] [NFC] Add StringRef::detectEOL

This change moves EOL detection out of the clang::InclusionRewriter into
llvm::StringRef so that it can be easily reused elsewhere. It also adds
additional explicit test cases to verify the correct and expected return
results.

Reviewed By: dblaikie

Differential Revision: https://reviews.llvm.org/D117626

Added: 
    

Modified: 
    clang/lib/Frontend/Rewrite/InclusionRewriter.cpp
    llvm/include/llvm/ADT/StringRef.h
    llvm/unittests/ADT/StringRefTest.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/Frontend/Rewrite/InclusionRewriter.cpp b/clang/lib/Frontend/Rewrite/InclusionRewriter.cpp
index 931f3a24c5888..3e8d582f90c27 100644
--- a/clang/lib/Frontend/Rewrite/InclusionRewriter.cpp
+++ b/clang/lib/Frontend/Rewrite/InclusionRewriter.cpp
@@ -251,28 +251,12 @@ bool InclusionRewriter::IsIfAtLocationTrue(SourceLocation Loc) const {
   return false;
 }
 
-/// Detect the likely line ending style of \p FromFile by examining the first
-/// newline found within it.
-static StringRef DetectEOL(const MemoryBufferRef &FromFile) {
-  // Detect what line endings the file uses, so that added content does not mix
-  // the style. We need to check for "\r\n" first because "\n\r" will match
-  // "\r\n\r\n".
-  const char *Pos = strchr(FromFile.getBufferStart(), '\n');
-  if (!Pos)
-    return "\n";
-  if (Pos - 1 >= FromFile.getBufferStart() && Pos[-1] == '\r')
-    return "\r\n";
-  if (Pos + 1 < FromFile.getBufferEnd() && Pos[1] == '\r')
-    return "\n\r";
-  return "\n";
-}
-
 void InclusionRewriter::detectMainFileEOL() {
   Optional<MemoryBufferRef> FromFile = *SM.getBufferOrNone(SM.getMainFileID());
   assert(FromFile);
   if (!FromFile)
     return; // Should never happen, but whatever.
-  MainEOL = DetectEOL(*FromFile);
+  MainEOL = FromFile->getBuffer().detectEOL();
 }
 
 /// Writes out bytes from \p FromFile, starting at \p NextToWrite and ending at
@@ -378,7 +362,7 @@ void InclusionRewriter::Process(FileID FileId,
   Lexer RawLex(FileId, FromFile, PP.getSourceManager(), PP.getLangOpts());
   RawLex.SetCommentRetentionState(false);
 
-  StringRef LocalEOL = DetectEOL(FromFile);
+  StringRef LocalEOL = FromFile.getBuffer().detectEOL();
 
   // Per the GNU docs: "1" indicates entering a new file.
   if (FileId == SM.getMainFileID() || FileId == PP.getPredefinesFileID())

diff  --git a/llvm/include/llvm/ADT/StringRef.h b/llvm/include/llvm/ADT/StringRef.h
index 3950910f0635a..9f64250c58a36 100644
--- a/llvm/include/llvm/ADT/StringRef.h
+++ b/llvm/include/llvm/ADT/StringRef.h
@@ -877,6 +877,25 @@ namespace llvm {
       return ltrim(Chars).rtrim(Chars);
     }
 
+    /// Detect the line ending style of the string.
+    ///
+    /// Only the first carriage return ('\r') is examined: the result is "\r\n"
+    /// if a '\n' follows it, else "\n\r" if a '\n' precedes it, else "\r".
+    /// A string that contains no '\r' at all is reported as "\n" (unix).
+    /// \return - The line ending character sequence.
+    LLVM_NODISCARD
+    StringRef detectEOL() const {
+      size_t Pos = find('\r');
+      if (Pos == npos) {
+        // No carriage return anywhere (even with no '\n' either): assume unix
+        return "\n";
+      }
+      if (Pos + 1 < Length && Data[Pos + 1] == '\n')
+        return "\r\n"; // Windows; tested first, so "\n\r\n" yields "\r\n"
+      if (Pos > 0 && Data[Pos - 1] == '\n')
+        return "\n\r"; // LF immediately followed by CR
+      return "\r";     // Classic Mac: CR with no adjacent LF
+    }
     /// @}
   };
 

diff  --git a/llvm/unittests/ADT/StringRefTest.cpp b/llvm/unittests/ADT/StringRefTest.cpp
index 41c35804f1226..e80a25a19969c 100644
--- a/llvm/unittests/ADT/StringRefTest.cpp
+++ b/llvm/unittests/ADT/StringRefTest.cpp
@@ -1109,6 +1109,36 @@ TEST(StringRefTest, GTestPrinter) {
   EXPECT_EQ(R"("foo")", ::testing::PrintToString(StringRef("foo")));
 }
 
+TEST(StringRefTest, LFLineEnding) {
+  constexpr StringRef Cases[] = {"\nDoggo\nPupper", "Floofer\n", "Woofer"};
+  EXPECT_EQ(StringRef("\n"), Cases[0].detectEOL());
+  EXPECT_EQ(StringRef("\n"), Cases[1].detectEOL());
+  EXPECT_EQ(StringRef("\n"), Cases[2].detectEOL());
+}
+
+TEST(StringRefTest, CRLineEnding) {
+  constexpr StringRef Cases[] = {"\rDoggo\rPupper", "Floofer\r", "Woo\rfer\n"};
+  EXPECT_EQ(StringRef("\r"), Cases[0].detectEOL());
+  EXPECT_EQ(StringRef("\r"), Cases[1].detectEOL());
+  EXPECT_EQ(StringRef("\r"), Cases[2].detectEOL());
+}
+
+TEST(StringRefTest, CRLFLineEnding) {
+  constexpr StringRef Cases[] = {"\r\nDoggo\r\nPupper", "Floofer\r\n",
+                                 "Woofer\r\nSubWoofer\n"};
+  EXPECT_EQ(StringRef("\r\n"), Cases[0].detectEOL());
+  EXPECT_EQ(StringRef("\r\n"), Cases[1].detectEOL());
+  EXPECT_EQ(StringRef("\r\n"), Cases[2].detectEOL());
+}
+
+TEST(StringRefTest, LFCRLineEnding) {
+  constexpr StringRef Cases[] = {"\n\rDoggo\n\rPupper", "Floofer\n\r",
+                                 "Woofer\n\rSubWoofer\n"};
+  EXPECT_EQ(StringRef("\n\r"), Cases[0].detectEOL());
+  EXPECT_EQ(StringRef("\n\r"), Cases[1].detectEOL());
+  EXPECT_EQ(StringRef("\n\r"), Cases[2].detectEOL());
+}
+
 static_assert(std::is_trivially_copyable<StringRef>::value,
               "trivially copyable");
 


        


More information about the cfe-commits mailing list