[clang] 8fa5e98 - [clang-format] Remove duplciate code from Invalid BOM detection

via cfe-commits cfe-commits at lists.llvm.org
Thu Oct 24 12:25:10 PDT 2019


Author: paulhoad
Date: 2019-10-24T20:24:44+01:00
New Revision: 8fa5e98fd191d02fc7e0e220d74af267b9140e6a

URL: https://github.com/llvm/llvm-project/commit/8fa5e98fd191d02fc7e0e220d74af267b9140e6a
DIFF: https://github.com/llvm/llvm-project/commit/8fa5e98fd191d02fc7e0e220d74af267b9140e6a.diff

LOG: [clang-format] Remove duplciate code from Invalid BOM detection

Summary:
Review comments on {D68767} asked that this duplicated code in clang-format was moved to one central location that being SourceManager (where it had originally be copied from I assume)

Moved function into static function  ContentCache::getInvalidBOM(...)  - (closest class to where it was defined before)
Updated clang-format to call this static function

Added unit tests for said new function in BasicTests

Sorry not my normal code area so may have the wrong reviewers. (but your names were on the recent history)

Reviewers: bruno, arphaman, klimek, owenpan, mitchell-stellar, dexonsmith

Reviewed By: owenpan

Subscribers: cfe-commits

Tags: #clang, #clang-format, #clang-tools-extra

Differential Revision: https://reviews.llvm.org/D68914

Added: 
    

Modified: 
    clang/include/clang/Basic/SourceManager.h
    clang/lib/Basic/SourceManager.cpp
    clang/tools/clang-format/ClangFormat.cpp
    clang/unittests/Basic/SourceManagerTest.cpp

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/SourceManager.h b/clang/include/clang/Basic/SourceManager.h
index 3185ca0f4a25..ec1b0bcf9897 100644
--- a/clang/include/clang/Basic/SourceManager.h
+++ b/clang/include/clang/Basic/SourceManager.h
@@ -226,6 +226,10 @@ namespace SrcMgr {
     bool shouldFreeBuffer() const {
       return (Buffer.getInt() & DoNotFreeFlag) == 0;
     }
+
+    // If BufStr has an invalid BOM, returns the BOM name; otherwise, returns
+    // nullptr
+    static const char *getInvalidBOM(StringRef BufStr);
   };
 
   // Assert that the \c ContentCache objects will always be 8-byte aligned so

diff  --git a/clang/lib/Basic/SourceManager.cpp b/clang/lib/Basic/SourceManager.cpp
index 58b95289eaf2..5f457d6f9e3d 100644
--- a/clang/lib/Basic/SourceManager.cpp
+++ b/clang/lib/Basic/SourceManager.cpp
@@ -95,6 +95,29 @@ void ContentCache::replaceBuffer(const llvm::MemoryBuffer *B, bool DoNotFree) {
   Buffer.setInt((B && DoNotFree) ? DoNotFreeFlag : 0);
 }
 
+const char *ContentCache::getInvalidBOM(StringRef BufStr) {
+  // If the buffer is valid, check to see if it has a UTF Byte Order Mark
+  // (BOM).  We only support UTF-8 with and without a BOM right now.  See
+  // http://en.wikipedia.org/wiki/Byte_order_mark for more information.
+  const char *InvalidBOM =
+      llvm::StringSwitch<const char *>(BufStr)
+          .StartsWith(llvm::StringLiteral::withInnerNUL("\x00\x00\xFE\xFF"),
+                      "UTF-32 (BE)")
+          .StartsWith(llvm::StringLiteral::withInnerNUL("\xFF\xFE\x00\x00"),
+                      "UTF-32 (LE)")
+          .StartsWith("\xFE\xFF", "UTF-16 (BE)")
+          .StartsWith("\xFF\xFE", "UTF-16 (LE)")
+          .StartsWith("\x2B\x2F\x76", "UTF-7")
+          .StartsWith("\xF7\x64\x4C", "UTF-1")
+          .StartsWith("\xDD\x73\x66\x73", "UTF-EBCDIC")
+          .StartsWith("\x0E\xFE\xFF", "SCSU")
+          .StartsWith("\xFB\xEE\x28", "BOCU-1")
+          .StartsWith("\x84\x31\x95\x33", "GB-18030")
+          .Default(nullptr);
+
+  return InvalidBOM;
+}
+
 const llvm::MemoryBuffer *ContentCache::getBuffer(DiagnosticsEngine &Diag,
                                                   FileManager &FM,
                                                   SourceLocation Loc,
@@ -190,20 +213,7 @@ const llvm::MemoryBuffer *ContentCache::getBuffer(DiagnosticsEngine &Diag,
   // (BOM).  We only support UTF-8 with and without a BOM right now.  See
   // http://en.wikipedia.org/wiki/Byte_order_mark for more information.
   StringRef BufStr = Buffer.getPointer()->getBuffer();
-  const char *InvalidBOM = llvm::StringSwitch<const char *>(BufStr)
-    .StartsWith(llvm::StringLiteral::withInnerNUL("\x00\x00\xFE\xFF"),
-                                                  "UTF-32 (BE)")
-    .StartsWith(llvm::StringLiteral::withInnerNUL("\xFF\xFE\x00\x00"),
-                                                  "UTF-32 (LE)")
-    .StartsWith("\xFE\xFF", "UTF-16 (BE)")
-    .StartsWith("\xFF\xFE", "UTF-16 (LE)")
-    .StartsWith("\x2B\x2F\x76", "UTF-7")
-    .StartsWith("\xF7\x64\x4C", "UTF-1")
-    .StartsWith("\xDD\x73\x66\x73", "UTF-EBCDIC")
-    .StartsWith("\x0E\xFE\xFF", "SCSU")
-    .StartsWith("\xFB\xEE\x28", "BOCU-1")
-    .StartsWith("\x84\x31\x95\x33", "GB-18030")
-    .Default(nullptr);
+  const char *InvalidBOM = getInvalidBOM(BufStr);
 
   if (InvalidBOM) {
     Diag.Report(Loc, diag::err_unsupported_bom)

diff  --git a/clang/tools/clang-format/ClangFormat.cpp b/clang/tools/clang-format/ClangFormat.cpp
index a10541d88f07..cbbb52bd0aa8 100644
--- a/clang/tools/clang-format/ClangFormat.cpp
+++ b/clang/tools/clang-format/ClangFormat.cpp
@@ -289,31 +289,6 @@ static void outputReplacementsXML(const Replacements &Replaces) {
   }
 }
 
-// If BufStr has an invalid BOM, returns the BOM name; otherwise, returns
-// nullptr.
-static const char *getInValidBOM(StringRef BufStr) {
-  // Check to see if the buffer has a UTF Byte Order Mark (BOM).
-  // We only support UTF-8 with and without a BOM right now.  See
-  // https://en.wikipedia.org/wiki/Byte_order_mark#Byte_order_marks_by_encoding
-  // for more information.
-  const char *InvalidBOM =
-      llvm::StringSwitch<const char *>(BufStr)
-          .StartsWith(llvm::StringLiteral::withInnerNUL("\x00\x00\xFE\xFF"),
-                      "UTF-32 (BE)")
-          .StartsWith(llvm::StringLiteral::withInnerNUL("\xFF\xFE\x00\x00"),
-                      "UTF-32 (LE)")
-          .StartsWith("\xFE\xFF", "UTF-16 (BE)")
-          .StartsWith("\xFF\xFE", "UTF-16 (LE)")
-          .StartsWith("\x2B\x2F\x76", "UTF-7")
-          .StartsWith("\xF7\x64\x4C", "UTF-1")
-          .StartsWith("\xDD\x73\x66\x73", "UTF-EBCDIC")
-          .StartsWith("\x0E\xFE\xFF", "SCSU")
-          .StartsWith("\xFB\xEE\x28", "BOCU-1")
-          .StartsWith("\x84\x31\x95\x33", "GB-18030")
-          .Default(nullptr);
-  return InvalidBOM;
-}
-
 static bool
 emitReplacementWarnings(const Replacements &Replaces, StringRef AssumedFileName,
                         const std::unique_ptr<llvm::MemoryBuffer> &Code) {
@@ -412,7 +387,7 @@ static bool format(StringRef FileName) {
 
   StringRef BufStr = Code->getBuffer();
 
-  const char *InvalidBOM = getInValidBOM(BufStr);
+  const char *InvalidBOM = SrcMgr::ContentCache::getInvalidBOM(BufStr);
 
   if (InvalidBOM) {
     errs() << "error: encoding with unsupported byte order mark \""

diff  --git a/clang/unittests/Basic/SourceManagerTest.cpp b/clang/unittests/Basic/SourceManagerTest.cpp
index bc7031e1fd29..465f7a06f71a 100644
--- a/clang/unittests/Basic/SourceManagerTest.cpp
+++ b/clang/unittests/Basic/SourceManagerTest.cpp
@@ -200,6 +200,47 @@ TEST_F(SourceManagerTest, locationPrintTest) {
             "</mainFile.cpp:1:1, /test-header.h:1:1>");
 }
 
+TEST_F(SourceManagerTest, getInvalidBOM) {
+  ASSERT_EQ(SrcMgr::ContentCache::getInvalidBOM(""), nullptr);
+  ASSERT_EQ(SrcMgr::ContentCache::getInvalidBOM("\x00\x00\x00"), nullptr);
+  ASSERT_EQ(SrcMgr::ContentCache::getInvalidBOM("\xFF\xFF\xFF"), nullptr);
+  ASSERT_EQ(SrcMgr::ContentCache::getInvalidBOM("#include <iostream>"),
+            nullptr);
+
+  ASSERT_EQ(StringRef(SrcMgr::ContentCache::getInvalidBOM(
+                "\xFE\xFF#include <iostream>")),
+            "UTF-16 (BE)");
+  ASSERT_EQ(StringRef(SrcMgr::ContentCache::getInvalidBOM(
+                "\xFF\xFE#include <iostream>")),
+            "UTF-16 (LE)");
+  ASSERT_EQ(StringRef(SrcMgr::ContentCache::getInvalidBOM(
+                "\x2B\x2F\x76#include <iostream>")),
+            "UTF-7");
+  ASSERT_EQ(StringRef(SrcMgr::ContentCache::getInvalidBOM(
+                "\xF7\x64\x4C#include <iostream>")),
+            "UTF-1");
+  ASSERT_EQ(StringRef(SrcMgr::ContentCache::getInvalidBOM(
+                "\xDD\x73\x66\x73#include <iostream>")),
+            "UTF-EBCDIC");
+  ASSERT_EQ(StringRef(SrcMgr::ContentCache::getInvalidBOM(
+                "\x0E\xFE\xFF#include <iostream>")),
+            "SCSU");
+  ASSERT_EQ(StringRef(SrcMgr::ContentCache::getInvalidBOM(
+                "\xFB\xEE\x28#include <iostream>")),
+            "BOCU-1");
+  ASSERT_EQ(StringRef(SrcMgr::ContentCache::getInvalidBOM(
+                "\x84\x31\x95\x33#include <iostream>")),
+            "GB-18030");
+  ASSERT_EQ(StringRef(SrcMgr::ContentCache::getInvalidBOM(
+                llvm::StringLiteral::withInnerNUL(
+                    "\x00\x00\xFE\xFF#include <iostream>"))),
+            "UTF-32 (BE)");
+  ASSERT_EQ(StringRef(SrcMgr::ContentCache::getInvalidBOM(
+                llvm::StringLiteral::withInnerNUL(
+                    "\xFF\xFE\x00\x00#include <iostream>"))),
+            "UTF-32 (LE)");
+}
+
 #if defined(LLVM_ON_UNIX)
 
 TEST_F(SourceManagerTest, getMacroArgExpandedLocation) {


        


More information about the cfe-commits mailing list