[clang] c227192 - Make clang-format fuzz through Lexing with asserts enabled.

Manuel Klimek via cfe-commits cfe-commits at lists.llvm.org
Fri Nov 19 05:44:19 PST 2021


Author: Manuel Klimek
Date: 2021-11-19T14:44:06+01:00
New Revision: c2271926a4fc395e05cf75a8e57c2dfab1f02d3d

URL: https://github.com/llvm/llvm-project/commit/c2271926a4fc395e05cf75a8e57c2dfab1f02d3d
DIFF: https://github.com/llvm/llvm-project/commit/c2271926a4fc395e05cf75a8e57c2dfab1f02d3d.diff

LOG: Make clang-format fuzz through Lexing with asserts enabled.

Makes clang-format bail out if an in-memory source file with an
unsupported BOM is handed in instead of creating source locations that
are violating clang's assumptions.

In the future, we should add support to better transport error messages
like this through clang-format instead of printing to stderr and not
creating any changes.

Added: 
    

Modified: 
    clang/lib/Format/Format.cpp
    clang/lib/Format/QualifierAlignmentFixer.cpp
    clang/lib/Format/SortJavaScriptImports.cpp
    clang/lib/Format/TokenAnalyzer.cpp
    clang/lib/Format/TokenAnalyzer.h

Removed: 
    


################################################################################
diff  --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp
index 1d60f2b3a321b..085cca8853e62 100644
--- a/clang/lib/Format/Format.cpp
+++ b/clang/lib/Format/Format.cpp
@@ -2989,8 +2989,10 @@ reformat(const FormatStyle &Style, StringRef Code,
   if (Style.isJson()) {
     std::vector<tooling::Range> Ranges(1, tooling::Range(0, Code.size()));
     auto Env =
-        std::make_unique<Environment>(Code, FileName, Ranges, FirstStartColumn,
+        Environment::make(Code, FileName, Ranges, FirstStartColumn,
                                       NextStartColumn, LastStartColumn);
+    if (!Env)
+      return {};
     // Perform the actual formatting pass.
     tooling::Replacements Replaces =
         Formatter(*Env, Style, Status).process().first;
@@ -3046,9 +3048,10 @@ reformat(const FormatStyle &Style, StringRef Code,
       return TrailingCommaInserter(Env, Expanded).process();
     });
 
-  auto Env =
-      std::make_unique<Environment>(Code, FileName, Ranges, FirstStartColumn,
-                                    NextStartColumn, LastStartColumn);
+  auto Env = Environment::make(Code, FileName, Ranges, FirstStartColumn,
+                               NextStartColumn, LastStartColumn);
+  if (!Env)
+    return {};
   llvm::Optional<std::string> CurrentCode = None;
   tooling::Replacements Fixes;
   unsigned Penalty = 0;
@@ -3061,10 +3064,12 @@ reformat(const FormatStyle &Style, StringRef Code,
       Penalty += PassFixes.second;
       if (I + 1 < E) {
         CurrentCode = std::move(*NewCode);
-        Env = std::make_unique<Environment>(
+        Env = Environment::make(
             *CurrentCode, FileName,
             tooling::calculateRangesAfterReplacements(Fixes, Ranges),
             FirstStartColumn, NextStartColumn, LastStartColumn);
+        if (!Env)
+          return {};
       }
     }
   }
@@ -3090,7 +3095,10 @@ tooling::Replacements cleanup(const FormatStyle &Style, StringRef Code,
   // cleanups only apply to C++ (they mostly concern ctor commas etc.)
   if (Style.Language != FormatStyle::LK_Cpp)
     return tooling::Replacements();
-  return Cleaner(Environment(Code, FileName, Ranges), Style).process().first;
+  auto Env = Environment::make(Code, FileName, Ranges);
+  if (!Env)
+    return {};
+  return Cleaner(*Env, Style).process().first;
 }
 
 tooling::Replacements reformat(const FormatStyle &Style, StringRef Code,
@@ -3107,7 +3115,10 @@ tooling::Replacements fixNamespaceEndComments(const FormatStyle &Style,
                                               StringRef Code,
                                               ArrayRef<tooling::Range> Ranges,
                                               StringRef FileName) {
-  return NamespaceEndCommentsFixer(Environment(Code, FileName, Ranges), Style)
+  auto Env = Environment::make(Code, FileName, Ranges);
+  if (!Env)
+    return {};
+  return NamespaceEndCommentsFixer(*Env, Style)
       .process()
       .first;
 }
@@ -3116,7 +3127,10 @@ tooling::Replacements sortUsingDeclarations(const FormatStyle &Style,
                                             StringRef Code,
                                             ArrayRef<tooling::Range> Ranges,
                                             StringRef FileName) {
-  return UsingDeclarationsSorter(Environment(Code, FileName, Ranges), Style)
+  auto Env = Environment::make(Code, FileName, Ranges);
+  if (!Env)
+    return {};
+  return UsingDeclarationsSorter(*Env, Style)
       .process()
       .first;
 }

diff  --git a/clang/lib/Format/QualifierAlignmentFixer.cpp b/clang/lib/Format/QualifierAlignmentFixer.cpp
index c70705a1cd7ff..5a89225c7fc86 100644
--- a/clang/lib/Format/QualifierAlignmentFixer.cpp
+++ b/clang/lib/Format/QualifierAlignmentFixer.cpp
@@ -61,10 +61,10 @@ QualifierAlignmentFixer::QualifierAlignmentFixer(
 std::pair<tooling::Replacements, unsigned> QualifierAlignmentFixer::analyze(
     TokenAnnotator &Annotator, SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
     FormatTokenLexer &Tokens) {
-
-  auto Env =
-      std::make_unique<Environment>(Code, FileName, Ranges, FirstStartColumn,
-                                    NextStartColumn, LastStartColumn);
+  auto Env = Environment::make(Code, FileName, Ranges, FirstStartColumn,
+                               NextStartColumn, LastStartColumn);
+  if (!Env)
+    return {};
   llvm::Optional<std::string> CurrentCode = None;
   tooling::Replacements Fixes;
   for (size_t I = 0, E = Passes.size(); I < E; ++I) {
@@ -75,10 +75,12 @@ std::pair<tooling::Replacements, unsigned> QualifierAlignmentFixer::analyze(
       Fixes = Fixes.merge(PassFixes.first);
       if (I + 1 < E) {
         CurrentCode = std::move(*NewCode);
-        Env = std::make_unique<Environment>(
+        Env = Environment::make(
             *CurrentCode, FileName,
             tooling::calculateRangesAfterReplacements(Fixes, Ranges),
             FirstStartColumn, NextStartColumn, LastStartColumn);
+        if (!Env)
+          return {};
       }
     }
   }

diff  --git a/clang/lib/Format/SortJavaScriptImports.cpp b/clang/lib/Format/SortJavaScriptImports.cpp
index a5e3ce69207bd..515cfce725a42 100644
--- a/clang/lib/Format/SortJavaScriptImports.cpp
+++ b/clang/lib/Format/SortJavaScriptImports.cpp
@@ -550,7 +550,10 @@ tooling::Replacements sortJavaScriptImports(const FormatStyle &Style,
                                             ArrayRef<tooling::Range> Ranges,
                                             StringRef FileName) {
   // FIXME: Cursor support.
-  return JavaScriptImportSorter(Environment(Code, FileName, Ranges), Style)
+  auto Env = Environment::make(Code, FileName, Ranges);
+  if (!Env)
+    return {};
+  return JavaScriptImportSorter(*Env, Style)
       .process()
       .first;
 }

diff  --git a/clang/lib/Format/TokenAnalyzer.cpp b/clang/lib/Format/TokenAnalyzer.cpp
index f1459a808ff84..a619c6d939e99 100644
--- a/clang/lib/Format/TokenAnalyzer.cpp
+++ b/clang/lib/Format/TokenAnalyzer.cpp
@@ -26,26 +26,61 @@
 #include "clang/Basic/SourceManager.h"
 #include "clang/Format/Format.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/Debug.h"
+#include <type_traits>
 
 #define DEBUG_TYPE "format-formatter"
 
 namespace clang {
 namespace format {
 
+// FIXME: Instead of printing the diagnostic we should store it and have a
+// better way to return errors through the format APIs.
+class FatalDiagnosticConsumer: public DiagnosticConsumer {
+public:
+  void HandleDiagnostic(DiagnosticsEngine::Level DiagLevel,
+                        const Diagnostic &Info) override {
+    if (DiagLevel == DiagnosticsEngine::Fatal) {
+      Fatal = true;
+      llvm::SmallVector<char, 128> Message;
+      Info.FormatDiagnostic(Message);
+      llvm::errs() << Message << "\n";
+    }
+  }
+
+  bool fatalError() const { return Fatal; }
+
+private:
+  bool Fatal = false;
+};
+
+std::unique_ptr<Environment>
+Environment::make(StringRef Code, StringRef FileName,
+                  ArrayRef<tooling::Range> Ranges, unsigned FirstStartColumn,
+                  unsigned NextStartColumn, unsigned LastStartColumn) {
+  auto Env = std::make_unique<Environment>(Code, FileName, FirstStartColumn,
+                                           NextStartColumn, LastStartColumn);
+  FatalDiagnosticConsumer Diags;
+  Env->SM.getDiagnostics().setClient(&Diags, /*ShouldOwnClient=*/false);
+  SourceLocation StartOfFile = Env->SM.getLocForStartOfFile(Env->ID);
+  for (const tooling::Range &Range : Ranges) {
+    SourceLocation Start = StartOfFile.getLocWithOffset(Range.getOffset());
+    SourceLocation End = Start.getLocWithOffset(Range.getLength());
+    Env->CharRanges.push_back(CharSourceRange::getCharRange(Start, End));
+  }
+  // Validate that we can get the buffer data without a fatal error.
+  Env->SM.getBufferData(Env->ID);
+  if (Diags.fatalError()) return nullptr;
+  return Env;
+}
+
 Environment::Environment(StringRef Code, StringRef FileName,
-                         ArrayRef<tooling::Range> Ranges,
                          unsigned FirstStartColumn, unsigned NextStartColumn,
                          unsigned LastStartColumn)
     : VirtualSM(new SourceManagerForFile(FileName, Code)), SM(VirtualSM->get()),
       ID(VirtualSM->get().getMainFileID()), FirstStartColumn(FirstStartColumn),
       NextStartColumn(NextStartColumn), LastStartColumn(LastStartColumn) {
-  SourceLocation StartOfFile = SM.getLocForStartOfFile(ID);
-  for (const tooling::Range &Range : Ranges) {
-    SourceLocation Start = StartOfFile.getLocWithOffset(Range.getOffset());
-    SourceLocation End = Start.getLocWithOffset(Range.getLength());
-    CharRanges.push_back(CharSourceRange::getCharRange(Start, End));
-  }
 }
 
 TokenAnalyzer::TokenAnalyzer(const Environment &Env, const FormatStyle &Style)

diff  --git a/clang/lib/Format/TokenAnalyzer.h b/clang/lib/Format/TokenAnalyzer.h
index 5ce44a0f3ea79..aaca518df41f3 100644
--- a/clang/lib/Format/TokenAnalyzer.h
+++ b/clang/lib/Format/TokenAnalyzer.h
@@ -29,6 +29,7 @@
 #include "clang/Format/Format.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/Debug.h"
+#include <memory>
 
 namespace clang {
 namespace format {
@@ -40,8 +41,7 @@ class Environment {
   // that the next lines of \p Code should start at \p NextStartColumn, and
   // that \p Code should end at \p LastStartColumn if it ends in newline.
   // See also the documentation of clang::format::internal::reformat.
-  Environment(StringRef Code, StringRef FileName,
-              ArrayRef<tooling::Range> Ranges, unsigned FirstStartColumn = 0,
+  Environment(StringRef Code, StringRef FileName, unsigned FirstStartColumn = 0,
               unsigned NextStartColumn = 0, unsigned LastStartColumn = 0);
 
   FileID getFileID() const { return ID; }
@@ -62,6 +62,14 @@ class Environment {
   // environment should end if it ends in a newline.
   unsigned getLastStartColumn() const { return LastStartColumn; }
 
+  // Returns nullptr and prints a diagnostic to stderr if the environment
+  // can't be created.
+  static std::unique_ptr<Environment> make(StringRef Code, StringRef FileName,
+                                           ArrayRef<tooling::Range> Ranges,
+                                           unsigned FirstStartColumn = 0,
+                                           unsigned NextStartColumn = 0,
+                                           unsigned LastStartColumn = 0);
+
 private:
   // This is only set if constructed from string.
   std::unique_ptr<SourceManagerForFile> VirtualSM;


        


More information about the cfe-commits mailing list