[clang] [clang-format] Optimize processing .clang-format-ignore files (PR #76733)

Owen Pan via cfe-commits cfe-commits at lists.llvm.org
Wed Jan 3 19:58:36 PST 2024


https://github.com/owenca updated https://github.com/llvm/llvm-project/pull/76733

>From 7f8da18dc59706df8f1ee15d22076b4794881579 Mon Sep 17 00:00:00 2001
From: Owen Pan <owenpiano at gmail.com>
Date: Mon, 1 Jan 2024 19:10:30 -0800
Subject: [PATCH 1/2] [clang-format] Optimize processing .clang-format-ignore
 files

Reuse the patterns governing the previous input file being formatted if the
current input file is from the same directory.
---
 clang/docs/ClangFormat.rst                |  6 ++-
 clang/test/Format/clang-format-ignore.cpp | 17 +++++-
 clang/tools/clang-format/ClangFormat.cpp  | 65 ++++++++++++++++-------
 3 files changed, 65 insertions(+), 23 deletions(-)

diff --git a/clang/docs/ClangFormat.rst b/clang/docs/ClangFormat.rst
index 8d4017b29fb8ee..819d9ee9f9cde1 100644
--- a/clang/docs/ClangFormat.rst
+++ b/clang/docs/ClangFormat.rst
@@ -131,6 +131,9 @@ An easy way to create the ``.clang-format`` file is:
 
 Available style options are described in :doc:`ClangFormatStyleOptions`.
 
+.clang-format-ignore
+====================
+
 You can create ``.clang-format-ignore`` files to make ``clang-format`` ignore
 certain files. A ``.clang-format-ignore`` file consists of patterns of file path
 names. It has the following format:
@@ -141,7 +144,8 @@ names. It has the following format:
 * A non-comment line is a single pattern.
 * The slash (``/``) is used as the directory separator.
 * A pattern is relative to the directory of the ``.clang-format-ignore`` file
-  (or the root directory if the pattern starts with a slash).
+  (or the root directory if the pattern starts with a slash). Patterns
+  containing drive names (e.g. ``C:``) are not supported.
 * Patterns follow the rules specified in `POSIX 2.13.1, 2.13.2, and Rule 1 of
   2.13.3 <https://pubs.opengroup.org/onlinepubs/9699919799/utilities/
   V3_chap02.html#tag_18_13>`_.
diff --git a/clang/test/Format/clang-format-ignore.cpp b/clang/test/Format/clang-format-ignore.cpp
index 0d6396a64a668d..4d0c4073308edc 100644
--- a/clang/test/Format/clang-format-ignore.cpp
+++ b/clang/test/Format/clang-format-ignore.cpp
@@ -29,5 +29,18 @@
 // RUN: grep "Formatting \[1/2] foo.c" %t.stderr
 // RUN: not grep "Formatting \[2/2] foo.js" %t.stderr
 
-// RUN: cd ../../..
-// RUN: rm -rf %t.dir
+// RUN: cd ../..
+// RUN: clang-format -verbose *.cc level1/*.c* level1/level2/foo.* 2> %t.stderr
+// RUN: grep "Formatting \[1/5] level1/level2/foo.c" %t.stderr
+// RUN: not grep "Formatting \[2/5] level1/level2/foo.js" %t.stderr
+
+// RUN: rm .clang-format-ignore
+// RUN: clang-format -verbose *.cc level1/*.c* level1/level2/foo.* 2> %t.stderr
+// RUN: grep "Formatting \[1/5] foo.cc" %t.stderr
+// RUN: grep "Formatting \[2/5] level1/bar.cc" %t.stderr
+// RUN: grep "Formatting \[3/5] level1/baz.c" %t.stderr
+// RUN: grep "Formatting \[4/5] level1/level2/foo.c" %t.stderr
+// RUN: not grep "Formatting \[5/5] level1/level2/foo.js" %t.stderr
+
+// RUN: cd ..
+// RUN: rm -r %t.dir
diff --git a/clang/tools/clang-format/ClangFormat.cpp b/clang/tools/clang-format/ClangFormat.cpp
index be34dbbe886a15..787e56a6eccc0e 100644
--- a/clang/tools/clang-format/ClangFormat.cpp
+++ b/clang/tools/clang-format/ClangFormat.cpp
@@ -571,6 +571,11 @@ static int dumpConfig(bool IsSTDIN) {
   return 0;
 }
 
+using String = SmallString<128>;
+static String IgnoreDir;             // Directory of .clang-format-ignore file.
+static StringRef PrevDir;            // Directory of previous `FilePath`.
+static SmallVector<String> Patterns; // Patterns in .clang-format-ignore file.
+
 // Check whether `FilePath` is ignored according to the nearest
 // .clang-format-ignore file based on the rules below:
 // - A blank line is skipped.
@@ -586,33 +591,50 @@ static bool isIgnored(StringRef FilePath) {
   if (!is_regular_file(FilePath))
     return false;
 
-  using namespace llvm::sys::path;
-  SmallString<128> Path, AbsPath{FilePath};
+  String Path;
+  String AbsPath{FilePath};
 
+  using namespace llvm::sys::path;
   make_absolute(AbsPath);
   remove_dots(AbsPath, /*remove_dot_dot=*/true);
 
-  StringRef IgnoreDir{AbsPath};
-  do {
-    IgnoreDir = parent_path(IgnoreDir);
-    if (IgnoreDir.empty())
+  if (StringRef Dir{parent_path(AbsPath)}; PrevDir != Dir) {
+    PrevDir = Dir;
+
+    for (;;) {
+      Path = Dir;
+      append(Path, ".clang-format-ignore");
+      if (is_regular_file(Path))
+        break;
+      Dir = parent_path(Dir);
+      if (Dir.empty())
+        return false;
+    }
+
+    IgnoreDir = convert_to_slash(Dir);
+
+    std::ifstream IgnoreFile{Path.c_str()};
+    if (!IgnoreFile.good())
       return false;
 
-    Path = IgnoreDir;
-    append(Path, ".clang-format-ignore");
-  } while (!is_regular_file(Path));
+    Patterns.clear();
 
-  std::ifstream IgnoreFile{Path.c_str()};
-  if (!IgnoreFile.good())
-    return false;
+    for (std::string Line; std::getline(IgnoreFile, Line);) {
+      if (const auto Pattern{StringRef{Line}.trim()};
+          // Skip empty and comment lines.
+          !Pattern.empty() && Pattern[0] != '#') {
+        Patterns.push_back(Pattern);
+      }
+    }
+  }
 
-  const auto Pathname = convert_to_slash(AbsPath);
-  for (std::string Line; std::getline(IgnoreFile, Line);) {
-    auto Pattern = StringRef(Line).trim();
-    if (Pattern.empty() || Pattern[0] == '#')
-      continue;
+  if (IgnoreDir.empty())
+    return false;
 
-    const bool IsNegated = Pattern[0] == '!';
+  const auto Pathname{convert_to_slash(AbsPath)};
+  for (const auto &Pat : Patterns) {
+    const bool IsNegated = Pat[0] == '!';
+    StringRef Pattern{Pat};
     if (IsNegated)
       Pattern = Pattern.drop_front();
 
@@ -620,11 +642,14 @@ static bool isIgnored(StringRef FilePath) {
       continue;
 
     Pattern = Pattern.ltrim();
+
+    // `Pattern` is relative to `IgnoreDir` unless it starts with a slash.
+    // This doesn't support patterns containing drive names (e.g. `C:`).
     if (Pattern[0] != '/') {
-      Path = convert_to_slash(IgnoreDir);
+      Path = IgnoreDir;
       append(Path, Style::posix, Pattern);
       remove_dots(Path, /*remove_dot_dot=*/true, Style::posix);
-      Pattern = Path.str();
+      Pattern = Path;
     }
 
     if (clang::format::matchFilePath(Pattern, Pathname) == !IsNegated)

>From b8b12c321aee0c7748744e77843e765cc008e446 Mon Sep 17 00:00:00 2001
From: Owen Pan <owenpiano at gmail.com>
Date: Wed, 3 Jan 2024 19:57:21 -0800
Subject: [PATCH 2/2] Fix test cases for Windows.

---
 clang/test/Format/clang-format-ignore.cpp | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/clang/test/Format/clang-format-ignore.cpp b/clang/test/Format/clang-format-ignore.cpp
index 4d0c4073308edc..5a2267b302d22f 100644
--- a/clang/test/Format/clang-format-ignore.cpp
+++ b/clang/test/Format/clang-format-ignore.cpp
@@ -21,26 +21,26 @@
 
 // RUN: touch .clang-format-ignore
 // RUN: clang-format -verbose foo.c foo.js 2> %t.stderr
-// RUN: grep "Formatting \[1/2] foo.c" %t.stderr
-// RUN: grep "Formatting \[2/2] foo.js" %t.stderr
+// RUN: grep -Fx "Formatting [1/2] foo.c" %t.stderr
+// RUN: grep -Fx "Formatting [2/2] foo.js" %t.stderr
 
 // RUN: echo "*.js" > .clang-format-ignore
 // RUN: clang-format -verbose foo.c foo.js 2> %t.stderr
-// RUN: grep "Formatting \[1/2] foo.c" %t.stderr
-// RUN: not grep "Formatting \[2/2] foo.js" %t.stderr
+// RUN: grep -Fx "Formatting [1/2] foo.c" %t.stderr
+// RUN: not grep -F foo.js %t.stderr
 
 // RUN: cd ../..
 // RUN: clang-format -verbose *.cc level1/*.c* level1/level2/foo.* 2> %t.stderr
-// RUN: grep "Formatting \[1/5] level1/level2/foo.c" %t.stderr
-// RUN: not grep "Formatting \[2/5] level1/level2/foo.js" %t.stderr
+// RUN: grep -x "Formatting \[1/5] .*foo\.c" %t.stderr
+// RUN: not grep -F foo.js %t.stderr
 
 // RUN: rm .clang-format-ignore
 // RUN: clang-format -verbose *.cc level1/*.c* level1/level2/foo.* 2> %t.stderr
-// RUN: grep "Formatting \[1/5] foo.cc" %t.stderr
-// RUN: grep "Formatting \[2/5] level1/bar.cc" %t.stderr
-// RUN: grep "Formatting \[3/5] level1/baz.c" %t.stderr
-// RUN: grep "Formatting \[4/5] level1/level2/foo.c" %t.stderr
-// RUN: not grep "Formatting \[5/5] level1/level2/foo.js" %t.stderr
+// RUN: grep -x "Formatting \[1/5] .*foo\.cc" %t.stderr
+// RUN: grep -x "Formatting \[2/5] .*bar\.cc" %t.stderr
+// RUN: grep -x "Formatting \[3/5] .*baz\.c" %t.stderr
+// RUN: grep -x "Formatting \[4/5] .*foo\.c" %t.stderr
+// RUN: not grep -F foo.js %t.stderr
 
 // RUN: cd ..
 // RUN: rm -r %t.dir



More information about the cfe-commits mailing list