[clang] [clang-format] Add an fnmatch-like function for .clang-format-ignore (PR #76021)

Owen Pan via cfe-commits cfe-commits at lists.llvm.org
Tue Dec 19 23:32:56 PST 2023


https://github.com/owenca created https://github.com/llvm/llvm-project/pull/76021

This is needed because Windows doesn't have anything equivalent to the POSIX fnmatch() function.

>From b53c8b6c6d34857168d868d99c8d7ea7a69621eb Mon Sep 17 00:00:00 2001
From: Owen Pan <owenpiano at gmail.com>
Date: Tue, 19 Dec 2023 23:25:57 -0800
Subject: [PATCH] [clang-format] Add an fnmatch-like function for
 .clang-format-ignore

This is needed because Windows doesn't have anything equivalent to the POSIX
fnmatch() function.
---
 clang/lib/Format/CMakeLists.txt              |   1 +
 clang/lib/Format/MatchFilePath.cpp           | 112 +++++++++++++
 clang/lib/Format/MatchFilePath.h             |  22 +++
 clang/unittests/Format/CMakeLists.txt        |   1 +
 clang/unittests/Format/MatchFilePathTest.cpp | 156 +++++++++++++++++++
 5 files changed, 292 insertions(+)
 create mode 100644 clang/lib/Format/MatchFilePath.cpp
 create mode 100644 clang/lib/Format/MatchFilePath.h
 create mode 100644 clang/unittests/Format/MatchFilePathTest.cpp

diff --git a/clang/lib/Format/CMakeLists.txt b/clang/lib/Format/CMakeLists.txt
index 015ec7c0cc84e3..84a3c136f650a8 100644
--- a/clang/lib/Format/CMakeLists.txt
+++ b/clang/lib/Format/CMakeLists.txt
@@ -11,6 +11,7 @@ add_clang_library(clangFormat
   IntegerLiteralSeparatorFixer.cpp
   MacroCallReconstructor.cpp
   MacroExpander.cpp
+  MatchFilePath.cpp
   NamespaceEndCommentsFixer.cpp
   ObjCPropertyAttributeOrderFixer.cpp
   QualifierAlignmentFixer.cpp
diff --git a/clang/lib/Format/MatchFilePath.cpp b/clang/lib/Format/MatchFilePath.cpp
new file mode 100644
index 00000000000000..203a900e3d3bdd
--- /dev/null
+++ b/clang/lib/Format/MatchFilePath.cpp
@@ -0,0 +1,112 @@
+//===--- MatchFilePath.cpp - Match file path with pattern -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file implements the functionality of matching a file path name to
+/// a pattern, similar to the POSIX fnmatch() function.
+///
+//===----------------------------------------------------------------------===//
+
+#include "MatchFilePath.h"
+
+using namespace llvm;
+
+namespace clang {
+namespace format {
+
+// Check whether `FilePath` matches `Pattern` based on POSIX Section 2.13. Both
+// must be nonempty. A `?`, a `*`, or a bracket expression never matches a `/`.
+bool matchFilePath(StringRef Pattern, StringRef FilePath) {
+  assert(!Pattern.empty());
+  assert(!FilePath.empty());
+
+  constexpr auto Separator = '/';
+  const auto EOP = Pattern.size();  // End of `Pattern`.
+  const auto End = FilePath.size(); // End of `FilePath`.
+  unsigned I = 0;                   // Index to `Pattern`.
+
+  // No match if `Pattern` ends with a non-meta character not equal to the last
+  // character of `FilePath`.
+  if (const auto C = Pattern.back(); !strchr("?*]", C) && C != FilePath.back())
+    return false;
+
+  for (const auto &F : FilePath) {
+    if (I == EOP)
+      return false;
+
+    switch (Pattern[I]) {
+    case '\\':
+      if (++I == EOP || F != Pattern[I])
+        return false;
+      break;
+    case '?':
+      if (F == Separator)
+        return false;
+      break;
+    case '*': {
+      size_t J = &F - FilePath.data(); // Index of `F`.
+      // Skip consecutive stars. A trailing star matches the rest of `FilePath`,
+      // `F` included, only if no separator is left.
+      do {
+        if (++I == EOP)
+          return FilePath.find(Separator, J) == StringRef::npos;
+      } while (Pattern[I] == '*');
+      // Try the rest of `Pattern` at every position where the star can stop;
+      // the next separator, which the star cannot match, is the last one.
+      for (const auto Rest = Pattern.substr(I); J < End; ++J) {
+        if (matchFilePath(Rest, FilePath.substr(J)))
+          return true;
+        if (FilePath[J] == Separator)
+          break;
+      }
+      return false;
+    }
+    case '[':
+      // Skip e.g. `[!]`.
+      if (I + 3 < EOP || (I + 3 == EOP && Pattern[I + 1] != '!')) {
+        // Skip unpaired `[`, brackets containing slashes, and `[]`.
+        if (const auto K = Pattern.find_first_of("]/", I + 1);
+            K != StringRef::npos && Pattern[K] == ']' && K > I + 1) {
+          if (F == Separator)
+            return false;
+          ++I; // After the `[`.
+          const bool Negated = Pattern[I] == '!';
+          if (Negated)
+            ++I; // After the `!`.
+          bool Match = false;
+          do {
+            if (I + 2 < K && Pattern[I + 1] == '-') {
+              Match = Pattern[I] <= F && F <= Pattern[I + 2];
+              I += 3; // After the range, e.g. `A-Z`.
+            } else {
+              Match = F == Pattern[I++];
+            }
+          } while (!Match && I < K);
+          if (Match == Negated)
+            return false;
+          I = K + 1; // After the `]`.
+          continue;  // Skip the `++I` below.
+        }
+      }
+      [[fallthrough]]; // Match `[` literally.
+    default:
+      if (F != Pattern[I])
+        return false;
+    }
+    ++I; // After the character of `Pattern` that matched `F`.
+  }
+
+  // Match trailing stars with null strings.
+  while (I < EOP && Pattern[I] == '*')
+    ++I;
+
+  return I == EOP;
+}
+
+} // namespace format
+} // namespace clang
diff --git a/clang/lib/Format/MatchFilePath.h b/clang/lib/Format/MatchFilePath.h
new file mode 100644
index 00000000000000..482dab7c748e51
--- /dev/null
+++ b/clang/lib/Format/MatchFilePath.h
@@ -0,0 +1,22 @@
+//===--- MatchFilePath.h ----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LIB_FORMAT_MATCHFILEPATH_H
+#define LLVM_CLANG_LIB_FORMAT_MATCHFILEPATH_H
+
+#include "llvm/ADT/StringRef.h"
+
+namespace clang {
+namespace format {
+/// Returns whether `FilePath` matches `Pattern`; both must be nonempty.
+bool matchFilePath(llvm::StringRef Pattern, llvm::StringRef FilePath);
+
+} // end namespace format
+} // end namespace clang
+
+#endif
diff --git a/clang/unittests/Format/CMakeLists.txt b/clang/unittests/Format/CMakeLists.txt
index 53136328928f5c..71f5886d946c80 100644
--- a/clang/unittests/Format/CMakeLists.txt
+++ b/clang/unittests/Format/CMakeLists.txt
@@ -27,6 +27,7 @@ add_clang_unittest(FormatTests
   IntegerLiteralSeparatorTest.cpp
   MacroCallReconstructorTest.cpp
   MacroExpanderTest.cpp
+  MatchFilePathTest.cpp
   NamespaceEndCommentsFixerTest.cpp
   ObjCPropertyAttributeOrderFixerTest.cpp
   QualifierFixerTest.cpp
diff --git a/clang/unittests/Format/MatchFilePathTest.cpp b/clang/unittests/Format/MatchFilePathTest.cpp
new file mode 100644
index 00000000000000..f236987cfa744f
--- /dev/null
+++ b/clang/unittests/Format/MatchFilePathTest.cpp
@@ -0,0 +1,156 @@
+//===- unittest/Format/MatchFilePathTest.cpp ------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "../../lib/Format/MatchFilePath.h"
+#include "gtest/gtest.h"
+
+namespace clang {
+namespace format {
+namespace {
+
+class MatchFilePathTest : public ::testing::Test {
+protected:
+  bool match(llvm::StringRef FilePath, llvm::StringRef Pattern) {
+    return matchFilePath(Pattern, FilePath);
+  }
+};
+
+// Most of the test cases below come from CPython's fnmatch tests:
+// https://github.com/python/cpython/blob/main/Lib/test/test_fnmatch.py
+
+TEST_F(MatchFilePathTest, Wildcard) {
+  EXPECT_TRUE(match("abc", "?*?"));
+  EXPECT_TRUE(match("abc", "???*"));
+  EXPECT_TRUE(match("abc", "*???"));
+  EXPECT_TRUE(match("abc", "???"));
+  EXPECT_TRUE(match("abc", "*"));
+  EXPECT_TRUE(match("abc", "ab[cd]"));
+  EXPECT_TRUE(match("abc", "ab[!de]"));
+  EXPECT_FALSE(match("abc", "ab[de]"));
+  EXPECT_FALSE(match("a", "??"));
+  EXPECT_FALSE(match("a", "b"));
+}
+
+TEST_F(MatchFilePathTest, Backslash) {
+  EXPECT_TRUE(match("a?", R"(a\?)"));
+  EXPECT_FALSE(match("a\\", R"(a\)"));
+  EXPECT_TRUE(match("\\", R"([\])"));
+  EXPECT_TRUE(match("a", R"([!\])"));
+  EXPECT_FALSE(match("\\", R"([!\])"));
+}
+
+TEST_F(MatchFilePathTest, Newline) {
+  EXPECT_TRUE(match("foo\nbar", "foo*"));
+  EXPECT_TRUE(match("foo\nbar\n", "foo*"));
+  EXPECT_FALSE(match("\nfoo", "foo*"));
+  EXPECT_TRUE(match("\n", "*"));
+}
+
+TEST_F(MatchFilePathTest, Star) {
+  EXPECT_TRUE(match(std::string(50, 'a').c_str(), "*a*a*a*a*a*a*a*a*a*a"));
+  EXPECT_FALSE(
+      match((std::string(50, 'a') + 'b').c_str(), "*a*a*a*a*a*a*a*a*a*a"));
+}
+
+TEST_F(MatchFilePathTest, CaseSensitive) {
+  EXPECT_TRUE(match("abc", "abc"));
+  EXPECT_FALSE(match("AbC", "abc"));
+  EXPECT_FALSE(match("abc", "AbC"));
+  EXPECT_TRUE(match("AbC", "AbC"));
+}
+
+TEST_F(MatchFilePathTest, PathSeparators) {
+  EXPECT_TRUE(match("usr/bin", "usr/bin"));
+  EXPECT_TRUE(match("usr\\bin", R"(usr\\bin)"));
+}
+
+TEST_F(MatchFilePathTest, NumericEscapeSequence) {
+  EXPECT_TRUE(match("test", "te*"));
+  EXPECT_TRUE(match("test\xff", "te*\xff"));
+  EXPECT_TRUE(match("foo\nbar", "foo*"));
+}
+
+TEST_F(MatchFilePathTest, ValidBrackets) {
+  EXPECT_TRUE(match("z", "[az]"));
+  EXPECT_FALSE(match("z", "[!az]"));
+  EXPECT_TRUE(match("a", "[aa]"));
+  EXPECT_TRUE(match("^", "[^az]"));
+  EXPECT_TRUE(match("[", "[[az]"));
+  EXPECT_FALSE(match("]", "[!]]"));
+}
+
+TEST_F(MatchFilePathTest, InvalidBrackets) {
+  EXPECT_TRUE(match("[", "["));
+  EXPECT_TRUE(match("[]", "[]"));
+  EXPECT_TRUE(match("[!", "[!"));
+  EXPECT_TRUE(match("[!]", "[!]"));
+}
+
+TEST_F(MatchFilePathTest, Range) {
+  EXPECT_TRUE(match("c", "[b-d]"));
+  EXPECT_FALSE(match("c", "[!b-d]"));
+  EXPECT_TRUE(match("y", "[b-dx-z]"));
+  EXPECT_FALSE(match("y", "[!b-dx-z]"));
+}
+
+TEST_F(MatchFilePathTest, Hyphen) {
+  EXPECT_FALSE(match("#", "[!-#]"));
+  EXPECT_FALSE(match("-", "[!--.]"));
+  EXPECT_TRUE(match("_", "[^-`]"));
+  EXPECT_TRUE(match("]", "[[-^]"));
+  EXPECT_TRUE(match("]", R"([\-^])"));
+  EXPECT_TRUE(match("-", "[b-]"));
+  EXPECT_FALSE(match("-", "[!b-]"));
+  EXPECT_TRUE(match("-", "[-b]"));
+  EXPECT_FALSE(match("-", "[!-b]"));
+  EXPECT_TRUE(match("-", "[-]"));
+  EXPECT_FALSE(match("-", "[!-]"));
+}
+
+TEST_F(MatchFilePathTest, UpperLELower) {
+  EXPECT_FALSE(match("c", "[d-b]"));
+  EXPECT_TRUE(match("c", "[!d-b]"));
+  EXPECT_TRUE(match("y", "[d-bx-z]"));
+  EXPECT_FALSE(match("y", "[!d-bx-z]"));
+  EXPECT_TRUE(match("_", "[d-b^-`]"));
+  EXPECT_TRUE(match("]", "[d-b[-^]"));
+  EXPECT_TRUE(match("b", "[b-b]"));
+}
+
+TEST_F(MatchFilePathTest, SlashAndBackslashInBrackets) {
+  EXPECT_FALSE(match("/", "[/]"));
+  EXPECT_TRUE(match("\\", R"([\])"));
+  EXPECT_TRUE(match("[/]", "[/]"));
+  EXPECT_TRUE(match("\\", R"([\t])"));
+  EXPECT_TRUE(match("t", R"([\t])"));
+  EXPECT_FALSE(match("\t", R"([\t])"));
+}
+
+TEST_F(MatchFilePathTest, SlashAndBackslashInRange) {
+  EXPECT_FALSE(match("a/b", "a[.-0]b"));
+  EXPECT_TRUE(match("a\\b", "a[Z-^]b"));
+  EXPECT_FALSE(match("a/b", "a[/-0]b"));
+  EXPECT_TRUE(match("a[/-0]b", "a[/-0]b"));
+  EXPECT_FALSE(match("a/b", "a[.-/]b"));
+  EXPECT_TRUE(match("a[.-/]b", "a[.-/]b"));
+  EXPECT_TRUE(match("a\\b", R"(a[\-^]b)"));
+  EXPECT_TRUE(match("a\\b", R"(a[Z-\]b)"));
+}
+
+TEST_F(MatchFilePathTest, Brackets) {
+  EXPECT_TRUE(match("[", "[[]"));
+  EXPECT_TRUE(match("&", "[a&&b]"));
+  EXPECT_TRUE(match("|", "[a||b]"));
+  EXPECT_TRUE(match("~", "[a~~b]"));
+  EXPECT_TRUE(match(",", "[a-z+--A-Z]"));
+  EXPECT_FALSE(match(".", "[a-z--/A-Z]"));
+}
+
+} // namespace
+} // namespace format
+} // namespace clang



More information about the cfe-commits mailing list