[clang] [clang-format] Add an fnmatch-like function for .clang-format-ignore (PR #76021)
Owen Pan via cfe-commits
cfe-commits at lists.llvm.org
Tue Dec 19 23:32:56 PST 2023
https://github.com/owenca created https://github.com/llvm/llvm-project/pull/76021
This is needed because Windows doesn't have anything equivalent to the POSIX fnmatch() function.
>From b53c8b6c6d34857168d868d99c8d7ea7a69621eb Mon Sep 17 00:00:00 2001
From: Owen Pan <owenpiano at gmail.com>
Date: Tue, 19 Dec 2023 23:25:57 -0800
Subject: [PATCH] [clang-format] Add an fnmatch-like function for
.clang-format-ignore
This is needed because Windows doesn't have anything equivalent to the POSIX
fnmatch() function.
---
clang/lib/Format/CMakeLists.txt | 1 +
clang/lib/Format/MatchFilePath.cpp | 112 +++++++++++++
clang/lib/Format/MatchFilePath.h | 22 +++
clang/unittests/Format/CMakeLists.txt | 1 +
clang/unittests/Format/MatchFilePathTest.cpp | 156 +++++++++++++++++++
5 files changed, 292 insertions(+)
create mode 100644 clang/lib/Format/MatchFilePath.cpp
create mode 100644 clang/lib/Format/MatchFilePath.h
create mode 100644 clang/unittests/Format/MatchFilePathTest.cpp
diff --git a/clang/lib/Format/CMakeLists.txt b/clang/lib/Format/CMakeLists.txt
index 015ec7c0cc84e3..84a3c136f650a8 100644
--- a/clang/lib/Format/CMakeLists.txt
+++ b/clang/lib/Format/CMakeLists.txt
@@ -11,6 +11,7 @@ add_clang_library(clangFormat
IntegerLiteralSeparatorFixer.cpp
MacroCallReconstructor.cpp
MacroExpander.cpp
+ MatchFilePath.cpp
NamespaceEndCommentsFixer.cpp
ObjCPropertyAttributeOrderFixer.cpp
QualifierAlignmentFixer.cpp
diff --git a/clang/lib/Format/MatchFilePath.cpp b/clang/lib/Format/MatchFilePath.cpp
new file mode 100644
index 00000000000000..203a900e3d3bdd
--- /dev/null
+++ b/clang/lib/Format/MatchFilePath.cpp
@@ -0,0 +1,112 @@
+//===--- MatchFilePath.cpp - Match file path with pattern -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file implements the functionality of matching a file path name to
+/// a pattern, similar to the POSIX fnmatch() function.
+///
+//===----------------------------------------------------------------------===//
+
+#include "MatchFilePath.h"
+
+using namespace llvm;
+
+namespace clang {
+namespace format {
+
+/// Check whether `FilePath` matches the glob `Pattern`, following the pattern
+/// matching notation of POSIX Section 2.13 with pathname semantics:
+///  - `?` matches any single character except `/`.
+///  - `*` matches any (possibly empty) run of characters other than `/`.
+///  - `[...]` matches one character (never `/`) out of the listed characters
+///    and `A-Z`-style ranges; `[!...]` negates the set. A `[` that does not
+///    start a usable bracket expression is matched literally.
+///  - `\` makes the following pattern character match literally.
+/// Matching is case-sensitive and has to consume all of `FilePath`.
+///
+/// \param Pattern  the pattern; must not be empty.
+/// \param FilePath the path to test, with `/` as separator; must not be empty.
+/// \returns true if the whole of `FilePath` matches `Pattern`.
+bool matchFilePath(StringRef Pattern, StringRef FilePath) {
+  assert(!Pattern.empty());
+  assert(!FilePath.empty());
+
+  constexpr auto Separator = '/';
+  const auto EOP = Pattern.size();  // End of `Pattern`.
+  const auto End = FilePath.size(); // End of `FilePath`.
+  unsigned I = 0;                   // Index to `Pattern`.
+
+  // No match if `Pattern` ends with a non-meta character not equal to the last
+  // character of `FilePath`.
+  if (const auto C = Pattern.back(); !strchr("?*]", C) && C != FilePath.back())
+    return false;
+
+  for (const auto &F : FilePath) {
+    if (I == EOP)
+      return false;
+
+    switch (Pattern[I]) {
+    case '\\':
+      // The escaped character must match `F` exactly; a lone trailing
+      // backslash matches nothing.
+      if (++I == EOP || F != Pattern[I])
+        return false;
+      ++I;
+      break;
+    case '?':
+      if (F == Separator)
+        return false;
+      ++I;
+      break;
+    case '*': {
+      unsigned J = &F - FilePath.data(); // Index of `F`.
+      // Skip consecutive stars.
+      do {
+        // `Pattern` ends with a star: it can absorb the rest of `FilePath`
+        // (starting with `F` itself) only if no separator is left.
+        if (++I == EOP)
+          return FilePath.find(Separator, J) == StringRef::npos;
+      } while (Pattern[I] == '*');
+      // Let the star absorb zero or more non-separator characters and match
+      // the rest of `Pattern` against each remaining suffix of `FilePath`, up
+      // to and including the suffix that starts at the next separator. The
+      // rest of `Pattern` is non-empty and does not start with a star, so it
+      // cannot match once `FilePath` is exhausted.
+      for (const auto Rest = Pattern.substr(I); J < End; ++J) {
+        if (matchFilePath(Rest, FilePath.substr(J)))
+          return true;
+        if (FilePath[J] == Separator)
+          break; // A star never matches a separator.
+      }
+      return false;
+    }
+    case '[':
+      // Skip e.g. `[!]`.
+      if (I + 3 < EOP || (I + 3 == EOP && Pattern[I + 1] != '!')) {
+        // Skip unpaired `[`, brackets containing slashes, and `[]`.
+        if (const auto J = Pattern.find_first_of("]/", I + 1);
+            J != StringRef::npos && Pattern[J] == ']' && J > I + 1) {
+          if (F == Separator)
+            return false;
+          ++I; // After the `[`.
+          bool Negated = false;
+          if (Pattern[I] == '!') {
+            Negated = true;
+            ++I; // After the `!`.
+          }
+          bool Match = false;
+          do {
+            if (I + 2 < J && Pattern[I + 1] == '-') {
+              Match = Pattern[I] <= F && F <= Pattern[I + 2];
+              I += 3; // After the range, e.g. `A-Z`.
+            } else {
+              Match = F == Pattern[I++];
+            }
+          } while (!Match && I < J);
+          if (Negated ? Match : !Match)
+            return false;
+          I = J + 1; // After the `]`.
+          break;
+        }
+      }
+      [[fallthrough]]; // Match `[` literally.
+    default:
+      if (F != Pattern[I])
+        return false;
+      ++I;
+    }
+  }
+
+  // Match trailing stars with null strings.
+  while (I < EOP && Pattern[I] == '*')
+    ++I;
+
+  return I == EOP;
+}
+
+} // namespace format
+} // namespace clang
diff --git a/clang/lib/Format/MatchFilePath.h b/clang/lib/Format/MatchFilePath.h
new file mode 100644
index 00000000000000..482dab7c748e51
--- /dev/null
+++ b/clang/lib/Format/MatchFilePath.h
@@ -0,0 +1,22 @@
+//===--- MatchFilePath.h ----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LIB_FORMAT_MATCHFILEPATH_H
+#define LLVM_CLANG_LIB_FORMAT_MATCHFILEPATH_H
+
+#include "llvm/ADT/StringRef.h"
+
+namespace clang {
+namespace format {
+
+/// Returns whether `FilePath` matches `Pattern`, a glob in the style accepted
+/// by the POSIX fnmatch() function. The implementation asserts that neither
+/// argument is empty.
+bool matchFilePath(llvm::StringRef Pattern, llvm::StringRef FilePath);
+
+} // end namespace format
+} // end namespace clang
+
+#endif
diff --git a/clang/unittests/Format/CMakeLists.txt b/clang/unittests/Format/CMakeLists.txt
index 53136328928f5c..71f5886d946c80 100644
--- a/clang/unittests/Format/CMakeLists.txt
+++ b/clang/unittests/Format/CMakeLists.txt
@@ -27,6 +27,7 @@ add_clang_unittest(FormatTests
IntegerLiteralSeparatorTest.cpp
MacroCallReconstructorTest.cpp
MacroExpanderTest.cpp
+ MatchFilePathTest.cpp
NamespaceEndCommentsFixerTest.cpp
ObjCPropertyAttributeOrderFixerTest.cpp
QualifierFixerTest.cpp
diff --git a/clang/unittests/Format/MatchFilePathTest.cpp b/clang/unittests/Format/MatchFilePathTest.cpp
new file mode 100644
index 00000000000000..f236987cfa744f
--- /dev/null
+++ b/clang/unittests/Format/MatchFilePathTest.cpp
@@ -0,0 +1,156 @@
+//===- unittest/Format/MatchFilePathTest.cpp ------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "../../lib/Format/MatchFilePath.h"
+#include "gtest/gtest.h"
+
+namespace clang {
+namespace format {
+namespace {
+
+// Fixture whose helper takes the path first and the pattern second, i.e. the
+// reverse of matchFilePath()'s parameter order.
+class MatchFilePathTest : public ::testing::Test {
+protected:
+  bool match(llvm::StringRef FilePath, llvm::StringRef Pattern) {
+    return matchFilePath(Pattern, FilePath);
+  }
+};
+
+// Almost all of the test cases below are from:
+// https://github.com/python/cpython/blob/main/Lib/test/test_fnmatch.py
+
+// `?`, `*` and simple bracket expressions on a single path component.
+TEST_F(MatchFilePathTest, Wildcard) {
+  EXPECT_TRUE(match("abc", "?*?"));
+  EXPECT_TRUE(match("abc", "???*"));
+  EXPECT_TRUE(match("abc", "*???"));
+  EXPECT_TRUE(match("abc", "???"));
+  EXPECT_TRUE(match("abc", "*"));
+  EXPECT_TRUE(match("abc", "ab[cd]"));
+  EXPECT_TRUE(match("abc", "ab[!de]"));
+  EXPECT_FALSE(match("abc", "ab[de]"));
+  EXPECT_FALSE(match("a", "??"));
+  EXPECT_FALSE(match("a", "b"));
+}
+
+// A backslash escapes outside brackets and is an ordinary member inside them.
+TEST_F(MatchFilePathTest, Backslash) {
+  EXPECT_TRUE(match("a?", R"(a\?)"));
+  EXPECT_FALSE(match("a\\", R"(a\)"));
+  EXPECT_TRUE(match("\\", R"([\])"));
+  EXPECT_TRUE(match("a", R"([!\])"));
+  EXPECT_FALSE(match("\\", R"([!\])"));
+}
+
+// Newlines are matched like any other non-separator character.
+TEST_F(MatchFilePathTest, Newline) {
+  EXPECT_TRUE(match("foo\nbar", "foo*"));
+  EXPECT_TRUE(match("foo\nbar\n", "foo*"));
+  EXPECT_FALSE(match("\nfoo", "foo*"));
+  EXPECT_TRUE(match("\n", "*"));
+}
+
+// Patterns with many stars against a long path.
+TEST_F(MatchFilePathTest, Star) {
+  EXPECT_TRUE(match(std::string(50, 'a').c_str(), "*a*a*a*a*a*a*a*a*a*a"));
+  EXPECT_FALSE(
+      match((std::string(50, 'a') + 'b').c_str(), "*a*a*a*a*a*a*a*a*a*a"));
+}
+
+TEST_F(MatchFilePathTest, CaseSensitive) {
+  EXPECT_TRUE(match("abc", "abc"));
+  EXPECT_FALSE(match("AbC", "abc"));
+  EXPECT_FALSE(match("abc", "AbC"));
+  EXPECT_TRUE(match("AbC", "AbC"));
+}
+
+TEST_F(MatchFilePathTest, PathSeparators) {
+  EXPECT_TRUE(match("usr/bin", "usr/bin"));
+  EXPECT_TRUE(match("usr\\bin", R"(usr\\bin)"));
+  // Wildcards stay within one path component: neither `*` nor `?` may match
+  // the `/` separator.
+  EXPECT_TRUE(match("usr/bin", "usr/*"));
+  EXPECT_FALSE(match("usr/bin", "*"));
+  EXPECT_FALSE(match("usr/bin", "us*"));
+  EXPECT_FALSE(match("usr/bin", "usr?bin"));
+}
+
+TEST_F(MatchFilePathTest, NumericEscapeSequence) {
+  EXPECT_TRUE(match("test", "te*"));
+  EXPECT_TRUE(match("test\xff", "te*\xff"));
+  EXPECT_TRUE(match("foo\nbar", "foo*"));
+}
+
+TEST_F(MatchFilePathTest, ValidBrackets) {
+  EXPECT_TRUE(match("z", "[az]"));
+  EXPECT_FALSE(match("z", "[!az]"));
+  EXPECT_TRUE(match("a", "[aa]"));
+  EXPECT_TRUE(match("^", "[^az]"));
+  EXPECT_TRUE(match("[", "[[az]"));
+  EXPECT_FALSE(match("]", "[!]]"));
+}
+
+// Incomplete or empty bracket expressions are matched literally.
+TEST_F(MatchFilePathTest, InvalidBrackets) {
+  EXPECT_TRUE(match("[", "["));
+  EXPECT_TRUE(match("[]", "[]"));
+  EXPECT_TRUE(match("[!", "[!"));
+  EXPECT_TRUE(match("[!]", "[!]"));
+}
+
+TEST_F(MatchFilePathTest, Range) {
+  EXPECT_TRUE(match("c", "[b-d]"));
+  EXPECT_FALSE(match("c", "[!b-d]"));
+  EXPECT_TRUE(match("y", "[b-dx-z]"));
+  EXPECT_FALSE(match("y", "[!b-dx-z]"));
+}
+
+// `-` as a range operator versus `-` as a literal set member.
+TEST_F(MatchFilePathTest, Hyphen) {
+  EXPECT_FALSE(match("#", "[!-#]"));
+  EXPECT_FALSE(match("-", "[!--.]"));
+  EXPECT_TRUE(match("_", "[^-`]"));
+  EXPECT_TRUE(match("]", "[[-^]"));
+  EXPECT_TRUE(match("]", R"([\-^])"));
+  EXPECT_TRUE(match("-", "[b-]"));
+  EXPECT_FALSE(match("-", "[!b-]"));
+  EXPECT_TRUE(match("-", "[-b]"));
+  EXPECT_FALSE(match("-", "[!-b]"));
+  EXPECT_TRUE(match("-", "[-]"));
+  EXPECT_FALSE(match("-", "[!-]"));
+}
+
+// Ranges whose upper bound is not above the lower bound.
+TEST_F(MatchFilePathTest, UpperLELower) {
+  EXPECT_FALSE(match("c", "[d-b]"));
+  EXPECT_TRUE(match("c", "[!d-b]"));
+  EXPECT_TRUE(match("y", "[d-bx-z]"));
+  EXPECT_FALSE(match("y", "[!d-bx-z]"));
+  EXPECT_TRUE(match("_", "[d-b^-`]"));
+  EXPECT_TRUE(match("]", "[d-b[-^]"));
+  EXPECT_TRUE(match("b", "[b-b]"));
+}
+
+TEST_F(MatchFilePathTest, SlashAndBackslashInBrackets) {
+  EXPECT_FALSE(match("/", "[/]"));
+  EXPECT_TRUE(match("\\", R"([\])"));
+  EXPECT_TRUE(match("[/]", "[/]"));
+  EXPECT_TRUE(match("\\", R"([\t])"));
+  EXPECT_TRUE(match("t", R"([\t])"));
+  EXPECT_FALSE(match("\t", R"([\t])"));
+}
+
+TEST_F(MatchFilePathTest, SlashAndBackslashInRange) {
+  EXPECT_FALSE(match("a/b", "a[.-0]b"));
+  EXPECT_TRUE(match("a\\b", "a[Z-^]b"));
+  EXPECT_FALSE(match("a/b", "a[/-0]b"));
+  EXPECT_TRUE(match("a[/-0]b", "a[/-0]b"));
+  EXPECT_FALSE(match("a/b", "a[.-/]b"));
+  EXPECT_TRUE(match("a[.-/]b", "a[.-/]b"));
+  EXPECT_TRUE(match("a\\b", R"(a[\-^]b)"));
+  EXPECT_TRUE(match("a\\b", R"(a[Z-\]b)"));
+}
+
+TEST_F(MatchFilePathTest, Brackets) {
+  EXPECT_TRUE(match("[", "[[]"));
+  EXPECT_TRUE(match("&", "[a&&b]"));
+  EXPECT_TRUE(match("|", "[a||b]"));
+  EXPECT_TRUE(match("~", "[a~~b]"));
+  EXPECT_TRUE(match(",", "[a-z+--A-Z]"));
+  EXPECT_FALSE(match(".", "[a-z--/A-Z]"));
+}
+
+} // namespace
+} // namespace format
+} // namespace clang
More information about the cfe-commits
mailing list