[clang] [llvm] Make sanitizer special case list slash-agnostic (PR #149886)
Vitaly Buka via cfe-commits
cfe-commits at lists.llvm.org
Fri Jun 12 10:48:01 PDT 2026
https://github.com/vitalybuka updated https://github.com/llvm/llvm-project/pull/149886
>From c613019535f3251d3201b0f1408988366f5732c2 Mon Sep 17 00:00:00 2001
From: Devon Loehr <dloehr at google.com>
Date: Mon, 21 Jul 2025 19:07:23 +0000
Subject: [PATCH 01/30] Make special case matcher slash-agnostic
---
clang/docs/SanitizerSpecialCaseList.rst | 1 +
clang/unittests/Basic/DiagnosticTest.cpp | 23 +++++++++++++++++++++++
llvm/docs/ReleaseNotes.md | 4 ++++
llvm/include/llvm/Support/GlobPattern.h | 1 +
llvm/lib/Support/GlobPattern.cpp | 4 ++++
5 files changed, 33 insertions(+)
diff --git a/clang/docs/SanitizerSpecialCaseList.rst b/clang/docs/SanitizerSpecialCaseList.rst
index 307c001664fba..f2a04dc9adcf1 100644
--- a/clang/docs/SanitizerSpecialCaseList.rst
+++ b/clang/docs/SanitizerSpecialCaseList.rst
@@ -174,6 +174,7 @@ tool-specific docs.
# Lines starting with # are ignored.
# Turn off checks for the source file
# Entries without sections are placed into [*] and apply to all sanitizers
+ # "/" matches both windows and unix path separators ("/" and "\")
src:path/to/source/file.c
src:*/source/file.c
# Turn off checks for this main file, including files included by it.
diff --git a/clang/unittests/Basic/DiagnosticTest.cpp b/clang/unittests/Basic/DiagnosticTest.cpp
index 4b3af00c3b0ce..a6557b1e35c4b 100644
--- a/clang/unittests/Basic/DiagnosticTest.cpp
+++ b/clang/unittests/Basic/DiagnosticTest.cpp
@@ -360,4 +360,27 @@ TEST_F(SuppressionMappingTest, ParsingRespectsOtherWarningOpts) {
clang::ProcessWarningOptions(Diags, Diags.getDiagnosticOptions(), *FS);
EXPECT_THAT(diags(), IsEmpty());
}
+
+TEST_F(SuppressionMappingTest, ForwardSlashMatchesBothDirections) {
+ llvm::StringLiteral SuppressionMappingFile = R"(
+ [unused]
+ src:*clang/*
+ src:*clang/lib/Sema/*=emit
+ src:*clang/lib\\Sema/foo*)";
+ Diags.getDiagnosticOptions().DiagnosticSuppressionMappingsFile = "foo.txt";
+ FS->addFile("foo.txt", /*ModificationTime=*/{},
+ llvm::MemoryBuffer::getMemBuffer(SuppressionMappingFile));
+ clang::ProcessWarningOptions(Diags, Diags.getDiagnosticOptions(), *FS);
+ EXPECT_THAT(diags(), IsEmpty());
+
+ EXPECT_TRUE(Diags.isSuppressedViaMapping(
+ diag::warn_unused_function, locForFile(R"(clang/lib/Basic/foo.h)")));
+ EXPECT_FALSE(Diags.isSuppressedViaMapping(
+ diag::warn_unused_function, locForFile(R"(clang/lib/Sema\bar.h)")));
+ EXPECT_TRUE(Diags.isSuppressedViaMapping(
+ diag::warn_unused_function, locForFile(R"(clang\lib\Sema/foo.h)")));
+ // The third pattern requires a literal backslash before Sema
+ EXPECT_FALSE(Diags.isSuppressedViaMapping(
+ diag::warn_unused_function, locForFile(R"(clang/lib/Sema/foo.h)")));
+}
} // namespace
diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md
index 85c16b9c33f10..6ff8d18b07e84 100644
--- a/llvm/docs/ReleaseNotes.md
+++ b/llvm/docs/ReleaseNotes.md
@@ -174,6 +174,10 @@ Changes to BOLT
Changes to Sanitizers
---------------------
+* The [sanitizer special case list format](https://clang.llvm.org/docs/SanitizerSpecialCaseList.html#format)
+ now treats forward slashes as either a forward or a backslash, to handle
+ paths with mixed unix and window styles.
+
Other Changes
-------------
diff --git a/llvm/include/llvm/Support/GlobPattern.h b/llvm/include/llvm/Support/GlobPattern.h
index 62ed4a0f23fd9..af92c63331282 100644
--- a/llvm/include/llvm/Support/GlobPattern.h
+++ b/llvm/include/llvm/Support/GlobPattern.h
@@ -35,6 +35,7 @@ namespace llvm {
/// expansions are not supported. If \p MaxSubPatterns is empty then
/// brace expansions are not supported and characters `{,}` are treated as
/// literals.
+/// * `/` matches both unix and windows path separators: `/` and `\`.
/// * `\` escapes the next character so it is treated as a literal.
///
/// Some known edge cases are:
diff --git a/llvm/lib/Support/GlobPattern.cpp b/llvm/lib/Support/GlobPattern.cpp
index 7004adf461a0c..26b3724863ee8 100644
--- a/llvm/lib/Support/GlobPattern.cpp
+++ b/llvm/lib/Support/GlobPattern.cpp
@@ -231,6 +231,10 @@ bool GlobPattern::SubGlobPattern::match(StringRef Str) const {
++S;
continue;
}
+ } else if (*P == '/' && (*S == '/' || *S == '\\')) {
+ ++P;
+ ++S;
+ continue;
} else if (*P == *S || *P == '?') {
++P;
++S;
>From a81b5509f3d9633eee2276c2242c595378d1cfdc Mon Sep 17 00:00:00 2001
From: Devon Loehr <dloehr at google.com>
Date: Tue, 22 Jul 2025 14:57:34 +0000
Subject: [PATCH 02/30] Enable only for special case list
---
llvm/include/llvm/Support/GlobPattern.h | 9 +++++++--
llvm/lib/Support/GlobPattern.cpp | 12 +++++++-----
llvm/lib/Support/SpecialCaseList.cpp | 3 ++-
3 files changed, 16 insertions(+), 8 deletions(-)
diff --git a/llvm/include/llvm/Support/GlobPattern.h b/llvm/include/llvm/Support/GlobPattern.h
index af92c63331282..2729ba9a56649 100644
--- a/llvm/include/llvm/Support/GlobPattern.h
+++ b/llvm/include/llvm/Support/GlobPattern.h
@@ -56,8 +56,10 @@ class GlobPattern {
/// \param MaxSubPatterns if provided limit the number of allowed subpatterns
/// created from expanding braces otherwise disable
/// brace expansion
+ /// \param IsSlashAgnostic whether to treat '/' as matching '\\' as well
LLVM_ABI static Expected<GlobPattern>
- create(StringRef Pat, std::optional<size_t> MaxSubPatterns = {});
+ create(StringRef Pat, std::optional<size_t> MaxSubPatterns = {},
+ bool IsSlashAgnostic = false);
/// \returns \p true if \p S matches this glob pattern
LLVM_ABI bool match(StringRef S) const;
@@ -76,7 +78,9 @@ class GlobPattern {
struct SubGlobPattern {
/// \param Pat the pattern to match against
- LLVM_ABI static Expected<SubGlobPattern> create(StringRef Pat);
+ /// \param SlashAgnostic whether to treat '/' as matching '\\' as well
+ LLVM_ABI static Expected<SubGlobPattern> create(StringRef Pat,
+ bool SlashAgnostic);
/// \returns \p true if \p S matches this glob pattern
LLVM_ABI bool match(StringRef S) const;
StringRef getPat() const { return StringRef(Pat.data(), Pat.size()); }
@@ -88,6 +92,7 @@ class GlobPattern {
};
SmallVector<Bracket, 0> Brackets;
SmallVector<char, 0> Pat;
+ bool IsSlashAgnostic;
};
SmallVector<SubGlobPattern, 1> SubGlobs;
};
diff --git a/llvm/lib/Support/GlobPattern.cpp b/llvm/lib/Support/GlobPattern.cpp
index 26b3724863ee8..4aa30a81c3fbf 100644
--- a/llvm/lib/Support/GlobPattern.cpp
+++ b/llvm/lib/Support/GlobPattern.cpp
@@ -132,8 +132,9 @@ parseBraceExpansions(StringRef S, std::optional<size_t> MaxSubPatterns) {
return std::move(SubPatterns);
}
-Expected<GlobPattern>
-GlobPattern::create(StringRef S, std::optional<size_t> MaxSubPatterns) {
+Expected<GlobPattern> GlobPattern::create(StringRef S,
+ std::optional<size_t> MaxSubPatterns,
+ bool IsSlashAgnostic) {
GlobPattern Pat;
// Store the prefix that does not contain any metacharacter.
@@ -147,7 +148,7 @@ GlobPattern::create(StringRef S, std::optional<size_t> MaxSubPatterns) {
if (auto Err = parseBraceExpansions(S, MaxSubPatterns).moveInto(SubPats))
return std::move(Err);
for (StringRef SubPat : SubPats) {
- auto SubGlobOrErr = SubGlobPattern::create(SubPat);
+ auto SubGlobOrErr = SubGlobPattern::create(SubPat, IsSlashAgnostic);
if (!SubGlobOrErr)
return SubGlobOrErr.takeError();
Pat.SubGlobs.push_back(*SubGlobOrErr);
@@ -157,8 +158,9 @@ GlobPattern::create(StringRef S, std::optional<size_t> MaxSubPatterns) {
}
Expected<GlobPattern::SubGlobPattern>
-GlobPattern::SubGlobPattern::create(StringRef S) {
+GlobPattern::SubGlobPattern::create(StringRef S, bool SlashAgnostic) {
SubGlobPattern Pat;
+ Pat.IsSlashAgnostic = SlashAgnostic;
// Parse brackets.
Pat.Pat.assign(S.begin(), S.end());
@@ -231,7 +233,7 @@ bool GlobPattern::SubGlobPattern::match(StringRef Str) const {
++S;
continue;
}
- } else if (*P == '/' && (*S == '/' || *S == '\\')) {
+ } else if (IsSlashAgnostic && *P == '/' && (*S == '/' || *S == '\\')) {
++P;
++S;
continue;
diff --git a/llvm/lib/Support/SpecialCaseList.cpp b/llvm/lib/Support/SpecialCaseList.cpp
index 8d4e043bc1c9f..c597f03188507 100644
--- a/llvm/lib/Support/SpecialCaseList.cpp
+++ b/llvm/lib/Support/SpecialCaseList.cpp
@@ -59,7 +59,8 @@ Error SpecialCaseList::Matcher::insert(StringRef Pattern, unsigned LineNumber,
Glob->LineNo = LineNumber;
// We must be sure to use the string in `Glob` rather than the provided
// reference which could be destroyed before match() is called
- if (auto Err = GlobPattern::create(Glob->Name, /*MaxSubPatterns=*/1024)
+ if (auto Err = GlobPattern::create(Glob->Name, /*MaxSubPatterns=*/1024,
+ /*IsSlashAgnostic=*/true)
.moveInto(Glob->Pattern))
return Err;
Globs.push_back(std::move(Glob));
>From 7bfc6ad6afbcafe9f230d3854b28b4406f83537b Mon Sep 17 00:00:00 2001
From: Devon Loehr <dloehr at google.com>
Date: Fri, 5 Sep 2025 16:48:17 +0000
Subject: [PATCH 03/30] Enable only on windows
---
clang/docs/SanitizerSpecialCaseList.rst | 2 +-
clang/unittests/Basic/DiagnosticTest.cpp | 3 +++
llvm/docs/ReleaseNotes.md | 4 ++--
llvm/include/llvm/Support/GlobPattern.h | 3 ++-
llvm/lib/Support/SpecialCaseList.cpp | 6 +++++-
5 files changed, 13 insertions(+), 5 deletions(-)
diff --git a/clang/docs/SanitizerSpecialCaseList.rst b/clang/docs/SanitizerSpecialCaseList.rst
index f2a04dc9adcf1..e14b654536b8a 100644
--- a/clang/docs/SanitizerSpecialCaseList.rst
+++ b/clang/docs/SanitizerSpecialCaseList.rst
@@ -174,7 +174,7 @@ tool-specific docs.
# Lines starting with # are ignored.
# Turn off checks for the source file
# Entries without sections are placed into [*] and apply to all sanitizers
- # "/" matches both windows and unix path separators ("/" and "\")
+ # On windows, "/" matches both styles of path separator ("/" and "\")
src:path/to/source/file.c
src:*/source/file.c
# Turn off checks for this main file, including files included by it.
diff --git a/clang/unittests/Basic/DiagnosticTest.cpp b/clang/unittests/Basic/DiagnosticTest.cpp
index a6557b1e35c4b..7e9653bdd3c7e 100644
--- a/clang/unittests/Basic/DiagnosticTest.cpp
+++ b/clang/unittests/Basic/DiagnosticTest.cpp
@@ -361,6 +361,8 @@ TEST_F(SuppressionMappingTest, ParsingRespectsOtherWarningOpts) {
EXPECT_THAT(diags(), IsEmpty());
}
+#ifdef _WIN32
+// We're only slash-agnostic on windows hosts
TEST_F(SuppressionMappingTest, ForwardSlashMatchesBothDirections) {
llvm::StringLiteral SuppressionMappingFile = R"(
[unused]
@@ -383,4 +385,5 @@ TEST_F(SuppressionMappingTest, ForwardSlashMatchesBothDirections) {
EXPECT_FALSE(Diags.isSuppressedViaMapping(
diag::warn_unused_function, locForFile(R"(clang/lib/Sema/foo.h)")));
}
+#endif
} // namespace
diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md
index 6ff8d18b07e84..ae6222d2fa145 100644
--- a/llvm/docs/ReleaseNotes.md
+++ b/llvm/docs/ReleaseNotes.md
@@ -174,9 +174,9 @@ Changes to BOLT
Changes to Sanitizers
---------------------
-* The [sanitizer special case list format](https://clang.llvm.org/docs/SanitizerSpecialCaseList.html#format)
+* On windows hosts, the [sanitizer special case list format](https://clang.llvm.org/docs/SanitizerSpecialCaseList.html#format)
now treats forward slashes as either a forward or a backslash, to handle
- paths with mixed unix and window styles.
+ paths with mixed unix and windows styles.
Other Changes
-------------
diff --git a/llvm/include/llvm/Support/GlobPattern.h b/llvm/include/llvm/Support/GlobPattern.h
index 2729ba9a56649..4abd6b1874593 100644
--- a/llvm/include/llvm/Support/GlobPattern.h
+++ b/llvm/include/llvm/Support/GlobPattern.h
@@ -35,7 +35,8 @@ namespace llvm {
/// expansions are not supported. If \p MaxSubPatterns is empty then
/// brace expansions are not supported and characters `{,}` are treated as
/// literals.
-/// * `/` matches both unix and windows path separators: `/` and `\`.
+/// * If IsSlashAgnostic is passed, `/` matches both unix and windows path
+/// separators: `/` and `\`.
/// * `\` escapes the next character so it is treated as a literal.
///
/// Some known edge cases are:
diff --git a/llvm/lib/Support/SpecialCaseList.cpp b/llvm/lib/Support/SpecialCaseList.cpp
index c597f03188507..89ec193e1991c 100644
--- a/llvm/lib/Support/SpecialCaseList.cpp
+++ b/llvm/lib/Support/SpecialCaseList.cpp
@@ -18,6 +18,8 @@
#include "llvm/Support/LineIterator.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/VirtualFileSystem.h"
+#include "llvm/TargetParser/Host.h"
+#include "llvm/TargetParser/Triple.h"
#include <stdio.h>
#include <string>
#include <system_error>
@@ -57,10 +59,12 @@ Error SpecialCaseList::Matcher::insert(StringRef Pattern, unsigned LineNumber,
auto Glob = std::make_unique<Matcher::Glob>();
Glob->Name = Pattern.str();
Glob->LineNo = LineNumber;
+ // Backslashes are valid in posix-style filenames.
+ bool IsSlashAgnostic = Triple(sys::getDefaultTargetTriple()).isOSWindows();
// We must be sure to use the string in `Glob` rather than the provided
// reference which could be destroyed before match() is called
if (auto Err = GlobPattern::create(Glob->Name, /*MaxSubPatterns=*/1024,
- /*IsSlashAgnostic=*/true)
+ /*IsSlashAgnostic=*/IsSlashAgnostic)
.moveInto(Glob->Pattern))
return Err;
Globs.push_back(std::move(Glob));
>From a5eaf5880c295a6a50f0be504d3c421641f28932 Mon Sep 17 00:00:00 2001
From: Devon Loehr <dloehr at google.com>
Date: Mon, 8 Sep 2025 15:00:04 +0000
Subject: [PATCH 04/30] Incorporate review feedback
---
clang/docs/SanitizerSpecialCaseList.rst | 2 +-
clang/lib/Basic/Diagnostic.cpp | 5 ++++-
clang/lib/Basic/SanitizerSpecialCaseList.cpp | 2 +-
clang/unittests/Basic/DiagnosticTest.cpp | 12 ++++++-----
llvm/docs/ReleaseNotes.md | 4 ++--
llvm/include/llvm/Support/GlobPattern.h | 19 +++++++----------
llvm/include/llvm/Support/SpecialCaseList.h | 5 +++--
llvm/lib/Support/GlobPattern.cpp | 21 +++++++++----------
llvm/lib/Support/SpecialCaseList.cpp | 22 +++++++++++---------
9 files changed, 48 insertions(+), 44 deletions(-)
diff --git a/clang/docs/SanitizerSpecialCaseList.rst b/clang/docs/SanitizerSpecialCaseList.rst
index e14b654536b8a..752602c1b3093 100644
--- a/clang/docs/SanitizerSpecialCaseList.rst
+++ b/clang/docs/SanitizerSpecialCaseList.rst
@@ -174,7 +174,7 @@ tool-specific docs.
# Lines starting with # are ignored.
# Turn off checks for the source file
# Entries without sections are placed into [*] and apply to all sanitizers
- # On windows, "/" matches both styles of path separator ("/" and "\")
+ # On windows, "/" also matches "\" in filenames
src:path/to/source/file.c
src:*/source/file.c
# Turn off checks for this main file, including files included by it.
diff --git a/clang/lib/Basic/Diagnostic.cpp b/clang/lib/Basic/Diagnostic.cpp
index dc3778bbf339c..9dd133cb4c03e 100644
--- a/clang/lib/Basic/Diagnostic.cpp
+++ b/clang/lib/Basic/Diagnostic.cpp
@@ -622,6 +622,8 @@ bool WarningsSpecialCaseList::isDiagSuppressed(diag::kind DiagId,
bool WarningsSpecialCaseList::globsMatches(
const llvm::StringMap<Matcher> &CategoriesToMatchers,
StringRef FilePath) const {
+ static bool HaveWindowsPathStyle =
+ llvm::sys::path::is_style_windows(llvm::sys::path::Style::native);
StringRef LongestMatch;
bool LongestIsPositive = false;
for (const auto &Entry : CategoriesToMatchers) {
@@ -631,7 +633,8 @@ bool WarningsSpecialCaseList::globsMatches(
for (const auto &Glob : Matcher.Globs) {
if (Glob->Name.size() < LongestMatch.size())
continue;
- if (!Glob->Pattern.match(FilePath))
+ if (!Glob->Pattern.match(FilePath,
+ /*IsSlashAgnostic=*/HaveWindowsPathStyle))
continue;
LongestMatch = Glob->Name;
LongestIsPositive = IsPositive;
diff --git a/clang/lib/Basic/SanitizerSpecialCaseList.cpp b/clang/lib/Basic/SanitizerSpecialCaseList.cpp
index f7bc1d5545d75..4ad35d4d73fdd 100644
--- a/clang/lib/Basic/SanitizerSpecialCaseList.cpp
+++ b/clang/lib/Basic/SanitizerSpecialCaseList.cpp
@@ -42,7 +42,7 @@ void SanitizerSpecialCaseList::createSanitizerSections() {
SanitizerMask Mask;
#define SANITIZER(NAME, ID) \
- if (S.SectionMatcher->match(NAME)) \
+ if (S.SectionMatcher->match(NAME, /*IsFilename=*/false)) \
Mask |= SanitizerKind::ID;
#define SANITIZER_GROUP(NAME, ID, ALIAS) SANITIZER(NAME, ID)
diff --git a/clang/unittests/Basic/DiagnosticTest.cpp b/clang/unittests/Basic/DiagnosticTest.cpp
index 7e9653bdd3c7e..2af86b6a5ef38 100644
--- a/clang/unittests/Basic/DiagnosticTest.cpp
+++ b/clang/unittests/Basic/DiagnosticTest.cpp
@@ -363,12 +363,13 @@ TEST_F(SuppressionMappingTest, ParsingRespectsOtherWarningOpts) {
#ifdef _WIN32
// We're only slash-agnostic on windows hosts
-TEST_F(SuppressionMappingTest, ForwardSlashMatchesBothDirections) {
+TEST_F(SuppressionMappingTest, TreatsFilesAsSlashAgnosticOnWindows) {
llvm::StringLiteral SuppressionMappingFile = R"(
[unused]
src:*clang/*
src:*clang/lib/Sema/*=emit
- src:*clang/lib\\Sema/foo*)";
+ src:*clang/lib\\Sema/foo*
+ fun:suppress/me)";
Diags.getDiagnosticOptions().DiagnosticSuppressionMappingsFile = "foo.txt";
FS->addFile("foo.txt", /*ModificationTime=*/{},
llvm::MemoryBuffer::getMemBuffer(SuppressionMappingFile));
@@ -376,12 +377,13 @@ TEST_F(SuppressionMappingTest, ForwardSlashMatchesBothDirections) {
EXPECT_THAT(diags(), IsEmpty());
EXPECT_TRUE(Diags.isSuppressedViaMapping(
- diag::warn_unused_function, locForFile(R"(clang/lib/Basic/foo.h)")));
+ diag::warn_unused_function, locForFile(R"(clang/lib/Basic/bar.h)")));
EXPECT_FALSE(Diags.isSuppressedViaMapping(
- diag::warn_unused_function, locForFile(R"(clang/lib/Sema\bar.h)")));
+ diag::warn_unused_function, locForFile(R"(clang/lib/Sema\baz.h)")));
+
+ // We require a literal backslash before "Sema"
EXPECT_TRUE(Diags.isSuppressedViaMapping(
diag::warn_unused_function, locForFile(R"(clang\lib\Sema/foo.h)")));
- // The third pattern requires a literal backslash before Sema
EXPECT_FALSE(Diags.isSuppressedViaMapping(
diag::warn_unused_function, locForFile(R"(clang/lib/Sema/foo.h)")));
}
diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md
index ae6222d2fa145..39c2a82239fe0 100644
--- a/llvm/docs/ReleaseNotes.md
+++ b/llvm/docs/ReleaseNotes.md
@@ -175,8 +175,8 @@ Changes to Sanitizers
---------------------
* On windows hosts, the [sanitizer special case list format](https://clang.llvm.org/docs/SanitizerSpecialCaseList.html#format)
- now treats forward slashes as either a forward or a backslash, to handle
- paths with mixed unix and windows styles.
+ now treats forward slashes in filenames as matching either a forward or a
+ backslash, to accommodate paths with mixed unix and windows styles.
Other Changes
-------------
diff --git a/llvm/include/llvm/Support/GlobPattern.h b/llvm/include/llvm/Support/GlobPattern.h
index 4abd6b1874593..5fd1e0764cc7a 100644
--- a/llvm/include/llvm/Support/GlobPattern.h
+++ b/llvm/include/llvm/Support/GlobPattern.h
@@ -35,9 +35,9 @@ namespace llvm {
/// expansions are not supported. If \p MaxSubPatterns is empty then
/// brace expansions are not supported and characters `{,}` are treated as
/// literals.
-/// * If IsSlashAgnostic is passed, `/` matches both unix and windows path
-/// separators: `/` and `\`.
/// * `\` escapes the next character so it is treated as a literal.
+/// * If \p IsSlashAgnostic is passed to the match function, then forward
+/// slashes `/` also match backslashes `\`.
///
/// Some known edge cases are:
/// * The literal `]` is allowed as the first character in a character class,
@@ -57,12 +57,11 @@ class GlobPattern {
/// \param MaxSubPatterns if provided limit the number of allowed subpatterns
/// created from expanding braces otherwise disable
/// brace expansion
- /// \param IsSlashAgnostic whether to treat '/' as matching '\\' as well
LLVM_ABI static Expected<GlobPattern>
- create(StringRef Pat, std::optional<size_t> MaxSubPatterns = {},
- bool IsSlashAgnostic = false);
+ create(StringRef Pat, std::optional<size_t> MaxSubPatterns = {});
+ /// \param IsSlashAgnostic whether to treat '/' as also matching '\'
/// \returns \p true if \p S matches this glob pattern
- LLVM_ABI bool match(StringRef S) const;
+ LLVM_ABI bool match(StringRef S, bool IsSlashAgnostic = false) const;
// Returns true for glob pattern "*". Can be used to avoid expensive
// preparation/acquisition of the input for match().
@@ -79,11 +78,10 @@ class GlobPattern {
struct SubGlobPattern {
/// \param Pat the pattern to match against
- /// \param SlashAgnostic whether to treat '/' as matching '\\' as well
- LLVM_ABI static Expected<SubGlobPattern> create(StringRef Pat,
- bool SlashAgnostic);
+ LLVM_ABI static Expected<SubGlobPattern> create(StringRef Pat);
+ /// \param IsSlashAgnostic whether to treat '/' as also matching '\'
/// \returns \p true if \p S matches this glob pattern
- LLVM_ABI bool match(StringRef S) const;
+ LLVM_ABI bool match(StringRef S, bool IsSlashAgnostic) const;
StringRef getPat() const { return StringRef(Pat.data(), Pat.size()); }
// Brackets with their end position and matched bytes.
@@ -93,7 +91,6 @@ class GlobPattern {
};
SmallVector<Bracket, 0> Brackets;
SmallVector<char, 0> Pat;
- bool IsSlashAgnostic;
};
SmallVector<SubGlobPattern, 1> SubGlobs;
};
diff --git a/llvm/include/llvm/Support/SpecialCaseList.h b/llvm/include/llvm/Support/SpecialCaseList.h
index 22a62eac9e01a..951f27eed8ee8 100644
--- a/llvm/include/llvm/Support/SpecialCaseList.h
+++ b/llvm/include/llvm/Support/SpecialCaseList.h
@@ -124,8 +124,9 @@ class SpecialCaseList {
LLVM_ABI Error insert(StringRef Pattern, unsigned LineNumber,
bool UseRegex);
// Returns the line number in the source file that this query matches to.
- // Returns zero if no match is found.
- LLVM_ABI unsigned match(StringRef Query) const;
+ // On windows, treat '/' as also matching '\' in filenames when using globs.
+ // Returns zero if no match is found
+ LLVM_ABI unsigned match(StringRef Query, bool IsFilename) const;
struct Glob {
std::string Name;
diff --git a/llvm/lib/Support/GlobPattern.cpp b/llvm/lib/Support/GlobPattern.cpp
index 4aa30a81c3fbf..578c0dd0760d2 100644
--- a/llvm/lib/Support/GlobPattern.cpp
+++ b/llvm/lib/Support/GlobPattern.cpp
@@ -132,9 +132,8 @@ parseBraceExpansions(StringRef S, std::optional<size_t> MaxSubPatterns) {
return std::move(SubPatterns);
}
-Expected<GlobPattern> GlobPattern::create(StringRef S,
- std::optional<size_t> MaxSubPatterns,
- bool IsSlashAgnostic) {
+Expected<GlobPattern>
+GlobPattern::create(StringRef S, std::optional<size_t> MaxSubPatterns) {
GlobPattern Pat;
// Store the prefix that does not contain any metacharacter.
@@ -148,7 +147,7 @@ Expected<GlobPattern> GlobPattern::create(StringRef S,
if (auto Err = parseBraceExpansions(S, MaxSubPatterns).moveInto(SubPats))
return std::move(Err);
for (StringRef SubPat : SubPats) {
- auto SubGlobOrErr = SubGlobPattern::create(SubPat, IsSlashAgnostic);
+ auto SubGlobOrErr = SubGlobPattern::create(SubPat);
if (!SubGlobOrErr)
return SubGlobOrErr.takeError();
Pat.SubGlobs.push_back(*SubGlobOrErr);
@@ -158,9 +157,8 @@ Expected<GlobPattern> GlobPattern::create(StringRef S,
}
Expected<GlobPattern::SubGlobPattern>
-GlobPattern::SubGlobPattern::create(StringRef S, bool SlashAgnostic) {
+GlobPattern::SubGlobPattern::create(StringRef S) {
SubGlobPattern Pat;
- Pat.IsSlashAgnostic = SlashAgnostic;
// Parse brackets.
Pat.Pat.assign(S.begin(), S.end());
@@ -192,21 +190,22 @@ GlobPattern::SubGlobPattern::create(StringRef S, bool SlashAgnostic) {
return Pat;
}
-bool GlobPattern::match(StringRef S) const {
+bool GlobPattern::match(StringRef S, bool IsSlashAgnostic) const {
if (!S.consume_front(Prefix))
return false;
if (SubGlobs.empty() && S.empty())
return true;
for (auto &Glob : SubGlobs)
- if (Glob.match(S))
+ if (Glob.match(S, IsSlashAgnostic))
return true;
return false;
}
// Factor the pattern into segments split by '*'. The segment is matched
-// sequentianlly by finding the first occurrence past the end of the previous
+// sequentially by finding the first occurrence past the end of the previous
// match.
-bool GlobPattern::SubGlobPattern::match(StringRef Str) const {
+bool GlobPattern::SubGlobPattern::match(StringRef Str,
+ bool IsSlashAgnostic) const {
const char *P = Pat.data(), *SegmentBegin = nullptr, *S = Str.data(),
*SavedS = S;
const char *const PEnd = P + Pat.size(), *const End = S + Str.size();
@@ -233,7 +232,7 @@ bool GlobPattern::SubGlobPattern::match(StringRef Str) const {
++S;
continue;
}
- } else if (IsSlashAgnostic && *P == '/' && (*S == '/' || *S == '\\')) {
+ } else if (IsSlashAgnostic && *P == '/' && *S == '\\') {
++P;
++S;
continue;
diff --git a/llvm/lib/Support/SpecialCaseList.cpp b/llvm/lib/Support/SpecialCaseList.cpp
index 89ec193e1991c..c65cb977c005c 100644
--- a/llvm/lib/Support/SpecialCaseList.cpp
+++ b/llvm/lib/Support/SpecialCaseList.cpp
@@ -17,9 +17,8 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/LineIterator.h"
#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
#include "llvm/Support/VirtualFileSystem.h"
-#include "llvm/TargetParser/Host.h"
-#include "llvm/TargetParser/Triple.h"
#include <stdio.h>
#include <string>
#include <system_error>
@@ -59,21 +58,22 @@ Error SpecialCaseList::Matcher::insert(StringRef Pattern, unsigned LineNumber,
auto Glob = std::make_unique<Matcher::Glob>();
Glob->Name = Pattern.str();
Glob->LineNo = LineNumber;
- // Backslashes are valid in posix-style filenames.
- bool IsSlashAgnostic = Triple(sys::getDefaultTargetTriple()).isOSWindows();
// We must be sure to use the string in `Glob` rather than the provided
// reference which could be destroyed before match() is called
- if (auto Err = GlobPattern::create(Glob->Name, /*MaxSubPatterns=*/1024,
- /*IsSlashAgnostic=*/IsSlashAgnostic)
+ if (auto Err = GlobPattern::create(Glob->Name, /*MaxSubPatterns=*/1024)
.moveInto(Glob->Pattern))
return Err;
Globs.push_back(std::move(Glob));
return Error::success();
}
-unsigned SpecialCaseList::Matcher::match(StringRef Query) const {
+unsigned SpecialCaseList::Matcher::match(StringRef Query,
+ bool IsFilename) const {
+ static bool HaveWindowsPathStyle =
+ llvm::sys::path::is_style_windows(llvm::sys::path::Style::native);
for (const auto &Glob : reverse(Globs))
- if (Glob->Pattern.match(Query))
+ if (Glob->Pattern.match(
+ Query, /*IsSlashAgnostic=*/(HaveWindowsPathStyle && IsFilename)))
return Glob->LineNo;
for (const auto &[Regex, LineNumber] : reverse(RegExes))
if (Regex->match(Query))
@@ -223,7 +223,8 @@ std::pair<unsigned, unsigned>
SpecialCaseList::inSectionBlame(StringRef Section, StringRef Prefix,
StringRef Query, StringRef Category) const {
for (const auto &S : reverse(Sections)) {
- if (S.SectionMatcher->match(Section)) {
+ bool IsFilename = Prefix == "src" || Prefix == "mainfile";
+ if (S.SectionMatcher->match(Section, IsFilename)) {
unsigned Blame = inSectionBlame(S.Entries, Prefix, Query, Category);
if (Blame)
return {S.FileIdx, Blame};
@@ -242,7 +243,8 @@ unsigned SpecialCaseList::inSectionBlame(const SectionEntries &Entries,
if (II == I->second.end())
return 0;
- return II->getValue().match(Query);
+ bool IsFilename = Prefix == "src" || Prefix == "mainfile";
+ return II->getValue().match(Query, IsFilename);
}
} // namespace llvm
>From 47236f1866050ac23126a64a3d340b755fa89918 Mon Sep 17 00:00:00 2001
From: Devon Loehr <dloehr at google.com>
Date: Thu, 2 Oct 2025 14:56:49 +0000
Subject: [PATCH 05/30] Add glob pattern unittest
---
llvm/lib/Support/GlobPattern.cpp | 2 +-
llvm/unittests/Support/GlobPatternTest.cpp | 9 +++++++++
2 files changed, 10 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Support/GlobPattern.cpp b/llvm/lib/Support/GlobPattern.cpp
index 578c0dd0760d2..e19c8c84d19a9 100644
--- a/llvm/lib/Support/GlobPattern.cpp
+++ b/llvm/lib/Support/GlobPattern.cpp
@@ -137,7 +137,7 @@ GlobPattern::create(StringRef S, std::optional<size_t> MaxSubPatterns) {
GlobPattern Pat;
// Store the prefix that does not contain any metacharacter.
- size_t PrefixSize = S.find_first_of("?*[{\\");
+ size_t PrefixSize = S.find_first_of("?*[{\\/");
Pat.Prefix = S.substr(0, PrefixSize);
if (PrefixSize == std::string::npos)
return Pat;
diff --git a/llvm/unittests/Support/GlobPatternTest.cpp b/llvm/unittests/Support/GlobPatternTest.cpp
index e4f1025b00956..ba33e233c70bc 100644
--- a/llvm/unittests/Support/GlobPatternTest.cpp
+++ b/llvm/unittests/Support/GlobPatternTest.cpp
@@ -271,4 +271,13 @@ TEST_F(GlobPatternTest, Pathological) {
EXPECT_FALSE(Pat->match(S));
EXPECT_TRUE(Pat->match(S + 'b'));
}
+
+TEST_F(GlobPatternTest, SlashAgnostic) {
+ auto Pat = GlobPattern::create("clang/*");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_TRUE(Pat->match("clang/foo"));
+ EXPECT_FALSE(Pat->match(R"(clang\foo)"));
+ EXPECT_TRUE(Pat->match("clang/foo", /*isSlashAgnostic=*/true));
+ EXPECT_TRUE(Pat->match(R"(clang\foo)", /*isSlashAgnostic=*/true));
+}
}
>From 8535a1144ebad85b6282cac531d6048c7f45b4d7 Mon Sep 17 00:00:00 2001
From: Devon Loehr <dloehr at google.com>
Date: Mon, 6 Oct 2025 17:48:19 +0000
Subject: [PATCH 06/30] Canonicalize special case list filenames when loading
---
clang/docs/SanitizerSpecialCaseList.rst | 18 ++++++++++-
clang/lib/Basic/Diagnostic.cpp | 21 ++++++++-----
clang/lib/Basic/SanitizerSpecialCaseList.cpp | 2 +-
clang/unittests/Basic/DiagnosticTest.cpp | 16 +++++++---
llvm/docs/ReleaseNotes.md | 7 +++--
llvm/include/llvm/Support/GlobPattern.h | 8 ++---
llvm/include/llvm/Support/SpecialCaseList.h | 8 ++---
llvm/lib/Support/GlobPattern.cpp | 15 +++------
llvm/lib/Support/SpecialCaseList.cpp | 31 ++++++++++---------
llvm/unittests/Support/GlobPatternTest.cpp | 9 ------
.../unittests/Support/SpecialCaseListTest.cpp | 18 +++++++++++
11 files changed, 92 insertions(+), 61 deletions(-)
diff --git a/clang/docs/SanitizerSpecialCaseList.rst b/clang/docs/SanitizerSpecialCaseList.rst
index 752602c1b3093..f5c45c1b81df0 100644
--- a/clang/docs/SanitizerSpecialCaseList.rst
+++ b/clang/docs/SanitizerSpecialCaseList.rst
@@ -174,7 +174,6 @@ tool-specific docs.
# Lines starting with # are ignored.
# Turn off checks for the source file
# Entries without sections are placed into [*] and apply to all sanitizers
- # On windows, "/" also matches "\" in filenames
src:path/to/source/file.c
src:*/source/file.c
# Turn off checks for this main file, including files included by it.
@@ -197,6 +196,23 @@ tool-specific docs.
[{cfi-vcall,cfi-icall}]
fun:*BadCfiCall
+
+.. note::
+
+ By default, ``src`` and ``mainfile`` are matched against the filename as seen
+ by LLVM. On Windows, this might involve a mix of forward slashes and
+ backslashes as file separators, and writing patterns to match both variants
+ can be inconvenient.
+
+ If the special case list file begins with ``#!canonical-paths``, then paths
+ will be canonicalized before patterns are matched against them. This involves
+ stripping any leading ``./`` components, and (on Windows only) converting all
+ backslashes to forward slashes.
+
+ If the file uses both ``#!special-case-list-v1`` and ``#!canonical-paths``,
+ then they should occupy the first two lines, and ``#!canonical-paths`` must
+ appear on the second line.
+
``mainfile`` is similar to applying ``-fno-sanitize=`` to a set of files but
does not need plumbing into the build system. This works well for internal
linkage functions but has a caveat for C++ vague linkage functions.
diff --git a/clang/lib/Basic/Diagnostic.cpp b/clang/lib/Basic/Diagnostic.cpp
index 9dd133cb4c03e..71762d10aefa6 100644
--- a/clang/lib/Basic/Diagnostic.cpp
+++ b/clang/lib/Basic/Diagnostic.cpp
@@ -612,18 +612,24 @@ bool WarningsSpecialCaseList::isDiagSuppressed(diag::kind DiagId,
SrcEntriesIt->getValue();
// We also use presumed locations here to improve reproducibility for
// preprocessed inputs.
- if (PresumedLoc PLoc = SM.getPresumedLoc(DiagLoc); PLoc.isValid())
- return globsMatches(
- CategoriesToMatchers,
- llvm::sys::path::remove_leading_dotslash(PLoc.getFilename()));
+ if (PresumedLoc PLoc = SM.getPresumedLoc(DiagLoc); PLoc.isValid()) {
+ if (CanonicalizePaths) {
+ return globsMatches(
+ CategoriesToMatchers,
+ llvm::sys::path::convert_to_slash(
+ llvm::sys::path::remove_leading_dotslash(PLoc.getFilename())));
+ } else {
+ return globsMatches(
+ CategoriesToMatchers,
+ llvm::sys::path::remove_leading_dotslash(PLoc.getFilename()));
+ }
+ }
return false;
}
bool WarningsSpecialCaseList::globsMatches(
const llvm::StringMap<Matcher> &CategoriesToMatchers,
StringRef FilePath) const {
- static bool HaveWindowsPathStyle =
- llvm::sys::path::is_style_windows(llvm::sys::path::Style::native);
StringRef LongestMatch;
bool LongestIsPositive = false;
for (const auto &Entry : CategoriesToMatchers) {
@@ -633,8 +639,7 @@ bool WarningsSpecialCaseList::globsMatches(
for (const auto &Glob : Matcher.Globs) {
if (Glob->Name.size() < LongestMatch.size())
continue;
- if (!Glob->Pattern.match(FilePath,
- /*IsSlashAgnostic=*/HaveWindowsPathStyle))
+ if (!Glob->Pattern.match(FilePath))
continue;
LongestMatch = Glob->Name;
LongestIsPositive = IsPositive;
diff --git a/clang/lib/Basic/SanitizerSpecialCaseList.cpp b/clang/lib/Basic/SanitizerSpecialCaseList.cpp
index 4ad35d4d73fdd..f7bc1d5545d75 100644
--- a/clang/lib/Basic/SanitizerSpecialCaseList.cpp
+++ b/clang/lib/Basic/SanitizerSpecialCaseList.cpp
@@ -42,7 +42,7 @@ void SanitizerSpecialCaseList::createSanitizerSections() {
SanitizerMask Mask;
#define SANITIZER(NAME, ID) \
- if (S.SectionMatcher->match(NAME, /*IsFilename=*/false)) \
+ if (S.SectionMatcher->match(NAME)) \
Mask |= SanitizerKind::ID;
#define SANITIZER_GROUP(NAME, ID, ALIAS) SANITIZER(NAME, ID)
diff --git a/clang/unittests/Basic/DiagnosticTest.cpp b/clang/unittests/Basic/DiagnosticTest.cpp
index 2af86b6a5ef38..2e052b9e5eaf3 100644
--- a/clang/unittests/Basic/DiagnosticTest.cpp
+++ b/clang/unittests/Basic/DiagnosticTest.cpp
@@ -362,9 +362,8 @@ TEST_F(SuppressionMappingTest, ParsingRespectsOtherWarningOpts) {
}
#ifdef _WIN32
-// We're only slash-agnostic on windows hosts
-TEST_F(SuppressionMappingTest, TreatsFilesAsSlashAgnosticOnWindows) {
- llvm::StringLiteral SuppressionMappingFile = R"(
+TEST_F(SuppressionMappingTest, CanonicalizesSlashesOnWindows) {
+ llvm::StringLiteral SuppressionMappingFile = R"(#!canonical-paths
[unused]
src:*clang/*
src:*clang/lib/Sema/*=emit
@@ -378,14 +377,21 @@ TEST_F(SuppressionMappingTest, TreatsFilesAsSlashAgnosticOnWindows) {
EXPECT_TRUE(Diags.isSuppressedViaMapping(
diag::warn_unused_function, locForFile(R"(clang/lib/Basic/bar.h)")));
+ EXPECT_TRUE(Diags.isSuppressedViaMapping(
+ diag::warn_unused_function, locForFile(R"(clang/lib/Basic\bar.h)")));
+ EXPECT_TRUE(Diags.isSuppressedViaMapping(
+ diag::warn_unused_function, locForFile(R"(clang\lib/Basic/bar.h)")));
+ EXPECT_FALSE(Diags.isSuppressedViaMapping(
+ diag::warn_unused_function, locForFile(R"(clang/lib/Sema/baz.h)")));
EXPECT_FALSE(Diags.isSuppressedViaMapping(
diag::warn_unused_function, locForFile(R"(clang/lib/Sema\baz.h)")));
- // We require a literal backslash before "Sema"
- EXPECT_TRUE(Diags.isSuppressedViaMapping(
+ // The backslash gets canonicalized so we never match the third pattern
+ EXPECT_FALSE(Diags.isSuppressedViaMapping(
diag::warn_unused_function, locForFile(R"(clang\lib\Sema/foo.h)")));
EXPECT_FALSE(Diags.isSuppressedViaMapping(
diag::warn_unused_function, locForFile(R"(clang/lib/Sema/foo.h)")));
}
#endif
+
} // namespace
diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md
index 39c2a82239fe0..5a7f41ef3f0dd 100644
--- a/llvm/docs/ReleaseNotes.md
+++ b/llvm/docs/ReleaseNotes.md
@@ -174,9 +174,10 @@ Changes to BOLT
Changes to Sanitizers
---------------------
-* On windows hosts, the [sanitizer special case list format](https://clang.llvm.org/docs/SanitizerSpecialCaseList.html#format)
- now treats forward slashes in filenames as matching either a forward or a
- backslash, to accommodate paths with mixed unix and windows styles.
+* [Sanitizer Special Case Lists](https://clang.llvm.org/docs/SanitizerSpecialCaseList.html)
+ may now be prefixed with ``#!canonical-paths`` to specify that filename patterns
+ should be matched against canonicalized paths, without leading dots or slashes
+ and (on Windows only) without any backslashes.
Other Changes
-------------
diff --git a/llvm/include/llvm/Support/GlobPattern.h b/llvm/include/llvm/Support/GlobPattern.h
index 5fd1e0764cc7a..62ed4a0f23fd9 100644
--- a/llvm/include/llvm/Support/GlobPattern.h
+++ b/llvm/include/llvm/Support/GlobPattern.h
@@ -36,8 +36,6 @@ namespace llvm {
/// brace expansions are not supported and characters `{,}` are treated as
/// literals.
/// * `\` escapes the next character so it is treated as a literal.
-/// * If \p IsSlashAgnostic is passed to the match function, then forward
-/// slashes `/` also match backslashes `\`.
///
/// Some known edge cases are:
/// * The literal `]` is allowed as the first character in a character class,
@@ -59,9 +57,8 @@ class GlobPattern {
/// brace expansion
LLVM_ABI static Expected<GlobPattern>
create(StringRef Pat, std::optional<size_t> MaxSubPatterns = {});
- /// \param IsSlashAgnostic whether to treat '/' as also matching '\'
/// \returns \p true if \p S matches this glob pattern
- LLVM_ABI bool match(StringRef S, bool IsSlashAgnostic = false) const;
+ LLVM_ABI bool match(StringRef S) const;
// Returns true for glob pattern "*". Can be used to avoid expensive
// preparation/acquisition of the input for match().
@@ -79,9 +76,8 @@ class GlobPattern {
struct SubGlobPattern {
/// \param Pat the pattern to match against
LLVM_ABI static Expected<SubGlobPattern> create(StringRef Pat);
- /// \param IsSlashAgnostic whether to treat '/' as also matching '\'
/// \returns \p true if \p S matches this glob pattern
- LLVM_ABI bool match(StringRef S, bool IsSlashAgnostic) const;
+ LLVM_ABI bool match(StringRef S) const;
StringRef getPat() const { return StringRef(Pat.data(), Pat.size()); }
// Brackets with their end position and matched bytes.
diff --git a/llvm/include/llvm/Support/SpecialCaseList.h b/llvm/include/llvm/Support/SpecialCaseList.h
index 951f27eed8ee8..5c5df23a8623d 100644
--- a/llvm/include/llvm/Support/SpecialCaseList.h
+++ b/llvm/include/llvm/Support/SpecialCaseList.h
@@ -122,11 +122,10 @@ class SpecialCaseList {
class Matcher {
public:
LLVM_ABI Error insert(StringRef Pattern, unsigned LineNumber,
- bool UseRegex);
+ bool UseGlobs);
// Returns the line number in the source file that this query matches to.
- // On windows, treat '/' as also matching '\' in filenames when using globs.
- // Returns zero if no match is found
- LLVM_ABI unsigned match(StringRef Query, bool IsFilename) const;
+ // Returns zero if no match is found.
+ LLVM_ABI unsigned match(StringRef Query) const;
struct Glob {
std::string Name;
@@ -155,6 +154,7 @@ class SpecialCaseList {
};
std::vector<Section> Sections;
+ bool CanonicalizePaths = false;
LLVM_ABI Expected<Section *> addSection(StringRef SectionStr,
unsigned FileIdx, unsigned LineNo,
diff --git a/llvm/lib/Support/GlobPattern.cpp b/llvm/lib/Support/GlobPattern.cpp
index e19c8c84d19a9..7004adf461a0c 100644
--- a/llvm/lib/Support/GlobPattern.cpp
+++ b/llvm/lib/Support/GlobPattern.cpp
@@ -137,7 +137,7 @@ GlobPattern::create(StringRef S, std::optional<size_t> MaxSubPatterns) {
GlobPattern Pat;
// Store the prefix that does not contain any metacharacter.
- size_t PrefixSize = S.find_first_of("?*[{\\/");
+ size_t PrefixSize = S.find_first_of("?*[{\\");
Pat.Prefix = S.substr(0, PrefixSize);
if (PrefixSize == std::string::npos)
return Pat;
@@ -190,22 +190,21 @@ GlobPattern::SubGlobPattern::create(StringRef S) {
return Pat;
}
-bool GlobPattern::match(StringRef S, bool IsSlashAgnostic) const {
+bool GlobPattern::match(StringRef S) const {
if (!S.consume_front(Prefix))
return false;
if (SubGlobs.empty() && S.empty())
return true;
for (auto &Glob : SubGlobs)
- if (Glob.match(S, IsSlashAgnostic))
+ if (Glob.match(S))
return true;
return false;
}
// Factor the pattern into segments split by '*'. The segment is matched
-// sequentially by finding the first occurrence past the end of the previous
+// sequentially by finding the first occurrence past the end of the previous
// match.
-bool GlobPattern::SubGlobPattern::match(StringRef Str,
- bool IsSlashAgnostic) const {
+bool GlobPattern::SubGlobPattern::match(StringRef Str) const {
const char *P = Pat.data(), *SegmentBegin = nullptr, *S = Str.data(),
*SavedS = S;
const char *const PEnd = P + Pat.size(), *const End = S + Str.size();
@@ -232,10 +231,6 @@ bool GlobPattern::SubGlobPattern::match(StringRef Str,
++S;
continue;
}
- } else if (IsSlashAgnostic && *P == '/' && *S == '\\') {
- ++P;
- ++S;
- continue;
} else if (*P == *S || *P == '?') {
++P;
++S;
diff --git a/llvm/lib/Support/SpecialCaseList.cpp b/llvm/lib/Support/SpecialCaseList.cpp
index c65cb977c005c..4de7478aaf53b 100644
--- a/llvm/lib/Support/SpecialCaseList.cpp
+++ b/llvm/lib/Support/SpecialCaseList.cpp
@@ -17,7 +17,6 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/LineIterator.h"
#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/Path.h"
#include "llvm/Support/VirtualFileSystem.h"
#include <stdio.h>
#include <string>
@@ -67,13 +66,9 @@ Error SpecialCaseList::Matcher::insert(StringRef Pattern, unsigned LineNumber,
return Error::success();
}
-unsigned SpecialCaseList::Matcher::match(StringRef Query,
- bool IsFilename) const {
- static bool HaveWindowsPathStyle =
- llvm::sys::path::is_style_windows(llvm::sys::path::Style::native);
+unsigned SpecialCaseList::Matcher::match(StringRef Query) const {
for (const auto &Glob : reverse(Globs))
- if (Glob->Pattern.match(
- Query, /*IsSlashAgnostic=*/(HaveWindowsPathStyle && IsFilename)))
+ if (Glob->Pattern.match(Query))
return Glob->LineNo;
for (const auto &[Regex, LineNumber] : reverse(RegExes))
if (Regex->match(Query))
@@ -158,12 +153,17 @@ bool SpecialCaseList::parse(unsigned FileIdx, const MemoryBuffer *MB,
return false;
}
+ // Scan the start of the file for special comments. These don't appear when
+ // iterating below because comment lines are automatically skipped.
+ StringRef Buffer = MB->getBuffer();
// In https://reviews.llvm.org/D154014 we added glob support and planned to
// remove regex support in patterns. We temporarily support the original
- // behavior using regexes if "#!special-case-list-v1" is the first line of the
- // file. For more details, see
+ // behavior using regexes if "#!special-case-list-v1" is the first line of
+ // the file. For more details, see
// https://discourse.llvm.org/t/use-glob-instead-of-regex-for-specialcaselists/71666
- bool UseGlobs = !MB->getBuffer().starts_with("#!special-case-list-v1\n");
+ bool UseGlobs = !Buffer.consume_front("#!special-case-list-v1\n");
+ // Specifies that patterns should be matched against canonicalized filepaths.
+ CanonicalizePaths = Buffer.consume_front("#!canonical-paths\n");
for (line_iterator LineIt(*MB, /*SkipBlanks=*/true, /*CommentMarker=*/'#');
!LineIt.is_at_eof(); LineIt++) {
@@ -223,8 +223,7 @@ std::pair<unsigned, unsigned>
SpecialCaseList::inSectionBlame(StringRef Section, StringRef Prefix,
StringRef Query, StringRef Category) const {
for (const auto &S : reverse(Sections)) {
- bool IsFilename = Prefix == "src" || Prefix == "mainfile";
- if (S.SectionMatcher->match(Section, IsFilename)) {
+ if (S.SectionMatcher->match(Section)) {
unsigned Blame = inSectionBlame(S.Entries, Prefix, Query, Category);
if (Blame)
return {S.FileIdx, Blame};
@@ -243,8 +242,12 @@ unsigned SpecialCaseList::inSectionBlame(const SectionEntries &Entries,
if (II == I->second.end())
return 0;
- bool IsFilename = Prefix == "src" || Prefix == "mainfile";
- return II->getValue().match(Query, IsFilename);
+ if (CanonicalizePaths && (Prefix == "src" || Prefix == "mainfile")) {
+ return II->getValue().match(llvm::sys::path::convert_to_slash(
+ llvm::sys::path::remove_leading_dotslash(Query)));
+ } else {
+ return II->getValue().match(Query);
+ }
}
} // namespace llvm
diff --git a/llvm/unittests/Support/GlobPatternTest.cpp b/llvm/unittests/Support/GlobPatternTest.cpp
index ba33e233c70bc..e4f1025b00956 100644
--- a/llvm/unittests/Support/GlobPatternTest.cpp
+++ b/llvm/unittests/Support/GlobPatternTest.cpp
@@ -271,13 +271,4 @@ TEST_F(GlobPatternTest, Pathological) {
EXPECT_FALSE(Pat->match(S));
EXPECT_TRUE(Pat->match(S + 'b'));
}
-
-TEST_F(GlobPatternTest, SlashAgnostic) {
- auto Pat = GlobPattern::create("clang/*");
- ASSERT_TRUE((bool)Pat);
- EXPECT_TRUE(Pat->match("clang/foo"));
- EXPECT_FALSE(Pat->match(R"(clang\foo)"));
- EXPECT_TRUE(Pat->match("clang/foo", /*isSlashAgnostic=*/true));
- EXPECT_TRUE(Pat->match(R"(clang\foo)", /*isSlashAgnostic=*/true));
-}
}
diff --git a/llvm/unittests/Support/SpecialCaseListTest.cpp b/llvm/unittests/Support/SpecialCaseListTest.cpp
index 5be2b9e3a7a5d..5fc077f3d94ac 100644
--- a/llvm/unittests/Support/SpecialCaseListTest.cpp
+++ b/llvm/unittests/Support/SpecialCaseListTest.cpp
@@ -372,4 +372,22 @@ TEST_F(SpecialCaseListTest, FileIdx) {
sys::fs::remove(Path);
}
+#ifdef _WIN32
+TEST_F(SpecialCaseListTest, CanonicalizePathsOnWindows) {
+ std::unique_ptr<SpecialCaseList> SCL =
+ makeSpecialCaseList("#!canonical-paths\n"
+ "\n"
+ "src:*foo/bar*\n"
+ "src:*foo\\\\baz\n"
+ "fun:hi\\\\bye=category\n");
+ EXPECT_TRUE(SCL->inSection("", "src", "foo/bar"));
+ EXPECT_TRUE(SCL->inSection("", "src", "foo\\bar"));
+ // The baz pattern doesn't match because paths are canonicalized first
+ EXPECT_FALSE(SCL->inSection("", "src", "foo/baz"));
+ EXPECT_FALSE(SCL->inSection("", "src", "foo\\baz"));
+ // The canonicalization only applies to files
+ EXPECT_TRUE(SCL->inSection("", "fun", "hi\\bye", "category"));
+}
+#endif
+
} // namespace
>From 85cdb803db0b6091a647ca340cadb269e366d02c Mon Sep 17 00:00:00 2001
From: Vitaly Buka <vitalybuka at google.com>
Date: Tue, 9 Jun 2026 10:22:39 -0700
Subject: [PATCH 07/30] Resolve merge conflicts
---
llvm/include/llvm/Support/SpecialCaseList.h | 2 +-
llvm/lib/Support/SpecialCaseList.cpp | 35 +++++++++++----------
2 files changed, 19 insertions(+), 18 deletions(-)
diff --git a/llvm/include/llvm/Support/SpecialCaseList.h b/llvm/include/llvm/Support/SpecialCaseList.h
index 97c08f84cc800..deda39d93199c 100644
--- a/llvm/include/llvm/Support/SpecialCaseList.h
+++ b/llvm/include/llvm/Support/SpecialCaseList.h
@@ -119,7 +119,7 @@ class SpecialCaseList {
class Section {
public:
LLVM_ABI Section(StringRef Name, unsigned FileIdx, bool UseGlobs);
- bool UseGlobs);
+ LLVM_ABI Section(Section &&);
LLVM_ABI ~Section();
// Returns name of the section, its entire string in [].
diff --git a/llvm/lib/Support/SpecialCaseList.cpp b/llvm/lib/Support/SpecialCaseList.cpp
index 1afa609784d22..66f0efc57029a 100644
--- a/llvm/lib/Support/SpecialCaseList.cpp
+++ b/llvm/lib/Support/SpecialCaseList.cpp
@@ -23,6 +23,7 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/GlobPattern.h"
#include "llvm/Support/LineIterator.h"
+#include "llvm/Support/Path.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Regex.h"
#include "llvm/Support/VirtualFileSystem.h"
@@ -322,11 +323,14 @@ SpecialCaseList::addSection(StringRef SectionStr, unsigned FileNo,
bool SpecialCaseList::parse(unsigned FileIdx, const MemoryBuffer *MB,
std::string &Error) {
+ StringRef Buffer = MB->getBuffer();
unsigned long long Version = 2;
+ if (Buffer.consume_front("#!special-case-list-v")) {
+ consumeUnsignedInteger(Buffer, 10, Version);
+ Buffer = Buffer.ltrim(" \t\r\n");
+ }
- StringRef Header = MB->getBuffer();
- if (Header.consume_front("#!special-case-list-v"))
- consumeUnsignedInteger(Header, 10, Version);
+ CanonicalizePaths = Buffer.consume_front("#!canonical-paths\n");
// In https://reviews.llvm.org/D154014 we added glob support and planned
// to remove regex support in patterns. We temporarily support the
@@ -344,14 +348,9 @@ bool SpecialCaseList::parse(unsigned FileIdx, const MemoryBuffer *MB,
}
Section::SectionImpl *CurrentImpl = ErrOrSection.get()->Impl.get();
- // Scan the start of the file for special comments. These don't appear when
- // iterating below because comment lines are automatically skipped.
- StringRef Buffer = MB->getBuffer();
- // behavior using regexes if "#!special-case-list-v1" is the first line of
- // the file. For more details, see
- bool UseGlobs = !Buffer.consume_front("#!special-case-list-v1\n");
- // Specifies that patterns should be matched against canonicalized filepaths.
- CanonicalizePaths = Buffer.consume_front("#!canonical-paths\n");
+ // This is the current list of prefixes that existing users match against file
+ // paths. We may need to parameterize this via the constructor in the future.
+ constexpr StringRef PathPrefixes[] = {"src", "!src", "mainfile", "source"};
for (line_iterator LineIt(*MB, /*SkipBlanks=*/true, /*CommentMarker=*/'#');
!LineIt.is_at_eof(); LineIt++) {
@@ -415,6 +414,12 @@ bool SpecialCaseList::inSection(StringRef Section, StringRef Prefix,
std::pair<unsigned, unsigned>
SpecialCaseList::inSectionBlame(StringRef Section, StringRef Prefix,
StringRef Query, StringRef Category) const {
+ std::string CanonicalizedQuery;
+ if (CanonicalizePaths && (Prefix == "src" || Prefix == "mainfile")) {
+ CanonicalizedQuery = llvm::sys::path::convert_to_slash(
+ llvm::sys::path::remove_leading_dotslash(Query));
+ Query = CanonicalizedQuery;
+ }
for (const auto &S : reverse(Sections)) {
if (S.Impl->SectionMatcher.matchAny(Section)) {
unsigned Blame = S.getLastMatch(Prefix, Query, Category);
@@ -459,12 +464,8 @@ unsigned SpecialCaseList::Section::getLastMatch(StringRef Prefix,
return 0;
}
- if (CanonicalizePaths && (Prefix == "src" || Prefix == "mainfile")) {
- return II->getValue().match(llvm::sys::path::convert_to_slash(
- llvm::sys::path::remove_leading_dotslash(Query)));
- } else {
- return II->getValue().match(Query);
- }
+bool SpecialCaseList::Section::hasPrefix(StringRef Prefix) const {
+ return Impl->Entries.contains(Prefix);
}
} // namespace llvm
>From d2345cfb80b7b4422d70e6be135ed3af5b918ef8 Mon Sep 17 00:00:00 2001
From: Vitaly Buka <vitalybuka at google.com>
Date: Tue, 9 Jun 2026 10:31:09 -0700
Subject: [PATCH 08/30] [SpecialCaseList] Replace #!canonical-paths with
#!special-case-list-v4
---
clang/docs/SanitizerSpecialCaseList.rst | 7 ++-----
clang/unittests/Basic/DiagnosticTest.cpp | 2 +-
llvm/docs/ReleaseNotes.md | 6 +++---
llvm/lib/Support/SpecialCaseList.cpp | 2 +-
llvm/unittests/Support/SpecialCaseListTest.cpp | 3 ++-
5 files changed, 9 insertions(+), 11 deletions(-)
diff --git a/clang/docs/SanitizerSpecialCaseList.rst b/clang/docs/SanitizerSpecialCaseList.rst
index dcda6283b6889..9fe070825f60d 100644
--- a/clang/docs/SanitizerSpecialCaseList.rst
+++ b/clang/docs/SanitizerSpecialCaseList.rst
@@ -238,15 +238,12 @@ tool-specific docs.
file separators, and writing patterns to match both variants can be
inconvenient.
- If the special case list file begins with ``#!canonical-paths``, then paths
+ File path canonicalization is supported in version 4 or later (indicated by
+ starting the file with ``#!special-case-list-v4``). In this version, paths
will be canonicalized before patterns are matched against them. This involves
stripping any leading dots and slashes, and (on Windows only) converting all
backslashes to forward slashes.
- If the file uses both ``#!special-case-list-v1`` and ``#!canonical-paths``,
- then they should occupy the first two lines, and ``#!canonical-paths`` must
- appear on the second line.
-
``mainfile`` is similar to applying ``-fno-sanitize=`` to a set of files but
does not need plumbing into the build system. This works well for internal
linkage functions but has a caveat for C++ vague linkage functions.
diff --git a/clang/unittests/Basic/DiagnosticTest.cpp b/clang/unittests/Basic/DiagnosticTest.cpp
index 4fea63f5f2a39..7e86e232493cd 100644
--- a/clang/unittests/Basic/DiagnosticTest.cpp
+++ b/clang/unittests/Basic/DiagnosticTest.cpp
@@ -417,7 +417,7 @@ TEST_F(SuppressionMappingTest, ParsingRespectsOtherWarningOpts) {
#ifdef _WIN32
TEST_F(SuppressionMappingTest, CanonicalizesSlashesOnWindows) {
- llvm::StringLiteral SuppressionMappingFile = R"(#!canonical-paths
+ llvm::StringLiteral SuppressionMappingFile = R"(#!special-case-list-v4
[unused]
src:*clang/*
src:*clang/lib/Sema/*=emit
diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md
index 6a2c554c94d1f..c6af634682a36 100644
--- a/llvm/docs/ReleaseNotes.md
+++ b/llvm/docs/ReleaseNotes.md
@@ -292,9 +292,9 @@ Makes programs 10x faster by doing Special New Thing.
a zero input results in poison.
* (Sanitizer Special Case Lists)[https://clang.llvm.org/docs/SanitizerSpecialCaseList.html]
- may now be prefixed with ``#!canonical-paths`` to specify that filename patterns
- should be matched against canonicalized paths, without leading dots or slashes
- and (on Windows only) without any backslashes.
+ version 4 (indicated by ``#!special-case-list-v4``) now specifies that filename
+ patterns should be matched against canonicalized paths, without leading dots or
+ slashes and (on Windows only) without any backslashes.
* Renamed G_CTLZ_ZERO_UNDEF to G_CTLZ_ZERO_POISON opcode to make it clear that
diff --git a/llvm/lib/Support/SpecialCaseList.cpp b/llvm/lib/Support/SpecialCaseList.cpp
index 66f0efc57029a..b527314c111bf 100644
--- a/llvm/lib/Support/SpecialCaseList.cpp
+++ b/llvm/lib/Support/SpecialCaseList.cpp
@@ -330,7 +330,7 @@ bool SpecialCaseList::parse(unsigned FileIdx, const MemoryBuffer *MB,
Buffer = Buffer.ltrim(" \t\r\n");
}
- CanonicalizePaths = Buffer.consume_front("#!canonical-paths\n");
+ CanonicalizePaths = Version > 3;
// In https://reviews.llvm.org/D154014 we added glob support and planned
// to remove regex support in patterns. We temporarily support the
diff --git a/llvm/unittests/Support/SpecialCaseListTest.cpp b/llvm/unittests/Support/SpecialCaseListTest.cpp
index 54f55dd714273..508593e0bc380 100644
--- a/llvm/unittests/Support/SpecialCaseListTest.cpp
+++ b/llvm/unittests/Support/SpecialCaseListTest.cpp
@@ -419,7 +419,7 @@ TEST_F(SpecialCaseListTest, FileIdx) {
#ifdef _WIN32
TEST_F(SpecialCaseListTest, CanonicalizePathsOnWindows) {
std::unique_ptr<SpecialCaseList> SCL =
- makeSpecialCaseList("#!canonical-paths\n"
+ makeSpecialCaseList("#!special-case-list-v4\n"
"\n"
"src:*foo/bar*\n"
"src:*foo\\\\baz\n"
@@ -432,6 +432,7 @@ TEST_F(SpecialCaseListTest, CanonicalizePathsOnWindows) {
// The canonicalization only applies to files
EXPECT_TRUE(SCL->inSection("", "fun", "hi\\bye", "category"));
}
+
#endif
} // namespace
>From ecd5f9c22ff3e1aeefa44864be9c81b1b6dacc0f Mon Sep 17 00:00:00 2001
From: Vitaly Buka <vitalybuka at google.com>
Date: Tue, 9 Jun 2026 10:58:45 -0700
Subject: [PATCH 09/30] Revert "[SpecialCaseList] Canonicalize paths in
Matcher::match"
---
clang/docs/ReleaseNotes.rst | 5 +++
clang/lib/Basic/Diagnostic.cpp | 15 +--------
llvm/docs/ReleaseNotes.md | 11 ++++---
llvm/include/llvm/Support/SpecialCaseList.h | 2 ++
llvm/lib/Support/SpecialCaseList.cpp | 34 +++++++++++----------
5 files changed, 32 insertions(+), 35 deletions(-)
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index a569572af43ca..09fa6e4fc2e4e 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -264,6 +264,11 @@ Non-comprehensive list of changes in this release
- Added support for floating point and pointer values in most ``__atomic_``
builtins.
+- Warning suppression mappings (``--warning-suppression-mappings``) now recognize
+ version 4 of the Special Case List format (indicated by ``#!special-case-list-v4``),
+ which specifies that filename patterns should be matched against canonicalized
+ paths (without leading dots or slashes, and on Windows, without backslashes).
+
- Added ``__builtin_stdc_rotate_left`` and ``__builtin_stdc_rotate_right``
for bit rotation of unsigned integers including ``_BitInt`` types. Rotation
counts are normalized modulo the bit-width and support negative values.
diff --git a/clang/lib/Basic/Diagnostic.cpp b/clang/lib/Basic/Diagnostic.cpp
index 24ad02df8159b..348c9927e87b9 100644
--- a/clang/lib/Basic/Diagnostic.cpp
+++ b/clang/lib/Basic/Diagnostic.cpp
@@ -585,22 +585,9 @@ bool WarningsSpecialCaseList::isDiagSuppressed(diag::kind DiagId,
if (!DiagSection)
return false;
- StringRef F = llvm::sys::path::remove_leading_dotslash(PLoc.getFilename());
-
+ StringRef F = PLoc.getFilename();
unsigned LastSup = DiagSection->getLastMatch("src", F, "");
if (LastSup == 0)
- if (PresumedLoc PLoc = SM.getPresumedLoc(DiagLoc); PLoc.isValid()) {
- if (CanonicalizePaths) {
- return globsMatches(
- CategoriesToMatchers,
- llvm::sys::path::convert_to_slash(
- llvm::sys::path::remove_leading_dotslash(PLoc.getFilename())));
- } else {
- return globsMatches(
- CategoriesToMatchers,
- llvm::sys::path::remove_leading_dotslash(PLoc.getFilename()));
- }
- }
return false;
unsigned LastEmit = DiagSection->getLastMatch("src", F, "emit");
diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md
index c6af634682a36..a91af78704dfa 100644
--- a/llvm/docs/ReleaseNotes.md
+++ b/llvm/docs/ReleaseNotes.md
@@ -291,11 +291,7 @@ Makes programs 10x faster by doing Special New Thing.
* Renamed ISD::CTTZ_ZERO_UNDEF to ISD::CTTZ_ZERO_POISON opcode to make it clear that
a zero input results in poison.
-* (Sanitizer Special Case Lists)[https://clang.llvm.org/docs/SanitizerSpecialCaseList.html]
- version 4 (indicated by ``#!special-case-list-v4``) now specifies that filename
- patterns should be matched against canonicalized paths, without leading dots or
- slashes and (on Windows only) without any backslashes.
-
+### Changes to the GlobalISel infrastructure
* Renamed G_CTLZ_ZERO_UNDEF to G_CTLZ_ZERO_POISON opcode to make it clear that
a zero input results in poison.
@@ -406,6 +402,11 @@ Makes programs 10x faster by doing Special New Thing.
* Add a random delay into ThreadSanitizer to help find rare thread interleavings.
+* [Sanitizer Special Case Lists](https://clang.llvm.org/docs/SanitizerSpecialCaseList.html)
+ version 4 (indicated by ``#!special-case-list-v4``) now specifies that filename
+ patterns should be matched against canonicalized paths, without leading dots or
+ slashes and (on Windows only) without any backslashes.
+
### Other Changes
## External Open Source Projects Using LLVM {{env.config.release}}
diff --git a/llvm/include/llvm/Support/SpecialCaseList.h b/llvm/include/llvm/Support/SpecialCaseList.h
index deda39d93199c..1fc23d763c300 100644
--- a/llvm/include/llvm/Support/SpecialCaseList.h
+++ b/llvm/include/llvm/Support/SpecialCaseList.h
@@ -105,6 +105,8 @@ class SpecialCaseList {
inSectionBlame(StringRef Section, StringRef Prefix, StringRef Query,
StringRef Category = StringRef()) const;
+
+
protected:
// Implementations of the create*() functions that can also be used by derived
// classes.
diff --git a/llvm/lib/Support/SpecialCaseList.cpp b/llvm/lib/Support/SpecialCaseList.cpp
index b527314c111bf..d2e2d0b986eed 100644
--- a/llvm/lib/Support/SpecialCaseList.cpp
+++ b/llvm/lib/Support/SpecialCaseList.cpp
@@ -91,7 +91,7 @@ class GlobMatcher {
/// Represents a set of patterns and their line numbers
class Matcher {
public:
- Matcher(bool UseGlobs, bool RemoveDotSlash);
+ Matcher(bool UseGlobs, bool RemoveDotSlash, bool CanonicalizePaths);
Error insert(StringRef Pattern, unsigned LineNumber);
unsigned match(StringRef Query) const;
@@ -100,6 +100,7 @@ class Matcher {
std::variant<RegexMatcher, GlobMatcher> M;
bool RemoveDotSlash;
+ bool CanonicalizePaths;
};
Error RegexMatcher::insert(StringRef Pattern, unsigned LineNumber) {
@@ -219,8 +220,8 @@ unsigned GlobMatcher::match(StringRef Query) const {
return Best < 0 ? 0 : Globs[Best].LineNo;
}
-Matcher::Matcher(bool UseGlobs, bool RemoveDotSlash)
- : RemoveDotSlash(RemoveDotSlash) {
+Matcher::Matcher(bool UseGlobs, bool RemoveDotSlash, bool CanonicalizePaths)
+ : RemoveDotSlash(RemoveDotSlash), CanonicalizePaths(CanonicalizePaths) {
if (UseGlobs)
M.emplace<GlobMatcher>();
else
@@ -232,8 +233,14 @@ Error Matcher::insert(StringRef Pattern, unsigned LineNumber) {
}
unsigned Matcher::match(StringRef Query) const {
- if (RemoveDotSlash)
+ std::string CanonicalizedQuery;
+ if (CanonicalizePaths) {
+ CanonicalizedQuery = llvm::sys::path::convert_to_slash(
+ llvm::sys::path::remove_leading_dotslash(Query));
+ Query = CanonicalizedQuery;
+ } else if (RemoveDotSlash) {
Query = llvm::sys::path::remove_leading_dotslash(Query);
+ }
return std::visit([&](auto &V) -> unsigned { return V.match(Query); }, M);
}
} // namespace
@@ -245,7 +252,7 @@ class SpecialCaseList::Section::SectionImpl {
using SectionEntries = StringMap<StringMap<Matcher>>;
explicit SectionImpl(bool UseGlobs)
- : SectionMatcher(UseGlobs, /*RemoveDotSlash=*/false) {}
+ : SectionMatcher(UseGlobs, /*RemoveDotSlash=*/false, /*CanonicalizePaths=*/false) {}
Matcher SectionMatcher;
SectionEntries Entries;
@@ -325,10 +332,8 @@ bool SpecialCaseList::parse(unsigned FileIdx, const MemoryBuffer *MB,
std::string &Error) {
StringRef Buffer = MB->getBuffer();
unsigned long long Version = 2;
- if (Buffer.consume_front("#!special-case-list-v")) {
+ if (Buffer.consume_front("#!special-case-list-v"))
consumeUnsignedInteger(Buffer, 10, Version);
- Buffer = Buffer.ltrim(" \t\r\n");
- }
CanonicalizePaths = Version > 3;
@@ -387,9 +392,12 @@ bool SpecialCaseList::parse(unsigned FileIdx, const MemoryBuffer *MB,
}
auto [Pattern, Category] = Postfix.split("=");
+ bool MatcherRemoveDotSlash =
+ RemoveDotSlash && llvm::is_contained(PathPrefixes, Prefix);
+ bool MatcherCanonicalizePaths =
+ CanonicalizePaths && llvm::is_contained(PathPrefixes, Prefix);
auto [It, _] = CurrentImpl->Entries[Prefix].try_emplace(
- Category, UseGlobs,
- RemoveDotSlash && llvm::is_contained(PathPrefixes, Prefix));
+ Category, UseGlobs, MatcherRemoveDotSlash, MatcherCanonicalizePaths);
Pattern = Pattern.copy(StrAlloc);
if (auto Err = It->second.insert(Pattern, LineNo)) {
Error =
@@ -414,12 +422,6 @@ bool SpecialCaseList::inSection(StringRef Section, StringRef Prefix,
std::pair<unsigned, unsigned>
SpecialCaseList::inSectionBlame(StringRef Section, StringRef Prefix,
StringRef Query, StringRef Category) const {
- std::string CanonicalizedQuery;
- if (CanonicalizePaths && (Prefix == "src" || Prefix == "mainfile")) {
- CanonicalizedQuery = llvm::sys::path::convert_to_slash(
- llvm::sys::path::remove_leading_dotslash(Query));
- Query = CanonicalizedQuery;
- }
for (const auto &S : reverse(Sections)) {
if (S.Impl->SectionMatcher.matchAny(Section)) {
unsigned Blame = S.getLastMatch(Prefix, Query, Category);
>From 52373bb50f1626ff77bb1e83151dc60a45cf1364 Mon Sep 17 00:00:00 2001
From: Vitaly Buka <vitalybuka at google.com>
Date: Tue, 9 Jun 2026 12:29:06 -0700
Subject: [PATCH 10/30] [Support] Remove CanonicalizePaths member from
SpecialCaseList
---
llvm/include/llvm/Support/SpecialCaseList.h | 3 ---
llvm/lib/Support/SpecialCaseList.cpp | 17 ++++++++---------
2 files changed, 8 insertions(+), 12 deletions(-)
diff --git a/llvm/include/llvm/Support/SpecialCaseList.h b/llvm/include/llvm/Support/SpecialCaseList.h
index 1fc23d763c300..28e264f078d89 100644
--- a/llvm/include/llvm/Support/SpecialCaseList.h
+++ b/llvm/include/llvm/Support/SpecialCaseList.h
@@ -105,8 +105,6 @@ class SpecialCaseList {
inSectionBlame(StringRef Section, StringRef Prefix, StringRef Query,
StringRef Category = StringRef()) const;
-
-
protected:
// Implementations of the create*() functions that can also be used by derived
// classes.
@@ -155,7 +153,6 @@ class SpecialCaseList {
private:
BumpPtrAllocator StrAlloc;
std::vector<Section> Sections;
- bool CanonicalizePaths = false;
LLVM_ABI Expected<Section *> addSection(StringRef SectionStr,
unsigned FileIdx, unsigned LineNo,
diff --git a/llvm/lib/Support/SpecialCaseList.cpp b/llvm/lib/Support/SpecialCaseList.cpp
index d2e2d0b986eed..e45cb9d05f5f7 100644
--- a/llvm/lib/Support/SpecialCaseList.cpp
+++ b/llvm/lib/Support/SpecialCaseList.cpp
@@ -330,12 +330,13 @@ SpecialCaseList::addSection(StringRef SectionStr, unsigned FileNo,
bool SpecialCaseList::parse(unsigned FileIdx, const MemoryBuffer *MB,
std::string &Error) {
- StringRef Buffer = MB->getBuffer();
unsigned long long Version = 2;
- if (Buffer.consume_front("#!special-case-list-v"))
- consumeUnsignedInteger(Buffer, 10, Version);
- CanonicalizePaths = Version > 3;
+ StringRef Header = MB->getBuffer();
+ if (Header.consume_front("#!special-case-list-v"))
+ consumeUnsignedInteger(Header, 10, Version);
+
+ bool CanonicalizePaths = Version > 3;
// In https://reviews.llvm.org/D154014 we added glob support and planned
// to remove regex support in patterns. We temporarily support the
@@ -392,12 +393,10 @@ bool SpecialCaseList::parse(unsigned FileIdx, const MemoryBuffer *MB,
}
auto [Pattern, Category] = Postfix.split("=");
- bool MatcherRemoveDotSlash =
- RemoveDotSlash && llvm::is_contained(PathPrefixes, Prefix);
- bool MatcherCanonicalizePaths =
- CanonicalizePaths && llvm::is_contained(PathPrefixes, Prefix);
auto [It, _] = CurrentImpl->Entries[Prefix].try_emplace(
- Category, UseGlobs, MatcherRemoveDotSlash, MatcherCanonicalizePaths);
+ Category, UseGlobs,
+ RemoveDotSlash && llvm::is_contained(PathPrefixes, Prefix),
+ CanonicalizePaths && llvm::is_contained(PathPrefixes, Prefix));
Pattern = Pattern.copy(StrAlloc);
if (auto Err = It->second.insert(Pattern, LineNo)) {
Error =
>From fe17675058df4bd5d62141b405a0458f59e55869 Mon Sep 17 00:00:00 2001
From: Vitaly Buka <vitalybuka at google.com>
Date: Tue, 9 Jun 2026 12:35:18 -0700
Subject: [PATCH 11/30] [Support] Rename CanonicalizePaths to
CanonicalizeSlashes
---
llvm/lib/Support/SpecialCaseList.cpp | 24 ++++++++++++------------
1 file changed, 12 insertions(+), 12 deletions(-)
diff --git a/llvm/lib/Support/SpecialCaseList.cpp b/llvm/lib/Support/SpecialCaseList.cpp
index e45cb9d05f5f7..7c069adfa50dd 100644
--- a/llvm/lib/Support/SpecialCaseList.cpp
+++ b/llvm/lib/Support/SpecialCaseList.cpp
@@ -91,7 +91,7 @@ class GlobMatcher {
/// Represents a set of patterns and their line numbers
class Matcher {
public:
- Matcher(bool UseGlobs, bool RemoveDotSlash, bool CanonicalizePaths);
+ Matcher(bool UseGlobs, bool RemoveDotSlash, bool CanonicalizeSlashes);
Error insert(StringRef Pattern, unsigned LineNumber);
unsigned match(StringRef Query) const;
@@ -100,7 +100,7 @@ class Matcher {
std::variant<RegexMatcher, GlobMatcher> M;
bool RemoveDotSlash;
- bool CanonicalizePaths;
+ bool CanonicalizeSlashes;
};
Error RegexMatcher::insert(StringRef Pattern, unsigned LineNumber) {
@@ -220,8 +220,8 @@ unsigned GlobMatcher::match(StringRef Query) const {
return Best < 0 ? 0 : Globs[Best].LineNo;
}
-Matcher::Matcher(bool UseGlobs, bool RemoveDotSlash, bool CanonicalizePaths)
- : RemoveDotSlash(RemoveDotSlash), CanonicalizePaths(CanonicalizePaths) {
+Matcher::Matcher(bool UseGlobs, bool RemoveDotSlash, bool CanonicalizeSlashes)
+ : RemoveDotSlash(RemoveDotSlash), CanonicalizeSlashes(CanonicalizeSlashes) {
if (UseGlobs)
M.emplace<GlobMatcher>();
else
@@ -234,13 +234,12 @@ Error Matcher::insert(StringRef Pattern, unsigned LineNumber) {
unsigned Matcher::match(StringRef Query) const {
std::string CanonicalizedQuery;
- if (CanonicalizePaths) {
- CanonicalizedQuery = llvm::sys::path::convert_to_slash(
- llvm::sys::path::remove_leading_dotslash(Query));
+ if (CanonicalizeSlashes) {
+ CanonicalizedQuery = llvm::sys::path::convert_to_slash(Query);
Query = CanonicalizedQuery;
- } else if (RemoveDotSlash) {
- Query = llvm::sys::path::remove_leading_dotslash(Query);
}
+ if (RemoveDotSlash)
+ Query = llvm::sys::path::remove_leading_dotslash(Query);
return std::visit([&](auto &V) -> unsigned { return V.match(Query); }, M);
}
} // namespace
@@ -252,7 +251,8 @@ class SpecialCaseList::Section::SectionImpl {
using SectionEntries = StringMap<StringMap<Matcher>>;
explicit SectionImpl(bool UseGlobs)
- : SectionMatcher(UseGlobs, /*RemoveDotSlash=*/false, /*CanonicalizePaths=*/false) {}
+ : SectionMatcher(UseGlobs, /*RemoveDotSlash=*/false,
+ /*CanonicalizeSlashes=*/false) {}
Matcher SectionMatcher;
SectionEntries Entries;
@@ -336,7 +336,7 @@ bool SpecialCaseList::parse(unsigned FileIdx, const MemoryBuffer *MB,
if (Header.consume_front("#!special-case-list-v"))
consumeUnsignedInteger(Header, 10, Version);
- bool CanonicalizePaths = Version > 3;
+ bool CanonicalizeSlashes = Version > 3;
// In https://reviews.llvm.org/D154014 we added glob support and planned
// to remove regex support in patterns. We temporarily support the
@@ -396,7 +396,7 @@ bool SpecialCaseList::parse(unsigned FileIdx, const MemoryBuffer *MB,
auto [It, _] = CurrentImpl->Entries[Prefix].try_emplace(
Category, UseGlobs,
RemoveDotSlash && llvm::is_contained(PathPrefixes, Prefix),
- CanonicalizePaths && llvm::is_contained(PathPrefixes, Prefix));
+ CanonicalizeSlashes && llvm::is_contained(PathPrefixes, Prefix));
Pattern = Pattern.copy(StrAlloc);
if (auto Err = It->second.insert(Pattern, LineNo)) {
Error =
>From be45576387c7f8ca89da65666e67b1bd29fe6722 Mon Sep 17 00:00:00 2001
From: Vitaly Buka <vitalybuka at google.com>
Date: Tue, 9 Jun 2026 12:40:32 -0700
Subject: [PATCH 12/30] Update docs
---
clang/docs/ReleaseNotes.rst | 11 ++++++-----
clang/docs/SanitizerSpecialCaseList.rst | 2 +-
2 files changed, 7 insertions(+), 6 deletions(-)
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 09fa6e4fc2e4e..e7d891c3725ef 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -264,11 +264,6 @@ Non-comprehensive list of changes in this release
- Added support for floating point and pointer values in most ``__atomic_``
builtins.
-- Warning suppression mappings (``--warning-suppression-mappings``) now recognize
- version 4 of the Special Case List format (indicated by ``#!special-case-list-v4``),
- which specifies that filename patterns should be matched against canonicalized
- paths (without leading dots or slashes, and on Windows, without backslashes).
-
- Added ``__builtin_stdc_rotate_left`` and ``__builtin_stdc_rotate_right``
for bit rotation of unsigned integers including ``_BitInt`` types. Rotation
counts are normalized modulo the bit-width and support negative values.
@@ -973,6 +968,12 @@ Sanitizers
----------
- UndefinedBehaviorSanitizer now supports ``__ubsan_default_suppressions``.
+- Sanitizer Special Case Lists (``-fsanitize-ignorelist``) and warning
+ suppression mappings (``--warning-suppression-mappings``) now recognize version
+ 4 of the Special Case List format (indicated by ``#!special-case-list-v4``),
+ which specifies that filename patterns should be matched against canonicalized
+ paths (without leading dots or slashes, and on Windows, without backslashes).
+
Python Binding Changes
----------------------
- Add deprecation warnings to ``CompletionChunk.isKind...`` methods.
diff --git a/clang/docs/SanitizerSpecialCaseList.rst b/clang/docs/SanitizerSpecialCaseList.rst
index 9fe070825f60d..f23139f868235 100644
--- a/clang/docs/SanitizerSpecialCaseList.rst
+++ b/clang/docs/SanitizerSpecialCaseList.rst
@@ -237,7 +237,7 @@ tool-specific docs.
by LLVM. On Windows, this might involve a mix of forward and backslashes as
file separators, and writing patterns to match both variants can be
inconvenient.
-
+
File path canonicalization is supported in version 4 or later (indicated by
starting the file with ``#!special-case-list-v4``). In this version, paths
will be canonicalized before patterns are matched against them. This involves
>From 85564ef434c6deec8b1291b8cda19bb1882e9727 Mon Sep 17 00:00:00 2001
From: Vitaly Buka <vitalybuka at google.com>
Date: Tue, 9 Jun 2026 12:59:18 -0700
Subject: [PATCH 13/30] windows only
---
clang/docs/ReleaseNotes.rst | 4 ++--
clang/docs/SanitizerSpecialCaseList.rst | 7 ++-----
llvm/lib/Support/SpecialCaseList.cpp | 9 +++++----
3 files changed, 9 insertions(+), 11 deletions(-)
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index e7d891c3725ef..a4299825f1fd7 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -971,8 +971,8 @@ Sanitizers
- Sanitizer Special Case Lists (``-fsanitize-ignorelist``) and warning
suppression mappings (``--warning-suppression-mappings``) now recognize version
4 of the Special Case List format (indicated by ``#!special-case-list-v4``),
- which specifies that filename patterns should be matched against canonicalized
- paths (without leading dots or slashes, and on Windows, without backslashes).
+ which canonicalizes path separators by converting backslashes to forward slashes
+ on Windows hosts.
Python Binding Changes
----------------------
diff --git a/clang/docs/SanitizerSpecialCaseList.rst b/clang/docs/SanitizerSpecialCaseList.rst
index f23139f868235..40fa19b37d93b 100644
--- a/clang/docs/SanitizerSpecialCaseList.rst
+++ b/clang/docs/SanitizerSpecialCaseList.rst
@@ -238,11 +238,8 @@ tool-specific docs.
file separators, and writing patterns to match both variants can be
inconvenient.
- File path canonicalization is supported in version 4 or later (indicated by
- starting the file with ``#!special-case-list-v4``). In this version, paths
- will be canonicalized before patterns are matched against them. This involves
- stripping any leading dots and slashes, and (on Windows only) converting all
- backslashes to forward slashes.
+ Starting with version 4 (indicated by ``#!special-case-list-v4``), path separators
+ on Windows hosts are canonicalized by converting backslashes to forward slashes.
``mainfile`` is similar to applying ``-fno-sanitize=`` to a set of files but
does not need plumbing into the build system. This works well for internal
diff --git a/llvm/lib/Support/SpecialCaseList.cpp b/llvm/lib/Support/SpecialCaseList.cpp
index 7c069adfa50dd..6be3d38ac18b6 100644
--- a/llvm/lib/Support/SpecialCaseList.cpp
+++ b/llvm/lib/Support/SpecialCaseList.cpp
@@ -336,7 +336,8 @@ bool SpecialCaseList::parse(unsigned FileIdx, const MemoryBuffer *MB,
if (Header.consume_front("#!special-case-list-v"))
consumeUnsignedInteger(Header, 10, Version);
- bool CanonicalizeSlashes = Version > 3;
+ bool CanonicalizeSlashes =
+ Version > 3 && llvm::sys::path::is_separator('\\');
// In https://reviews.llvm.org/D154014 we added glob support and planned
// to remove regex support in patterns. We temporarily support the
@@ -393,10 +394,10 @@ bool SpecialCaseList::parse(unsigned FileIdx, const MemoryBuffer *MB,
}
auto [Pattern, Category] = Postfix.split("=");
+ bool IsPath = llvm::is_contained(PathPrefixes, Prefix);
auto [It, _] = CurrentImpl->Entries[Prefix].try_emplace(
- Category, UseGlobs,
- RemoveDotSlash && llvm::is_contained(PathPrefixes, Prefix),
- CanonicalizeSlashes && llvm::is_contained(PathPrefixes, Prefix));
+ Category, UseGlobs, RemoveDotSlash && IsPath,
+ CanonicalizeSlashes && IsPath);
Pattern = Pattern.copy(StrAlloc);
if (auto Err = It->second.insert(Pattern, LineNo)) {
Error =
>From 005fa63fe8ddac69f291b0700399bd88feece998 Mon Sep 17 00:00:00 2001
From: Vitaly Buka <vitalybuka at google.com>
Date: Tue, 9 Jun 2026 12:59:49 -0700
Subject: [PATCH 14/30] format
---
llvm/lib/Support/SpecialCaseList.cpp | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Support/SpecialCaseList.cpp b/llvm/lib/Support/SpecialCaseList.cpp
index 6be3d38ac18b6..11a752c68acfd 100644
--- a/llvm/lib/Support/SpecialCaseList.cpp
+++ b/llvm/lib/Support/SpecialCaseList.cpp
@@ -23,8 +23,8 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/GlobPattern.h"
#include "llvm/Support/LineIterator.h"
-#include "llvm/Support/Path.h"
#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
#include "llvm/Support/Regex.h"
#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/Support/raw_ostream.h"
@@ -336,8 +336,7 @@ bool SpecialCaseList::parse(unsigned FileIdx, const MemoryBuffer *MB,
if (Header.consume_front("#!special-case-list-v"))
consumeUnsignedInteger(Header, 10, Version);
- bool CanonicalizeSlashes =
- Version > 3 && llvm::sys::path::is_separator('\\');
+ bool CanonicalizeSlashes = Version > 3 && llvm::sys::path::is_separator('\\');
// In https://reviews.llvm.org/D154014 we added glob support and planned
// to remove regex support in patterns. We temporarily support the
>From 950e225ad8d3e3982c86d7fef4f24b2d0eb4170a Mon Sep 17 00:00:00 2001
From: Vitaly Buka <vitalybuka at google.com>
Date: Tue, 9 Jun 2026 13:00:09 -0700
Subject: [PATCH 15/30] [Support] Move CanonicalizeSlashes after RemoveDotSlash
---
llvm/lib/Support/SpecialCaseList.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Support/SpecialCaseList.cpp b/llvm/lib/Support/SpecialCaseList.cpp
index 11a752c68acfd..d3ba7d0b2defb 100644
--- a/llvm/lib/Support/SpecialCaseList.cpp
+++ b/llvm/lib/Support/SpecialCaseList.cpp
@@ -336,8 +336,6 @@ bool SpecialCaseList::parse(unsigned FileIdx, const MemoryBuffer *MB,
if (Header.consume_front("#!special-case-list-v"))
consumeUnsignedInteger(Header, 10, Version);
- bool CanonicalizeSlashes = Version > 3 && llvm::sys::path::is_separator('\\');
-
// In https://reviews.llvm.org/D154014 we added glob support and planned
// to remove regex support in patterns. We temporarily support the
// original behavior using regexes if "#!special-case-list-v1" is the
@@ -347,6 +345,8 @@ bool SpecialCaseList::parse(unsigned FileIdx, const MemoryBuffer *MB,
bool RemoveDotSlash = Version > 2;
+ bool CanonicalizeSlashes = Version > 3 && llvm::sys::path::is_separator('\\');
+
auto ErrOrSection = addSection("*", FileIdx, 1, true);
if (auto Err = ErrOrSection.takeError()) {
Error = toString(std::move(Err));
>From dddc25166eb422cf9b54cbc52bff2c1d628db53a Mon Sep 17 00:00:00 2001
From: Vitaly Buka <vitalybuka at google.com>
Date: Tue, 9 Jun 2026 17:24:45 -0700
Subject: [PATCH 16/30] [Support] Disallow backslashes in path patterns for
SpecialCaseList v4
---
clang/docs/SanitizerSpecialCaseList.rst | 4 +-
llvm/docs/ReleaseNotes.md | 7 ++--
llvm/lib/Support/SpecialCaseList.cpp | 6 +++
.../unittests/Support/SpecialCaseListTest.cpp | 39 ++++++++++++++-----
4 files changed, 42 insertions(+), 14 deletions(-)
diff --git a/clang/docs/SanitizerSpecialCaseList.rst b/clang/docs/SanitizerSpecialCaseList.rst
index 40fa19b37d93b..7181fe7c5652e 100644
--- a/clang/docs/SanitizerSpecialCaseList.rst
+++ b/clang/docs/SanitizerSpecialCaseList.rst
@@ -238,8 +238,10 @@ tool-specific docs.
file separators, and writing patterns to match both variants can be
inconvenient.
- Starting with version 4 (indicated by ``#!special-case-list-v4``), path separators
+ Starting with version 4 (indicated by ``#!special-case-list-v4``), matched paths
on Windows hosts are canonicalized by converting backslashes to forward slashes.
+ To ensure cross-platform compatibility, path patterns (such as ``src``) cannot
+ contain backslashes, and using them will result in a parsing error.
``mainfile`` is similar to applying ``-fno-sanitize=`` to a set of files but
does not need plumbing into the build system. This works well for internal
diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md
index 1e7460f8531a3..d6fbe9d76e93b 100644
--- a/llvm/docs/ReleaseNotes.md
+++ b/llvm/docs/ReleaseNotes.md
@@ -409,9 +409,10 @@ Makes programs 10x faster by doing Special New Thing.
* Add a random delay into ThreadSanitizer to help find rare thread interleavings.
* [Sanitizer Special Case Lists](https://clang.llvm.org/docs/SanitizerSpecialCaseList.html)
- version 4 (indicated by ``#!special-case-list-v4``) now specifies that filename
- patterns should be matched against canonicalized paths, without leading dots or
- slashes and (on Windows only) without any backslashes.
+ version 4 (indicated by ``#!special-case-list-v4``) now specifies that matched
+ paths on Windows hosts are canonicalized to use forward slashes. To ensure
+ cross-platform compatibility, path patterns (like ``src``) are forbidden from
+ containing backslashes and will trigger a parsing error.
### Other Changes
diff --git a/llvm/lib/Support/SpecialCaseList.cpp b/llvm/lib/Support/SpecialCaseList.cpp
index d3ba7d0b2defb..39bff19307951 100644
--- a/llvm/lib/Support/SpecialCaseList.cpp
+++ b/llvm/lib/Support/SpecialCaseList.cpp
@@ -394,6 +394,12 @@ bool SpecialCaseList::parse(unsigned FileIdx, const MemoryBuffer *MB,
auto [Pattern, Category] = Postfix.split("=");
bool IsPath = llvm::is_contained(PathPrefixes, Prefix);
+
+ if (Version >= 4 && IsPath && Pattern.contains("\\\\")) {
+ Error = (Twine("pattern cannot contain a backslash: ") + Pattern).str();
+ return false;
+ }
+
auto [It, _] = CurrentImpl->Entries[Prefix].try_emplace(
Category, UseGlobs, RemoveDotSlash && IsPath,
CanonicalizeSlashes && IsPath);
diff --git a/llvm/unittests/Support/SpecialCaseListTest.cpp b/llvm/unittests/Support/SpecialCaseListTest.cpp
index 508593e0bc380..31cecb8cb1688 100644
--- a/llvm/unittests/Support/SpecialCaseListTest.cpp
+++ b/llvm/unittests/Support/SpecialCaseListTest.cpp
@@ -416,23 +416,42 @@ TEST_F(SpecialCaseListTest, FileIdx) {
sys::fs::remove(Path);
}
+TEST_F(SpecialCaseListTest, PathPatternBackslashError) {
+ std::string Error;
+ // This should fail because 'src' pattern contains a double backslash.
+ std::unique_ptr<MemoryBuffer> MB =
+ MemoryBuffer::getMemBuffer("#!special-case-list-v4\n"
+ "src:*foo\\\\baz\n");
+ std::unique_ptr<SpecialCaseList> SCL =
+ SpecialCaseList::create(MB.get(), Error);
+ EXPECT_EQ(SCL, nullptr);
+ EXPECT_THAT(Error, HasSubstr("pattern cannot contain a backslash"));
+
+ // This should succeed because single backslash is allowed as escape.
+ std::unique_ptr<SpecialCaseList> SCL2 =
+ makeSpecialCaseList("src:*foo\\*baz\n", Error, 4);
+ EXPECT_TRUE(SCL2 != nullptr) << Error;
+
+ // This should succeed because 'fun' pattern is not a path pattern.
+ std::unique_ptr<SpecialCaseList> SCL3 =
+ makeSpecialCaseList("fun:hi\\\\bye=category\n", Error, 4);
+ ASSERT_TRUE(SCL3 != nullptr);
+ EXPECT_TRUE(SCL3->inSection("", "fun", "hi\\bye", "category"));
+ EXPECT_FALSE(SCL3->inSection("", "fun", "hi/bye", "category"));
+}
+
#ifdef _WIN32
TEST_F(SpecialCaseListTest, CanonicalizePathsOnWindows) {
std::unique_ptr<SpecialCaseList> SCL =
- makeSpecialCaseList("#!special-case-list-v4\n"
- "\n"
- "src:*foo/bar*\n"
- "src:*foo\\\\baz\n"
- "fun:hi\\\\bye=category\n");
+ makeSpecialCaseList("src:*foo/bar*\n"
+ "fun:hi\\\\bye=category\n",
+ 4);
+ ASSERT_TRUE(SCL != nullptr);
EXPECT_TRUE(SCL->inSection("", "src", "foo/bar"));
EXPECT_TRUE(SCL->inSection("", "src", "foo\\bar"));
- // The baz pattern doesn't match because paths are canonicalized first
- EXPECT_FALSE(SCL->inSection("", "src", "foo/baz"));
- EXPECT_FALSE(SCL->inSection("", "src", "foo\\baz"));
- // The canonicalization only applies to files
EXPECT_TRUE(SCL->inSection("", "fun", "hi\\bye", "category"));
+ EXPECT_FALSE(SCL->inSection("", "fun", "hi/bye", "category"));
}
-
#endif
} // namespace
>From 09a7fc6ba14729a85996962812de21e1342f24a4 Mon Sep 17 00:00:00 2001
From: Vitaly Buka <vitalybuka at google.com>
Date: Tue, 9 Jun 2026 17:47:36 -0700
Subject: [PATCH 17/30] Revert "[Support] Disallow backslashes in path patterns
for SpecialCaseList v4"
This reverts commit dddc25166eb422cf9b54cbc52bff2c1d628db53a.
---
clang/docs/SanitizerSpecialCaseList.rst | 4 +-
llvm/docs/ReleaseNotes.md | 7 ++--
llvm/lib/Support/SpecialCaseList.cpp | 6 ---
.../unittests/Support/SpecialCaseListTest.cpp | 39 +++++--------------
4 files changed, 14 insertions(+), 42 deletions(-)
diff --git a/clang/docs/SanitizerSpecialCaseList.rst b/clang/docs/SanitizerSpecialCaseList.rst
index 7181fe7c5652e..40fa19b37d93b 100644
--- a/clang/docs/SanitizerSpecialCaseList.rst
+++ b/clang/docs/SanitizerSpecialCaseList.rst
@@ -238,10 +238,8 @@ tool-specific docs.
file separators, and writing patterns to match both variants can be
inconvenient.
- Starting with version 4 (indicated by ``#!special-case-list-v4``), matched paths
+ Starting with version 4 (indicated by ``#!special-case-list-v4``), path separators
on Windows hosts are canonicalized by converting backslashes to forward slashes.
- To ensure cross-platform compatibility, path patterns (such as ``src``) cannot
- contain backslashes, and using them will result in a parsing error.
``mainfile`` is similar to applying ``-fno-sanitize=`` to a set of files but
does not need plumbing into the build system. This works well for internal
diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md
index d6fbe9d76e93b..1e7460f8531a3 100644
--- a/llvm/docs/ReleaseNotes.md
+++ b/llvm/docs/ReleaseNotes.md
@@ -409,10 +409,9 @@ Makes programs 10x faster by doing Special New Thing.
* Add a random delay into ThreadSanitizer to help find rare thread interleavings.
* [Sanitizer Special Case Lists](https://clang.llvm.org/docs/SanitizerSpecialCaseList.html)
- version 4 (indicated by ``#!special-case-list-v4``) now specifies that matched
- paths on Windows hosts are canonicalized to use forward slashes. To ensure
- cross-platform compatibility, path patterns (like ``src``) are forbidden from
- containing backslashes and will trigger a parsing error.
+ version 4 (indicated by ``#!special-case-list-v4``) now specifies that filename
+ patterns should be matched against canonicalized paths, without leading dots or
+ slashes and (on Windows only) without any backslashes.
### Other Changes
diff --git a/llvm/lib/Support/SpecialCaseList.cpp b/llvm/lib/Support/SpecialCaseList.cpp
index 39bff19307951..d3ba7d0b2defb 100644
--- a/llvm/lib/Support/SpecialCaseList.cpp
+++ b/llvm/lib/Support/SpecialCaseList.cpp
@@ -394,12 +394,6 @@ bool SpecialCaseList::parse(unsigned FileIdx, const MemoryBuffer *MB,
auto [Pattern, Category] = Postfix.split("=");
bool IsPath = llvm::is_contained(PathPrefixes, Prefix);
-
- if (Version >= 4 && IsPath && Pattern.contains("\\\\")) {
- Error = (Twine("pattern cannot contain a backslash: ") + Pattern).str();
- return false;
- }
-
auto [It, _] = CurrentImpl->Entries[Prefix].try_emplace(
Category, UseGlobs, RemoveDotSlash && IsPath,
CanonicalizeSlashes && IsPath);
diff --git a/llvm/unittests/Support/SpecialCaseListTest.cpp b/llvm/unittests/Support/SpecialCaseListTest.cpp
index 31cecb8cb1688..508593e0bc380 100644
--- a/llvm/unittests/Support/SpecialCaseListTest.cpp
+++ b/llvm/unittests/Support/SpecialCaseListTest.cpp
@@ -416,42 +416,23 @@ TEST_F(SpecialCaseListTest, FileIdx) {
sys::fs::remove(Path);
}
-TEST_F(SpecialCaseListTest, PathPatternBackslashError) {
- std::string Error;
- // This should fail because 'src' pattern contains a double backslash.
- std::unique_ptr<MemoryBuffer> MB =
- MemoryBuffer::getMemBuffer("#!special-case-list-v4\n"
- "src:*foo\\\\baz\n");
- std::unique_ptr<SpecialCaseList> SCL =
- SpecialCaseList::create(MB.get(), Error);
- EXPECT_EQ(SCL, nullptr);
- EXPECT_THAT(Error, HasSubstr("pattern cannot contain a backslash"));
-
- // This should succeed because single backslash is allowed as escape.
- std::unique_ptr<SpecialCaseList> SCL2 =
- makeSpecialCaseList("src:*foo\\*baz\n", Error, 4);
- EXPECT_TRUE(SCL2 != nullptr) << Error;
-
- // This should succeed because 'fun' pattern is not a path pattern.
- std::unique_ptr<SpecialCaseList> SCL3 =
- makeSpecialCaseList("fun:hi\\\\bye=category\n", Error, 4);
- ASSERT_TRUE(SCL3 != nullptr);
- EXPECT_TRUE(SCL3->inSection("", "fun", "hi\\bye", "category"));
- EXPECT_FALSE(SCL3->inSection("", "fun", "hi/bye", "category"));
-}
-
#ifdef _WIN32
TEST_F(SpecialCaseListTest, CanonicalizePathsOnWindows) {
std::unique_ptr<SpecialCaseList> SCL =
- makeSpecialCaseList("src:*foo/bar*\n"
- "fun:hi\\\\bye=category\n",
- 4);
- ASSERT_TRUE(SCL != nullptr);
+ makeSpecialCaseList("#!special-case-list-v4\n"
+ "\n"
+ "src:*foo/bar*\n"
+ "src:*foo\\\\baz\n"
+ "fun:hi\\\\bye=category\n");
EXPECT_TRUE(SCL->inSection("", "src", "foo/bar"));
EXPECT_TRUE(SCL->inSection("", "src", "foo\\bar"));
+ // The baz pattern doesn't match because paths are canonicalized first
+ EXPECT_FALSE(SCL->inSection("", "src", "foo/baz"));
+ EXPECT_FALSE(SCL->inSection("", "src", "foo\\baz"));
+ // The canonicalization only applies to files
EXPECT_TRUE(SCL->inSection("", "fun", "hi\\bye", "category"));
- EXPECT_FALSE(SCL->inSection("", "fun", "hi/bye", "category"));
}
+
#endif
} // namespace
>From 448755d6dddc4ded565c91385e9f2d755c0b07c3 Mon Sep 17 00:00:00 2001
From: Vitaly Buka <vitalybuka at google.com>
Date: Tue, 9 Jun 2026 20:07:37 -0700
Subject: [PATCH 18/30] format
---
llvm/include/llvm/Support/GlobPattern.h | 9 ++-
llvm/lib/Support/GlobPattern.cpp | 67 +++++++++++++------
llvm/lib/Support/SpecialCaseList.cpp | 36 +++++-----
llvm/unittests/Support/GlobPatternTest.cpp | 62 +++++++++++++++++
.../unittests/Support/SpecialCaseListTest.cpp | 7 +-
5 files changed, 137 insertions(+), 44 deletions(-)
diff --git a/llvm/include/llvm/Support/GlobPattern.h b/llvm/include/llvm/Support/GlobPattern.h
index 1a6c0ab292ac8..ec3631ade518a 100644
--- a/llvm/include/llvm/Support/GlobPattern.h
+++ b/llvm/include/llvm/Support/GlobPattern.h
@@ -56,7 +56,8 @@ class GlobPattern {
/// created from expanding braces otherwise disable
/// brace expansion
LLVM_ABI static Expected<GlobPattern>
- create(StringRef Pat, std::optional<size_t> MaxSubPatterns = {});
+ create(StringRef Pat, std::optional<size_t> MaxSubPatterns = {},
+ bool SlashAgnostic = false);
/// \returns \p true if \p S matches this glob pattern
LLVM_ABI bool match(StringRef S) const;
@@ -87,12 +88,14 @@ class GlobPattern {
StringRef Pattern;
size_t PrefixSize = 0;
size_t SuffixSize = 0;
+ bool SlashAgnostic = false;
struct SubGlobPattern {
/// \param Pat the pattern to match against
- LLVM_ABI static Expected<SubGlobPattern> create(StringRef Pat);
+ LLVM_ABI static Expected<SubGlobPattern> create(StringRef Pat,
+ bool SlashAgnostic = false);
/// \returns \p true if \p S matches this glob pattern
- LLVM_ABI bool match(StringRef S) const;
+ LLVM_ABI bool match(StringRef S, bool SlashAgnostic) const;
StringRef getPat() const { return StringRef(Pat.data(), Pat.size()); }
// Brackets with their end position and matched bytes.
diff --git a/llvm/lib/Support/GlobPattern.cpp b/llvm/lib/Support/GlobPattern.cpp
index 2715229c65be1..4c050e6fd67f5 100644
--- a/llvm/lib/Support/GlobPattern.cpp
+++ b/llvm/lib/Support/GlobPattern.cpp
@@ -16,6 +16,11 @@
using namespace llvm;
+static constexpr char PrefixMetacharacters[] = "?*[{\\";
+static constexpr char SuffixMetacharacters[] = "?*[]{}\\";
+static constexpr char PrefixMetacharactersWithSlash[] = "?*[{\\/";
+static constexpr char SuffixMetacharactersWithSlash[] = "?*[]{}\\/";
+
// Expands character ranges and returns a bitmap.
// For example, "a-cf-hz" is expanded to "abcfghz".
static Expected<BitVector> expand(StringRef S, StringRef Original) {
@@ -132,10 +137,12 @@ parseBraceExpansions(StringRef S, std::optional<size_t> MaxSubPatterns) {
return std::move(SubPatterns);
}
-static StringRef maxPlainSubstring(StringRef S) {
+static StringRef maxPlainSubstring(StringRef S, bool SlashAgnostic) {
+ const char *Metas =
+ SlashAgnostic ? PrefixMetacharactersWithSlash : PrefixMetacharacters;
StringRef Best;
while (!S.empty()) {
- size_t PrefixSize = S.find_first_of("?*[{\\");
+ size_t PrefixSize = S.find_first_of(Metas);
if (PrefixSize == std::string::npos)
PrefixSize = S.size();
@@ -175,13 +182,20 @@ static StringRef maxPlainSubstring(StringRef S) {
return Best;
}
-Expected<GlobPattern>
-GlobPattern::create(StringRef S, std::optional<size_t> MaxSubPatterns) {
+Expected<GlobPattern> GlobPattern::create(StringRef S,
+ std::optional<size_t> MaxSubPatterns,
+ bool SlashAgnostic) {
GlobPattern Pat;
+ Pat.SlashAgnostic = SlashAgnostic;
Pat.Pattern = S;
+ const char *PrefixMetas =
+ SlashAgnostic ? PrefixMetacharactersWithSlash : PrefixMetacharacters;
+ const char *SuffixMetas =
+ SlashAgnostic ? SuffixMetacharactersWithSlash : SuffixMetacharacters;
+
// Store the prefix that does not contain any metacharacter.
- Pat.PrefixSize = S.find_first_of("?*[{\\");
+ Pat.PrefixSize = S.find_first_of(PrefixMetas);
if (Pat.PrefixSize == std::string::npos) {
Pat.PrefixSize = S.size();
return Pat;
@@ -189,7 +203,7 @@ GlobPattern::create(StringRef S, std::optional<size_t> MaxSubPatterns) {
S = S.substr(Pat.PrefixSize);
// Just in case we stop on unmatched opening brackets.
- size_t SuffixStart = S.find_last_of("?*[]{}\\");
+ size_t SuffixStart = S.find_last_of(SuffixMetas);
assert(SuffixStart != std::string::npos);
if (S[SuffixStart] == '\\')
++SuffixStart;
@@ -202,7 +216,7 @@ GlobPattern::create(StringRef S, std::optional<size_t> MaxSubPatterns) {
if (auto Err = parseBraceExpansions(S, MaxSubPatterns).moveInto(SubPats))
return std::move(Err);
for (StringRef SubPat : SubPats) {
- auto SubGlobOrErr = SubGlobPattern::create(SubPat);
+ auto SubGlobOrErr = SubGlobPattern::create(SubPat, SlashAgnostic);
if (!SubGlobOrErr)
return SubGlobOrErr.takeError();
Pat.SubGlobs.push_back(*SubGlobOrErr);
@@ -212,7 +226,7 @@ GlobPattern::create(StringRef S, std::optional<size_t> MaxSubPatterns) {
}
Expected<GlobPattern::SubGlobPattern>
-GlobPattern::SubGlobPattern::create(StringRef S) {
+GlobPattern::SubGlobPattern::create(StringRef S, bool SlashAgnostic) {
SubGlobPattern Pat;
// Parse brackets.
@@ -228,13 +242,20 @@ GlobPattern::SubGlobPattern::create(StringRef S) {
errc::invalid_argument);
StringRef Chars = S.substr(I, J - I);
bool Invert = S[I] == '^' || S[I] == '!';
- Expected<BitVector> BV =
+ Expected<BitVector> BVOrErr =
Invert ? expand(Chars.substr(1), S) : expand(Chars, S);
- if (!BV)
- return BV.takeError();
+ if (!BVOrErr)
+ return BVOrErr.takeError();
+ BitVector BV = std::move(*BVOrErr);
+ if (SlashAgnostic) {
+ if (BV['\\'] || BV['/']) {
+ BV.set('\\');
+ BV.set('/');
+ }
+ }
if (Invert)
- BV->flip();
- Pat.Brackets.push_back(Bracket{J + 1, std::move(*BV)});
+ BV.flip();
+ Pat.Brackets.push_back(Bracket{J + 1, std::move(BV)});
I = J;
} else if (S[I] == '\\') {
if (++I == E)
@@ -246,8 +267,8 @@ GlobPattern::SubGlobPattern::create(StringRef S) {
}
StringRef GlobPattern::longest_substr() const {
- return maxPlainSubstring(
- Pattern.drop_front(PrefixSize).drop_back(SuffixSize));
+ return maxPlainSubstring(Pattern.drop_front(PrefixSize).drop_back(SuffixSize),
+ SlashAgnostic);
}
bool GlobPattern::match(StringRef S) const {
@@ -258,15 +279,23 @@ bool GlobPattern::match(StringRef S) const {
if (SubGlobs.empty() && S.empty())
return true;
for (auto &Glob : SubGlobs)
- if (Glob.match(S))
+ if (Glob.match(S, SlashAgnostic))
return true;
return false;
}
+static bool matchChar(char PatC, char QueryC, bool SlashAgnostic) {
+ if (PatC == QueryC)
+ return true;
+ return SlashAgnostic && (PatC == '\\' || PatC == '/') &&
+ (QueryC == '\\' || QueryC == '/');
+}
+
// Factor the pattern into segments split by '*'. The segment is matched
// sequentianlly by finding the first occurrence past the end of the previous
// match.
-bool GlobPattern::SubGlobPattern::match(StringRef Str) const {
+bool GlobPattern::SubGlobPattern::match(StringRef Str,
+ bool SlashAgnostic) const {
const char *P = Pat.data(), *SegmentBegin = nullptr, *S = Str.data(),
*SavedS = S;
const char *const PEnd = P + Pat.size(), *const End = S + Str.size();
@@ -288,12 +317,12 @@ bool GlobPattern::SubGlobPattern::match(StringRef Str) const {
continue;
}
} else if (*P == '\\') {
- if (*++P == *S) {
+ if (matchChar(*++P, *S, SlashAgnostic)) {
++P;
++S;
continue;
}
- } else if (*P == *S || *P == '?') {
+ } else if (matchChar(*P, *S, SlashAgnostic) || *P == '?') {
++P;
++S;
continue;
diff --git a/llvm/lib/Support/SpecialCaseList.cpp b/llvm/lib/Support/SpecialCaseList.cpp
index d3ba7d0b2defb..1a51adb667601 100644
--- a/llvm/lib/Support/SpecialCaseList.cpp
+++ b/llvm/lib/Support/SpecialCaseList.cpp
@@ -43,7 +43,7 @@ namespace {
// Lagacy v1 matcher.
class RegexMatcher {
public:
- Error insert(StringRef Pattern, unsigned LineNumber);
+ Error insert(StringRef Pattern, unsigned LineNumber, bool SlashAgnostic);
unsigned match(StringRef Query) const;
private:
@@ -60,7 +60,7 @@ class RegexMatcher {
class GlobMatcher {
public:
- Error insert(StringRef Pattern, unsigned LineNumber);
+ Error insert(StringRef Pattern, unsigned LineNumber, bool SlashAgnostic);
unsigned match(StringRef Query) const;
private:
@@ -91,7 +91,7 @@ class GlobMatcher {
/// Represents a set of patterns and their line numbers
class Matcher {
public:
- Matcher(bool UseGlobs, bool RemoveDotSlash, bool CanonicalizeSlashes);
+ Matcher(bool UseGlobs, bool RemoveDotSlash, bool SlashAgnostic);
Error insert(StringRef Pattern, unsigned LineNumber);
unsigned match(StringRef Query) const;
@@ -100,10 +100,11 @@ class Matcher {
std::variant<RegexMatcher, GlobMatcher> M;
bool RemoveDotSlash;
- bool CanonicalizeSlashes;
+ bool SlashAgnostic;
};
-Error RegexMatcher::insert(StringRef Pattern, unsigned LineNumber) {
+Error RegexMatcher::insert(StringRef Pattern, unsigned LineNumber,
+ bool SlashAgnostic) {
if (Pattern.empty())
return createStringError(errc::invalid_argument,
"Supplied regex was blank");
@@ -134,11 +135,13 @@ unsigned RegexMatcher::match(StringRef Query) const {
return 0;
}
-Error GlobMatcher::insert(StringRef Pattern, unsigned LineNumber) {
+Error GlobMatcher::insert(StringRef Pattern, unsigned LineNumber,
+ bool SlashAgnostic) {
if (Pattern.empty())
return createStringError(errc::invalid_argument, "Supplied glob was blank");
- auto Res = GlobPattern::create(Pattern, /*MaxSubPatterns=*/1024);
+ auto Res =
+ GlobPattern::create(Pattern, /*MaxSubPatterns=*/1024, SlashAgnostic);
if (auto Err = Res.takeError())
return Err;
Globs.emplace_back(Pattern, LineNumber, std::move(Res.get()));
@@ -220,8 +223,8 @@ unsigned GlobMatcher::match(StringRef Query) const {
return Best < 0 ? 0 : Globs[Best].LineNo;
}
-Matcher::Matcher(bool UseGlobs, bool RemoveDotSlash, bool CanonicalizeSlashes)
- : RemoveDotSlash(RemoveDotSlash), CanonicalizeSlashes(CanonicalizeSlashes) {
+Matcher::Matcher(bool UseGlobs, bool RemoveDotSlash, bool SlashAgnostic)
+ : RemoveDotSlash(RemoveDotSlash), SlashAgnostic(SlashAgnostic) {
if (UseGlobs)
M.emplace<GlobMatcher>();
else
@@ -229,15 +232,11 @@ Matcher::Matcher(bool UseGlobs, bool RemoveDotSlash, bool CanonicalizeSlashes)
}
Error Matcher::insert(StringRef Pattern, unsigned LineNumber) {
- return std::visit([&](auto &V) { return V.insert(Pattern, LineNumber); }, M);
+ return std::visit(
+ [&](auto &V) { return V.insert(Pattern, LineNumber, SlashAgnostic); }, M);
}
unsigned Matcher::match(StringRef Query) const {
- std::string CanonicalizedQuery;
- if (CanonicalizeSlashes) {
- CanonicalizedQuery = llvm::sys::path::convert_to_slash(Query);
- Query = CanonicalizedQuery;
- }
if (RemoveDotSlash)
Query = llvm::sys::path::remove_leading_dotslash(Query);
return std::visit([&](auto &V) -> unsigned { return V.match(Query); }, M);
@@ -252,7 +251,7 @@ class SpecialCaseList::Section::SectionImpl {
explicit SectionImpl(bool UseGlobs)
: SectionMatcher(UseGlobs, /*RemoveDotSlash=*/false,
- /*CanonicalizeSlashes=*/false) {}
+ /*SlashAgnostic=*/false) {}
Matcher SectionMatcher;
SectionEntries Entries;
@@ -345,7 +344,7 @@ bool SpecialCaseList::parse(unsigned FileIdx, const MemoryBuffer *MB,
bool RemoveDotSlash = Version > 2;
- bool CanonicalizeSlashes = Version > 3 && llvm::sys::path::is_separator('\\');
+ bool SlashAgnostic = Version > 3 && llvm::sys::path::is_separator('\\');
auto ErrOrSection = addSection("*", FileIdx, 1, true);
if (auto Err = ErrOrSection.takeError()) {
@@ -395,8 +394,7 @@ bool SpecialCaseList::parse(unsigned FileIdx, const MemoryBuffer *MB,
auto [Pattern, Category] = Postfix.split("=");
bool IsPath = llvm::is_contained(PathPrefixes, Prefix);
auto [It, _] = CurrentImpl->Entries[Prefix].try_emplace(
- Category, UseGlobs, RemoveDotSlash && IsPath,
- CanonicalizeSlashes && IsPath);
+ Category, UseGlobs, RemoveDotSlash && IsPath, SlashAgnostic && IsPath);
Pattern = Pattern.copy(StrAlloc);
if (auto Err = It->second.insert(Pattern, LineNo)) {
Error =
diff --git a/llvm/unittests/Support/GlobPatternTest.cpp b/llvm/unittests/Support/GlobPatternTest.cpp
index 872a21e948d7a..35423e37a3ae0 100644
--- a/llvm/unittests/Support/GlobPatternTest.cpp
+++ b/llvm/unittests/Support/GlobPatternTest.cpp
@@ -327,6 +327,30 @@ TEST_F(GlobPatternTest, PrefixSuffix) {
ASSERT_TRUE((bool)Pat);
EXPECT_EQ("", Pat->prefix());
EXPECT_EQ("cd", Pat->suffix());
+
+ Pat = GlobPattern::create("ab/cd", /*MaxSubPatterns=*/{},
+ /*SlashAgnostic=*/true);
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("ab", Pat->prefix());
+ EXPECT_EQ("cd", Pat->suffix());
+
+ Pat = GlobPattern::create("ab\\cd", /*MaxSubPatterns=*/{},
+ /*SlashAgnostic=*/true);
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("ab", Pat->prefix());
+ EXPECT_EQ("d", Pat->suffix());
+
+ Pat = GlobPattern::create("ab/cd", /*MaxSubPatterns=*/{},
+ /*SlashAgnostic=*/false);
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("ab/cd", Pat->prefix());
+ EXPECT_EQ("", Pat->suffix());
+
+ Pat = GlobPattern::create("ab\\cd", /*MaxSubPatterns=*/{},
+ /*SlashAgnostic=*/false);
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("ab", Pat->prefix());
+ EXPECT_EQ("d", Pat->suffix());
}
TEST_F(GlobPatternTest, Substr) {
@@ -393,6 +417,26 @@ TEST_F(GlobPatternTest, Substr) {
Pat = GlobPattern::create("a*bcdef{g}*h");
ASSERT_TRUE((bool)Pat);
EXPECT_EQ("bcdef", Pat->longest_substr());
+
+ Pat = GlobPattern::create("a*bc/de*f", /*MaxSubPatterns=*/{},
+ /*SlashAgnostic=*/true);
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("bc", Pat->longest_substr());
+
+ Pat = GlobPattern::create("a*bc\\de*f", /*MaxSubPatterns=*/{},
+ /*SlashAgnostic=*/true);
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("bc", Pat->longest_substr());
+
+ Pat = GlobPattern::create("a*bc/de*f", /*MaxSubPatterns=*/{},
+ /*SlashAgnostic=*/false);
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("bc/de", Pat->longest_substr());
+
+ Pat = GlobPattern::create("a*bc\\de*f", /*MaxSubPatterns=*/{},
+ /*SlashAgnostic=*/false);
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("bc", Pat->longest_substr());
}
TEST_F(GlobPatternTest, Pathological) {
@@ -409,4 +453,22 @@ TEST_F(GlobPatternTest, Pathological) {
EXPECT_FALSE(Pat->match(S));
EXPECT_TRUE(Pat->match(S + 'b'));
}
+
+TEST_F(GlobPatternTest, SlashAgnosticMatch) {
+ auto Pat1 = GlobPattern::create("foo\\\\bar[a\\\\-z]", 1024,
+ /*SlashAgnostic=*/true);
+ ASSERT_TRUE((bool)Pat1);
+ EXPECT_TRUE(Pat1->match("foo/bar\\"));
+ EXPECT_TRUE(Pat1->match("foo/barb"));
+ EXPECT_TRUE(Pat1->match("foo/bar/"));
+}
+
+TEST_F(GlobPatternTest, SlashAgnosticMatchInverted) {
+ auto Pat = GlobPattern::create("foo\\\\bar[^a\\\\-z]", 1024,
+ /*SlashAgnostic=*/true);
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_FALSE(Pat->match("foo/bar/"));
+ EXPECT_FALSE(Pat->match("foo/barb"));
+ EXPECT_TRUE(Pat->match("foo/bar1"));
+}
}
diff --git a/llvm/unittests/Support/SpecialCaseListTest.cpp b/llvm/unittests/Support/SpecialCaseListTest.cpp
index 508593e0bc380..9a575616067a1 100644
--- a/llvm/unittests/Support/SpecialCaseListTest.cpp
+++ b/llvm/unittests/Support/SpecialCaseListTest.cpp
@@ -426,11 +426,12 @@ TEST_F(SpecialCaseListTest, CanonicalizePathsOnWindows) {
"fun:hi\\\\bye=category\n");
EXPECT_TRUE(SCL->inSection("", "src", "foo/bar"));
EXPECT_TRUE(SCL->inSection("", "src", "foo\\bar"));
- // The baz pattern doesn't match because paths are canonicalized first
- EXPECT_FALSE(SCL->inSection("", "src", "foo/baz"));
- EXPECT_FALSE(SCL->inSection("", "src", "foo\\baz"));
+ // The baz pattern matches because both paths and patterns are canonicalized
+ EXPECT_TRUE(SCL->inSection("", "src", "foo/baz"));
+ EXPECT_TRUE(SCL->inSection("", "src", "foo\\baz"));
// The canonicalization only applies to files
EXPECT_TRUE(SCL->inSection("", "fun", "hi\\bye", "category"));
+ EXPECT_FALSE(SCL->inSection("", "fun", "hi/bye", "category"));
}
#endif
>From bc5823abc916f8831bd6239dced77c1ed2214f0b Mon Sep 17 00:00:00 2001
From: Vitaly Buka <vitalybuka at google.com>
Date: Tue, 9 Jun 2026 20:10:41 -0700
Subject: [PATCH 19/30] Docs
---
clang/docs/ReleaseNotes.rst | 6 +++---
clang/docs/SanitizerSpecialCaseList.rst | 5 +++--
llvm/unittests/Support/SpecialCaseListTest.cpp | 6 +++---
3 files changed, 9 insertions(+), 8 deletions(-)
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 8a4dd7c3c87f0..4bcd92935031a 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -1000,9 +1000,9 @@ Sanitizers
- Sanitizer Special Case Lists (``-fsanitize-ignorelist``) and warning
suppression mappings (``--warning-suppression-mappings``) now recognize version
- 4 of the Special Case List format (indicated by ``#!special-case-list-v4``),
- which canonicalizes path separators by converting backslashes to forward slashes
- on Windows hosts.
+ 4 of the Special Case List format (indicated by ``#!special-case-list-v4``).
+ On Windows hosts, path matching is slash-agnostic: both forward slashes (``/``)
+ and backslashes (``\``) match either path separator in both patterns and paths.
Python Binding Changes
----------------------
diff --git a/clang/docs/SanitizerSpecialCaseList.rst b/clang/docs/SanitizerSpecialCaseList.rst
index 40fa19b37d93b..b816159dca735 100644
--- a/clang/docs/SanitizerSpecialCaseList.rst
+++ b/clang/docs/SanitizerSpecialCaseList.rst
@@ -238,8 +238,9 @@ tool-specific docs.
file separators, and writing patterns to match both variants can be
inconvenient.
- Starting with version 4 (indicated by ``#!special-case-list-v4``), path separators
- on Windows hosts are canonicalized by converting backslashes to forward slashes.
+ Starting with version 4 (indicated by ``#!special-case-list-v4``), path matching
+ on Windows hosts is slash-agnostic: both forward slashes (``/``) and backslashes
+ (``\``) match either path separator in both patterns and paths.
``mainfile`` is similar to applying ``-fno-sanitize=`` to a set of files but
does not need plumbing into the build system. This works well for internal
diff --git a/llvm/unittests/Support/SpecialCaseListTest.cpp b/llvm/unittests/Support/SpecialCaseListTest.cpp
index 9a575616067a1..32ef18550502a 100644
--- a/llvm/unittests/Support/SpecialCaseListTest.cpp
+++ b/llvm/unittests/Support/SpecialCaseListTest.cpp
@@ -417,7 +417,7 @@ TEST_F(SpecialCaseListTest, FileIdx) {
}
#ifdef _WIN32
-TEST_F(SpecialCaseListTest, CanonicalizePathsOnWindows) {
+TEST_F(SpecialCaseListTest, SlashAgnosticPathsOnWindows) {
std::unique_ptr<SpecialCaseList> SCL =
makeSpecialCaseList("#!special-case-list-v4\n"
"\n"
@@ -426,10 +426,10 @@ TEST_F(SpecialCaseListTest, CanonicalizePathsOnWindows) {
"fun:hi\\\\bye=category\n");
EXPECT_TRUE(SCL->inSection("", "src", "foo/bar"));
EXPECT_TRUE(SCL->inSection("", "src", "foo\\bar"));
- // The baz pattern matches because both paths and patterns are canonicalized
+ // The baz pattern matches because paths are matched slash-agnostically
EXPECT_TRUE(SCL->inSection("", "src", "foo/baz"));
EXPECT_TRUE(SCL->inSection("", "src", "foo\\baz"));
- // The canonicalization only applies to files
+ // Slash-agnostic matching only applies to files
EXPECT_TRUE(SCL->inSection("", "fun", "hi\\bye", "category"));
EXPECT_FALSE(SCL->inSection("", "fun", "hi/bye", "category"));
}
>From bee5a75972c20cd69f8da7d516887e9d65b18088 Mon Sep 17 00:00:00 2001
From: Vitaly Buka <vitalybuka at google.com>
Date: Tue, 9 Jun 2026 20:28:42 -0700
Subject: [PATCH 20/30] [NFC][Docs] Revert changes to llvm/docs/ReleaseNotes.md
These changes were originally added for the canonicalized paths design, which was abandoned in favor of slash-agnostic matching. The correct documentation is now in clang/docs/ReleaseNotes.rst.
Assisted-by: Gemini
---
llvm/docs/ReleaseNotes.md | 5 -----
1 file changed, 5 deletions(-)
diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md
index 1e7460f8531a3..398e46214dc98 100644
--- a/llvm/docs/ReleaseNotes.md
+++ b/llvm/docs/ReleaseNotes.md
@@ -408,11 +408,6 @@ Makes programs 10x faster by doing Special New Thing.
* Add a random delay into ThreadSanitizer to help find rare thread interleavings.
-* [Sanitizer Special Case Lists](https://clang.llvm.org/docs/SanitizerSpecialCaseList.html)
- version 4 (indicated by ``#!special-case-list-v4``) now specifies that filename
- patterns should be matched against canonicalized paths, without leading dots or
- slashes and (on Windows only) without any backslashes.
-
### Other Changes
## External Open Source Projects Using LLVM {{env.config.release}}
>From 91036ab8309c31caad97a849da3e6ecdf14f4dc3 Mon Sep 17 00:00:00 2001
From: Vitaly Buka <vitalybuka at google.com>
Date: Tue, 9 Jun 2026 21:35:27 -0700
Subject: [PATCH 21/30] [NFC][Support] Refactor bracket parsing in GlobPattern
Extract BitVector from Expected before flipping to
prepare for future modifications.
Pull Request: https://github.com/llvm/llvm-project/pull/202848
---
llvm/lib/Support/GlobPattern.cpp | 14 ++++++++------
1 file changed, 8 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Support/GlobPattern.cpp b/llvm/lib/Support/GlobPattern.cpp
index 2715229c65be1..39552060ac406 100644
--- a/llvm/lib/Support/GlobPattern.cpp
+++ b/llvm/lib/Support/GlobPattern.cpp
@@ -228,13 +228,15 @@ GlobPattern::SubGlobPattern::create(StringRef S) {
errc::invalid_argument);
StringRef Chars = S.substr(I, J - I);
bool Invert = S[I] == '^' || S[I] == '!';
- Expected<BitVector> BV =
- Invert ? expand(Chars.substr(1), S) : expand(Chars, S);
- if (!BV)
- return BV.takeError();
if (Invert)
- BV->flip();
- Pat.Brackets.push_back(Bracket{J + 1, std::move(*BV)});
+ Chars = Chars.drop_front();
+ Expected<BitVector> BVOrErr = expand(Chars, S);
+ if (!BVOrErr)
+ return BVOrErr.takeError();
+ BitVector &BV = *BVOrErr;
+ if (Invert)
+ BV.flip();
+ Pat.Brackets.push_back(Bracket{J + 1, std::move(BV)});
I = J;
} else if (S[I] == '\\') {
if (++I == E)
>From 5f376916471b58f4e6e2d36dc523f9148b2dd113 Mon Sep 17 00:00:00 2001
From: Vitaly Buka <vitalybuka at google.com>
Date: Tue, 9 Jun 2026 21:54:24 -0700
Subject: [PATCH 22/30] [NFC][Support] Define Prefix/SuffixMetacharacters
constants
Extract literal metacharacter strings used in GlobPattern into static constexpr arrays to improve consistency and maintainability.
Assisted-by: Gemini
Pull Request: https://github.com/llvm/llvm-project/pull/202850
---
llvm/lib/Support/GlobPattern.cpp | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Support/GlobPattern.cpp b/llvm/lib/Support/GlobPattern.cpp
index 39552060ac406..1aaddbb8408a3 100644
--- a/llvm/lib/Support/GlobPattern.cpp
+++ b/llvm/lib/Support/GlobPattern.cpp
@@ -16,6 +16,9 @@
using namespace llvm;
+static constexpr char PrefixMetacharacters[] = "?*[{\\";
+static constexpr char SuffixMetacharacters[] = "?*[]{}\\";
+
// Expands character ranges and returns a bitmap.
// For example, "a-cf-hz" is expanded to "abcfghz".
static Expected<BitVector> expand(StringRef S, StringRef Original) {
@@ -135,7 +138,7 @@ parseBraceExpansions(StringRef S, std::optional<size_t> MaxSubPatterns) {
static StringRef maxPlainSubstring(StringRef S) {
StringRef Best;
while (!S.empty()) {
- size_t PrefixSize = S.find_first_of("?*[{\\");
+ size_t PrefixSize = S.find_first_of(PrefixMetacharacters);
if (PrefixSize == std::string::npos)
PrefixSize = S.size();
@@ -181,7 +184,7 @@ GlobPattern::create(StringRef S, std::optional<size_t> MaxSubPatterns) {
Pat.Pattern = S;
// Store the prefix that does not contain any metacharacter.
- Pat.PrefixSize = S.find_first_of("?*[{\\");
+ Pat.PrefixSize = S.find_first_of(PrefixMetacharacters);
if (Pat.PrefixSize == std::string::npos) {
Pat.PrefixSize = S.size();
return Pat;
@@ -189,7 +192,7 @@ GlobPattern::create(StringRef S, std::optional<size_t> MaxSubPatterns) {
S = S.substr(Pat.PrefixSize);
// Just in case we stop on unmatched opening brackets.
- size_t SuffixStart = S.find_last_of("?*[]{}\\");
+ size_t SuffixStart = S.find_last_of(SuffixMetacharacters);
assert(SuffixStart != std::string::npos);
if (S[SuffixStart] == '\\')
++SuffixStart;
>From 9eb8db5e195411e95fff114fc221f61ae7fc859c Mon Sep 17 00:00:00 2001
From: Vitaly Buka <vitalybuka at google.com>
Date: Tue, 9 Jun 2026 22:07:08 -0700
Subject: [PATCH 23/30] [Support] Implement slash-agnostic path matching in
GlobPattern
Add a SlashAgnostic option to GlobPattern to allow matching path separators
(both forward slashes and backslashes) agnostically.
When enabled:
- We conservatively reduce the plain prefix and suffix by treating path
separators as metacharacters. This ensures that path separators are
matched via the slash-agnostic state machine rather than plain string
comparison.
- Brackets containing slashes are adjusted to match both separators.
- Character comparisons in the state machine (matchChar) treat '/' and
'\' as equivalent.
Co-authored-by: Devon Loehr <DKLoehr at users.noreply.github.com>
Assisted-by: Gemini
---
llvm/include/llvm/Support/GlobPattern.h | 9 ++--
llvm/lib/Support/GlobPattern.cpp | 51 +++++++++++++-----
llvm/unittests/Support/GlobPatternTest.cpp | 62 ++++++++++++++++++++++
3 files changed, 106 insertions(+), 16 deletions(-)
diff --git a/llvm/include/llvm/Support/GlobPattern.h b/llvm/include/llvm/Support/GlobPattern.h
index 1a6c0ab292ac8..47aa7a1cc74c9 100644
--- a/llvm/include/llvm/Support/GlobPattern.h
+++ b/llvm/include/llvm/Support/GlobPattern.h
@@ -56,7 +56,8 @@ class GlobPattern {
/// created from expanding braces otherwise disable
/// brace expansion
LLVM_ABI static Expected<GlobPattern>
- create(StringRef Pat, std::optional<size_t> MaxSubPatterns = {});
+ create(StringRef Pat, std::optional<size_t> MaxSubPatterns = {},
+ bool SlashAgnostic = false);
/// \returns \p true if \p S matches this glob pattern
LLVM_ABI bool match(StringRef S) const;
@@ -87,12 +88,14 @@ class GlobPattern {
StringRef Pattern;
size_t PrefixSize = 0;
size_t SuffixSize = 0;
+ bool SlashAgnostic = false;
struct SubGlobPattern {
/// \param Pat the pattern to match against
- LLVM_ABI static Expected<SubGlobPattern> create(StringRef Pat);
+ LLVM_ABI static Expected<SubGlobPattern> create(
+ StringRef Pat, bool SlashAgnostic = false);
/// \returns \p true if \p S matches this glob pattern
- LLVM_ABI bool match(StringRef S) const;
+ LLVM_ABI bool match(StringRef S, bool SlashAgnostic) const;
StringRef getPat() const { return StringRef(Pat.data(), Pat.size()); }
// Brackets with their end position and matched bytes.
diff --git a/llvm/lib/Support/GlobPattern.cpp b/llvm/lib/Support/GlobPattern.cpp
index 1aaddbb8408a3..62cc6f16eaf33 100644
--- a/llvm/lib/Support/GlobPattern.cpp
+++ b/llvm/lib/Support/GlobPattern.cpp
@@ -18,6 +18,8 @@ using namespace llvm;
static constexpr char PrefixMetacharacters[] = "?*[{\\";
static constexpr char SuffixMetacharacters[] = "?*[]{}\\";
+static constexpr char PrefixMetacharactersWithSlash[] = "?*[{\\/";
+static constexpr char SuffixMetacharactersWithSlash[] = "?*[]{}\\/";
// Expands character ranges and returns a bitmap.
// For example, "a-cf-hz" is expanded to "abcfghz".
@@ -135,10 +137,12 @@ parseBraceExpansions(StringRef S, std::optional<size_t> MaxSubPatterns) {
return std::move(SubPatterns);
}
-static StringRef maxPlainSubstring(StringRef S) {
+static StringRef maxPlainSubstring(StringRef S, bool SlashAgnostic) {
StringRef Best;
while (!S.empty()) {
- size_t PrefixSize = S.find_first_of(PrefixMetacharacters);
+ const char *Metas =
+ SlashAgnostic ? PrefixMetacharactersWithSlash : PrefixMetacharacters;
+ size_t PrefixSize = S.find_first_of(Metas);
if (PrefixSize == std::string::npos)
PrefixSize = S.size();
@@ -179,12 +183,19 @@ static StringRef maxPlainSubstring(StringRef S) {
}
Expected<GlobPattern>
-GlobPattern::create(StringRef S, std::optional<size_t> MaxSubPatterns) {
+GlobPattern::create(StringRef S, std::optional<size_t> MaxSubPatterns,
+ bool SlashAgnostic) {
GlobPattern Pat;
+ Pat.SlashAgnostic = SlashAgnostic;
Pat.Pattern = S;
+ const char *PrefixMetas =
+ SlashAgnostic ? PrefixMetacharactersWithSlash : PrefixMetacharacters;
+ const char *SuffixMetas =
+ SlashAgnostic ? SuffixMetacharactersWithSlash : SuffixMetacharacters;
+
// Store the prefix that does not contain any metacharacter.
- Pat.PrefixSize = S.find_first_of(PrefixMetacharacters);
+ Pat.PrefixSize = S.find_first_of(PrefixMetas);
if (Pat.PrefixSize == std::string::npos) {
Pat.PrefixSize = S.size();
return Pat;
@@ -192,7 +203,7 @@ GlobPattern::create(StringRef S, std::optional<size_t> MaxSubPatterns) {
S = S.substr(Pat.PrefixSize);
// Just in case we stop on unmatched opening brackets.
- size_t SuffixStart = S.find_last_of(SuffixMetacharacters);
+ size_t SuffixStart = S.find_last_of(SuffixMetas);
assert(SuffixStart != std::string::npos);
if (S[SuffixStart] == '\\')
++SuffixStart;
@@ -205,7 +216,7 @@ GlobPattern::create(StringRef S, std::optional<size_t> MaxSubPatterns) {
if (auto Err = parseBraceExpansions(S, MaxSubPatterns).moveInto(SubPats))
return std::move(Err);
for (StringRef SubPat : SubPats) {
- auto SubGlobOrErr = SubGlobPattern::create(SubPat);
+ auto SubGlobOrErr = SubGlobPattern::create(SubPat, SlashAgnostic);
if (!SubGlobOrErr)
return SubGlobOrErr.takeError();
Pat.SubGlobs.push_back(*SubGlobOrErr);
@@ -215,7 +226,7 @@ GlobPattern::create(StringRef S, std::optional<size_t> MaxSubPatterns) {
}
Expected<GlobPattern::SubGlobPattern>
-GlobPattern::SubGlobPattern::create(StringRef S) {
+GlobPattern::SubGlobPattern::create(StringRef S, bool SlashAgnostic) {
SubGlobPattern Pat;
// Parse brackets.
@@ -237,6 +248,12 @@ GlobPattern::SubGlobPattern::create(StringRef S) {
if (!BVOrErr)
return BVOrErr.takeError();
BitVector &BV = *BVOrErr;
+ if (SlashAgnostic) {
+ if (BV['\\'] || BV['/']) {
+ BV.set('\\');
+ BV.set('/');
+ }
+ }
if (Invert)
BV.flip();
Pat.Brackets.push_back(Bracket{J + 1, std::move(BV)});
@@ -251,8 +268,8 @@ GlobPattern::SubGlobPattern::create(StringRef S) {
}
StringRef GlobPattern::longest_substr() const {
- return maxPlainSubstring(
- Pattern.drop_front(PrefixSize).drop_back(SuffixSize));
+ return maxPlainSubstring(Pattern.drop_front(PrefixSize).drop_back(SuffixSize),
+ SlashAgnostic);
}
bool GlobPattern::match(StringRef S) const {
@@ -263,15 +280,23 @@ bool GlobPattern::match(StringRef S) const {
if (SubGlobs.empty() && S.empty())
return true;
for (auto &Glob : SubGlobs)
- if (Glob.match(S))
+ if (Glob.match(S, SlashAgnostic))
return true;
return false;
}
+static bool matchChar(char PatC, char QueryC, bool SlashAgnostic) {
+ if (PatC == QueryC)
+ return true;
+ return SlashAgnostic && (PatC == '\\' || PatC == '/') &&
+ (QueryC == '\\' || QueryC == '/');
+}
+
// Factor the pattern into segments split by '*'. The segment is matched
// sequentially by finding the first occurrence past the end of the previous
// match.
-bool GlobPattern::SubGlobPattern::match(StringRef Str) const {
+bool GlobPattern::SubGlobPattern::match(StringRef Str,
+ bool SlashAgnostic) const {
const char *P = Pat.data(), *SegmentBegin = nullptr, *S = Str.data(),
*SavedS = S;
const char *const PEnd = P + Pat.size(), *const End = S + Str.size();
@@ -293,12 +318,12 @@ bool GlobPattern::SubGlobPattern::match(StringRef Str) const {
continue;
}
} else if (*P == '\\') {
- if (*++P == *S) {
+ if (matchChar(*++P, *S, SlashAgnostic)) {
++P;
++S;
continue;
}
- } else if (*P == *S || *P == '?') {
+ } else if (matchChar(*P, *S, SlashAgnostic) || *P == '?') {
++P;
++S;
continue;
diff --git a/llvm/unittests/Support/GlobPatternTest.cpp b/llvm/unittests/Support/GlobPatternTest.cpp
index 872a21e948d7a..35423e37a3ae0 100644
--- a/llvm/unittests/Support/GlobPatternTest.cpp
+++ b/llvm/unittests/Support/GlobPatternTest.cpp
@@ -327,6 +327,30 @@ TEST_F(GlobPatternTest, PrefixSuffix) {
ASSERT_TRUE((bool)Pat);
EXPECT_EQ("", Pat->prefix());
EXPECT_EQ("cd", Pat->suffix());
+
+ Pat = GlobPattern::create("ab/cd", /*MaxSubPatterns=*/{},
+ /*SlashAgnostic=*/true);
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("ab", Pat->prefix());
+ EXPECT_EQ("cd", Pat->suffix());
+
+ Pat = GlobPattern::create("ab\\cd", /*MaxSubPatterns=*/{},
+ /*SlashAgnostic=*/true);
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("ab", Pat->prefix());
+ EXPECT_EQ("d", Pat->suffix());
+
+ Pat = GlobPattern::create("ab/cd", /*MaxSubPatterns=*/{},
+ /*SlashAgnostic=*/false);
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("ab/cd", Pat->prefix());
+ EXPECT_EQ("", Pat->suffix());
+
+ Pat = GlobPattern::create("ab\\cd", /*MaxSubPatterns=*/{},
+ /*SlashAgnostic=*/false);
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("ab", Pat->prefix());
+ EXPECT_EQ("d", Pat->suffix());
}
TEST_F(GlobPatternTest, Substr) {
@@ -393,6 +417,26 @@ TEST_F(GlobPatternTest, Substr) {
Pat = GlobPattern::create("a*bcdef{g}*h");
ASSERT_TRUE((bool)Pat);
EXPECT_EQ("bcdef", Pat->longest_substr());
+
+ Pat = GlobPattern::create("a*bc/de*f", /*MaxSubPatterns=*/{},
+ /*SlashAgnostic=*/true);
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("bc", Pat->longest_substr());
+
+ Pat = GlobPattern::create("a*bc\\de*f", /*MaxSubPatterns=*/{},
+ /*SlashAgnostic=*/true);
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("bc", Pat->longest_substr());
+
+ Pat = GlobPattern::create("a*bc/de*f", /*MaxSubPatterns=*/{},
+ /*SlashAgnostic=*/false);
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("bc/de", Pat->longest_substr());
+
+ Pat = GlobPattern::create("a*bc\\de*f", /*MaxSubPatterns=*/{},
+ /*SlashAgnostic=*/false);
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("bc", Pat->longest_substr());
}
TEST_F(GlobPatternTest, Pathological) {
@@ -409,4 +453,22 @@ TEST_F(GlobPatternTest, Pathological) {
EXPECT_FALSE(Pat->match(S));
EXPECT_TRUE(Pat->match(S + 'b'));
}
+
+TEST_F(GlobPatternTest, SlashAgnosticMatch) {
+ auto Pat1 = GlobPattern::create("foo\\\\bar[a\\\\-z]", 1024,
+ /*SlashAgnostic=*/true);
+ ASSERT_TRUE((bool)Pat1);
+ EXPECT_TRUE(Pat1->match("foo/bar\\"));
+ EXPECT_TRUE(Pat1->match("foo/barb"));
+ EXPECT_TRUE(Pat1->match("foo/bar/"));
+}
+
+TEST_F(GlobPatternTest, SlashAgnosticMatchInverted) {
+ auto Pat = GlobPattern::create("foo\\\\bar[^a\\\\-z]", 1024,
+ /*SlashAgnostic=*/true);
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_FALSE(Pat->match("foo/bar/"));
+ EXPECT_FALSE(Pat->match("foo/barb"));
+ EXPECT_TRUE(Pat->match("foo/bar1"));
+}
}
>From 255b45fe01d01f55f8ca0ced9ef44e36a2394645 Mon Sep 17 00:00:00 2001
From: Vitaly Buka <vitalybuka at google.com>
Date: Tue, 9 Jun 2026 22:15:19 -0700
Subject: [PATCH 24/30] fixed
---
clang/lib/Basic/Diagnostic.cpp | 3 ++-
llvm/include/llvm/Support/GlobPattern.h | 4 ++--
llvm/lib/Support/GlobPattern.cpp | 13 ++-----------
3 files changed, 6 insertions(+), 14 deletions(-)
diff --git a/clang/lib/Basic/Diagnostic.cpp b/clang/lib/Basic/Diagnostic.cpp
index 348c9927e87b9..4802478c379bb 100644
--- a/clang/lib/Basic/Diagnostic.cpp
+++ b/clang/lib/Basic/Diagnostic.cpp
@@ -585,7 +585,8 @@ bool WarningsSpecialCaseList::isDiagSuppressed(diag::kind DiagId,
if (!DiagSection)
return false;
- StringRef F = PLoc.getFilename();
+ StringRef F = llvm::sys::path::remove_leading_dotslash(PLoc.getFilename());
+
unsigned LastSup = DiagSection->getLastMatch("src", F, "");
if (LastSup == 0)
return false;
diff --git a/llvm/include/llvm/Support/GlobPattern.h b/llvm/include/llvm/Support/GlobPattern.h
index 47aa7a1cc74c9..ec3631ade518a 100644
--- a/llvm/include/llvm/Support/GlobPattern.h
+++ b/llvm/include/llvm/Support/GlobPattern.h
@@ -92,8 +92,8 @@ class GlobPattern {
struct SubGlobPattern {
/// \param Pat the pattern to match against
- LLVM_ABI static Expected<SubGlobPattern> create(
- StringRef Pat, bool SlashAgnostic = false);
+ LLVM_ABI static Expected<SubGlobPattern> create(StringRef Pat,
+ bool SlashAgnostic = false);
/// \returns \p true if \p S matches this glob pattern
LLVM_ABI bool match(StringRef S, bool SlashAgnostic) const;
StringRef getPat() const { return StringRef(Pat.data(), Pat.size()); }
diff --git a/llvm/lib/Support/GlobPattern.cpp b/llvm/lib/Support/GlobPattern.cpp
index d08ba021b8fac..06f56bcd52407 100644
--- a/llvm/lib/Support/GlobPattern.cpp
+++ b/llvm/lib/Support/GlobPattern.cpp
@@ -138,8 +138,6 @@ parseBraceExpansions(StringRef S, std::optional<size_t> MaxSubPatterns) {
}
static StringRef maxPlainSubstring(StringRef S, bool SlashAgnostic) {
- const char *Metas =
- SlashAgnostic ? PrefixMetacharactersWithSlash : PrefixMetacharacters;
StringRef Best;
while (!S.empty()) {
const char *Metas =
@@ -185,8 +183,8 @@ static StringRef maxPlainSubstring(StringRef S, bool SlashAgnostic) {
}
Expected<GlobPattern> GlobPattern::create(StringRef S,
-GlobPattern::create(StringRef S, std::optional<size_t> MaxSubPatterns,
- bool SlashAgnostic) {
+ std::optional<size_t> MaxSubPatterns,
+ bool SlashAgnostic) {
GlobPattern Pat;
Pat.SlashAgnostic = SlashAgnostic;
Pat.Pattern = S;
@@ -244,13 +242,6 @@ GlobPattern::SubGlobPattern::create(StringRef S, bool SlashAgnostic) {
errc::invalid_argument);
StringRef Chars = S.substr(I, J - I);
bool Invert = S[I] == '^' || S[I] == '!';
- BitVector BV = std::move(*BVOrErr);
- if (SlashAgnostic) {
- if (BV['\\'] || BV['/']) {
- BV.set('\\');
- BV.set('/');
- }
- }
if (Invert)
Chars = Chars.drop_front();
Expected<BitVector> BVOrErr = expand(Chars, S);
>From 4d4eb1d100996aaf8a4301b468237d0d1795d3b5 Mon Sep 17 00:00:00 2001
From: Vitaly Buka <vitalybuka at google.com>
Date: Tue, 9 Jun 2026 22:07:08 -0700
Subject: [PATCH 25/30] [Support] Implement slash-agnostic path matching in
GlobPattern
Add a SlashAgnostic option to GlobPattern to allow matching path separators
(both forward slashes and backslashes) agnostically.
When enabled:
- We conservatively reduce the plain prefix and suffix by treating path
separators as metacharacters. This ensures that path separators are
matched via the slash-agnostic state machine rather than plain string
comparison.
- Brackets containing slashes are adjusted to match both separators.
- Character comparisons in the state machine (matchChar) treat '/' and
'\' as equivalent.
Co-authored-by: Devon Loehr <DKLoehr at users.noreply.github.com>
Assisted-by: Gemini
Pull Request: https://github.com/llvm/llvm-project/pull/202855
---
llvm/include/llvm/Support/GlobPattern.h | 9 ++--
llvm/lib/Support/GlobPattern.cpp | 53 +++++++++++++-----
llvm/unittests/Support/GlobPatternTest.cpp | 62 ++++++++++++++++++++++
3 files changed, 107 insertions(+), 17 deletions(-)
diff --git a/llvm/include/llvm/Support/GlobPattern.h b/llvm/include/llvm/Support/GlobPattern.h
index 1a6c0ab292ac8..ec3631ade518a 100644
--- a/llvm/include/llvm/Support/GlobPattern.h
+++ b/llvm/include/llvm/Support/GlobPattern.h
@@ -56,7 +56,8 @@ class GlobPattern {
/// created from expanding braces otherwise disable
/// brace expansion
LLVM_ABI static Expected<GlobPattern>
- create(StringRef Pat, std::optional<size_t> MaxSubPatterns = {});
+ create(StringRef Pat, std::optional<size_t> MaxSubPatterns = {},
+ bool SlashAgnostic = false);
/// \returns \p true if \p S matches this glob pattern
LLVM_ABI bool match(StringRef S) const;
@@ -87,12 +88,14 @@ class GlobPattern {
StringRef Pattern;
size_t PrefixSize = 0;
size_t SuffixSize = 0;
+ bool SlashAgnostic = false;
struct SubGlobPattern {
/// \param Pat the pattern to match against
- LLVM_ABI static Expected<SubGlobPattern> create(StringRef Pat);
+ LLVM_ABI static Expected<SubGlobPattern> create(StringRef Pat,
+ bool SlashAgnostic = false);
/// \returns \p true if \p S matches this glob pattern
- LLVM_ABI bool match(StringRef S) const;
+ LLVM_ABI bool match(StringRef S, bool SlashAgnostic) const;
StringRef getPat() const { return StringRef(Pat.data(), Pat.size()); }
// Brackets with their end position and matched bytes.
diff --git a/llvm/lib/Support/GlobPattern.cpp b/llvm/lib/Support/GlobPattern.cpp
index 1aaddbb8408a3..e518c1df3d13d 100644
--- a/llvm/lib/Support/GlobPattern.cpp
+++ b/llvm/lib/Support/GlobPattern.cpp
@@ -18,6 +18,8 @@ using namespace llvm;
static constexpr char PrefixMetacharacters[] = "?*[{\\";
static constexpr char SuffixMetacharacters[] = "?*[]{}\\";
+static constexpr char PrefixMetacharactersWithSlash[] = "?*[{\\/";
+static constexpr char SuffixMetacharactersWithSlash[] = "?*[]{}\\/";
// Expands character ranges and returns a bitmap.
// For example, "a-cf-hz" is expanded to "abcfghz".
@@ -135,10 +137,12 @@ parseBraceExpansions(StringRef S, std::optional<size_t> MaxSubPatterns) {
return std::move(SubPatterns);
}
-static StringRef maxPlainSubstring(StringRef S) {
+static StringRef maxPlainSubstring(StringRef S, bool SlashAgnostic) {
+ const char *Metas =
+ SlashAgnostic ? PrefixMetacharactersWithSlash : PrefixMetacharacters;
StringRef Best;
while (!S.empty()) {
- size_t PrefixSize = S.find_first_of(PrefixMetacharacters);
+ size_t PrefixSize = S.find_first_of(Metas);
if (PrefixSize == std::string::npos)
PrefixSize = S.size();
@@ -178,13 +182,20 @@ static StringRef maxPlainSubstring(StringRef S) {
return Best;
}
-Expected<GlobPattern>
-GlobPattern::create(StringRef S, std::optional<size_t> MaxSubPatterns) {
+Expected<GlobPattern> GlobPattern::create(StringRef S,
+ std::optional<size_t> MaxSubPatterns,
+ bool SlashAgnostic) {
GlobPattern Pat;
+ Pat.SlashAgnostic = SlashAgnostic;
Pat.Pattern = S;
+ const char *PrefixMetas =
+ SlashAgnostic ? PrefixMetacharactersWithSlash : PrefixMetacharacters;
+ const char *SuffixMetas =
+ SlashAgnostic ? SuffixMetacharactersWithSlash : SuffixMetacharacters;
+
// Store the prefix that does not contain any metacharacter.
- Pat.PrefixSize = S.find_first_of(PrefixMetacharacters);
+ Pat.PrefixSize = S.find_first_of(PrefixMetas);
if (Pat.PrefixSize == std::string::npos) {
Pat.PrefixSize = S.size();
return Pat;
@@ -192,7 +203,7 @@ GlobPattern::create(StringRef S, std::optional<size_t> MaxSubPatterns) {
S = S.substr(Pat.PrefixSize);
// Just in case we stop on unmatched opening brackets.
- size_t SuffixStart = S.find_last_of(SuffixMetacharacters);
+ size_t SuffixStart = S.find_last_of(SuffixMetas);
assert(SuffixStart != std::string::npos);
if (S[SuffixStart] == '\\')
++SuffixStart;
@@ -205,7 +216,7 @@ GlobPattern::create(StringRef S, std::optional<size_t> MaxSubPatterns) {
if (auto Err = parseBraceExpansions(S, MaxSubPatterns).moveInto(SubPats))
return std::move(Err);
for (StringRef SubPat : SubPats) {
- auto SubGlobOrErr = SubGlobPattern::create(SubPat);
+ auto SubGlobOrErr = SubGlobPattern::create(SubPat, SlashAgnostic);
if (!SubGlobOrErr)
return SubGlobOrErr.takeError();
Pat.SubGlobs.push_back(*SubGlobOrErr);
@@ -215,7 +226,7 @@ GlobPattern::create(StringRef S, std::optional<size_t> MaxSubPatterns) {
}
Expected<GlobPattern::SubGlobPattern>
-GlobPattern::SubGlobPattern::create(StringRef S) {
+GlobPattern::SubGlobPattern::create(StringRef S, bool SlashAgnostic) {
SubGlobPattern Pat;
// Parse brackets.
@@ -237,6 +248,12 @@ GlobPattern::SubGlobPattern::create(StringRef S) {
if (!BVOrErr)
return BVOrErr.takeError();
BitVector &BV = *BVOrErr;
+ if (SlashAgnostic) {
+ if (BV['\\'] || BV['/']) {
+ BV.set('\\');
+ BV.set('/');
+ }
+ }
if (Invert)
BV.flip();
Pat.Brackets.push_back(Bracket{J + 1, std::move(BV)});
@@ -251,8 +268,8 @@ GlobPattern::SubGlobPattern::create(StringRef S) {
}
StringRef GlobPattern::longest_substr() const {
- return maxPlainSubstring(
- Pattern.drop_front(PrefixSize).drop_back(SuffixSize));
+ return maxPlainSubstring(Pattern.drop_front(PrefixSize).drop_back(SuffixSize),
+ SlashAgnostic);
}
bool GlobPattern::match(StringRef S) const {
@@ -263,15 +280,23 @@ bool GlobPattern::match(StringRef S) const {
if (SubGlobs.empty() && S.empty())
return true;
for (auto &Glob : SubGlobs)
- if (Glob.match(S))
+ if (Glob.match(S, SlashAgnostic))
return true;
return false;
}
+static bool matchChar(char PatC, char QueryC, bool SlashAgnostic) {
+ if (PatC == QueryC)
+ return true;
+ return SlashAgnostic && (PatC == '\\' || PatC == '/') &&
+ (QueryC == '\\' || QueryC == '/');
+}
+
// Factor the pattern into segments split by '*'. The segment is matched
// sequentianlly by finding the first occurrence past the end of the previous
// match.
-bool GlobPattern::SubGlobPattern::match(StringRef Str) const {
+bool GlobPattern::SubGlobPattern::match(StringRef Str,
+ bool SlashAgnostic) const {
const char *P = Pat.data(), *SegmentBegin = nullptr, *S = Str.data(),
*SavedS = S;
const char *const PEnd = P + Pat.size(), *const End = S + Str.size();
@@ -293,12 +318,12 @@ bool GlobPattern::SubGlobPattern::match(StringRef Str) const {
continue;
}
} else if (*P == '\\') {
- if (*++P == *S) {
+ if (matchChar(*++P, *S, SlashAgnostic)) {
++P;
++S;
continue;
}
- } else if (*P == *S || *P == '?') {
+ } else if (matchChar(*P, *S, SlashAgnostic) || *P == '?') {
++P;
++S;
continue;
diff --git a/llvm/unittests/Support/GlobPatternTest.cpp b/llvm/unittests/Support/GlobPatternTest.cpp
index 872a21e948d7a..35423e37a3ae0 100644
--- a/llvm/unittests/Support/GlobPatternTest.cpp
+++ b/llvm/unittests/Support/GlobPatternTest.cpp
@@ -327,6 +327,30 @@ TEST_F(GlobPatternTest, PrefixSuffix) {
ASSERT_TRUE((bool)Pat);
EXPECT_EQ("", Pat->prefix());
EXPECT_EQ("cd", Pat->suffix());
+
+ Pat = GlobPattern::create("ab/cd", /*MaxSubPatterns=*/{},
+ /*SlashAgnostic=*/true);
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("ab", Pat->prefix());
+ EXPECT_EQ("cd", Pat->suffix());
+
+ Pat = GlobPattern::create("ab\\cd", /*MaxSubPatterns=*/{},
+ /*SlashAgnostic=*/true);
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("ab", Pat->prefix());
+ EXPECT_EQ("d", Pat->suffix());
+
+ Pat = GlobPattern::create("ab/cd", /*MaxSubPatterns=*/{},
+ /*SlashAgnostic=*/false);
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("ab/cd", Pat->prefix());
+ EXPECT_EQ("", Pat->suffix());
+
+ Pat = GlobPattern::create("ab\\cd", /*MaxSubPatterns=*/{},
+ /*SlashAgnostic=*/false);
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("ab", Pat->prefix());
+ EXPECT_EQ("d", Pat->suffix());
}
TEST_F(GlobPatternTest, Substr) {
@@ -393,6 +417,26 @@ TEST_F(GlobPatternTest, Substr) {
Pat = GlobPattern::create("a*bcdef{g}*h");
ASSERT_TRUE((bool)Pat);
EXPECT_EQ("bcdef", Pat->longest_substr());
+
+ Pat = GlobPattern::create("a*bc/de*f", /*MaxSubPatterns=*/{},
+ /*SlashAgnostic=*/true);
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("bc", Pat->longest_substr());
+
+ Pat = GlobPattern::create("a*bc\\de*f", /*MaxSubPatterns=*/{},
+ /*SlashAgnostic=*/true);
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("bc", Pat->longest_substr());
+
+ Pat = GlobPattern::create("a*bc/de*f", /*MaxSubPatterns=*/{},
+ /*SlashAgnostic=*/false);
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("bc/de", Pat->longest_substr());
+
+ Pat = GlobPattern::create("a*bc\\de*f", /*MaxSubPatterns=*/{},
+ /*SlashAgnostic=*/false);
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("bc", Pat->longest_substr());
}
TEST_F(GlobPatternTest, Pathological) {
@@ -409,4 +453,22 @@ TEST_F(GlobPatternTest, Pathological) {
EXPECT_FALSE(Pat->match(S));
EXPECT_TRUE(Pat->match(S + 'b'));
}
+
+TEST_F(GlobPatternTest, SlashAgnosticMatch) {
+ auto Pat1 = GlobPattern::create("foo\\\\bar[a\\\\-z]", 1024,
+ /*SlashAgnostic=*/true);
+ ASSERT_TRUE((bool)Pat1);
+ EXPECT_TRUE(Pat1->match("foo/bar\\"));
+ EXPECT_TRUE(Pat1->match("foo/barb"));
+ EXPECT_TRUE(Pat1->match("foo/bar/"));
+}
+
+TEST_F(GlobPatternTest, SlashAgnosticMatchInverted) {
+ auto Pat = GlobPattern::create("foo\\\\bar[^a\\\\-z]", 1024,
+ /*SlashAgnostic=*/true);
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_FALSE(Pat->match("foo/bar/"));
+ EXPECT_FALSE(Pat->match("foo/barb"));
+ EXPECT_TRUE(Pat->match("foo/bar1"));
+}
}
>From ce4a9c4c0190ea46c8c31261342b06319085d6bc Mon Sep 17 00:00:00 2001
From: Vitaly Buka <vitalybuka at google.com>
Date: Tue, 9 Jun 2026 21:35:27 -0700
Subject: [PATCH 26/30] [NFC][Support] Refactor bracket parsing in GlobPattern
Extract BitVector from Expected before flipping to
prepare for future modifications.
Pull Request: https://github.com/llvm/llvm-project/pull/202848
---
llvm/lib/Support/GlobPattern.cpp | 14 ++++++++------
1 file changed, 8 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Support/GlobPattern.cpp b/llvm/lib/Support/GlobPattern.cpp
index 2715229c65be1..39552060ac406 100644
--- a/llvm/lib/Support/GlobPattern.cpp
+++ b/llvm/lib/Support/GlobPattern.cpp
@@ -228,13 +228,15 @@ GlobPattern::SubGlobPattern::create(StringRef S) {
errc::invalid_argument);
StringRef Chars = S.substr(I, J - I);
bool Invert = S[I] == '^' || S[I] == '!';
- Expected<BitVector> BV =
- Invert ? expand(Chars.substr(1), S) : expand(Chars, S);
- if (!BV)
- return BV.takeError();
if (Invert)
- BV->flip();
- Pat.Brackets.push_back(Bracket{J + 1, std::move(*BV)});
+ Chars = Chars.drop_front();
+ Expected<BitVector> BVOrErr = expand(Chars, S);
+ if (!BVOrErr)
+ return BVOrErr.takeError();
+ BitVector &BV = *BVOrErr;
+ if (Invert)
+ BV.flip();
+ Pat.Brackets.push_back(Bracket{J + 1, std::move(BV)});
I = J;
} else if (S[I] == '\\') {
if (++I == E)
>From bb3a72c711e015687179a56074f7d76cd00a4b9b Mon Sep 17 00:00:00 2001
From: Vitaly Buka <vitalybuka at google.com>
Date: Tue, 9 Jun 2026 21:54:24 -0700
Subject: [PATCH 27/30] [NFC][Support] Define Prefix/SuffixMetacharacters
constants
Extract literal metacharacter strings used in GlobPattern into static constexpr arrays to improve consistency and maintainability.
Assisted-by: Gemini
Pull Request: https://github.com/llvm/llvm-project/pull/202850
---
llvm/lib/Support/GlobPattern.cpp | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Support/GlobPattern.cpp b/llvm/lib/Support/GlobPattern.cpp
index 39552060ac406..1aaddbb8408a3 100644
--- a/llvm/lib/Support/GlobPattern.cpp
+++ b/llvm/lib/Support/GlobPattern.cpp
@@ -16,6 +16,9 @@
using namespace llvm;
+static constexpr char PrefixMetacharacters[] = "?*[{\\";
+static constexpr char SuffixMetacharacters[] = "?*[]{}\\";
+
// Expands character ranges and returns a bitmap.
// For example, "a-cf-hz" is expanded to "abcfghz".
static Expected<BitVector> expand(StringRef S, StringRef Original) {
@@ -135,7 +138,7 @@ parseBraceExpansions(StringRef S, std::optional<size_t> MaxSubPatterns) {
static StringRef maxPlainSubstring(StringRef S) {
StringRef Best;
while (!S.empty()) {
- size_t PrefixSize = S.find_first_of("?*[{\\");
+ size_t PrefixSize = S.find_first_of(PrefixMetacharacters);
if (PrefixSize == std::string::npos)
PrefixSize = S.size();
@@ -181,7 +184,7 @@ GlobPattern::create(StringRef S, std::optional<size_t> MaxSubPatterns) {
Pat.Pattern = S;
// Store the prefix that does not contain any metacharacter.
- Pat.PrefixSize = S.find_first_of("?*[{\\");
+ Pat.PrefixSize = S.find_first_of(PrefixMetacharacters);
if (Pat.PrefixSize == std::string::npos) {
Pat.PrefixSize = S.size();
return Pat;
@@ -189,7 +192,7 @@ GlobPattern::create(StringRef S, std::optional<size_t> MaxSubPatterns) {
S = S.substr(Pat.PrefixSize);
// Just in case we stop on unmatched opening brackets.
- size_t SuffixStart = S.find_last_of("?*[]{}\\");
+ size_t SuffixStart = S.find_last_of(SuffixMetacharacters);
assert(SuffixStart != std::string::npos);
if (S[SuffixStart] == '\\')
++SuffixStart;
>From f3f4f7d41f94c4aa5507f66b41fdcc9d894261a8 Mon Sep 17 00:00:00 2001
From: Vitaly Buka <vitalybuka at google.com>
Date: Tue, 9 Jun 2026 22:07:08 -0700
Subject: [PATCH 28/30] [Support] Implement slash-agnostic path matching in
GlobPattern
Add a SlashAgnostic option to GlobPattern that treats forward slashes and
backslashes as interchangeable path separators when matching.
When enabled:
- We conservatively reduce the plain prefix and suffix by treating path
separators as metacharacters. This ensures that path separators are
matched via the slash-agnostic state machine rather than plain string
comparison.
- Brackets containing either '/' or '\' are adjusted to match both separators.
- Character comparisons in the state machine (matchChar) treat '/' and
'\' as equivalent.
Co-authored-by: Devon Loehr <DKLoehr at users.noreply.github.com>
Assisted-by: Gemini
Pull Request: https://github.com/llvm/llvm-project/pull/202854
---
llvm/include/llvm/Support/GlobPattern.h | 9 ++--
llvm/lib/Support/GlobPattern.cpp | 53 +++++++++++++-----
llvm/unittests/Support/GlobPatternTest.cpp | 62 ++++++++++++++++++++++
3 files changed, 107 insertions(+), 17 deletions(-)
diff --git a/llvm/include/llvm/Support/GlobPattern.h b/llvm/include/llvm/Support/GlobPattern.h
index 1a6c0ab292ac8..ec3631ade518a 100644
--- a/llvm/include/llvm/Support/GlobPattern.h
+++ b/llvm/include/llvm/Support/GlobPattern.h
@@ -56,7 +56,8 @@ class GlobPattern {
/// created from expanding braces otherwise disable
/// brace expansion
LLVM_ABI static Expected<GlobPattern>
- create(StringRef Pat, std::optional<size_t> MaxSubPatterns = {});
+ create(StringRef Pat, std::optional<size_t> MaxSubPatterns = {},
+ bool SlashAgnostic = false);
/// \returns \p true if \p S matches this glob pattern
LLVM_ABI bool match(StringRef S) const;
@@ -87,12 +88,14 @@ class GlobPattern {
StringRef Pattern;
size_t PrefixSize = 0;
size_t SuffixSize = 0;
+ bool SlashAgnostic = false;
struct SubGlobPattern {
/// \param Pat the pattern to match against
- LLVM_ABI static Expected<SubGlobPattern> create(StringRef Pat);
+ LLVM_ABI static Expected<SubGlobPattern> create(StringRef Pat,
+ bool SlashAgnostic = false);
/// \returns \p true if \p S matches this glob pattern
- LLVM_ABI bool match(StringRef S) const;
+ LLVM_ABI bool match(StringRef S, bool SlashAgnostic) const;
StringRef getPat() const { return StringRef(Pat.data(), Pat.size()); }
// Brackets with their end position and matched bytes.
diff --git a/llvm/lib/Support/GlobPattern.cpp b/llvm/lib/Support/GlobPattern.cpp
index 1aaddbb8408a3..e518c1df3d13d 100644
--- a/llvm/lib/Support/GlobPattern.cpp
+++ b/llvm/lib/Support/GlobPattern.cpp
@@ -18,6 +18,8 @@ using namespace llvm;
static constexpr char PrefixMetacharacters[] = "?*[{\\";
static constexpr char SuffixMetacharacters[] = "?*[]{}\\";
+static constexpr char PrefixMetacharactersWithSlash[] = "?*[{\\/";
+static constexpr char SuffixMetacharactersWithSlash[] = "?*[]{}\\/";
// Expands character ranges and returns a bitmap.
// For example, "a-cf-hz" is expanded to "abcfghz".
@@ -135,10 +137,12 @@ parseBraceExpansions(StringRef S, std::optional<size_t> MaxSubPatterns) {
return std::move(SubPatterns);
}
-static StringRef maxPlainSubstring(StringRef S) {
+static StringRef maxPlainSubstring(StringRef S, bool SlashAgnostic) {
+ const char *Metas =
+ SlashAgnostic ? PrefixMetacharactersWithSlash : PrefixMetacharacters;
StringRef Best;
while (!S.empty()) {
- size_t PrefixSize = S.find_first_of(PrefixMetacharacters);
+ size_t PrefixSize = S.find_first_of(Metas);
if (PrefixSize == std::string::npos)
PrefixSize = S.size();
@@ -178,13 +182,20 @@ static StringRef maxPlainSubstring(StringRef S) {
return Best;
}
-Expected<GlobPattern>
-GlobPattern::create(StringRef S, std::optional<size_t> MaxSubPatterns) {
+Expected<GlobPattern> GlobPattern::create(StringRef S,
+ std::optional<size_t> MaxSubPatterns,
+ bool SlashAgnostic) {
GlobPattern Pat;
+ Pat.SlashAgnostic = SlashAgnostic;
Pat.Pattern = S;
+ const char *PrefixMetas =
+ SlashAgnostic ? PrefixMetacharactersWithSlash : PrefixMetacharacters;
+ const char *SuffixMetas =
+ SlashAgnostic ? SuffixMetacharactersWithSlash : SuffixMetacharacters;
+
// Store the prefix that does not contain any metacharacter.
- Pat.PrefixSize = S.find_first_of(PrefixMetacharacters);
+ Pat.PrefixSize = S.find_first_of(PrefixMetas);
if (Pat.PrefixSize == std::string::npos) {
Pat.PrefixSize = S.size();
return Pat;
@@ -192,7 +203,7 @@ GlobPattern::create(StringRef S, std::optional<size_t> MaxSubPatterns) {
S = S.substr(Pat.PrefixSize);
// Just in case we stop on unmatched opening brackets.
- size_t SuffixStart = S.find_last_of(SuffixMetacharacters);
+ size_t SuffixStart = S.find_last_of(SuffixMetas);
assert(SuffixStart != std::string::npos);
if (S[SuffixStart] == '\\')
++SuffixStart;
@@ -205,7 +216,7 @@ GlobPattern::create(StringRef S, std::optional<size_t> MaxSubPatterns) {
if (auto Err = parseBraceExpansions(S, MaxSubPatterns).moveInto(SubPats))
return std::move(Err);
for (StringRef SubPat : SubPats) {
- auto SubGlobOrErr = SubGlobPattern::create(SubPat);
+ auto SubGlobOrErr = SubGlobPattern::create(SubPat, SlashAgnostic);
if (!SubGlobOrErr)
return SubGlobOrErr.takeError();
Pat.SubGlobs.push_back(*SubGlobOrErr);
@@ -215,7 +226,7 @@ GlobPattern::create(StringRef S, std::optional<size_t> MaxSubPatterns) {
}
Expected<GlobPattern::SubGlobPattern>
-GlobPattern::SubGlobPattern::create(StringRef S) {
+GlobPattern::SubGlobPattern::create(StringRef S, bool SlashAgnostic) {
SubGlobPattern Pat;
// Parse brackets.
@@ -237,6 +248,12 @@ GlobPattern::SubGlobPattern::create(StringRef S) {
if (!BVOrErr)
return BVOrErr.takeError();
BitVector &BV = *BVOrErr;
+ if (SlashAgnostic) {
+ if (BV['\\'] || BV['/']) {
+ BV.set('\\');
+ BV.set('/');
+ }
+ }
if (Invert)
BV.flip();
Pat.Brackets.push_back(Bracket{J + 1, std::move(BV)});
@@ -251,8 +268,8 @@ GlobPattern::SubGlobPattern::create(StringRef S) {
}
StringRef GlobPattern::longest_substr() const {
- return maxPlainSubstring(
- Pattern.drop_front(PrefixSize).drop_back(SuffixSize));
+ return maxPlainSubstring(Pattern.drop_front(PrefixSize).drop_back(SuffixSize),
+ SlashAgnostic);
}
bool GlobPattern::match(StringRef S) const {
@@ -263,15 +280,23 @@ bool GlobPattern::match(StringRef S) const {
if (SubGlobs.empty() && S.empty())
return true;
for (auto &Glob : SubGlobs)
- if (Glob.match(S))
+ if (Glob.match(S, SlashAgnostic))
return true;
return false;
}
+static bool matchChar(char PatC, char QueryC, bool SlashAgnostic) {
+ if (PatC == QueryC)
+ return true;
+ return SlashAgnostic && (PatC == '\\' || PatC == '/') &&
+ (QueryC == '\\' || QueryC == '/');
+}
+
// Factor the pattern into segments split by '*'. The segment is matched
// sequentianlly by finding the first occurrence past the end of the previous
// match.
-bool GlobPattern::SubGlobPattern::match(StringRef Str) const {
+bool GlobPattern::SubGlobPattern::match(StringRef Str,
+ bool SlashAgnostic) const {
const char *P = Pat.data(), *SegmentBegin = nullptr, *S = Str.data(),
*SavedS = S;
const char *const PEnd = P + Pat.size(), *const End = S + Str.size();
@@ -293,12 +318,12 @@ bool GlobPattern::SubGlobPattern::match(StringRef Str) const {
continue;
}
} else if (*P == '\\') {
- if (*++P == *S) {
+ if (matchChar(*++P, *S, SlashAgnostic)) {
++P;
++S;
continue;
}
- } else if (*P == *S || *P == '?') {
+ } else if (matchChar(*P, *S, SlashAgnostic) || *P == '?') {
++P;
++S;
continue;
diff --git a/llvm/unittests/Support/GlobPatternTest.cpp b/llvm/unittests/Support/GlobPatternTest.cpp
index 872a21e948d7a..35423e37a3ae0 100644
--- a/llvm/unittests/Support/GlobPatternTest.cpp
+++ b/llvm/unittests/Support/GlobPatternTest.cpp
@@ -327,6 +327,30 @@ TEST_F(GlobPatternTest, PrefixSuffix) {
ASSERT_TRUE((bool)Pat);
EXPECT_EQ("", Pat->prefix());
EXPECT_EQ("cd", Pat->suffix());
+
+ Pat = GlobPattern::create("ab/cd", /*MaxSubPatterns=*/{},
+ /*SlashAgnostic=*/true);
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("ab", Pat->prefix());
+ EXPECT_EQ("cd", Pat->suffix());
+
+ Pat = GlobPattern::create("ab\\cd", /*MaxSubPatterns=*/{},
+ /*SlashAgnostic=*/true);
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("ab", Pat->prefix());
+ EXPECT_EQ("d", Pat->suffix());
+
+ Pat = GlobPattern::create("ab/cd", /*MaxSubPatterns=*/{},
+ /*SlashAgnostic=*/false);
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("ab/cd", Pat->prefix());
+ EXPECT_EQ("", Pat->suffix());
+
+ Pat = GlobPattern::create("ab\\cd", /*MaxSubPatterns=*/{},
+ /*SlashAgnostic=*/false);
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("ab", Pat->prefix());
+ EXPECT_EQ("d", Pat->suffix());
}
TEST_F(GlobPatternTest, Substr) {
@@ -393,6 +417,26 @@ TEST_F(GlobPatternTest, Substr) {
Pat = GlobPattern::create("a*bcdef{g}*h");
ASSERT_TRUE((bool)Pat);
EXPECT_EQ("bcdef", Pat->longest_substr());
+
+ Pat = GlobPattern::create("a*bc/de*f", /*MaxSubPatterns=*/{},
+ /*SlashAgnostic=*/true);
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("bc", Pat->longest_substr());
+
+ Pat = GlobPattern::create("a*bc\\de*f", /*MaxSubPatterns=*/{},
+ /*SlashAgnostic=*/true);
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("bc", Pat->longest_substr());
+
+ Pat = GlobPattern::create("a*bc/de*f", /*MaxSubPatterns=*/{},
+ /*SlashAgnostic=*/false);
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("bc/de", Pat->longest_substr());
+
+ Pat = GlobPattern::create("a*bc\\de*f", /*MaxSubPatterns=*/{},
+ /*SlashAgnostic=*/false);
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("bc", Pat->longest_substr());
}
TEST_F(GlobPatternTest, Pathological) {
@@ -409,4 +453,22 @@ TEST_F(GlobPatternTest, Pathological) {
EXPECT_FALSE(Pat->match(S));
EXPECT_TRUE(Pat->match(S + 'b'));
}
+
+TEST_F(GlobPatternTest, SlashAgnosticMatch) {
+ auto Pat1 = GlobPattern::create("foo\\\\bar[a\\\\-z]", 1024,
+ /*SlashAgnostic=*/true);
+ ASSERT_TRUE((bool)Pat1);
+ EXPECT_TRUE(Pat1->match("foo/bar\\"));
+ EXPECT_TRUE(Pat1->match("foo/barb"));
+ EXPECT_TRUE(Pat1->match("foo/bar/"));
+}
+
+TEST_F(GlobPatternTest, SlashAgnosticMatchInverted) {
+ auto Pat = GlobPattern::create("foo\\\\bar[^a\\\\-z]", 1024,
+ /*SlashAgnostic=*/true);
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_FALSE(Pat->match("foo/bar/"));
+ EXPECT_FALSE(Pat->match("foo/barb"));
+ EXPECT_TRUE(Pat->match("foo/bar1"));
+}
}
>From 1bd97d5ff2a8f70d4a078e4cb34bfd2677e226ae Mon Sep 17 00:00:00 2001
From: Vitaly Buka <vitalybuka at google.com>
Date: Tue, 9 Jun 2026 23:02:10 -0700
Subject: [PATCH 29/30] [NFC][Support] Use is_style_windows in SpecialCaseList
Use llvm::sys::path::is_style_windows(llvm::sys::path::Style::native) instead of is_separator('\\') to check whether the native path style is Windows.
Assisted-by: Gemini
---
llvm/lib/Support/SpecialCaseList.cpp | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Support/SpecialCaseList.cpp b/llvm/lib/Support/SpecialCaseList.cpp
index 1a51adb667601..b51d7d37bba62 100644
--- a/llvm/lib/Support/SpecialCaseList.cpp
+++ b/llvm/lib/Support/SpecialCaseList.cpp
@@ -344,7 +344,9 @@ bool SpecialCaseList::parse(unsigned FileIdx, const MemoryBuffer *MB,
bool RemoveDotSlash = Version > 2;
- bool SlashAgnostic = Version > 3 && llvm::sys::path::is_separator('\\');
+ bool SlashAgnostic = Version > 3 &&
+ llvm::sys::path::is_style_windows(
+ llvm::sys::path::Style::native);
auto ErrOrSection = addSection("*", FileIdx, 1, true);
if (auto Err = ErrOrSection.takeError()) {
>From d19dcda89e1a217e07efb0f0899e76c480b82c40 Mon Sep 17 00:00:00 2001
From: Vitaly Buka <vitalybuka at google.com>
Date: Tue, 9 Jun 2026 23:02:10 -0700
Subject: [PATCH 30/30] [NFC][Support] Use is_style_windows in SpecialCaseList
Use llvm::sys::path::is_style_windows(llvm::sys::path::Style::native) instead of is_separator('\\') to check whether the native path style is Windows.
Assisted-by: Gemini
---
llvm/lib/Support/SpecialCaseList.cpp | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Support/SpecialCaseList.cpp b/llvm/lib/Support/SpecialCaseList.cpp
index 1a51adb667601..a821e07ca26ca 100644
--- a/llvm/lib/Support/SpecialCaseList.cpp
+++ b/llvm/lib/Support/SpecialCaseList.cpp
@@ -344,7 +344,8 @@ bool SpecialCaseList::parse(unsigned FileIdx, const MemoryBuffer *MB,
bool RemoveDotSlash = Version > 2;
- bool SlashAgnostic = Version > 3 && llvm::sys::path::is_separator('\\');
+ bool SlashAgnostic = Version > 3 && llvm::sys::path::is_style_windows(
+ llvm::sys::path::Style::native);
auto ErrOrSection = addSection("*", FileIdx, 1, true);
if (auto Err = ErrOrSection.takeError()) {
More information about the cfe-commits
mailing list