[clang] [Clang] [Diagnostics] Simplify filenames that contain '..' (PR #143520)
via cfe-commits
cfe-commits at lists.llvm.org
Wed Jul 2 03:50:48 PDT 2025
https://github.com/Sirraide updated https://github.com/llvm/llvm-project/pull/143520
>From 15c0a79d6a0cd65d88fbe152275b224201e632a1 Mon Sep 17 00:00:00 2001
From: Sirraide <aeternalmail at gmail.com>
Date: Tue, 10 Jun 2025 14:32:32 +0200
Subject: [PATCH 1/5] [Clang] [Diagnostics] Simplify filenames that contain
'..'
---
clang/include/clang/Frontend/TextDiagnostic.h | 1 +
clang/lib/Frontend/TextDiagnostic.cpp | 32 ++++++++++++-------
2 files changed, 21 insertions(+), 12 deletions(-)
diff --git a/clang/include/clang/Frontend/TextDiagnostic.h b/clang/include/clang/Frontend/TextDiagnostic.h
index e2e88d4d648a2..9c77bc3e00e19 100644
--- a/clang/include/clang/Frontend/TextDiagnostic.h
+++ b/clang/include/clang/Frontend/TextDiagnostic.h
@@ -35,6 +35,7 @@ namespace clang {
class TextDiagnostic : public DiagnosticRenderer {
raw_ostream &OS;
const Preprocessor *PP;
+ llvm::StringMap<SmallString<128>> SimplifiedFileNameCache;
public:
TextDiagnostic(raw_ostream &OS, const LangOptions &LangOpts,
diff --git a/clang/lib/Frontend/TextDiagnostic.cpp b/clang/lib/Frontend/TextDiagnostic.cpp
index b9e681b52e509..edbad42b39950 100644
--- a/clang/lib/Frontend/TextDiagnostic.cpp
+++ b/clang/lib/Frontend/TextDiagnostic.cpp
@@ -738,12 +738,20 @@ void TextDiagnostic::printDiagnosticMessage(raw_ostream &OS,
}
void TextDiagnostic::emitFilename(StringRef Filename, const SourceManager &SM) {
-#ifdef _WIN32
- SmallString<4096> TmpFilename;
-#endif
- if (DiagOpts.AbsolutePath) {
- auto File = SM.getFileManager().getOptionalFileRef(Filename);
- if (File) {
+ auto File = SM.getFileManager().getOptionalFileRef(Filename);
+
+ // Try to simplify paths that contain '..' in any case since paths to
+ // standard library headers especially tend to get quite long otherwise.
+ // Only do that for local filesystems though to avoid slowing down
+ // compilation too much.
+ auto AlwaysSimplify = [&] {
+ return File->getName().contains("..") &&
+ llvm::sys::fs::is_local(File->getName());
+ };
+
+ if (File && (DiagOpts.AbsolutePath || AlwaysSimplify())) {
+ SmallString<128> &CacheEntry = SimplifiedFileNameCache[Filename];
+ if (CacheEntry.empty()) {
// We want to print a simplified absolute path, i. e. without "dots".
//
// The hardest part here are the paths like "<part1>/<link>/../<part2>".
@@ -759,15 +767,15 @@ void TextDiagnostic::emitFilename(StringRef Filename, const SourceManager &SM) {
// on Windows we can just use llvm::sys::path::remove_dots(), because,
// on that system, both aforementioned paths point to the same place.
#ifdef _WIN32
- TmpFilename = File->getName();
- llvm::sys::fs::make_absolute(TmpFilename);
- llvm::sys::path::native(TmpFilename);
- llvm::sys::path::remove_dots(TmpFilename, /* remove_dot_dot */ true);
- Filename = StringRef(TmpFilename.data(), TmpFilename.size());
+ CacheEntry = File->getName();
+ llvm::sys::fs::make_absolute(CacheEntry);
+ llvm::sys::path::native(CacheEntry);
+ llvm::sys::path::remove_dots(CacheEntry, /* remove_dot_dot */ true);
#else
- Filename = SM.getFileManager().getCanonicalName(*File);
+ CacheEntry = SM.getFileManager().getCanonicalName(*File);
#endif
}
+ Filename = CacheEntry;
}
OS << Filename;
>From 97c0accf4fb3dc983bbb9f344ad66d0281b231fb Mon Sep 17 00:00:00 2001
From: Sirraide <aeternalmail at gmail.com>
Date: Tue, 10 Jun 2025 14:47:43 +0200
Subject: [PATCH 2/5] Use whichever path ends up being shorter
---
clang/lib/Frontend/TextDiagnostic.cpp | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/clang/lib/Frontend/TextDiagnostic.cpp b/clang/lib/Frontend/TextDiagnostic.cpp
index edbad42b39950..b77c1797c51c5 100644
--- a/clang/lib/Frontend/TextDiagnostic.cpp
+++ b/clang/lib/Frontend/TextDiagnostic.cpp
@@ -774,6 +774,12 @@ void TextDiagnostic::emitFilename(StringRef Filename, const SourceManager &SM) {
#else
CacheEntry = SM.getFileManager().getCanonicalName(*File);
#endif
+
+ // In some cases, the resolved path may actually end up being longer (e.g.
+ // if it was originally a relative path), so just retain whichever one
+ // ends up being shorter.
+ if (!DiagOpts.AbsolutePath && CacheEntry.size() > Filename.size())
+ CacheEntry = Filename;
}
Filename = CacheEntry;
}
>From 529aac1a9ae4969b4389156cb5d44fcecd445cbc Mon Sep 17 00:00:00 2001
From: Sirraide <aeternalmail at gmail.com>
Date: Tue, 10 Jun 2025 20:36:14 +0200
Subject: [PATCH 3/5] Move path computation into SourceManager
---
clang/include/clang/Basic/SourceManager.h | 9 ++++
clang/lib/Basic/SourceManager.cpp | 55 +++++++++++++++++++++++
clang/lib/Frontend/SARIFDiagnostic.cpp | 31 +------------
clang/lib/Frontend/TextDiagnostic.cpp | 48 +-------------------
4 files changed, 66 insertions(+), 77 deletions(-)
diff --git a/clang/include/clang/Basic/SourceManager.h b/clang/include/clang/Basic/SourceManager.h
index cd3dac9133223..92080e7891eca 100644
--- a/clang/include/clang/Basic/SourceManager.h
+++ b/clang/include/clang/Basic/SourceManager.h
@@ -820,6 +820,12 @@ class SourceManager : public RefCountedBase<SourceManager> {
mutable std::unique_ptr<SrcMgr::SLocEntry> FakeSLocEntryForRecovery;
+ /// Cache for filenames used in diagnostics. See 'getNameForDiagnostic()'.
+ mutable llvm::StringMap<StringRef> DiagNames;
+
+ /// Allocator for absolute/short names.
+ mutable llvm::BumpPtrAllocator DiagNameAlloc;
+
/// Lazily computed map of macro argument chunks to their expanded
/// source location.
using MacroArgsMap = std::map<unsigned, SourceLocation>;
@@ -1847,6 +1853,9 @@ class SourceManager : public RefCountedBase<SourceManager> {
/// \return Location of the top-level macro caller.
SourceLocation getTopMacroCallerLoc(SourceLocation Loc) const;
+ /// Retrieve the name of a file suitable for diagnostics.
+ StringRef getNameForDiagnostic(StringRef Filename) const;
+
private:
friend class ASTReader;
friend class ASTWriter;
diff --git a/clang/lib/Basic/SourceManager.cpp b/clang/lib/Basic/SourceManager.cpp
index 09e5c6547fb51..c726f86e2d4e0 100644
--- a/clang/lib/Basic/SourceManager.cpp
+++ b/clang/lib/Basic/SourceManager.cpp
@@ -2403,3 +2403,58 @@ SourceManagerForFile::SourceManagerForFile(StringRef FileName,
assert(ID.isValid());
SourceMgr->setMainFileID(ID);
}
+
+StringRef SourceManager::getNameForDiagnostic(StringRef Filename) const {
+ OptionalFileEntryRef File = getFileManager().getOptionalFileRef(Filename);
+ if (!File)
+ return Filename;
+
+ // Try to simplify paths that contain '..' in any case since paths to
+ // standard library headers especially tend to get quite long otherwise.
+ // Only do that for local filesystems though to avoid slowing down
+ // compilation too much.
+ bool Absolute = Diag.getDiagnosticOptions().AbsolutePath;
+ bool AlwaysSimplify = File->getName().contains("..") &&
+ llvm::sys::fs::is_local(File->getName());
+ if (!Absolute && !AlwaysSimplify)
+ return Filename;
+
+ // This may involve computing canonical names, so cache the result.
+ StringRef &CacheEntry = DiagNames[Filename];
+ if (!CacheEntry.empty())
+ return CacheEntry;
+
+ // We want to print a simplified absolute path, i. e. without "dots".
+ //
+ // The hardest part here are the paths like "<part1>/<link>/../<part2>".
+ // On Unix-like systems, we cannot just collapse "<link>/..", because
+ // paths are resolved sequentially, and, thereby, the path
+ // "<part1>/<part2>" may point to a different location. That is why
+ // we use FileManager::getCanonicalName(), which expands all indirections
+ // with llvm::sys::fs::real_path() and caches the result.
+ //
+ // On the other hand, it would be better to preserve as much of the
+ // original path as possible, because that helps a user to recognize it.
+ // real_path() expands all links, which sometimes too much. Luckily,
+ // on Windows we can just use llvm::sys::path::remove_dots(), because,
+ // on that system, both aforementioned paths point to the same place.
+ SmallString<256> TempBuf;
+#ifdef _WIN32
+ TempBuf = File->getName();
+ llvm::sys::fs::make_absolute(TempBuf);
+ llvm::sys::path::native(TempBuf);
+ llvm::sys::path::remove_dots(TempBuf, /* remove_dot_dot */ true);
+#else
+ TempBuf = getFileManager().getCanonicalName(*File);
+#endif
+
+ // In some cases, the resolved path may actually end up being longer (e.g.
+ // if it was originally a relative path), so just retain whichever one
+ // ends up being shorter.
+ if (!Absolute && TempBuf.size() > Filename.size())
+ CacheEntry = Filename;
+ else
+ CacheEntry = TempBuf.str().copy(DiagNameAlloc);
+
+ return CacheEntry;
+}
diff --git a/clang/lib/Frontend/SARIFDiagnostic.cpp b/clang/lib/Frontend/SARIFDiagnostic.cpp
index e2aec7f677f12..c2f6ece2e91ff 100644
--- a/clang/lib/Frontend/SARIFDiagnostic.cpp
+++ b/clang/lib/Frontend/SARIFDiagnostic.cpp
@@ -163,36 +163,7 @@ SARIFDiagnostic::addDiagnosticLevelToRule(SarifRule Rule,
llvm::StringRef SARIFDiagnostic::emitFilename(StringRef Filename,
const SourceManager &SM) {
- if (DiagOpts.AbsolutePath) {
- auto File = SM.getFileManager().getOptionalFileRef(Filename);
- if (File) {
- // We want to print a simplified absolute path, i. e. without "dots".
- //
- // The hardest part here are the paths like "<part1>/<link>/../<part2>".
- // On Unix-like systems, we cannot just collapse "<link>/..", because
- // paths are resolved sequentially, and, thereby, the path
- // "<part1>/<part2>" may point to a different location. That is why
- // we use FileManager::getCanonicalName(), which expands all indirections
- // with llvm::sys::fs::real_path() and caches the result.
- //
- // On the other hand, it would be better to preserve as much of the
- // original path as possible, because that helps a user to recognize it.
- // real_path() expands all links, which is sometimes too much. Luckily,
- // on Windows we can just use llvm::sys::path::remove_dots(), because,
- // on that system, both aforementioned paths point to the same place.
-#ifdef _WIN32
- SmallString<256> TmpFilename = File->getName();
- llvm::sys::fs::make_absolute(TmpFilename);
- llvm::sys::path::native(TmpFilename);
- llvm::sys::path::remove_dots(TmpFilename, /* remove_dot_dot */ true);
- Filename = StringRef(TmpFilename.data(), TmpFilename.size());
-#else
- Filename = SM.getFileManager().getCanonicalName(*File);
-#endif
- }
- }
-
- return Filename;
+ return SM.getNameForDiagnostic(Filename);
}
/// Print out the file/line/column information and include trace.
diff --git a/clang/lib/Frontend/TextDiagnostic.cpp b/clang/lib/Frontend/TextDiagnostic.cpp
index b77c1797c51c5..354331f920aa4 100644
--- a/clang/lib/Frontend/TextDiagnostic.cpp
+++ b/clang/lib/Frontend/TextDiagnostic.cpp
@@ -738,53 +738,7 @@ void TextDiagnostic::printDiagnosticMessage(raw_ostream &OS,
}
void TextDiagnostic::emitFilename(StringRef Filename, const SourceManager &SM) {
- auto File = SM.getFileManager().getOptionalFileRef(Filename);
-
- // Try to simplify paths that contain '..' in any case since paths to
- // standard library headers especially tend to get quite long otherwise.
- // Only do that for local filesystems though to avoid slowing down
- // compilation too much.
- auto AlwaysSimplify = [&] {
- return File->getName().contains("..") &&
- llvm::sys::fs::is_local(File->getName());
- };
-
- if (File && (DiagOpts.AbsolutePath || AlwaysSimplify())) {
- SmallString<128> &CacheEntry = SimplifiedFileNameCache[Filename];
- if (CacheEntry.empty()) {
- // We want to print a simplified absolute path, i. e. without "dots".
- //
- // The hardest part here are the paths like "<part1>/<link>/../<part2>".
- // On Unix-like systems, we cannot just collapse "<link>/..", because
- // paths are resolved sequentially, and, thereby, the path
- // "<part1>/<part2>" may point to a different location. That is why
- // we use FileManager::getCanonicalName(), which expands all indirections
- // with llvm::sys::fs::real_path() and caches the result.
- //
- // On the other hand, it would be better to preserve as much of the
- // original path as possible, because that helps a user to recognize it.
- // real_path() expands all links, which sometimes too much. Luckily,
- // on Windows we can just use llvm::sys::path::remove_dots(), because,
- // on that system, both aforementioned paths point to the same place.
-#ifdef _WIN32
- CacheEntry = File->getName();
- llvm::sys::fs::make_absolute(CacheEntry);
- llvm::sys::path::native(CacheEntry);
- llvm::sys::path::remove_dots(CacheEntry, /* remove_dot_dot */ true);
-#else
- CacheEntry = SM.getFileManager().getCanonicalName(*File);
-#endif
-
- // In some cases, the resolved path may actually end up being longer (e.g.
- // if it was originally a relative path), so just retain whichever one
- // ends up being shorter.
- if (!DiagOpts.AbsolutePath && CacheEntry.size() > Filename.size())
- CacheEntry = Filename;
- }
- Filename = CacheEntry;
- }
-
- OS << Filename;
+ OS << SM.getNameForDiagnostic(Filename);
}
/// Print out the file/line/column information and include trace.
>From 5d308877424532874a06735707236474c03ddfab Mon Sep 17 00:00:00 2001
From: Sirraide <aeternalmail at gmail.com>
Date: Fri, 13 Jun 2025 20:32:59 +0200
Subject: [PATCH 4/5] Fix crash and disable nfs check on windows
---
clang/include/clang/Basic/SourceManager.h | 9 +++++-
clang/lib/Basic/SourceManager.cpp | 37 +++++++++++++++++------
clang/lib/Frontend/SARIFDiagnostic.cpp | 2 +-
clang/lib/Frontend/TextDiagnostic.cpp | 2 +-
clang/test/Frontend/absolute-paths.c | 6 ++--
5 files changed, 40 insertions(+), 16 deletions(-)
diff --git a/clang/include/clang/Basic/SourceManager.h b/clang/include/clang/Basic/SourceManager.h
index 92080e7891eca..bbf38f40a9534 100644
--- a/clang/include/clang/Basic/SourceManager.h
+++ b/clang/include/clang/Basic/SourceManager.h
@@ -1854,7 +1854,14 @@ class SourceManager : public RefCountedBase<SourceManager> {
SourceLocation getTopMacroCallerLoc(SourceLocation Loc) const;
/// Retrieve the name of a file suitable for diagnostics.
- StringRef getNameForDiagnostic(StringRef Filename) const;
+ // FIXME: Passing in the DiagnosticOptions here is a workaround for the
+ // fact that installapi does some weird things with DiagnosticsEngines,
+ // which causes the 'Diag' member of SourceManager (or at least the
+ // DiagnosticsOptions member thereof) to be a dangling reference
+ // sometimes. We should probably fix that or decouple the two classes
+ // to avoid this issue entirely.
+ StringRef getNameForDiagnostic(StringRef Filename,
+ const DiagnosticOptions &Opts) const;
private:
friend class ASTReader;
diff --git a/clang/lib/Basic/SourceManager.cpp b/clang/lib/Basic/SourceManager.cpp
index c726f86e2d4e0..7fa638567c55c 100644
--- a/clang/lib/Basic/SourceManager.cpp
+++ b/clang/lib/Basic/SourceManager.cpp
@@ -2404,19 +2404,36 @@ SourceManagerForFile::SourceManagerForFile(StringRef FileName,
SourceMgr->setMainFileID(ID);
}
-StringRef SourceManager::getNameForDiagnostic(StringRef Filename) const {
+StringRef
+SourceManager::getNameForDiagnostic(StringRef Filename,
+ const DiagnosticOptions &Opts) const {
OptionalFileEntryRef File = getFileManager().getOptionalFileRef(Filename);
if (!File)
return Filename;
- // Try to simplify paths that contain '..' in any case since paths to
- // standard library headers especially tend to get quite long otherwise.
- // Only do that for local filesystems though to avoid slowing down
- // compilation too much.
- bool Absolute = Diag.getDiagnosticOptions().AbsolutePath;
- bool AlwaysSimplify = File->getName().contains("..") &&
- llvm::sys::fs::is_local(File->getName());
- if (!Absolute && !AlwaysSimplify)
+ bool SimplifyPath = [&] {
+ if (Opts.AbsolutePath)
+ return true;
+
+ // Try to simplify paths that contain '..' in any case since paths to
+ // standard library headers especially tend to get quite long otherwise.
+ // Only do that for local filesystems though to avoid slowing down
+ // compilation too much.
+ if (!File->getName().contains(".."))
+ return false;
+
+ // If we're not on Windows, check if we're on a network file system and
+ // avoid simplifying the path in that case since that can be slow. On
+ // Windows, the check for a local filesystem is already slow, so skip it.
+#ifndef _WIN32
+ if (!llvm::sys::fs::is_local(File->getName()))
+ return false;
+#endif
+
+ return true;
+ }();
+
+ if (!SimplifyPath)
return Filename;
// This may involve computing canonical names, so cache the result.
@@ -2451,7 +2468,7 @@ StringRef SourceManager::getNameForDiagnostic(StringRef Filename) const {
// In some cases, the resolved path may actually end up being longer (e.g.
// if it was originally a relative path), so just retain whichever one
// ends up being shorter.
- if (!Absolute && TempBuf.size() > Filename.size())
+ if (!Opts.AbsolutePath && TempBuf.size() > Filename.size())
CacheEntry = Filename;
else
CacheEntry = TempBuf.str().copy(DiagNameAlloc);
diff --git a/clang/lib/Frontend/SARIFDiagnostic.cpp b/clang/lib/Frontend/SARIFDiagnostic.cpp
index c2f6ece2e91ff..b63345857baf1 100644
--- a/clang/lib/Frontend/SARIFDiagnostic.cpp
+++ b/clang/lib/Frontend/SARIFDiagnostic.cpp
@@ -163,7 +163,7 @@ SARIFDiagnostic::addDiagnosticLevelToRule(SarifRule Rule,
llvm::StringRef SARIFDiagnostic::emitFilename(StringRef Filename,
const SourceManager &SM) {
- return SM.getNameForDiagnostic(Filename);
+ return SM.getNameForDiagnostic(Filename, DiagOpts);
}
/// Print out the file/line/column information and include trace.
diff --git a/clang/lib/Frontend/TextDiagnostic.cpp b/clang/lib/Frontend/TextDiagnostic.cpp
index 354331f920aa4..174b4d30d2847 100644
--- a/clang/lib/Frontend/TextDiagnostic.cpp
+++ b/clang/lib/Frontend/TextDiagnostic.cpp
@@ -738,7 +738,7 @@ void TextDiagnostic::printDiagnosticMessage(raw_ostream &OS,
}
void TextDiagnostic::emitFilename(StringRef Filename, const SourceManager &SM) {
- OS << SM.getNameForDiagnostic(Filename);
+ OS << SM.getNameForDiagnostic(Filename, DiagOpts);
}
/// Print out the file/line/column information and include trace.
diff --git a/clang/test/Frontend/absolute-paths.c b/clang/test/Frontend/absolute-paths.c
index e06cf262dd8e2..e507f0d4c34b3 100644
--- a/clang/test/Frontend/absolute-paths.c
+++ b/clang/test/Frontend/absolute-paths.c
@@ -8,9 +8,9 @@
#include "absolute-paths.h"
-// Check whether the diagnostic from the header above includes the dummy
-// directory in the path.
-// NORMAL: SystemHeaderPrefix
+// Check that the bogus prefix we added is stripped out even if absolute paths
+// are disabled.
+// NORMAL-NOT: SystemHeaderPrefix
// ABSOLUTE-NOT: SystemHeaderPrefix
// CHECK: warning: non-void function does not return a value
>From c0f56c3dc484962c0a2916fb652df1304381d274 Mon Sep 17 00:00:00 2001
From: Sirraide <aeternalmail at gmail.com>
Date: Wed, 2 Jul 2025 12:50:37 +0200
Subject: [PATCH 5/5] remove unused member
---
clang/include/clang/Frontend/TextDiagnostic.h | 1 -
1 file changed, 1 deletion(-)
diff --git a/clang/include/clang/Frontend/TextDiagnostic.h b/clang/include/clang/Frontend/TextDiagnostic.h
index 9c77bc3e00e19..e2e88d4d648a2 100644
--- a/clang/include/clang/Frontend/TextDiagnostic.h
+++ b/clang/include/clang/Frontend/TextDiagnostic.h
@@ -35,7 +35,6 @@ namespace clang {
class TextDiagnostic : public DiagnosticRenderer {
raw_ostream &OS;
const Preprocessor *PP;
- llvm::StringMap<SmallString<128>> SimplifiedFileNameCache;
public:
TextDiagnostic(raw_ostream &OS, const LangOptions &LangOpts,
More information about the cfe-commits
mailing list