[llvm] [TableGen] correctly escape dependency filenames (PR #160834)
Ruoyu Zhong via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 26 08:25:20 PDT 2025
https://github.com/ZhongRuoyu updated https://github.com/llvm/llvm-project/pull/160834
>From 7c74a05456ef01aa7176a3e750dd006b2b290ba9 Mon Sep 17 00:00:00 2001
From: Ruoyu Zhong <zhongruoyu at outlook.com>
Date: Fri, 26 Sep 2025 16:31:08 +0800
Subject: [PATCH 1/7] [TableGen] correctly escape dependency filenames
Currently, the *-tblgen tools do not escape special characters in the
filenames they write to dependency files. This can lead to unnecessary
rebuilds when those filenames contain characters that require escaping,
because the build system then misparses the dependency list.
For instance, when building LLVM in a directory that contains spaces
("/home/user/repos/llvm project" in the example below), the build system
always rebuilds a large portion of the project despite nothing having
changed, due to an unescaped space in the dependency filename:
$ ninja -C build -d explain
ninja: Entering directory `build'
ninja explain: /home/user/repos/llvm is dirty
...
$ cat build/include/llvm/IR/IntrinsicsRISCV.h.d
IntrinsicsRISCV.h: /home/user/repos/llvm project/llvm/include/llvm/CodeGen/SDNodeProperties.td ...
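Fix this by escaping special characters in dependency filenames: spaces
and '#' are prefixed with a backslash (any backslashes directly preceding
a space are doubled as well), and '$' is doubled to '$$'. This is
consistent with how GCC, Clang [1] and lld [2] handle this.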
After this change (notice the escaped space):
$ cat build/include/llvm/IR/IntrinsicsRISCV.h.d
IntrinsicsRISCV.h: /home/user/repos/llvm\ project/llvm/include/llvm/CodeGen/SDNodeProperties.td ...
[1]: https://github.com/llvm/llvm-project/blob/2cacf7117ba0fb7c134413a1a51302f8d6649052/clang/lib/Frontend/DependencyFile.cpp#L267-L344
[2]: https://github.com/llvm/llvm-project/blob/2cacf7117ba0fb7c134413a1a51302f8d6649052/lld/ELF/Driver.cpp#L2482-L2503
Signed-off-by: Ruoyu Zhong <zhongruoyu at outlook.com>
---
llvm/lib/TableGen/Main.cpp | 47 +++++++++++++++++++++++++++++++-------
1 file changed, 39 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/TableGen/Main.cpp b/llvm/lib/TableGen/Main.cpp
index f545706d6fe30..991228a7a0106 100644
--- a/llvm/lib/TableGen/Main.cpp
+++ b/llvm/lib/TableGen/Main.cpp
@@ -17,12 +17,14 @@
#include "llvm/TableGen/Main.h"
#include "TGLexer.h"
#include "TGParser.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/ToolOutputFile.h"
@@ -39,24 +41,24 @@ using namespace llvm;
static cl::opt<std::string>
OutputFilename("o", cl::desc("Output filename"), cl::value_desc("filename"),
- cl::init("-"));
+ cl::init("-"));
static cl::opt<std::string>
DependFilename("d",
cl::desc("Dependency filename"),
- cl::value_desc("filename"),
- cl::init(""));
+ cl::value_desc("filename"),
+ cl::init(""));
static cl::opt<std::string>
-InputFilename(cl::Positional, cl::desc("<input file>"), cl::init("-"));
+ InputFilename(cl::Positional, cl::desc("<input file>"), cl::init("-"));
static cl::list<std::string>
IncludeDirs("I", cl::desc("Directory of include files"),
cl::value_desc("directory"), cl::Prefix);
static cl::list<std::string>
-MacroNames("D", cl::desc("Name of the macro to be defined"),
- cl::value_desc("macro name"), cl::Prefix);
+ MacroNames("D", cl::desc("Name of the macro to be defined"),
+ cl::value_desc("macro name"), cl::Prefix);
static cl::opt<bool>
WriteIfChanged("write-if-changed", cl::desc("Only write output if it changed"));
@@ -83,6 +85,35 @@ static int reportError(const char *ProgName, Twine Msg) {
return 1;
}
+/// Escape a filename in the dependency file so that it is correctly
+/// interpreted by `make`. This is consistent with Clang, GCC, and lld.
+static TGLexer::DependenciesSetTy::value_type escapeDependencyFilename(
+ const TGLexer::DependenciesSetTy::value_type &Filename) {
+ std::string Res;
+ raw_string_ostream OS(Res);
+
+ SmallString<256> NativePath;
+ sys::path::native(Filename, NativePath);
+
+ for (unsigned I = 0, E = NativePath.size(); I != E; ++I) {
+ if (NativePath[I] == '#') {
+ OS << '\\';
+ } else if (NativePath[I] == ' ') {
+ OS << '\\';
+ unsigned J = I;
+ while (J > 0 && NativePath[--J] == '\\') {
+ OS << '\\';
+ }
+ } else if (NativePath[I] == '$') {
+ OS << '$';
+ }
+ OS << NativePath[I];
+ }
+
+ OS.flush();
+ return Res;
+}
+
/// Create a dependency file for `-d` option.
///
/// This functionality is really only for the benefit of the build system.
@@ -96,9 +127,9 @@ static int createDependencyFile(const TGParser &Parser, const char *argv0) {
if (EC)
return reportError(argv0, "error opening " + DependFilename + ":" +
EC.message() + "\n");
- DepOut.os() << OutputFilename << ":";
+ DepOut.os() << escapeDependencyFilename(OutputFilename) << ":";
for (const auto &Dep : Parser.getDependencies()) {
- DepOut.os() << ' ' << Dep;
+ DepOut.os() << ' ' << escapeDependencyFilename(Dep);
}
DepOut.os() << "\n";
DepOut.keep();
>From a8d9a90d36bc896a949c9a9ae6de1c57a2f6baa6 Mon Sep 17 00:00:00 2001
From: Ruoyu Zhong <zhongruoyu at outlook.com>
Date: Fri, 26 Sep 2025 16:38:09 +0800
Subject: [PATCH 2/7] [TableGen] git clang-format
Signed-off-by: Ruoyu Zhong <zhongruoyu at outlook.com>
---
llvm/lib/TableGen/Main.cpp | 8 +++-----
1 file changed, 3 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/TableGen/Main.cpp b/llvm/lib/TableGen/Main.cpp
index 991228a7a0106..76b161d087f60 100644
--- a/llvm/lib/TableGen/Main.cpp
+++ b/llvm/lib/TableGen/Main.cpp
@@ -39,13 +39,11 @@
#include <utility>
using namespace llvm;
-static cl::opt<std::string>
-OutputFilename("o", cl::desc("Output filename"), cl::value_desc("filename"),
+static cl::opt<std::string> OutputFilename("o", cl::desc("Output filename"),
+ cl::value_desc("filename"),
cl::init("-"));
-static cl::opt<std::string>
-DependFilename("d",
- cl::desc("Dependency filename"),
+static cl::opt<std::string> DependFilename("d", cl::desc("Dependency filename"),
cl::value_desc("filename"),
cl::init(""));
>From b6183f5990d5f3d29db0f09efecb3cf92a0ab368 Mon Sep 17 00:00:00 2001
From: Ruoyu Zhong <zhongruoyu at outlook.com>
Date: Fri, 26 Sep 2025 16:55:13 +0800
Subject: [PATCH 3/7] [TableGen] simplify types of `escapeDependencyFilename`
---
llvm/lib/TableGen/Main.cpp | 17 +++++++++--------
1 file changed, 9 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/TableGen/Main.cpp b/llvm/lib/TableGen/Main.cpp
index 76b161d087f60..c06a1af7da338 100644
--- a/llvm/lib/TableGen/Main.cpp
+++ b/llvm/lib/TableGen/Main.cpp
@@ -50,19 +50,21 @@ static cl::opt<std::string> DependFilename("d", cl::desc("Dependency filename"),
static cl::opt<std::string>
InputFilename(cl::Positional, cl::desc("<input file>"), cl::init("-"));
-static cl::list<std::string>
-IncludeDirs("I", cl::desc("Directory of include files"),
- cl::value_desc("directory"), cl::Prefix);
+static cl::list<std::string> IncludeDirs("I",
+ cl::desc("Directory of include files"),
+ cl::value_desc("directory"),
+ cl::Prefix);
static cl::list<std::string>
MacroNames("D", cl::desc("Name of the macro to be defined"),
cl::value_desc("macro name"), cl::Prefix);
static cl::opt<bool>
-WriteIfChanged("write-if-changed", cl::desc("Only write output if it changed"));
+ WriteIfChanged("write-if-changed",
+ cl::desc("Only write output if it changed"));
-static cl::opt<bool>
-TimePhases("time-phases", cl::desc("Time phases of parser and backend"));
+static cl::opt<bool> TimePhases("time-phases",
+ cl::desc("Time phases of parser and backend"));
namespace llvm {
cl::opt<bool> EmitLongStrLiterals(
@@ -85,8 +87,7 @@ static int reportError(const char *ProgName, Twine Msg) {
/// Escape a filename in the dependency file so that it is correctly
/// interpreted by `make`. This is consistent with Clang, GCC, and lld.
-static TGLexer::DependenciesSetTy::value_type escapeDependencyFilename(
- const TGLexer::DependenciesSetTy::value_type &Filename) {
+static std::string escapeDependencyFilename(StringRef Filename) {
std::string Res;
raw_string_ostream OS(Res);
>From e742379ba96cd1304af3f241ddfa71d5b1485ba9 Mon Sep 17 00:00:00 2001
From: Ruoyu Zhong <zhongruoyu at outlook.com>
Date: Fri, 26 Sep 2025 18:07:27 +0800
Subject: [PATCH 4/7] [TableGen] fix style in `escapeDependencyFilename`
---
llvm/lib/TableGen/Main.cpp | 10 ++++------
1 file changed, 4 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/TableGen/Main.cpp b/llvm/lib/TableGen/Main.cpp
index c06a1af7da338..67c1b8462fbbb 100644
--- a/llvm/lib/TableGen/Main.cpp
+++ b/llvm/lib/TableGen/Main.cpp
@@ -95,17 +95,15 @@ static std::string escapeDependencyFilename(StringRef Filename) {
sys::path::native(Filename, NativePath);
for (unsigned I = 0, E = NativePath.size(); I != E; ++I) {
- if (NativePath[I] == '#') {
+ if (NativePath[I] == '#')
OS << '\\';
- } else if (NativePath[I] == ' ') {
+ else if (NativePath[I] == ' ') {
OS << '\\';
unsigned J = I;
- while (J > 0 && NativePath[--J] == '\\') {
+ while (J > 0 && NativePath[--J] == '\\')
OS << '\\';
- }
- } else if (NativePath[I] == '$') {
+ } else if (NativePath[I] == '$')
OS << '$';
- }
OS << NativePath[I];
}
>From 17cf52bafafaa857dfe67e62c75e3ae1586b5e3a Mon Sep 17 00:00:00 2001
From: Ruoyu Zhong <zhongruoyu at outlook.com>
Date: Fri, 26 Sep 2025 17:52:36 +0800
Subject: [PATCH 5/7] [TableGen] add test for escaping dependency filenames
---
.../TableGen/escape-dependency-filenames.td | 38 +++++++++++++++++++
1 file changed, 38 insertions(+)
create mode 100644 llvm/test/TableGen/escape-dependency-filenames.td
diff --git a/llvm/test/TableGen/escape-dependency-filenames.td b/llvm/test/TableGen/escape-dependency-filenames.td
new file mode 100644
index 0000000000000..3d226cd51dbd2
--- /dev/null
+++ b/llvm/test/TableGen/escape-dependency-filenames.td
@@ -0,0 +1,38 @@
+// RUN: rm -rf %t; split-file %s %t
+
+//--- normal-file.td
+class NormalClass {}
+
+//--- file with spaces.td
+class SpaceClass {}
+
+//--- file#with#hash.td
+class HashClass {}
+
+//--- file$with$dollar.td
+class DollarClass {}
+
+//--- file with escape\ before spaces.td
+class EscapeBeforeSpacesClass {}
+
+//--- main.td
+include "normal-file.td"
+include "file with spaces.td"
+include "file#with#hash.td"
+include "file$with$dollar.td"
+include "file with escape\\ before spaces.td" // backslash itself needs escaping
+
+def Normal : NormalClass;
+def Spaces : SpaceClass;
+def Hash : HashClass;
+def Dollar : DollarClass;
+def EscapeBeforeSpaces : EscapeBeforeSpacesClass;
+
+// RUN: llvm-tblgen -I %t -d %t.d -o %t.out %t/main.td
+// RUN: FileCheck --input-file=%t.d %s
+
+// CHECK-DAG: normal-file.td
+// CHECK-DAG: file\ with\ spaces.td
+// CHECK-DAG: file\#with\#hash.td
+// CHECK-DAG: file$$with$$dollar.td
+// CHECK-DAG: file\ with\ escape\\\ before\ spaces.td
>From 7718c5506a2abe08a66bee58fc78967a0877090c Mon Sep 17 00:00:00 2001
From: Ruoyu Zhong <zhongruoyu at outlook.com>
Date: Fri, 26 Sep 2025 21:10:51 +0800
Subject: [PATCH 6/7] [TableGen] only transform dependency filename on Windows
---
llvm/lib/TableGen/Main.cpp | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/llvm/lib/TableGen/Main.cpp b/llvm/lib/TableGen/Main.cpp
index 67c1b8462fbbb..e30053f6de84a 100644
--- a/llvm/lib/TableGen/Main.cpp
+++ b/llvm/lib/TableGen/Main.cpp
@@ -91,8 +91,15 @@ static std::string escapeDependencyFilename(StringRef Filename) {
std::string Res;
raw_string_ostream OS(Res);
+ // Only transform the path to native on Windows, where backslashes are valid
+ // path separators. On non-Windows platforms, we don't want backslashes in
+ // filenames to be incorrectly treated as path separators.
+#ifdef _WIN32
SmallString<256> NativePath;
sys::path::native(Filename, NativePath);
+#else
+ StringRef NativePath = Filename;
+#endif
for (unsigned I = 0, E = NativePath.size(); I != E; ++I) {
if (NativePath[I] == '#')
>From edec47b352e71c467615ff1d95ef1c60bb7324df Mon Sep 17 00:00:00 2001
From: Ruoyu Zhong <zhongruoyu at outlook.com>
Date: Fri, 26 Sep 2025 23:25:05 +0800
Subject: [PATCH 7/7] [TableGen] update test for escape dependency filenames
---
llvm/test/TableGen/escape-dependency-filenames.td | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/llvm/test/TableGen/escape-dependency-filenames.td b/llvm/test/TableGen/escape-dependency-filenames.td
index 3d226cd51dbd2..a97d87eb83ff2 100644
--- a/llvm/test/TableGen/escape-dependency-filenames.td
+++ b/llvm/test/TableGen/escape-dependency-filenames.td
@@ -12,6 +12,9 @@ class HashClass {}
//--- file$with$dollar.td
class DollarClass {}
+//--- file\with\escape.td
+class EscapeClass {}
+
//--- file with escape\ before spaces.td
class EscapeBeforeSpacesClass {}
@@ -20,12 +23,15 @@ include "normal-file.td"
include "file with spaces.td"
include "file#with#hash.td"
include "file$with$dollar.td"
-include "file with escape\\ before spaces.td" // backslash itself needs escaping
+// backslash itself needs escaping
+include "file\\with\\escape.td"
+include "file with escape\\ before spaces.td"
def Normal : NormalClass;
def Spaces : SpaceClass;
def Hash : HashClass;
def Dollar : DollarClass;
+def Escape : EscapeClass;
def EscapeBeforeSpaces : EscapeBeforeSpacesClass;
// RUN: llvm-tblgen -I %t -d %t.d -o %t.out %t/main.td
@@ -35,4 +41,5 @@ def EscapeBeforeSpaces : EscapeBeforeSpacesClass;
// CHECK-DAG: file\ with\ spaces.td
// CHECK-DAG: file\#with\#hash.td
// CHECK-DAG: file$$with$$dollar.td
+// CHECK-DAG: file\with\escape.td
// CHECK-DAG: file\ with\ escape\\\ before\ spaces.td
More information about the llvm-commits
mailing list