[clang] b672638 - [clang][deps] Ensure deterministic filename case
Jan Svoboda via cfe-commits
cfe-commits at lists.llvm.org
Fri Apr 8 00:18:28 PDT 2022
Author: Jan Svoboda
Date: 2022-04-08T09:18:00+02:00
New Revision: b672638dbc7cec9a51826520e8f8aef276bac3ee
URL: https://github.com/llvm/llvm-project/commit/b672638dbc7cec9a51826520e8f8aef276bac3ee
DIFF: https://github.com/llvm/llvm-project/commit/b672638dbc7cec9a51826520e8f8aef276bac3ee.diff
LOG: [clang][deps] Ensure deterministic filename case
The dependency scanner can reuse a single FileManager instance across multiple translation units. This may lead to non-deterministic output depending on which TU gets processed first.
One of the problems is that Clang uses DirectoryEntry::getName in the header search algorithm. This function returns the path that was first used to construct the (shared) entry in FileManager. Using DirectoryEntryRef::getName instead preserves the case as it was spelled out for the current "get directory entry" request.
rdar://90647508
Reviewed By: dexonsmith
Differential Revision: https://reviews.llvm.org/D123229
Added:
clang/test/ClangScanDeps/header-search-case-sensitivity.c
Modified:
clang/include/clang/Lex/DirectoryLookup.h
clang/lib/Lex/HeaderSearch.cpp
Removed:
################################################################################
diff --git a/clang/include/clang/Lex/DirectoryLookup.h b/clang/include/clang/Lex/DirectoryLookup.h
index da2ae9fce1aa1..3602662029a48 100644
--- a/clang/include/clang/Lex/DirectoryLookup.h
+++ b/clang/include/clang/Lex/DirectoryLookup.h
@@ -91,6 +91,10 @@ class DirectoryLookup {
return isNormalDir() ? &u.Dir.getDirEntry() : nullptr;
}
+ Optional<DirectoryEntryRef> getDirRef() const {
+ return isNormalDir() ? Optional<DirectoryEntryRef>(u.Dir) : None;
+ }
+
/// getFrameworkDir - Return the directory that this framework refers to.
///
const DirectoryEntry *getFrameworkDir() const {
diff --git a/clang/lib/Lex/HeaderSearch.cpp b/clang/lib/Lex/HeaderSearch.cpp
index d16b9a52bff63..400a6c8e9c034 100644
--- a/clang/lib/Lex/HeaderSearch.cpp
+++ b/clang/lib/Lex/HeaderSearch.cpp
@@ -436,10 +436,10 @@ Optional<FileEntryRef> DirectoryLookup::LookupFile(
SmallString<1024> TmpDir;
if (isNormalDir()) {
// Concatenate the requested file onto the directory.
- TmpDir = getDir()->getName();
+ TmpDir = getDirRef()->getName();
llvm::sys::path::append(TmpDir, Filename);
if (SearchPath) {
- StringRef SearchPathRef(getDir()->getName());
+ StringRef SearchPathRef(getDirRef()->getName());
SearchPath->clear();
SearchPath->append(SearchPathRef.begin(), SearchPathRef.end());
}
diff --git a/clang/test/ClangScanDeps/header-search-case-sensitivity.c b/clang/test/ClangScanDeps/header-search-case-sensitivity.c
new file mode 100644
index 0000000000000..e8d2415cadd81
--- /dev/null
+++ b/clang/test/ClangScanDeps/header-search-case-sensitivity.c
@@ -0,0 +1,50 @@
+// This test checks that reusing FileManager produces deterministic results on case-insensitive filesystems.
+
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+
+//--- dir1/arm/lower.h
+//--- dir2/ARM/upper.h
+//--- t1.c
+#include "upper.h"
+//--- t2.c
+#include "arm/lower.h"
+
+//--- cdb.json.template
+[{
+ "directory": "DIR",
+ "command": "clang -fsyntax-only DIR/t1.c -I DIR/dir2/ARM -I DIR/dir1",
+ "file": "DIR/t1.c"
+},{
+ "directory": "DIR",
+ "command": "clang -fsyntax-only DIR/t2.c -I DIR/dir2 -I DIR/dir1",
+ "file": "DIR/t2.c"
+}]
+
+//--- cdb-rev.json.template
+[{
+ "directory": "DIR",
+ "command": "clang -fsyntax-only DIR/t2.c -I DIR/dir2 -I DIR/dir1",
+ "file": "DIR/t2.c"
+},{
+ "directory": "DIR",
+ "command": "clang -fsyntax-only DIR/t1.c -I DIR/dir2/ARM -I DIR/dir1",
+ "file": "DIR/t1.c"
+}]
+
+// RUN: sed -e "s|DIR|%/t|g" %t/cdb.json.template > %t/cdb.json
+// RUN: sed -e "s|DIR|%/t|g" %t/cdb-rev.json.template > %t/cdb-rev.json
+
+// RUN: clang-scan-deps -compilation-database=%t/cdb.json -format make -j 1 | sed 's:\\\\\?:/:g' | FileCheck %s
+
+// In the reversed case, Clang starts by scanning "t2.c". When looking up the "arm/lower.h" header,
+// the string is appended to "DIR/dir2". That file ("DIR/dir2/arm/lower.h") doesn't exist, but when
+// learning so, the FileManager stats and caches the parent directory ("DIR/dir2/arm"), using the
+// UID as the key.
+// When scanning "t1.c" later on, the "DIR/dir2/ARM" search directory is assigned the **same**
+// directory entry (with lowercase "arm"), since they share the UID on case-insensitive filesystems.
+// To preserve the correct case throughout the compiler for any file within that directory, it's
+// important to use the spelling actually used, not just the cached one.
+// RUN: clang-scan-deps -compilation-database=%t/cdb-rev.json -format make -j 1 | sed 's:\\\\\?:/:g' | FileCheck %s
+
+// CHECK: ARM/upper.h
More information about the cfe-commits mailing list