[clang] 6b80f00 - [clang][deps] NFC: Refactor and comment ModuleDeps sorting
Jan Svoboda via cfe-commits
cfe-commits at lists.llvm.org
Fri Apr 21 10:56:47 PDT 2023
Author: Jan Svoboda
Date: 2023-04-21T10:56:42-07:00
New Revision: 6b80f00bac6a4832af4f8ebde96564690a35f7e1
URL: https://github.com/llvm/llvm-project/commit/6b80f00bac6a4832af4f8ebde96564690a35f7e1
DIFF: https://github.com/llvm/llvm-project/commit/6b80f00bac6a4832af4f8ebde96564690a35f7e1.diff
LOG: [clang][deps] NFC: Refactor and comment ModuleDeps sorting
I once again stumbled across `IndexedModuleID` and was confused by the mutable `InputIndex` field.
This patch adds comments around that piece of code and unifies/simplifies related functions.
Reviewed By: Bigcheese
Differential Revision: https://reviews.llvm.org/D145197
Added:
Modified:
clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h
clang/tools/clang-scan-deps/ClangScanDeps.cpp
Removed:
################################################################################
diff --git a/clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h b/clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h
index 24d7337d26e4c..0a5cbc4f046aa 100644
--- a/clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h
+++ b/clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h
@@ -59,7 +59,13 @@ struct ModuleID {
std::string ContextHash;
bool operator==(const ModuleID &Other) const {
- return ModuleName == Other.ModuleName && ContextHash == Other.ContextHash;
+ return std::tie(ModuleName, ContextHash) ==
+ std::tie(Other.ModuleName, Other.ContextHash);
+ }
+
+ bool operator<(const ModuleID& Other) const {
+ return std::tie(ModuleName, ContextHash) <
+ std::tie(Other.ModuleName, Other.ContextHash);
}
};
diff --git a/clang/tools/clang-scan-deps/ClangScanDeps.cpp b/clang/tools/clang-scan-deps/ClangScanDeps.cpp
index 93c60eb2cdf59..9018acc8e682f 100644
--- a/clang/tools/clang-scan-deps/ClangScanDeps.cpp
+++ b/clang/tools/clang-scan-deps/ClangScanDeps.cpp
@@ -323,11 +323,10 @@ static llvm::json::Array toJSONSorted(const llvm::StringSet<> &Set) {
return llvm::json::Array(Strings);
}
+// Technically, we don't need to sort the dependency list to get determinism.
+// Leaving these be will simply preserve the import order.
static llvm::json::Array toJSONSorted(std::vector<ModuleID> V) {
- llvm::sort(V, [](const ModuleID &A, const ModuleID &B) {
- return std::tie(A.ModuleName, A.ContextHash) <
- std::tie(B.ModuleName, B.ContextHash);
- });
+ llvm::sort(V);
llvm::json::Array Ret;
for (const ModuleID &MID : V)
@@ -406,11 +405,7 @@ class FullDeps {
std::vector<IndexedModuleID> ModuleIDs;
for (auto &&M : Modules)
ModuleIDs.push_back(M.first);
- llvm::sort(ModuleIDs,
- [](const IndexedModuleID &A, const IndexedModuleID &B) {
- return std::tie(A.ID.ModuleName, A.InputIndex) <
- std::tie(B.ID.ModuleName, B.InputIndex);
- });
+ llvm::sort(ModuleIDs);
using namespace llvm::json;
@@ -470,20 +465,37 @@ class FullDeps {
private:
struct IndexedModuleID {
ModuleID ID;
+
+ // FIXME: This is mutable so that it can still be updated after insertion
+ // into an unordered associative container. This is "fine", since this
+ // field doesn't contribute to the hash, but it's a brittle hack.
mutable size_t InputIndex;
bool operator==(const IndexedModuleID &Other) const {
- return ID.ModuleName == Other.ID.ModuleName &&
- ID.ContextHash == Other.ID.ContextHash;
+ return std::tie(ID.ModuleName, ID.ContextHash) ==
+ std::tie(Other.ID.ModuleName, Other.ID.ContextHash);
}
- };
-
- struct IndexedModuleIDHasher {
- std::size_t operator()(const IndexedModuleID &IMID) const {
- using llvm::hash_combine;
- return hash_combine(IMID.ID.ModuleName, IMID.ID.ContextHash);
+ bool operator<(const IndexedModuleID &Other) const {
+ /// We need the output of clang-scan-deps to be deterministic. However,
+ /// the dependency graph may contain two modules with the same name. How
+ /// do we decide which one to print first? If we made that decision based
+ /// on the context hash, the ordering would be deterministic, but
+ ///
diff erent across machines. This can happen for example when the inputs
+ /// or the SDKs (which both contribute to the "context" hash) live in
+ ///
diff erent absolute locations. We solve that by tracking the index of
+ /// the first input TU that (transitively) imports the dependency, which
+ /// is always the same for the same input, resulting in deterministic
+ /// sorting that's also reproducible across machines.
+ return std::tie(ID.ModuleName, InputIndex) <
+ std::tie(Other.ID.ModuleName, Other.InputIndex);
}
+
+ struct Hasher {
+ std::size_t operator()(const IndexedModuleID &IMID) const {
+ return llvm::hash_combine(IMID.ID.ModuleName, IMID.ID.ContextHash);
+ }
+ };
};
struct InputDeps {
@@ -496,7 +508,7 @@ class FullDeps {
};
std::mutex Lock;
- std::unordered_map<IndexedModuleID, ModuleDeps, IndexedModuleIDHasher>
+ std::unordered_map<IndexedModuleID, ModuleDeps, IndexedModuleID::Hasher>
Modules;
std::vector<InputDeps> Inputs;
};
More information about the cfe-commits
mailing list