[llvm] 655bea4 - [modules] Use `HashBuilder` and `MD5` for the module hash.

Alexandre Rames via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 3 11:13:43 PDT 2021


Author: Alexandre Rames
Date: 2021-09-03T11:13:36-07:00
New Revision: 655bea4226b401a11164f99c6344e38d8742b8e4

URL: https://github.com/llvm/llvm-project/commit/655bea4226b401a11164f99c6344e38d8742b8e4
DIFF: https://github.com/llvm/llvm-project/commit/655bea4226b401a11164f99c6344e38d8742b8e4.diff

LOG: [modules] Use `HashBuilder` and `MD5` for the module hash.

Per the comments, `hash_code` values "are not stable to save or
persist", so are unsuitable for the module hash, which must persist
across compilations for the implicit module hashes to match. Note that
in practice, today, `hash_code` are stable. But this is an
implementation detail, with a clear `FIXME` indicating we should switch
to a per-execution seed.

The stability of `MD5` also allows modules cross-compilation use-cases.
The `size_t` underlying storage for `hash_code` varying across platforms
could cause mismatching hashes when cross-compiling from a 64bit
target to a 32bit target.

Note that native endianness is still used for the hash computation. So hashes
will differ between platforms of different endianness.

Reviewed By: jansvoboda11

Differential Revision: https://reviews.llvm.org/D102943

Added: 
    

Modified: 
    clang/include/clang/Basic/ObjCRuntime.h
    clang/include/clang/Basic/Sanitizers.h
    clang/include/clang/Lex/HeaderSearchOptions.h
    clang/include/clang/Serialization/ModuleFileExtension.h
    clang/lib/Frontend/CompilerInvocation.cpp
    clang/lib/Frontend/TestModuleFileExtension.cpp
    clang/lib/Frontend/TestModuleFileExtension.h
    clang/lib/Serialization/ModuleFileExtension.cpp
    clang/unittests/Frontend/CompilerInvocationTest.cpp
    llvm/include/llvm/Support/VersionTuple.h

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/ObjCRuntime.h b/clang/include/clang/Basic/ObjCRuntime.h
index 26403bfa98c9d..30a5fde407541 100644
--- a/clang/include/clang/Basic/ObjCRuntime.h
+++ b/clang/include/clang/Basic/ObjCRuntime.h
@@ -18,6 +18,7 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/HashBuilder.h"
 #include "llvm/Support/VersionTuple.h"
 #include <string>
 
@@ -480,6 +481,12 @@ class ObjCRuntime {
   friend llvm::hash_code hash_value(const ObjCRuntime &OCR) {
     return llvm::hash_combine(OCR.getKind(), OCR.getVersion());
   }
+
+  template <typename HasherT, llvm::support::endianness Endianness>
+  friend void addHash(llvm::HashBuilderImpl<HasherT, Endianness> &HBuilder,
+                      const ObjCRuntime &OCR) {
+    HBuilder.add(OCR.getKind(), OCR.getVersion());
+  }
 };
 
 raw_ostream &operator<<(raw_ostream &out, const ObjCRuntime &value);

diff  --git a/clang/include/clang/Basic/Sanitizers.h b/clang/include/clang/Basic/Sanitizers.h
index b12a3b7821d7a..db53010645ae3 100644
--- a/clang/include/clang/Basic/Sanitizers.h
+++ b/clang/include/clang/Basic/Sanitizers.h
@@ -16,6 +16,7 @@
 
 #include "clang/Basic/LLVM.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Support/HashBuilder.h"
 #include "llvm/Transforms/Instrumentation/AddressSanitizerOptions.h"
 #include <cassert>
 #include <cstdint>
@@ -72,6 +73,12 @@ class SanitizerMask {
 
   llvm::hash_code hash_value() const;
 
+  template <typename HasherT, llvm::support::endianness Endianness>
+  friend void addHash(llvm::HashBuilderImpl<HasherT, Endianness> &HBuilder,
+                      const SanitizerMask &SM) {
+    HBuilder.addRange(&SM.maskLoToHigh[0], &SM.maskLoToHigh[kNumElem]);
+  }
+
   constexpr explicit operator bool() const {
     return maskLoToHigh[0] || maskLoToHigh[1];
   }

diff  --git a/clang/include/clang/Lex/HeaderSearchOptions.h b/clang/include/clang/Lex/HeaderSearchOptions.h
index 42f3cff8c57ae..4efdfc26c3c67 100644
--- a/clang/include/clang/Lex/HeaderSearchOptions.h
+++ b/clang/include/clang/Lex/HeaderSearchOptions.h
@@ -14,10 +14,11 @@
 #include "llvm/ADT/Hashing.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Support/HashBuilder.h"
 #include <cstdint>
+#include <map>
 #include <string>
 #include <vector>
-#include <map>
 
 namespace clang {
 
@@ -256,11 +257,23 @@ inline llvm::hash_code hash_value(const HeaderSearchOptions::Entry &E) {
   return llvm::hash_combine(E.Path, E.Group, E.IsFramework, E.IgnoreSysRoot);
 }
 
+template <typename HasherT, llvm::support::endianness Endianness>
+inline void addHash(llvm::HashBuilderImpl<HasherT, Endianness> &HBuilder,
+                    const HeaderSearchOptions::Entry &E) {
+  HBuilder.add(E.Path, E.Group, E.IsFramework, E.IgnoreSysRoot);
+}
+
 inline llvm::hash_code
 hash_value(const HeaderSearchOptions::SystemHeaderPrefix &SHP) {
   return llvm::hash_combine(SHP.Prefix, SHP.IsSystemHeader);
 }
 
+template <typename HasherT, llvm::support::endianness Endianness>
+inline void addHash(llvm::HashBuilderImpl<HasherT, Endianness> &HBuilder,
+                    const HeaderSearchOptions::SystemHeaderPrefix &SHP) {
+  HBuilder.add(SHP.Prefix, SHP.IsSystemHeader);
+}
+
 } // namespace clang
 
 #endif // LLVM_CLANG_LEX_HEADERSEARCHOPTIONS_H

diff  --git a/clang/include/clang/Serialization/ModuleFileExtension.h b/clang/include/clang/Serialization/ModuleFileExtension.h
index 34ea870724a4c..3e84a65c4b805 100644
--- a/clang/include/clang/Serialization/ModuleFileExtension.h
+++ b/clang/include/clang/Serialization/ModuleFileExtension.h
@@ -11,13 +11,14 @@
 
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
 #include "llvm/Support/ExtensibleRTTI.h"
+#include "llvm/Support/HashBuilder.h"
+#include "llvm/Support/MD5.h"
 #include <memory>
 #include <string>
 
 namespace llvm {
 class BitstreamCursor;
 class BitstreamWriter;
-class hash_code;
 class raw_ostream;
 }
 
@@ -74,19 +75,20 @@ class ModuleFileExtension
   virtual ModuleFileExtensionMetadata getExtensionMetadata() const = 0;
 
   /// Hash information about the presence of this extension into the
-  /// module hash code.
+  /// module hash.
   ///
-  /// The module hash code is used to distinguish 
diff erent variants
-  /// of a module that are incompatible. If the presence, absence, or
-  /// version of the module file extension should force the creation
-  /// of a separate set of module files, override this method to
-  /// combine that distinguishing information into the module hash
-  /// code.
+  /// The module hash is used to distinguish 
diff erent variants of a module that
+  /// are incompatible. If the presence, absence, or version of the module file
+  /// extension should force the creation of a separate set of module files,
+  /// override this method to combine that distinguishing information into the
+  /// module hash.
   ///
-  /// The default implementation of this function simply returns the
-  /// hash code as given, so the presence/absence of this extension
-  /// does not distinguish module files.
-  virtual llvm::hash_code hashExtension(llvm::hash_code c) const;
+  /// The default implementation of this function simply does nothing, so the
+  /// presence/absence of this extension does not distinguish module files.
+  using ExtensionHashBuilder =
+      llvm::HashBuilderImpl<llvm::MD5,
+                            llvm::support::endian::system_endianness()>;
+  virtual void hashExtension(ExtensionHashBuilder &HBuilder) const;
 
   /// Create a new module file extension writer, which will be
   /// responsible for writing the extension contents into a particular

diff  --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index 0da2cb3e2ebe5..014595c65acc9 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -78,6 +78,7 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/ErrorOr.h"
 #include "llvm/Support/FileSystem.h"
+#include "llvm/Support/HashBuilder.h"
 #include "llvm/Support/Host.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/MemoryBuffer.h"
@@ -4476,116 +4477,99 @@ bool CompilerInvocation::CreateFromArgs(CompilerInvocation &Invocation,
 }
 
 std::string CompilerInvocation::getModuleHash() const {
+  // FIXME: Consider using SHA1 instead of MD5.
+  llvm::HashBuilder<llvm::MD5, llvm::support::endianness::native> HBuilder;
+
   // Note: For QoI reasons, the things we use as a hash here should all be
   // dumped via the -module-info flag.
-  using llvm::hash_code;
-  using llvm::hash_value;
-  using llvm::hash_combine;
-  using llvm::hash_combine_range;
 
   // Start the signature with the compiler version.
-  // FIXME: We'd rather use something more cryptographically sound than
-  // CityHash, but this will do for now.
-  hash_code code = hash_value(getClangFullRepositoryVersion());
+  HBuilder.add(getClangFullRepositoryVersion());
 
   // Also include the serialization version, in case LLVM_APPEND_VC_REV is off
   // and getClangFullRepositoryVersion() doesn't include git revision.
-  code = hash_combine(code, serialization::VERSION_MAJOR,
-                      serialization::VERSION_MINOR);
+  HBuilder.add(serialization::VERSION_MAJOR, serialization::VERSION_MINOR);
 
   // Extend the signature with the language options
-#define LANGOPT(Name, Bits, Default, Description) \
-   code = hash_combine(code, LangOpts->Name);
-#define ENUM_LANGOPT(Name, Type, Bits, Default, Description) \
-  code = hash_combine(code, static_cast<unsigned>(LangOpts->get##Name()));
+#define LANGOPT(Name, Bits, Default, Description) HBuilder.add(LangOpts->Name);
+#define ENUM_LANGOPT(Name, Type, Bits, Default, Description)                   \
+  HBuilder.add(static_cast<unsigned>(LangOpts->get##Name()));
 #define BENIGN_LANGOPT(Name, Bits, Default, Description)
 #define BENIGN_ENUM_LANGOPT(Name, Type, Bits, Default, Description)
 #include "clang/Basic/LangOptions.def"
 
-  for (StringRef Feature : LangOpts->ModuleFeatures)
-    code = hash_combine(code, Feature);
+  HBuilder.addRange(LangOpts->ModuleFeatures);
 
-  code = hash_combine(code, LangOpts->ObjCRuntime);
-  const auto &BCN = LangOpts->CommentOpts.BlockCommandNames;
-  code = hash_combine(code, hash_combine_range(BCN.begin(), BCN.end()));
+  HBuilder.add(LangOpts->ObjCRuntime);
+  HBuilder.addRange(LangOpts->CommentOpts.BlockCommandNames);
 
   // Extend the signature with the target options.
-  code = hash_combine(code, TargetOpts->Triple, TargetOpts->CPU,
-                      TargetOpts->TuneCPU, TargetOpts->ABI);
-  for (const auto &FeatureAsWritten : TargetOpts->FeaturesAsWritten)
-    code = hash_combine(code, FeatureAsWritten);
+  HBuilder.add(TargetOpts->Triple, TargetOpts->CPU, TargetOpts->TuneCPU,
+               TargetOpts->ABI);
+  HBuilder.addRange(TargetOpts->FeaturesAsWritten);
 
   // Extend the signature with preprocessor options.
   const PreprocessorOptions &ppOpts = getPreprocessorOpts();
-  const HeaderSearchOptions &hsOpts = getHeaderSearchOpts();
-  code = hash_combine(code, ppOpts.UsePredefines, ppOpts.DetailedRecord);
+  HBuilder.add(ppOpts.UsePredefines, ppOpts.DetailedRecord);
 
-  for (const auto &I : getPreprocessorOpts().Macros) {
+  const HeaderSearchOptions &hsOpts = getHeaderSearchOpts();
+  for (const auto &Macro : getPreprocessorOpts().Macros) {
     // If we're supposed to ignore this macro for the purposes of modules,
     // don't put it into the hash.
     if (!hsOpts.ModulesIgnoreMacros.empty()) {
       // Check whether we're ignoring this macro.
-      StringRef MacroDef = I.first;
+      StringRef MacroDef = Macro.first;
       if (hsOpts.ModulesIgnoreMacros.count(
               llvm::CachedHashString(MacroDef.split('=').first)))
         continue;
     }
 
-    code = hash_combine(code, I.first, I.second);
+    HBuilder.add(Macro);
   }
 
   // Extend the signature with the sysroot and other header search options.
-  code = hash_combine(code, hsOpts.Sysroot,
-                      hsOpts.ModuleFormat,
-                      hsOpts.UseDebugInfo,
-                      hsOpts.UseBuiltinIncludes,
-                      hsOpts.UseStandardSystemIncludes,
-                      hsOpts.UseStandardCXXIncludes,
-                      hsOpts.UseLibcxx,
-                      hsOpts.ModulesValidateDiagnosticOptions);
-  code = hash_combine(code, hsOpts.ResourceDir);
+  HBuilder.add(hsOpts.Sysroot, hsOpts.ModuleFormat, hsOpts.UseDebugInfo,
+               hsOpts.UseBuiltinIncludes, hsOpts.UseStandardSystemIncludes,
+               hsOpts.UseStandardCXXIncludes, hsOpts.UseLibcxx,
+               hsOpts.ModulesValidateDiagnosticOptions);
+  HBuilder.add(hsOpts.ResourceDir);
 
   if (hsOpts.ModulesStrictContextHash) {
-    hash_code SHPC = hash_combine_range(hsOpts.SystemHeaderPrefixes.begin(),
-                                        hsOpts.SystemHeaderPrefixes.end());
-    hash_code UEC = hash_combine_range(hsOpts.UserEntries.begin(),
-                                       hsOpts.UserEntries.end());
-    code = hash_combine(code, hsOpts.SystemHeaderPrefixes.size(), SHPC,
-                        hsOpts.UserEntries.size(), UEC);
+    HBuilder.addRange(hsOpts.SystemHeaderPrefixes);
+    HBuilder.addRange(hsOpts.UserEntries);
 
     const DiagnosticOptions &diagOpts = getDiagnosticOpts();
-    #define DIAGOPT(Name, Bits, Default) \
-      code = hash_combine(code, diagOpts.Name);
-    #define ENUM_DIAGOPT(Name, Type, Bits, Default) \
-      code = hash_combine(code, diagOpts.get##Name());
-    #include "clang/Basic/DiagnosticOptions.def"
-    #undef DIAGOPT
-    #undef ENUM_DIAGOPT
+#define DIAGOPT(Name, Bits, Default) HBuilder.add(diagOpts.Name);
+#define ENUM_DIAGOPT(Name, Type, Bits, Default)                                \
+  HBuilder.add(diagOpts.get##Name());
+#include "clang/Basic/DiagnosticOptions.def"
+#undef DIAGOPT
+#undef ENUM_DIAGOPT
   }
 
   // Extend the signature with the user build path.
-  code = hash_combine(code, hsOpts.ModuleUserBuildPath);
+  HBuilder.add(hsOpts.ModuleUserBuildPath);
 
   // Extend the signature with the module file extensions.
-  const FrontendOptions &frontendOpts = getFrontendOpts();
-  for (const auto &ext : frontendOpts.ModuleFileExtensions) {
-    code = ext->hashExtension(code);
-  }
+  for (const auto &ext : getFrontendOpts().ModuleFileExtensions)
+    ext->hashExtension(HBuilder);
 
   // When compiling with -gmodules, also hash -fdebug-prefix-map as it
   // affects the debug info in the PCM.
   if (getCodeGenOpts().DebugTypeExtRefs)
-    for (const auto &KeyValue : getCodeGenOpts().DebugPrefixMap)
-      code = hash_combine(code, KeyValue.first, KeyValue.second);
+    HBuilder.addRange(getCodeGenOpts().DebugPrefixMap);
 
   // Extend the signature with the enabled sanitizers, if at least one is
   // enabled. Sanitizers which cannot affect AST generation aren't hashed.
   SanitizerSet SanHash = LangOpts->Sanitize;
   SanHash.clear(getPPTransparentSanitizers());
   if (!SanHash.empty())
-    code = hash_combine(code, SanHash.Mask);
+    HBuilder.add(SanHash.Mask);
 
-  return toString(llvm::APInt(64, code), 36, /*Signed=*/false);
+  llvm::MD5::MD5Result Result;
+  HBuilder.getHasher().final(Result);
+  uint64_t Hash = Result.high() ^ Result.low();
+  return toString(llvm::APInt(64, Hash), 36, /*Signed=*/false);
 }
 
 void CompilerInvocation::generateCC1CommandLine(

diff  --git a/clang/lib/Frontend/TestModuleFileExtension.cpp b/clang/lib/Frontend/TestModuleFileExtension.cpp
index 7d4026a7efc61..ea737e6891bfe 100644
--- a/clang/lib/Frontend/TestModuleFileExtension.cpp
+++ b/clang/lib/Frontend/TestModuleFileExtension.cpp
@@ -93,16 +93,14 @@ TestModuleFileExtension::getExtensionMetadata() const {
   return { BlockName, MajorVersion, MinorVersion, UserInfo };
 }
 
-llvm::hash_code TestModuleFileExtension::hashExtension(
-                  llvm::hash_code Code) const {
+void TestModuleFileExtension::hashExtension(
+    ExtensionHashBuilder &HBuilder) const {
   if (Hashed) {
-    Code = llvm::hash_combine(Code, BlockName);
-    Code = llvm::hash_combine(Code, MajorVersion);
-    Code = llvm::hash_combine(Code, MinorVersion);
-    Code = llvm::hash_combine(Code, UserInfo);
+    HBuilder.add(BlockName);
+    HBuilder.add(MajorVersion);
+    HBuilder.add(MinorVersion);
+    HBuilder.add(UserInfo);
   }
-
-  return Code;
 }
 
 std::unique_ptr<ModuleFileExtensionWriter>

diff  --git a/clang/lib/Frontend/TestModuleFileExtension.h b/clang/lib/Frontend/TestModuleFileExtension.h
index c8ca4cd4f210a..e22c87ed2d1b5 100644
--- a/clang/lib/Frontend/TestModuleFileExtension.h
+++ b/clang/lib/Frontend/TestModuleFileExtension.h
@@ -55,7 +55,7 @@ class TestModuleFileExtension
 
   ModuleFileExtensionMetadata getExtensionMetadata() const override;
 
-  llvm::hash_code hashExtension(llvm::hash_code Code) const override;
+  void hashExtension(ExtensionHashBuilder &HBuilder) const override;
 
   std::unique_ptr<ModuleFileExtensionWriter>
   createExtensionWriter(ASTWriter &Writer) override;

diff  --git a/clang/lib/Serialization/ModuleFileExtension.cpp b/clang/lib/Serialization/ModuleFileExtension.cpp
index 6b7fd1d543409..95fff41e0d7a8 100644
--- a/clang/lib/Serialization/ModuleFileExtension.cpp
+++ b/clang/lib/Serialization/ModuleFileExtension.cpp
@@ -11,12 +11,10 @@ using namespace clang;
 
 char ModuleFileExtension::ID = 0;
 
-ModuleFileExtension::~ModuleFileExtension() { }
+ModuleFileExtension::~ModuleFileExtension() {}
 
-llvm::hash_code ModuleFileExtension::hashExtension(llvm::hash_code Code) const {
-  return Code;
-}
+void ModuleFileExtension::hashExtension(ExtensionHashBuilder &HBuilder) const {}
 
-ModuleFileExtensionWriter::~ModuleFileExtensionWriter() { }
+ModuleFileExtensionWriter::~ModuleFileExtensionWriter() {}
 
-ModuleFileExtensionReader::~ModuleFileExtensionReader() { }
+ModuleFileExtensionReader::~ModuleFileExtensionReader() {}

diff  --git a/clang/unittests/Frontend/CompilerInvocationTest.cpp b/clang/unittests/Frontend/CompilerInvocationTest.cpp
index 9bcbf1fe6001d..692d04a100db7 100644
--- a/clang/unittests/Frontend/CompilerInvocationTest.cpp
+++ b/clang/unittests/Frontend/CompilerInvocationTest.cpp
@@ -860,9 +860,7 @@ struct DummyModuleFileExtension
     return {};
   };
 
-  llvm::hash_code hashExtension(llvm::hash_code Code) const override {
-    return {};
-  }
+  void hashExtension(ExtensionHashBuilder &HBuilder) const override {}
 
   std::unique_ptr<ModuleFileExtensionWriter>
   createExtensionWriter(ASTWriter &Writer) override {

diff  --git a/llvm/include/llvm/Support/VersionTuple.h b/llvm/include/llvm/Support/VersionTuple.h
index a48ae0bf52bd4..1a1072d228f11 100644
--- a/llvm/include/llvm/Support/VersionTuple.h
+++ b/llvm/include/llvm/Support/VersionTuple.h
@@ -17,6 +17,7 @@
 #include "llvm/ADT/DenseMapInfo.h"
 #include "llvm/ADT/Hashing.h"
 #include "llvm/ADT/Optional.h"
+#include "llvm/Support/HashBuilder.h"
 #include <string>
 #include <tuple>
 
@@ -164,6 +165,12 @@ class VersionTuple {
     return llvm::hash_combine(VT.Major, VT.Minor, VT.Subminor, VT.Build);
   }
 
+  template <typename HasherT, llvm::support::endianness Endianness>
+  friend void addHash(HashBuilderImpl<HasherT, Endianness> &HBuilder,
+                      const VersionTuple &VT) {
+    HBuilder.add(VT.Major, VT.Minor, VT.Subminor, VT.Build);
+  }
+
   /// Retrieve a string representation of the version number.
   std::string getAsString() const;
 


        


More information about the llvm-commits mailing list