[llvm] r265095 - Add a module Hash in the bitcode and the combined index, implementing a kind of "build-id"

Mehdi Amini via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 31 18:30:06 PDT 2016


Author: mehdi_amini
Date: Thu Mar 31 20:30:06 2016
New Revision: 265095

URL: http://llvm.org/viewvc/llvm-project?rev=265095&view=rev
Log:
Add a module Hash in the bitcode and the combined index, implementing a kind of "build-id"

This is intended to be used for ThinLTO incremental build.

Differential Revision: http://reviews.llvm.org/D18213

From: Mehdi Amini <mehdi.amini at apple.com>

Added:
    llvm/trunk/test/Bitcode/module_hash.ll
Modified:
    llvm/trunk/include/llvm/Bitcode/BitstreamReader.h
    llvm/trunk/include/llvm/Bitcode/LLVMBitCodes.h
    llvm/trunk/include/llvm/Bitcode/ReaderWriter.h
    llvm/trunk/include/llvm/IR/ModuleSummaryIndex.h
    llvm/trunk/lib/Bitcode/Reader/BitcodeReader.cpp
    llvm/trunk/lib/Bitcode/Writer/BitcodeWriter.cpp
    llvm/trunk/lib/IR/ModuleSummaryIndex.cpp
    llvm/trunk/lib/Transforms/IPO/FunctionImport.cpp
    llvm/trunk/tools/llvm-as/llvm-as.cpp
    llvm/trunk/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp

Modified: llvm/trunk/include/llvm/Bitcode/BitstreamReader.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Bitcode/BitstreamReader.h?rev=265095&r1=265094&r2=265095&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Bitcode/BitstreamReader.h (original)
+++ llvm/trunk/include/llvm/Bitcode/BitstreamReader.h Thu Mar 31 20:30:06 2016
@@ -446,6 +446,8 @@ public:
   using SimpleBitstreamCursor::canSkipToPos;
   using SimpleBitstreamCursor::AtEndOfStream;
   using SimpleBitstreamCursor::GetCurrentBitNo;
+  using SimpleBitstreamCursor::getCurrentByteNo;
+  using SimpleBitstreamCursor::getPointerToByte;
   using SimpleBitstreamCursor::getBitStreamReader;
   using SimpleBitstreamCursor::JumpToBit;
   using SimpleBitstreamCursor::fillCurWord;

Modified: llvm/trunk/include/llvm/Bitcode/LLVMBitCodes.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Bitcode/LLVMBitCodes.h?rev=265095&r1=265094&r2=265095&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Bitcode/LLVMBitCodes.h (original)
+++ llvm/trunk/include/llvm/Bitcode/LLVMBitCodes.h Thu Mar 31 20:30:06 2016
@@ -107,6 +107,9 @@ enum ModuleCodes {
 
   // SOURCE_FILENAME: [namechar x N]
   MODULE_CODE_SOURCE_FILENAME = 16,
+
+  // HASH: [5*i32]
+  MODULE_CODE_HASH = 17,
 };
 
 /// PARAMATTR blocks have code for defining a parameter attribute set.
@@ -183,6 +186,7 @@ enum ValueSymtabCodes {
 // The module path symbol table only has one code (MST_CODE_ENTRY).
 enum ModulePathSymtabCodes {
   MST_CODE_ENTRY = 1, // MST_ENTRY: [modid, namechar x N]
+  MST_CODE_HASH = 2,  // MST_HASH:  [5*i32]
 };
 
 // The summary section uses different codes in the per-module

Modified: llvm/trunk/include/llvm/Bitcode/ReaderWriter.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Bitcode/ReaderWriter.h?rev=265095&r1=265094&r2=265095&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Bitcode/ReaderWriter.h (original)
+++ llvm/trunk/include/llvm/Bitcode/ReaderWriter.h Thu Mar 31 20:30:06 2016
@@ -107,7 +107,8 @@ namespace llvm {
   /// for use in ThinLTO optimization).
   void WriteBitcodeToFile(const Module *M, raw_ostream &Out,
                           bool ShouldPreserveUseListOrder = false,
-                          bool EmitSummaryIndex = false);
+                          bool EmitSummaryIndex = false,
+                          bool GenerateHash = false);
 
   /// Write the specified module summary index to the given raw output stream,
   /// where it will be written in a new bitcode block. This is used when

Modified: llvm/trunk/include/llvm/IR/ModuleSummaryIndex.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/ModuleSummaryIndex.h?rev=265095&r1=265094&r2=265095&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IR/ModuleSummaryIndex.h (original)
+++ llvm/trunk/include/llvm/IR/ModuleSummaryIndex.h Thu Mar 31 20:30:06 2016
@@ -25,6 +25,8 @@
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/raw_ostream.h"
 
+#include <array>
+
 namespace llvm {
 
 /// \brief Class to accumulate and hold information about a callee.
@@ -228,6 +230,9 @@ public:
   void setBitcodeIndex(uint64_t Offset) { BitcodeIndex = Offset; }
 };
 
+/// 160 bits SHA1
+typedef std::array<uint32_t, 5> ModuleHash;
+
 /// List of global value info structures for a particular value held
 /// in the GlobalValueMap. Requires a vector in the case of multiple
 /// COMDAT values of the same name.
@@ -245,9 +250,9 @@ typedef GlobalValueInfoMapTy::const_iter
 typedef GlobalValueInfoMapTy::iterator globalvalueinfo_iterator;
 
 /// String table to hold/own module path strings, which additionally holds the
-/// module ID assigned to each module during the plugin step. The StringMap
-/// makes a copy of and owns inserted strings.
-typedef StringMap<uint64_t> ModulePathStringTableTy;
+/// module ID assigned to each module during the plugin step, as well as a hash
+/// of the module. The StringMap makes a copy of and owns inserted strings.
+typedef StringMap<std::pair<uint64_t, ModuleHash>> ModulePathStringTableTy;
 
 /// Class to hold module path string table and global value map,
 /// and encapsulate methods for operating on them.
@@ -304,17 +309,26 @@ public:
     GlobalValueMap[ValueGUID].push_back(std::move(Info));
   }
 
-  /// Table of modules, containing an id.
-  const StringMap<uint64_t> &modulePaths() const {
+  /// Table of modules, containing module hash and id.
+  const StringMap<std::pair<uint64_t, ModuleHash>> &modulePaths() const {
     return ModulePathStringTable;
   }
 
-  /// Table of modules, containing an id.
-  StringMap<uint64_t> &modulePaths() { return ModulePathStringTable; }
+  /// Table of modules, containing hash and id.
+  StringMap<std::pair<uint64_t, ModuleHash>> &modulePaths() {
+    return ModulePathStringTable;
+  }
 
   /// Get the module ID recorded for the given module path.
   uint64_t getModuleId(const StringRef ModPath) const {
-    return ModulePathStringTable.lookup(ModPath);
+    return ModulePathStringTable.lookup(ModPath).first;
+  }
+
+  /// Get the module SHA1 hash recorded for the given module path.
+  const ModuleHash &getModuleHash(const StringRef ModPath) const {
+    auto It = ModulePathStringTable.find(ModPath);
+    assert(It != ModulePathStringTable.end() && "Module not registered");
+    return It->second.second;
   }
 
   /// Add the given per-module index into this module index/summary,
@@ -333,11 +347,14 @@ public:
     return NewName.str();
   }
 
-  /// Add a new module path, mapped to the given module Id, and return StringRef
-  /// owned by string table map.
-  StringRef addModulePath(StringRef ModPath, uint64_t ModId) {
-    return ModulePathStringTable.insert(std::make_pair(ModPath, ModId))
-        .first->first();
+  /// Add a new module path with the given \p Hash, mapped to the given \p
+  /// ModID, and return an iterator to the entry in the index.
+  ModulePathStringTableTy::iterator
+  addModulePath(StringRef ModPath, uint64_t ModId,
+                ModuleHash Hash = ModuleHash{{0}}) {
+    return ModulePathStringTable.insert(std::make_pair(
+                                            ModPath,
+                                            std::make_pair(ModId, Hash))).first;
   }
 
   /// Check if the given Module has any functions available for exporting

Modified: llvm/trunk/lib/Bitcode/Reader/BitcodeReader.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Bitcode/Reader/BitcodeReader.cpp?rev=265095&r1=265094&r2=265095&view=diff
==============================================================================
--- llvm/trunk/lib/Bitcode/Reader/BitcodeReader.cpp (original)
+++ llvm/trunk/lib/Bitcode/Reader/BitcodeReader.cpp Thu Mar 31 20:30:06 2016
@@ -5632,11 +5632,7 @@ std::error_code ModuleSummaryIndexBitcod
       }
       continue;
 
-    case BitstreamEntry::Record:
-      // Once we find the last record of interest, skip the rest.
-      if (VSTOffset > 0)
-        Stream.skipRecord(Entry.ID);
-      else {
+    case BitstreamEntry::Record: {
         Record.clear();
         auto BitCode = Stream.readRecord(Entry.ID, Record);
         switch (BitCode) {
@@ -5650,6 +5646,25 @@ std::error_code ModuleSummaryIndexBitcod
           SourceFileName = ValueName.c_str();
           break;
         }
+        /// MODULE_CODE_HASH: [5*i32]
+        case bitc::MODULE_CODE_HASH: {
+          if (Record.size() != 5)
+            return error("Invalid hash length " + Twine(Record.size()).str());
+          if (!TheIndex)
+            break;
+          if (TheIndex->modulePaths().empty())
+            // Does not have any summary emitted.
+            break;
+          if (TheIndex->modulePaths().size() != 1)
+            return error("Don't expect multiple modules defined?");
+          auto &Hash = TheIndex->modulePaths().begin()->second.second;
+          int Pos = 0;
+          for (auto &Val : Record) {
+            assert(!(Val >> 32) && "Unexpected high bits set");
+            Hash[Pos++] = Val;
+          }
+          break;
+        }
         /// MODULE_CODE_VSTOFFSET: [offset]
         case bitc::MODULE_CODE_VSTOFFSET:
           if (Record.size() < 1)
@@ -5761,7 +5776,7 @@ std::error_code ModuleSummaryIndexBitcod
       // module path string table entry with an empty (0) ID to take
       // ownership.
       FS->setModulePath(
-          TheIndex->addModulePath(Buffer->getBufferIdentifier(), 0));
+          TheIndex->addModulePath(Buffer->getBufferIdentifier(), 0)->first());
       static int RefListStartIndex = 4;
       int CallGraphEdgeStartIndex = RefListStartIndex + NumRefs;
       assert(Record.size() >= RefListStartIndex + NumRefs &&
@@ -5799,7 +5814,7 @@ std::error_code ModuleSummaryIndexBitcod
       std::unique_ptr<GlobalVarSummary> FS =
           llvm::make_unique<GlobalVarSummary>(getDecodedLinkage(RawLinkage));
       FS->setModulePath(
-          TheIndex->addModulePath(Buffer->getBufferIdentifier(), 0));
+          TheIndex->addModulePath(Buffer->getBufferIdentifier(), 0)->first());
       for (unsigned I = 2, E = Record.size(); I != E; ++I) {
         unsigned RefValueId = Record[I];
         uint64_t RefGUID = getGUIDFromValueId(RefValueId);
@@ -5887,6 +5902,7 @@ std::error_code ModuleSummaryIndexBitcod
   SmallVector<uint64_t, 64> Record;
 
   SmallString<128> ModulePath;
+  ModulePathStringTableTy::iterator LastSeenModulePath;
   while (1) {
     BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
 
@@ -5907,14 +5923,32 @@ std::error_code ModuleSummaryIndexBitcod
       break;
     case bitc::MST_CODE_ENTRY: {
       // MST_ENTRY: [modid, namechar x N]
+      uint64_t ModuleId = Record[0];
+
       if (convertToString(Record, 1, ModulePath))
         return error("Invalid record");
-      uint64_t ModuleId = Record[0];
-      StringRef ModulePathInMap = TheIndex->addModulePath(ModulePath, ModuleId);
-      ModuleIdMap[ModuleId] = ModulePathInMap;
+
+      LastSeenModulePath = TheIndex->addModulePath(ModulePath, ModuleId);
+      ModuleIdMap[ModuleId] = LastSeenModulePath->first();
+
       ModulePath.clear();
       break;
     }
+    /// MST_CODE_HASH: [5*i32]
+    case bitc::MST_CODE_HASH: {
+      if (Record.size() != 5)
+        return error("Invalid hash length " + Twine(Record.size()).str());
+      if (LastSeenModulePath == TheIndex->modulePaths().end())
+        return error("Invalid hash that does not follow a module path");
+      int Pos = 0;
+      for (auto &Val : Record) {
+        assert(!(Val >> 32) && "Unexpected high bits set");
+        LastSeenModulePath->second.second[Pos++] = Val;
+      }
+      // Reset LastSeenModulePath to avoid overriding the hash unexpectedly.
+      LastSeenModulePath = TheIndex->modulePaths().end();
+      break;
+    }
     }
   }
   llvm_unreachable("Exit infinite loop");

Modified: llvm/trunk/lib/Bitcode/Writer/BitcodeWriter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Bitcode/Writer/BitcodeWriter.cpp?rev=265095&r1=265094&r2=265095&view=diff
==============================================================================
--- llvm/trunk/lib/Bitcode/Writer/BitcodeWriter.cpp (original)
+++ llvm/trunk/lib/Bitcode/Writer/BitcodeWriter.cpp Thu Mar 31 20:30:06 2016
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "ValueEnumerator.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/Analysis/BlockFrequencyInfo.h"
@@ -39,6 +40,7 @@
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/Program.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/SHA1.h"
 #include <cctype>
 #include <map>
 using namespace llvm;
@@ -2852,8 +2854,18 @@ static void WriteModStrings(const Module
   Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6));
   unsigned Abbrev6Bit = Stream.EmitAbbrev(Abbv);
 
-  SmallVector<unsigned, 64> NameVals;
-  for (const StringMapEntry<uint64_t> &MPSE : I.modulePaths()) {
+  // Module Hash, 160 bits SHA1. Optionally, emitted after each MST_CODE_ENTRY.
+  Abbv = new BitCodeAbbrev();
+  Abbv->Add(BitCodeAbbrevOp(bitc::MST_CODE_HASH));
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
+  unsigned AbbrevHash = Stream.EmitAbbrev(Abbv);
+
+  SmallVector<unsigned, 64> Vals;
+  for (const auto &MPSE : I.modulePaths()) {
     StringEncoding Bits =
         getStringEncoding(MPSE.getKey().data(), MPSE.getKey().size());
     unsigned AbbrevToUse = Abbrev8Bit;
@@ -2862,14 +2874,29 @@ static void WriteModStrings(const Module
     else if (Bits == SE_Fixed7)
       AbbrevToUse = Abbrev7Bit;
 
-    NameVals.push_back(MPSE.getValue());
+    Vals.push_back(MPSE.getValue().first);
 
     for (const auto P : MPSE.getKey())
-      NameVals.push_back((unsigned char)P);
+      Vals.push_back((unsigned char)P);
 
     // Emit the finished record.
-    Stream.EmitRecord(bitc::MST_CODE_ENTRY, NameVals, AbbrevToUse);
-    NameVals.clear();
+    Stream.EmitRecord(bitc::MST_CODE_ENTRY, Vals, AbbrevToUse);
+
+    Vals.clear();
+    // Emit an optional hash for the module now
+    auto &Hash = MPSE.getValue().second;
+    bool AllZero = true; // Detect if the hash is empty, and do not generate it
+    for (auto Val : Hash) {
+      if (Val)
+        AllZero = false;
+      Vals.push_back(Val);
+    }
+    if (!AllZero) {
+      // Emit the hash record.
+      Stream.EmitRecord(bitc::MST_CODE_HASH, Vals, AbbrevHash);
+    }
+
+    Vals.clear();
   }
   Stream.ExitBlock();
 }
@@ -3177,11 +3204,36 @@ static void WriteIdentificationBlock(con
   Stream.ExitBlock();
 }
 
+static void writeModuleHash(BitstreamWriter &Stream,
+                            SmallVectorImpl<char> &Buffer,
+                            size_t BlockStartPos) {
+  // Emit the module's hash.
+  // MODULE_CODE_HASH: [5*i32]
+  SHA1 Hasher;
+  Hasher.update(ArrayRef<uint8_t>((uint8_t *)&Buffer[BlockStartPos],
+                                  Buffer.size() - BlockStartPos));
+  auto Hash = Hasher.result();
+  SmallVector<uint64_t, 20> Vals;
+  auto LShift = [&](unsigned char Val, unsigned Amount)
+                    -> uint64_t { return ((uint64_t)Val) << Amount; };
+  for (int Pos = 0; Pos < 20; Pos += 4) {
+    uint32_t SubHash = LShift(Hash[Pos + 0], 24);
+    SubHash |= LShift(Hash[Pos + 1], 16) | LShift(Hash[Pos + 2], 8) |
+               (unsigned)(unsigned char)Hash[Pos + 3];
+    Vals.push_back(SubHash);
+  }
+
+  // Emit the finished record.
+  Stream.EmitRecord(bitc::MODULE_CODE_HASH, Vals);
+}
+
 /// WriteModule - Emit the specified module to the bitstream.
 static void WriteModule(const Module *M, BitstreamWriter &Stream,
                         bool ShouldPreserveUseListOrder,
-                        uint64_t BitcodeStartBit, bool EmitSummaryIndex) {
+                        uint64_t BitcodeStartBit, bool EmitSummaryIndex,
+                        bool GenerateHash, SmallVectorImpl<char> &Buffer) {
   Stream.EnterSubblock(bitc::MODULE_BLOCK_ID, 3);
+  size_t BlockStartPos = Buffer.size();
 
   SmallVector<unsigned, 1> Vals;
   unsigned CurVersion = 1;
@@ -3238,6 +3290,10 @@ static void WriteModule(const Module *M,
   WriteValueSymbolTable(M->getValueSymbolTable(), VE, Stream,
                         VSTOffsetPlaceholder, BitcodeStartBit, &FunctionIndex);
 
+  if (GenerateHash) {
+    writeModuleHash(Stream, Buffer, BlockStartPos);
+  }
+
   Stream.ExitBlock();
 }
 
@@ -3322,7 +3378,7 @@ static void WriteBitcodeHeader(Bitstream
 /// stream.
 void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out,
                               bool ShouldPreserveUseListOrder,
-                              bool EmitSummaryIndex) {
+                              bool EmitSummaryIndex, bool GenerateHash) {
   SmallVector<char, 0> Buffer;
   Buffer.reserve(256*1024);
 
@@ -3348,7 +3404,7 @@ void llvm::WriteBitcodeToFile(const Modu
 
     // Emit the module.
     WriteModule(M, Stream, ShouldPreserveUseListOrder, BitcodeStartBit,
-                EmitSummaryIndex);
+                EmitSummaryIndex, GenerateHash, Buffer);
   }
 
   if (TT.isOSDarwin() || TT.isOSBinFormatMachO())

Modified: llvm/trunk/lib/IR/ModuleSummaryIndex.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/IR/ModuleSummaryIndex.cpp?rev=265095&r1=265094&r2=265095&view=diff
==============================================================================
--- llvm/trunk/lib/IR/ModuleSummaryIndex.cpp (original)
+++ llvm/trunk/lib/IR/ModuleSummaryIndex.cpp Thu Mar 31 20:30:06 2016
@@ -37,9 +37,11 @@ void ModuleSummaryIndex::mergeFrom(std::
 
     // Add the module path string ref for this module if we haven't already
     // saved a reference to it.
-    if (ModPath.empty())
-      ModPath = addModulePath(Info->summary()->modulePath(), NextModuleId);
-    else
+    if (ModPath.empty()) {
+      auto Path = Info->summary()->modulePath();
+      ModPath = addModulePath(Path, NextModuleId, Other->getModuleHash(Path))
+                    ->first();
+    } else
       assert(ModPath == Info->summary()->modulePath() &&
              "Each module in the combined map should have a unique ID");
 

Modified: llvm/trunk/lib/Transforms/IPO/FunctionImport.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/IPO/FunctionImport.cpp?rev=265095&r1=265094&r2=265095&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/IPO/FunctionImport.cpp (original)
+++ llvm/trunk/lib/Transforms/IPO/FunctionImport.cpp Thu Mar 31 20:30:06 2016
@@ -61,7 +61,7 @@ static std::unique_ptr<Module> loadFile(
                           /* ShouldLazyLoadMetadata = */ true);
   if (!Result) {
     Err.print("function-import", errs());
-    return nullptr;
+    report_fatal_error("Abort");
   }
 
   return Result;

Added: llvm/trunk/test/Bitcode/module_hash.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Bitcode/module_hash.ll?rev=265095&view=auto
==============================================================================
--- llvm/trunk/test/Bitcode/module_hash.ll (added)
+++ llvm/trunk/test/Bitcode/module_hash.ll Thu Mar 31 20:30:06 2016
@@ -0,0 +1,35 @@
+; Check per module hash.
+; RUN: llvm-as  -module-hash  %s -o - | llvm-bcanalyzer -dump | FileCheck %s --check-prefix=MOD1
+; MOD1: <HASH op0={{[0-9]*}} op1={{[0-9]*}} op2={{[0-9]*}} op3={{[0-9]*}} op4={{[0-9]*}} (match)/>
+; RUN: llvm-as  -module-hash  %p/Inputs/module_hash.ll -o - | llvm-bcanalyzer -dump | FileCheck %s --check-prefix=MOD2
+; MOD2: <HASH op0={{[0-9]*}} op1={{[0-9]*}} op2={{[0-9]*}} op3={{[0-9]*}} op4={{[0-9]*}} (match)/>
+
+; Check that the hash matches in the combined index.
+
+; First regenerate the modules with a summary
+; RUN: llvm-as  -module-hash -module-summary %s -o %t.m1.bc
+; RUN: llvm-as  -module-hash -module-summary %p/Inputs/module_hash.ll -o %t.m2.bc
+
+; Recover the hashes from the modules themselves.
+; RUN: llvm-bcanalyzer -dump %t1 | grep '<HASH'  > %t.hash
+; RUN: llvm-bcanalyzer -dump %t2 | grep '<HASH'  >> %t.hash
+
+; Generate the combined index and gather the hashes there.
+; RUN: llvm-lto --thinlto-action=thinlink -o - %t.m1.bc %t.m2.bc | llvm-bcanalyzer -dump  | grep '<HASH ' >> %t.hash
+
+; Validate the output now, the hahes in the individual modules and the combined index are in the same file.
+; RUN: cat %t.hash | FileCheck %s --check-prefix=COMBINED
+
+; First capture the value of the hash for the two modules.
+; COMBINED: <HASH op0=[[HASH1_1:[0-9]*]] op1=[[HASH1_2:[0-9]*]] op2=[[HASH1_3:[0-9]*]] op3=[[HASH1_4:[0-9]*]] op4=[[HASH1_5:[0-9]*]] (match)/>
+; COMBINED: <HASH op0=[[HASH2_1:[0-9]*]] op1=[[HASH2_2:[0-9]*]] op2=[[HASH2_3:[0-9]*]] op3=[[HASH2_4:[0-9]*]] op4=[[HASH2_5:[0-9]*]] (match)/>
+
+; Validate against the value extracted from the combined index
+; COMBINED-DAG: <HASH abbrevid={{[0-9]*}} op0=[[HASH1_1]] op1=[[HASH1_2]] op2=[[HASH1_3]] op3=[[HASH1_4]] op4=[[HASH1_5]]/>
+; COMBINED-DAG: <HASH abbrevid={{[0-9]*}} op0=[[HASH2_1]] op1=[[HASH2_2]] op2=[[HASH2_3]] op3=[[HASH2_4]] op4=[[HASH2_5]]/>
+
+
+; Need a function for the combined index to be populated.
+define void @foo() {
+    ret void
+}

Modified: llvm/trunk/tools/llvm-as/llvm-as.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-as/llvm-as.cpp?rev=265095&r1=265094&r2=265095&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-as/llvm-as.cpp (original)
+++ llvm/trunk/tools/llvm-as/llvm-as.cpp Thu Mar 31 20:30:06 2016
@@ -48,6 +48,9 @@ static cl::opt<bool> EmitSummaryIndex("m
                                       cl::desc("Emit module summary index"),
                                       cl::init(false));
 
+static cl::opt<bool> EmitModuleHash("module-hash", cl::desc("Emit module hash"),
+                                    cl::init(false));
+
 static cl::opt<bool>
 DumpAsm("d", cl::desc("Print assembly as parsed"), cl::Hidden);
 
@@ -82,7 +85,7 @@ static void WriteOutputFile(const Module
 
   if (Force || !CheckBitcodeOutputToConsole(Out->os(), true))
     WriteBitcodeToFile(M, Out->os(), PreserveBitcodeUseListOrder,
-                       EmitSummaryIndex);
+                       EmitSummaryIndex, EmitModuleHash);
 
   // Declare success.
   Out->keep();

Modified: llvm/trunk/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp?rev=265095&r1=265094&r2=265095&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp (original)
+++ llvm/trunk/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp Thu Mar 31 20:30:06 2016
@@ -29,6 +29,7 @@
 
 #include "llvm/Bitcode/BitstreamReader.h"
 #include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/Bitcode/LLVMBitCodes.h"
 #include "llvm/Bitcode/ReaderWriter.h"
 #include "llvm/IR/Verifier.h"
@@ -38,8 +39,10 @@
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/PrettyStackTrace.h"
 #include "llvm/Support/Signals.h"
+#include "llvm/Support/SHA1.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
+#include <array>
 #include <cctype>
 #include <map>
 #include <system_error>
@@ -174,6 +177,7 @@ static const char *GetCodeName(unsigned
       STRINGIFY_CODE(MODULE_CODE, VSTOFFSET)
       STRINGIFY_CODE(MODULE_CODE, METADATA_VALUES_UNUSED)
       STRINGIFY_CODE(MODULE_CODE, SOURCE_FILENAME)
+      STRINGIFY_CODE(MODULE_CODE, HASH)
     }
   case bitc::IDENTIFICATION_BLOCK_ID:
     switch (CodeID) {
@@ -292,6 +296,7 @@ static const char *GetCodeName(unsigned
     default:
       return nullptr;
       STRINGIFY_CODE(MST_CODE, ENTRY)
+      STRINGIFY_CODE(MST_CODE, HASH)
     }
   case bitc::GLOBALVAL_SUMMARY_BLOCK_ID:
     switch (CodeID) {
@@ -481,6 +486,9 @@ static bool ParseBlock(BitstreamCursor &
   if (Stream.EnterSubBlock(BlockID, &NumWords))
     return Error("Malformed block record");
 
+  // Keep it for later, when we see a MODULE_HASH record
+  uint64_t BlockEntryPos = Stream.getCurrentByteNo();
+
   const char *BlockName = nullptr;
   if (DumpRecords) {
     outs() << Indent << "<";
@@ -552,6 +560,7 @@ static bool ParseBlock(BitstreamCursor &
     ++BlockStats.NumRecords;
 
     StringRef Blob;
+    unsigned CurrentRecordPos = Stream.getCurrentByteNo();
     unsigned Code = Stream.readRecord(Entry.ID, Record, &Blob);
 
     // Increment the # occurrences of this code.
@@ -586,6 +595,37 @@ static bool ParseBlock(BitstreamCursor &
       for (unsigned i = 0, e = Record.size(); i != e; ++i)
         outs() << " op" << i << "=" << (int64_t)Record[i];
 
+      // If we found a module hash, let's verify that it matches!
+      if (BlockID == bitc::MODULE_BLOCK_ID && Code == bitc::MODULE_CODE_HASH) {
+        if (Record.size() != 5)
+          outs() << " (invalid)";
+        else {
+          // Recompute the hash and compare it to the one in the bitcode
+          SHA1 Hasher;
+          StringRef Hash;
+          {
+            int BlockSize = CurrentRecordPos - BlockEntryPos;
+            auto Ptr = Stream.getPointerToByte(BlockEntryPos, BlockSize);
+            Hasher.update(ArrayRef<uint8_t>(Ptr, BlockSize));
+            Hash = Hasher.result();
+          }
+          SmallString<20> RecordedHash;
+          RecordedHash.resize(20);
+          int Pos = 0;
+          for (auto &Val : Record) {
+            assert(!(Val >> 32) && "Unexpected high bits set");
+            RecordedHash[Pos++] = (Val >> 24) & 0xFF;
+            RecordedHash[Pos++] = (Val >> 16) & 0xFF;
+            RecordedHash[Pos++] = (Val >> 8) & 0xFF;
+            RecordedHash[Pos++] = (Val >> 0) & 0xFF;
+          }
+          if (Hash == RecordedHash)
+            outs() << " (match)";
+          else
+            outs() << " (!mismatch!)";
+        }
+      }
+
       outs() << "/>";
 
       if (Abbv) {




More information about the llvm-commits mailing list