r176566 - [PCH] For HeaderFileInfoTrait, hash the key using size & time of the file.

Argyrios Kyrtzidis akyrtzi at gmail.com
Wed Mar 6 10:12:47 PST 2013


Author: akirtzidis
Date: Wed Mar  6 12:12:47 2013
New Revision: 176566

URL: http://llvm.org/viewvc/llvm-project?rev=176566&view=rev
Log:
[PCH] For HeaderFileInfoTrait, hash the key using size & time of the file.

Previously the hash would be the filename portion of the path, which could be
different for a filename with different case or a symbolic link with a different
name completely.
This did not actually create any issue so far because by validating all headers
in the PCH we created uniqued FileEntries based on inodes, so an #include of
a symbolic link (refering to a file from the PCH) would end up with a FileEntry
with filename same as the one recorded in the PCH.

Modified:
    cfe/trunk/lib/Serialization/ASTReader.cpp
    cfe/trunk/lib/Serialization/ASTReaderInternals.h
    cfe/trunk/lib/Serialization/ASTWriter.cpp

Modified: cfe/trunk/lib/Serialization/ASTReader.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Serialization/ASTReader.cpp?rev=176566&r1=176565&r2=176566&view=diff
==============================================================================
--- cfe/trunk/lib/Serialization/ASTReader.cpp (original)
+++ cfe/trunk/lib/Serialization/ASTReader.cpp Wed Mar  6 12:12:47 2013
@@ -41,6 +41,7 @@
 #include "clang/Serialization/GlobalModuleIndex.h"
 #include "clang/Serialization/ModuleManager.h"
 #include "clang/Serialization/SerializationDiagnostic.h"
+#include "llvm/ADT/Hashing.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Bitcode/BitstreamReader.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -1294,24 +1295,28 @@ ASTReader::getGlobalPreprocessedEntityID
   return LocalID + I->second;
 }
 
-unsigned HeaderFileInfoTrait::ComputeHash(const char *path) {
-  return llvm::HashString(llvm::sys::path::filename(path));
+unsigned HeaderFileInfoTrait::ComputeHash(internal_key_ref ikey) {
+  return llvm::hash_combine(ikey.Size, ikey.ModTime);
 }
     
 HeaderFileInfoTrait::internal_key_type 
-HeaderFileInfoTrait::GetInternalKey(const char *path) { return path; }
+HeaderFileInfoTrait::GetInternalKey(const FileEntry *FE) {
+  internal_key_type ikey = { FE->getSize(), FE->getModificationTime(),
+                             FE->getName() };
+  return ikey;
+}
     
-bool HeaderFileInfoTrait::EqualKey(internal_key_type a, internal_key_type b) {
-  if (strcmp(a, b) == 0)
-    return true;
-  
-  if (llvm::sys::path::filename(a) != llvm::sys::path::filename(b))
+bool HeaderFileInfoTrait::EqualKey(internal_key_ref a, internal_key_ref b) {
+  if (a.Size != b.Size || a.ModTime != b.ModTime)
     return false;
 
+  if (strcmp(a.Filename, b.Filename) == 0)
+    return true;
+  
   // Determine whether the actual files are equivalent.
   FileManager &FileMgr = Reader.getFileManager();
-  const FileEntry *FEA = FileMgr.getFile(a);
-  const FileEntry *FEB = FileMgr.getFile(b);
+  const FileEntry *FEA = FileMgr.getFile(a.Filename);
+  const FileEntry *FEB = FileMgr.getFile(b.Filename);
   return (FEA && FEA == FEB);
 }
     
@@ -1319,11 +1324,20 @@ std::pair<unsigned, unsigned>
 HeaderFileInfoTrait::ReadKeyDataLength(const unsigned char*& d) {
   unsigned KeyLen = (unsigned) clang::io::ReadUnalignedLE16(d);
   unsigned DataLen = (unsigned) *d++;
-  return std::make_pair(KeyLen + 1, DataLen);
+  return std::make_pair(KeyLen, DataLen);
 }
-    
+
+HeaderFileInfoTrait::internal_key_type
+HeaderFileInfoTrait::ReadKey(const unsigned char *d, unsigned) {
+  internal_key_type ikey;
+  ikey.Size = off_t(clang::io::ReadUnalignedLE64(d));
+  ikey.ModTime = time_t(clang::io::ReadUnalignedLE64(d));
+  ikey.Filename = (const char *)d;
+  return ikey;
+}
+
 HeaderFileInfoTrait::data_type 
-HeaderFileInfoTrait::ReadData(const internal_key_type, const unsigned char *d,
+HeaderFileInfoTrait::ReadData(internal_key_ref, const unsigned char *d,
                               unsigned DataLen) {
   const unsigned char *End = d + DataLen;
   using namespace clang::io;
@@ -4109,7 +4123,7 @@ namespace {
         return false;
 
       // Look in the on-disk hash table for an entry for this file name.
-      HeaderFileInfoLookupTable::iterator Pos = Table->find(This->FE->getName());
+      HeaderFileInfoLookupTable::iterator Pos = Table->find(This->FE);
       if (Pos == Table->end())
         return false;
 

Modified: cfe/trunk/lib/Serialization/ASTReaderInternals.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Serialization/ASTReaderInternals.h?rev=176566&r1=176565&r2=176566&view=diff
==============================================================================
--- cfe/trunk/lib/Serialization/ASTReaderInternals.h (original)
+++ cfe/trunk/lib/Serialization/ASTReaderInternals.h Wed Mar  6 12:12:47 2013
@@ -25,6 +25,7 @@ namespace clang {
 class ASTReader;
 class HeaderSearch;
 struct HeaderFileInfo;
+class FileEntry;
   
 namespace serialization {
 
@@ -198,8 +199,14 @@ class HeaderFileInfoTrait {
   const char *FrameworkStrings;
 
 public:
-  typedef const char *external_key_type;
-  typedef const char *internal_key_type;
+  typedef const FileEntry *external_key_type;
+
+  struct internal_key_type {
+    off_t Size;
+    time_t ModTime;
+    const char *Filename;
+  };
+  typedef const internal_key_type &internal_key_ref;
   
   typedef HeaderFileInfo data_type;
   
@@ -207,19 +214,16 @@ public:
                       const char *FrameworkStrings)
   : Reader(Reader), M(M), HS(HS), FrameworkStrings(FrameworkStrings) { }
   
-  static unsigned ComputeHash(const char *path);
-  static internal_key_type GetInternalKey(const char *path);
-  bool EqualKey(internal_key_type a, internal_key_type b);
+  static unsigned ComputeHash(internal_key_ref ikey);
+  static internal_key_type GetInternalKey(const FileEntry *FE);
+  bool EqualKey(internal_key_ref a, internal_key_ref b);
   
   static std::pair<unsigned, unsigned>
   ReadKeyDataLength(const unsigned char*& d);
   
-  static internal_key_type ReadKey(const unsigned char *d, unsigned) {
-    return (const char *)d;
-  }
+  static internal_key_type ReadKey(const unsigned char *d, unsigned);
   
-  data_type ReadData(const internal_key_type, const unsigned char *d,
-                     unsigned DataLen);
+  data_type ReadData(internal_key_ref,const unsigned char *d, unsigned DataLen);
 };
 
 /// \brief The on-disk hash table used for known header files.

Modified: cfe/trunk/lib/Serialization/ASTWriter.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Serialization/ASTWriter.cpp?rev=176566&r1=176565&r2=176566&view=diff
==============================================================================
--- cfe/trunk/lib/Serialization/ASTWriter.cpp (original)
+++ cfe/trunk/lib/Serialization/ASTWriter.cpp Wed Mar  6 12:12:47 2013
@@ -42,6 +42,7 @@
 #include "clang/Serialization/ASTReader.h"
 #include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/APInt.h"
+#include "llvm/ADT/Hashing.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Bitcode/BitstreamWriter.h"
 #include "llvm/Support/FileSystem.h"
@@ -1372,32 +1373,38 @@ namespace {
     HeaderFileInfoTrait(ASTWriter &Writer)
       : Writer(Writer) { }
     
-    typedef const char *key_type;
-    typedef key_type key_type_ref;
+    struct key_type {
+      const FileEntry *FE;
+      const char *Filename;
+    };
+    typedef const key_type &key_type_ref;
     
     typedef HeaderFileInfo data_type;
     typedef const data_type &data_type_ref;
     
-    static unsigned ComputeHash(const char *path) {
-      // The hash is based only on the filename portion of the key, so that the
-      // reader can match based on filenames when symlinking or excess path
-      // elements ("foo/../", "../") change the form of the name. However,
-      // complete path is still the key.
-      return llvm::HashString(llvm::sys::path::filename(path));
+    static unsigned ComputeHash(key_type_ref key) {
+      // The hash is based only on size/time of the file, so that the reader can
+      // match even when symlinking or excess path elements ("foo/../", "../")
+      // change the form of the name. However, complete path is still the key.
+      return llvm::hash_combine(key.FE->getSize(),
+                                key.FE->getModificationTime());
     }
     
     std::pair<unsigned,unsigned>
-    EmitKeyDataLength(raw_ostream& Out, const char *path,
-                      data_type_ref Data) {
-      unsigned StrLen = strlen(path);
-      clang::io::Emit16(Out, StrLen);
+    EmitKeyDataLength(raw_ostream& Out, key_type_ref key, data_type_ref Data) {
+      unsigned KeyLen = strlen(key.Filename) + 1 + 8 + 8;
+      clang::io::Emit16(Out, KeyLen);
       unsigned DataLen = 1 + 2 + 4 + 4;
       clang::io::Emit8(Out, DataLen);
-      return std::make_pair(StrLen + 1, DataLen);
+      return std::make_pair(KeyLen, DataLen);
     }
     
-    void EmitKey(raw_ostream& Out, const char *path, unsigned KeyLen) {
-      Out.write(path, KeyLen);
+    void EmitKey(raw_ostream& Out, key_type_ref key, unsigned KeyLen) {
+      clang::io::Emit64(Out, key.FE->getSize());
+      KeyLen -= 8;
+      clang::io::Emit64(Out, key.FE->getModificationTime());
+      KeyLen -= 8;
+      Out.write(key.Filename, KeyLen);
     }
     
     void EmitData(raw_ostream &Out, key_type_ref,
@@ -1479,7 +1486,8 @@ void ASTWriter::WriteHeaderSearch(const
       SavedStrings.push_back(Filename);
     }
     
-    Generator.insert(Filename, HFI, GeneratorTrait);
+    HeaderFileInfoTrait::key_type key = { File, Filename };
+    Generator.insert(key, HFI, GeneratorTrait);
     ++NumHeaderSearchEntries;
   }
   





More information about the cfe-commits mailing list