[cfe-commits] r39079 - in /cfe/cfe/trunk: Lex/IdentifierTable.cpp Lex/Preprocessor.cpp include/clang/Lex/IdentifierTable.h

Wed Jul 11 09:27:22 PDT 2007

Author: sabre
Date: Wed Jul 11 11:27:21 2007
New Revision: 39079

URL: http://llvm.org/viewvc/llvm-project?rev=39079&view=rev
Log:
Pull the string hashtable out of the IdentifierTable, moving it into LLVM's
libsupport.  Now it can be used for other things besides identifier hashing.

Modified:
    cfe/cfe/trunk/Lex/IdentifierTable.cpp
    cfe/cfe/trunk/Lex/Preprocessor.cpp
    cfe/cfe/trunk/include/clang/Lex/IdentifierTable.h

Modified: cfe/cfe/trunk/Lex/IdentifierTable.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/Lex/IdentifierTable.cpp?rev=39079&r1=39078&r2=39079&view=diff

==============================================================================

--- cfe/cfe/trunk/Lex/IdentifierTable.cpp (original)
+++ cfe/cfe/trunk/Lex/IdentifierTable.cpp Wed Jul 11 11:27:21 2007
@@ -39,178 +39,22 @@
 }
 
 //===----------------------------------------------------------------------===//
-// IdentifierVisitor Implementation
-//===----------------------------------------------------------------------===//
-
-IdentifierVisitor::~IdentifierVisitor() {
-}
-
-
-//===----------------------------------------------------------------------===//
 // IdentifierTable Implementation
 //===----------------------------------------------------------------------===//
 
-/// IdentifierBucket - The hash table consists of an array of these.  If Info is
-/// non-null, this is an extant entry, otherwise, it is a hole.
-struct IdentifierBucket {
-  /// FullHashValue - This remembers the full hash value of the identifier for
-  /// easy scanning.
-  unsigned FullHashValue;
-  
-  /// Info - This is a pointer to the actual identifier info object.
-  IdentifierInfo *Info;
-};
-
-IdentifierTable::IdentifierTable(const LangOptions &LangOpts) {
-  HashTableSize = 8192;   // Start with space for 8K identifiers.
-  IdentifierBucket *TableArray = new IdentifierBucket[HashTableSize]();
-  memset(TableArray, 0, HashTableSize*sizeof(IdentifierBucket));
-
-  TheTable = TableArray;
-  NumIdentifiers = 0;
+IdentifierTable::IdentifierTable(const LangOptions &LangOpts)
+  // Start with space for 8K identifiers.
+  : HashTable(8192) {
   
+  IdentifierInfo &Def = get("define");
+    
   // Populate the identifier table with info about keywords for the current
   // language.
   AddKeywords(LangOpts);
-}
-
-IdentifierTable::~IdentifierTable() {
-  IdentifierBucket *TableArray = (IdentifierBucket*)TheTable;
-  for (unsigned i = 0, e = HashTableSize; i != e; ++i) {
-    if (IdentifierInfo *Id = TableArray[i].Info) {
-      // Free memory referenced by the identifier (e.g. macro info).
-      Id->~IdentifierInfo();
-      Allocator.Deallocate(Id);
-    }
-  }
-  delete [] TableArray;
-}
-
-/// HashString - Compute a hash code for the specified string.
-///
-static unsigned HashString(const char *Start, const char *End) {
-  unsigned int Result = 0;
-  // Perl hash function.
-  while (Start != End)
-    Result = Result * 33 + *Start++;
-  Result = Result + (Result >> 5);
-  return Result;
-}
-
-IdentifierInfo &IdentifierTable::get(const char *NameStart,
-                                     const char *NameEnd) {
-  IdentifierBucket *TableArray = (IdentifierBucket*)TheTable;
-
-  unsigned HTSize = HashTableSize;
-  unsigned FullHashValue = HashString(NameStart, NameEnd);
-  unsigned BucketNo = FullHashValue & (HTSize-1);
-  unsigned Length = NameEnd-NameStart;
   
-  unsigned ProbeAmt = 1;
-  while (1) {
-    IdentifierBucket &Bucket = TableArray[BucketNo];
-    IdentifierInfo *BucketII = Bucket.Info;
-    // If we found an empty bucket, this identifier isn't in the table yet.
-    if (BucketII == 0) break;
-
-    // If the full hash value matches, check deeply for a match.  The common
-    // case here is that we are only looking at the buckets (for identifier info
-    // being non-null and for the full hash value) not at the identifiers.  This
-    // is important for cache locality.
-    if (Bucket.FullHashValue == FullHashValue &&
-        memcmp(BucketII->getName(), NameStart, Length) == 0)
-      // We found a match!
-      return *BucketII;
-   
-    // Okay, we didn't find the identifier.  Probe to the next bucket.
-    BucketNo = (BucketNo+ProbeAmt) & (HashTableSize-1);
-    
-    // Use quadratic probing, it has fewer clumping artifacts than linear
-    // probing and has good cache behavior in the common case.
-    ++ProbeAmt;
-  }
-  
-  // Okay, the identifier doesn't already exist, and BucketNo is the bucket to
-  // fill in.  Allocate a new identifier with space for the null-terminated
-  // string at the end.
-  unsigned AllocSize = sizeof(IdentifierInfo)+Length+1;
-
-  // FIXME: uses GCC extension.
-  unsigned Alignment = __alignof__(IdentifierInfo);
-  IdentifierInfo *Identifier =
-    (IdentifierInfo*)Allocator.Allocate(AllocSize, Alignment);
-  new (Identifier) IdentifierInfo();
-  ++NumIdentifiers;
-
-  // Copy the string information.
-  char *StrBuffer = (char*)(Identifier+1);
-  memcpy(StrBuffer, NameStart, Length);
-  StrBuffer[Length] = 0;  // Null terminate string.
-  
-  // Fill in the bucket for the hash table.
-  TableArray[BucketNo].Info = Identifier;
-  TableArray[BucketNo].FullHashValue = FullHashValue;
-  
-  // If the hash table is now more than 3/4 full, rehash into a larger table.
-  if (NumIdentifiers > HashTableSize*3/4)
-    RehashTable();
-  
-  return *Identifier;
-}
-
-IdentifierInfo &IdentifierTable::get(const std::string &Name) {
-  // Don't use c_str() here: no need to be null terminated.
-  const char *NameBytes = &Name[0];
-  unsigned Size = Name.size();
-  return get(NameBytes, NameBytes+Size);
-}
-
-void IdentifierTable::RehashTable() {
-  unsigned NewSize = HashTableSize*2;
-  IdentifierBucket *NewTableArray = new IdentifierBucket[NewSize]();
-  memset(NewTableArray, 0, NewSize*sizeof(IdentifierBucket));
-
-  // Rehash all the identifier into their new buckets.  Luckily we already have
-  // the hash values available :).
-  IdentifierBucket *CurTable = (IdentifierBucket *)TheTable;
-  for (IdentifierBucket *IB = CurTable, *E = CurTable+HashTableSize;
-       IB != E; ++IB) {
-    if (IB->Info) {
-      // Fast case, bucket available.
-      unsigned FullHash = IB->FullHashValue;
-      unsigned NewBucket = FullHash & (NewSize-1);
-      if (NewTableArray[NewBucket].Info == 0) {
-        NewTableArray[FullHash & (NewSize-1)].Info = IB->Info;
-        NewTableArray[FullHash & (NewSize-1)].FullHashValue = FullHash;
-        continue;
-      }
-      
-      unsigned ProbeSize = 1;
-      do {
-        NewBucket = (NewBucket + ProbeSize++) & (NewSize-1);
-      } while (NewTableArray[NewBucket].Info);
-        
-      // Finally found a slot.  Fill it in.
-      NewTableArray[NewBucket].Info = IB->Info;
-      NewTableArray[NewBucket].FullHashValue = FullHash;
-    }
-  }
-
-  delete[] CurTable;
-  
-  TheTable = NewTableArray;
-  HashTableSize = NewSize;
-}
-
+  IdentifierInfo &Def2 = get("define");
 
-/// VisitIdentifiers - This method walks through all of the identifiers,
-/// invoking IV->VisitIdentifier for each of them.
-void IdentifierTable::VisitIdentifiers(const IdentifierVisitor &IV) {
-  IdentifierBucket *TableArray = (IdentifierBucket*)TheTable;
-  for (unsigned i = 0, e = HashTableSize; i != e; ++i) {
-    if (IdentifierInfo *Id = TableArray[i].Info)
-      IV.VisitIdentifier(*Id);
-  }
+  std::cerr << "FOO\n";
 }
 
 //===----------------------------------------------------------------------===//
@@ -224,7 +68,8 @@
 /// The C90/C99/CPP flags are set to 0 if the token should be enabled in the
 /// specified language, set to 1 if it is an extension in the specified
 /// language, and set to 2 if disabled in the specified language.
-static void AddKeyword(const std::string &Keyword, tok::TokenKind TokenCode,
+static void AddKeyword(const char *Keyword, unsigned KWLen,
+                       tok::TokenKind TokenCode,
                        int C90, int C99, int CXX,
                        const LangOptions &LangOpts, IdentifierTable &Table) {
   int Flags = LangOpts.CPlusPlus ? CXX : (LangOpts.C99 ? C99 : C90);
@@ -233,8 +78,7 @@
   // and extensions are disabled.
   if (Flags + LangOpts.NoExtensions >= 2) return;
   
-  const char *Str = &Keyword[0];
-  IdentifierInfo &Info = Table.get(Str, Str+Keyword.size());
+  IdentifierInfo &Info = Table.get(Keyword, Keyword+KWLen);
   Info.setTokenID(TokenCode);
   Info.setIsExtensionToken(Flags == 1);
 }
@@ -273,12 +117,12 @@
   
   // Add keywords and tokens for the current language.
 #define KEYWORD(NAME, FLAGS) \
-  AddKeyword(#NAME, tok::kw_ ## NAME,  \
+  AddKeyword(#NAME, strlen(#NAME), tok::kw_ ## NAME,  \
              ((FLAGS) >> C90Shift) & Mask, \
              ((FLAGS) >> C99Shift) & Mask, \
              ((FLAGS) >> CPPShift) & Mask, LangOpts, *this);
 #define ALIAS(NAME, TOK) \
-  AddKeyword(NAME, tok::kw_ ## TOK, 0, 0, 0, LangOpts, *this);
+  AddKeyword(NAME, strlen(NAME), tok::kw_ ## TOK, 0, 0, 0, LangOpts, *this);
 #define PPKEYWORD(NAME) \
   AddPPKeyword(tok::pp_##NAME, #NAME, strlen(#NAME), *this);
 #define OBJC1_AT_KEYWORD(NAME) \
@@ -295,46 +139,45 @@
 // Stats Implementation
 //===----------------------------------------------------------------------===//
 
+class StatsVisitor : public CStringMapVisitor {
+  unsigned &IDLenTotal;
+  unsigned &MaxIDLen;
+public:
+  StatsVisitor(unsigned &idLenTotal, unsigned &maxIDLen)
+    : IDLenTotal(idLenTotal), MaxIDLen(maxIDLen) {}
+  void Visit(const char *Key, void *Value) const {
+    unsigned IdLen = strlen(Key);
+    IDLenTotal += IdLen;
+    if (MaxIDLen < IdLen)
+      MaxIDLen = IdLen;
+  }
+};
+
+
 /// PrintStats - Print statistics about how well the identifier table is doing
 /// at hashing identifiers.
 void IdentifierTable::PrintStats() const {
-  unsigned NumEmptyBuckets = 0;
+  unsigned NumBuckets = HashTable.getNumBuckets();
+  unsigned NumIdentifiers = HashTable.getNumItems();
+  unsigned NumEmptyBuckets = NumBuckets-NumIdentifiers;
   unsigned AverageIdentifierSize = 0;
   unsigned MaxIdentifierLength = 0;
-  unsigned NumProbed = 0;
   
-  IdentifierBucket *TableArray = (IdentifierBucket*)TheTable;
-  for (unsigned i = 0, e = HashTableSize; i != e; ++i) {
-    if (TableArray[i].Info == 0) {
-      ++NumEmptyBuckets;
-      continue;
-    }
-    IdentifierInfo *Id = TableArray[i].Info;
-    unsigned IdLen = strlen(Id->getName());
-    AverageIdentifierSize += IdLen;
-    if (MaxIdentifierLength < IdLen)
-      MaxIdentifierLength = IdLen;
-
-    // Count the number of times something was probed.
-    if ((TableArray[i].FullHashValue & (e-1)) != i)
-      ++NumProbed;
-
-    // TODO: Figure out maximum times an identifier had to probe for -stats.
-  }
+  // TODO: Figure out maximum times an identifier had to probe for -stats.
+  HashTable.VisitEntries(StatsVisitor(AverageIdentifierSize, 
+                                      MaxIdentifierLength));
   
   std::cerr << "\n*** Identifier Table Stats:\n";
   std::cerr << "# Identifiers:   " << NumIdentifiers << "\n";
   std::cerr << "# Empty Buckets: " << NumEmptyBuckets << "\n";
   std::cerr << "Hash density (#identifiers per bucket): "
-            << NumIdentifiers/(double)HashTableSize << "\n";
-  std::cerr << "Num probed identifiers: " << NumProbed << " ("
-            << NumProbed*100.0/NumIdentifiers << "%)\n";
+            << NumIdentifiers/(double)NumBuckets << "\n";
   std::cerr << "Ave identifier length: "
             << (AverageIdentifierSize/(double)NumIdentifiers) << "\n";
   std::cerr << "Max identifier length: " << MaxIdentifierLength << "\n";
   
   // Compute statistics about the memory allocated for identifiers.
-  Allocator.PrintStats();
+  HashTable.getAllocator().PrintStats();
 }
 
 

Modified: cfe/cfe/trunk/Lex/Preprocessor.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/Lex/Preprocessor.cpp?rev=39079&r1=39078&r2=39079&view=diff

==============================================================================
--- cfe/cfe/trunk/Lex/Preprocessor.cpp (original)
+++ cfe/cfe/trunk/Lex/Preprocessor.cpp Wed Jul 11 11:27:21 2007
@@ -882,11 +882,12 @@
 }
 
 namespace {
-struct UnusedIdentifierReporter : public IdentifierVisitor {
+struct UnusedIdentifierReporter : public CStringMapVisitor {
   Preprocessor &PP;
   UnusedIdentifierReporter(Preprocessor &pp) : PP(pp) {}
 
-  void VisitIdentifier(IdentifierInfo &II) const {
+  void Visit(const char *Key, void *Value) const {
+    IdentifierInfo &II = *static_cast<IdentifierInfo*>(Value);
     if (II.getMacroInfo() && !II.getMacroInfo()->isUsed())
       PP.Diag(II.getMacroInfo()->getDefinitionLoc(), diag::pp_macro_not_used);
   }

Modified: cfe/cfe/trunk/include/clang/Lex/IdentifierTable.h
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/include/clang/Lex/IdentifierTable.h?rev=39079&r1=39078&r2=39079&view=diff

==============================================================================
--- cfe/cfe/trunk/include/clang/Lex/IdentifierTable.h (original)
+++ cfe/cfe/trunk/include/clang/Lex/IdentifierTable.h Wed Jul 11 11:27:21 2007
@@ -7,8 +7,7 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file defines the IdentifierInfo, IdentifierVisitor, and
-// IdentifierTable interfaces.
+// This file defines the IdentifierInfo and IdentifierTable interfaces.
 //
 //===----------------------------------------------------------------------===//
 
@@ -16,7 +15,7 @@
 #define LLVM_CLANG_IDENTIFIERTABLE_H
 
 #include "clang/Basic/TokenKinds.h"
-#include "llvm/Support/Allocator.h"
+#include "llvm/ADT/CStringMap.h"
 #include <string> 
 
 namespace llvm {
@@ -42,7 +41,7 @@
 public:
   IdentifierInfo();
   ~IdentifierInfo();
-  
+
   /// getName - Return the actual string for this identifier.  The length of
   /// this string is stored in NameLen, and the returned string is properly null
   /// terminated.
@@ -99,14 +98,6 @@
   void setFETokenInfo(void *T) { FETokenInfo = T; }
 };
 
-/// IdentifierVisitor - Subclasses of this class may be implemented to walk all
-/// of the defined identifiers.
-class IdentifierVisitor {
-public:
-  virtual ~IdentifierVisitor();
-  virtual void VisitIdentifier(IdentifierInfo &II) const = 0;
-};
-
 
 
 /// IdentifierTable - This table implements an efficient mapping from strings to
@@ -114,37 +105,40 @@
 /// extremely performance-critical piece of the code, as each occurrence of
 /// every identifier goes through here when lexed.
 class IdentifierTable {
-  void *TheTable;
   // Shark shows that using MallocAllocator is *much* slower than using this
   // BumpPtrAllocator!
-#if 1
-  BumpPtrAllocator Allocator;
-#else
-  MallocAllocator Allocator;
-#endif
-  unsigned HashTableSize;
-  unsigned NumIdentifiers;
+  CStringMap<IdentifierInfo, BumpPtrAllocator> HashTable;
 public:
   /// IdentifierTable ctor - Create the identifier table, populating it with
   /// info about the language keywords for the language specified by LangOpts.
   IdentifierTable(const LangOptions &LangOpts);
-  ~IdentifierTable();
   
   /// get - Return the identifier token info for the specified named identifier.
   ///
-  IdentifierInfo &get(const char *NameStart, const char *NameEnd);
-  IdentifierInfo &get(const std::string &Name);
+  IdentifierInfo &get(const char *NameStart, const char *NameEnd) {
+    return HashTable.GetOrCreateValue(NameStart, NameEnd);
+  }
+  
+  IdentifierInfo &get(const char *Name) {
+    return get(Name, Name+strlen(Name));
+  }
+  IdentifierInfo &get(const std::string &Name) {
+    // Don't use c_str() here: no need to be null terminated.
+    const char *NameBytes = &Name[0];
+    return get(NameBytes, NameBytes+Name.size());
+  }
   
   /// VisitIdentifiers - This method walks through all of the identifiers,
   /// invoking IV.Visit for each of them.
-  void VisitIdentifiers(const IdentifierVisitor &IV);
+  void VisitIdentifiers(const CStringMapVisitor &IV) {
+    HashTable.VisitEntries(IV);
+  }
   
   /// PrintStats - Print some statistics to stderr that indicate how well the
   /// hashing is doing.
   void PrintStats() const;
 private:
   void AddKeywords(const LangOptions &LangOpts);
-  void RehashTable();
 };
 
 }  // end namespace llvm