[llvm] r301487 - Replace HashString algorithm with xxHash64

Rui Ueyama via llvm-commits llvm-commits at lists.llvm.org
Wed Apr 26 15:45:05 PDT 2017


Author: ruiu
Date: Wed Apr 26 17:45:04 2017
New Revision: 301487

URL: http://llvm.org/viewvc/llvm-project?rev=301487&view=rev
Log:
Replace HashString algorithm with xxHash64

The previous algorithm processed one character at a time, which is very
painful on a modern CPU. Replace it with xxHash64, which already exists
in the codebase and is fairly fast.

Patch from Scott Smith!

Differential Revision: https://reviews.llvm.org/D32509

Modified:
    llvm/trunk/include/llvm/ADT/StringExtras.h
    llvm/trunk/include/llvm/Support/xxhash.h
    llvm/trunk/lib/Support/xxhash.cpp
    llvm/trunk/test/DebugInfo/Generic/accel-table-hash-collisions.ll
    llvm/trunk/test/DebugInfo/PDB/pdbdump-headers.test
    llvm/trunk/test/DebugInfo/X86/gnu-public-names.ll

Modified: llvm/trunk/include/llvm/ADT/StringExtras.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/ADT/StringExtras.h?rev=301487&r1=301486&r2=301487&view=diff
==============================================================================
--- llvm/trunk/include/llvm/ADT/StringExtras.h (original)
+++ llvm/trunk/include/llvm/ADT/StringExtras.h Wed Apr 26 17:45:04 2017
@@ -16,6 +16,7 @@
 
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/DataTypes.h"
+#include "llvm/Support/xxhash.h"
 #include <iterator>
 
 namespace llvm {
@@ -151,15 +152,11 @@ void SplitString(StringRef Source,
 
 /// HashString - Hash function for strings.
 ///
-/// This is the Bernstein hash function.
-//
-// FIXME: Investigate whether a modified bernstein hash function performs
-// better: http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx
-//   X*33+c -> X*33^c
-static inline unsigned HashString(StringRef Str, unsigned Result = 0) {
-  for (StringRef::size_type i = 0, e = Str.size(); i != e; ++i)
-    Result = Result * 33 + (unsigned char)Str[i];
-  return Result;
+/// Just fall back on xxHash64.  Yes we drop the high bits on platforms where
+/// unsigned == 4 bytes (which includes x86_64), but xxHash64 already has good
+/// avalanching, so we wouldn't gain much if anything.
+static inline unsigned HashString(StringRef Str, unsigned Seed = 0) {
+  return xxHash64(Str, Seed);
 }
 
 /// Returns the English suffix for an ordinal integer (-st, -nd, -rd, -th).

Modified: llvm/trunk/include/llvm/Support/xxhash.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Support/xxhash.h?rev=301487&r1=301486&r2=301487&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Support/xxhash.h (original)
+++ llvm/trunk/include/llvm/Support/xxhash.h Wed Apr 26 17:45:04 2017
@@ -41,7 +41,7 @@
 #include "llvm/ADT/StringRef.h"
 
 namespace llvm {
-uint64_t xxHash64(llvm::StringRef Data);
+uint64_t xxHash64(llvm::StringRef Data, uint64_t Seed = 0);
 }
 
 #endif

Modified: llvm/trunk/lib/Support/xxhash.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/xxhash.cpp?rev=301487&r1=301486&r2=301487&view=diff
==============================================================================
--- llvm/trunk/lib/Support/xxhash.cpp (original)
+++ llvm/trunk/lib/Support/xxhash.cpp Wed Apr 26 17:45:04 2017
@@ -68,9 +68,8 @@ static uint64_t mergeRound(uint64_t Acc,
   return Acc;
 }
 
-uint64_t llvm::xxHash64(StringRef Data) {
+uint64_t llvm::xxHash64(StringRef Data, uint64_t Seed) {
   size_t Len = Data.size();
-  uint64_t Seed = 0;
   const char *P = Data.data();
   const char *const BEnd = P + Len;
   uint64_t H64;

Modified: llvm/trunk/test/DebugInfo/Generic/accel-table-hash-collisions.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/DebugInfo/Generic/accel-table-hash-collisions.ll?rev=301487&r1=301486&r2=301487&view=diff
==============================================================================
--- llvm/trunk/test/DebugInfo/Generic/accel-table-hash-collisions.ll (original)
+++ llvm/trunk/test/DebugInfo/Generic/accel-table-hash-collisions.ll Wed Apr 26 17:45:04 2017
@@ -27,12 +27,12 @@
 
 ; Check that all the names are present in the output
 ; CHECK:  Hash = 0x00597841
-; CHECK:    Name: {{[0-9a-f]*}} "is"
 ; CHECK:    Name: {{[0-9a-f]*}} "k1"
+; CHECK:    Name: {{[0-9a-f]*}} "is"
 
 ; CHECK: Hash = 0xa4b42a1e
-; CHECK:    Name: {{[0-9a-f]*}} "_ZN5clang23DataRecursiveASTVisitorIN12_GLOBAL__N_124UnusedBackingIvarCheckerEE26TraverseCUDAKernelCallExprEPNS_18CUDAKernelCallExprE"
 ; CHECK:    Name: {{[0-9a-f]*}} "_ZN4llvm16DenseMapIteratorIPNS_10MDLocationENS_6detail13DenseSetEmptyENS_10MDNodeInfoIS1_EENS3_12DenseSetPairIS2_EELb0EE23AdvancePastEmptyBucketsEv"
+; CHECK:    Name: {{[0-9a-f]*}} "_ZN5clang23DataRecursiveASTVisitorIN12_GLOBAL__N_124UnusedBackingIvarCheckerEE26TraverseCUDAKernelCallExprEPNS_18CUDAKernelCallExprE"
 
 ; CHECK: Hash = 0xeee7c0b2
 ; CHECK:    Name: {{[0-9a-f]*}} "_ZNK4llvm12LivePhysRegs5printERNS_11raw_ostreamE"

Modified: llvm/trunk/test/DebugInfo/PDB/pdbdump-headers.test
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/DebugInfo/PDB/pdbdump-headers.test?rev=301487&r1=301486&r2=301487&view=diff
==============================================================================
--- llvm/trunk/test/DebugInfo/PDB/pdbdump-headers.test (original)
+++ llvm/trunk/test/DebugInfo/PDB/pdbdump-headers.test Wed Apr 26 17:45:04 2017
@@ -73,8 +73,8 @@
 ; EMPTY-NEXT:   Guid: {0B355641-86A0-A249-896F-9988FAE52FF0}
 ; EMPTY-NEXT:   Features: 0x1
 ; EMPTY-NEXT:   Named Streams {
-; EMPTY-NEXT:     /names: 13
 ; EMPTY-NEXT:     /LinkInfo: 5
+; EMPTY-NEXT:     /names: 13
 ; EMPTY-NEXT:     /src/headerblock: 9
 ; EMPTY-NEXT:   }
 ; EMPTY-NEXT: }
@@ -1837,8 +1837,8 @@
 ; BIG-NEXT:   Guid: {880ECC89-DF81-0B4F-839C-58CBD052E937}
 ; BIG-NEXT:   Features: 0x1
 ; BIG-NEXT:   Named Streams {
-; BIG-NEXT:     /names: 13
 ; BIG-NEXT:     /LinkInfo: 5
+; BIG-NEXT:     /names: 13
 ; BIG-NEXT:     /src/headerblock: 61
 ; BIG-NEXT:   }
 ; BIG-NEXT: }

Modified: llvm/trunk/test/DebugInfo/X86/gnu-public-names.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/DebugInfo/X86/gnu-public-names.ll?rev=301487&r1=301486&r2=301487&view=diff
==============================================================================
--- llvm/trunk/test/DebugInfo/X86/gnu-public-names.ll (original)
+++ llvm/trunk/test/DebugInfo/X86/gnu-public-names.ll Wed Apr 26 17:45:04 2017
@@ -66,6 +66,7 @@
 
 ; ASM: .section        .debug_gnu_pubnames
 ; ASM: .byte   32                      # Kind: VARIABLE, EXTERNAL
+; ASM: .byte   32                      # Kind: VARIABLE, EXTERNAL
 ; ASM-NEXT: .asciz  "global_variable"       # External Name
 
 ; ASM: .section        .debug_gnu_pubtypes
@@ -197,27 +198,29 @@
 ; CHECK-LABEL: .debug_gnu_pubnames contents:
 ; CHECK-NEXT: length = {{.*}} version = 0x0002 unit_offset = 0x00000000 unit_size = {{.*}}
 ; CHECK-NEXT: Offset     Linkage  Kind     Name
+; CHECK-NEXT:  [[ANON_INNER_B]] STATIC VARIABLE "(anonymous namespace)::inner::b"
+; CHECK-NEXT:  [[MEM_FUNC]] EXTERNAL FUNCTION "C::member_function"
+; CHECK-NEXT:  [[OUTER]] EXTERNAL TYPE "outer"
+; CHECK-NEXT:  [[GLOB_NS_VAR]] EXTERNAL VARIABLE "ns::global_namespace_variable"
+; CHECK-NEXT:  [[GLOB_VAR]] EXTERNAL VARIABLE "global_variable"
+; CHECK-NEXT:  EXTERNAL FUNCTION "f7"
+; CHECK-NEXT:  [[OUTER_ANON]] EXTERNAL TYPE "outer::(anonymous namespace)"
 ; CHECK-NEXT:  [[GLOBAL_FUNC]] EXTERNAL FUNCTION "global_function"
+; CHECK-NEXT:  [[GLOB_NS_FUNC]] EXTERNAL FUNCTION "ns::global_namespace_function"
 ; CHECK-NEXT:  [[NS]] EXTERNAL TYPE     "ns"
+; CHECK-NEXT:  [[ANON]] EXTERNAL TYPE "(anonymous namespace)"
 ; CHECK-NEXT:  [[OUTER_ANON_C]] STATIC VARIABLE "outer::(anonymous namespace)::c"
+; CHECK-NEXT:  [[D_VAR]] EXTERNAL VARIABLE "ns::d"
+; CHECK-NEXT:  [[STATIC_MEM_FUNC]] EXTERNAL FUNCTION "C::static_member_function"
+; CHECK-NEXT:  [[STATIC_MEM_VAR]] EXTERNAL VARIABLE "C::static_member_variable"
 ; CHECK-NEXT:  [[ANON_I]] STATIC VARIABLE "(anonymous namespace)::i"
+; CHECK-NEXT:  [[ANON_INNER]] EXTERNAL TYPE "(anonymous namespace)::inner"
+; CHECK-NEXT:  EXTERNAL FUNCTION "f3"
 ; GCC Doesn't put local statics in pubnames, but it seems not unreasonable and
 ; comes out naturally from LLVM's implementation, so I'm OK with it for now. If
 ; it's demonstrated that this is a major size concern or degrades debug info
 ; consumer behavior, feel free to change it.
 ; CHECK-NEXT:  [[F3_Z]] STATIC VARIABLE "f3::z"
-; CHECK-NEXT:  [[ANON]] EXTERNAL TYPE "(anonymous namespace)"
-; CHECK-NEXT:  [[OUTER_ANON]] EXTERNAL TYPE "outer::(anonymous namespace)"
-; CHECK-NEXT:  [[ANON_INNER_B]] STATIC VARIABLE "(anonymous namespace)::inner::b"
-; CHECK-NEXT:  [[OUTER]] EXTERNAL TYPE "outer"
-; CHECK-NEXT:  [[MEM_FUNC]] EXTERNAL FUNCTION "C::member_function"
-; CHECK-NEXT:  [[GLOB_VAR]] EXTERNAL VARIABLE "global_variable"
-; CHECK-NEXT:  [[GLOB_NS_VAR]] EXTERNAL VARIABLE "ns::global_namespace_variable"
-; CHECK-NEXT:  [[ANON_INNER]] EXTERNAL TYPE "(anonymous namespace)::inner"
-; CHECK-NEXT:  [[D_VAR]] EXTERNAL VARIABLE "ns::d"
-; CHECK-NEXT:  [[GLOB_NS_FUNC]] EXTERNAL FUNCTION "ns::global_namespace_function"
-; CHECK-NEXT:  [[STATIC_MEM_VAR]] EXTERNAL VARIABLE "C::static_member_variable"
-; CHECK-NEXT:  [[STATIC_MEM_FUNC]] EXTERNAL FUNCTION "C::static_member_function"
 
 ; CHECK-LABEL: debug_gnu_pubtypes contents:
 ; CHECK: Offset     Linkage  Kind     Name




More information about the llvm-commits mailing list