[llvm] r231732 - DwarfAccelTable: Fix handling of hash collisions.

Frederic Riss friss at apple.com
Mon Mar 9 17:46:31 PDT 2015


Author: friss
Date: Mon Mar  9 19:46:31 2015
New Revision: 231732

URL: http://llvm.org/viewvc/llvm-project?rev=231732&view=rev
Log:
DwarfAccelTable: Fix handling of hash collisions.

It turns out accelerator tables where totally broken if they contained
entries with colliding hashes. The failure mode is pretty bad, as it not
only impacted the colliding entries, but would basically make all the
entries after the first hash collision pointing in the wrong place.

The testcase uses the symbol names that where found to collide during a
clang build.

>From a performance point of view, the patch adds a sort and a linear
walk over each bucket contents. While it has a measurable impact on the
accelerator table emission, it's not showing up significantly in clang
profiles (and I'd argue that correctness is priceless :-)).

Added:
    llvm/trunk/test/DebugInfo/accel-table-hash-collisions.ll
Modified:
    llvm/trunk/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp

Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp?rev=231732&r1=231731&r2=231732&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp (original)
+++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp Mon Mar  9 19:46:31 2015
@@ -98,6 +98,15 @@ void DwarfAccelTable::FinalizeTable(AsmP
     Buckets[bucket].push_back(Data[i]);
     Data[i]->Sym = Asm->GetTempSymbol(Prefix, i);
   }
+
+  // Sort the contents of the buckets by hash value so that hash
+  // collisions end up together. Stable sort makes testing easier and
+  // doesn't cost much more.
+  for (size_t i = 0; i < Buckets.size(); ++i)
+    std::stable_sort(Buckets[i].begin(), Buckets[i].end(),
+                     [] (HashData *LHS, HashData *RHS) {
+                       return LHS->HashValue < RHS->HashValue;
+                     });
 }
 
 // Emits the header for the table via the AsmPrinter.
@@ -137,19 +146,32 @@ void DwarfAccelTable::EmitBuckets(AsmPri
       Asm->EmitInt32(index);
     else
       Asm->EmitInt32(UINT32_MAX);
-    index += Buckets[i].size();
+    // Buckets point in the list of hashes, not to the data. Do not
+    // increment the index multiple times in case of hash collisions.
+    uint64_t PrevHash = UINT64_MAX;
+    for (auto *HD : Buckets[i]) {
+      uint32_t HashValue = HD->HashValue;
+      if (PrevHash != HashValue)
+        ++index;
+      PrevHash = HashValue;
+    }
   }
 }
 
 // Walk through the buckets and emit the individual hashes for each
 // bucket.
 void DwarfAccelTable::EmitHashes(AsmPrinter *Asm) {
+  uint64_t PrevHash = UINT64_MAX;
   for (size_t i = 0, e = Buckets.size(); i < e; ++i) {
     for (HashList::const_iterator HI = Buckets[i].begin(),
                                   HE = Buckets[i].end();
          HI != HE; ++HI) {
+      uint32_t HashValue = (*HI)->HashValue;
+      if (PrevHash == HashValue)
+        continue;
       Asm->OutStreamer.AddComment("Hash in Bucket " + Twine(i));
-      Asm->EmitInt32((*HI)->HashValue);
+      Asm->EmitInt32(HashValue);
+      PrevHash = HashValue;
     }
   }
 }
@@ -159,10 +181,15 @@ void DwarfAccelTable::EmitHashes(AsmPrin
 // beginning of the section. The non-section symbol will be output later
 // when we emit the actual data.
 void DwarfAccelTable::EmitOffsets(AsmPrinter *Asm, MCSymbol *SecBegin) {
+  uint64_t PrevHash = UINT64_MAX;
   for (size_t i = 0, e = Buckets.size(); i < e; ++i) {
     for (HashList::const_iterator HI = Buckets[i].begin(),
                                   HE = Buckets[i].end();
          HI != HE; ++HI) {
+      uint32_t HashValue = (*HI)->HashValue;
+      if (PrevHash == HashValue)
+        continue;
+      PrevHash = HashValue;
       Asm->OutStreamer.AddComment("Offset in Bucket " + Twine(i));
       MCContext &Context = Asm->OutStreamer.getContext();
       const MCExpr *Sub = MCBinaryExpr::CreateSub(
@@ -183,6 +210,10 @@ void DwarfAccelTable::EmitData(AsmPrinte
     for (HashList::const_iterator HI = Buckets[i].begin(),
                                   HE = Buckets[i].end();
          HI != HE; ++HI) {
+      // Terminate the previous entry if there is no hash collision
+      // with the current one.
+      if (PrevHash != UINT64_MAX && PrevHash != (*HI)->HashValue)
+        Asm->EmitInt32(0);
       // Remember to emit the label for our offset.
       Asm->OutStreamer.EmitLabel((*HI)->Sym);
       Asm->OutStreamer.AddComment((*HI)->Str);
@@ -201,11 +232,10 @@ void DwarfAccelTable::EmitData(AsmPrinte
           Asm->EmitInt8(HD->Flags);
         }
       }
-      // Emit a 0 to terminate the data unless we have a hash collision.
-      if (PrevHash != (*HI)->HashValue)
-        Asm->EmitInt32(0);
       PrevHash = (*HI)->HashValue;
     }
+    // Emit the final end marker for the bucket.
+    Asm->EmitInt32(0);
   }
 }
 

Added: llvm/trunk/test/DebugInfo/accel-table-hash-collisions.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/DebugInfo/accel-table-hash-collisions.ll?rev=231732&view=auto
==============================================================================
--- llvm/trunk/test/DebugInfo/accel-table-hash-collisions.ll (added)
+++ llvm/trunk/test/DebugInfo/accel-table-hash-collisions.ll Mon Mar  9 19:46:31 2015
@@ -0,0 +1,92 @@
+; REQUIRES: object-emission
+; RUN: %llc_dwarf -dwarf-accel-tables=Enable -filetype=obj -o - < %s | llvm-dwarfdump -debug-dump=apple_names - | FileCheck %s
+
+; Generated from the following C code using
+; clang -S -emit-llvm hash-collision.c
+;
+; The names of the variables have been chosen so that they produce hash collisions.
+; There are 12 names here that are hashed to only 6 hashes (each pair of lines
+; hashes to the same value, see the CHECK lines below).
+;
+; int ForceTopDown;
+; int _ZNSt3__116allocator_traitsINS_9allocatorINS_11__tree_nodeINS_12__value_typeIPN4llvm10BasicBlockEPNS4_10RegionNodeEEEPvEEEEE11__constructIS9_JNS_4pairIS6_S8_EEEEEvNS_17integral_constantIbLb1EEERSC_PT_DpOT0_;
+; int _ZN5clang23DataRecursiveASTVisitorIN12_GLOBAL__N_124UnusedBackingIvarCheckerEE26TraverseCUDAKernelCallExprEPNS_18CUDAKernelCallExprE; 
+; int _ZN4llvm16DenseMapIteratorIPNS_10MDLocationENS_6detail13DenseSetEmptyENS_10MDNodeInfoIS1_EENS3_12DenseSetPairIS2_EELb0EE23AdvancePastEmptyBucketsEv;
+; int _ZNK4llvm12LivePhysRegs5printERNS_11raw_ostreamE;
+; int _ZN4llvm15ScalarEvolution14getSignedRangeEPKNS_4SCEVE;
+; int k1;
+; int is;
+; int setStmt;
+; int _ZN4llvm5TwineC1Ei;
+; int _ZNK5clang12OverrideAttr5cloneERNS_10ASTContextE;
+; int _ZN4llvm22MachineModuleInfoMachOD2Ev;
+
+; Check that we have the right amount of hashes.
+; CHECK: Bucket count = 6
+; CHECK: Hashes count = 6
+
+; Check that all the names are present in the output
+; CHECK:  Hash = 0x00597841
+; CHECK:    Name: {{[0-9a-f]*}} "is"
+; CHECK:    Name: {{[0-9a-f]*}} "k1"
+
+; CHECK: Hash = 0xa4b42a1e
+; CHECK:    Name: {{[0-9a-f]*}} "_ZN5clang23DataRecursiveASTVisitorIN12_GLOBAL__N_124UnusedBackingIvarCheckerEE26TraverseCUDAKernelCallExprEPNS_18CUDAKernelCallExprE"
+; CHECK:    Name: {{[0-9a-f]*}} "_ZN4llvm16DenseMapIteratorIPNS_10MDLocationENS_6detail13DenseSetEmptyENS_10MDNodeInfoIS1_EENS3_12DenseSetPairIS2_EELb0EE23AdvancePastEmptyBucketsEv"
+
+; CHECK: Hash = 0xeee7c0b2
+; CHECK:    Name: {{[0-9a-f]*}} "_ZNK4llvm12LivePhysRegs5printERNS_11raw_ostreamE"
+; CHECK:    Name: {{[0-9a-f]*}} "_ZN4llvm15ScalarEvolution14getSignedRangeEPKNS_4SCEVE"
+
+; CHECK: Hash = 0xea48ac5f
+; CHECK:    Name: {{[0-9a-f]*}} "ForceTopDown"
+; CHECK:    Name: {{[0-9a-f]*}} "_ZNSt3__116allocator_traitsINS_9allocatorINS_11__tree_nodeINS_12__value_typeIPN4llvm10BasicBlockEPNS4_10RegionNodeEEEPvEEEEE11__constructIS9_JNS_4pairIS6_S8_EEEEEvNS_17integral_constantIbLb1EEERSC_PT_DpOT0_"
+
+; CHECK:  Hash = 0x6b22f71f
+; CHECK:    Name: {{[0-9a-f]*}} "_ZNK5clang12OverrideAttr5cloneERNS_10ASTContextE"
+; CHECK:    Name: {{[0-9a-f]*}} "_ZN4llvm22MachineModuleInfoMachOD2Ev"
+
+; CHECK:  Hash = 0x8c248979
+; CHECK:    Name: {{[0-9a-f]*}} "setStmt"
+; CHECK:    Name: {{[0-9a-f]*}} "_ZN4llvm5TwineC1Ei"
+
+
+
+ at ForceTopDown = common global i32 0, align 4
+ at _ZNSt3__116allocator_traitsINS_9allocatorINS_11__tree_nodeINS_12__value_typeIPN4llvm10BasicBlockEPNS4_10RegionNodeEEEPvEEEEE11__constructIS9_JNS_4pairIS6_S8_EEEEEvNS_17integral_constantIbLb1EEERSC_PT_DpOT0_ = common global i32 0, align 4
+ at _ZN5clang23DataRecursiveASTVisitorIN12_GLOBAL__N_124UnusedBackingIvarCheckerEE26TraverseCUDAKernelCallExprEPNS_18CUDAKernelCallExprE = common global i32 0, align 4
+ at _ZN4llvm16DenseMapIteratorIPNS_10MDLocationENS_6detail13DenseSetEmptyENS_10MDNodeInfoIS1_EENS3_12DenseSetPairIS2_EELb0EE23AdvancePastEmptyBucketsEv = common global i32 0, align 4
+ at _ZNK4llvm12LivePhysRegs5printERNS_11raw_ostreamE = common global i32 0, align 4
+ at _ZN4llvm15ScalarEvolution14getSignedRangeEPKNS_4SCEVE = common global i32 0, align 4
+ at k1 = common global i32 0, align 4
+ at is = common global i32 0, align 4
+ at setStmt = common global i32 0, align 4
+ at _ZN4llvm5TwineC1Ei = common global i32 0, align 4
+ at _ZNK5clang12OverrideAttr5cloneERNS_10ASTContextE = common global i32 0, align 4
+ at _ZN4llvm22MachineModuleInfoMachOD2Ev = common global i32 0, align 4
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!17, !18, !19}
+!llvm.ident = !{!20}
+
+!0 = !MDCompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.7.0 (trunk 231548) (llvm/trunk 231547)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !2, subprograms: !2, globals: !3, imports: !2)
+!1 = !MDFile(filename: "hash-collisions.c", directory: "/tmp")
+!2 = !{}
+!3 = !{!4, !6, !7, !8, !9, !10, !11, !12, !13, !14, !15, !16}
+!4 = !MDGlobalVariable(name: "ForceTopDown", scope: !0, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, variable: i32* @ForceTopDown)
+!5 = !MDBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!6 = !MDGlobalVariable(name: "_ZNSt3__116allocator_traitsINS_9allocatorINS_11__tree_nodeINS_12__value_typeIPN4llvm10BasicBlockEPNS4_10RegionNodeEEEPvEEEEE11__constructIS9_JNS_4pairIS6_S8_EEEEEvNS_17integral_constantIbLb1EEERSC_PT_DpOT0_", scope: !0, file: !1, line: 2, type: !5, isLocal: false, isDefinition: true, variable: i32* @_ZNSt3__116allocator_traitsINS_9allocatorINS_11__tree_nodeINS_12__value_typeIPN4llvm10BasicBlockEPNS4_10RegionNodeEEEPvEEEEE11__constructIS9_JNS_4pairIS6_S8_EEEEEvNS_17integral_constantIbLb1EEERSC_PT_DpOT0_)
+!7 = !MDGlobalVariable(name: "_ZN5clang23DataRecursiveASTVisitorIN12_GLOBAL__N_124UnusedBackingIvarCheckerEE26TraverseCUDAKernelCallExprEPNS_18CUDAKernelCallExprE", scope: !0, file: !1, line: 3, type: !5, isLocal: false, isDefinition: true, variable: i32* @_ZN5clang23DataRecursiveASTVisitorIN12_GLOBAL__N_124UnusedBackingIvarCheckerEE26TraverseCUDAKernelCallExprEPNS_18CUDAKernelCallExprE)
+!8 = !MDGlobalVariable(name: "_ZN4llvm16DenseMapIteratorIPNS_10MDLocationENS_6detail13DenseSetEmptyENS_10MDNodeInfoIS1_EENS3_12DenseSetPairIS2_EELb0EE23AdvancePastEmptyBucketsEv", scope: !0, file: !1, line: 4, type: !5, isLocal: false, isDefinition: true, variable: i32* @_ZN4llvm16DenseMapIteratorIPNS_10MDLocationENS_6detail13DenseSetEmptyENS_10MDNodeInfoIS1_EENS3_12DenseSetPairIS2_EELb0EE23AdvancePastEmptyBucketsEv)
+!9 = !MDGlobalVariable(name: "_ZNK4llvm12LivePhysRegs5printERNS_11raw_ostreamE", scope: !0, file: !1, line: 5, type: !5, isLocal: false, isDefinition: true, variable: i32* @_ZNK4llvm12LivePhysRegs5printERNS_11raw_ostreamE)
+!10 = !MDGlobalVariable(name: "_ZN4llvm15ScalarEvolution14getSignedRangeEPKNS_4SCEVE", scope: !0, file: !1, line: 6, type: !5, isLocal: false, isDefinition: true, variable: i32* @_ZN4llvm15ScalarEvolution14getSignedRangeEPKNS_4SCEVE)
+!11 = !MDGlobalVariable(name: "k1", scope: !0, file: !1, line: 7, type: !5, isLocal: false, isDefinition: true, variable: i32* @k1)
+!12 = !MDGlobalVariable(name: "is", scope: !0, file: !1, line: 8, type: !5, isLocal: false, isDefinition: true, variable: i32* @is)
+!13 = !MDGlobalVariable(name: "setStmt", scope: !0, file: !1, line: 9, type: !5, isLocal: false, isDefinition: true, variable: i32* @setStmt)
+!14 = !MDGlobalVariable(name: "_ZN4llvm5TwineC1Ei", scope: !0, file: !1, line: 10, type: !5, isLocal: false, isDefinition: true, variable: i32* @_ZN4llvm5TwineC1Ei)
+!15 = !MDGlobalVariable(name: "_ZNK5clang12OverrideAttr5cloneERNS_10ASTContextE", scope: !0, file: !1, line: 11, type: !5, isLocal: false, isDefinition: true, variable: i32* @_ZNK5clang12OverrideAttr5cloneERNS_10ASTContextE)
+!16 = !MDGlobalVariable(name: "_ZN4llvm22MachineModuleInfoMachOD2Ev", scope: !0, file: !1, line: 12, type: !5, isLocal: false, isDefinition: true, variable: i32* @_ZN4llvm22MachineModuleInfoMachOD2Ev)
+!17 = !{i32 2, !"Dwarf Version", i32 2}
+!18 = !{i32 2, !"Debug Info Version", i32 3}
+!19 = !{i32 1, !"PIC Level", i32 2}
+!20 = !{!"clang version 3.7.0 (trunk 231548) (llvm/trunk 231547)"}





More information about the llvm-commits mailing list