[llvm] [DWARF] Refactor .debug_names bucket count computation (PR #88087)

Mon Apr 8 21:22:43 PDT 2024

https://github.com/MaskRay created https://github.com/llvm/llvm-project/pull/88087

`getDebugNamesBucketAndHashCount` lures users to provide an array to
compute the bucket count using an O(n log n) sort. This is inefficient
as hash table based uniquifying is faster.

The performance issue matters less for Clang as the number of names is
relatively small. For `ld.lld --debug-names`, I plan to compute the
unique hash count as a side product of parallel entry pool computation,
and I just need a function to suggest a bucket count.


>From a21bed726344339c83d3fde9e70fe7eebe965b33 Mon Sep 17 00:00:00 2001
From: Fangrui Song <i at maskray.me>
Date: Mon, 8 Apr 2024 21:22:30 -0700
Subject: [PATCH] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20initia?=
 =?UTF-8?q?l=20version?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Created using spr 1.3.5-bogner
---
 llvm/include/llvm/BinaryFormat/Dwarf.h     | 22 ++++++----------------
 llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp |  7 +++----
 2 files changed, 9 insertions(+), 20 deletions(-)

diff --git a/llvm/include/llvm/BinaryFormat/Dwarf.h b/llvm/include/llvm/BinaryFormat/Dwarf.h
index a53e79bf6e39c1..298700c8941eec 100644
--- a/llvm/include/llvm/BinaryFormat/Dwarf.h
+++ b/llvm/include/llvm/BinaryFormat/Dwarf.h
@@ -613,23 +613,13 @@ enum AcceleratorTable {
   DW_hash_function_djb = 0u
 };
 
-// Uniquify the string hashes and calculate the bucket count for the
-// DWARF v5 Accelerator Table. NOTE: This function effectively consumes the
-// 'Hashes' input parameter.
-inline std::pair<uint32_t, uint32_t>
-getDebugNamesBucketAndHashCount(MutableArrayRef<uint32_t> Hashes) {
-  uint32_t BucketCount = 0;
-
-  sort(Hashes);
-  uint32_t UniqueHashCount = llvm::unique(Hashes) - Hashes.begin();
+// Return a suggested bucket count for the DWARF v5 Accelerator Table.
+inline uint32_t getDebugNamesBucketCount(uint32_t UniqueHashCount) {
   if (UniqueHashCount > 1024)
-    BucketCount = UniqueHashCount / 4;
-  else if (UniqueHashCount > 16)
-    BucketCount = UniqueHashCount / 2;
-  else
-    BucketCount = std::max<uint32_t>(UniqueHashCount, 1);
-
-  return {BucketCount, UniqueHashCount};
+    return UniqueHashCount / 4;
+  if (UniqueHashCount > 16)
+    return UniqueHashCount / 2;
+  return std::max<uint32_t>(UniqueHashCount, 1);
 }
 
 // Constants for the GNU pubnames/pubtypes extensions supporting gdb index.
diff --git a/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp b/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
index 2e8e7d0a88af03..5b679fd3b9f92b 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
@@ -32,14 +32,13 @@
 using namespace llvm;
 
 void AccelTableBase::computeBucketCount() {
-  // First get the number of unique hashes.
   SmallVector<uint32_t, 0> Uniques;
   Uniques.reserve(Entries.size());
   for (const auto &E : Entries)
     Uniques.push_back(E.second.HashValue);
-
-  std::tie(BucketCount, UniqueHashCount) =
-      llvm::dwarf::getDebugNamesBucketAndHashCount(Uniques);
+  llvm::sort(Uniques);
+  UniqueHashCount = llvm::unique(Uniques) - Uniques.begin();
+  BucketCount = dwarf::getDebugNamesBucketCount(UniqueHashCount);
 }
 
 void AccelTableBase::finalize(AsmPrinter *Asm, StringRef Prefix) {