[PATCH] D147890: Fix sysvhash function

Nathan Sidwell via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Sun Apr 9 17:03:47 PDT 2023


urnathan updated this revision to Diff 512048.

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D147890/new/

https://reviews.llvm.org/D147890

Files:
  llvm/include/llvm/Object/ELF.h
  llvm/unittests/Object/ELFTest.cpp


Index: llvm/unittests/Object/ELFTest.cpp
===================================================================
--- llvm/unittests/Object/ELFTest.cpp
+++ llvm/unittests/Object/ELFTest.cpp
@@ -271,3 +271,19 @@
   EXPECT_THAT_ERROR(Region[3].takeError(), FailedWithMessage(ErrMsg2));
   EXPECT_THAT_ERROR(Region[4].takeError(), FailedWithMessage(ErrMsg2));
 }
+
+// Test the SysV and GNU hash functions, particularly with UTF-8 input.
+TEST(ELFTest, Hash) {
+  // Symbols long enough to have feedback. We have to explicitly encode the
+  // UTF-8 to avoid encoding transliterations.
+  EXPECT_EQ(hashSysV("FooBarBazToto"), 0x5ec3e8fU);
+  EXPECT_EQ(hashGnu("FooBarBazToto"), 0x5478be61U);
+  // boom💥pants
+  EXPECT_EQ(hashSysV("boom\xf0\x9f\x92\xa5pants"), 0x5a0cf53U);
+  EXPECT_EQ(hashGnu("boom\xf0\x9f\x92\xa5pants"), 0xf5dda2deU);
+  // woot!🧙 💑 🌈
+  EXPECT_EQ(hashSysV("woot!\xf0\x9f\xa7\x99 \xf0\x9f\x92\x91 "
+                     "\xf0\x9f\x8c\x88"), 0x3522e38U);
+  EXPECT_EQ(hashGnu("woot!\xf0\x9f\xa7\x99 \xf0\x9f\x92\x91 "
+                    "\xf0\x9f\x8c\x88"), 0xf7603f3U);
+}
Index: llvm/include/llvm/Object/ELF.h
===================================================================
--- llvm/include/llvm/Object/ELF.h
+++ llvm/include/llvm/Object/ELF.h
@@ -1237,15 +1237,12 @@
 /// Name of the API remains consistent as specified in the libelf
 /// REF : http://www.sco.com/developers/gabi/latest/ch5.dynamic.html#hash
 inline unsigned hashSysV(StringRef SymbolName) {
-  unsigned h = 0, g;
-  for (char C : SymbolName) {
-    h = (h << 4) + C;
-    g = h & 0xf0000000L;
-    if (g != 0)
-      h ^= g >> 24;
-    h &= ~g;
+  uint32_t H = 0;
+  for (uint8_t C : SymbolName) {
+    H = (H << 4) + C;
+    H ^= (H >> 24) & 0xf0;
   }
-  return h;
+  return H & 0xfffffff;
 }
 
 /// This function returns the hash value for a symbol in the .dynsym section


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D147890.512048.patch
Type: text/x-patch
Size: 1880 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20230410/e08103ed/attachment.bin>


More information about the llvm-commits mailing list