[lld] d3e5b6f - [ELF] Implement --build-id={md5,sha1} with truncated BLAKE3

Fangrui Song via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 24 11:31:52 PDT 2022


Author: Fangrui Song
Date: 2022-03-24T11:31:39-07:00
New Revision: d3e5b6f7539b86995aef6e2075c1edb3059385ce

URL: https://github.com/llvm/llvm-project/commit/d3e5b6f7539b86995aef6e2075c1edb3059385ce
DIFF: https://github.com/llvm/llvm-project/commit/d3e5b6f7539b86995aef6e2075c1edb3059385ce.diff

LOG: [ELF] Implement --build-id={md5,sha1} with truncated BLAKE3

--build-id was introduced as "approximation of true uniqueness across all
binaries that might be used by overlapping sets of people". It does not require
the attack resistance mentioned below. In practice, people just use
--build-id=md5 for a 16-byte build ID and --build-id=sha1 for a 20-byte build ID.

BLAKE3 has a 256-bit default output length, which provides 128-bit security
against (second-)preimage, collision, and differentiability attacks. Its portable
implementation is fast. It additionally provides Arm Neon/AVX2/AVX-512
implementations. Just implement --build-id={md5,sha1} with truncated BLAKE3.

Linking clang 14 RelWithDebInfo with --threads=8 on a Skylake CPU:

* 1.13x as fast with --build-id=md5
* 1.15x as fast with --build-id=sha1

--threads=4 on Apple m1:

* 1.25x as fast with --build-id=md5
* 1.17x as fast with --build-id=sha1

Reviewed By: ikudrin

Differential Revision: https://reviews.llvm.org/D121531

Added: 
    

Modified: 
    lld/ELF/Writer.cpp
    lld/test/ELF/build-id.s
    lld/test/ELF/partition-notes.s

Removed: 
    


################################################################################
diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp
index 7c596bb788f58..153095e44b01a 100644
--- a/lld/ELF/Writer.cpp
+++ b/lld/ELF/Writer.cpp
@@ -25,10 +25,9 @@
 #include "lld/Common/Filesystem.h"
 #include "lld/Common/Strings.h"
 #include "llvm/ADT/StringMap.h"
-#include "llvm/Support/MD5.h"
+#include "llvm/Support/BLAKE3.h"
 #include "llvm/Support/Parallel.h"
 #include "llvm/Support/RandomNumberGenerator.h"
-#include "llvm/Support/SHA1.h"
 #include "llvm/Support/TimeProfiler.h"
 #include "llvm/Support/xxhash.h"
 #include <climits>
@@ -2925,6 +2924,12 @@ template <class ELFT> void Writer<ELFT>::writeBuildId() {
   MutableArrayRef<uint8_t> output(buildId.get(), hashSize);
   llvm::ArrayRef<uint8_t> input{Out::bufferStart, size_t(fileSize)};
 
+  // Fedora introduced build ID as "approximation of true uniqueness across all
+  // binaries that might be used by overlapping sets of people". It does not
+  // need some security goals that some hash algorithms strive to provide, e.g.
+  // (second-)preimage and collision resistance. In practice people use 'md5'
+  // and 'sha1' just for different lengths. Implement them with the more
+  // efficient BLAKE3.
   switch (config->buildId) {
   case BuildIdKind::Fast:
     computeHash(output, input, [](uint8_t *dest, ArrayRef<uint8_t> arr) {
@@ -2933,12 +2938,12 @@ template <class ELFT> void Writer<ELFT>::writeBuildId() {
     break;
   case BuildIdKind::Md5:
     computeHash(output, input, [&](uint8_t *dest, ArrayRef<uint8_t> arr) {
-      memcpy(dest, MD5::hash(arr).data(), hashSize);
+      memcpy(dest, BLAKE3::hash<16>(arr).data(), hashSize);
     });
     break;
   case BuildIdKind::Sha1:
     computeHash(output, input, [&](uint8_t *dest, ArrayRef<uint8_t> arr) {
-      memcpy(dest, SHA1::hash(arr).data(), hashSize);
+      memcpy(dest, BLAKE3::hash<20>(arr).data(), hashSize);
     });
     break;
   case BuildIdKind::Uuid:

diff --git a/lld/test/ELF/build-id.s b/lld/test/ELF/build-id.s
index 63e488ef6d294..05a5e8d61f92e 100644
--- a/lld/test/ELF/build-id.s
+++ b/lld/test/ELF/build-id.s
@@ -69,11 +69,11 @@ _start:
 
 # MD5:      Contents of section .note.gnu.build-id:
 # MD5-NEXT: 04000000 10000000 03000000 474e5500  ............GNU.
-# MD5-NEXT: 7b00fd9e 054ceb4b 06f64d0e 482cb476
+# MD5-NEXT: dbf0bc13 b3ff11e9 fde6e17c 0304983c
 
 # SHA1:      Contents of section .note.gnu.build-id:
 # SHA1-NEXT: 04000000 14000000 03000000 474e5500  ............GNU.
-# SHA1-NEXT: 221a99da dd1d2bf3 05e48a91 dde8a0cb
+# SHA1-NEXT: 1215775f d3b60050 70afd970 e8a10972
 
 # UUID:      Contents of section .note.gnu.build-id:
 # UUID-NEXT: 04000000 10000000 03000000 474e5500  ............GNU.
@@ -89,11 +89,11 @@ _start:
 
 # SEPARATE:      Hex dump of section '.note.gnu.build-id':
 # SEPARATE-NEXT: 0x00200198 04000000 14000000 03000000 474e5500
-# SEPARATE-NEXT: 0x002001a8 96820adf d90d5470 0a0c32ff a88c4017
+# SEPARATE-NEXT: 0x002001a8 5cd067a4 2631c0fd 42029037 4b8e0938
 
 # RUN: ld.lld --build-id=sha1 --no-rosegment %t -o %t2
 # RUN: llvm-readelf -x .note.gnu.build-id %t2 | FileCheck --check-prefix=NORO %s
 
 # NORO:      Hex dump of section '.note.gnu.build-id':
 # NORO-NEXT: 0x00200160 04000000 14000000 03000000 474e5500
-# NORO-NEXT: 0x00200170 cf6d7b3a 0b3297c3 5b47c079 ce048349
+# NORO-NEXT: 0x00200170 a328cc99 45bfc3fc a9fc8615 37102f9d

diff --git a/lld/test/ELF/partition-notes.s b/lld/test/ELF/partition-notes.s
index b02e490fff218..9bc43f2fbf9ee 100644
--- a/lld/test/ELF/partition-notes.s
+++ b/lld/test/ELF/partition-notes.s
@@ -37,7 +37,7 @@
 // CHECK-NEXT:       Owner: GNU
 // CHECK-NEXT:       Data size:
 // CHECK-NEXT:       Type: NT_GNU_BUILD_ID (unique build ID bitstring)
-// CHECK-NEXT:       Build ID: bb5542bd74252653e286044980d602874d237ae0
+// CHECK-NEXT:       Build ID: ab81108a3d85b729980356331fddc2bfc4c10177{{$}}
 // CHECK-NEXT:     }
 // CHECK-NEXT:   }
 // CHECK-NEXT: ]


        


More information about the llvm-commits mailing list