[llvm] Clean up / speed up ULEB128 decoding (PR #73585)

Adrian Prantl via llvm-commits llvm-commits at lists.llvm.org
Mon Nov 27 14:53:53 PST 2023


https://github.com/adrian-prantl created https://github.com/llvm/llvm-project/pull/73585

This series of patches simplifies the two [U]LEB128 decoder functions in LLVM and makes them ever so slightly faster in the process.

As a quick performance test decoding DWARF I instructed dwarfdump to print all DIEs with the name "end" in clang.dSYM  without using the accelerator tables:
```
_build.ninja.noassert$ time bin/llvm-dwarfdump-old -n end -o /dev/null bin/clang-18.dSYM ; time bin/llvm-dwarfdump-new -n end -o /dev/null bin/clang-18.dSYM
bin/llvm-dwarfdump-old -n end -o /dev/null bin/clang-18.dSYM  20.34s user 0.51s system 98% cpu 21.151 total
bin/llvm-dwarfdump-new -n end -o /dev/null bin/clang-18.dSYM  20.15s user 0.50s system 98% cpu 20.950 total
_build.ninja.noassert$ time bin/llvm-dwarfdump-old -n end -o /dev/null bin/clang-18.dSYM ; time bin/llvm-dwarfdump-new -n end -o /dev/null bin/clang-18.dSYM
bin/llvm-dwarfdump-old -n end -o /dev/null bin/clang-18.dSYM  20.33s user 0.50s system 98% cpu 21.178 total
bin/llvm-dwarfdump-new -n end -o /dev/null bin/clang-18.dSYM  20.21s user 0.50s system 98% cpu 21.027 total
_build.ninja.noassert$ time bin/llvm-dwarfdump-old -n end -o /dev/null bin/clang-18.dSYM ; time bin/llvm-dwarfdump-new -n end -o /dev/null bin/clang-18.dSYM
bin/llvm-dwarfdump-old -n end -o /dev/null bin/clang-18.dSYM  20.35s user 0.53s system 98% cpu 21.224 total
bin/llvm-dwarfdump-new -n end -o /dev/null bin/clang-18.dSYM  20.27s user 0.49s system 98% cpu 21.057 total
```

>From c45fd0ed47f48e7c61911d9731bd5e1137423fed Mon Sep 17 00:00:00 2001
From: Adrian Prantl <aprantl at apple.com>
Date: Mon, 27 Nov 2023 10:42:57 -0800
Subject: [PATCH 1/4] [LEB128] Don't initialize error on success

This change removes an unnecessary branch from a hot path. It's also
questionable API to override any previous error unconditonally.
---
 llvm/include/llvm/Support/LEB128.h  | 10 ++++++----
 llvm/lib/Object/MachOObjectFile.cpp |  2 +-
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/llvm/include/llvm/Support/LEB128.h b/llvm/include/llvm/Support/LEB128.h
index a5d367279aefe64..7ec19fee5d8ba51 100644
--- a/llvm/include/llvm/Support/LEB128.h
+++ b/llvm/include/llvm/Support/LEB128.h
@@ -125,14 +125,15 @@ inline unsigned encodeULEB128(uint64_t Value, uint8_t *p,
 }
 
 /// Utility function to decode a ULEB128 value.
+///
+/// If \p error is non-null, it will point to a static error message,
+/// if an error occured. It will not be modified on success.
 inline uint64_t decodeULEB128(const uint8_t *p, unsigned *n = nullptr,
                               const uint8_t *end = nullptr,
                               const char **error = nullptr) {
   const uint8_t *orig_p = p;
   uint64_t Value = 0;
   unsigned Shift = 0;
-  if (error)
-    *error = nullptr;
   do {
     if (p == end) {
       if (error)
@@ -158,6 +159,9 @@ inline uint64_t decodeULEB128(const uint8_t *p, unsigned *n = nullptr,
 }
 
 /// Utility function to decode a SLEB128 value.
+///
+/// If \p error is non-null, it will point to a static error message,
+/// if an error occured. It will not be modified on success.
 inline int64_t decodeSLEB128(const uint8_t *p, unsigned *n = nullptr,
                              const uint8_t *end = nullptr,
                              const char **error = nullptr) {
@@ -165,8 +169,6 @@ inline int64_t decodeSLEB128(const uint8_t *p, unsigned *n = nullptr,
   int64_t Value = 0;
   unsigned Shift = 0;
   uint8_t Byte;
-  if (error)
-    *error = nullptr;
   do {
     if (p == end) {
       if (error)
diff --git a/llvm/lib/Object/MachOObjectFile.cpp b/llvm/lib/Object/MachOObjectFile.cpp
index aa57de16ed18f44..11ad8aeae65da5d 100644
--- a/llvm/lib/Object/MachOObjectFile.cpp
+++ b/llvm/lib/Object/MachOObjectFile.cpp
@@ -2996,7 +2996,7 @@ void ExportEntry::pushNode(uint64_t offset) {
   ErrorAsOutParameter ErrAsOutParam(E);
   const uint8_t *Ptr = Trie.begin() + offset;
   NodeState State(Ptr);
-  const char *error;
+  const char *error = nullptr;
   uint64_t ExportInfoSize = readULEB128(State.Current, &error);
   if (error) {
     *E = malformedError("export info size " + Twine(error) +

>From bc4e3e250ea5d82bcce1affa4f879c876f432106 Mon Sep 17 00:00:00 2001
From: Adrian Prantl <aprantl at apple.com>
Date: Mon, 27 Nov 2023 14:15:06 -0800
Subject: [PATCH 2/4] [LEB128] Factor out redundant code

---
 llvm/include/llvm/Support/LEB128.h | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/llvm/include/llvm/Support/LEB128.h b/llvm/include/llvm/Support/LEB128.h
index 7ec19fee5d8ba51..76c93acaadfdcc7 100644
--- a/llvm/include/llvm/Support/LEB128.h
+++ b/llvm/include/llvm/Support/LEB128.h
@@ -138,17 +138,15 @@ inline uint64_t decodeULEB128(const uint8_t *p, unsigned *n = nullptr,
     if (p == end) {
       if (error)
         *error = "malformed uleb128, extends past end";
-      if (n)
-        *n = (unsigned)(p - orig_p);
-      return 0;
+      Value = 0;
+      break;
     }
     uint64_t Slice = *p & 0x7f;
     if ((Shift >= 64 && Slice != 0) || Slice << Shift >> Shift != Slice) {
       if (error)
         *error = "uleb128 too big for uint64";
-      if (n)
-        *n = (unsigned)(p - orig_p);
-      return 0;
+      Value = 0;
+      break;
     }
     Value += Slice << Shift;
     Shift += 7;

>From 772b9c45d24033ea9e7895fa0682ebe1d34c1de4 Mon Sep 17 00:00:00 2001
From: Adrian Prantl <aprantl at apple.com>
Date: Mon, 27 Nov 2023 14:15:12 -0800
Subject: [PATCH 3/4] [LEB128] Don't handle edge cases in every loop iteration

Previously the overflow check was done for every byte even though it
is only needed for the case where Shift == 63.
---
 llvm/include/llvm/Support/LEB128.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/llvm/include/llvm/Support/LEB128.h b/llvm/include/llvm/Support/LEB128.h
index 76c93acaadfdcc7..22e995df2391ff1 100644
--- a/llvm/include/llvm/Support/LEB128.h
+++ b/llvm/include/llvm/Support/LEB128.h
@@ -142,7 +142,8 @@ inline uint64_t decodeULEB128(const uint8_t *p, unsigned *n = nullptr,
       break;
     }
     uint64_t Slice = *p & 0x7f;
-    if ((Shift >= 64 && Slice != 0) || Slice << Shift >> Shift != Slice) {
+    if (Shift >= 63 && ((Shift == 63 && ((Slice << Shift) >> Shift) != Slice) ||
+                        (Shift > 63 && Slice != 0))) {
       if (error)
         *error = "uleb128 too big for uint64";
       Value = 0;
@@ -177,8 +178,8 @@ inline int64_t decodeSLEB128(const uint8_t *p, unsigned *n = nullptr,
     }
     Byte = *p;
     uint64_t Slice = Byte & 0x7f;
-    if ((Shift >= 64 && Slice != (Value < 0 ? 0x7f : 0x00)) ||
-        (Shift == 63 && Slice != 0 && Slice != 0x7f)) {
+    if ((Shift >= 63) && ((Shift == 63 && Slice != 0 && Slice != 0x7f) ||
+                          (Shift > 63 && Slice != (Value < 0 ? 0x7f : 0x00)))) {
       if (error)
         *error = "sleb128 too big for int64";
       if (n)

>From 4374a286bca03bdfc05bdb1c7fb4157acaf78baa Mon Sep 17 00:00:00 2001
From: Adrian Prantl <aprantl at apple.com>
Date: Mon, 27 Nov 2023 14:46:57 -0800
Subject: [PATCH 4/4] [LEB128] Mark error condition with LLVM_UNLIKELY

---
 llvm/include/llvm/Support/LEB128.h | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/llvm/include/llvm/Support/LEB128.h b/llvm/include/llvm/Support/LEB128.h
index 22e995df2391ff1..3d5e98c4b2cddee 100644
--- a/llvm/include/llvm/Support/LEB128.h
+++ b/llvm/include/llvm/Support/LEB128.h
@@ -135,15 +135,16 @@ inline uint64_t decodeULEB128(const uint8_t *p, unsigned *n = nullptr,
   uint64_t Value = 0;
   unsigned Shift = 0;
   do {
-    if (p == end) {
+    if (LLVM_UNLIKELY(p == end)) {
       if (error)
         *error = "malformed uleb128, extends past end";
       Value = 0;
       break;
     }
     uint64_t Slice = *p & 0x7f;
-    if (Shift >= 63 && ((Shift == 63 && ((Slice << Shift) >> Shift) != Slice) ||
-                        (Shift > 63 && Slice != 0))) {
+    if (LLVM_UNLIKELY(Shift >= 63) &&
+        ((Shift == 63 && ((Slice << Shift) >> Shift) != Slice) ||
+         (Shift > 63 && Slice != 0))) {
       if (error)
         *error = "uleb128 too big for uint64";
       Value = 0;
@@ -169,7 +170,7 @@ inline int64_t decodeSLEB128(const uint8_t *p, unsigned *n = nullptr,
   unsigned Shift = 0;
   uint8_t Byte;
   do {
-    if (p == end) {
+    if (LLVM_UNLIKELY(p == end)) {
       if (error)
         *error = "malformed sleb128, extends past end";
       if (n)
@@ -178,8 +179,9 @@ inline int64_t decodeSLEB128(const uint8_t *p, unsigned *n = nullptr,
     }
     Byte = *p;
     uint64_t Slice = Byte & 0x7f;
-    if ((Shift >= 63) && ((Shift == 63 && Slice != 0 && Slice != 0x7f) ||
-                          (Shift > 63 && Slice != (Value < 0 ? 0x7f : 0x00)))) {
+    if (LLVM_UNLIKELY(Shift >= 63) &&
+        ((Shift == 63 && Slice != 0 && Slice != 0x7f) ||
+         (Shift > 63 && Slice != (Value < 0 ? 0x7f : 0x00)))) {
       if (error)
         *error = "sleb128 too big for int64";
       if (n)



More information about the llvm-commits mailing list