[llvm] [lld] Clean up / speed up ULEB128 decoding (PR #73585)

Adrian Prantl via llvm-commits llvm-commits at lists.llvm.org
Wed Nov 29 08:50:09 PST 2023


https://github.com/adrian-prantl updated https://github.com/llvm/llvm-project/pull/73585

>From 95083955aef602fd8651412043d22bb27718fbe5 Mon Sep 17 00:00:00 2001
From: Adrian Prantl <aprantl at apple.com>
Date: Mon, 27 Nov 2023 10:42:57 -0800
Subject: [PATCH 1/4] [LEB128] Don't initialize error on success

This change removes an unnecessary branch from a hot path. It's also
questionable API to override any previous error unconditonally.
---
 lld/COFF/Driver.cpp                   |  2 +-
 lld/ELF/Driver.cpp                    |  2 +-
 llvm/include/llvm/Support/LEB128.h    | 10 ++++++----
 llvm/lib/Object/MachOObjectFile.cpp   |  4 ++--
 llvm/lib/Support/DataExtractor.cpp    |  2 +-
 llvm/tools/llvm-readobj/ELFDumper.cpp |  2 +-
 6 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp
index f5cb379c5a4bf95..327df6078fef546 100644
--- a/lld/COFF/Driver.cpp
+++ b/lld/COFF/Driver.cpp
@@ -1258,7 +1258,7 @@ static void findKeepUniqueSections(COFFLinkerContext &ctx) {
       const uint8_t *cur = contents.begin();
       while (cur != contents.end()) {
         unsigned size;
-        const char *err;
+        const char *err = nullptr;
         uint64_t symIndex = decodeULEB128(cur, &size, contents.end(), &err);
         if (err)
           fatal(toString(obj) + ": could not decode addrsig section: " + err);
diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
index 6290880c43d3b95..6bef09eeca015aa 100644
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -2296,7 +2296,7 @@ static void findKeepUniqueSections(opt::InputArgList &args) {
       const uint8_t *cur = contents.begin();
       while (cur != contents.end()) {
         unsigned size;
-        const char *err;
+        const char *err = nullptr;
         uint64_t symIndex = decodeULEB128(cur, &size, contents.end(), &err);
         if (err)
           fatal(toString(f) + ": could not decode addrsig section: " + err);
diff --git a/llvm/include/llvm/Support/LEB128.h b/llvm/include/llvm/Support/LEB128.h
index a5d367279aefe64..7ec19fee5d8ba51 100644
--- a/llvm/include/llvm/Support/LEB128.h
+++ b/llvm/include/llvm/Support/LEB128.h
@@ -125,14 +125,15 @@ inline unsigned encodeULEB128(uint64_t Value, uint8_t *p,
 }
 
 /// Utility function to decode a ULEB128 value.
+///
+/// If \p error is non-null, it will point to a static error message,
+/// if an error occured. It will not be modified on success.
 inline uint64_t decodeULEB128(const uint8_t *p, unsigned *n = nullptr,
                               const uint8_t *end = nullptr,
                               const char **error = nullptr) {
   const uint8_t *orig_p = p;
   uint64_t Value = 0;
   unsigned Shift = 0;
-  if (error)
-    *error = nullptr;
   do {
     if (p == end) {
       if (error)
@@ -158,6 +159,9 @@ inline uint64_t decodeULEB128(const uint8_t *p, unsigned *n = nullptr,
 }
 
 /// Utility function to decode a SLEB128 value.
+///
+/// If \p error is non-null, it will point to a static error message,
+/// if an error occured. It will not be modified on success.
 inline int64_t decodeSLEB128(const uint8_t *p, unsigned *n = nullptr,
                              const uint8_t *end = nullptr,
                              const char **error = nullptr) {
@@ -165,8 +169,6 @@ inline int64_t decodeSLEB128(const uint8_t *p, unsigned *n = nullptr,
   int64_t Value = 0;
   unsigned Shift = 0;
   uint8_t Byte;
-  if (error)
-    *error = nullptr;
   do {
     if (p == end) {
       if (error)
diff --git a/llvm/lib/Object/MachOObjectFile.cpp b/llvm/lib/Object/MachOObjectFile.cpp
index aa57de16ed18f44..5e6c6ea79427537 100644
--- a/llvm/lib/Object/MachOObjectFile.cpp
+++ b/llvm/lib/Object/MachOObjectFile.cpp
@@ -2996,7 +2996,7 @@ void ExportEntry::pushNode(uint64_t offset) {
   ErrorAsOutParameter ErrAsOutParam(E);
   const uint8_t *Ptr = Trie.begin() + offset;
   NodeState State(Ptr);
-  const char *error;
+  const char *error = nullptr;
   uint64_t ExportInfoSize = readULEB128(State.Current, &error);
   if (error) {
     *E = malformedError("export info size " + Twine(error) +
@@ -3131,7 +3131,7 @@ void ExportEntry::pushNode(uint64_t offset) {
 
 void ExportEntry::pushDownUntilBottom() {
   ErrorAsOutParameter ErrAsOutParam(E);
-  const char *error;
+  const char *error = nullptr;
   while (Stack.back().NextChildIndex < Stack.back().ChildCount) {
     NodeState &Top = Stack.back();
     CumulativeString.resize(Top.ParentStringLength);
diff --git a/llvm/lib/Support/DataExtractor.cpp b/llvm/lib/Support/DataExtractor.cpp
index 59a44f4071b5fa0..eac3c32cfd3b796 100644
--- a/llvm/lib/Support/DataExtractor.cpp
+++ b/llvm/lib/Support/DataExtractor.cpp
@@ -202,7 +202,7 @@ static T getLEB128(StringRef Data, uint64_t *OffsetPtr, Error *Err,
   if (isError(Err))
     return T();
 
-  const char *error;
+  const char *error = nullptr;
   unsigned bytes_read;
   T result =
       Decoder(Bytes.data() + *OffsetPtr, &bytes_read, Bytes.end(), &error);
diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp
index ab26f1369407bf2..d6d0ea35044ab30 100644
--- a/llvm/tools/llvm-readobj/ELFDumper.cpp
+++ b/llvm/tools/llvm-readobj/ELFDumper.cpp
@@ -5004,7 +5004,7 @@ static Expected<std::vector<uint64_t>> toULEB128Array(ArrayRef<uint8_t> Data) {
   const uint8_t *End = Data.end();
   while (Cur != End) {
     unsigned Size;
-    const char *Err;
+    const char *Err = nullptr;
     Ret.push_back(decodeULEB128(Cur, &Size, End, &Err));
     if (Err)
       return createError(Err);

>From 1e69effd9c7d46e9904deac88255afef4585b7c4 Mon Sep 17 00:00:00 2001
From: Adrian Prantl <aprantl at apple.com>
Date: Mon, 27 Nov 2023 14:15:06 -0800
Subject: [PATCH 2/4] [LEB128] Factor out redundant code

---
 llvm/include/llvm/Support/LEB128.h | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/llvm/include/llvm/Support/LEB128.h b/llvm/include/llvm/Support/LEB128.h
index 7ec19fee5d8ba51..76c93acaadfdcc7 100644
--- a/llvm/include/llvm/Support/LEB128.h
+++ b/llvm/include/llvm/Support/LEB128.h
@@ -138,17 +138,15 @@ inline uint64_t decodeULEB128(const uint8_t *p, unsigned *n = nullptr,
     if (p == end) {
       if (error)
         *error = "malformed uleb128, extends past end";
-      if (n)
-        *n = (unsigned)(p - orig_p);
-      return 0;
+      Value = 0;
+      break;
     }
     uint64_t Slice = *p & 0x7f;
     if ((Shift >= 64 && Slice != 0) || Slice << Shift >> Shift != Slice) {
       if (error)
         *error = "uleb128 too big for uint64";
-      if (n)
-        *n = (unsigned)(p - orig_p);
-      return 0;
+      Value = 0;
+      break;
     }
     Value += Slice << Shift;
     Shift += 7;

>From 2bcdfa45464992186c928175be082ed437c9bd59 Mon Sep 17 00:00:00 2001
From: Adrian Prantl <aprantl at apple.com>
Date: Mon, 27 Nov 2023 14:15:12 -0800
Subject: [PATCH 3/4] [LEB128] Don't handle edge cases in every loop iteration

Previously the overflow check was done for every byte even though it
is only needed for the case where Shift == 63.
---
 llvm/include/llvm/Support/LEB128.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/llvm/include/llvm/Support/LEB128.h b/llvm/include/llvm/Support/LEB128.h
index 76c93acaadfdcc7..40b6cf79bacf758 100644
--- a/llvm/include/llvm/Support/LEB128.h
+++ b/llvm/include/llvm/Support/LEB128.h
@@ -142,7 +142,8 @@ inline uint64_t decodeULEB128(const uint8_t *p, unsigned *n = nullptr,
       break;
     }
     uint64_t Slice = *p & 0x7f;
-    if ((Shift >= 64 && Slice != 0) || Slice << Shift >> Shift != Slice) {
+    if (Shift >= 63 && ((Shift == 63 && (Slice << Shift >> Shift) != Slice) ||
+                        (Shift > 63 && Slice != 0))) {
       if (error)
         *error = "uleb128 too big for uint64";
       Value = 0;
@@ -177,8 +178,8 @@ inline int64_t decodeSLEB128(const uint8_t *p, unsigned *n = nullptr,
     }
     Byte = *p;
     uint64_t Slice = Byte & 0x7f;
-    if ((Shift >= 64 && Slice != (Value < 0 ? 0x7f : 0x00)) ||
-        (Shift == 63 && Slice != 0 && Slice != 0x7f)) {
+    if ((Shift >= 63) && ((Shift == 63 && Slice != 0 && Slice != 0x7f) ||
+                          (Shift > 63 && Slice != (Value < 0 ? 0x7f : 0x00)))) {
       if (error)
         *error = "sleb128 too big for int64";
       if (n)

>From a9f97367cea36102e0d20d1a2cb0bdcfd7c86ff4 Mon Sep 17 00:00:00 2001
From: Adrian Prantl <aprantl at apple.com>
Date: Mon, 27 Nov 2023 14:46:57 -0800
Subject: [PATCH 4/4] [LEB128] Mark error condition with LLVM_UNLIKELY

---
 llvm/include/llvm/Support/LEB128.h | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/llvm/include/llvm/Support/LEB128.h b/llvm/include/llvm/Support/LEB128.h
index 40b6cf79bacf758..7fc572b6ff06efc 100644
--- a/llvm/include/llvm/Support/LEB128.h
+++ b/llvm/include/llvm/Support/LEB128.h
@@ -135,15 +135,16 @@ inline uint64_t decodeULEB128(const uint8_t *p, unsigned *n = nullptr,
   uint64_t Value = 0;
   unsigned Shift = 0;
   do {
-    if (p == end) {
+    if (LLVM_UNLIKELY(p == end)) {
       if (error)
         *error = "malformed uleb128, extends past end";
       Value = 0;
       break;
     }
     uint64_t Slice = *p & 0x7f;
-    if (Shift >= 63 && ((Shift == 63 && (Slice << Shift >> Shift) != Slice) ||
-                        (Shift > 63 && Slice != 0))) {
+    if (LLVM_UNLIKELY(Shift >= 63) &&
+        ((Shift == 63 && (Slice << Shift >> Shift) != Slice) ||
+         (Shift > 63 && Slice != 0))) {
       if (error)
         *error = "uleb128 too big for uint64";
       Value = 0;
@@ -169,7 +170,7 @@ inline int64_t decodeSLEB128(const uint8_t *p, unsigned *n = nullptr,
   unsigned Shift = 0;
   uint8_t Byte;
   do {
-    if (p == end) {
+    if (LLVM_UNLIKELY(p == end)) {
       if (error)
         *error = "malformed sleb128, extends past end";
       if (n)
@@ -178,8 +179,9 @@ inline int64_t decodeSLEB128(const uint8_t *p, unsigned *n = nullptr,
     }
     Byte = *p;
     uint64_t Slice = Byte & 0x7f;
-    if ((Shift >= 63) && ((Shift == 63 && Slice != 0 && Slice != 0x7f) ||
-                          (Shift > 63 && Slice != (Value < 0 ? 0x7f : 0x00)))) {
+    if (LLVM_UNLIKELY(Shift >= 63) &&
+        ((Shift == 63 && Slice != 0 && Slice != 0x7f) ||
+         (Shift > 63 && Slice != (Value < 0 ? 0x7f : 0x00)))) {
       if (error)
         *error = "sleb128 too big for int64";
       if (n)



More information about the llvm-commits mailing list