[llvm] [lld] Clean up / speed up ULEB128 decoding (PR #73585)
Adrian Prantl via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 29 08:50:09 PST 2023
https://github.com/adrian-prantl updated https://github.com/llvm/llvm-project/pull/73585
>From 95083955aef602fd8651412043d22bb27718fbe5 Mon Sep 17 00:00:00 2001
From: Adrian Prantl <aprantl at apple.com>
Date: Mon, 27 Nov 2023 10:42:57 -0800
Subject: [PATCH 1/4] [LEB128] Don't initialize error on success
This change removes an unnecessary branch from a hot path. It is also
questionable API design to overwrite any previous error unconditionally.
---
lld/COFF/Driver.cpp | 2 +-
lld/ELF/Driver.cpp | 2 +-
llvm/include/llvm/Support/LEB128.h | 10 ++++++----
llvm/lib/Object/MachOObjectFile.cpp | 4 ++--
llvm/lib/Support/DataExtractor.cpp | 2 +-
llvm/tools/llvm-readobj/ELFDumper.cpp | 2 +-
6 files changed, 12 insertions(+), 10 deletions(-)
diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp
index f5cb379c5a4bf95..327df6078fef546 100644
--- a/lld/COFF/Driver.cpp
+++ b/lld/COFF/Driver.cpp
@@ -1258,7 +1258,7 @@ static void findKeepUniqueSections(COFFLinkerContext &ctx) {
const uint8_t *cur = contents.begin();
while (cur != contents.end()) {
unsigned size;
- const char *err;
+ const char *err = nullptr;
uint64_t symIndex = decodeULEB128(cur, &size, contents.end(), &err);
if (err)
fatal(toString(obj) + ": could not decode addrsig section: " + err);
diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
index 6290880c43d3b95..6bef09eeca015aa 100644
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -2296,7 +2296,7 @@ static void findKeepUniqueSections(opt::InputArgList &args) {
const uint8_t *cur = contents.begin();
while (cur != contents.end()) {
unsigned size;
- const char *err;
+ const char *err = nullptr;
uint64_t symIndex = decodeULEB128(cur, &size, contents.end(), &err);
if (err)
fatal(toString(f) + ": could not decode addrsig section: " + err);
diff --git a/llvm/include/llvm/Support/LEB128.h b/llvm/include/llvm/Support/LEB128.h
index a5d367279aefe64..7ec19fee5d8ba51 100644
--- a/llvm/include/llvm/Support/LEB128.h
+++ b/llvm/include/llvm/Support/LEB128.h
@@ -125,14 +125,15 @@ inline unsigned encodeULEB128(uint64_t Value, uint8_t *p,
}
/// Utility function to decode a ULEB128 value.
+///
+/// If \p error is non-null, it will point to a static error message,
+/// if an error occurred. It will not be modified on success.
inline uint64_t decodeULEB128(const uint8_t *p, unsigned *n = nullptr,
const uint8_t *end = nullptr,
const char **error = nullptr) {
const uint8_t *orig_p = p;
uint64_t Value = 0;
unsigned Shift = 0;
- if (error)
- *error = nullptr;
do {
if (p == end) {
if (error)
@@ -158,6 +159,9 @@ inline uint64_t decodeULEB128(const uint8_t *p, unsigned *n = nullptr,
}
/// Utility function to decode a SLEB128 value.
+///
+/// If \p error is non-null, it will point to a static error message,
+/// if an error occurred. It will not be modified on success.
inline int64_t decodeSLEB128(const uint8_t *p, unsigned *n = nullptr,
const uint8_t *end = nullptr,
const char **error = nullptr) {
@@ -165,8 +169,6 @@ inline int64_t decodeSLEB128(const uint8_t *p, unsigned *n = nullptr,
int64_t Value = 0;
unsigned Shift = 0;
uint8_t Byte;
- if (error)
- *error = nullptr;
do {
if (p == end) {
if (error)
diff --git a/llvm/lib/Object/MachOObjectFile.cpp b/llvm/lib/Object/MachOObjectFile.cpp
index aa57de16ed18f44..5e6c6ea79427537 100644
--- a/llvm/lib/Object/MachOObjectFile.cpp
+++ b/llvm/lib/Object/MachOObjectFile.cpp
@@ -2996,7 +2996,7 @@ void ExportEntry::pushNode(uint64_t offset) {
ErrorAsOutParameter ErrAsOutParam(E);
const uint8_t *Ptr = Trie.begin() + offset;
NodeState State(Ptr);
- const char *error;
+ const char *error = nullptr;
uint64_t ExportInfoSize = readULEB128(State.Current, &error);
if (error) {
*E = malformedError("export info size " + Twine(error) +
@@ -3131,7 +3131,7 @@ void ExportEntry::pushNode(uint64_t offset) {
void ExportEntry::pushDownUntilBottom() {
ErrorAsOutParameter ErrAsOutParam(E);
- const char *error;
+ const char *error = nullptr;
while (Stack.back().NextChildIndex < Stack.back().ChildCount) {
NodeState &Top = Stack.back();
CumulativeString.resize(Top.ParentStringLength);
diff --git a/llvm/lib/Support/DataExtractor.cpp b/llvm/lib/Support/DataExtractor.cpp
index 59a44f4071b5fa0..eac3c32cfd3b796 100644
--- a/llvm/lib/Support/DataExtractor.cpp
+++ b/llvm/lib/Support/DataExtractor.cpp
@@ -202,7 +202,7 @@ static T getLEB128(StringRef Data, uint64_t *OffsetPtr, Error *Err,
if (isError(Err))
return T();
- const char *error;
+ const char *error = nullptr;
unsigned bytes_read;
T result =
Decoder(Bytes.data() + *OffsetPtr, &bytes_read, Bytes.end(), &error);
diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp
index ab26f1369407bf2..d6d0ea35044ab30 100644
--- a/llvm/tools/llvm-readobj/ELFDumper.cpp
+++ b/llvm/tools/llvm-readobj/ELFDumper.cpp
@@ -5004,7 +5004,7 @@ static Expected<std::vector<uint64_t>> toULEB128Array(ArrayRef<uint8_t> Data) {
const uint8_t *End = Data.end();
while (Cur != End) {
unsigned Size;
- const char *Err;
+ const char *Err = nullptr;
Ret.push_back(decodeULEB128(Cur, &Size, End, &Err));
if (Err)
return createError(Err);
>From 1e69effd9c7d46e9904deac88255afef4585b7c4 Mon Sep 17 00:00:00 2001
From: Adrian Prantl <aprantl at apple.com>
Date: Mon, 27 Nov 2023 14:15:06 -0800
Subject: [PATCH 2/4] [LEB128] Factor out redundant code
---
llvm/include/llvm/Support/LEB128.h | 10 ++++------
1 file changed, 4 insertions(+), 6 deletions(-)
diff --git a/llvm/include/llvm/Support/LEB128.h b/llvm/include/llvm/Support/LEB128.h
index 7ec19fee5d8ba51..76c93acaadfdcc7 100644
--- a/llvm/include/llvm/Support/LEB128.h
+++ b/llvm/include/llvm/Support/LEB128.h
@@ -138,17 +138,15 @@ inline uint64_t decodeULEB128(const uint8_t *p, unsigned *n = nullptr,
if (p == end) {
if (error)
*error = "malformed uleb128, extends past end";
- if (n)
- *n = (unsigned)(p - orig_p);
- return 0;
+ Value = 0;
+ break;
}
uint64_t Slice = *p & 0x7f;
if ((Shift >= 64 && Slice != 0) || Slice << Shift >> Shift != Slice) {
if (error)
*error = "uleb128 too big for uint64";
- if (n)
- *n = (unsigned)(p - orig_p);
- return 0;
+ Value = 0;
+ break;
}
Value += Slice << Shift;
Shift += 7;
>From 2bcdfa45464992186c928175be082ed437c9bd59 Mon Sep 17 00:00:00 2001
From: Adrian Prantl <aprantl at apple.com>
Date: Mon, 27 Nov 2023 14:15:12 -0800
Subject: [PATCH 3/4] [LEB128] Don't handle edge cases in every loop iteration
Previously the overflow check was evaluated for every byte, even though it
can only trigger once Shift has reached 63 or more.
---
llvm/include/llvm/Support/LEB128.h | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/llvm/include/llvm/Support/LEB128.h b/llvm/include/llvm/Support/LEB128.h
index 76c93acaadfdcc7..40b6cf79bacf758 100644
--- a/llvm/include/llvm/Support/LEB128.h
+++ b/llvm/include/llvm/Support/LEB128.h
@@ -142,7 +142,8 @@ inline uint64_t decodeULEB128(const uint8_t *p, unsigned *n = nullptr,
break;
}
uint64_t Slice = *p & 0x7f;
- if ((Shift >= 64 && Slice != 0) || Slice << Shift >> Shift != Slice) {
+ if (Shift >= 63 && ((Shift == 63 && (Slice << Shift >> Shift) != Slice) ||
+ (Shift > 63 && Slice != 0))) {
if (error)
*error = "uleb128 too big for uint64";
Value = 0;
@@ -177,8 +178,8 @@ inline int64_t decodeSLEB128(const uint8_t *p, unsigned *n = nullptr,
}
Byte = *p;
uint64_t Slice = Byte & 0x7f;
- if ((Shift >= 64 && Slice != (Value < 0 ? 0x7f : 0x00)) ||
- (Shift == 63 && Slice != 0 && Slice != 0x7f)) {
+ if ((Shift >= 63) && ((Shift == 63 && Slice != 0 && Slice != 0x7f) ||
+ (Shift > 63 && Slice != (Value < 0 ? 0x7f : 0x00)))) {
if (error)
*error = "sleb128 too big for int64";
if (n)
>From a9f97367cea36102e0d20d1a2cb0bdcfd7c86ff4 Mon Sep 17 00:00:00 2001
From: Adrian Prantl <aprantl at apple.com>
Date: Mon, 27 Nov 2023 14:46:57 -0800
Subject: [PATCH 4/4] [LEB128] Mark error condition with LLVM_UNLIKELY
---
llvm/include/llvm/Support/LEB128.h | 14 ++++++++------
1 file changed, 8 insertions(+), 6 deletions(-)
diff --git a/llvm/include/llvm/Support/LEB128.h b/llvm/include/llvm/Support/LEB128.h
index 40b6cf79bacf758..7fc572b6ff06efc 100644
--- a/llvm/include/llvm/Support/LEB128.h
+++ b/llvm/include/llvm/Support/LEB128.h
@@ -135,15 +135,16 @@ inline uint64_t decodeULEB128(const uint8_t *p, unsigned *n = nullptr,
uint64_t Value = 0;
unsigned Shift = 0;
do {
- if (p == end) {
+ if (LLVM_UNLIKELY(p == end)) {
if (error)
*error = "malformed uleb128, extends past end";
Value = 0;
break;
}
uint64_t Slice = *p & 0x7f;
- if (Shift >= 63 && ((Shift == 63 && (Slice << Shift >> Shift) != Slice) ||
- (Shift > 63 && Slice != 0))) {
+ if (LLVM_UNLIKELY(Shift >= 63) &&
+ ((Shift == 63 && (Slice << Shift >> Shift) != Slice) ||
+ (Shift > 63 && Slice != 0))) {
if (error)
*error = "uleb128 too big for uint64";
Value = 0;
@@ -169,7 +170,7 @@ inline int64_t decodeSLEB128(const uint8_t *p, unsigned *n = nullptr,
unsigned Shift = 0;
uint8_t Byte;
do {
- if (p == end) {
+ if (LLVM_UNLIKELY(p == end)) {
if (error)
*error = "malformed sleb128, extends past end";
if (n)
@@ -178,8 +179,9 @@ inline int64_t decodeSLEB128(const uint8_t *p, unsigned *n = nullptr,
}
Byte = *p;
uint64_t Slice = Byte & 0x7f;
- if ((Shift >= 63) && ((Shift == 63 && Slice != 0 && Slice != 0x7f) ||
- (Shift > 63 && Slice != (Value < 0 ? 0x7f : 0x00)))) {
+ if (LLVM_UNLIKELY(Shift >= 63) &&
+ ((Shift == 63 && Slice != 0 && Slice != 0x7f) ||
+ (Shift > 63 && Slice != (Value < 0 ? 0x7f : 0x00)))) {
if (error)
*error = "sleb128 too big for int64";
if (n)
More information about the llvm-commits
mailing list