[llvm] Clean up / speed up ULEB128 decoding (PR #73585)
Adrian Prantl via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 27 14:53:53 PST 2023
https://github.com/adrian-prantl created https://github.com/llvm/llvm-project/pull/73585
This series of patches simplifies the two [U]LEB128 decoder functions in LLVM and makes them ever so slightly faster in the process.
As a quick performance test decoding DWARF I instructed dwarfdump to print all DIEs with the name "end" in clang.dSYM without using the accelerator tables:
```
_build.ninja.noassert$ time bin/llvm-dwarfdump-old -n end -o /dev/null bin/clang-18.dSYM ; time bin/llvm-dwarfdump-new -n end -o /dev/null bin/clang-18.dSYM
bin/llvm-dwarfdump-old -n end -o /dev/null bin/clang-18.dSYM 20.34s user 0.51s system 98% cpu 21.151 total
bin/llvm-dwarfdump-new -n end -o /dev/null bin/clang-18.dSYM 20.15s user 0.50s system 98% cpu 20.950 total
_build.ninja.noassert$ time bin/llvm-dwarfdump-old -n end -o /dev/null bin/clang-18.dSYM ; time bin/llvm-dwarfdump-new -n end -o /dev/null bin/clang-18.dSYM
bin/llvm-dwarfdump-old -n end -o /dev/null bin/clang-18.dSYM 20.33s user 0.50s system 98% cpu 21.178 total
bin/llvm-dwarfdump-new -n end -o /dev/null bin/clang-18.dSYM 20.21s user 0.50s system 98% cpu 21.027 total
_build.ninja.noassert$ time bin/llvm-dwarfdump-old -n end -o /dev/null bin/clang-18.dSYM ; time bin/llvm-dwarfdump-new -n end -o /dev/null bin/clang-18.dSYM
bin/llvm-dwarfdump-old -n end -o /dev/null bin/clang-18.dSYM 20.35s user 0.53s system 98% cpu 21.224 total
bin/llvm-dwarfdump-new -n end -o /dev/null bin/clang-18.dSYM 20.27s user 0.49s system 98% cpu 21.057 total
```
>From c45fd0ed47f48e7c61911d9731bd5e1137423fed Mon Sep 17 00:00:00 2001
From: Adrian Prantl <aprantl at apple.com>
Date: Mon, 27 Nov 2023 10:42:57 -0800
Subject: [PATCH 1/4] [LEB128] Don't initialize error on success
This change removes an unnecessary branch from a hot path. It's also
questionable API to override any previous error unconditonally.
---
llvm/include/llvm/Support/LEB128.h | 10 ++++++----
llvm/lib/Object/MachOObjectFile.cpp | 2 +-
2 files changed, 7 insertions(+), 5 deletions(-)
diff --git a/llvm/include/llvm/Support/LEB128.h b/llvm/include/llvm/Support/LEB128.h
index a5d367279aefe64..7ec19fee5d8ba51 100644
--- a/llvm/include/llvm/Support/LEB128.h
+++ b/llvm/include/llvm/Support/LEB128.h
@@ -125,14 +125,15 @@ inline unsigned encodeULEB128(uint64_t Value, uint8_t *p,
}
/// Utility function to decode a ULEB128 value.
+///
+/// If \p error is non-null, it will point to a static error message,
+/// if an error occured. It will not be modified on success.
inline uint64_t decodeULEB128(const uint8_t *p, unsigned *n = nullptr,
const uint8_t *end = nullptr,
const char **error = nullptr) {
const uint8_t *orig_p = p;
uint64_t Value = 0;
unsigned Shift = 0;
- if (error)
- *error = nullptr;
do {
if (p == end) {
if (error)
@@ -158,6 +159,9 @@ inline uint64_t decodeULEB128(const uint8_t *p, unsigned *n = nullptr,
}
/// Utility function to decode a SLEB128 value.
+///
+/// If \p error is non-null, it will point to a static error message,
+/// if an error occured. It will not be modified on success.
inline int64_t decodeSLEB128(const uint8_t *p, unsigned *n = nullptr,
const uint8_t *end = nullptr,
const char **error = nullptr) {
@@ -165,8 +169,6 @@ inline int64_t decodeSLEB128(const uint8_t *p, unsigned *n = nullptr,
int64_t Value = 0;
unsigned Shift = 0;
uint8_t Byte;
- if (error)
- *error = nullptr;
do {
if (p == end) {
if (error)
diff --git a/llvm/lib/Object/MachOObjectFile.cpp b/llvm/lib/Object/MachOObjectFile.cpp
index aa57de16ed18f44..11ad8aeae65da5d 100644
--- a/llvm/lib/Object/MachOObjectFile.cpp
+++ b/llvm/lib/Object/MachOObjectFile.cpp
@@ -2996,7 +2996,7 @@ void ExportEntry::pushNode(uint64_t offset) {
ErrorAsOutParameter ErrAsOutParam(E);
const uint8_t *Ptr = Trie.begin() + offset;
NodeState State(Ptr);
- const char *error;
+ const char *error = nullptr;
uint64_t ExportInfoSize = readULEB128(State.Current, &error);
if (error) {
*E = malformedError("export info size " + Twine(error) +
>From bc4e3e250ea5d82bcce1affa4f879c876f432106 Mon Sep 17 00:00:00 2001
From: Adrian Prantl <aprantl at apple.com>
Date: Mon, 27 Nov 2023 14:15:06 -0800
Subject: [PATCH 2/4] [LEB128] Factor out redundant code
---
llvm/include/llvm/Support/LEB128.h | 10 ++++------
1 file changed, 4 insertions(+), 6 deletions(-)
diff --git a/llvm/include/llvm/Support/LEB128.h b/llvm/include/llvm/Support/LEB128.h
index 7ec19fee5d8ba51..76c93acaadfdcc7 100644
--- a/llvm/include/llvm/Support/LEB128.h
+++ b/llvm/include/llvm/Support/LEB128.h
@@ -138,17 +138,15 @@ inline uint64_t decodeULEB128(const uint8_t *p, unsigned *n = nullptr,
if (p == end) {
if (error)
*error = "malformed uleb128, extends past end";
- if (n)
- *n = (unsigned)(p - orig_p);
- return 0;
+ Value = 0;
+ break;
}
uint64_t Slice = *p & 0x7f;
if ((Shift >= 64 && Slice != 0) || Slice << Shift >> Shift != Slice) {
if (error)
*error = "uleb128 too big for uint64";
- if (n)
- *n = (unsigned)(p - orig_p);
- return 0;
+ Value = 0;
+ break;
}
Value += Slice << Shift;
Shift += 7;
>From 772b9c45d24033ea9e7895fa0682ebe1d34c1de4 Mon Sep 17 00:00:00 2001
From: Adrian Prantl <aprantl at apple.com>
Date: Mon, 27 Nov 2023 14:15:12 -0800
Subject: [PATCH 3/4] [LEB128] Don't handle edge cases in every loop iteration
Previously the overflow check was done for every byte even though it
is only needed for the case where Shift == 63.
---
llvm/include/llvm/Support/LEB128.h | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/llvm/include/llvm/Support/LEB128.h b/llvm/include/llvm/Support/LEB128.h
index 76c93acaadfdcc7..22e995df2391ff1 100644
--- a/llvm/include/llvm/Support/LEB128.h
+++ b/llvm/include/llvm/Support/LEB128.h
@@ -142,7 +142,8 @@ inline uint64_t decodeULEB128(const uint8_t *p, unsigned *n = nullptr,
break;
}
uint64_t Slice = *p & 0x7f;
- if ((Shift >= 64 && Slice != 0) || Slice << Shift >> Shift != Slice) {
+ if (Shift >= 63 && ((Shift == 63 && ((Slice << Shift) >> Shift) != Slice) ||
+ (Shift > 63 && Slice != 0))) {
if (error)
*error = "uleb128 too big for uint64";
Value = 0;
@@ -177,8 +178,8 @@ inline int64_t decodeSLEB128(const uint8_t *p, unsigned *n = nullptr,
}
Byte = *p;
uint64_t Slice = Byte & 0x7f;
- if ((Shift >= 64 && Slice != (Value < 0 ? 0x7f : 0x00)) ||
- (Shift == 63 && Slice != 0 && Slice != 0x7f)) {
+ if ((Shift >= 63) && ((Shift == 63 && Slice != 0 && Slice != 0x7f) ||
+ (Shift > 63 && Slice != (Value < 0 ? 0x7f : 0x00)))) {
if (error)
*error = "sleb128 too big for int64";
if (n)
>From 4374a286bca03bdfc05bdb1c7fb4157acaf78baa Mon Sep 17 00:00:00 2001
From: Adrian Prantl <aprantl at apple.com>
Date: Mon, 27 Nov 2023 14:46:57 -0800
Subject: [PATCH 4/4] [LEB128] Mark error condition with LLVM_UNLIKELY
---
llvm/include/llvm/Support/LEB128.h | 14 ++++++++------
1 file changed, 8 insertions(+), 6 deletions(-)
diff --git a/llvm/include/llvm/Support/LEB128.h b/llvm/include/llvm/Support/LEB128.h
index 22e995df2391ff1..3d5e98c4b2cddee 100644
--- a/llvm/include/llvm/Support/LEB128.h
+++ b/llvm/include/llvm/Support/LEB128.h
@@ -135,15 +135,16 @@ inline uint64_t decodeULEB128(const uint8_t *p, unsigned *n = nullptr,
uint64_t Value = 0;
unsigned Shift = 0;
do {
- if (p == end) {
+ if (LLVM_UNLIKELY(p == end)) {
if (error)
*error = "malformed uleb128, extends past end";
Value = 0;
break;
}
uint64_t Slice = *p & 0x7f;
- if (Shift >= 63 && ((Shift == 63 && ((Slice << Shift) >> Shift) != Slice) ||
- (Shift > 63 && Slice != 0))) {
+ if (LLVM_UNLIKELY(Shift >= 63) &&
+ ((Shift == 63 && ((Slice << Shift) >> Shift) != Slice) ||
+ (Shift > 63 && Slice != 0))) {
if (error)
*error = "uleb128 too big for uint64";
Value = 0;
@@ -169,7 +170,7 @@ inline int64_t decodeSLEB128(const uint8_t *p, unsigned *n = nullptr,
unsigned Shift = 0;
uint8_t Byte;
do {
- if (p == end) {
+ if (LLVM_UNLIKELY(p == end)) {
if (error)
*error = "malformed sleb128, extends past end";
if (n)
@@ -178,8 +179,9 @@ inline int64_t decodeSLEB128(const uint8_t *p, unsigned *n = nullptr,
}
Byte = *p;
uint64_t Slice = Byte & 0x7f;
- if ((Shift >= 63) && ((Shift == 63 && Slice != 0 && Slice != 0x7f) ||
- (Shift > 63 && Slice != (Value < 0 ? 0x7f : 0x00)))) {
+ if (LLVM_UNLIKELY(Shift >= 63) &&
+ ((Shift == 63 && Slice != 0 && Slice != 0x7f) ||
+ (Shift > 63 && Slice != (Value < 0 ? 0x7f : 0x00)))) {
if (error)
*error = "sleb128 too big for int64";
if (n)
More information about the llvm-commits
mailing list