[lld] [lld-macho] Avoid infinite recursion when parsing corrupted export tries (PR #152569)
Daniel Rodríguez Troitiño via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 25 13:33:55 PDT 2025
https://github.com/drodriguez updated https://github.com/llvm/llvm-project/pull/152569
>From 6abf565186e158f42ba44401870ebe4490bb5619 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniel=20Rodr=C3=ADguez?= <danielrodriguez at meta.com>
Date: Tue, 5 Aug 2025 18:13:47 -0700
Subject: [PATCH 1/3] [lld-macho] Avoid infinite recursion when parsing
corrupted export tries
If an export trie is encoded incorrectly, and one of the child
offsets points back to one of the nodes earlier in the serialization,
the current code will end up in an infinite recursion, and eventually
fail after exhausting the available memory.
The failure can be avoided if, before recursing, one checks that the
offset is valid, that is, that the child offset is not one of the
previously visited offsets. This is similar to a check done by llvm-objdump,
which reports the trie as being corrupted.
---
lld/MachO/ExportTrie.cpp | 35 +++++++++++++-----
lld/MachO/ExportTrie.h | 3 +-
lld/MachO/InputFiles.cpp | 13 ++++---
.../MachO/invalid/Inputs/macho-trie-node-loop | Bin 0 -> 8752 bytes
.../MachO/invalid/export-trie-node-loop.s | 9 +++++
5 files changed, 43 insertions(+), 17 deletions(-)
create mode 100755 lld/test/MachO/invalid/Inputs/macho-trie-node-loop
create mode 100644 lld/test/MachO/invalid/export-trie-node-loop.s
diff --git a/lld/MachO/ExportTrie.cpp b/lld/MachO/ExportTrie.cpp
index 303eda416c235..9d1ed850ee89d 100644
--- a/lld/MachO/ExportTrie.cpp
+++ b/lld/MachO/ExportTrie.cpp
@@ -41,6 +41,7 @@
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/Support/LEB128.h"
#include <optional>
+#include <unordered_set>
using namespace llvm;
using namespace lld;
@@ -296,13 +297,19 @@ namespace {
// Parse a serialized trie and invoke a callback for each entry.
class TrieParser {
public:
- TrieParser(const uint8_t *buf, size_t size, const TrieEntryCallback &callback)
- : start(buf), end(start + size), callback(callback) {}
+ TrieParser(const std::string &fileName, const uint8_t *buf, size_t size,
+ const TrieEntryCallback &callback)
+ : fileName(fileName), start(buf), end(start + size), callback(callback) {}
- void parse(const uint8_t *buf, const Twine &cumulativeString);
+ void parse(const uint8_t *buf, const Twine &cumulativeString,
+ std::unordered_set<size_t> &visited);
- void parse() { parse(start, ""); }
+ void parse() {
+ std::unordered_set<size_t> visited;
+ parse(start, "", visited);
+ }
+ const std::string fileName;
const uint8_t *start;
const uint8_t *end;
const TrieEntryCallback &callback;
@@ -310,9 +317,13 @@ class TrieParser {
} // namespace
-void TrieParser::parse(const uint8_t *buf, const Twine &cumulativeString) {
+void TrieParser::parse(const uint8_t *buf, const Twine &cumulativeString,
+ std::unordered_set<size_t> &visited) {
if (buf >= end)
- fatal("Node offset points outside export section");
+ fatal(fileName + ": export trie node offset points outside export section");
+
+ size_t currentOffset = buf - start;
+ visited.insert(currentOffset);
unsigned ulebSize;
uint64_t terminalSize = decodeULEB128(buf, &ulebSize);
@@ -331,14 +342,18 @@ void TrieParser::parse(const uint8_t *buf, const Twine &cumulativeString) {
buf += substring.size() + 1;
offset = decodeULEB128(buf, &ulebSize);
buf += ulebSize;
- parse(start + offset, cumulativeString + substring);
+ if (visited.find(offset) != visited.end())
+ fatal(fileName + ": export trie child node loop");
+ parse(start + offset, cumulativeString + substring, visited);
}
+
+ visited.erase(currentOffset);
}
-void macho::parseTrie(const uint8_t *buf, size_t size,
- const TrieEntryCallback &callback) {
+void macho::parseTrie(const std::string &fileName, const uint8_t *buf,
+ size_t size, const TrieEntryCallback &callback) {
if (size == 0)
return;
- TrieParser(buf, size, callback).parse();
+ TrieParser(fileName, buf, size, callback).parse();
}
diff --git a/lld/MachO/ExportTrie.h b/lld/MachO/ExportTrie.h
index aa7e3b0d4a14b..fa73fc4ef6605 100644
--- a/lld/MachO/ExportTrie.h
+++ b/lld/MachO/ExportTrie.h
@@ -41,7 +41,8 @@ class TrieBuilder {
using TrieEntryCallback =
llvm::function_ref<void(const llvm::Twine & /*name*/, uint64_t /*flags*/)>;
-void parseTrie(const uint8_t *buf, size_t size, const TrieEntryCallback &);
+void parseTrie(const std::string &fileName, const uint8_t *buf, size_t size,
+ const TrieEntryCallback &);
} // namespace lld::macho
diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index 3b3023a94166f..442fc608865d2 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -1789,12 +1789,13 @@ void DylibFile::parseExportedSymbols(uint32_t offset, uint32_t size) {
auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
std::vector<TrieEntry> entries;
// Find all the $ld$* symbols to process first.
- parseTrie(buf + offset, size, [&](const Twine &name, uint64_t flags) {
- StringRef savedName = saver().save(name);
- if (handleLDSymbol(savedName))
- return;
- entries.push_back({savedName, flags});
- });
+ parseTrie(toString(this), buf + offset, size,
+ [&](const Twine &name, uint64_t flags) {
+ StringRef savedName = saver().save(name);
+ if (handleLDSymbol(savedName))
+ return;
+ entries.push_back({savedName, flags});
+ });
// Process the "normal" symbols.
for (TrieEntry &entry : entries) {
diff --git a/lld/test/MachO/invalid/Inputs/macho-trie-node-loop b/lld/test/MachO/invalid/Inputs/macho-trie-node-loop
new file mode 100755
index 0000000000000000000000000000000000000000..b94dfa2610e9f0fae275dc1cef46563d1b695cf6
GIT binary patch
literal 8752
zcmeHN&1)M+6n`68jq143h7_89SlP68n=h0&F~#7fSRnzKh9Z9?wFB;W70Fnz(mK-m
zs}K;Rg_>YcZ#|X%0X^jCTTRoHl1op89D6Awkd#~sA&08JH#;M*HgTc!RG0@(Z{EzC
z-^~2pF6{2pFV7zTwU@|B5YbD}G_;T;`l9(2T12ly?^{GdOkbRtmR;RHcDGB)2POeV
zDn|$rl-B}og%Zcx-XHeQsd4suR9l)JYDQfl8o_e0(dmlgW%wYFzR(c+I7UZO3Xjjs
z65@%)vbR*OOTYWPCl%g at GMb3}J*q9ubNgGatyXF!QK>EXFTnE at US46a--Q>&mh;Pt
zVxjI<%i%!$yjw9m_Bp4!3Fq}onb!-s>0BgA=W`d!d1YLk8zE|*XslO at z9*J~I(U3B
zo`&P=Bf2l<KYDH<JolgQYz3t8*pK4z{`B6^<m+y+UUo}j#jW$z65pSrfDUmmMEjvf
zbWk^x?ckN%z(rpiZ$#ngJ+L1=-+>q&=Yg*QAuf%7{L#e=<N6+RRG`8m<)2nyd`_gV
zBd?%!6w2p__Is7$g70Tb>u7NB+eFRN+;E6eFe%s(*fCgMs-WyV>^yv4jxm3Bh<J_L
zM*sl34_bh-a11Nwk}>(d-%Y;8e0|SFW8Q?r>)gAt_3WL@==<OOIeg~$dq+nH&?hO_
z*Na7a>A3b`S@{0e=jJ_V!EuXjedpX9$LHoXWSq5zmPMzQ8}(BNX1#XeM0Q9`;rNkA
zyw!UTDeaD*T5km9YWDojDn##M4)1r{3As3?eCE%HVZbn87%&VN1`Gp+0mHz5XW(*v
z<M;gLxyu$&`?Xs%cV|ILHhuVW3C`nN{J=MNr^O=UZ$7f~8#kWjZ{2wM;4sGWH-EIF
z<D1F5?RFbI?ZHPFZV&z;u`X@)|B6O#sy%oYwpIA#%H%|@kehh$6T<Tw$?th|Ha9bS
z^;0p^9=r{A5lRn#z<Bsv{ucX?sO9bP`|dC-tA2OoHox2S;kna>Dv^y>m6P~vlB_k`
zvitU?S{V7a{Tfu;57c|)yt2AKZt*Z|#4unOFbo(53<HJ%!+>GHFkl!k3>XFs1BQYB
z2?Ga?*ps}~gJY;{dMl#n`#~e9yGt~2-D!=XZdj6)c2Ptz8<p|qeSFH0Vzqc4rd_SA
zR=FwV`XL}yUC;Bckqoc|>VZkrAcyH#7 at BpxirWw<&z+^S_#L8i0i93Kc0VP0p7c{H
zWrc&3Nb$FhJ*&Q`mm9vfg7v2;nY6#jv@(>izIE{3q~Eb&WKA!F=-<@;2)I7F|7Fzh
zxEKfFI&Ru((i1iQ27Inza=nh_B$R!&EN7r;;OYM*a4j&6;-;hUM%B2jzLSi at M|h_c
zJsbMy3Pr?xF5^eznZc;f`{(^|9TGC8eTPJ_>f>hs*&3+SDgjEzG6Nz)Yseq}4v~=D
W3&dgK4w3P6t6-TGoxta^PxLojN;=H|
literal 0
HcmV?d00001
diff --git a/lld/test/MachO/invalid/export-trie-node-loop.s b/lld/test/MachO/invalid/export-trie-node-loop.s
new file mode 100644
index 0000000000000..e9bb4b2627ea6
--- /dev/null
+++ b/lld/test/MachO/invalid/export-trie-node-loop.s
@@ -0,0 +1,9 @@
+# REQUIRES: x86
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o
+# RUN: not %lld -o %t %t.o %S/Inputs/macho-trie-node-loop 2>&1 | FileCheck %s
+# CHECK: error:
+# CHECK-SAME: /Inputs/macho-trie-node-loop: export trie child node loop
+
+.globl _main
+_main:
+ ret
>From a0ea8b42a2f0ec1d7c47c3e48808703f04b6e5e6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniel=20Rodr=C3=ADguez?= <danielrodriguez at meta.com>
Date: Mon, 25 Aug 2025 13:06:16 -0700
Subject: [PATCH 2/3] Improve fatal error message
This came up during an internal review.
---
lld/MachO/ExportTrie.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/lld/MachO/ExportTrie.cpp b/lld/MachO/ExportTrie.cpp
index 9d1ed850ee89d..34478f4c6cf45 100644
--- a/lld/MachO/ExportTrie.cpp
+++ b/lld/MachO/ExportTrie.cpp
@@ -343,7 +343,7 @@ void TrieParser::parse(const uint8_t *buf, const Twine &cumulativeString,
offset = decodeULEB128(buf, &ulebSize);
buf += ulebSize;
if (visited.find(offset) != visited.end())
- fatal(fileName + ": export trie child node loop");
+ fatal(fileName + ": export trie child node infinite loop");
parse(start + offset, cumulativeString + substring, visited);
}
>From 1ea7b3ecddf2351b108c3473276949047e59fcfe Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniel=20Rodr=C3=ADguez?= <danielrodriguez at meta.com>
Date: Mon, 25 Aug 2025 13:33:35 -0700
Subject: [PATCH 3/3] Also change message in test
---
lld/test/MachO/invalid/export-trie-node-loop.s | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/lld/test/MachO/invalid/export-trie-node-loop.s b/lld/test/MachO/invalid/export-trie-node-loop.s
index e9bb4b2627ea6..fe991597fe5ad 100644
--- a/lld/test/MachO/invalid/export-trie-node-loop.s
+++ b/lld/test/MachO/invalid/export-trie-node-loop.s
@@ -2,7 +2,7 @@
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o
# RUN: not %lld -o %t %t.o %S/Inputs/macho-trie-node-loop 2>&1 | FileCheck %s
# CHECK: error:
-# CHECK-SAME: /Inputs/macho-trie-node-loop: export trie child node loop
+# CHECK-SAME: /Inputs/macho-trie-node-loop: export trie child node infinite loop
.globl _main
_main:
More information about the llvm-commits
mailing list