[llvm] [llvm-symbolizer] Make symbolizer parse section relative syntax (PR #168524)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 19 00:31:58 PST 2025
https://github.com/midhuncodes7 updated https://github.com/llvm/llvm-project/pull/168524
>From 6280d2382724b449c1a3fd63d19c4e934caa4d34 Mon Sep 17 00:00:00 2001
From: Midhunesh <midhunesh.p at ibm.com>
Date: Thu, 13 Nov 2025 12:04:34 +0530
Subject: [PATCH 1/3] section relative syntax implementation
---
.../llvm-symbolizer/xcoff-section-relative.ll | 51 ++++++
.../llvm-symbolizer/xcoff-section-syntax.test | 31 ++++
.../tools/llvm-symbolizer/llvm-symbolizer.cpp | 158 +++++++++++++++++-
3 files changed, 232 insertions(+), 8 deletions(-)
create mode 100644 llvm/test/tools/llvm-symbolizer/xcoff-section-relative.ll
create mode 100644 llvm/test/tools/llvm-symbolizer/xcoff-section-syntax.test
diff --git a/llvm/test/tools/llvm-symbolizer/xcoff-section-relative.ll b/llvm/test/tools/llvm-symbolizer/xcoff-section-relative.ll
new file mode 100644
index 0000000000000..cfc6b31812a98
--- /dev/null
+++ b/llvm/test/tools/llvm-symbolizer/xcoff-section-relative.ll
@@ -0,0 +1,51 @@
+;; Test section-relative address syntax for XCOFF
+;; The syntax (SECTION_TYPE)(+offset) represents: offset from section base
+
+; REQUIRES: system-aix
+; RUN: llc -filetype=obj -o %t -mtriple=powerpc-aix-ibm-xcoff -function-sections < %s
+
+;; Test 1: Symbolize .foo using section-relative offset
+; RUN: llvm-nm --numeric-sort %t | grep " T \.foo$" | awk '{printf "CODE (TEXT)(+0x%%s)", $1}' > %t.foo_query
+; RUN: llvm-symbolizer --obj=%t @%t.foo_query | FileCheck %s --check-prefix=TEST-FOO
+
+;; Test 2: Symbolize .bar using section-relative offset
+; RUN: llvm-nm --numeric-sort %t | grep " T \.bar$" | awk '{printf "CODE (TEXT)(+0x%%s)", $1}' > %t.bar_query
+; RUN: llvm-symbolizer --obj=%t @%t.bar_query | FileCheck %s --check-prefix=TEST-BAR
+
+;; Test 3: Symbolize global_var using section-relative offset in DATA section
+; RUN: llvm-readobj --sections %t | awk '/Name: \.data/{found=1} found && /VirtualAddress:/{print $2; exit}' > %t.data_base
+; RUN: llvm-nm --numeric-sort %t | grep " D global_var$" | awk '{print $1}' > %t.global_var_vma
+; RUN: sh -c 'printf "%%d\n" $(cat %t.data_base)' > %t.data_base_dec
+; RUN: sh -c 'printf "%%d\n" 0x$(cat %t.global_var_vma)' > %t.global_var_dec
+; RUN: awk 'NR==FNR{base=$1; next} {vma=$1; printf "DATA (DATA)(+0x%%x)", vma-base}' %t.data_base_dec %t.global_var_dec > %t.data_query
+; RUN: llvm-symbolizer --obj=%t @%t.data_query | FileCheck %s --check-prefix=TEST-DATA
+
+;; Test 4: Verify section structure with llvm-readobj
+; RUN: llvm-readobj --sections %t | FileCheck %s --check-prefix=SECTIONS
+
+define void @foo() {
+entry:
+ ret void
+}
+
+define void @bar() {
+entry:
+ ret void
+}
+
+ at global_var = global i32 42, align 4
+
+;; Verify correct symbolization with section-relative syntax
+; TEST-FOO: .foo
+; TEST-FOO-NEXT: ??:0:0
+
+; TEST-BAR: .bar
+; TEST-BAR-NEXT: ??:0:0
+
+; TEST-DATA: global_var
+
+;; Verify XCOFF sections exist with correct types
+; SECTIONS: Name: .text
+; SECTIONS: Type: STYP_TEXT
+; SECTIONS: Name: .data
+; SECTIONS: Type: STYP_DATA
\ No newline at end of file
diff --git a/llvm/test/tools/llvm-symbolizer/xcoff-section-syntax.test b/llvm/test/tools/llvm-symbolizer/xcoff-section-syntax.test
new file mode 100644
index 0000000000000..01bda672387f4
--- /dev/null
+++ b/llvm/test/tools/llvm-symbolizer/xcoff-section-syntax.test
@@ -0,0 +1,31 @@
+## Test section-relative address syntax parsing for XCOFF
+## This tests that the (SECTION_TYPE)(+offset) syntax produces appropriate
+## error messages for invalid syntax
+
+# REQUIRES: system-aix
+
+## Create a simple XCOFF object for testing
+# RUN: echo "define void @test() { ret void }" | \
+# RUN: llc -filetype=obj -mtriple=powerpc-aix-ibm-xcoff -o %t.o
+
+## Test invalid section type
+# RUN: llvm-symbolizer --obj=%t.o '(INVALID)(+0x10)' 2>&1 | \
+# RUN: FileCheck %s --check-prefix=INVALID-TYPE
+
+## Test missing '+' sign
+# RUN: llvm-symbolizer --obj=%t.o '(TEXT)(0x10)' 2>&1 | \
+# RUN: FileCheck %s --check-prefix=NO-PLUS
+
+## Test invalid offset value (hex digits without a 0x prefix are rejected)
+# RUN: llvm-symbolizer --obj=%t.o '(TEXT)(+abc)' 2>&1 | \
+# RUN: FileCheck %s --check-prefix=INVALID-OFFSET
+
+## Test empty section type
+# RUN: llvm-symbolizer --obj=%t.o '()(+0x10)' 2>&1 | \
+# RUN: FileCheck %s --check-prefix=EMPTY-SECTION
+
+## Verify error messages are helpful
+# INVALID-TYPE: unknown section type
+# NO-PLUS: section-relative offset must start with '+'
+# INVALID-OFFSET: invalid offset in section-relative address
+# EMPTY-SECTION: unknown section type
\ No newline at end of file
diff --git a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
index 4784dafeb2948..d239d1aad73d7 100644
--- a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
+++ b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
@@ -17,12 +17,15 @@
#include "Opts.inc"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/BinaryFormat/XCOFF.h"
#include "llvm/Config/config.h"
#include "llvm/DebugInfo/Symbolize/DIPrinter.h"
#include "llvm/DebugInfo/Symbolize/Markup.h"
#include "llvm/DebugInfo/Symbolize/MarkupFilter.h"
#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
#include "llvm/DebugInfo/Symbolize/Symbolize.h"
+#include "llvm/Object/XCOFFObjectFile.h"
#include "llvm/Debuginfod/BuildIDFetcher.h"
#include "llvm/Debuginfod/Debuginfod.h"
#include "llvm/Debuginfod/HTTPClient.h"
@@ -157,11 +160,97 @@ static Error makeStringError(StringRef Msg) {
return make_error<StringError>(Msg, inconvertibleErrorCode());
}
+// Maps a section type name (e.g. "TEXT") to its XCOFF::STYP_* flag; yields std::nullopt for any other name.
+ static std::optional<XCOFF::SectionTypeFlags> parseXCOFFSectionType(StringRef TypeStr) {
+ return StringSwitch<std::optional<XCOFF::SectionTypeFlags>>(TypeStr)
+ .Case("PAD", XCOFF::STYP_PAD)
+ .Case("DWARF", XCOFF::STYP_DWARF)
+ .Case("TEXT", XCOFF::STYP_TEXT)
+ .Case("DATA", XCOFF::STYP_DATA)
+ .Case("BSS", XCOFF::STYP_BSS)
+ .Case("EXCEPT", XCOFF::STYP_EXCEPT)
+ .Case("INFO", XCOFF::STYP_INFO)
+ .Case("TDATA", XCOFF::STYP_TDATA)
+ .Case("TBSS", XCOFF::STYP_TBSS)
+ .Case("LOADER", XCOFF::STYP_LOADER)
+ .Case("DEBUG", XCOFF::STYP_DEBUG)
+ .Case("TYPCHK", XCOFF::STYP_TYPCHK)
+ .Case("OVRFLO", XCOFF::STYP_OVRFLO)
+ .Default(std::nullopt);
+ }
+
+ // Returns the address of the first section in XCOFFObj whose low 16 flag bits
+ // equal TypeFlag, or a string error if no section matches. Callers add the
+ // offset from the (SECTION_TYPE)(+offset) syntax to this base: VMA = base + offset.
+ static Expected<uint64_t> getXCOFFSectionBaseAddress(
+ const object::XCOFFObjectFile *XCOFFObj,
+ XCOFF::SectionTypeFlags TypeFlag) {
+
+ for (const auto &Section : XCOFFObj->sections()) {
+ DataRefImpl SecRef = Section.getRawDataRefImpl();
+ int32_t Flags = XCOFFObj->getSectionFlags(SecRef);
+
+ if ((Flags & 0xFFFF) == TypeFlag) {
+ return Section.getAddress();
+ }
+ }
+
+ return createStringError(inconvertibleErrorCode(),
+ "section type not found in XCOFF object");
+ }
+
+ static Expected<uint64_t> validateSectionType(StringRef ModulePath,
+ StringRef SectionType,
+ uint64_t &Offset,
+ LLVMSymbolizer &Symbolizer) {
+ // Translate the section type name; unrecognized names are reported as an error.
+ auto SectionTypeFlag = parseXCOFFSectionType(SectionType);
+ if (!SectionTypeFlag) {
+ return createStringError(inconvertibleErrorCode(),
+ "unknown section type: " + SectionType.str());
+ }
+
+ // NOTE(review): only the error state of the module info is used; the object file is opened again below.
+ auto ModuleOrErr = Symbolizer.getOrCreateModuleInfo(ModulePath);
+ if (!ModuleOrErr) {
+ return ModuleOrErr.takeError();
+ }
+
+ auto BinaryOrErr = object::createBinary(ModulePath);
+ if (!BinaryOrErr) {
+ return BinaryOrErr.takeError();
+ }
+
+ object::Binary *Binary = BinaryOrErr->getBinary();
+ if (auto *XCOFFObj = dyn_cast<object::XCOFFObjectFile>(Binary)) {
+ // Look up the base address of the first section of the requested type.
+ auto SectionBaseOrErr = getXCOFFSectionBaseAddress(XCOFFObj, *SectionTypeFlag);
+ if (!SectionBaseOrErr)
+ return SectionBaseOrErr.takeError();
+
+ uint64_t SectionBase = *SectionBaseOrErr;
+ uint64_t SectionRelativeOffset = Offset;
+
+ // Offset is an in/out parameter: section-relative on entry, and on success
+ // overwritten with the absolute VMA (section_base + offset).
+ Offset = SectionBase + SectionRelativeOffset;
+
+ // The value returned on success is always UndefSection - XCOFF symbolizer
+ // doesn't support SectionedAddress, so absolute VMA addressing is used instead.
+ return object::SectionedAddress::UndefSection;
+ }
+
+ return createStringError(inconvertibleErrorCode(),
+ "section type syntax is only supported for XCOFF objects");
+ }
+
static Error parseCommand(StringRef BinaryName, bool IsAddr2Line,
StringRef InputString, Command &Cmd,
std::string &ModuleName, object::BuildID &BuildID,
- StringRef &Symbol, uint64_t &Offset) {
+ StringRef &Symbol, uint64_t &Offset,
+ StringRef &SectionType) {
ModuleName = BinaryName;
+ SectionType = StringRef();
if (InputString.consume_front("CODE ")) {
Cmd = Command::Code;
} else if (InputString.consume_front("DATA ")) {
@@ -245,10 +334,43 @@ static Error parseCommand(StringRef BinaryName, bool IsAddr2Line,
AddrSpec.consume_front_insensitive("+0x");
}
+ // Check for section-relative address syntax: (SECTION_TYPE)(+OFFSET), e.g. (TEXT)(+0x10)
+ if (AddrSpec.starts_with("(")) {
+ size_t FirstClose = AddrSpec.find(')');
+ if (FirstClose != StringRef::npos && FirstClose + 1 < AddrSpec.size() &&
+ AddrSpec[FirstClose + 1] == '(') {
+ size_t SecondOpen = FirstClose + 1;
+ size_t SecondClose = AddrSpec.find(')', SecondOpen);
+ if (SecondClose != StringRef::npos) {
+ // Extract section type from first parentheses
+ SectionType = AddrSpec.substr(1, FirstClose - 1);
+
+ // Validate that section type is not empty
+ if (SectionType.empty())
+ return makeStringError("unknown section type: empty section type");
+
+ // Extract offset from second parentheses
+ StringRef OffsetPart = AddrSpec.substr(SecondOpen + 1, SecondClose - SecondOpen - 1);
+
+ // The offset should start with '+'
+ if (!OffsetPart.consume_front("+"))
+ return makeStringError("section-relative offset must start with '+'");
+
+ // Parse the offset - auto-detect base (0x prefix = hex, otherwise decimal)
+ if (OffsetPart.getAsInteger(0, Offset))
+ return makeStringError("invalid offset in section-relative address");
+
+ Symbol = StringRef();
+ return Error::success();
+ }
+ }
+ }
+
// If address specification is a number, treat it as a module offset.
if (!AddrSpec.getAsInteger(IsAddr2Line ? 16 : 0, Offset)) {
// Module offset is an address.
Symbol = StringRef();
+ SectionType = StringRef();
return Error::success();
}
@@ -260,6 +382,7 @@ static Error parseCommand(StringRef BinaryName, bool IsAddr2Line,
// Otherwise it is a symbol name, potentially with an offset.
Symbol = AddrSpec;
Offset = 0;
+ SectionType = StringRef();
// If the address specification contains '+', try treating it as
// "symbol + offset".
@@ -282,10 +405,11 @@ template <typename T>
void executeCommand(StringRef ModuleName, const T &ModuleSpec, Command Cmd,
StringRef Symbol, uint64_t Offset, uint64_t AdjustVMA,
bool ShouldInline, OutputStyle Style,
- LLVMSymbolizer &Symbolizer, DIPrinter &Printer) {
- uint64_t AdjustedOffset = Offset - AdjustVMA;
- object::SectionedAddress Address = {AdjustedOffset,
- object::SectionedAddress::UndefSection};
+ LLVMSymbolizer &Symbolizer, DIPrinter &Printer,
+ uint64_t SectionIndex) {
+ uint64_t AdjustedOffset = Offset - AdjustVMA;
+ object::SectionedAddress Address = {AdjustedOffset, SectionIndex};
+
Request SymRequest = {
ModuleName, Symbol.empty() ? std::make_optional(Offset) : std::nullopt,
Symbol};
@@ -342,6 +466,7 @@ static void symbolizeInput(const opt::InputArgList &Args,
object::BuildID BuildID(IncomingBuildID.begin(), IncomingBuildID.end());
uint64_t Offset = 0;
StringRef Symbol;
+ StringRef SectionType;
// An empty input string may be used to check if the process is alive and
// responding to input. Do not emit a message on stderr in this case but
@@ -352,24 +477,41 @@ static void symbolizeInput(const opt::InputArgList &Args,
}
if (Error E = parseCommand(Args.getLastArgValue(OPT_obj_EQ), IsAddr2Line,
StringRef(InputString), Cmd, ModuleName, BuildID,
- Symbol, Offset)) {
+ Symbol, Offset, SectionType)) {
handleAllErrors(std::move(E), [&](const StringError &EI) {
printError(EI, InputString);
printUnknownLineInfo(ModuleName, Printer);
});
return;
}
+
+ // If a section type was given, resolve it and rewrite Offset from section-relative to absolute VMA
+ uint64_t SectionIndex = object::SectionedAddress::UndefSection;
+ if (!SectionType.empty() && !ModuleName.empty()) {
+ auto SectionIndexOrErr = validateSectionType(ModuleName, SectionType, Offset, Symbolizer);
+ if (!SectionIndexOrErr) {
+ handleAllErrors(SectionIndexOrErr.takeError(), [&](const ErrorInfoBase &EI) {
+ printError(EI, InputString);
+ });
+ printUnknownLineInfo(ModuleName, Printer);
+ return;
+ }
+ SectionIndex = *SectionIndexOrErr;
+ }
+
bool ShouldInline = Args.hasFlag(OPT_inlines, OPT_no_inlines, !IsAddr2Line);
if (!BuildID.empty()) {
assert(ModuleName.empty());
if (!Args.hasArg(OPT_no_debuginfod))
enableDebuginfod(Symbolizer, Args);
std::string BuildIDStr = toHex(BuildID);
+ // Note: Section type resolution is not supported for BuildID-based lookup
executeCommand(BuildIDStr, BuildID, Cmd, Symbol, Offset, AdjustVMA,
- ShouldInline, Style, Symbolizer, Printer);
+ ShouldInline, Style, Symbolizer, Printer,
+ object::SectionedAddress::UndefSection);
} else {
executeCommand(ModuleName, ModuleName, Cmd, Symbol, Offset, AdjustVMA,
- ShouldInline, Style, Symbolizer, Printer);
+ShouldInline, Style, Symbolizer, Printer, SectionIndex);
}
}
>From 07ab95bdabbf2c69f0fe18d7acb7103e262c19d5 Mon Sep 17 00:00:00 2001
From: Midhunesh <midhuensh.p at ibm.com>
Date: Sun, 9 Nov 2025 12:33:22 -0500
Subject: [PATCH 2/3] symbolizer to accept section relative syntax
---
llvm/test/tools/llvm-symbolizer/xcoff-section-relative.ll | 2 +-
llvm/test/tools/llvm-symbolizer/xcoff-section-syntax.test | 2 +-
llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp | 1 +
3 files changed, 3 insertions(+), 2 deletions(-)
diff --git a/llvm/test/tools/llvm-symbolizer/xcoff-section-relative.ll b/llvm/test/tools/llvm-symbolizer/xcoff-section-relative.ll
index cfc6b31812a98..d1e21fe135e9e 100644
--- a/llvm/test/tools/llvm-symbolizer/xcoff-section-relative.ll
+++ b/llvm/test/tools/llvm-symbolizer/xcoff-section-relative.ll
@@ -48,4 +48,4 @@ entry:
; SECTIONS: Name: .text
; SECTIONS: Type: STYP_TEXT
; SECTIONS: Name: .data
-; SECTIONS: Type: STYP_DATA
\ No newline at end of file
+; SECTIONS: Type: STYP_DATA
diff --git a/llvm/test/tools/llvm-symbolizer/xcoff-section-syntax.test b/llvm/test/tools/llvm-symbolizer/xcoff-section-syntax.test
index 01bda672387f4..ca5ef9d3cb2cc 100644
--- a/llvm/test/tools/llvm-symbolizer/xcoff-section-syntax.test
+++ b/llvm/test/tools/llvm-symbolizer/xcoff-section-syntax.test
@@ -28,4 +28,4 @@
# INVALID-TYPE: unknown section type
# NO-PLUS: section-relative offset must start with '+'
# INVALID-OFFSET: invalid offset in section-relative address
-# EMPTY-SECTION: unknown section type
\ No newline at end of file
+# EMPTY-SECTION: unknown section type
diff --git a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
index d239d1aad73d7..3bdbce55c4f68 100644
--- a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
+++ b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
@@ -29,6 +29,7 @@
#include "llvm/Debuginfod/BuildIDFetcher.h"
#include "llvm/Debuginfod/Debuginfod.h"
#include "llvm/Debuginfod/HTTPClient.h"
+#include "llvm/Object/XCOFFObjectFile.h"
#include "llvm/Option/Arg.h"
#include "llvm/Option/ArgList.h"
#include "llvm/Option/Option.h"
>From a0b4db7d32a4a3ca40141e555ee6a2c428c47593 Mon Sep 17 00:00:00 2001
From: Midhunesh <midhuensh.p at ibm.com>
Date: Tue, 18 Nov 2025 07:24:11 -0500
Subject: [PATCH 3/3] code format fix
---
llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
index 3bdbce55c4f68..1ad5d43409b4a 100644
--- a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
+++ b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
@@ -1,4 +1,5 @@
-//===-- llvm-symbolizer.cpp - Simple addr2line-like symbolizer ------------===//
+//===-- llvm-symbolizer.cpp - Simple addr2line-like symbolizer
+//------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
More information about the llvm-commits
mailing list