[llvm] [llvm-symbolizer] Make symbolizer parse section relative syntax (PR #168524)

via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 18 04:18:45 PST 2025


https://github.com/midhuncodes7 created https://github.com/llvm/llvm-project/pull/168524

Implements the `(SECTION_TYPE)(+offset)` syntax for XCOFF in llvm-symbolizer to support section-relative addressing on AIX. This enables sanitizers to symbolize addresses without knowing section names by using standardized section types (TEXT, DATA, BSS, etc.).

The implementation parses the new syntax, maps section types to XCOFF STYP flags, and converts section-relative offsets to absolute VMAs using the formula: VMA = section_base + offset.

Implementation detail:
llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp:

- Parse the `(SECTION_TYPE)(+offset)` syntax for XCOFF (e.g., `(TEXT)(+0x100)`)
- Map section types (TEXT, DATA, BSS, TDATA, TBSS, etc.) to XCOFF STYP flags
- Convert section-relative offsets to absolute VMAs: VMA = section_base + offset
- Validate syntax with helpful error messages

Tests:

- xcoff-section-relative.ll: Functional test with actual symbol addresses
- xcoff-section-syntax.test: Syntax validation and error handling

>From 6280d2382724b449c1a3fd63d19c4e934caa4d34 Mon Sep 17 00:00:00 2001
From: Midhunesh <midhunesh.p at ibm.com>
Date: Thu, 13 Nov 2025 12:04:34 +0530
Subject: [PATCH 1/2] section relative syntax implementation

---
 .../llvm-symbolizer/xcoff-section-relative.ll |  51 ++++++
 .../llvm-symbolizer/xcoff-section-syntax.test |  31 ++++
 .../tools/llvm-symbolizer/llvm-symbolizer.cpp | 158 +++++++++++++++++-
 3 files changed, 232 insertions(+), 8 deletions(-)
 create mode 100644 llvm/test/tools/llvm-symbolizer/xcoff-section-relative.ll
 create mode 100644 llvm/test/tools/llvm-symbolizer/xcoff-section-syntax.test

diff --git a/llvm/test/tools/llvm-symbolizer/xcoff-section-relative.ll b/llvm/test/tools/llvm-symbolizer/xcoff-section-relative.ll
new file mode 100644
index 0000000000000..cfc6b31812a98
--- /dev/null
+++ b/llvm/test/tools/llvm-symbolizer/xcoff-section-relative.ll
@@ -0,0 +1,51 @@
+;; Test section-relative address syntax for XCOFF
+;; The syntax (SECTION_TYPE)(+offset) represents: offset from section base
+
+; REQUIRES: system-aix
+; RUN: llc -filetype=obj -o %t -mtriple=powerpc-aix-ibm-xcoff -function-sections < %s
+
+;; Test 1: Symbolize .foo using section-relative offset
+; RUN: llvm-nm --numeric-sort %t | grep " T \.foo$" | awk '{printf "CODE (TEXT)(+0x%%s)", $1}' > %t.foo_query
+; RUN: llvm-symbolizer --obj=%t @%t.foo_query | FileCheck %s --check-prefix=TEST-FOO
+
+;; Test 2: Symbolize .bar using section-relative offset
+; RUN: llvm-nm --numeric-sort %t | grep " T \.bar$" | awk '{printf "CODE (TEXT)(+0x%%s)", $1}' > %t.bar_query
+; RUN: llvm-symbolizer --obj=%t @%t.bar_query | FileCheck %s --check-prefix=TEST-BAR
+
+;; Test 3: Symbolize global_var using section-relative offset in DATA section
+; RUN: llvm-readobj --sections %t | awk '/Name: \.data/{found=1} found && /VirtualAddress:/{print $2; exit}' > %t.data_base
+; RUN: llvm-nm --numeric-sort %t | grep " D global_var$" | awk '{print $1}' > %t.global_var_vma
+; RUN: sh -c 'printf "%%d\n" $(cat %t.data_base)' > %t.data_base_dec
+; RUN: sh -c 'printf "%%d\n" 0x$(cat %t.global_var_vma)' > %t.global_var_dec
+; RUN: awk 'NR==FNR{base=$1; next} {vma=$1; printf "DATA (DATA)(+0x%%x)", vma-base}' %t.data_base_dec %t.global_var_dec > %t.data_query
+; RUN: llvm-symbolizer --obj=%t @%t.data_query | FileCheck %s --check-prefix=TEST-DATA
+
+;; Test 4: Verify section structure with llvm-readobj
+; RUN: llvm-readobj --sections %t | FileCheck %s --check-prefix=SECTIONS
+
+define void @foo() {
+entry:
+  ret void
+}
+
+define void @bar() {
+entry:
+  ret void
+}
+
+@global_var = global i32 42, align 4
+
+;; Verify correct symbolization with section-relative syntax
+; TEST-FOO: .foo
+; TEST-FOO-NEXT: ??:0:0
+
+; TEST-BAR: .bar
+; TEST-BAR-NEXT: ??:0:0
+
+; TEST-DATA: global_var
+
+;; Verify XCOFF sections exist with correct types
+; SECTIONS: Name: .text
+; SECTIONS: Type: STYP_TEXT
+; SECTIONS: Name: .data
+; SECTIONS: Type: STYP_DATA
\ No newline at end of file
diff --git a/llvm/test/tools/llvm-symbolizer/xcoff-section-syntax.test b/llvm/test/tools/llvm-symbolizer/xcoff-section-syntax.test
new file mode 100644
index 0000000000000..01bda672387f4
--- /dev/null
+++ b/llvm/test/tools/llvm-symbolizer/xcoff-section-syntax.test
@@ -0,0 +1,31 @@
+## Test section-relative address syntax parsing for XCOFF
+## This tests that the (SECTION_TYPE)(+offset) syntax produces appropriate
+## error messages for invalid syntax
+
+# REQUIRES: system-aix
+
+## Create a simple XCOFF object for testing
+# RUN: echo "define void @test() { ret void }" | \
+# RUN:   llc -filetype=obj -mtriple=powerpc-aix-ibm-xcoff -o %t.o
+
+## Test invalid section type
+# RUN: llvm-symbolizer --obj=%t.o '(INVALID)(+0x10)' 2>&1 | \
+# RUN:   FileCheck %s --check-prefix=INVALID-TYPE
+
+## Test missing '+' sign
+# RUN: llvm-symbolizer --obj=%t.o '(TEXT)(0x10)' 2>&1 | \
+# RUN:   FileCheck %s --check-prefix=NO-PLUS
+
+## Test invalid offset value (not a hex number)
+# RUN: llvm-symbolizer --obj=%t.o '(TEXT)(+abc)' 2>&1 | \
+# RUN:   FileCheck %s --check-prefix=INVALID-OFFSET
+
+## Test empty section type
+# RUN: llvm-symbolizer --obj=%t.o '()(+0x10)' 2>&1 | \
+# RUN:   FileCheck %s --check-prefix=EMPTY-SECTION
+
+## Verify error messages are helpful
+# INVALID-TYPE: unknown section type
+# NO-PLUS: section-relative offset must start with '+'
+# INVALID-OFFSET: invalid offset in section-relative address
+# EMPTY-SECTION: unknown section type
\ No newline at end of file
diff --git a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
index 4784dafeb2948..d239d1aad73d7 100644
--- a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
+++ b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
@@ -17,12 +17,15 @@
 #include "Opts.inc"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/BinaryFormat/XCOFF.h"
 #include "llvm/Config/config.h"
 #include "llvm/DebugInfo/Symbolize/DIPrinter.h"
 #include "llvm/DebugInfo/Symbolize/Markup.h"
 #include "llvm/DebugInfo/Symbolize/MarkupFilter.h"
 #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
 #include "llvm/DebugInfo/Symbolize/Symbolize.h"
+#include "llvm/Object/XCOFFObjectFile.h"
 #include "llvm/Debuginfod/BuildIDFetcher.h"
 #include "llvm/Debuginfod/Debuginfod.h"
 #include "llvm/Debuginfod/HTTPClient.h"
@@ -157,11 +160,97 @@ static Error makeStringError(StringRef Msg) {
   return make_error<StringError>(Msg, inconvertibleErrorCode());
 }
 
+// Map a section type name (e.g. "TEXT") to its XCOFF::STYP_* flag, or std::nullopt if unlisted.
+ static std::optional<XCOFF::SectionTypeFlags> parseXCOFFSectionType(StringRef TypeStr) {
+   return StringSwitch<std::optional<XCOFF::SectionTypeFlags>>(TypeStr)
+       .Case("PAD", XCOFF::STYP_PAD)
+       .Case("DWARF", XCOFF::STYP_DWARF)
+       .Case("TEXT", XCOFF::STYP_TEXT)
+       .Case("DATA", XCOFF::STYP_DATA)
+       .Case("BSS", XCOFF::STYP_BSS)
+       .Case("EXCEPT", XCOFF::STYP_EXCEPT)
+       .Case("INFO", XCOFF::STYP_INFO)
+       .Case("TDATA", XCOFF::STYP_TDATA)
+       .Case("TBSS", XCOFF::STYP_TBSS)
+       .Case("LOADER", XCOFF::STYP_LOADER)
+       .Case("DEBUG", XCOFF::STYP_DEBUG)
+       .Case("TYPCHK", XCOFF::STYP_TYPCHK)
+       .Case("OVRFLO", XCOFF::STYP_OVRFLO)
+       .Default(std::nullopt);
+ }
+
+ // Return the address of the first section in XCOFFObj whose section type
+ // (the low 16 bits of its flags) equals TypeFlag. Callers add the offset from
+ // (SECTION_TYPE)(+offset) to it. Returns an error if no section matches.
+ static Expected<uint64_t> getXCOFFSectionBaseAddress(
+     const object::XCOFFObjectFile *XCOFFObj,
+     XCOFF::SectionTypeFlags TypeFlag) {
+
+   for (const auto &Section : XCOFFObj->sections()) {
+     DataRefImpl SecRef = Section.getRawDataRefImpl();
+     int32_t Flags = XCOFFObj->getSectionFlags(SecRef);
+
+     if ((Flags & 0xFFFF) == TypeFlag) {
+       return Section.getAddress();
+     }
+   }
+
+   return createStringError(inconvertibleErrorCode(),
+                            "section type not found in XCOFF object");
+ }
+
+ static Expected<uint64_t> validateSectionType(StringRef ModulePath,
+                                                 StringRef SectionType,
+                                                 uint64_t &Offset,
+                                                 LLVMSymbolizer &Symbolizer) {
+   // Purpose: turn (SectionType, Offset) into an absolute VMA stored back in Offset.
+   auto SectionTypeFlag = parseXCOFFSectionType(SectionType);
+   if (!SectionTypeFlag) {
+     return createStringError(inconvertibleErrorCode(),
+                             "unknown section type: " + SectionType.str());
+   }
+
+   // Fail early if the symbolizer cannot load the module; the value is otherwise unused.
+   auto ModuleOrErr = Symbolizer.getOrCreateModuleInfo(ModulePath);
+   if (!ModuleOrErr) {
+     return ModuleOrErr.takeError();
+   }
+
+   auto BinaryOrErr = object::createBinary(ModulePath);
+   if (!BinaryOrErr) {
+     return BinaryOrErr.takeError();
+   }
+
+   object::Binary *Binary = BinaryOrErr->getBinary();
+   if (auto *XCOFFObj = dyn_cast<object::XCOFFObjectFile>(Binary)) {
+     // Look up the address of the first section of the requested type.
+     auto SectionBaseOrErr = getXCOFFSectionBaseAddress(XCOFFObj, *SectionTypeFlag);
+     if (!SectionBaseOrErr)
+       return SectionBaseOrErr.takeError();
+
+     uint64_t SectionBase = *SectionBaseOrErr;
+     uint64_t SectionRelativeOffset = Offset;
+
+     // In/out: Offset arrives section-relative and leaves as an absolute VMA
+     // (VMA = section_base + offset). The offset is not checked against the section.
+     Offset = SectionBase + SectionRelativeOffset;
+
+     // Always returns UndefSection - XCOFF symbolizer doesn't support
+     // SectionedAddress, so the absolute VMA written to Offset carries the result.
+     return object::SectionedAddress::UndefSection;
+   }
+
+   return createStringError(inconvertibleErrorCode(),
+                           "section type syntax is only supported for XCOFF objects");
+ }
+
 static Error parseCommand(StringRef BinaryName, bool IsAddr2Line,
                           StringRef InputString, Command &Cmd,
                           std::string &ModuleName, object::BuildID &BuildID,
-                          StringRef &Symbol, uint64_t &Offset) {
+                          StringRef &Symbol, uint64_t &Offset,
+                          StringRef &SectionType) {
   ModuleName = BinaryName;
+  SectionType = StringRef();
   if (InputString.consume_front("CODE ")) {
     Cmd = Command::Code;
   } else if (InputString.consume_front("DATA ")) {
@@ -245,10 +334,43 @@ static Error parseCommand(StringRef BinaryName, bool IsAddr2Line,
         AddrSpec.consume_front_insensitive("+0x");
   }
 
+  // Check for section-relative address syntax: (SECTION_TYPE)(+0x0)
+   if (AddrSpec.starts_with("(")) {
+     size_t FirstClose = AddrSpec.find(')');
+     if (FirstClose != StringRef::npos && FirstClose + 1 < AddrSpec.size() &&
+         AddrSpec[FirstClose + 1] == '(') {
+       size_t SecondOpen = FirstClose + 1;
+       size_t SecondClose = AddrSpec.find(')', SecondOpen);
+       if (SecondClose != StringRef::npos) {
+         // Extract section type from first parentheses
+         SectionType = AddrSpec.substr(1, FirstClose - 1);
+
+         // Validate that section type is not empty
+         if (SectionType.empty())
+           return makeStringError("unknown section type: empty section type");
+
+         // Extract offset from second parentheses
+         StringRef OffsetPart = AddrSpec.substr(SecondOpen + 1, SecondClose - SecondOpen - 1);
+
+         // The offset should start with '+'
+         if (!OffsetPart.consume_front("+"))
+           return makeStringError("section-relative offset must start with '+'");
+
+         // Parse the offset - auto-detect base (0x prefix = hex, otherwise decimal)
+         if (OffsetPart.getAsInteger(0, Offset))
+           return makeStringError("invalid offset in section-relative address");
+
+         Symbol = StringRef();
+         return Error::success();
+       }
+     }
+   }
+
   // If address specification is a number, treat it as a module offset.
   if (!AddrSpec.getAsInteger(IsAddr2Line ? 16 : 0, Offset)) {
     // Module offset is an address.
     Symbol = StringRef();
+    SectionType = StringRef();
     return Error::success();
   }
 
@@ -260,6 +382,7 @@ static Error parseCommand(StringRef BinaryName, bool IsAddr2Line,
   // Otherwise it is a symbol name, potentially with an offset.
   Symbol = AddrSpec;
   Offset = 0;
+  SectionType = StringRef();
 
   // If the address specification contains '+', try treating it as
   // "symbol + offset".
@@ -282,10 +405,11 @@ template <typename T>
 void executeCommand(StringRef ModuleName, const T &ModuleSpec, Command Cmd,
                     StringRef Symbol, uint64_t Offset, uint64_t AdjustVMA,
                     bool ShouldInline, OutputStyle Style,
-                    LLVMSymbolizer &Symbolizer, DIPrinter &Printer) {
-  uint64_t AdjustedOffset = Offset - AdjustVMA;
-  object::SectionedAddress Address = {AdjustedOffset,
-                                      object::SectionedAddress::UndefSection};
+                    LLVMSymbolizer &Symbolizer, DIPrinter &Printer,
+                    uint64_t SectionIndex) {
+   uint64_t AdjustedOffset = Offset - AdjustVMA;
+   object::SectionedAddress Address = {AdjustedOffset, SectionIndex};
+
   Request SymRequest = {
       ModuleName, Symbol.empty() ? std::make_optional(Offset) : std::nullopt,
       Symbol};
@@ -342,6 +466,7 @@ static void symbolizeInput(const opt::InputArgList &Args,
   object::BuildID BuildID(IncomingBuildID.begin(), IncomingBuildID.end());
   uint64_t Offset = 0;
   StringRef Symbol;
+  StringRef SectionType;
 
   // An empty input string may be used to check if the process is alive and
   // responding to input. Do not emit a message on stderr in this case but
@@ -352,24 +477,41 @@ static void symbolizeInput(const opt::InputArgList &Args,
   }
   if (Error E = parseCommand(Args.getLastArgValue(OPT_obj_EQ), IsAddr2Line,
                              StringRef(InputString), Cmd, ModuleName, BuildID,
-                             Symbol, Offset)) {
+                             Symbol, Offset, SectionType)) {
     handleAllErrors(std::move(E), [&](const StringError &EI) {
       printError(EI, InputString);
       printUnknownLineInfo(ModuleName, Printer);
     });
     return;
   }
+
+  // Validate section index from section type if specified
+  uint64_t SectionIndex = object::SectionedAddress::UndefSection;
+  if (!SectionType.empty() && !ModuleName.empty()) {
+    auto SectionIndexOrErr = validateSectionType(ModuleName, SectionType, Offset, Symbolizer);
+    if (!SectionIndexOrErr) {
+      handleAllErrors(SectionIndexOrErr.takeError(), [&](const ErrorInfoBase &EI) {
+        printError(EI, InputString);
+      });
+      printUnknownLineInfo(ModuleName, Printer);
+      return;
+    }
+    SectionIndex = *SectionIndexOrErr;
+  }
+
   bool ShouldInline = Args.hasFlag(OPT_inlines, OPT_no_inlines, !IsAddr2Line);
   if (!BuildID.empty()) {
     assert(ModuleName.empty());
     if (!Args.hasArg(OPT_no_debuginfod))
       enableDebuginfod(Symbolizer, Args);
     std::string BuildIDStr = toHex(BuildID);
+    // Note: Section type resolution is not supported for BuildID-based lookup
     executeCommand(BuildIDStr, BuildID, Cmd, Symbol, Offset, AdjustVMA,
-                   ShouldInline, Style, Symbolizer, Printer);
+                   ShouldInline, Style, Symbolizer, Printer,
+                    object::SectionedAddress::UndefSection);
   } else {
     executeCommand(ModuleName, ModuleName, Cmd, Symbol, Offset, AdjustVMA,
-                   ShouldInline, Style, Symbolizer, Printer);
+ShouldInline, Style, Symbolizer, Printer, SectionIndex);
   }
 }
 

>From 07ab95bdabbf2c69f0fe18d7acb7103e262c19d5 Mon Sep 17 00:00:00 2001
From: Midhunesh <midhuensh.p at ibm.com>
Date: Sun, 9 Nov 2025 12:33:22 -0500
Subject: [PATCH 2/2] symbolizer to accept section relative syntax

---
 llvm/test/tools/llvm-symbolizer/xcoff-section-relative.ll | 2 +-
 llvm/test/tools/llvm-symbolizer/xcoff-section-syntax.test | 2 +-
 llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp            | 1 +
 3 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/llvm/test/tools/llvm-symbolizer/xcoff-section-relative.ll b/llvm/test/tools/llvm-symbolizer/xcoff-section-relative.ll
index cfc6b31812a98..d1e21fe135e9e 100644
--- a/llvm/test/tools/llvm-symbolizer/xcoff-section-relative.ll
+++ b/llvm/test/tools/llvm-symbolizer/xcoff-section-relative.ll
@@ -48,4 +48,4 @@ entry:
 ; SECTIONS: Name: .text
 ; SECTIONS: Type: STYP_TEXT
 ; SECTIONS: Name: .data
-; SECTIONS: Type: STYP_DATA
\ No newline at end of file
+; SECTIONS: Type: STYP_DATA
diff --git a/llvm/test/tools/llvm-symbolizer/xcoff-section-syntax.test b/llvm/test/tools/llvm-symbolizer/xcoff-section-syntax.test
index 01bda672387f4..ca5ef9d3cb2cc 100644
--- a/llvm/test/tools/llvm-symbolizer/xcoff-section-syntax.test
+++ b/llvm/test/tools/llvm-symbolizer/xcoff-section-syntax.test
@@ -28,4 +28,4 @@
 # INVALID-TYPE: unknown section type
 # NO-PLUS: section-relative offset must start with '+'
 # INVALID-OFFSET: invalid offset in section-relative address
-# EMPTY-SECTION: unknown section type
\ No newline at end of file
+# EMPTY-SECTION: unknown section type
diff --git a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
index d239d1aad73d7..3bdbce55c4f68 100644
--- a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
+++ b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
@@ -29,6 +29,7 @@
 #include "llvm/Debuginfod/BuildIDFetcher.h"
 #include "llvm/Debuginfod/Debuginfod.h"
 #include "llvm/Debuginfod/HTTPClient.h"
+#include "llvm/Object/XCOFFObjectFile.h"
 #include "llvm/Option/Arg.h"
 #include "llvm/Option/ArgList.h"
 #include "llvm/Option/Option.h"



More information about the llvm-commits mailing list