[llvm] [llvm-symbolizer] Make symbolizer parse section relative syntax (PR #168524)

via llvm-commits llvm-commits at lists.llvm.org
Wed Nov 19 00:37:59 PST 2025


https://github.com/midhuncodes7 updated https://github.com/llvm/llvm-project/pull/168524

>From 6280d2382724b449c1a3fd63d19c4e934caa4d34 Mon Sep 17 00:00:00 2001
From: Midhunesh <midhunesh.p at ibm.com>
Date: Thu, 13 Nov 2025 12:04:34 +0530
Subject: [PATCH 1/3] section relative syntax implementation

---
 .../llvm-symbolizer/xcoff-section-relative.ll |  51 ++++++
 .../llvm-symbolizer/xcoff-section-syntax.test |  31 ++++
 .../tools/llvm-symbolizer/llvm-symbolizer.cpp | 158 +++++++++++++++++-
 3 files changed, 232 insertions(+), 8 deletions(-)
 create mode 100644 llvm/test/tools/llvm-symbolizer/xcoff-section-relative.ll
 create mode 100644 llvm/test/tools/llvm-symbolizer/xcoff-section-syntax.test

diff --git a/llvm/test/tools/llvm-symbolizer/xcoff-section-relative.ll b/llvm/test/tools/llvm-symbolizer/xcoff-section-relative.ll
new file mode 100644
index 0000000000000..cfc6b31812a98
--- /dev/null
+++ b/llvm/test/tools/llvm-symbolizer/xcoff-section-relative.ll
@@ -0,0 +1,51 @@
+;; Test section-relative address syntax for XCOFF
+;; The syntax (SECTION_TYPE)(+offset) represents: offset from section base
+
+; REQUIRES: system-aix
+; RUN: llc -filetype=obj -o %t -mtriple=powerpc-aix-ibm-xcoff -function-sections < %s
+
+;; Test 1: Symbolize .foo using section-relative offset
+; RUN: llvm-nm --numeric-sort %t | grep " T \.foo$" | awk '{printf "CODE (TEXT)(+0x%%s)", $1}' > %t.foo_query
+; RUN: llvm-symbolizer --obj=%t @%t.foo_query | FileCheck %s --check-prefix=TEST-FOO
+
+;; Test 2: Symbolize .bar using section-relative offset
+; RUN: llvm-nm --numeric-sort %t | grep " T \.bar$" | awk '{printf "CODE (TEXT)(+0x%%s)", $1}' > %t.bar_query
+; RUN: llvm-symbolizer --obj=%t @%t.bar_query | FileCheck %s --check-prefix=TEST-BAR
+
+;; Test 3: Symbolize global_var using section-relative offset in DATA section
+; RUN: llvm-readobj --sections %t | awk '/Name: \.data/{found=1} found && /VirtualAddress:/{print $2; exit}' > %t.data_base
+; RUN: llvm-nm --numeric-sort %t | grep " D global_var$" | awk '{print $1}' > %t.global_var_vma
+; RUN: sh -c 'printf "%%d\n" $(cat %t.data_base)' > %t.data_base_dec
+; RUN: sh -c 'printf "%%d\n" 0x$(cat %t.global_var_vma)' > %t.global_var_dec
+; RUN: awk 'NR==FNR{base=$1; next} {vma=$1; printf "DATA (DATA)(+0x%%x)", vma-base}' %t.data_base_dec %t.global_var_dec > %t.data_query
+; RUN: llvm-symbolizer --obj=%t @%t.data_query | FileCheck %s --check-prefix=TEST-DATA
+
+;; Test 4: Verify section structure with llvm-readobj
+; RUN: llvm-readobj --sections %t | FileCheck %s --check-prefix=SECTIONS
+
+define void @foo() {
+entry:
+  ret void
+}
+
+define void @bar() {
+entry:
+  ret void
+}
+
+@global_var = global i32 42, align 4
+
+;; Verify correct symbolization with section-relative syntax
+; TEST-FOO: .foo
+; TEST-FOO-NEXT: ??:0:0
+
+; TEST-BAR: .bar
+; TEST-BAR-NEXT: ??:0:0
+
+; TEST-DATA: global_var
+
+;; Verify XCOFF sections exist with correct types
+; SECTIONS: Name: .text
+; SECTIONS: Type: STYP_TEXT
+; SECTIONS: Name: .data
+; SECTIONS: Type: STYP_DATA
\ No newline at end of file
diff --git a/llvm/test/tools/llvm-symbolizer/xcoff-section-syntax.test b/llvm/test/tools/llvm-symbolizer/xcoff-section-syntax.test
new file mode 100644
index 0000000000000..01bda672387f4
--- /dev/null
+++ b/llvm/test/tools/llvm-symbolizer/xcoff-section-syntax.test
@@ -0,0 +1,31 @@
+## Test section-relative address syntax parsing for XCOFF
+## This tests that the (SECTION_TYPE)(+offset) syntax produces appropriate
+## error messages for invalid syntax
+
+# REQUIRES: system-aix
+
+## Create a simple XCOFF object for testing
+# RUN: echo "define void @test() { ret void }" | \
+# RUN:   llc -filetype=obj -mtriple=powerpc-aix-ibm-xcoff -o %t.o
+
+## Test invalid section type
+# RUN: llvm-symbolizer --obj=%t.o '(INVALID)(+0x10)' 2>&1 | \
+# RUN:   FileCheck %s --check-prefix=INVALID-TYPE
+
+## Test missing '+' sign
+# RUN: llvm-symbolizer --obj=%t.o '(TEXT)(0x10)' 2>&1 | \
+# RUN:   FileCheck %s --check-prefix=NO-PLUS
+
+## Test invalid offset value (not a hex number)
+# RUN: llvm-symbolizer --obj=%t.o '(TEXT)(+abc)' 2>&1 | \
+# RUN:   FileCheck %s --check-prefix=INVALID-OFFSET
+
+## Test empty section type
+# RUN: llvm-symbolizer --obj=%t.o '()(+0x10)' 2>&1 | \
+# RUN:   FileCheck %s --check-prefix=EMPTY-SECTION
+
+## Verify error messages are helpful
+# INVALID-TYPE: unknown section type
+# NO-PLUS: section-relative offset must start with '+'
+# INVALID-OFFSET: invalid offset in section-relative address
+# EMPTY-SECTION: unknown section type
\ No newline at end of file
diff --git a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
index 4784dafeb2948..d239d1aad73d7 100644
--- a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
+++ b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
@@ -17,12 +17,15 @@
 #include "Opts.inc"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/BinaryFormat/XCOFF.h"
 #include "llvm/Config/config.h"
 #include "llvm/DebugInfo/Symbolize/DIPrinter.h"
 #include "llvm/DebugInfo/Symbolize/Markup.h"
 #include "llvm/DebugInfo/Symbolize/MarkupFilter.h"
 #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
 #include "llvm/DebugInfo/Symbolize/Symbolize.h"
+#include "llvm/Object/XCOFFObjectFile.h"
 #include "llvm/Debuginfod/BuildIDFetcher.h"
 #include "llvm/Debuginfod/Debuginfod.h"
 #include "llvm/Debuginfod/HTTPClient.h"
@@ -157,11 +160,97 @@ static Error makeStringError(StringRef Msg) {
   return make_error<StringError>(Msg, inconvertibleErrorCode());
 }
 
+// Helper function to get XCOFF section type flag from string
+ static std::optional<XCOFF::SectionTypeFlags> parseXCOFFSectionType(StringRef TypeStr) {
+   return StringSwitch<std::optional<XCOFF::SectionTypeFlags>>(TypeStr)
+       .Case("PAD", XCOFF::STYP_PAD)
+       .Case("DWARF", XCOFF::STYP_DWARF)
+       .Case("TEXT", XCOFF::STYP_TEXT)
+       .Case("DATA", XCOFF::STYP_DATA)
+       .Case("BSS", XCOFF::STYP_BSS)
+       .Case("EXCEPT", XCOFF::STYP_EXCEPT)
+       .Case("INFO", XCOFF::STYP_INFO)
+       .Case("TDATA", XCOFF::STYP_TDATA)
+       .Case("TBSS", XCOFF::STYP_TBSS)
+       .Case("LOADER", XCOFF::STYP_LOADER)
+       .Case("DEBUG", XCOFF::STYP_DEBUG)
+       .Case("TYPCHK", XCOFF::STYP_TYPCHK)
+       .Case("OVRFLO", XCOFF::STYP_OVRFLO)
+       .Default(std::nullopt);
+ }
+
+ // Find the base VMA of the first section matching the given type for XCOFF.
+ // The syntax (SECTION_TYPE)(+offset) represents an offset from the section base,
+ // so we return the section's base address to compute: VMA = base + offset.
+ static Expected<uint64_t> getXCOFFSectionBaseAddress(
+     const object::XCOFFObjectFile *XCOFFObj,
+     XCOFF::SectionTypeFlags TypeFlag) {
+
+   for (const auto &Section : XCOFFObj->sections()) {
+     DataRefImpl SecRef = Section.getRawDataRefImpl();
+     int32_t Flags = XCOFFObj->getSectionFlags(SecRef);
+
+     if ((Flags & 0xFFFF) == TypeFlag) {
+       return Section.getAddress();
+     }
+   }
+
+   return createStringError(inconvertibleErrorCode(),
+                            "section type not found in XCOFF object");
+ }
+
+ static Expected<uint64_t> validateSectionType(StringRef ModulePath,
+                                                 StringRef SectionType,
+                                                 uint64_t &Offset,
+                                                 LLVMSymbolizer &Symbolizer) {
+   // Parse the section type string
+   auto SectionTypeFlag = parseXCOFFSectionType(SectionType);
+   if (!SectionTypeFlag) {
+     return createStringError(inconvertibleErrorCode(),
+                             "unknown section type: " + SectionType.str());
+   }
+
+   // Get the module info to access the object file
+   auto ModuleOrErr = Symbolizer.getOrCreateModuleInfo(ModulePath);
+   if (!ModuleOrErr) {
+     return ModuleOrErr.takeError();
+   }
+
+   auto BinaryOrErr = object::createBinary(ModulePath);
+   if (!BinaryOrErr) {
+     return BinaryOrErr.takeError();
+   }
+
+   object::Binary *Binary = BinaryOrErr->getBinary();
+   if (auto *XCOFFObj = dyn_cast<object::XCOFFObjectFile>(Binary)) {
+     // Get the base VMA of the section matching the type
+     auto SectionBaseOrErr = getXCOFFSectionBaseAddress(XCOFFObj, *SectionTypeFlag);
+     if (!SectionBaseOrErr)
+       return SectionBaseOrErr.takeError();
+
+     uint64_t SectionBase = *SectionBaseOrErr;
+     uint64_t SectionRelativeOffset = Offset;
+
+     // Convert section-relative offset to absolute VMA
+     // VMA = section_base + offset
+     Offset = SectionBase + SectionRelativeOffset;
+
+     // Return UndefSection - XCOFF symbolizer doesn't support SectionedAddress,
+     // so we use absolute VMA addressing instead.
+     return object::SectionedAddress::UndefSection;
+   }
+
+   return createStringError(inconvertibleErrorCode(),
+                           "section type syntax is only supported for XCOFF objects");
+ }
+
 static Error parseCommand(StringRef BinaryName, bool IsAddr2Line,
                           StringRef InputString, Command &Cmd,
                           std::string &ModuleName, object::BuildID &BuildID,
-                          StringRef &Symbol, uint64_t &Offset) {
+                          StringRef &Symbol, uint64_t &Offset,
+                          StringRef &SectionType) {
   ModuleName = BinaryName;
+  SectionType = StringRef();
   if (InputString.consume_front("CODE ")) {
     Cmd = Command::Code;
   } else if (InputString.consume_front("DATA ")) {
@@ -245,10 +334,43 @@ static Error parseCommand(StringRef BinaryName, bool IsAddr2Line,
         AddrSpec.consume_front_insensitive("+0x");
   }
 
+  // Check for section-relative address syntax: (SECTION_TYPE)(+0x0)
+   if (AddrSpec.starts_with("(")) {
+     size_t FirstClose = AddrSpec.find(')');
+     if (FirstClose != StringRef::npos && FirstClose + 1 < AddrSpec.size() &&
+         AddrSpec[FirstClose + 1] == '(') {
+       size_t SecondOpen = FirstClose + 1;
+       size_t SecondClose = AddrSpec.find(')', SecondOpen);
+       if (SecondClose != StringRef::npos) {
+         // Extract section type from first parentheses
+         SectionType = AddrSpec.substr(1, FirstClose - 1);
+
+         // Validate that section type is not empty
+         if (SectionType.empty())
+           return makeStringError("unknown section type: empty section type");
+
+         // Extract offset from second parentheses
+         StringRef OffsetPart = AddrSpec.substr(SecondOpen + 1, SecondClose - SecondOpen - 1);
+
+         // The offset should start with '+'
+         if (!OffsetPart.consume_front("+"))
+           return makeStringError("section-relative offset must start with '+'");
+
+         // Parse the offset; radix 0 auto-detects the base from the prefix (0x hex, 0b binary, 0 octal, else decimal)
+         if (OffsetPart.getAsInteger(0, Offset))
+           return makeStringError("invalid offset in section-relative address");
+
+         Symbol = StringRef();
+         return Error::success();
+       }
+     }
+   }
+
   // If address specification is a number, treat it as a module offset.
   if (!AddrSpec.getAsInteger(IsAddr2Line ? 16 : 0, Offset)) {
     // Module offset is an address.
     Symbol = StringRef();
+    SectionType = StringRef();
     return Error::success();
   }
 
@@ -260,6 +382,7 @@ static Error parseCommand(StringRef BinaryName, bool IsAddr2Line,
   // Otherwise it is a symbol name, potentially with an offset.
   Symbol = AddrSpec;
   Offset = 0;
+  SectionType = StringRef();
 
   // If the address specification contains '+', try treating it as
   // "symbol + offset".
@@ -282,10 +405,11 @@ template <typename T>
 void executeCommand(StringRef ModuleName, const T &ModuleSpec, Command Cmd,
                     StringRef Symbol, uint64_t Offset, uint64_t AdjustVMA,
                     bool ShouldInline, OutputStyle Style,
-                    LLVMSymbolizer &Symbolizer, DIPrinter &Printer) {
-  uint64_t AdjustedOffset = Offset - AdjustVMA;
-  object::SectionedAddress Address = {AdjustedOffset,
-                                      object::SectionedAddress::UndefSection};
+                    LLVMSymbolizer &Symbolizer, DIPrinter &Printer,
+                    uint64_t SectionIndex) {
+   uint64_t AdjustedOffset = Offset - AdjustVMA;
+   object::SectionedAddress Address = {AdjustedOffset, SectionIndex};
+
   Request SymRequest = {
       ModuleName, Symbol.empty() ? std::make_optional(Offset) : std::nullopt,
       Symbol};
@@ -342,6 +466,7 @@ static void symbolizeInput(const opt::InputArgList &Args,
   object::BuildID BuildID(IncomingBuildID.begin(), IncomingBuildID.end());
   uint64_t Offset = 0;
   StringRef Symbol;
+  StringRef SectionType;
 
   // An empty input string may be used to check if the process is alive and
   // responding to input. Do not emit a message on stderr in this case but
@@ -352,24 +477,41 @@ static void symbolizeInput(const opt::InputArgList &Args,
   }
   if (Error E = parseCommand(Args.getLastArgValue(OPT_obj_EQ), IsAddr2Line,
                              StringRef(InputString), Cmd, ModuleName, BuildID,
-                             Symbol, Offset)) {
+                             Symbol, Offset, SectionType)) {
     handleAllErrors(std::move(E), [&](const StringError &EI) {
       printError(EI, InputString);
       printUnknownLineInfo(ModuleName, Printer);
     });
     return;
   }
+
+  // Validate section index from section type if specified
+  uint64_t SectionIndex = object::SectionedAddress::UndefSection;
+  if (!SectionType.empty() && !ModuleName.empty()) {
+    auto SectionIndexOrErr = validateSectionType(ModuleName, SectionType, Offset, Symbolizer);
+    if (!SectionIndexOrErr) {
+      handleAllErrors(SectionIndexOrErr.takeError(), [&](const ErrorInfoBase &EI) {
+        printError(EI, InputString);
+      });
+      printUnknownLineInfo(ModuleName, Printer);
+      return;
+    }
+    SectionIndex = *SectionIndexOrErr;
+  }
+
   bool ShouldInline = Args.hasFlag(OPT_inlines, OPT_no_inlines, !IsAddr2Line);
   if (!BuildID.empty()) {
     assert(ModuleName.empty());
     if (!Args.hasArg(OPT_no_debuginfod))
       enableDebuginfod(Symbolizer, Args);
     std::string BuildIDStr = toHex(BuildID);
+    // Note: Section type resolution is not supported for BuildID-based lookup
     executeCommand(BuildIDStr, BuildID, Cmd, Symbol, Offset, AdjustVMA,
-                   ShouldInline, Style, Symbolizer, Printer);
+                   ShouldInline, Style, Symbolizer, Printer,
+                    object::SectionedAddress::UndefSection);
   } else {
     executeCommand(ModuleName, ModuleName, Cmd, Symbol, Offset, AdjustVMA,
-                   ShouldInline, Style, Symbolizer, Printer);
+ShouldInline, Style, Symbolizer, Printer, SectionIndex);
   }
 }
 

>From 07ab95bdabbf2c69f0fe18d7acb7103e262c19d5 Mon Sep 17 00:00:00 2001
From: Midhunesh <midhuensh.p at ibm.com>
Date: Sun, 9 Nov 2025 12:33:22 -0500
Subject: [PATCH 2/3] symbolizer to accept section relative syntax

---
 llvm/test/tools/llvm-symbolizer/xcoff-section-relative.ll | 2 +-
 llvm/test/tools/llvm-symbolizer/xcoff-section-syntax.test | 2 +-
 llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp            | 1 +
 3 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/llvm/test/tools/llvm-symbolizer/xcoff-section-relative.ll b/llvm/test/tools/llvm-symbolizer/xcoff-section-relative.ll
index cfc6b31812a98..d1e21fe135e9e 100644
--- a/llvm/test/tools/llvm-symbolizer/xcoff-section-relative.ll
+++ b/llvm/test/tools/llvm-symbolizer/xcoff-section-relative.ll
@@ -48,4 +48,4 @@ entry:
 ; SECTIONS: Name: .text
 ; SECTIONS: Type: STYP_TEXT
 ; SECTIONS: Name: .data
-; SECTIONS: Type: STYP_DATA
\ No newline at end of file
+; SECTIONS: Type: STYP_DATA
diff --git a/llvm/test/tools/llvm-symbolizer/xcoff-section-syntax.test b/llvm/test/tools/llvm-symbolizer/xcoff-section-syntax.test
index 01bda672387f4..ca5ef9d3cb2cc 100644
--- a/llvm/test/tools/llvm-symbolizer/xcoff-section-syntax.test
+++ b/llvm/test/tools/llvm-symbolizer/xcoff-section-syntax.test
@@ -28,4 +28,4 @@
 # INVALID-TYPE: unknown section type
 # NO-PLUS: section-relative offset must start with '+'
 # INVALID-OFFSET: invalid offset in section-relative address
-# EMPTY-SECTION: unknown section type
\ No newline at end of file
+# EMPTY-SECTION: unknown section type
diff --git a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
index d239d1aad73d7..3bdbce55c4f68 100644
--- a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
+++ b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
@@ -29,6 +29,7 @@
 #include "llvm/Debuginfod/BuildIDFetcher.h"
 #include "llvm/Debuginfod/Debuginfod.h"
 #include "llvm/Debuginfod/HTTPClient.h"
+#include "llvm/Object/XCOFFObjectFile.h"
 #include "llvm/Option/Arg.h"
 #include "llvm/Option/ArgList.h"
 #include "llvm/Option/Option.h"

>From a0b4db7d32a4a3ca40141e555ee6a2c428c47593 Mon Sep 17 00:00:00 2001
From: Midhunesh <midhuensh.p at ibm.com>
Date: Tue, 18 Nov 2025 07:24:11 -0500
Subject: [PATCH 3/3] code format fix

---
 llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
index 3bdbce55c4f68..1ad5d43409b4a 100644
--- a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
+++ b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
@@ -1,4 +1,5 @@
-//===-- llvm-symbolizer.cpp - Simple addr2line-like symbolizer ------------===//
+//===-- llvm-symbolizer.cpp - Simple addr2line-like symbolizer
+//------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.



More information about the llvm-commits mailing list