[llvm] [NFC] Extract DWARFCFIProgram into separate files (PR #139326)
via llvm-commits
llvm-commits at lists.llvm.org
Fri May 9 14:03:54 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-debuginfo
Author: None (Sterling-Augustine)
<details>
<summary>Changes</summary>
CFI programs are most commonly used within debug frames, but that is not their only use. For example, some assembly writers encode them by hand into .cfi_escape directives. This PR extracts the code for them into its own files, setting them up to be evaluated from outside debug frames themselves.
One in a series of NFC DebugInfo/DWARF refactoring changes that layer the library more cleanly, so that binary CFI parsing can be used from low-level code (such as byte strings created via .cfi_escape) without circular dependencies. The final goal is to make a more limited DWARF library usable from lower-level code.
---
Patch is 46.35 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/139326.diff
5 Files Affected:
- (added) llvm/include/llvm/DebugInfo/DWARF/DWARFCFIProgram.h (+158)
- (modified) llvm/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h (+1-130)
- (modified) llvm/lib/DebugInfo/DWARF/CMakeLists.txt (+1)
- (added) llvm/lib/DebugInfo/DWARF/DWARFCFIProgram.cpp (+444)
- (modified) llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp (+1-408)
``````````diff
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFCFIProgram.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFCFIProgram.h
new file mode 100644
index 0000000000000..5c3252590e5d0
--- /dev/null
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFCFIProgram.h
@@ -0,0 +1,158 @@
+//===- DWARFCFIProgram.h ----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_DWARF_DWARFCFIPROGRAM_H
+#define LLVM_DEBUGINFO_DWARF_DWARFCFIPROGRAM_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/iterator.h"
+#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
+#include "llvm/DebugInfo/DWARF/DWARFExpression.h"
+#include "llvm/Support/Error.h"
+#include "llvm/TargetParser/Triple.h"
+#include <map>
+#include <memory>
+#include <vector>
+
+namespace llvm {
+
+namespace dwarf {
+/// Represent a sequence of Call Frame Information instructions that, when read
+/// in order, construct a table mapping PC to frame state. This can also be
+/// referred to as "CFI rules" in DWARF literature to avoid confusion with
+/// computer programs in the broader sense, and in this context each instruction
+/// would be a rule to establish the mapping. Refer to pg. 172 in the DWARF5
+/// manual, "6.4.1 Structure of Call Frame Information".
+class CFIProgram {
+public:
+ static constexpr size_t MaxOperands = 3;
+ typedef SmallVector<uint64_t, MaxOperands> Operands;
+
+ /// An instruction consists of a DWARF CFI opcode and an optional sequence of
+ /// operands. If it refers to an expression, then this expression has its own
+ /// sequence of operations and operands handled separately by DWARFExpression.
+ struct Instruction {
+ Instruction(uint8_t Opcode) : Opcode(Opcode) {}
+
+ uint8_t Opcode;
+ Operands Ops;
+ // Associated DWARF expression in case this instruction refers to one
+ std::optional<DWARFExpression> Expression;
+
+ Expected<uint64_t> getOperandAsUnsigned(const CFIProgram &CFIP,
+ uint32_t OperandIdx) const;
+
+ Expected<int64_t> getOperandAsSigned(const CFIProgram &CFIP,
+ uint32_t OperandIdx) const;
+ };
+
+ using InstrList = std::vector<Instruction>;
+ using iterator = InstrList::iterator;
+ using const_iterator = InstrList::const_iterator;
+
+ iterator begin() { return Instructions.begin(); }
+ const_iterator begin() const { return Instructions.begin(); }
+ iterator end() { return Instructions.end(); }
+ const_iterator end() const { return Instructions.end(); }
+
+ unsigned size() const { return (unsigned)Instructions.size(); }
+ bool empty() const { return Instructions.empty(); }
+ uint64_t codeAlign() const { return CodeAlignmentFactor; }
+ int64_t dataAlign() const { return DataAlignmentFactor; }
+ Triple::ArchType triple() const { return Arch; }
+
+ CFIProgram(uint64_t CodeAlignmentFactor, int64_t DataAlignmentFactor,
+ Triple::ArchType Arch)
+ : CodeAlignmentFactor(CodeAlignmentFactor),
+ DataAlignmentFactor(DataAlignmentFactor), Arch(Arch) {}
+
+ /// Parse and store a sequence of CFI instructions from Data,
+ /// starting at *Offset and ending at EndOffset. *Offset is updated
+ /// to EndOffset upon successful parsing, or indicates the offset
+ /// where a problem occurred in case an error is returned.
+ Error parse(DWARFDataExtractor Data, uint64_t *Offset, uint64_t EndOffset);
+
+ void dump(raw_ostream &OS, DIDumpOptions DumpOpts, unsigned IndentLevel,
+ std::optional<uint64_t> InitialLocation) const;
+
+ void addInstruction(const Instruction &I) { Instructions.push_back(I); }
+
+ /// Get a DWARF CFI call frame string for the given DW_CFA opcode.
+ StringRef callFrameString(unsigned Opcode) const;
+
+private:
+ std::vector<Instruction> Instructions;
+ const uint64_t CodeAlignmentFactor;
+ const int64_t DataAlignmentFactor;
+ Triple::ArchType Arch;
+
+ /// Convenience method to add a new instruction with the given opcode.
+ void addInstruction(uint8_t Opcode) {
+ Instructions.push_back(Instruction(Opcode));
+ }
+
+ /// Add a new single-operand instruction.
+ void addInstruction(uint8_t Opcode, uint64_t Operand1) {
+ Instructions.push_back(Instruction(Opcode));
+ Instructions.back().Ops.push_back(Operand1);
+ }
+
+ /// Add a new instruction that has two operands.
+ void addInstruction(uint8_t Opcode, uint64_t Operand1, uint64_t Operand2) {
+ Instructions.push_back(Instruction(Opcode));
+ Instructions.back().Ops.push_back(Operand1);
+ Instructions.back().Ops.push_back(Operand2);
+ }
+
+ /// Add a new instruction that has three operands.
+ void addInstruction(uint8_t Opcode, uint64_t Operand1, uint64_t Operand2,
+ uint64_t Operand3) {
+ Instructions.push_back(Instruction(Opcode));
+ Instructions.back().Ops.push_back(Operand1);
+ Instructions.back().Ops.push_back(Operand2);
+ Instructions.back().Ops.push_back(Operand3);
+ }
+
+ /// Types of operands to CFI instructions
+ /// In DWARF, this type is implicitly tied to a CFI instruction opcode and
+ /// thus this type doesn't need to be explicitly written to the file (this is
+ /// not a DWARF encoding). The relationship of instrs to operand types can
+ /// be obtained from getOperandTypes() and is only used to simplify
+ /// instruction printing.
+ enum OperandType {
+ OT_Unset,
+ OT_None,
+ OT_Address,
+ OT_Offset,
+ OT_FactoredCodeOffset,
+ OT_SignedFactDataOffset,
+ OT_UnsignedFactDataOffset,
+ OT_Register,
+ OT_AddressSpace,
+ OT_Expression
+ };
+
+ /// Get the OperandType as a "const char *".
+ static const char *operandTypeString(OperandType OT);
+
+ /// Retrieve the array describing the types of operands according to the enum
+ /// above. This is indexed by opcode.
+ static ArrayRef<OperandType[MaxOperands]> getOperandTypes();
+
+ /// Print \p Opcode's operand number \p OperandIdx which has value \p Operand.
+ void printOperand(raw_ostream &OS, DIDumpOptions DumpOpts,
+ const Instruction &Instr, unsigned OperandIdx,
+ uint64_t Operand, std::optional<uint64_t> &Address) const;
+};
+
+} // end namespace dwarf
+
+} // end namespace llvm
+
+#endif // LLVM_DEBUGINFO_DWARF_DWARFCFIPROGRAM_H
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h
index a9a3c7edde691..b4b1e49e68a84 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h
@@ -12,6 +12,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/iterator.h"
+#include "llvm/DebugInfo/DWARF/DWARFCFIProgram.h"
#include "llvm/DebugInfo/DWARF/DWARFExpression.h"
#include "llvm/Support/Error.h"
#include "llvm/TargetParser/Triple.h"
@@ -309,7 +310,6 @@ class UnwindRow {
raw_ostream &operator<<(raw_ostream &OS, const UnwindRow &Row);
-class CFIProgram;
class CIE;
class FDE;
@@ -398,135 +398,6 @@ class UnwindTable {
raw_ostream &operator<<(raw_ostream &OS, const UnwindTable &Rows);
-/// Represent a sequence of Call Frame Information instructions that, when read
-/// in order, construct a table mapping PC to frame state. This can also be
-/// referred to as "CFI rules" in DWARF literature to avoid confusion with
-/// computer programs in the broader sense, and in this context each instruction
-/// would be a rule to establish the mapping. Refer to pg. 172 in the DWARF5
-/// manual, "6.4.1 Structure of Call Frame Information".
-class CFIProgram {
-public:
- static constexpr size_t MaxOperands = 3;
- typedef SmallVector<uint64_t, MaxOperands> Operands;
-
- /// An instruction consists of a DWARF CFI opcode and an optional sequence of
- /// operands. If it refers to an expression, then this expression has its own
- /// sequence of operations and operands handled separately by DWARFExpression.
- struct Instruction {
- Instruction(uint8_t Opcode) : Opcode(Opcode) {}
-
- uint8_t Opcode;
- Operands Ops;
- // Associated DWARF expression in case this instruction refers to one
- std::optional<DWARFExpression> Expression;
-
- Expected<uint64_t> getOperandAsUnsigned(const CFIProgram &CFIP,
- uint32_t OperandIdx) const;
-
- Expected<int64_t> getOperandAsSigned(const CFIProgram &CFIP,
- uint32_t OperandIdx) const;
- };
-
- using InstrList = std::vector<Instruction>;
- using iterator = InstrList::iterator;
- using const_iterator = InstrList::const_iterator;
-
- iterator begin() { return Instructions.begin(); }
- const_iterator begin() const { return Instructions.begin(); }
- iterator end() { return Instructions.end(); }
- const_iterator end() const { return Instructions.end(); }
-
- unsigned size() const { return (unsigned)Instructions.size(); }
- bool empty() const { return Instructions.empty(); }
- uint64_t codeAlign() const { return CodeAlignmentFactor; }
- int64_t dataAlign() const { return DataAlignmentFactor; }
- Triple::ArchType triple() const { return Arch; }
-
- CFIProgram(uint64_t CodeAlignmentFactor, int64_t DataAlignmentFactor,
- Triple::ArchType Arch)
- : CodeAlignmentFactor(CodeAlignmentFactor),
- DataAlignmentFactor(DataAlignmentFactor),
- Arch(Arch) {}
-
- /// Parse and store a sequence of CFI instructions from Data,
- /// starting at *Offset and ending at EndOffset. *Offset is updated
- /// to EndOffset upon successful parsing, or indicates the offset
- /// where a problem occurred in case an error is returned.
- Error parse(DWARFDataExtractor Data, uint64_t *Offset, uint64_t EndOffset);
-
- void dump(raw_ostream &OS, DIDumpOptions DumpOpts, unsigned IndentLevel,
- std::optional<uint64_t> InitialLocation) const;
-
- void addInstruction(const Instruction &I) { Instructions.push_back(I); }
-
- /// Get a DWARF CFI call frame string for the given DW_CFA opcode.
- StringRef callFrameString(unsigned Opcode) const;
-
-private:
- std::vector<Instruction> Instructions;
- const uint64_t CodeAlignmentFactor;
- const int64_t DataAlignmentFactor;
- Triple::ArchType Arch;
-
- /// Convenience method to add a new instruction with the given opcode.
- void addInstruction(uint8_t Opcode) {
- Instructions.push_back(Instruction(Opcode));
- }
-
- /// Add a new single-operand instruction.
- void addInstruction(uint8_t Opcode, uint64_t Operand1) {
- Instructions.push_back(Instruction(Opcode));
- Instructions.back().Ops.push_back(Operand1);
- }
-
- /// Add a new instruction that has two operands.
- void addInstruction(uint8_t Opcode, uint64_t Operand1, uint64_t Operand2) {
- Instructions.push_back(Instruction(Opcode));
- Instructions.back().Ops.push_back(Operand1);
- Instructions.back().Ops.push_back(Operand2);
- }
-
- /// Add a new instruction that has three operands.
- void addInstruction(uint8_t Opcode, uint64_t Operand1, uint64_t Operand2,
- uint64_t Operand3) {
- Instructions.push_back(Instruction(Opcode));
- Instructions.back().Ops.push_back(Operand1);
- Instructions.back().Ops.push_back(Operand2);
- Instructions.back().Ops.push_back(Operand3);
- }
-
- /// Types of operands to CFI instructions
- /// In DWARF, this type is implicitly tied to a CFI instruction opcode and
- /// thus this type doesn't need to be explicitly written to the file (this is
- /// not a DWARF encoding). The relationship of instrs to operand types can
- /// be obtained from getOperandTypes() and is only used to simplify
- /// instruction printing.
- enum OperandType {
- OT_Unset,
- OT_None,
- OT_Address,
- OT_Offset,
- OT_FactoredCodeOffset,
- OT_SignedFactDataOffset,
- OT_UnsignedFactDataOffset,
- OT_Register,
- OT_AddressSpace,
- OT_Expression
- };
-
- /// Get the OperandType as a "const char *".
- static const char *operandTypeString(OperandType OT);
-
- /// Retrieve the array describing the types of operands according to the enum
- /// above. This is indexed by opcode.
- static ArrayRef<OperandType[MaxOperands]> getOperandTypes();
-
- /// Print \p Opcode's operand number \p OperandIdx which has value \p Operand.
- void printOperand(raw_ostream &OS, DIDumpOptions DumpOpts,
- const Instruction &Instr, unsigned OperandIdx,
- uint64_t Operand, std::optional<uint64_t> &Address) const;
-};
-
/// An entry in either debug_frame or eh_frame. This entry can be a CIE or an
/// FDE.
class FrameEntry {
diff --git a/llvm/lib/DebugInfo/DWARF/CMakeLists.txt b/llvm/lib/DebugInfo/DWARF/CMakeLists.txt
index e565821cf2942..c4bacbdc484f5 100644
--- a/llvm/lib/DebugInfo/DWARF/CMakeLists.txt
+++ b/llvm/lib/DebugInfo/DWARF/CMakeLists.txt
@@ -2,6 +2,7 @@ add_llvm_component_library(LLVMDebugInfoDWARF
DWARFAbbreviationDeclaration.cpp
DWARFAddressRange.cpp
DWARFAcceleratorTable.cpp
+ DWARFCFIProgram.cpp
DWARFCompileUnit.cpp
DWARFContext.cpp
DWARFDataExtractor.cpp
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFCFIProgram.cpp b/llvm/lib/DebugInfo/DWARF/DWARFCFIProgram.cpp
new file mode 100644
index 0000000000000..1a4fc4930fdd9
--- /dev/null
+++ b/llvm/lib/DebugInfo/DWARF/DWARFCFIProgram.cpp
@@ -0,0 +1,444 @@
+//===- DWARFCFIProgram.cpp - Parsing the cfi-portions of .debug_frame -----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/DWARF/DWARFCFIProgram.h"
+#include "llvm/DebugInfo/DIContext.h"
+#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/DataExtractor.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <cinttypes>
+#include <cstdint>
+#include <optional>
+
+using namespace llvm;
+using namespace dwarf;
+
+static void printRegister(raw_ostream &OS, DIDumpOptions DumpOpts,
+ unsigned RegNum) {
+ if (DumpOpts.GetNameForDWARFReg) {
+ auto RegName = DumpOpts.GetNameForDWARFReg(RegNum, DumpOpts.IsEH);
+ if (!RegName.empty()) {
+ OS << RegName;
+ return;
+ }
+ }
+ OS << "reg" << RegNum;
+}
+
+// See DWARF standard v3, section 7.23
+const uint8_t DWARF_CFI_PRIMARY_OPCODE_MASK = 0xc0;
+const uint8_t DWARF_CFI_PRIMARY_OPERAND_MASK = 0x3f;
+
+Error CFIProgram::parse(DWARFDataExtractor Data, uint64_t *Offset,
+ uint64_t EndOffset) {
+ DataExtractor::Cursor C(*Offset);
+ while (C && C.tell() < EndOffset) {
+ uint8_t Opcode = Data.getRelocatedValue(C, 1);
+ if (!C)
+ break;
+
+ // Some instructions have a primary opcode encoded in the top bits.
+ if (uint8_t Primary = Opcode & DWARF_CFI_PRIMARY_OPCODE_MASK) {
+ // If it's a primary opcode, the first operand is encoded in the bottom
+ // bits of the opcode itself.
+ uint64_t Op1 = Opcode & DWARF_CFI_PRIMARY_OPERAND_MASK;
+ switch (Primary) {
+ case DW_CFA_advance_loc:
+ case DW_CFA_restore:
+ addInstruction(Primary, Op1);
+ break;
+ case DW_CFA_offset:
+ addInstruction(Primary, Op1, Data.getULEB128(C));
+ break;
+ default:
+ llvm_unreachable("invalid primary CFI opcode");
+ }
+ continue;
+ }
+
+ // Extended opcode - its value is Opcode itself.
+ switch (Opcode) {
+ default:
+ return createStringError(errc::illegal_byte_sequence,
+ "invalid extended CFI opcode 0x%" PRIx8, Opcode);
+ case DW_CFA_nop:
+ case DW_CFA_remember_state:
+ case DW_CFA_restore_state:
+ case DW_CFA_GNU_window_save:
+ case DW_CFA_AARCH64_negate_ra_state_with_pc:
+ // No operands
+ addInstruction(Opcode);
+ break;
+ case DW_CFA_set_loc:
+ // Operands: Address
+ addInstruction(Opcode, Data.getRelocatedAddress(C));
+ break;
+ case DW_CFA_advance_loc1:
+ // Operands: 1-byte delta
+ addInstruction(Opcode, Data.getRelocatedValue(C, 1));
+ break;
+ case DW_CFA_advance_loc2:
+ // Operands: 2-byte delta
+ addInstruction(Opcode, Data.getRelocatedValue(C, 2));
+ break;
+ case DW_CFA_advance_loc4:
+ // Operands: 4-byte delta
+ addInstruction(Opcode, Data.getRelocatedValue(C, 4));
+ break;
+ case DW_CFA_restore_extended:
+ case DW_CFA_undefined:
+ case DW_CFA_same_value:
+ case DW_CFA_def_cfa_register:
+ case DW_CFA_def_cfa_offset:
+ case DW_CFA_GNU_args_size:
+ // Operands: ULEB128
+ addInstruction(Opcode, Data.getULEB128(C));
+ break;
+ case DW_CFA_def_cfa_offset_sf:
+ // Operands: SLEB128
+ addInstruction(Opcode, Data.getSLEB128(C));
+ break;
+ case DW_CFA_LLVM_def_aspace_cfa:
+ case DW_CFA_LLVM_def_aspace_cfa_sf: {
+ auto RegNum = Data.getULEB128(C);
+ auto CfaOffset = Opcode == DW_CFA_LLVM_def_aspace_cfa
+ ? Data.getULEB128(C)
+ : Data.getSLEB128(C);
+ auto AddressSpace = Data.getULEB128(C);
+ addInstruction(Opcode, RegNum, CfaOffset, AddressSpace);
+ break;
+ }
+ case DW_CFA_offset_extended:
+ case DW_CFA_register:
+ case DW_CFA_def_cfa:
+ case DW_CFA_val_offset: {
+ // Operands: ULEB128, ULEB128
+ // Note: We can not embed getULEB128 directly into function
+ // argument list. getULEB128 changes Offset and order of evaluation
+ // for arguments is unspecified.
+ uint64_t op1 = Data.getULEB128(C);
+ uint64_t op2 = Data.getULEB128(C);
+ addInstruction(Opcode, op1, op2);
+ break;
+ }
+ case DW_CFA_offset_extended_sf:
+ case DW_CFA_def_cfa_sf:
+ case DW_CFA_val_offset_sf: {
+ // Operands: ULEB128, SLEB128
+ // Note: see comment for the previous case
+ uint64_t op1 = Data.getULEB128(C);
+ uint64_t op2 = (uint64_t)Data.getSLEB128(C);
+ addInstruction(Opcode, op1, op2);
+ break;
+ }
+ case DW_CFA_def_cfa_expression: {
+ uint64_t ExprLength = Data.getULEB128(C);
+ addInstruction(Opcode, 0);
+ StringRef Expression = Data.getBytes(C, ExprLength);
+
+ DataExtractor Extractor(Expression, Data.isLittleEndian(),
+ Data.getAddressSize());
+ // Note. We do not pass the DWARF format to DWARFExpression, because
+ // DW_OP_call_ref, the only operation which depends on the format, is
+ // prohibited in call frame instructions, see sec. 6.4.2 in DWARFv5.
+ Instructions.back().Expression =
+ DWARFExpression(Extractor, Data.getAddressSize());
+ break;
+ }
+ case DW_CFA_expression:
+ case DW_CFA_val_expression: {
+ uint64_t RegNum = Data.getULEB128(C);
+ addInstruction(Opcode, RegNum, 0);
+
+ uint64_t BlockLength = Data.getULEB128(C);
+ StringRef Expression = Data.getBytes(C, BlockLength);
+ DataExtractor Extractor(Expression, Data.isLittleEndian(),
+ Data.getAddressSize());
+ // Note. We do not pass the DWARF format to DWARFExpression, because
+ // DW_OP_call_ref, the only operation which depends on the format, is
+ // prohibited in call frame instructions, see sec. 6.4.2 in DWARFv5.
+ Instructions.back().Expression =
+ DWARFExpression(Extractor, Data.getAddressSize());
+ break;
+ }
+ }
+ }
+
+ *Offset = C.tell();
+ return C.takeError();
+}
+
+StringRef CFIProgram::callFrameString(unsigned Opcode) const {
+ return dwarf::CallFrameString(Opcode, Arch);
+}
+
+const char *CFIProgram::operandTypeString(CFIProgram::OperandType OT) {
+#define ENUM_TO_CSTR(e) \
+ case e: \
+ return #e;
+ switch (OT) {
+ ENUM_TO_CSTR...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/139326
More information about the llvm-commits
mailing list