[llvm] 5866935 - [DebugInfo] Add DW_OP_LLVM_user extension point

Scott Linder via llvm-commits llvm-commits at lists.llvm.org
Mon Jun 19 14:46:55 PDT 2023


Author: Scott Linder
Date: 2023-06-19T21:46:24Z
New Revision: 58669354bf1f8eef39979e31915b1e212a3985c9

URL: https://github.com/llvm/llvm-project/commit/58669354bf1f8eef39979e31915b1e212a3985c9
DIFF: https://github.com/llvm/llvm-project/commit/58669354bf1f8eef39979e31915b1e212a3985c9.diff

LOG: [DebugInfo] Add DW_OP_LLVM_user extension point

The extension codespace for DWARF expressions (DW_OP_LLVM_{lo,hi}_user)
has shrunk over time, as no extension is ever "retired" in practice. To
facilitate future extensions, this patch reserves one open opcode as an extension
point (0xe9), which is followed by a ULEB128-encoded SubOperation, and
then by the subop's operands.

There is some prior art, namely DW_OP_AARCH64_operation
(see https://github.com/ARM-software/abi-aa/blob/edd7460d87493fff124b8b5713acf71ffc06ee91/aadwarf64/aadwarf64.rst#45dwarf-expression-operations).

This version makes some different tradeoffs, opting to use a ULEB128 for
the subop encoding for future-proofing.

Reviewed By: #debug-info, dblaikie

Differential Revision: https://reviews.llvm.org/D147271

Added: 
    llvm/test/tools/llvm-dwarfdump/X86/DW_OP_LLVM_user.s

Modified: 
    llvm/include/llvm/BinaryFormat/Dwarf.def
    llvm/include/llvm/BinaryFormat/Dwarf.h
    llvm/include/llvm/DebugInfo/DWARF/DWARFExpression.h
    llvm/lib/BinaryFormat/Dwarf.cpp
    llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
    llvm/lib/DWARFLinker/DWARFLinker.cpp
    llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp
    llvm/unittests/DebugInfo/DWARF/DWARFExpressionCompactPrinterTest.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/BinaryFormat/Dwarf.def b/llvm/include/llvm/BinaryFormat/Dwarf.def
index 1409568b8664f..d0357bec0bbf5 100644
--- a/llvm/include/llvm/BinaryFormat/Dwarf.def
+++ b/llvm/include/llvm/BinaryFormat/Dwarf.def
@@ -13,7 +13,8 @@
 // TODO: Add other DW-based macros.
 #if !(                                                                         \
     defined HANDLE_DW_TAG || defined HANDLE_DW_AT || defined HANDLE_DW_FORM || \
-    defined HANDLE_DW_OP || defined HANDLE_DW_LANG || defined HANDLE_DW_ATE || \
+    defined HANDLE_DW_OP || defined HANDLE_DW_OP_LLVM_USEROP ||                    \
+    defined HANDLE_DW_LANG || defined HANDLE_DW_ATE ||                         \
     defined HANDLE_DW_VIRTUALITY || defined HANDLE_DW_DEFAULTED ||             \
     defined HANDLE_DW_CC || defined HANDLE_DW_LNS || defined HANDLE_DW_LNE ||  \
     defined HANDLE_DW_LNCT || defined HANDLE_DW_MACRO ||                       \
@@ -52,6 +53,10 @@
 #define HANDLE_DW_OP(ID, NAME, VERSION, VENDOR)
 #endif
 
+#ifndef HANDLE_DW_OP_LLVM_USEROP
+#define HANDLE_DW_OP_LLVM_USEROP(ID, NAME)
+#endif
+
 #ifndef HANDLE_DW_LANG
 #define HANDLE_DW_LANG(ID, NAME, LOWER_BOUND, VERSION, VENDOR)
 #endif
@@ -874,6 +879,24 @@ HANDLE_DW_OP(0xf8, PGI_omp_thread_num, 0, PGI)
 HANDLE_DW_OP(0xfb, GNU_addr_index, 0, GNU)
 HANDLE_DW_OP(0xfc, GNU_const_index, 0, GNU)
 
+// DW_OP_LLVM_user has two operands:
+//   (1) An unsigned LEB128 "LLVM Vendor Extension Opcode".
+//   (2) Zero or more literal operands, the number and type of which are
+//       implied by the opcode (1).
+// DW_OP_LLVM_user acts as an extension multiplexer, opening up the encoding
+// space to accommodate an infinite number of extensions. This better reflects
+// the de-facto permanent allocation of extensions.
+HANDLE_DW_OP(0xe9, LLVM_user, 0, LLVM)
+// "LLVM Vendor Extension" operations under the DW_OP_LLVM_user encoding
+// scheme. This list is authoritative and exhaustive. Once an operation is
+// registered here it cannot be removed nor have its encoding changed. The
+// encoding space must skip zero (which is reserved) and have no gaps.
+//
+// The DW_OP_LLVM_user DW_OP_LLVM_nop operation has no effect on the
+// location stack or any of its values. It is defined as a placeholder for
+// testing purposes.
+HANDLE_DW_OP_LLVM_USEROP(0x0001, nop)
+
 // DWARF languages.
 HANDLE_DW_LANG(0x0001, C89, 0, 2, DWARF)
 HANDLE_DW_LANG(0x0002, C, 0, 2, DWARF)
@@ -1235,6 +1258,7 @@ HANDLE_DW_SECT(8, RNGLISTS)
 #undef HANDLE_DW_AT
 #undef HANDLE_DW_FORM
 #undef HANDLE_DW_OP
+#undef HANDLE_DW_OP_LLVM_USEROP
 #undef HANDLE_DW_LANG
 #undef HANDLE_DW_ATE
 #undef HANDLE_DW_VIRTUALITY

diff  --git a/llvm/include/llvm/BinaryFormat/Dwarf.h b/llvm/include/llvm/BinaryFormat/Dwarf.h
index ed6abbc0d1143..c4d0232ac6c75 100644
--- a/llvm/include/llvm/BinaryFormat/Dwarf.h
+++ b/llvm/include/llvm/BinaryFormat/Dwarf.h
@@ -146,6 +146,11 @@ enum LocationAtom {
   DW_OP_LLVM_arg = 0x1005,              ///< Only used in LLVM metadata.
 };
 
+enum LlvmUserLocationAtom {
+#define HANDLE_DW_OP_LLVM_USEROP(ID, NAME) DW_OP_LLVM_##NAME = ID,
+#include "llvm/BinaryFormat/Dwarf.def"
+};
+
 enum TypeKind : uint8_t {
 #define HANDLE_DW_ATE(ID, NAME, VERSION, VENDOR) DW_ATE_##NAME = ID,
 #include "llvm/BinaryFormat/Dwarf.def"
@@ -631,6 +636,8 @@ StringRef ChildrenString(unsigned Children);
 StringRef AttributeString(unsigned Attribute);
 StringRef FormEncodingString(unsigned Encoding);
 StringRef OperationEncodingString(unsigned Encoding);
+StringRef SubOperationEncodingString(unsigned OpEncoding,
+                                     unsigned SubOpEncoding);
 StringRef AttributeEncodingString(unsigned Encoding);
 StringRef DecimalSignString(unsigned Sign);
 StringRef EndianityString(unsigned Endian);
@@ -674,6 +681,8 @@ StringRef RLEString(unsigned RLE);
 /// @{
 unsigned getTag(StringRef TagString);
 unsigned getOperationEncoding(StringRef OperationEncodingString);
+unsigned getSubOperationEncoding(unsigned OpEncoding,
+                                 StringRef SubOperationEncodingString);
 unsigned getVirtuality(StringRef VirtualityString);
 unsigned getLanguage(StringRef LanguageString);
 unsigned getCallingConvention(StringRef LanguageString);

diff  --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFExpression.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFExpression.h
index 6b80dbc038822..00228a32173f1 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFExpression.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFExpression.h
@@ -42,6 +42,9 @@ class DWARFExpression {
       SizeRefAddr = 6,
       SizeBlock = 7, ///< Preceding operand contains block size
       BaseTypeRef = 8,
+      /// The operand is a ULEB128 encoded SubOpcode. This is only valid
+      /// for the first operand of an operation.
+      SizeSubOpLEB = 9,
       WasmLocationArg = 30,
       SignBit = 0x80,
       SignedSize1 = SignBit | Size1,
@@ -83,6 +86,7 @@ class DWARFExpression {
   public:
     const Description &getDescription() const { return Desc; }
     uint8_t getCode() const { return Opcode; }
+    std::optional<unsigned> getSubCode() const;
     uint64_t getNumOperands() const { return Operands.size(); }
     ArrayRef<uint64_t> getRawOperands() const { return Operands; };
     uint64_t getRawOperand(unsigned Idx) const { return Operands[Idx]; }

diff  --git a/llvm/lib/BinaryFormat/Dwarf.cpp b/llvm/lib/BinaryFormat/Dwarf.cpp
index 12e9c7374ce8f..e4e5b5dd8c0e0 100644
--- a/llvm/lib/BinaryFormat/Dwarf.cpp
+++ b/llvm/lib/BinaryFormat/Dwarf.cpp
@@ -172,6 +172,40 @@ unsigned llvm::dwarf::getOperationEncoding(StringRef OperationEncodingString) {
       .Default(0);
 }
 
+static StringRef LlvmUserOperationEncodingString(unsigned Encoding) {
+  switch (Encoding) {
+  default:
+    llvm_unreachable("unhandled DWARF operation with LLVM user op");
+#define HANDLE_DW_OP_LLVM_USEROP(ID, NAME)                                     \
+  case DW_OP_LLVM_##NAME:                                                      \
+    return "DW_OP_LLVM_" #NAME;
+#include "llvm/BinaryFormat/Dwarf.def"
+  }
+}
+
+static unsigned
+getLlvmUserOperationEncoding(StringRef LlvmUserOperationEncodingString) {
+  unsigned E = StringSwitch<unsigned>(LlvmUserOperationEncodingString)
+#define HANDLE_DW_OP_LLVM_USEROP(ID, NAME) .Case(#NAME, DW_OP_LLVM_##NAME)
+#include "llvm/BinaryFormat/Dwarf.def"
+                   .Default(0);
+  assert(E && "unhandled DWARF operation string with LLVM user op");
+  return E;
+}
+
+StringRef llvm::dwarf::SubOperationEncodingString(unsigned OpEncoding,
+                                                  unsigned SubOpEncoding) {
+  assert(OpEncoding == DW_OP_LLVM_user);
+  return LlvmUserOperationEncodingString(SubOpEncoding);
+}
+
+unsigned
+llvm::dwarf::getSubOperationEncoding(unsigned OpEncoding,
+                                     StringRef SubOperationEncodingString) {
+  assert(OpEncoding == DW_OP_LLVM_user);
+  return getLlvmUserOperationEncoding(SubOperationEncodingString);
+}
+
 unsigned llvm::dwarf::OperationVersion(dwarf::LocationAtom Op) {
   switch (Op) {
   default:

diff  --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index f5a58f79ef4d5..f0c13edaf4bd3 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -2613,6 +2613,7 @@ void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer,
   for (const auto &Op : Expr) {
     assert(Op.getCode() != dwarf::DW_OP_const_type &&
            "3 operand ops not yet supported");
+    assert(!Op.getSubCode() && "SubOps not yet supported");
     Streamer.emitInt8(Op.getCode(), Comment != End ? *(Comment++) : "");
     Offset++;
     for (unsigned I = 0; I < Op.getDescription().Op.size(); ++I) {

diff  --git a/llvm/lib/DWARFLinker/DWARFLinker.cpp b/llvm/lib/DWARFLinker/DWARFLinker.cpp
index 2b1c6e5ada26d..18b81468fc906 100644
--- a/llvm/lib/DWARFLinker/DWARFLinker.cpp
+++ b/llvm/lib/DWARFLinker/DWARFLinker.cpp
@@ -1169,6 +1169,7 @@ void DWARFLinker::DIECloner::cloneExpression(
       assert(ULEBsize <= 16);
 
       // Copy over the operation.
+      assert(!Op.getSubCode() && "SubOps not yet supported");
       OutputBuffer.push_back(Op.getCode());
       uint64_t RefOffset;
       if (Desc.Op.size() == 1) {

diff  --git a/llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp b/llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp
index 7801170bc6e9e..187d530a00bcf 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp
@@ -101,6 +101,10 @@ static std::vector<Desc> getOpDescriptions() {
   Descriptions[DW_OP_entry_value] = Desc(Op::Dwarf5, Op::SizeLEB);
   Descriptions[DW_OP_regval_type] =
       Desc(Op::Dwarf5, Op::SizeLEB, Op::BaseTypeRef);
+  // This Description acts as a marker that getSubOpDesc must be called
+  // to fetch the final Description for the operation. Each such final
+  // Description must share the same first SizeSubOpLEB operand.
+  Descriptions[DW_OP_LLVM_user] = Desc(Op::Dwarf5, Op::SizeSubOpLEB);
   return Descriptions;
 }
 
@@ -116,6 +120,23 @@ static Desc getOpDesc(unsigned Opcode) {
   return getDescImpl(Descriptions, Opcode);
 }
 
+static std::vector<Desc> getSubOpDescriptions() {
+  static constexpr unsigned LlvmUserDescriptionsSize = 1
+#define HANDLE_DW_OP_LLVM_USEROP(ID, NAME) +1
+#include "llvm/BinaryFormat/Dwarf.def"
+      ;
+  std::vector<Desc> Descriptions;
+  Descriptions.resize(LlvmUserDescriptionsSize);
+  Descriptions[DW_OP_LLVM_nop] = Desc(Op::Dwarf5, Op::SizeSubOpLEB);
+  return Descriptions;
+}
+
+static Desc getSubOpDesc(unsigned Opcode, unsigned SubOpcode) {
+  assert(Opcode == DW_OP_LLVM_user);
+  static std::vector<Desc> Descriptions = getSubOpDescriptions();
+  return getDescImpl(Descriptions, SubOpcode);
+}
+
 bool DWARFExpression::Operation::extract(DataExtractor Data,
                                          uint8_t AddressSize, uint64_t Offset,
                                          std::optional<DwarfFormat> Format) {
@@ -133,6 +154,15 @@ bool DWARFExpression::Operation::extract(DataExtractor Data,
     unsigned Signed = Size & Operation::SignBit;
 
     switch (Size & ~Operation::SignBit) {
+    case Operation::SizeSubOpLEB:
+      assert(Operand == 0 && "SubOp operand must be the first operand");
+      Operands[Operand] = Data.getULEB128(&Offset);
+      Desc = getSubOpDesc(Opcode, Operands[Operand]);
+      if (Desc.Version == Operation::DwarfNA)
+        return false;
+      assert(Desc.Op[Operand] == Operation::SizeSubOpLEB &&
+             "SizeSubOpLEB Description must begin with SizeSubOpLEB operand");
+      break;
     case Operation::Size1:
       Operands[Operand] = Data.getU8(&Offset);
       if (Signed)
@@ -257,6 +287,12 @@ bool DWARFExpression::prettyPrintRegisterOp(DWARFUnit *U, raw_ostream &OS,
   return false;
 }
 
+std::optional<unsigned> DWARFExpression::Operation::getSubCode() const {
+  if (!Desc.Op.size() || Desc.Op[0] != Operation::SizeSubOpLEB)
+    return std::nullopt;
+  return Operands[0];
+}
+
 bool DWARFExpression::Operation::print(raw_ostream &OS, DIDumpOptions DumpOpts,
                                        const DWARFExpression *Expr,
                                        DWARFUnit *U) const {
@@ -280,7 +316,11 @@ bool DWARFExpression::Operation::print(raw_ostream &OS, DIDumpOptions DumpOpts,
     unsigned Size = Desc.Op[Operand];
     unsigned Signed = Size & Operation::SignBit;
 
-    if (Size == Operation::BaseTypeRef && U) {
+    if (Size == Operation::SizeSubOpLEB) {
+      StringRef SubName = SubOperationEncodingString(Opcode, Operands[Operand]);
+      assert(!SubName.empty() && "DW_OP SubOp has no name!");
+      OS << " " << SubName;
+    } else if (Size == Operation::BaseTypeRef && U) {
       // For DW_OP_convert the operand may be 0 to indicate that conversion to
       // the generic type should be done. The same holds for DW_OP_reinterpret,
       // which is currently not supported.
@@ -450,6 +490,10 @@ static bool printCompactDWARFExpr(
     case dwarf::DW_OP_nop: {
       break;
     }
+    case dwarf::DW_OP_LLVM_user: {
+      assert(Op.getSubCode() && *Op.getSubCode() == dwarf::DW_OP_LLVM_nop);
+      break;
+    }
     default:
       if (Opcode >= dwarf::DW_OP_reg0 && Opcode <= dwarf::DW_OP_reg31) {
         // DW_OP_reg<N>: A register, with the register num implied by the

diff  --git a/llvm/test/tools/llvm-dwarfdump/X86/DW_OP_LLVM_user.s b/llvm/test/tools/llvm-dwarfdump/X86/DW_OP_LLVM_user.s
new file mode 100644
index 0000000000000..bcb0dee0ce778
--- /dev/null
+++ b/llvm/test/tools/llvm-dwarfdump/X86/DW_OP_LLVM_user.s
@@ -0,0 +1,16 @@
+# RUN: llvm-mc %s -filetype=obj -triple=i686-pc-linux -o %t
+# RUN: llvm-dwarfdump -v %t | FileCheck %s
+
+# FIXME: Is there a better approach than using `DW_CFA_expression EAX <op>`?
+
+# CHECK:      .eh_frame contents:
+# CHECK:      FDE
+# CHECK-NEXT: Format: DWARF32
+
+foo:
+ .cfi_startproc
+ # CHECK-NEXT: DW_CFA_expression: EAX <decoding error> e9 00
+ .cfi_escape 0x10, 0x00, 0x02, 0xe9, 0x00
+ # CHECK-NEXT: DW_CFA_expression: EAX DW_OP_LLVM_user DW_OP_LLVM_nop
+ .cfi_escape 0x10, 0x00, 0x02, 0xe9, 0x01
+ .cfi_endproc

diff  --git a/llvm/unittests/DebugInfo/DWARF/DWARFExpressionCompactPrinterTest.cpp b/llvm/unittests/DebugInfo/DWARF/DWARFExpressionCompactPrinterTest.cpp
index 819f7d56a4318..69edbe53c9e10 100644
--- a/llvm/unittests/DebugInfo/DWARF/DWARFExpressionCompactPrinterTest.cpp
+++ b/llvm/unittests/DebugInfo/DWARF/DWARFExpressionCompactPrinterTest.cpp
@@ -129,6 +129,15 @@ TEST_F(DWARFExpressionCompactPrinterTest, Test_OP_nop) {
   TestExprPrinter({DW_OP_nop}, "<stack of size 0, expected 1>");
 }
 
+TEST_F(DWARFExpressionCompactPrinterTest, Test_OP_LLVM_nop) {
+  TestExprPrinter({DW_OP_LLVM_user, DW_OP_LLVM_nop},
+                  "<stack of size 0, expected 1>");
+}
+
 TEST_F(DWARFExpressionCompactPrinterTest, Test_OP_nop_OP_reg) {
   TestExprPrinter({DW_OP_nop, DW_OP_reg0}, "R0");
 }
+
+TEST_F(DWARFExpressionCompactPrinterTest, Test_OP_LLVM_nop_OP_reg) {
+  TestExprPrinter({DW_OP_LLVM_user, DW_OP_LLVM_nop, DW_OP_reg0}, "R0");
+}


        


More information about the llvm-commits mailing list