[llvm-branch-commits] [llvm] [llvm-debuginfo-analyzer] Add support for LLVM IR format. (PR #135440)

Jeremy Morse via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Thu May 1 07:59:01 PDT 2025


================
@@ -0,0 +1,2348 @@
+//===-- LVIRReader.cpp ----------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the LVIRReader class.
+// It supports the LLVM textual IR and bitcode formats.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/LogicalView/Readers/LVIRReader.h"
+#include "llvm/CodeGen/DebugHandlerBase.h"
+#include "llvm/DebugInfo/LogicalView/Core/LVLine.h"
+#include "llvm/DebugInfo/LogicalView/Core/LVScope.h"
+#include "llvm/DebugInfo/LogicalView/Core/LVSymbol.h"
+#include "llvm/DebugInfo/LogicalView/Core/LVType.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IRReader/IRReader.h"
+#include "llvm/Object/Error.h"
+#include "llvm/Object/IRObjectFile.h"
+#include "llvm/Support/FormatAdapters.h"
+#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/SourceMgr.h"
+
+using namespace llvm;
+using namespace llvm::object;
+using namespace llvm::logicalview;
+
+#define DEBUG_TYPE "IRReader"
+
+// Extra debug traces, compiled in while DEBUG_ALL is defined (intended default: off).
+#define DEBUG_ALL
+
+// These flavours of DINodes are not handled:
+//   DW_TAG_APPLE_property   = 19896
+//   DW_TAG_atomic_type      = 71
+//   DW_TAG_common_block     = 26
+//   DW_TAG_file_type        = 41
+//   DW_TAG_friend           = 42
+//   DW_TAG_generic_subrange = 69
+//   DW_TAG_immutable_type   = 75
+//   DW_TAG_module           = 30
+
+// Build the logical element that corresponds to the given 'DINode':
+// - Create it from the node's DWARF tag and record that tag.
+// - Associate the node with the element through 'addMD'.
+// - Set the element name, when the node provides a non-empty one.
+// - Collect any file information attached to the node.
+// Returns the created element, or null when 'createElement' produced none.
+LVElement *LVIRReader::constructElement(const DINode *DN) {
+  const dwarf::Tag Tag = DN->getTag();
+  LVElement *Element = createElement(Tag);
+  if (!Element)
+    return nullptr;
+
+  Element->setTag(Tag);
+  addMD(DN, Element);
+
+  if (StringRef Name = getMDName(DN); !Name.empty())
+    Element->setName(Name);
+
+  // Make sure the node's source file has an index in the compile unit.
+  if (const DIFile *File = getMDFile(DN))
+    getOrCreateSourceID(File);
+
+  return Element;
+}
+
+// Record in 'LanguageIsFortran' whether the given DWARF source language
+// code is one of the Fortran dialects listed below.
+void LVIRReader::mapFortranLanguage(unsigned DWLang) {
+  const bool IsFortran = DWLang == dwarf::DW_LANG_Fortran77 ||
+                         DWLang == dwarf::DW_LANG_Fortran90 ||
+                         DWLang == dwarf::DW_LANG_Fortran95 ||
+                         DWLang == dwarf::DW_LANG_Fortran03 ||
+                         DWLang == dwarf::DW_LANG_Fortran08 ||
+                         DWLang == dwarf::DW_LANG_Fortran18;
+  LanguageIsFortran = IsFortran;
+}
+
+// Return true when the compile unit was emitted with line tables only.
+// Split DWARF is not considered here: looking at IR generated with
+// '-gdwarf -gsplit-dwarf=split', the only difference is that
+// 'DICompileUnit::splitDebugFilename' is set to the name of the split
+// file: "xxx.dwo".
+bool LVIRReader::includeMinimalInlineScopes() const {
+  const auto Kind = getCUNode()->getEmissionKind();
+  return Kind == DICompileUnit::LineTablesOnly;
+}
+
+// Return the 1-based index that identifies the given 'DIFile' within the
+// current compile unit, assigning a new index the first time the file is
+// seen. A null 'File' yields index 0.
+// In DWARF v4, the files are 1-indexed.
+// In DWARF v5, the files are 0-indexed.
+// The IR reader expects the indexes as 1-indexed.
+// Each compile unit keeps track of the last assigned index.
+size_t LVIRReader::getOrCreateSourceID(const DIFile *File) {
+  if (!File)
+    return 0;
+
+#ifdef DEBUG_ALL
+  LLVM_DEBUG({
+    dbgs() << "\n[getOrCreateSourceID] DIFile\n";
+    File->dump();
+  });
+#endif
+
+  addMD(File, CompileUnit);
+
+  LLVM_DEBUG({
+    dbgs() << "Directory: '" << File->getDirectory() << "'\n";
+    dbgs() << "Filename:  '" << File->getFilename() << "'\n";
+  });
+
+  size_t FileIndex = 0;
+  auto Known = CompileUnitFiles.find(File);
+  if (Known != CompileUnitFiles.cend()) {
+    // The file was registered before: reuse its index.
+    FileIndex = Known->second;
+  } else {
+    // First occurrence: continue from the last index assigned in this
+    // compile unit.
+    FileIndex = getFileIndex(CompileUnit);
+
+    // Use the file's own directory, falling back to the compilation
+    // directory of the compile unit when it is empty.
+    std::string FullName(File->getDirectory());
+    if (FullName.empty())
+      FullName = std::string(CompileUnit->getCompilationDirectory());
+    FullName += "/";
+    FullName += llvm::sys::path::filename(File->getFilename()).str();
+
+    CompileUnit->addFilename(transformPath(FullName));
+    ++FileIndex;
+    CompileUnitFiles.emplace(File, FileIndex);
+    updateFileIndex(CompileUnit, FileIndex);
+  }
+
+  LLVM_DEBUG({ dbgs() << "FileIndex: " << FileIndex << "\n"; });
+  return FileIndex;
+}
+
+// Record the source position (file and line) on the given logical element.
+// Nothing is recorded when 'Line' is zero. 'File' may be null, in which
+// case 'getOrCreateSourceID' returns index 0.
+void LVIRReader::addSourceLine(LVElement *Element, unsigned Line,
+                               const DIFile *File) {
+  if (Line == 0)
+    return;
+
+  // After the scopes are created, the generic reader traverses the 'Children'
+  // and performs additional setting tasks (resolve types names, references,
+  // etc.). One of those tasks is to select the correct string pool index
+  // based on the command line options: --attribute=filename or
+  // --attribute=pathname.
+  // As the 'Children' do not include logical lines, do that selection now,
+  // by calling 'setFilename' if the logical element is a line.
+  size_t FileID = getOrCreateSourceID(File);
+  if (Element->getIsLine())
+    Element->setFilename(CompileUnit->getFilename(FileID));
+  else
+    Element->setFilenameIndex(FileID);
+  Element->setLineNumber(Line);
+
+  LLVM_DEBUG({
+    dbgs() << "\n[addSourceLine]\n";
+    // 'File' is allowed to be null, so only dump it when present.
+    if (File)
+      File->dump();
+    dbgs() << "FileIndex: " << Element->getFilenameIndex() << ", ";
+    dbgs() << "ID:   " << Element->getID() << ", ";
+    dbgs() << "Kind: " << Element->kind() << ", ";
+    dbgs() << "Line: " << Element->getLineNumber() << ", ";
+    dbgs() << "Name: " << Element->getName() << "\n";
+  });
+}
+
+// Record on 'Element' the line and file reported by a global variable node.
+void LVIRReader::addSourceLine(LVElement *Element, const DIGlobalVariable *G) {
+  assert(G);
+  const unsigned Line = G->getLine();
+  const DIFile *File = G->getFile();
+  addSourceLine(Element, Line, File);
+}
+
+// Record on 'Element' the line and file reported by an imported entity node.
+void LVIRReader::addSourceLine(LVElement *Element, const DIImportedEntity *IE) {
+  assert(IE);
+  const unsigned Line = IE->getLine();
+  const DIFile *File = IE->getFile();
+  addSourceLine(Element, Line, File);
+}
+
+// Record on 'Element' the line and file reported by a label node.
+void LVIRReader::addSourceLine(LVElement *Element, const DILabel *L) {
+  assert(L);
+  const unsigned Line = L->getLine();
+  const DIFile *File = L->getFile();
+  addSourceLine(Element, Line, File);
+}
+
+// Record on 'Element' the line and file reported by a local variable node.
+void LVIRReader::addSourceLine(LVElement *Element, const DILocalVariable *V) {
+  assert(V);
+  const unsigned Line = V->getLine();
+  const DIFile *File = V->getFile();
+  addSourceLine(Element, Line, File);
+}
+
+// Record on 'Element' the line and file reported by a debug location.
+void LVIRReader::addSourceLine(LVElement *Element, const DILocation *DL) {
+  assert(DL);
+  const unsigned Line = DL->getLine();
+  const DIFile *File = DL->getFile();
+  addSourceLine(Element, Line, File);
+}
+
+// Record on 'Element' the line and file reported by an Objective-C property
+// node.
+void LVIRReader::addSourceLine(LVElement *Element, const DIObjCProperty *Ty) {
+  assert(Ty);
+  const unsigned Line = Ty->getLine();
+  const DIFile *File = Ty->getFile();
+  addSourceLine(Element, Line, File);
+}
+
+// Record on 'Element' the line and file reported by a subprogram node.
+void LVIRReader::addSourceLine(LVElement *Element, const DISubprogram *SP) {
+  assert(SP);
+  const unsigned Line = SP->getLine();
+  const DIFile *File = SP->getFile();
+  addSourceLine(Element, Line, File);
+}
+
+// Record on 'Element' the line and file reported by a type node.
+void LVIRReader::addSourceLine(LVElement *Element, const DIType *Ty) {
+  assert(Ty);
+  const unsigned Line = Ty->getLine();
+  const DIFile *File = Ty->getFile();
+  addSourceLine(Element, Line, File);
+}
+
+// Set the value of 'Element' from a constant debug expression.
+// When 'DIExpr->isConstant()' reports a signed or unsigned constant, the
+// operand stored at element 1 is formatted with 'hexString' (negative signed
+// values get a leading '-') and stored as the element value. Otherwise the
+// element is left untouched.
+void LVIRReader::addConstantValue(LVElement *Element,
+                                  const DIExpression *DIExpr) {
+  std::optional<DIExpression::SignedOrUnsignedConstant> Constant =
+      DIExpr->isConstant();
+  if (!Constant)
+    return;
+
+  std::string Text;
+  if (*Constant == DIExpression::SignedOrUnsignedConstant::SignedConstant) {
+    const int64_t Value = DIExpr->getElement(1);
+    uint64_t Magnitude = static_cast<uint64_t>(Value);
+    if (Value < 0) {
+      Text = "-";
+      // Negate in unsigned arithmetic: 'std::abs' is undefined for the most
+      // negative 64-bit value, whereas this wraps to the right magnitude.
+      Magnitude = uint64_t(0) - Magnitude;
+    }
+    Text += hexString(Magnitude, 2);
+  } else if (*Constant ==
+             DIExpression::SignedOrUnsignedConstant::UnsignedConstant) {
+    const uint64_t Value = DIExpr->getElement(1);
+    Text = hexString(Value, 2);
+  } else {
+    return;
+  }
+
+  Element->setValue(Text);
+}
+
+// Set the value of 'Element' from an IR integer constant; the signedness
+// used for printing is derived from 'Ty' by the overload being called.
+void LVIRReader::addConstantValue(LVElement *Element, const ConstantInt *CI,
+                                  const DIType *Ty) {
+  const APInt &Value = CI->getValue();
+  addConstantValue(Element, Value, Ty);
+}
+
+// Set the value of 'Element' from a raw 64-bit value, using
+// 'DebugHandlerBase::isUnsignedDIType' on 'Ty' to decide the signedness.
+void LVIRReader::addConstantValue(LVElement *Element, uint64_t Val,
+                                  const DIType *Ty) {
+  const bool Unsigned = DebugHandlerBase::isUnsignedDIType(Ty);
+  addConstantValue(Element, Unsigned, Val);
+}
+
+// Set the value of 'Element' from a raw 64-bit value with an explicit
+// signedness, by wrapping it in a 64-bit 'APInt'.
+void LVIRReader::addConstantValue(LVElement *Element, bool Unsigned,
+                                  uint64_t Val) {
+  // The third 'APInt' constructor argument is 'isSigned', so it must be the
+  // negation of 'Unsigned'. At a width of 64 bits the stored bits are the
+  // same either way.
+  addConstantValue(Element, llvm::APInt(64, Val, /*isSigned=*/!Unsigned),
+                   Unsigned);
+}
+
+// Set the value of 'Element' from an arbitrary precision integer, using
+// 'DebugHandlerBase::isUnsignedDIType' on 'Ty' to decide the signedness.
+void LVIRReader::addConstantValue(LVElement *Element, const APInt &Val,
+                                  const DIType *Ty) {
+  const bool Unsigned = DebugHandlerBase::isUnsignedDIType(Ty);
+  addConstantValue(Element, Val, Unsigned);
+}
+
+// Set the value of 'Element' to the textual form of 'Value': radix 16,
+// formatted as a C literal, lowercase digits, no digit separators. The
+// value is printed as signed unless 'Unsigned' is set.
+void LVIRReader::addConstantValue(LVElement *Element, const APInt &Value,
+                                  bool Unsigned) {
+  const bool Signed = !Unsigned;
+  SmallString<128> Text;
+  Value.toString(Text, /*Radix=*/16, Signed, /*formatAsCLiteral=*/true,
+                 /*UpperCase=*/false, /*InsertSeparators=*/false);
+  Element->setValue(Text.str());
+}
+
+// Store the given string, unchanged, as the value of 'Element'.
+void LVIRReader::addString(LVElement *Element, StringRef String) {
+  Element->setValue(String);
+}
+
+// Ask every symbol collected in 'SymbolsWithLocations' to fill its location
+// gaps. This is done only when the 'any location' attribute option is set.
+void LVIRReader::processLocationGaps() {
+  if (!options().getAttributeAnyLocation())
+    return;
+
+  for (LVSymbol *Symbol : SymbolsWithLocations)
+    Symbol->fillLocationGaps();
+}
+
+// Walk the scope tree rooted at the compile unit and, for each scope:
+// - Calculate its location ranges.
+// - Assign a unique offset to the logical scopes, symbols and types,
+//   as the code that handles public names expects them to have one.
+//   Offsets advance by the arbitrary increment 'OffsetIncrease'.
+// - Resolve any line pattern match.
+// - Record the scope's size contribution when sizes are being printed.
+void LVIRReader::processScopes() {
+  LVOffset NextOffset = 0;
+  auto AssignOffset = [&](LVElement *Element) {
+    Element->setOffset(NextOffset);
+    NextOffset += OffsetIncrease;
+  };
+
+  std::function<void(LVScope *)> Visit = [&](LVScope *Parent) {
+    // Offset at which this scope starts; used for the size calculation.
+    const LVOffset First = NextOffset;
+    AssignOffset(Parent);
+    constructRange(Parent);
+
+    // Nested scopes are numbered before this scope's symbols and types.
+    if (const LVScopes *Nested = Parent->getScopes())
+      for (LVScope *Child : *Nested)
+        Visit(Child);
+
+    // Give symbols and types an arbitrary offset as well.
+    if (const LVSymbols *Symbols = Parent->getSymbols())
+      for (LVSymbol *Symbol : *Symbols)
+        AssignOffset(Symbol);
+    if (const LVTypes *Types = Parent->getTypes())
+      for (LVType *Type : *Types)
+        AssignOffset(Type);
+
+    // Lines get no offset; they are only matched against the patterns.
+    if (const LVLines *Lines = Parent->getLines())
+      for (LVLine *Line : *Lines)
+        patterns().resolvePatternMatch(Line);
+
+    // Contribution of this scope (and everything inside) to the debug info.
+    const LVOffset Last = NextOffset;
+    if (options().getPrintSizes())
+      CompileUnit->addSize(Parent, First, Last);
+  };
+
+  Visit(CompileUnit);
+}
+
+std::string LVIRReader::getRegisterName(LVSmall Opcode,
+                                        ArrayRef<uint64_t> Operands) {
+  // At this point we are operating on a logical view item, with no access
+  // to the underlying DWARF data used by LLVM.
+  // We do not support DW_OP_regval_type here.
+  if (Opcode == dwarf::DW_OP_regval_type)
+    return {};
+
+  if (Opcode == dwarf::DW_OP_regx || Opcode == dwarf::DW_OP_bregx) {
+    // If the following trace is enabled, its output will be intermixed
+    // with the logical view output, causing some confusion.
+    // Leaving it here, just for any specific needs.
+    // LLVM_DEBUG({
+    //   dbgs() << "Printing Value: " << Operands[0] << " - "
+    //          << DbgValueRanges->getVariableName(Operands[0]) << "\n";
+    // });
+    return DbgValueRanges->getVariableName(Operands[0]);
----------------
jmorse wrote:

What's the correspondence between `Operands` and the index to variable names -- does the operand identify an LLVM-IR instruction by number or something?

https://github.com/llvm/llvm-project/pull/135440


More information about the llvm-branch-commits mailing list