[llvm-branch-commits] [llvm] [llvm-debuginfo-analyzer] Add support for LLVM IR format. (PR #135440)
Jeremy Morse via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Thu May 1 07:59:01 PDT 2025
================
@@ -0,0 +1,2348 @@
+//===-- LVIRReader.cpp ----------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the LVIRReader class.
+// It supports LLVM text IR and bitcode format.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/LogicalView/Readers/LVIRReader.h"
+#include "llvm/CodeGen/DebugHandlerBase.h"
+#include "llvm/DebugInfo/LogicalView/Core/LVLine.h"
+#include "llvm/DebugInfo/LogicalView/Core/LVScope.h"
+#include "llvm/DebugInfo/LogicalView/Core/LVSymbol.h"
+#include "llvm/DebugInfo/LogicalView/Core/LVType.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IRReader/IRReader.h"
+#include "llvm/Object/Error.h"
+#include "llvm/Object/IRObjectFile.h"
+#include "llvm/Support/FormatAdapters.h"
+#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/SourceMgr.h"
+
+using namespace llvm;
+using namespace llvm::object;
+using namespace llvm::logicalview;
+
+#define DEBUG_TYPE "IRReader"
+
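+// Extra debug traces. NOTE(review): this comment says the default is off,
+// yet DEBUG_ALL is defined unconditionally on the next line -- confirm.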
+#define DEBUG_ALL
+
+// These flavours of DINodes are not handled:
+// DW_TAG_APPLE_property = 19896
+// DW_TAG_atomic_type = 71
+// DW_TAG_common_block = 26
+// DW_TAG_file_type = 41
+// DW_TAG_friend = 42
+// DW_TAG_generic_subrange = 69
+// DW_TAG_immutable_type = 75
+// DW_TAG_module = 30
+
+// Build the logical element that corresponds to the metadata node 'DN':
+// - Set its DWARF tag and, when the node has one, its name.
+// - Associate the node with the new element ('addMD').
+// - Collect any file information attached to the node.
+// Returns a null pointer when no element is created for the node's tag.
+LVElement *LVIRReader::constructElement(const DINode *DN) {
+  const dwarf::Tag Tag = DN->getTag();
+  LVElement *Element = createElement(Tag);
+  if (!Element)
+    return nullptr;
+
+  Element->setTag(Tag);
+  addMD(DN, Element);
+
+  // Unnamed nodes keep whatever name the element was created with.
+  if (StringRef Name = getMDName(DN); !Name.empty())
+    Element->setName(Name);
+
+  // Record any file information.
+  if (const DIFile *File = getMDFile(DN))
+    getOrCreateSourceID(File);
+
+  return Element;
+}
+
+// Update 'LanguageIsFortran' according to the DWARF language code 'DWLang':
+// it is set for any of the Fortran dialects and cleared otherwise.
+void LVIRReader::mapFortranLanguage(unsigned DWLang) {
+  bool IsFortran = false;
+  switch (DWLang) {
+  case dwarf::DW_LANG_Fortran77:
+  case dwarf::DW_LANG_Fortran90:
+  case dwarf::DW_LANG_Fortran95:
+  case dwarf::DW_LANG_Fortran03:
+  case dwarf::DW_LANG_Fortran08:
+  case dwarf::DW_LANG_Fortran18:
+    IsFortran = true;
+    break;
+  default:
+    break;
+  }
+  LanguageIsFortran = IsFortran;
+}
+
+// Returns true when the compile unit emission kind is 'LineTablesOnly'.
+// Split DWARF is not considered here: looking at the IR generated with
+// '-gdwarf -gsplit-dwarf=split', the only difference is that
+// 'DICompileUnit::splitDebugFilename' is set to the name of the split
+// file: "xxx.dwo".
+bool LVIRReader::includeMinimalInlineScopes() const {
+  const auto *CUNode = getCUNode();
+  return CUNode->getEmissionKind() == DICompileUnit::LineTablesOnly;
+}
+
+// Return the 1-based index that identifies the source file 'File', where
+// a logical element is declared, registering the file on first use.
+// In DWARF v4, the files are 1-indexed.
+// In DWARF v5, the files are 0-indexed.
+// The IR reader expects the indexes to be 1-indexed.
+// Each compile unit keeps track of the last index it assigned.
+// A null 'File' yields the index 0.
+size_t LVIRReader::getOrCreateSourceID(const DIFile *File) {
+  if (!File)
+    return 0;
+
+#ifdef DEBUG_ALL
+  LLVM_DEBUG({
+    dbgs() << "\n[getOrCreateSourceID] DIFile\n";
+    File->dump();
+  });
+#endif
+
+  addMD(File, CompileUnit);
+
+  LLVM_DEBUG({
+    dbgs() << "Directory: '" << File->getDirectory() << "'\n";
+    dbgs() << "Filename: '" << File->getFilename() << "'\n";
+  });
+
+  size_t FileIndex = 0;
+  LVCompileUnitFiles::iterator Known = CompileUnitFiles.find(File);
+  if (Known != CompileUnitFiles.cend()) {
+    // Already registered: reuse the index assigned the first time.
+    FileIndex = Known->second;
+  } else {
+    // Fall back to the compilation directory when the file has none.
+    StringRef FileDirectory = File->getDirectory();
+    std::string FullName =
+        FileDirectory.empty()
+            ? std::string(CompileUnit->getCompilationDirectory())
+            : std::string(FileDirectory);
+    StringRef BaseName = llvm::sys::path::filename(File->getFilename());
+    FullName += "/";
+    FullName.append(BaseName.data(), BaseName.size());
+    CompileUnit->addFilename(transformPath(FullName));
+
+    // The new file takes the index that follows the last one assigned.
+    FileIndex = getFileIndex(CompileUnit);
+    ++FileIndex;
+    CompileUnitFiles.emplace(File, FileIndex);
+    updateFileIndex(CompileUnit, FileIndex);
+  }
+
+  LLVM_DEBUG({ dbgs() << "FileIndex: " << FileIndex << "\n"; });
+  return FileIndex;
+}
+
+// Record on 'Element' the source position given by 'Line' and 'File'.
+// A 'Line' of zero means there is nothing to record. 'File' may be null,
+// in which case 'getOrCreateSourceID' returns the index 0.
+void LVIRReader::addSourceLine(LVElement *Element, unsigned Line,
+                               const DIFile *File) {
+  if (Line == 0)
+    return;
+
+  // After the scopes are created, the generic reader traverses the 'Children'
+  // and performs additional setting tasks (resolve types names, references,
+  // etc.). One of those tasks is to select the correct string pool index based
+  // on the command line options: --attribute=filename or --attribute=pathname.
+  // As the 'Children' do not include logical lines, do that selection now,
+  // by calling 'setFilename' if the logical element is a line.
+  size_t FileID = getOrCreateSourceID(File);
+  if (Element->getIsLine())
+    Element->setFilename(CompileUnit->getFilename(FileID));
+  else
+    Element->setFilenameIndex(FileID);
+  Element->setLineNumber(Line);
+
+  LLVM_DEBUG({
+    dbgs() << "\n[addSourceLine]\n";
+    // 'File' is allowed to be null; do not dereference it in that case.
+    if (File)
+      File->dump();
+    dbgs() << "FileIndex: " << Element->getFilenameIndex() << ", ";
+    dbgs() << "ID: " << Element->getID() << ", ";
+    dbgs() << "Kind: " << Element->kind() << ", ";
+    dbgs() << "Line: " << Element->getLineNumber() << ", ";
+    dbgs() << "Name: " << Element->getName() << "\n";
+  });
+}
+
+// Record on 'Element' the line and file of the global variable 'G'.
+void LVIRReader::addSourceLine(LVElement *Element, const DIGlobalVariable *G) {
+  assert(G);
+  const unsigned Line = G->getLine();
+  const DIFile *File = G->getFile();
+  addSourceLine(Element, Line, File);
+}
+
+// Record on 'Element' the line and file of the imported entity 'IE'.
+void LVIRReader::addSourceLine(LVElement *Element, const DIImportedEntity *IE) {
+  assert(IE);
+  const unsigned Line = IE->getLine();
+  const DIFile *File = IE->getFile();
+  addSourceLine(Element, Line, File);
+}
+
+// Record on 'Element' the line and file of the label 'L'.
+void LVIRReader::addSourceLine(LVElement *Element, const DILabel *L) {
+  assert(L);
+  const unsigned Line = L->getLine();
+  const DIFile *File = L->getFile();
+  addSourceLine(Element, Line, File);
+}
+
+// Record on 'Element' the line and file of the local variable 'V'.
+void LVIRReader::addSourceLine(LVElement *Element, const DILocalVariable *V) {
+  assert(V);
+  const unsigned Line = V->getLine();
+  const DIFile *File = V->getFile();
+  addSourceLine(Element, Line, File);
+}
+
+// Record on 'Element' the line and file of the debug location 'DL'.
+void LVIRReader::addSourceLine(LVElement *Element, const DILocation *DL) {
+  assert(DL);
+  const unsigned Line = DL->getLine();
+  const DIFile *File = DL->getFile();
+  addSourceLine(Element, Line, File);
+}
+
+// Record on 'Element' the line and file of the Objective-C property 'Ty'.
+void LVIRReader::addSourceLine(LVElement *Element, const DIObjCProperty *Ty) {
+  assert(Ty);
+  const unsigned Line = Ty->getLine();
+  const DIFile *File = Ty->getFile();
+  addSourceLine(Element, Line, File);
+}
+
+// Record on 'Element' the line and file of the subprogram 'SP'.
+void LVIRReader::addSourceLine(LVElement *Element, const DISubprogram *SP) {
+  assert(SP);
+  const unsigned Line = SP->getLine();
+  const DIFile *File = SP->getFile();
+  addSourceLine(Element, Line, File);
+}
+
+// Record on 'Element' the line and file of the type 'Ty'.
+void LVIRReader::addSourceLine(LVElement *Element, const DIType *Ty) {
+  assert(Ty);
+  const unsigned Line = Ty->getLine();
+  const DIFile *File = Ty->getFile();
+  addSourceLine(Element, Line, File);
+}
+
+// Set the value of 'Element' to the constant described by 'DIExpr',
+// formatted as a hexadecimal string ('hexString' with a width of 2).
+// A negative signed constant is written as '-' followed by its magnitude.
+// Nothing is set when 'DIExpr' is not a signed or unsigned constant.
+void LVIRReader::addConstantValue(LVElement *Element,
+                                  const DIExpression *DIExpr) {
+  std::optional<DIExpression::SignedOrUnsignedConstant> Constant =
+      DIExpr->isConstant();
+  std::stringstream Stream;
+  if (DIExpression::SignedOrUnsignedConstant::SignedConstant == Constant) {
+    int64_t Value = DIExpr->getElement(1);
+    uint64_t Magnitude = static_cast<uint64_t>(Value);
+    if (Value < 0) {
+      Stream << "-";
+      // Negate using unsigned arithmetic: 'std::abs' is undefined for the
+      // most negative 64-bit value, as its result is not representable.
+      Magnitude = 0 - Magnitude;
+    }
+    Stream << hexString(Magnitude, 2);
+    Element->setValue(Stream.str());
+  } else if (DIExpression::SignedOrUnsignedConstant::UnsignedConstant ==
+             Constant) {
+    uint64_t Value = DIExpr->getElement(1);
+    Stream << hexString(Value, 2);
+    Element->setValue(Stream.str());
+  }
+}
+
+// Set the value of 'Element' from the integer constant 'CI'; the type 'Ty'
+// is used to decide whether the value is unsigned.
+void LVIRReader::addConstantValue(LVElement *Element, const ConstantInt *CI,
+                                  const DIType *Ty) {
+  const APInt &Value = CI->getValue();
+  addConstantValue(Element, Value, Ty);
+}
+
+// Set the value of 'Element' from the 64-bit value 'Val'; the type 'Ty'
+// is used to decide whether the value is unsigned.
+void LVIRReader::addConstantValue(LVElement *Element, uint64_t Val,
+                                  const DIType *Ty) {
+  const bool Unsigned = DebugHandlerBase::isUnsignedDIType(Ty);
+  addConstantValue(Element, Unsigned, Val);
+}
+
+// Set the value of 'Element' from the 64-bit value 'Val', which is treated
+// as unsigned or signed according to 'Unsigned'.
+void LVIRReader::addConstantValue(LVElement *Element, bool Unsigned,
+                                  uint64_t Val) {
+  // The third 'APInt' constructor argument is 'isSigned', so it has to be
+  // the opposite of 'Unsigned'. For a 64-bit wide value the stored bits are
+  // the same either way.
+  addConstantValue(Element, llvm::APInt(64, Val, /*isSigned=*/!Unsigned),
+                   Unsigned);
+}
+
+// Set the value of 'Element' from the arbitrary precision integer 'Val';
+// the type 'Ty' is used to decide whether the value is unsigned.
+void LVIRReader::addConstantValue(LVElement *Element, const APInt &Val,
+                                  const DIType *Ty) {
+  const bool Unsigned = DebugHandlerBase::isUnsignedDIType(Ty);
+  addConstantValue(Element, Val, Unsigned);
+}
+
+// Set the value of 'Element' to the textual form of 'Value': base 16,
+// formatted as a C literal, with lowercase digits and no separators.
+// The value is printed as signed unless 'Unsigned' is set.
+void LVIRReader::addConstantValue(LVElement *Element, const APInt &Value,
+                                  bool Unsigned) {
+  const bool Signed = !Unsigned;
+  SmallString<128> Text;
+  Value.toString(Text, /*Radix=*/16, /*Signed=*/Signed,
+                 /*formatAsCLiteral=*/true, /*UpperCase=*/false,
+                 /*InsertSeparators=*/false);
+  Element->setValue(Text.str());
+}
+
+// Set the value of 'Element' to the given string.
+void LVIRReader::addString(LVElement *Element, StringRef String) {
+ Element->setValue(String);
+}
+
+// Fill the location gaps of every symbol collected in
+// 'SymbolsWithLocations'. This is done only when the 'AttributeAnyLocation'
+// option is enabled.
+void LVIRReader::processLocationGaps() {
+  if (!options().getAttributeAnyLocation())
+    return;
+
+  for (LVSymbol *Symbol : SymbolsWithLocations)
+    Symbol->fillLocationGaps();
+}
+
+void LVIRReader::processScopes() {
+  // Starting at the compile unit, visit every scope in order to:
+  // - Calculate its location ranges.
+  // - Give each logical scope, symbol and type a unique offset, as the
+  //   code that handles public names expects them to have one. Offsets
+  //   advance by 'OffsetIncrease' for each element.
+  // - Resolve any line pattern match.
+  LVOffset NextOffset = 0;
+  auto AssignOffset = [&](LVElement *Element) {
+    Element->setOffset(NextOffset);
+    NextOffset += OffsetIncrease;
+  };
+
+  std::function<void(LVScope *)> VisitScope = [&](LVScope *Parent) {
+    // Offset at which the contribution of this scope begins.
+    LVOffset First = NextOffset;
+    AssignOffset(Parent);
+    constructRange(Parent);
+
+    // Nested scopes are numbered before the symbols and types of 'Parent'.
+    if (const LVScopes *Children = Parent->getScopes())
+      for (LVScope *Child : *Children)
+        VisitScope(Child);
+
+    // Symbols and types only need an arbitrary, unique offset.
+    if (const LVSymbols *ScopeSymbols = Parent->getSymbols())
+      for (LVSymbol *Symbol : *ScopeSymbols)
+        AssignOffset(Symbol);
+    if (const LVTypes *ScopeTypes = Parent->getTypes())
+      for (LVType *Type : *ScopeTypes)
+        AssignOffset(Type);
+
+    // Resolve any given pattern against the lines of the scope.
+    if (const LVLines *ScopeLines = Parent->getLines())
+      for (LVLine *Line : *ScopeLines)
+        patterns().resolvePatternMatch(Line);
+
+    // Calculate the contribution of the scope to the debug info: the
+    // offsets consumed by the scope itself and everything nested in it.
+    LVOffset Last = NextOffset;
+    if (options().getPrintSizes())
+      CompileUnit->addSize(Parent, First, Last);
+  };
+
+  VisitScope(CompileUnit);
+}
+
+std::string LVIRReader::getRegisterName(LVSmall Opcode,
+ ArrayRef<uint64_t> Operands) {
+ // At this point we are operating on a logical view item, with no access
+ // to the underlying DWARF data used by LLVM.
+ // We do not support DW_OP_regval_type here.
+ if (Opcode == dwarf::DW_OP_regval_type)
+ return {};
+
+ if (Opcode == dwarf::DW_OP_regx || Opcode == dwarf::DW_OP_bregx) {
+ // If the following trace is enabled, its output will be intermixed
+ // with the logical view output, causing some confusion.
+ // Leaving it here, just for any specific needs.
+ // LLVM_DEBUG({
+ // dbgs() << "Printing Value: " << Operands[0] << " - "
+ // << DbgValueRanges->getVariableName(Operands[0]) << "\n";
+ // });
+ return DbgValueRanges->getVariableName(Operands[0]);
----------------
jmorse wrote:
What's the correspondence between `Operands` and the index to variable names -- does the operand identify an LLVM-IR instruction by number or something?
https://github.com/llvm/llvm-project/pull/135440
More information about the llvm-branch-commits
mailing list