[llvm] [RemoveDIs] Add flag to preserve the debug info format of input IR (PR #87379)

Stephen Tozer via llvm-commits llvm-commits at lists.llvm.org
Fri Apr 5 00:51:23 PDT 2024


https://github.com/SLTozer updated https://github.com/llvm/llvm-project/pull/87379

>From 75fc035d915d8fc34183b8a812596ebb0a71c7ba Mon Sep 17 00:00:00 2001
From: Stephen Tozer <stephen.tozer at sony.com>
Date: Thu, 28 Mar 2024 20:34:54 +0000
Subject: [PATCH 1/3] [RemoveDIs] Add flag to preserve the debug info format of
 input IR

This patch adds a new flag: --preserve-input-debuginfo-format

This flag instructs the tool to not convert the debug info format
(intrinsics/records), but to instead determine the format of the input IR
and override the other format-determining flags so that we process and
output the file in the same format that we received it in. This flag is
turned off by llvm-link, llvm-lto, and llvm-lto2, and should be turned off
by any other tool that expects to parse multiple IR files.

The motivation for this flag is to allow us to use debug records by default
in all LLVM tools except for those that have an interest in not converting
the debug info format - verify-uselistorder and llvm-reduce, and any
downstream tools that seek to test or mutate IR as-is, without applying
extraneous modifications to the input.

The implementation of this flag may not be to everyone's liking - setting
it to "true" directly modifies the value of the flags:
  --experimental-debuginfo-iterators
  --write-experimental-debuginfo
  --write-experimental-debuginfo-iterators-to-bitcode
The reason for this approach is that when we pass this new flag, we're
intending to override any effect that those flags might actually have; this
could be done by adding a check at every site where those flags are used,
but I'm planning to rewrite or remove those flags soon, so this method
avoids updating the same sets of code twice without a functional change; if
modifying the flag values is a problem though, then this can be rewritten.
---
 llvm/include/llvm/AsmParser/LLParser.h        |  1 +
 llvm/include/llvm/IR/AutoUpgrade.h            |  3 +-
 llvm/include/llvm/IR/BasicBlock.h             |  6 +-
 llvm/include/llvm/IR/Function.h               |  6 +-
 llvm/include/llvm/IR/Module.h                 | 14 ++--
 llvm/lib/AsmParser/LLParser.cpp               | 27 +++++--
 llvm/lib/Bitcode/Reader/BitcodeReader.cpp     | 70 +++++++++++++++----
 llvm/lib/IR/AutoUpgrade.cpp                   | 12 ++--
 llvm/lib/IR/BasicBlock.cpp                    | 22 ++++--
 llvm/lib/IR/Function.cpp                      | 14 ++--
 llvm/test/Bitcode/dbg-record-roundtrip.ll     | 10 +++
 .../roundtrip-non-instruction-debug-info.ll   |  8 +++
 llvm/tools/llvm-link/llvm-link.cpp            |  5 ++
 llvm/tools/llvm-lto/llvm-lto.cpp              |  5 ++
 llvm/tools/llvm-lto2/llvm-lto2.cpp            |  5 ++
 15 files changed, 157 insertions(+), 51 deletions(-)

diff --git a/llvm/include/llvm/AsmParser/LLParser.h b/llvm/include/llvm/AsmParser/LLParser.h
index 8ebd0d3409c899..b4e971fea1a139 100644
--- a/llvm/include/llvm/AsmParser/LLParser.h
+++ b/llvm/include/llvm/AsmParser/LLParser.h
@@ -335,6 +335,7 @@ namespace llvm {
 
     // Top-Level Entities
     bool parseTopLevelEntities();
+    bool finalizeDebugInfoFormat(Module *M);
     void dropUnknownMetadataReferences();
     bool validateEndOfModule(bool UpgradeDebugInfo);
     bool validateEndOfIndex();
diff --git a/llvm/include/llvm/IR/AutoUpgrade.h b/llvm/include/llvm/IR/AutoUpgrade.h
index 152f781ffa9b30..97c3e4d7589d7b 100644
--- a/llvm/include/llvm/IR/AutoUpgrade.h
+++ b/llvm/include/llvm/IR/AutoUpgrade.h
@@ -36,7 +36,8 @@ namespace llvm {
   /// for upgrading, and returns true if it requires upgrading. It may return
   /// null in NewFn if the all calls to the original intrinsic function
   /// should be transformed to non-function-call instructions.
-  bool UpgradeIntrinsicFunction(Function *F, Function *&NewFn);
+  bool UpgradeIntrinsicFunction(Function *F, Function *&NewFn,
+                                bool CanUpgradeDebugIntrinsicsToRecords = true);
 
   /// This is the complement to the above, replacing a specific call to an
   /// intrinsic function with a call to the specified new function.
diff --git a/llvm/include/llvm/IR/BasicBlock.h b/llvm/include/llvm/IR/BasicBlock.h
index 0eea4cdccca5bb..f14d5d92da72de 100644
--- a/llvm/include/llvm/IR/BasicBlock.h
+++ b/llvm/include/llvm/IR/BasicBlock.h
@@ -81,17 +81,17 @@ class BasicBlock final : public Value, // Basic blocks are data objects also
   /// intrinsics into DbgMarkers / DbgRecords. Deletes all dbg.values in
   /// the process and sets IsNewDbgInfoFormat = true. Only takes effect if
   /// the UseNewDbgInfoFormat LLVM command line option is given.
-  void convertToNewDbgValues();
+  void convertToNewDbgValues(bool UpdateFlagOnly = false);
 
   /// Convert variable location debugging information stored in DbgMarkers and
   /// DbgRecords into the dbg.value intrinsic representation. Sets
   /// IsNewDbgInfoFormat = false.
-  void convertFromNewDbgValues();
+  void convertFromNewDbgValues(bool UpdateFlagOnly = false);
 
   /// Ensure the block is in "old" dbg.value format (\p NewFlag == false) or
   /// in the new format (\p NewFlag == true), converting to the desired format
   /// if necessary.
-  void setIsNewDbgInfoFormat(bool NewFlag);
+  void setIsNewDbgInfoFormat(bool NewFlag, bool UpdateFlagOnly = false);
 
   /// Record that the collection of DbgRecords in \p M "trails" after the last
   /// instruction of this block. These are equivalent to dbg.value intrinsics
diff --git a/llvm/include/llvm/IR/Function.h b/llvm/include/llvm/IR/Function.h
index d96d506a9b05d0..8c881f38748fdc 100644
--- a/llvm/include/llvm/IR/Function.h
+++ b/llvm/include/llvm/IR/Function.h
@@ -114,12 +114,12 @@ class LLVM_EXTERNAL_VISIBILITY Function : public GlobalObject,
   }
 
   /// \see BasicBlock::convertToNewDbgValues.
-  void convertToNewDbgValues();
+  void convertToNewDbgValues(bool UpdateFlagOnly = false);
 
   /// \see BasicBlock::convertFromNewDbgValues.
-  void convertFromNewDbgValues();
+  void convertFromNewDbgValues(bool UpdateFlagOnly = false);
 
-  void setIsNewDbgInfoFormat(bool NewVal);
+  void setIsNewDbgInfoFormat(bool NewVal, bool UpdateFlagOnly = false);
 
 private:
   friend class TargetLibraryInfoImpl;
diff --git a/llvm/include/llvm/IR/Module.h b/llvm/include/llvm/IR/Module.h
index bb2e667ef6f410..bc3986140f0b9a 100644
--- a/llvm/include/llvm/IR/Module.h
+++ b/llvm/include/llvm/IR/Module.h
@@ -224,26 +224,26 @@ class LLVM_EXTERNAL_VISIBILITY Module {
   void removeDebugIntrinsicDeclarations();
 
   /// \see BasicBlock::convertToNewDbgValues.
-  void convertToNewDbgValues() {
+  void convertToNewDbgValues(bool UpdateFlagOnly = false) {
     for (auto &F : *this) {
-      F.convertToNewDbgValues();
+      F.convertToNewDbgValues(UpdateFlagOnly);
     }
     IsNewDbgInfoFormat = true;
   }
 
   /// \see BasicBlock::convertFromNewDbgValues.
-  void convertFromNewDbgValues() {
+  void convertFromNewDbgValues(bool UpdateFlagOnly = false) {
     for (auto &F : *this) {
-      F.convertFromNewDbgValues();
+      F.convertFromNewDbgValues(UpdateFlagOnly);
     }
     IsNewDbgInfoFormat = false;
   }
 
-  void setIsNewDbgInfoFormat(bool UseNewFormat) {
+  void setIsNewDbgInfoFormat(bool UseNewFormat, bool UpdateFlagOnly = false) {
     if (UseNewFormat && !IsNewDbgInfoFormat)
-      convertToNewDbgValues();
+      convertToNewDbgValues(UpdateFlagOnly);
     else if (!UseNewFormat && IsNewDbgInfoFormat)
-      convertFromNewDbgValues();
+      convertFromNewDbgValues(UpdateFlagOnly);
   }
 
   /// The Module constructor. Note that there is no default constructor. You
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index f0be021668afa7..70a6ef864f5656 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -63,6 +63,9 @@ static cl::opt<bool> AllowIncompleteIR(
         "metadata will be dropped)"));
 
 extern llvm::cl::opt<bool> UseNewDbgInfoFormat;
+extern cl::opt<cl::boolOrDefault> PreserveInputDbgFormat;
+extern bool WriteNewDbgInfoFormatToBitcode;
+extern cl::opt<bool> WriteNewDbgInfoFormat;
 
 static std::string getTypeString(Type *T) {
   std::string Result;
@@ -71,12 +74,20 @@ static std::string getTypeString(Type *T) {
   return Tmp.str();
 }
 
-// Currently, we should always process modules in the old debug info format by
-// default regardless of the module's format in IR; convert it to the old format
-// here.
-bool finalizeDebugInfoFormat(Module *M) {
-  if (M)
+// Whatever debug info format we parsed, we should convert to the expected debug
+// info format immediately afterwards.
+bool LLParser::finalizeDebugInfoFormat(Module *M) {
+  // We should have already returned an error if we observed both intrinsics and
+  // records in this IR.
+  assert(!(SeenNewDbgInfoFormat && SeenOldDbgInfoFormat) &&
+         "Mixed debug intrinsics/records seen without a parsing error?");
+  if (PreserveInputDbgFormat == cl::boolOrDefault::BOU_TRUE) {
+    UseNewDbgInfoFormat = SeenNewDbgInfoFormat;
+    WriteNewDbgInfoFormatToBitcode = SeenNewDbgInfoFormat;
+    WriteNewDbgInfoFormat = SeenNewDbgInfoFormat;
+  } else if (M) {
     M->setIsNewDbgInfoFormat(false);
+  }
   return false;
 }
 
@@ -6507,10 +6518,10 @@ bool LLParser::parseBasicBlock(PerFunctionState &PFS) {
       if (SeenOldDbgInfoFormat)
         return error(Lex.getLoc(), "debug record should not appear in a module "
                                    "containing debug info intrinsics");
+      if (!SeenNewDbgInfoFormat)
+        M->setIsNewDbgInfoFormat(true, true);
       SeenNewDbgInfoFormat = true;
       Lex.Lex();
-      if (!M->IsNewDbgInfoFormat)
-        M->convertToNewDbgValues();
 
       DbgRecord *DR;
       if (parseDebugRecord(DR, PFS))
@@ -7912,6 +7923,8 @@ bool LLParser::parseCall(Instruction *&Inst, PerFunctionState &PFS,
       return error(CallLoc, "llvm.dbg intrinsic should not appear in a module "
                             "using non-intrinsic debug info");
     }
+    if (!SeenOldDbgInfoFormat)
+      M->setIsNewDbgInfoFormat(false, true);
     SeenOldDbgInfoFormat = true;
   }
   CI->setAttributes(PAL);
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index 6ee93f17792b4f..5d92d8bed061b3 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -108,6 +108,9 @@ cl::opt<cl::boolOrDefault> LoadBitcodeIntoNewDbgInfoFormat(
     "load-bitcode-into-experimental-debuginfo-iterators", cl::Hidden,
     cl::desc("Load bitcode directly into the new debug info format (regardless "
              "of input format)"));
+extern cl::opt<cl::boolOrDefault> PreserveInputDbgFormat;
+extern bool WriteNewDbgInfoFormatToBitcode;
+extern cl::opt<bool> WriteNewDbgInfoFormat;
 
 namespace {
 
@@ -682,6 +685,11 @@ class BitcodeReader : public BitcodeReaderBase, public GVMaterializer {
   /// (e.g.) blockaddress forward references.
   bool WillMaterializeAllForwardRefs = false;
 
+  /// Tracks whether we have seen debug intrinsics or records in this bitcode;
+  /// seeing both in a single module is currently a fatal error.
+  bool SeenDebugIntrinsic = false;
+  bool SeenDebugRecord = false;
+
   bool StripDebugInfo = false;
   TBAAVerifier TBAAVerifyHelper;
 
@@ -3774,7 +3782,11 @@ Error BitcodeReader::globalCleanup() {
   for (Function &F : *TheModule) {
     MDLoader->upgradeDebugIntrinsics(F);
     Function *NewFn;
-    if (UpgradeIntrinsicFunction(&F, NewFn))
+    // If PreserveInputDbgFormat=true, then we don't know whether we want
+    // intrinsics or records, and we won't perform any conversions in either
+    // case, so don't upgrade intrinsics to records.
+    if (UpgradeIntrinsicFunction(
+            &F, NewFn, PreserveInputDbgFormat != cl::boolOrDefault::BOU_TRUE))
       UpgradedIntrinsics[&F] = NewFn;
     // Look for functions that rely on old function attribute behavior.
     UpgradeFunctionAttributes(F);
@@ -4301,10 +4313,13 @@ Error BitcodeReader::parseModule(uint64_t ResumeBit,
                                  bool ShouldLazyLoadMetadata,
                                  ParserCallbacks Callbacks) {
   // Load directly into RemoveDIs format if LoadBitcodeIntoNewDbgInfoFormat
-  // has been set to true (default action: load into the old debug format).
-  TheModule->IsNewDbgInfoFormat =
-      UseNewDbgInfoFormat &&
-      LoadBitcodeIntoNewDbgInfoFormat == cl::boolOrDefault::BOU_TRUE;
+  // has been set to true and we aren't attempting to preserve the existing
+  // format in the bitcode (default action: load into the old debug format).
+  if (PreserveInputDbgFormat != cl::boolOrDefault::BOU_TRUE) {
+    TheModule->IsNewDbgInfoFormat =
+        UseNewDbgInfoFormat &&
+        LoadBitcodeIntoNewDbgInfoFormat == cl::boolOrDefault::BOU_TRUE;
+  }
 
   this->ValueTypeCallback = std::move(Callbacks.ValueType);
   if (ResumeBit) {
@@ -6443,6 +6458,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
     case bitc::FUNC_CODE_DEBUG_RECORD_ASSIGN: {
       // DbgVariableRecords are placed after the Instructions that they are
       // attached to.
+      SeenDebugRecord = true;
       Instruction *Inst = getLastInstruction();
       if (!Inst)
         return error("Invalid dbg record: missing instruction");
@@ -6603,6 +6619,8 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
         TCK = CallInst::TCK_NoTail;
       cast<CallInst>(I)->setTailCallKind(TCK);
       cast<CallInst>(I)->setAttributes(PAL);
+      if (isa<DbgInfoIntrinsic>(I))
+        SeenDebugIntrinsic = true;
       if (Error Err = propagateAttributeTypes(cast<CallBase>(I), ArgTyIDs)) {
         I->deleteValue();
         return Err;
@@ -6791,20 +6809,46 @@ Error BitcodeReader::materialize(GlobalValue *GV) {
   if (Error JumpFailed = Stream.JumpToBit(DFII->second))
     return JumpFailed;
 
-  // Set the debug info mode to "new", possibly creating a mismatch between
-  // module and function debug modes. This is okay because we'll convert
-  // everything back to the old mode after parsing if needed.
-  // FIXME: Remove this once all tools support RemoveDIs.
+  // Regardless of the debug info format we want to end up in, we need
+  // IsNewDbgInfoFormat=true to construct any debug records seen in the bitcode.
   F->IsNewDbgInfoFormat = true;
 
   if (Error Err = parseFunctionBody(F))
     return Err;
   F->setIsMaterializable(false);
 
-  // Convert new debug info records into intrinsics.
-  // FIXME: Remove this once all tools support RemoveDIs.
-  if (!F->getParent()->IsNewDbgInfoFormat)
-    F->convertFromNewDbgValues();
+  // All parsed Functions should load into the debug info format dictated by the
+  // Module, unless we're attempting to preserve the input debug info format.
+  if (SeenDebugIntrinsic && SeenDebugRecord)
+    return error("Mixed debug intrinsics and debug records in bitcode module!");
+  if (PreserveInputDbgFormat == cl::boolOrDefault::BOU_TRUE) {
+    bool SeenAnyDebugInfo = SeenDebugIntrinsic || SeenDebugRecord;
+    bool NewDbgInfoFormatDesired =
+        SeenAnyDebugInfo ? SeenDebugRecord : F->getParent()->IsNewDbgInfoFormat;
+    if (SeenAnyDebugInfo) {
+      UseNewDbgInfoFormat = SeenDebugRecord;
+      WriteNewDbgInfoFormatToBitcode = SeenDebugRecord;
+      WriteNewDbgInfoFormat = SeenDebugRecord;
+    }
+    // If the module's debug info format doesn't match the observed input
+    // format, then set its format now; we don't need to call the conversion
+    // function because there must be no existing intrinsics to convert.
+    // Otherwise, just set the format on this function now.
+    if (NewDbgInfoFormatDesired != F->getParent()->IsNewDbgInfoFormat)
+      F->getParent()->setIsNewDbgInfoFormat(NewDbgInfoFormatDesired, true);
+    else
+      F->setIsNewDbgInfoFormat(NewDbgInfoFormatDesired, true);
+  } else {
+    // If we aren't preserving formats, we use the Module flag to get our
+    // desired format instead of reading flags, in case we are lazy-loading and
+    // the format of the module has been changed since it was set by the flags.
+    // We only need to convert debug info here if we have debug records but
+    // desire the intrinsic format; everything else is a no-op or handled by the
+    // autoupgrader.
+    bool ModuleIsNewDbgInfoFormat = F->getParent()->IsNewDbgInfoFormat;
+    F->setIsNewDbgInfoFormat(ModuleIsNewDbgInfoFormat,
+                             ModuleIsNewDbgInfoFormat || !SeenDebugRecord);
+  }
 
   if (StripDebugInfo)
     stripDebugInfo(*F);
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index a44f6af4162f03..dc0b7347e4fe4e 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -983,7 +983,8 @@ static Intrinsic::ID shouldUpgradeNVPTXBF16Intrinsic(StringRef Name) {
   return Intrinsic::not_intrinsic;
 }
 
-static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
+static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,
+                                      bool CanUpgradeDebugIntrinsicsToRecords) {
   assert(F && "Illegal to upgrade a non-existent Function.");
 
   StringRef Name = F->getName();
@@ -1057,7 +1058,8 @@ static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
   case 'd':
     if (Name.consume_front("dbg.")) {
       // Mark debug intrinsics for upgrade to new debug format.
-      if (F->getParent()->IsNewDbgInfoFormat) {
+      if (CanUpgradeDebugIntrinsicsToRecords &&
+          F->getParent()->IsNewDbgInfoFormat) {
         if (Name == "addr" || Name == "value" || Name == "assign" ||
             Name == "declare" || Name == "label") {
           // There's no function to replace these with.
@@ -1413,9 +1415,11 @@ static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
   return false;
 }
 
-bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
+bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn,
+                                    bool CanUpgradeDebugIntrinsicsToRecords) {
   NewFn = nullptr;
-  bool Upgraded = upgradeIntrinsicFunction1(F, NewFn);
+  bool Upgraded =
+      upgradeIntrinsicFunction1(F, NewFn, CanUpgradeDebugIntrinsicsToRecords);
   assert(F != NewFn && "Intrinsic function upgraded to the same function");
 
   // Upgrade intrinsic attributes.  This does not change the function.
diff --git a/llvm/lib/IR/BasicBlock.cpp b/llvm/lib/IR/BasicBlock.cpp
index f088c7a2cc4e77..3bb0f423f270ca 100644
--- a/llvm/lib/IR/BasicBlock.cpp
+++ b/llvm/lib/IR/BasicBlock.cpp
@@ -35,6 +35,12 @@ cl::opt<bool>
                         cl::desc("Enable communicating debuginfo positions "
                                  "through iterators, eliminating intrinsics"),
                         cl::init(true));
+cl::opt<cl::boolOrDefault> PreserveInputDbgFormat(
+    "preserve-input-debuginfo-format", cl::Hidden,
+    cl::desc("When set to true, IR files will be processed and printed in "
+             "their current debug info format, regardless of default behaviour "
+             "or other flags passed. Has no effect if input IR does not "
+             "contain debug records or intrinsics."));
 
 bool WriteNewDbgInfoFormatToBitcode /*set default value in cl::init() below*/;
 cl::opt<bool, true> WriteNewDbgInfoFormatToBitcode2(
@@ -65,8 +71,10 @@ DbgMarker *BasicBlock::createMarker(InstListType::iterator It) {
   return DM;
 }
 
-void BasicBlock::convertToNewDbgValues() {
+void BasicBlock::convertToNewDbgValues(bool UpdateFlagOnly) {
   IsNewDbgInfoFormat = true;
+  if (UpdateFlagOnly)
+    return;
 
   // Iterate over all instructions in the instruction list, collecting debug
   // info intrinsics and converting them to DbgRecords. Once we find a "real"
@@ -104,9 +112,11 @@ void BasicBlock::convertToNewDbgValues() {
   }
 }
 
-void BasicBlock::convertFromNewDbgValues() {
-  invalidateOrders();
+void BasicBlock::convertFromNewDbgValues(bool UpdateFlagOnly) {
   IsNewDbgInfoFormat = false;
+  if (UpdateFlagOnly)
+    return;
+  invalidateOrders();
 
   // Iterate over the block, finding instructions annotated with DbgMarkers.
   // Convert any attached DbgRecords to debug intrinsics and insert ahead of the
@@ -141,11 +151,11 @@ void BasicBlock::dumpDbgValues() const {
 }
 #endif
 
-void BasicBlock::setIsNewDbgInfoFormat(bool NewFlag) {
+void BasicBlock::setIsNewDbgInfoFormat(bool NewFlag, bool UpdateFlagOnly) {
   if (NewFlag && !IsNewDbgInfoFormat)
-    convertToNewDbgValues();
+    convertToNewDbgValues(UpdateFlagOnly);
   else if (!NewFlag && IsNewDbgInfoFormat)
-    convertFromNewDbgValues();
+    convertFromNewDbgValues(UpdateFlagOnly);
 }
 
 ValueSymbolTable *BasicBlock::getValueSymbolTable() {
diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp
index eb126f182eadcb..953977ca33ed40 100644
--- a/llvm/lib/IR/Function.cpp
+++ b/llvm/lib/IR/Function.cpp
@@ -83,25 +83,25 @@ static cl::opt<unsigned> NonGlobalValueMaxNameSize(
     "non-global-value-max-name-size", cl::Hidden, cl::init(1024),
     cl::desc("Maximum size for the name of non-global values."));
 
-void Function::convertToNewDbgValues() {
+void Function::convertToNewDbgValues(bool UpdateFlagOnly) {
   IsNewDbgInfoFormat = true;
   for (auto &BB : *this) {
-    BB.convertToNewDbgValues();
+    BB.convertToNewDbgValues(UpdateFlagOnly);
   }
 }
 
-void Function::convertFromNewDbgValues() {
+void Function::convertFromNewDbgValues(bool UpdateFlagOnly) {
   IsNewDbgInfoFormat = false;
   for (auto &BB : *this) {
-    BB.convertFromNewDbgValues();
+    BB.convertFromNewDbgValues(UpdateFlagOnly);
   }
 }
 
-void Function::setIsNewDbgInfoFormat(bool NewFlag) {
+void Function::setIsNewDbgInfoFormat(bool NewFlag, bool UpdateFlagOnly) {
   if (NewFlag && !IsNewDbgInfoFormat)
-    convertToNewDbgValues();
+    convertToNewDbgValues(UpdateFlagOnly);
   else if (!NewFlag && IsNewDbgInfoFormat)
-    convertFromNewDbgValues();
+    convertFromNewDbgValues(UpdateFlagOnly);
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/Bitcode/dbg-record-roundtrip.ll b/llvm/test/Bitcode/dbg-record-roundtrip.ll
index bd347cac720677..cc83fdd4fa5381 100644
--- a/llvm/test/Bitcode/dbg-record-roundtrip.ll
+++ b/llvm/test/Bitcode/dbg-record-roundtrip.ll
@@ -15,6 +15,16 @@
 ; RUN: | llvm-dis --load-bitcode-into-experimental-debuginfo-iterators=true --write-experimental-debuginfo=true \
 ; RUN: | FileCheck %s --check-prefixes=RECORDS
 
+;; When preserving, we should output the format the bitcode was written in
+;; regardless of the value of the write flag.
+; RUN: llvm-as --write-experimental-debuginfo-iterators-to-bitcode=true %s -o - \
+; RUN: | llvm-dis --preserve-input-debuginfo-format=true --write-experimental-debuginfo=false \
+; RUN: | FileCheck %s --check-prefixes=RECORDS
+
+; RUN: llvm-as --write-experimental-debuginfo-iterators-to-bitcode=false %s -o - \
+; RUN: | llvm-dis --preserve-input-debuginfo-format=true --write-experimental-debuginfo=true \
+; RUN: | FileCheck %s
+
 ;; Check that verify-uselistorder passes regardless of input format.
 ; RUN: llvm-as %s --write-experimental-debuginfo-iterators-to-bitcode=true -o - | verify-uselistorder
 ; RUN: verify-uselistorder %s
diff --git a/llvm/test/DebugInfo/roundtrip-non-instruction-debug-info.ll b/llvm/test/DebugInfo/roundtrip-non-instruction-debug-info.ll
index b15b76d1690c41..b154a4ff162d54 100644
--- a/llvm/test/DebugInfo/roundtrip-non-instruction-debug-info.ll
+++ b/llvm/test/DebugInfo/roundtrip-non-instruction-debug-info.ll
@@ -18,6 +18,14 @@
 ; RUN: opt --passes=verify -S --try-experimental-debuginfo-iterators --write-experimental-debuginfo=true < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,NEWDBG --implicit-check-not=llvm.dbg --implicit-check-not=#dbg
 
+;; Test that the preserving flag overrides the write flag.
+; RUN: opt --passes=verify -S --preserve-input-debuginfo-format=true --write-experimental-debuginfo=true < %s \
+; RUN:   | FileCheck %s --check-prefixes=CHECK,OLDDBG  --implicit-check-not=llvm.dbg --implicit-check-not=#dbg
+
+; RUN: opt --passes=verify -S --write-experimental-debuginfo=true < %s \
+; RUN:   | opt --passes=verify -S --preserve-input-debuginfo-format=true --write-experimental-debuginfo=false \
+; RUN:   | FileCheck %s --check-prefixes=CHECK,NEWDBG  --implicit-check-not=llvm.dbg --implicit-check-not=#dbg
+
 ; CHECK: @f(i32 %[[VAL_A:[0-9a-zA-Z]+]])
 ; CHECK-NEXT: entry:
 ; OLDDBG-NEXT: call void @llvm.dbg.value(metadata i32 %[[VAL_A]], metadata ![[VAR_A:[0-9]+]], metadata !DIExpression()), !dbg ![[LOC_1:[0-9]+]]
diff --git a/llvm/tools/llvm-link/llvm-link.cpp b/llvm/tools/llvm-link/llvm-link.cpp
index 9e7f2c3ebac437..c73ff446939469 100644
--- a/llvm/tools/llvm-link/llvm-link.cpp
+++ b/llvm/tools/llvm-link/llvm-link.cpp
@@ -135,6 +135,7 @@ static cl::opt<bool> TryUseNewDbgInfoFormat(
     cl::init(false));
 
 extern cl::opt<bool> UseNewDbgInfoFormat;
+extern cl::opt<cl::boolOrDefault> PreserveInputDbgFormat;
 
 static ExitOnError ExitOnErr;
 
@@ -486,6 +487,10 @@ int main(int argc, char **argv) {
     // Turn the new debug-info format on.
     UseNewDbgInfoFormat = true;
   }
+  // Since llvm-link collects multiple IR modules together, for simplicity's
+  // sake we disable the "PreserveInputDbgFormat" flag to enforce a single
+  // debug info format.
+  PreserveInputDbgFormat = cl::boolOrDefault::BOU_FALSE;
 
   LLVMContext Context;
   Context.setDiagnosticHandler(std::make_unique<LLVMLinkDiagnosticHandler>(),
diff --git a/llvm/tools/llvm-lto/llvm-lto.cpp b/llvm/tools/llvm-lto/llvm-lto.cpp
index 3c452b650cee1b..f310097eec634f 100644
--- a/llvm/tools/llvm-lto/llvm-lto.cpp
+++ b/llvm/tools/llvm-lto/llvm-lto.cpp
@@ -271,6 +271,7 @@ static cl::opt<bool> TryUseNewDbgInfoFormat(
 
 extern cl::opt<bool> UseNewDbgInfoFormat;
 extern cl::opt<cl::boolOrDefault> LoadBitcodeIntoNewDbgInfoFormat;
+extern cl::opt<cl::boolOrDefault> PreserveInputDbgFormat;
 
 namespace {
 
@@ -954,6 +955,10 @@ int main(int argc, char **argv) {
     // Turn the new debug-info format on.
     UseNewDbgInfoFormat = true;
   }
+  // Since llvm-lto collects multiple IR modules together, for simplicity's sake
+  // we disable the "PreserveInputDbgFormat" flag to enforce a single debug info
+  // format.
+  PreserveInputDbgFormat = cl::boolOrDefault::BOU_FALSE;
 
   if (OptLevel < '0' || OptLevel > '3')
     error("optimization level must be between 0 and 3");
diff --git a/llvm/tools/llvm-lto2/llvm-lto2.cpp b/llvm/tools/llvm-lto2/llvm-lto2.cpp
index f222d02bd7cea7..faed9ff9939bd5 100644
--- a/llvm/tools/llvm-lto2/llvm-lto2.cpp
+++ b/llvm/tools/llvm-lto2/llvm-lto2.cpp
@@ -194,6 +194,7 @@ static cl::opt<bool> TryUseNewDbgInfoFormat(
 
 extern cl::opt<bool> UseNewDbgInfoFormat;
 extern cl::opt<cl::boolOrDefault> LoadBitcodeIntoNewDbgInfoFormat;
+extern cl::opt<cl::boolOrDefault> PreserveInputDbgFormat;
 
 static void check(Error E, std::string Msg) {
   if (!E)
@@ -239,6 +240,10 @@ static int run(int argc, char **argv) {
     // Turn the new debug-info format on.
     UseNewDbgInfoFormat = true;
   }
+  // Since llvm-lto2 collects multiple IR modules together, for simplicity's
+  // sake we disable the "PreserveInputDbgFormat" flag to enforce a single debug
+  // info format.
+  PreserveInputDbgFormat = cl::boolOrDefault::BOU_FALSE;
 
   // FIXME: Workaround PR30396 which means that a symbol can appear
   // more than once if it is defined in module-level assembly and

>From c75af616036445b998946aba15bb9818b101f36c Mon Sep 17 00:00:00 2001
From: Stephen Tozer <stephen.tozer at sony.com>
Date: Fri, 5 Apr 2024 08:34:01 +0100
Subject: [PATCH 2/3] Move flag-setting to separate fn, address nits

---
 llvm/include/llvm/IR/BasicBlock.h         |  7 ++++---
 llvm/include/llvm/IR/Function.h           |  7 ++++---
 llvm/include/llvm/IR/Module.h             | 20 +++++++++++++-------
 llvm/lib/AsmParser/LLParser.cpp           |  4 ++--
 llvm/lib/Bitcode/Reader/BitcodeReader.cpp | 10 ++++++----
 llvm/lib/IR/BasicBlock.cpp                | 22 +++++++++++-----------
 llvm/lib/IR/Function.cpp                  | 20 +++++++++++++-------
 7 files changed, 53 insertions(+), 37 deletions(-)

diff --git a/llvm/include/llvm/IR/BasicBlock.h b/llvm/include/llvm/IR/BasicBlock.h
index f14d5d92da72de..61ec60f9f4485b 100644
--- a/llvm/include/llvm/IR/BasicBlock.h
+++ b/llvm/include/llvm/IR/BasicBlock.h
@@ -81,17 +81,18 @@ class BasicBlock final : public Value, // Basic blocks are data objects also
   /// intrinsics into DbgMarkers / DbgRecords. Deletes all dbg.values in
   /// the process and sets IsNewDbgInfoFormat = true. Only takes effect if
   /// the UseNewDbgInfoFormat LLVM command line option is given.
-  void convertToNewDbgValues(bool UpdateFlagOnly = false);
+  void convertToNewDbgValues();
 
   /// Convert variable location debugging information stored in DbgMarkers and
   /// DbgRecords into the dbg.value intrinsic representation. Sets
   /// IsNewDbgInfoFormat = false.
-  void convertFromNewDbgValues(bool UpdateFlagOnly = false);
+  void convertFromNewDbgValues();
 
   /// Ensure the block is in "old" dbg.value format (\p NewFlag == false) or
   /// in the new format (\p NewFlag == true), converting to the desired format
   /// if necessary.
-  void setIsNewDbgInfoFormat(bool NewFlag, bool UpdateFlagOnly = false);
+  void setIsNewDbgInfoFormat(bool NewFlag);
+  void setNewDbgInfoFormatFlag(bool NewFlag);
 
   /// Record that the collection of DbgRecords in \p M "trails" after the last
   /// instruction of this block. These are equivalent to dbg.value intrinsics
diff --git a/llvm/include/llvm/IR/Function.h b/llvm/include/llvm/IR/Function.h
index 8c881f38748fdc..60f41b30e91c24 100644
--- a/llvm/include/llvm/IR/Function.h
+++ b/llvm/include/llvm/IR/Function.h
@@ -114,12 +114,13 @@ class LLVM_EXTERNAL_VISIBILITY Function : public GlobalObject,
   }
 
   /// \see BasicBlock::convertToNewDbgValues.
-  void convertToNewDbgValues(bool UpdateFlagOnly = false);
+  void convertToNewDbgValues();
 
   /// \see BasicBlock::convertFromNewDbgValues.
-  void convertFromNewDbgValues(bool UpdateFlagOnly = false);
+  void convertFromNewDbgValues();
 
-  void setIsNewDbgInfoFormat(bool NewVal, bool UpdateFlagOnly = false);
+  void setIsNewDbgInfoFormat(bool NewVal);
+  void setNewDbgInfoFormatFlag(bool NewVal);
 
 private:
   friend class TargetLibraryInfoImpl;
diff --git a/llvm/include/llvm/IR/Module.h b/llvm/include/llvm/IR/Module.h
index bc3986140f0b9a..6135e15fd030f7 100644
--- a/llvm/include/llvm/IR/Module.h
+++ b/llvm/include/llvm/IR/Module.h
@@ -224,26 +224,32 @@ class LLVM_EXTERNAL_VISIBILITY Module {
   void removeDebugIntrinsicDeclarations();
 
   /// \see BasicBlock::convertToNewDbgValues.
-  void convertToNewDbgValues(bool UpdateFlagOnly = false) {
+  void convertToNewDbgValues() {
     for (auto &F : *this) {
-      F.convertToNewDbgValues(UpdateFlagOnly);
+      F.convertToNewDbgValues();
     }
     IsNewDbgInfoFormat = true;
   }
 
   /// \see BasicBlock::convertFromNewDbgValues.
-  void convertFromNewDbgValues(bool UpdateFlagOnly = false) {
+  void convertFromNewDbgValues() {
     for (auto &F : *this) {
-      F.convertFromNewDbgValues(UpdateFlagOnly);
+      F.convertFromNewDbgValues();
     }
     IsNewDbgInfoFormat = false;
   }
 
-  void setIsNewDbgInfoFormat(bool UseNewFormat, bool UpdateFlagOnly = false) {
+  void setIsNewDbgInfoFormat(bool UseNewFormat) {
     if (UseNewFormat && !IsNewDbgInfoFormat)
-      convertToNewDbgValues(UpdateFlagOnly);
+      convertToNewDbgValues();
     else if (!UseNewFormat && IsNewDbgInfoFormat)
-      convertFromNewDbgValues(UpdateFlagOnly);
+      convertFromNewDbgValues();
+  }
+  void setNewDbgInfoFormatFlag(bool NewFlag) {
+    for (auto &F : *this) {
+      F.setNewDbgInfoFormatFlag(NewFlag);
+    }
+    IsNewDbgInfoFormat = NewFlag;
   }
 
   /// The Module constructor. Note that there is no default constructor. You
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index 70a6ef864f5656..c82826708f1a91 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -6519,7 +6519,7 @@ bool LLParser::parseBasicBlock(PerFunctionState &PFS) {
         return error(Lex.getLoc(), "debug record should not appear in a module "
                                    "containing debug info intrinsics");
       if (!SeenNewDbgInfoFormat)
-        M->setIsNewDbgInfoFormat(true, true);
+        M->setNewDbgInfoFormatFlag(true);
       SeenNewDbgInfoFormat = true;
       Lex.Lex();
 
@@ -7924,7 +7924,7 @@ bool LLParser::parseCall(Instruction *&Inst, PerFunctionState &PFS,
                             "using non-intrinsic debug info");
     }
     if (!SeenOldDbgInfoFormat)
-      M->setIsNewDbgInfoFormat(false, true);
+      M->setNewDbgInfoFormatFlag(false);
     SeenOldDbgInfoFormat = true;
   }
   CI->setAttributes(PAL);
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index 5d92d8bed061b3..eccb975270b941 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -6835,9 +6835,9 @@ Error BitcodeReader::materialize(GlobalValue *GV) {
     // function because there must be no existing intrinsics to convert.
     // Otherwise, just set the format on this function now.
     if (NewDbgInfoFormatDesired != F->getParent()->IsNewDbgInfoFormat)
-      F->getParent()->setIsNewDbgInfoFormat(NewDbgInfoFormatDesired, true);
+      F->getParent()->setNewDbgInfoFormatFlag(NewDbgInfoFormatDesired);
     else
-      F->setIsNewDbgInfoFormat(NewDbgInfoFormatDesired, true);
+      F->setNewDbgInfoFormatFlag(NewDbgInfoFormatDesired);
   } else {
     // If we aren't preserving formats, we use the Module flag to get our
     // desired format instead of reading flags, in case we are lazy-loading and
@@ -6846,8 +6846,10 @@ Error BitcodeReader::materialize(GlobalValue *GV) {
     // desire the intrinsic format; everything else is a no-op or handled by the
     // autoupgrader.
     bool ModuleIsNewDbgInfoFormat = F->getParent()->IsNewDbgInfoFormat;
-    F->setIsNewDbgInfoFormat(ModuleIsNewDbgInfoFormat,
-                             ModuleIsNewDbgInfoFormat || !SeenDebugRecord);
+    if (ModuleIsNewDbgInfoFormat || !SeenDebugRecord)
+      F->setNewDbgInfoFormatFlag(ModuleIsNewDbgInfoFormat);
+    else
+      F->setIsNewDbgInfoFormat(ModuleIsNewDbgInfoFormat);
   }
 
   if (StripDebugInfo)
diff --git a/llvm/lib/IR/BasicBlock.cpp b/llvm/lib/IR/BasicBlock.cpp
index 3bb0f423f270ca..6b8d26b69e8598 100644
--- a/llvm/lib/IR/BasicBlock.cpp
+++ b/llvm/lib/IR/BasicBlock.cpp
@@ -40,7 +40,8 @@ cl::opt<cl::boolOrDefault> PreserveInputDbgFormat(
     cl::desc("When set to true, IR files will be processed and printed in "
              "their current debug info format, regardless of default behaviour "
              "or other flags passed. Has no effect if input IR does not "
-             "contain debug records or intrinsics."));
+             "contain debug records or intrinsics. Ignored in llvm-link, "
+             "llvm-lto, and llvm-lto2."));
 
 bool WriteNewDbgInfoFormatToBitcode /*set default value in cl::init() below*/;
 cl::opt<bool, true> WriteNewDbgInfoFormatToBitcode2(
@@ -71,10 +72,8 @@ DbgMarker *BasicBlock::createMarker(InstListType::iterator It) {
   return DM;
 }
 
-void BasicBlock::convertToNewDbgValues(bool UpdateFlagOnly) {
+void BasicBlock::convertToNewDbgValues() {
   IsNewDbgInfoFormat = true;
-  if (UpdateFlagOnly)
-    return;
 
   // Iterate over all instructions in the instruction list, collecting debug
   // info intrinsics and converting them to DbgRecords. Once we find a "real"
@@ -112,11 +111,9 @@ void BasicBlock::convertToNewDbgValues(bool UpdateFlagOnly) {
   }
 }
 
-void BasicBlock::convertFromNewDbgValues(bool UpdateFlagOnly) {
-  IsNewDbgInfoFormat = false;
-  if (UpdateFlagOnly)
-    return;
+void BasicBlock::convertFromNewDbgValues() {
   invalidateOrders();
+  IsNewDbgInfoFormat = false;
 
   // Iterate over the block, finding instructions annotated with DbgMarkers.
   // Convert any attached DbgRecords to debug intrinsics and insert ahead of the
@@ -151,11 +148,14 @@ void BasicBlock::dumpDbgValues() const {
 }
 #endif
 
-void BasicBlock::setIsNewDbgInfoFormat(bool NewFlag, bool UpdateFlagOnly) {
+void BasicBlock::setIsNewDbgInfoFormat(bool NewFlag) {
   if (NewFlag && !IsNewDbgInfoFormat)
-    convertToNewDbgValues(UpdateFlagOnly);
+    convertToNewDbgValues();
   else if (!NewFlag && IsNewDbgInfoFormat)
-    convertFromNewDbgValues(UpdateFlagOnly);
+    convertFromNewDbgValues();
+}
+void BasicBlock::setNewDbgInfoFormatFlag(bool NewFlag) {
+  IsNewDbgInfoFormat = NewFlag;
 }
 
 ValueSymbolTable *BasicBlock::getValueSymbolTable() {
diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp
index 953977ca33ed40..b5fda9bb3d129a 100644
--- a/llvm/lib/IR/Function.cpp
+++ b/llvm/lib/IR/Function.cpp
@@ -83,25 +83,31 @@ static cl::opt<unsigned> NonGlobalValueMaxNameSize(
     "non-global-value-max-name-size", cl::Hidden, cl::init(1024),
     cl::desc("Maximum size for the name of non-global values."));
 
-void Function::convertToNewDbgValues(bool UpdateFlagOnly) {
+void Function::convertToNewDbgValues() {
   IsNewDbgInfoFormat = true;
   for (auto &BB : *this) {
-    BB.convertToNewDbgValues(UpdateFlagOnly);
+    BB.convertToNewDbgValues();
   }
 }
 
-void Function::convertFromNewDbgValues(bool UpdateFlagOnly) {
+void Function::convertFromNewDbgValues() {
   IsNewDbgInfoFormat = false;
   for (auto &BB : *this) {
-    BB.convertFromNewDbgValues(UpdateFlagOnly);
+    BB.convertFromNewDbgValues();
   }
 }
 
-void Function::setIsNewDbgInfoFormat(bool NewFlag, bool UpdateFlagOnly) {
+void Function::setIsNewDbgInfoFormat(bool NewFlag) {
   if (NewFlag && !IsNewDbgInfoFormat)
-    convertToNewDbgValues(UpdateFlagOnly);
+    convertToNewDbgValues();
   else if (!NewFlag && IsNewDbgInfoFormat)
-    convertFromNewDbgValues(UpdateFlagOnly);
+    convertFromNewDbgValues();
+}
+void Function::setNewDbgInfoFormatFlag(bool NewFlag) {
+  for (auto &BB : *this) {
+    BB.setNewDbgInfoFormatFlag(NewFlag);
+  }
+  IsNewDbgInfoFormat = NewFlag;
 }
 
 //===----------------------------------------------------------------------===//

>From 5598bfb1eb4e1f0ab54f470e1ea9b2c7526db9fd Mon Sep 17 00:00:00 2001
From: Stephen Tozer <stephen.tozer at sony.com>
Date: Fri, 5 Apr 2024 08:51:07 +0100
Subject: [PATCH 3/3] Update flag descriptions

---
 llvm/lib/IR/BasicBlock.cpp       | 11 ++++++-----
 llvm/lib/IR/IRPrintingPasses.cpp |  3 ++-
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/IR/BasicBlock.cpp b/llvm/lib/IR/BasicBlock.cpp
index 6b8d26b69e8598..40dc1a0928eee0 100644
--- a/llvm/lib/IR/BasicBlock.cpp
+++ b/llvm/lib/IR/BasicBlock.cpp
@@ -30,11 +30,12 @@ using namespace llvm;
 #define DEBUG_TYPE "ir"
 STATISTIC(NumInstrRenumberings, "Number of renumberings across all blocks");
 
-cl::opt<bool>
-    UseNewDbgInfoFormat("experimental-debuginfo-iterators",
-                        cl::desc("Enable communicating debuginfo positions "
-                                 "through iterators, eliminating intrinsics"),
-                        cl::init(true));
+cl::opt<bool> UseNewDbgInfoFormat(
+    "experimental-debuginfo-iterators",
+    cl::desc("Enable communicating debuginfo positions through iterators, "
+             "eliminating intrinsics. Has no effect if "
+             "--preserve-input-debuginfo-format=true."),
+    cl::init(true));
 cl::opt<cl::boolOrDefault> PreserveInputDbgFormat(
     "preserve-input-debuginfo-format", cl::Hidden,
     cl::desc("When set to true, IR files will be processed and printed in "
diff --git a/llvm/lib/IR/IRPrintingPasses.cpp b/llvm/lib/IR/IRPrintingPasses.cpp
index 84fb8e6c66b8f9..43252c57afca99 100644
--- a/llvm/lib/IR/IRPrintingPasses.cpp
+++ b/llvm/lib/IR/IRPrintingPasses.cpp
@@ -25,7 +25,8 @@ using namespace llvm;
 
 cl::opt<bool> WriteNewDbgInfoFormat(
     "write-experimental-debuginfo",
-    cl::desc("Write debug info in the new non-intrinsic format"),
+    cl::desc("Write debug info in the new non-intrinsic format. Has no effect "
+             "if --preserve-input-debuginfo-format=true."),
     cl::init(false));
 
 namespace {



More information about the llvm-commits mailing list