[llvm] [RemoveDIs] Fix spliceDebugInfo splice-to-end edge case (PR #105671)

Orlando Cazalet-Hyams via llvm-commits llvm-commits at lists.llvm.org
Thu Aug 22 07:31:17 PDT 2024


https://github.com/OCHyams created https://github.com/llvm/llvm-project/pull/105671

Fix https://github.com/llvm/llvm-project/issues/105571 which demonstrates an end() iterator dereference when
performing a non-empty splice to end() from a region that ends at
Src::end().

Rather than calling Instruction::adoptDbgRecords from Dest, create a marker
(which takes an iterator) and absorbDebugValues onto that. The "absorb" variant
doesn't clean up the source marker, which in this case we know is a trailing
marker, so we have to do that manually.

---

Note, this is based on #105670 - please ignore the first commit in this PR (I'll rebase once that one goes in).

>From 716952debaeff657af33a72c43db87a31d843a45 Mon Sep 17 00:00:00 2001
From: Orlando Cazalet-Hyams <orlando.hyams at sony.com>
Date: Thu, 22 Aug 2024 11:30:45 +0100
Subject: [PATCH 1/2] [RemoveDIs] Simplify spliceDebugInfo, fixing
 splice-to-end edge case

Not quite NFC, fixes splitBasicBlockBefore case when we split before an
instruction with debug records (but without the headBit set, i.e., we are
splitting before the instruction but after the debug records that come before
it). splitBasicBlockBefore splices the instructions before the split point into
a new block. Prior to this patch, the debug records would get shifted up to the
front of the spliced instructions (as seen in the modified unittest - I believe
the unittest was checking erroneous behaviour). We instead want to leave those
debug records at the end of the spliced instructions.
---
 llvm/lib/IR/BasicBlock.cpp                  | 24 +++++++++------------
 llvm/unittests/IR/BasicBlockDbgInfoTest.cpp |  4 ++--
 2 files changed, 12 insertions(+), 16 deletions(-)

diff --git a/llvm/lib/IR/BasicBlock.cpp b/llvm/lib/IR/BasicBlock.cpp
index cf05b11c53963c..e259ec59da0b9e 100644
--- a/llvm/lib/IR/BasicBlock.cpp
+++ b/llvm/lib/IR/BasicBlock.cpp
@@ -961,9 +961,13 @@ void BasicBlock::spliceDebugInfoImpl(BasicBlock::iterator Dest, BasicBlock *Src,
   // Detach the marker at Dest -- this lets us move the "====" DbgRecords
   // around.
   DbgMarker *DestMarker = nullptr;
-  if (Dest != end()) {
-    if ((DestMarker = getMarker(Dest)))
+  if ((DestMarker = getMarker(Dest))) {
+    if (Dest == end()) {
+      assert(DestMarker == getTrailingDbgRecords());
+      deleteTrailingDbgRecords();
+    } else {
       DestMarker->removeFromParent();
+    }
   }
 
   // If we're moving the tail range of DbgRecords (":::"), absorb them into the
@@ -1005,22 +1009,14 @@ void BasicBlock::spliceDebugInfoImpl(BasicBlock::iterator Dest, BasicBlock *Src,
     } else {
       // Insert them right at the start of the range we moved, ahead of First
       // and the "++++" DbgRecords.
+      // This also covers the rare circumstance where we insert at end(), and we
+      // did not generate the iterator with begin() / getFirstInsertionPt(),
+      // meaning any trailing debug-info at the end of the block would
+      // "normally" have been pushed in front of "First". We move it there now.
       DbgMarker *FirstMarker = createMarker(First);
       FirstMarker->absorbDebugValues(*DestMarker, true);
     }
     DestMarker->eraseFromParent();
-  } else if (Dest == end() && !InsertAtHead) {
-    // In the rare circumstance where we insert at end(), and we did not
-    // generate the iterator with begin() / getFirstInsertionPt(), it means
-    // any trailing debug-info at the end of the block would "normally" have
-    // been pushed in front of "First". Move it there now.
-    DbgMarker *TrailingDbgRecords = getTrailingDbgRecords();
-    if (TrailingDbgRecords) {
-      DbgMarker *FirstMarker = createMarker(First);
-      FirstMarker->absorbDebugValues(*TrailingDbgRecords, true);
-      TrailingDbgRecords->eraseFromParent();
-      deleteTrailingDbgRecords();
-    }
   }
 }
 
diff --git a/llvm/unittests/IR/BasicBlockDbgInfoTest.cpp b/llvm/unittests/IR/BasicBlockDbgInfoTest.cpp
index 91a0745a0cc76e..835780e63aaf4f 100644
--- a/llvm/unittests/IR/BasicBlockDbgInfoTest.cpp
+++ b/llvm/unittests/IR/BasicBlockDbgInfoTest.cpp
@@ -141,11 +141,11 @@ TEST(BasicBlockDbgInfoTest, SplitBasicBlockBefore) {
   Function *F = M->getFunction("func");
 
   BasicBlock &BB = F->getEntryBlock();
-  auto I = std::prev(BB.end(), 2);
+  auto I = std::prev(BB.end(), 2); // store i32 2, ptr %1.
   BB.splitBasicBlockBefore(I, "before");
 
   BasicBlock &BBBefore = F->getEntryBlock();
-  auto I2 = std::prev(BBBefore.end(), 2);
+  auto I2 = std::prev(BBBefore.end()); // br label %1 (new).
   ASSERT_TRUE(I2->hasDbgRecords());
 }
 

>From 63aedda2fca58ea7d4c25c96107c421d4269bf5b Mon Sep 17 00:00:00 2001
From: Orlando Cazalet-Hyams <orlando.hyams at sony.com>
Date: Thu, 22 Aug 2024 14:52:34 +0100
Subject: [PATCH 2/2] [RemoveDIs] Fix spliceDebugInfo splice-to-end edge case

Fix #105571 which demonstrates an end() iterator dereference when
performing a non-empty splice to end() from a region that ends at
Src::end().

Rather than calling Instruction::adoptDbgRecords from Dest, create a marker
(which takes an iterator) and absorbDebugValues onto that. The "absorb" variant
doesn't clean up the source marker, which in this case we know is a trailing
marker, so we have to do that manually.
---
 llvm/lib/IR/BasicBlock.cpp                  | 12 ++++-
 llvm/unittests/IR/BasicBlockDbgInfoTest.cpp | 54 +++++++++++++++++++++
 2 files changed, 64 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/IR/BasicBlock.cpp b/llvm/lib/IR/BasicBlock.cpp
index e259ec59da0b9e..39cefa5280c62f 100644
--- a/llvm/lib/IR/BasicBlock.cpp
+++ b/llvm/lib/IR/BasicBlock.cpp
@@ -975,8 +975,16 @@ void BasicBlock::spliceDebugInfoImpl(BasicBlock::iterator Dest, BasicBlock *Src,
   if (ReadFromTail && Src->getMarker(Last)) {
     DbgMarker *FromLast = Src->getMarker(Last);
     if (LastIsEnd) {
-      Dest->adoptDbgRecords(Src, Last, true);
-      // adoptDbgRecords will release any trailers.
+      if (Dest == end()) {
+        // Abosrb the trailing markers from Src.
+        assert(FromLast == Src->getTrailingDbgRecords());
+        createMarker(Dest)->absorbDebugValues(*FromLast, true);
+        FromLast->eraseFromParent();
+        Src->deleteTrailingDbgRecords();
+      } else {
+        // adoptDbgRecords will release any trailers.
+        Dest->adoptDbgRecords(Src, Last, true);
+      }
       assert(!Src->getTrailingDbgRecords());
     } else {
       // FIXME: can we use adoptDbgRecords here to reduce allocations?
diff --git a/llvm/unittests/IR/BasicBlockDbgInfoTest.cpp b/llvm/unittests/IR/BasicBlockDbgInfoTest.cpp
index 835780e63aaf4f..5d62ce6f2c875d 100644
--- a/llvm/unittests/IR/BasicBlockDbgInfoTest.cpp
+++ b/llvm/unittests/IR/BasicBlockDbgInfoTest.cpp
@@ -1525,4 +1525,58 @@ TEST(BasicBlockDbgInfoTest, DbgMoveToEnd) {
   EXPECT_FALSE(Ret->hasDbgRecords());
 }
 
+TEST(BasicBlockDbgInfoTest, DbgKnownSentinelCrash) {
+  return;
+  LLVMContext C;
+  std::unique_ptr<Module> M = parseIR(C, R"(
+    define i16 @foo(i16 %a) !dbg !6 {
+    entry:
+      %b = add i16 %a, 0
+        #dbg_value(i16 %b, !9, !DIExpression(), !11)
+      ret i16 0, !dbg !11
+    }
+
+    !llvm.dbg.cu = !{!0}
+    !llvm.module.flags = !{!5}
+
+    !0 = distinct !DICompileUnit(language: DW_LANG_C, file: !1, producer: "debugify", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
+    !1 = !DIFile(filename: "t.ll", directory: "/")
+    !2 = !{}
+    !5 = !{i32 2, !"Debug Info Version", i32 3}
+    !6 = distinct !DISubprogram(name: "foo", linkageName: "foo", scope: null, file: !1, line: 1, type: !7, scopeLine: 1, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !8)
+    !7 = !DISubroutineType(types: !2)
+    !8 = !{!9}
+    !9 = !DILocalVariable(name: "1", scope: !6, file: !1, line: 1, type: !10)
+    !10 = !DIBasicType(name: "ty16", size: 16, encoding: DW_ATE_unsigned)
+    !11 = !DILocation(line: 1, column: 1, scope: !6)
+)");
+  ASSERT_TRUE(M);
+
+  Function *F = M->getFunction("foo");
+  BasicBlock &BB = F->getEntryBlock();
+  // Start with no trailing records.
+  ASSERT_FALSE(BB.getTrailingDbgRecords());
+
+  BasicBlock::iterator Ret = std::prev(BB.end());
+  BasicBlock::iterator B = std::prev(Ret);
+
+  // Delete terminator which has debug records: we now get trailing records.
+  Ret->eraseFromParent();
+  EXPECT_TRUE(BB.getTrailingDbgRecords());
+
+  BasicBlock *NewBB = BasicBlock::Create(C, "NewBB", F);
+  NewBB->splice(NewBB->end(), &BB, B, BB.end());
+
+  // The trailing records should've been absorbed into NewBB.
+  EXPECT_FALSE(BB.getTrailingDbgRecords());
+  EXPECT_TRUE(NewBB->getTrailingDbgRecords());
+  if (NewBB->getTrailingDbgRecords())
+    EXPECT_EQ(
+        llvm::range_size(NewBB->getTrailingDbgRecords()->getDbgRecordRange()),
+        1);
+
+  // Drop the trailing records now, to prevent a cleanup assertion.
+  NewBB->deleteTrailingDbgRecords();
+}
+
 } // End anonymous namespace.



More information about the llvm-commits mailing list