[llvm] [SelectionDAG] Salvage debuginfo when combining load and sext instrs. (PR #169779)

Shubham Sandeep Rastogi via llvm-commits llvm-commits at lists.llvm.org
Thu Nov 27 10:10:19 PST 2025


https://github.com/rastogishubham updated https://github.com/llvm/llvm-project/pull/169779

>From 476c477f195d0328b5608394bd71042904995f9c Mon Sep 17 00:00:00 2001
From: Shubham Sandeep Rastogi <Shubham.Rastogi at sony.com>
Date: Wed, 26 Nov 2025 10:02:47 -0800
Subject: [PATCH 1/2] [SelectionDAG]Salvage debuginfo when combining load and
 sext instrs.

SelectionDAG uses the DAGCombiner to fold a load followed by a sext to
a load and sext instruction. For example, in x86 we will see that

%1 = load i32, ptr @GlobArr
  #dbg_value(i32 %1, !43, !DIExpression(), !52)
%2 = sext i32 %1 to i64, !dbg !53

is converted to:

%0:gr64_nosp = MOVSX64rm32 $rip, 1, $noreg, @GlobArr, $noreg,
debug-instr-number 1, debug-location !51
DBG_VALUE $noreg, $noreg, !"Idx", !DIExpression(), debug-location !52

The DBG_VALUE needs to be transferred correctly to the new combined
instruction, and it needs to be appended with a DIExpression which
contains a DW_OP_LLVM_fragment, describing that the lower bits of the
virtual register contain the value.

This patch fixes the above described problem.
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 42 ++++++++++++++-
 .../DebugInfo/X86/selectionDAG-load-sext.mir  | 54 +++++++++++++++++++
 2 files changed, 95 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/DebugInfo/X86/selectionDAG-load-sext.mir

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 6b79dbb46cadc..2d52719ef97d6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -32,6 +32,7 @@
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Analysis/VectorUtils.h"
+#include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/CodeGen/ByteProvider.h"
 #include "llvm/CodeGen/DAGCombine.h"
 #include "llvm/CodeGen/ISDOpcodes.h"
@@ -51,6 +52,7 @@
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/Constant.h"
 #include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Metadata.h"
@@ -78,6 +80,7 @@
 #include <variant>
 
 #include "MatchContext.h"
+#include "SDNodeDbgValue.h"
 
 using namespace llvm;
 using namespace llvm::SDPatternMatch;
@@ -14444,14 +14447,51 @@ static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
                                    LN0->getMemOperand());
   Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
   // If the load value is used only by N, replace it via CombineTo N.
-  bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
+  SDValue OldLoadVal(LN0, 0);
+  SDValue OldSextValue(N, 0);
+  bool NoReplaceTrunc = OldLoadVal.hasOneUse();
   Combiner.CombineTo(N, ExtLoad);
+
+  // Because we are replacing a load and a sext with a load-sext instruction,
+  // the dbg_value attached to the load will be of a smaller bit width, and we
+  // have to add a DW_OP_LLVM_fragment to the DIExpression.
+  auto SalvageToOldLoadSize = [&](SDValue From, SDValue To64) {
+    for (SDDbgValue *Dbg : DAG.GetDbgValues(From.getNode())) {
+      unsigned VarBits = From->getValueSizeInBits(0);
+
+      // Build/append a fragment expression [0, VarBits]
+      const DIExpression *OldE = Dbg->getExpression();
+      auto NewE = DIExpression::createFragmentExpression(OldE, 0, VarBits);
+
+      // Create a new SDDbgValue that points at the widened node with the
+      // fragment.
+      if (!NewE)
+        continue;
+      SDDbgValue *NewDV = DAG.getDbgValue(
+          Dbg->getVariable(), *NewE, To64.getNode(), To64.getResNo(),
+          Dbg->isIndirect(), Dbg->getDebugLoc(), Dbg->getOrder());
+      DAG.AddDbgValue(NewDV, /*isParametet*/ false);
+    }
+  };
+
   if (NoReplaceTrunc) {
+    if (LN0->getHasDebugValue()) {
+      DAG.transferDbgValues(OldLoadVal, ExtLoad);
+      SalvageToOldLoadSize(OldLoadVal, ExtLoad);
+    }
+    if (N->getHasDebugValue())
+      DAG.transferDbgValues(OldSextValue, ExtLoad);
     DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
     Combiner.recursivelyDeleteUnusedNodes(LN0);
   } else {
     SDValue Trunc =
         DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
+    if (LN0->getHasDebugValue()) {
+      DAG.transferDbgValues(OldLoadVal, Trunc);
+      SalvageToOldLoadSize(OldLoadVal, Trunc);
+    }
+    if (N->getHasDebugValue())
+      DAG.transferDbgValues(OldSextValue, Trunc);
     Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
   }
   return SDValue(N, 0); // Return N so it doesn't get rechecked!
diff --git a/llvm/test/DebugInfo/X86/selectionDAG-load-sext.mir b/llvm/test/DebugInfo/X86/selectionDAG-load-sext.mir
new file mode 100644
index 0000000000000..44fa73c90c3cb
--- /dev/null
+++ b/llvm/test/DebugInfo/X86/selectionDAG-load-sext.mir
@@ -0,0 +1,54 @@
+
+# RUN: llc -O2 %s -mtriple=x86_64-unkown-linux -start-before=x86-isel -stop-after=x86-isel -o - | FileCheck %s --check-prefix=MIR
+# RUN: llc -O2 %s -start-before=x86-isel -mtriple=x86_64-unkown-linux --filetype=obj -o %t.o 
+# RUN: llvm-dwarfdump %t.o --name Idx | FileCheck %s --check-prefix=DUMP
+
+# MIR: ![[IDX:[0-9]+]] = !DILocalVariable(name: "Idx"
+# MIR-LABEL: bb.0
+# MIR: %{{[0-9a-zA-Z]+}}{{.*}} = MOVSX64rm32 ${{.*}}, 1, $noreg, @GlobArr, $noreg, debug-instr-number [[INSTR_NUM:[0-9]+]]
+# MIR-NEXT: DBG_INSTR_REF ![[IDX]], !DIExpression(DW_OP_LLVM_arg, 0), dbg-instr-ref([[INSTR_NUM]], 0)
+# MIR-NEXT DBG_INSTR_REF ![[IDX]], !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_fragment, 0, 32), dbg-instr-ref([[INSTR_NUM]], 0)
+
+# DUMP: DW_AT_location	(indexed ({{[0-9]+}}x{{[0-9]+}}) loclist = 0x{{[0-9]+}}: 
+# DUMP-NEXT: [0x{{[0-9]+}}, 0x{{[0-9]+}}): {{.*}}, DW_OP_piece 0x4
+
+--- |
+  @GlobArr = dso_local local_unnamed_addr global [5 x i32] [i32 1, i32 1, i32 2, i32 3, i32 5], align 16, !dbg !0
+  @__const.main.Data = private unnamed_addr constant [7 x i32] [i32 10, i32 20, i32 30, i32 40, i32 50, i32 60, i32 70], align 16
+  define dso_local void @_Z8useValuei(i32 noundef %0) local_unnamed_addr #0 !dbg !22 {
+    ret void, !dbg !28
+  }
+  define dso_local noundef i32 @main() local_unnamed_addr #1 !dbg !29 {
+    %1 = load i32, ptr @GlobArr
+      #dbg_value(i32 %1, !43, !DIExpression(), !52)
+    %2 = sext i32 %1 to i64
+    %3 = getelementptr inbounds i32, ptr @__const.main.Data, i64 %2
+    %4 = load i32, ptr %3
+    tail call void @_Z8useValuei(i32 noundef %4), !dbg !56
+    ret i32 0
+  }
+    !llvm.dbg.cu = !{!2}  
+  !llvm.module.flags = !{!10, !11, !16}
+  !0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
+  !1 = distinct !DIGlobalVariable(type: !6, isDefinition: true)
+  !2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !3, emissionKind: FullDebug, nameTableKind: None)
+  !3 = !DIFile(filename: "/tmp/test.cpp", directory: "/Users/srastogi/Development/llvm-project/build_ninja", checksumkind: CSK_MD5, checksum: "0fe735937e606b4db3e3b2e9253eff90")
+  !6 = !DICompositeType(tag: DW_TAG_array_type, elements: !8)
+  !7 = !DIBasicType()
+  !8 = !{}
+  !10 = !{i32 7, !"Dwarf Version", i32 5}
+  !11 = !{i32 2, !"Debug Info Version", i32 3}
+  !16 = !{i32 7, !"debug-info-assignment-tracking", i1 true}
+  !22 = distinct !DISubprogram(type: !23, unit: !2, keyInstructions: true)
+  !23 = !DISubroutineType(types: !24)
+  !24 = !{}
+  !28 = !DILocation(scope: !22, atomRank: 1)
+  !29 = distinct !DISubprogram(type: !30, unit: !2, keyInstructions: true)
+  !30 = !DISubroutineType(types: !31)
+  !31 = !{}
+  !38 = distinct !DILexicalBlock(scope: !29, line: 5, column: 3)
+  !43 = !DILocalVariable(name: "Idx", scope: !44, type: !7)
+  !44 = distinct !DILexicalBlock(scope: !38, line: 5, column: 3)
+  !46 = distinct !DILexicalBlock(scope: !44, line: 5, column: 27)
+  !52 = !DILocation(scope: !44)
+  !56 = !DILocation(scope: !46)

>From f696f0cbe1e1fcbbe1ef069cd32f50fdab9819c2 Mon Sep 17 00:00:00 2001
From: Shubham Sandeep Rastogi <Shubham.Rastogi at sony.com>
Date: Thu, 27 Nov 2025 10:08:11 -0800
Subject: [PATCH 2/2] Addressed review feedback:

1. Removed whitespace from top of llvm/test/DebugInfo/X86/selectionDAG-load-sext.mir

2. Added a sentence to explain why llvm/test/DebugInfo/X86/selectionDAG-load-sext.mir exists

3. Removed #include "llvm/BinaryFormat/Dwarf.h" from top of DAGCombiner.cpp
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp      | 1 -
 llvm/test/DebugInfo/X86/selectionDAG-load-sext.mir | 3 ++-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 2d52719ef97d6..99cc9e28a5946 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -32,7 +32,6 @@
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Analysis/VectorUtils.h"
-#include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/CodeGen/ByteProvider.h"
 #include "llvm/CodeGen/DAGCombine.h"
 #include "llvm/CodeGen/ISDOpcodes.h"
diff --git a/llvm/test/DebugInfo/X86/selectionDAG-load-sext.mir b/llvm/test/DebugInfo/X86/selectionDAG-load-sext.mir
index 44fa73c90c3cb..a20c1e754eb0f 100644
--- a/llvm/test/DebugInfo/X86/selectionDAG-load-sext.mir
+++ b/llvm/test/DebugInfo/X86/selectionDAG-load-sext.mir
@@ -1,3 +1,4 @@
+# This test checks that after SelectionDAG runs, it preserves the debug info that is lost due to the DAGCombiner combining a load and a sext instruction, where the #dbg_value is pointing to the result of the load.
 
 # RUN: llc -O2 %s -mtriple=x86_64-unkown-linux -start-before=x86-isel -stop-after=x86-isel -o - | FileCheck %s --check-prefix=MIR
 # RUN: llc -O2 %s -start-before=x86-isel -mtriple=x86_64-unkown-linux --filetype=obj -o %t.o 
@@ -7,7 +8,7 @@
 # MIR-LABEL: bb.0
 # MIR: %{{[0-9a-zA-Z]+}}{{.*}} = MOVSX64rm32 ${{.*}}, 1, $noreg, @GlobArr, $noreg, debug-instr-number [[INSTR_NUM:[0-9]+]]
 # MIR-NEXT: DBG_INSTR_REF ![[IDX]], !DIExpression(DW_OP_LLVM_arg, 0), dbg-instr-ref([[INSTR_NUM]], 0)
-# MIR-NEXT DBG_INSTR_REF ![[IDX]], !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_fragment, 0, 32), dbg-instr-ref([[INSTR_NUM]], 0)
+# MIR-NEXT: DBG_INSTR_REF ![[IDX]], !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_fragment, 0, 32), dbg-instr-ref([[INSTR_NUM]], 0)
 
 # DUMP: DW_AT_location	(indexed ({{[0-9]+}}x{{[0-9]+}}) loclist = 0x{{[0-9]+}}: 
 # DUMP-NEXT: [0x{{[0-9]+}}, 0x{{[0-9]+}}): {{.*}}, DW_OP_piece 0x4



More information about the llvm-commits mailing list