[llvm] [SelectionDAG] Salvage debuginfo when combining load and sext instrs. (PR #169779)

Shubham Sandeep Rastogi via llvm-commits llvm-commits at lists.llvm.org
Wed Dec 10 12:18:17 PST 2025


https://github.com/rastogishubham updated https://github.com/llvm/llvm-project/pull/169779

>From 172f4dfa9186179eec8219a468d2dde625f8c2fb Mon Sep 17 00:00:00 2001
From: Shubham Sandeep Rastogi <Shubham.Rastogi at sony.com>
Date: Wed, 26 Nov 2025 10:02:47 -0800
Subject: [PATCH] [SelectionDAG] Salvage debuginfo when combining load and sext
 instrs.

SelectionDAG uses the DAGCombiner to fold a load followed by a sext into
a single sign-extending load instruction. For example, on x86 we will see that

%1 = load i32, ptr @GlobArr
  #dbg_value(i32 %1, !43, !DIExpression(), !52)
%2 = sext i32 %1 to i64, !dbg !53

is converted to:

%0:gr64_nosp = MOVSX64rm32 $rip, 1, $noreg, @GlobArr, $noreg,
debug-instr-number 1, debug-location !51
DBG_VALUE $noreg, $noreg, !"Idx", !DIExpression(), debug-location !52

The DBG_VALUE needs to be transferred correctly to the new combined
instruction, and its DIExpression needs to be extended with
DW_OP_LLVM_convert operations that account for the difference in bit
width between the original load and the widened virtual register.

This patch fixes the above-described problem.
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 41 ++++++++++-
 .../X86/selectionDAG-load-sext-trunc.ll       | 70 +++++++++++++++++++
 .../DebugInfo/X86/selectionDAG-load-sext.ll   | 61 ++++++++++++++++
 3 files changed, 170 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/DebugInfo/X86/selectionDAG-load-sext-trunc.ll
 create mode 100644 llvm/test/DebugInfo/X86/selectionDAG-load-sext.ll

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 6a99d4e29b64f..569ab4cfa3efe 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -51,6 +51,7 @@
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/Constant.h"
 #include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Metadata.h"
@@ -78,6 +79,7 @@
 #include <variant>
 
 #include "MatchContext.h"
+#include "SDNodeDbgValue.h"
 
 using namespace llvm;
 using namespace llvm::SDPatternMatch;
@@ -14465,10 +14467,44 @@ static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
                                    LN0->getBasePtr(), N0.getValueType(),
                                    LN0->getMemOperand());
   Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
+  unsigned Opcode = N->getOpcode();
+  bool IsSigned = Opcode == ISD::SIGN_EXTEND;
   // If the load value is used only by N, replace it via CombineTo N.
-  bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
-  Combiner.CombineTo(N, ExtLoad);
+  SDValue OldLoadVal(LN0, 0);
+  SDValue OldExtValue(N, 0);
+  bool NoReplaceTrunc = OldLoadVal.hasOneUse();
+
+  // Folding a load and a s|z ext into one extending load widens the value the
+  // load produces. A dbg_value attached to the original load describes the
+  // narrower value, so append DW_OP_LLVM_convert operations (via appendExt)
+  // when re-pointing it at the extended load.
+  auto SalvageToOldLoadSize = [&](SDValue From, SDValue To, bool IsSigned) {
+    for (SDDbgValue *Dbg : DAG.GetDbgValues(From.getNode())) {
+      unsigned VarBitsFrom = From->getValueSizeInBits(0);
+      unsigned VarBitsTo = To->getValueSizeInBits(0);
+
+      // Extend the old expression from the load width to the extended width.
+      const DIExpression *OldE = Dbg->getExpression();
+      auto *NewE =
+          DIExpression::appendExt(OldE, VarBitsFrom, VarBitsTo, IsSigned);
+
+      // Retire the old SDDbgValue; the replacement targets the widened node.
+      // NOTE(review): confirm AddDbgValue is safe while iterating GetDbgValues.
+      Dbg->setIsInvalidated();
+      Dbg->setIsEmitted();
+      SDDbgValue *NewDV = DAG.getDbgValue(
+          Dbg->getVariable(), NewE, To.getNode(), To.getResNo(),
+          Dbg->isIndirect(), Dbg->getDebugLoc(), Dbg->getOrder());
+      DAG.AddDbgValue(NewDV, /*isParameter=*/ false);
+    }
+  };
+
   if (NoReplaceTrunc) {
+    if (LN0->getHasDebugValue())
+      SalvageToOldLoadSize(OldLoadVal, ExtLoad, IsSigned);
+
+    if (N->getHasDebugValue())
+      DAG.transferDbgValues(OldExtValue, ExtLoad);
     DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
     Combiner.recursivelyDeleteUnusedNodes(LN0);
   } else {
@@ -14476,6 +14512,7 @@ static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
         DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
     Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
   }
+  Combiner.CombineTo(N, ExtLoad);
   return SDValue(N, 0); // Return N so it doesn't get rechecked!
 }
 
diff --git a/llvm/test/DebugInfo/X86/selectionDAG-load-sext-trunc.ll b/llvm/test/DebugInfo/X86/selectionDAG-load-sext-trunc.ll
new file mode 100644
index 0000000000000..46afb9c598a74
--- /dev/null
+++ b/llvm/test/DebugInfo/X86/selectionDAG-load-sext-trunc.ll
@@ -0,0 +1,70 @@
+; This test checks that SelectionDAG preserves debug info that would otherwise be lost when the DAGCombiner combines a load and a sext instruction and the #dbg_value points to the result of the load.
+; Unlike the single-use case, here the load has multiple uses.
+
+; RUN: llc %s -mtriple=x86_64-unkown-linux -start-before=x86-isel -stop-after=x86-isel -o - | FileCheck %s --check-prefix=MIR
+; RUN: llc -O2 %s -start-before=x86-isel -mtriple=x86_64-unkown-linux --filetype=obj -o %t.o 
+; RUN: llvm-dwarfdump %t.o --name Idx | FileCheck %s --check-prefix=DUMP
+; RUN: llvm-dwarfdump %t.o --name Idx2 | FileCheck %s --check-prefix=DUMP2
+
+; MIR: ![[IDX:[0-9]+]] = !DILocalVariable(name: "Idx"
+; MIR: ![[IDX2:[0-9]+]] = !DILocalVariable(name: "Idx2"
+; MIR: name: _Z8useValuei
+; MIR: name: main
+; MIR: debugValueSubstitutions
+; MIR-NEXT: - { srcinst: [[INSTR_NUM2:[0-9]+]], srcop: 0, dstinst: [[INSTR_NUM:[0-9]+]], dstop: 0, subreg: 6 }
+; MIR-LABEL: bb.0 (%ir-block.0)
+; MIR: %{{[0-9a-f]+}}{{.*}} = MOVSX64rm32 ${{.*}}, 1, $noreg, @GlobArr, $noreg, debug-instr-number [[INSTR_NUM]]
+; MIR-NEXT: {{.*}} = COPY %0.sub_32bit
+; MIR-NEXT DBG_INSTR_REF ![[IDX]], !DIExpression(DW_OP_LLVM_arg, 0), dbg-instr-ref([[INSTR_NUM2]], 0)
+; MIR-NEXT DBG_INSTR_REF ![[IDX2]], !DIExpression(DW_OP_LLVM_arg, 0), dbg-instr-ref([[INSTR_NUM]], 0)
+
+; DUMP: DW_AT_location	(indexed ({{[0-9a-f]+}}x{{[0-9a-f]+}}) loclist = 0x{{[0-9a-f]+}}: 
+; DUMP-NEXT: [0x{{[0-9a-f]+}}, 0x{{[0-9a-f]+}}):  DW_OP_reg3 RBX)
+
+; DUMP2: DW_AT_location	(indexed ({{[0-9a-f]+}}x{{[0-9a-f]+}}) loclist = 0x{{[0-9a-f]+}}: 
+; DUMP2-NEXT: [0x{{[0-9a-f]+}}, 0x{{[0-9a-f]+}}):  DW_OP_reg3 RBX)
+
+
+
+  @GlobArr = dso_local local_unnamed_addr global [5 x i32] [i32 1, i32 1, i32 2, i32 3, i32 5], align 16, !dbg !0
+  @__const.main.Data = private unnamed_addr constant [7 x i32] [i32 10, i32 20, i32 30, i32 40, i32 50, i32 60, i32 70], align 16
+  define dso_local void @_Z8useValuei(i32 noundef %0) local_unnamed_addr #0 !dbg !22 {
+    ret void, !dbg !28
+  }
+  define dso_local noundef i32 @main() local_unnamed_addr #1 !dbg !29 {
+    %1 = load i32, ptr @GlobArr
+      #dbg_value(i32 %1, !43, !DIExpression(), !52)
+    %2 = sext i32 %1 to i64
+      #dbg_value(i64 %2, !57, !DIExpression(), !52)
+    tail call void @_Z8useValuei(i32 noundef %1), !dbg !56
+    %3 = getelementptr inbounds i32, ptr @__const.main.Data, i64 %2
+    %4 = load i32, ptr %3
+    tail call void @_Z8useValuei(i32 noundef %4), !dbg !56
+    ret i32 0
+  }
+    !llvm.dbg.cu = !{!2}  
+  !llvm.module.flags = !{!10, !11, !16}
+  !0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
+  !1 = distinct !DIGlobalVariable(type: !6, isDefinition: true)
+  !2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !3, emissionKind: FullDebug, nameTableKind: None)
+  !3 = !DIFile(filename: "/tmp/test.cpp", directory: "/Users/srastogi/Development/llvm-project/build_ninja", checksumkind: CSK_MD5, checksum: "0fe735937e606b4db3e3b2e9253eff90")
+  !6 = !DICompositeType(tag: DW_TAG_array_type, elements: !8)
+  !7 = !DIBasicType()
+  !8 = !{}
+  !10 = !{i32 7, !"Dwarf Version", i32 5}
+  !11 = !{i32 2, !"Debug Info Version", i32 3}
+  !16 = !{i32 7, !"debug-info-assignment-tracking", i1 true}
+  !22 = distinct !DISubprogram(type: !23, unit: !2, keyInstructions: true)
+  !23 = !DISubroutineType(types: !24)
+  !24 = !{}
+  !28 = !DILocation(scope: !22, atomRank: 1)
+  !29 = distinct !DISubprogram(type: !30, unit: !2, keyInstructions: true)
+  !30 = !DISubroutineType(types: !31)
+  !31 = !{}
+  !38 = distinct !DILexicalBlock(scope: !29, line: 5, column: 3)
+  !43 = !DILocalVariable(name: "Idx", scope: !44, type: !7)
+  !44 = distinct !DILexicalBlock(scope: !38, line: 5, column: 3)
+  !46 = distinct !DILexicalBlock(scope: !44, line: 5, column: 27)
+  !52 = !DILocation(scope: !44)
+  !56 = !DILocation(scope: !46)
+  !57 = !DILocalVariable(name: "Idx2", scope: !44, type: !7)
diff --git a/llvm/test/DebugInfo/X86/selectionDAG-load-sext.ll b/llvm/test/DebugInfo/X86/selectionDAG-load-sext.ll
new file mode 100644
index 0000000000000..7e61780a6ab13
--- /dev/null
+++ b/llvm/test/DebugInfo/X86/selectionDAG-load-sext.ll
@@ -0,0 +1,61 @@
+; This test checks that SelectionDAG preserves debug info that would otherwise be lost when the DAGCombiner combines a load and a sext instruction and the #dbg_value points to the result of the load.
+; RUN: llc %s -mtriple=x86_64-unkown-linux -start-before=x86-isel -stop-after=x86-isel -o - | FileCheck %s --check-prefix=MIR
+; RUN: llc -O2 %s -start-before=x86-isel -mtriple=x86_64-unkown-linux --filetype=obj -o %t.o 
+; RUN: llvm-dwarfdump %t.o --name Idx | FileCheck %s --check-prefix=DUMP
+; RUN: llvm-dwarfdump %t.o --name Idx2 | FileCheck %s --check-prefix=DUMP2
+
+; MIR: ![[IDX:[0-9]+]] = !DILocalVariable(name: "Idx"
+; MIR: ![[IDX2:[0-9]+]] = !DILocalVariable(name: "Idx2"
+; MIR-LABEL: bb.0
+; MIR: %{{[0-9a-f]+}}{{.*}} = MOVSX64rm32 ${{.*}}, 1, $noreg, @GlobArr, $noreg, debug-instr-number [[INSTR_NUM:[0-9]+]]
+; MIR-NEXT: DBG_INSTR_REF ![[IDX]], !DIExpression(DW_OP_LLVM_arg, 0,  DW_OP_LLVM_convert, 32, DW_ATE_signed, DW_OP_LLVM_convert, 64, DW_ATE_signed, DW_OP_stack_value), dbg-instr-ref([[INSTR_NUM]], 0)
+; MIR-NEXT: DBG_INSTR_REF ![[IDX2]], !DIExpression(DW_OP_LLVM_arg, 0), dbg-instr-ref([[INSTR_NUM]], 0)
+
+; DUMP: DW_AT_location	(indexed ({{[0-9a-f]+}}x{{[0-9a-f]+}}) loclist = 0x{{[0-9a-f]+}}: 
+; DUMP-NEXT: [0x{{[0-9a-f]+}}, 0x{{[0-9a-f]+}}): DW_OP_breg0 RAX+0, DW_OP_convert (0x{{[0-9a-f]+}}) "DW_ATE_signed_32", DW_OP_convert (0x{{[0-9a-f]+}}) "DW_ATE_signed_64", DW_OP_stack_value)
+
+; DUMP2: DW_AT_location	(indexed ({{[0-9a-f]+}}x{{[0-9a-f]+}}) loclist = 0x{{[0-9a-f]+}}: 
+; DUMP2-NEXT: [0x{{[0-9a-f]+}}, 0x{{[0-9a-f]+}}): DW_OP_reg0 RAX)
+
+
+  @GlobArr = dso_local local_unnamed_addr global [5 x i32] [i32 1, i32 1, i32 2, i32 3, i32 5], align 16, !dbg !0
+  @__const.main.Data = private unnamed_addr constant [7 x i32] [i32 10, i32 20, i32 30, i32 40, i32 50, i32 60, i32 70], align 16
+  define dso_local void @_Z8useValuei(i32 noundef %0) local_unnamed_addr #0 !dbg !22 {
+    ret void, !dbg !28
+  }
+  define dso_local noundef i32 @main() local_unnamed_addr #1 !dbg !29 {
+    %1 = load i32, ptr @GlobArr
+      #dbg_value(i32 %1, !43, !DIExpression(), !52)
+    %2 = sext i32 %1 to i64
+      #dbg_value(i64 %2, !57, !DIExpression(), !52)
+    %3 = getelementptr inbounds i32, ptr @__const.main.Data, i64 %2
+    %4 = load i32, ptr %3
+    tail call void @_Z8useValuei(i32 noundef %4), !dbg !56
+    ret i32 0
+  }
+    !llvm.dbg.cu = !{!2}  
+  !llvm.module.flags = !{!10, !11, !16}
+  !0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
+  !1 = distinct !DIGlobalVariable(type: !6, isDefinition: true)
+  !2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !3, emissionKind: FullDebug, nameTableKind: None)
+  !3 = !DIFile(filename: "/tmp/test.cpp", directory: "/Users/srastogi/Development/llvm-project/build_ninja", checksumkind: CSK_MD5, checksum: "0fe735937e606b4db3e3b2e9253eff90")
+  !6 = !DICompositeType(tag: DW_TAG_array_type, elements: !8)
+  !7 = !DIBasicType()
+  !8 = !{}
+  !10 = !{i32 7, !"Dwarf Version", i32 5}
+  !11 = !{i32 2, !"Debug Info Version", i32 3}
+  !16 = !{i32 7, !"debug-info-assignment-tracking", i1 true}
+  !22 = distinct !DISubprogram(type: !23, unit: !2, keyInstructions: true)
+  !23 = !DISubroutineType(types: !24)
+  !24 = !{}
+  !28 = !DILocation(scope: !22, atomRank: 1)
+  !29 = distinct !DISubprogram(type: !30, unit: !2, keyInstructions: true)
+  !30 = !DISubroutineType(types: !31)
+  !31 = !{}
+  !38 = distinct !DILexicalBlock(scope: !29, line: 5, column: 3)
+  !43 = !DILocalVariable(name: "Idx", scope: !44, type: !7)
+  !44 = distinct !DILexicalBlock(scope: !38, line: 5, column: 3)
+  !46 = distinct !DILexicalBlock(scope: !44, line: 5, column: 27)
+  !52 = !DILocation(scope: !44)
+  !56 = !DILocation(scope: !46)
+  !57 = !DILocalVariable(name: "Idx2", scope: !44, type: !7)



More information about the llvm-commits mailing list