[lld] [llvm] [Symbolizer] Support for Missing Line Numbers. (PR #82240)

Fri May 17 09:42:21 PDT 2024

+# REQUIRES: x86-registered-target
+# RUN: clang -O3 -gline-tables-only -T%S/linker-script.ld --target=x86_64-pc-linux %s -o %t.o
bd1976bris wrote:

I started with some compiler output for two.c:
/* Returns the constant 10. */
int foo()
{
  return 10;
}

/* Returns the constant 20. */
int bar()
{
  return 20;
}

Then I cleaned up the asm and added a handwritten .debug_line section:

	.section	.text.foo,"ax",@progbits
	.hidden	foo
	.globl	foo
	.type	foo,@function
# foo: returns 10 in %eax.
# Encodes to 6 bytes (5-byte movl + 1-byte retq), which matches the 1 + 5
# byte advance of the first hand-written .debug_line sequence.
foo:
.Lfunc_begin0:
	movl	$10, %eax
	retq
.Lfunc_end0:
	.size	foo, .Lfunc_end0-foo

# bar: calls foo, then returns 20 in %eax.
# No .section directive intervenes, so bar is emitted directly after foo in
# .text.foo. Encodes to 16 bytes, matching the 1 + 15 byte advance of the
# second .debug_line sequence.
	.hidden	bar
	.globl	bar
	.type	bar,@function
bar:
.Lfunc_begin1:
# %bb.0:
	pushq	%rbp                            # also realigns %rsp to 16 for the call
	movq	%rsp, %rbp
	callq	foo
	movl	$20, %eax
	popq	%rbp
	retq
.Lfunc_end1:
	.size	bar, .Lfunc_end1-bar

	.section	.debug_abbrev,"",@progbits
# Single abbreviation (code 1): a childless DW_TAG_compile_unit whose
# attribute/form pairs must match, in order, the values emitted in .debug_info.
	.byte	1                               # Abbreviation Code
	.byte	17                              # DW_TAG_compile_unit
	.byte	0                               # DW_CHILDREN_no
	.byte	37                              # DW_AT_producer
	.byte	14                              # DW_FORM_strp
	.byte	19                              # DW_AT_language
	.byte	5                               # DW_FORM_data2
	.byte	3                               # DW_AT_name
	.byte	14                              # DW_FORM_strp
	.byte	16                              # DW_AT_stmt_list
	.byte	23                              # DW_FORM_sec_offset
	.byte	27                              # DW_AT_comp_dir
	.byte	14                              # DW_FORM_strp
	.byte	83                              # DW_AT_use_UTF8
	.byte	25                              # DW_FORM_flag_present
	.byte	17                              # DW_AT_low_pc
	.byte	1                               # DW_FORM_addr
	.byte	85                              # DW_AT_ranges
	.byte	23                              # DW_FORM_sec_offset
	.byte	0                               # EOM(1)
	.byte	0                               # EOM(2)
	.byte	0                               # EOM(3)
	.section	.debug_info,"",@progbits
# One DWARF v4 compile unit with no children. The unit length covers
# everything after the length field itself, hence the start label is placed
# immediately after it.
	.long	.Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit
.Ldebug_info_start0:
	.short	4                               # DWARF version number
	.long	.debug_abbrev                   # Offset Into Abbrev. Section
	.byte	8                               # Address Size (in bytes)
	.byte	1                               # Abbrev [1] 0xb:0x1f DW_TAG_compile_unit
	.long	.Linfo_string0                  # DW_AT_producer
	.short	29                              # DW_AT_language
	.long	.Linfo_string1                  # DW_AT_name
	.long	.Lline_table_start0             # DW_AT_stmt_list
	.long	.Linfo_string2                  # DW_AT_comp_dir
                                        # DW_AT_use_UTF8
	.quad	0                               # DW_AT_low_pc
	.long	.Ldebug_ranges0                 # DW_AT_ranges
.Ldebug_info_end0:
	.section	.debug_aranges,"",@progbits
	.section	.debug_ranges,"",@progbits
# Range list referenced by DW_AT_ranges: one [begin, end) pair per function,
# terminated by a (0, 0) pair.
.Ldebug_ranges0:
	.quad	.Lfunc_begin0
	.quad	.Lfunc_end0
	.quad	.Lfunc_begin1
	.quad	.Lfunc_end1
	.quad	0
	.quad	0
	.section	.debug_str,"MS",@progbits,1
# Strings referenced via DW_FORM_strp from the compile unit DIE.
.Linfo_string0:
	.asciz	"clang version 16.0.5 ---------------------------------------" # string offset=0
.Linfo_string1:
	.asciz	"two.c"                         # string offset=61
.Linfo_string2:
	.asciz	"c:\\Temp\\dwarfline"           # string offset=67
	.ident	"clang version 16.0.5 ---------------------------------------"
	.section	".note.GNU-stack","",@progbits
	.section	.debug_line,"",@progbits
# Hand-written DWARF v4 line table containing two sequences:
#   sequence 1: addresses [0, 6),  rows at line 10 (addr 0) and line 9 (addr 1)
#   sequence 2: addresses [s, s+16) where s = .Lfunc_begin1 - .Lfunc_begin0,
#               rows at line 0 (addr s) and line 0 (addr s+1)
# The line register starts at 1 at the beginning of every sequence.
.Lline_table_start0:
	.long .Lunit_end - .Lunit_start        # unit length
.Lunit_start:
	.short 4                               # version
	.long .Lprologue_end - .Lprologue_start # header length
.Lprologue_start:
	.byte 1                                # minimum_instruction_length
	.byte 1                                # maximum_operations_per_instruction
	.byte 0                                # default_is_stmt
	.byte -5                               # line_base
	.byte 14                               # line_range
	.byte 13                               # opcode_base
	.byte 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1 # arguments in standard opcodes
	.asciz "dir0"                          # include directory
	.asciz "dir1"                          # include directory
	.byte 0                                # end of include directories
	.asciz "two.c"                         # filename
	.byte 0                                # reference to dir0
	.byte 0                                # modification time
	.byte 0                                # length of file (unavailable)
	.byte 0                                # end of filenames
.Lprologue_end:
	# --- Sequence 1 ---
	.byte 0, 9, 2                          # DW_LNE_set_address
	.quad 0x0                              #  address 0: start of the first sequence
	.byte 3                                # DW_LNS_advance_line
	.sleb128 9                             #  by 9 (to 10)
	.byte 1                                # DW_LNS_copy
	.byte 3                                # DW_LNS_advance_line
	.sleb128 -1                            #  by -1 (to 9)
	.byte 2                                # DW_LNS_advance_pc
	.byte 1                                #  += (1 * min instruction length)
	.byte 1                                # DW_LNS_copy
	.byte 3                                # DW_LNS_advance_line
	.sleb128 -1                            #  by -1 (to 8)
	.byte 2                                # DW_LNS_advance_pc
	.byte 5                                #  += (5 * min instruction length)
	.byte 0, 1, 1                          # DW_LNE_end_sequence (at address 6)
	# --- Sequence 2 ---
	.byte 0, 9, 2                          # DW_LNE_set_address
	.quad .Lfunc_begin1 - .Lfunc_begin0    #  offset of .Lfunc_begin1 from .Lfunc_begin0
	.byte 3                                # DW_LNS_advance_line
	.sleb128 -1                            #  by -1 (to 0)
	.byte 1                                # DW_LNS_copy
	.byte 2                                # DW_LNS_advance_pc
	.byte 1                                #  += (1 * min instruction length)
	.byte 1                                # DW_LNS_copy
	.byte 3                                # DW_LNS_advance_line
	.sleb128 5                             #  by 5 (to 5)
	.byte 2                                # DW_LNS_advance_pc
	.byte 15                               #  += (15 * min instruction length)
	.byte 0, 1, 1                          # DW_LNE_end_sequence (16 bytes after start)
.Lunit_end:

Hopefully that's enough to convey the idea. My reading of the DWARF spec is that this is legal: as far as I can see, nothing disallows two contiguous sequences in a line table. It might be worth double-checking, though: @pogo59 or @dwblaikie, could you confirm?

Note: It may be better to try yaml2obj as suggested by James.


