[llvm] [BOLT][DWARF] Fix invalid address ranges (PR #71474)

Maksim Panchenko via llvm-commits llvm-commits at lists.llvm.org
Mon Nov 6 17:39:57 PST 2023


https://github.com/maksfb created https://github.com/llvm/llvm-project/pull/71474

When NOP instructions are removed by BOLT and a DWARF address range falls past the removed instructions, it may lead to invalid DWARF ranges in the output binary. E.g. the range may fall outside of the basic block boundaries.

This fix makes sure the modified range fits within the containing basic block. A proper fix requires tracking instructions within the block and will come in a different PR.

From afc1c0df7aec711fa5b45eb6f7a99f25ec684caa Mon Sep 17 00:00:00 2001
From: Maksim Panchenko <maks at fb.com>
Date: Mon, 6 Nov 2023 16:08:13 -0800
Subject: [PATCH] [BOLT][DWARF] Fix invalid address ranges

Summary:
When NOP instructions are removed by BOLT and a DWARF address range
falls past the removed instructions, it may lead to invalid DWARF ranges
in the output binary. E.g. the range may fall outside of the basic block
boundaries.

This fix makes sure the modified range fits within the containing basic
block. A proper fix requires tracking instructions within the block and
will come in a different PR.
---
 bolt/lib/Core/BinaryFunction.cpp   |   2 +-
 bolt/test/X86/dwarf-inline-range.s | 393 +++++++++++++++++++++++++++++
 2 files changed, 394 insertions(+), 1 deletion(-)
 create mode 100644 bolt/test/X86/dwarf-inline-range.s

diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index 61845a7711c9b2c..97683b3b2ca7128 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -4306,7 +4306,7 @@ BinaryFunction::translateInputToOutputRange(DebugAddressRange InRange) const {
     // block boundaries.
     auto translateBlockOffset = [&](const uint64_t Offset) {
       const uint64_t OutAddress = BB.getOutputAddressRange().first + Offset;
-      return OutAddress;
+      return std::min(OutAddress, BB.getOutputAddressRange().second);
     };
 
     uint64_t OutLowPC = BB.getOutputAddressRange().first;
diff --git a/bolt/test/X86/dwarf-inline-range.s b/bolt/test/X86/dwarf-inline-range.s
new file mode 100644
index 000000000000000..6cf4203cf5972e1
--- /dev/null
+++ b/bolt/test/X86/dwarf-inline-range.s
@@ -0,0 +1,393 @@
+## Use llvm-dwarfdump to check the integrity of the inlined function "bar"
+## DWARF range after llvm-bolt removes a 6-byte nop instruction.
+##
+## If the range is not properly updated, it will exceed the range of the
+## containing function causing llvm-dwarfdump to issue an error.
+
+# CHECK-NOT: error: DIE address ranges are not contained in its parent's ranges
+
+# REQUIRES: system-linux
+
+# RUN: %clang++ %cflags -gdwarf-4 %s -o %t.exe -Wl,-q
+# RUN: llvm-bolt %t.exe -o %t.bolt --update-debug-sections
+# RUN: llvm-dwarfdump --verify %t.bolt | FileCheck %s
+
+
+# Test compiled with "-O2 -g" from:
+#
+# unsigned long bar(unsigned long i) {
+#   asm volatile("nopw    %cs:(%rax,%rax)");
+#   return ++i;
+# }
+#
+# int main(int argc, char **argv) {
+#   bar(argc);
+#   return 0;
+# }
+
+	.text
+	.file	"dwarf-inline-range.cpp"
+	.globl	_Z3barm                         # -- Begin function _Z3barm
+	.p2align	4, 0x90
+	.type	_Z3barm,@function
+_Z3barm:                                # @_Z3barm
+.Lfunc_begin0:
+	.file	1 "." "dwarf-inline-range.cpp"
+	.loc	1 1 0                           # dwarf-inline-range.cpp:1:0
+	.cfi_startproc
+# %bb.0:
+	#DEBUG_VALUE: bar:i <- $rdi
+	.loc	1 2 3 prologue_end              # dwarf-inline-range.cpp:2:3
+	#APP
+	nopw	%cs:(%rax,%rax)
+	#NO_APP
+	.loc	1 3 10                          # dwarf-inline-range.cpp:3:10
+	leaq	1(%rdi), %rax
+.Ltmp0:
+	#DEBUG_VALUE: bar:i <- $rax
+	.loc	1 3 3 is_stmt 0                 # dwarf-inline-range.cpp:3:3
+	retq
+.Ltmp1:
+.Lfunc_end0:
+	.size	_Z3barm, .Lfunc_end0-_Z3barm
+	.cfi_endproc
+                                        # -- End function
+	.globl	main                            # -- Begin function main
+	.p2align	4, 0x90
+	.type	main,@function
+main:                                   # @main
+.Lfunc_begin1:
+	.loc	1 6 0 is_stmt 1                 # dwarf-inline-range.cpp:6:0
+	.cfi_startproc
+# %bb.0:
+	#DEBUG_VALUE: main:argc <- $edi
+	#DEBUG_VALUE: main:argv <- $rsi
+	#DEBUG_VALUE: bar:i <- [DW_OP_LLVM_convert 32 5, DW_OP_LLVM_convert 64 5, DW_OP_stack_value] $edi
+	.loc	1 2 3 prologue_end              # dwarf-inline-range.cpp:2:3
+	#APP
+	nopw	%cs:(%rax,%rax)
+	#NO_APP
+.Ltmp2:
+	#DEBUG_VALUE: bar:i <- [DW_OP_LLVM_convert 32 5, DW_OP_LLVM_convert 64 5, DW_OP_plus_uconst 1, DW_OP_stack_value] undef
+	.loc	1 8 3                           # dwarf-inline-range.cpp:8:3
+	xorl	%eax, %eax
+	retq
+.Ltmp3:
+.Lfunc_end1:
+	.size	main, .Lfunc_end1-main
+	.cfi_endproc
+                                        # -- End function
+	.section	.debug_loc,"",@progbits
+.Ldebug_loc0:
+	.quad	.Lfunc_begin0-.Lfunc_begin0
+	.quad	.Ltmp0-.Lfunc_begin0
+	.short	1                               # Loc expr size
+	.byte	85                              # DW_OP_reg5
+	.quad	.Ltmp0-.Lfunc_begin0
+	.quad	.Lfunc_end0-.Lfunc_begin0
+	.short	1                               # Loc expr size
+	.byte	80                              # DW_OP_reg0
+	.quad	0
+	.quad	0
+.Ldebug_loc1:
+	.quad	.Lfunc_begin1-.Lfunc_begin0
+	.quad	.Ltmp2-.Lfunc_begin0
+	.short	21                              # Loc expr size
+	.byte	117                             # DW_OP_breg5
+	.byte	0                               # 0
+	.byte	16                              # DW_OP_constu
+	.byte	255                             # 4294967295
+	.byte	255                             # 
+	.byte	255                             # 
+	.byte	255                             # 
+	.byte	15                              # 
+	.byte	26                              # DW_OP_and
+	.byte	18                              # DW_OP_dup
+	.byte	16                              # DW_OP_constu
+	.byte	31                              # 31
+	.byte	37                              # DW_OP_shr
+	.byte	48                              # DW_OP_lit0
+	.byte	32                              # DW_OP_not
+	.byte	30                              # DW_OP_mul
+	.byte	16                              # DW_OP_constu
+	.byte	32                              # 32
+	.byte	36                              # DW_OP_shl
+	.byte	33                              # DW_OP_or
+	.byte	159                             # DW_OP_stack_value
+	.quad	0
+	.quad	0
+	.section	.debug_abbrev,"",@progbits
+	.byte	1                               # Abbreviation Code
+	.byte	17                              # DW_TAG_compile_unit
+	.byte	1                               # DW_CHILDREN_yes
+	.byte	37                              # DW_AT_producer
+	.byte	14                              # DW_FORM_strp
+	.byte	19                              # DW_AT_language
+	.byte	5                               # DW_FORM_data2
+	.byte	3                               # DW_AT_name
+	.byte	14                              # DW_FORM_strp
+	.byte	16                              # DW_AT_stmt_list
+	.byte	23                              # DW_FORM_sec_offset
+	.byte	27                              # DW_AT_comp_dir
+	.byte	14                              # DW_FORM_strp
+	.byte	17                              # DW_AT_low_pc
+	.byte	1                               # DW_FORM_addr
+	.byte	18                              # DW_AT_high_pc
+	.byte	6                               # DW_FORM_data4
+	.byte	0                               # EOM(1)
+	.byte	0                               # EOM(2)
+	.byte	2                               # Abbreviation Code
+	.byte	46                              # DW_TAG_subprogram
+	.byte	1                               # DW_CHILDREN_yes
+	.byte	17                              # DW_AT_low_pc
+	.byte	1                               # DW_FORM_addr
+	.byte	18                              # DW_AT_high_pc
+	.byte	6                               # DW_FORM_data4
+	.byte	64                              # DW_AT_frame_base
+	.byte	24                              # DW_FORM_exprloc
+	.ascii	"\227B"                         # DW_AT_GNU_all_call_sites
+	.byte	25                              # DW_FORM_flag_present
+	.byte	49                              # DW_AT_abstract_origin
+	.byte	19                              # DW_FORM_ref4
+	.byte	0                               # EOM(1)
+	.byte	0                               # EOM(2)
+	.byte	3                               # Abbreviation Code
+	.byte	5                               # DW_TAG_formal_parameter
+	.byte	0                               # DW_CHILDREN_no
+	.byte	2                               # DW_AT_location
+	.byte	23                              # DW_FORM_sec_offset
+	.byte	49                              # DW_AT_abstract_origin
+	.byte	19                              # DW_FORM_ref4
+	.byte	0                               # EOM(1)
+	.byte	0                               # EOM(2)
+	.byte	4                               # Abbreviation Code
+	.byte	46                              # DW_TAG_subprogram
+	.byte	1                               # DW_CHILDREN_yes
+	.byte	110                             # DW_AT_linkage_name
+	.byte	14                              # DW_FORM_strp
+	.byte	3                               # DW_AT_name
+	.byte	14                              # DW_FORM_strp
+	.byte	58                              # DW_AT_decl_file
+	.byte	11                              # DW_FORM_data1
+	.byte	59                              # DW_AT_decl_line
+	.byte	11                              # DW_FORM_data1
+	.byte	73                              # DW_AT_type
+	.byte	19                              # DW_FORM_ref4
+	.byte	63                              # DW_AT_external
+	.byte	25                              # DW_FORM_flag_present
+	.byte	32                              # DW_AT_inline
+	.byte	11                              # DW_FORM_data1
+	.byte	0                               # EOM(1)
+	.byte	0                               # EOM(2)
+	.byte	5                               # Abbreviation Code
+	.byte	5                               # DW_TAG_formal_parameter
+	.byte	0                               # DW_CHILDREN_no
+	.byte	3                               # DW_AT_name
+	.byte	14                              # DW_FORM_strp
+	.byte	58                              # DW_AT_decl_file
+	.byte	11                              # DW_FORM_data1
+	.byte	59                              # DW_AT_decl_line
+	.byte	11                              # DW_FORM_data1
+	.byte	73                              # DW_AT_type
+	.byte	19                              # DW_FORM_ref4
+	.byte	0                               # EOM(1)
+	.byte	0                               # EOM(2)
+	.byte	6                               # Abbreviation Code
+	.byte	36                              # DW_TAG_base_type
+	.byte	0                               # DW_CHILDREN_no
+	.byte	3                               # DW_AT_name
+	.byte	14                              # DW_FORM_strp
+	.byte	62                              # DW_AT_encoding
+	.byte	11                              # DW_FORM_data1
+	.byte	11                              # DW_AT_byte_size
+	.byte	11                              # DW_FORM_data1
+	.byte	0                               # EOM(1)
+	.byte	0                               # EOM(2)
+	.byte	7                               # Abbreviation Code
+	.byte	46                              # DW_TAG_subprogram
+	.byte	1                               # DW_CHILDREN_yes
+	.byte	17                              # DW_AT_low_pc
+	.byte	1                               # DW_FORM_addr
+	.byte	18                              # DW_AT_high_pc
+	.byte	6                               # DW_FORM_data4
+	.byte	64                              # DW_AT_frame_base
+	.byte	24                              # DW_FORM_exprloc
+	.ascii	"\227B"                         # DW_AT_GNU_all_call_sites
+	.byte	25                              # DW_FORM_flag_present
+	.byte	3                               # DW_AT_name
+	.byte	14                              # DW_FORM_strp
+	.byte	58                              # DW_AT_decl_file
+	.byte	11                              # DW_FORM_data1
+	.byte	59                              # DW_AT_decl_line
+	.byte	11                              # DW_FORM_data1
+	.byte	73                              # DW_AT_type
+	.byte	19                              # DW_FORM_ref4
+	.byte	63                              # DW_AT_external
+	.byte	25                              # DW_FORM_flag_present
+	.byte	0                               # EOM(1)
+	.byte	0                               # EOM(2)
+	.byte	8                               # Abbreviation Code
+	.byte	5                               # DW_TAG_formal_parameter
+	.byte	0                               # DW_CHILDREN_no
+	.byte	2                               # DW_AT_location
+	.byte	24                              # DW_FORM_exprloc
+	.byte	3                               # DW_AT_name
+	.byte	14                              # DW_FORM_strp
+	.byte	58                              # DW_AT_decl_file
+	.byte	11                              # DW_FORM_data1
+	.byte	59                              # DW_AT_decl_line
+	.byte	11                              # DW_FORM_data1
+	.byte	73                              # DW_AT_type
+	.byte	19                              # DW_FORM_ref4
+	.byte	0                               # EOM(1)
+	.byte	0                               # EOM(2)
+	.byte	9                               # Abbreviation Code
+	.byte	29                              # DW_TAG_inlined_subroutine
+	.byte	1                               # DW_CHILDREN_yes
+	.byte	49                              # DW_AT_abstract_origin
+	.byte	19                              # DW_FORM_ref4
+	.byte	17                              # DW_AT_low_pc
+	.byte	1                               # DW_FORM_addr
+	.byte	18                              # DW_AT_high_pc
+	.byte	6                               # DW_FORM_data4
+	.byte	88                              # DW_AT_call_file
+	.byte	11                              # DW_FORM_data1
+	.byte	89                              # DW_AT_call_line
+	.byte	11                              # DW_FORM_data1
+	.byte	87                              # DW_AT_call_column
+	.byte	11                              # DW_FORM_data1
+	.byte	0                               # EOM(1)
+	.byte	0                               # EOM(2)
+	.byte	10                              # Abbreviation Code
+	.byte	15                              # DW_TAG_pointer_type
+	.byte	0                               # DW_CHILDREN_no
+	.byte	73                              # DW_AT_type
+	.byte	19                              # DW_FORM_ref4
+	.byte	0                               # EOM(1)
+	.byte	0                               # EOM(2)
+	.byte	0                               # EOM(3)
+	.section	.debug_info,"",@progbits
+.Lcu_begin0:
+	.long	.Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit
+.Ldebug_info_start0:
+	.short	4                               # DWARF version number
+	.long	.debug_abbrev                   # Offset Into Abbrev. Section
+	.byte	8                               # Address Size (in bytes)
+	.byte	1                               # Abbrev [1] 0xb:0xca DW_TAG_compile_unit
+	.long	.Linfo_string0                  # DW_AT_producer
+	.short	33                              # DW_AT_language
+	.long	.Linfo_string1                  # DW_AT_name
+	.long	.Lline_table_start0             # DW_AT_stmt_list
+	.long	.Linfo_string2                  # DW_AT_comp_dir
+	.quad	.Lfunc_begin0                   # DW_AT_low_pc
+	.long	.Lfunc_end1-.Lfunc_begin0       # DW_AT_high_pc
+	.byte	2                               # Abbrev [2] 0x2a:0x1d DW_TAG_subprogram
+	.quad	.Lfunc_begin0                   # DW_AT_low_pc
+	.long	.Lfunc_end0-.Lfunc_begin0       # DW_AT_high_pc
+	.byte	1                               # DW_AT_frame_base
+	.byte	87
+                                        # DW_AT_GNU_all_call_sites
+	.long	71                              # DW_AT_abstract_origin
+	.byte	3                               # Abbrev [3] 0x3d:0x9 DW_TAG_formal_parameter
+	.long	.Ldebug_loc0                    # DW_AT_location
+	.long	87                              # DW_AT_abstract_origin
+	.byte	0                               # End Of Children Mark
+	.byte	4                               # Abbrev [4] 0x47:0x1c DW_TAG_subprogram
+	.long	.Linfo_string3                  # DW_AT_linkage_name
+	.long	.Linfo_string4                  # DW_AT_name
+	.byte	1                               # DW_AT_decl_file
+	.byte	1                               # DW_AT_decl_line
+	.long	99                              # DW_AT_type
+                                        # DW_AT_external
+	.byte	1                               # DW_AT_inline
+	.byte	5                               # Abbrev [5] 0x57:0xb DW_TAG_formal_parameter
+	.long	.Linfo_string6                  # DW_AT_name
+	.byte	1                               # DW_AT_decl_file
+	.byte	1                               # DW_AT_decl_line
+	.long	99                              # DW_AT_type
+	.byte	0                               # End Of Children Mark
+	.byte	6                               # Abbrev [6] 0x63:0x7 DW_TAG_base_type
+	.long	.Linfo_string5                  # DW_AT_name
+	.byte	7                               # DW_AT_encoding
+	.byte	8                               # DW_AT_byte_size
+	.byte	7                               # Abbrev [7] 0x6a:0x52 DW_TAG_subprogram
+	.quad	.Lfunc_begin1                   # DW_AT_low_pc
+	.long	.Lfunc_end1-.Lfunc_begin1       # DW_AT_high_pc
+	.byte	1                               # DW_AT_frame_base
+	.byte	87
+                                        # DW_AT_GNU_all_call_sites
+	.long	.Linfo_string7                  # DW_AT_name
+	.byte	1                               # DW_AT_decl_file
+	.byte	6                               # DW_AT_decl_line
+	.long	188                             # DW_AT_type
+                                        # DW_AT_external
+	.byte	8                               # Abbrev [8] 0x83:0xd DW_TAG_formal_parameter
+	.byte	1                               # DW_AT_location
+	.byte	85
+	.long	.Linfo_string9                  # DW_AT_name
+	.byte	1                               # DW_AT_decl_file
+	.byte	6                               # DW_AT_decl_line
+	.long	188                             # DW_AT_type
+	.byte	8                               # Abbrev [8] 0x90:0xd DW_TAG_formal_parameter
+	.byte	1                               # DW_AT_location
+	.byte	84
+	.long	.Linfo_string10                 # DW_AT_name
+	.byte	1                               # DW_AT_decl_file
+	.byte	6                               # DW_AT_decl_line
+	.long	195                             # DW_AT_type
+	.byte	9                               # Abbrev [9] 0x9d:0x1e DW_TAG_inlined_subroutine
+	.long	71                              # DW_AT_abstract_origin
+	.quad	.Lfunc_begin1                   # DW_AT_low_pc
+	.long	.Ltmp2-.Lfunc_begin1            # DW_AT_high_pc
+	.byte	1                               # DW_AT_call_file
+	.byte	7                               # DW_AT_call_line
+	.byte	3                               # DW_AT_call_column
+	.byte	3                               # Abbrev [3] 0xb1:0x9 DW_TAG_formal_parameter
+	.long	.Ldebug_loc1                    # DW_AT_location
+	.long	87                              # DW_AT_abstract_origin
+	.byte	0                               # End Of Children Mark
+	.byte	0                               # End Of Children Mark
+	.byte	6                               # Abbrev [6] 0xbc:0x7 DW_TAG_base_type
+	.long	.Linfo_string8                  # DW_AT_name
+	.byte	5                               # DW_AT_encoding
+	.byte	4                               # DW_AT_byte_size
+	.byte	10                              # Abbrev [10] 0xc3:0x5 DW_TAG_pointer_type
+	.long	200                             # DW_AT_type
+	.byte	10                              # Abbrev [10] 0xc8:0x5 DW_TAG_pointer_type
+	.long	205                             # DW_AT_type
+	.byte	6                               # Abbrev [6] 0xcd:0x7 DW_TAG_base_type
+	.long	.Linfo_string11                 # DW_AT_name
+	.byte	6                               # DW_AT_encoding
+	.byte	1                               # DW_AT_byte_size
+	.byte	0                               # End Of Children Mark
+.Ldebug_info_end0:
+	.section	.debug_str,"MS",@progbits,1
+.Linfo_string0:
+	.asciz	"clang version 15"
+.Linfo_string1:
+	.asciz	"dwarf-inline-range.cpp"        # string offset=69
+.Linfo_string2:
+	.asciz	"."           # string offset=92
+.Linfo_string3:
+	.asciz	"_Z3barm"                       # string offset=112
+.Linfo_string4:
+	.asciz	"bar"                           # string offset=120
+.Linfo_string5:
+	.asciz	"unsigned long"                 # string offset=124
+.Linfo_string6:
+	.asciz	"i"                             # string offset=138
+.Linfo_string7:
+	.asciz	"main"                          # string offset=140
+.Linfo_string8:
+	.asciz	"int"                           # string offset=145
+.Linfo_string9:
+	.asciz	"argc"                          # string offset=149
+.Linfo_string10:
+	.asciz	"argv"                          # string offset=154
+.Linfo_string11:
+	.asciz	"char"                          # string offset=159
+	.ident	"clang version 15"
+	.section	".note.GNU-stack","",@progbits
+	.addrsig
+	.section	.debug_line,"",@progbits
+.Lline_table_start0:



More information about the llvm-commits mailing list