[llvm] [BOLT][DWARF] Fix invalid address ranges (PR #71474)
Maksim Panchenko via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 6 17:39:57 PST 2023
https://github.com/maksfb created https://github.com/llvm/llvm-project/pull/71474
When NOP instructions are removed by BOLT and a DWARF address range falls past the removed instructions, it may lead to invalid DWARF ranges in the output binary. E.g. the range may fall outside of the basic block boundaries.
This fix makes sure the modified range fits within the containing basic block. A proper fix requires tracking instructions within the block and will come in a different PR.
From afc1c0df7aec711fa5b45eb6f7a99f25ec684caa Mon Sep 17 00:00:00 2001
From: Maksim Panchenko <maks at fb.com>
Date: Mon, 6 Nov 2023 16:08:13 -0800
Subject: [PATCH] [BOLT][DWARF] Fix invalid address ranges
Summary:
When NOP instructions are removed by BOLT and a DWARF address range
falls past the removed instructions, it may lead to invalid DWARF ranges
in the output binary. E.g. the range may fall outside of the basic block
boundaries.
This fix makes sure the modified range fits within the containing basic
block. A proper fix requires tracking instructions within the block and
will come in a different PR.
---
bolt/lib/Core/BinaryFunction.cpp | 2 +-
bolt/test/X86/dwarf-inline-range.s | 393 +++++++++++++++++++++++++++++
2 files changed, 394 insertions(+), 1 deletion(-)
create mode 100644 bolt/test/X86/dwarf-inline-range.s
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index 61845a7711c9b2c..97683b3b2ca7128 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -4306,7 +4306,7 @@ BinaryFunction::translateInputToOutputRange(DebugAddressRange InRange) const {
// block boundaries.
auto translateBlockOffset = [&](const uint64_t Offset) {
const uint64_t OutAddress = BB.getOutputAddressRange().first + Offset;
- return OutAddress;
+ return std::min(OutAddress, BB.getOutputAddressRange().second);
};
uint64_t OutLowPC = BB.getOutputAddressRange().first;
diff --git a/bolt/test/X86/dwarf-inline-range.s b/bolt/test/X86/dwarf-inline-range.s
new file mode 100644
index 000000000000000..6cf4203cf5972e1
--- /dev/null
+++ b/bolt/test/X86/dwarf-inline-range.s
@@ -0,0 +1,393 @@
+## Use llvm-dwarfdump to check the integrity of the inlined function "bar"
+## DWARF range after llvm-bolt removes a 6-byte nop instruction.
+##
+## If the range is not properly updated, it will exceed the range of the
+## containing function causing llvm-dwarfdump to issue an error.
+
+# CHECK-NOT: error: DIE address ranges are not contained in its parent's ranges
+
+# REQUIRES: system-linux
+
+# RUN: %clang++ %cflags -gdwarf-4 %s -o %t.exe -Wl,-q
+# RUN: llvm-bolt %t.exe -o %t.bolt --update-debug-sections
+# RUN: llvm-dwarfdump --verify %t.bolt | FileCheck %s
+
+
+# Test compiled with "-O2 -g" from:
+#
+# unsigned long bar(unsigned long i) {
+# asm volatile("nopw %cs:(%rax,%rax)");
+# return ++i;
+# }
+#
+# int main(int argc, char **argv) {
+# bar(argc);
+# return 0;
+# }
+
+ .text
+ .file "dwarf-inline-range.cpp"
+ .globl _Z3barm # -- Begin function _Z3barm
+ .p2align 4, 0x90
+ .type _Z3barm,@function
+_Z3barm: # @_Z3barm
+.Lfunc_begin0:
+ .file 1 "." "dwarf-inline-range.cpp"
+ .loc 1 1 0 # dwarf-inline-range.cpp:1:0
+ .cfi_startproc
+# %bb.0:
+ #DEBUG_VALUE: bar:i <- $rdi
+ .loc 1 2 3 prologue_end # dwarf-inline-range.cpp:2:3
+ #APP
+ nopw %cs:(%rax,%rax)
+ #NO_APP
+ .loc 1 3 10 # dwarf-inline-range.cpp:3:10
+ leaq 1(%rdi), %rax
+.Ltmp0:
+ #DEBUG_VALUE: bar:i <- $rax
+ .loc 1 3 3 is_stmt 0 # dwarf-inline-range.cpp:3:3
+ retq
+.Ltmp1:
+.Lfunc_end0:
+ .size _Z3barm, .Lfunc_end0-_Z3barm
+ .cfi_endproc
+ # -- End function
+ .globl main # -- Begin function main
+ .p2align 4, 0x90
+ .type main,@function
+main: # @main
+.Lfunc_begin1:
+ .loc 1 6 0 is_stmt 1 # dwarf-inline-range.cpp:6:0
+ .cfi_startproc
+# %bb.0:
+ #DEBUG_VALUE: main:argc <- $edi
+ #DEBUG_VALUE: main:argv <- $rsi
+ #DEBUG_VALUE: bar:i <- [DW_OP_LLVM_convert 32 5, DW_OP_LLVM_convert 64 5, DW_OP_stack_value] $edi
+ .loc 1 2 3 prologue_end # dwarf-inline-range.cpp:2:3
+ #APP
+ nopw %cs:(%rax,%rax)
+ #NO_APP
+.Ltmp2:
+ #DEBUG_VALUE: bar:i <- [DW_OP_LLVM_convert 32 5, DW_OP_LLVM_convert 64 5, DW_OP_plus_uconst 1, DW_OP_stack_value] undef
+ .loc 1 8 3 # dwarf-inline-range.cpp:8:3
+ xorl %eax, %eax
+ retq
+.Ltmp3:
+.Lfunc_end1:
+ .size main, .Lfunc_end1-main
+ .cfi_endproc
+ # -- End function
+ .section .debug_loc,"",@progbits
+.Ldebug_loc0:
+ .quad .Lfunc_begin0-.Lfunc_begin0
+ .quad .Ltmp0-.Lfunc_begin0
+ .short 1 # Loc expr size
+ .byte 85 # DW_OP_reg5
+ .quad .Ltmp0-.Lfunc_begin0
+ .quad .Lfunc_end0-.Lfunc_begin0
+ .short 1 # Loc expr size
+ .byte 80 # DW_OP_reg0
+ .quad 0
+ .quad 0
+.Ldebug_loc1:
+ .quad .Lfunc_begin1-.Lfunc_begin0
+ .quad .Ltmp2-.Lfunc_begin0
+ .short 21 # Loc expr size
+ .byte 117 # DW_OP_breg5
+ .byte 0 # 0
+ .byte 16 # DW_OP_constu
+ .byte 255 # 4294967295
+ .byte 255 #
+ .byte 255 #
+ .byte 255 #
+ .byte 15 #
+ .byte 26 # DW_OP_and
+ .byte 18 # DW_OP_dup
+ .byte 16 # DW_OP_constu
+ .byte 31 # 31
+ .byte 37 # DW_OP_shr
+ .byte 48 # DW_OP_lit0
+ .byte 32 # DW_OP_not
+ .byte 30 # DW_OP_mul
+ .byte 16 # DW_OP_constu
+ .byte 32 # 32
+ .byte 36 # DW_OP_shl
+ .byte 33 # DW_OP_or
+ .byte 159 # DW_OP_stack_value
+ .quad 0
+ .quad 0
+ .section .debug_abbrev,"",@progbits
+ .byte 1 # Abbreviation Code
+ .byte 17 # DW_TAG_compile_unit
+ .byte 1 # DW_CHILDREN_yes
+ .byte 37 # DW_AT_producer
+ .byte 14 # DW_FORM_strp
+ .byte 19 # DW_AT_language
+ .byte 5 # DW_FORM_data2
+ .byte 3 # DW_AT_name
+ .byte 14 # DW_FORM_strp
+ .byte 16 # DW_AT_stmt_list
+ .byte 23 # DW_FORM_sec_offset
+ .byte 27 # DW_AT_comp_dir
+ .byte 14 # DW_FORM_strp
+ .byte 17 # DW_AT_low_pc
+ .byte 1 # DW_FORM_addr
+ .byte 18 # DW_AT_high_pc
+ .byte 6 # DW_FORM_data4
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 2 # Abbreviation Code
+ .byte 46 # DW_TAG_subprogram
+ .byte 1 # DW_CHILDREN_yes
+ .byte 17 # DW_AT_low_pc
+ .byte 1 # DW_FORM_addr
+ .byte 18 # DW_AT_high_pc
+ .byte 6 # DW_FORM_data4
+ .byte 64 # DW_AT_frame_base
+ .byte 24 # DW_FORM_exprloc
+ .ascii "\227B" # DW_AT_GNU_all_call_sites
+ .byte 25 # DW_FORM_flag_present
+ .byte 49 # DW_AT_abstract_origin
+ .byte 19 # DW_FORM_ref4
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 3 # Abbreviation Code
+ .byte 5 # DW_TAG_formal_parameter
+ .byte 0 # DW_CHILDREN_no
+ .byte 2 # DW_AT_location
+ .byte 23 # DW_FORM_sec_offset
+ .byte 49 # DW_AT_abstract_origin
+ .byte 19 # DW_FORM_ref4
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 4 # Abbreviation Code
+ .byte 46 # DW_TAG_subprogram
+ .byte 1 # DW_CHILDREN_yes
+ .byte 110 # DW_AT_linkage_name
+ .byte 14 # DW_FORM_strp
+ .byte 3 # DW_AT_name
+ .byte 14 # DW_FORM_strp
+ .byte 58 # DW_AT_decl_file
+ .byte 11 # DW_FORM_data1
+ .byte 59 # DW_AT_decl_line
+ .byte 11 # DW_FORM_data1
+ .byte 73 # DW_AT_type
+ .byte 19 # DW_FORM_ref4
+ .byte 63 # DW_AT_external
+ .byte 25 # DW_FORM_flag_present
+ .byte 32 # DW_AT_inline
+ .byte 11 # DW_FORM_data1
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 5 # Abbreviation Code
+ .byte 5 # DW_TAG_formal_parameter
+ .byte 0 # DW_CHILDREN_no
+ .byte 3 # DW_AT_name
+ .byte 14 # DW_FORM_strp
+ .byte 58 # DW_AT_decl_file
+ .byte 11 # DW_FORM_data1
+ .byte 59 # DW_AT_decl_line
+ .byte 11 # DW_FORM_data1
+ .byte 73 # DW_AT_type
+ .byte 19 # DW_FORM_ref4
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 6 # Abbreviation Code
+ .byte 36 # DW_TAG_base_type
+ .byte 0 # DW_CHILDREN_no
+ .byte 3 # DW_AT_name
+ .byte 14 # DW_FORM_strp
+ .byte 62 # DW_AT_encoding
+ .byte 11 # DW_FORM_data1
+ .byte 11 # DW_AT_byte_size
+ .byte 11 # DW_FORM_data1
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 7 # Abbreviation Code
+ .byte 46 # DW_TAG_subprogram
+ .byte 1 # DW_CHILDREN_yes
+ .byte 17 # DW_AT_low_pc
+ .byte 1 # DW_FORM_addr
+ .byte 18 # DW_AT_high_pc
+ .byte 6 # DW_FORM_data4
+ .byte 64 # DW_AT_frame_base
+ .byte 24 # DW_FORM_exprloc
+ .ascii "\227B" # DW_AT_GNU_all_call_sites
+ .byte 25 # DW_FORM_flag_present
+ .byte 3 # DW_AT_name
+ .byte 14 # DW_FORM_strp
+ .byte 58 # DW_AT_decl_file
+ .byte 11 # DW_FORM_data1
+ .byte 59 # DW_AT_decl_line
+ .byte 11 # DW_FORM_data1
+ .byte 73 # DW_AT_type
+ .byte 19 # DW_FORM_ref4
+ .byte 63 # DW_AT_external
+ .byte 25 # DW_FORM_flag_present
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 8 # Abbreviation Code
+ .byte 5 # DW_TAG_formal_parameter
+ .byte 0 # DW_CHILDREN_no
+ .byte 2 # DW_AT_location
+ .byte 24 # DW_FORM_exprloc
+ .byte 3 # DW_AT_name
+ .byte 14 # DW_FORM_strp
+ .byte 58 # DW_AT_decl_file
+ .byte 11 # DW_FORM_data1
+ .byte 59 # DW_AT_decl_line
+ .byte 11 # DW_FORM_data1
+ .byte 73 # DW_AT_type
+ .byte 19 # DW_FORM_ref4
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 9 # Abbreviation Code
+ .byte 29 # DW_TAG_inlined_subroutine
+ .byte 1 # DW_CHILDREN_yes
+ .byte 49 # DW_AT_abstract_origin
+ .byte 19 # DW_FORM_ref4
+ .byte 17 # DW_AT_low_pc
+ .byte 1 # DW_FORM_addr
+ .byte 18 # DW_AT_high_pc
+ .byte 6 # DW_FORM_data4
+ .byte 88 # DW_AT_call_file
+ .byte 11 # DW_FORM_data1
+ .byte 89 # DW_AT_call_line
+ .byte 11 # DW_FORM_data1
+ .byte 87 # DW_AT_call_column
+ .byte 11 # DW_FORM_data1
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 10 # Abbreviation Code
+ .byte 15 # DW_TAG_pointer_type
+ .byte 0 # DW_CHILDREN_no
+ .byte 73 # DW_AT_type
+ .byte 19 # DW_FORM_ref4
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 0 # EOM(3)
+ .section .debug_info,"",@progbits
+.Lcu_begin0:
+ .long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit
+.Ldebug_info_start0:
+ .short 4 # DWARF version number
+ .long .debug_abbrev # Offset Into Abbrev. Section
+ .byte 8 # Address Size (in bytes)
+ .byte 1 # Abbrev [1] 0xb:0xca DW_TAG_compile_unit
+ .long .Linfo_string0 # DW_AT_producer
+ .short 33 # DW_AT_language
+ .long .Linfo_string1 # DW_AT_name
+ .long .Lline_table_start0 # DW_AT_stmt_list
+ .long .Linfo_string2 # DW_AT_comp_dir
+ .quad .Lfunc_begin0 # DW_AT_low_pc
+ .long .Lfunc_end1-.Lfunc_begin0 # DW_AT_high_pc
+ .byte 2 # Abbrev [2] 0x2a:0x1d DW_TAG_subprogram
+ .quad .Lfunc_begin0 # DW_AT_low_pc
+ .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc
+ .byte 1 # DW_AT_frame_base
+ .byte 87
+ # DW_AT_GNU_all_call_sites
+ .long 71 # DW_AT_abstract_origin
+ .byte 3 # Abbrev [3] 0x3d:0x9 DW_TAG_formal_parameter
+ .long .Ldebug_loc0 # DW_AT_location
+ .long 87 # DW_AT_abstract_origin
+ .byte 0 # End Of Children Mark
+ .byte 4 # Abbrev [4] 0x47:0x1c DW_TAG_subprogram
+ .long .Linfo_string3 # DW_AT_linkage_name
+ .long .Linfo_string4 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 1 # DW_AT_decl_line
+ .long 99 # DW_AT_type
+ # DW_AT_external
+ .byte 1 # DW_AT_inline
+ .byte 5 # Abbrev [5] 0x57:0xb DW_TAG_formal_parameter
+ .long .Linfo_string6 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 1 # DW_AT_decl_line
+ .long 99 # DW_AT_type
+ .byte 0 # End Of Children Mark
+ .byte 6 # Abbrev [6] 0x63:0x7 DW_TAG_base_type
+ .long .Linfo_string5 # DW_AT_name
+ .byte 7 # DW_AT_encoding
+ .byte 8 # DW_AT_byte_size
+ .byte 7 # Abbrev [7] 0x6a:0x52 DW_TAG_subprogram
+ .quad .Lfunc_begin1 # DW_AT_low_pc
+ .long .Lfunc_end1-.Lfunc_begin1 # DW_AT_high_pc
+ .byte 1 # DW_AT_frame_base
+ .byte 87
+ # DW_AT_GNU_all_call_sites
+ .long .Linfo_string7 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 6 # DW_AT_decl_line
+ .long 188 # DW_AT_type
+ # DW_AT_external
+ .byte 8 # Abbrev [8] 0x83:0xd DW_TAG_formal_parameter
+ .byte 1 # DW_AT_location
+ .byte 85
+ .long .Linfo_string9 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 6 # DW_AT_decl_line
+ .long 188 # DW_AT_type
+ .byte 8 # Abbrev [8] 0x90:0xd DW_TAG_formal_parameter
+ .byte 1 # DW_AT_location
+ .byte 84
+ .long .Linfo_string10 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 6 # DW_AT_decl_line
+ .long 195 # DW_AT_type
+ .byte 9 # Abbrev [9] 0x9d:0x1e DW_TAG_inlined_subroutine
+ .long 71 # DW_AT_abstract_origin
+ .quad .Lfunc_begin1 # DW_AT_low_pc
+ .long .Ltmp2-.Lfunc_begin1 # DW_AT_high_pc
+ .byte 1 # DW_AT_call_file
+ .byte 7 # DW_AT_call_line
+ .byte 3 # DW_AT_call_column
+ .byte 3 # Abbrev [3] 0xb1:0x9 DW_TAG_formal_parameter
+ .long .Ldebug_loc1 # DW_AT_location
+ .long 87 # DW_AT_abstract_origin
+ .byte 0 # End Of Children Mark
+ .byte 0 # End Of Children Mark
+ .byte 6 # Abbrev [6] 0xbc:0x7 DW_TAG_base_type
+ .long .Linfo_string8 # DW_AT_name
+ .byte 5 # DW_AT_encoding
+ .byte 4 # DW_AT_byte_size
+ .byte 10 # Abbrev [10] 0xc3:0x5 DW_TAG_pointer_type
+ .long 200 # DW_AT_type
+ .byte 10 # Abbrev [10] 0xc8:0x5 DW_TAG_pointer_type
+ .long 205 # DW_AT_type
+ .byte 6 # Abbrev [6] 0xcd:0x7 DW_TAG_base_type
+ .long .Linfo_string11 # DW_AT_name
+ .byte 6 # DW_AT_encoding
+ .byte 1 # DW_AT_byte_size
+ .byte 0 # End Of Children Mark
+.Ldebug_info_end0:
+ .section .debug_str,"MS",@progbits,1
+.Linfo_string0:
+ .asciz "clang version 15"
+.Linfo_string1:
+ .asciz "dwarf-inline-range.cpp" # string offset=69
+.Linfo_string2:
+ .asciz "." # string offset=92
+.Linfo_string3:
+ .asciz "_Z3barm" # string offset=112
+.Linfo_string4:
+ .asciz "bar" # string offset=120
+.Linfo_string5:
+ .asciz "unsigned long" # string offset=124
+.Linfo_string6:
+ .asciz "i" # string offset=138
+.Linfo_string7:
+ .asciz "main" # string offset=140
+.Linfo_string8:
+ .asciz "int" # string offset=145
+.Linfo_string9:
+ .asciz "argc" # string offset=149
+.Linfo_string10:
+ .asciz "argv" # string offset=154
+.Linfo_string11:
+ .asciz "char" # string offset=159
+ .ident "clang version 15"
+ .section ".note.GNU-stack","",@progbits
+ .addrsig
+ .section .debug_line,"",@progbits
+.Lline_table_start0:
More information about the llvm-commits
mailing list