[llvm] [llvm-debuginfo-analyzer] Fix crash with WebAssembly dead code (PR #141616)
via llvm-commits
llvm-commits at lists.llvm.org
Tue May 27 08:01:14 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-webassembly
Author: Carlos Alberto Enciso (CarlosAlbertoEnciso)
<details>
<summary>Changes</summary>
https://github.com/llvm/llvm-project/issues/136772
Incorrect handling of 'tombstone' value for WebAssembly.
llvm-debuginfo-analyzer already uses the tombstone approach
to identify dead code. Currently, the tombstone value is
evaluated as std::numeric_limits<uint64_t>::max(). Which is
wrong as it does not take into account the 'Address Byte Size'
from the Compile Unit header.
---
Patch is 22.80 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/141616.diff
6 Files Affected:
- (modified) llvm/include/llvm/DebugInfo/LogicalView/Core/LVRange.h (+6-3)
- (modified) llvm/include/llvm/DebugInfo/LogicalView/Core/LVReader.h (+7)
- (modified) llvm/lib/DebugInfo/LogicalView/Readers/LVBinaryReader.cpp (+4-2)
- (modified) llvm/lib/DebugInfo/LogicalView/Readers/LVDWARFReader.cpp (+11-4)
- (added) llvm/test/tools/llvm-debuginfo-analyzer/WebAssembly/Inputs/test-clang-tombstone.s (+366)
- (added) llvm/test/tools/llvm-debuginfo-analyzer/WebAssembly/wasm-crash-tombstone.test (+44)
``````````diff
diff --git a/llvm/include/llvm/DebugInfo/LogicalView/Core/LVRange.h b/llvm/include/llvm/DebugInfo/LogicalView/Core/LVRange.h
index 3ec0ccb31168f..75889637e3252 100644
--- a/llvm/include/llvm/DebugInfo/LogicalView/Core/LVRange.h
+++ b/llvm/include/llvm/DebugInfo/LogicalView/Core/LVRange.h
@@ -54,11 +54,14 @@ class LVRange final : public LVObject {
LVAllocator Allocator;
LVRangesTree RangesTree;
LVRangeEntries RangeEntries;
- LVAddress Lower = MaxAddress;
+ LVAddress TombstoneAddress;
+ LVAddress Lower;
LVAddress Upper = 0;
public:
- LVRange() : LVObject(), RangesTree(Allocator) {}
+ LVRange(LVAddress Address = MaxAddress)
+ : LVObject(),
+ RangesTree(Allocator), TombstoneAddress(Address), Lower(Address) {}
LVRange(const LVRange &) = delete;
LVRange &operator=(const LVRange &) = delete;
~LVRange() = default;
@@ -75,7 +78,7 @@ class LVRange final : public LVObject {
void clear() {
RangeEntries.clear();
- Lower = MaxAddress;
+ Lower = TombstoneAddress;
Upper = 0;
}
bool empty() const { return RangeEntries.empty(); }
diff --git a/llvm/include/llvm/DebugInfo/LogicalView/Core/LVReader.h b/llvm/include/llvm/DebugInfo/LogicalView/Core/LVReader.h
index 870b53f6774a8..d8d731c4bf4a3 100644
--- a/llvm/include/llvm/DebugInfo/LogicalView/Core/LVReader.h
+++ b/llvm/include/llvm/DebugInfo/LogicalView/Core/LVReader.h
@@ -133,6 +133,10 @@ class LVReader {
// Only for ELF format. The CodeView is handled in a different way.
LVSectionIndex DotTextSectionIndex = UndefinedSectionIndex;
+ // Tombstone value. Assume 64 bits. The value is updated for each
+ // Compile Unit that is processed.
+ LVAddress TombstoneAddress = MaxAddress;
+
// Record Compilation Unit entry.
void addCompileUnitOffset(LVOffset Offset, LVScopeCompileUnit *CompileUnit) {
CompileUnits.emplace(Offset, CompileUnit);
@@ -257,6 +261,9 @@ class LVReader {
return CompileUnit->getCPUType();
}
+ void setTombstoneAddress(LVAddress Address) { TombstoneAddress = Address; }
+ LVAddress getTombstoneAddress() const { return TombstoneAddress; }
+
// Access to the scopes root.
LVScopeRoot *getScopesRoot() const { return Root; }
diff --git a/llvm/lib/DebugInfo/LogicalView/Readers/LVBinaryReader.cpp b/llvm/lib/DebugInfo/LogicalView/Readers/LVBinaryReader.cpp
index 434709f076dc5..4a9a19e50440f 100644
--- a/llvm/lib/DebugInfo/LogicalView/Readers/LVBinaryReader.cpp
+++ b/llvm/lib/DebugInfo/LogicalView/Readers/LVBinaryReader.cpp
@@ -384,8 +384,10 @@ LVRange *LVBinaryReader::getSectionRanges(LVSectionIndex SectionIndex) {
// Check if we already have a mapping for this section index.
LVSectionRanges::iterator IterSection = SectionRanges.find(SectionIndex);
if (IterSection == SectionRanges.end())
- IterSection =
- SectionRanges.emplace(SectionIndex, std::make_unique<LVRange>()).first;
+ IterSection = SectionRanges
+ .emplace(SectionIndex,
+ std::make_unique<LVRange>(getTombstoneAddress()))
+ .first;
LVRange *Range = IterSection->second.get();
assert(Range && "Range is null.");
return Range;
diff --git a/llvm/lib/DebugInfo/LogicalView/Readers/LVDWARFReader.cpp b/llvm/lib/DebugInfo/LogicalView/Readers/LVDWARFReader.cpp
index 14538c1aefa18..0a25f55c13bfc 100644
--- a/llvm/lib/DebugInfo/LogicalView/Readers/LVDWARFReader.cpp
+++ b/llvm/lib/DebugInfo/LogicalView/Readers/LVDWARFReader.cpp
@@ -426,10 +426,11 @@ void LVDWARFReader::processOneAttribute(const DWARFDie &Die,
}
}
if (FoundLowPC) {
- if (CurrentLowPC == MaxAddress)
+ if (CurrentLowPC == getTombstoneAddress())
CurrentElement->setIsDiscarded();
- // Consider the case of WebAssembly.
- CurrentLowPC += WasmCodeSectionOffset;
+ else
+ // Consider the case of WebAssembly.
+ CurrentLowPC += WasmCodeSectionOffset;
if (CurrentElement->isCompileUnit())
setCUBaseAddress(CurrentLowPC);
}
@@ -483,7 +484,8 @@ void LVDWARFReader::processOneAttribute(const DWARFDie &Die,
DWARFAddressRangesVector Ranges = RangesOrError.get();
for (DWARFAddressRange &Range : Ranges) {
// This seems to be a tombstone for empty ranges.
- if (Range.LowPC == Range.HighPC)
+ if ((Range.LowPC == Range.HighPC) ||
+ (Range.LowPC = getTombstoneAddress()))
continue;
// Store the real upper limit for the address range.
if (UpdateHighAddress && Range.HighPC > 0)
@@ -841,6 +843,11 @@ Error LVDWARFReader::createScopes() {
: DwarfContext->dwo_compile_units();
for (const std::unique_ptr<DWARFUnit> &CU : CompileUnits) {
+ // Take into account the address byte size for a correct 'tombstone'
+ // value identification.
+ setTombstoneAddress(
+ dwarf::computeTombstoneAddress(CU->getAddressByteSize()));
+
// Deduction of index used for the line records.
//
// For the following test case: test.cpp
diff --git a/llvm/test/tools/llvm-debuginfo-analyzer/WebAssembly/Inputs/test-clang-tombstone.s b/llvm/test/tools/llvm-debuginfo-analyzer/WebAssembly/Inputs/test-clang-tombstone.s
new file mode 100644
index 0000000000000..f7e85420b89dd
--- /dev/null
+++ b/llvm/test/tools/llvm-debuginfo-analyzer/WebAssembly/Inputs/test-clang-tombstone.s
@@ -0,0 +1,366 @@
+ .text
+ .file "test.cpp"
+ .globaltype __stack_pointer, i32
+ .functype _Z3fooPKijb (i32, i32, i32) -> (i32)
+ .section .text._Z3fooPKijb,"",@
+ .hidden _Z3fooPKijb # -- Begin function _Z3fooPKijb
+ .globl _Z3fooPKijb
+ .type _Z3fooPKijb, at function
+_Z3fooPKijb: # @_Z3fooPKijb
+.Lfunc_begin0:
+ .file 1 "/data/projects/scripts/regression-suite/input/general" "test.cpp"
+ .loc 1 2 0 # test.cpp:2:0
+ .functype _Z3fooPKijb (i32, i32, i32) -> (i32)
+ .local i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32
+# %bb.0: # %entry
+ global.get __stack_pointer
+ local.set 3
+ i32.const 32
+ local.set 4
+ local.get 3
+ local.get 4
+ i32.sub
+ local.set 5
+ local.get 5
+ local.get 0
+ i32.store 24
+ local.get 5
+ local.get 1
+ i32.store 20
+ local.get 2
+ local.set 6
+ local.get 5
+ local.get 6
+ i32.store8 19
+.Ltmp0:
+ .loc 1 3 7 prologue_end # test.cpp:3:7
+ local.get 5
+ i32.load8_u 19
+ local.set 7
+.Ltmp1:
+ .loc 1 3 7 is_stmt 0 # test.cpp:3:7
+ i32.const 1
+ local.set 8
+ local.get 7
+ local.get 8
+ i32.and
+ local.set 9
+ block
+ block
+ local.get 9
+ i32.eqz
+ br_if 0 # 0: down to label1
+# %bb.1: # %if.then
+.Ltmp2:
+ .loc 1 5 19 is_stmt 1 # test.cpp:5:19
+ i32.const 7
+ local.set 10
+ local.get 5
+ local.get 10
+ i32.store 12
+ .loc 1 6 5 # test.cpp:6:5
+ i32.const 7
+ local.set 11
+ local.get 5
+ local.get 11
+ i32.store 28
+ br 1 # 1: down to label0
+.Ltmp3:
+.LBB0_2: # %if.end
+ .loc 1 0 5 is_stmt 0 # test.cpp:0:5
+ end_block # label1:
+ .loc 1 8 10 is_stmt 1 # test.cpp:8:10
+ local.get 5
+ i32.load 20
+ local.set 12
+ .loc 1 8 3 is_stmt 0 # test.cpp:8:3
+ local.get 5
+ local.get 12
+ i32.store 28
+.LBB0_3: # %return
+ .loc 1 0 3 # test.cpp:0:3
+ end_block # label0:
+ .loc 1 9 1 is_stmt 1 # test.cpp:9:1
+ local.get 5
+ i32.load 28
+ local.set 13
+ local.get 13
+ return
+ end_function
+.Ltmp4:
+.Lfunc_end0:
+ # -- End function
+ .section .debug_abbrev,"",@
+ .int8 1 # Abbreviation Code
+ .int8 17 # DW_TAG_compile_unit
+ .int8 1 # DW_CHILDREN_yes
+ .int8 37 # DW_AT_producer
+ .int8 14 # DW_FORM_strp
+ .int8 19 # DW_AT_language
+ .int8 5 # DW_FORM_data2
+ .int8 3 # DW_AT_name
+ .int8 14 # DW_FORM_strp
+ .int8 16 # DW_AT_stmt_list
+ .int8 23 # DW_FORM_sec_offset
+ .int8 27 # DW_AT_comp_dir
+ .int8 14 # DW_FORM_strp
+ .int8 17 # DW_AT_low_pc
+ .int8 1 # DW_FORM_addr
+ .int8 18 # DW_AT_high_pc
+ .int8 6 # DW_FORM_data4
+ .int8 0 # EOM(1)
+ .int8 0 # EOM(2)
+ .int8 2 # Abbreviation Code
+ .int8 46 # DW_TAG_subprogram
+ .int8 1 # DW_CHILDREN_yes
+ .int8 17 # DW_AT_low_pc
+ .int8 1 # DW_FORM_addr
+ .int8 18 # DW_AT_high_pc
+ .int8 6 # DW_FORM_data4
+ .int8 64 # DW_AT_frame_base
+ .int8 24 # DW_FORM_exprloc
+ .int8 110 # DW_AT_linkage_name
+ .int8 14 # DW_FORM_strp
+ .int8 3 # DW_AT_name
+ .int8 14 # DW_FORM_strp
+ .int8 58 # DW_AT_decl_file
+ .int8 11 # DW_FORM_data1
+ .int8 59 # DW_AT_decl_line
+ .int8 11 # DW_FORM_data1
+ .int8 73 # DW_AT_type
+ .int8 19 # DW_FORM_ref4
+ .int8 63 # DW_AT_external
+ .int8 25 # DW_FORM_flag_present
+ .int8 0 # EOM(1)
+ .int8 0 # EOM(2)
+ .int8 3 # Abbreviation Code
+ .int8 5 # DW_TAG_formal_parameter
+ .int8 0 # DW_CHILDREN_no
+ .int8 2 # DW_AT_location
+ .int8 24 # DW_FORM_exprloc
+ .int8 3 # DW_AT_name
+ .int8 14 # DW_FORM_strp
+ .int8 58 # DW_AT_decl_file
+ .int8 11 # DW_FORM_data1
+ .int8 59 # DW_AT_decl_line
+ .int8 11 # DW_FORM_data1
+ .int8 73 # DW_AT_type
+ .int8 19 # DW_FORM_ref4
+ .int8 0 # EOM(1)
+ .int8 0 # EOM(2)
+ .int8 4 # Abbreviation Code
+ .int8 11 # DW_TAG_lexical_block
+ .int8 1 # DW_CHILDREN_yes
+ .int8 17 # DW_AT_low_pc
+ .int8 1 # DW_FORM_addr
+ .int8 18 # DW_AT_high_pc
+ .int8 6 # DW_FORM_data4
+ .int8 0 # EOM(1)
+ .int8 0 # EOM(2)
+ .int8 5 # Abbreviation Code
+ .int8 52 # DW_TAG_variable
+ .int8 0 # DW_CHILDREN_no
+ .int8 2 # DW_AT_location
+ .int8 24 # DW_FORM_exprloc
+ .int8 3 # DW_AT_name
+ .int8 14 # DW_FORM_strp
+ .int8 58 # DW_AT_decl_file
+ .int8 11 # DW_FORM_data1
+ .int8 59 # DW_AT_decl_line
+ .int8 11 # DW_FORM_data1
+ .int8 73 # DW_AT_type
+ .int8 19 # DW_FORM_ref4
+ .int8 0 # EOM(1)
+ .int8 0 # EOM(2)
+ .int8 6 # Abbreviation Code
+ .int8 22 # DW_TAG_typedef
+ .int8 0 # DW_CHILDREN_no
+ .int8 73 # DW_AT_type
+ .int8 19 # DW_FORM_ref4
+ .int8 3 # DW_AT_name
+ .int8 14 # DW_FORM_strp
+ .int8 58 # DW_AT_decl_file
+ .int8 11 # DW_FORM_data1
+ .int8 59 # DW_AT_decl_line
+ .int8 11 # DW_FORM_data1
+ .int8 0 # EOM(1)
+ .int8 0 # EOM(2)
+ .int8 7 # Abbreviation Code
+ .int8 36 # DW_TAG_base_type
+ .int8 0 # DW_CHILDREN_no
+ .int8 3 # DW_AT_name
+ .int8 14 # DW_FORM_strp
+ .int8 62 # DW_AT_encoding
+ .int8 11 # DW_FORM_data1
+ .int8 11 # DW_AT_byte_size
+ .int8 11 # DW_FORM_data1
+ .int8 0 # EOM(1)
+ .int8 0 # EOM(2)
+ .int8 8 # Abbreviation Code
+ .int8 15 # DW_TAG_pointer_type
+ .int8 0 # DW_CHILDREN_no
+ .int8 73 # DW_AT_type
+ .int8 19 # DW_FORM_ref4
+ .int8 0 # EOM(1)
+ .int8 0 # EOM(2)
+ .int8 9 # Abbreviation Code
+ .int8 38 # DW_TAG_const_type
+ .int8 0 # DW_CHILDREN_no
+ .int8 73 # DW_AT_type
+ .int8 19 # DW_FORM_ref4
+ .int8 0 # EOM(1)
+ .int8 0 # EOM(2)
+ .int8 0 # EOM(3)
+ .section .debug_info,"",@
+.Lcu_begin0:
+ .int32 .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit
+.Ldebug_info_start0:
+ .int16 4 # DWARF version number
+ .int32 .debug_abbrev0 # Offset Into Abbrev. Section
+ .int8 4 # Address Size (in bytes)
+ .int8 1 # Abbrev [1] 0xb:0xb5 DW_TAG_compile_unit
+ .int32 .Linfo_string0 # DW_AT_producer
+ .int16 33 # DW_AT_language
+ .int32 .Linfo_string1 # DW_AT_name
+ .int32 .Lline_table_start0 # DW_AT_stmt_list
+ .int32 .Linfo_string2 # DW_AT_comp_dir
+ .int32 .Lfunc_begin0 # DW_AT_low_pc
+ .int32 .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc
+ .int8 2 # Abbrev [2] 0x26:0x6a DW_TAG_subprogram
+ .int32 0xffffffff # DW_AT_low_pc
+ .int32 .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc
+ .int8 4 # DW_AT_frame_base
+ .int8 237
+ .int8 0
+ .int8 5
+ .int8 159
+ .int32 .Linfo_string3 # DW_AT_linkage_name
+ .int32 .Linfo_string4 # DW_AT_name
+ .int8 1 # DW_AT_decl_file
+ .int8 2 # DW_AT_decl_line
+ .int32 144 # DW_AT_type
+ # DW_AT_external
+ .int8 3 # Abbrev [3] 0x42:0xe DW_TAG_formal_parameter
+ .int8 2 # DW_AT_location
+ .int8 145
+ .int8 24
+ .int32 .Linfo_string6 # DW_AT_name
+ .int8 1 # DW_AT_decl_file
+ .int8 2 # DW_AT_decl_line
+ .int32 151 # DW_AT_type
+ .int8 3 # Abbrev [3] 0x50:0xe DW_TAG_formal_parameter
+ .int8 2 # DW_AT_location
+ .int8 145
+ .int8 20
+ .int32 .Linfo_string8 # DW_AT_name
+ .int8 1 # DW_AT_decl_file
+ .int8 2 # DW_AT_decl_line
+ .int32 172 # DW_AT_type
+ .int8 3 # Abbrev [3] 0x5e:0xe DW_TAG_formal_parameter
+ .int8 2 # DW_AT_location
+ .int8 145
+ .int8 19
+ .int32 .Linfo_string10 # DW_AT_name
+ .int8 1 # DW_AT_decl_file
+ .int8 2 # DW_AT_decl_line
+ .int32 179 # DW_AT_type
+ .int8 4 # Abbrev [4] 0x6c:0x18 DW_TAG_lexical_block
+ .int32 .Ltmp2 # DW_AT_low_pc
+ .int32 .Ltmp3-.Ltmp2 # DW_AT_high_pc
+ .int8 5 # Abbrev [5] 0x75:0xe DW_TAG_variable
+ .int8 2 # DW_AT_location
+ .int8 145
+ .int8 12
+ .int32 .Linfo_string12 # DW_AT_name
+ .int8 1 # DW_AT_decl_file
+ .int8 5 # DW_AT_decl_line
+ .int32 186 # DW_AT_type
+ .int8 0 # End Of Children Mark
+ .int8 6 # Abbrev [6] 0x84:0xb DW_TAG_typedef
+ .int32 144 # DW_AT_type
+ .int32 .Linfo_string13 # DW_AT_name
+ .int8 1 # DW_AT_decl_file
+ .int8 4 # DW_AT_decl_line
+ .int8 0 # End Of Children Mark
+ .int8 7 # Abbrev [7] 0x90:0x7 DW_TAG_base_type
+ .int32 .Linfo_string5 # DW_AT_name
+ .int8 5 # DW_AT_encoding
+ .int8 4 # DW_AT_byte_size
+ .int8 6 # Abbrev [6] 0x97:0xb DW_TAG_typedef
+ .int32 162 # DW_AT_type
+ .int32 .Linfo_string7 # DW_AT_name
+ .int8 1 # DW_AT_decl_file
+ .int8 1 # DW_AT_decl_line
+ .int8 8 # Abbrev [8] 0xa2:0x5 DW_TAG_pointer_type
+ .int32 167 # DW_AT_type
+ .int8 9 # Abbrev [9] 0xa7:0x5 DW_TAG_const_type
+ .int32 144 # DW_AT_type
+ .int8 7 # Abbrev [7] 0xac:0x7 DW_TAG_base_type
+ .int32 .Linfo_string9 # DW_AT_name
+ .int8 7 # DW_AT_encoding
+ .int8 4 # DW_AT_byte_size
+ .int8 7 # Abbrev [7] 0xb3:0x7 DW_TAG_base_type
+ .int32 .Linfo_string11 # DW_AT_name
+ .int8 2 # DW_AT_encoding
+ .int8 1 # DW_AT_byte_size
+ .int8 9 # Abbrev [9] 0xba:0x5 DW_TAG_const_type
+ .int32 132 # DW_AT_type
+ .int8 0 # End Of Children Mark
+.Ldebug_info_end0:
+ .section .debug_str,"S",@
+.Linfo_string0:
+ .asciz "clang version 19.0.0" # string offset=0
+.Linfo_string1:
+ .asciz "test.cpp" # string offset=111
+.Linfo_string2:
+ .asciz "general" # string offset=120
+.Linfo_string3:
+ .asciz "_Z3fooPKijb" # string offset=174
+.Linfo_string4:
+ .asciz "foo" # string offset=186
+.Linfo_string5:
+ .asciz "int" # string offset=190
+.Linfo_string6:
+ .asciz "ParamPtr" # string offset=194
+.Linfo_string7:
+ .asciz "INTPTR" # string offset=203
+.Linfo_string8:
+ .asciz "ParamUnsigned" # string offset=210
+.Linfo_string9:
+ .asciz "unsigned int" # strin...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/141616
More information about the llvm-commits
mailing list