[llvm] ad32576 - [DWARFVerifier] Allow overlapping ranges for ICF-merged functions (#117952)

via llvm-commits llvm-commits at lists.llvm.org
Tue Dec 17 11:00:59 PST 2024


Author: alx32
Date: 2024-12-17T11:00:56-08:00
New Revision: ad32576cffc88bf7c359a528afbed7c2ae7ddb2d

URL: https://github.com/llvm/llvm-project/commit/ad32576cffc88bf7c359a528afbed7c2ae7ddb2d
DIFF: https://github.com/llvm/llvm-project/commit/ad32576cffc88bf7c359a528afbed7c2ae7ddb2d.diff

LOG: [DWARFVerifier] Allow overlapping ranges for ICF-merged functions (#117952)

This patch modifies the DWARF verifier to handle a valid case where two
or more functions have identical address ranges due to being merged by
ICF (Identical Code Folding). Previously, the verifier would incorrectly
report these as errors, but functions merged via ICF (such as when using
LLD's --keep-icf-stabs option) can legitimately share the same address
range.

A new test case has been added to verify this behavior using YAML-based
DWARF data that simulates two DW_TAG_subprogram entries with identical
address ranges. The test ensures that the verifier correctly identifies
this as a valid case and doesn't emit any errors, while still
maintaining the existing verification for truly invalid overlapping
ranges in other scenarios. Before this change, the newly added test case
would have failed, with `llvm-dwarfdump` marking the overlapping address
ranges in the DWARF as an error.

We also modify the existing tests
`llvm/test/tools/llvm-dwarfutil/ELF/X86/verify.test` and
`llvm/test/tools/llvm-dwarfdump/X86/verify_parent_zero_length.yaml`,
which rely on the existence of the error that we're trying to suppress.
We slightly change one offset in each test so that the ranges no longer
overlap perfectly and an error is still generated.

Added: 
    llvm/test/tools/llvm-dwarfdump/X86/verify_no_overlap_error_icf.yaml

Modified: 
    llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h
    llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp
    llvm/test/tools/llvm-dwarfdump/X86/verify_parent_zero_length.yaml
    llvm/test/tools/llvm-dwarfutil/ELF/X86/verify.test

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h
index c2365a4c7cf647..7b51bb63cd15ba 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h
@@ -68,7 +68,9 @@ class DWARFVerifier {
 
     /// Inserts the address range. If the range overlaps with an existing
     /// range, the range that it overlaps with will be returned and the two
-    /// address ranges will be unioned together in "Ranges".
+    /// address ranges will be unioned together in "Ranges". If a duplicate
+    /// entry is attempted to be added, the duplicate range will not actually be
+    /// added and the returned iterator will point to end().
     ///
     /// This is used for finding overlapping ranges in the DW_AT_ranges
     /// attribute of a DIE. It is also used as a set of address ranges that
@@ -77,7 +79,9 @@ class DWARFVerifier {
 
     /// Inserts the address range info. If any of its ranges overlaps with a
     /// range in an existing range info, the range info is *not* added and an
-    /// iterator to the overlapping range info.
+    /// iterator to the overlapping range info. If a duplicate entry is
+    /// attempted to be added, the duplicate range will not actually be added
+    /// and the returned iterator will point to end().
     ///
     /// This is used for finding overlapping children of the same DIE.
     die_range_info_iterator insert(const DieRangeInfo &RI);
@@ -86,7 +90,7 @@ class DWARFVerifier {
     bool contains(const DieRangeInfo &RHS) const;
 
     /// Return true if any range in this object intersects with any range in
-    /// RHS.
+    /// RHS. Identical ranges are not considered to be intersecting.
     bool intersects(const DieRangeInfo &RHS) const;
   };
 

diff  --git a/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp b/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp
index 1fe3eb1e90fe65..8bf513538de7c7 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp
@@ -53,6 +53,11 @@ DWARFVerifier::DieRangeInfo::insert(const DWARFAddressRange &R) {
   auto End = Ranges.end();
   auto Pos = std::lower_bound(Begin, End, R);
 
+  // Check for exact duplicates which is an allowed special case
+  if (Pos != End && *Pos == R) {
+    return std::nullopt;
+  }
+
   if (Pos != End) {
     DWARFAddressRange Range(*Pos);
     if (Pos->merge(R))
@@ -113,8 +118,11 @@ bool DWARFVerifier::DieRangeInfo::intersects(const DieRangeInfo &RHS) const {
   auto I1 = Ranges.begin(), E1 = Ranges.end();
   auto I2 = RHS.Ranges.begin(), E2 = RHS.Ranges.end();
   while (I1 != E1 && I2 != E2) {
-    if (I1->intersects(*I2))
-      return true;
+    if (I1->intersects(*I2)) {
+      // Exact duplicates are allowed
+      if (!(*I1 == *I2))
+        return true;
+    }
     if (I1->LowPC < I2->LowPC)
       ++I1;
     else

diff  --git a/llvm/test/tools/llvm-dwarfdump/X86/verify_no_overlap_error_icf.yaml b/llvm/test/tools/llvm-dwarfdump/X86/verify_no_overlap_error_icf.yaml
new file mode 100644
index 00000000000000..b1ce724ff0b6d5
--- /dev/null
+++ b/llvm/test/tools/llvm-dwarfdump/X86/verify_no_overlap_error_icf.yaml
@@ -0,0 +1,153 @@
+#--- comments.txt
+
+# This test verifies several scenarios with DW_TAG_subprogram address ranges:
+# 1. Two subprograms can have identical ranges (shown with foo2 and foo3 having same low_pc/high_pc)
+#    This is valid and can happen when ICF (Identical Code Folding) merges functions.
+# 2. Two subprograms can have overlapping ranges when using DW_AT_ranges
+#    (shown with func1_with_ranges and func2_with_ranges sharing range 0x5000-0x6000)
+#    This is also valid and can occur with -fbasic-block-sections=all
+# 3. The test also verifies that non-identical overlapping ranges are correctly flagged as errors:
+#    - When modifying just the first range's high offset from 0x6000 to 0x5999, it creates an invalid subrange overlap
+#    - When modifying just the first instance of DW_AT_high_pc 0x77 to 0x66, it creates an invalid function overlap
+# The test ensures llvm-dwarfdump --verify correctly validates these cases by:
+#  a) Accepting valid identical overlapping ranges
+#  b) Rejecting invalid non-identical overlapping ranges
+
+# Need to use split-file in order for `sed` calls below to work correctly
+# RUN: split-file %s %t
+# RUN: yaml2obj %t/test.yaml | llvm-dwarfdump --error-display=details --verify - | FileCheck %s
+# CHECK: No errors.
+
+# RUN: sed '0,/HighOffset: 0x6000/{s//HighOffset: 0x5999/}' %t/test.yaml | yaml2obj | not llvm-dwarfdump --error-display=details --verify - | FileCheck %s --check-prefix=CHECK-RANGES
+# CHECK-RANGES: error: DIEs have overlapping address ranges
+
+# RUN: sed '0,/Value:  0x77/{s/Value:  0x77/Value:  0x66/}' %t/test.yaml | yaml2obj | not llvm-dwarfdump --error-display=details --verify - | FileCheck %s --check-prefix=CHECK-HIGH-PC
+# CHECK-HIGH-PC: error: DIEs have overlapping address ranges
+
+# RUN: sed '0,/LowOffset:  0x880111/{s//LowOffset:  0x880112/}' %t/test.yaml | yaml2obj | not llvm-dwarfdump --error-display=details --verify - | FileCheck %s --check-prefix=CHECK-LEX-BLOCK
+# CHECK-LEX-BLOCK: DIE has overlapping ranges in DW_AT_ranges attribute
+
+#--- test.yaml
+--- !ELF
+FileHeader:
+  Class:    ELFCLASS64
+  Data:     ELFDATA2LSB
+  Type:     ET_REL
+  Machine:  EM_X86_64
+DWARF:
+  debug_abbrev:
+    - Table:
+      - Tag:      DW_TAG_compile_unit
+        Children: DW_CHILDREN_yes
+        Attributes:
+          - Attribute: DW_AT_producer
+            Form:      DW_FORM_string
+          - Attribute: DW_AT_language
+            Form:      DW_FORM_data2
+          - Attribute: DW_AT_name
+            Form:      DW_FORM_string
+          - Attribute: DW_AT_low_pc
+            Form:      DW_FORM_addr
+          - Attribute: DW_AT_high_pc
+            Form:      DW_FORM_data8
+      - Tag:      DW_TAG_subprogram
+        Children: DW_CHILDREN_no
+        Attributes:
+          - Attribute: DW_AT_name
+            Form:      DW_FORM_string
+          - Attribute: DW_AT_low_pc
+            Form:      DW_FORM_addr
+          - Attribute: DW_AT_high_pc
+            Form:      DW_FORM_data8
+      - Tag:      DW_TAG_subprogram
+        Children: DW_CHILDREN_no
+        Attributes:
+          - Attribute: DW_AT_name
+            Form:      DW_FORM_string
+          - Attribute: DW_AT_ranges
+            Form:      DW_FORM_sec_offset
+      - Tag:      DW_TAG_base_type
+        Children: DW_CHILDREN_no
+        Attributes:
+          - Attribute: DW_AT_name
+            Form:      DW_FORM_string
+      - Tag:      DW_TAG_lexical_block
+        Children: DW_CHILDREN_no
+        Attributes:
+          - Attribute: DW_AT_ranges
+            Form:      DW_FORM_sec_offset
+  debug_ranges:
+    - Offset:    0x0
+      AddrSize:  0x8
+      Entries:
+        - LowOffset:  0x1000
+          HighOffset: 0x2000
+        - LowOffset:  0x3000
+          HighOffset: 0x4000
+        - LowOffset:  0x5000  # Overlaps with 2nd range below
+          HighOffset: 0x6000
+        - LowOffset:  0x0
+          HighOffset: 0x0
+    - Offset:    0x50
+      AddrSize:  0x8
+      Entries:
+        - LowOffset:  0x2500
+          HighOffset: 0x2800
+        - LowOffset:  0x5000  # Overlaps with 3rd range above
+          HighOffset: 0x6000
+        - LowOffset:  0x7000
+          HighOffset: 0x8000
+        - LowOffset:  0x0
+          HighOffset: 0x0
+    - Offset: 0xA0   # Added Range List #3 for lexical block
+      AddrSize: 0x8
+      Entries:
+        - LowOffset:  0x880111
+          HighOffset: 0x881222
+        - LowOffset:  0x882333
+          HighOffset: 0x883444
+        - LowOffset:  0x880111  # Overlaps with 1st range in the same list
+          HighOffset: 0x881222
+        - LowOffset:  0x0   # End of list
+          HighOffset: 0x0
+  debug_info:
+    - Version: 4
+      Entries:
+        - AbbrCode: 1
+          Values:
+            - CStr: by_hand
+            - Value:  0x04
+            - CStr: CU1
+            - Value:  0x1000
+            - Value:  0x100
+        - AbbrCode: 4
+          Values:
+            - CStr: int
+        - AbbrCode: 2
+          Values:
+            - CStr: foo1
+            - Value:  0x1000
+            - Value:  0x10
+        - AbbrCode: 2
+          Values:
+            - CStr: foo2
+            - Value:  0x0    # Overlaps with 'foo3' below
+            - Value:  0x77
+        - AbbrCode: 2
+          Values:
+            - CStr: foo3
+            - Value:  0x0    # Overlaps with 'foo2' above
+            - Value:  0x77
+        - AbbrCode: 3
+          Values:
+            - CStr: func1_with_ranges
+            - Value:  0x0
+        - AbbrCode: 3
+          Values:
+            - CStr: func2_with_ranges
+            - Value:  0x50
+        - AbbrCode: 5   # Added lexical block using ranges
+          Values:
+            - Value: 0xA0 # Range list index in debug_ranges
+        - AbbrCode: 0
+...

diff  --git a/llvm/test/tools/llvm-dwarfdump/X86/verify_parent_zero_length.yaml b/llvm/test/tools/llvm-dwarfdump/X86/verify_parent_zero_length.yaml
index 655819515f0ff2..8eb7a349b9b804 100644
--- a/llvm/test/tools/llvm-dwarfdump/X86/verify_parent_zero_length.yaml
+++ b/llvm/test/tools/llvm-dwarfdump/X86/verify_parent_zero_length.yaml
@@ -19,7 +19,7 @@
 #                 DW_AT_high_pc   (0x0000000000000000)
 #
 # 0x00000033:     DW_TAG_lexical_block
-#                   DW_AT_low_pc  (0x0000000000001000)
+#                   DW_AT_low_pc  (0x0000000000001001)
 #                   DW_AT_high_pc (0x0000000000002000)
 #
 # 0x00000044:     DW_TAG_lexical_block
@@ -47,7 +47,7 @@
 
 # CHECK: error: DIEs have overlapping address ranges:
 # CHECK: 0x00000044: DW_TAG_lexical_block
-# CHECK:               DW_AT_low_pc	[DW_FORM_addr] (0x0000000000001000)
+# CHECK:               DW_AT_low_pc	[DW_FORM_addr] (0x0000000000001001)
 # CHECK:               DW_AT_high_pc	[DW_FORM_addr] (0x0000000000002000)
 
 # CHECK: 0x00000033: DW_TAG_lexical_block
@@ -61,7 +61,7 @@
 # CHECK:               DW_AT_high_pc	[DW_FORM_addr] (0x0000000000000000)
 
 # CHECK: 0x00000044:   DW_TAG_lexical_block
-# CHECK:                 DW_AT_low_pc	[DW_FORM_addr] (0x0000000000001000)
+# CHECK:                 DW_AT_low_pc	[DW_FORM_addr] (0x0000000000001001)
 # CHECK:                 DW_AT_high_pc	[DW_FORM_addr] (0x0000000000002000)
 
 
@@ -229,7 +229,7 @@ DWARF:
             - Value:           0x0000000000002000
         - AbbrCode:        0x00000003
           Values:
-            - Value:           0x0000000000001000
+            - Value:           0x0000000000001001
             - Value:           0x0000000000002000
         - AbbrCode:        0x00000000
         - AbbrCode:        0x00000000

diff  --git a/llvm/test/tools/llvm-dwarfutil/ELF/X86/verify.test b/llvm/test/tools/llvm-dwarfutil/ELF/X86/verify.test
index bf736937782c81..73ee11f46d10e7 100644
--- a/llvm/test/tools/llvm-dwarfutil/ELF/X86/verify.test
+++ b/llvm/test/tools/llvm-dwarfutil/ELF/X86/verify.test
@@ -150,7 +150,7 @@ DWARF:
           Values:
             - CStr: foo3
             - Value:  0x0
-            - Value:  0x100
+            - Value:  0x80
             - Value:  0x00000040
         - AbbrCode: 0
 ...


        


More information about the llvm-commits mailing list