[llvm] [BOLT][AArch64] Support for pointer authentication (v2) (PR #120064)

Thu Apr 17 08:00:06 PDT 2025

================
@@ -0,0 +1,114 @@
+#!/usr/bin/env python3
+
+# This tool helps match DWARF dumps
+# (i.e. the output of running llvm-objdump --dwarf=frames)
+# by address to function names (which are parsed from a regular objdump).
+# The script is used to check whether .cfi_negate_ra_state CFIs
+# are generated by BOLT the same way they are generated by LLVM.
+
+import argparse
+import subprocess
+import sys
+import re
+
+
+class NameDwarfPair(object):
+    """Pair a function name with the CFI dump text collected for it.
+
+    ``body`` accumulates the lines passed to append() until the first
+    whitespace-only line is seen. ``negate_offsets`` is the list of code
+    offsets at which DW_CFA_AARCH64_negate_ra_state appears in ``body``;
+    it stays None until parse_negates() has run.
+    """
+
+    # Matches DW_CFA_advance_loc and the DW_CFA_advance_loc1/2/4 variants.
+    # NOTE(review): the variants are assumed to be dumped in the same
+    # "<opcode>: <delta>" form as the plain opcode - confirm.
+    # TODO: make sure this is not printed in hex
+    _RE_ADVANCE_LOC = re.compile(r"DW_CFA_advance_loc[124]?: (\d+)")
+
+    # Instances compare by value and are mutable, so they are not hashable.
+    __hash__ = None
+
+    def __init__(self, name, body):
+        self.name = name
+        self.body = body
+        self.finalized = False
+        # Filled in by parse_negates().
+        self.negate_offsets = None
+
+    def append(self, body_line):
+        """Add body_line to the body unless a whitespace-only line was seen."""
+        # only store elements into the body until the first whitespace line is encountered.
+        if body_line.isspace():
+            self.finalized = True
+        if not self.finalized:
+            self.body += body_line
+
+    def print(self):
+        """Print the name followed by the collected body."""
+        print(self.name)
+        print(self.body)
+
+    def parse_negates(self):
+        """Compute negate_offsets from the current body.
+
+        Walks the body line by line, summing the advance_loc deltas, and
+        records the running offset at every negate_ra_state line.
+        """
+        negate_offsets = []
+        loc = 0
+
+        for line in self.body.splitlines():
+            match = self._RE_ADVANCE_LOC.search(line)
+            if match:
+                loc += int(match.group(1))
+            if "DW_CFA_AARCH64_negate_ra_state" in line:
+                negate_offsets.append(loc)
+
+        self.negate_offsets = negate_offsets
+
+    def _offsets(self):
+        """Return negate_offsets, parsing the body first if not done yet."""
+        if self.negate_offsets is None:
+            self.parse_negates()
+        return self.negate_offsets
+
+    def __eq__(self, other):
+        """Two pairs are equal when their names and negate offsets match."""
+        if not isinstance(other, NameDwarfPair):
+            return NotImplemented
+        return self.name == other.name and self._offsets() == other._offsets()
+
+
+def parse_objdump(objdump):
+    """
+    Read the objdump file at the given path and build a dictionary that maps
+    each symbol's address (as an int) to the symbol's name.
+    """
+    symbol_header = re.compile(r"^([0-9a-fA-F]+)\s<(.*)>:$")
+    names_by_address = {}
+    with open(objdump, "r") as dump_file:
+        for text in dump_file:
+            found = symbol_header.match(text)
+            if found:
+                # Addresses in the symbol header lines are hexadecimal.
+                address, symbol = found.groups()
+                names_by_address[int(address, 16)] = symbol
+
+    return names_by_address
+
+
+def parse_dwarf(dwarfdump, addr_name_dict):
+    """
+    Parse dwarf dump, and match names to blocks using the dict from the objdump.
+    Return a list of NameDwarfPairs.
+
+    Each line containing a "pc=<start>...<end>" range starts a new pair; its
+    name is looked up in addr_name_dict by start address and is None when the
+    address is not present. Every other line is appended to the most recently
+    started pair; lines before the first range are dropped.
+    """
+    # Accept 8 or more hex digits so that address ranges above 32 bits still
+    # start a new pair instead of being appended to the previous one.
+    # NOTE(review): assumes the dump zero-pads addresses to at least 8 digits
+    # and prints wider ones in full - confirm.
+    re_address_line = re.compile(r".*pc=([0-9a-fA-F]{8,})\.\.\.([0-9a-fA-F]{8,})")
+    functions = []
+    with open(dwarfdump, "r") as dw:
+        for line in dw:
+            match = re_address_line.match(line)
+            if match:
+                pc_start_address = int(match.group(1), 16)
+                name = addr_name_dict.get(pc_start_address)
+                functions.append(NameDwarfPair(name, ""))
+            elif functions:
+                functions[-1].append(line)
+
+    return functions
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("objdump", help="Objdump file")
----------------
paschalis-mpeis wrote:

Do we really need this objdump file? I think we can use `parse_objdump` on `args.dwarfdump` too, and drop the extra argument, and the RUN line that generates `%t.exe.dump` in the lit test.

Given that's done, you could consider renaming:
- `parse_objdump` to something like `extract_symbols` or `extract_functions`
- `parse_dwarf` to `associate_dwarf_data`

(just quick suggestions – feel free to choose better names)

https://github.com/llvm/llvm-project/pull/120064