[llvm] [BOLT][AArch64] Support for pointer authentication (v2) (PR #120064)

Tue Apr 22 02:30:21 PDT 2025

================
@@ -0,0 +1,114 @@
+#!/usr/bin/env python3
+
+# This tool matches DWARF frame dumps
+# (the output of running llvm-objdump --dwarf=frames)
+# by address to function names (which are parsed from a normal objdump).
+# The script is used to check whether .cfi_negate_ra_state CFIs
+# are generated by BOLT the same way they are generated by LLVM.
+
+import argparse
+import subprocess
+import sys
+import re
+
+
+class NameDwarfPair(object):
+    def __init__(self, name, body):
+        self.name = name
+        self.body = body
+        self.finalized = False
+
+    def append(self, body_line):
+        # only store elements into the body until the first whitespace line is encountered.
+        if body_line.isspace():
+            self.finalized = True
+        if not self.finalized:
+            self.body += body_line
+
+    def print(self):
+        print(self.name)
+        print(self.body)
+
+    def parse_negates(self):
+        negate_offsets = []
+        loc = 0
+        # TODO: make sure this is not printed in hex
+        re_advloc = f"DW_CFA_advance_loc: (\d+)"
+
+        for line in self.body.splitlines():
+            # if line matches advance_loc int
+            match = re.search(re_advloc, line)
+            if match:
+                loc += int(match.group(1))
+            if "DW_CFA_AARCH64_negate_ra_state" in line:
+                negate_offsets.append(loc)
+
+        self.negate_offsets = negate_offsets
+
+    def __eq__(self, other):
+        return self.name == other.name and self.negate_offsets == other.negate_offsets
+
+
+def parse_objdump(objdump):
+    """
+    Parse and return address-to-name dictionary from objdump file
+    """
+    addr_name_dict = dict()
+    re_function = re.compile(r"^([0-9a-fA-F]+)\s<(.*)>:$")
+    with open(objdump, "r") as f:
+        for line in f.readlines():
+            match = re_function.match(line)
+            if not match:
+                continue
+            m_addr = match.groups()[0]
+            m_name = match.groups()[1]
+            addr_name_dict[int(m_addr, 16)] = m_name
+
+    return addr_name_dict
+
+
+def parse_dwarf(dwarfdump, addr_name_dict):
+    """
+    Parse dwarf dump, and match names to blocks using the dict from the objdump.
+    Return a list of NameDwarfPairs.
+    """
+    re_address_line = re.compile(r".*pc=([0-9a-fA-F]{8})\.\.\.([0-9a-fA-F]{8})")
+    with open(dwarfdump, "r") as dw:
+        functions = []
+        for line in dw.readlines():
+            match = re_address_line.match(line)
+            if not match:
+                if len(functions) > 0:
+                    functions[-1].append(line)
+                continue
+            pc_start_address = match.groups()[0]
+            name = addr_name_dict.get(int(pc_start_address, 16))
+            functions.append(NameDwarfPair(name, ""))
+
+        return functions
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("objdump", help="Objdump file")
+    parser.add_argument(
+        "dwarfdump", help="dwarf dump file created with 'llvm-objdump --dwarf=frames'"
+    )
+    parser.add_argument("function", help="Function to search CFIs in.")
+
+    args = parser.parse_args()
+
+    addr_name_dict = parse_objdump(args.objdump)
+    functions = parse_dwarf(args.dwarfdump, addr_name_dict)
+
+    for f in functions:
----------------
bgergely0 wrote:

Good point. Given how the script currently works (it only outputs the offsets for one specific function, which we pipe to diff), I think it is better to support only one function at a time and to simplify the disassembly with that flag.

https://github.com/llvm/llvm-project/pull/120064