[PATCH] D142431: [extract_symbols.py] Filter out more symbols for MSVC

Mike Hommey via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Mon Jan 23 23:35:21 PST 2023


glandium created this revision.
Herald added a project: All.
glandium requested review of this revision.
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.

Fixes https://github.com/llvm/llvm-project/issues/60109


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D142431

Files:
  llvm/utils/extract_symbols.py


Index: llvm/utils/extract_symbols.py
===================================================================
--- llvm/utils/extract_symbols.py
+++ llvm/utils/extract_symbols.py
@@ -131,6 +131,21 @@
 def aix_is_32bit_windows(lib):
     return False
 
+# An approximation of identifying private symbols without actually demangling.
+# This has no known false positives as far as clang is concerned, but many
+# false negatives (e.g. for templates); that is good enough for now.
+def is_private(symbol):
+    # Bail on special symbols (*structors, operators) and templates, which
+    # require more understanding of the mangled symbol.
+    if symbol.startswith('??') or "?$" in symbol:
+        return False
+    # See the description of mangling further below. Catch what looks like a
+    # function symbol: find the first "@@" (not preceded by another "@") that
+    # is followed by A-Z; a function-class letter between A and F is private.
+    match = re.search('(?<!@)@@[A-Z]', symbol)
+    return match and symbol[match.end() - 1] <= "F"
+
+
 # MSVC mangles names to ?<identifier_mangling>@<type_mangling>. By examining the
 # identifier/type mangling we can decide which symbols could possibly be
 # required and which we can discard.
@@ -141,7 +156,10 @@
             # Remove calling convention decoration from names
             match = re.match('[_@]([^@]+)', symbol)
             if match:
-                return match.group(1)
+                symbol = match.group(1)
+        # Discard floating point/SIMD constants.
+        if symbol.startswith(("__xmm@", "__real@")):
+            return None
         return symbol
     # Function template instantiations start with ?$; keep the instantiations of
     # clang::Type::getAs, as some of them are explipict specializations that are
@@ -165,6 +183,9 @@
     # namespace doesn't exist outside of that translation unit.
     elif re.search('\?A(0x\w+)?@', symbol):
         return None
+    # Skip private symbols, which plugins wouldn't be able to use.
+    elif is_private(symbol):
+        return None
     # Keep mangled llvm:: and clang:: function symbols. How we detect these is a
     # bit of a mess and imprecise, but that avoids having to completely demangle
     # the symbol name. The outermost namespace is at the end of the identifier


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D142431.491613.patch
Type: text/x-patch
Size: 2240 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20230124/23f3808d/attachment.bin>


More information about the llvm-commits mailing list