[libc-commits] [libc] [libc] Make hdrgen deduce header type lists from function signatures (PR #127251)

Roland McGrath via libc-commits libc-commits at lists.llvm.org
Fri Feb 14 11:54:16 PST 2025


https://github.com/frobtech created https://github.com/llvm/llvm-project/pull/127251

With this change, the `types` list in YAML files should only be used to
list the types that a standard specifies should be in that header
per se.  All the types referenced in function signatures will be
collected automatically.


From 5aae70262eb927a0e6f882ac66c944850f09d447 Mon Sep 17 00:00:00 2001
From: Roland McGrath <mcgrathr at google.com>
Date: Fri, 14 Feb 2025 11:46:23 -0800
Subject: [PATCH] [libc] Make hdrgen deduce header type lists from function
 signatures

With this, the `types` list in YAML files should only be used to
list the types that a standard specifies should be in that header
per se.  All the types referenced in function signatures will be
collected automatically.
---
 libc/utils/hdrgen/function.py                 | 57 ++++++++++++++++---
 libc/utils/hdrgen/header.py                   | 57 +++++++++++++++----
 .../tests/expected_output/test_header.h       |  2 +-
 .../utils/hdrgen/tests/input/subdir/test.yaml |  3 -
 .../utils/hdrgen/tests/input/test_small.h.def |  1 -
 libc/utils/hdrgen/type.py                     |  7 +++
 6 files changed, 103 insertions(+), 24 deletions(-)

diff --git a/libc/utils/hdrgen/function.py b/libc/utils/hdrgen/function.py
index 8ae47e574785f..25a7fa80e4501 100644
--- a/libc/utils/hdrgen/function.py
+++ b/libc/utils/hdrgen/function.py
@@ -6,25 +6,68 @@
 #
 # ==-------------------------------------------------------------------------==#
 
+import re
+from type import Type
+
+
+# These are the keywords that appear in C type syntax but are not part of the
+# include file name.  This is all of the modifiers, qualifiers, and base types,
+# but not "struct".
+KEYWORDS = [
+    "_Atomic",
+    "_Complex",
+    "_Float16",
+    "_Noreturn",
+    "__restrict",
+    "accum",
+    "char",
+    "const",
+    "double",
+    "float",
+    "fract",
+    "int",
+    "long",
+    "short",
+    "signed",
+    "unsigned",
+    "void",
+    "volatile",
+]
+NONIDENTIFIER = re.compile("[^a-zA-Z0-9_]+")
+
 
 class Function:
     def __init__(
         self, return_type, name, arguments, standards, guard=None, attributes=[]
     ):
+        assert return_type
         self.return_type = return_type
         self.name = name
         self.arguments = [
             arg if isinstance(arg, str) else arg["type"] for arg in arguments
         ]
+        assert all(self.arguments)
         self.standards = standards
         self.guard = guard
-        self.attributes = attributes or ""
+        self.attributes = attributes or []
+
+    def signature_types(self):
+        def collapse(type_string):
+            assert type_string
+            # Split into words at nonidentifier characters (`*`, `[`, etc.),
+            # filter out keywords and numbers, and then rejoin with "_".
+            return "_".join(
+                word
+                for word in NONIDENTIFIER.split(type_string)
+                if word and not word.isdecimal() and word not in KEYWORDS
+            )
+
+        all_types = [self.return_type] + self.arguments
+        return {
+            Type(string) for string in filter(None, (collapse(t) for t in all_types))
+        }
 
     def __str__(self):
-        attributes_str = " ".join(self.attributes)
+        attrs_str = "".join(f"{attr} " for attr in self.attributes)
         arguments_str = ", ".join(self.arguments) if self.arguments else "void"
-        if attributes_str == "":
-            result = f"{self.return_type} {self.name}({arguments_str})"
-        else:
-            result = f"{attributes_str} {self.return_type} {self.name}({arguments_str})"
-        return result
+        return attrs_str + f"{self.return_type} {self.name}({arguments_str})"
diff --git a/libc/utils/hdrgen/header.py b/libc/utils/hdrgen/header.py
index f94cb8e628fa6..c817c02b83b75 100644
--- a/libc/utils/hdrgen/header.py
+++ b/libc/utils/hdrgen/header.py
@@ -6,9 +6,33 @@
 #
 # ==-------------------------------------------------------------------------==#
 
+from functools import reduce
 from pathlib import PurePosixPath
 
 
+STDINT_SIZES = [
+    "16",
+    "32",
+    "64",
+    "8",
+    "least16",
+    "least32",
+    "least64",
+    "least8",
+    "max",
+    "ptr",
+]
+
+COMPILER_HEADER_TYPES = (
+    {
+        "bool": "<stdbool.h>",
+        "va_list": "<stdarg.h>",
+    }
+    | {f"int{size}_t": "<stdint.h>" for size in STDINT_SIZES}
+    | {f"uint{size}_t": "<stdint.h>" for size in STDINT_SIZES}
+)
+
+
 class HeaderFile:
     def __init__(self, name):
         self.template_file = None
@@ -34,19 +58,25 @@ def add_object(self, object):
     def add_function(self, function):
         self.functions.append(function)
 
-    def includes(self):
-        return sorted(
-            {
-                PurePosixPath("llvm-libc-macros") / macro.header
-                for macro in self.macros
-                if macro.header is not None
-            }
-            | {
-                PurePosixPath("llvm-libc-types") / f"{typ.type_name}.h"
-                for typ in self.types
-            }
+    def all_types(self):
+        return reduce(
+            lambda a, b: a | b,
+            [f.signature_types() for f in self.functions],
+            set(self.types),
         )
 
+    def includes(self):
+        return {
+            PurePosixPath("llvm-libc-macros") / macro.header
+            for macro in self.macros
+            if macro.header is not None
+        } | {
+            COMPILER_HEADER_TYPES.get(
+                typ.type_name, PurePosixPath("llvm-libc-types") / f"{typ.type_name}.h"
+            )
+            for typ in self.all_types()
+        }
+
     def public_api(self):
         # Python 3.12 has .relative_to(dir, walk_up=True) for this.
         path_prefix = PurePosixPath("../" * (len(PurePosixPath(self.name).parents) - 1))
@@ -56,7 +86,10 @@ def relpath(file):
 
         content = [
             f"#include {file}"
-            for file in sorted(f'"{relpath(file)!s}"' for file in self.includes())
+            for file in sorted(
+                file if isinstance(file, str) else f'"{relpath(file)!s}"'
+                for file in self.includes()
+            )
         ]
 
         for macro in self.macros:
diff --git a/libc/utils/hdrgen/tests/expected_output/test_header.h b/libc/utils/hdrgen/tests/expected_output/test_header.h
index 838b6d7728150..748c09808c128 100644
--- a/libc/utils/hdrgen/tests/expected_output/test_header.h
+++ b/libc/utils/hdrgen/tests/expected_output/test_header.h
@@ -11,10 +11,10 @@
 
 #include "__llvm-libc-common.h"
 #include "llvm-libc-macros/float16-macros.h"
-#include "llvm-libc-types/float128.h"
 
 #include "llvm-libc-macros/test_more-macros.h"
 #include "llvm-libc-macros/test_small-macros.h"
+#include "llvm-libc-types/float128.h"
 #include "llvm-libc-types/type_a.h"
 #include "llvm-libc-types/type_b.h"
 
diff --git a/libc/utils/hdrgen/tests/input/subdir/test.yaml b/libc/utils/hdrgen/tests/input/subdir/test.yaml
index a414b6f387ccb..e68af00849b0e 100644
--- a/libc/utils/hdrgen/tests/input/subdir/test.yaml
+++ b/libc/utils/hdrgen/tests/input/subdir/test.yaml
@@ -1,8 +1,5 @@
 header: subdir/test.h
 header_template: test.h.def
-types:
-  - type_name: type_a
-  - type_name: type_b
 functions:
   - name: func
     return_type: type_a
diff --git a/libc/utils/hdrgen/tests/input/test_small.h.def b/libc/utils/hdrgen/tests/input/test_small.h.def
index 587b163b68d96..1f484a8a2bdb3 100644
--- a/libc/utils/hdrgen/tests/input/test_small.h.def
+++ b/libc/utils/hdrgen/tests/input/test_small.h.def
@@ -11,7 +11,6 @@
 
 #include "__llvm-libc-common.h"
 #include "llvm-libc-macros/float16-macros.h"
-#include "llvm-libc-types/float128.h"
 
 %%public_api()
 
diff --git a/libc/utils/hdrgen/type.py b/libc/utils/hdrgen/type.py
index b90480267d8d5..0dbd8a5837d15 100644
--- a/libc/utils/hdrgen/type.py
+++ b/libc/utils/hdrgen/type.py
@@ -9,4 +9,11 @@
 
 class Type:
     def __init__(self, type_name):
+        assert type_name
         self.type_name = type_name
+
+    def __eq__(self, other):
+        return self.type_name == other.type_name
+
+    def __hash__(self):
+        return self.type_name.__hash__()



More information about the libc-commits mailing list