[llvm] [bazel] Rewrite overlay handling to starlark (PR #170000)

David Zbarsky via llvm-commits llvm-commits at lists.llvm.org
Sat Nov 29 07:35:40 PST 2025


https://github.com/dzbarsky updated https://github.com/llvm/llvm-project/pull/170000

>From f5d1d54db77aca94312e8adef2a1ff2de9c509a0 Mon Sep 17 00:00:00 2001
From: David Zbarsky <dzbarsky at gmail.com>
Date: Fri, 28 Nov 2025 21:57:45 -0500
Subject: [PATCH] [bazel] Rewrite overlay handling to starlark

---
 utils/bazel/configure.bzl          | 86 ++++++++++++++------------
 utils/bazel/overlay_directories.py | 99 ------------------------------
 2 files changed, 47 insertions(+), 138 deletions(-)
 delete mode 100755 utils/bazel/overlay_directories.py

diff --git a/utils/bazel/configure.bzl b/utils/bazel/configure.bzl
index b976f3955febf..adbd3c6539037 100644
--- a/utils/bazel/configure.bzl
+++ b/utils/bazel/configure.bzl
@@ -4,9 +4,6 @@
 
 """Helper macros to configure the LLVM overlay project."""
 
-# Directory of overlay files relative to WORKSPACE
-DEFAULT_OVERLAY_PATH = "llvm-project-overlay"
-
 DEFAULT_TARGETS = [
     "AArch64",
     "AMDGPU",
@@ -30,43 +27,54 @@ DEFAULT_TARGETS = [
     "XCore",
 ]
 
+
+MAX_TRAVERSAL_STEPS = 1000000  # "big number" upper bound on total visited dirs
+
 def _overlay_directories(repository_ctx):
-    src_path = repository_ctx.path(Label("@llvm-raw//:WORKSPACE")).dirname
-    bazel_path = src_path.get_child("utils").get_child("bazel")
-    overlay_path = bazel_path.get_child("llvm-project-overlay")
-    script_path = bazel_path.get_child("overlay_directories.py")
-
-    python_bin = repository_ctx.which("python3")
-    if not python_bin:
-        # Windows typically just defines "python" as python3. The script itself
-        # contains a check to ensure python3.
-        python_bin = repository_ctx.which("python")
-
-    if not python_bin:
-        fail("Failed to find python3 binary")
-
-    cmd = [
-        python_bin,
-        script_path,
-        "--src",
-        src_path,
-        "--overlay",
-        overlay_path,
-        "--target",
-        ".",
-    ]
-    exec_result = repository_ctx.execute(cmd, timeout = 20)
-
-    if exec_result.return_code != 0:
-        fail(("Failed to execute overlay script: '{cmd}'\n" +
-              "Exited with code {return_code}\n" +
-              "stdout:\n{stdout}\n" +
-              "stderr:\n{stderr}\n").format(
-            cmd = " ".join([str(arg) for arg in cmd]),
-            return_code = exec_result.return_code,
-            stdout = exec_result.stdout,
-            stderr = exec_result.stderr,
-        ))
+    """Fuses the LLVM source tree and the Bazel overlay tree using symlinks.
+
+    Every overlay file is symlinked individually. A source entry is symlinked as
+    a whole unless the overlay has a directory of the same name; such directories
+    are descended into instead, which keeps the number of symlinks small.
+    """
+    src_root = repository_ctx.path(Label("@llvm-raw//:WORKSPACE")).dirname
+    overlay_root = src_root.get_child("utils/bazel/llvm-project-overlay")
+    target_root = repository_ctx.path(".")
+    # Starlark has no `while` loop or recursion: walk with a bounded `for` + stack.
+    stack = ["."]
+    for _ in range(MAX_TRAVERSAL_STEPS):
+        rel_dir = stack.pop()
+
+        overlay_dirs = set()
+        # Symlink overlay files, overlay dirs will be handled in future iterations.
+        for entry in overlay_root.get_child(rel_dir).readdir():
+            name = entry.basename
+            full_rel_path = rel_dir + "/" + name
+
+            if entry.is_dir:
+                stack.append(full_rel_path)
+                overlay_dirs.add(name)
+            else:
+                src_path = overlay_root.get_child(full_rel_path)
+                dst_path = target_root.get_child(full_rel_path)
+                repository_ctx.symlink(src_path, dst_path)
+
+        # Symlink source dirs (if not themselves overlaid) and files.
+        for src_entry in src_root.get_child(rel_dir).readdir():
+            name = src_entry.basename
+            if name in overlay_dirs:
+                # Skip: overlay has a directory with this name
+                continue
+
+            repository_ctx.symlink(src_entry, target_root.get_child(rel_dir + "/" + name))
+
+        if not stack:
+            return
+
+    fail(("overlay_directories: exceeded MAX_TRAVERSAL_STEPS ({}). " +
+          "Tree too large or a cycle in the filesystem?").format(
+        MAX_TRAVERSAL_STEPS,
+    ))
 
 def _extract_cmake_settings(repository_ctx, llvm_cmake):
     # The list to be written to vars.bzl
diff --git a/utils/bazel/overlay_directories.py b/utils/bazel/overlay_directories.py
deleted file mode 100755
index 526a78e978e5d..0000000000000
--- a/utils/bazel/overlay_directories.py
+++ /dev/null
@@ -1,99 +0,0 @@
-#!/bin/python3
-
-# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
-# See https://llvm.org/LICENSE.txt for license information.
-# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-"""Overlays two directories into a target directory using symlinks.
-
-Tries to minimize the number of symlinks created (that is, does not symlink
-every single file). Symlinks every file in the overlay directory. Only symlinks
-individual files in the source directory if their parent directory is also
-contained in the overlay directory tree.
-"""
-
-import argparse
-import errno
-import os
-import sys
-
-
-def _check_python_version():
-    if sys.version_info[0] < 3:
-        raise RuntimeError(
-            "Must be invoked with a python 3 interpreter but was %s" % sys.executable
-        )
-
-
-def _check_dir_exists(path):
-    if not os.path.isdir(path):
-        raise OSError(errno.ENOENT, os.strerror(errno.ENOENT), path)
-
-
-def parse_arguments():
-    parser = argparse.ArgumentParser(
-        description="""
-    Overlays two directories into a target directory using symlinks.
-
-    Tries to minimize the number of symlinks created (that is, does not symlink
-    every single file). Symlinks every file in the overlay directory. Only
-    symlinks individual files in the source directory if their parent directory
-    is also contained in the overlay directory tree.
-    """
-    )
-    parser.add_argument(
-        "--src",
-        required=True,
-        help="Directory that contains most of the content to symlink.",
-    )
-    parser.add_argument(
-        "--overlay",
-        required=True,
-        help="Directory to overlay on top of the source directory.",
-    )
-    parser.add_argument(
-        "--target",
-        required=True,
-        help="Directory in which to place the fused symlink directories.",
-    )
-
-    args = parser.parse_args()
-
-    _check_dir_exists(args.target)
-    _check_dir_exists(args.overlay)
-    _check_dir_exists(args.src)
-
-    return args
-
-
-def _symlink_abs(from_path, to_path):
-    os.symlink(os.path.abspath(from_path), os.path.abspath(to_path))
-
-
-def main(args):
-    for root, dirs, files in os.walk(args.overlay):
-        # We could do something more intelligent here and only symlink individual
-        # files if the directory is present in both overlay and src. This could also
-        # be generalized to an arbitrary number of directories without any
-        # "src/overlay" distinction. In the current use case we only have two and
-        # the overlay directory is always small, so putting that off for now.
-        rel_root = os.path.relpath(root, start=args.overlay)
-        if rel_root != ".":
-            os.mkdir(os.path.join(args.target, rel_root))
-
-        for file in files:
-            relpath = os.path.join(rel_root, file)
-            _symlink_abs(
-                os.path.join(args.overlay, relpath), os.path.join(args.target, relpath)
-            )
-
-        for src_entry in os.listdir(os.path.join(args.src, rel_root)):
-            if src_entry not in dirs:
-                relpath = os.path.join(rel_root, src_entry)
-                _symlink_abs(
-                    os.path.join(args.src, relpath), os.path.join(args.target, relpath)
-                )
-
-
-if __name__ == "__main__":
-    _check_python_version()
-    main(parse_arguments())



More information about the llvm-commits mailing list