[clang] [Clang][Driver][Test] Created test for unsupported driver options (PR #120900)

Tue Feb 25 13:00:26 PST 2025

================
@@ -0,0 +1,472 @@
+#!/usr/bin/env python3
+
+"""generate_unsupported_in_drivermode.py
+
+This script generates Lit regression test files that validate that options are only exposed to intended driver modes.
+
+The options and driver modes are parsed from Options.td, whose path should be provided on the command line.
+See clang/include/clang/Driver/Options.td
+
+The path to the TableGen executable can optionally be provided. Otherwise, the script will search for it.
+
+The primary maintenance task for this script would be updating the expected return message for a driver mode if
+there are changes over time. See the instantiations of DriverData, specifically the check_str.
+
+Logic:
+1) For each option, (records of class "Option"), and for each driver, (records of class "OptionVisibility")
+    a. if the option's "Visibility" field includes the driver flavour, skip processing this option for this driver
+    b. if the option is part of an option group, (the record has the "Group" property),
+       and the group's "Visibility" field includes the driver flavour, skip processing this option for this driver
+    c. otherwise this option is not supported by this driver flavour, and this pairing is saved for testing
+2) For each unsupported pairing, generate a Lit RUN line, and a CHECK line to parse for expected output. Ex: "error: unknown argument"
+"""
+
+import shutil
+import os
+import json
+import subprocess
+from bisect import bisect_left
+from dataclasses import dataclass
+import argparse
+import dataclasses
+from itertools import batched
+
+# Strings defined in Options.td for the various driver flavours. See "OptionVisibility"
+VISIBILITY_CC1AS = "CC1AsOption"
+VISIBILITY_CC1 = "CC1Option"
+VISIBILITY_CL = "CLOption"
+VISIBILITY_DXC = "DXCOption"
+VISIBILITY_DEFAULT = "DefaultVis"
+VISIBILITY_FC1 = "FC1Option"
+VISIBILITY_FLANG = "FlangOption"
+
+# Lit test prefix strings
+SLASH_SLASH = "// "
+EXCLAMATION = "! "
+
+# Invalid usage of the driver options below causes unique output, so skip testing
+exceptions_sequence = [
+    "cc1",
+    "cc1as",
+]
+
+
+class UnsupportedDriverOption:
+    """Defines an unsupported driver-option combination
+    driver: The driver string as defined by OptionVisibility in Options.td
+    option: The option object from Options.td
+    option_name: Corresponding string for an option. See "Name" for a given option in Options.td
+    prefix: String that precedes the option. Ex. "-"
+    """
+
+    def __init__(self, driver, option, option_name, prefix):
+        self.driver = driver
+        self.option = option
+        self.option_name = option_name
+        self.prefix = prefix
+
+    # For sorting
+    def __len__(self):
+        return len(self.option_name)
+
+    def __lt__(self, other):
+        return len(self.option_name) > len(other.option_name)
+
+
+@dataclass
+class DriverData:
+    """Dataclass for data specific to each driver
+    lit_cmd_prefix: The beginning string of the Lit command
+    lit_cmd_options: Strings containing additional options for this driver
+    visibility_str: The corresponding visibility string from OptionVisibility in Options.td
+    lit_cmd_end: String at the end of the Lit command
+    check_str: The string or regex to be sent to FileCheck
+    supported_sequence: List of UnsupportedDriverOption objects for supported options
+                        that are Kind *JOINED*, as defined in Options.td
+    test_option_sequence: A list of all the prefix-option pairs that will be tested for this driver
+    """
+
+    lit_cmd_prefix: str
+    lit_cmd_options: str
+    visibility_str: str
+    lit_cmd_end: str = " - < /dev/null 2>&1 | FileCheck -check-prefix=CHECK-COUNT-"
+    check_str: str = "{{(unknown argument|n?N?o such file or directory)}}"
+    supported_sequence: list[UnsupportedDriverOption] = dataclasses.field(
+        default_factory=list
+    )
+    test_option_sequence: list[str] = dataclasses.field(default_factory=list)
+
+
+def find_groups(options_dictionary, option):
+    """Find the groups for a given option
+    Note that groups can themselves be part of groups, hence the recursion
+
+    For example, considering option "C", it has the following 'Group' list as defined by Options.td:
+      "Group": {
+        "def": "Preprocessor_Group",
+        "kind": "def",
+        "printable": "Preprocessor_Group"
+      },
+    Preprocessor_Group is itself part of CompileOnly_Group, so option C would be part of both groups
+      "Group": {
+        "def": "CompileOnly_Group",
+        "kind": "def",
+        "printable": "CompileOnly_Group"
+      },
+
+    options_dictionary: The converted Python dictionary from the Options.td json string
+    option: The option object from Options.td
+
+    Return: A set including the group found for the option
+    """
+    group_list = options_dictionary[option]["Group"]
+
+    if group_list is None:
+        return None
+    found_group = group_list["def"]
+    group_set = {found_group}
+
+    sub_group_set = find_groups(options_dictionary, found_group)
+    if sub_group_set is None:
+        return group_set
+    else:
+        group_set.update(sub_group_set)
+        return group_set
+
+
+def get_visibility(option):
+    """Get a list of drivers that a given option is exposed to
+    option: The option object from Options.td
+    Return: Set that contains the visibilities of the given option
+    """
+    visibility_set = set(())
+    # Check for the option's explicit visibility
+    for visibility in options_dictionary[option]["Visibility"]:
+        if visibility is not None:
+            visibility_set.add(visibility["def"])
+
+    # Check for the option's group's visibility
+    group_set = find_groups(options_dictionary, option)
+    if group_set is not None:
+        for group_name in group_set:
+            for visibility in options_dictionary[group_name]["Visibility"]:
+                visibility_set.add(visibility["def"])
+
+    return visibility_set
+
+
+def get_lit_test_note(test_visibility):
+    """Return the note to be included at the start of the Lit test file
+    test_visibility: Any VISIBILITY_* variable. VISIBILITY_FLANG will return the .f90 formatted test note.
+    All other will return the .c formatted test note
+    """
+    test_prefix = EXCLAMATION if test_visibility == VISIBILITY_FLANG else SLASH_SLASH
+
+    return (
+        f"{test_prefix}NOTE: This lit test was automatically generated to validate "
+        "unintentionally exposed arguments to various driver flavours.\n"
+        f"{test_prefix}NOTE: To make changes, see llvm-project/clang/utils/generate_unsupported_in_drivermode.py"
+        + " from which it was generated.\n"
+        f"{test_prefix}NOTE: Regenerate this Lit test with the following:\n"
+        f"{test_prefix}NOTE: python generate_unsupported_in_drivermode.py "
+        + "llvm-project/clang/include/clang/Driver/Options.td --llvm-bin llvm-project/build/bin --llvm-tblgen llvm-tblgen\n\n"
+    )
+
+
+def write_lit_test(test_path, test_visibility, unsupported_list):
+    """Write the Lit tests to file
+    test_path: File write path
+    test_visibility: VISIBILITY_DEFAULT or VISIBILITY_FLANG, which indicates whether to write
+    to the main Lit test file or flang Lit test file respectively
+    unsupported_list: List of UnsupportedDriverOption objects
+    """
+    # If each option is tested with its own run line, the Lit tests become quite large. Instead, test options in batches
+    try:
+        with open(test_path, "w") as lit_file:
+            lit_file.write(get_lit_test_note(test_visibility))
+            batch_size = 100
+
+            for visibility, driver_data in driver_data_dict.items():
+                is_flang_pair = (
+                    visibility == VISIBILITY_FLANG or visibility == VISIBILITY_FC1
+                )
+
+                if (test_visibility == VISIBILITY_FLANG and not is_flang_pair) or (
+                    test_visibility == VISIBILITY_DEFAULT and is_flang_pair
+                ):
+                    continue
+
+                comment_str = EXCLAMATION if is_flang_pair else SLASH_SLASH
+                last_batch_size = 0
+
+                unflattened_option_data = list(
+                    batched(driver_data.test_option_sequence, batch_size)
+                )
+
+                for batch in unflattened_option_data:
+                    # Example run line: // RUN: not --crash %clang -cc1 -A -x c++ - < /dev/null 2>&1 | FileCheck -check-prefix=CC1Option %s
+                    run_cmd = (
+                        f"{comment_str}RUN: not " + driver_data.lit_cmd_prefix
+                    )  # "// RUN: not --crash %clang -cc1 "
+
+                    for option_str in batch:
+                        run_cmd += option_str + " "  # "-A"
+
+                    run_cmd += (
+                        driver_data.lit_cmd_options  # "-x c++"
+                        + driver_data.lit_cmd_end  # " - < /dev/null 2>&1 | FileCheck  -check-prefix=CC1OptionCHECK-COUNT-"
+                        + str(len(batch))  # 100
+                        + " %s\n\n"  # " %s"
+                    )
+
+                    lit_file.write(run_cmd)
+
+                    last_batch_size = len(batch)
+
+                # CHECK statements. Instead of writing custom CHECK statements for each RUN line, create two statements
+                # per driver. One statement for a full batch, and a second for a partial batch.
+                check_cmd_start = (
+                    comment_str + visibility + "CHECK-COUNT-"
+                )  # //CC1OptionCHECK-COUNT-
+                check_cmd_end = (
+                    ": " + driver_data.check_str + "\n"
+                )  # ": {{(unknown argument|n?N?o such file or directory)}}"
+                check_cmd_full_batch = (
+                    check_cmd_start + str(batch_size) + check_cmd_end
+                )  # "//CC1OptionCHECK-COUNT-100: {{(unknown argument|n?N?o such file or directory)}}"
+                check_cmd_partial_batch = (
+                    check_cmd_start + str(last_batch_size) + check_cmd_end + "\n"
+                )  # "//CC1OptionCHECK-COUNT-22: {{(unknown argument|n?N?o such file or directory)}}"
+
+                lit_file.write(check_cmd_full_batch + check_cmd_partial_batch)
+
+    except (FileNotFoundError, PermissionError, OSError):
+        raise IOError(f"Error opening {test_path}. Exiting")
+    else:
+        lit_file.close()
+
+
+def validate_file(path):
+    if not os.path.isfile(path):
+        raise argparse.ArgumentTypeError(f"Invalid file provided: {path}")
+    return path
+
+
+# List of driver flavours
+driver_sequence = []
+# List of unsupported driver-option pairs
+unsupported_sequence = []
+# List of driver-option pairs that will be skipped due to overlapping supported and unsupported option names.
+# See later comments for detail
+skipped_sequence = []
+
+# Parse arguments
+parser = argparse.ArgumentParser(
+    description="This script generates Lit regression test files that validate that options are only exposed to "
+    "intended driver modes. "
+    "The options and driver modes are parsed from Options.td."
+)
+
+parser.add_argument(
+    "<path>/Options.td",
+    type=validate_file,
+    help="Path to Options.td file. Typically found under clang/include/clang/Driver/Options.td",
+)
+parser.add_argument(
+    "--llvm-bin",
+    help="llvm build tree bin directory path. Must be specified with --llvm-tblgen. Default path: llvm-project/build/bin",
+)
+parser.add_argument(
+    "--llvm-tblgen",
+    help="LLVM TableGen executable. If not included with --llvm-bin, the script will search for the llvm-tblgen executable",
+)
+
+args = vars(parser.parse_args())
+
+tablegen = ""
+arg_llvm_bin = args["llvm_bin"]
+arg_llvm_tblgen = args["llvm_tblgen"]
+if arg_llvm_bin is None or arg_llvm_tblgen is None:
+    tablegen = shutil.which("llvm-tblgen")
+else:
+    tablegen = arg_llvm_bin + "/" + arg_llvm_tblgen
+
+# Run TableGen to convert Options.td to json
+options_json_str = subprocess.run(
+    [
+        tablegen,
+        "-I",
+        os.path.join(os.path.dirname(__file__), "../../llvm/include"),
+        args["<path>/Options.td"],
+        "-dump-json",
+    ],
+    stdout=subprocess.PIPE,
+)
+options_dictionary = json.loads(options_json_str.stdout.decode("utf-8"))
+
+# Establish the dataclass objects for each driver
+driver_cc1as = DriverData(
+    "%clang -cc1as ",
+    "",
+    VISIBILITY_CC1AS,
+    f" - < /dev/null 2>&1 | FileCheck -check-prefix={VISIBILITY_CC1AS}CHECK-COUNT-",
+)
+driver_cc1 = DriverData(
+    "%clang -cc1 ",
+    " -x c++",
+    VISIBILITY_CC1,
+    f" - < /dev/null 2>&1 | FileCheck -check-prefix={VISIBILITY_CC1}CHECK-COUNT-",
+)
+driver_cl = DriverData(
+    "%clang_cl ",
+    " -### /c /WX -Werror",
+    VISIBILITY_CL,
+    f" 2>&1 | FileCheck -check-prefix={VISIBILITY_CL}CHECK-COUNT-",
+    "{{(unknown argument ignored in|no such file or directory|argument unused during compilation)}}",
+)
+driver_dxc = DriverData(
+    "%clang_dxc ",
+    " -### /T lib_6_7",
+    VISIBILITY_DXC,
+    f" 2>&1 | FileCheck -check-prefix={VISIBILITY_DXC}CHECK-COUNT-",
+    "{{(unknown argument|no such file or directory|argument unused during compilation)}}",
+)
+driver_default = DriverData(
+    "%clang ",
+    " -### -x c++ -c",
+    VISIBILITY_DEFAULT,
+    f" - < /dev/null 2>&1 | FileCheck -check-prefix={VISIBILITY_DEFAULT}CHECK-COUNT-",
+    "{{(unknown argument|unsupported option|argument unused|no such file or directory)}}",
+)
+driver_fc1 = DriverData(
+    "%clang --driver-mode=flang -fc1 ",
+    "",
+    VISIBILITY_FC1,
+    f" - < /dev/null 2>&1 | FileCheck -check-prefix={VISIBILITY_FC1}CHECK-COUNT-",
+    "{{(unknown argument|no such file or directory|does not exist)}}",
+)
+driver_flang = DriverData(
+    "%clang --driver-mode=flang ",
+    " -### -x c++ -c",
+    VISIBILITY_FLANG,
+    f" - < /dev/null 2>&1 | FileCheck -check-prefix={VISIBILITY_FLANG}CHECK-COUNT-",
+    "{{unknown argument|unsupported option|argument unused during compilation|invalid argument|no such file or directory}}",
----------------
GeorgeKA wrote:

I've simplified the expected error strings in the latest commit. To explain "_no such file or directory_": that message is returned when an unsupported option is passed with the "/" prefix.

Ex:
```
clang /A  -### -x c++ -c - < /dev/null 2>&1
  clang: error: no such file or directory: '/A' 
```

Ultimately I switched to using prefix "-" instead of "/", so this is no longer an issue.

Why the switch? When testing options in batches, the error messages would be grouped by the prefix used, regardless of the order in which the options were originally provided. This was causing the tests to fail, and adding handling to order the options by their prefix was making things messy.
FYI, all of the "/" prefix options support "-" as well. I used this regex to double-check (with GNU grep):

`grep '"\/",\n\s6}(?!w"-")' -Poz Options.td.json`

https://github.com/llvm/llvm-project/pull/120900