[llvm] [Code Coverage] Add a tool to check test coverage of a patch (PR #71841)

Aiden Grossman via llvm-commits llvm-commits at lists.llvm.org
Wed Dec 13 19:16:59 PST 2023


================
@@ -0,0 +1,722 @@
+#!/usr/bin/env python3
+#
+# ===- git-check-coverage - CheckCoverage Git Integration ---------*- python -*--===#
+#
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+# ===------------------------------------------------------------------------===#
+
+r"""
+code-coverage git integration
+=============================
+This file provides a code-coverage integration for git. Put it in your
+llvm-project root directory and ensure that it is executable. Code
+coverage information will be provided for the last commit/HEAD by
+running the command below.
+Example usage:
+          git check-coverage -b build bin/opt llvm/test
+Here -b is the build directory (optional, default is "build"),
+next comes the binary,
+and then the test suite path.
+"""
+
+import argparse
+import logging
+import os
+import subprocess
+import re
+import sys
+from unidiff import PatchSet
+
+
+# Configure the logging module
+def configure_logging(build_dir):
+    logging.basicConfig(
+        filename=os.path.join(
+            build_dir, "patch_coverage.log"
+        ),  # Specify the log file in the build directory
+        level=logging.INFO,  # Set the logging level to INFO
+        format="%(message)s",  # Specify the log message format
+    )
+
+
+# Define a custom print function that writes to both the log file and the terminal
+def custom_print(*args):
+    message = " ".join(map(str, args))
+    logging.info(message)  # Write to the log file
+    print(message)  # Also write to the terminal
+
+
+def create_patch_from_last_commit(output_path):
+    """Create a patch file from the last commit in the Git repository."""
+
+    try:
+        # Create the patch from the last commit
+        patch_cmd = ["git", "format-patch", "-1", "--stdout"]
+        patch_output = subprocess.check_output(patch_cmd).decode("utf-8", "ignore")
+
+        # Write the patch to the output file in binary mode
+        with open(output_path, "wb") as patch_file:
+            patch_file.write(patch_output.encode("utf-8"))
+
+        print("Patch file '{}' created successfully.".format(output_path))
+        print("")
+
+    except subprocess.CalledProcessError as e:
+        print("Error while creating the patch from the last commit:", e)
+        sys.exit(1)
+
+
+def extract_source_files_from_patch(patch_path):
+    """Read the patch file and extract the names of .cpp and .h files that
+    have been modified or added in the patch."""
+
+    try:
+        source_files = []
+        with open(patch_path, "rb") as patch_file:
+            patch_diff = patch_file.read().decode("utf-8", "ignore")
+
+        # Use a regular expression to find .cpp and .c files in the patch
+        source_file_matches = re.findall(r"\+{3} b/(\S+\.(?:cpp|c))", patch_diff)
+
+        # Filter out files with "test" in their directory path
+        source_files = [file for file in source_file_matches if "test" not in file]
+
+        print()
+        print("Source files in the patch (excluding test files):")
+        for source_file in source_files:
+            print(source_file)
+        print("")
+        return source_files
+
+    except Exception as ex:
+        print("Error while extracting .cpp files from patch:", ex)
+        sys.exit(1)
+
+
+def write_source_file_allowlist(source_files, output_path):
+    """Write a file containing the list of source files in the format"""
+    try:
+        # Get the absolute path of the current directory
+        current_directory = os.getcwd()
+        absolute_path = os.path.abspath(current_directory)
+
+        # Write the source file paths to the allowlist file in the specified format
+        with open(output_path, "w") as allowlist_file:
+            for source_file in source_files:
+                source_file = os.path.join(absolute_path, source_file)
+                allowlist_file.write("source:{}=allow\n".format(source_file))
+            allowlist_file.write("default:skip")  # Specify default behavior
+
+        # Print a success message after writing the allowlist file
+        custom_print("Source file allowlist written to file '{}'.".format(output_path))
+        custom_print("")
+
+    except OSError as e:
+        custom_print("Error while writing allow list for -fprofile-list:", e)
+        sys.exit(1)
+
+
+def extract_modified_source_lines_from_patch(patch_path, tests):
+    """Extract the modified source lines from the patch."""
+
+    source_lines = {}  # Dictionary for modified lines in source code files
+
+    tests_relative = {os.path.relpath(file) for file in tests}
+
+    try:
+        # Parse the patch file using the unidiff library
+        patchset = PatchSet.from_filename(patch_path)
+        custom_print("All files in patch:")
+        for patched_file in patchset:
+            current_file = patched_file.target_file
+            # Check if the current file is not a test file
+            if os.path.relpath(current_file)[2:] not in tests_relative:
+                custom_print(os.path.relpath(current_file)[2:])
+                # Initialize an empty list for modified lines in this file
+                source_lines[current_file] = []
+
+            for hunk in patched_file:
+                for line in hunk:
+                    if line.is_added:
+                        # Skip test files since we only want source files
+                        if os.path.relpath(current_file)[2:] not in tests_relative:
+                            # Append the modified line as a tuple (line number, line content)
+                            source_lines[current_file].append(
+                                (line.target_line_no, line.value)
+                            )
+        custom_print("")
+
+        # Return dictionary of modified lines
+        return source_lines
+
+    except Exception as ex:
+        custom_print("Error while extracting modified lines from patch:", ex)
+        return {}
+
+
+def build_llvm(build_dir):
+    """Configure and build LLVM in the specified build directory."""
+
+    try:
+        cwd = os.getcwd()
+
+        # Change to the build directory
+        os.chdir(build_dir)
+
+        # Remove older profile files
+        command = 'find . -type f -name "*.profraw" -delete'
+        try:
+            subprocess.run(command, shell=True, check=True)
+            custom_print(
+                "Files in build directory with '.profraw' extension deleted successfully."
+            )
+        except subprocess.CalledProcessError as e:
+            custom_print(f"Error: {e}")
+        custom_print("")
+
+        # Run the cmake command to re-configure the LLVM build for coverage instrumentation
+        cmake_command = [
+            "cmake",
+            "-DLLVM_BUILD_INSTRUMENTED_COVERAGE=ON",
+            "-DLLVM_INDIVIDUAL_TEST_COVERAGE=ON",
+            f"-DCMAKE_C_FLAGS=-fprofile-list={os.path.abspath('fun.list')}",
+            f"-DCMAKE_CXX_FLAGS=-fprofile-list={os.path.abspath('fun.list')}",
+            ".",
+        ]
+
+        subprocess.check_call(cmake_command)
+
+        try:
+            # Run the ninja build command
+            print()
+            subprocess.check_call(["ninja"])
+        except subprocess.CalledProcessError as ninja_error:
+            custom_print(f"Error during Ninja build: {ninja_error}")
+            custom_print(
+                "Attempting to build with 'make' using the available processors."
+            )
+            # Get the number of processors on the system
+            num_processors = os.cpu_count() or 1
+            make_command = ["make", f"-j{num_processors}"]
+            subprocess.check_call(make_command)
+
+        os.chdir(cwd)
+
+        custom_print("LLVM build completed successfully.")
+        custom_print("")
+
+    except subprocess.CalledProcessError as e:
+        custom_print("Error during LLVM build:", e)
+        sys.exit(1)
+
+
+def run_single_test_with_coverage(llvm_lit_path, test_path):
+    """Run a single test case using llvm-lit with coverage."""
+
+    try:
+        # Run llvm-lit with --per-test-coverage
+        # https://llvm.org/docs/CommandGuide/lit.html#cmdoption-lit-per-test-coverage
+        lit_cmd = [llvm_lit_path, "--per-test-coverage", test_path]
+        subprocess.check_call(lit_cmd)
+
+        custom_print("Test case executed:", test_path)
+
+    except subprocess.CalledProcessError as e:
+        custom_print("Error while running test:", e)
+        sys.exit(1)
+
+    except Exception as ex:
+        custom_print("Error:", ex)
+        sys.exit(1)
+
+
+def run_modified_lit_tests(llvm_lit_path, patch_path, tests):
+    """Read the patch file, identify modified and added test cases, and
+    then execute each of these test cases."""
+
+    try:
+        # Get the list of modified and added test cases from the patch
+        with open(patch_path, "r") as patch_file:
+            patch_diff = patch_file.read()
+
+        modified_tests = []
+
+        # Use a regular expression to find modified lit test cases (.ll, .mir, .mlir, .fir, .c, .cpp, .f90, .s, .test)
+        for match in re.finditer(
+            r"^\+\+\+ [ab]/(.*\.(ll|mir|mlir|fir|c|cpp|f90|s|test))$",
+            patch_diff,
+            re.MULTILINE,
+        ):
+            test_file = match.group(1)
+
+            # Get the current working directory
+            cwd = os.getcwd()
+
+            # Build the full file path by joining the parent of cwd with llvm-project and the test file path
+            full_test_file = os.path.join(
+                os.path.dirname(cwd), "llvm-project", test_file
+            )
+
+            if full_test_file in tests:
+                custom_print("Lit test file in the patch:", test_file)
+                custom_print("Full lit test file path:", full_test_file)
+
+                # Check if the diff line starts with +++
+                if match.group(0).startswith("+++"):
+                    modified_tests.append(full_test_file)
+
+        if not modified_tests:
+            custom_print("No modified lit tests found in the patch.")
+            return
+
+        # Run each modified test case
+        custom_print("")
+        custom_print("Running modified test cases:")
+        for test_file in modified_tests:
+            run_single_test_with_coverage(llvm_lit_path, test_file)
+
+    except subprocess.CalledProcessError as e:
+        custom_print("Error while running modified tests:", e)
+        sys.exit(1)
+
+    except Exception as ex:
+        custom_print("Error:", ex)
+        sys.exit(1)
+
+
+def run_modified_unit_tests(build_dir, patch_path, tests):
+    """Read the patch file, identify modified and added test cases, and
+    then execute each of these test cases."""
+
+    try:
+        # Get the list of modified and added test cases from the patch
+        with open(patch_path, "r") as patch_file:
+            patch_diff = patch_file.read()
+
+        modified_tests = []
+
+        custom_print()
+        # Use a regular expression to find modified unit test sources (.c, .cpp, .f90)
+        for match in re.finditer(
+            r"^\+\+\+ [ab]/(.*\.(c|cpp|f90))$",
+            patch_diff,
+            re.MULTILINE,
+        ):
+            test_file = match.group(1)
+
+            # Skip files that are not under a unittests directory
+            if "unittests" not in test_file:
+                continue
+
+            # Get the current working directory
+            cwd = os.getcwd()
+
+            # Build the full file path by joining the parent of cwd with llvm-project and the test file path
+            full_test_file = os.path.join(
+                os.path.dirname(cwd), "llvm-project", test_file
+            )
+
+            # Extract the second and third path components from the test_file path
+            second_and_third_dirs = os.path.join(*test_file.split(os.path.sep)[1:3])
+
+            matching_test_paths = [
+                test_path for test_path in tests if second_and_third_dirs in test_path
+            ]
+            if matching_test_paths:
+                custom_print()
+                custom_print("Unit test file in the patch:", test_file)
+                custom_print("Full unit test file path:", full_test_file)
+                custom_print("Matching unit test in tests:", matching_test_paths[0])
+
+                # Capture the first matching test path
+                modified_test_path = os.path.dirname(
+                    os.path.dirname(matching_test_paths[0])
+                )
+
+                # Extract the file name (excluding the extension) from test_file
+                file_name = os.path.splitext(os.path.basename(test_file))[0]
+
+                # Extract the last directory name (excluding the extension) from test_file
+                last_directory = os.path.splitext(
+                    os.path.basename(os.path.dirname(test_file))
+                )[0]
+
+                # Add "Tests" to the last_directory
+                last_directory_with_tests = f"{last_directory}Tests"
+
+                # Set LLVM_PROFILE_FILE environment variable
+                llvm_profile_file = os.path.join(
+                    os.path.dirname(modified_test_path), f"{file_name}.profraw"
+                )
+                os.environ["LLVM_PROFILE_FILE"] = llvm_profile_file
+
+                cwd = os.getcwd()
+                os.chdir(build_dir)
+                subprocess.check_call(["ninja", last_directory_with_tests])
+                os.chdir(cwd)
+
+                # Check if the diff line starts with +++
+                if match.group(0).startswith("+++"):
+                    modified_tests.append(full_test_file)
+
+                    # Run each modified test case
+                    custom_print("")
+                    custom_print(
+                        f"Running modified test cases with Ninja target {last_directory_with_tests}:"
+                    )
+                    subprocess.check_call(
+                        # [modified_test_path, f"--gtest_filter={file_name}*"]
+                        [modified_test_path]
+                    )
+                    custom_print("Test case executed:", full_test_file)
+
+        if not modified_tests:
+            custom_print("No modified unit tests found in the patch.")
+            return
+
+    except subprocess.CalledProcessError as e:
+        custom_print("Error while running modified tests:", e)
+        sys.exit(1)
+
+    except Exception as ex:
+        custom_print("Error:", ex)
+        sys.exit(1)
+
+
+def process_coverage_data(cpp_files, build_dir, binary):
+    """Convert profraw coverage data files to profdata format, generate human-readable
+    coverage information, for specific source files."""
+
+    # Create a dictionary to store the paths of the generated coverage data files for each cpp_file
+    coverage_files = {}
+
+    try:
+        # Change to the build directory
+        os.chdir(build_dir)
+        for root, dirs, files in os.walk("."):
+            for file in files:
+                if os.path.basename(file) == "default.profraw":
+                    continue
+                # Convert each .profraw file into .profdata file using llvm-profdata
+                if file.endswith(".profraw"):
+                    profraw_file = os.path.join(root, file)
+                    profdata_output = os.path.splitext(profraw_file)[0] + ".profdata"
+                    custom_print("")
+                    custom_print("Profraw File:", profraw_file)
+                    custom_print("Profdata File:", profdata_output)
+
+                    # Construct the llvm-profdata command
+                    llvm_profdata_cmd = [
+                        "/usr/local/bin/llvm-profdata",
+                        "merge",
+                        "-o",
+                        profdata_output,
+                        profraw_file,
+                    ]
+
+                    # Run llvm-profdata to convert profraw to profdata
+                    subprocess.check_call(llvm_profdata_cmd)
+
+                    custom_print(
+                        "Converted {} to {}".format(profraw_file, profdata_output)
+                    )
+
+                    # Process coverage data for each of the specific source files
+                    for cpp_file in cpp_files:
+                        # Keep the original cpp_file unchanged for each iteration
+                        cpp_file_original = cpp_file
+
+                        output_file = (
+                            os.path.splitext(profdata_output)[0]
+                            + f"_{cpp_file_original.replace('/', '_')}.txt"
+                        )
+
+                        # Use parent directory path with the current cpp_file path to create an absolute path for cpp_file
+                        current_directory = os.getcwd()
+                        parent_directory = os.path.abspath(
+                            os.path.join(current_directory, "..")
+                        )
+                        cpp_file = os.path.join(parent_directory, cpp_file_original)
+                        # Construct the llvm-cov show command to extract coverage data for the specific C++ file
+                        llvm_cov_cmd = [
+                            "/usr/local/bin/llvm-cov",
+                            "show",
+                            "-instr-profile",
+                            profdata_output,
+                            binary,
+                            "--format=text",
+                            cpp_file,  # Specify the target C++ file
+                        ]
+
+                        # Redirect the output of llvm-cov show to the output file
+                        with open(output_file, "w") as output:
+                            subprocess.check_call(llvm_cov_cmd, stdout=output)
+
+                        custom_print(f"Processed file saved as: {output_file}")
+                        # Update the coverage_files dictionary with the output_file for the current cpp_file
+                        coverage_files.setdefault(cpp_file_original, []).append(
+                            output_file
+                        )
+
+        custom_print("")
+        custom_print("Conversion of profraw files to human-readable form is completed.")
+        custom_print("")
+        custom_print("Dictionary of coverage files:", coverage_files)
+        custom_print("")
+
+        # Return the dictionary of processed coverage files for each source file
+        return coverage_files
+
+    except subprocess.CalledProcessError as e:
+        custom_print("Error during profraw to profdata conversion:", e)
+        sys.exit(1)
+
+
+def report_covered_and_uncovered_lines(coverage_files, modified_lines):
+    """Report Covered and uncovered source code lines."""
+
+    try:
+        # Initialize an empty dictionary to store uncovered lines for each file
+        common_uncovered_line_numbers_for_each_file = {}
+
+        # Iterate through each cpp_file and its associated coverage file in coverage_files
+        for cpp_file, coverage_files_list in coverage_files.items():
+            for coverage_file in coverage_files_list:
+                custom_print()
+                # Print the current coverage file being processed
+                custom_print(f"Coverage File: build/{coverage_file}")
+
+                # Initialize sets to store uncovered and covered line numbers in the current coverage file
+                uncovered_line_numbers = set()
+                covered_line_numbers = set()
+
+                # Open the coverage file and process each line
+                with open(coverage_file, "r") as cov_file:
+                    for line in cov_file:
+                        # Split each line into parts using "|" as a delimiter
+                        parts = line.strip().split("|")
+                        if len(parts) >= 3:
+                            # Extract line number and execution count
+                            line_number_str = parts[0].strip()
+                            execution_count = parts[1].strip()
+
+                            # Check if line number and execution count are numeric
+                            if line_number_str.isdigit() and execution_count.isdigit():
+                                line_number = int(line_number_str)
+                                # Determine whether the line is uncovered or covered based on execution count
+                                if int(execution_count) == 0:
+                                    uncovered_line_numbers.add(line_number)
+                                elif int(execution_count) > 0:
+                                    covered_line_numbers.add(line_number)
+
+                # Store the uncovered line numbers in the common_uncovered_line_numbers dictionary
+                common_uncovered_line_numbers_for_each_file[
+                    cpp_file
+                ] = uncovered_line_numbers
+
+                # Print the original lines for uncovered and covered lines
+                for file, lines in modified_lines.items():
+                    if os.path.relpath(file)[2:] == cpp_file:
+                        custom_print(f"Modified File: {os.path.relpath(file)[2:]}")
+                        for line_number_source, line_content in lines:
+                            if line_number_source in uncovered_line_numbers:
+                                custom_print(
+                                    f"  Uncovered Line: {line_number_source} : {line_content.strip()}"
+                                )
+                            elif line_number_source in covered_line_numbers:
+                                custom_print(
+                                    f"  Covered Line: {line_number_source} : {line_content.strip()}"
+                                )
+
+        # Print the final uncovered lines for each file
+        for (
+            cpp_file,
+            uncovered_lines,
+        ) in common_uncovered_line_numbers_for_each_file.items():
+            print(f"\nCommon uncovered lines for {cpp_file} after all changes:")
+            # print(uncovered_lines)
+
+            # Iterate through modified lines to find the corresponding source lines
+            for file, lines in modified_lines.items():
+                for line_number_source, line_content in lines:
+                    # Print the common uncovered line with source file information for each cpp_file
+                    if (
+                        os.path.relpath(file)[2:] == cpp_file
+                        and line_number_source in uncovered_lines
+                    ):
+                        print(f"  Line {line_number_source}: {line_content.strip()}")
+
+    except Exception as ex:
+        # Handle exceptions, print an error message, and exit with status 1
+        custom_print("Error while reporting covered and uncovered lines:", ex)
+        sys.exit(1)
+
+
+def parse_suite_info(s):
+    """Function to return test suite information."""
+
+    curr_suite = None
+    res = {}
+
+    # Iterate over each line in the decoded 's' split by lines.
+    for line in s.decode().splitlines():
+        # Calculate the number of leading spaces in the line.
+        leading_spaces = len(line) - len(line.lstrip(" "))
+
+        # Check if there are 2 leading spaces, indicating a suite name.
+        if leading_spaces == 2:
+            # Extract the suite name and assign it to curr_suite.
+            curr_suite = line.split()[0]
+        # Check if curr_suite is not None and there are 4 leading spaces, and "Source Root:" is in the line.
+        elif curr_suite is not None and leading_spaces == 4 and "Source Root:" in line:
+            # Ensure that curr_suite is not already in the res dictionary.
+            assert curr_suite not in res
+
+            # Add the suite name as a key and the last part of the line as its value in the res dictionary.
+            res[curr_suite] = line.split()[-1]
+
+    # Return the res dictionary containing suite information.
+    return res
+
+
+def find_lit_tests(lit_path, test_paths):
+    """Function to find the list of test cases using llvm-lit."""
+
+    # Create a command list for listing test suites using lit_path and test_paths.
+    suites_cmd = [lit_path, "--show-suites"] + test_paths
+    output = subprocess.check_output(suites_cmd)
+
+    # Parse the output to extract suite information using parse_suite_info function.
+    test_suites = parse_suite_info(output)
+
+    # Create a command list for listing individual tests using lit_path and test_paths.
+    tests_cmd = [lit_path, "--show-tests"] + test_paths
+    output = subprocess.check_output(tests_cmd)
+
+    # Convert the output lines to strings and split them.
+    lines = [line.decode() for line in output.splitlines()]
+
+    # Extract test information from lines where "::" is present.
+    test_info = [line.split() for line in lines if "::" in line]
+
+    # Construct a list of test paths by combining suite source roots and test case names.
+    # clang-tools-extra's test_info entries have four fields to unpack, while clang's have three.
+    if test_info:
+        if len(test_info[0]) == 3:
+            return [
+                os.path.join(test_suites[suite], test_case)
+                for (suite, sep, test_case) in test_info
+            ]
+        elif len(test_info[0]) == 4:
+            return [
+                os.path.join(test_suites[suite1], test_case)
+                for (suite1, suite2, sep, test_case) in test_info
+            ]
+    else:
+        # Handle the case where no test information was found
+        return []  # or any other appropriate action
+
+
+def parse_args():
+    """Function to parse command line arguments."""
+
+    # Create an ArgumentParser object.
+    parser = argparse.ArgumentParser()
+
+    # Add optional argument "-b" or "--build-dir" with a default value of "build".
+    parser.add_argument("-b", "--build-dir", dest="build_dir", default="build")
+
+    # Add positional argument "binary".
+    parser.add_argument("binary")
+
+    # Add positional argument "test_path" which can have one or more values.
+    parser.add_argument("test_path", nargs="+")
+
+    # Parse the command line arguments.
+    args = parser.parse_args()
+
+    # Return a tuple containing build_dir, binary, and test_path.
+    return (args.build_dir, args.binary, args.test_path)
+
+
+def main():
+    (
+        build_dir,
+        binary,
+        test_paths,
+    ) = (
+        parse_args()
+    )  # Parse command-line arguments to get the build directory, binary, and test paths.
+
+    configure_logging(build_dir)
+
+    patch_path = os.path.join(
+        build_dir, "patch.diff"
+    )  # Define the path to the patch file (output).
+
+    create_patch_from_last_commit(
+        patch_path
+    )  # Create a patch file from the last commit.
+
+    llvm_lit_path = os.path.join(
+        build_dir, "bin/llvm-lit"
+    )  # Define the path to the llvm-lit executable.
+
+    tests = frozenset(
+        find_lit_tests(llvm_lit_path, test_paths)
+    )  # Find lit tests and create a frozen set of them.
+
+    source_files = extract_source_files_from_patch(
+        patch_path
+    )  # Extract source files from the patch.
+
+    output_path = os.path.join(
+        build_dir, "fun.list"
----------------
boomanaiden154 wrote:

This could probably be a more descriptive name, and should be set as a constant or passed around more as it is used in other functions above.
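For illustration, a minimal sketch of that suggestion (the names `PROFILE_ALLOWLIST_FILENAME` and `allowlist_path` are hypothetical, not part of the patch):

```python
import os

# Hypothetical constant; the patch currently hardcodes "fun.list" in several places.
PROFILE_ALLOWLIST_FILENAME = "profile-allowlist.txt"


def allowlist_path(build_dir):
    """Single place that decides where the -fprofile-list allowlist lives."""
    return os.path.join(build_dir, PROFILE_ALLOWLIST_FILENAME)


def build_llvm_cmake_command(allowlist):
    # Functions that need the allowlist receive its path instead of re-deriving it.
    return [
        "cmake",
        "-DLLVM_BUILD_INSTRUMENTED_COVERAGE=ON",
        "-DLLVM_INDIVIDUAL_TEST_COVERAGE=ON",
        f"-DCMAKE_C_FLAGS=-fprofile-list={os.path.abspath(allowlist)}",
        f"-DCMAKE_CXX_FLAGS=-fprofile-list={os.path.abspath(allowlist)}",
        ".",
    ]


if __name__ == "__main__":
    path = allowlist_path("build")
    print(build_llvm_cmake_command(path))
```

write_source_file_allowlist() and main() could then take the same path, keeping the file name defined in exactly one place.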

https://github.com/llvm/llvm-project/pull/71841


More information about the llvm-commits mailing list