[clang] fb4b565 - [analyzer] SATest: Move from csv to json project maps

Tue Jun 16 03:31:17 PDT 2020

Author: Valeriy Savchenko
Date: 2020-06-16T13:30:00+03:00
New Revision: fb4b565212b0158e2b41ffec71a7c4282907cda9

URL: https://github.com/llvm/llvm-project/commit/fb4b565212b0158e2b41ffec71a7c4282907cda9
DIFF: https://github.com/llvm/llvm-project/commit/fb4b565212b0158e2b41ffec71a7c4282907cda9.diff

LOG: [analyzer] SATest: Move from csv to json project maps

Summary:
JSON format is a bit more verbose and easier to reason about
and extend.  For this reason, before extending SATestBuild
functionality it is better to refactor the part of how we
configure the whole system.

Differential Revision: https://reviews.llvm.org/D81563

Added: 
    clang/utils/analyzer/ProjectMap.py

Modified: 
    clang/utils/analyzer/SATestAdd.py
    clang/utils/analyzer/SATestBuild.py
    clang/utils/analyzer/SATestUpdateDiffs.py

Removed: 
    


################################################################################
diff --git a/clang/utils/analyzer/ProjectMap.py b/clang/utils/analyzer/ProjectMap.py
new file mode 100644
index 000000000000..182a05c1a935

--- /dev/null
+++ b/clang/utils/analyzer/ProjectMap.py
@@ -0,0 +1,94 @@
+import json
+import os
+
+from typing import Any, Dict, List, NamedTuple, Optional
+
+
+JSON = Dict[str, Any]
+
+
+DEFAULT_MAP_FILE = "projects.json"
+
+
+class ProjectInfo(NamedTuple):
+    """
+    Information about a project to analyze.
+    """
+    name: str
+    mode: int
+    enabled: bool = True
+
+
+class ProjectMap:
+    """
+    Project map stores info about all the "registered" projects.
+    """
+    def __init__(self, path: Optional[str] = None, should_exist: bool = True):
+        """
+        :param path: optional path to a project JSON file, when None defaults
+                     to DEFAULT_MAP_FILE.
+        :param should_exist: flag to tell if it's an exceptional situation when
+                             the project file doesn't exist, creates an empty
+                             project list instead if we are not expecting it to
+                             exist.
+        """
+        if path is None:
+            path = os.path.join(os.path.abspath(os.curdir), DEFAULT_MAP_FILE)
+
+        if not os.path.exists(path):
+            if should_exist:
+                raise ValueError(
+                    f"Cannot find the project map file {path}"
+                    f"\nRunning script for the wrong directory?\n")
+            else:
+                self._create_empty(path)
+
+        self.path = path
+        self._load_projects()
+
+    def save(self):
+        """
+        Save project map back to its original file.
+        """
+        self._save(self.projects, self.path)
+
+    def _load_projects(self):
+        with open(self.path) as raw_data:
+            raw_projects = json.load(raw_data)
+
+            if not isinstance(raw_projects, list):
+                raise ValueError(
+                    "Project map should be a list of JSON objects")
+
+            self.projects = self._parse(raw_projects)
+
+    @staticmethod
+    def _parse(raw_projects: List[JSON]) -> List[ProjectInfo]:
+        return [ProjectMap._parse_project(raw_project)
+                for raw_project in raw_projects]
+
+    @staticmethod
+    def _parse_project(raw_project: JSON) -> ProjectInfo:
+        try:
+            name: str = raw_project["name"]
+            build_mode: int = raw_project["mode"]
+            enabled: bool = raw_project.get("enabled", True)
+            return ProjectInfo(name, build_mode, enabled)
+
+        except KeyError as e:
+            raise ValueError(
+                f"Project info is required to have a '{e.args[0]}' field")
+
+    @staticmethod
+    def _create_empty(path: str):
+        ProjectMap._save([], path)
+
+    @staticmethod
+    def _save(projects: List[ProjectInfo], path: str):
+        with open(path, "w") as output:
+            json.dump(ProjectMap._convert_infos_to_dicts(projects),
+                      output, indent=2)
+
+    @staticmethod
+    def _convert_infos_to_dicts(projects: List[ProjectInfo]) -> List[JSON]:
+        return [project._asdict() for project in projects]

diff --git a/clang/utils/analyzer/SATestAdd.py b/clang/utils/analyzer/SATestAdd.py
index 012d8ec3fd9a..83ff3d719ea1 100755
--- a/clang/utils/analyzer/SATestAdd.py
+++ b/clang/utils/analyzer/SATestAdd.py
@@ -43,13 +43,11 @@
                                               > changes_for_analyzer.patch
 """
 import SATestBuild
+from ProjectMap import ProjectMap, ProjectInfo
 
-import csv
 import os
 import sys
 
-from typing import IO
-
 
 def add_new_project(name: str, build_mode: int):
     """
@@ -57,9 +55,10 @@ def add_new_project(name: str, build_mode: int):
     :param name: is a short string used to identify a project.
     """
 
-    project_info = SATestBuild.ProjectInfo(name, build_mode,
-                                           is_reference_build=True)
-    tester = SATestBuild.ProjectTester(project_info)
+    project_info = ProjectInfo(name, build_mode)
+    test_info = SATestBuild.TestInfo(project_info,
+                                     is_reference_build=True)
+    tester = SATestBuild.ProjectTester(test_info)
 
     project_dir = tester.get_project_dir()
     if not os.path.exists(project_dir):
@@ -70,33 +69,20 @@ def add_new_project(name: str, build_mode: int):
     tester.test()
 
     # Add the project name to the project map.
-    project_map_path = SATestBuild.get_project_map_path(should_exist=False)
+    project_map = ProjectMap(should_exist=False)
 
-    if os.path.exists(project_map_path):
-        file_mode = "r+"
+    if is_existing_project(project_map, name):
+        print(f"Warning: Project with name '{name}' already exists.",
+              file=sys.stdout)
+        print("Reference output has been regenerated.", file=sys.stdout)
     else:
-        print("Warning: Creating the project map file!")
-        file_mode = "w+"
-
-    with open(project_map_path, file_mode) as map_file:
-        if is_existing_project(map_file, name):
-            print(f"Warning: Project with name '{name}' already exists.",
-                  file=sys.stdout)
-            print("Reference output has been regenerated.", file=sys.stdout)
-        else:
-            map_writer = csv.writer(map_file)
-            map_writer.writerow((name, build_mode))
-            print(f"The project map is updated: {project_map_path}")
-
-
-def is_existing_project(map_file: IO, project_name: str) -> bool:
-    map_reader = csv.reader(map_file)
+        project_map.projects.append(project_info)
+        project_map.save()
 
-    for raw_info in map_reader:
-        if project_name == raw_info[0]:
-            return True
 
-    return False
+def is_existing_project(project_map: ProjectMap, project_name: str) -> bool:
+    return any(existing_project.name == project_name
+               for existing_project in project_map.projects)
 
 
 # TODO: Use argparse

diff --git a/clang/utils/analyzer/SATestBuild.py b/clang/utils/analyzer/SATestBuild.py
index c7f28abf9654..8b0b80318471 100755
--- a/clang/utils/analyzer/SATestBuild.py
+++ b/clang/utils/analyzer/SATestBuild.py
@@ -44,9 +44,9 @@
 """
 import CmpRuns
 import SATestUtils
+from ProjectMap import ProjectInfo, ProjectMap
 
 import argparse
-import csv
 import glob
 import logging
 import math
@@ -59,9 +59,11 @@
 import time
 
 from queue import Queue
+# mypy has problems finding InvalidFileException in the module
+# and this is how we can shush that false positive
+from plistlib import InvalidFileException  # type:ignore
 from subprocess import CalledProcessError, check_call
-from typing import (cast, Dict, Iterable, IO, List, NamedTuple, Optional,
-                    Tuple, TYPE_CHECKING)
+from typing import Dict, IO, List, NamedTuple, Optional, TYPE_CHECKING
 
 
 ###############################################################################
@@ -105,9 +107,6 @@ def stdout(message: str):
 # Number of jobs.
 MAX_JOBS = int(math.ceil(multiprocessing.cpu_count() * 0.75))
 
-# Project map stores info about all the "registered" projects.
-PROJECT_MAP_FILE = "projectMap.csv"
-
 # Names of the project specific scripts.
 # The script that downloads the project.
 DOWNLOAD_SCRIPT = "download_project.sh"
@@ -187,18 +186,6 @@ def fileno(self) -> int:
 ###############################################################################
 
 
-def get_project_map_path(should_exist: bool = True) -> str:
-    project_map_path = os.path.join(os.path.abspath(os.curdir),
-                                    PROJECT_MAP_FILE)
-
-    if should_exist and not os.path.exists(project_map_path):
-        stderr(f"Error: Cannot find the project map file {project_map_path}"
-               f"\nRunning script for the wrong directory?\n")
-        sys.exit(1)
-
-    return project_map_path
-
-
 def run_cleanup_script(directory: str, build_log_file: IO):
     """
     Run pre-processing script if any.
@@ -268,12 +255,11 @@ def apply_patch(directory: str, build_log_file: IO):
         sys.exit(1)
 
 
-class ProjectInfo(NamedTuple):
+class TestInfo(NamedTuple):
     """
     Information about a project and settings for its analysis.
     """
-    name: str
-    build_mode: int
+    project: ProjectInfo
     override_compiler: bool = False
     extra_analyzer_config: str = ""
     is_reference_build: bool = False
@@ -287,9 +273,9 @@ class ProjectInfo(NamedTuple):
 # It is a common workaround for this situation:
 # https://mypy.readthedocs.io/en/stable/common_issues.html#using-classes-that-are-generic-in-stubs-but-not-at-runtime
 if TYPE_CHECKING:
-    ProjectQueue = Queue[ProjectInfo]  # this is only processed by mypy
+    TestQueue = Queue[TestInfo]  # this is only processed by mypy
 else:
-    ProjectQueue = Queue  # this will be executed at runtime
+    TestQueue = Queue  # this will be executed at runtime
 
 
 class RegressionTester:
@@ -306,25 +292,24 @@ def __init__(self, jobs: int, override_compiler: bool,
         self.strictness = strictness
 
     def test_all(self) -> bool:
-        projects_to_test: List[ProjectInfo] = []
-
-        with open(get_project_map_path(), "r") as map_file:
-            validate_project_file(map_file)
-
-            # Test the projects.
-            for proj_name, proj_build_mode in get_projects(map_file):
-                projects_to_test.append(
-                    ProjectInfo(proj_name, int(proj_build_mode),
-                                self.override_compiler,
-                                self.extra_analyzer_config,
-                                self.regenerate, self.strictness))
+        projects_to_test: List[TestInfo] = []
+
+        project_map = ProjectMap()
+
+        # Test the projects.
+        for project in project_map.projects:
+            projects_to_test.append(
+                TestInfo(project,
+                         self.override_compiler,
+                         self.extra_analyzer_config,
+                         self.regenerate, self.strictness))
         if self.jobs <= 1:
             return self._single_threaded_test_all(projects_to_test)
         else:
             return self._multi_threaded_test_all(projects_to_test)
 
     def _single_threaded_test_all(self,
-                                  projects_to_test: List[ProjectInfo]) -> bool:
+                                  projects_to_test: List[TestInfo]) -> bool:
         """
         Run all projects.
         :return: whether tests have passed.
@@ -336,7 +321,7 @@ def _single_threaded_test_all(self,
         return success
 
     def _multi_threaded_test_all(self,
-                                 projects_to_test: List[ProjectInfo]) -> bool:
+                                 projects_to_test: List[TestInfo]) -> bool:
         """
         Run each project in a separate thread.
 
@@ -345,7 +330,7 @@ def _multi_threaded_test_all(self,
 
         :return: whether tests have passed.
         """
-        tasks_queue = ProjectQueue()
+        tasks_queue = TestQueue()
 
         for project_info in projects_to_test:
             tasks_queue.put(project_info)
@@ -370,13 +355,12 @@ class ProjectTester:
     """
     A component aggregating testing for one project.
     """
-    def __init__(self, project_info: ProjectInfo):
-        self.project_name = project_info.name
-        self.build_mode = project_info.build_mode
-        self.override_compiler = project_info.override_compiler
-        self.extra_analyzer_config = project_info.extra_analyzer_config
-        self.is_reference_build = project_info.is_reference_build
-        self.strictness = project_info.strictness
+    def __init__(self, test_info: TestInfo):
+        self.project = test_info.project
+        self.override_compiler = test_info.override_compiler
+        self.extra_analyzer_config = test_info.extra_analyzer_config
+        self.is_reference_build = test_info.is_reference_build
+        self.strictness = test_info.strictness
 
     def test(self) -> bool:
         """
@@ -384,7 +368,11 @@ def test(self) -> bool:
         :return tests_passed: Whether tests have passed according
         to the :param strictness: criteria.
         """
-        stdout(f" \n\n--- Building project {self.project_name}\n")
+        if not self.project.enabled:
+            stdout(f" \n\n--- Skipping disabled project {self.project.name}\n")
+            return True
+
+        stdout(f" \n\n--- Building project {self.project.name}\n")
 
         start_time = time.time()
 
@@ -405,13 +393,13 @@ def test(self) -> bool:
         else:
             passed = run_cmp_results(project_dir, self.strictness)
 
-        stdout(f"Completed tests for project {self.project_name} "
+        stdout(f"Completed tests for project {self.project.name} "
                f"(time: {time.time() - start_time:.2f}).\n")
 
         return passed
 
     def get_project_dir(self) -> str:
-        return os.path.join(os.path.abspath(os.curdir), self.project_name)
+        return os.path.join(os.path.abspath(os.curdir), self.project.name)
 
     def get_output_dir(self) -> str:
         if self.is_reference_build:
@@ -441,7 +429,7 @@ def build(self, directory: str, output_dir: str):
 
         # Build and analyze the project.
         with open(build_log_path, "w+") as build_log_file:
-            if self.build_mode == 1:
+            if self.project.mode == 1:
                 download_and_patch(directory, build_log_file)
                 run_cleanup_script(directory, build_log_file)
                 self.scan_build(directory, output_dir, build_log_file)
@@ -451,7 +439,7 @@ def build(self, directory: str, output_dir: str):
             if self.is_reference_build:
                 run_cleanup_script(directory, build_log_file)
                 normalize_reference_results(directory, output_dir,
-                                            self.build_mode)
+                                            self.project.mode)
 
         stdout(f"Build complete (time: {time.time() - time_start:.2f}). "
                f"See the log for more details: {build_log_path}\n")
@@ -549,7 +537,7 @@ def analyze_preprocessed(self, directory: str, output_dir: str):
         prefix += " -Xclang -analyzer-config "
         prefix += f"-Xclang {self.generate_config()} "
 
-        if self.build_mode == 2:
+        if self.project.mode == 2:
             prefix += "-std=c++11 "
 
         plist_path = os.path.join(directory, output_dir, "date")
@@ -601,7 +589,7 @@ def generate_config(self) -> str:
 
 
 class TestProjectThread(threading.Thread):
-    def __init__(self, tasks_queue: ProjectQueue,
+    def __init__(self, tasks_queue: TestQueue,
                  results_differ: threading.Event,
                  failure_flag: threading.Event):
         """
@@ -621,13 +609,13 @@ def __init__(self, tasks_queue: ProjectQueue,
     def run(self):
         while not self.tasks_queue.empty():
             try:
-                project_info = self.tasks_queue.get()
+                test_info = self.tasks_queue.get()
 
-                Logger = logging.getLogger(project_info.name)
+                Logger = logging.getLogger(test_info.project.name)
                 LOCAL.stdout = StreamToLogger(Logger, logging.INFO)
                 LOCAL.stderr = StreamToLogger(Logger, logging.ERROR)
 
-                tester = ProjectTester(project_info)
+                tester = ProjectTester(test_info)
                 if not tester.test():
                     self.results_differ.set()
 
@@ -841,7 +829,7 @@ def clean_up_empty_plists(output_dir: str):
                 os.remove(plist)
                 continue
 
-        except plistlib.InvalidFileException as e:
+        except InvalidFileException as e:
             stderr(f"Error parsing plist file {plist}: {str(e)}")
             continue
 
@@ -856,36 +844,6 @@ def clean_up_empty_folders(output_dir: str):
             os.removedirs(subdir)
 
 
-def get_projects(map_file: IO) -> Iterable[Tuple[str, str]]:
-    """
-    Iterate over all projects defined in the project file handler `map_file`
-    from the start.
-    """
-    map_file.seek(0)
-    # TODO: csv format is not very readable, change it to JSON
-    for project_info in csv.reader(map_file):
-        if SATestUtils.is_comment_csv_line(project_info):
-            continue
-        # suppress mypy error
-        yield cast(Tuple[str, str], project_info)
-
-
-def validate_project_file(map_file: IO):
-    """
-    Validate project file.
-    """
-    for project_info in get_projects(map_file):
-        if len(project_info) != 2:
-            stderr("Error: Rows in the project map file "
-                   "should have 2 entries.")
-            raise Exception()
-
-        if project_info[1] not in ('0', '1', '2'):
-            stderr("Error: Second entry in the project map file should be 0"
-                   " (single file), 1 (project), or 2(single file c++11).")
-            raise Exception()
-
-
 if __name__ == "__main__":
     # Parse command line arguments.
     parser = argparse.ArgumentParser(

diff --git a/clang/utils/analyzer/SATestUpdateDiffs.py b/clang/utils/analyzer/SATestUpdateDiffs.py
index e89b06dd75eb..1a2c41d2debf 100755
--- a/clang/utils/analyzer/SATestUpdateDiffs.py
+++ b/clang/utils/analyzer/SATestUpdateDiffs.py
@@ -4,6 +4,7 @@
 Update reference results for static analyzer.
 """
 import SATestBuild
+from ProjectMap import ProjectInfo, ProjectMap
 
 import os
 import shutil
@@ -14,9 +15,9 @@
 Verbose = 0
 
 
-def update_reference_results(project_name: str, build_mode: int):
-    project_info = SATestBuild.ProjectInfo(project_name, build_mode)
-    tester = SATestBuild.ProjectTester(project_info)
+def update_reference_results(project: ProjectInfo):
+    test_info = SATestBuild.TestInfo(project)
+    tester = SATestBuild.ProjectTester(test_info)
     project_dir = tester.get_project_dir()
 
     tester.is_reference_build = True
@@ -54,7 +55,7 @@ def run_cmd(command: str):
         SATestBuild.run_cleanup_script(project_dir, build_log_file)
 
         SATestBuild.normalize_reference_results(
-            project_dir, ref_results_path, build_mode)
+            project_dir, ref_results_path, project.mode)
 
         # Clean up the generated difference results.
         SATestBuild.cleanup_reference_results(ref_results_path)
@@ -62,6 +63,7 @@ def run_cmd(command: str):
         run_cmd(f"git add '{ref_results_path}'")
 
 
+# TODO: use argparse
 def main(argv):
     if len(argv) == 2 and argv[1] in ("-h", "--help"):
         print("Update static analyzer reference results based "
@@ -70,9 +72,9 @@ def main(argv):
               file=sys.stderr)
         sys.exit(1)
 
-    with open(SATestBuild.get_project_map_path(), "r") as f:
-        for project_name, build_mode in SATestBuild.get_projects(f):
-            update_reference_results(project_name, int(build_mode))
+    project_map = ProjectMap()
+    for project in project_map.projects:
+        update_reference_results(project)
 
 
 if __name__ == '__main__':