[clang] 21bacc2 - [analyzer][tests] Measure peak memory consumption for every project

Fri Jul 10 01:33:16 PDT 2020

Author: Valeriy Savchenko
Date: 2020-07-10T11:31:41+03:00
New Revision: 21bacc215413d10df53a4690e9561e9b96698742

URL: https://github.com/llvm/llvm-project/commit/21bacc215413d10df53a4690e9561e9b96698742
DIFF: https://github.com/llvm/llvm-project/commit/21bacc215413d10df53a4690e9561e9b96698742.diff

LOG: [analyzer][tests] Measure peak memory consumption for every project

Differential Revision: https://reviews.llvm.org/D82967

Added: 
    clang/utils/analyzer/requirements.txt

Modified: 
    clang/utils/analyzer/Dockerfile
    clang/utils/analyzer/SATestBuild.py
    clang/utils/analyzer/SATestUtils.py

Removed: 
    


################################################################################
diff  --git a/clang/utils/analyzer/Dockerfile b/clang/utils/analyzer/Dockerfile
index 30fb67cf93c8..21906011c7dc 100644

--- a/clang/utils/analyzer/Dockerfile
+++ b/clang/utils/analyzer/Dockerfile
@@ -54,8 +54,7 @@ ENV PATH="/analyzer/bin:${PATH}"
 
 ADD entrypoint.py /entrypoint.py
 
-# Uncomment in case of requirements
-# ADD requirements.txt /requirements.txt
-# RUN pip3 install -r /requirements.txt
+ADD requirements.txt /requirements.txt
+RUN pip3 install -r /requirements.txt
 
 ENTRYPOINT ["python", "/entrypoint.py"]

diff  --git a/clang/utils/analyzer/SATestBuild.py b/clang/utils/analyzer/SATestBuild.py
index 7d337632744f..ee510e03cc5a 100644
--- a/clang/utils/analyzer/SATestBuild.py
+++ b/clang/utils/analyzer/SATestBuild.py
@@ -43,7 +43,7 @@
 variable. It should contain a comma separated list.
 """
 import CmpRuns
-import SATestUtils
+import SATestUtils as utils
 from ProjectMap import DownloadType, ProjectInfo
 
 import glob
@@ -63,7 +63,7 @@
 # and this is we can shush that false positive
 from plistlib import InvalidFileException  # type:ignore
 from subprocess import CalledProcessError, check_call
-from typing import Dict, IO, List, NamedTuple, Optional, TYPE_CHECKING
+from typing import Dict, IO, List, NamedTuple, Optional, TYPE_CHECKING, Tuple
 
 
 ###############################################################################
@@ -115,7 +115,7 @@ def stdout(message: str):
 if 'CC' in os.environ:
     cc_candidate: Optional[str] = os.environ['CC']
 else:
-    cc_candidate = SATestUtils.which("clang", os.environ['PATH'])
+    cc_candidate = utils.which("clang", os.environ['PATH'])
 if not cc_candidate:
     stderr("Error: cannot find 'clang' in PATH")
     sys.exit(1)
@@ -194,9 +194,9 @@ def run_cleanup_script(directory: str, build_log_file: IO):
     cwd = os.path.join(directory, PATCHED_SOURCE_DIR_NAME)
     script_path = os.path.join(directory, CLEANUP_SCRIPT)
 
-    SATestUtils.run_script(script_path, build_log_file, cwd,
-                           out=LOCAL.stdout, err=LOCAL.stderr,
-                           verbose=VERBOSE)
+    utils.run_script(script_path, build_log_file, cwd,
+                     out=LOCAL.stdout, err=LOCAL.stderr,
+                     verbose=VERBOSE)
 
 
 class TestInfo(NamedTuple):
@@ -351,8 +351,6 @@ def get_output_dir(self) -> str:
             return OUTPUT_DIR_NAME
 
     def build(self, directory: str, output_dir: str):
-        time_start = time.time()
-
         build_log_path = get_build_log_path(output_dir)
 
         stdout(f"Log file: {build_log_path}\n")
@@ -375,19 +373,23 @@ def build(self, directory: str, output_dir: str):
             if self.project.mode == 1:
                 self._download_and_patch(directory, build_log_file)
                 run_cleanup_script(directory, build_log_file)
-                self.scan_build(directory, output_dir, build_log_file)
+                build_time, memory = self.scan_build(directory, output_dir,
+                                               build_log_file)
             else:
-                self.analyze_preprocessed(directory, output_dir)
+                build_time, memory = self.analyze_preprocessed(directory,
+                                                               output_dir)
 
             if self.is_reference_build:
                 run_cleanup_script(directory, build_log_file)
                 normalize_reference_results(directory, output_dir,
                                             self.project.mode)
 
-        stdout(f"Build complete (time: {time.time() - time_start:.2f}). "
+        stdout(f"Build complete (time: {utils.time_to_str(build_time)}, "
+               f"peak memory: {utils.memory_to_str(memory)}). "
                f"See the log for more details: {build_log_path}\n")
 
-    def scan_build(self, directory: str, output_dir: str, build_log_file: IO):
+    def scan_build(self, directory: str, output_dir: str,
+                   build_log_file: IO) -> Tuple[float, int]:
         """
         Build the project with scan-build by reading in the commands and
         prefixing them with the scan-build options.
@@ -416,6 +418,10 @@ def scan_build(self, directory: str, output_dir: str, build_log_file: IO):
             options += "--override-compiler "
 
         extra_env: Dict[str, str] = {}
+
+        execution_time = 0.0
+        peak_memory = 0
+
         try:
             command_file = open(build_script_path, "r")
             command_prefix = "scan-build " + options + " "
@@ -451,11 +457,15 @@ def scan_build(self, directory: str, output_dir: str, build_log_file: IO):
                 if VERBOSE >= 1:
                     stdout(f"  Executing: {command_to_run}\n")
 
-                check_call(command_to_run, cwd=cwd,
-                           stderr=build_log_file,
-                           stdout=build_log_file,
-                           env=dict(os.environ, **extra_env),
-                           shell=True)
+                time, mem = utils.check_and_measure_call(
+                    command_to_run, cwd=cwd,
+                    stderr=build_log_file,
+                    stdout=build_log_file,
+                    env=dict(os.environ, **extra_env),
+                    shell=True)
+
+                execution_time += time
+                peak_memory = max(peak_memory, mem)
 
         except CalledProcessError:
             stderr("Error: scan-build failed. Its output was: \n")
@@ -463,7 +473,10 @@ def scan_build(self, directory: str, output_dir: str, build_log_file: IO):
             shutil.copyfileobj(build_log_file, LOCAL.stderr)
             sys.exit(1)
 
-    def analyze_preprocessed(self, directory: str, output_dir: str):
+        return execution_time, peak_memory
+
+    def analyze_preprocessed(self, directory: str,
+                             output_dir: str) -> Tuple[float, int]:
         """
         Run analysis on a set of preprocessed files.
         """
@@ -487,14 +500,17 @@ def analyze_preprocessed(self, directory: str, output_dir: str):
         fail_path = os.path.join(plist_path, "failures")
         os.makedirs(fail_path)
 
+        execution_time = 0.0
+        peak_memory = 0
+
         for full_file_name in glob.glob(directory + "/*"):
             file_name = os.path.basename(full_file_name)
             failed = False
 
             # Only run the analyzes on supported files.
-            if SATestUtils.has_no_extension(file_name):
+            if utils.has_no_extension(file_name):
                 continue
-            if not SATestUtils.is_valid_single_input_file(file_name):
+            if not utils.is_valid_single_input_file(file_name):
                 stderr(f"Error: Invalid single input file {full_file_name}.\n")
                 raise Exception()
 
@@ -509,8 +525,12 @@ def analyze_preprocessed(self, directory: str, output_dir: str):
                     if VERBOSE >= 1:
                         stdout(f"  Executing: {command}\n")
 
-                    check_call(command, cwd=directory, stderr=log_file,
-                               stdout=log_file, shell=True)
+                    time, mem = utils.check_and_measure_call(
+                        command, cwd=directory, stderr=log_file,
+                        stdout=log_file, shell=True)
+
+                    execution_time += time
+                    peak_memory = max(peak_memory, mem)
 
                 except CalledProcessError as e:
                     stderr(f"Error: Analyzes of {full_file_name} failed. "
@@ -522,6 +542,8 @@ def analyze_preprocessed(self, directory: str, output_dir: str):
                 if not failed:
                     os.remove(log_file.name)
 
+        return execution_time, peak_memory
+
     def generate_config(self) -> str:
         out = "serialize-stats=true,stable-report-filename=true"
 
@@ -598,9 +620,9 @@ def _unpack_zip(self, directory: str, build_log_file: IO):
     @staticmethod
     def _run_download_script(directory: str, build_log_file: IO):
         script_path = os.path.join(directory, DOWNLOAD_SCRIPT)
-        SATestUtils.run_script(script_path, build_log_file, directory,
-                               out=LOCAL.stdout, err=LOCAL.stderr,
-                               verbose=VERBOSE)
+        utils.run_script(script_path, build_log_file, directory,
+                         out=LOCAL.stdout, err=LOCAL.stderr,
+                         verbose=VERBOSE)
 
     @staticmethod
     def _apply_patch(directory: str, build_log_file: IO):

diff  --git a/clang/utils/analyzer/SATestUtils.py b/clang/utils/analyzer/SATestUtils.py
index 4e126e66b869..3947e183d82f 100644
--- a/clang/utils/analyzer/SATestUtils.py
+++ b/clang/utils/analyzer/SATestUtils.py
@@ -1,8 +1,9 @@
 import os
 import sys
+import time
 
 from subprocess import CalledProcessError, check_call
-from typing import List, IO, Optional
+from typing import List, IO, Optional, Tuple
 
 
 def which(command: str, paths: Optional[str] = None) -> Optional[str]:
@@ -47,6 +48,87 @@ def is_valid_single_input_file(file_name: str) -> bool:
     return ext in (".i", ".ii", ".c", ".cpp", ".m", "")
 
 
+def time_to_str(time: float) -> str:
+    """
+    Convert given time in seconds into a human-readable string.
+    """
+    return f"{time:.2f}s"
+
+
+def memory_to_str(memory: int) -> str:
+    """
+    Convert given number of bytes into a human-readable string.
+    """
+    if memory:
+        try:
+            import humanize
+            return humanize.naturalsize(memory, gnu=True)
+        except ImportError:
+            # no formatter installed, let's keep it in bytes
+            return f"{memory}B"
+
+    # If memory is 0, we didn't succeed measuring it.
+    return "N/A"
+
+
+def check_and_measure_call(*popenargs, **kwargs) -> Tuple[float, int]:
+    """
+    Run command with arguments.  Wait for command to complete and measure
+    execution time and peak memory consumption.
+    If the exit code was zero then return, otherwise raise
+    CalledProcessError.  The CalledProcessError object will have the
+    return code in the returncode attribute.
+
+    The arguments are the same as for the call and check_call functions.
+
+    Return a tuple of execution time and peak memory.
+    """
+    peak_mem = 0
+    start_time = time.time()
+
+    try:
+        import psutil as ps
+
+        def get_memory(process: ps.Process) -> int:
+            mem = 0
+
+            # we want to gather memory usage from all of the child processes
+            descendants = list(process.children(recursive=True))
+            descendants.append(process)
+
+            for subprocess in descendants:
+                try:
+                    mem += subprocess.memory_info().rss
+                except (ps.NoSuchProcess, ps.AccessDenied):
+                    continue
+
+            return mem
+
+        with ps.Popen(*popenargs, **kwargs) as process:
+            # while the process is running calculate resource utilization.
+            while (process.is_running() and
+                   process.status() != ps.STATUS_ZOMBIE):
+                # track the peak utilization of the process
+                peak_mem = max(peak_mem, get_memory(process))
+                time.sleep(.5)
+
+            if process.is_running():
+                process.kill()
+
+        if process.returncode != 0:
+            cmd = kwargs.get("args")
+            if cmd is None:
+                cmd = popenargs[0]
+            raise CalledProcessError(process.returncode, cmd)
+
+    except ImportError:
+        # back off to subprocess if we don't have psutil installed
+        peak_mem = 0
+        check_call(*popenargs, **kwargs)
+
+    return time.time() - start_time, peak_mem
+
+
 def run_script(script_path: str, build_log_file: IO, cwd: str,
                out=sys.stdout, err=sys.stderr, verbose: int = 0):
     """

diff  --git a/clang/utils/analyzer/requirements.txt b/clang/utils/analyzer/requirements.txt
new file mode 100644
index 000000000000..ec4f66929952
--- /dev/null
+++ b/clang/utils/analyzer/requirements.txt
@@ -0,0 +1,4 @@
+graphviz
+humanize
+matplotlib
+psutil