[llvm-commits] [zorg] r147165 - in /zorg/trunk/lnt/lnt: server/reporting/analysis.py server/reporting/runs.py util/NTEmailReport.py

Daniel Dunbar daniel at zuster.org
Thu Dec 22 11:24:32 PST 2011


Author: ddunbar
Date: Thu Dec 22 13:24:32 2011
New Revision: 147165

URL: http://llvm.org/viewvc/llvm-project?rev=147165&view=rev
Log:
[lnt/v0.4] lnt.server.reporting: Continue defining V4 report implementation.
 - Generates the summary table counts.
 - Returns the changes in a format that the lnt submit client can report
   (see the sketch below).
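
As a rough illustration of that second point, here is a minimal sketch (not
part of the patch) of the simplified structure that generate_run_report() in
runs.py below fills in when the caller passes a `result` dictionary. The test
name and the UNCHANGED_PASS constant are illustrative assumptions.

    # Sketch only: shape of the simplified results handed back to clients.
    from lnt.db import runinfo

    result = {}
    pset_results = []
    result['test_results'] = [{'pset': (), 'results': pset_results}]

    # One tuple per (test, primary sample field):
    #   ("<test name>.<field name>", cr.get_test_status(), cr.get_value_status())
    pset_results.append(('SingleSource/Benchmarks/Example.compile_time',
                         runinfo.UNCHANGED_PASS,  # assumed constant name; the
                         runinfo.REGRESSED))      # patch itself uses REGRESSED,
                                                  # IMPROVED and UNCHANGED_FAIL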

Added:
    zorg/trunk/lnt/lnt/server/reporting/analysis.py
Modified:
    zorg/trunk/lnt/lnt/server/reporting/runs.py
    zorg/trunk/lnt/lnt/util/NTEmailReport.py

Added: zorg/trunk/lnt/lnt/server/reporting/analysis.py
URL: http://llvm.org/viewvc/llvm-project/zorg/trunk/lnt/lnt/server/reporting/analysis.py?rev=147165&view=auto
==============================================================================
--- zorg/trunk/lnt/lnt/server/reporting/analysis.py (added)
+++ zorg/trunk/lnt/lnt/server/reporting/analysis.py Thu Dec 22 13:24:32 2011
@@ -0,0 +1,133 @@
+"""
+Utilities for helping with the analysis of data, for reporting purposes.
+"""
+
+from lnt.server.ui import util
+from lnt.db.runinfo import ComparisonResult
+from lnt.testing import PASS, FAIL, XFAIL
+from lnt.util import stats
+
+class RunInfo(object):
+    def __init__(self, testsuite):
+        self.testsuite = testsuite
+
+        self.sample_map = util.multidict()
+        self.loaded_run_ids = set()
+
+    def get_run_comparison_result(self, run, compare_to, test_id, field,
+                                  comparison_window=[]):
+        # Get the field which indicates the requested field's status.
+        status_field = field.status_field
+
+        # Load the sample data for the current and previous runs and the
+        # comparison window.
+        if compare_to is None:
+            compare_id = None
+        else:
+            compare_id = compare_to.id
+        runs_to_load = set([r.id for r in comparison_window])
+        runs_to_load.add(run.id)
+        if compare_id is not None:
+            runs_to_load.add(compare_id)
+        self._load_samples_for_runs(runs_to_load)
+
+        # Lookup the current and previous samples.
+        run_samples = self.sample_map.get((run.id, test_id), ())
+        prev_samples = self.sample_map.get((compare_id, test_id), ())
+
+        # Determine whether this (test,pset) passed or failed in the current and
+        # previous runs.
+        #
+        # FIXME: Support XFAILs and non-determinism (mixed fail and pass)
+        # better.
+        run_failed = prev_failed = False
+        if status_field:
+            for sample in run_samples:
+                run_failed |= sample.get_field(status_field) == FAIL
+            for sample in prev_samples:
+                prev_failed |= sample.get_field(status_field) == FAIL
+
+        # Get the current and previous values.
+        run_values = [s.get_field(field) for s in run_samples]
+        prev_values = [s.get_field(field) for s in prev_samples]
+        if run_values:
+            run_value = min(run_values)
+        else:
+            run_value = None
+        if prev_values:
+            prev_value = min(prev_values)
+        else:
+            prev_value = None
+
+        # If we have multiple values for this run, use that to estimate the
+        # distribution.
+        if run_values and len(run_values) > 1:
+            stddev = stats.standard_deviation(run_values)
+            MAD = stats.median_absolute_deviation(run_values)
+            stddev_mean = stats.mean(run_values)
+            stddev_is_estimated = False
+        else:
+            stddev = None
+            MAD = None
+            stddev_mean = None
+            stddev_is_estimated = False
+
+        # If we are missing current or comparison values we are done.
+        if run_value is None or prev_value is None:
+            return ComparisonResult(
+                run_value, prev_value, delta=None,
+                pct_delta = None, stddev = stddev, MAD = MAD,
+                cur_failed = run_failed, prev_failed = prev_failed,
+                samples = run_values)
+
+        # Compute the comparison status for the test value.
+        delta = run_value - prev_value
+        if prev_value != 0:
+            pct_delta = delta / prev_value
+        else:
+            pct_delta = 0.0
+
+        # If we don't have an estimate for the distribution, attempt to "guess"
+        # it using the comparison window.
+        #
+        # FIXME: We can substantially improve the algorithm for guessing the
+        # noise level from a list of values. Probably better to just find a way
+        # to kill this code though.
+        if stddev is None:
+            # Get all previous values in the comparison window, for passing
+            # runs.
+            #
+            # FIXME: This is using the wrong status kind. :/
+            prev_samples = [v for run_id in comparison_window
+                            for v in self.sample_map.get((run_id, test_id), ())]
+            if prev_samples:
+                # Filter out failing samples.
+                if status_field:
+                    prev_samples = [s for s in prev_samples
+                                    if s.get_field(status_field) == PASS]
+                prev_values = [s.get_field(field)
+                               for s in prev_samples]
+                stddev = stats.standard_deviation(prev_values)
+                MAD = stats.median_absolute_deviation(prev_values)
+                stddev_mean = stats.mean(prev_values)
+                stddev_is_estimated = True
+
+        return ComparisonResult(run_value, prev_value, delta,
+                                pct_delta, stddev, MAD,
+                                run_failed, prev_failed, run_values,
+                                stddev_mean, stddev_is_estimated)
+
+    def _load_samples_for_runs(self, run_ids):
+        # Find the set of new runs to load.
+        to_load = set(run_ids) - self.loaded_run_ids
+        if not to_load:
+            return
+
+        # Batch load all of the samples for the needed runs.
+        q = self.testsuite.query(self.testsuite.Sample)
+        q = q.filter(self.testsuite.Sample.run_id.in_(to_load))
+        for sample in q:
+            self.sample_map[(sample.run_id, sample.test_id)] = sample
+
+        self.loaded_run_ids |= to_load
+

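For context, a minimal usage sketch of the new class, mirroring how
generate_run_report() in runs.py below drives it. The `ts`, `run`,
`compare_to`, and `comparison_window` arguments stand for objects the caller
already has in scope (a V4 testsuite wrapper, the current and baseline runs,
and the list of earlier runs on the machine); this is not part of the patch.

    import lnt.server.reporting.analysis

    def summarize_changes(ts, run, compare_to, comparison_window):
        # Compare every (test, primary field) pair of `run` against
        # `compare_to`, using `comparison_window` to estimate noise.
        sri = lnt.server.reporting.analysis.RunInfo(ts)
        for field in ts.Sample.get_primary_fields():
            for name, test_id in ts.query(ts.Test.name, ts.Test.id):
                cr = sri.get_run_comparison_result(run, compare_to, test_id,
                                                   field, comparison_window)
                # cr is an lnt.db.runinfo.ComparisonResult; the report keys
                # off cr.get_test_status(), cr.get_value_status(), cr.current
                # and cr.previous.
                yield name, field, cr

Note that _load_samples_for_runs() batch-loads samples only for run ids it has
not seen before, so repeated comparisons against the same window reuse the
cached sample map rather than issuing a query per test.
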
Modified: zorg/trunk/lnt/lnt/server/reporting/runs.py
URL: http://llvm.org/viewvc/llvm-project/zorg/trunk/lnt/lnt/server/reporting/runs.py?rev=147165&r1=147164&r2=147165&view=diff
==============================================================================
--- zorg/trunk/lnt/lnt/server/reporting/runs.py (original)
+++ zorg/trunk/lnt/lnt/server/reporting/runs.py Thu Dec 22 13:24:32 2011
@@ -5,8 +5,11 @@
 import StringIO
 import os
 
+import lnt.server.reporting.analysis
+from lnt.db import runinfo
+
 def generate_run_report(run, baseurl, only_html_body = False,
-                        num_comparison_runs = 10):
+                        num_comparison_runs = 10, result = None):
     """
     generate_run_report(...) -> (str: subject, str: text_report,
                                  str: html_report)
@@ -17,10 +20,10 @@
 
     assert num_comparison_runs > 0
 
-
     ts = run.testsuite
     machine = run.machine
     machine_parameters = machine.parameters
+    sri = lnt.server.reporting.analysis.RunInfo(ts)
 
     # Gather the runs to use for statistical data.
     comparison_window = list(ts.get_previous_runs_on_machine(
@@ -32,6 +35,70 @@
     else:
         compare_to = None
 
+    # Get the test names.
+    test_names = ts.query(ts.Test.name, ts.Test.id).order_by(ts.Test.name).all()
+
+    # Gather the changes to report, organized by field and then collated by
+    # change type.
+    primary_fields = list(ts.Sample.get_primary_fields())
+    num_total_tests = len(primary_fields) * len(test_names)
+    test_results = []
+    for field in primary_fields:
+        new_failures = []
+        new_passes = []
+        perf_regressions = []
+        perf_improvements = []
+        removed_tests = []
+        added_tests = []
+        existing_failures = []
+        unchanged_tests = []
+        for name,test_id in test_names:
+            cr = sri.get_run_comparison_result(run, compare_to, test_id, field,
+                                               comparison_window)
+            test_status = cr.get_test_status()
+            perf_status = cr.get_value_status()
+            if test_status == runinfo.REGRESSED:
+                bucket = new_failures
+            elif test_status == runinfo.IMPROVED:
+                bucket = new_passes
+            elif cr.current is None and cr.previous is not None:
+                bucket = removed_tests
+            elif cr.current is not None and cr.previous is None:
+                bucket = added_tests
+            elif test_status == runinfo.UNCHANGED_FAIL:
+                bucket = existing_failures
+            elif perf_status == runinfo.REGRESSED:
+                bucket = perf_regressions
+            elif perf_status == runinfo.IMPROVED:
+                bucket = perf_improvements
+            else:
+                bucket = unchanged_tests
+
+            bucket.append((name, cr))
+
+        test_results.append(
+            (field, (('New Failures', new_failures, False),
+                     ('New Passes', new_passes, False),
+                     ('Performance Regressions', perf_regressions, True),
+                     ('Performance Improvements', perf_improvements, True),
+                     ('Removed Tests', removed_tests, False),
+                     ('Added Tests', added_tests, False),
+                     ('Existing Failures', existing_failures, False),
+                     ('Unchanged Tests', unchanged_tests, False))))
+
+    # Collect the simplified results, if desired, for sending back to clients.
+    if result is not None:
+        pset_results = []
+        result['test_results'] = [{ 'pset' : (), 'results' : pset_results}]
+        for field,field_results in test_results:
+            for _,bucket,_ in field_results:
+                for name,cr in bucket:
+                    # FIXME: Include additional information about performance
+                    # changes.
+                    pset_results.append(("%s.%s" % (name, field.name),
+                                         cr.get_test_status(),
+                                         cr.get_value_status()))
+
     # Begin report generation...
     subject = """%s test results: %s""" % (
         machine.name, run.start_time.strftime('%Y-%m-%d %H:%M:%S %Z PST'))
@@ -107,6 +174,41 @@
         print >>html_report, """(%s:%d)</b></p>""" % (compare_to.machine.name,
                                                       compare_to.machine.id)
 
+    # Generate the summary of the changes.
+    total_changes = sum(len(bucket)
+                        for _,field_results in test_results
+                        for name,bucket,_ in field_results
+                        if name != 'Unchanged Tests')
+
+    print >>report, """==============="""
+    print >>report, """Tests Summary"""
+    print >>report, """==============="""
+    print >>report
+    print >>html_report, """
+<hr>
+<h3>Tests Summary</h3>
+<table>
+<thead><tr><th>Status Group</th><th align="right">#</th></tr></thead>
+"""
+    # For now, we aggregate across all bucket types for reports.
+    for i,(name,_,_) in enumerate(test_results[0][1]):
+        num_items = sum(len(field_results[i][1])
+                        for _,field_results in test_results)
+        if num_items:
+            print >>report, '%s: %d' % (name, num_items)
+            print >>html_report, """
+<tr><td>%s</td><td align="right">%d</td></tr>""" % (
+                name, num_items)
+    print >>report, """Total Tests: %d""" % num_total_tests
+    print >>report
+    print >>html_report, """
+<tfoot>
+  <tr><td><b>Total Tests</b></td><td align="right"><b>%d</b></td></tr>
+</tfoot>
+</table>
+""" % num_total_tests
+
+    # Finish up the HTML report (wrapping the body, if necessary).
     html_report = html_report.getvalue()
     if not only_html_body:
         # We embed the additional resources, so that the message is self
@@ -116,7 +218,7 @@
         style_css = open(os.path.join(static_path,
                                       "style.css")).read()
 
-        html_report = """
+        html_report = """\
 <html>
   <head>
     <style type="text/css">

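To make the summary aggregation above concrete, here is a small standalone
sketch with made-up field names, buckets, and counts: each test_results entry
is (field, buckets), and the summary sums bucket i across every primary field
so a single table covers all fields.

    # Sketch with illustrative data only (the real buckets tuple has the
    # eight status groups built in generate_run_report()).
    test_results = [
        ('compile_time', (('New Failures', ['t1'], False),
                          ('Performance Regressions', ['t2', 't3'], True),
                          ('Unchanged Tests', ['t4', 't5'], False))),
        ('execution_time', (('New Failures', [], False),
                            ('Performance Regressions', ['t6'], True),
                            ('Unchanged Tests', ['t1', 't4'], False))),
    ]

    for i, (name, _, _) in enumerate(test_results[0][1]):
        num_items = sum(len(field_results[i][1])
                        for _, field_results in test_results)
        if num_items:
            print '%s: %d' % (name, num_items)
    # Prints: "New Failures: 1", "Performance Regressions: 3",
    # "Unchanged Tests: 4"
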
Modified: zorg/trunk/lnt/lnt/util/NTEmailReport.py
URL: http://llvm.org/viewvc/llvm-project/zorg/trunk/lnt/lnt/util/NTEmailReport.py?rev=147165&r1=147164&r2=147165&view=diff
==============================================================================
--- zorg/trunk/lnt/lnt/util/NTEmailReport.py (original)
+++ zorg/trunk/lnt/lnt/util/NTEmailReport.py Thu Dec 22 13:24:32 2011
@@ -470,7 +470,8 @@
     # We haven't implemented V4DB support yet in reports.
     if isinstance(db, lnt.server.db.v4db.V4DB):
         return lnt.server.reporting.runs.generate_run_report(
-            run, baseurl=baseurl, only_html_body=only_html_body)
+            run, baseurl=baseurl, only_html_body=only_html_body,
+            result=result)
 
     # Use a simple report unless the tag indicates this is an old style nightly
     # test run.
