[llvm-commits] [zorg] r106080 - in /zorg/trunk/lnt/lnt: db/runinfo.py viewer/simple.ptl

Daniel Dunbar daniel at zuster.org
Tue Jun 15 22:24:21 PDT 2010


Author: ddunbar
Date: Wed Jun 16 00:24:21 2010
New Revision: 106080

URL: http://llvm.org/viewvc/llvm-project?rev=106080&view=rev
Log:
LNT/simple: Add SimpleRunInfo to standardize collecting pass/fail information about a run.
 - This moves 'simple' style runs to computing significant changes based on the standard deviation of the previous samples (sketched below). This should help us eliminate the noise from nightly test reports.

 - This also updates the LNT/simple viewer to make it easier to see the interesting changes:
   o Use buildbot-style red/green/yellow to report regressions, improvements, and persistent failures.
   o By default, only show significant performance changes, and ignore cases where the test changes from pass to fail or vice versa.
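
As a condensed illustration of the new rule (not the committed code; `standard_deviation` here is a local stand-in for the helper in lnt.util.stats, and the real ComparisonResult.get_value_status() additionally handles failures and a minimum sampling precision):

    import math

    def standard_deviation(values):
        # Population standard deviation; stand-in for lnt.util.stats.
        mean = sum(values) / len(values)
        return math.sqrt(sum((v - mean) ** 2 for v in values) / len(values))

    def classify(current, previous, window, confidence_interval=1.96):
        delta = current - previous
        pct_delta = delta / previous if previous else 0.0
        if window:
            # Windowed rule: changes inside a symmetric confidence interval
            # around the window's standard deviation are treated as noise.
            if abs(delta) <= standard_deviation(window) * confidence_interval:
                return "UNCHANGED"
        elif abs(pct_delta) < 0.05:
            # No comparison window available: fall back to the old 5% rule.
            return "UNCHANGED"
        return "IMPROVED" if delta < 0 else "REGRESSED"

    # A 3% slowdown inside the noise band is no longer reported...
    print(classify(1.03, 1.00, [0.97, 1.02, 1.00, 1.05, 0.98]))  # UNCHANGED
    # ...while a 20% slowdown still is.
    print(classify(1.20, 1.00, [0.97, 1.02, 1.00, 1.05, 0.98]))  # REGRESSED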

Added:
    zorg/trunk/lnt/lnt/db/runinfo.py
Modified:
    zorg/trunk/lnt/lnt/viewer/simple.ptl

Added: zorg/trunk/lnt/lnt/db/runinfo.py
URL: http://llvm.org/viewvc/llvm-project/zorg/trunk/lnt/lnt/db/runinfo.py?rev=106080&view=auto
==============================================================================
--- zorg/trunk/lnt/lnt/db/runinfo.py (added)
+++ zorg/trunk/lnt/lnt/db/runinfo.py Wed Jun 16 00:24:21 2010
@@ -0,0 +1,199 @@
+from lnt.util import stats
+from lnt.viewer import Util
+from lnt.viewer.PerfDB import Sample
+
+REGRESSED = 0
+IMPROVED = 1
+UNCHANGED_PASS = 2
+UNCHANGED_FAIL = 3
+
+class ComparisonResult:
+    def __init__(self, cur_value, prev_value, delta, pct_delta, stddev, MAD,
+                 cur_failed, prev_failed):
+        self.current = cur_value
+        self.previous = prev_value
+        self.delta = delta
+        self.pct_delta = pct_delta
+        self.stddev = stddev
+        self.MAD = MAD
+        self.failed = cur_failed
+        self.prev_failed = prev_failed
+
+    def get_test_status(self):
+        # Compute the comparison status for the test success.
+        if self.failed:
+            if self.prev_failed:
+                return UNCHANGED_FAIL
+            else:
+                return REGRESSED
+        else:
+            if self.prev_failed:
+                return IMPROVED
+            else:
+                return UNCHANGED_PASS
+
+    def get_value_status(self, confidence_interval=1.96, value_precision=0.01):
+        if self.current is None or self.previous is None:
+            return None
+
+        # Don't report value errors for tests which fail, or which just started
+        # passing.
+        #
+        # FIXME: One bug here is that we risk losing performance data on tests
+        # which flip to failure and back. What would be nice to do here is to
+        # find the last value in a passing run, or to move to using proper keyed
+        # reference runs.
+        if self.failed:
+            return UNCHANGED_FAIL
+        elif self.prev_failed:
+            return UNCHANGED_PASS
+
+        # Ignore tests whose delta is too small relative to the precision we can
+        # sample at; otherwise quantization means that we can't measure the
+        # standard deviation with enough accuracy.
+        if abs(self.delta) <= value_precision * confidence_interval:
+            if self.failed:
+                return UNCHANGED_FAIL
+            else:
+                return UNCHANGED_PASS
+
+        # If we have a comparison window, then measure using a symmetric
+        # confidence interval.
+        if self.stddev is not None:
+            if abs(self.delta) > self.stddev * confidence_interval:
+                if self.delta < 0:
+                    return IMPROVED
+                else:
+                    return REGRESSED
+            else:
+                if self.failed:
+                    return UNCHANGED_FAIL
+                else:
+                    return UNCHANGED_PASS
+
+        # Otherwise, use the old "significant change" metric: a 5% or greater delta.
+        if abs(self.pct_delta) >= .05:
+            if self.pct_delta < 0:
+                return IMPROVED
+            else:
+                return REGRESSED
+        else:
+            if self.failed:
+                return UNCHANGED_FAIL
+            else:
+                return UNCHANGED_PASS
+
+class SimpleRunInfo:
+    def __init__(self, db, test_suite_summary):
+        self.db = db
+        self.test_suite_summary = test_suite_summary
+
+        self.sample_map = Util.multidict()
+        self.loaded_samples = set()
+
+    def get_run_comparison_result(self, run, compare_to, test_name, pset,
+                                  comparison_window=()):
+        # Get the test.
+        test = self.test_suite_summary.test_map.get((test_name, pset))
+        if test is None:
+            return ComparisonResult(cur_value=None, prev_value=None, delta=None,
+                                    pct_delta=None, stddev=None, MAD=None,
+                                    cur_failed=None, prev_failed=None)
+
+        # Get the test status info.
+        status_info = self.test_suite_summary.test_status_map.get(test_name)
+        if status_info is not None:
+            status_name,status_kind = status_info
+            status_test = self.test_suite_summary.test_map.get(
+                (status_name, pset))
+        else:
+            status_test = status_kind = None
+
+        # Load the sample data for the current and previous runs and the
+        # comparison window.
+        if compare_to is None:
+            compare_id = None
+        else:
+            compare_id = compare_to.id
+        runs_to_load = set(comparison_window)
+        runs_to_load.add(run.id)
+        if compare_id is not None:
+            runs_to_load.add(compare_id)
+        self._load_samples_for_runs(runs_to_load)
+
+        # Lookup the current and previous values.
+        run_values = self.sample_map.get((run.id, test.id))
+        prev_values = self.sample_map.get((compare_id, test.id))
+
+        # Determine whether this (test,pset) passed or failed in the current and
+        # previous runs.
+        run_failed = prev_failed = False
+        if not status_test:
+            run_failed = not run_values
+            prev_failed = not prev_values
+        else:
+            run_status = self.sample_map.get((run.id,status_test.id))
+            prev_status = self.sample_map.get((compare_id,status_test.id))
+
+            # FIXME: Support XFAILs.
+            #
+            # FIXME: What to do about the multiple entries here. We could start
+            # by just treating non-matching samples as errors.
+            if status_kind == False: # .success style
+                run_failed = not run_status or not run_status[0]
+                prev_failed = not prev_status or not prev_status[0]
+            else:
+                run_failed = run_status and run_status[0] != 0
+                prev_failed = prev_status and prev_status[0] != 0
+
+        # Get the current and previous values.
+        if run_values:
+            run_value = min(run_values)
+        else:
+            run_value = None
+        if prev_values:
+            prev_value = min(prev_values)
+        else:
+            prev_value = None
+
+        # If we are missing current or comparison values we are done.
+        if run_value is None or prev_value is None:
+            return ComparisonResult(
+                run_value, prev_value, delta=None,
+                pct_delta=None, stddev=None, MAD=None,
+                cur_failed=run_failed, prev_failed=prev_failed)
+
+        # Compute the comparison status for the test value.
+        delta = run_value - prev_value
+        if prev_value != 0:
+            pct_delta = delta / prev_value
+        else:
+            pct_delta = 0.0
+
+        # Get all samples from the runs in the comparison window.
+        window_values = [v for run_id in comparison_window
+                         for v in self.sample_map.get((run_id, test.id), ())]
+        if window_values:
+            stddev = stats.standard_deviation(window_values)
+            MAD = stats.median_absolute_deviation(window_values)
+        else:
+            stddev = None
+            MAD = None
+
+        return ComparisonResult(run_value, prev_value, delta,
+                                pct_delta, stddev, MAD,
+                                run_failed, prev_failed)
+
+    def _load_samples_for_runs(self, runs):
+        # Find the set of new runs to load.
+        to_load = set(runs) - self.loaded_samples
+        if not to_load:
+            return
+
+        q = self.db.session.query(Sample.value, Sample.run_id, Sample.test_id)
+        q = q.filter(Sample.run_id.in_(to_load))
+        for value,run_id,test_id in q:
+            self.sample_map[(run_id,test_id)] = value
+
+        self.loaded_samples |= to_load
+

Modified: zorg/trunk/lnt/lnt/viewer/simple.ptl
URL: http://llvm.org/viewvc/llvm-project/zorg/trunk/lnt/lnt/viewer/simple.ptl?rev=106080&r1=106079&r2=106080&view=diff
==============================================================================
--- zorg/trunk/lnt/lnt/viewer/simple.ptl (original)
+++ zorg/trunk/lnt/lnt/viewer/simple.ptl Wed Jun 16 00:24:21 2010
@@ -11,6 +11,7 @@
 from quixote.directory import Directory
 from quixote.errors import TraversalError
 
+from lnt.db import runinfo
 from lnt.db import perfdbsummary
 from lnt.util import stats
 
@@ -459,94 +460,63 @@
         self.show_run_page(db, run, run_summary, compare_to, graph_body)
 
     def _q_index_body [html] (self, db, run, run_summary, compare_to):
-        # Find the tests. The simple UI maps all tests that start with
-        # 'simple.'.
-        #
-        # One sensible addition would be to allow 'simple.foo.success' as a test
-        # to indicate the success or failure of the test. We would assume that
-        # the test succeeded if its .success test was missing, which leads to a
-        # nice compact format (failures are expected to be rare).
-
-        if compare_to:
-            prev_id = compare_to.id
-            interesting_runs = [run.id, prev_id]
-        else:
-            prev_id = None
-            interesting_runs = [run.id]
-
         # Load the test suite summary.
         ts_summary = perfdbsummary.get_simple_suite_summary(db, self.tag)
+        sri = runinfo.SimpleRunInfo(db, ts_summary)
 
-        cur_id = run.id
-        previous_runs = []
+        # Get the filtering form.
+        form = quixote.form.Form(method=str("get"))
+        form.add(quixote.form.CheckboxWidget, "show_delta",
+                 title="Show Delta")
+        form.add(quixote.form.CheckboxWidget, "show_stddev",
+                 title="Show Standard Deviation")
+        form.add(quixote.form.CheckboxWidget, "show_mad",
+                 title="Show Median Absolute Deviation")
+        form.add(quixote.form.CheckboxWidget, "show_all",
+                 title="Show All Values")
+        form.add(quixote.form.IntWidget, "num_comparison_runs",
+                 title="Number of Comparison Runs")
+        form.add_submit("submit", "Update")
 
         request = quixote.get_request()
-        show_delta = bool(request.form.get('show_delta'))
-        show_stddev =  bool(request.form.get('show_stddev'))
-        show_mad = bool(request.form.get('show_mad'))
-
-        if show_stddev or show_mad:
-            for i in range(5):
-                cur_id = run_summary.get_previous_run_on_machine(cur_id)
-                if not cur_id:
-                    break
-
-                previous_runs.append(cur_id)
-                if cur_id not in interesting_runs:
-                    interesting_runs.append(cur_id)
-
-        interesting_runs = tuple(set(interesting_runs + previous_runs))
-
-        # Load the run sample data.
-        q = db.session.query(Sample.value, Sample.run_id, Sample.test_id)
-        q = q.filter(Sample.run_id.in_(interesting_runs))
-
-        sample_map = Util.multidict()
-        for value,run_id,test_id in q:
-            key = (run_id,test_id)
-            sample_map[key] = value
+        show_delta = bool(form['show_delta'])
+        show_stddev = bool(form['show_stddev'])
+        show_mad = bool(form['show_mad'])
+        show_all = bool(form['show_all'])
+        try:
+            num_comparison_runs = int(form['num_comparison_runs'])
+        except (TypeError, ValueError):
+            num_comparison_runs = 5
 
-        # Render the page.
+        self.renderPopupBegin('view_options', 'View Options', True)
+        form.render()
+        self.renderPopupEnd()
 
-        def get_cell_value [html] (test, status_test, status_kind, name, pset):
-            run_values = sample_map.get((run.id,test.id))
-            prev_values = sample_map.get((prev_id,test.id))
-
-            # Determine whether this (test,pset) passed or failed in the current
-            # and previous runs.
-            run_failed = prev_failed = False
-            if not status_test:
-                run_failed = not run_values
-                prev_failed = not prev_values
-            else:
-                run_status = sample_map.get((run.id,status_test.id))
-                prev_status = sample_map.get((prev_id,status_test.id))
+        # Gather the runs to use for statistical data.
+        cur_id = run.id
+        comparison_window = []
+        for i in range(num_comparison_runs):
+            cur_id = run_summary.get_previous_run_on_machine(cur_id)
+            if not cur_id:
+                break
+            comparison_window.append(cur_id)
 
-                # FIXME: What to do about the multiple entries here. We could
-                # start by just treating non-matching samples as errors.
-                if status_kind == False: # .success style
-                    run_failed = not run_status or not run_status[0]
-                    prev_failed = not prev_status or not prev_status[0]
-                else:
-                    run_failed = run_status and run_status[0] != 0
-                    prev_failed = prev_status and prev_status[0] != 0
+        # Render the page.
+        def get_cell_value [html] (cr):
+            test_status = cr.get_test_status()
+            value_status = cr.get_value_status()
 
             run_cell_value = "-"
-            if run_values:
-                run_value = min(run_values)
-                run_cell_value = "%.4f" % run_value
-            else:
-                run_value = None
+            if cr.current is not None:
+                run_cell_value = "%.4f" % cr.current
 
             cell_color = None
-            if run_failed:
-                if prev_failed:
-                    cell_color = (255,195,67)
-                else:
-                    cell_color = (233,128,128)
-            else:
-                if prev_failed:
-                    cell_color = (143,223,95)
+            if test_status == runinfo.REGRESSED:
+                cell_color = (233,128,128)
+            elif test_status == runinfo.IMPROVED:
+                cell_color = (143,223,95)
+            elif test_status == runinfo.UNCHANGED_FAIL:
+                cell_color = (255,195,67)
 
             if cell_color:
                 """
@@ -556,44 +526,27 @@
                 """
                 <td>%s</td""" % (run_cell_value,)
 
-            if prev_values and run_value is not None:
-                prev_value = min(prev_values)
-                pct = safediv(run_value, prev_value,
-                              '<center><font size=-2>nan</font></center>')
-                Util.PctCell(pct, delta=True).render()
+            if show_all or value_status in (runinfo.REGRESSED,
+                                            runinfo.IMPROVED):
+                Util.PctCell(cr.pct_delta).render()
             else:
                 """<td>-</td>"""
-                prev_value = None
 
             if show_delta:
-                if prev_value is not None and run_value is not None:
-                    """<td>%.4f</td>""" % (run_value - prev_value)
+                if cr.delta is not None:
+                    """<td>%.4f</td>""" % cr.delta
                 else:
                     """<td>-</td>"""
-
             if show_stddev:
-                previous_values = [v for run_id in previous_runs
-                                   for v in sample_map.get((run_id,
-                                                            test.id), ())]
-                if previous_values:
-                    sd_value = stats.standard_deviation(previous_values)
-                    sd_cell_value = "%.4f" % sd_value
+                if cr.stddev is not None:
+                    """<td>%.4f</td>""" % cr.stddev
                 else:
-                    sd_cell_value = "-"
-                """
-                <td>%s</td""" % (sd_cell_value,)
-
+                    """<td>-</td>"""
             if show_mad:
-                previous_values = [v for run_id in previous_runs
-                                   for v in sample_map.get((run_id,
-                                                            test.id), ())]
-                if previous_values:
-                    mad_value = stats.median_absolute_deviation(previous_values)
-                    mad_cell_value = "%.4f" % mad_value
+                if cr.MAD is not None:
+                    """<td>%.4f</td>""" % cr.MAD
                 else:
-                    mad_cell_value = "-"
-                """
-                <td>%s</td""" % (mad_cell_value,)
+                    """<td>-</td>"""
 
         """
         <h3>Parameter Sets</h3>
@@ -660,19 +613,9 @@
             <td><input type="checkbox" name="test.%s"></td>
             <td>%s</td>""" % (name, name)
             for pset in ts_summary.parameter_sets:
-                test = ts_summary.test_map.get((name,pset))
-                if test is None:
-                    """
-                <td></td><td></td>"""
-                    continue
-
-                status_info = ts_summary.test_status_map.get(name)
-                if status_info:
-                    status_name,status_kind = status_info
-                    status_test = ts_summary.test_map.get((status_name,pset))
-                else:
-                    status_test = status_kind = None
-                get_cell_value(test, status_test, status_kind, name, pset)
+                cr = sri.get_run_comparison_result(run, compare_to, name, pset,
+                                                   comparison_window)
+                get_cell_value(cr)
             """
           </tr>"""
         """
