[LNT] r209204 - Aggregate most recent runs

Yi Kong Yi.Kong at arm.com
Tue May 20 03:42:12 PDT 2014


Author: kongyi
Date: Tue May 20 05:42:11 2014
New Revision: 209204

URL: http://llvm.org/viewvc/llvm-project?rev=209204&view=rev
Log:
Aggregate most recent runs

This method significantly reduces noise when applied in combination with the
Mann-Whitney U test. It replaces the standard deviation estimation feature,
which it clearly outperforms.

It isn't enabled by default.
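
For illustration only (this sketch is not part of the commit): the idea is to
compare the window of samples aggregated from a run and its most recent
predecessors against the corresponding window for the baseline run, and to
report a change only when the Mann-Whitney U test rejects the hypothesis that
the two windows come from the same distribution. A minimal standalone sketch,
assuming SciPy's mannwhitneyu is available; the function name, sample values
and alpha threshold are hypothetical and not LNT code:

# Illustrative sketch only -- not the LNT implementation.
from scipy.stats import mannwhitneyu

def is_significant_change(current_samples, previous_samples, alpha=0.05):
    """Return True when the two sample windows differ significantly.

    The arguments stand in for the values collected from a run (plus its
    N most recent predecessors) and from the baseline window.
    """
    if not current_samples or not previous_samples:
        return False
    # mannwhitneyu returns (statistic, p-value); a small p-value means the
    # two distributions are unlikely to share the same location.
    _, p_value = mannwhitneyu(current_samples, previous_samples)
    return p_value < alpha

# Compare an aggregated window of recent samples against a baseline window.
changed = is_significant_change([12.5, 12.6, 12.4, 12.7],
                                [10.0, 10.2, 10.1, 10.3])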

Modified:
    lnt/trunk/lnt/server/db/testsuitedb.py
    lnt/trunk/lnt/server/reporting/analysis.py
    lnt/trunk/lnt/server/reporting/runs.py
    lnt/trunk/lnt/server/ui/templates/v4_run.html
    lnt/trunk/lnt/server/ui/views.py

Modified: lnt/trunk/lnt/server/db/testsuitedb.py
URL: http://llvm.org/viewvc/llvm-project/lnt/trunk/lnt/server/db/testsuitedb.py?rev=209204&r1=209203&r2=209204&view=diff
==============================================================================
--- lnt/trunk/lnt/server/db/testsuitedb.py (original)
+++ lnt/trunk/lnt/server/db/testsuitedb.py Tue May 20 05:42:11 2014
@@ -747,9 +747,12 @@ test %r does not map to a sample field i
         The direction must be -1 or 1 and specified whether or not the
         preceeding or following runs should be returned.
         """
-        assert N > 0, "invalid count"
+        assert N >= 0, "invalid count"
         assert direction in (-1, 1), "invalid direction"
 
+        if N==0:
+            return []
+
         # The obvious algorithm here is to step through the run orders in the
         # appropriate direction and yield any runs on the same machine which
         # were reported at that order.

Modified: lnt/trunk/lnt/server/reporting/analysis.py
URL: http://llvm.org/viewvc/llvm-project/lnt/trunk/lnt/server/reporting/analysis.py?rev=209204&r1=209203&r2=209204&view=diff
==============================================================================
--- lnt/trunk/lnt/server/reporting/analysis.py (original)
+++ lnt/trunk/lnt/server/reporting/analysis.py Tue May 20 05:42:11 2014
@@ -14,7 +14,7 @@ UNCHANGED_FAIL = 'UNCHANGED_FAIL'
 class ComparisonResult:
     def __init__(self, cur_value, prev_value, delta, pct_delta, stddev, MAD,
                  cur_failed, prev_failed, samples, prev_samples, stddev_mean = None,
-                 stddev_is_estimated = False, confidence_lv = .05):
+                 confidence_lv = .05):
         self.current = cur_value
         self.previous = prev_value
         self.delta = delta
@@ -26,7 +26,6 @@ class ComparisonResult:
         self.samples = samples
         self.prev_samples = prev_samples
         self.stddev_mean = stddev_mean
-        self.stddev_is_estimated = stddev_is_estimated
         self.confidence_lv = confidence_lv
 
     def get_samples(self):
@@ -104,12 +103,6 @@ class ComparisonResult:
             is_significant = abs(self.delta) > (self.stddev *
                                                 confidence_interval)
 
-            # If the stddev is estimated, then it is also only significant if
-            # the delta from the estimate mean is above the confidence interval.
-            if self.stddev_is_estimated:
-                is_significant &= (abs(self.current - self.stddev_mean) >
-                                   self.stddev * confidence_interval)
-
             # If the delta is significant, return 
             if is_significant:
                 if self.delta < 0:
@@ -144,18 +137,33 @@ class RunInfo(object):
 
     def get_test_ids(self):
         return set(key[1] for key in self.sample_map.keys())
+
+    def get_sliding_runs(self, run, compare_run, num_comparison_runs=0):
+        """
+        Get num_comparison_runs most recent runs,
+        This query is expensive.
+        """
+        runs = [run]
+        runs_prev = self.testsuite.get_previous_runs_on_machine(run, num_comparison_runs)
+        runs += runs_prev
+
+        if compare_run is not None:
+            compare_runs = [compare_run]
+            comp_prev = self.testsuite.get_previous_runs_on_machine(compare_run, num_comparison_runs)
+            compare_runs += comp_prev
+        else:
+            compare_runs = []
+
+        return runs, compare_runs
     
-    def get_run_comparison_result(self, run, compare_to, test_id, field,
-                                  comparison_window=[]):
+    def get_run_comparison_result(self, run, compare_to, test_id, field):
         if compare_to is not None:
             compare_to = [compare_to]
         else:
             compare_to = []
-        return self.get_comparison_result([run], compare_to, test_id, field,
-                                          comparison_window)
+        return self.get_comparison_result([run], compare_to, test_id, field)
 
-    def get_comparison_result(self, runs, compare_runs, test_id, field,
-                              comparison_window=[]):
+    def get_comparison_result(self, runs, compare_runs, test_id, field):
         # Get the field which indicates the requested field's status.
         status_field = field.status_field
 
@@ -204,12 +212,10 @@ class RunInfo(object):
             stddev = stats.standard_deviation(run_values)
             MAD = stats.median_absolute_deviation(run_values)
             stddev_mean = stats.mean(run_values)
-            stddev_is_estimated = False
         else:
             stddev = None
             MAD = None
             stddev_mean = None
-            stddev_is_estimated = False
 
         # If we are missing current or comparison values we are done.
         if run_value is None or prev_value is None:
@@ -227,34 +233,10 @@ class RunInfo(object):
         else:
             pct_delta = 0.0
 
-        # If we don't have an estimate for the distribution, attempt to "guess"
-        # it using the comparison window.
-        #
-        # FIXME: We can substantially improve the algorithm for guessing the
-        # noise level from a list of values. Probably better to just find a way
-        # to kill this code though.
-        if stddev is None:
-            # Get all previous values in the comparison window.
-            prev_samples = [s for run in comparison_window
-                            for s in self.sample_map.get((run.id, test_id), ())
-                            if s[field.index] is not None]
-            # Filter out failing samples.
-            if status_field:
-                prev_samples = [s for s in prev_samples
-                                if s[status_field.index] != FAIL]
-            if prev_samples:
-                prev_values = [s[field.index]
-                               for s in prev_samples]
-                stddev = stats.standard_deviation(prev_values)
-                MAD = stats.median_absolute_deviation(prev_values)
-                stddev_mean = stats.mean(prev_values)
-                stddev_is_estimated = True
-
         return ComparisonResult(run_value, prev_value, delta,
                                 pct_delta, stddev, MAD,
                                 run_failed, prev_failed, run_values, prev_values,
-                                stddev_mean, stddev_is_estimated,
-                                self.confidence_lv)
+                                stddev_mean, self.confidence_lv)
 
     def get_geomean_comparison_result(self, run, compare_to, field,
                                           comparison_window=[]):
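
As a usage note (illustrative, not part of the diff): the new
get_sliding_runs/get_comparison_result pair is intended to be driven roughly
as in the template change further below, so that the expensive previous-run
query is issued once per page rather than once per test. A rough sketch,
assuming an existing RunInfo instance sri and already-loaded run, compare_to,
test_id and field objects (hypothetical names here):

# Fetch each run plus its num_comparison_runs most recent predecessors once;
# this is the expensive query mentioned in the docstring above...
runs, compare_runs = sri.get_sliding_runs(run, compare_to,
                                          num_comparison_runs=10)

# ...then reuse the aggregated windows for every test/field comparison.
cr = sri.get_comparison_result(runs, compare_runs, test_id, field)
print(cr.get_value_status())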

Modified: lnt/trunk/lnt/server/reporting/runs.py
URL: http://llvm.org/viewvc/llvm-project/lnt/trunk/lnt/server/reporting/runs.py?rev=209204&r1=209203&r2=209204&view=diff
==============================================================================
--- lnt/trunk/lnt/server/reporting/runs.py (original)
+++ lnt/trunk/lnt/server/reporting/runs.py Tue May 20 05:42:11 2014
@@ -12,10 +12,9 @@ import lnt.server.ui.app
 import lnt.util.stats
 
 def generate_run_report(run, baseurl, only_html_body = False,
-                        num_comparison_runs = 10, result = None,
+                        num_comparison_runs = 0, result = None,
                         compare_to = None, baseline = None,
-                        comparison_window = None, aggregation_fn = min,
-                        confidence_lv = .05):
+                        aggregation_fn = min, confidence_lv = .05):
     """
     generate_run_report(...) -> (str: subject, str: text_report,
                                  str: html_report)
@@ -24,7 +23,7 @@ def generate_run_report(run, baseurl, on
     run, suitable for emailing or presentation on a web page.
     """
 
-    assert num_comparison_runs > 0
+    assert num_comparison_runs >= 0
 
     start_time = time.time()
 
@@ -43,10 +42,9 @@ def generate_run_report(run, baseurl, on
         baseline = None
 
     # Gather the runs to use for statistical data.
-    if comparison_window is None:
-        comparison_start_run = compare_to or run
-        comparison_window = list(ts.get_previous_runs_on_machine(
-                comparison_start_run, num_comparison_runs))
+    comparison_start_run = compare_to or run
+    comparison_window = list(ts.get_previous_runs_on_machine(
+            comparison_start_run, num_comparison_runs))
     if baseline:
         baseline_window = list(ts.get_previous_runs_on_machine(
                 baseline, num_comparison_runs))
@@ -80,13 +78,13 @@ def generate_run_report(run, baseurl, on
     # Gather the run-over-run changes to report, organized by field and then
     # collated by change type.
     run_to_run_info, test_results = _get_changes_by_type(
-        run, compare_to, primary_fields, test_names, comparison_window, sri)
+        run, compare_to, primary_fields, test_names, num_comparison_runs, sri)
 
     # If we have a baseline, gather the run-over-baseline results and
     # changes.
     if baseline:
         run_to_baseline_info, baselined_results = _get_changes_by_type(
-            run, baseline, primary_fields, test_names, baseline_window, sri)
+            run, baseline, primary_fields, test_names, num_comparison_runs, sri)
     else:
         run_to_baseline_info = baselined_results = None
 
@@ -227,7 +225,7 @@ def generate_run_report(run, baseurl, on
     return subject, text_report, html_report, sri
 
 def _get_changes_by_type(run_a, run_b, primary_fields, test_names,
-                         comparison_window, sri):
+                         num_comparison_runs, sri):
     comparison_results = {}
     results_by_type = []
     for field in primary_fields:
@@ -240,8 +238,7 @@ def _get_changes_by_type(run_a, run_b, p
         existing_failures = []
         unchanged_tests = []
         for name,test_id in test_names:
-            cr = sri.get_run_comparison_result(run_a, run_b, test_id, field,
-                                               comparison_window)
+            cr = sri.get_run_comparison_result(run_a, run_b, test_id, field)
             comparison_results[(name,field)] = cr
             test_status = cr.get_test_status()
             perf_status = cr.get_value_status()

Modified: lnt/trunk/lnt/server/ui/templates/v4_run.html
URL: http://llvm.org/viewvc/llvm-project/lnt/trunk/lnt/server/ui/templates/v4_run.html?rev=209204&r1=209203&r2=209204&view=diff
==============================================================================
--- lnt/trunk/lnt/server/ui/templates/v4_run.html (original)
+++ lnt/trunk/lnt/server/ui/templates/v4_run.html Tue May 20 05:42:11 2014
@@ -307,9 +307,9 @@
           </tr>
         </thead>
         <tbody>  
+		  {% set (runs, compare_runs) = request_info.sri.get_sliding_runs(run, compare_to, request_info.num_comparison_runs) %}
           {% for test_name,test_id in test_info %}
-            {% set cr = request_info.sri.get_run_comparison_result(
-                      run, compare_to, test_id, field, request_info.comparison_window) %}
+		    {% set cr = request_info.sri.get_comparison_result(runs, compare_runs, test_id, field) %}
             {% if cr.previous is not none or cr.current is not none %}
               {% if cr.current is none or cr.current >= test_min_value_filter %}
                 <tr>
@@ -356,7 +356,7 @@
         <tr>
           <td>{{ test_name }}</td>
           {% for field in primary_fields %}
-            {% set cr = request_info.sri.get_run_comparison_result(run, compare_to, test_id, field, request_info.comparison_window) %}
+		    {% set cr = request_info.sri.get_run_comparison_result(run, compare_to, test_id, field) %}
             <td>{{cr.previous}}</td>
             <td>{{cr.current}}</td>
             <td>{{cr.pct_delta}}</td>

Modified: lnt/trunk/lnt/server/ui/views.py
URL: http://llvm.org/viewvc/llvm-project/lnt/trunk/lnt/server/ui/views.py?rev=209204&r1=209203&r2=209204&view=diff
==============================================================================
--- lnt/trunk/lnt/server/ui/views.py (original)
+++ lnt/trunk/lnt/server/ui/views.py Tue May 20 05:42:11 2014
@@ -229,7 +229,7 @@ class V4RequestInfo(object):
             self.num_comparison_runs = int(
                 request.args.get('num_comparison_runs'))
         except:
-            self.num_comparison_runs = 10
+            self.num_comparison_runs = 0
 
         # Find the baseline run, if requested.
         baseline_str = request.args.get('baseline')
@@ -245,14 +245,12 @@ class V4RequestInfo(object):
 
         # Gather the runs to use for statistical data.
         comparison_start_run = self.compare_to or self.run
-        self.comparison_window = list(ts.get_previous_runs_on_machine(
-                    comparison_start_run, self.num_comparison_runs))
 
         reports = lnt.server.reporting.runs.generate_run_report(
             self.run, baseurl=db_url_for('index', _external=True),
             only_html_body=only_html_body, result=None,
             compare_to=self.compare_to, baseline=self.baseline,
-            comparison_window=self.comparison_window,
+            num_comparison_runs=self.num_comparison_runs,
             aggregation_fn=self.aggregation_fn, confidence_lv=confidence_lv)
         _, self.text_report, self.html_report, self.sri = reports
 
