[LNT] r209204 - Aggregate most recent runs
Yi Kong
Yi.Kong at arm.com
Tue May 20 03:42:12 PDT 2014
Author: kongyi
Date: Tue May 20 05:42:11 2014
New Revision: 209204
URL: http://llvm.org/viewvc/llvm-project?rev=209204&view=rev
Log:
Aggregate most recent runs
This method significantly reduces noise when applied in combination with the
Mann-Whitney U test. It replaces the standard deviation estimation feature, as it
clearly performs better.
It isn't enabled by default.
Modified:
lnt/trunk/lnt/server/db/testsuitedb.py
lnt/trunk/lnt/server/reporting/analysis.py
lnt/trunk/lnt/server/reporting/runs.py
lnt/trunk/lnt/server/ui/templates/v4_run.html
lnt/trunk/lnt/server/ui/views.py
Modified: lnt/trunk/lnt/server/db/testsuitedb.py
URL: http://llvm.org/viewvc/llvm-project/lnt/trunk/lnt/server/db/testsuitedb.py?rev=209204&r1=209203&r2=209204&view=diff
==============================================================================
--- lnt/trunk/lnt/server/db/testsuitedb.py (original)
+++ lnt/trunk/lnt/server/db/testsuitedb.py Tue May 20 05:42:11 2014
@@ -747,9 +747,12 @@ test %r does not map to a sample field i
The direction must be -1 or 1 and specified whether or not the
preceeding or following runs should be returned.
"""
- assert N > 0, "invalid count"
+ assert N >= 0, "invalid count"
assert direction in (-1, 1), "invalid direction"
+ if N==0:
+ return []
+
# The obvious algorithm here is to step through the run orders in the
# appropriate direction and yield any runs on the same machine which
# were reported at that order.
Modified: lnt/trunk/lnt/server/reporting/analysis.py
URL: http://llvm.org/viewvc/llvm-project/lnt/trunk/lnt/server/reporting/analysis.py?rev=209204&r1=209203&r2=209204&view=diff
==============================================================================
--- lnt/trunk/lnt/server/reporting/analysis.py (original)
+++ lnt/trunk/lnt/server/reporting/analysis.py Tue May 20 05:42:11 2014
@@ -14,7 +14,7 @@ UNCHANGED_FAIL = 'UNCHANGED_FAIL'
class ComparisonResult:
def __init__(self, cur_value, prev_value, delta, pct_delta, stddev, MAD,
cur_failed, prev_failed, samples, prev_samples, stddev_mean = None,
- stddev_is_estimated = False, confidence_lv = .05):
+ confidence_lv = .05):
self.current = cur_value
self.previous = prev_value
self.delta = delta
@@ -26,7 +26,6 @@ class ComparisonResult:
self.samples = samples
self.prev_samples = prev_samples
self.stddev_mean = stddev_mean
- self.stddev_is_estimated = stddev_is_estimated
self.confidence_lv = confidence_lv
def get_samples(self):
@@ -104,12 +103,6 @@ class ComparisonResult:
is_significant = abs(self.delta) > (self.stddev *
confidence_interval)
- # If the stddev is estimated, then it is also only significant if
- # the delta from the estimate mean is above the confidence interval.
- if self.stddev_is_estimated:
- is_significant &= (abs(self.current - self.stddev_mean) >
- self.stddev * confidence_interval)
-
# If the delta is significant, return
if is_significant:
if self.delta < 0:
@@ -144,18 +137,33 @@ class RunInfo(object):
def get_test_ids(self):
return set(key[1] for key in self.sample_map.keys())
+
+ def get_sliding_runs(self, run, compare_run, num_comparison_runs=0):
+ """
+ Get num_comparison_runs most recent runs,
+ This query is expensive.
+ """
+ runs = [run]
+ runs_prev = self.testsuite.get_previous_runs_on_machine(run, num_comparison_runs)
+ runs += runs_prev
+
+ if compare_run is not None:
+ compare_runs = [compare_run]
+ comp_prev = self.testsuite.get_previous_runs_on_machine(compare_run, num_comparison_runs)
+ compare_runs += comp_prev
+ else:
+ compare_runs = []
+
+ return runs, compare_runs
- def get_run_comparison_result(self, run, compare_to, test_id, field,
- comparison_window=[]):
+ def get_run_comparison_result(self, run, compare_to, test_id, field):
if compare_to is not None:
compare_to = [compare_to]
else:
compare_to = []
- return self.get_comparison_result([run], compare_to, test_id, field,
- comparison_window)
+ return self.get_comparison_result([run], compare_to, test_id, field)
- def get_comparison_result(self, runs, compare_runs, test_id, field,
- comparison_window=[]):
+ def get_comparison_result(self, runs, compare_runs, test_id, field):
# Get the field which indicates the requested field's status.
status_field = field.status_field
@@ -204,12 +212,10 @@ class RunInfo(object):
stddev = stats.standard_deviation(run_values)
MAD = stats.median_absolute_deviation(run_values)
stddev_mean = stats.mean(run_values)
- stddev_is_estimated = False
else:
stddev = None
MAD = None
stddev_mean = None
- stddev_is_estimated = False
# If we are missing current or comparison values we are done.
if run_value is None or prev_value is None:
@@ -227,34 +233,10 @@ class RunInfo(object):
else:
pct_delta = 0.0
- # If we don't have an estimate for the distribution, attempt to "guess"
- # it using the comparison window.
- #
- # FIXME: We can substantially improve the algorithm for guessing the
- # noise level from a list of values. Probably better to just find a way
- # to kill this code though.
- if stddev is None:
- # Get all previous values in the comparison window.
- prev_samples = [s for run in comparison_window
- for s in self.sample_map.get((run.id, test_id), ())
- if s[field.index] is not None]
- # Filter out failing samples.
- if status_field:
- prev_samples = [s for s in prev_samples
- if s[status_field.index] != FAIL]
- if prev_samples:
- prev_values = [s[field.index]
- for s in prev_samples]
- stddev = stats.standard_deviation(prev_values)
- MAD = stats.median_absolute_deviation(prev_values)
- stddev_mean = stats.mean(prev_values)
- stddev_is_estimated = True
-
return ComparisonResult(run_value, prev_value, delta,
pct_delta, stddev, MAD,
run_failed, prev_failed, run_values, prev_values,
- stddev_mean, stddev_is_estimated,
- self.confidence_lv)
+ stddev_mean, self.confidence_lv)
def get_geomean_comparison_result(self, run, compare_to, field,
comparison_window=[]):
Modified: lnt/trunk/lnt/server/reporting/runs.py
URL: http://llvm.org/viewvc/llvm-project/lnt/trunk/lnt/server/reporting/runs.py?rev=209204&r1=209203&r2=209204&view=diff
==============================================================================
--- lnt/trunk/lnt/server/reporting/runs.py (original)
+++ lnt/trunk/lnt/server/reporting/runs.py Tue May 20 05:42:11 2014
@@ -12,10 +12,9 @@ import lnt.server.ui.app
import lnt.util.stats
def generate_run_report(run, baseurl, only_html_body = False,
- num_comparison_runs = 10, result = None,
+ num_comparison_runs = 0, result = None,
compare_to = None, baseline = None,
- comparison_window = None, aggregation_fn = min,
- confidence_lv = .05):
+ aggregation_fn = min, confidence_lv = .05):
"""
generate_run_report(...) -> (str: subject, str: text_report,
str: html_report)
@@ -24,7 +23,7 @@ def generate_run_report(run, baseurl, on
run, suitable for emailing or presentation on a web page.
"""
- assert num_comparison_runs > 0
+ assert num_comparison_runs >= 0
start_time = time.time()
@@ -43,10 +42,9 @@ def generate_run_report(run, baseurl, on
baseline = None
# Gather the runs to use for statistical data.
- if comparison_window is None:
- comparison_start_run = compare_to or run
- comparison_window = list(ts.get_previous_runs_on_machine(
- comparison_start_run, num_comparison_runs))
+ comparison_start_run = compare_to or run
+ comparison_window = list(ts.get_previous_runs_on_machine(
+ comparison_start_run, num_comparison_runs))
if baseline:
baseline_window = list(ts.get_previous_runs_on_machine(
baseline, num_comparison_runs))
@@ -80,13 +78,13 @@ def generate_run_report(run, baseurl, on
# Gather the run-over-run changes to report, organized by field and then
# collated by change type.
run_to_run_info, test_results = _get_changes_by_type(
- run, compare_to, primary_fields, test_names, comparison_window, sri)
+ run, compare_to, primary_fields, test_names, num_comparison_runs, sri)
# If we have a baseline, gather the run-over-baseline results and
# changes.
if baseline:
run_to_baseline_info, baselined_results = _get_changes_by_type(
- run, baseline, primary_fields, test_names, baseline_window, sri)
+ run, baseline, primary_fields, test_names, num_comparison_runs, sri)
else:
run_to_baseline_info = baselined_results = None
@@ -227,7 +225,7 @@ def generate_run_report(run, baseurl, on
return subject, text_report, html_report, sri
def _get_changes_by_type(run_a, run_b, primary_fields, test_names,
- comparison_window, sri):
+ num_comparison_runs, sri):
comparison_results = {}
results_by_type = []
for field in primary_fields:
@@ -240,8 +238,7 @@ def _get_changes_by_type(run_a, run_b, p
existing_failures = []
unchanged_tests = []
for name,test_id in test_names:
- cr = sri.get_run_comparison_result(run_a, run_b, test_id, field,
- comparison_window)
+ cr = sri.get_run_comparison_result(run_a, run_b, test_id, field)
comparison_results[(name,field)] = cr
test_status = cr.get_test_status()
perf_status = cr.get_value_status()
Modified: lnt/trunk/lnt/server/ui/templates/v4_run.html
URL: http://llvm.org/viewvc/llvm-project/lnt/trunk/lnt/server/ui/templates/v4_run.html?rev=209204&r1=209203&r2=209204&view=diff
==============================================================================
--- lnt/trunk/lnt/server/ui/templates/v4_run.html (original)
+++ lnt/trunk/lnt/server/ui/templates/v4_run.html Tue May 20 05:42:11 2014
@@ -307,9 +307,9 @@
</tr>
</thead>
<tbody>
+ {% set (runs, compare_runs) = request_info.sri.get_sliding_runs(run, compare_to, request_info.num_comparison_runs) %}
{% for test_name,test_id in test_info %}
- {% set cr = request_info.sri.get_run_comparison_result(
- run, compare_to, test_id, field, request_info.comparison_window) %}
+ {% set cr = request_info.sri.get_comparison_result(runs, compare_runs, test_id, field) %}
{% if cr.previous is not none or cr.current is not none %}
{% if cr.current is none or cr.current >= test_min_value_filter %}
<tr>
@@ -356,7 +356,7 @@
<tr>
<td>{{ test_name }}</td>
{% for field in primary_fields %}
- {% set cr = request_info.sri.get_run_comparison_result(run, compare_to, test_id, field, request_info.comparison_window) %}
+ {% set cr = request_info.sri.get_run_comparison_result(run, compare_to, test_id, field) %}
<td>{{cr.previous}}</td>
<td>{{cr.current}}</td>
<td>{{cr.pct_delta}}</td>
Modified: lnt/trunk/lnt/server/ui/views.py
URL: http://llvm.org/viewvc/llvm-project/lnt/trunk/lnt/server/ui/views.py?rev=209204&r1=209203&r2=209204&view=diff
==============================================================================
--- lnt/trunk/lnt/server/ui/views.py (original)
+++ lnt/trunk/lnt/server/ui/views.py Tue May 20 05:42:11 2014
@@ -229,7 +229,7 @@ class V4RequestInfo(object):
self.num_comparison_runs = int(
request.args.get('num_comparison_runs'))
except:
- self.num_comparison_runs = 10
+ self.num_comparison_runs = 0
# Find the baseline run, if requested.
baseline_str = request.args.get('baseline')
@@ -245,14 +245,12 @@ class V4RequestInfo(object):
# Gather the runs to use for statistical data.
comparison_start_run = self.compare_to or self.run
- self.comparison_window = list(ts.get_previous_runs_on_machine(
- comparison_start_run, self.num_comparison_runs))
reports = lnt.server.reporting.runs.generate_run_report(
self.run, baseurl=db_url_for('index', _external=True),
only_html_body=only_html_body, result=None,
compare_to=self.compare_to, baseline=self.baseline,
- comparison_window=self.comparison_window,
+ num_comparison_runs=self.num_comparison_runs,
aggregation_fn=self.aggregation_fn, confidence_lv=confidence_lv)
_, self.text_report, self.html_report, self.sri = reports
More information about the llvm-commits
mailing list