[LNT] r207898 - Use Mann-Whitney U test to identify changes
Yi Kong
Yi.Kong at arm.com
Sat May 3 01:01:32 PDT 2014
Author: kongyi
Date: Sat May 3 03:01:30 2014
New Revision: 207898
URL: http://llvm.org/viewvc/llvm-project?rev=207898&view=rev
Log:
Use Mann-Whitney U test to identify changes
Modified:
lnt/trunk/lnt/server/reporting/analysis.py
lnt/trunk/lnt/server/reporting/runs.py
lnt/trunk/lnt/server/ui/templates/reporting/runs.html
lnt/trunk/lnt/server/ui/templates/v4_run.html
lnt/trunk/lnt/server/ui/views.py
lnt/trunk/setup.py
Modified: lnt/trunk/lnt/server/reporting/analysis.py
URL: http://llvm.org/viewvc/llvm-project/lnt/trunk/lnt/server/reporting/analysis.py?rev=207898&r1=207897&r2=207898&view=diff
==============================================================================
--- lnt/trunk/lnt/server/reporting/analysis.py (original)
+++ lnt/trunk/lnt/server/reporting/analysis.py Sat May 3 03:01:30 2014
@@ -5,6 +5,7 @@ Utilities for helping with the analysis
from lnt.util import stats
from lnt.server.ui import util
from lnt.testing import PASS, FAIL, XFAIL
+from scipy.stats import mannwhitneyu
REGRESSED = 'REGRESSED'
IMPROVED = 'IMPROVED'
@@ -13,8 +14,8 @@ UNCHANGED_FAIL = 'UNCHANGED_FAIL'
class ComparisonResult:
def __init__(self, cur_value, prev_value, delta, pct_delta, stddev, MAD,
- cur_failed, prev_failed, samples, stddev_mean = None,
- stddev_is_estimated = False):
+ cur_failed, prev_failed, samples, prev_samples, stddev_mean = None,
+ stddev_is_estimated = False, confidence_lv = .9):
self.current = cur_value
self.previous = prev_value
self.delta = delta
@@ -24,8 +25,17 @@ class ComparisonResult:
self.failed = cur_failed
self.prev_failed = prev_failed
self.samples = samples
+ self.prev_samples = prev_samples
self.stddev_mean = stddev_mean
self.stddev_is_estimated = stddev_is_estimated
+ self.confidence_lv = confidence_lv
+
+ try:
+ u, prob = mannwhitneyu(self.samples, self.prev_samples)
+ self.p = 1 - prob
+ except ValueError:
+ # All numbers are identical
+ self.p = 1
def get_samples(self):
return self.samples
@@ -89,6 +99,12 @@ class ComparisonResult:
if ignore_small and abs(self.delta) < .01:
return UNCHANGED_PASS
+ # Use Mann-Whitney U test to test null hypothesis that result is
+ # unchanged.
+ if len(self.samples) >= 4 and len(self.prev_samples) >= 4:
+ if self.p <= self.confidence_lv:
+ return UNCHANGED_PASS
+
# If we have a comparison window, then measure using a symmetric
# confidence interval.
if self.stddev is not None:
@@ -121,11 +137,16 @@ class ComparisonResult:
else:
return UNCHANGED_PASS
+
+ def get_p_value(self):
+ return self.p
+
class RunInfo(object):
def __init__(self, testsuite, runs_to_load,
- aggregation_fn = min):
+ aggregation_fn = min, confidence_lv = .9):
self.testsuite = testsuite
self.aggregation_fn = aggregation_fn
+ self.confidence_lv = confidence_lv
self.sample_map = util.multidict()
self.loaded_run_ids = set()
@@ -207,7 +228,8 @@ class RunInfo(object):
run_value, prev_value, delta=None,
pct_delta = None, stddev = stddev, MAD = MAD,
cur_failed = run_failed, prev_failed = prev_failed,
- samples = run_values)
+ samples = run_values, prev_samples = prev_values,
+ confidence_lv = self.confidence_lv)
# Compute the comparison status for the test value.
delta = run_value - prev_value
@@ -241,8 +263,9 @@ class RunInfo(object):
return ComparisonResult(run_value, prev_value, delta,
pct_delta, stddev, MAD,
- run_failed, prev_failed, run_values,
- stddev_mean, stddev_is_estimated)
+ run_failed, prev_failed, run_values, prev_values,
+ stddev_mean, stddev_is_estimated,
+ self.confidence_lv)
def _load_samples_for_runs(self, run_ids):
# Find the set of new runs to load.
Modified: lnt/trunk/lnt/server/reporting/runs.py
URL: http://llvm.org/viewvc/llvm-project/lnt/trunk/lnt/server/reporting/runs.py?rev=207898&r1=207897&r2=207898&view=diff
==============================================================================
--- lnt/trunk/lnt/server/reporting/runs.py (original)
+++ lnt/trunk/lnt/server/reporting/runs.py Sat May 3 03:01:30 2014
@@ -14,7 +14,8 @@ import lnt.util.stats
def generate_run_report(run, baseurl, only_html_body = False,
num_comparison_runs = 10, result = None,
compare_to = None, baseline = None,
- comparison_window = None, aggregation_fn = min):
+ comparison_window = None, aggregation_fn = min,
+ confidence_lv = .5):
"""
generate_run_report(...) -> (str: subject, str: text_report,
str: html_report)
@@ -67,7 +68,7 @@ def generate_run_report(run, baseurl, on
if baseline:
runs_to_load.add(baseline.id)
sri = lnt.server.reporting.analysis.RunInfo(
- ts, runs_to_load, aggregation_fn)
+ ts, runs_to_load, aggregation_fn, confidence_lv)
# Get the test names, primary fields and total test counts.
test_names = ts.query(ts.Test.name, ts.Test.id).\
Modified: lnt/trunk/lnt/server/ui/templates/reporting/runs.html
URL: http://llvm.org/viewvc/llvm-project/lnt/trunk/lnt/server/ui/templates/reporting/runs.html?rev=207898&r1=207897&r2=207898&view=diff
==============================================================================
--- lnt/trunk/lnt/server/ui/templates/reporting/runs.html (original)
+++ lnt/trunk/lnt/server/ui/templates/reporting/runs.html Sat May 3 03:01:30 2014
@@ -25,9 +25,11 @@
<th style="{{ styles['th'] }}">{{ primary_name }}</th>
<th style="{{ styles['th'] }}">Current</th>
<th style="{{ styles['th'] }}">σ {{ primary_field_suffix }}</th>
+ <th style="{{ styles['th'] }}">p {{ primary_field_suffix }}</th>
{% if secondary_info %}
<th style="{{ styles['th'] }}">Δ {{ secondary_field_suffix }}</th>
<th style="{{ styles['th'] }}">σ {{ secondary_field_suffix }}</th>
+ <th style="{{ styles['th'] }}">p {{ secondary_field_suffix }}</th>
{% endif %}
</tr>
@@ -40,10 +42,12 @@
<td style="{{ styles['td'] }}">{{ "%.4f" | format(cr.previous) }}</td>
<td style="{{ styles['td'] }}">{{ "%.4f" | format(cr.current) }}</td>
<td style="{{ styles['td'] }}">{{ "%.4f" | format_or_default(cr.stddev, '-') }}</td>
+ <td style="{{ styles['td'] }}">{{ "%.4f" | format_or_default(cr.p, '-') }}</td>
{% if secondary_info %}
{% set a_cr = secondary_info[(name, field)] %}
{{ a_cr.pct_delta|aspctcell(style=styles['td'])|safe }}
<td style="{{ styles['td'] }}">{{ "%.4f" | format_or_default(a_cr.stddev, '-') }}</td>
+ <td style="{{ styles['td'] }}">{{ "%.4f" | format_or_default(a_cr.p, '-') }}</td>
{% endif %}
</tr>
{% endfor %}
Modified: lnt/trunk/lnt/server/ui/templates/v4_run.html
URL: http://llvm.org/viewvc/llvm-project/lnt/trunk/lnt/server/ui/templates/v4_run.html?rev=207898&r1=207897&r2=207898&view=diff
==============================================================================
--- lnt/trunk/lnt/server/ui/templates/v4_run.html (original)
+++ lnt/trunk/lnt/server/ui/templates/v4_run.html Sat May 3 03:01:30 2014
@@ -76,6 +76,10 @@
{% if options.show_sample_counts %}
<td>{{cr.get_samples()|length}}</td>
{% endif %}
+
+ {% if options.show_p_value %}
+ <td>{{ "%.4f" % cr.get_p_value() }}</td>
+ {% endif %}
{% endmacro %}
{% block sidebar %}
@@ -244,6 +248,14 @@
</td>
</tr>
<tr>
+ <td>Mann-Whitney test confidence level:</td>
+ <td><input type="text" name="MW_confidence_lv" value="{{ options.MW_confidence_lv }}"></td>
+ </tr>
+ <tr>
+ <td>Show Mann-Whitney p value:</td>
+ <td><input type="checkbox" name="show_p_value" value="yes" {{ "checked" if options.show_p_value else ""}}></td>
+ </tr>
+ <tr>
<td colspan="2">
{% if compare_to %}
<input type="hidden" name="compare_to" value="{{compare_to.id}}">
@@ -285,6 +297,7 @@
{% if options.show_mad %}<th>MAD</th>{% endif %}
{% if options.show_all_samples %}<th>Samples</th>{% endif %}
{% if options.show_sample_counts %}<th>N</th>{% endif %}
+ {% if options.show_p_value %}<th>p</th>{% endif %}
</tr>
</thead>
<tbody>
Modified: lnt/trunk/lnt/server/ui/views.py
URL: http://llvm.org/viewvc/llvm-project/lnt/trunk/lnt/server/ui/views.py?rev=207898&r1=207897&r2=207898&view=diff
==============================================================================
--- lnt/trunk/lnt/server/ui/views.py (original)
+++ lnt/trunk/lnt/server/ui/views.py Sat May 3 03:01:30 2014
@@ -191,6 +191,10 @@ class V4RequestInfo(object):
'median' : lnt.util.stats.median }.get(
aggregation_fn_name, min)
+ # Get the MW confidence level
+ confidence_lv = float(request.args.get('MW_confidence_lv', .9))
+ self.confidence_lv = confidence_lv
+
# Find the neighboring runs, by order.
prev_runs = list(ts.get_previous_runs_on_machine(run, N = 3))
next_runs = list(ts.get_next_runs_on_machine(run, N = 3))
@@ -246,7 +250,7 @@ class V4RequestInfo(object):
only_html_body=only_html_body, result=None,
compare_to=self.compare_to, baseline=self.baseline,
comparison_window=self.comparison_window,
- aggregation_fn=self.aggregation_fn)
+ aggregation_fn=self.aggregation_fn, confidence_lv=confidence_lv)
_, self.text_report, self.html_report, self.sri = reports
@v4_route("/<int:id>/report")
@@ -314,6 +318,8 @@ def v4_run(id):
options['num_comparison_runs'] = info.num_comparison_runs
options['test_filter'] = test_filter_str = request.args.get(
'test_filter', '')
+ options['MW_confidence_lv'] = float(request.args.get('MW_confidence_lv', .9))
+ options['show_p_value'] = bool(request.args.get('show_p_value'))
if test_filter_str:
test_filter_re = re.compile(test_filter_str)
else:
Modified: lnt/trunk/setup.py
URL: http://llvm.org/viewvc/llvm-project/lnt/trunk/setup.py?rev=207898&r1=207897&r2=207898&view=diff
==============================================================================
--- lnt/trunk/setup.py (original)
+++ lnt/trunk/setup.py Sat May 3 03:01:30 2014
@@ -90,5 +90,5 @@ http://llvm.org/svn/llvm-project/lnt/tru
'lnt = lnt.lnttool:main',
],
},
- install_requires=['SQLAlchemy', 'Flask'],
+ install_requires=['SQLAlchemy', 'Flask', 'SciPy'],
)
More information about the llvm-commits
mailing list