[LNT] r208450 - Use Mann-Whitney U test to identify changes(2)
Yi Kong
Yi.Kong at arm.com
Fri May 9 15:09:29 PDT 2014
Author: kongyi
Date: Fri May 9 17:09:29 2014
New Revision: 208450
URL: http://llvm.org/viewvc/llvm-project?rev=208450&view=rev
Log:
Use Mann-Whitney U test to identify changes(2)
Correctly calculates Mann-Whitney U
Optionally depends on SciPy
Modified:
lnt/trunk/lnt/server/reporting/analysis.py
lnt/trunk/lnt/server/reporting/runs.py
lnt/trunk/lnt/server/ui/templates/v4_run.html
lnt/trunk/lnt/server/ui/views.py
lnt/trunk/lnt/util/stats.py
Modified: lnt/trunk/lnt/server/reporting/analysis.py
URL: http://llvm.org/viewvc/llvm-project/lnt/trunk/lnt/server/reporting/analysis.py?rev=208450&r1=208449&r2=208450&view=diff
==============================================================================
--- lnt/trunk/lnt/server/reporting/analysis.py (original)
+++ lnt/trunk/lnt/server/reporting/analysis.py Fri May 9 17:09:29 2014
@@ -13,8 +13,8 @@ UNCHANGED_FAIL = 'UNCHANGED_FAIL'
class ComparisonResult:
def __init__(self, cur_value, prev_value, delta, pct_delta, stddev, MAD,
- cur_failed, prev_failed, samples, stddev_mean = None,
- stddev_is_estimated = False):
+ cur_failed, prev_failed, samples, prev_samples, stddev_mean = None,
+ stddev_is_estimated = False, confidence_lv = .05):
self.current = cur_value
self.previous = prev_value
self.delta = delta
@@ -24,8 +24,10 @@ class ComparisonResult:
self.failed = cur_failed
self.prev_failed = prev_failed
self.samples = samples
+ self.prev_samples = prev_samples
self.stddev_mean = stddev_mean
self.stddev_is_estimated = stddev_is_estimated
+ self.confidence_lv = confidence_lv
def get_samples(self):
return self.samples
@@ -56,6 +58,9 @@ class ComparisonResult:
def get_value_status(self, confidence_interval=2.576,
value_precision=0.0001, ignore_small=True):
+ """
+ Raises ImportError if SciPy is not installed and sample size is too large.
+ """
if self.current is None or self.previous is None:
return None
@@ -89,6 +94,13 @@ class ComparisonResult:
if ignore_small and abs(self.delta) < .01:
return UNCHANGED_PASS
+ # Use Mann-Whitney U test to test null hypothesis that result is
+ # unchanged.
+ if len(self.samples) >= 4 and len(self.prev_samples) >= 4:
+ same = stats.mannwhitneyu(self.samples, self.prev_samples, self.confidence_lv)
+ if same:
+ return UNCHANGED_PASS
+
# If we have a comparison window, then measure using a symmetic
# confidence interval.
if self.stddev is not None:
@@ -123,9 +135,10 @@ class ComparisonResult:
class RunInfo(object):
def __init__(self, testsuite, runs_to_load,
- aggregation_fn = min):
+ aggregation_fn = min, confidence_lv = .05):
self.testsuite = testsuite
self.aggregation_fn = aggregation_fn
+ self.confidence_lv = confidence_lv
self.sample_map = util.multidict()
self.loaded_run_ids = set()
@@ -207,7 +220,8 @@ class RunInfo(object):
run_value, prev_value, delta=None,
pct_delta = None, stddev = stddev, MAD = MAD,
cur_failed = run_failed, prev_failed = prev_failed,
- samples = run_values)
+ samples = run_values, prev_samples = prev_values,
+ confidence_lv = self.confidence_lv)
# Compute the comparison status for the test value.
delta = run_value - prev_value
@@ -241,8 +255,9 @@ class RunInfo(object):
return ComparisonResult(run_value, prev_value, delta,
pct_delta, stddev, MAD,
- run_failed, prev_failed, run_values,
- stddev_mean, stddev_is_estimated)
+ run_failed, prev_failed, run_values, prev_values,
+ stddev_mean, stddev_is_estimated,
+ self.confidence_lv)
def _load_samples_for_runs(self, run_ids):
# Find the set of new runs to load.
Modified: lnt/trunk/lnt/server/reporting/runs.py
URL: http://llvm.org/viewvc/llvm-project/lnt/trunk/lnt/server/reporting/runs.py?rev=208450&r1=208449&r2=208450&view=diff
==============================================================================
--- lnt/trunk/lnt/server/reporting/runs.py (original)
+++ lnt/trunk/lnt/server/reporting/runs.py Fri May 9 17:09:29 2014
@@ -14,7 +14,8 @@ import lnt.util.stats
def generate_run_report(run, baseurl, only_html_body = False,
num_comparison_runs = 10, result = None,
compare_to = None, baseline = None,
- comparison_window = None, aggregation_fn = min):
+ comparison_window = None, aggregation_fn = min,
+ confidence_lv = .05):
"""
generate_run_report(...) -> (str: subject, str: text_report,
str: html_report)
@@ -67,7 +68,7 @@ def generate_run_report(run, baseurl, on
if baseline:
runs_to_load.add(baseline.id)
sri = lnt.server.reporting.analysis.RunInfo(
- ts, runs_to_load, aggregation_fn)
+ ts, runs_to_load, aggregation_fn, confidence_lv)
# Get the test names, primary fields and total test counts.
test_names = ts.query(ts.Test.name, ts.Test.id).\
Modified: lnt/trunk/lnt/server/ui/templates/v4_run.html
URL: http://llvm.org/viewvc/llvm-project/lnt/trunk/lnt/server/ui/templates/v4_run.html?rev=208450&r1=208449&r2=208450&view=diff
==============================================================================
--- lnt/trunk/lnt/server/ui/templates/v4_run.html (original)
+++ lnt/trunk/lnt/server/ui/templates/v4_run.html Fri May 9 17:09:29 2014
@@ -76,6 +76,7 @@
{% if options.show_sample_counts %}
<td>{{cr.get_samples()|length}}</td>
{% endif %}
+
{% endmacro %}
{% block sidebar %}
@@ -243,6 +244,17 @@
</select>
</td>
</tr>
+      <tr> {# Submitted as MW_confidence_lv; the currently chosen level stays selected. #}
+        <td>Mann-Whitney test confidence level:</td>
+        <td>
+          <select name="MW_confidence_lv">
+            <option value="0.05" {{ "selected" if options.MW_confidence_lv == 0.05 else ""}}>
+              5%</option>
+            <option value="0.01" {{ "selected" if options.MW_confidence_lv == 0.01 else ""}}>
+              1%</option>
+          </select>
+        </td>
+      </tr>
<tr>
<td colspan="2">
{% if compare_to %}
Modified: lnt/trunk/lnt/server/ui/views.py
URL: http://llvm.org/viewvc/llvm-project/lnt/trunk/lnt/server/ui/views.py?rev=208450&r1=208449&r2=208450&view=diff
==============================================================================
--- lnt/trunk/lnt/server/ui/views.py (original)
+++ lnt/trunk/lnt/server/ui/views.py Fri May 9 17:09:29 2014
@@ -191,6 +191,13 @@ class V4RequestInfo(object):
'median' : lnt.util.stats.median }.get(
aggregation_fn_name, min)
+ # Get the MW confidence level.
+ try:
+ confidence_lv = float(request.args.get('MW_confidence_lv'))
+ except (TypeError, ValueError):
+ confidence_lv = .05
+ self.confidence_lv = confidence_lv
+
# Find the neighboring runs, by order.
prev_runs = list(ts.get_previous_runs_on_machine(run, N = 3))
next_runs = list(ts.get_next_runs_on_machine(run, N = 3))
@@ -246,7 +253,7 @@ class V4RequestInfo(object):
only_html_body=only_html_body, result=None,
compare_to=self.compare_to, baseline=self.baseline,
comparison_window=self.comparison_window,
- aggregation_fn=self.aggregation_fn)
+ aggregation_fn=self.aggregation_fn, confidence_lv=confidence_lv)
_, self.text_report, self.html_report, self.sri = reports
@v4_route("/<int:id>/report")
@@ -294,7 +301,12 @@ Unable to find a v0.4 run for this ID. P
@v4_route("/<int:id>")
def v4_run(id):
- info = V4RequestInfo(id)
+ try:
+ info = V4RequestInfo(id)
+ except ImportError:
+ return render_template("error.html",
+ message="SciPy is not installed on server and sample size is too large.")
+
ts = info.ts
run = info.run
@@ -314,6 +326,7 @@ def v4_run(id):
options['num_comparison_runs'] = info.num_comparison_runs
options['test_filter'] = test_filter_str = request.args.get(
'test_filter', '')
+ options['MW_confidence_lv'] = info.confidence_lv
if test_filter_str:
test_filter_re = re.compile(test_filter_str)
else:
Modified: lnt/trunk/lnt/util/stats.py
URL: http://llvm.org/viewvc/llvm-project/lnt/trunk/lnt/util/stats.py?rev=208450&r1=208449&r2=208450&view=diff
==============================================================================
--- lnt/trunk/lnt/util/stats.py (original)
+++ lnt/trunk/lnt/util/stats.py Fri May 9 17:09:29 2014
@@ -19,3 +19,100 @@ def standard_deviation(l):
means_sqrd = sum([(v - m)**2 for v in l]) / len(l)
rms = math.sqrt(means_sqrd)
return rms
+
+def mannwhitneyu(a, b, sigLevel = .05):
+ """
+ Determine if sample a and b are the same at given significance level,
+ raises ImportError if SciPy is not installed on server and sample size is
+ too large.
+ """
+ if len(a) <= 20 and len(b) <= 20:
+ return mannwhitneyu_small(a, b, sigLevel)
+ else:
+ try:
+ from scipy.stats import mannwhitneyu as mannwhitneyu_large
+ return mannwhitneyu_large(a, b, False) >= sigLevel
+ except ValueError:
+ return True
+
+def mannwhitneyu_small(a, b, sigLevel):
+ """
+ Determine if sample a and b are the same.
+ Sample size must be less than 20.
+ """
+ assert len(a) <= 20, "Sample size must be less than 20."
+ assert len(b) <= 20, "Sample size must be less than 20."
+
+ if not sigLevel in tables:
+ raise ValueError("Do not have according significance table.")
+
+ # Calculate U value for sample groups using method described on Wikipedia.
+ flip = len(a) > len(b)
+ x = a if not flip else b
+ y = b if not flip else a
+
+ Ux = 0.
+ for xe in x:
+ for ye in y:
+ if xe < ye:
+ Ux += 1
+ elif xe == ye:
+ Ux += .5
+ Uy = len(a) * len(b) - Ux
+ Ua = Ux if not flip else Uy
+ Ub = Uy if not flip else Ux
+
+ U = abs(Ua - Ub)
+
+ same = U <= SIGN_TABLES[sigLevel][len(a) - 1][len(b) - 1]
+ return same
+
+# Mann-Whitney U threshold table for the .05 significance level, indexed [len(a) - 1][len(b) - 1].
+TABLE_0_05 = [
+ [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+ [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2],
+ [0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8],
+ [0, 0, 0, 0, 1, 2, 3, 4, 4, 5, 6, 7, 8, 9, 10, 11, 11, 12, 13, 13],
+ [0, 0, 0, 1, 2, 3, 5, 6, 7, 8, 9, 11, 12, 13, 14, 15, 17, 18, 19, 20],
+ [0, 0, 1, 2, 3, 5, 6, 8, 10, 11, 13, 14, 16, 17, 19, 21, 22, 24, 25, 27],
+ [0, 0, 1, 3, 5, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34],
+ [0, 0, 2, 4, 6, 8, 10, 13, 15, 17, 19, 22, 24, 26, 29, 31, 34, 36, 38, 41],
+ [0, 0, 2, 4, 7, 10, 12, 15, 17, 20, 23, 26, 28, 31, 34, 37, 39, 42, 45, 48],
+ [0, 0, 3, 5, 8, 11, 14, 17, 20, 23, 26, 29, 33, 36, 39, 42, 45, 48, 52, 55],
+ [0, 0, 3, 6, 9, 13, 16, 19, 23, 26, 30, 33, 37, 40, 44, 47, 51, 55, 58, 62],
+ [0, 1, 4, 7, 11, 14, 18, 22, 26, 29, 33, 37, 41, 45, 49, 53, 57, 61, 65, 69],
+ [0, 1, 4, 8, 12, 16, 20, 24, 28, 33, 37, 41, 45, 50, 54, 59, 63, 67, 72, 76],
+ [0, 1, 5, 9, 13, 17, 22, 26, 31, 36, 40, 45, 50, 55, 59, 64, 67, 74, 78, 83],
+ [0, 1, 5, 10, 14, 19, 24, 29, 34, 39, 44, 49, 54, 59, 64, 70, 75, 80, 85, 90],
+ [0, 1, 6, 11, 15, 21, 26, 31, 37, 42, 47, 53, 59, 64, 70, 75, 81, 86, 92, 98],
+ [0, 2, 6, 11, 17, 22, 28, 34, 39, 45, 51, 57, 63, 67, 75, 81, 87, 93, 99, 105],
+ [0, 2, 7, 12, 18, 24, 30, 36, 42, 48, 55, 61, 67, 74, 80, 86, 93, 99, 106, 112],
+ [0, 2, 7, 13, 19, 25, 32, 38, 45, 52, 58, 65, 72, 78, 85, 92, 99, 106, 113, 119],
+ [0, 2, 8, 13, 20, 27, 34, 41, 48, 55, 62, 69, 76, 83, 90, 98, 105, 112, 119, 127]
+ ]
+
+# Mann-Whitney U threshold table for the .01 significance level, indexed [len(a) - 1][len(b) - 1].
+TABLE_0_01 = [
+ [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+ [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+ [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2, 3, 3],
+ [0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 5, 5, 6, 6, 7, 8],
+ [0, 0, 0, 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9, 10, 11, 12, 13],
+ [0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 13, 15, 16, 17, 18],
+ [0, 0, 0, 0, 1, 3, 4, 6, 7, 9, 10, 12, 13, 15, 16, 18, 19, 21, 22, 24],
+ [0, 0, 0, 1, 2, 4, 6, 7, 9, 11, 13, 15, 17, 18, 20, 22, 24, 26, 28, 30],
+ [0, 0, 0, 1, 3, 5, 7, 9, 11, 13, 16, 18, 20, 22, 24, 27, 29, 31, 33, 36],
+ [0, 0, 0, 2, 4, 6, 9, 11, 13, 16, 18, 21, 24, 26, 29, 31, 34, 37, 39, 42],
+ [0, 0, 0, 2, 5, 7, 10, 13, 16, 18, 21, 24, 27, 30, 33, 36, 39, 42, 45, 46],
+ [0, 0, 1, 3, 6, 9, 12, 15, 18, 21, 24, 27, 31, 34, 37, 41, 44, 47, 51, 54],
+ [0, 0, 1, 3, 7, 10, 13, 17, 20, 24, 27, 31, 34, 38, 42, 45, 49, 53, 56, 60],
+ [0, 0, 1, 4, 7, 11, 15, 18, 22, 26, 30, 34, 38, 42, 46, 50, 54, 58, 63, 67],
+ [0, 0, 2, 5, 8, 12, 16, 20, 24, 29, 33, 37, 42, 46, 51, 55, 60, 64, 69, 73],
+ [0, 0, 2, 5, 9, 13, 18, 22, 27, 31, 36, 41, 45, 50, 55, 60, 65, 70, 74, 79],
+ [0, 0, 2, 6, 10, 15, 19, 24, 29, 34, 39, 44, 49, 54, 60, 65, 70, 75, 81, 86],
+ [0, 0, 2, 6, 11, 16, 21, 26, 31, 37, 42, 47, 53, 58, 64, 70, 75, 81, 87, 92],
+ [0, 0, 3, 7, 12, 17, 22, 28, 33, 39, 45, 51, 56, 63, 69, 74, 81, 87, 93, 99],
+ [0, 0, 3, 8, 13, 18, 24, 30, 36, 42, 46, 54, 60, 67, 73, 79, 86, 92, 99, 105]
+ ]
+
+SIGN_TABLES = {.05: TABLE_0_05, .01: TABLE_0_01}
More information about the llvm-commits
mailing list