[LNT] r210123 - Fixups in geomean calculation
Chris Matthews
cmatthews5 at apple.com
Tue Jun 3 13:13:23 PDT 2014
Author: cmatthews
Date: Tue Jun 3 15:13:23 2014
New Revision: 210123
URL: http://llvm.org/viewvc/llvm-project?rev=210123&view=rev
Log:
Fixups in geomean calculation
The geomean calculation was having problems with the compile test suite. Fix that, as well as some bugs in how it was calculated.
Modified:
lnt/trunk/lnt/server/reporting/analysis.py
lnt/trunk/lnt/server/reporting/dailyreport.py
lnt/trunk/lnt/server/reporting/runs.py
lnt/trunk/lnt/server/ui/views.py
lnt/trunk/lnt/util/stats.py
Modified: lnt/trunk/lnt/server/reporting/analysis.py
URL: http://llvm.org/viewvc/llvm-project/lnt/trunk/lnt/server/reporting/analysis.py?rev=210123&r1=210122&r2=210123&view=diff
==============================================================================
--- lnt/trunk/lnt/server/reporting/analysis.py (original)
+++ lnt/trunk/lnt/server/reporting/analysis.py Tue Jun 3 15:13:23 2014
@@ -103,7 +103,7 @@ class ComparisonResult:
is_significant = abs(self.delta) > (self.stddev *
confidence_interval)
- # If the delta is significant, return
+ # If the delta is significant, return
if is_significant:
if self.delta < 0:
return IMPROVED
@@ -123,9 +123,10 @@ class ComparisonResult:
else:
return UNCHANGED_PASS
+
class RunInfo(object):
def __init__(self, testsuite, runs_to_load,
- aggregation_fn = min, confidence_lv = .05):
+ aggregation_fn=stats.safe_min, confidence_lv=.05):
self.testsuite = testsuite
self.aggregation_fn = aggregation_fn
self.confidence_lv = confidence_lv
@@ -155,7 +156,7 @@ class RunInfo(object):
compare_runs = []
return runs, compare_runs
-
+
def get_run_comparison_result(self, run, compare_to, test_id, field):
if compare_to is not None:
compare_to = [compare_to]
@@ -235,42 +236,66 @@ class RunInfo(object):
return ComparisonResult(run_value, prev_value, delta,
pct_delta, stddev, MAD,
- run_failed, prev_failed, run_values, prev_values,
- stddev_mean, self.confidence_lv)
+ run_failed, prev_failed, run_values,
+ prev_values, stddev_mean, self.confidence_lv)
+
+ #Smallest possible change we ever look for.
+ MIN_VALUE = 0.00001
+
+ @staticmethod
+ def not_none(thing):
+ if thing is None:
+ return True
+ return False
+
+ def _extract_values_from_samples(self, run, field):
+ """Given a run object, collect values for a particular field."""
+
+ run_samples = filter(self.not_none,
+ [self.sample_map.get((run, test_id))
+ for test_id in self.get_test_ids()])
+
+ run_values = filter(self.not_none,
+ [self.aggregation_fn(a[field] + self.MIN_VALUE
+ for a in e if a[field] is not None)
+ for e in run_samples if e])
+ return run_values
+
+ def _calc_geomean(self, run_values):
+ if not run_values:
+ return None
+ return util.geometric_mean(run_values) - self.MIN_VALUE
def get_geomean_comparison_result(self, run, compare_to, field,
- comparison_window=[]):
+ comparison_window=[]):
# FIXME: Geometric mean does not take 0 values, so fix it by adding 1
# to each value and subtract 1 from the result. Since we are only
# interested in the change of data set, this workaround is good enough,
# but not ideal.
- run_samples = filter(None,
- [self.sample_map.get((run.id, test_id))
- for test_id in self.get_test_ids()])
- run_values = [self.aggregation_fn(a[field.index] + 1
- for a in e) for e in run_samples]
-
- prev_samples = filter(None,
- [self.sample_map.get((run.id, test_id))
- for test_id in self.get_test_ids()])
- prev_values = [self.aggregation_fn(a[field.index] + 1
- for a in e) for e in prev_samples]
-
- run_geomean = util.geometric_mean(run_values) - 1
- prev_geomean = util.geometric_mean(prev_values) - 1
-
- delta = run_geomean - prev_geomean
- if prev_geomean != 0:
- pct_delta = delta / prev_geomean
+
+ run_values = self._extract_values_from_samples(run.id, field.index)
+
+ prev_values = self._extract_values_from_samples(compare_to, field.index)
+
+ prev_geomean = self._calc_geomean(prev_values)
+ run_geomean = self._calc_geomean(run_values)
+
+ if run_geomean and prev_geomean:
+ delta = run_geomean - prev_geomean
+ if prev_geomean != 0:
+ pct_delta = delta / prev_geomean
+ else:
+ pct_delta = 0.0
else:
- pct_delta = 0.0
+ delta = pct_delta = 0
return ComparisonResult(run_geomean, prev_geomean, delta,
- pct_delta, stddev = None, MAD = None,
- cur_failed = False, prev_failed = False,
- samples = [run_geomean],
- prev_samples = [prev_geomean],
- confidence_lv = 0)
+ pct_delta, stddev=None, MAD=None,
+ cur_failed=not run_geomean,
+ prev_failed=not prev_geomean,
+ samples=[run_geomean],
+ prev_samples=[prev_geomean],
+ confidence_lv=0)
def _load_samples_for_runs(self, run_ids):
# Find the set of new runs to load.
@@ -283,7 +308,7 @@ class RunInfo(object):
# We speed things up considerably by loading the column data directly
# here instead of requiring SA to materialize Sample objects.
columns = [self.testsuite.Sample.run_id,
- self.testsuite.Sample.test_id]
+ self.testsuite.Sample.test_id]
columns.extend(f.column for f in self.testsuite.sample_fields)
q = self.testsuite.query(*columns)
q = q.filter(self.testsuite.Sample.run_id.in_(to_load))
@@ -294,4 +319,3 @@ class RunInfo(object):
self.sample_map[(run_id, test_id)] = sample_values
self.loaded_run_ids |= to_load
-
Modified: lnt/trunk/lnt/server/reporting/dailyreport.py
URL: http://llvm.org/viewvc/llvm-project/lnt/trunk/lnt/server/reporting/dailyreport.py?rev=210123&r1=210122&r2=210123&view=diff
==============================================================================
--- lnt/trunk/lnt/server/reporting/dailyreport.py (original)
+++ lnt/trunk/lnt/server/reporting/dailyreport.py Tue Jun 3 15:13:23 2014
@@ -43,14 +43,14 @@ class DailyReport(object):
raise ArgumentError("report not initialized")
if day_index >= self.num_prior_days_to_include:
raise ArgumentError("invalid day index")
-
+
runs = self.machine_runs.get((machine.id, day_index))
if runs is None:
return None
# Select a key run arbitrarily.
return runs[0]
-
+
def build(self):
ts = self.ts
Modified: lnt/trunk/lnt/server/reporting/runs.py
URL: http://llvm.org/viewvc/llvm-project/lnt/trunk/lnt/server/reporting/runs.py?rev=210123&r1=210122&r2=210123&view=diff
==============================================================================
--- lnt/trunk/lnt/server/reporting/runs.py (original)
+++ lnt/trunk/lnt/server/reporting/runs.py Tue Jun 3 15:13:23 2014
@@ -7,10 +7,11 @@ import lnt.server.reporting.analysis
import lnt.server.ui.app
import lnt.util.stats
-def generate_run_report(run, baseurl, only_html_body = False,
- num_comparison_runs = 0, result = None,
- compare_to = None, baseline = None,
- aggregation_fn = min, confidence_lv = .05):
+
+def generate_run_report(run, baseurl, only_html_body=False,
+ num_comparison_runs=0, result=None,
+ compare_to=None, baseline=None,
+ aggregation_fn=lnt.util.stats.safe_min, confidence_lv=.05):
"""
generate_run_report(...) -> (str: subject, str: text_report,
str: html_report)
@@ -27,6 +28,7 @@ def generate_run_report(run, baseurl, on
machine = run.machine
machine_parameters = machine.parameters
+
if baseline is None:
# If a baseline has not been given, look up the run closest to
# the default baseline revision for which this machine also
Modified: lnt/trunk/lnt/server/ui/views.py
URL: http://llvm.org/viewvc/llvm-project/lnt/trunk/lnt/server/ui/views.py?rev=210123&r1=210122&r2=210123&view=diff
==============================================================================
--- lnt/trunk/lnt/server/ui/views.py (original)
+++ lnt/trunk/lnt/server/ui/views.py Tue Jun 3 15:13:23 2014
@@ -187,9 +187,9 @@ class V4RequestInfo(object):
# Get the aggregation function to use.
aggregation_fn_name = request.args.get('aggregation_fn')
- self.aggregation_fn = { 'min' : min,
- 'median' : lnt.util.stats.median }.get(
- aggregation_fn_name, min)
+ self.aggregation_fn = {'min': lnt.util.stats.safe_min,
+ 'median': lnt.util.stats.median}.get(
+ aggregation_fn_name, lnt.util.stats.safe_min)
# Get the MW confidence level.
try:
@@ -612,7 +612,7 @@ def v4_graph():
dates = [data_date[1] for data_date in datapoints]
metadata = {"label":point_label}
-
+
# When we can, map x-axis to revisions, but when that is too hard
# use the position of the sample instead.
rev_x = convert_revision(point_label)
@@ -628,11 +628,11 @@ def v4_graph():
if show_all_points:
for i,v in enumerate(values):
point_metadata = dict(metadata)
- point_metadata["date"] = str(dates[i])
+ point_metadata["date"] = str(dates[i])
points_data.append((x, v, point_metadata))
elif show_points:
points_data.append((x, min_value, metadata))
-
+
# Add the standard deviation error bar, if requested.
if show_stddev:
mean = stats.mean(values)
@@ -822,15 +822,15 @@ def v4_global_status():
# Get a sorted list of recent machines.
recent_machines = sorted(recent_runs_by_machine.keys(),
key=lambda m: m.name)
-
+
# We use periods in our machine names. css does not like this
# since it uses periods to demark classes. Thus we convert periods
# in the names of our machines to dashes for use in css. It is
# also convenient for our computations in the jinja page to have
- # access to
+ # access to
def get_machine_keys(m):
m.css_name = m.name.replace('.','-')
- return m
+ return m
recent_machines = map(get_machine_keys, recent_machines)
# For each machine, build a table of the machine, the baseline run, and the
@@ -881,8 +881,8 @@ def v4_global_status():
test_table.append(row)
# Order the table by worst regression.
- test_table.sort(key = lambda row: row[1], reverse=True)
-
+ test_table.sort(key = lambda row: row[1], reverse=True)
+
return render_template("v4_global_status.html",
ts=ts,
tests=test_table,
@@ -918,13 +918,13 @@ def v4_daily_report(year, month, day):
num_days = int(num_days_str)
else:
num_days = 3
-
+
day_start_str = request.args.get('day_start')
if day_start_str is not None:
day_start = int(day_start_str)
else:
day_start = 16
-
+
ts = request.get_testsuite()
# Create the report object.
Modified: lnt/trunk/lnt/util/stats.py
URL: http://llvm.org/viewvc/llvm-project/lnt/trunk/lnt/util/stats.py?rev=210123&r1=210122&r2=210123&view=diff
==============================================================================
--- lnt/trunk/lnt/util/stats.py (original)
+++ lnt/trunk/lnt/util/stats.py Tue Jun 3 15:13:23 2014
@@ -1,26 +1,43 @@
import math
from lnt.external.stats.stats import mannwhitneyu as mannwhitneyu_large
+
+def safe_min(l):
+ """Calculate min, but if given an empty list return None."""
+ l = list(l) #In case this is a complex type, get a simple list.
+ if not l:
+ return None
+ else:
+ return min(l)
+
def mean(l):
- return sum(l)/len(l)
+ if l:
+ return sum(l)/len(l)
+ else:
+ return None
def median(l):
+ if not l:
+ return None
l = list(l)
l.sort()
N = len(l)
return (l[(N-1)//2] + l[N//2])*.5
+
def median_absolute_deviation(l, med = None):
if med is None:
med = median(l)
return median([abs(x - med) for x in l])
+
def standard_deviation(l):
m = mean(l)
means_sqrd = sum([(v - m)**2 for v in l]) / len(l)
rms = math.sqrt(means_sqrd)
return rms
+
def mannwhitneyu(a, b, sigLevel = .05):
"""
Determine if sample a and b are the same at given significance level.
@@ -35,6 +52,7 @@ def mannwhitneyu(a, b, sigLevel = .05):
except ValueError:
return True
+
def mannwhitneyu_small(a, b, sigLevel):
"""
Determine if sample a and b are the same.
More information about the llvm-commits
mailing list