[LNT] r207898 - Use Mann-Whitney U test to identify changes
Renato Golin
renato.golin at linaro.org
Sat May 3 03:47:51 PDT 2014
Hi Yi,
I'm getting this error on our test-suite bot:
No distributions at all found for SciPy (from LNT==0.4.1dev)
http://lab.llvm.org:8011/builders/clang-native-arm-lnt/builds/6265/steps/venv.lnt.install/logs/stdio
Any idea how to pull that dependency in? Or why pip isn't finding the right module?
I did install the python-scipy package from the Ubuntu repo, but I don't
think that's related...
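(Even if it were, the virtualenv the bot builds probably can't see a system
package unless it was created with --system-site-packages.) If the fix is
simply to get SciPy into the venv, this is roughly what I'd try; the paths
are hypothetical and depend on where the bot keeps its sandbox:

    # Option 1: install SciPy directly into the existing virtualenv
    /path/to/lnt.venv/bin/pip install scipy

    # Option 2: recreate the virtualenv so it can see the Ubuntu python-scipy,
    # then reinstall LNT into it
    virtualenv --system-site-packages /path/to/lnt.venv
    /path/to/lnt.venv/bin/pip install -e /path/to/lnt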
cheers,
--renato
On 3 May 2014 09:01, Yi Kong <Yi.Kong at arm.com> wrote:
> Author: kongyi
> Date: Sat May 3 03:01:30 2014
> New Revision: 207898
>
> URL: http://llvm.org/viewvc/llvm-project?rev=207898&view=rev
> Log:
> Use Mann-Whitney U test to identify changes
>
> Modified:
> lnt/trunk/lnt/server/reporting/analysis.py
> lnt/trunk/lnt/server/reporting/runs.py
> lnt/trunk/lnt/server/ui/templates/reporting/runs.html
> lnt/trunk/lnt/server/ui/templates/v4_run.html
> lnt/trunk/lnt/server/ui/views.py
> lnt/trunk/setup.py
>
> Modified: lnt/trunk/lnt/server/reporting/analysis.py
> URL: http://llvm.org/viewvc/llvm-project/lnt/trunk/lnt/server/reporting/analysis.py?rev=207898&r1=207897&r2=207898&view=diff
> ==============================================================================
> --- lnt/trunk/lnt/server/reporting/analysis.py (original)
> +++ lnt/trunk/lnt/server/reporting/analysis.py Sat May 3 03:01:30 2014
> @@ -5,6 +5,7 @@ Utilities for helping with the analysis
> from lnt.util import stats
> from lnt.server.ui import util
> from lnt.testing import PASS, FAIL, XFAIL
> +from scipy.stats import mannwhitneyu
>
> REGRESSED = 'REGRESSED'
> IMPROVED = 'IMPROVED'
> @@ -13,8 +14,8 @@ UNCHANGED_FAIL = 'UNCHANGED_FAIL'
>
> class ComparisonResult:
> def __init__(self, cur_value, prev_value, delta, pct_delta, stddev, MAD,
> - cur_failed, prev_failed, samples, stddev_mean = None,
> - stddev_is_estimated = False):
> + cur_failed, prev_failed, samples, prev_samples, stddev_mean = None,
> + stddev_is_estimated = False, confidence_lv = .9):
> self.current = cur_value
> self.previous = prev_value
> self.delta = delta
> @@ -24,8 +25,17 @@ class ComparisonResult:
> self.failed = cur_failed
> self.prev_failed = prev_failed
> self.samples = samples
> + self.prev_samples = prev_samples
> self.stddev_mean = stddev_mean
> self.stddev_is_estimated = stddev_is_estimated
> + self.confidence_lv = confidence_lv
> +
> + try:
> + u, prob = mannwhitneyu(self.samples, self.prev_samples)
> + self.p = 1 - prob
> + except ValueError:
> + # All numbers are identical
> + self.p = 1
>
> def get_samples(self):
> return self.samples
> @@ -89,6 +99,12 @@ class ComparisonResult:
> if ignore_small and abs(self.delta) < .01:
> return UNCHANGED_PASS
>
> + # Use Mann-Whitney U test to test null hypothesis that result is
> + # unchanged.
> + if len(self.samples) >= 4 and len(self.prev_samples) >= 4:
> + if self.p <= self.confidence_lv:
> + return UNCHANGED_PASS
> +
> # If we have a comparison window, then measure using a symmetric
> # confidence interval.
> if self.stddev is not None:
> @@ -121,11 +137,16 @@ class ComparisonResult:
> else:
> return UNCHANGED_PASS
>
> +
> + def get_p_value(self):
> + return self.p
> +
> class RunInfo(object):
> def __init__(self, testsuite, runs_to_load,
> - aggregation_fn = min):
> + aggregation_fn = min, confidence_lv = .9):
> self.testsuite = testsuite
> self.aggregation_fn = aggregation_fn
> + self.confidence_lv = confidence_lv
>
> self.sample_map = util.multidict()
> self.loaded_run_ids = set()
> @@ -207,7 +228,8 @@ class RunInfo(object):
> run_value, prev_value, delta=None,
> pct_delta = None, stddev = stddev, MAD = MAD,
> cur_failed = run_failed, prev_failed = prev_failed,
> - samples = run_values)
> + samples = run_values, prev_samples = prev_values,
> + confidence_lv = self.confidence_lv)
>
> # Compute the comparison status for the test value.
> delta = run_value - prev_value
> @@ -241,8 +263,9 @@ class RunInfo(object):
>
> return ComparisonResult(run_value, prev_value, delta,
> pct_delta, stddev, MAD,
> - run_failed, prev_failed, run_values,
> - stddev_mean, stddev_is_estimated)
> + run_failed, prev_failed, run_values, prev_values,
> + stddev_mean, stddev_is_estimated,
> + self.confidence_lv)
>
> def _load_samples_for_runs(self, run_ids):
> # Find the set of new runs to load.
>
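For anyone following along, here is a minimal standalone sketch of what the
new check in analysis.py does with scipy.stats.mannwhitneyu (the sample
values below are made up; the 0.9 threshold mirrors the confidence_lv
default above):

    from scipy.stats import mannwhitneyu

    cur_samples  = [1.02, 1.05, 1.03, 1.04]  # hypothetical current run samples
    prev_samples = [1.00, 0.99, 1.01, 1.00]  # hypothetical previous run samples

    try:
        # Older SciPy returns the U statistic and a one-sided p-value.
        u, prob = mannwhitneyu(cur_samples, prev_samples)
        p = 1 - prob
    except ValueError:
        # Raised when all the numbers are identical.
        p = 1

    confidence_lv = 0.9
    if len(cur_samples) >= 4 and len(prev_samples) >= 4 and p <= confidence_lv:
        print("treated as UNCHANGED_PASS at this confidence level, p = %.4f" % p)

So with fewer than four samples on either side the test is skipped and the
existing stddev-based logic still decides the status.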
> Modified: lnt/trunk/lnt/server/reporting/runs.py
> URL: http://llvm.org/viewvc/llvm-project/lnt/trunk/lnt/server/reporting/runs.py?rev=207898&r1=207897&r2=207898&view=diff
> ==============================================================================
> --- lnt/trunk/lnt/server/reporting/runs.py (original)
> +++ lnt/trunk/lnt/server/reporting/runs.py Sat May 3 03:01:30 2014
> @@ -14,7 +14,8 @@ import lnt.util.stats
> def generate_run_report(run, baseurl, only_html_body = False,
> num_comparison_runs = 10, result = None,
> compare_to = None, baseline = None,
> - comparison_window = None, aggregation_fn = min):
> + comparison_window = None, aggregation_fn = min,
> + confidence_lv = .5):
> """
> generate_run_report(...) -> (str: subject, str: text_report,
> str: html_report)
> @@ -67,7 +68,7 @@ def generate_run_report(run, baseurl, on
> if baseline:
> runs_to_load.add(baseline.id)
> sri = lnt.server.reporting.analysis.RunInfo(
> - ts, runs_to_load, aggregation_fn)
> + ts, runs_to_load, aggregation_fn, confidence_lv)
>
> # Get the test names, primary fields and total test counts.
> test_names = ts.query(ts.Test.name, ts.Test.id).\
>
> Modified: lnt/trunk/lnt/server/ui/templates/reporting/runs.html
> URL: http://llvm.org/viewvc/llvm-project/lnt/trunk/lnt/server/ui/templates/reporting/runs.html?rev=207898&r1=207897&r2=207898&view=diff
> ==============================================================================
> --- lnt/trunk/lnt/server/ui/templates/reporting/runs.html (original)
> +++ lnt/trunk/lnt/server/ui/templates/reporting/runs.html Sat May 3 03:01:30 2014
> @@ -25,9 +25,11 @@
> <th style="{{ styles['th'] }}">{{ primary_name }}</th>
> <th style="{{ styles['th'] }}">Current</th>
> <th style="{{ styles['th'] }}">σ {{ primary_field_suffix }}</th>
> + <th style="{{ styles['th'] }}">p {{ primary_field_suffix }}</th>
> {% if secondary_info %}
> <th style="{{ styles['th'] }}">Δ {{ secondary_field_suffix }}</th>
> <th style="{{ styles['th'] }}">σ {{ secondary_field_suffix }}</th>
> + <th style="{{ styles['th'] }}">p {{ secondary_field_suffix }}</th>
> {% endif %}
> </tr>
>
> @@ -40,10 +42,12 @@
> <td style="{{ styles['td'] }}">{{ "%.4f" | format(cr.previous) }}</td>
> <td style="{{ styles['td'] }}">{{ "%.4f" | format(cr.current) }}</td>
> <td style="{{ styles['td'] }}">{{ "%.4f" | format_or_default(cr.stddev, '-') }}</td>
> + <td style="{{ styles['td'] }}">{{ "%.4f" | format_or_default(cr.p, '-') }}</td>
> {% if secondary_info %}
> {% set a_cr = secondary_info[(name, field)] %}
> {{ a_cr.pct_delta|aspctcell(style=styles['td'])|safe }}
> <td style="{{ styles['td'] }}">{{ "%.4f" | format_or_default(a_cr.stddev, '-') }}</td>
> + <td style="{{ styles['td'] }}">{{ "%.4f" | format_or_default(a_cr.p, '-') }}</td>
> {% endif %}
> </tr>
> {% endfor %}
>
> Modified: lnt/trunk/lnt/server/ui/templates/v4_run.html
> URL: http://llvm.org/viewvc/llvm-project/lnt/trunk/lnt/server/ui/templates/v4_run.html?rev=207898&r1=207897&r2=207898&view=diff
> ==============================================================================
> --- lnt/trunk/lnt/server/ui/templates/v4_run.html (original)
> +++ lnt/trunk/lnt/server/ui/templates/v4_run.html Sat May 3 03:01:30 2014
> @@ -76,6 +76,10 @@
> {% if options.show_sample_counts %}
> <td>{{cr.get_samples()|length}}</td>
> {% endif %}
> +
> + {% if options.show_p_value %}
> + <td>{{ "%.4f" % cr.get_p_value() }}</td>
> + {% endif %}
> {% endmacro %}
>
> {% block sidebar %}
> @@ -244,6 +248,14 @@
> </td>
> </tr>
> <tr>
> + <td>Mann-Whitney test confidence level:</td>
> + <td><input type="text" name="MW_confidence_lv" value="{{ options.MW_confidence_lv }}"></td>
> + </tr>
> + <tr>
> + <td>Show Mann-Whitney p value:</td>
> + <td><input type="checkbox" name="show_p_value" value="yes" {{ "checked" if options.show_p_value else ""}}></td>
> + </tr>
> + <tr>
> <td colspan="2">
> {% if compare_to %}
> <input type="hidden" name="compare_to" value="{{compare_to.id}}">
> @@ -285,6 +297,7 @@
> {% if options.show_mad %}<th>MAD</th>{% endif %}
> {% if options.show_all_samples %}<th>Samples</th>{% endif %}
> {% if options.show_sample_counts %}<th>N</th>{% endif %}
> + {% if options.show_p_value %}<th>p</th>{% endif %}
> </tr>
> </thead>
> <tbody>
>
> Modified: lnt/trunk/lnt/server/ui/views.py
> URL: http://llvm.org/viewvc/llvm-project/lnt/trunk/lnt/server/ui/views.py?rev=207898&r1=207897&r2=207898&view=diff
> ==============================================================================
> --- lnt/trunk/lnt/server/ui/views.py (original)
> +++ lnt/trunk/lnt/server/ui/views.py Sat May 3 03:01:30 2014
> @@ -191,6 +191,10 @@ class V4RequestInfo(object):
> 'median' : lnt.util.stats.median }.get(
> aggregation_fn_name, min)
>
> + # Get the MW confidence level
> + confidence_lv = float(request.args.get('MW_confidence_lv', .9))
> + self.confidence_lv = confidence_lv
> +
> # Find the neighboring runs, by order.
> prev_runs = list(ts.get_previous_runs_on_machine(run, N = 3))
> next_runs = list(ts.get_next_runs_on_machine(run, N = 3))
> @@ -246,7 +250,7 @@ class V4RequestInfo(object):
> only_html_body=only_html_body, result=None,
> compare_to=self.compare_to, baseline=self.baseline,
> comparison_window=self.comparison_window,
> - aggregation_fn=self.aggregation_fn)
> + aggregation_fn=self.aggregation_fn, confidence_lv=confidence_lv)
> _, self.text_report, self.html_report, self.sri = reports
>
> @v4_route("/<int:id>/report")
> @@ -314,6 +318,8 @@ def v4_run(id):
> options['num_comparison_runs'] = info.num_comparison_runs
> options['test_filter'] = test_filter_str = request.args.get(
> 'test_filter', '')
> + options['MW_confidence_lv'] = float(request.args.get('MW_confidence_lv', .9))
> + options['show_p_value'] = bool(request.args.get('show_p_value'))
> if test_filter_str:
> test_filter_re = re.compile(test_filter_str)
> else:
>
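If I am reading the views.py hunk right, both new options are plain query
parameters on the v4 run page, so they can be exercised with a URL along
these lines (host and run id are made up):

    http://lnt.example.org/db_default/v4/nts/1234?show_p_value=yes&MW_confidence_lv=0.95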
> Modified: lnt/trunk/setup.py
> URL: http://llvm.org/viewvc/llvm-project/lnt/trunk/setup.py?rev=207898&r1=207897&r2=207898&view=diff
> ==============================================================================
> --- lnt/trunk/setup.py (original)
> +++ lnt/trunk/setup.py Sat May 3 03:01:30 2014
> @@ -90,5 +90,5 @@ http://llvm.org/svn/llvm-project/lnt/tru
> 'lnt = lnt.lnttool:main',
> ],
> },
> - install_requires=['SQLAlchemy', 'Flask'],
> + install_requires=['SQLAlchemy', 'Flask', 'SciPy'],
> )
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits