[LNT] r238968 - Update the daily report to use more previous data

Wed Jun 3 15:24:10 PDT 2015

Chris Matthews <cmatthews5 at apple.com> writes:
> Author: cmatthews
> Date: Wed Jun  3 16:01:32 2015
> New Revision: 238968
>
> URL: http://llvm.org/viewvc/llvm-project?rev=238968&view=rev
> Log:
> Update the daily report to use more previous data
>
> Now the analysis can use past data better, pass in more old data.  Daily
> report used to compare the last rev on each day, now it compares last
> rev on current day to all revs on previous day.
>
> Modified:
>     lnt/trunk/lnt/server/reporting/dailyreport.py
>
> Modified: lnt/trunk/lnt/server/reporting/dailyreport.py
> URL:
> http://llvm.org/viewvc/llvm-project/lnt/trunk/lnt/server/reporting/dailyreport.py?rev=238968&r1=238967&r2=238968&view=diff
> ==============================================================================
> --- lnt/trunk/lnt/server/reporting/dailyreport.py (original)
> +++ lnt/trunk/lnt/server/reporting/dailyreport.py Wed Jun  3 16:01:32 2015
> @@ -9,6 +9,10 @@ from lnt.server.reporting.analysis impor
>  
>  from lnt.server.ui import util
>  
> +from collections import namedtuple
> +
> +OrderAndHistory = namedtuple('OrderAndHistory', ['max_order', 'recent_orders'])
> +
>  class DailyReport(object):
>      def __init__(self, ts, year, month, day, num_prior_days_to_include = 3,
>                   day_start_offset_hours=16, for_mail=False):
> @@ -72,7 +76,7 @@ class DailyReport(object):
>                             for i in range(self.num_prior_days_to_include + 1)]
>  
>          # Find all the runs that occurred for each day slice.
> -        prior_runs = [ts.query(ts.Run).\
> +        prior_runs = [ts.query(ts.Run).
>                            filter(ts.Run.start_time > prior_day).\
>                            filter(ts.Run.start_time <= day).all()
>                        for day,prior_day in util.pairs(self.prior_days)]
> @@ -89,23 +93,29 @@ class DailyReport(object):
>          # aggregation.
>          self.prior_days_machine_order_map = \
>              [None] * self.num_prior_days_to_include
> -        for i,runs in enumerate(prior_runs):
> +        historic_runs = [None] * len(prior_runs)
> +        for i, runs in enumerate(prior_runs):
>              # Aggregate the runs by machine.
>              machine_to_all_orders = util.multidict()
>              for r in runs:
>                  machine_to_all_orders[r.machine] = r.order
>  
> -            # Create a map from machine to max order.
> +            # Create a map from machine to max order and some history.
>              self.prior_days_machine_order_map[i] = machine_order_map = dict(
> -                (machine, max(orders))
> -                for machine,orders in machine_to_all_orders.items())
> +                (machine, OrderAndHistory(max(orders), sorted(orders)))
> +                for machine, orders in machine_to_all_orders.items())
>  
>              # Update the run list to only include the runs with that order.
>              prior_runs[i] = [r for r in runs
> -                             if r.order is machine_order_map[r.machine]]
> +                             if r.order is machine_order_map[r.machine].max_order]
> +
> +            # Also keep some recent runs, so we have some extra samples.
> +            historic_runs[i] = [r for r in runs
> +                                if r.order in machine_order_map[r.machine].recent_orders]

This line is longer than 80 columns; could you wrap it?

>  
>          # Form a list of all relevant runs.
>          relevant_runs = sum(prior_runs, [])
> +        less_relevant_runs = sum(historic_runs, relevant_runs)
>  
>          # Find the union of all machines reporting in the relevant runs.
>          self.reporting_machines = list(set(r.machine for r in relevant_runs))
> @@ -143,6 +153,12 @@ class DailyReport(object):
>              for run in day_runs:
>                  machine_runs[(run.machine_id, day_index)] = run
>  
> +        # Also aggregate past runs by day.
> +        self.machine_past_runs = util.multidict()
> +        for day_index, day_runs in enumerate(historic_runs):
> +            for run in day_runs:
> +                self.machine_past_runs[(run.machine_id, day_index)] = run
> +
>          relevant_run_ids = [r.id for r in relevant_runs]
>  
>          # If there are no relevant runs, just stop processing (the report will
> @@ -158,7 +174,7 @@ class DailyReport(object):
>                      ts.Sample.test_id == ts.Test.id))).all()
>          self.reporting_tests.sort(key=lambda t: t.name)
>  
> -        run_ids_to_load = list(relevant_run_ids)
> +        run_ids_to_load = list(relevant_run_ids) + [r.id for r in less_relevant_runs]

This line is over 80 columns too; please wrap it.

>  
>          # Create a run info object.
>          sri = lnt.server.reporting.analysis.RunInfo(ts, run_ids_to_load)
> @@ -193,19 +209,25 @@ class DailyReport(object):
>                  for machine in self.reporting_machines:
>                      # Get the most recent comparison result.
>                      day_runs = machine_runs.get((machine.id, 0), ())
> -                    prev_runs = machine_runs.get((machine.id, 1), ())
> +                    prev_runs = self.machine_past_runs.get((machine.id, 1), ())
> +
> +                    prev_day_run = machine_runs.get((machine.id, 1), ())
> +
>                      cr = sri.get_comparison_result(
>                          day_runs, prev_runs, test.id, field)
>  
>                      # If the result is not "interesting", ignore this machine.
>                      if not cr.is_result_interesting():
>                          continue
> -
> +                    else:
> +                        print test.name.split("/")[-1], repr(cr), cr.pct_delta

"else" after continue is a bit strange here.

>                      # Otherwise, compute the results for all the days.
>                      day_results = [cr]
>                      for i in range(1, self.num_prior_days_to_include):
> -                        day_runs = prev_runs
> -                        prev_runs = machine_runs.get((machine.id, i+1), ())
> +                        day_runs = prev_day_run
> +                        prev_day_run = machine_runs.get((machine.id, i+1), ())
> +                        prev_runs = self.machine_past_runs.get(
> +                                       (machine.id, i+1), ())
>                          cr = sri.get_comparison_result(day_runs, prev_runs,
>                                                         test.id, field)
>                          day_results.append(cr)
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits