[LNT] r284560 - More aggressive change lumping

Tue Oct 18 17:40:17 PDT 2016

Author: cmatthews
Date: Tue Oct 18 19:40:17 2016
New Revision: 284560

URL: http://llvm.org/viewvc/llvm-project?rev=284560&view=rev
Log:
More aggressive change lumping

This changes change grouping to use soft comparisons instead of equality.
Currently we compare machine and benchmark names directly; now we use
name similarity from difflib.  This will allow us to group benchmarks
with similar names.

Modified:
    lnt/trunk/lnt/server/db/fieldchange.py
    lnt/trunk/tests/server/ui/change_processing.py

Modified: lnt/trunk/lnt/server/db/fieldchange.py
URL: http://llvm.org/viewvc/llvm-project/lnt/trunk/lnt/server/db/fieldchange.py?rev=284560&r1=284559&r2=284560&view=diff
==============================================================================

--- lnt/trunk/lnt/server/db/fieldchange.py (original)
+++ lnt/trunk/lnt/server/db/fieldchange.py Tue Oct 18 19:40:17 2016
@@ -1,3 +1,4 @@
+import difflib
 import sqlalchemy.sql
 from sqlalchemy.orm.exc import ObjectDeletedError
 import lnt.server.reporting.analysis
@@ -158,35 +159,52 @@ def is_overlaping(fc1, fc2):
     return (r1_min == r2_min and r1_max == r2_max) or \
            (r1_min < r2_max and r2_min < r1_max)
 
+
+def percent_similar(a, b):
+    """
+    Percent similar: are these strings similar to each other?
+    :param a: first string
+    :param b: second string
+    """
+    # type: (str, str) -> float
+    s = difflib.SequenceMatcher(lambda x: x.isdigit(), a, b)
+    return s.ratio()
+
+
 @timed
 def identify_related_changes(ts, regressions, fc):
-    """Can we find a home for this change in some existing regression? """
+    """Can we find a home for this change in some existing regression? If a
+    match is found add a regression indicator adding this change to that
+    regression, otherwise create a new regression for this change.
+
+    Regression matching looks for regressions that happen in overlapping order
+    ranges. Then looks for changes that are similar.
+
+    """
     for regression in regressions:
         regression_indicators = get_ris(ts, regression)
         for change in regression_indicators:
             regression_change = change.field_change
             if is_overlaping(regression_change, fc):
-                confidence = 0
-                relation = ["Revision"]
-                if regression_change.machine == fc.machine:
-                    confidence += 1
-                    relation.append("Machine")
-                if regression_change.test == fc.test:
-                    confidence += 1
-                    relation.append("Test")
+                confidence = 0.0
+
+                confidence += percent_similar(regression_change.machine.name,
+                                              fc.machine.name)
+                confidence += percent_similar(regression_change.test.name, fc.test.name)
+
                 if regression_change.field == fc.field:
-                    confidence += 1
-                    relation.append("Field")
+                    confidence += 1.0
 
-                if confidence >= 2:
+                if confidence >= 2.0:
                     # Matching
-                    note("Found a match:" + str(regression)  + " On " +
-                         ', '.join(relation))
+                    MSG = "Found a match: {} with score {}."
+                    note(MSG.format(str(regression),
+                                    confidence))
                     ri = ts.RegressionIndicator(regression, fc)
                     ts.add(ri)
                     # Update the default title if needed.
                     rebuild_title(ts, regression)
-                    return (True, regression)
+                    return True, regression
     note("Could not find a partner, creating new Regression for change")
     new_reg = new_regression(ts, [fc.id])
-    return (False, new_reg)
+    return False, new_reg

Modified: lnt/trunk/tests/server/ui/change_processing.py
URL: http://llvm.org/viewvc/llvm-project/lnt/trunk/tests/server/ui/change_processing.py?rev=284560&r1=284559&r2=284560&view=diff
==============================================================================
--- lnt/trunk/tests/server/ui/change_processing.py (original)
+++ lnt/trunk/tests/server/ui/change_processing.py Tue Oct 18 19:40:17 2016
@@ -43,13 +43,13 @@ class ChangeProcessingTests(unittest.Tes
         machine = self.machine = ts_db.Machine("test-machine")
         ts_db.add(machine)
         
-        test = self.test = ts_db.Test("test-a")
+        test = self.test = ts_db.Test("foo")
         ts_db.add(test)
         
         machine2 = self.machine2 = ts_db.Machine("test-machine2")
         ts_db.add(machine2)
         
-        test2 = self.test2 = ts_db.Test("test-b")
+        test2 = self.test2 = ts_db.Test("bar")
         ts_db.add(test2)
         
         run = self.run = ts_db.Run(machine, order1235,  start_time,
@@ -120,13 +120,9 @@ class ChangeProcessingTests(unittest.Tes
     def test_startup(self):
         pass
 
-    #def test_rebuild_title(self):
-    #    ts = self.ts_db
-    #    
     def test_change_grouping_criteria(self):
         ts_db = self.ts_db
 
-        
         # Check simple overlap checks work.
         self.assertTrue(is_overlaping(self.field_change, self.field_change2),
                         "Should be overlapping")
@@ -152,7 +148,7 @@ class ChangeProcessingTests(unittest.Tes
         ts_db.add(field_change7)
         ret, reg = identify_related_changes(ts_db, self.regressions, field_change7)
         self.assertNotEquals(self.regression, reg)
-        self.assertFalse(ret, "Should not match with differnt machine and tests.")
+        self.assertFalse(ret, "No match with different machine and tests.")
         self.regressions.append(reg)
         field_change4 = ts_db.FieldChange(self.order1234,
                                           self.order1235,
@@ -162,7 +158,7 @@ class ChangeProcessingTests(unittest.Tes
 
         # Check a regression matches if all fields match.
         ret, _ = identify_related_changes(ts_db, self.regressions, field_change4)
-        self.assertTrue(ret, "Should Match with differnt machine.")
+        self.assertTrue(ret, "Should Match with different machine.")
 
         field_change5 = ts_db.FieldChange(self.order1234,
                                           self.order1235,
@@ -172,7 +168,7 @@ class ChangeProcessingTests(unittest.Tes
 
         # Check a regression matches if all fields match.
         ret, _ = identify_related_changes(ts_db, self.regressions, field_change5)
-        self.assertTrue(ret, "Should Match with differnt tests.")
+        self.assertTrue(ret, "Should Match with different tests.")
         field_change6 = ts_db.FieldChange(self.order1234,
                                           self.order1235,
                                           self.machine,
@@ -181,12 +177,12 @@ class ChangeProcessingTests(unittest.Tes
 
         # Check a regression matches if all fields match.
         ret, _ = identify_related_changes(ts_db, self.regressions, field_change6)
-        self.assertTrue(ret, "Should Match with differnt fields.")
+        self.assertTrue(ret, "Should Match with different fields.")
 
         ts_db.commit()
         
         r2 = rebuild_title(ts_db, self.regression)
-        EXPECTED_TITLE = "Regression of 6 benchmarks: test-a, test-b"
+        EXPECTED_TITLE = "Regression of 6 benchmarks: foo, bar"
         self.assertEquals(r2.title, EXPECTED_TITLE)
 
     def test_regression_evolution(self):