[llvm] fec521a - [lit] Add the ability to parse regexes in Lit boolean expressions

Wed Jun 30 07:52:26 PDT 2021

Author: Louis Dionne
Date: 2021-06-30T10:52:16-04:00
New Revision: fec521a7b206815ad995f7247e671a8f25d144fc

URL: https://github.com/llvm/llvm-project/commit/fec521a7b206815ad995f7247e671a8f25d144fc
DIFF: https://github.com/llvm/llvm-project/commit/fec521a7b206815ad995f7247e671a8f25d144fc.diff

LOG: [lit] Add the ability to parse regexes in Lit boolean expressions

This patch augments Lit with the ability to parse regular expressions
in boolean expressions. This applies to REQUIRES:, XFAIL:, UNSUPPORTED:,
and all other special Lit markup that evaluates to a boolean expression.

Regular expressions can be specified by enclosing them in {{...}},
similarly to how FileCheck handles such regular expressions. The regular
expression can either be on its own, or it can be part of an identifier.
For example, a match expression like {{.+}}-apple-darwin{{.+}} would match
the following variables:

     x86_64-apple-darwin20.0
     arm64-apple-darwin20.0
     arm64-apple-darwin22.0
     etc...

In the long term, this could be used to remove the need to handle the
target triple specially when parsing boolean expressions.

Differential Revision: https://reviews.llvm.org/D104572

Added: 
    

Modified: 
    llvm/docs/TestingGuide.rst
    llvm/utils/lit/lit/BooleanExpression.py
    llvm/utils/lit/lit/Test.py
    llvm/utils/lit/tests/Inputs/show-used-features/mixed.txt
    llvm/utils/lit/tests/show-used-features.py

Removed: 
    


################################################################################
diff  --git a/llvm/docs/TestingGuide.rst b/llvm/docs/TestingGuide.rst
index 4ec6a3d52b072..cd2c012d95738 100644

--- a/llvm/docs/TestingGuide.rst
+++ b/llvm/docs/TestingGuide.rst
@@ -459,8 +459,12 @@ will be a failure if its execution succeeds.
 ``REQUIRES`` and ``UNSUPPORTED`` and ``XFAIL`` all accept a comma-separated
 list of boolean expressions. The values in each expression may be:
 
-- Features added to ``config.available_features`` by
-  configuration files such as ``lit.cfg``.
+- Features added to ``config.available_features`` by configuration files such as ``lit.cfg``.
+  String comparison of features is case-sensitive. Furthermore, a boolean expression can
+  contain any Python regular expression enclosed in ``{{ }}``, in which case the boolean
+  expression is satisfied if any feature matches the regular expression. Regular
+  expressions can appear inside an identifier, so for example ``he{{l+}}o`` would match
+  ``helo``, ``hello``, ``helllo``, and so on.
 - Substrings of the target triple (``UNSUPPORTED`` and ``XFAIL`` only).
 
 | ``REQUIRES`` enables the test if all expressions are true.

diff  --git a/llvm/utils/lit/lit/BooleanExpression.py b/llvm/utils/lit/lit/BooleanExpression.py
index 34e07fc1b8e5e..ff5352778e99c 100644
--- a/llvm/utils/lit/lit/BooleanExpression.py
+++ b/llvm/utils/lit/lit/BooleanExpression.py
@@ -4,18 +4,24 @@ class BooleanExpression:
     # A simple evaluator of boolean expressions.
     #
     # Grammar:
-    #   expr       :: or_expr
-    #   or_expr    :: and_expr ('||' and_expr)*
-    #   and_expr   :: not_expr ('&&' not_expr)*
-    #   not_expr   :: '!' not_expr
-    #                 '(' or_expr ')'
-    #                 identifier
-    #   identifier :: [-+=._a-zA-Z0-9]+
+    #   expr         :: or_expr
+    #   or_expr      :: and_expr ('||' and_expr)*
+    #   and_expr     :: not_expr ('&&' not_expr)*
+    #   not_expr     :: '!' not_expr
+    #                   '(' or_expr ')'
+    #                   match_expr
+    #   match_expr   :: braced_regex
+    #                   identifier
+    #                   braced_regex match_expr
+    #                   identifier match_expr
+    #   identifier   :: [-+=._a-zA-Z0-9]+
+    #   braced_regex :: '{{' python_regex '}}'
 
     # Evaluates `string` as a boolean expression.
     # Returns True or False. Throws a ValueError on syntax error.
     #
     # Variables in `variables` are true.
+    # Regexes that match any variable in `variables` are true.
     # Substrings of `triple` are true.
     # 'true' is true.
     # All other identifiers are false.
@@ -41,7 +47,7 @@ def __init__(self, string, variables, triple=""):
     END = object()
 
     # Tokenization pattern.
-    Pattern = re.compile(r'\A\s*([()]|[-+=._a-zA-Z0-9]+|&&|\|\||!)\s*(.*)\Z')
+    Pattern = re.compile(r'\A\s*([()]|&&|\|\||!|(?:[-+=._a-zA-Z0-9]+|\{\{.+?\}\})+)\s*(.*)\Z')
 
     @staticmethod
     def tokenize(string):
@@ -80,12 +86,24 @@ def expect(self, t):
                              (self.quote(t), self.quote(self.token)))
 
     @staticmethod
-    def isIdentifier(token):
+    def isMatchExpression(token):
         if (token is BooleanExpression.END or token == '&&' or token == '||' or
             token == '!' or token == '(' or token == ')'):
             return False
         return True
 
+    def parseMATCH(self):
+        regex = ''
+        for part in filter(None, re.split(r'(\{\{.+?\}\})', self.token)):
+            if part.startswith('{{'):
+                assert part.endswith('}}')
+                regex += '(?:{})'.format(part[2:-2])
+            else:
+                regex += re.escape(part)
+        regex = re.compile(regex)
+        self.value = self.token in self.triple or any(regex.fullmatch(var) for var in self.variables)
+        self.token = next(self.tokens)
+
     def parseNOT(self):
         if self.accept('!'):
             self.parseNOT()
@@ -93,13 +111,11 @@ def parseNOT(self):
         elif self.accept('('):
             self.parseOR()
             self.expect(')')
-        elif not BooleanExpression.isIdentifier(self.token):
-            raise ValueError("expected: '!' or '(' or identifier\nhave: %s" %
+        elif not BooleanExpression.isMatchExpression(self.token):
+            raise ValueError("expected: '!', '(', '{{', or identifier\nhave: %s" %
                              self.quote(self.token))
         else:
-            self.value = (self.token in self.variables or
-                          self.token in self.triple)
-            self.token = next(self.tokens)
+            self.parseMATCH()
 
     def parseAND(self):
         self.parseNOT()
@@ -143,12 +159,20 @@ def test_variables(self):
         self.assertTrue(BooleanExpression.evaluate('under_score', variables))
         self.assertTrue(BooleanExpression.evaluate('e=quals', variables))
         self.assertTrue(BooleanExpression.evaluate('d1g1ts', variables))
+        self.assertTrue(BooleanExpression.evaluate('{{its.+}}', variables))
+        self.assertTrue(BooleanExpression.evaluate('{{false-[lo]+-true}}', variables))
+        self.assertTrue(BooleanExpression.evaluate('{{(true|false)-lol-(true|false)}}', variables))
+        self.assertTrue(BooleanExpression.evaluate('d1g{{[0-9]}}ts', variables))
+        self.assertTrue(BooleanExpression.evaluate('d1g{{[0-9]}}t{{[a-z]}}', variables))
+        self.assertTrue(BooleanExpression.evaluate('{{d}}1g{{[0-9]}}t{{[a-z]}}', variables))
+        self.assertTrue(BooleanExpression.evaluate('d1{{(g|1)+}}ts', variables))
 
         self.assertFalse(BooleanExpression.evaluate('false', variables))
         self.assertFalse(BooleanExpression.evaluate('True', variables))
         self.assertFalse(BooleanExpression.evaluate('true-ish', variables))
         self.assertFalse(BooleanExpression.evaluate('not_true', variables))
         self.assertFalse(BooleanExpression.evaluate('tru', variables))
+        self.assertFalse(BooleanExpression.evaluate('{{its-true.+}}', variables))
 
     def test_triple(self):
         triple = 'arch-vendor-os'
@@ -159,6 +183,21 @@ def test_triple(self):
         self.assertTrue(BooleanExpression.evaluate('-os', {}, triple))
         self.assertFalse(BooleanExpression.evaluate('arch-os', {}, triple))
 
+        # When matching against the triple, a regex is treated as an identifier and checked
+        # for a literal match. This preserves existing behavior before regexes were introduced.
+        self.assertFalse(BooleanExpression.evaluate('arch-{{vendor}}-os', {}, triple))
+        self.assertTrue(BooleanExpression.evaluate('arch-{{vendor}}-os', {}, 'arch-{{vendor}}-os'))
+
+    def test_matching(self):
+        expr1 = 'linux && (target={{aarch64-.+}} || target={{x86_64-.+}})'
+        self.assertTrue(BooleanExpression.evaluate(expr1, {'linux', 'target=x86_64-unknown-linux-gnu'}))
+        self.assertFalse(BooleanExpression.evaluate(expr1, {'linux', 'target=i386-unknown-linux-gnu'}))
+
+        expr2 = 'use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12}} && !no-exceptions'
+        self.assertTrue(BooleanExpression.evaluate(expr2, {'use_system_cxx_lib', 'target=arm64-apple-macosx10.12'}))
+        self.assertFalse(BooleanExpression.evaluate(expr2, {'use_system_cxx_lib', 'target=arm64-apple-macosx10.12', 'no-exceptions'}))
+        self.assertFalse(BooleanExpression.evaluate(expr2, {'use_system_cxx_lib', 'target=arm64-apple-macosx10.15'}))
+
     def test_operators(self):
         self.assertTrue(BooleanExpression.evaluate('true || true', {}))
         self.assertTrue(BooleanExpression.evaluate('true || false', {}))
@@ -206,17 +245,17 @@ def test_errors(self):
                             "in expression: 'true and true'")
 
         self.checkException("|| true",
-                            "expected: '!' or '(' or identifier\n" +
+                            "expected: '!', '(', '{{', or identifier\n" +
                             "have: '||'\n" +
                             "in expression: '|| true'")
 
         self.checkException("true &&",
-                            "expected: '!' or '(' or identifier\n" +
+                            "expected: '!', '(', '{{', or identifier\n" +
                             "have: <end of expression>\n" +
                             "in expression: 'true &&'")
 
         self.checkException("",
-                            "expected: '!' or '(' or identifier\n" +
+                            "expected: '!', '(', '{{', or identifier\n" +
                             "have: <end of expression>\n" +
                             "in expression: ''")
 
@@ -244,9 +283,18 @@ def test_errors(self):
                             "in expression: 'true (true)'")
 
         self.checkException("( )",
-                            "expected: '!' or '(' or identifier\n" +
+                            "expected: '!', '(', '{{', or identifier\n" +
                             "have: ')'\n" +
                             "in expression: '( )'")
 
+        self.checkException("abc{{def",
+                            "couldn't parse text: '{{def'\n" +
+                            "in expression: 'abc{{def'")
+
+        self.checkException("{{}}",
+                            "couldn't parse text: '{{}}'\n" +
+                            "in expression: '{{}}'")
+
+
 if __name__ == '__main__':
     unittest.main()

diff  --git a/llvm/utils/lit/lit/Test.py b/llvm/utils/lit/lit/Test.py
index 7cc610bf56bde..45ab12a85bd5e 100644
--- a/llvm/utils/lit/lit/Test.py
+++ b/llvm/utils/lit/lit/Test.py
@@ -408,5 +408,5 @@ def getUsedFeatures(self):
             BooleanExpression.tokenize(expr) for expr in
                 boolean_expressions if expr != '*'
         )
-        identifiers = set(filter(BooleanExpression.isIdentifier, tokens))
-        return identifiers
+        matchExpressions = set(filter(BooleanExpression.isMatchExpression, tokens))
+        return matchExpressions

diff  --git a/llvm/utils/lit/tests/Inputs/show-used-features/mixed.txt b/llvm/utils/lit/tests/Inputs/show-used-features/mixed.txt
index 1de0f7442a086..309b3eaaa7698 100644
--- a/llvm/utils/lit/tests/Inputs/show-used-features/mixed.txt
+++ b/llvm/utils/lit/tests/Inputs/show-used-features/mixed.txt
@@ -1,4 +1,4 @@
 
-// REQUIRES: my-require-feature-2 || my-require-feature-3
-// UNSUPPORTED: my-unsupported-feature-2, my-unsupported-feature-3
-// XFAIL: my-xfail-feature-2, my-xfail-feature-3
+// REQUIRES: my-require-feature-2 || my-require-feature-3, my-{{[require]*}}-feature-4
+// UNSUPPORTED: my-unsupported-feature-2, my-unsupported-feature-3 && !my-{{[unsupported]*}}-feature-4
+// XFAIL: my-xfail-feature-2, my-xfail-feature-3, my-{{[xfail]*}}-feature-4

diff  --git a/llvm/utils/lit/tests/show-used-features.py b/llvm/utils/lit/tests/show-used-features.py
index 069ee08196701..b88c68faca981 100644
--- a/llvm/utils/lit/tests/show-used-features.py
+++ b/llvm/utils/lit/tests/show-used-features.py
@@ -4,3 +4,6 @@
 # CHECK: my-require-feature-1 my-require-feature-2 my-require-feature-3
 # CHECK: my-unsupported-feature-1 my-unsupported-feature-2 my-unsupported-feature-3
 # CHECK: my-xfail-feature-1 my-xfail-feature-2 my-xfail-feature-3
+# CHECK: {{my-[{][{]\[require\]\*[}][}]-feature-4}}
+# CHECK: {{my-[{][{]\[unsupported\]\*[}][}]-feature-4}}
+# CHECK: {{my-[{][{]\[xfail\]\*[}][}]-feature-4}}