[llvm] r288694 - [lit] Support custom parsers in parseIntegratedTestScript

Mon Dec 5 12:21:22 PST 2016

Author: ericwf
Date: Mon Dec  5 14:21:21 2016
New Revision: 288694

URL: http://llvm.org/viewvc/llvm-project?rev=288694&view=rev
Log:
[lit] Support custom parsers in parseIntegratedTestScript

Summary:
Libc++ frequently needs to parse more than just the builtin *test keywords* (`RUN`, `REQUIRES`, `XFAIL`, etc.). For example, libc++ currently needs a new keyword `MODULES-DEFINES: macro list...`. Instead of re-implementing the script parsing in libc++, this patch allows `parseIntegratedTestScript` to take custom parsers.

This patch introduces a new class `IntegratedTestKeywordParser` which implements the logic to parse/process a test keyword. Parsing of various keyword "kinds" is supported out of the box, including 'TAG', 'COMMAND', and 'LIST', which parse keywords such as `END.`, `RUN:` and `XFAIL:` respectively.

As an example, after this change libc++ can implement the `MODULES-DEFINES` keyword simply using:
```
mparser = IntegratedTestKeywordParser('MODULES-DEFINES:', ParserKind.LIST)
parseIntegratedTestScript(test, additional_parsers=[mparser])
macro_list = mparser.getValue()
```


Reviewers: ddunbar, modocache, rnk, danalbert, jroelofs

Subscribers: mgrang, llvm-commits, cfe-commits

Differential Revision: https://reviews.llvm.org/D27005

Added:
    llvm/trunk/utils/lit/tests/Inputs/testrunner-custom-parsers/
    llvm/trunk/utils/lit/tests/Inputs/testrunner-custom-parsers/lit.cfg
    llvm/trunk/utils/lit/tests/Inputs/testrunner-custom-parsers/test.txt
    llvm/trunk/utils/lit/tests/unit/TestRunner.py
Modified:
    llvm/trunk/utils/lit/lit/TestRunner.py

Modified: llvm/trunk/utils/lit/lit/TestRunner.py
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/lit/lit/TestRunner.py?rev=288694&r1=288693&r2=288694&view=diff
==============================================================================

--- llvm/trunk/utils/lit/lit/TestRunner.py (original)
+++ llvm/trunk/utils/lit/lit/TestRunner.py Mon Dec  5 14:21:21 2016
@@ -630,7 +630,7 @@ def parseIntegratedTestScriptCommands(so
     # version.
 
     keywords_re = re.compile(
-        to_bytes("(%s)(.*)\n" % ("|".join(k for k in keywords),)))
+        to_bytes("(%s)(.*)\n" % ("|".join(re.escape(k) for k in keywords),)))
 
     f = open(source_path, 'rb')
     try:
@@ -657,7 +657,7 @@ def parseIntegratedTestScriptCommands(so
             # Python 2, to avoid other code having to differentiate between the
             # str and unicode types.
             keyword,ln = match.groups()
-            yield (line_number, to_string(keyword[:-1].decode('utf-8')),
+            yield (line_number, to_string(keyword.decode('utf-8')),
                    to_string(ln.decode('utf-8')))
     finally:
         f.close()
@@ -739,10 +739,119 @@ def applySubstitutions(script, substitut
     # convert to list before returning.
     return list(map(processLine, script))
 
-def parseIntegratedTestScript(test, require_script=True):
+
+class ParserKind(object):
+    """
+    An enumeration representing the style of an integrated test keyword or
+    command.
+
+    TAG: A keyword taking no value. Ex 'END.'
+    COMMAND: A Keyword taking a list of shell commands. Ex 'RUN:'
+    LIST: A keyword taking a comma separated list of value. Ex 'XFAIL:'
+    CUSTOM: A keyword with custom parsing semantics.
+    """
+    TAG = 0
+    COMMAND = 1
+    LIST = 2
+    CUSTOM = 3
+
+
+class IntegratedTestKeywordParser(object):
+    """A parser for LLVM/Clang style integrated test scripts.
+
+    keyword: The keyword to parse for. It must end in either '.' or ':'.
+    kind: An value of ParserKind.
+    parser: A custom parser. This value may only be specified with
+            ParserKind.CUSTOM.
+    """
+    def __init__(self, keyword, kind, parser=None, initial_value=None):
+        if not keyword.endswith('.') and not keyword.endswith(':'):
+            raise ValueError("keyword '%s' must end with either '.' or ':' "
+                             % keyword)
+        if keyword.endswith('.') and kind in \
+                [ParserKind.LIST, ParserKind.COMMAND]:
+            raise ValueError("Keyword '%s' should end in ':'" % keyword)
+
+        elif keyword.endswith(':') and kind in [ParserKind.TAG]:
+            raise ValueError("Keyword '%s' should end in '.'" % keyword)
+        if parser is not None and kind != ParserKind.CUSTOM:
+            raise ValueError("custom parsers can only be specified with "
+                             "ParserKind.CUSTOM")
+        self.keyword = keyword
+        self.kind = kind
+        self.parsed_lines = []
+        self.value = initial_value
+        self.parser = parser
+
+        if kind == ParserKind.COMMAND:
+            self.parser = self._handleCommand
+        elif kind == ParserKind.LIST:
+            self.parser = self._handleList
+        elif kind == ParserKind.TAG:
+            if not keyword.endswith('.'):
+                raise ValueError("keyword '%s' should end with '.'" % keyword)
+            self.parser = self._handleTag
+        elif kind == ParserKind.CUSTOM:
+            if parser is None:
+                raise ValueError("ParserKind.CUSTOM requires a custom parser")
+            self.parser = parser
+        else:
+            raise ValueError("Unknown kind '%s'" % kind)
+
+    def parseLine(self, line_number, line):
+        self.parsed_lines += [(line_number, line)]
+        self.value = self.parser(line_number, line, self.value)
+
+    def getValue(self):
+        return self.value
+
+    @staticmethod
+    def _handleTag(line_number, line, output):
+        """A helper for parsing TAG type keywords"""
+        return (not line.strip() or output)
+
+    @staticmethod
+    def _handleCommand(line_number, line, output):
+        """A helper for parsing COMMAND type keywords"""
+        # Trim trailing whitespace.
+        line = line.rstrip()
+        # Substitute line number expressions
+        line = re.sub('%\(line\)', str(line_number), line)
+
+        def replace_line_number(match):
+            if match.group(1) == '+':
+                return str(line_number + int(match.group(2)))
+            if match.group(1) == '-':
+                return str(line_number - int(match.group(2)))
+        line = re.sub('%\(line *([\+-]) *(\d+)\)', replace_line_number, line)
+        # Collapse lines with trailing '\\'.
+        if output and output[-1][-1] == '\\':
+            output[-1] = output[-1][:-1] + line
+        else:
+            if output is None:
+                output = []
+            output.append(line)
+        return output
+
+    @staticmethod
+    def _handleList(line_number, line, output):
+        """A parser for LIST type keywords"""
+        if output is None:
+            output = []
+        output.extend([s.strip() for s in line.split(',')])
+        return output
+
+
+def parseIntegratedTestScript(test, additional_parsers=[],
+                              require_script=True):
     """parseIntegratedTestScript - Scan an LLVM/Clang style integrated test
     script and extract the lines to 'RUN' as well as 'XFAIL' and 'REQUIRES'
-    and 'UNSUPPORTED' information. If 'require_script' is False an empty script
+    'REQUIRES-ANY' and 'UNSUPPORTED' information.
+
+    If additional parsers are specified then the test is also scanned for the
+    keywords they specify and all matches are passed to the custom parser.
+
+    If 'require_script' is False an empty script
     may be returned. This can be used for test formats where the actual script
     is optional or ignored.
     """
@@ -752,43 +861,36 @@ def parseIntegratedTestScript(test, requ
     requires = []
     requires_any = []
     unsupported = []
-    keywords = ['RUN:', 'XFAIL:', 'REQUIRES:', 'REQUIRES-ANY:',
-                'UNSUPPORTED:', 'END.']
+    builtin_parsers = [
+        IntegratedTestKeywordParser('RUN:', ParserKind.COMMAND,
+                                    initial_value=script),
+        IntegratedTestKeywordParser('XFAIL:', ParserKind.LIST,
+                                    initial_value=test.xfails),
+        IntegratedTestKeywordParser('REQUIRES:', ParserKind.LIST,
+                                    initial_value=requires),
+        IntegratedTestKeywordParser('REQUIRES-ANY:', ParserKind.LIST,
+                                    initial_value=requires_any),
+        IntegratedTestKeywordParser('UNSUPPORTED:', ParserKind.LIST,
+                                    initial_value=unsupported),
+        IntegratedTestKeywordParser('END.', ParserKind.TAG)
+    ]
+    keyword_parsers = {p.keyword: p for p in builtin_parsers}
+    for parser in additional_parsers:
+        if not isinstance(parser, IntegratedTestKeywordParser):
+            raise ValueError('additional parser must be an instance of '
+                             'IntegratedTestKeywordParser')
+        if parser.keyword in keyword_parsers:
+            raise ValueError("Parser for keyword '%s' already exists"
+                             % parser.keyword)
+        keyword_parsers[parser.keyword] = parser
+
     for line_number, command_type, ln in \
-            parseIntegratedTestScriptCommands(sourcepath, keywords):
-        if command_type == 'RUN':
-            # Trim trailing whitespace.
-            ln = ln.rstrip()
-
-            # Substitute line number expressions
-            ln = re.sub('%\(line\)', str(line_number), ln)
-            def replace_line_number(match):
-                if match.group(1) == '+':
-                    return str(line_number + int(match.group(2)))
-                if match.group(1) == '-':
-                    return str(line_number - int(match.group(2)))
-            ln = re.sub('%\(line *([\+-]) *(\d+)\)', replace_line_number, ln)
-
-            # Collapse lines with trailing '\\'.
-            if script and script[-1][-1] == '\\':
-                script[-1] = script[-1][:-1] + ln
-            else:
-                script.append(ln)
-        elif command_type == 'XFAIL':
-            test.xfails.extend([s.strip() for s in ln.split(',')])
-        elif command_type == 'REQUIRES':
-            requires.extend([s.strip() for s in ln.split(',')])
-        elif command_type == 'REQUIRES-ANY':
-            requires_any.extend([s.strip() for s in ln.split(',')])
-        elif command_type == 'UNSUPPORTED':
-            unsupported.extend([s.strip() for s in ln.split(',')])
-        elif command_type == 'END':
-            # END commands are only honored if the rest of the line is empty.
-            if not ln.strip():
-                break
-        else:
-            raise ValueError("unknown script command type: %r" % (
-                    command_type,))
+            parseIntegratedTestScriptCommands(sourcepath,
+                                              keyword_parsers.keys()):
+        parser = keyword_parsers[command_type]
+        parser.parseLine(line_number, ln)
+        if command_type == 'END.' and parser.getValue() is True:
+            break
 
     # Verify the script contains a run line.
     if require_script and not script:
@@ -805,26 +907,30 @@ def parseIntegratedTestScript(test, requ
     if missing_required_features:
         msg = ', '.join(missing_required_features)
         return lit.Test.Result(Test.UNSUPPORTED,
-                               "Test requires the following features: %s" % msg)
+                               "Test requires the following features: %s"
+                               % msg)
     requires_any_features = [f for f in requires_any
                              if f in test.config.available_features]
     if requires_any and not requires_any_features:
         msg = ' ,'.join(requires_any)
         return lit.Test.Result(Test.UNSUPPORTED,
-            "Test requires any of the following features: %s" % msg)
+                               "Test requires any of the following features: "
+                               "%s" % msg)
     unsupported_features = [f for f in unsupported
                             if f in test.config.available_features]
     if unsupported_features:
         msg = ', '.join(unsupported_features)
-        return lit.Test.Result(Test.UNSUPPORTED,
-                    "Test is unsupported with the following features: %s" % msg)
+        return lit.Test.Result(
+            Test.UNSUPPORTED,
+            "Test is unsupported with the following features: %s" % msg)
 
     unsupported_targets = [f for f in unsupported
                            if f in test.suite.config.target_triple]
     if unsupported_targets:
-      return lit.Test.Result(Test.UNSUPPORTED,
-                  "Test is unsupported with the following triple: %s" % (
-                      test.suite.config.target_triple,))
+        return lit.Test.Result(
+            Test.UNSUPPORTED,
+            "Test is unsupported with the following triple: %s" % (
+             test.suite.config.target_triple,))
 
     if test.config.limit_to_features:
         # Check that we have one of the limit_to_features features in requires.
@@ -832,11 +938,12 @@ def parseIntegratedTestScript(test, requ
                                    if f in requires]
         if not limit_to_features_tests:
             msg = ', '.join(test.config.limit_to_features)
-            return lit.Test.Result(Test.UNSUPPORTED,
-                 "Test requires one of the limit_to_features features %s" % msg)
-
+            return lit.Test.Result(
+                Test.UNSUPPORTED,
+                "Test requires one of the limit_to_features features %s" % msg)
     return script
 
+
 def _runShTest(test, litConfig, useExternalSh, script, tmpBase):
     # Create the output directory if it does not already exist.
     lit.util.mkdir_p(os.path.dirname(tmpBase))

Added: llvm/trunk/utils/lit/tests/Inputs/testrunner-custom-parsers/lit.cfg
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/lit/tests/Inputs/testrunner-custom-parsers/lit.cfg?rev=288694&view=auto
==============================================================================
--- llvm/trunk/utils/lit/tests/Inputs/testrunner-custom-parsers/lit.cfg (added)
+++ llvm/trunk/utils/lit/tests/Inputs/testrunner-custom-parsers/lit.cfg Mon Dec  5 14:21:21 2016
@@ -0,0 +1,14 @@
+import lit.formats
+import os
+import lit.Test
+
+class TestParserFormat(lit.formats.FileBasedTest):
+  def execute(self, test, lit_config):
+      return lit.Test.PASS, ''
+
+config.name = 'custom-parsers'
+config.suffixes = ['.txt']
+config.test_format = TestParserFormat()
+config.test_source_root = None
+config.test_exec_root = None
+config.target_triple = 'x86_64-unknown-unknown'

Added: llvm/trunk/utils/lit/tests/Inputs/testrunner-custom-parsers/test.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/lit/tests/Inputs/testrunner-custom-parsers/test.txt?rev=288694&view=auto
==============================================================================
--- llvm/trunk/utils/lit/tests/Inputs/testrunner-custom-parsers/test.txt (added)
+++ llvm/trunk/utils/lit/tests/Inputs/testrunner-custom-parsers/test.txt Mon Dec  5 14:21:21 2016
@@ -0,0 +1,13 @@
+
+// MY_TAG.
+// foo bar baz
+// MY_RUN: baz
+// MY_LIST: one, two
+// MY_LIST: three, four
+// MY_RUN: foo \
+// MY_RUN: bar
+//
+// MY_CUSTOM: a b c
+//
+// END.
+// MY_LIST: five

Added: llvm/trunk/utils/lit/tests/unit/TestRunner.py
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/lit/tests/unit/TestRunner.py?rev=288694&view=auto
==============================================================================
--- llvm/trunk/utils/lit/tests/unit/TestRunner.py (added)
+++ llvm/trunk/utils/lit/tests/unit/TestRunner.py Mon Dec  5 14:21:21 2016
@@ -0,0 +1,114 @@
+# RUN: %{python} %s
+#
+# END.
+
+
+import unittest
+import platform
+import os.path
+import tempfile
+
+import lit
+from lit.TestRunner import ParserKind, IntegratedTestKeywordParser, \
+                           parseIntegratedTestScript
+
+
+class TestIntegratedTestKeywordParser(unittest.TestCase):
+    inputTestCase = None
+
+    @staticmethod
+    def load_keyword_parser_lit_tests():
+        """
+        Create and load the LIT test suite and test objects used by
+        TestIntegratedTestKeywordParser
+        """
+        # Create the global config object.
+        lit_config = lit.LitConfig.LitConfig(progname='lit',
+                                             path=[],
+                                             quiet=False,
+                                             useValgrind=False,
+                                             valgrindLeakCheck=False,
+                                             valgrindArgs=[],
+                                             noExecute=False,
+                                             debug=False,
+                                             isWindows=(
+                                               platform.system() == 'Windows'),
+                                             params={})
+        TestIntegratedTestKeywordParser.litConfig = lit_config
+        # Perform test discovery.
+        test_path = os.path.dirname(os.path.dirname(__file__))
+        inputs = [os.path.join(test_path, 'Inputs/testrunner-custom-parsers/')]
+        assert os.path.isdir(inputs[0])
+        run = lit.run.Run(lit_config,
+                          lit.discovery.find_tests_for_inputs(lit_config, inputs))
+        assert len(run.tests) == 1 and "there should only be one test"
+        TestIntegratedTestKeywordParser.inputTestCase = run.tests[0]
+
+    @staticmethod
+    def make_parsers():
+        def custom_parse(line_number, line, output):
+            if output is None:
+                output = []
+            output += [part for part in line.split(' ') if part.strip()]
+            return output
+
+        return [
+            IntegratedTestKeywordParser("MY_TAG.", ParserKind.TAG),
+            IntegratedTestKeywordParser("MY_DNE_TAG.", ParserKind.TAG),
+            IntegratedTestKeywordParser("MY_LIST:", ParserKind.LIST),
+            IntegratedTestKeywordParser("MY_RUN:", ParserKind.COMMAND),
+            IntegratedTestKeywordParser("MY_CUSTOM:", ParserKind.CUSTOM,
+                                        custom_parse)
+        ]
+
+    @staticmethod
+    def get_parser(parser_list, keyword):
+        for p in parser_list:
+            if p.keyword == keyword:
+                return p
+        assert False and "parser not found"
+
+    @staticmethod
+    def parse_test(parser_list):
+        script = parseIntegratedTestScript(
+            TestIntegratedTestKeywordParser.inputTestCase,
+            additional_parsers=parser_list, require_script=False)
+        assert not isinstance(script, lit.Test.Result)
+        assert isinstance(script, list)
+        assert len(script) == 0
+
+    def test_tags(self):
+        parsers = self.make_parsers()
+        self.parse_test(parsers)
+        tag_parser = self.get_parser(parsers, 'MY_TAG.')
+        dne_tag_parser = self.get_parser(parsers, 'MY_DNE_TAG.')
+        self.assertTrue(tag_parser.getValue())
+        self.assertFalse(dne_tag_parser.getValue())
+
+    def test_lists(self):
+        parsers = self.make_parsers()
+        self.parse_test(parsers)
+        list_parser = self.get_parser(parsers, 'MY_LIST:')
+        self.assertItemsEqual(list_parser.getValue(),
+                              ['one', 'two', 'three', 'four'])
+
+    def test_commands(self):
+        parsers = self.make_parsers()
+        self.parse_test(parsers)
+        cmd_parser = self.get_parser(parsers, 'MY_RUN:')
+        value = cmd_parser.getValue()
+        self.assertEqual(len(value), 2)  # there are only two run lines
+        self.assertEqual(value[0].strip(), 'baz')
+        self.assertEqual(value[1].strip(), 'foo  bar')
+
+    def test_custom(self):
+        parsers = self.make_parsers()
+        self.parse_test(parsers)
+        custom_parser = self.get_parser(parsers, 'MY_CUSTOM:')
+        value = custom_parser.getValue()
+        self.assertItemsEqual(value, ['a', 'b', 'c'])
+
+
+if __name__ == '__main__':
+    TestIntegratedTestKeywordParser.load_keyword_parser_lit_tests()
+    unittest.main(verbosity=2)