[llvm] 92787e3 - [LLVM][TableGen] Support combined cells in jupyter kernel

David Spickett via llvm-commits llvm-commits at lists.llvm.org
Mon Jan 23 06:19:42 PST 2023


Author: David Spickett
Date: 2023-01-23T14:19:36Z
New Revision: 92787e3e3409890001171bdd01ced86ccdf4b77d

URL: https://github.com/llvm/llvm-project/commit/92787e3e3409890001171bdd01ced86ccdf4b77d
DIFF: https://github.com/llvm/llvm-project/commit/92787e3e3409890001171bdd01ced86ccdf4b77d.diff

LOG: [LLVM][TableGen] Support combined cells in jupyter kernel

This changes the default mode to cache the code blocks we're
asked to compile until we see the new `%reset` magic to clear that cache.

This means that if you run several cells in sequence, at the end you're
compiling the code from all the cells at once.

This emulates what the ipython kernel does where it uses a persistent
interpreter state by default.

`%reset` will only be acted on when it's in the cell we're asked to run
(the newest code).

`%args` we will use the most recent value we have cached.

The example notebook has been updated to explain that.

Depends on D132378

Reviewed By: jpienaar

Differential Revision: https://reviews.llvm.org/D132646

Added: 
    

Modified: 
    llvm/utils/TableGen/jupyter/LLVM_TableGen.ipynb
    llvm/utils/TableGen/jupyter/LLVM_TableGen.md
    llvm/utils/TableGen/jupyter/tablegen_kernel/kernel.py

Removed: 
    


################################################################################
diff  --git a/llvm/utils/TableGen/jupyter/LLVM_TableGen.ipynb b/llvm/utils/TableGen/jupyter/LLVM_TableGen.ipynb
index 82e5282637868..058d9a5d38239 100644
--- a/llvm/utils/TableGen/jupyter/LLVM_TableGen.ipynb
+++ b/llvm/utils/TableGen/jupyter/LLVM_TableGen.ipynb
@@ -34,6 +34,7 @@
     }
    ],
    "source": [
+    "%reset\n",
     "// This is some tablegen\n",
     "class Foo {}"
    ]
@@ -65,6 +66,7 @@
     }
    ],
    "source": [
+    "%reset\n",
     "This is not tablegen."
    ]
   },
@@ -96,6 +98,7 @@
     }
    ],
    "source": [
+    "%reset\n",
     "class Stuff {}\n",
     "def thing : Stuff {}"
    ]
@@ -105,13 +108,50 @@
    "id": "3f29d1a0",
    "metadata": {},
    "source": [
-    "Currently cells are not connected. Meaning that this next cell doesn't have the class from the previous one."
+    "By default cells are connected. Meaning that we cache the code and magic directives from the previously run cells.\n",
+    "\n",
+    "This means that the next cell still sees the `Stuff` class."
    ]
   },
   {
    "cell_type": "code",
    "execution_count": 4,
    "id": "3a204c70",
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "------------- Classes -----------------\n",
+      "class Stuff {\n",
+      "}\n",
+      "------------- Defs -----------------\n",
+      "def other_thing {\t// Stuff\n",
+      "}\n",
+      "def thing {\t// Stuff\n",
+      "}\n"
+     ]
+    }
+   ],
+   "source": [
+    "def other_thing : Stuff {}"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "473b0a1c",
+   "metadata": {},
+   "source": [
+    "You can use the magic `%reset` to clear this cache and start fresh."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "1f674a03",
    "metadata": {},
    "outputs": [
     {
@@ -125,6 +165,7 @@
     }
    ],
    "source": [
+    "%reset\n",
     "def other_thing : Stuff {}"
    ]
   },
@@ -140,7 +181,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 6,
    "id": "2586893b",
    "metadata": {},
    "outputs": [
@@ -155,6 +196,7 @@
     }
    ],
    "source": [
+    "%reset\n",
     "class Thing <int A, int B> {\n",
     "    int num = A;\n",
     "}"
@@ -170,7 +212,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 7,
    "id": "ae078bc4",
    "metadata": {},
    "outputs": [
@@ -187,10 +229,7 @@
     }
    ],
    "source": [
-    "%args --no-warn-on-unused-template-args\n",
-    "class Thing <int A, int B> {\n",
-    "    int num = A;\n",
-    "}"
+    "%args --no-warn-on-unused-template-args"
    ]
   },
   {
@@ -198,14 +237,36 @@
    "id": "316b9543",
    "metadata": {},
    "source": [
-    "The last `%args` in a cell will be the arguments used."
+    "If you have a run of cells without a `%reset`, the most recent `%args` is used."
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 8,
    "id": "9634170c",
    "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "------------- Classes -----------------\n",
+      "class Thing<int Thing:A = ?, int Thing:B = ?> {\n",
+      "  int num = Thing:A;\n",
+      "}\n",
+      "------------- Defs -----------------\n"
+     ]
+    }
+   ],
+   "source": [
+    "// This passes --no-warn-on-unused-template-args"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "fa15b542",
+   "metadata": {},
    "outputs": [
     {
      "name": "stderr",
@@ -218,20 +279,43 @@
     }
    ],
    "source": [
-    "%args --no-warn-on-unused-template-args\n",
     "%args\n",
-    "class Thing <int A, int B> {\n",
-    "    int num = A;\n",
-    "}"
+    "// Now we're not passing the argument so the warning comes back."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e112f1d4",
+   "metadata": {},
+   "source": [
+    "If there are many `%args` in a cell, the last one is used."
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "id": "29bd004b",
+   "execution_count": 10,
+   "id": "2ac46c7a",
    "metadata": {},
-   "outputs": [],
-   "source": []
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "<stdin>:1:18: warning: unused template argument: Thing:A\n",
+      "class Thing <int A, int B> {}\n",
+      "                 ^\n",
+      "<stdin>:1:25: warning: unused template argument: Thing:B\n",
+      "class Thing <int A, int B> {}\n",
+      "                        ^\n"
+     ]
+    }
+   ],
+   "source": [
+    "%reset\n",
+    "%args --no-warn-on-unused-template-args\n",
+    "%args\n",
+    "class Thing <int A, int B> {}"
+   ]
   }
  ],
  "metadata": {

diff  --git a/llvm/utils/TableGen/jupyter/LLVM_TableGen.md b/llvm/utils/TableGen/jupyter/LLVM_TableGen.md
index 52a6a1de66e96..a8a6f7cfb9895 100644
--- a/llvm/utils/TableGen/jupyter/LLVM_TableGen.md
+++ b/llvm/utils/TableGen/jupyter/LLVM_TableGen.md
@@ -4,6 +4,7 @@ This notebook is running `llvm-tblgen`.
 
 
 ```tablegen
+%reset
 // This is some tablegen
 class Foo {}
 ```
@@ -18,6 +19,7 @@ Errors printed to stderr are shown.
 
 
 ```tablegen
+%reset
 This is not tablegen.
 ```
 
@@ -30,6 +32,7 @@ Add some classes to get some output.
 
 
 ```tablegen
+%reset
 class Stuff {}
 def thing : Stuff {}
 ```
@@ -42,11 +45,31 @@ def thing : Stuff {}
     }
 
 
-Currently cells are not connected. Meaning that this next cell doesn't have the class from the previous one.
+By default cells are connected. Meaning that we cache the code and magic directives from the previously run cells.
+
+This means that the next cell still sees the `Stuff` class.
 
 
 ```tablegen
 def other_thing : Stuff {}
+```
+
+    ------------- Classes -----------------
+    class Stuff {
+    }
+    ------------- Defs -----------------
+    def other_thing {	// Stuff
+    }
+    def thing {	// Stuff
+    }
+
+
+You can use the magic `%reset` to clear this cache and start fresh.
+
+
+```tablegen
+%reset
+def other_thing : Stuff {}
 ```
 
     <stdin>:1:19: error: Couldn't find class 'Stuff'
@@ -60,6 +83,7 @@ For example, here we have some code that shows a warning.
 
 
 ```tablegen
+%reset
 class Thing <int A, int B> {
     int num = A;
 }
@@ -75,9 +99,6 @@ We can pass an argument to ignore that warning.
 
 ```tablegen
 %args --no-warn-on-unused-template-args
-class Thing <int A, int B> {
-    int num = A;
-}
 ```
 
     ------------- Classes -----------------
@@ -87,15 +108,24 @@ class Thing <int A, int B> {
     ------------- Defs -----------------
 
 
-The last `%args` in a cell will be the arguments used.
+If you have a run of cells without a `%reset`, the most recent `%args` is used.
+
+
+```tablegen
+// This passes --no-warn-on-unused-template-args
+```
+
+    ------------- Classes -----------------
+    class Thing<int Thing:A = ?, int Thing:B = ?> {
+      int num = Thing:A;
+    }
+    ------------- Defs -----------------
+
 
 
 ```tablegen
-%args --no-warn-on-unused-template-args
 %args
-class Thing <int A, int B> {
-    int num = A;
-}
+// Now we're not passing the argument so the warning comes back.
 ```
 
     <stdin>:1:25: warning: unused template argument: Thing:B
@@ -103,7 +133,20 @@ class Thing <int A, int B> {
                             ^
 
 
+If there are many `%args` in a cell, the last one is used.
 
-```tablegen
 
+```tablegen
+%reset
+%args --no-warn-on-unused-template-args
+%args
+class Thing <int A, int B> {}
 ```
+
+    <stdin>:1:18: warning: unused template argument: Thing:A
+    class Thing <int A, int B> {}
+                     ^
+    <stdin>:1:25: warning: unused template argument: Thing:B
+    class Thing <int A, int B> {}
+                            ^
+

diff  --git a/llvm/utils/TableGen/jupyter/tablegen_kernel/kernel.py b/llvm/utils/TableGen/jupyter/tablegen_kernel/kernel.py
index 21d04c8f59294..074d58a4caaca 100644
--- a/llvm/utils/TableGen/jupyter/tablegen_kernel/kernel.py
+++ b/llvm/utils/TableGen/jupyter/tablegen_kernel/kernel.py
@@ -17,15 +17,17 @@ class TableGenKernel(Kernel):
     All input is treated as TableGen unless the first non whitespace character
     is "%" in which case it is a "magic" line.
 
-    The only magic line supported is "%args". The rest of the line is the
-    arguments passed to llvm-tblgen.
+    The supported magic is:
+    * %args - to set the arguments passed to llvm-tblgen.
+    * %reset - to reset the cached code and magic state.
 
-    This is "cell magic" meaning it applies to the whole cell. Therefore
+    These are "cell magic" meaning it applies to the whole cell. Therefore
     it must be the first line, or part of a run of magic lines starting
     from the first line.
 
     ```tablgen
     %args
+    %reset
     %args --print-records --print-detailed-records
     class Stuff {
       string Name;
@@ -51,6 +53,12 @@ def a_thing : Stuff {}
     def __init__(self, **kwargs):
         Kernel.__init__(self, **kwargs)
         self._executable = None
+        # A list of (code, magic) tuples.
+        # All the previous cell's code we have run since the last reset.
+        # This emulates a persistent state like a Python interpreter would have.
+        self._previous_code = ""
+        # The most recent set of magic since the last reset.
+        self._previous_magic = {}
 
     @property
     def banner(self):
@@ -73,32 +81,35 @@ def executable(self):
         return self._executable
 
     def get_magic(self, code):
-        """Given a block of code remove the magic lines from it and return
-        a tuple of the code lines (newline joined) and a list of magic lines
-        with their leading spaces removed.
+        """Given a block of code remove the magic lines from it.
+        Returns a tuple of newline joined code lines and a dictionary of magic.
+        Where the key is the magic name (minus the %) and the values are lists
+        of the arguments to the magic.
 
         Currently we only look for "cell magic" which must be at the start of
         the cell. Meaning the first line, or a set of lines beginning with %
         that come before the first non-magic line.
 
         >>> k.get_magic("")
-        ('', [])
+        ('', {})
         >>> k.get_magic("Hello World.\\nHello again.")
-        ('Hello World.\\nHello again.', [])
+        ('Hello World.\\nHello again.', {})
         >>> k.get_magic("   %foo a b c")
-        ('', ['%foo a b c'])
-        >>> k.get_magic("   %foo a b c\\n%foo\\nFoo")
-        ('Foo', ['%foo a b c', '%foo'])
+        ('', {'foo': ['a', 'b', 'c']})
+        >>> k.get_magic("%foo\\n   %foo a b c\\nFoo")
+        ('Foo', {'foo': ['a', 'b', 'c']})
+        >>> k.get_magic("%foo\\n%bar\\nFoo")
+        ('Foo', {'foo': [], 'bar': []})
         >>> k.get_magic("Foo\\n%foo\\nFoo")
-        ('Foo\\n%foo\\nFoo', [])
-        >>> k.get_magic("%foo\\n   Foo\\n%foo")
-        ('   Foo\\n%foo', ['%foo'])
-        >>> k.get_magic("%foo\\n\\n%foo")
-        ('\\n%foo', ['%foo'])
-        >>> k.get_magic("%foo\\n \\n%foo")
-        (' \\n%foo', ['%foo'])
+        ('Foo\\n%foo\\nFoo', {})
+        >>> k.get_magic("%bar\\n\\n%foo")
+        ('\\n%foo', {'bar': []})
+        >>> k.get_magic("%foo a b\\n   Foo\\n%foo c d")
+        ('   Foo\\n%foo c d', {'foo': ['a', 'b']})
+        >>> k.get_magic("%foo a b\\n \\n%foo c d")
+        (' \\n%foo c d', {'foo': ['a', 'b']})
         """
-        magic_lines = []
+        magic = {}
         code_lines = []
 
         lines = code.splitlines()
@@ -106,30 +117,57 @@ def get_magic(self, code):
             line = lines.pop(0)
             possible_magic = line.lstrip()
             if possible_magic.startswith("%"):
-                magic_lines.append(possible_magic)
+                magic_parts = possible_magic.split()
+                # Key has the % removed
+                magic[magic_parts[0][1:]] = magic_parts[1:]
             else:
                 code_lines = [line, *lines]
                 break
 
-        return "\n".join(code_lines), magic_lines
+        return "\n".join(code_lines), magic
+
+    def get_code_and_args(self, new_code):
+        """Get the code that do_execute should use, taking into account
+        the code from any cached cells.
+
+        Returns the code to compile and the arguments to use to do so.
+
+        >>> k._previous_code = ""
+        >>> k._previous_magic = {}
+        >>> k.get_code_and_args("")
+        ('', [])
+        >>> k.get_code_and_args("%args 1\\nSome code")
+        ('Some code', ['1'])
+        >>> k.get_code_and_args("%args 2\\nSome more code")
+        ('Some code\\nSome more code', ['2'])
+        >>> k.get_code_and_args("%reset\\n%args 3 4\\nSome new code")
+        ('Some new code', ['3', '4'])
+        >>> k.get_code_and_args("%reset\\nSome new code")
+        ('Some new code', [])
+        """
+        new_code, new_magic = self.get_magic(new_code)
+
+        # Only a reset in the newest cell actually does a reset.
+        if new_magic.get("reset") is not None:
+            self._previous_code = new_code
+            self._previous_magic = new_magic
+        else:
+            self._previous_code += ("\n" if self._previous_code else "") + new_code
+            self._previous_magic.update(new_magic)
+
+        return self._previous_code, self._previous_magic.get("args", [])
 
     def do_execute(
         self, code, silent, store_history=True, user_expressions=None, allow_stdin=False
     ):
         """Execute user code using llvm-tblgen binary."""
-        code, magic = self.get_magic(code)
-
-        extra_args = []
-        for m in magic:
-            if m.startswith("%args"):
-                # Last one in wins
-                extra_args = m.split()[1:]
+        all_code, args = self.get_code_and_args(code)
 
         with tempfile.TemporaryFile("w+") as f:
-            f.write(code)
+            f.write(all_code)
             f.seek(0)
             got = subprocess.run(
-                [self.executable, *extra_args],
+                [self.executable, *args],
                 stdin=f,
                 stderr=subprocess.PIPE,
                 stdout=subprocess.PIPE,


        


More information about the llvm-commits mailing list