refactor and add documentation.

jbweston · jbweston · commit 9e9cf800aa6b · 2018-10-24T13:56:13.000+02:00
diff --git a/jupyter_sphinx/execute.py b/jupyter_sphinx/execute.py
@@ -29,46 +29,28 @@
 logger = logging.getLogger(__name__)
 
 
-def blank_nb(kernel_name):
-    try:
-        spec = get_kernel_spec(kernel_name)
-    except NoSuchKernel as e:
-        raise ExtensionError('Unable to find kernel', orig_exc=e)
-    return nbformat.v4.new_notebook(metadata={
-        'kernelspec': {
-            'display_name': spec.display_name,
-            'language': spec.language,
-            'name': kernel_name,
-        }
-    })
-
-
-def split_on(pred, it):
-    """Split an iterator wherever a predicate is True."""
-
-    counter = 0
-
-    def count(x):
-        nonlocal counter
-        if pred(x):
-            counter += 1
-        return counter
-
-    # Return iterable of lists to ensure that we don't lose our
-    # place in the iterator
-    return (list(x) for _, x in groupby(it, count))
-
-
-class Cell(docutils.nodes.container):
-    """Container for input/output from Jupyter kernel"""
-    pass
-
-class KernelNode(docutils.nodes.Element):
-    """Dummy node for signaling a new kernel"""
-    pass
-
+### Directives and their associated doctree nodes
 
 class JupyterKernel(Directive):
+    """Specify a new Jupyter Kernel.
+
+    Arguments
+    ---------
+    kernel_name : str (optional)
+        The name of the kernel in which to execute future Jupyter cells, as
+        reported by executing 'jupyter kernelspec list' on the command line.
+
+    Options
+    -------
+    id : str
+        An identifier for *this kernel instance*. Used to name any output
+        files generated when executing the Jupyter cells (e.g. images
+        produced by cells, or a script containing the cell inputs).
+
+    Content
+    -------
+    None
+    """
 
     optional_arguments = 1
     final_argument_whitespace = False
@@ -79,15 +61,53 @@ class JupyterKernel(Directive):
     }
 
     def run(self):
-        kernel_name = self.arguments[0] if self.arguments else ''
-        return [KernelNode(
+        return [JupyterKernelNode(
+            kernel_name=self.arguments[0] if self.arguments else '',
+            kernel_id=self.options.get('id', ''),
+        )]
+
+
+class JupyterKernelNode(docutils.nodes.Element):
+    """Inserted into doctree whenever a JupyterKernel directive is encountered.
+
+    Used as a marker to signal that the following JupyterCellNodes (until the
+    next, if any, JupyterKernelNode) should be executed in a separate kernel.
+    """
+
+    def __init__(self, kernel_name, kernel_id):
+        super().__init__(
             '',
             kernel_name=kernel_name.strip(),
-            kernel_id=self.options.get('id', '').strip(),
-        )]
+            kernel_id=kernel_id.strip(),
+        )
 
 
 class JupyterCell(Directive):
+    """Define a code cell to be later executed in a Jupyter kernel.
+
+    The content of the directive is the code to execute. Code is not
+    executed when the directive is parsed, but later during a doctree
+    transformation.
+
+    Arguments
+    ---------
+    filename : str (optional)
+        If provided, a path to a file containing code.
+
+    Options
+    -------
+    hide-code : bool
+        If provided, the code will not be displayed in the output.
+    hide-output : bool
+        If provided, the cell output will not be displayed in the output.
+    code-below : bool
+        If provided, the code will be shown below the cell output.
+
+    Content
+    -------
+    code : str
+        A code cell.
+    """
 
     required_arguments = 0
     optional_arguments = 1
@@ -122,17 +142,98 @@ def run(self):
             self.assert_has_content()
             content = self.content
 
-        # Cell only contains the input for now; we will execute the cell
-        # and insert the output when the whole document has been parsed.
-        return [Cell('',
+        return [JupyterCellNode(content, self.options)]
+
+
+class JupyterCellNode(docutils.nodes.container):
+    """Inserted into doctree whenever a JupyterCell directive is encountered.
+
+    Holds the cell's source code as a literal block; the outputs produced
+    by executing the cell are attached later by ExecuteJupyterCells.
+    """
+
+    def __init__(self, source_lines, options):
+        return super().__init__(
+            '',
             docutils.nodes.literal_block(
-                text='\n'.join(content),
+                text='\n'.join(source_lines),
             ),
-            hide_code=('hide-code' in self.options),
-            hide_output=('hide-output' in self.options),
-            code_below=('code-below' in self.options),
-        )]
+            hide_code=('hide-code' in options),
+            hide_output=('hide-output' in options),
+            code_below=('code-below' in options),
+        )
+
+
+### Doctree transformations
 
+class ExecuteJupyterCells(SphinxTransform):
+    """Execute code cells in Jupyter kernels.
+
+    Traverses the doctree to find JupyterKernel and JupyterCell nodes,
+    then executes the code in the JupyterCell nodes in sequence, starting
+    a new kernel every time a JupyterKernel node is encountered. The output
+    from each code cell is inserted into the doctree.
+    """
+    default_priority = 180  # An early transform, idk
+
+    def apply(self):
+        doctree = self.document
+        doc_relpath = os.path.dirname(self.env.docname)  # relative to src dir
+        docname = os.path.basename(self.env.docname)
+        default_kernel = self.config.jupyter_execute_default_kernel
+        default_names = default_notebook_names(docname)
+
+        # Check if we have anything to execute.
+        if not doctree.traverse(JupyterCellNode):
+            return
+
+        logger.info('executing {}'.format(docname))
+        output_dir = os.path.join(output_directory(self.env), doc_relpath)
+
+        # Start new notebook whenever a JupyterKernelNode is encountered
+        jupyter_nodes = (JupyterCellNode, JupyterKernelNode)
+        nodes_by_notebook = split_on(
+            lambda n: isinstance(n, JupyterKernelNode),
+            doctree.traverse(lambda n: isinstance(n, jupyter_nodes))
+        )
+
+        for first, *nodes in nodes_by_notebook:
+            if isinstance(first, JupyterKernelNode):
+                kernel_name = first['kernel_name'] or default_kernel
+                file_name = first['kernel_id'] or next(default_names)
+            else:
+                nodes = (first, *nodes)
+                kernel_name = default_kernel
+                file_name = next(default_names)
+
+            notebook = execute_cells(
+                kernel_name,
+                [nbformat.v4.new_code_cell(node.astext()) for node in nodes],
+                self.config.jupyter_execute_kwargs,
+            )
+
+            # Highlight the code cells now that we know what language they are
+            for node in nodes:
+                source = node.children[0]
+                lexer = notebook.metadata.language_info.pygments_lexer
+                source.attributes['language'] = lexer
+
+            # Write certain cell outputs (e.g. images) to separate files, and
+            # modify the metadata of the associated cells in 'notebook' to
+            # include the path to the output file.
+            write_notebook_output(notebook, output_dir, file_name)
+
+            # Add doctree nodes for cell outputs.
+            for node, cell in zip(nodes, notebook.cells):
+                output_nodes = cell_output_to_nodes(
+                    cell,
+                    self.config.jupyter_execute_data_priority,
+                    sphinx_abs_dir(self.env)
+                )
+                attach_outputs(output_nodes, node)
+
+
+### Roles
 
 def jupyter_download_role(name, rawtext, text, lineno, inliner):
     _, filetype = name.split(':')
@@ -147,11 +248,43 @@ def jupyter_download_role(name, rawtext, text, lineno, inliner):
     return [node], []
 
 
+### Utilities
+
+def blank_nb(kernel_name):
+    try:
+        spec = get_kernel_spec(kernel_name)
+    except NoSuchKernel as e:
+        raise ExtensionError('Unable to find kernel', orig_exc=e)
+    return nbformat.v4.new_notebook(metadata={
+        'kernelspec': {
+            'display_name': spec.display_name,
+            'language': spec.language,
+            'name': kernel_name,
+        }
+    })
+
+
+def split_on(pred, it):
+    """Split an iterator wherever a predicate is True."""
+
+    counter = 0
+
+    def count(x):
+        nonlocal counter
+        if pred(x):
+            counter += 1
+        return counter
+
+    # Return iterable of lists to ensure that we don't lose our
+    # place in the iterator
+    return (list(x) for _, x in groupby(it, count))
+
+
 def cell_output_to_nodes(cell, data_priority, dir):
     """Convert a jupyter cell with outputs and filenames to doctree nodes.
 
     Parameters
-    ==========
+    ----------
     cell : jupyter cell
     data_priority : list of mime types
         Which media types to prioritize.
@@ -304,63 +437,6 @@ def sphinx_abs_dir(env):
     )
 
 
-class ExecuteJupyterCells(SphinxTransform):
-    default_priority = 180  # An early transform, idk
-
-    def apply(self):
-        doctree = self.document
-        doc_relpath = os.path.dirname(self.env.docname)  # relative to src dir
-        docname = os.path.basename(self.env.docname)
-        default_kernel = self.config.jupyter_execute_default_kernel
-        default_names = default_notebook_names(docname)
-
-        # Check if we have anything to execute.
-        if not doctree.traverse(Cell):
-            return
-
-        logger.info('executing {}'.format(docname))
-        output_dir = os.path.join(output_directory(self.env), doc_relpath)
-
-        # Start new notebook whenever a KernelNode is encountered
-        nodes_by_notebook = split_on(
-            lambda n: isinstance(n, KernelNode),
-            doctree.traverse(lambda n: isinstance(n, (Cell, KernelNode)))
-        )
-
-        for first, *nodes in nodes_by_notebook:
-            if isinstance(first, KernelNode):
-                kernel_name = first['kernel_name'] or default_kernel
-                file_name = first['kernel_id'] or next(default_names)
-            else:
-                nodes = (first, *nodes)
-                kernel_name = default_kernel
-                file_name = next(default_names)
-
-            notebook = execute_cells(
-                kernel_name,
-                [nbformat.v4.new_code_cell(node.astext()) for node in nodes],
-                self.config.jupyter_execute_kwargs,
-            )
-
-            for node in nodes:
-                source = node.children[0]
-                lexer = notebook.metadata.language_info.pygments_lexer
-                source.attributes['language'] = lexer
-
-            # Modifies 'notebook' in-place, adding metadata specifying the
-            # filenames of the saved outputs.
-            write_notebook_output(notebook, output_dir, file_name)
-            # Add doctree nodes for cell output; images reference the filenames
-            # we just wrote to; sphinx copies these when writing outputs.
-            for node, cell in zip(nodes, notebook.cells):
-                output_nodes = cell_output_to_nodes(
-                    cell,
-                    self.config.jupyter_execute_data_priority,
-                    sphinx_abs_dir(self.env)
-                )
-                attach_outputs(output_nodes, node)
-
-
 def setup(app):
     # Configuration
     app.add_config_value(
@@ -386,13 +462,13 @@ def setup(app):
         'env',
     )
 
-    # KernelNode is just a doctree marker for the ExecuteJupyterCells
-    # transform, so we don't actually render it.
+    # JupyterKernelNode is just a doctree marker for the
+    # ExecuteJupyterCells transform, so we don't actually render it.
     def skip(self, node):
         raise docutils.nodes.SkipNode
 
     app.add_node(
-        KernelNode,
+        JupyterKernelNode,
         html=(skip, None),
         latex=(skip, None),
         textinfo=(skip, None),
@@ -401,13 +477,15 @@ def skip(self, node):
     )
 
 
+    # JupyterCellNode is a container that holds the input and
+    # any output, so we render it as a container.
     render_container = (
         lambda self, node: self.visit_container(node),
         lambda self, node: self.depart_container(node),
     )
 
     app.add_node(
-        Cell,
+        JupyterCellNode,
         html=render_container,
         latex=render_container,
         textinfo=render_container,