Skip to content

Commit 525a1ee

Browse files
committed
Use Jupyter notebook HTML rendering for a more faithful appearance
1 parent 0c94d61 commit 525a1ee

File tree

4 files changed

+11789
-46
lines changed

4 files changed

+11789
-46
lines changed

build.py

+59-46
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import argparse
1212
import subprocess
1313
import shutil
14+
import lxml.html
1415

1516

1617
def main():
@@ -210,13 +211,14 @@ def convert_file(dst_path, fn, editors):
210211
211212
"""
212213
print(fn)
213-
subprocess.check_call(['jupyter', 'nbconvert', '--to', 'rst',
214+
subprocess.check_call(['jupyter', 'nbconvert', '--to', 'html',
214215
'--output-dir', os.path.abspath(dst_path),
215216
os.path.abspath(fn)],
216217
cwd=dst_path, stderr=subprocess.STDOUT)
217218

218219
basename = os.path.splitext(os.path.basename(fn))[0]
219220
rst_fn = os.path.join(dst_path, basename + '.rst')
221+
html_fn = os.path.join(dst_path, basename + '.html')
220222

221223
title = None
222224
tags = set()
@@ -225,59 +227,70 @@ def convert_file(dst_path, fn, editors):
225227

226228
lines = []
227229

228-
with open(rst_fn, 'r') as f:
229-
prev_line = ''
230-
for orig_line in f:
231-
line = orig_line.strip()
232-
m = re.match('^===+\s*$', line)
233-
m2 = re.match('^---+\s*$', line)
234-
if m or m2:
235-
if prev_line and len(line) >= 1+len(prev_line)//2 and not title:
236-
title = prev_line.strip()
237-
lines = lines[:-1]
238-
continue
239-
240-
m = re.match('^TAGS:\s*(.*)\s*$', line)
241-
if m:
242-
tag_line = m.group(1).strip().replace(';', ',')
243-
tags.update([x.strip() for x in tag_line.split(",")])
244-
continue
245-
246-
m = re.match('^AUTHORS:\s*(.*)\s*$', line)
247-
if m:
248-
# Author lines override editors
249-
if legacy_editors:
250-
editors = []
251-
legacy_editors = False
252-
author_line = m.group(1).strip().replace(';', ',')
253-
for author in author_line.split(","):
254-
author = author.strip()
255-
if author and author not in editors:
256-
editors.append(author)
257-
continue
230+
# Parse and munge HTML
231+
tree = lxml.html.parse(html_fn)
232+
os.unlink(html_fn)
233+
234+
root = tree.getroot()
235+
head = root.find('head')
236+
container, = root.xpath("//div[@id='notebook-container']")
237+
238+
headers = container.xpath('//h1')
239+
if headers:
240+
title = headers[0].text
241+
if isinstance(title, unicode):
242+
title = title.encode('utf-8')
243+
h1_parent = headers[0].getparent()
244+
h1_parent.remove(headers[0])
245+
246+
lines.extend([u".. raw:: html", u""])
247+
248+
for element in head.getchildren():
249+
if element.tag in ('script',):
250+
text = lxml.html.tostring(element)
251+
lines.extend(" " + x for x in text.splitlines())
252+
253+
text = lxml.html.tostring(container)
254+
255+
m = re.search(ur'<p>TAGS:\s*(.*)\s*</p>', text)
256+
if m:
257+
tag_line = m.group(1).strip().replace(';', ',')
258+
if isinstance(tag_line, unicode):
259+
tag_line = tag_line.encode('utf-8')
260+
tags.update([x.strip() for x in tag_line.split(",")])
261+
text = text[:m.start()] + text[m.end():]
262+
263+
m = re.search(ur'<p>AUTHORS:\s*(.*)\s*</p>', text)
264+
if m:
265+
# Author lines override editors
266+
if legacy_editors:
267+
editors = []
268+
legacy_editors = False
269+
author_line = m.group(1).strip().replace(';', ',')
270+
if isinstance(author_line, unicode):
271+
author_line = author_line.encode('utf-8')
272+
for author in author_line.split(","):
273+
author = author.strip()
274+
if author and author not in editors:
275+
editors.append(author)
276+
277+
text = text[:m.start()] + text[m.end():]
278+
279+
text = text.replace(u'attachments/{0}/'.format(basename),
280+
u'../_downloads/')
281+
282+
lines.extend(u" " + x for x in text.splitlines())
283+
lines.append(u"")
258284

259-
prev_line = line
260-
lines.append(orig_line)
261-
262-
text = "".join(lines)
285+
# Produce output
286+
text = u"\n".join(lines).encode('utf-8')
263287

264288
if not title:
265289
title = basename
266290

267291
authors = ", ".join(editors)
268292
text = "{0}\n{1}\n\n{2}".format(title, "="*len(title), text)
269293

270-
text = re.sub(r'`(.*?) <files/(attachments/.*?)>`__',
271-
r':download:`\1 <\2>`',
272-
text,
273-
flags=re.M)
274-
text = re.sub(r'^TAGS:.*$', '', text, flags=re.M)
275-
text = re.sub(r'(figure|image):: files/attachments/', r'\1:: attachments/', text, flags=re.M)
276-
text = re.sub(r' <files/attachments/', r' <attachments/', text, flags=re.M)
277-
text = re.sub(r'.. parsed-literal::', r'.. parsed-literal::\n :class: ipy-out', text, flags=re.M)
278-
text = re.sub(r'`([^`<]*)\s+<(?!attachments/)([^:.>]*?)(?:.html)?>`__', r':doc:`\1 <\2>`', text, flags=re.M)
279-
text = re.sub(r'^(\s*)\.\.\s*raw:: latex', '\\1.. math::\\1 :nowrap:', text, flags=re.M)
280-
text = re.sub(r'^(\s*)\.\. code::\s*(ipython3|ipython2|python3|python2|python)?\s*$', r'\1.. code-block:: python\n', text, flags=re.M)
281294
with open(rst_fn, 'w') as f:
282295
f.write(text)
283296
if authors:

0 commit comments

Comments
 (0)