11
11
import argparse
12
12
import subprocess
13
13
import shutil
14
+ import lxml .html
14
15
15
16
16
17
def main ():
@@ -210,13 +211,14 @@ def convert_file(dst_path, fn, editors):
210
211
211
212
"""
212
213
print (fn )
213
- subprocess .check_call (['jupyter' , 'nbconvert' , '--to' , 'rst ' ,
214
+ subprocess .check_call (['jupyter' , 'nbconvert' , '--to' , 'html ' ,
214
215
'--output-dir' , os .path .abspath (dst_path ),
215
216
os .path .abspath (fn )],
216
217
cwd = dst_path , stderr = subprocess .STDOUT )
217
218
218
219
basename = os .path .splitext (os .path .basename (fn ))[0 ]
219
220
rst_fn = os .path .join (dst_path , basename + '.rst' )
221
+ html_fn = os .path .join (dst_path , basename + '.html' )
220
222
221
223
title = None
222
224
tags = set ()
@@ -225,59 +227,70 @@ def convert_file(dst_path, fn, editors):
225
227
226
228
lines = []
227
229
228
- with open (rst_fn , 'r' ) as f :
229
- prev_line = ''
230
- for orig_line in f :
231
- line = orig_line .strip ()
232
- m = re .match ('^===+\s*$' , line )
233
- m2 = re .match ('^---+\s*$' , line )
234
- if m or m2 :
235
- if prev_line and len (line ) >= 1 + len (prev_line )// 2 and not title :
236
- title = prev_line .strip ()
237
- lines = lines [:- 1 ]
238
- continue
239
-
240
- m = re .match ('^TAGS:\s*(.*)\s*$' , line )
241
- if m :
242
- tag_line = m .group (1 ).strip ().replace (';' , ',' )
243
- tags .update ([x .strip () for x in tag_line .split ("," )])
244
- continue
245
-
246
- m = re .match ('^AUTHORS:\s*(.*)\s*$' , line )
247
- if m :
248
- # Author lines override editors
249
- if legacy_editors :
250
- editors = []
251
- legacy_editors = False
252
- author_line = m .group (1 ).strip ().replace (';' , ',' )
253
- for author in author_line .split ("," ):
254
- author = author .strip ()
255
- if author and author not in editors :
256
- editors .append (author )
257
- continue
230
+ # Parse and munge HTML
231
+ tree = lxml .html .parse (html_fn )
232
+ os .unlink (html_fn )
233
+
234
+ root = tree .getroot ()
235
+ head = root .find ('head' )
236
+ container , = root .xpath ("//div[@id='notebook-container']" )
237
+
238
+ headers = container .xpath ('//h1' )
239
+ if headers :
240
+ title = headers [0 ].text
241
+ if isinstance (title , unicode ):
242
+ title = title .encode ('utf-8' )
243
+ h1_parent = headers [0 ].getparent ()
244
+ h1_parent .remove (headers [0 ])
245
+
246
+ lines .extend ([u".. raw:: html" , u"" ])
247
+
248
+ for element in head .getchildren ():
249
+ if element .tag in ('script' ,):
250
+ text = lxml .html .tostring (element )
251
+ lines .extend (" " + x for x in text .splitlines ())
252
+
253
+ text = lxml .html .tostring (container )
254
+
255
+ m = re .search (ur'<p>TAGS:\s*(.*)\s*</p>' , text )
256
+ if m :
257
+ tag_line = m .group (1 ).strip ().replace (';' , ',' )
258
+ if isinstance (tag_line , unicode ):
259
+ tag_line = tag_line .encode ('utf-8' )
260
+ tags .update ([x .strip () for x in tag_line .split ("," )])
261
+ text = text [:m .start ()] + text [m .end ():]
262
+
263
+ m = re .search (ur'<p>AUTHORS:\s*(.*)\s*</p>' , text )
264
+ if m :
265
+ # Author lines override editors
266
+ if legacy_editors :
267
+ editors = []
268
+ legacy_editors = False
269
+ author_line = m .group (1 ).strip ().replace (';' , ',' )
270
+ if isinstance (author_line , unicode ):
271
+ author_line = author_line .encode ('utf-8' )
272
+ for author in author_line .split ("," ):
273
+ author = author .strip ()
274
+ if author and author not in editors :
275
+ editors .append (author )
276
+
277
+ text = text [:m .start ()] + text [m .end ():]
278
+
279
+ text = text .replace (u'attachments/{0}/' .format (basename ),
280
+ u'../_downloads/' )
281
+
282
+ lines .extend (u" " + x for x in text .splitlines ())
283
+ lines .append (u"" )
258
284
259
- prev_line = line
260
- lines .append (orig_line )
261
-
262
- text = "" .join (lines )
285
+ # Produce output
286
+ text = u"\n " .join (lines ).encode ('utf-8' )
263
287
264
288
if not title :
265
289
title = basename
266
290
267
291
authors = ", " .join (editors )
268
292
text = "{0}\n {1}\n \n {2}" .format (title , "=" * len (title ), text )
269
293
270
- text = re .sub (r'`(.*?) <files/(attachments/.*?)>`__' ,
271
- r':download:`\1 <\2>`' ,
272
- text ,
273
- flags = re .M )
274
- text = re .sub (r'^TAGS:.*$' , '' , text , flags = re .M )
275
- text = re .sub (r'(figure|image):: files/attachments/' , r'\1:: attachments/' , text , flags = re .M )
276
- text = re .sub (r' <files/attachments/' , r' <attachments/' , text , flags = re .M )
277
- text = re .sub (r'.. parsed-literal::' , r'.. parsed-literal::\n :class: ipy-out' , text , flags = re .M )
278
- text = re .sub (r'`([^`<]*)\s+<(?!attachments/)([^:.>]*?)(?:.html)?>`__' , r':doc:`\1 <\2>`' , text , flags = re .M )
279
- text = re .sub (r'^(\s*)\.\.\s*raw:: latex' , '\\ 1.. math::\\ 1 :nowrap:' , text , flags = re .M )
280
- text = re .sub (r'^(\s*)\.\. code::\s*(ipython3|ipython2|python3|python2|python)?\s*$' , r'\1.. code-block:: python\n' , text , flags = re .M )
281
294
with open (rst_fn , 'w' ) as f :
282
295
f .write (text )
283
296
if authors :
0 commit comments