Skip to content

Commit d9bd30f

Browse files
committed
Fix flake8 and pylint warnings
1 parent 96fefd1 commit d9bd30f

36 files changed

+949 -610 lines changed

build_link_map.py

+9 -4
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,7 @@
2525
import os
2626
from link_map import LinkMap
2727

28+
2829
# returns a dict { title -> filename }.
2930
# directory - either 'output/reference' or 'reference'
3031
def build_link_map(directory):
@@ -41,26 +42,30 @@ def build_link_map(directory):
4142
text = f.read()
4243
f.close()
4344

44-
m = re.search('<script>[^<]*mw\.config\.set([^<]*wgPageName[^<]*)</script>', text)
45+
m = re.search(r'<script>[^<]*mw\.config\.set([^<]*wgPageName[^<]*)</script>', text) # noqa
4546
if not m:
4647
continue
4748
text = m.group(1)
48-
text = re.sub('\s*', '', text)
49+
text = re.sub(r'\s*', '', text)
4950
m = re.search('"wgPageName":"([^"]*)"', text)
5051
if not m:
5152
continue
5253

5354
title = m.group(1)
5455

55-
target = os.path.relpath(os.path.abspath(fn), os.path.abspath(directory))
56+
target = os.path.relpath(os.path.abspath(fn),
57+
os.path.abspath(directory))
5658
link_map.add_link(title, target)
5759
return link_map
5860

61+
5962
def main():
6063
link_map = build_link_map('output/reference')
6164

62-
# create an xml file containing mapping between page title and actual location
65+
# create an xml file containing mapping between page title and actual
66+
# location
6367
link_map.write('output/link-map.xml')
6468

69+
6570
if __name__ == "__main__":
6671
main()

commands/preprocess.py

+70 -32
Original file line number | Diff line number | Diff line change
@@ -19,23 +19,24 @@
1919

2020
import fnmatch
2121
import io
22-
from lxml import etree
2322
import re
2423
import os
25-
import sys
2624
import shutil
2725
import urllib.parse
28-
from xml_utils import xml_escape, xml_unescape
26+
from lxml import etree
27+
2928

3029
def rmtree_if_exists(dir):
3130
if os.path.isdir(dir):
3231
shutil.rmtree(dir)
3332

33+
3434
def move_dir_contents_to_dir(srcdir, dstdir):
3535
for fn in os.listdir(srcdir):
3636
shutil.move(os.path.join(srcdir, fn),
3737
os.path.join(dstdir, fn))
3838

39+
3940
def rearrange_archive(root):
4041
# rearrange the archive. {root} here is output/reference
4142

@@ -71,8 +72,10 @@ def rearrange_archive(root):
7172
move_dir_contents_to_dir(src_data_path, data_path)
7273

7374
# also copy the custom fonts
74-
shutil.copy(os.path.join(path, 'DejaVuSansMonoCondensed60.ttf'), data_path)
75-
shutil.copy(os.path.join(path, 'DejaVuSansMonoCondensed75.ttf'), data_path)
75+
shutil.copy(os.path.join(path, 'DejaVuSansMonoCondensed60.ttf'),
76+
data_path)
77+
shutil.copy(os.path.join(path, 'DejaVuSansMonoCondensed75.ttf'),
78+
data_path)
7679

7780
# remove what's left
7881
shutil.rmtree(path)
@@ -81,20 +84,23 @@ def rearrange_archive(root):
8184
for fn in fnmatch.filter(os.listdir(root), 'cppreference-export*.xml'):
8285
os.remove(os.path.join(root, fn))
8386

84-
# Converts complex URL to resources supplied by MediaWiki loader to a simplified name
87+
8588
def convert_loader_name(fn):
89+
# Converts complex URL to resources supplied by MediaWiki loader to a
90+
# simplified name
8691
if "modules=site&only=scripts" in fn:
8792
return "site_scripts.js"
88-
elif "modules=site&only=styles" in fn:
93+
if "modules=site&only=styles" in fn:
8994
return "site_modules.css"
90-
elif "modules=startup&only=scripts" in fn:
95+
if "modules=startup&only=scripts" in fn:
9196
return "startup_scripts.js"
92-
elif re.search("modules=skins.*&only=scripts", fn):
97+
if re.search("modules=skins.*&only=scripts", fn):
9398
return "skin_scripts.js"
94-
elif re.search("modules=.*ext.*&only=styles", fn):
99+
if re.search("modules=.*ext.*&only=styles", fn):
95100
return "ext.css"
96-
else:
97-
raise Exception('Loader file {0} does not match any known files'.format(fn))
101+
msg = 'Loader file {0} does not match any known files'.format(fn)
102+
raise Exception(msg)
103+
98104

99105
def build_rename_map(root):
100106
# Returns a rename map: a map from old to new file name
@@ -122,13 +128,17 @@ def build_rename_map(root):
122128
if num > 0:
123129
name, ext = os.path.splitext(fn)
124130
# add file with its path -> only rename that occurrence
125-
result[os.path.join(dir, fn)] = "{}.{}{}".format(name, num + 1, ext)
131+
result[os.path.join(dir, fn)] = "{}.{}{}".format(name, num + 1,
132+
ext)
126133
seen[low] += 1
127134

128135
return result
129136

137+
130138
def rename_files(root, rename_map):
131-
for dir, old_fn in ((dir, fn) for dir, _, filenames in os.walk(root) for fn in filenames):
139+
for dir, old_fn in ((dir, fn)
140+
for dir, _, filenames in os.walk(root)
141+
for fn in filenames):
132142
src_path = os.path.join(dir, old_fn)
133143

134144
new_fn = rename_map.get(old_fn)
@@ -144,6 +154,7 @@ def rename_files(root, rename_map):
144154
print("Renaming {0}\n to {1}".format(src_path, dst_path))
145155
shutil.move(src_path, dst_path)
146156

157+
147158
def find_html_files(root):
148159
# find files that need to be preprocessed
149160
html_files = []
@@ -152,21 +163,25 @@ def find_html_files(root):
152163
html_files.append(os.path.join(dir, filename))
153164
return html_files
154165

166+
155167
def is_loader_link(target):
156168
if re.match(r'https?://[a-z]+\.cppreference\.com/mwiki/load\.php', target):
157169
return True
158170
return False
159171

172+
160173
def transform_loader_link(target, file, root):
161174
# Absolute loader.php links need to be made relative
162175
abstarget = os.path.join(root, "common", convert_loader_name(target))
163176
return os.path.relpath(abstarget, os.path.dirname(file))
164177

178+
165179
def is_ranges_placeholder(target):
166-
if re.match(r'https?://[a-z]+\.cppreference\.com/w/cpp/ranges(-[a-z]+)?-placeholder/.+', target):
180+
if re.match(r'https?://[a-z]+\.cppreference\.com/w/cpp/ranges(-[a-z]+)?-placeholder/.+', target): # noqa
167181
return True
168182
return False
169183

184+
170185
def transform_ranges_placeholder(target, file, root):
171186
# Placeholder link replacement is implemented in the MediaWiki site JS at
172187
# https://en.cppreference.com/w/MediaWiki:Common.js
@@ -175,9 +190,9 @@ def transform_ranges_placeholder(target, file, root):
175190
repl = (r'\1/cpp/experimental/ranges/\2' if ranges else r'\1/cpp/\2')
176191

177192
if 'ranges-placeholder' in target:
178-
match = r'https?://([a-z]+)\.cppreference\.com/w/cpp/ranges-placeholder/(.+)'
193+
match = r'https?://([a-z]+)\.cppreference\.com/w/cpp/ranges-placeholder/(.+)' # noqa
179194
else:
180-
match = r'https?://([a-z]+)\.cppreference\.com/w/cpp/ranges-([a-z]+)-placeholder/(.+)'
195+
match = r'https?://([a-z]+)\.cppreference\.com/w/cpp/ranges-([a-z]+)-placeholder/(.+)' # noqa
181196
repl += (r'/\3' if ranges else r'/ranges/\3')
182197

183198
# Turn absolute placeholder link into site-relative link
@@ -187,24 +202,27 @@ def transform_ranges_placeholder(target, file, root):
187202
abstarget = os.path.join(root, reltarget)
188203
return os.path.relpath(abstarget, os.path.dirname(file))
189204

205+
190206
def is_external_link(target):
191207
url = urllib.parse.urlparse(target)
192208
return url.scheme != '' or url.netloc != ''
193209

210+
194211
def trasform_relative_link(rename_map, target, file):
195212
# urlparse returns (scheme, host, path, params, query, fragment)
196213
_, _, path, params, _, fragment = urllib.parse.urlparse(target)
197214
assert params == ''
198215

199216
path = urllib.parse.unquote(path)
200-
path = path.replace('../../upload.cppreference.com/mwiki/','../common/')
201-
path = path.replace('../mwiki/','../common/')
217+
path = path.replace('../../upload.cppreference.com/mwiki/', '../common/')
218+
path = path.replace('../mwiki/', '../common/')
202219

203220
dir, fn = os.path.split(path)
204221
new_fn = rename_map.get(fn)
205222
if new_fn:
206223
# look for case conflict of the renamed file
207-
abstarget = os.path.normpath(os.path.join(os.path.dirname(file), dir, new_fn))
224+
abstarget = os.path.normpath(os.path.join(os.path.dirname(file),
225+
dir, new_fn))
208226
new_fn = rename_map.get(abstarget, new_fn)
209227
else:
210228
# original filename unchanged, look for case conflict
@@ -216,11 +234,13 @@ def trasform_relative_link(rename_map, target, file):
216234
path = urllib.parse.quote(path)
217235
return urllib.parse.urlunparse(('', '', path, params, '', fragment))
218236

237+
219238
# Transforms a link in the given file according to rename map.
220239
# target is the link to transform.
221240
# file is the path of the file the link came from.
222241
# root is the path to the root of the archive.
223242
def transform_link(rename_map, target, file, root):
243+
224244
if is_loader_link(target):
225245
return transform_loader_link(target, file, root)
226246

@@ -232,6 +252,7 @@ def transform_link(rename_map, target, file, root):
232252

233253
return trasform_relative_link(rename_map, target, file)
234254

255+
235256
def has_class(el, *classes_to_check):
236257
value = el.get('class')
237258
if value is None:
@@ -242,6 +263,7 @@ def has_class(el, *classes_to_check):
242263
return True
243264
return False
244265

266+
245267
# remove non-printable elements
246268
def remove_noprint(html):
247269
for el in html.xpath('//*'):
@@ -250,14 +272,16 @@ def remove_noprint(html):
250272
elif el.get('id') in ['toc', 'catlinks']:
251273
el.getparent().remove(el)
252274

275+
253276
# remove see also links between C and C++ documentations
254277
def remove_see_also(html):
255278
for el in html.xpath('//tr[@class]'):
256279
if not has_class(el, 't-dcl-list-item', 't-dsc'):
257280
continue
258281

259282
child_tds = el.xpath('.//td/div[@class]')
260-
if not any(has_class(td, 't-dcl-list-see', 't-dsc-see') for td in child_tds):
283+
if not any(has_class(td, 't-dcl-list-see', 't-dsc-see')
284+
for td in child_tds):
261285
continue
262286

263287
# remove preceding separator, if any
@@ -276,17 +300,23 @@ def remove_see_also(html):
276300
next = el.getnext()
277301
if next is None:
278302
el.getparent().remove(el)
279-
elif next.tag == 'table' and has_class(next, 't-dcl-list-begin') and len(next.xpath('.//tr')) == 0:
303+
elif next.tag == 'table' and has_class(next, 't-dcl-list-begin') and \
304+
len(next.xpath('.//tr')) == 0:
280305
el.getparent().remove(el)
281306
next.getparent().remove(next)
282307

308+
283309
# remove Google Analytics scripts
284310
def remove_google_analytics(html):
285311
for el in html.xpath('/html/body/script'):
286-
if el.get('src') is not None and 'google-analytics.com/ga.js' in el.get('src'):
287-
el.getparent().remove(el)
288-
elif el.text is not None and ('google-analytics.com/ga.js' in el.text or 'pageTracker' in el.text):
289-
el.getparent().remove(el)
312+
if el.get('src') is not None:
313+
if 'google-analytics.com/ga.js' in el.get('src'):
314+
el.getparent().remove(el)
315+
elif el.text is not None:
316+
if 'google-analytics.com/ga.js' in el.text or \
317+
'pageTracker' in el.text:
318+
el.getparent().remove(el)
319+
290320

291321
# remove Carbon ads
292322
def remove_ads(html):
@@ -297,13 +327,15 @@ def remove_ads(html):
297327
if el.text is not None and '#carbonads' in el.text:
298328
el.getparent().remove(el)
299329

330+
300331
# remove links to file info pages (e.g. on images)
301332
def remove_fileinfo(html):
302-
info = etree.XPath(r"//a[re:test(@href, 'https?://[a-z]+\.cppreference\.com/w/File:')]/..",
303-
namespaces={'re':'http://exslt.org/regular-expressions'})
333+
info = etree.XPath(r"//a[re:test(@href, 'https?://[a-z]+\.cppreference\.com/w/File:')]/..", # noqa
334+
namespaces={'re':'http://exslt.org/regular-expressions'}) # noqa
304335
for el in info(html):
305336
el.getparent().remove(el)
306337

338+
307339
# remove external links to unused resources
308340
def remove_unused_external(html):
309341
for el in html.xpath('/html/head/link'):
@@ -313,6 +345,7 @@ def remove_unused_external(html):
313345
(head, tail) = os.path.split(el.get('href'))
314346
el.set('href', os.path.join(head, 'common', tail))
315347

348+
316349
def preprocess_html_file(root, fn, rename_map):
317350
parser = etree.HTMLParser()
318351
html = etree.parse(fn, parser)
@@ -331,23 +364,27 @@ def preprocess_html_file(root, fn, rename_map):
331364
for el in html.xpath('//*[@href]'):
332365
el.set('href', transform_link(rename_map, el.get('href'), fn, root))
333366

334-
for err in parser.error_log:
367+
for err in list(parser.error_log):
335368
print("HTML WARN: {0}".format(err), file=output)
336369

337370
html.write(fn, encoding='utf-8', method='html')
338371
return output.getvalue()
339372

373+
340374
def preprocess_css_file(fn):
341375
f = open(fn, "r", encoding='utf-8')
342376
text = f.read()
343377
f.close()
344378

345379
# note that query string is not used in css files
346380

347-
text = text.replace('../DejaVuSansMonoCondensed60.ttf', 'DejaVuSansMonoCondensed60.ttf')
348-
text = text.replace('../DejaVuSansMonoCondensed75.ttf', 'DejaVuSansMonoCondensed75.ttf')
381+
text = text.replace('../DejaVuSansMonoCondensed60.ttf',
382+
'DejaVuSansMonoCondensed60.ttf')
383+
text = text.replace('../DejaVuSansMonoCondensed75.ttf',
384+
'DejaVuSansMonoCondensed75.ttf')
349385

350-
text = text.replace('../../upload.cppreference.com/mwiki/images/', 'images/')
386+
text = text.replace('../../upload.cppreference.com/mwiki/images/',
387+
'images/')
351388

352389
# QT Help viewer doesn't understand nth-child
353390
text = text.replace('nth-child(1)', 'first-child')
@@ -356,6 +393,7 @@ def preprocess_css_file(fn):
356393
f.write(text)
357394
f.close()
358395

396+
359397
def preprocess_startup_script(fn):
360398
with open(fn, "r", encoding='utf-8') as f:
361399
text = f.read()

0 commit comments

Comments
 (0)