Skip to content

Commit 123ee6a

Browse files
committed
preprocess scripts: init
1 parent abd146f commit 123ee6a

File tree

3 files changed

+305
-0
lines changed

3 files changed

+305
-0
lines changed

.gitignore

+15
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
*.chm
2+
*.hhp
3+
*.hhk
4+
*.hhc
5+
*.cab
6+
*.zip
7+
*.rar
8+
*.7z
9+
reference/*
10+
zh/*
11+
en/*
12+
chmhelp/*
13+
common/*
14+
chm_temp/*
15+
cppreference-doc/*

preprocess-zh.sh

+107
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
#!/bin/bash
#
# Fetches the cppreference-doc tooling, patches it with zh.diff, downloads the
# wiki via `make source`, and then post-processes the result.
# Relies on GNU extensions (grep -P, cp -T, sed -i) plus git and make.

# Stop immediately if cloning, patching or downloading fails.
set -e
git clone https://github.com/PeterFeicht/cppreference-doc.git --depth=1
cd cppreference-doc
git apply -3 ../zh.diff
make source
# From here on, failures of individual commands no longer abort the script.
set +e

# init files and vars
# For every entry <name> in LIST there are two variables:
#   <name>_path    - first file in the tree whose path matches the load.php
#                    pattern below (empty when nothing matches)
#   <name>_replace - plain file name the functions below substitute for it
# `find .` names the start directory explicitly instead of relying on the
# GNU-only default.
startup_scripts_replace="startup_scripts.js"
startup_scripts_path="$(find . | grep -iP 'load\.php.*?modules=startup&only=scripts.*?' | head -1)"

site_scripts_replace="site_scripts.js"
site_scripts_path="$(find . | grep -iP 'load\.php.*?modules=site&only=scripts.*?' | head -1)"

site_modules_replace="site_modules.css"
site_modules_path="$(find . | grep -iP 'load\.php.*?modules=site&only=styles.*?' | head -1)"

skin_scripts_replace="skin_scripts.js"
skin_scripts_path="$(find . | grep -iP 'load\.php.*?modules=skins.*&only=scripts.*?' | head -1)"

ext_replace="ext.css"
ext_path="$(find . | grep -iP 'load\.php.*?modules=.*ext.*&only=styles.*?' | head -1)"

LIST="startup_scripts site_scripts site_modules skin_scripts ext"
extra_fonts="DejaVuSans.ttf DejaVuSans-Bold.ttf DejaVuSansMono.ttf DejaVuSansMono-Bold.ttf"

# Number of parallel sed workers handed to `xargs -P`. Fall back to 1 when
# /proc/cpuinfo is missing or lists no processors, because `xargs -P 0` would
# mean "as many processes as possible".
CPUS="$(grep -c '^processor' /proc/cpuinfo 2>/dev/null)"
[[ "${CPUS}" =~ ^[1-9][0-9]*$ ]] || CPUS=1
29+
30+
# https://gist.github.com/cdown/1163649/8a35c36fdd24b373788a7057ed483a5bcd8cd43e
31+
#######################################
# Percent-encodes a string for use in a URL.
# Unreserved characters (letters, digits, '.', '~', '_', '-') are copied
# unchanged, a space becomes '+', and everything else becomes %XX with two
# upper-case hex digits.
# Arguments: $1 - string to encode
# Outputs:   encoded string on stdout, without a trailing newline
#######################################
url_encode() {
  # Use the C locale so the [a-z]/[A-Z] ranges match ASCII letters only and
  # the string is walked byte by byte rather than character by character.
  local LC_ALL=C
  local _length="${#1}"
  local _offset _char
  for (( _offset = 0 ; _offset < _length ; _offset++ )); do
    _char="${1:_offset:1}"
    case "${_char}" in
      [a-zA-Z0-9.~_-]) printf '%s' "${_char}" ;;
      ' ') printf '+' ;;
      # "'c" makes printf emit the numeric value of c; %02X keeps the leading
      # zero for values below 0x10 (e.g. tab -> %09).
      *) printf '%%%02X' "'${_char}" ;;
    esac
  done
}
42+
43+
#######################################
# Copies the file named by <var>_path to <var>_replace in the same directory.
# Globals:   ${1}_path (read), ${1}_replace (read)
# Arguments: $1 - variable-name prefix, e.g. "site_scripts"
# Returns:   non-zero if either variable is empty or the copy fails
#######################################
copy_file(){
  local var=$1
  local path_ref="${var}_path"
  local replace_ref="${var}_replace"
  # Indirect expansion reads the value verbatim. The paths contain '?' and
  # '*', which `eval echo` would glob-expand and word-split.
  local path="${!path_ref}"
  local replace="${!replace_ref}"
  if [[ -z "${path}" || -z "${replace}" ]]; then
    printf 'copy_file: %s or %s is empty\n' "${path_ref}" "${replace_ref}" >&2
    return 1
  fi
  local dir
  dir="$(dirname -- "${path}")"
  # -T treats the destination as a file name, never as a directory.
  cp -f -T -- "${path}" "${dir}/${replace}"
}
50+
51+
#######################################
# Deletes every file below the current directory whose name matches the base
# name of <var>_path, ignoring case.
# The base name is passed to `find -iname` as a pattern, so any '?' or '*' in
# it acts as a wildcard.
# Globals:   ${1}_path (read)
# Arguments: $1 - variable-name prefix, e.g. "site_scripts"
# Returns:   0 when <var>_path is empty (nothing to do), else find's status
#######################################
remove_file(){
  local var=$1
  local path_ref="${var}_path"
  # Indirect expansion reads the value verbatim, without the globbing and
  # word splitting that `eval echo` applies.
  local path="${!path_ref}"
  [[ -n "${path}" ]] || return 0
  local name
  name="$(basename -- "${path}")"
  # -exec ... + hands paths to rm directly, so names containing spaces or
  # quotes are not split the way `find | xargs` would split them.
  find . -iname "${name}" -exec rm -f -- {} +
}
57+
58+
#######################################
# In every *.html file below the current directory, replaces each occurrence
# (case-insensitive) of the base name of <var>_path, and of its url_encode'd
# form, with <var>_replace.
# Globals:   ${1}_path (read), ${1}_replace (read), CPUS (read, default 1)
# Arguments: $1 - variable-name prefix, e.g. "site_scripts"
# Returns:   1 if <var>_path is empty, otherwise the status of the pipeline
#######################################
replace_in_html(){
  local var=$1
  local path_ref="${var}_path"
  local replace_ref="${var}_replace"
  local path="${!path_ref}"
  local replace="${!replace_ref}"
  if [[ -z "${path}" ]]; then
    # An empty name would produce `s//.../`, which sed rejects.
    printf 'replace_in_html: %s is empty, nothing to replace\n' "${path_ref}" >&2
    return 1
  fi

  local name encoded_name
  name="$(basename -- "${path}")"
  encoded_name="$(url_encode "${name}")"

  # Escape regex metacharacters and the '/' delimiter so the names match
  # literally. Unescaped, the trailing "&*" of a load.php name means "zero or
  # more '&'" and leaves a stray '*' behind.
  local name_re encoded_re replace_esc
  name_re="$(printf '%s' "${name}" | sed -e 's|[][\\/.*^$]|\\&|g')"
  encoded_re="$(printf '%s' "${encoded_name}" | sed -e 's|[][\\/.*^$]|\\&|g')"
  # In the replacement text only '\', '/' and '&' are special.
  replace_esc="$(printf '%s' "${replace}" | sed -e 's|[\\/&]|\\&|g')"

  # One traversal applies both substitutions, raw name first, then encoded.
  # -print0/-0 keeps odd file names intact; -r skips sed when nothing matched.
  find ./ -iname '*.html' -type f -print0 \
    | xargs -0 -r -P "${CPUS:-1}" sed -i \
        -e "s/${name_re}/${replace_esc}/gi" \
        -e "s/${encoded_re}/${replace_esc}/gi"
}
67+
68+
# Main flow: copy the load.php resources to plain file names, run the upstream
# preprocessing, then rewrite references and patch the copied scripts/styles.
echo pre-processing...
# LIST holds space-separated variable-name prefixes; splitting is intended.
for i in $LIST; do copy_file "$i"; done

# backup extra fonts
mkdir -p font_temp
for i in $extra_fonts; do
  find . -iname "$i" -exec cp {} "font_temp/$i" \;
done

# original preprocess
make doc_html

# restore extra fonts
# Start empty so a font_path inherited from the environment is never used.
font_path=''
if [[ -d 'reference/common' ]]; then
  font_path='reference/common'
elif [[ -d 'output/common' ]]; then
  font_path='output/common'
fi
if [[ -n "$font_path" ]]; then
  for i in $extra_fonts; do
    # Only restore fonts that were actually found during the backup step.
    if [[ -f "font_temp/$i" ]]; then
      cp -f "font_temp/$i" "$font_path/$i"
    fi
  done
fi
rm -rf font_temp

echo post-processing...
for i in $LIST; do
  echo "processing $i"
  remove_file "$i"
  replace_in_html "$i"
done

# -print0/-0 keeps unusual file names intact; -r skips sed when nothing matched
# (otherwise sed would be started without any input file).
# Replace document.write with "void " in the copied startup script.
find . -iname "${startup_scripts_replace}" -print0 | xargs -0 -r sed -i 's/document\.write/void /ig'
# Insert the line "if(window.mw)" before line 1 of the site and skin scripts.
# NOTE(review): presumably this guards the first statement when the mw object
# is absent; confirm against the script contents.
find . -iname "${site_scripts_replace}" -print0 | xargs -0 -r sed -i '1 i if(window.mw)'
find . -iname "${skin_scripts_replace}" -print0 | xargs -0 -r sed -i '1 i if(window.mw)'
# Rewrite "../<name>.ttf" references in stylesheets to "<name>.ttf".
find . -iname '*.css' -print0 | xargs -0 -r sed -i -r 's/\.\.\/([^.]+?)\.ttf/\1.ttf/ig'
echo Done.

# Move the generated tree next to this script and leave the clone directory.
mv reference/* ../
cd ..

zh.diff

+183
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,183 @@
1+
diff --git a/Makefile b/Makefile
2+
index deadfe2..46abab2 100644
3+
--- a/Makefile
4+
+++ b/Makefile
5+
@@ -230,7 +230,7 @@ indexes:
6+
./index2autolinker.py index-functions-c.xml output/indexes/autolink-c
7+
./index2autolinker.py index-functions-cpp.xml output/indexes/autolink-cpp
8+
9+
-#redownloads the source documentation directly from en.cppreference.com
10+
+#redownloads the source documentation directly from zh.cppreference.com
11+
source:
12+
rm -rf "reference"
13+
mkdir "reference"
14+
@@ -239,15 +239,15 @@ source:
15+
regex=".*index\\.php.*|.*/Special:.*|.*/Talk:.*" \
16+
regex+="|.*/Help:.*|.*/File:.*|.*/Cppreference:.*" \
17+
regex+="|.*/WhatLinksHere:.*|.*/Template:.*|.*/Category:.*" \
18+
- regex+="|.*action=.*|.*printable=.*|.*en.cppreference.com/book.*" ; \
19+
+ regex+="|.*action=.*|.*printable=.*|.*zh.cppreference.com/book.*" ; \
20+
echo $$regex ; \
21+
wget --adjust-extension --page-requisites --convert-links \
22+
- --force-directories --recursive --level=15 \
23+
- --span-hosts --domains=en.cppreference.com,upload.cppreference.com \
24+
+ --force-directories --recursive --level=17 \
25+
+ --span-hosts --domains=zh.cppreference.com,upload.cppreference.com \
26+
--reject-regex $$regex \
27+
- --timeout=5 --tries=50 --no-verbose \
28+
+ --timeout=5 --tries=150 --no-verbose \
29+
--retry-connrefused --waitretry=10 --read-timeout=20 \
30+
- http://en.cppreference.com/w/ ; \
31+
+ http://zh.cppreference.com/w/ ; \
32+
popd > /dev/null
33+
34+
- ./export.py --url=http://en.cppreference.com/mwiki reference/cppreference-export-ns0,4,8,10.xml 0 4 8 10
35+
+ ./export.py --url=http://zh.cppreference.com/mwiki reference/cppreference-export-ns0,4,8,10.xml 0 4 8 10
36+
diff --git a/commands/preprocess.py b/commands/preprocess.py
37+
index ed67174..ada56fe 100644
38+
--- a/commands/preprocess.py
39+
+++ b/commands/preprocess.py
40+
@@ -40,15 +40,15 @@ def rearrange_archive(root):
41+
# rearrange the archive. {root} here is output/reference
42+
43+
# before
44+
- # {root}/en.cppreference.com/w/ : html
45+
- # {root}/en.cppreference.com/mwiki/ : data
46+
- # {root}/en.cppreference.com/ : data
47+
+ # {root}/zh.cppreference.com/w/ : html
48+
+ # {root}/zh.cppreference.com/mwiki/ : data
49+
+ # {root}/zh.cppreference.com/ : data
50+
# ... (other languages)
51+
# {root}/upload.cppreference.com/mwiki/ : data
52+
53+
# after
54+
# {root}/common/ : all common data
55+
- # {root}/en/ : html for en
56+
+ # {root}/zh/ : html for zh
57+
# ... (other languages)
58+
59+
data_path = os.path.join(root, 'common')
60+
@@ -56,7 +56,7 @@ def rearrange_archive(root):
61+
shutil.move(os.path.join(root, 'upload.cppreference.com/mwiki'), data_path)
62+
shutil.rmtree(os.path.join(root, 'upload.cppreference.com'))
63+
64+
- for lang in ["en"]:
65+
+ for lang in ["zh"]:
66+
path = os.path.join(root, lang + ".cppreference.com/")
67+
src_html_path = path + "w/"
68+
src_data_path = path + "mwiki/"
69+
@@ -214,7 +214,7 @@ def has_class(el, classes_to_check):
70+
return False
71+
72+
def preprocess_html_file(root, fn, rename_map):
73+
- parser = etree.HTMLParser()
74+
+ parser = etree.HTMLParser(encoding="utf-8")
75+
html = etree.parse(fn, parser)
76+
77+
# remove non-printable elements
78+
diff --git a/gadgets/standard_revisions-tests/base.py b/gadgets/standard_revisions-tests/base.py
79+
index 848d431..67d00f4 100644
80+
--- a/gadgets/standard_revisions-tests/base.py
81+
+++ b/gadgets/standard_revisions-tests/base.py
82+
@@ -27,7 +27,7 @@ import unittest, time, re
83+
84+
class Driver:
85+
def __init__(self):
86+
- base_url = "http://en.cppreference.com/"
87+
+ base_url = "http://zh.cppreference.com/"
88+
driver = webdriver.Firefox()
89+
driver.implicitly_wait(30)
90+
try:
91+
diff --git a/gadgets/sync_tests_mwiki.py b/gadgets/sync_tests_mwiki.py
92+
index 9aa3fc7..13294ec 100755
93+
--- a/gadgets/sync_tests_mwiki.py
94+
+++ b/gadgets/sync_tests_mwiki.py
95+
@@ -114,7 +114,7 @@ def perform_sync(url, direction, dest_root, title_filter, user, password,
96+
# Supply information to config that would otherwise be defined in
97+
# user-config.py
98+
pywikibot.config2.family = 'cppreference'
99+
- pywikibot.config2.mylang = 'en'
100+
+ pywikibot.config2.mylang = 'zh'
101+
pywikibot.config2.family_files['cppreference'] = url
102+
pywikibot.config2.step = 100
103+
pywikibot.config2.put_throttle = 0
104+
diff --git a/index2ddg.py b/index2ddg.py
105+
index 9789e56..7a3a3dc 100755
106+
--- a/index2ddg.py
107+
+++ b/index2ddg.py
108+
@@ -447,7 +447,7 @@ def process_identifier(out, redirects, root, link, item_ident, item_type,
109+
abstract = abstract.replace('\n','\\n')
110+
line += abstract + '\t'
111+
# source url
112+
- line += 'http://en.cppreference.com/w/' + link + '\n'
113+
+ line += 'http://zh.cppreference.com/w/' + link + '\n'
114+
out.write(line)
115+
116+
build_redirects(redirects, item_ident, item_type)
117+
diff --git a/index_transform/browser.py b/index_transform/browser.py
118+
index d2e625c..82cb11f 100644
119+
--- a/index_transform/browser.py
120+
+++ b/index_transform/browser.py
121+
@@ -42,7 +42,7 @@ class Index2Browser(IndexTransform):
122+
123+
res = u''
124+
res += '<tt><b>' + xml_escape(full_name) + '</b></tt> [<span class="link">'
125+
- res += '<a href="http://en.cppreference.com/w/' + xml_escape(full_link) + '">'
126+
+ res += '<a href="http://zh.cppreference.com/w/' + xml_escape(full_link) + '">'
127+
res += full_link + '</a></span>] <span class="mark">' + mark + '</span>\n'
128+
return res
129+
130+
diff --git a/preprocess.py b/preprocess.py
131+
index cb6e8cc..a8ac18f 100755
132+
--- a/preprocess.py
133+
+++ b/preprocess.py
134+
@@ -28,12 +28,12 @@ def main():
135+
parser.add_argument('--dst', type=str, help='Destination folder to put preprocessed archive to')
136+
args = parser.parse_args()
137+
138+
- root = args.dst
139+
- src = args.src
140+
+ root = args.src
141+
+ # src = args.src
142+
143+
# copy the source tree
144+
- rmtree_if_exists(root)
145+
- shutil.copytree(src, root)
146+
+ # rmtree_if_exists(root)
147+
+ # shutil.copytree(src, root)
148+
149+
rearrange_archive(root)
150+
151+
diff --git a/tests/test_preprocess.py b/tests/test_preprocess.py
152+
index e4aa687..d6f143f 100644
153+
--- a/tests/test_preprocess.py
154+
+++ b/tests/test_preprocess.py
155+
@@ -6,23 +6,23 @@ from lxml import etree
156+
157+
class TestConvertLoaderName(unittest.TestCase):
158+
def test_convert_loader_name(self):
159+
- url = 'http://en.cppreference.com/mwiki/load.php?debug=false&lang=en&\
160+
+ url = 'http://zh.cppreference.com/mwiki/load.php?debug=false&lang=*&\
161+
modules=site&only=scripts&skin=cppreference2&*'
162+
self.assertEqual('site_scripts.js', convert_loader_name(url))
163+
164+
- url = 'http://en.cppreference.com/mwiki/load.php?debug=false&lang=en&\
165+
+ url = 'http://zh.cppreference.com/mwiki/load.php?debug=false&lang=*&\
166+
modules=site&only=styles&skin=cppreference2&*'
167+
self.assertEqual('site_modules.css', convert_loader_name(url))
168+
169+
- url = 'http://en.cppreference.com/mwiki/load.php?debug=false&lang=en&\
170+
+ url = 'http://zh.cppreference.com/mwiki/load.php?debug=false&lang=*&\
171+
modules=skins.cppreference2&only=scripts&skin=cppreference2&*'
172+
self.assertEqual('skin_scripts.js', convert_loader_name(url))
173+
174+
- url = 'http://en.cppreference.com/mwiki/load.php?debug=false&lang=en&\
175+
+ url = 'http://zh.cppreference.com/mwiki/load.php?debug=false&lang=*&\
176+
modules=startup&only=scripts&skin=cppreference2&*'
177+
self.assertEqual('startup_scripts.js', convert_loader_name(url))
178+
179+
- url = 'http://en.cppreference.com/mwiki/load.php?debug=false&lang=en&\
180+
+ url = 'http://zh.cppreference.com/mwiki/load.php?debug=false&lang=*&\
181+
modules=ext.gadget.ColiruCompiler%2CMathJax%7Cext.rtlcite%7Cmediawiki.\
182+
legacy.commonPrint%2Cshared%7Cskins.cppreference2&only=styles&skin=\
183+
cppreference2&*'

0 commit comments

Comments
 (0)