forked from meditationstuff/protocol_1
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathnumber_toc.rb
More file actions
130 lines (113 loc) · 3.75 KB
/
number_toc.rb
File metadata and controls
130 lines (113 loc) · 3.75 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
# experimental, not yet in use
# adds deep hierarchical numbering to table of contents
# and section headers.
# (kludgy, inelegant first pass, possibly not operating at
# right stage of the pipeline. there might be a better thing,
# such as a provided option in a tool or a library).
# if this were used, index2.html should be substituted for
# index.html in m.sh, making sure that all enclosing scripts
# fail if this script fails.
# limitations:
# 1. does not go tag by tag,
# so if too much is on one line then this will fail.
# 2. finds number insertion point in a kludgy way.
# 3. not obviously safe/bug-free in terms of not accidentally
# dropping content, and the risk of regression grows with any
# further modifications.
# --- state shared by the line-by-line pass further down ---

# Which region of the document the current line belongs to.
in_toc = after_toc = in_back_matter = false

# Numbering state for the current nesting level of the table of contents,
# plus the saved state of the enclosing levels.
counter = 0
prefix = ''
counter_stack = []
prefix_stack = []

# Normalized section title => numbering prefix recorded while reading the
# table of contents.
prefix_hash = {}

# Titles of back-matter sections.
skip_strings = [
  'bibliography (incomplete)',
  'appendix 1: preliminary/auxiliary practices (names only)',
  'appendix 2: preliminary/auxiliary practices (full)',
  'appendix 3: main practice p1 appendix (usa english; "en-us")'
]
# https://stackoverflow.com/questions/9661478/how-to-return-the-substring-of-a-string-between-two-strings-in-ruby
class String
  # Returns the text found between the first occurrence of marker1 and the
  # nearest following occurrence of marker2, or nil when no such pair exists.
  # Both markers are matched literally, and the text in between may span
  # line breaks.
  def string_between_markers(marker1, marker2)
    opening = Regexp.escape(marker1)
    closing = Regexp.escape(marker2)
    found = match(/#{opening}(.*?)#{closing}/m)
    found ? found[1] : nil
  end
end
# Normalizes a title for use as a lookup key (not a real hash): every
# character other than an ASCII letter, an ASCII digit or whitespace is
# removed.
def hash_title(title)
  unwanted_runs = /[^0-9A-Za-z\s]+/
  title.gsub(unwanted_runs, '')
end
# Tells whether a line holds a list item (<li>) or a top-level header (<h1>)
# that mentions the bibliography or an appendix. Returns true or false.
def skippable(line)
  container_tags = ["<li>", "<h1>"]
  back_matter_words = ["bibliography", "appendix"]
  container_tags.any? { |tag| line.include?(tag) } &&
    back_matter_words.any? { |word| line.include?(word) }
end
# Copies index.html to index2.html one line at a time, inserting hierarchical
# section numbers ("1. ", "1.2. ", ...) in two places:
#   * in front of the link of every entry of the "Full Table of Contents",
#   * after the "</span> " of every <h1> header that follows the table.
# Lines before the table, lines matched by skippable, and everything from the
# first appendix/bibliography <h1> onward are copied through unchanged.
#
# Every line is written back together with its original line terminator, so
# text that wraps across lines (and any <pre> content) keeps its line breaks
# instead of being fused into one long line.
#
# The script deliberately raises when a table entry or header cannot be
# parsed or matched, so that enclosing scripts fail rather than ship a
# half-numbered document.
File.open("index2.html", "w") do |file_to_write|
  # File.foreach closes index.html once iteration finishes or raises.
  File.foreach("index.html") do |line|
    # From the first back-matter header onward nothing is numbered.
    if line.include?("<h1>") && (line.include?("appendix") || line.include?("bibliography"))
      in_back_matter = true
    end
    if in_back_matter
      file_to_write.write(line)
      next
    end

    # The table-of-contents header itself is copied through; numbering starts
    # with the lines that follow it.
    if line.include?("<h1>") && line.include?("Full Table of Contents")
      in_toc = true
      file_to_write.write(line)
      next
    end

    if skippable(line)
      file_to_write.write(line)
      next
    end

    # Front matter located before the table of contents.
    if !in_toc && !after_toc
      file_to_write.write(line)
      next
    end

    # The first <h1> after the table ends the table and starts the body.
    if line.include?("<h1>") && in_toc && !after_toc
      in_toc = false
      after_toc = true
    end

    if in_toc
      # Entering a nested list: save the enclosing level's numbering state and
      # extend the prefix with the number of the entry that owns this list.
      if line.include?("<ul>")
        prefix_stack.push(prefix)
        counter_stack.push(counter)
        if prefix == ""
          prefix = counter.to_s
        else
          prefix = prefix + "." + counter.to_s
        end
        counter = 0
      end
      # Leaving a nested list: restore the enclosing level's state.
      if line.include?("</ul>")
        prefix = prefix_stack.pop
        counter = counter_stack.pop
      end

      # Lines without the end of an href attribute carry no entry to number.
      unless line.include?('">')
        file_to_write.write(line)
        next
      end

      counter += 1
      title = line.string_between_markers('">', "</a>")
      anchor_at = line.index("<a")
      if title.nil? || anchor_at.nil?
        raise "cannot parse table-of-contents entry: #{line.chomp}"
      end
      full_prefix = prefix + "." + counter.to_s + ". "
      prefix_hash[hash_title(title)] = full_prefix
      # The outermost list contributes a leading "0."; drop those two
      # characters so that numbering starts at the first real level.
      line.insert(anchor_at, full_prefix[2..-1])
      file_to_write.write(line)
    end

    if after_toc
      if line.include?("<h1>")
        title = line.string_between_markers("</span> ", ":</h1>")
        number = title && prefix_hash[hash_title(title)]
        if number.nil?
          raise "no table-of-contents entry matches header: #{line.chomp}"
        end
        marker = '</span> ' # assumes a space after tag!
        line.insert(line.index(marker) + marker.length, number[2..-1])
      end
      file_to_write.write(line)
    end
  end
end