# make_fmhy_bookmarks.py
import requests
def addPretext(lines, sectionName, baseURL, subURL):
    modified_lines = []
    currMdSubheading = ""
    currSubCat = ""
    currSubSubCat = ""

    # Remove from the lines any line that isn't a heading and doesn't contain the character `⭐`
    #lines = [line for line in lines if line.startswith("#") or '⭐' in line]

    # Parse headings
    for line in lines:
        if line.startswith("#"):  # Title lines
            if not subURL == "storage":
                if line.startswith("# ►"):
                    currMdSubheading = "#" + line.replace("# ►", "").strip().replace(" / ", "-").replace(" ", "-").lower()
                    currSubCat = line.replace("# ►", "").strip()
                    currSubSubCat = "/"
                elif line.startswith("## ▷"):
                    if not subURL == "non-english":  # Because the non-English section has multiple subsubcats with the same names
                        currMdSubheading = "#" + line.replace("## ▷", "").strip().replace(" / ", "-").replace(" ", "-").lower()
                    currSubSubCat = line.replace("## ▷", "").strip()
            elif subURL == "storage":
                if line.startswith("## "):
                    currMdSubheading = "#" + line.replace("## ", "").strip().replace(" / ", "-").replace(" ", "-").lower()
                    currSubCat = line.replace("## ", "").strip()
                    currSubSubCat = "/"
                elif line.startswith("### "):
                    currMdSubheading = "#" + line.replace("### ", "").strip().replace(" / ", "-").replace(" ", "-").lower()
                    currSubSubCat = line.replace("### ", "").strip()

            # Remove links from subcategory titles (because they break the format)
            if 'http' in currSubCat: currSubCat = ''
            if 'http' in currSubSubCat: currSubSubCat = ''

        elif any(char.isalpha() for char in line):  # If the line has content
            preText = f"{{\"{sectionName.replace('.md', '')}\", \"{currSubCat}\", \"{currSubSubCat}\"}}"
            if line.startswith("* "): line = line[2:]
            modified_lines.append(preText + line)
    return modified_lines
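# A minimal sketch of what addPretext emits (the heading and link below are
# made-up examples, not real wiki entries):
#   addPretext(["# ► Video Streaming", "* [Some Site](https://example.com)"],
#              "VideoPiracyGuide.md", "https://fmhy.net/", "video")
#   -> ['{"VideoPiracyGuide", "Video Streaming", "/"}[Some Site](https://example.com)']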
#----------------base64 page processing------------
import base64
import re
doBase64Decoding = True
def fix_base64_string(encoded_string):
    # Pad the string to a multiple of 4 characters so b64decode accepts it
    missing_padding = len(encoded_string) % 4
    if missing_padding != 0:
        encoded_string += '=' * (4 - missing_padding)
    return encoded_string
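# Quick sanity check: "aGk" ("hi" in base64, without its padding) should come
# back as "aGk=", while an already-padded string is left alone.
assert fix_base64_string("aGk") == "aGk="
assert fix_base64_string("aGk=") == "aGk="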
def decode_base64_in_backticks(input_string):
    def base64_decode(match):
        encoded_data = match.group(0)[1:-1]  # Extract content within backticks
        decoded_bytes = base64.b64decode(fix_base64_string(encoded_data))
        return decoded_bytes.decode()

    pattern = r"`[^`]+`"  # Regex pattern to find substrings within backticks
    decoded_string = re.sub(pattern, base64_decode, input_string)
    return decoded_string
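# Example: a backtick-wrapped, unpadded payload is decoded in place
# (the URL below is only an illustration):
assert decode_base64_in_backticks("link: `aHR0cHM6Ly9leGFtcGxlLmNvbQ`") == "link: https://example.com"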
def remove_empty_lines(text):
    lines = text.split('\n')  # Split the text into lines
    non_empty_lines = [line for line in lines if line.strip()]  # Filter out empty lines
    return '\n'.join(non_empty_lines)  # Join non-empty lines back together
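# Blank and whitespace-only lines are dropped:
assert remove_empty_lines("a\n\n  \nb") == "a\nb"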
def extract_base64_sections(base64_page):
    sections = base64_page.split("***")  # Split the input string by "***" to get sections
    formatted_sections = []
    for section in sections:
        formatted_section = remove_empty_lines(section.strip().replace("#### ", "").replace("\n\n", " - ").replace("\n", ", "))
        if doBase64Decoding: formatted_section = decode_base64_in_backticks(formatted_section)
        formatted_section = '[🔑Base64](https://rentry.co/FMHYBase64) ► ' + formatted_section
        formatted_sections.append(formatted_section)
    lines = formatted_sections
    return lines
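# A hedged sketch of the transformation, using a made-up rentry-style section:
#   "#### Some Title\n\n`aHR0cHM6Ly9leGFtcGxlLmNvbQ`"
# becomes (with doBase64Decoding enabled):
#   "[🔑Base64](https://rentry.co/FMHYBase64) ► Some Title - https://example.com"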
#----------------</end>base64 page processing------------
def dlWikiChunk(fileName, icon, redditSubURL):
    # First, try to get the chunk from a local file
    try:
        print("Loading " + fileName + " from local file...")
        with open(fileName.lower(), 'r') as f:
            page = f.read()
        print("Loaded.\n")
    # If not available locally, download the chunk
    except FileNotFoundError:
        if not fileName == 'base64.md':
            print("Local file not found. Downloading " + fileName + " from GitHub...")
            page = requests.get("https://raw.githubusercontent.com/fmhy/FMHYedit/main/docs/" + fileName.lower()).text
        elif fileName == 'base64.md':
            print("Local file not found. Downloading rentry.co/FMHYBase64...")
            page = requests.get("https://rentry.co/FMHYBase64/raw").text.replace("\r", "")
        print("Downloaded")

    # Add a pretext to each content line
    redditBaseURL = "https://www.reddit.com/r/FREEMEDIAHECKYEAH/wiki/"
    siteBaseURL = "https://fmhy.net/"
    if not fileName == 'base64.md':
        pagesDevSiteSubURL = fileName.replace(".md", "").lower()
        subURL = pagesDevSiteSubURL
        lines = page.split('\n')
        lines = addPretext(lines, fileName, siteBaseURL, subURL)
    elif fileName == 'base64.md':
        lines = extract_base64_sections(page)
    return lines
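# Typical call (assumes a local ai.md or network access to the FMHY GitHub repo):
#   lines = dlWikiChunk("AI.md", "🤖", "ai")
# Note that the icon and redditSubURL arguments are currently unused by the body.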
def cleanLineForSearchMatchChecks(line):
    siteBaseURL = "https://fmhy.net/"
    redditBaseURL = "https://www.reddit.com/r/FREEMEDIAHECKYEAH/wiki/"
    return line.replace(redditBaseURL, '/').replace(siteBaseURL, '/')
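# Quick sanity check: site-base URLs are collapsed to relative form.
# (Nothing in this script calls this helper; it is kept as-is.)
assert cleanLineForSearchMatchChecks("see https://fmhy.net/video") == "see /video"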
def alternativeWikiIndexing():
    wikiChunks = [
        dlWikiChunk("VideoPiracyGuide.md", "📺", "video"),
        dlWikiChunk("AI.md", "🤖", "ai"),
        dlWikiChunk("Android-iOSGuide.md", "📱", "android"),
        dlWikiChunk("AudioPiracyGuide.md", "🎵", "audio"),
        dlWikiChunk("DownloadPiracyGuide.md", "💾", "download"),
        dlWikiChunk("EDUPiracyGuide.md", "🧠", "edu"),
        dlWikiChunk("GamingPiracyGuide.md", "🎮", "games"),
        dlWikiChunk("AdblockVPNGuide.md", "📛", "adblock-vpn-privacy"),
        dlWikiChunk("System-Tools.md", "💻", "system-tools"),
        dlWikiChunk("File-Tools.md", "🗃️", "file-tools"),
        dlWikiChunk("Internet-Tools.md", "🔗", "internet-tools"),
        dlWikiChunk("Social-Media-Tools.md", "💬", "social-media"),
        dlWikiChunk("Text-Tools.md", "📝", "text-tools"),
        dlWikiChunk("Video-Tools.md", "📼", "video-tools"),
        dlWikiChunk("MISCGuide.md", "📂", "misc"),
        dlWikiChunk("ReadingPiracyGuide.md", "📗", "reading"),
        dlWikiChunk("TorrentPiracyGuide.md", "🌀", "torrent"),
        dlWikiChunk("img-tools.md", "📷", "img-tools"),
        dlWikiChunk("gaming-tools.md", "👾", "gaming-tools"),
        dlWikiChunk("LinuxGuide.md", "🐧🍏", "linux"),
        dlWikiChunk("DEVTools.md", "🖥️", "dev-tools"),
        dlWikiChunk("Non-English.md", "🌏", "non-eng"),
        dlWikiChunk("STORAGE.md", "🗄️", "storage"),
        #dlWikiChunk("base64.md", "🔑", "base64"),
        dlWikiChunk("NSFWPiracy.md", "🌶", "https://saidit.net/s/freemediafuckyeah/wiki/index")
    ]
    return [item for sublist in wikiChunks for item in sublist]  # Flatten a <list of lists of strings> into a <list of strings>
#--------------------------------

# Save the result of alternativeWikiIndexing to a .md file:
# with open('wiki_adapted.md', 'w') as f:
#     for line in alternativeWikiIndexing():
#         f.write(line + '\n')
# Instead of saving it to a file, keep it in a string variable
wiki_adapted_md = '\n'.join(alternativeWikiIndexing())

# Remove from wiki_adapted_md any line that doesn't contain the character `⭐`
wiki_adapted_starred_only_md = '\n'.join([line for line in wiki_adapted_md.split('\n') if '⭐' in line])
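# At this point every line looks like (illustrative, made-up entry):
#   {"VideoPiracyGuide", "Video Streaming", "/"}[Some Site](https://example.com) ⭐
# and the starred-only variant keeps just the lines that carry a ⭐.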
def markdown_to_html_bookmarks(input_md_text, output_file):
    # Predefined folder name
    folder_name = "FMHY"

    # Read the input markdown file:
    #with open(input_file, 'r', encoding='utf-8') as f:
    #    markdown_content = f.read()
    # Instead of reading from a file, read from a string variable
    markdown_content = input_md_text

    # Regex pattern to extract URLs and titles from markdown
    url_pattern = re.compile(r'\[([^\]]+)\]\((https?://[^\)]+)\)')
    # Regex pattern to extract hierarchy levels
    hierarchy_pattern = re.compile(r'^\{"([^"]+)", "([^"]+)", "([^"]+)"\}')
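    # Illustrative line that both patterns are designed to match (made-up entry):
    #   {"VideoPiracyGuide", "Video Streaming", "/"}[Some Site](https://example.com) **note**
    # hierarchy_pattern captures ('VideoPiracyGuide', 'Video Streaming', '/');
    # url_pattern captures [('Some Site', 'https://example.com')].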
    # Dictionary to hold bookmarks by hierarchy
    bookmarks = {}

    # Split the content into lines and parse each one
    lines = markdown_content.split('\n')
    for line in lines:
        # Find hierarchy levels
        hierarchy_match = hierarchy_pattern.match(line)
        if not hierarchy_match:
            continue
        level1, level2, level3 = hierarchy_match.groups()

        # Initialize nested dictionaries for hierarchy levels
        if level1 not in bookmarks:
            bookmarks[level1] = {}
        if level2 not in bookmarks[level1]:
            bookmarks[level1][level2] = {}
        if level3 not in bookmarks[level1][level2]:
            bookmarks[level1][level2][level3] = []

        # Find all URL matches in the line
        matches = url_pattern.findall(line)
        # If the input is the starred-only text, only add the first URL match of each line
        if input_md_text == wiki_adapted_starred_only_md:
            matches = matches[:1]

        # Extract the description (text after the last match)
        last_match_end = line.rfind(')')
        description = line[last_match_end+1:].replace('**', '').strip() if last_match_end != -1 else ''
        # When the description is empty, use as description the lowest hierarchy level that is not empty
        if not description:
            description = '- ' + (level3 if level3 != '/' else level2 if level2 else level1)

        # Add matches to the appropriate hierarchy
        for title, url in matches:
            full_title = f"{title} {description}" if description else title
            bookmarks[level1][level2][level3].append((full_title, url))

    # Function to generate HTML from the nested dictionary
    def generate_html(bookmarks_dict, indent=1):
        html = ''
        for key, value in bookmarks_dict.items():
            html += '    ' * indent + f'<DT><H3>{key}</H3>\n'
            html += '    ' * indent + '<DL><p>\n'
            if isinstance(value, dict):
                html += generate_html(value, indent + 1)
            else:
                for full_title, url in value:
                    html += '    ' * (indent + 1) + f'<DT><A HREF="{url}" ADD_DATE="0">{full_title}</A>\n'
            html += '    ' * indent + '</DL><p>\n'
        return html

    # HTML structure
    html_content = '''<!DOCTYPE NETSCAPE-Bookmark-file-1>
<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">
<TITLE>Bookmarks</TITLE>
<H1>Bookmarks</H1>
<DL><p>
'''

    # Add the main folder
    html_content += f'    <DT><H3>{folder_name}</H3>\n'
    html_content += '    <DL><p>\n'

    # Add bookmarks to the HTML content
    html_content += generate_html(bookmarks)

    html_content += '    </DL><p>\n'
    html_content += '</DL><p>\n'

    # Write the HTML content to the output file
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(html_content)

    # Print success message
    #print(f'Successfully created bookmarks in {output_file}')

# Example usage:
markdown_to_html_bookmarks(wiki_adapted_md, 'fmhy_in_bookmarks.html')
markdown_to_html_bookmarks(wiki_adapted_starred_only_md, 'fmhy_in_bookmarks_starred_only.html')
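# Both output files use the NETSCAPE-Bookmark-file-1 format, so they can be loaded
# through a browser's "import bookmarks from HTML" feature.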