22
22
'Collision_shape' : 'Colshape' ,
23
23
}
24
24
25
# Substrings that disqualify a wiki link: while links are being collected, any
# entry whose name contains one of these is skipped.
NAME_BLACKLIST = ["Matrix", "Vector"]
29
+
25
30
def clean_category (category_name : str ) -> str :
26
31
if category_name .endswith ("events" ):
27
32
return category_name [:- 7 ]
@@ -61,6 +66,8 @@ def parse_links(source_label: str, url: str) -> dict:
61
66
if a and a .get ("href" , "" ).startswith ("/wiki/" ):
62
67
name = a .text .strip ()
63
68
name = name .replace ("/wiki/" , "" ).split ("/" )[- 1 ]
69
+ if any (blacklist in name for blacklist in NAME_BLACKLIST ):
70
+ continue
64
71
page_url = a .get ("href" )
65
72
page_url = f"https://wiki.multitheftauto.com{ page_url } "
66
73
if name not in result [current_category ]:
@@ -134,7 +141,7 @@ def parse_examples(content_div):
134
141
return examples
135
142
136
143
137
- def parse_note_boxes (content_div ):
144
+ def parse_notes (content_div ):
138
145
note_boxes = []
139
146
140
147
# 1. Note and warning boxes use specific class names
@@ -171,16 +178,44 @@ def parse_note_boxes(content_div):
171
178
for table in content_div .find_all ("table" ):
172
179
style = table .get ("style" , "" )
173
180
if "98fb98" in style and "border-left" in style : # distinctive green border
174
- td = table .find ("td" )
175
- if td :
176
- text = td .get_text (strip = True )
181
+
182
+ rows = table .find_all ("tr" )
183
+ if not rows :
184
+ continue
185
+ # Get the second <td> of the first <tr>
186
+ cells = rows [0 ].find_all ("td" )
187
+ if len (cells ) >= 2 :
188
+ message_cell = cells [1 ]
189
+ text = message_cell .get_text (" " , strip = True )
177
190
text = text .replace ("Tip:" , "" , 1 ).strip ()
178
191
note_boxes .append ({
179
192
"type" : "tip" ,
180
193
"text" : text
181
194
})
195
+
196
+ # 3. Important Note boxes also don't have class, FFB2B2 border color
197
+ for table in content_div .find_all ("table" ):
198
+ # Ignore if it parent div has class "warning-messagebox" (because that's also the same color lol)
199
+ if "warning-messagebox" in table .parent .get ("class" , []):
200
+ continue
201
+ style = table .get ("style" , "" )
202
+ if "FFB2B2" in style and "border-left" in style :
203
+
204
+ rows = table .find_all ("tr" )
205
+ if not rows :
206
+ continue
207
+ # Get the second <td> of the first <tr>
208
+ cells = rows [0 ].find_all ("td" )
209
+ if len (cells ) >= 2 :
210
+ message_cell = cells [1 ]
211
+ text = message_cell .get_text (" " , strip = True )
212
+ text = text .replace ("Important Note:" , "" , 1 ).strip ()
213
+ note_boxes .append ({
214
+ "type" : "important" ,
215
+ "text" : text
216
+ })
182
217
183
- # 3 . "This article needs checking" boxes (purple border, distinct title)
218
+ # 4 . "This article needs checking" boxes (purple border, distinct title)
184
219
for table in content_div .find_all ("table" ):
185
220
style = table .get ("style" , "" )
186
221
if "border-left: 25px solid #8181ff" in style :
@@ -201,43 +236,63 @@ def parse_note_boxes(content_div):
201
236
"text" : text
202
237
})
203
238
204
- return note_boxes
205
-
239
+ the_notes = []
240
+ the_meta = []
241
+ for note in note_boxes :
242
+ if note ["type" ] == "note" or note ["type" ] == "tip" or note ["type" ] == "warning" or note ["type" ] == "important" :
243
+ the_notes .append ({
244
+ "type" : "info" if note ["type" ] == "note" else note ["type" ],
245
+ "content" : note ["text" ]
246
+ })
247
+ elif note ["type" ] == "needs_checking" :
248
+ the_meta .append ({
249
+ "needs_checking" : note ["text" ]
250
+ })
206
251
207
- def parse_event_page (page_url : str , category : str , name : str , source : str ) -> dict :
208
- response = requests .get (page_url )
209
- soup = BeautifulSoup (response .text , "html.parser" )
252
+ return the_notes , the_meta
210
253
211
- # Find first non-empty p inside mw-content-text
212
- content_div = soup .find ("div" , id = "mw-content-text" )
213
- if not content_div :
214
- raise ValueError (f"Could not find content in { page_url } ." )
254
def parse_description(content_div):
    """Extract the lead description of a wiki page as stripped Markdown.

    Two strategies are tried in order:

    1. The first non-empty ``<p>`` that appears before any ``<h2>``/``<h3>``.
    2. If that produced nothing, the first non-blank ``<div>`` whose ``style``
       attribute is exactly ``padding: 4px 8px``.

    Args:
        content_div: The parsed page content element to search (callers pass
            the ``mw-content-text`` div).

    Returns:
        The description converted with ``convert_to_markdown`` and stripped,
        or ``None`` when neither strategy finds a candidate element.
    """
    description = None

    # Paragraphs and section headers come back in document order, so the
    # first header marks the end of the lead section.
    for node in content_div.find_all(["p", "h2", "h3"]):
        if node.name != "p":
            # Hit an h2/h3 before finding any usable paragraph.
            break
        if node.get_text().strip():
            description = convert_to_markdown(str(node)).strip()
            break

    if description:
        return description

    # Fallback: some pages keep their intro inside a padded box instead of a
    # plain paragraph.
    for box in content_div.find_all("div", style="padding: 4px 8px"):
        raw = box.get_text()
        if not raw or raw.isspace():
            continue
        description = convert_to_markdown(str(box)).strip()
        break

    return description
282
+
283
+
284
+ def parse_event_page (page_url : str , category : str , name : str , source : str ) -> dict :
285
+ response = requests .get (page_url )
286
+ soup = BeautifulSoup (response .text , "html.parser" )
287
+
288
+ # Find first non-empty p inside mw-content-text
289
+ content_div = soup .find ("div" , id = "mw-content-text" )
290
+ if not content_div :
291
+ raise ValueError (f"Could not find content in { page_url } " )
292
+
293
+ event_type = "client" if "Client" in source else "server"
294
+
295
+ event_description = parse_description (content_div )
241
296
if event_description is None :
242
297
raise ValueError (f"Could not find a valid description for { name } in { page_url } " )
243
298
@@ -338,8 +393,6 @@ def parse_event_page(page_url: str, category: str, name: str, source: str) -> di
338
393
if len (examples ) == 0 :
339
394
print (f"Found no examples for { name } " )
340
395
341
- event_type = "client" if "Client" in source else "server"
342
-
343
396
# For each example, create a .lua file with the code
344
397
# with name eventName-index.lua
345
398
example_index = 1
@@ -360,20 +413,7 @@ def parse_event_page(page_url: str, category: str, name: str, source: str) -> di
360
413
})
361
414
example_index += 1
362
415
363
- note_boxes = parse_note_boxes (content_div )
364
- event_notes = []
365
- event_meta = []
366
- for note in note_boxes :
367
- if note ["type" ] == "note" or note ["type" ] == "tip" or note ["type" ] == "warning" :
368
- event_notes .append ({
369
- "type" : "info" if note ["type" ] == "note" else note ["type" ],
370
- "content" : note ["text" ]
371
- })
372
- elif note ["type" ] == "needs_checking" :
373
- event_meta .append ({
374
- "needs_checking" : note ["text" ]
375
- })
376
-
416
+ event_notes , event_meta = parse_notes (content_div )
377
417
378
418
yaml_dict = {
379
419
"name" : name ,
@@ -396,26 +436,51 @@ def parse_event_page(page_url: str, category: str, name: str, source: str) -> di
396
436
397
437
return yaml_dict
398
438
399
def parse_function_page(page_url: str, category: str, name: str, source: str) -> dict:
    """Scrape one function page and build its YAML-ready dictionary.

    Args:
        page_url: Full URL of the wiki page to download.
        category: Category the function was listed under (not used here).
        name: Function name, stored under the ``name`` key.
        source: Label of the listing the link came from. If it contains
            ``"Shared"`` the result is keyed ``"shared"``, else if it
            contains ``"Server"`` it is keyed ``"server"``, otherwise
            ``"client"``.

    Returns:
        A single-key dict mapping the function type to its data: ``name``,
        ``description``, ``notes`` and ``meta`` filled from the page, with
        ``parameters`` and ``examples`` left as empty lists.

    Raises:
        ValueError: If the page has no ``mw-content-text`` div, or no
            description could be extracted from it.
    """
    html = requests.get(page_url).text
    content_div = BeautifulSoup(html, "html.parser").find("div", id="mw-content-text")
    if not content_div:
        raise ValueError(f"Could not find content in {page_url}")

    # "Shared" is checked first, then "Server"; anything else is treated as
    # a client function.
    if "Shared" in source:
        func_type = "shared"
    elif "Server" in source:
        func_type = "server"
    else:
        func_type = "client"

    func_description = parse_description(content_div)
    if func_description is None:
        raise ValueError(f"Could not find a valid description for {name} in {page_url}")

    func_notes, func_meta = parse_notes(content_div)

    # Parameters and examples are not scraped for functions yet, so they are
    # emitted as empty lists.
    return {
        func_type: {
            "name": name,
            "description": func_description,
            "parameters": [],
            "examples": [],
            "notes": func_notes,
            "meta": func_meta,
        }
    }
419
484
420
485
def convert_page_to_yaml (page_url : str , category : str , name : str , source : str ) -> str :
421
486
# This scrapes the page and tries to parse the MediaWiki content into a YAML format for the function/event
@@ -426,17 +491,17 @@ def convert_page_to_yaml(page_url: str, category: str, name: str, source: str) -
426
491
raise ValueError ("Source must be either a function or an event." )
427
492
428
493
if is_event :
429
- yaml_content = yaml .safe_dump (parse_event_page (page_url , category , name , source ),
430
- sort_keys = False ,
431
- allow_unicode = True ,
432
- default_flow_style = False )
494
+ yaml_dict = parse_event_page (page_url , category , name , source )
433
495
434
496
elif is_function :
435
- yaml_content = parse_function_page (page_url , category , name , source )
436
-
437
- return yaml_content
497
+ yaml_dict = parse_function_page (page_url , category , name , source )
498
+
499
+ return yaml .safe_dump (yaml_dict ,
500
+ sort_keys = False ,
501
+ allow_unicode = True ,
502
+ default_flow_style = False )
438
503
439
- def write_yaml_per_entry (base_dir , data_by_source ):
504
+ def parse_items_by_source (base_dir , data_by_source ):
440
505
for source , categories in data_by_source .items ():
441
506
print (f"Parsing individual pages of { source } ..." )
442
507
for category , entries in categories .items ():
@@ -464,9 +529,16 @@ def main():
464
529
events_by_source = {}
465
530
466
531
# Functions
532
+ # functions_by_source["Shared functions"] = parse_links("Shared functions", URL_SHARED_FUNCS)
467
533
# functions_by_source["Client functions"] = parse_links("Client functions", URL_CLIENT_FUNCS)
468
534
# functions_by_source["Server functions"] = parse_links("Server functions", URL_SERVER_FUNCS)
469
- # functions_by_source["Shared functions"] = parse_links("Shared functions", URL_SHARED_FUNCS)
535
+
536
+ # TEST Parse only these:
537
+ # functions_by_source["Shared functions"] = {
538
+ # "Element": [
539
+ # ("https://wiki.multitheftauto.com/wiki/SetElementParent", "setElementParent"),
540
+ # ]
541
+ # }
470
542
471
543
# Events
472
544
events_by_source ["Client events" ] = parse_links ("Client events" , URL_CLIENT_EVENTS )
@@ -476,8 +548,8 @@ def main():
476
548
if os .path .exists ("./output" ):
477
549
shutil .rmtree ("./output" )
478
550
479
- write_yaml_per_entry (FUNCTIONS_DIR , functions_by_source )
480
- write_yaml_per_entry (EVENTS_DIR , events_by_source )
551
+ parse_items_by_source (FUNCTIONS_DIR , functions_by_source )
552
+ parse_items_by_source (EVENTS_DIR , events_by_source )
481
553
482
554
if __name__ == "__main__" :
483
555
main ()
0 commit comments