1
+ # Comments:
2
+ # - Running without skipping cache for the 1st time will take a while, but subsequent runs will be much faster
3
+ # - Parsing functions takes a lot longer than events because there's a lot more
4
+
1
5
import requests
2
6
from bs4 import BeautifulSoup
3
7
from html_to_markdown import convert_to_markdown
4
8
import yaml
5
9
10
+ import time
6
11
import os
7
12
import shutil
8
13
9
- # 🌐 URL constants
14
+ # Cache of event/function Wiki pages
15
+ SKIP_CACHE = False # Set to True to skip cache and always fetch fresh pages
16
+ PAGES_CACHE_DIR = "./cache/pages"
17
+
18
+ # Function listings URLs
10
19
URL_CLIENT_FUNCS = "https://wiki.multitheftauto.com/wiki/Client_Scripting_Functions"
11
20
URL_SERVER_FUNCS = "https://wiki.multitheftauto.com/wiki/Server_Scripting_Functions"
12
21
URL_SHARED_FUNCS = "https://wiki.multitheftauto.com/wiki/Shared_Scripting_Functions"
13
22
23
+ # Event listings URLs
14
24
URL_CLIENT_EVENTS = "https://wiki.multitheftauto.com/wiki/Client_Scripting_Events"
15
25
URL_SERVER_EVENTS = "https://wiki.multitheftauto.com/wiki/Server_Scripting_Events"
16
26
27
+ # Output directories
17
28
FUNCTIONS_DIR = "./output/functions"
18
29
EVENTS_DIR = "./output/events"
19
30
31
+ # Rename some categories
20
32
CATEGORY_CORRECTIONS = {
21
33
'SQL' : 'Database' ,
22
34
'Collision_shape' : 'Colshape' ,
23
35
}
24
36
37
+ # Don't include these items from the listings
25
38
NAME_BLACKLIST = [
26
39
'Matrix' ,
27
40
'Vector'
@@ -41,7 +54,7 @@ def fix_category(category_name: str) -> str:
41
54
return category_name
42
55
43
56
def parse_links (source_label : str , url : str ) -> dict :
44
- print (f"Parsing list of { source_label } from { url } ..." )
57
+ print (f"Parsing list of { source_label } ..." )
45
58
46
59
response = requests .get (url )
47
60
soup = BeautifulSoup (response .text , "html.parser" )
@@ -279,11 +292,27 @@ def parse_description(content_div):
279
292
break
280
293
281
294
return the_description
282
-
295
+
296
def get_page_from_cache_or_fetch(page_url: str, page_name: str) -> str:
    """Return the HTML of a wiki page, using the local page cache when allowed.

    If SKIP_CACHE is False and a cached copy exists under PAGES_CACHE_DIR,
    the cached HTML is returned. Otherwise the page is fetched over HTTP,
    written to the cache for subsequent runs, and its text returned.

    Args:
        page_url: Full URL of the wiki page to fetch.
        page_name: Name used for the cache file ("<page_name>.html").

    Returns:
        The page HTML as a string.

    Raises:
        ValueError: if the HTTP request does not return status 200.
    """
    cache_file = os.path.join(PAGES_CACHE_DIR, f"{page_name}.html")
    if (not SKIP_CACHE) and os.path.exists(cache_file):
        with open(cache_file, "r", encoding="utf-8") as f:
            return f.read()

    # A timeout prevents one slow/stalled wiki page from hanging the
    # entire scrape indefinitely (the original call had no timeout).
    response = requests.get(page_url, timeout=30)
    if response.status_code != 200:
        raise ValueError(f"Failed to fetch {page_url}: {response.status_code}")

    # Ensure the cache directory exists before writing: previously this
    # relied on main() having created it, so calling this function on a
    # fresh checkout (or after deleting ./cache) crashed on the write.
    os.makedirs(PAGES_CACHE_DIR, exist_ok=True)
    with open(cache_file, "w", encoding="utf-8") as f:
        f.write(response.text)
    return response.text
283
311
284
312
def parse_event_page (page_url : str , category : str , name : str , source : str ) -> dict :
285
- response = requests .get (page_url )
286
- soup = BeautifulSoup (response .text , "html.parser" )
313
+ response_text = get_page_from_cache_or_fetch (page_url , name )
314
+
315
+ soup = BeautifulSoup (response_text , "html.parser" )
287
316
288
317
# Find first non-empty p inside mw-content-text
289
318
content_div = soup .find ("div" , id = "mw-content-text" )
@@ -391,7 +420,7 @@ def parse_event_page(page_url: str, category: str, name: str, source: str) -> di
391
420
# Examples
392
421
examples = parse_examples (content_div )
393
422
if len (examples ) == 0 :
394
- print (f"Found no examples for { name } " )
423
+ print (f"Event is missing code examples: { page_url } " )
395
424
396
425
# For each example, create a .lua file with the code
397
426
# with name eventName-index.lua
@@ -409,7 +438,7 @@ def parse_event_page(page_url: str, category: str, name: str, source: str) -> di
409
438
added_examples .append ({
410
439
"path" : 'examples/' + example_filename ,
411
440
"description" : example_description ,
412
- "side" : example .get ("type" ) or event_type # Default to event_type if not specified
441
+ "side" : example .get ("type" ) or event_type # Default to this if not specified
413
442
})
414
443
example_index += 1
415
444
@@ -437,8 +466,9 @@ def parse_event_page(page_url: str, category: str, name: str, source: str) -> di
437
466
return yaml_dict
438
467
439
468
def parse_function_page (page_url : str , category : str , name : str , source : str ) -> dict :
440
- response = requests .get (page_url )
441
- soup = BeautifulSoup (response .text , "html.parser" )
469
+ response_text = get_page_from_cache_or_fetch (page_url , name )
470
+
471
+ soup = BeautifulSoup (response_text , "html.parser" )
442
472
content_div = soup .find ("div" , id = "mw-content-text" )
443
473
if not content_div :
444
474
raise ValueError (f"Could not find content in { page_url } " )
@@ -450,13 +480,41 @@ def parse_function_page(page_url: str, category: str, name: str, source: str) ->
450
480
raise ValueError (f"Could not find a valid description for { name } in { page_url } " )
451
481
452
482
func_notes , func_meta = parse_notes (content_div )
483
+
484
+
485
+ # Examples
486
+ examples = parse_examples (content_div )
487
+ # if len(examples) == 0:
488
+ # print(f"Function is missing code examples: {page_url}")
489
+
490
+
491
+ # For each example, create a .lua file with the code
492
+ # with name functionName-index.lua
493
+ example_index = 1
494
+ added_examples = []
495
+ for example in examples :
496
+ example_code = example .get ("code" , "" ).strip ()
497
+ if example_code :
498
+ example_filename = f"{ name } -{ example_index } .lua"
499
+ example_path = os .path .join (FUNCTIONS_DIR , category , 'examples' , example_filename )
500
+ os .makedirs (os .path .dirname (example_path ), exist_ok = True )
501
+ with open (example_path , "w" , encoding = "utf-8" ) as example_file :
502
+ example_file .write (example_code )
503
+ example_description = example .get ("description" , "" ).strip ()
504
+ added_examples .append ({
505
+ "path" : 'examples/' + example_filename ,
506
+ "description" : example_description ,
507
+ "side" : example .get ("type" ) or func_type # Default to this if not specified
508
+ })
509
+ example_index += 1
510
+
453
511
454
512
yaml_dict = {
455
513
func_type : {
456
514
"name" : name ,
457
515
"description" : func_description ,
458
516
"parameters" : [],
459
- "examples" : [] ,
517
+ "examples" : added_examples ,
460
518
"notes" : func_notes ,
461
519
"meta" : func_meta
462
520
}
@@ -503,6 +561,7 @@ def convert_page_to_yaml(page_url: str, category: str, name: str, source: str) -
503
561
504
562
def parse_items_by_source (base_dir , data_by_source ):
505
563
for source , categories in data_by_source .items ():
564
+ started_at = time .time ()
506
565
print (f"Parsing individual pages of { source } ..." )
507
566
for category , entries in categories .items ():
508
567
dir_path = os .path .join (base_dir , category )
@@ -522,16 +581,21 @@ def parse_items_by_source(base_dir, data_by_source):
522
581
if os .path .exists (filename ):
523
582
os .remove (filename )
524
583
525
- print (f"YAML & Lua files for { source } written successfully to { base_dir } ." )
584
+ print (f">> Parsed individual pages of { source } in { time . time () - started_at :.2f } seconds ." )
526
585
527
586
def main ():
587
+ # Create cache directory if it doesn't exist
588
+ if not os .path .exists (PAGES_CACHE_DIR ):
589
+ os .makedirs (PAGES_CACHE_DIR )
590
+ print ("SKIP_CACHE is set to" , SKIP_CACHE )
591
+
528
592
functions_by_source = {}
529
593
events_by_source = {}
530
594
531
595
# Functions
532
- # functions_by_source["Shared functions"] = parse_links("Shared functions", URL_SHARED_FUNCS)
533
- # functions_by_source["Client functions"] = parse_links("Client functions", URL_CLIENT_FUNCS)
534
- # functions_by_source["Server functions"] = parse_links("Server functions", URL_SERVER_FUNCS)
596
+ functions_by_source ["Shared functions" ] = parse_links ("Shared functions" , URL_SHARED_FUNCS )
597
+ functions_by_source ["Client functions" ] = parse_links ("Client functions" , URL_CLIENT_FUNCS )
598
+ functions_by_source ["Server functions" ] = parse_links ("Server functions" , URL_SERVER_FUNCS )
535
599
536
600
# TEST Parse only these:
537
601
# functions_by_source["Shared functions"] = {
@@ -541,8 +605,8 @@ def main():
541
605
# }
542
606
543
607
# Events
544
- events_by_source ["Client events" ] = parse_links ("Client events" , URL_CLIENT_EVENTS )
545
- events_by_source ["Server events" ] = parse_links ("Server events" , URL_SERVER_EVENTS )
608
+ # events_by_source["Client events"] = parse_links("Client events", URL_CLIENT_EVENTS)
609
+ # events_by_source["Server events"] = parse_links("Server events", URL_SERVER_EVENTS)
546
610
547
611
# Empty output directory
548
612
if os .path .exists ("./output" ):
0 commit comments