@@ -293,6 +293,26 @@ def parse_description(content_div):

    return the_description

+ def parse_issues(content_div):
+     issues = []
+     for a in content_div.find_all("a", href=True):
+         # Ignore if parent div has class "note-messagebox"
+         if "note-messagebox" in a.find_parent("div").get("class", []):
+             continue
+         href = a["href"]
+         if "github.com/multitheftauto/mtasa-blue/issues/" in href:
+             issue_number = href.split("/")[-1]
+             # Find the next td, that is the description
+             issue_desc = "TODO"
+             next_td = a.find_next("td")
+             if next_td:
+                 issue_desc = next_td.get_text(strip=True)
+             issues.append({
+                 "id": issue_number,
+                 "description": issue_desc
+             })
+     return issues
+
def get_page_from_cache_or_fetch(page_url: str, page_name: str) -> str:
    """Get the page content from cache or fetch it if not cached."""
    cache_file = os.path.join(PAGES_CACHE_DIR, f"{page_name}.html")
@@ -309,6 +329,31 @@ def get_page_from_cache_or_fetch(page_url: str, page_name: str) -> str:
    else:
        raise ValueError(f"Failed to fetch {page_url}: {response.status_code}")

+ def print_additional_headers_found_in_page(content_div, handled_header_names, page_url):
+     """Print any additional headers found in the content_div that were not handled."""
+     additional_headers = []
+     # Ignore headers from see also
+     IGNORE_WORDS = [
+         "see also", "events", "functions", "changelog",
+         "game processing order", "input", "gui",
+         "browsers", "buttons", "checkboxes", "comboboxes",
+         "edit boxes", "gridlists", "memos", "progressbars", "radio buttons",
+         "scrollbars", "scrollpanes", "static images", "tab Panels", "tabs",
+         "tab panels", "text labels", "windows"
+     ]
+     for header in content_div.find_all(["h2", "h3"]):
+         header_text = header.get_text(strip=True)
+         if header_text and header_text not in handled_header_names:
+             header_text_lower = header_text.lower()
+             # Ignore some headers that are not relevant
+             if any(ignore_word in header_text_lower for ignore_word in IGNORE_WORDS):
+                 continue
+             additional_headers.append(header_text)
+
+     if additional_headers:
+         print(f"Other headers found in {page_url}:")
+         print(f" {', '.join(additional_headers)}")
+
def parse_event_page(page_url: str, category: str, name: str, source: str) -> dict:
    response_text = get_page_from_cache_or_fetch(page_url, name)

@@ -318,6 +363,10 @@ def parse_event_page(page_url: str, category: str, name: str, source: str) -> dict:
    content_div = soup.find("div", id="mw-content-text")
    if not content_div:
        raise ValueError(f"Could not find content in {page_url}")
+
+     stop_if_deprecated(content_div, page_url)
+
+     handled_header_names = []

    event_type = "client" if "Client" in source else "server"

@@ -330,6 +379,7 @@ def parse_event_page(page_url: str, category: str, name: str, source: str) -> di
    parameters_header = content_div.find("span", id="Parameters")

    if parameters_header:
+         handled_header_names.append("Parameters")
        params = []
        next_element = parameters_header.find_next()

@@ -393,6 +443,7 @@ def parse_event_page(page_url: str, category: str, name: str, source: str) -> di
    event_source = None
    source_header = content_div.find("span", id="Source")
    if source_header:
+         handled_header_names.append("Source")
        source_paragraph = source_header.find_next("p")
        if source_paragraph:
            source_text = source_paragraph.get_text().strip()
@@ -409,6 +460,7 @@ def parse_event_page(page_url: str, category: str, name: str, source: str) -> di
    event_canceling = None
    canceling_header = content_div.find("span", id="Canceling") or content_div.find("span", id="Cancelling") or content_div.find("span", id="Cancel_effect") or content_div.find("span", id="Cancel_effects") or content_div.find("span", id="Cancel_Effect") or content_div.find("span", id="Cancel_Effects")
    if canceling_header:
+         handled_header_names.append(canceling_header.text.strip())
        # Extract text
        canceling_paragraph = canceling_header.find_next("p")
        if canceling_paragraph:
@@ -419,11 +471,11 @@ def parse_event_page(page_url: str, category: str, name: str, source: str) -> di

    # Examples
    examples = parse_examples(content_div)
+     handled_header_names.append("Examples")
+     handled_header_names.append("Example")
    if len(examples) == 0:
        print(f"Event is missing code examples: {page_url}")

-     # For each example, create a .lua file with the code
-     # with name eventName-index.lua
    example_index = 1
    added_examples = []
    for example in examples:
@@ -444,6 +496,35 @@ def parse_event_page(page_url: str, category: str, name: str, source: str) -> di

    event_notes, event_meta = parse_notes(content_div)

+     # Parse Type section, put it into a note
+     type_header = content_div.find("span", id="Type")
+     if type_header:
+         type_paragraph = type_header.find_next("p")
+         if type_paragraph:
+             type_text = type_paragraph.get_text().strip()
+             if type_text:
+                 # Remove new lines from the type text
+                 type_text = type_text.replace("\n", " ")
+                 # Look for any list after that paragraph
+                 list_items = type_paragraph.find_next("ul")
+                 if list_items:
+                     prev_header = type_paragraph.find_previous("h2") or type_paragraph.find_previous("h3")
+                     if prev_header and prev_header.getText(strip=True) == "Type":
+                         # If the header is "Type", we can safely add the list items to the type text
+                         type_text += " " + ", ".join(li.get_text(strip=True) for li in list_items.find_all("li"))
+
+                 event_notes.append({
+                     "type": "info",
+                     "content": type_text
+                 })
+         handled_header_names.append("Type")
+
+     # Parse Issues
+     event_issues = parse_issues(content_div)
+     handled_header_names.append("Issues")
+
+     print_additional_headers_found_in_page(content_div, handled_header_names, page_url)
+
    yaml_dict = {
        "name": name,
        "type": event_type,
@@ -458,38 +539,72 @@ def parse_event_page(page_url: str, category: str, name: str, source: str) -> di
        yaml_dict["notes"] = event_notes
    if event_meta:
        yaml_dict["meta"] = event_meta
+     if event_issues:
+         yaml_dict["issues"] = event_issues

    # Set incomplete to true if no description is found for at least one parameter
    if any(param["description"] == "MISSING_PARAM_DESC" for param in event_parameters):
        yaml_dict["incomplete"] = True

    return yaml_dict

+ def stop_if_deprecated(content_div, page_url: str):
+     deprecated_texts = [
+         "This function is deprecated",
+         "Function has been disabled",
+         "This function is provided by the external",
+         "This page is marked for deletion"
+         # "BEFORE VERSION",
+     ]
+     for text in deprecated_texts:
+         if content_div.find(string=lambda s: s and text in s):
+             raise ValueError(f"Found {text} in {page_url}. Please review manually.")
+
def parse_function_page(page_url: str, category: str, name: str, source: str) -> dict:
    response_text = get_page_from_cache_or_fetch(page_url, name)

    soup = BeautifulSoup(response_text, "html.parser")
    content_div = soup.find("div", id="mw-content-text")
    if not content_div:
        raise ValueError(f"Could not find content in {page_url}")
+
+     stop_if_deprecated(content_div, page_url)
+
+     handled_header_names = []

    func_type = "shared" if "Shared" in source else "server" if "Server" in source else "client"

    func_description = parse_description(content_div)
    if func_description is None:
        raise ValueError(f"Could not find a valid description for {name} in {page_url}")

-     func_notes, func_meta = parse_notes(content_div)
+     func_pair = None
+     counterpart_b = content_div.find("b", string="Counterpart")
+     if counterpart_b:
+         i_tag = counterpart_b.find_next("i")
+         if i_tag and i_tag.a:
+             func_pair = i_tag.a.text.strip()

+     func_notes, func_meta = parse_notes(content_div)
+
+     # Syntax: parameters and returns TODO
+     handled_header_names.append("Syntax")
+     handled_header_names.append("Parameters")
+     handled_header_names.append("Arguments")
+     handled_header_names.append("Required Arguments")
+     handled_header_names.append("Required arguments")
+     handled_header_names.append("Optional Arguments")
+     handled_header_names.append("Optional arguments")
+     handled_header_names.append("Returns")
+

    # Examples
    examples = parse_examples(content_div)
+     handled_header_names.append("Examples")
+     handled_header_names.append("Example")
    # if len(examples) == 0:
    # print(f"Function is missing code examples: {page_url}")

-
-     # For each example, create a .lua file with the code
-     # with name eventName-index.lua
    example_index = 1
    added_examples = []
    for example in examples:
@@ -508,35 +623,28 @@ def parse_function_page(page_url: str, category: str, name: str, source: str) -> dict:
            })
            example_index += 1

+     # Parse Issues
+     func_issues = parse_issues(content_div)
+     handled_header_names.append("Issues")
+
+     print_additional_headers_found_in_page(content_div, handled_header_names, page_url)

    yaml_dict = {
        func_type: {
            "name": name,
            "description": func_description,
            "parameters": [],
            "examples": added_examples,
-             "notes": func_notes,
-             "meta": func_meta
        }
    }
-
-     # if source.startswith("Shared"):
-     # yaml_content = "shared: &shared\n"
-     # yaml_content += f" incomplete: true\n"
-     # yaml_content += f" name: {name}\n"
-     # yaml_content += f" description: TODO\n"
-     # yaml_content += "\nserver:\n <<: *shared"
-     # yaml_content += "\nclient:\n <<: *shared"
-     # elif source.startswith("Server"):
-     # yaml_content = "server:\n"
-     # yaml_content += f" incomplete: true\n"
-     # yaml_content += f" name: {name}\n"
-     # yaml_content += f" description: TODO\n"
-     # elif source.startswith("Client"):
-     # yaml_content = "client:\n"
-     # yaml_content += f" incomplete: true\n"
-     # yaml_content += f" name: {name}\n"
-     # yaml_content += f" description: TODO\n"
+     if func_pair:
+         yaml_dict[func_type]["pair"] = func_pair
+     if func_notes:
+         yaml_dict[func_type]["notes"] = func_notes
+     if func_meta:
+         yaml_dict[func_type]["meta"] = func_meta
+     if func_issues:
+         yaml_dict[func_type]["issues"] = func_issues

    return yaml_dict

@@ -599,8 +707,8 @@ def main():

    # TEST Parse only these:
    # functions_by_source["Shared functions"] = {
-     # "Element": [
-     #     ("https://wiki.multitheftauto.com/wiki/SetElementParent", "setElementParent"),
+     # "Player": [
+     #     ("https://wiki.multitheftauto.com/wiki/SetPlayerName", "setPlayerName"),
    # ]
    # }
