1- import re
21from urllib .parse import urljoin
32
43import markdownify
54from bs4 .element import PageElement
65
7- # See https://github.com/matthewwithanm/python-markdownify/issues/31
8- markdownify .whitespace_re = re .compile (r"[\r\n\s\t ]+" )
9-
106
117class DocMarkdownConverter (markdownify .MarkdownConverter ):
128 """Subclass markdownify's MarkdownConverter to provide custom conversion methods."""
139
def __init__(self, *, page_url: str, **options):
    """
    Set up the converter and remember the URL of the page being converted.

    `page_url` is stored on the instance; `convert_a` uses it as the base when
    resolving link targets. Any `options` are forwarded to the parent
    converter and override the defaults chosen here.
    """
    # Reflow text to avoid unwanted line breaks.
    default_options = {"wrap": True, "wrap_width": None}

    # Caller-supplied options are on the right of the union, so they win.
    super().__init__(**(default_options | options))
    self.page_url = page_url
1716
18- def convert_li (self , el : PageElement , text : str , convert_as_inline : bool ) -> str :
17+ def convert_li (self , el : PageElement , text : str , parent_tags : set [ str ] ) -> str :
1918 """Fix markdownify's erroneous indexing in ol tags."""
2019 parent = el .parent
2120 if parent is not None and parent .name == "ol" :
@@ -31,38 +30,38 @@ def convert_li(self, el: PageElement, text: str, convert_as_inline: bool) -> str
3130 bullet = bullets [depth % len (bullets )]
3231 return f"{ bullet } { text } \n "
3332
def _convert_hn(self, _n: int, el: PageElement, text: str, parent_tags: set[str]) -> str:
    """
    Convert h tags to bold text with ** instead of adding #.

    `_n` and `el` are accepted but not used. When `parent_tags` contains
    "_inline" the text is returned untouched; otherwise it is wrapped in `**`
    and followed by a blank line.
    """
    if "_inline" in parent_tags:
        return text
    # No whitespace between the text and the closing `**`, otherwise the
    # emphasis is not rendered as bold.
    return f"**{text}**\n\n"
3938
def convert_code(self, el: PageElement, text: str, parent_tags: set[str]) -> str:
    """
    Undo `markdownify`s underscore escaping.

    The text is wrapped in single backticks and every backslash is removed, so
    escapes such as `\\_` show up as the plain character inside the code span.
    `el` and `parent_tags` are not used.
    """
    # Strip bare backslashes (not backslash + space) so that `\_` becomes `_`.
    return f"`{text}`".replace("\\", "")
4342
def convert_pre(self, el: PageElement, text: str, parent_tags: set[str]) -> str:
    """
    Wrap any codeblocks in `py` for syntax highlighting.

    The block content is rebuilt from the element's own strings; the already
    converted `text` argument is ignored (presumably to avoid markdown escaping
    ending up inside the code block - confirm against markdownify's behaviour).
    """
    code = "".join(el.strings)
    # The code must directly follow the opening fence line with no padding, so
    # the first line of code keeps its original indentation.
    return f"```py\n{code}```"
4847
def convert_a(self, el: PageElement, text: str, parent_tags: set[str]) -> str:
    """
    Resolve relative URLs to `self.page_url`.

    The element's `href` is rewritten in place to an absolute URL before the
    parent converter renders the link. Anchors without an `href` are passed
    through unchanged instead of raising `KeyError`.
    """
    href = el.get("href")
    if href is not None:
        el["href"] = urljoin(self.page_url, href)
    # Discord doesn't handle titles properly, showing links with them as raw text.
    el["title"] = None
    return super().convert_a(el, text, parent_tags)
5554
def convert_p(self, el: PageElement, text: str, parent_tags: set[str]) -> str:
    """
    Include only one newline instead of two when the parent is a li tag.

    Paragraphs converted inline (`"_inline"` in `parent_tags`) are returned
    unchanged; paragraphs outside of a list item are left to the parent
    converter.
    """
    if "_inline" in parent_tags:
        return text

    parent = el.parent
    if parent is not None and parent.name == "li":
        # Exactly one trailing newline, with no padding after the text.
        return f"{text}\n"
    return super().convert_p(el, text, parent_tags)
6564
def convert_hr(self, el: PageElement, text: str, parent_tags: set[str]) -> str:
    """
    Ignore `hr` tag.

    Always returns an empty string, so horizontal rules produce no output.
    """
    return ""
0 commit comments