Skip to content

Commit fb00490

Browse files
committed
Support WeChat clipper
1 parent dac2a73 commit fb00490

File tree

5 files changed

19 additions and 6 deletions (lines changed)

5 files changed

19 additions and 6 deletions (lines changed)

demos/Test Case E.html

Lines changed: 3 additions & 0 deletions
Large diffs are not rendered by default.

html2notion/translate/html2json.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ def _is_yinxiang_export_html(html_soup):
3232

3333
"""
3434
<meta name="source-application" content="webclipper.evernote" />
35+
<meta name="source-application" content="微信" />
3536
"""
3637
def _is_yinxiang_clipper_html(html_soup):
3738
exporter_version_meta = html_soup.select_one('html > head > meta[name="exporter-version"]')
@@ -45,6 +46,8 @@ def _is_yinxiang_clipper_html(html_soup):
4546
clipper_source_content = clipper_source_meta.get('content', "") if isinstance(clipper_source_meta, Tag) else ""
4647
if isinstance(clipper_source_content, str) and clipper_source_content.endswith("evernote"):
4748
return True
49+
if isinstance(clipper_source_content, str) and clipper_source_content in ("微信"):
50+
return True
4851
return False
4952

5053

html2notion/translate/html2json_base.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,8 @@ def get_notion_data(self):
7171
@staticmethod
7272
def extract_text_and_parents(tag: PageElement, parents=[]):
7373
results = []
74-
if isinstance(tag, NavigableString):
74+
# Filter empty content
75+
if isinstance(tag, NavigableString) and tag.text:
7576
results.append((tag.text, parents))
7677
return results
7778
elif isinstance(tag, Tag):
@@ -328,7 +329,7 @@ def convert_divider(self, soup):
328329
"type": "divider",
329330
"divider": {}
330331
}
331-
332+
332333
def convert_heading(self, soup):
333334
heading_map = {"h1": "heading_1", "h2": "heading_2", "h3": "heading_3",
334335
"h4": "heading_3", "h5": "heading_3", "h6": "heading_3"}
@@ -345,7 +346,8 @@ def convert_heading(self, soup):
345346
text_obj = self.generate_inline_obj(soup)
346347
if text_obj:
347348
rich_text.extend(text_obj)
348-
return json_obj
349+
return json_obj
350+
return None
349351

350352
# <ol><li><div>first</div></li><li><div>second</div></li><li><div>third</div></li></ol>
351353
def convert_numbered_list_item(self, soup):
@@ -356,6 +358,10 @@ def convert_bulleted_list_item(self, soup):
356358
return self.convert_list_items(soup, 'bulleted_list_item')
357359

358360
def convert_list_items(self, soup, list_type):
361+
# Remove heading tags in li
362+
for heading in soup.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6']):
363+
heading.unwrap()
364+
359365
items = soup.find_all('li', recursive=True)
360366
if not items:
361367
logger.warning("No list items found in {soup}")

html2notion/translate/html2json_clipper.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,5 +109,5 @@ def convert_code(self, soup):
109109
json_obj["code"]["rich_text"] = self.merge_rich_text(rich_text)
110110
return json_obj
111111

112-
112+
113113
Html2JsonBase.register(YinXiangClipper_Type, Html2JsonYinXiang)

html2notion/translate/notion_import.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,8 @@ async def create_new_page(self, notion_data):
4444
blocks = notion_data.get("children", [])
4545
limit_size = 100
4646
chunks = [blocks[i: i + limit_size] for i in range(0, len(blocks), limit_size)]
47-
notion_data.pop("children")
47+
if blocks:
48+
notion_data.pop("children")
4849
first_chunk = chunks[0] if chunks else []
4950
created_page = await self.notion_client.pages.create(**notion_data, children=first_chunk)
5051
page_id = created_page["id"]
@@ -63,7 +64,7 @@ async def main(file_path, notion_api_key):
6364

6465
if __name__ == "__main__":
6566
test_prepare_conf()
66-
file = Path("./demos/Test Case D.html")
67+
file = Path("./demos/Test Case E.html")
6768
notion_api_key = ""
6869
if 'GITHUB_ACTIONS' in os.environ:
6970
notion_api_key = os.environ['notion_api_key']

0 commit comments

Comments
 (0)