Merge branch 'main' into pre/beta

VinciGit00 · web-flow · commit 9b4efaf28729 · 2025-06-06T12:46:34.000+02:00
diff --git a/README.md b/README.md
@@ -1,4 +1,9 @@
+## 🚀 **Looking for an even faster and simpler way to scrape at scale (only 5 lines of code)?** Check out our enhanced version at [**ScrapeGraphAI.com**](https://scrapegraphai.com/?utm_source=github&utm_medium=readme&utm_campaign=oss_cta&utm_content=top_banner)! 🚀
+
+---
+
 # 🕷️ ScrapeGraphAI: You Only Scrape Once
+
 [English](https://github.com/VinciGit00/Scrapegraph-ai/blob/main/README.md) | [中文](https://github.com/VinciGit00/Scrapegraph-ai/blob/main/docs/chinese.md) | [日本語](https://github.com/VinciGit00/Scrapegraph-ai/blob/main/docs/japanese.md)
 | [한국어](https://github.com/VinciGit00/Scrapegraph-ai/blob/main/docs/korean.md)
 | [Русский](https://github.com/VinciGit00/Scrapegraph-ai/blob/main/docs/russian.md) | [Türkçe](https://github.com/VinciGit00/Scrapegraph-ai/blob/main/docs/turkish.md)
@@ -34,7 +39,7 @@ You can find more information at the following [link](https://scrapegraphai.com
 - **API**: [Documentation](https://docs.scrapegraphai.com/introduction)
 - **SDKs**: [Python](https://docs.scrapegraphai.com/sdks/python), [Node](https://docs.scrapegraphai.com/sdks/javascript) 
 - **LLM Frameworks**: [Langchain](https://docs.scrapegraphai.com/integrations/langchain), [Llama Index](https://docs.scrapegraphai.com/integrations/llamaindex), [Crew.ai](https://docs.scrapegraphai.com/integrations/crewai), [CamelAI](https://github.com/camel-ai/camel)
-- **Low-code Frameworks**: [Pipedream](https://pipedream.com/apps/scrapegraphai), [Bubble](https://bubble.io/plugin/scrapegraphai-1745408893195x213542371433906180), [Zapier](https://zapier.com/apps/scrapegraphai/integrations), [n8n](http://localhost:5001/dashboard)
+- **Low-code Frameworks**: [Pipedream](https://pipedream.com/apps/scrapegraphai), [Bubble](https://bubble.io/plugin/scrapegraphai-1745408893195x213542371433906180), [Zapier](https://zapier.com/apps/scrapegraphai/integrations), [n8n](http://localhost:5001/dashboard), [LangFlow](https://www.langflow.org)
 - **MCP server**:  [Link](https://smithery.ai/server/@ScrapeGraphAI/scrapegraph-mcp)
 
 ## 🚀 Quick install
@@ -192,9 +197,6 @@ The Official API Documentation can be found [here](https://docs.scrapegraphai.co
     <a href="https://scrape.do">
     <img src="https://raw.githubusercontent.com/VinciGit00/Scrapegraph-ai/main/docs/assets/scrapedo.png" alt="Stats" style="width: 11%;">
   </a>
-    <a href="https://www.scrapeless.com/en?utm_source=github&utm_medium=ads&utm_campaign=scraping&utm_term=scrapegraphai">
-    <img src="https://github.com/ScrapeGraphAI/Scrapegraph-ai/blob/main/docs/assets/scrapeless.png" alt="Stats" style="width: 11%;">
-  </a>
 </div>
 
 ## 📈 Telemetry
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,7 +1,7 @@
 [project]
 name = "scrapegraphai"
 
-version = "1.52.0b2"
+version = "1.53.0"
 
 description = "A web scraping library based on LangChain which uses LLM and direct graph logic to create scraping pipelines."
 authors = [
diff --git a/scrapegraphai/helpers/models_tokens.py b/scrapegraphai/helpers/models_tokens.py
@@ -179,6 +179,9 @@
         "claude-3-haiku-20240307": 200000,
         "claude-3-5-sonnet-20240620": 200000,
         "claude-3-5-haiku-latest": 200000,
+        "claude-opus-4-20250514": 200000,
+        "claude-sonnet-4-20250514": 200000,
+        "claude-3-7-sonnet-20250219": 200000,
     },
     "bedrock": {
         "anthropic.claude-3-haiku-20240307-v1:0": 200000,
diff --git a/scrapegraphai/nodes/fetch_node.py b/scrapegraphai/nodes/fetch_node.py
@@ -356,7 +356,7 @@ def handle_web_source(self, state, source):
             compressed_document = [
                 Document(page_content=parsed_content, metadata={"source": "html file"})
             ]
-        state["original_html"] = document
+        state["doc"] = document
         state.update(
             {
                 self.output[0]: compressed_document,
diff --git a/scrapegraphai/nodes/parse_node.py b/scrapegraphai/nodes/parse_node.py
@@ -121,7 +121,6 @@ def execute(self, state: dict) -> dict:
 
         state.update({self.output[0]: chunks})
         state.update({"parsed_doc": chunks})
-        state.update({"content": chunks})
 
         if self.parse_urls:
             state.update({self.output[1]: link_urls})

Original file line number	Diff line number	Diff line change
`@@ -356,7 +356,7 @@ def handle_web_source(self, state, source):`
`356`	`356`	`compressed_document = [`
`357`	`357`	`Document(page_content=parsed_content, metadata={"source": "html file"})`
`358`	`358`	`]`
`359`		`- state["original_html"] = document`
	`359`	`+ state["doc"] = document`
`360`	`360`	`state.update(`
`361`	`361`	`{`
`362`	`362`	`self.output[0]: compressed_document,`