Skip to content

Commit 1ae5d26

Browse files
authored
Merge pull request #78 from VinciGit00/robots_test
Robots test 🤖
2 parents 3a7fd7a + 1056cff commit 1ae5d26

20 files changed

+408
-90
lines changed

examples/benchmarks/SmartScraper/benchmark_docker.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
Basic example of scraping pipeline using SmartScraper from text
33
"""
44

5-
import os
65
from scrapegraphai.graphs import SmartScraperGraph
76
from scrapegraphai.utils import prettify_exec_info
87

examples/openai/custom_graph_openai.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from dotenv import load_dotenv
77
from scrapegraphai.models import OpenAI
88
from scrapegraphai.graphs import BaseGraph
9-
from scrapegraphai.nodes import FetchNode, ParseNode, RAGNode, GenerateAnswerNode
9+
from scrapegraphai.nodes import FetchNode, ParseNode, RAGNode, GenerateAnswerNode, RobotsNode
1010
load_dotenv()
1111

1212
# ************************************************
@@ -31,6 +31,12 @@
3131
llm_model = OpenAI(graph_config["llm"])
3232

3333
# define the nodes for the graph
34+
robot_node = RobotsNode(
35+
input="url",
36+
output=["is_scrapable"],
37+
node_config={"llm": llm_model}
38+
)
39+
3440
fetch_node = FetchNode(
3541
input="url | local_dir",
3642
output=["doc"],
@@ -57,17 +63,19 @@
5763

5864
graph = BaseGraph(
5965
nodes={
66+
robot_node,
6067
fetch_node,
6168
parse_node,
6269
rag_node,
6370
generate_answer_node,
6471
},
6572
edges={
73+
(robot_node, fetch_node),
6674
(fetch_node, parse_node),
6775
(parse_node, rag_node),
6876
(rag_node, generate_answer_node)
6977
},
70-
entry_point=fetch_node
78+
entry_point=robot_node
7179
)
7280

7381
# ************************************************

examples/single_node/robot_node.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
"""
Standalone demo: exercise a single RobotsNode outside of any graph.
"""
import os
from dotenv import load_dotenv
from scrapegraphai.models import OpenAI
from scrapegraphai.nodes import RobotsNode
load_dotenv()

# ------------------------------------------------
# Configuration for the underlying LLM
# ------------------------------------------------

openai_key = os.getenv("OPENAI_APIKEY")

graph_config = {
    "llm": {
        "api_key": openai_key,
        "model": "gpt-3.5-turbo",
        "temperature": 0,
        "streaming": True
    },
}

# ------------------------------------------------
# Build the node
# ------------------------------------------------

llm = OpenAI(graph_config["llm"])

robots_checker = RobotsNode(
    input="url",
    output=["is_scrapable"],
    node_config={"llm": llm}
)

# ------------------------------------------------
# Run the node on a single-URL state and show the result
# ------------------------------------------------

print(robots_checker.execute({"url": "https://twitter.com/home"}))
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
#!/bin/bash
# Lint, test, then commit and push the working tree with the given message.
#
# Usage: ./<script> "<commit message>"

if [ $# -eq 0 ]; then
    echo "Usage: $0 <commit_message>"
    exit 1
fi

cd ..

# Extract the commit message from the argument
commit_message="$1"

# Run Pylint on the specified Python files
# (fix: the command word "pylint" was duplicated, which made Pylint try to
# lint a package literally named "pylint" instead of only the project files)
pylint scrapegraphai/**/*.py scrapegraphai/*.py tests/**/*.py

cd tests

# Run pytest; abort before committing if the suite fails
if ! pytest; then
    echo "Pytest failed. Aborting commit and push."
    exit 1
fi

cd ..

# Sync with the remote before committing
git pull

# Stage the modified files
git add .

# Commit the changes with the provided message
git commit -m "$commit_message"

# Push the changes to the remote repository
git push

poetry.lock

Lines changed: 84 additions & 84 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

scrapegraphai/graphs/base_graph.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import time
55
from langchain_community.callbacks import get_openai_callback
66

7+
78
class BaseGraph:
89
"""
910
BaseGraph manages the execution flow of a graph composed of interconnected nodes.
@@ -81,7 +82,6 @@ def execute(self, initial_state: dict) -> dict:
8182

8283
with get_openai_callback() as cb:
8384
result = current_node.execute(state)
84-
8585
node_exec_time = time.time() - curr_time
8686
total_exec_time += node_exec_time
8787

scrapegraphai/helpers/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,4 @@
55
from .nodes_metadata import nodes_metadata
66
from .schemas import graph_schema
77
from .models_tokens import models_tokens
8+
from .robots import robots_dictionary

scrapegraphai/helpers/robots.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
"""
Mapping from LLM model/provider names to the user-agent tokens their web
crawlers advertise (the names that appear in a site's robots.txt rules).

Every value is a list so consumers can iterate the agents uniformly —
previously some entries were bare strings, which would iterate
character-by-character if looped over. NOTE(review): the exact consumer of
this table (RobotsNode) is not visible here; confirm it expects list values.
"""
robots_dictionary = {
    "gpt-3.5-turbo": ["GPTBot", "ChatGPT-user"],
    "gpt-4-turbo": ["GPTBot", "ChatGPT-user"],
    "claude": ["Claude-Web", "ClaudeBot"],
    "perplexity": ["PerplexityBot"],
    "cohere": ["cohere-ai"],
    "anthropic": ["anthropic-ai"],
}

scrapegraphai/nodes/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,3 +12,4 @@
1212
from .image_to_text_node import ImageToTextNode
1313
from .search_internet_node import SearchInternetNode
1414
from .generate_scraper_node import GenerateScraperNode
15+
from .robots_node import RobotsNode

scrapegraphai/nodes/rag_node.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,9 @@
99
from langchain_community.document_transformers import EmbeddingsRedundantFilter
1010
from langchain_community.embeddings import HuggingFaceHubEmbeddings
1111
from langchain_community.vectorstores import FAISS
12+
from langchain_community.embeddings import OllamaEmbeddings
1213
from langchain_openai import OpenAIEmbeddings, AzureOpenAIEmbeddings
1314
from ..models import OpenAI, Ollama, AzureOpenAI, HuggingFace
14-
from langchain_community.embeddings import OllamaEmbeddings
1515
from .base_node import BaseNode
1616

1717

@@ -97,7 +97,7 @@ def execute(self, state):
9797
# remove streaming and temperature
9898
params.pop("streaming", None)
9999
params.pop("temperature", None)
100-
100+
101101
embeddings = OllamaEmbeddings(**params)
102102
elif isinstance(embedding_model, HuggingFace):
103103
embeddings = HuggingFaceHubEmbeddings(model=embedding_model.model)

0 commit comments

Comments
 (0)