1- from langchain_core .globals import set_verbose , set_debug
2- from langchain_ollama import ChatOllama , OllamaEmbeddings
1+ import logging
2+ from typing import Optional
3+
4+ import yaml
35from langchain .schema .output_parser import StrOutputParser
4- from langchain_mongodb .vectorstores import MongoDBAtlasVectorSearch
5- from pymongo import MongoClient
6- from langchain_community .document_loaders import PyPDFLoader
7- from langchain .text_splitter import RecursiveCharacterTextSplitter
86from langchain .schema .runnable import RunnablePassthrough
7+ from langchain .text_splitter import RecursiveCharacterTextSplitter
8+ from langchain_community .document_loaders import PyPDFLoader
99from langchain_community .vectorstores .utils import filter_complex_metadata
10+ from langchain_core .globals import set_debug , set_verbose
1011from langchain_core .prompts import ChatPromptTemplate
11- import logging
12- import yaml
13-
12+ from langchain_mongodb . vectorstores import MongoDBAtlasVectorSearch
13+ from langchain_ollama import ChatOllama , OllamaEmbeddings
14+ from pymongo import MongoClient
1415
# Turn on LangChain's verbose debug tracing for every chain invocation.
set_debug(True)

# Module-level logger; INFO level surfaces the pipeline's progress messages.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
2223
24+
def load_config(config_file: str = "config.yaml"):
    """Load and parse a YAML configuration file.

    Args:
        config_file: Path to the YAML config file (defaults to "config.yaml").

    Returns:
        The parsed configuration (typically a dict), or None for an empty file.

    Raises:
        FileNotFoundError: If the file does not exist.
        yaml.YAMLError: If the file contains invalid YAML.
    """
    # Pin the encoding so parsing does not depend on the platform's locale default.
    with open(config_file, encoding="utf-8") as file:
        return yaml.safe_load(file)
2729
30+
2831class ChatPDF :
2932 """A class designed for PDF ingestion and question answering using RAG with detailed debugging logs."""
3033
@@ -40,29 +43,33 @@ def __init__(self, config_file: str = "config.yaml"):
4043 mongo_connection_str = config ["mongo_connection_str" ]
4144 database_name = config ["database_name" ]
4245 collection_name = config ["collection_name" ]
43-
46+
4447 self .model = ChatOllama (model = llm_model )
4548 self .embeddings = OllamaEmbeddings (model = embedding_model )
46- self .text_splitter = RecursiveCharacterTextSplitter (chunk_size = 1024 , chunk_overlap = 100 )
49+ self .text_splitter = RecursiveCharacterTextSplitter (
50+ chunk_size = 1024 , chunk_overlap = 100
51+ )
4752 self .prompt = ChatPromptTemplate .from_template (
4853 """
4954 You are a helpful assistant answering questions based on the uploaded document and the conversation.
50-
55+
5156 Conversation History:
5257 {conversation_history}
53-
58+
5459 Context from Documents:
5560 {context}
56-
61+
5762 Question:
5863 {question}
59-
64+
6065 Provide a concise, accurate answer (preferably within three sentences), ensuring it directly addresses the question.
6166 """
6267 )
63-
68+
6469 # Setup MongoDB connection
65- self .client = MongoClient (mongo_connection_str )
70+ self .client = MongoClient (
71+ mongo_connection_str , appname = "devrel.showcase.local_rag_pdf_app"
72+ )
6673 self .collection = self .client [database_name ][collection_name ]
6774
6875 # Verbose connection check
@@ -74,7 +81,7 @@ def __init__(self, config_file: str = "config.yaml"):
7481 collection = self .collection ,
7582 embedding = self .embeddings ,
7683 index_name = "vector_index" ,
77- relevance_score_fn = "cosine"
84+ relevance_score_fn = "cosine" ,
7885 )
7986
8087 # Create vector search index on the collection
@@ -107,7 +114,13 @@ def upload_and_index_pdf(self, pdf_file_path: str):
107114 self .vector_store .add_documents (documents = chunks )
108115 logger .info ("Document embeddings stored successfully in MongoDB Atlas." )
109116
110- def query_with_context (self , query : str , conversation_history : list = None , k : int = 5 , score_threshold : float = 0.2 ):
117+ def query_with_context (
118+ self ,
119+ query : str ,
120+ conversation_history : Optional [list ] = None ,
121+ k : int = 5 ,
122+ score_threshold : float = 0.2 ,
123+ ):
111124 """
112125 Answer a query using the RAG pipeline with verbose debugging and conversation history.
113126
@@ -132,7 +145,9 @@ def query_with_context(self, query: str, conversation_history: list = None, k: i
132145 # Generate and log query embeddings
133146 query_embedding = self .embeddings .embed_query (query )
134147 logger .info (f"User Query: { query } " )
135- logger .debug (f"Query Embedding (sample values): { query_embedding [:10 ]} ... [Total Length: { len (query_embedding )} ]" )
148+ logger .debug (
149+ f"Query Embedding (sample values): { query_embedding [:10 ]} ... [Total Length: { len (query_embedding )} ]"
150+ )
136151
137152 logger .info (f"Retrieving context for query: { query } " )
138153 retrieved_docs = self .retriever .invoke (query )
@@ -147,17 +162,19 @@ def query_with_context(self, query: str, conversation_history: list = None, k: i
147162
148163 # Format the input for the LLM, including conversation history
149164 formatted_input = {
150- "conversation_history" : "\n " .join (conversation_history ) if conversation_history else "" ,
165+ "conversation_history" : (
166+ "\n " .join (conversation_history ) if conversation_history else ""
167+ ),
151168 "context" : "\n \n " .join (doc .page_content for doc in retrieved_docs ),
152169 "question" : query ,
153170 }
154171
155172 # Build the RAG chain
156173 chain = (
157174 RunnablePassthrough () # Passes the input as-is
158- | self .prompt # Formats the input for the LLM
159- | self .model # Queries the LLM
160- | StrOutputParser () # Parses the LLM's output
175+ | self .prompt # Formats the input for the LLM
176+ | self .model # Queries the LLM
177+ | StrOutputParser () # Parses the LLM's output
161178 )
162179
163180 logger .info ("Generating response using the LLM." )
0 commit comments