diff --git a/.env.example b/.env.example index 5521874..2337de5 100644 --- a/.env.example +++ b/.env.example @@ -1,10 +1,11 @@ # Copy this file to .env and fill in the values -## NOTE: The only required value is DEFAULT_OPENAI_API_KEY. The app should work if the other values +## NOTE: The only required values are DEFAULT_OPENROUTER_API_KEY and DEFAULT_OPENAI_API_KEY. The app should work if the other values # are left as is or not defined at all. However, you are strongly encouraged to fill in your # own SERPER_API_KEY value. It is also recommended to fill in the BYPASS_SETTINGS_RESTRICTIONS and # BYPASS_SETTINGS_RESTRICTIONS_PASSWORD values as needed (see below). -DEFAULT_OPENAI_API_KEY="" # your OpenAI API key +DEFAULT_OPENROUTER_API_KEY="" # your OpenRouter API key +DEFAULT_OPENAI_API_KEY="" # your OpenAI API key ## Your Google Serper API key (for web searches). # If you don't set this, my key will be used, which may have @@ -17,8 +18,8 @@ DEFAULT_MODE="/docs" # or "/web" | "/quotes" | "/details" | "/chat" | "/research # Variables controlling whether the Streamlit app imposes some functionality restrictions BYPASS_SETTINGS_RESTRICTIONS="" # whether to immediately allow all settings (any non-empty string means true) -BYPASS_SETTINGS_RESTRICTIONS_PASSWORD="" # what to enter in the OpenAI API key field to bypass settings -# restrictions. If BYPASS_SETTINGS_RESTRICTIONS is non-empty or if the user enters their own OpenAI API key, +BYPASS_SETTINGS_RESTRICTIONS_PASSWORD="" # what to enter in the OpenRouter API key field to bypass settings +# restrictions. If BYPASS_SETTINGS_RESTRICTIONS is non-empty or if the user enters their own OpenRouter API key, # this becomes - mostly - irrelevant, as full settings access is already granted. I say "mostly" because # this password can also be used for a couple of admin-only features, such as deleting the default collection # and deleting a range of collections (see dbmanager.py). Recommendation: for local use, @@ -27,8 +28,8 @@ BYPASS_SETTINGS_RESTRICTIONS_PASSWORD="" # what to enter in the OpenAI API key f # If you are NOT using Azure, the following setting determines the chat model # NOTE: You can change the model and temperature on the fly in Streamlit UI or via the API -MODEL_NAME="gpt-3.5-turbo-0125" # default model to use for chat -ALLOWED_MODELS="gpt-3.5-turbo-0125, gpt-4-turbo-2024-04-09" +MODEL_NAME="google/gemini-2.5-flash" # default model to use for chat +ALLOWED_MODELS="all" # Since we are now using OpenRouter, any model should be allowed. You can still specify a list if desired. CONTEXT_LENGTH="16000" # you can also make it lower than the actual context length EMBEDDINGS_MODEL_NAME="text-embedding-3-large" EMBEDDINGS_DIMENSIONS="3072" # number of dimensions for the embeddings model diff --git a/.gitignore b/.gitignore index 2fdc603..3b99a0e 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,8 @@ chroma-cloud* .tmp* credentials/ chroma-cloud-backup/ +chroma/ +chroma.cf.json .chroma_env my-chroma* .chroma_env diff --git a/.python-version b/.python-version new file mode 100644 index 0000000..2c20ac9 --- /dev/null +++ b/.python-version @@ -0,0 +1 @@ +3.13.3 diff --git a/README-FOR-DEVELOPERS.md b/README-FOR-DEVELOPERS.md index 6ce65e8..90df9fa 100644 --- a/README-FOR-DEVELOPERS.md +++ b/README-FOR-DEVELOPERS.md @@ -69,7 +69,7 @@ If this happens you will need to install the Microsoft C++ Build Tools. You can ### 4. 
Copy the `.env.example` file to `.env` and fill in the values -At first, you can simply fill in your [OpenAI API key](https://platform.openai.com/signup) and leave the other values as they are. Please see `.env.example` for additional details. +At first, you can simply fill in your [OpenRouter API key](https://openrouter.ai/CLERK-ROUTER/VIRTUAL/sign-up) and [OpenAI API key](https://platform.openai.com/signup) and leave the other values as they are. Please see `.env.example` for additional details. ## Running DocDocGo @@ -116,6 +116,7 @@ The message should be sent as a POST request with the body as a JSON object that class ChatRequestData(BaseModel): message: str api_key: str + openrouter_api_key: str | None = None openai_api_key: str | None = None chat_history: list[JSONish] = [] collection_name: str | None = None @@ -146,7 +147,9 @@ The `collection_name` field is used to specify the collection that the bot shoul The `api_key` field is used to specify the API key for the FastAPI server. The server will only honor requests that include the correct API key, as specified by the `DOCDOCGO_API_KEY` environment variable in. -The `openai_api_key` field is used to specify the OpenAI API key. If not specified, the default (community) key will be used, assuming the `DEFAULT_OPENAI_API_KEY` environment variable is set. +The `openrouter_api_key` field is used to specify the OpenRouter API key. If not specified, the default (community) key will be used, assuming the `DEFAULT_OPENROUTER_API_KEY` environment variable is set. + +The `openai_api_key` field is used to specify the OpenAI API key for embeddings. If not specified, the default (community) key will be used, assuming the `DEFAULT_OPENAI_API_KEY` environment variable is set. The `access_codes_cache` field is an object mapping collection names to access codes that the client has stored for them for the current user. The bot will use these access codes to determine grant the user access to collections that require it. @@ -392,7 +395,7 @@ As an alternative way to handle the issue of the default collection, you can cre ### Q: What is the `BYPASS_SETTINGS_RESTRICTIONS` environment variable? -A: Normally, when this variable is not defined (or is an empty string), the app will start in a "community key" mode, where you can only see and create public collections and there are restriction on allowed settings (e.g. you can't change the model in the UI). The key used as the community key is controlled by the `DEFAULT_OPENAI_API_KEY` environment variable. You can remove these restrictions and switch to using that same key as a private key by entering the admin password (the value of the `BYPASS_SETTINGS_RESTRICTIONS_PASSWORD` environment variable) in rhe OpenAI API key field. +A: Normally, when this variable is not defined (or is an empty string), the app will start in a "community key" mode, where you can only see and create public collections and there are restriction on allowed settings (e.g. you can't change the model in the UI). The keys used as the community keys are controlled by the `DEFAULT_OPENROUTER_API_KEY` and `DEFAULT_OPENAI_API_KEY` environment variables. You can remove these restrictions and switch to using those same keys as private keys by entering the admin password (the value of the `BYPASS_SETTINGS_RESTRICTIONS_PASSWORD` environment variable) in the OpenRouter API key field. 
However, when the `BYPASS_SETTINGS_RESTRICTIONS` variable is set to a non-empty string, the app will start in the "private key" mode right away, without you having to enter the admin password. This is useful if you use the app in a private setting and don't want to have to enter the admin password every time you start the app. diff --git a/README.md b/README.md index 0e9ae2f..71044e4 100644 --- a/README.md +++ b/README.md @@ -110,7 +110,7 @@ If this happens you will need to install the Microsoft C++ Build Tools. You can ### 4. Copy the `.env.example` file to `.env` and fill in the values -At first, you can simply fill in your [OpenAI API key](https://platform.openai.com/signup) and leave the other values as they are. Please see `.env.example` for additional details. +At first, you can simply fill in your [OpenRouter API key](https://openrouter.ai/CLERK-ROUTER/VIRTUAL/sign-up) and [OpenAI API key](https://platform.openai.com/signup) and leave the other values as they are. Please see `.env.example` for additional details. ## Running DocDocGo @@ -431,9 +431,9 @@ A: Before you entered your own OpenAI API key, you were using the community key You still have access to the public collections, you can switch to any public collection by typing `/db use `. If you want to see all available public collections again, you can switch back to the community key by changing the key to an empty string, then running `/db list` again. -#### Q: I got a shareable link to a collection but using it reloads the Streamlit app, after which it ends up in its default state of using the community key. How can I use the link with my own OpenAI API key? +#### Q: I got a shareable link to a collection but using it reloads the Streamlit app, after which it ends up in its default state of using the community key. How can I use the link with my own OpenRouter API key? -A: Simply enter your key in the OpenAI API key field after the app has reloaded. The access code will still be valid. +A: Simply enter your key in the OpenRouter API key field after the app has reloaded. The access code will still be valid. ## DocDocGo Carbon diff --git a/agents/command_chooser.py b/agents/command_chooser.py new file mode 100644 index 0000000..7dbda1b --- /dev/null +++ b/agents/command_chooser.py @@ -0,0 +1,130 @@ +import json + +from utils.chat_state import ChatState +from utils.helpers import command_ids +from langchain.prompts import PromptTemplate +from components.llm import get_llm, get_prompt_llm_chain +from utils.query_parsing import parse_query + +# Create prompt to generate commands from unstructrured user input +prompt =""" + # MISSION + You are an advanced AI assistant that determines the correct DocDocGo command to use given a user's query. DocDocGo is an AI app that assists with research and uses RAG by storing research in "collections", allowing it to combine insight from all information in a collection and use an LLM to generate answers based on the entire collection. It can also answer questions about its own functioning. + + # INPUT + You will be provided with a query from the user and the current collection the user has selected. + + # HIGH LEVEL TASK + You don't need to answer the query. Instead, your goal is to determine which of the following commands to prepend to the query: + + ## KB (COLLECTION) COMMANDS + - /kb : chat using the current collection as a knowledge base. If the query is relevant to the currently selected collection, use this one. 
+ - /ingest: upload your documents and ingest them into a collection + - /ingest : retrieve a URL and ingest into a collection + - /summarize : retrieve a URL, summarize and ingest into a collection + - /db list: list all your collections + - /db list : list your collections whose names contain + - /db use : switch to the collection named + - /db rename : rename the current collection to + - /db delete : delete the collection named + - /db status: show your access level for the current collection and related info + - /db: show database management options + - /share: share your collection with others + - /details : get details about the retrieved documents + - /quotes : get quotes from the retrieved documents + + ## MAIN RESEARCH COMMANDS + - /research : do "classic" research - ingest websites into a new collection, write a report. If the query seems to be novel and the user specifically asks for research with a fairly in-depth response, use this one. This will ingest the results into a new collection. Use /research ONLY when the query requires an in-depth report. Otherwise for more typical questions, use /research heatseek. + - /research iterate : fetch more websites and iterate on the previous report times. The number of times is optional. If the user wants you to continue researching the topic, or if the user uses the keyword "iterate", use this command. If they specify a number of times to run a deeper or combine search, append the integer to the query. + - /research heatseek : do "heatseek" research - find websites that contain the answer and select one specific site that has exactly what is requested. This command does not use the selected collection. If the user knows about heatseek, they might specify it by name and specify the number of "rounds" of heatseek research, in which case you should output "/research " with "int" being the number. + + ## ADDITIONAL RESEARCH COMMANDS + - /research set-query : change the research query. If the user asks a new question that is similar to the previous question, suggest this command. + - /research set-report-type : instructions for the desired report format. Some examples are: + Detailed Report: A comprehensive overview that includes in-depth information and analysis. + Summary Report: A concise summary of findings, highlighting key points and conclusions. + Numbered List: A structured list format that presents information in a numbered sequence. + Bullet Points: A format that uses bullet points for easy readability and quick reference. + Table Format: A structured format that organizes data into rows and columns for clarity. + - /research set-search-queries: perform web searches with new queries and queue up resulting links + - /research clear: remove all reports but keep ingested content + - /research startover: perform /research clear, then rewrite the initial report + + IMPORTANT: There are two kinds of research, classic and heatseek. If the user is looking for in-depth research on their query use /research. If they are looking for a targeted, specific answer to a relatively narrow question, use /research heatseek. + + ## OTHER COMMANDS + - /web : perform web searches and generate a report without ingesting into a collection + - /chat : regular chat, without retrieving docs or websites (Use this only when you can answer fully based on your internal knowledge or conversation history.) 
+ - /export: export your data + - /help : get help with using DocDocGo + + ## GUIDELINE REGARDING COMMANDS + - Only use /chat if you do not need to fetch external information to fully answer. Otherwise use /research for in-depth, new research, /kb for queries about the current collection, and /research heatseek for typical queries. + + # THE CURRENT COLLECTION + Here is a report on the contents of the current collection so you can decide which command to use: + {details} + IMPORTANT: If the user's question cannot be answered using the current knowledge base, select a command like "/research" that creates a new collection. + + # OUTPUT + You will output 2 strings in a JSON format: The first is an answer to the user's query, informing them what effects the command you choose will have without making reference to the command itself. Your second string will output the raw string of the suggested query, ready to be run. + + ## EXAMPLES OF OUTPUT + + query: 'What are some common birds I might see around Berkeley, California, and how can I identify them?' + output: {{'answer': 'It looks like this is a different topic than your current collection. I will do some research and create a new collection to store the information.', 'command': '/research What are some common birds I might see around Berkeley, California, and how can I identify them?'}} + + query: 'What are some common birds I might see around Berkeley, California, and how can I identify them?' + output: {{'answer': 'This is relevant to your current collection, so I will look through what we have already for the answer.', 'command': '/kb What are some common birds I might see around Berkeley, California, and how can I identify them?'}} + + query: 'There's a small, grayish-brown bird outside my window that is round with a little crest on its head. It is very lively and cute. It is about 4 inches tall. What kind of bird could it be?' + output: {{'answer': 'This is a very specific question so I will do targeted research to find the answer on the web. I won't ingest the results in any of your collections.', 'command': '/research heatseek 3 here's a small, grayish-brown bird outside my window that is round with a little crest on its head. It is very lively and cute. It is about 4 inches tall. What kind of bird could it be?'}} + + query: 'What can I do to help with conservation efforts for Bay Area birds? I asked before but I want more in-depth results.' + output: {{'answer': 'I will do deeper research on this topic', 'command': '/research iterate 3'}} + (Note to LLM: Please don't use /research iterate if the current research query does not exactly match this one in meaning) + + query: 'I want to summarize and add this website to my collection: https://www.inaturalist.org/guides/732' + output: {{'answer': 'I'll create a report for this URL and add it into your collection.", 'command': '/summarize https://www.inaturalist.org/guides/732'}} + + query: 'What is the happiness index for Norway?' + output: {{'answer': 'I will do targeted research and find the exact answer for this question.', 'command': '/research heatseek What is the happiness index for Norway?'}} + + query: 'What's it like being an AI?' + output: {{'answer': 'Hmm, let me think about that.', 'command': '/chat What's it like being an AI?'}} + + ## YOUR ACTUAL OUTPUT + + query: {query} + output: Use the information provided above to construct the output requested, in double curly braces with an "answer" and "command" element separated by a comma, in proper JSON. 
+ """ + +def get_raw_command(query: str, chat_state: ChatState): + prompt_template = PromptTemplate.from_template(prompt) + if not coll_summary_query: + coll_summary_query = {} + + # Get details on the current collection + print("Getting details on", chat_state.collection_name) + if chat_state.collection_name not in coll_summary_query: + coll_summary_query[chat_state.collection_name] = "" + summary_prompt = "/kb Can you summarize in one sentence the contents of the current collection?" + summary_llm = get_llm(chat_state.bot_settings, chat_state,chat_state.openrouter_api_key) + response = summary_llm.invoke(summary_prompt) + coll_summary_query[chat_state.collection_name] = str(response) + + # Check if query already starts with a command string, if so return as is + if any(chat_state.message.startswith(command + "") for command in command_ids): + return chat_state.message + # If not formatted as a command, prompt LLM to generate and return a JSON-formatted command + else: + chain = get_prompt_llm_chain( + prompt=prompt_template, + chat_state=chat_state, + llm_settings=chat_state.bot_settings + ) + json_response = chain.invoke({"details": coll_summary_query[chat_state.collection_name], "query": query}).strip("`json") + dict_response = json.loads(json_response) + return dict_response + + diff --git a/agents/ingester_summarizer.py b/agents/ingester_summarizer.py index 9527d50..f16459f 100644 --- a/agents/ingester_summarizer.py +++ b/agents/ingester_summarizer.py @@ -40,7 +40,7 @@ def summarize(docs: list[Document], chat_state: ChatState) -> str: summarizer_chain = get_prompt_llm_chain( SUMMARIZER_PROMPT, llm_settings=chat_state.bot_settings, - api_key=chat_state.openai_api_key, + chat_state=chat_state, callbacks=chat_state.callbacks, ) diff --git a/agents/research_heatseek.py b/agents/research_heatseek.py index 75b3207..4b30d93 100644 --- a/agents/research_heatseek.py +++ b/agents/research_heatseek.py @@ -1,6 +1,7 @@ import json from pydantic import BaseModel, Field +from agents.command_chooser import get_raw_command from agentblocks.collectionhelper import ( construct_new_collection_name, ingest_into_collection, @@ -452,13 +453,6 @@ def run_main_heatseek_workflow( piece = "\n\n" piece += "I checked but didn't find a good answer on this round." - if chat_state.parsed_query.research_params.num_iterations_left < 2: - piece += ( - "\n\nTo continue checking more sources, type " - "`/research heatseek `. For example, try " - "`/re hs 4` (shorthand is ok)." 
- ) - if piece: full_reply += piece chat_state.add_to_output(piece) @@ -570,7 +564,6 @@ def get_new_heatseek_response(chat_state: ChatState) -> JSONishDict: is_new_collection=True, retry_with_random_name=True, ) - # Return response (next iteration info will be added upstream) return {"answer": full_reply, "vectorstore": vectorstore} @@ -606,7 +599,11 @@ def get_heatseek_in_progress_response( # NOTE: should catch and handle exceptions in main handler def get_research_heatseek_response(chat_state: ChatState) -> Props: - if chat_state.message: + query = chat_state.message + llm_raw_command = {} + llm_raw_command = get_raw_command(query, chat_state) + command = llm_raw_command['command'] + if "/research heatseek" in command: return get_new_heatseek_response(chat_state) hs_data = chat_state.get_agent_data(use_cached_metadata=True).get("hs") diff --git a/agents/researcher.py b/agents/researcher.py index 35402e7..6f7cd2c 100644 --- a/agents/researcher.py +++ b/agents/researcher.py @@ -133,10 +133,13 @@ def get_web_research_response_no_ingestion( query = chat_state.message # Get queries to search for using query generator prompt query_generator_chain = get_prompt_llm_chain( - QUERY_GENERATOR_PROMPT, - llm_settings=chat_state.bot_settings, - api_key=chat_state.openai_api_key, + prompt=QUERY_GENERATOR_PROMPT, + chat_state=chat_state, + llm_settings=chat_state.bot_settings ) + for a, b in chat_state.__dict__.items(): + print(a, b) + for i in range(MAX_QUERY_GENERATOR_ATTEMPTS): try: query_generator_output = "OUTPUT_FAILED" @@ -248,7 +251,7 @@ def get_web_research_response_no_ingestion( chain = get_prompt_llm_chain( RESEARCHER_PROMPT_INITIAL_REPORT, llm_settings=chat_state.bot_settings, - api_key=chat_state.openai_api_key, + chat_state=chat_state, print_prompt=bool(os.getenv("PRINT_RESEARCHER_PROMPT")), callbacks=chat_state.callbacks, stream=True, @@ -330,8 +333,11 @@ def get_initial_researcher_response(chat_state: ChatState) -> Props: def prepare_next_iteration(chat_state: ChatState) -> dict[str, ParsedQuery]: # NOTE: just mutate chat_state instead? 
research_params = chat_state.parsed_query.research_params - if research_params.num_iterations_left < 2: - return {} + if research_params: + if research_params.num_iterations_left < 2: + return {} + else: + research_params.num_iterations_left = 1 new_parsed_query = chat_state.parsed_query.model_copy(deep=True) new_parsed_query.research_params.num_iterations_left -= 1 new_parsed_query.message = ( @@ -522,7 +528,7 @@ def get_iterative_researcher_response(chat_state: ChatState) -> Props: answer = get_prompt_llm_chain( prompt, llm_settings=chat_state.bot_settings, - api_key=chat_state.openai_api_key, + chat_state=chat_state, print_prompt=bool(os.getenv("PRINT_RESEARCHER_PROMPT")), callbacks=chat_state.callbacks, stream=True, @@ -660,7 +666,7 @@ def get_ids_to_combine(id_list: list[str]) -> list[str] | None: answer = get_prompt_llm_chain( REPORT_COMBINER_PROMPT, llm_settings=chat_state.bot_settings, - api_key=chat_state.openai_api_key, + chat_state=chat_state, print_prompt=bool(os.getenv("PRINT_RESEARCHER_PROMPT")), callbacks=chat_state.callbacks, stream=True, @@ -917,7 +923,7 @@ def auto_update_search_queries_and_links(chat_state: ChatState) -> Props: chain = get_prompt_llm_chain( SEARCH_QUERIES_UPDATER_PROMPT, llm_settings=chat_state.bot_settings, - api_key=chat_state.openai_api_key, + chat_state=chat_state, print_prompt=True, ) @@ -1048,7 +1054,10 @@ def get_researcher_response_single_iter(chat_state: ChatState) -> Props: def get_researcher_response(chat_state: ChatState) -> Props: research_params = chat_state.parsed_query.research_params - num_iterations_left = research_params.num_iterations_left + if research_params: + num_iterations_left = research_params.num_iterations_left + else: + num_iterations_left = 1 # Due to Streamlit reloading quirks, we need to do this dance: research_params.task_type = ResearchCommand(research_params.task_type.value) diff --git a/agents/websearcher_quick.py b/agents/websearcher_quick.py index 9a0c03e..1298df9 100644 --- a/agents/websearcher_quick.py +++ b/agents/websearcher_quick.py @@ -88,7 +88,7 @@ def get_websearcher_response_quick( chain = get_prompt_llm_chain( RESEARCHER_PROMPT_SIMPLE, llm_settings=chat_state.bot_settings, - api_key=chat_state.openai_api_key, + chat_state=chat_state, callbacks=chat_state.callbacks, stream=True, ) diff --git a/api.py b/api.py index f8c5d7a..7c06965 100644 --- a/api.py +++ b/api.py @@ -28,6 +28,7 @@ BYPASS_SETTINGS_RESTRICTIONS_PASSWORD, DEFAULT_COLLECTION_NAME, DEFAULT_OPENAI_API_KEY, + DEFAULT_OPENROUTER_API_KEY, INCLUDE_ERROR_IN_USER_FACING_ERROR_MSG, MAX_UPLOAD_BYTES, get_logger, @@ -68,6 +69,7 @@ class ChatRequestData(BaseModel): message: str api_key: str openai_api_key: str | None = None + openrouter_api_key: str | None = None chat_history: list[RoleBasedChatMessage] = [] collection_name: str | None = None access_codes_cache: dict[str, str] | None = None # coll name -> access_code @@ -141,21 +143,30 @@ async def handle_chat_or_ingest_request( # If admin pwd is sent, treat it as if the default key was sent if ( BYPASS_SETTINGS_RESTRICTIONS_PASSWORD + and data.openrouter_api_key and data.openai_api_key - and data.openai_api_key.strip() == BYPASS_SETTINGS_RESTRICTIONS_PASSWORD + and data.openrouter_api_key.strip() == BYPASS_SETTINGS_RESTRICTIONS_PASSWORD + and DEFAULT_OPENROUTER_API_KEY # only do this if the default key is configured and DEFAULT_OPENAI_API_KEY # only do this if the default key is configured ): + data.openrouter_api_key = DEFAULT_OPENROUTER_API_KEY data.openai_api_key = DEFAULT_OPENAI_API_KEY + # Same 
story if no key is sent but BYPASS_SETTINGS_RESTRICTIONS is set - elif not data.openai_api_key and BYPASS_SETTINGS_RESTRICTIONS: - data.openai_api_key = DEFAULT_OPENAI_API_KEY + if BYPASS_SETTINGS_RESTRICTIONS: + if not data.openrouter_api_key: + data.openrouter_api_key = DEFAULT_OPENROUTER_API_KEY + if not data.openai_api_key: + data.openai_api_key = DEFAULT_OPENAI_API_KEY - # If no key is specified, use the default key (but set is_community_key to True) + # If no keys are specified, use the default keys (but set is_community_key and is_or_community_key to True) openai_api_key: str = data.openai_api_key or DEFAULT_OPENAI_API_KEY is_community_key = not data.openai_api_key + openrouter_api_key: str = data.openrouter_api_key or DEFAULT_OPENROUTER_API_KEY + is_or_community_key = not data.openrouter_api_key - # User id is determined from the OpenAI API key (or None if community key) - user_id: str | None = get_short_user_id(data.openai_api_key) + # User id is determined from the OpenRouter API key (or None if community key) + user_id: str | None = get_short_user_id(data.openrouter_api_key) # TODO: use full api key as user id (but show only the short version) chat_history = convert_chat_history(data.chat_history) @@ -171,13 +182,12 @@ async def handle_chat_or_ingest_request( return ChatResponseData(content="Invalid API key.") # Validate the provided bot settings - - if data.bot_settings and is_community_key: + if data.bot_settings and is_or_community_key: # Enforce default settings for community key if data.bot_settings != BotSettings(): return ChatResponseData( content="Apologies, you can customize your model settings (e.g. model name, " - "temperature) only when using your own OpenAI API key." + "temperature) only when using your own OpenRouter API key." 
) # Extract text from the files and convert to list of Document @@ -234,6 +244,7 @@ async def handle_chat_or_ingest_request( is_community_key=is_community_key, chat_history=chat_history, openai_api_key=openai_api_key, + openrouter_api_key=openrouter_api_key, user_id=user_id, parsed_query=parsed_query, scheduled_queries=scheduled_queries, diff --git a/components/chat_with_docs_chain.py b/components/chat_with_docs_chain.py index 3498734..0f082ba 100644 --- a/components/chat_with_docs_chain.py +++ b/components/chat_with_docs_chain.py @@ -184,7 +184,7 @@ def _call( { "question": user_query, "chat_history": _format_chat_history(chat_history_for_rephrasing), - } + }, # callbacks=_run_manager.get_child(), )["text"] @@ -299,4 +299,4 @@ async def _acall( def save(self, file_path: Path | str) -> None: if self.format_chat_history: raise ValueError("Chain not saveable when `get_chat_history` is not None.") - super().save(file_path) + super().save(file_path) \ No newline at end of file diff --git a/components/chroma_ddg.py b/components/chroma_ddg.py index 4a314cb..4cb1203 100644 --- a/components/chroma_ddg.py +++ b/components/chroma_ddg.py @@ -227,7 +227,7 @@ def ensure_chroma_client(client: ClientAPI | None = None) -> ClientAPI: def get_vectorstore_using_openai_api_key( collection_name: str, *, - openai_api_key: str, + openai_api_key: str | None = None, client: ClientAPI | None = None, create_if_not_exists: bool = False, ) -> ChromaDDG: diff --git a/components/chroma_ddg_retriever.py b/components/chroma_ddg_retriever.py index 25762d3..ddafd68 100644 --- a/components/chroma_ddg_retriever.py +++ b/components/chroma_ddg_retriever.py @@ -1,9 +1,13 @@ +import streamlit as st from typing import Any, ClassVar from chromadb.api.types import Where, WhereDocument from langchain_core.documents import Document from pydantic import Field +from agents.command_chooser import get_raw_command +from utils.query_parsing import parse_query +from utils.chat_state import ChatState from utils.helpers import DELIMITER, lin_interpolate from utils.lang_utils import expand_chunks from utils.prepare import CONTEXT_LENGTH, EMBEDDINGS_MODEL_NAME @@ -65,7 +69,7 @@ def _get_relevant_documents( filter: Where | None = None, # For metadata (Langchain naming convention) where_document: WhereDocument | None = None, # Filter by text in document **kwargs: Any, # For additional search params - ) -> list[Document]: + ) -> list[Document] | None: # Combine global search kwargs with per-query search params passed here search_kwargs = self.search_kwargs | kwargs if filter is not None: @@ -144,6 +148,10 @@ def _get_relevant_documents( f"Similarities from {self.similarities[-1]:.2f} to {self.similarities[0]:.2f}" ) print(DELIMITER) + + # If no chunks were returned, set chunks empty + if not chunks: + return [] # Get the parent documents for the chunks try: @@ -156,14 +164,12 @@ def _get_relevant_documents( return chunks unique_parent_ids = list(set(parent_ids)) rsp = self.vectorstore.collection.get(unique_parent_ids) - parent_docs_by_id = { id: Document(page_content=text, metadata=metadata) for id, text, metadata in zip( rsp["ids"], rsp["documents"], rsp["metadatas"] ) } - # Expand chunks using the parent docs max_total_tokens = min( self.max_total_tokens, self.max_average_tokens_per_chunk * len(chunks) @@ -176,6 +182,7 @@ def _get_relevant_documents( ) return expanded_chunks + async def _aget_relevant_documents( self, query: str, *, run_manager: AsyncCallbackManagerForRetrieverRun ) -> list[Document]: diff --git a/components/llm.py 
b/components/llm.py index a42d68b..b2bdd35 100644 --- a/components/llm.py +++ b/components/llm.py @@ -1,7 +1,8 @@ from typing import Any from uuid import UUID +from langchain_openai import ChatOpenAI, AzureChatOpenAI +from langchain.prompts import PromptTemplate from langchain_core.outputs import ChatGenerationChunk, GenerationChunk, LLMResult -from langchain_openai import AzureChatOpenAI, ChatOpenAI from streamlit.delta_generator import DeltaGenerator from utils.helpers import DELIMITER, MAIN_BOT_PREFIX @@ -10,14 +11,17 @@ CHAT_DEPLOYMENT_NAME, IS_AZURE, LLM_REQUEST_TIMEOUT, + EMBEDDINGS_MODEL_NAME, + OPENROUTER_BASE_URL ) from utils.streamlit.helpers import fix_markdown from utils.type_utils import BotSettings, CallbacksOrNone +from utils.chat_state import ChatState from langchain_core.callbacks import BaseCallbackHandler from langchain_core.language_models import BaseChatModel from langchain_core.output_parsers import StrOutputParser from langchain_core.prompt_values import ChatPromptValue -from langchain_core.prompts import PromptTemplate +from langchain_core.prompts import ChatPromptTemplate, PromptTemplate class CallbackHandlerDDGStreamlit(BaseCallbackHandler): @@ -71,9 +75,27 @@ def on_llm_end(self, *args, **kwargs) -> None: def on_retry(self, *args, **kwargs) -> None: print(f"ON_RETRY: \nargs = {args}\nkwargs = {kwargs}") +class NoOpCallbackHandler(BaseCallbackHandler): + def on_llm_start( + self, serialized: dict[str, Any], prompts: list[str], **kwargs: Any + ) -> None: + pass + + def on_llm_new_token(self, token, **kwargs) -> None: + pass + + def on_llm_end(self, *args, **kwargs) -> None: + pass + + def on_retry(self, *args, **kwargs) -> None: + pass + def get_llm_with_callbacks( - settings: BotSettings, api_key: str | None = None, callbacks: CallbacksOrNone = None + settings: BotSettings, + chat_state: ChatState, + api_key: str | None = None, + callbacks: CallbacksOrNone = None ) -> BaseChatModel: """ Returns a chat model instance (either AzureChatOpenAI or ChatOpenAI, depending @@ -81,29 +103,30 @@ def get_llm_with_callbacks( determined by CHAT_DEPLOYMENT_NAME (and other Azure-specific environment variables), not by settings.model_name. 
""" + llm: AzureChatOpenAI | ChatOpenAI | None = None if IS_AZURE: llm = AzureChatOpenAI( - deployment_name=CHAT_DEPLOYMENT_NAME, + azure_deployment=CHAT_DEPLOYMENT_NAME, temperature=settings.temperature, - request_timeout=LLM_REQUEST_TIMEOUT, + timeout=LLM_REQUEST_TIMEOUT, streaming=True, # seems to help with timeouts callbacks=callbacks, ) else: llm = ChatOpenAI( - api_key=api_key or "", # don't allow None, no implicit key from env - model=settings.llm_model_name, - temperature=settings.temperature, - request_timeout=LLM_REQUEST_TIMEOUT, + api_key=chat_state.openrouter_api_key, + base_url=OPENROUTER_BASE_URL, + model=chat_state.bot_settings.model, + timeout=LLM_REQUEST_TIMEOUT, streaming=True, callbacks=callbacks, verbose=True, # tmp - ) + ) return llm - def get_llm( settings: BotSettings, + chat_state: ChatState, api_key: str | None = None, callbacks: CallbacksOrNone = None, stream=False, @@ -117,35 +140,39 @@ def get_llm( """ if callbacks is None: callbacks = [CallbackHandlerDDGConsole(init_str)] if stream else [] - return get_llm_with_callbacks(settings, api_key, callbacks) + return get_llm_with_callbacks(settings, chat_state, api_key, callbacks) def get_prompt_llm_chain( - prompt: PromptTemplate, + prompt: ChatPromptTemplate | PromptTemplate, + chat_state: ChatState, llm_settings: BotSettings, - api_key: str | None = None, print_prompt=False, **kwargs, ): if not print_prompt: - return prompt | get_llm(llm_settings, api_key, **kwargs) | StrOutputParser() - - def print_and_return(thing): - if isinstance(thing, ChatPromptValue): - print(f"PROMPT:\n{msg_list_chat_history_to_string(thing.messages)}") - else: - print(f"PROMPT:\n{type(thing)}\n{thing}") - print(DELIMITER) - return thing - - return ( + return ( + prompt + | get_llm(llm_settings, chat_state, chat_state.openrouter_api_key, **kwargs) + | StrOutputParser() + ) + else: + def print_and_return(thing): + if isinstance(thing, ChatPromptValue): + print(f"PROMPT:\n{msg_list_chat_history_to_string(thing.messages)}") + else: + print(f"PROMPT:\n{type(thing)}\n{thing}") + print(DELIMITER) + return thing + return ( prompt | print_and_return - | get_llm(llm_settings, api_key, **kwargs) + | get_llm(llm_settings, chat_state, chat_state.openrouter_api_key, **kwargs) | StrOutputParser() ) + def get_llm_from_prompt_llm_chain(prompt_llm_chain): return prompt_llm_chain.middle[-1] diff --git a/components/openai_embeddings_ddg.py b/components/openai_embeddings_ddg.py index b27accf..06fbb6e 100644 --- a/components/openai_embeddings_ddg.py +++ b/components/openai_embeddings_ddg.py @@ -33,7 +33,7 @@ def get_openai_embeddings( # Custom version of OpenAIEmbeddings for DocDocGo. Unlike the original, # an object of this class will pull the current values of env vars every time. # This helps in situations where the user has changed env vars such as -# OPENAI_API_KEY, as is possible in the Streamlit app. +# DEFAULT_OPENROUTER_API_KEY, as is possible in the Streamlit app. # This way is also more consistent with the behavior of e.g. ChatOpenAI, which # always uses the current values of env vars when querying the OpenAI API. 
diff --git a/docdocgo.py b/docdocgo.py index d20dd80..20bb73c 100644 --- a/docdocgo.py +++ b/docdocgo.py @@ -2,6 +2,7 @@ from typing import Any from langchain.chains import LLMChain +from langchain.prompts import PromptTemplate, ChatPromptTemplate from _prepare_env import is_env_loaded from agents.dbmanager import get_user_facing_collection_name, handle_db_command @@ -9,6 +10,7 @@ from agents.ingester_summarizer import get_ingester_summarizer_response from agents.researcher import get_researcher_response, get_websearcher_response from agents.share_manager import handle_share_command +from agents.command_chooser import get_raw_command from components.chat_with_docs_chain import ChatWithDocsChain from components.chroma_ddg import ChromaDDG, get_vectorstore_using_openai_api_key from components.chroma_ddg_retriever import ChromaDDGRetriever @@ -25,7 +27,7 @@ from utils.lang_utils import pairwise_chat_history_to_msg_list # Load environment variables -from utils.prepare import DEFAULT_COLLECTION_NAME, DEFAULT_OPENAI_API_KEY, get_logger +from utils.prepare import DEFAULT_COLLECTION_NAME, DEFAULT_OPENAI_API_KEY, DEFAULT_OPENROUTER_API_KEY, get_logger from utils.prompts import ( CHAT_WITH_DOCS_PROMPT, CONDENSE_QUESTION_PROMPT, @@ -40,13 +42,15 @@ default_vectorstore = None # can move to chat_state - def get_bot_response(chat_state: ChatState): global default_vectorstore chat_mode_val = ( chat_state.chat_mode.value ) # use value due to Streamlit code reloading - if chat_mode_val == ChatMode.CHAT_WITH_DOCS_COMMAND_ID.value: # /kb command + if chat_mode_val == ChatMode.AUTO_COMMAND_ID.value: # /auto command + response = get_raw_command(chat_state.message, chat_state) + return {"answer": response} + elif chat_mode_val == ChatMode.CHAT_WITH_DOCS_COMMAND_ID.value: # /kb command chat_chain = get_docs_chat_chain(chat_state) elif chat_mode_val == ChatMode.DETAILS_COMMAND_ID.value: # /details command chat_chain = get_docs_chat_chain(chat_state, prompt_qa=QA_PROMPT_SUMMARIZE_KB) @@ -61,8 +65,8 @@ def get_bot_response(chat_state: ChatState): elif chat_mode_val == ChatMode.JUST_CHAT_COMMAND_ID.value: # /chat command chat_chain = get_prompt_llm_chain( JUST_CHAT_PROMPT, + chat_state=chat_state, llm_settings=chat_state.bot_settings, - api_key=chat_state.openai_api_key, callbacks=chat_state.callbacks, stream=True, ) @@ -155,7 +159,7 @@ def get_source_links(result_from_chain: dict[str, Any]) -> list[str]: def get_docs_chat_chain( chat_state: ChatState, - prompt_qa=CHAT_WITH_DOCS_PROMPT, + prompt_qa: PromptTemplate | ChatPromptTemplate = CHAT_WITH_DOCS_PROMPT, ): """ Create a chain to respond to queries using a vectorstore of documents. 
@@ -163,7 +167,8 @@ def get_docs_chat_chain( # Initialize chain for query generation from chat history llm_for_q_generation = get_llm( settings=chat_state.bot_settings.model_copy(update={"temperature": 0}), - api_key=chat_state.openai_api_key, + chat_state=chat_state, + api_key=chat_state.openrouter_api_key, ) query_generator_chain = LLMChain( llm=llm_for_q_generation, @@ -187,17 +192,12 @@ def get_docs_chat_chain( llm_for_token_counting=None, # will be assigned in a moment verbose=bool(os.getenv("PRINT_SIMILARITIES")), ) - # retriever = VectorStoreRetriever(vectorstore=chat_state.vectorstore) - # search_kwargs={ - # "k": num_docs_max, - # "score_threshold": relevance_threshold, - # }, # Initialize chain for answering queries based on provided doc snippets qa_from_docs_chain = get_prompt_llm_chain( prompt_qa, + chat_state=chat_state, llm_settings=chat_state.bot_settings, - api_key=chat_state.openai_api_key, callbacks=chat_state.callbacks, print_prompt=bool(os.getenv("PRINT_QA_PROMPT")), stream=True, @@ -219,7 +219,7 @@ def get_docs_chat_chain( def do_intro_tasks( - openai_api_key: str, collection_name: str | None = None + openai_api_key: str | None = None, collection_name: str | None = None ) -> ChromaDDG: global default_vectorstore @@ -229,7 +229,7 @@ def do_intro_tasks( # Load and save default vector store try: vectorstore = default_vectorstore = get_vectorstore_using_openai_api_key( - DEFAULT_COLLECTION_NAME, openai_api_key=openai_api_key + DEFAULT_COLLECTION_NAME, openai_api_key=DEFAULT_OPENAI_API_KEY ) except Exception as e: logger.error( @@ -293,6 +293,7 @@ def do_intro_tasks( chat_history=chat_history, vectorstore=vectorstore, # callbacks and bot_settings can be default here openai_api_key=DEFAULT_OPENAI_API_KEY, + openrouter_api_key=DEFAULT_OPENROUTER_API_KEY, user_id=None, # would be set to None by default but just to be explicit ) ) diff --git a/streamlit_app.py b/streamlit_app.py index 613eddb..bf699f7 100644 --- a/streamlit_app.py +++ b/streamlit_app.py @@ -1,4 +1,5 @@ import os +import traceback import streamlit as st from icecream import ic # noqa: F401 @@ -9,6 +10,7 @@ get_short_user_id, get_user_facing_collection_name, ) + from components.llm import CallbackHandlerDDGStreamlit from docdocgo import get_bot_response, get_source_links from utils.chat_state import ChatState @@ -23,13 +25,12 @@ from utils.ingest import extract_text, format_ingest_failure from utils.output import format_exception from utils.prepare import ( - ALLOWED_MODELS, - BYPASS_SETTINGS_RESTRICTIONS, BYPASS_SETTINGS_RESTRICTIONS_PASSWORD, DEFAULT_COLLECTION_NAME, - DEFAULT_OPENAI_API_KEY, + DEFAULT_OPENROUTER_API_KEY, INITIAL_TEST_QUERY_STREAMLIT, MODEL_NAME, + ALLOWED_MODELS, TEMPERATURE, get_logger, ) @@ -42,6 +43,7 @@ just_chat_status_config, mode_option_to_prefix, mode_options, + sanitize_markdown_links, show_downloader, show_sources, show_uploader, @@ -58,6 +60,7 @@ ChatMode, chat_modes_needing_llm, ) +from agents.command_chooser import get_raw_command logger = get_logger() @@ -89,17 +92,17 @@ st.header("DocDocGo " + VERSION) # Default mode - with st.expander("Command Mode", expanded=False): + with st.expander("Command Mode", expanded=True): ss.default_mode = st.selectbox( "Command used if none provided", mode_options, - index=0, + index=12, # label_visibility="collapsed", ) cmd_prefix, cmd_prefix_explainer = mode_option_to_prefix[ss.default_mode] st.caption(cmd_prefix_explainer) - - with st.expander("OpenAI API Key", expanded=not ss.llm_api_key_ok_status): + + with st.expander("OpenAI API Key", 
expanded=False): supplied_openai_api_key = st.text_input( "OpenAI API Key", label_visibility="collapsed", @@ -109,7 +112,7 @@ if not supplied_openai_api_key: openai_api_key_to_use: str = ss.default_openai_api_key - is_community_key = not BYPASS_SETTINGS_RESTRICTIONS + is_community_key = True elif supplied_openai_api_key in ("public", "community"): # TODO: document this @@ -133,10 +136,10 @@ is_community_key = False # In case there's no community key available, set is_community_key to False - if not openai_api_key_to_use: - is_community_key = False - st.caption("To use this app, you'll need an OpenAI API key. " - "[Get an OpenAI API key](https://platform.openai.com/account/api-keys)" + if not is_community_key and not openai_api_key_to_use: + st.caption( + "To use this app, you'll need an OpenAI API key. " + "[Get an OpenAI API key](https://platform.openai.com/account/api-keys)" ) chat_state.user_id = None elif is_community_key: @@ -150,9 +153,13 @@ else: # User is using their own key (or has unlocked the default key) chat_state.user_id = get_short_user_id(openai_api_key_to_use) + st.caption( + "Using your own OpenAI API key. Your collections are private." + ) # TODO: use full api key as user id (but show only the short version) chat_state.is_community_key = is_community_key # in case it changed + chat_state.openai_api_key = openai_api_key_to_use # in case it changed # If init load or user key field changed, reset user/vectorstore as needed if supplied_openai_api_key != ss.prev_supplied_openai_api_key: @@ -189,18 +196,81 @@ chat_state.chat_history_all.append((None, init_msg)) chat_state.sources_history.append(None) + with st.expander("OpenRouter API Key", expanded=False): + supplied_openrouter_api_key = st.text_input( + "OpenRouter API Key", + label_visibility="collapsed", + key="openrouter_api_key", + type="password", + ) + + if not supplied_openrouter_api_key: + openrouter_api_key_to_use: str = ss.default_openrouter_api_key + is_or_community_key = True + + elif supplied_openrouter_api_key in ("public", "community"): + # TODO: document this + # This allows the user to use community key mode (and see public collections + # even if BYPASS_SETTINGS_RESTRICTIONS is set + openrouter_api_key_to_use = ss.default_openrouter_api_key + is_or_community_key = True + + elif supplied_openrouter_api_key == BYPASS_SETTINGS_RESTRICTIONS_PASSWORD: + openrouter_api_key_to_use = ss.default_openrouter_api_key + is_or_community_key = False + + # Collapse key field (not super important, but nice) + if not ss.openrouter_api_key_ok_status: + ss.openrouter_api_key_ok_status = True # collapse key field + st.rerun() # otherwise won't collapse until next interaction + + else: + # Use the key entered by the user as the OpenRouter API key + openrouter_api_key_to_use = supplied_openrouter_api_key + is_or_community_key = False + + # In case there's no community key available, set is_or_community_key to False + if not openrouter_api_key_to_use: + is_or_community_key = False + st.caption("To use this app, you'll need an OpenRouter API key. " + "[Get an OpenRouter API key](https://openrouter.ai/CLERK-ROUTER/VIRTUAL/sign-up)" + ) + elif is_or_community_key: + st.caption( + "Using the default OpenRouter API key (You may not select a custom model.). 
" + "[Get your OpenRouter API key](https://openrouter.ai/CLERK-ROUTER/VIRTUAL/sign-up)" + ) + + chat_state.is_or_community_key = is_or_community_key # in case it changed + chat_state.openrouter_api_key = openrouter_api_key_to_use # in case it changed + + # Settings with st.expander("Settings", expanded=False): - if is_community_key: - model_options = [MODEL_NAME] # show only 3.5 if community key - index = 0 + if is_or_community_key: + chat_state.bot_settings.model = st.text_input( + "OpenRouter Model", + label_visibility="collapsed", + key="model", + type="default", + placeholder="google/gemini-2.5-flash", + disabled=True, + ) else: - model_options = ALLOWED_MODELS # guaranteed to include MODEL_NAME - index = model_options.index(chat_state.bot_settings.llm_model_name) - # TODO: adjust context length (for now assume 16k) - chat_state.bot_settings.llm_model_name = st.selectbox( - "Language model", model_options, disabled=is_community_key, index=index - ) + supplied_openrouter_model_name = st.text_input( + "OpenRouter Model", + label_visibility="collapsed", + key="model", + type="default", + placeholder="google/gemini-2.5-flash", + disabled=False, + ) + if ALLOWED_MODELS and supplied_openrouter_model_name not in ALLOWED_MODELS: + st.caption('That model is not in the allowed list of models. Make sure you are formatting it provider/model with a slash. The model will be set to the default, "google/gemini-2.5-flash"') + chat_state.bot_settings.model = MODEL_NAME + else: + chat_state.bot_settings.model = supplied_openrouter_model_name + st.caption("OpenRouter Model (Enter in the form of provider/model, for example google/gemini-2.5-flash. If you are using the community OpenRouter API key you may not choose a custom model.)") # Temperature chat_state.bot_settings.temperature = st.slider( @@ -290,16 +360,34 @@ if docs: ingest_docs(docs, chat_state) -# Check if the user has entered a query +# Get and display collection name coll_name_full = chat_state.vectorstore.name coll_name_as_shown = get_user_facing_collection_name(chat_state.user_id, coll_name_full) -chat_input_text = f"[{ss.default_mode}] " if cmd_prefix else "" -chat_input_text = limit_num_characters(chat_input_text + coll_name_as_shown, 35) + "/" -full_query = st.chat_input(chat_input_text) + +chat_input_text: str = limit_num_characters(coll_name_as_shown, 35) + "/" + +# Get text input by user +full_query: str | None = st.chat_input(chat_input_text) if full_query: - # Prepend the command prefix for the user-selected default mode, if needed - if cmd_prefix and not full_query.startswith("/"): - full_query = cmd_prefix + full_query + # display user message + with st.chat_message("user", avatar=ss.user_avatar): + # NOTE: should use a different avatar for auto-instructions + st.markdown(fix_markdown(full_query)) + + # Send query to LLM to select appropriate command, then display the response and continue with the command + llm_raw_command = {} + llm_raw_command = get_raw_command(full_query, chat_state) + answer = llm_raw_command['answer'] + full_query = llm_raw_command['command'] + + # Check if this is the first time we got a response from the LLM + if not ss.openrouter_api_key_ok_status: + # Set a temp value to trigger a rerun to collapse the API key fields + ss.openrouter_api_key_ok_status = "RERUN_PLEASE" + + # display LLM response + with st.chat_message("assistant", avatar=ss.bot_avatar): + st.markdown(fix_markdown(answer)) else: # If no message from the user, check if we should run an initial test query if not chat_state.chat_history_all and 
INITIAL_TEST_QUERY_STREAMLIT: @@ -358,40 +446,42 @@ if parsed_query.is_ingestion_needed() else "", ) + chat_state.callbacks[1] = cb chat_state.add_to_output = lambda x: cb.on_llm_new_token(x, run_id=None) - try: - response = get_bot_response(chat_state) - answer = response["answer"] - # Check if this is the first time we got a response from the LLM - if not ss.llm_api_key_ok_status and chat_mode in chat_modes_needing_llm: - # Set a temp value to trigger a rerun to collapse the API key field - ss.llm_api_key_ok_status = "RERUN_PLEASE" + try: + llm_response = get_bot_response(chat_state) + answer = llm_response["answer"] + answer = sanitize_markdown_links(answer) + except Exception as e: + err_msg = format_exception(e) + answer = f"We're sorry, an error has occurred:\n```\n{err_msg}\n```" - # Display non-streaming responses slowly (in particular avoids chat prompt flicker) - if chat_mode not in chat_modes_needing_llm or "needs_print" in response: - write_slowly(message_placeholder, answer) + # Display non-streaming responses slowly (in particular avoids chat prompt flicker) + if chat_mode not in chat_modes_needing_llm or "needs_print" in llm_response: + write_slowly(message_placeholder, answer) + try: # Display sources if present - sources = get_source_links(response) or None # Cheaper to store None than [] - show_sources(sources, cb) + sources = get_source_links(llm_response) or None # Cheaper to store None than [] + show_sources(sources) # Display the "complete" status - custom or default if status: default_status = status_config.get(chat_mode, just_chat_status_config) status.update( - label=response.get("status.header", default_status["complete.header"]), + label=llm_response.get("status.header", default_status["complete.header"]), state="complete", ) - status.write(response.get("status.body", default_status["complete.body"])) + status.write(llm_response.get("status.body", default_status["complete.body"])) - # Add the response to the chat history + # Add the llm_response to the chat history chat_state.chat_history.append((full_query, answer)) # If the response contains instructions to auto-run a query, record it - if new_parsed_query := response.get("new_parsed_query"): - chat_state.scheduled_queries.add_to_front(new_parsed_query) + #if new_parsed_query := llm_response.get("new_parsed_query"): + # chat_state.scheduled_queries.add_to_front(new_parsed_query) except Exception as e: # Add the error message to the likely incomplete response err_msg = format_exception(e) @@ -403,23 +493,22 @@ status.write(status_config[chat_mode]["error.body"]) err_type = ( - "OPENAI_API_AUTH" + "OPENROUTER_API_AUTH" if ( err_msg.startswith("AuthenticationError") - and "key at https://platform.openai" in err_msg ) else "EMBEDDINGS_DIM" if err_msg.startswith("InvalidDimensionException") else "OTHER" ) - if err_type == "OPENAI_API_AUTH": + if err_type == "OPENROUTER_API_AUTH": if is_community_key: - answer = f"Apologies, the community OpenAI API key ({ss.default_openai_api_key[:4]}...{DEFAULT_OPENAI_API_KEY[-4:]}) was rejected by the OpenAI API. Possible reasons:\n- OpenAI believes that the key has leaked\n- The key has reached its usage limit\n\n**What to do:** Please get your own key at https://platform.openai.com/account/api-keys and enter it in the sidebar." - elif openai_api_key_to_use: - answer = f"Apologies, the OpenAI API key you entered ({openai_api_key_to_use[:4]}...) was rejected by the OpenAI API. 
Possible reasons:\n- The key is invalid\n- OpenAI believes that the key has leaked\n- The key has reached its usage limit\n\n**What to do:** Please get a new key at https://platform.openai.com/account/api-keys and enter it in the sidebar." + answer = f"Apologies, the community OpenRouter API key ({ss.default_openrouter_api_key[:4]}...{DEFAULT_OPENROUTER_API_KEY[-4:]}) was rejected by the OpenRouter API. Possible reasons:\n- OpenRouter believes that the key has leaked\n- The key has reached its usage limit\n\n**What to do:** Please get your own key at https://openrouter.ai/CLERK-ROUTER/VIRTUAL/sign-up and enter it in the sidebar." + elif openrouter_api_key_to_use: + answer = f"Apologies, the OpenRouter API key you entered ({openrouter_api_key_to_use[:4]}...) was rejected by the OpenRouter API. Possible reasons:\n- The key is invalid\n- OpenRouter believes that the key has leaked\n- The key has reached its usage limit\n\n**What to do:** Please get a new key at https://openrouter.ai/CLERK-ROUTER/VIRTUAL/sign-up and enter it in the sidebar." else: - answer = "In order to use DocDocGo, you'll need an OpenAI API key. Please get one at https://platform.openai.com/account/api-keys and enter it in the sidebar." + answer = "In order to use DocDocGo, you'll need an OpenRouter API key. Please get one at https://openrouter.ai/CLERK-ROUTER/VIRTUAL/sign-up and enter it in the sidebar." elif err_type == "EMBEDDINGS_DIM": answer = ( @@ -429,8 +518,8 @@ "via the project's GitHub repository or through LinkedIn at " "https://www.linkedin.com/in/dmitriyvasilyuk/." ) - elif cb.buffer: - answer = f"{cb.buffer}\n\n{answer}" + # elif cb.buffer: + # answer = f"{cb.buffer}\n\n{answer}" # Assign sources sources = None @@ -454,7 +543,7 @@ files, allow_all_ext = show_uploader(is_teleporting=True) # Display the file downloader if needed -for instruction in response.get("instructions", []): +for instruction in llm_response.get("instructions", []): if instruction.type == INSTRUCT_EXPORT_CHAT_HISTORY: ss.idx_file_download = len(chat_state.chat_history_all) - 1 is_downloaded = show_downloader( @@ -463,8 +552,8 @@ ) # Update vectorstore if needed -if "vectorstore" in response: - chat_state.vectorstore = response["vectorstore"] +if "vectorstore" in llm_response: + chat_state.vectorstore = llm_response["vectorstore"] # Update the collection name in the address bar if collection has changed if coll_name_full != chat_state.vectorstore.name: @@ -474,8 +563,9 @@ else {} ) -# If this was the first LLM response, rerun to collapse the OpenAI API key field -if ss.llm_api_key_ok_status == "RERUN_PLEASE": +# If this was the first LLM response, rerun to collapse the OpenRouter and OpenAI API key fields +if ss.openrouter_api_key_ok_status == "RERUN_PLEASE": + ss.openrouter_api_key_ok_status = True ss.llm_api_key_ok_status = True st.rerun() diff --git a/utils/chat_state.py b/utils/chat_state.py index 901301b..77925ba 100644 --- a/utils/chat_state.py +++ b/utils/chat_state.py @@ -10,7 +10,6 @@ CollectionDoesNotExist, get_vectorstore_using_openai_api_key, ) -from components.llm import get_prompt_llm_chain from utils.helpers import ( PRIVATE_COLLECTION_PREFIX, PRIVATE_COLLECTION_USER_ID_LENGTH, @@ -72,6 +71,7 @@ def __init__( operation_mode: OperationMode, vectorstore: ChromaDDG, is_community_key: bool = False, + is_or_community_key: bool = False, parsed_query: ParsedQuery | None = None, chat_history: PairwiseChatHistory | None = None, chat_and_command_history: PairwiseChatHistory | None = None, @@ -81,6 +81,7 @@ def __init__( bot_settings: 
BotSettings | None = None, user_id: str | None = None, # NOTE: should switch to "" instead of None openai_api_key: str | None = None, + openrouter_api_key: str | None = None, scheduled_queries: ScheduledQueries | None = None, access_role_by_user_id_by_coll: dict[str, dict[str, AccessRole]] | None = None, access_code_by_coll_by_user_id: dict[str, dict[str, str]] | None = None, @@ -90,6 +91,7 @@ def __init__( ) -> None: self.operation_mode = operation_mode self.is_community_key = is_community_key + self.is_or_community_key = is_or_community_key self.parsed_query = parsed_query or ParsedQuery() self.chat_history = chat_history or [] # tuple of (user_message, bot_response) self.chat_history_all = chat_and_command_history or [] @@ -102,6 +104,7 @@ def __init__( self.bot_settings = bot_settings or BotSettings() self.user_id = user_id self.openai_api_key = openai_api_key + self.openrouter_api_key = openrouter_api_key self.scheduled_queries = scheduled_queries or ScheduledQueries() self._access_role_by_user_id_by_coll = access_role_by_user_id_by_coll or {} self._access_code_by_coll_by_user_id = access_code_by_coll_by_user_id or {} @@ -408,10 +411,12 @@ def get_new_vectorstore( return res def get_prompt_llm_chain(self, prompt, *, to_user: bool): + from components.llm import get_prompt_llm_chain return get_prompt_llm_chain( prompt, llm_settings=self.bot_settings, - api_key=self.openai_api_key, + chat_state=self, + print_prompt=False, stream=to_user, callbacks=self.callbacks if to_user else None, ) diff --git a/utils/helpers.py b/utils/helpers.py index c8ef812..f151cae 100644 --- a/utils/helpers.py +++ b/utils/helpers.py @@ -4,7 +4,7 @@ from utils.prepare import DEFAULT_MODE from utils.type_utils import ChatMode -VERSION = "v0.2.7" +VERSION = "v0.2.8" DELIMITER = "-" * 94 + "\n" DELIMITER40 = "-" * 40 + "\n" DELIMITER20_NONL = "-" * 20 @@ -63,48 +63,17 @@ DEFAULT_CHAT_MODE = command_ids[DEFAULT_MODE] -GREETING_MESSAGE = """\ -🦉**Hi, I'm DocDocGo!** With my signature _infinite research_, I can save you time when finding the information you need takes more than a quick Google search. I can comb through hundreds of sites, find which ones have relevant information, and: - -- give you the aswer from each relevant source (_heatseek_ research mode) -- write a report using all sources, put them in a knowledge base for follow-up chat (_classic_ research) -""" - -"""I have two research modes: - -- **Heatseek mode**: I keep looking for sites with the exact information you need -- **Classic mode**: I keep ingesting sites relevant to your query into a knowledge base to use when chatting - -In heatseek mode, I give you candidate answers as I find them. In report mode, you get a report that combines insights from the ingested sources and a knowledge base for follow-up questions. - -""" - GREETING_MESSAGE = """\ 👋**Hi, I'm DocDoc:green[Go]!** My superpower is **infinite research** - when you need to go beyond a quick Google search, I will comb through hundreds of websites looking for the information you need. I can: -- look for sources containing something specific you need (_heatseek_ research mode), or +- look for sources containing something specific you need (_heatseek_ research), or - write a report using all sources and put them in a knowledge base for follow-up chat (_classic_ research) -""" - -_older_draft2 = """\ -🦉**Hi, I'm DocDocGo!** I can help when you need information that can't be found with a quick Google search. 
diff --git a/utils/helpers.py b/utils/helpers.py
index c8ef812..f151cae 100644
--- a/utils/helpers.py
+++ b/utils/helpers.py
@@ -4,7 +4,7 @@
 from utils.prepare import DEFAULT_MODE
 from utils.type_utils import ChatMode

-VERSION = "v0.2.7"
+VERSION = "v0.2.8"
 DELIMITER = "-" * 94 + "\n"
 DELIMITER40 = "-" * 40 + "\n"
 DELIMITER20_NONL = "-" * 20
@@ -63,48 +63,17 @@ DEFAULT_CHAT_MODE = command_ids[DEFAULT_MODE]

-GREETING_MESSAGE = """\
-🦉**Hi, I'm DocDocGo!** With my signature _infinite research_, I can save you time when finding the information you need takes more than a quick Google search. I can comb through hundreds of sites, find which ones have relevant information, and:
-
-- give you the aswer from each relevant source (_heatseek_ research mode)
-- write a report using all sources, put them in a knowledge base for follow-up chat (_classic_ research)
-"""
-
-"""I have two research modes:
-
-- **Heatseek mode**: I keep looking for sites with the exact information you need
-- **Classic mode**: I keep ingesting sites relevant to your query into a knowledge base to use when chatting
-
-In heatseek mode, I give you candidate answers as I find them. In report mode, you get a report that combines insights from the ingested sources and a knowledge base for follow-up questions.
-
-"""
-
 GREETING_MESSAGE = """\
 👋**Hi, I'm DocDoc:green[Go]!** My superpower is **infinite research** - when you need to go beyond a quick Google search, I will comb through hundreds of websites looking for the information you need. I can:

-- look for sources containing something specific you need (_heatseek_ research mode), or
+- look for sources containing something specific you need (_heatseek_ research), or
 - write a report using all sources and put them in a knowledge base for follow-up chat (_classic_ research)
-"""
-
-_older_draft2 = """\
-🦉**Hi, I'm DocDocGo!** I can help when you need information that can't be found with a quick Google search. I can comb through hundreds of sites and:
-
-- give you the answer from each relevant source (_heatseek_ research mode)
-- write a report using all sources, put them in a knowledge base for follow-up questions (_classic_ research)
+
+You don't need to specify which type of research to use; I'll determine it based on your query and collections.
 """

-_older_draft = """I have two research modes:
-
-- **Heatseek mode**: I keep looking for sites with the exact information you need
-- **Classic mode**: I keep ingesting sites relevant to your query into a knowledge base to use when chatting
-
-In heatseek mode, I give you candidate answers as I find them. In report mode, you get a report that combines insights from the ingested sources and a knowledge base for follow-up questions.
-
-"""
-
-GREETING_MESSAGE_SUFFIX_DEFAULT = "I have lots of cool commands, but the only one to remember is: `/help `"
+GREETING_MESSAGE_SUFFIX_DEFAULT = "Go ahead and ask me any question and I'll determine how best to research it!"
 # GREETING_MESSAGE_SUFFIX_DEFAULT = "I'm also _self-aware_ - I know how to use me, `/help `"
 GREETING_MESSAGE_SUFFIX_OTHER = GREETING_MESSAGE_SUFFIX_DEFAULT  # "How? Just ask me by typing `/help `."
@@ -126,8 +95,8 @@
 information from 2x as many sources as the original report. If you wanted to quadruple \
 the number of sources, you could use `/research deeper 2` instead.

-:grey[**Tip:** Swiching from GPT 3.5 to 4 (in the sidebar) improves my performance. \
-You'll need your own OpenAI API key for that. Using your own key also relaxes the restriction \
+:grey[**Tip:** Switching to a different model (in the sidebar) may improve my performance. \
+You'll need your own OpenRouter API key for that. Using your own key also relaxes the restriction \
 on the maximum number of automatic research iterations.]
 """
@@ -145,9 +114,12 @@
 ### How to use me

 First things first, I know figuring out how to use a new tool can be a bit overwhelming. But don't \
-worry, you won't have to memorize all the commands. Instead, you can just type `/help` followed by \
-what you want to do, and I'll guide you through it. For example:
+worry, you won't have to memorize any commands. I'll automatically determine which command is appropriate \
+for your query. And if you prefer to specify a command yourself, great! Go ahead and use it, and I will obey.
+
+"""
+TRUNCATED_HELP_INFO = """
 ```markdown
 /help How can I have you do web research for me?
 ```
diff --git a/utils/lang_utils.py b/utils/lang_utils.py
index 6df18b0..6583f70 100644
--- a/utils/lang_utils.py
+++ b/utils/lang_utils.py
@@ -612,4 +612,4 @@ def expand_chunks(

     for parent_id in final_chunks_by_id:
         final_chunks.extend(final_chunks_by_id[parent_id].values())
-    return final_chunks
+    return final_chunks
\ No newline at end of file
diff --git a/utils/prepare.py b/utils/prepare.py
index ac645b4..7eb7d3c 100644
--- a/utils/prepare.py
+++ b/utils/prepare.py
@@ -30,10 +30,12 @@ def get_logger(logger_name: str = DEFAULT_LOGGER_NAME):

 # Set up the environment variables
-DEFAULT_OPENAI_API_KEY = os.getenv("DEFAULT_OPENAI_API_KEY", "")
+DEFAULT_OPENROUTER_API_KEY = os.getenv("DEFAULT_OPENROUTER_API_KEY")
+DEFAULT_OPENAI_API_KEY = os.getenv("DEFAULT_OPENAI_API_KEY")
 IS_AZURE = bool(os.getenv("OPENAI_API_BASE") or os.getenv("AZURE_OPENAI_API_KEY"))
 EMBEDDINGS_DEPLOYMENT_NAME = os.getenv("EMBEDDINGS_DEPLOYMENT_NAME")
 CHAT_DEPLOYMENT_NAME = os.getenv("CHAT_DEPLOYMENT_NAME")
+OPENROUTER_BASE_URL = os.getenv("OPENROUTER_BASE_URL")

 DEFAULT_COLLECTION_NAME = os.getenv("DEFAULT_COLLECTION_NAME", "docdocgo-documentation")
@@ -48,15 +50,20 @@ def get_logger(logger_name: str = DEFAULT_LOGGER_NAME):
 # The following variable is only used if USE_CHROMA_VIA_HTTP is False
 VECTORDB_DIR = os.getenv("VECTORDB_DIR", "chroma/")

-MODEL_NAME = os.getenv("MODEL_NAME", "gpt-4o-mini")  # rename to DEFAULT_MODEL?
+MODEL_NAME = os.getenv("MODEL_NAME", "google/gemini-2.5-flash")  # rename to DEFAULT_MODEL?
 CONTEXT_LENGTH = int(os.getenv("CONTEXT_LENGTH", 16000))  # it's actually more like max
 # size of what we think we can feed to the model so that it doesn't get overwhelmed
 TEMPERATURE = float(os.getenv("TEMPERATURE", 0.3))
-ALLOWED_MODELS = os.getenv("ALLOWED_MODELS", MODEL_NAME).split(",")
-ALLOWED_MODELS = [model.strip() for model in ALLOWED_MODELS]
-if MODEL_NAME not in ALLOWED_MODELS:
-    raise ValueError("The default model must be in the list of allowed models.")
+
+allowed_models_str = os.getenv("ALLOWED_MODELS", "")
+if allowed_models_str == "all":
+    ALLOWED_MODELS: list[str] = []  # empty list means "no restriction"
+else:
+    ALLOWED_MODELS = [model.strip() for model in allowed_models_str.split(",") if model.strip()]
+    if ALLOWED_MODELS:
+        if MODEL_NAME not in ALLOWED_MODELS:
+            raise ValueError("The default model must be in the list of allowed models.")

 EMBEDDINGS_MODEL_NAME = os.getenv("EMBEDDINGS_MODEL_NAME", "text-embedding-3-large")
 EMBEDDINGS_DIMENSIONS = int(os.getenv("EMBEDDINGS_DIMENSIONS", 3072))
@@ -81,6 +88,7 @@ def get_logger(logger_name: str = DEFAULT_LOGGER_NAME):
 INITIAL_TEST_QUERY_STREAMLIT = os.getenv("INITIAL_QUERY_STREAMLIT")

 # Check that the necessary environment variables are set
+DUMMY_OPENROUTER_API_KEY_PLACEHOLDER = "DUMMY NON-EMPTY VALUE"
 DUMMY_OPENAI_API_KEY_PLACEHOLDER = "DUMMY NON-EMPTY VALUE"

 if IS_AZURE and not (
@@ -93,17 +101,17 @@ def get_logger(logger_name: str = DEFAULT_LOGGER_NAME):
         "You have set some but not all environment variables necessary to utilize the "
         "Azure OpenAI API endpoint. Please refer to .env.example for details."
     )
-elif not IS_AZURE and not DEFAULT_OPENAI_API_KEY:
+elif not IS_AZURE and not DEFAULT_OPENROUTER_API_KEY:
     # We don't exit because we could get the key from the Streamlit app
     print(
-        "WARNING: You have not set the DEFAULT_OPENAI_API_KEY environment variable. "
+        "WARNING: You have not set the DEFAULT_OPENROUTER_API_KEY environment variable. "
         "This is ok when running the Streamlit app, but not when running "
        "the command line app. For now, we will set it to a dummy non-empty value "
         "to avoid problems initializing the vectorstore etc. "
         "Please refer to .env.example for additional information."
     )
-    os.environ["DEFAULT_OPENAI_API_KEY"] = DUMMY_OPENAI_API_KEY_PLACEHOLDER
-    DEFAULT_OPENAI_API_KEY = DUMMY_OPENAI_API_KEY_PLACEHOLDER
+    os.environ["DEFAULT_OPENROUTER_API_KEY"] = DUMMY_OPENROUTER_API_KEY_PLACEHOLDER
+    DEFAULT_OPENROUTER_API_KEY = DUMMY_OPENROUTER_API_KEY_PLACEHOLDER

 # TODO investigate the behavior when this happens
 if not os.getenv("SERPER_API_KEY") and not os.getenv("IGNORE_LACK_OF_SERPER_API_KEY"):
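The new `ALLOWED_MODELS` handling deserves a note, since its semantics are easy to misread: `"all"` (or an unset/empty value) yields an empty list, which downstream code must interpret as "no restriction", while any other value becomes a cleaned list that must contain `MODEL_NAME`. A self-contained restatement of the parsing, with the intended behavior spelled out:

```python
# Standalone restatement of the ALLOWED_MODELS parsing above, for illustration.
def parse_allowed_models(raw: str) -> list[str]:
    if raw == "all":
        return []  # empty list means "any model is allowed"
    return [m.strip() for m in raw.split(",") if m.strip()]


assert parse_allowed_models("all") == []
assert parse_allowed_models("") == []
assert parse_allowed_models("google/gemini-2.5-flash, openai/gpt-4o") == [
    "google/gemini-2.5-flash",
    "openai/gpt-4o",
]
```

Note that an empty list meaning "anything goes" is an implicit convention; a sentinel such as `None` would make that intent harder to miss at call sites.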
diff --git a/utils/prompts.py b/utils/prompts.py
index de519b8..97dac3e 100644
--- a/utils/prompts.py
+++ b/utils/prompts.py
@@ -449,7 +449,7 @@
     for i, t in enumerate(prompts_templates_to_test):
         prompt = PromptTemplate.from_template(t)
         chain = get_prompt_llm_chain(
-            prompt, BotSettings(), os.getenv("DEFAULT_OPENAI_API_KEY"), stream=True
+            prompt, llm_settings=BotSettings(), chat_state=chat_state, stream=True
         )
         print("Prompt", i)
         try:
diff --git a/utils/query_parsing.py b/utils/query_parsing.py
index 73a2739..eb0b737 100644
--- a/utils/query_parsing.py
+++ b/utils/query_parsing.py
@@ -429,4 +429,4 @@ def parse_query(
         e, m = get_command(query, export_command_to_enum, ExportCommand.NONE)
         return ParsedQuery(chat_mode=chat_mode, export_command=e, message=m)

-    return ParsedQuery(chat_mode=chat_mode, message=query)
+    return ParsedQuery(chat_mode=ChatMode.AUTO_COMMAND_ID, message=query)
diff --git a/utils/streamlit/helpers.py b/utils/streamlit/helpers.py
index a05538b..581a77c 100644
--- a/utils/streamlit/helpers.py
+++ b/utils/streamlit/helpers.py
@@ -51,7 +51,7 @@
 """

 mode_option_to_prefix = {
-    "/kb (main mode)": (
+    "/kb": (
         "",  # TODO: this presupposes that DEFAULT_MODE is /kb
         "Chat using the current collection as a knowledge base.",
     ),
@@ -99,6 +99,10 @@
         "/ch ",
         "Regular chat, without retrieving information from the current collection.",
     ),
+    "/auto (main mode)": (
+        "/au ",
+        "Default mode: chat in natural language and a suitable command will be chosen for your query.",
+    ),
 }

 mode_options = list(mode_option_to_prefix.keys())
@@ -149,6 +153,7 @@
     ChatMode.RESEARCH_COMMAND_ID: research_status_config,
     ChatMode.INGEST_COMMAND_ID: ingest_status_config,
     ChatMode.SUMMARIZE_COMMAND_ID: summarize_status_config,
+    ChatMode.AUTO_COMMAND_ID: chat_with_docs_status_config,
 }

 STAND_BY_FOR_INGESTION_MESSAGE = (
@@ -208,12 +213,13 @@ def write_slowly(message_placeholder, answer, delay=None):

 def show_sources(
     sources: list[str] | None,
-    callback_handler=None,
+    # Commenting out the callback logic, as it is not needed for now
+    # callback_handler=None,
 ):
     """Show the sources if present."""
-    # If the cb handler is provided, remove the stand-by message
-    if callback_handler and callback_handler.end_str_printed:
-        callback_handler.container.markdown(fix_markdown(callback_handler.buffer))
+    # if callback_handler and callback_handler.end_str_printed:
+    #     callback_handler.container.markdown(fix_markdown(callback_handler.buffer))

     if not sources:
         return
@@ -296,3 +302,32 @@ def show_downloader(
         key=st.session_state.downloader_form_key,
     )
     return is_downloaded
+
+def sanitize_markdown_links(text: str) -> str:
+    """
+    Finds all Markdown links in the text and neutralizes any that are not valid,
+    absolute HTTP/HTTPS URLs, by turning them into plain text with inline code.
+
+    Example:
+    - "[Click me](https://example.com)" -> remains unchanged
+    - "[Click me](?collection=xyz)" -> becomes "Click me (source: `?collection=xyz`)"
+    - "[Click me](/local/path)" -> becomes "Click me (source: `/local/path`)"
+    """
+    # Regex to find all markdown links: [text](url)
+    markdown_link_regex = r"\[([^\]]+)\]\(([^)]+)\)"
+
+    def replacer(match):
+        link_text = match.group(1)
+        url = match.group(2)
+
+        # Check if the URL is a valid, absolute HTTP/HTTPS URL
+        if url.startswith("http://") or url.startswith("https://"):
+            # It's a valid link, so return it as is
+            return f"[{link_text}]({url})"
+        else:
+            # It's a relative path or potentially malicious link. Neutralize it
+            # by rendering the link text with the URL in inline code, which
+            # prevents Streamlit from displaying it as a clickable link.
+            return f"{link_text} (source: `{url}`)"
+
+    return re.sub(markdown_link_regex, replacer, text)
\ No newline at end of file
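Assuming `re` is already imported at the top of `utils/streamlit/helpers.py` (this diff does not add the import, so if it is missing, an `import re` is needed), the new `sanitize_markdown_links` behaves as follows:

```python
from utils.streamlit.helpers import sanitize_markdown_links

text = "See [docs](https://example.com) or [this one](?collection=xyz)."
print(sanitize_markdown_links(text))
# -> See [docs](https://example.com) or this one (source: `?collection=xyz`).
```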
diff --git a/utils/streamlit/prepare.py b/utils/streamlit/prepare.py
index 237b156..914107b 100644
--- a/utils/streamlit/prepare.py
+++ b/utils/streamlit/prepare.py
@@ -2,10 +2,10 @@

 import streamlit as st

-from components.llm import CallbackHandlerDDGConsole
+from components.llm import CallbackHandlerDDGConsole, NoOpCallbackHandler
 from docdocgo import do_intro_tasks
 from utils.chat_state import ChatState
-from utils.prepare import DEFAULT_OPENAI_API_KEY, DUMMY_OPENAI_API_KEY_PLACEHOLDER
+from utils.prepare import DEFAULT_OPENAI_API_KEY, DEFAULT_OPENROUTER_API_KEY, DUMMY_OPENAI_API_KEY_PLACEHOLDER, DUMMY_OPENROUTER_API_KEY_PLACEHOLDER, MODEL_NAME
 from utils.streamlit.fix_event_loop import remove_tornado_fix
 from utils.type_utils import OperationMode
 from utils.streamlit.helpers import mode_options
@@ -19,8 +19,9 @@ def prepare_app():
         )
         st.stop()

-    # Flag for whether or not the OpenAI API key has succeeded at least once
+    # Flag for whether or not the OpenRouter and OpenAI API keys have succeeded at least once
     st.session_state.llm_api_key_ok_status = False
+    st.session_state.openrouter_api_key_ok_status = False

     print("query params:", st.query_params)
     st.session_state.update_query_params = None
@@ -42,15 +43,22 @@ def prepare_app():
         vectorstore=vectorstore,
         callbacks=[
             CallbackHandlerDDGConsole(),
-            "placeholder for CallbackHandlerDDGStreamlit",
+            NoOpCallbackHandler()
         ],
+        openrouter_api_key=DEFAULT_OPENROUTER_API_KEY,
         openai_api_key=DEFAULT_OPENAI_API_KEY,
     )

     st.session_state.prev_supplied_openai_api_key = None
-    st.session_state.default_openai_api_key = DEFAULT_OPENAI_API_KEY
-    if st.session_state.default_openai_api_key == DUMMY_OPENAI_API_KEY_PLACEHOLDER:
-        st.session_state.default_openai_api_key = ""
+    st.session_state.prev_supplied_openrouter_api_key = None
+    st.session_state.openai_api_key = DEFAULT_OPENAI_API_KEY
+    st.session_state.default_openrouter_api_key = DEFAULT_OPENROUTER_API_KEY
+    if st.session_state.openai_api_key == DUMMY_OPENAI_API_KEY_PLACEHOLDER:
+        st.session_state.openai_api_key = ""
+    if st.session_state.default_openrouter_api_key == DUMMY_OPENROUTER_API_KEY_PLACEHOLDER:
+        st.session_state.default_openrouter_api_key = ""
+
+    st.session_state.model = MODEL_NAME

     st.session_state.idx_file_upload = -1
     st.session_state.uploader_form_key = "uploader-form"
@@ -66,4 +74,4 @@ def prepare_app():
         "/research heatseek Code for row of buttons Streamlit, /research What are the biggest AI news this month?, /help How does infinite research work?",
     )
     st.session_state.sample_queries = [q.strip() for q in SAMPLE_QUERIES.split(",")]
-    st.session_state.default_mode = mode_options[0]
\ No newline at end of file
+    st.session_state.default_mode = mode_options[-1]  # the "/auto (main mode)" entry
\ No newline at end of file
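The two placeholder checks in `prepare_app` are easy to get wrong, since each key must be compared against its own dummy placeholder before being exposed to the UI. The same intent can be written once as a small helper; this is a hypothetical refactoring sketch, not code from the diff:

```python
def real_key_or_empty(key: str | None, placeholder: str) -> str:
    """Return the key, or "" if it is unset or equals the dummy placeholder."""
    return "" if not key or key == placeholder else key


print(real_key_or_empty("DUMMY NON-EMPTY VALUE", "DUMMY NON-EMPTY VALUE"))  # -> ""
print(real_key_or_empty("sk-or-abc123", "DUMMY NON-EMPTY VALUE"))  # -> sk-or-abc123
print(real_key_or_empty(None, "DUMMY NON-EMPTY VALUE"))  # -> ""
```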
diff --git a/utils/type_utils.py b/utils/type_utils.py
index 1308a6a..d0e8003 100644
--- a/utils/type_utils.py
+++ b/utils/type_utils.py
@@ -34,6 +34,7 @@ class ChatMode(Enum):
     EXPORT_COMMAND_ID = 10
     SUMMARIZE_COMMAND_ID = 11
     SHARE_COMMAND_ID = 12
+    AUTO_COMMAND_ID = 13


 chat_modes_needing_llm = {
@@ -45,6 +46,7 @@ class ChatMode(Enum):
     ChatMode.CHAT_WITH_DOCS_COMMAND_ID,
     ChatMode.SUMMARIZE_COMMAND_ID,
     ChatMode.HELP_COMMAND_ID,
+    ChatMode.AUTO_COMMAND_ID,
 }


@@ -82,7 +84,7 @@ def user_facing_message_full(self):


 class BotSettings(BaseModel):
-    llm_model_name: str = MODEL_NAME
+    model: str = MODEL_NAME
     temperature: float = TEMPERATURE
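Taken together with the query-parsing change above, these type changes mean a bare query now routes to an LLM-backed auto mode, and any caller still reading `BotSettings.llm_model_name` must be updated to `.model`. A quick sanity check against the new definitions (a sketch that assumes it runs inside this repo, so the imports resolve):

```python
from utils.type_utils import BotSettings, ChatMode, chat_modes_needing_llm

settings = BotSettings()
print(settings.model)  # defaults to MODEL_NAME, e.g. "google/gemini-2.5-flash"

# The auto mode is in the set of modes that require an LLM,
# consistent with it choosing a command based on the user's query.
assert ChatMode.AUTO_COMMAND_ID in chat_modes_needing_llm
```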