Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
8b1234c
fix typo in env variable in dev docs
rio-codes Jun 23, 2025
7e172f4
Merge branch 'main' of github.com:rio-codes/docdocgo-core
rio-codes Jun 23, 2025
87cc262
Merge branch 'main' of github.com:rio-codes/docdocgo-core
rio-codes Jun 23, 2025
19252db
updating gitignore
rio-codes Jul 30, 2025
ae7eab3
changing all references to OpenAI API key to OpenRouter
rio-codes Jul 30, 2025
be0035a
openai is required for embeddings, re-implementing
rio-codes Aug 2, 2025
258f5c8
more reversions to openai where embedding is needed
rio-codes Aug 2, 2025
9cc899f
reverting announcement since it is for old version
rio-codes Aug 2, 2025
127614a
changed streamlit UI to include OpenRouter
rio-codes Aug 2, 2025
bb11ad5
fixing openrouter model setting
rio-codes Aug 3, 2025
e8828c9
model name is now obtained from settings
rio-codes Aug 4, 2025
e230fb7
various changes and beginning of new function code
rio-codes Aug 8, 2025
4c4016e
uploading progress but still troubleshooting
rio-codes Aug 8, 2025
6c03c1a
more fixes to command chooser, OpenRouter migration
rio-codes Aug 8, 2025
393adce
small formatting modifications, change to env example
rio-codes Aug 9, 2025
d1f10cd
added default mode, fixed callbacks bug, adjusted prompt
rio-codes Aug 9, 2025
677d9c6
implementing new default chat mode, cached summaries, and other fixes
rio-codes Aug 11, 2025
a256890
quick commit of file that should have been saved
rio-codes Aug 11, 2025
6033dea
quick commit of file that should have been saved
rio-codes Aug 11, 2025
e0781d1
removed all references to embeddings_needed
rio-codes Aug 13, 2025
a8d6f22
Merge branch 'dev-command-chooser' of github.com:rio-codes/docdocgo-c…
rio-codes Aug 13, 2025
11d1441
Only initialize coll_summary_query as blank if it doesn't exist
rio-codes Aug 13, 2025
6d52d0f
Update components/chroma_ddg_retriever.py
rio-codes Aug 13, 2025
19bcd22
removed unnecessary code that would not be reached if no chunks were …
rio-codes Aug 13, 2025
50cd78a
removed unneccesary pwd check
rio-codes Aug 13, 2025
2707f8e
simplifying logic to collapse API key fields if LLM response
rio-codes Aug 13, 2025
7720040
changed all instances of DEFAULT_CHAT_COMMAND_ID to AUTO_COMMAND_ID
rio-codes Aug 13, 2025
bd12176
Fix and refactor get_raw_command
reasonmethis Aug 30, 2025
577107e
Improve logging
reasonmethis Aug 30, 2025
74ab7aa
Add TODO comments
reasonmethis Aug 30, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 8 additions & 7 deletions .env.example
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
# Copy this file to .env and fill in the values

## NOTE: The only required value is DEFAULT_OPENAI_API_KEY. The app should work if the other values
## NOTE: The only required values are DEFAULT_OPENROUTER_API_KEY and DEFAULT_OPENAI_API_KEY. The app should work if the other values
# are left as is or not defined at all. However, you are strongly encouraged to fill in your
# own SERPER_API_KEY value. It is also recommended to fill in the BYPASS_SETTINGS_RESTRICTIONS and
# BYPASS_SETTINGS_RESTRICTIONS_PASSWORD values as needed (see below).
DEFAULT_OPENAI_API_KEY="" # your OpenAI API key
DEFAULT_OPENROUTER_API_KEY="" # your OpenRouter API key
DEFAULT_OPENAI_API_KEY="" # your OpenAI API key

## Your Google Serper API key (for web searches).
# If you don't set this, my key will be used, which may have
Expand All @@ -13,12 +14,12 @@ DEFAULT_OPENAI_API_KEY="" # your OpenAI API key
# SERPER_API_KEY=""

# Mode to use if none is specified in the query
DEFAULT_MODE="/docs" # or "/web" | "/quotes" | "/details" | "/chat" | "/research"
DEFAULT_MODE="/kb" # or "/chat", "/help", etc.

# Variables controlling whether the Streamlit app imposes some functionality restrictions
BYPASS_SETTINGS_RESTRICTIONS="" # whether to immediately allow all settings (any non-empty string means true)
BYPASS_SETTINGS_RESTRICTIONS_PASSWORD="" # what to enter in the OpenAI API key field to bypass settings
# restrictions. If BYPASS_SETTINGS_RESTRICTIONS is non-empty or if the user enters their own OpenAI API key,
BYPASS_SETTINGS_RESTRICTIONS_PASSWORD="" # what to enter in the OpenRouter API key field to bypass settings
# restrictions. If BYPASS_SETTINGS_RESTRICTIONS is non-empty or if the user enters their own OpenRouter API key,
# this becomes - mostly - irrelevant, as full settings access is already granted. I say "mostly" because
# this password can also be used for a couple of admin-only features, such as deleting the default collection
# and deleting a range of collections (see dbmanager.py). Recommendation: for local use,
Expand All @@ -27,8 +28,8 @@ BYPASS_SETTINGS_RESTRICTIONS_PASSWORD="" # what to enter in the OpenAI API key f

# If you are NOT using Azure, the following setting determines the chat model
# NOTE: You can change the model and temperature on the fly in Streamlit UI or via the API
MODEL_NAME="gpt-3.5-turbo-0125" # default model to use for chat
ALLOWED_MODELS="gpt-3.5-turbo-0125, gpt-4-turbo-2024-04-09"
MODEL_NAME="google/gemini-2.5-flash" # default model to use for chat
ALLOWED_MODELS="all" # Since we are now using OpenRouter, any model should be allowed. You can still specify a list if desired.
CONTEXT_LENGTH="16000" # you can also make it lower than the actual context length
EMBEDDINGS_MODEL_NAME="text-embedding-3-large"
EMBEDDINGS_DIMENSIONS="3072" # number of dimensions for the embeddings model
Expand Down
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ chroma-cloud*
.tmp*
credentials/
chroma-cloud-backup/
chroma/
chroma.cf.json
.chroma_env
my-chroma*
.chroma_env
Expand Down
1 change: 1 addition & 0 deletions .python-version
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
3.13.3
9 changes: 6 additions & 3 deletions README-FOR-DEVELOPERS.md
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ If this happens you will need to install the Microsoft C++ Build Tools. You can

### 4. Copy the `.env.example` file to `.env` and fill in the values

At first, you can simply fill in your [OpenAI API key](https://platform.openai.com/signup) and leave the other values as they are. Please see `.env.example` for additional details.
At first, you can simply fill in your [OpenRouter API key](https://openrouter.ai/CLERK-ROUTER/VIRTUAL/sign-up) and [OpenAI API key](https://platform.openai.com/signup) and leave the other values as they are. Please see `.env.example` for additional details.

## Running DocDocGo

Expand Down Expand Up @@ -116,6 +116,7 @@ The message should be sent as a POST request with the body as a JSON object that
class ChatRequestData(BaseModel):
message: str
api_key: str
openrouter_api_key: str | None = None
openai_api_key: str | None = None
chat_history: list[JSONish] = []
collection_name: str | None = None
Expand Down Expand Up @@ -146,7 +147,9 @@ The `collection_name` field is used to specify the collection that the bot shoul

The `api_key` field is used to specify the API key for the FastAPI server. The server will only honor requests that include the correct API key, as specified by the `DOCDOCGO_API_KEY` environment variable.

The `openai_api_key` field is used to specify the OpenAI API key. If not specified, the default (community) key will be used, assuming the `DEFAULT_OPENAI_API_KEY` environment variable is set.
The `openrouter_api_key` field is used to specify the OpenRouter API key. If not specified, the default (community) key will be used, assuming the `DEFAULT_OPENROUTER_API_KEY` environment variable is set.

The `openai_api_key` field is used to specify the OpenAI API key for embeddings. If not specified, the default (community) key will be used, assuming the `DEFAULT_OPENAI_API_KEY` environment variable is set.

The `access_codes_cache` field is an object mapping collection names to access codes that the client has stored for them for the current user. The bot will use these access codes to grant the user access to collections that require it.

Expand Down Expand Up @@ -392,7 +395,7 @@ As an alternative way to handle the issue of the default collection, you can cre

### Q: What is the `BYPASS_SETTINGS_RESTRICTIONS` environment variable?

A: Normally, when this variable is not defined (or is an empty string), the app will start in a "community key" mode, where you can only see and create public collections and there are restriction on allowed settings (e.g. you can't change the model in the UI). The key used as the community key is controlled by the `DEFAULT_OPENAI_API_KEY` environment variable. You can remove these restrictions and switch to using that same key as a private key by entering the admin password (the value of the `BYPASS_SETTINGS_RESTRICTIONS_PASSWORD` environment variable) in rhe OpenAI API key field.
A: Normally, when this variable is not defined (or is an empty string), the app will start in a "community key" mode, where you can only see and create public collections and there are restrictions on allowed settings (e.g. you can't change the model in the UI). The keys used as the community keys are controlled by the `DEFAULT_OPENROUTER_API_KEY` and `DEFAULT_OPENAI_API_KEY` environment variables. You can remove these restrictions and switch to using those same keys as private keys by entering the admin password (the value of the `BYPASS_SETTINGS_RESTRICTIONS_PASSWORD` environment variable) in the OpenRouter API key field.

However, when the `BYPASS_SETTINGS_RESTRICTIONS` variable is set to a non-empty string, the app will start in the "private key" mode right away, without you having to enter the admin password. This is useful if you use the app in a private setting and don't want to have to enter the admin password every time you start the app.

Expand Down
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ If this happens you will need to install the Microsoft C++ Build Tools. You can

### 4. Copy the `.env.example` file to `.env` and fill in the values

At first, you can simply fill in your [OpenAI API key](https://platform.openai.com/signup) and leave the other values as they are. Please see `.env.example` for additional details.
At first, you can simply fill in your [OpenRouter API key](https://openrouter.ai/CLERK-ROUTER/VIRTUAL/sign-up) and [OpenAI API key](https://platform.openai.com/signup) and leave the other values as they are. Please see `.env.example` for additional details.

## Running DocDocGo

Expand Down Expand Up @@ -431,9 +431,9 @@ A: Before you entered your own OpenAI API key, you were using the community key

You still have access to the public collections, you can switch to any public collection by typing `/db use <collection name>`. If you want to see all available public collections again, you can switch back to the community key by changing the key to an empty string, then running `/db list` again.

#### Q: I got a shareable link to a collection but using it reloads the Streamlit app, after which it ends up in its default state of using the community key. How can I use the link with my own OpenAI API key?
#### Q: I got a shareable link to a collection but using it reloads the Streamlit app, after which it ends up in its default state of using the community key. How can I use the link with my own OpenRouter API key?

A: Simply enter your key in the OpenAI API key field after the app has reloaded. The access code will still be valid.
A: Simply enter your key in the OpenRouter API key field after the app has reloaded. The access code will still be valid.

## DocDocGo Carbon

Expand Down
19 changes: 11 additions & 8 deletions agentblocks/webretrieve.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ class URLRetrievalData(BaseModel):


MAX_INIT_BATCH_SIZE = 10
MAX_URLS_TO_TRY = 25


def get_content_from_urls(
Expand All @@ -31,8 +32,6 @@ def get_content_from_urls(
Otherwise, fetch a new batch of urls, and repeat until at least min_ok_urls
urls are fetched successfully.

If there are duplicate URLs

Args:
- urls: list of urls to fetch content from
- min_ok_urls: minimum number of urls that need to be fetched successfully
Expand All @@ -42,11 +41,10 @@ def get_content_from_urls(
Returns:
- URLRetrievalData: object containing the fetched content
"""
urls = urls[:MAX_URLS_TO_TRY]
try:
batch_fetcher = batch_fetcher or get_batch_url_fetcher()
init_batch_size = init_batch_size or min(
MAX_INIT_BATCH_SIZE, round(min_ok_urls * 1.2)
) # NOTE: could optimize
init_batch_size = init_batch_size or min(MAX_INIT_BATCH_SIZE, round(min_ok_urls * 1.2)) # NOTE: could optimize

logger.info(
f"Fetching content from {len(urls)} urls:\n"
Expand Down Expand Up @@ -85,11 +83,18 @@ def get_content_from_urls(
batch_htmls = batch_fetcher(batch_urls)

# Process fetched content
at_least_one_ok = False
for url, html in zip(batch_urls, batch_htmls):
link_data = LinkData.from_raw_content(html)
res.link_data_dict[url] = link_data
if not link_data.error:
res.num_ok_urls += 1
at_least_one_ok = True

if not at_least_one_ok:
errors = [res.link_data_dict[url].error for url in batch_urls]
error_string = "\n- ".join(f"{url}: {error}" for url, error in zip(batch_urls, errors))
logger.warning(f"No usable content found for any of the {batch_size} urls: {error_string}")

logger.info(
f"Total URLs processed: {res.idx_first_not_tried} ({num_urls} total)\n"
Expand All @@ -98,6 +103,4 @@ def get_content_from_urls(

return res
except Exception as e:
raise DDGError(
user_facing_message="Apologies, I ran into a problem trying to fetch URL content."
) from e
raise DDGError(user_facing_message="Apologies, I ran into a problem trying to fetch URL content.") from e
Loading