diff --git a/agent-langgraph-short-term-memory/.env.example b/agent-langgraph-short-term-memory/.env.example new file mode 100644 index 0000000..32a59db --- /dev/null +++ b/agent-langgraph-short-term-memory/.env.example @@ -0,0 +1,21 @@ +# Make a copy of this to set environment variables for local development +# cp .env.example .env.local + +# TODO: Fill in auth related env vars +DATABRICKS_CONFIG_PROFILE=DEFAULT +# DATABRICKS_HOST=https://.databricks.com +# DATABRICKS_TOKEN=dapi.... + +# TODO: Update with the MLflow experiment you want to log traces and models to +MLFLOW_EXPERIMENT_ID= + +# TODO: Update with the Lakebase instance you want to store state/checkpoints in +LAKEBASE_INSTANCE_NAME= + +# TODO: Update the route to query agent if you used a different port to deploy your agent +API_PROXY=http://localhost:8000/invocations + +CHAT_APP_PORT=3000 +CHAT_PROXY_TIMEOUT_SECONDS=300 +MLFLOW_TRACKING_URI="databricks" +MLFLOW_REGISTRY_URI="databricks-uc" diff --git a/agent-langgraph-short-term-memory/.gitignore b/agent-langgraph-short-term-memory/.gitignore new file mode 100644 index 0000000..63c3b98 --- /dev/null +++ b/agent-langgraph-short-term-memory/.gitignore @@ -0,0 +1,208 @@ +# Created by https://www.toptal.com/developers/gitignore/api/python +# Edit at https://www.toptal.com/developers/gitignore?templates=python + +databricks.yml + +### Python ### +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ + +# VS Code +.vscode/ + +### Python Patch ### +# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration +poetry.toml + +# ruff +.ruff_cache/ + +# LSP config files +pyrightconfig.json + +# End of https://www.toptal.com/developers/gitignore/api/python + +# Created by https://www.toptal.com/developers/gitignore/api/react +# Edit at https://www.toptal.com/developers/gitignore?templates=react + +### react ### +.DS_* +*.log +logs +**/*.backup.* +**/*.back.* + +node_modules +bower_components + +*.sublime* + +psd +thumb +sketch + +# End of https://www.toptal.com/developers/gitignore/api/react + +**/uv.lock +**/mlruns/ +**/.vite/ +**/.databricks +**/.claude +**/.env.local diff --git a/agent-langgraph-short-term-memory/AGENTS.md b/agent-langgraph-short-term-memory/AGENTS.md new file mode 100644 index 0000000..642e20d --- /dev/null +++ b/agent-langgraph-short-term-memory/AGENTS.md @@ -0,0 +1,348 @@ +# Agent LangGraph Development Guide + +## Running the App + +**Prerequisites:** uv, nvm (Node 20), Databricks CLI + +**Quick 
Start:** + +```bash +./scripts/quickstart.sh # First-time setup (auth, MLflow experiment, env) +uv run start-app # Start app at http://localhost:8000 +``` + +**Advanced Server Options:** + +```bash +uv run start-server --reload # Hot-reload on code changes during development +uv run start-server --port 8001 +uv run start-server --workers 4 +``` + +**Test API:** + +```bash +# Streaming request +curl -X POST http://localhost:8000/invocations \ + -H "Content-Type: application/json" \ + -d '{ "input": [{ "role": "user", "content": "hi" }], "stream": true }' + +# Non-streaming request +curl -X POST http://localhost:8000/invocations \ + -H "Content-Type: application/json" \ + -d '{ "input": [{ "role": "user", "content": "hi" }] }' +``` + +--- + +## Testing the Agent + +**Run evaluation:** + +```bash +uv run agent-evaluate # Uses MLflow scorers (RelevanceToQuery, Safety) +``` + +**Run unit tests:** + +```bash +pytest [path] # Standard pytest execution +``` + +--- + +## Modifying the Agent + +Anytime the user wants to modify the agent, look through each of the following resources to help them accomplish their goal: + +If the user wants to convert something into Responses API, refer to https://mlflow.org/docs/latest/genai/serving/responses-agent/ for more information. + +1. Look through existing databricks-langchain APIs to see if they can use one of these to accomplish their goal. +2. Look through the folders in https://github.com/bbqiu/agent-on-app-prototype to see if there's an existing example similar to what they're looking to do. +3. Reference the documentation available under https://docs.databricks.com/aws/en/generative-ai/agent-framework/ and its subpages. +4. For adding tools and capabilities, refer to: https://docs.databricks.com/aws/en/generative-ai/agent-framework/agent-tool +5. For stuff like LangGraph routing, configuration, and customization, refer to the LangGraph documentation: https://docs.langchain.com/oss/python/langgraph/overview. 
+ +**Main file to modify:** `agent_server/agent.py` + +--- + +## databricks-langchain SDK overview + +**SDK Location:** `https://github.com/databricks/databricks-ai-bridge/tree/main/integrations/langchain` + +**Development Workflow:** + +```bash +uv add databricks-langchain +``` + +Before making any changes, ensure that the APIs actually exist in the SDK. If something is missing from the documentation here, feel free to look in the venv's `site-packages` directory for the `databricks_langchain` package. If it's not installed, run `uv sync` in this folder to create the .venv and install the package. + +--- + +### ChatDatabricks - LLM Chat Interface + +Connects to Databricks Model Serving endpoints for LLM inference. + +```python +from databricks_langchain import ChatDatabricks + +llm = ChatDatabricks( + endpoint="databricks-claude-3-7-sonnet", # or databricks-meta-llama-3-1-70b-instruct + temperature=0, + max_tokens=500, +) + +# For Responses API agents: +llm = ChatDatabricks(endpoint="my-agent-endpoint", use_responses_api=True) +``` + +--- + +### DatabricksEmbeddings - Generate Embeddings + +Query Databricks embedding model endpoints. + +```python +from databricks_langchain import DatabricksEmbeddings + +embeddings = DatabricksEmbeddings(endpoint="databricks-bge-large-en") +vector = embeddings.embed_query("The meaning of life is 42") +vectors = embeddings.embed_documents(["doc1", "doc2"]) +``` + +--- + +### DatabricksVectorSearch - Vector Store + +Connect to Databricks Vector Search indexes for similarity search. 
+ +```python +from databricks_langchain import DatabricksVectorSearch + +# Delta-sync index with Databricks-managed embeddings +vs = DatabricksVectorSearch(index_name="catalog.schema.index_name") + +# Direct-access or self-managed embeddings +vs = DatabricksVectorSearch( + index_name="catalog.schema.index_name", + embedding=embeddings, + text_column="content", +) + +docs = vs.similarity_search("query", k=5) +``` + +--- + +### MCP Client - Tool Integration + +Connect to MCP (Model Context Protocol) servers to get tools for your agent. + +**Basic MCP Server (manual URL):** + +```python +from databricks_langchain import DatabricksMCPServer, DatabricksMultiServerMCPClient + +client = DatabricksMultiServerMCPClient([ + DatabricksMCPServer( + name="system-ai", + url=f"{host}/api/2.0/mcp/functions/system/ai", + ) +]) +tools = await client.get_tools() +``` + +**From UC Function (convenience helper):** +Creates MCP server for Unity Catalog functions. If `function_name` is omitted, exposes all functions in the schema. + +```python +server = DatabricksMCPServer.from_uc_function( + catalog="main", + schema="tools", + function_name="send_email", # Optional - omit for all functions in schema + name="email-server", + timeout=30.0, + handle_tool_error=True, +) +``` + +**From Vector Search (convenience helper):** +Creates MCP server for Vector Search indexes. If `index_name` is omitted, exposes all indexes in the schema. + +```python +server = DatabricksMCPServer.from_vector_search( + catalog="main", + schema="embeddings", + index_name="product_docs", # Optional - omit for all indexes in schema + name="docs-search", + timeout=30.0, +) +``` + +**From Genie Space:** +Create MCP server from Genie Space. Need to get the genie space ID. Can prompt the user to retrieve this via the UI by getting the link to the genie space. 
+ +Ex: https://db-ml-models-dev-us-west.cloud.databricks.com/genie/rooms/01f0515f6739169283ef2c39b7329700?o=3217006663075879 means the genie space ID is 01f0515f6739169283ef2c39b7329700 + +```python +DatabricksMCPServer( + name="genie", + url=f"{host_name}/api/2.0/mcp/genie/01f0515f6739169283ef2c39b7329700", +), +``` + +**Non-Databricks MCP Server:** + +```python +from databricks_langchain import MCPServer + +server = MCPServer( + name="external-server", + url="https://other-server.com/mcp", + headers={"X-API-Key": "secret"}, + timeout=15.0, +) +``` + +### Stateful LangGraph agent + +To enable statefulness in a LangGraph agent, we need to install `databricks-langchain[memory]`. + +Look through the package files for the latest on stateful langgraph agents. Can start by looking at the databricks_langchain/checkpoints.py and databricks_langchain/store.py files. + +## Lakebase instance setup for stateful agents + +Add the lakebase name to `.env.local`: + +```bash +LAKEBASE_INSTANCE_NAME= +``` + +## Agent Framework Capabilities + +Reference: https://docs.databricks.com/aws/en/generative-ai/agent-framework/ + +### Tool Types + +1. **Unity Catalog Function Tools** - SQL UDFs managed in UC with built-in governance +2. **Agent Code Tools** - Defined directly in agent code for REST APIs and low-latency operations +3. **MCP Tools** - Interoperable tools via Model Context Protocol (Databricks-managed, external, or self-hosted) + +### Built-in Tools + +- **system.ai.python_exec** - Execute Python code dynamically within agent queries (code interpreter) + +### External Connection Tools + +Connect to external services via Unity Catalog HTTP connections: + +- **Slack** - Post messages to channels +- **Google Calendar** - Calendar operations +- **Microsoft Graph API** - Office 365 services +- **Azure AI Search** - Search functionality +- **Any HTTP API** - Use `http_request` from databricks-sdk + +Example: Create UC function wrapping HTTP request for Slack, then expose via MCP. 
+ +### Common Patterns + +- **Structured data retrieval** - Query SQL tables/databases +- **Unstructured data retrieval** - Document search and RAG via Vector Search +- **Code interpreter** - Python execution for analysis via system.ai.python_exec +- **External connections** - Integrate services like Slack via HTTP connections + +--- + +## Authentication Setup + +**Option 1: OAuth (Recommended)** + +```bash +databricks auth login +``` + +Set in `.env.local`: + +```bash +DATABRICKS_CONFIG_PROFILE=DEFAULT +``` + +**Option 2: Personal Access Token** + +Set in `.env.local`: + +```bash +DATABRICKS_HOST="https://host.databricks.com" +DATABRICKS_TOKEN="dapi_token" +``` + +--- + +## MLflow Experiment Setup + +Create and link an MLflow experiment: + +```bash +DATABRICKS_USERNAME=$(databricks current-user me | jq -r .userName) +databricks experiments create-experiment /Users/$DATABRICKS_USERNAME/agents-on-apps +``` + +Add the experiment ID to `.env.local`: + +```bash +MLFLOW_EXPERIMENT_ID= +``` + +--- + +## Key Files + +| File | Purpose | +| -------------------------------- | --------------------------------------------- | +| `agent_server/agent.py` | Agent logic, model, instructions, MCP servers | +| `agent_server/start_server.py` | FastAPI server + MLflow setup | +| `agent_server/evaluate_agent.py` | Agent evaluation with MLflow scorers | +| `agent_server/utils.py` | Databricks auth helpers, stream processing | +| `scripts/start_app.py` | Manages backend+frontend startup | + +--- + +## Deploying to Databricks Apps + +**Create app:** + +```bash +databricks apps create agent-langgraph +``` + +**Sync files:** + +```bash +DATABRICKS_USERNAME=$(databricks current-user me | jq -r .userName) +databricks sync . 
"/Users/$DATABRICKS_USERNAME/agent-langgraph" +``` + +**Deploy:** + +```bash +databricks apps deploy agent-langgraph --source-code-path /Workspace/Users/$DATABRICKS_USERNAME/agent-langgraph +``` + +**Query deployed app:** + +Generate OAuth token (PATs are not supported): + +```bash +databricks auth token +``` + +Send request: + +```bash +curl -X POST /invocations \ + -H "Authorization: Bearer " \ + -H "Content-Type: application/json" \ + -d '{ "input": [{ "role": "user", "content": "hi" }], "stream": true }' +``` diff --git a/agent-langgraph-short-term-memory/CLAUDE.md b/agent-langgraph-short-term-memory/CLAUDE.md new file mode 100644 index 0000000..4202b8f --- /dev/null +++ b/agent-langgraph-short-term-memory/CLAUDE.md @@ -0,0 +1,3 @@ +@AGENTS.md + + diff --git a/agent-langgraph-short-term-memory/README.md b/agent-langgraph-short-term-memory/README.md new file mode 100644 index 0000000..7809ec5 --- /dev/null +++ b/agent-langgraph-short-term-memory/README.md @@ -0,0 +1,296 @@ +# Responses API Agent + +This template defines a conversational agent app. The app comes with a built-in chat UI, but also exposes an API endpoint for invoking the agent so that you can serve your UI elsewhere (e.g. on your website or in a mobile app). + +The agent in this template implements the [OpenAI Responses API](https://platform.openai.com/docs/api-reference/responses) interface. It has access to a single tool; the [built-in code interpreter tool](https://docs.databricks.com/aws/en/generative-ai/agent-framework/code-interpreter-tools#built-in-python-executor-tool) (`system.ai.python_exec`) on Databricks. You can customize agent code and test it via the API or UI. + +The agent input and output format are defined by MLflow's ResponsesAgent interface, which closely follows the [OpenAI Responses API](https://platform.openai.com/docs/api-reference/responses) interface. 
See [the MLflow docs](https://mlflow.org/docs/latest/genai/flavors/responses-agent-intro/) for input and output formats for streaming and non-streaming requests, tracing requirements, and other agent authoring details. + +## Quick start + +Run the `./scripts/quickstart.sh` script to quickly set up your local environment and start the agent server. At any step, if there are issues, refer to the manual local development loop setup below. + +This script will: + +1. Verify uv, nvm, and Databricks CLI installations +2. Configure Databricks authentication +3. Configure agent tracing, by creating and linking an MLflow experiment to your app +4. Start the agent server and chat app + +```bash +./scripts/quickstart.sh +``` + +After the setup is complete, you can start the agent server and the chat app locally with: + +```bash +uv run start-app +``` + +This will start the agent server and the chat app at http://localhost:8000. + +**Next steps**: see [modifying your agent](#modifying-your-agent) to customize and iterate on the agent code. + +## Manual local development loop setup + +1. **Set up your local environment** + Install `uv` (python package manager), `nvm` (node version manager), and the Databricks CLI: + + - [`uv` installation docs](https://docs.astral.sh/uv/getting-started/installation/) + - [`nvm` installation](https://github.com/nvm-sh/nvm?tab=readme-ov-file#installing-and-updating) + - Run the following to use Node 20 LTS: + ```bash + nvm use 20 + ``` + - [`databricks CLI` installation](https://docs.databricks.com/aws/en/dev-tools/cli/install) + +2. **Set up local authentication to Databricks** + + In order to access Databricks resources from your local machine while developing your agent, you need to authenticate with Databricks. Choose one of the following options: + + **Option 1: OAuth via Databricks CLI (Recommended)** + + Authenticate with Databricks using the CLI. 
See the [CLI OAuth documentation](https://docs.databricks.com/aws/en/dev-tools/cli/authentication#oauth-user-to-machine-u2m-authentication). + + ```bash + databricks auth login + ``` + + Set the `DATABRICKS_CONFIG_PROFILE` environment variable in your .env.local file to the profile you used to authenticate: + + ```bash + DATABRICKS_CONFIG_PROFILE="DEFAULT" # change to the profile name you chose + ``` + + **Option 2: Personal Access Token (PAT)** + + See the [PAT documentation](https://docs.databricks.com/aws/en/dev-tools/auth/pat#databricks-personal-access-tokens-for-workspace-users). + + ```bash + # Add these to your .env.local file + DATABRICKS_HOST="https://host.databricks.com" + DATABRICKS_TOKEN="dapi_token" + ``` + + See the [Databricks SDK authentication docs](https://docs.databricks.com/aws/en/dev-tools/sdk-python#authenticate-the-databricks-sdk-for-python-with-your-databricks-account-or-workspace). + +3. **Create and link an MLflow experiment to your app** + + Create an MLflow experiment to enable tracing and version tracking. This is automatically done by the `./scripts/quickstart.sh` script. + + Create the MLflow experiment via the CLI: + + ```bash + DATABRICKS_USERNAME=$(databricks current-user me | jq -r .userName) + databricks experiments create-experiment /Users/$DATABRICKS_USERNAME/agents-on-apps + ``` + + Make a copy of `.env.example` to `.env.local` and update the `MLFLOW_EXPERIMENT_ID` in your `.env.local` file with the experiment ID you created. The `.env.local` file will be automatically loaded when starting the server. + + ```bash + cp .env.example .env.local + # Edit .env.local and fill in your experiment ID + ``` + + See the [MLflow experiments documentation](https://docs.databricks.com/aws/en/mlflow/experiments#create-experiment-from-the-workspace). + +4. 
**Test your agent locally** + + Start up the agent server and chat UI locally: + + ```bash + uv run start-app + ``` + + Query your agent via the UI (http://localhost:8000) or REST API: + + **Advanced server options:** + + ```bash + uv run start-server --reload # hot-reload the server on code changes + uv run start-server --port 8001 # change the port the server listens on + uv run start-server --workers 4 # run the server with multiple workers + ``` + + - Example streaming request: + ```bash + curl -X POST http://localhost:8000/invocations \ + -H "Content-Type: application/json" \ + -d '{ "input": [{ "role": "user", "content": "hi" }], "stream": true }' + ``` + - Example non-streaming request: + ```bash + curl -X POST http://localhost:8000/invocations \ + -H "Content-Type: application/json" \ + -d '{ "input": [{ "role": "user", "content": "hi" }] }' + ``` + - Example request with thread ID (for stateful agent): + ```bash + curl -X POST http://localhost:8000/invocations \ + -H "Content-Type: application/json" \ + -d '{ + "input": [{"role": "user", "content": "What did we discuss?"}], + "custom_inputs": {"thread_id": "<thread-id>"} + }' + ``` + +## Modifying your agent + +See the [LangGraph documentation](https://docs.langchain.com/oss/python/langgraph/quickstart) for more information on how to edit your own agent. + +Required files for hosting with MLflow `AgentServer`: + +- `agent.py`: Contains your agent logic. Modify this file to create your custom agent. For example, you can [add agent tools](https://docs.databricks.com/aws/en/generative-ai/agent-framework/agent-tool) to give your agent additional capabilities. +- `start_server.py`: Initializes and runs the MLflow `AgentServer` with agent_type="ResponsesAgent". You don't have to modify this file for most common use cases, but can add additional server routes (e.g. a `/metrics` endpoint) here. + +**Common customization questions:** + +**Q: Can I add additional files or folders to my agent?** +Yes. 
Add additional files or folders as needed. Ensure the script entry in `pyproject.toml` still points to the script that starts the server and sets up MLflow tracing. + +**Q: How do I add dependencies to my agent?** +Run `uv add <package_name>` (e.g., `uv add "mlflow-skinny[databricks]"`). See the [python pyproject.toml guide](https://packaging.python.org/en/latest/guides/writing-pyproject-toml/#dependencies-and-requirements). + +**Q: Can I add custom tracing beyond the built-in tracing?** +Yes. This template uses MLflow's agent server, which comes with automatic tracing for agent logic decorated with `@invoke()` and `@stream()`. It also uses [MLflow autologging APIs](https://mlflow.org/docs/latest/genai/tracing/#one-line-auto-tracing-integrations) to capture traces from LLM invocations. However, you can add additional instrumentation to capture more granular trace information when your agent runs. See the [MLflow tracing documentation](https://docs.databricks.com/aws/en/mlflow3/genai/tracing/app-instrumentation/). + +**Q: How can I extend this example with additional tools and capabilities?** +This template can be extended by integrating additional MCP servers, Vector Search Indexes, UC Functions, and other Databricks tools. See the ["Agent Framework Tools Documentation"](https://docs.databricks.com/aws/en/generative-ai/agent-framework/agent-tool). + +## Evaluating your agent + +Evaluate your agent by calling the invoke function you defined for the agent locally. + +- Update your `evaluate_agent.py` file with the preferred evaluation dataset and scorers. + +Run the evaluation using the evaluation script: + +```bash +uv run agent-evaluate +``` + +After it completes, open the MLflow UI link for your experiment to inspect results. + +## Deploying to Databricks Apps + +0. **Create a Databricks App**: + Ensure you have the [Databricks CLI](https://docs.databricks.com/aws/en/dev-tools/cli/tutorial) installed and configured. + + ```bash + databricks apps create agent-langgraph + ``` + +1. 
**Set up authentication to Databricks resources** + + For this example, you need to add an MLflow Experiment and Lakebase instance as a resource to your app. Grant the App's Service Principal (SP) permission to edit the experiment by clicking `edit` on your app home page. See the [Databricks Apps MLflow experiment documentation](https://docs.databricks.com/aws/en/dev-tools/databricks-apps/mlflow) and [Databricks Apps Lakebase documentation](https://docs.databricks.com/aws/en/dev-tools/databricks-apps/lakebase) for more information. + + To grant access to other resources like serving endpoints, genie spaces, UC Functions, and Vector Search Indexes, click `edit` on your app home page to grant the App's SP permission. See the [Databricks Apps resources documentation](https://docs.databricks.com/aws/en/dev-tools/databricks-apps/resources). + + For resources that are not supported yet, see the [Agent Framework authentication documentation](https://docs.databricks.com/aws/en/generative-ai/agent-framework/deploy-agent#automatic-authentication-passthrough) for the correct permission level to grant to your app SP. + + **On-behalf-of (OBO) User Authentication**: Use `get_user_workspace_client()` from `agent_server.utils` to authenticate as the requesting user instead of the app service principal. See the [OBO authentication documentation](https://docs.databricks.com/aws/en/dev-tools/databricks-apps/auth?language=Streamlit#retrieve-user-authorization-credentials). + +2. **Sync local files to your workspace** + + See the [Databricks Apps deploy documentation](https://docs.databricks.com/aws/en/dev-tools/databricks-apps/deploy?language=Databricks+CLI#deploy-the-app). + + ```bash + DATABRICKS_USERNAME=$(databricks current-user me | jq -r .userName) + databricks sync . "/Users/$DATABRICKS_USERNAME/agent-langgraph" + ``` + +3. 
**Grant Lakebase permissions to your App's Service Principal** + + Before deploying/querying your agent, you need to ensure your app has access to the necessary Lakebase tables for memory. + + First, add your Lakebase instance as a resource to your app: + - Go to the Databricks UI + - Navigate to your app and click **Edit** + - Go to **App resources** → **Add resource** + - Add your Lakebase instance that you are using for short-term memory store + + After adding your Lakebase as a resource to your app (with the Connect + Create permissions), you'll need to ensure access to certain schemas and tables that have already been created during local testing. To grant the necessary permissions on your Lakebase instance for your app's service principal, run the following SQL commands on your Lakebase instance (replace `app-sp-id` with your app's service principal UUID): + + ```sql + DO $$ + DECLARE + app_sp text := 'app-sp-id'; -- TODO: Replace with your App's Service Principal ID here + BEGIN + ------------------------------------------------------------------- + -- Drizzle schema: migration metadata tables + ------------------------------------------------------------------- + EXECUTE format('GRANT USAGE, CREATE ON SCHEMA drizzle TO %I;', app_sp); + EXECUTE format('GRANT SELECT, INSERT, UPDATE ON ALL TABLES IN SCHEMA drizzle TO %I;', app_sp); + ------------------------------------------------------------------- + -- App schema: business tables (Chat, Message, etc.) 
+ ------------------------------------------------------------------- + EXECUTE format('GRANT USAGE, CREATE ON SCHEMA ai_chatbot TO %I;', app_sp); + EXECUTE format('GRANT SELECT, INSERT, UPDATE ON ALL TABLES IN SCHEMA ai_chatbot TO %I;', app_sp); + ------------------------------------------------------------------- + -- Public schema for checkpoint tables + ------------------------------------------------------------------- + EXECUTE format('GRANT USAGE, CREATE ON SCHEMA public TO %I;', app_sp); + EXECUTE format('GRANT SELECT, INSERT, UPDATE ON TABLE public.checkpoint_migrations TO %I;', app_sp); + EXECUTE format('GRANT SELECT, INSERT, UPDATE ON TABLE public.checkpoint_writes TO %I;', app_sp); + EXECUTE format('GRANT SELECT, INSERT, UPDATE ON TABLE public.checkpoints TO %I;', app_sp); + EXECUTE format('GRANT SELECT, INSERT, UPDATE ON TABLE public.checkpoint_blobs TO %I;', app_sp); + END $$; + ``` + +4. **Deploy your Databricks App** + + See the [Databricks Apps deploy documentation](https://docs.databricks.com/aws/en/dev-tools/databricks-apps/deploy?language=Databricks+CLI#deploy-the-app). + + ```bash + databricks apps deploy agent-langgraph --source-code-path /Workspace/Users/$DATABRICKS_USERNAME/agent-langgraph + ``` + +5. **Query your agent hosted on Databricks Apps** + + Databricks Apps are _only_ queryable via OAuth token. You cannot use a PAT to query your agent. 
Generate an [OAuth token with your credentials using the Databricks CLI](https://docs.databricks.com/aws/en/dev-tools/cli/authentication#u2m-auth): + + ```bash + databricks auth login --host <workspace-url> + databricks auth token + ``` + + Send a request to the `/invocations` endpoint: + + - Example streaming request: + + ```bash + curl -X POST <app-url>/invocations \ + -H "Authorization: Bearer <oauth-token>" \ + -H "Content-Type: application/json" \ + -d '{ "input": [{ "role": "user", "content": "hi" }], "stream": true }' + ``` + + - Example non-streaming request: + + ```bash + curl -X POST <app-url>/invocations \ + -H "Authorization: Bearer <oauth-token>" \ + -H "Content-Type: application/json" \ + -d '{ "input": [{ "role": "user", "content": "hi" }] }' + ``` + - Example request with thread ID (for stateful agent): + + ```bash + curl -X POST <app-url>/invocations \ + -H "Authorization: Bearer <oauth-token>" \ + -H "Content-Type: application/json" \ + -d '{ + "input": [{"role": "user", "content": "What did we discuss?"}], + "custom_inputs": {"thread_id": "<thread-id>"} + }' + ``` + + +For future updates to the agent, sync and redeploy your agent. + +### FAQ + +- For a streaming response, I see a 200 OK in the logs, but an error in the actual stream. What's going on? + - This is expected behavior. The initial 200 OK confirms stream setup; streaming errors don't affect this status. +- When querying my agent, I get a 302 error. What's going on? + - Use an OAuth token. PATs are not supported for querying agents. 
diff --git a/agent-langgraph-short-term-memory/agent_server/__init__.py b/agent-langgraph-short-term-memory/agent_server/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/agent-langgraph-short-term-memory/agent_server/agent.py b/agent-langgraph-short-term-memory/agent_server/agent.py
new file mode 100644
index 0000000..8f0ad28
--- /dev/null
+++ b/agent-langgraph-short-term-memory/agent_server/agent.py
@@ -0,0 +1,215 @@
+import logging
+import os
+from typing import Any, AsyncGenerator, Optional, Sequence, TypedDict
+
+import uuid_utils
+
+import mlflow
+from databricks.sdk import WorkspaceClient
+from databricks_langchain import (
+    AsyncCheckpointSaver,
+    ChatDatabricks,
+    DatabricksMCPServer,
+    DatabricksMultiServerMCPClient,
+)
+from fastapi import HTTPException
+from langchain.agents import create_agent
+from langchain_core.messages import AnyMessage
+from langgraph.graph.message import add_messages
+from typing_extensions import Annotated
+
+from mlflow.genai.agent_server import invoke, stream
+from mlflow.types.responses import (
+    ResponsesAgentRequest,
+    ResponsesAgentResponse,
+    ResponsesAgentStreamEvent,
+    to_chat_completions_input,
+)
+
+from agent_server.utils import (
+    get_databricks_host_from_env,
+    process_agent_astream_events,
+)
+
+logger = logging.getLogger(__name__)
+mlflow.langchain.autolog()
+sp_workspace_client = WorkspaceClient()
+
+############################################
+# Configuration
+############################################
+LLM_ENDPOINT_NAME = "databricks-claude-sonnet-4-5"
+SYSTEM_PROMPT = "You are a helpful assistant. Use the available tools to answer questions."
+LAKEBASE_INSTANCE_NAME = os.getenv("LAKEBASE_INSTANCE_NAME", "")
+
+# streaming() opens its checkpointer on this instance, so fail at import time if unset.
+if not LAKEBASE_INSTANCE_NAME:
+    raise ValueError(
+        "LAKEBASE_INSTANCE_NAME environment variable is required but not set. "
+        "Please set it in your environment:\n"
+        "  LAKEBASE_INSTANCE_NAME=<your-lakebase-instance-name>\n"
+    )
+
+
+class StatefulAgentState(TypedDict, total=False):
+    """State schema given to create_agent; `messages` is merged via add_messages."""
+
+    messages: Annotated[Sequence[AnyMessage], add_messages]
+    custom_inputs: dict[str, Any]
+    custom_outputs: dict[str, Any]
+
+
+def init_mcp_client(workspace_client: WorkspaceClient) -> DatabricksMultiServerMCPClient:
+    """Build the MCP client for the workspace's `system/ai` functions endpoint.
+
+    Args:
+        workspace_client: Client passed to the MCP server entry for authentication.
+
+    Raises:
+        ValueError: If the workspace host cannot be resolved.
+    """
+    host_name = get_databricks_host_from_env()
+    if not host_name:
+        # The helper returns None on failure; without this guard the URL would
+        # silently become "None/api/2.0/mcp/functions/system/ai".
+        raise ValueError("Could not determine the Databricks workspace host.")
+    return DatabricksMultiServerMCPClient(
+        [
+            DatabricksMCPServer(
+                name="system-ai",
+                url=f"{host_name}/api/2.0/mcp/functions/system/ai",
+                workspace_client=workspace_client,
+            ),
+        ]
+    )
+
+
+async def init_agent(
+    workspace_client: Optional[WorkspaceClient] = None,
+    checkpointer: Optional[Any] = None,
+):
+    """Create the agent from the MCP tools, the chat model and the system prompt.
+
+    Args:
+        workspace_client: Client for the MCP connection; defaults to `sp_workspace_client`.
+        checkpointer: Forwarded to create_agent as its `checkpointer`.
+    """
+    mcp_client = init_mcp_client(workspace_client or sp_workspace_client)
+    tools = await mcp_client.get_tools()
+
+    model = ChatDatabricks(endpoint=LLM_ENDPOINT_NAME)
+
+    return create_agent(
+        model=model,
+        tools=tools,
+        system_prompt=SYSTEM_PROMPT,
+        checkpointer=checkpointer,
+        state_schema=StatefulAgentState,
+    )
+
+
+def _get_or_create_thread_id(request: ResponsesAgentRequest) -> str:
+    """Return the thread ID for this request.
+
+    Priority:
+    1. `thread_id` from custom inputs
+    2. `conversation_id` from ChatContext
+       https://mlflow.org/docs/latest/api_reference/python_api/mlflow.types.html#mlflow.types.agent.ChatContext
+    3. A newly generated UUIDv7
+    """
+    ci = dict(request.custom_inputs or {})
+
+    if ci.get("thread_id"):
+        return str(ci["thread_id"])
+
+    if request.context and getattr(request.context, "conversation_id", None):
+        return str(request.context.conversation_id)
+
+    return str(uuid_utils.uuid7())
+
+
+@invoke()
+async def non_streaming(request: ResponsesAgentRequest) -> ResponsesAgentResponse:
+    """Run the agent to completion and return every finished output item.
+
+    The resolved thread ID is written into `custom_inputs` so that `streaming`
+    resolves the same ID, and is returned in `custom_outputs`.
+    """
+    thread_id = _get_or_create_thread_id(request)
+    request.custom_inputs = dict(request.custom_inputs or {})
+    request.custom_inputs["thread_id"] = thread_id
+
+    outputs = [
+        event.item
+        async for event in streaming(request)
+        if event.type == "response.output_item.done"
+    ]
+
+    return ResponsesAgentResponse(output=outputs, custom_outputs={"thread_id": thread_id})
+
+
+@stream()
+async def streaming(
+    request: ResponsesAgentRequest,
+) -> AsyncGenerator[ResponsesAgentStreamEvent, None]:
+    """Stream agent events, using a Lakebase checkpointer keyed by the thread ID.
+
+    Raises:
+        HTTPException: 503 when the failure text mentions "permission"; other errors propagate.
+    """
+    thread_id = _get_or_create_thread_id(request)
+
+    config = {"configurable": {"thread_id": thread_id}}
+    input_state: dict[str, Any] = {
+        "messages": to_chat_completions_input([i.model_dump() for i in request.input]),
+        "custom_inputs": dict(request.custom_inputs or {}),
+    }
+
+    try:
+        async with AsyncCheckpointSaver(instance_name=LAKEBASE_INSTANCE_NAME) as checkpointer:
+            agent = await init_agent(checkpointer=checkpointer)
+
+            async for event in process_agent_astream_events(
+                agent.astream(
+                    input_state,
+                    config,
+                    stream_mode=["updates", "messages"],
+                )
+            ):
+                yield event
+    except Exception as e:
+        error_msg = str(e).lower()
+        # Check for Lakebase access/connection errors
+        if any(keyword in error_msg for keyword in ["permission"]):
+            logger.error(f"Lakebase access error: {e}")
+            raise HTTPException(status_code=503, detail=_get_lakebase_access_error_message()) from e
+        raise
+
+
+def _is_databricks_app_env() -> bool:
+    """Check if running in a Databricks App environment."""
+    return bool(os.getenv("DATABRICKS_APP_NAME"))
+
+
+def _get_lakebase_access_error_message() -> str:
+    """Generate a helpful error message for Lakebase access issues."""
+    if _is_databricks_app_env():
+        app_name = os.getenv("DATABRICKS_APP_NAME")
+        return (
+            f"Failed to connect to Lakebase instance '{LAKEBASE_INSTANCE_NAME}'. "
+            f"The App Service Principal for '{app_name}' may not have access.\n\n"
+            "To fix this:\n"
+            "1. Go to the Databricks UI and navigate to your app\n"
+            "2. Click 'Edit' → 'App resources' → 'Add resource'\n"
+            "3. Add your Lakebase instance as a resource\n"
+            "4. Grant the necessary permissions on your Lakebase instance. "
+            "See the README section 'Grant Lakebase permissions to your App's Service Principal' for the SQL commands."
+        )
+    else:
+        return (
+            f"Failed to connect to Lakebase instance '{LAKEBASE_INSTANCE_NAME}'. "
+            "Please verify:\n"
+            "1. The instance name is correct\n"
+            "2. You have the necessary permissions to access the instance\n"
+            "3. Your Databricks authentication is configured correctly"
+        )
\ No newline at end of file
diff --git a/agent-langgraph-short-term-memory/agent_server/evaluate_agent.py b/agent-langgraph-short-term-memory/agent_server/evaluate_agent.py
new file mode 100644
index 0000000..1a38cbc
--- /dev/null
+++ b/agent-langgraph-short-term-memory/agent_server/evaluate_agent.py
@@ -0,0 +1,53 @@
+import asyncio
+
+import mlflow
+from dotenv import load_dotenv
+from mlflow.genai.agent_server import get_invoke_function
+from mlflow.genai.scorers import RelevanceToQuery, Safety
+from mlflow.types.responses import ResponsesAgentRequest, ResponsesAgentResponse
+
+# Load environment variables from .env.local if it exists
+load_dotenv(dotenv_path=".env.local", override=True)
+
+# need to import agent for our @invoke-registered function to be found
+from agent_server import agent  # noqa: F401
+
+# Create your evaluation dataset
+# Refer to documentation for evaluations:
+# Scorers: https://docs.databricks.com/aws/en/mlflow3/genai/eval-monitor/concepts/scorers
+# Predefined LLM scorers: 
https://mlflow.org/docs/latest/genai/eval-monitor/scorers/llm-judge/predefined +# Defining custom scorers: https://docs.databricks.com/aws/en/mlflow3/genai/eval-monitor/custom-scorers +eval_dataset = [ + { + "inputs": { + "request": { + "input": [{"role": "user", "content": "Calculate the 15th Fibonacci number"}] + } + }, + "expected_response": "The 15th Fibonacci number is 610.", + } +] + +# Get the invoke function that was registered via @invoke decorator in your agent +invoke_fn = get_invoke_function() +assert invoke_fn is not None, ( + "No function registered with the `@invoke` decorator found." + "Ensure you have a function decorated with `@invoke()`." +) + +# if invoke function is async, then we need to wrap it in a sync function +if asyncio.iscoroutinefunction(invoke_fn): + + def sync_invoke_fn(request: dict) -> ResponsesAgentResponse: + req = ResponsesAgentRequest(**request) + return asyncio.run(invoke_fn(req)) +else: + sync_invoke_fn = invoke_fn + + +def evaluate(): + mlflow.genai.evaluate( + data=eval_dataset, + predict_fn=sync_invoke_fn, + scorers=[RelevanceToQuery(), Safety()], + ) diff --git a/agent-langgraph-short-term-memory/agent_server/start_server.py b/agent-langgraph-short-term-memory/agent_server/start_server.py new file mode 100644 index 0000000..8cab41b --- /dev/null +++ b/agent-langgraph-short-term-memory/agent_server/start_server.py @@ -0,0 +1,18 @@ +from dotenv import load_dotenv +from mlflow.genai.agent_server import AgentServer, setup_mlflow_git_based_version_tracking + +# Load env vars from .env.local before importing the agent for proper auth +load_dotenv(dotenv_path=".env.local", override=True) + +# Need to import the agent to register the functions with the server +import agent_server.agent # noqa: E402 + +agent_server = AgentServer("ResponsesAgent", enable_chat_proxy=True) + +# Define the app as a module level variable to enable multiple workers +app = agent_server.app # noqa: F841 +setup_mlflow_git_based_version_tracking() + + +def 
main():
+    agent_server.run(app_import_string="agent_server.start_server:app")
diff --git a/agent-langgraph-short-term-memory/agent_server/utils.py b/agent-langgraph-short-term-memory/agent_server/utils.py
new file mode 100644
index 0000000..4e2b441
--- /dev/null
+++ b/agent-langgraph-short-term-memory/agent_server/utils.py
@@ -0,0 +1,58 @@
+import json
+import logging
+from typing import Any, AsyncGenerator, AsyncIterator, Optional
+
+from databricks.sdk import WorkspaceClient
+from langchain.messages import AIMessageChunk, ToolMessage
+from mlflow.genai.agent_server import get_request_headers
+from mlflow.types.responses import (
+    ResponsesAgentStreamEvent,
+    create_text_delta,
+    output_to_responses_items_stream,
+)
+
+
+def get_user_workspace_client() -> WorkspaceClient:
+    """Build a WorkspaceClient from the `x-forwarded-access-token` request header."""
+    token = get_request_headers().get("x-forwarded-access-token")
+    return WorkspaceClient(token=token, auth_type="pat")
+
+
+def get_databricks_host_from_env() -> Optional[str]:
+    """Return the host of a default WorkspaceClient, or None (after logging) on any error."""
+    try:
+        w = WorkspaceClient()
+        return w.config.host
+    except Exception as e:
+        logging.exception(f"Error getting databricks host from env: {e}")
+        return None
+
+
+async def process_agent_astream_events(
+    async_stream: AsyncIterator[Any],
+) -> AsyncGenerator[ResponsesAgentStreamEvent, None]:
+    """
+    Generic helper to process agent stream events and yield ResponsesAgentStreamEvent objects.
+
+    Args:
+        async_stream: The async iterator from agent.astream()
+    """
+    async for event in async_stream:
+        if event[0] == "updates":
+            for node_data in event[1].values():
+                if len(node_data.get("messages", [])) > 0:
+                    for msg in node_data["messages"]:
+                        # Serialize non-string tool output to JSON text before converting it.
+                        if isinstance(msg, ToolMessage) and not isinstance(msg.content, str):
+                            msg.content = json.dumps(msg.content)
+                    for item in output_to_responses_items_stream(node_data["messages"]):
+                        yield item
+        elif event[0] == "messages":
+            try:
+                chunk = event[1][0]
+                if isinstance(chunk, AIMessageChunk) and (content := chunk.content):
+                    yield ResponsesAgentStreamEvent(
+                        **create_text_delta(delta=content, item_id=chunk.id)
+                    )
+            except Exception as e:
+                logging.exception(f"Error processing agent stream event: {e}")
diff --git a/agent-langgraph-short-term-memory/app.yaml b/agent-langgraph-short-term-memory/app.yaml
new file mode 100644
index 0000000..74d6e93
--- /dev/null
+++ b/agent-langgraph-short-term-memory/app.yaml
@@ -0,0 +1,18 @@
+command: ["uv", "run", "start-app"]
+# databricks apps listen by default on port 8000
+
+env:
+  - name: MLFLOW_TRACKING_URI
+    value: "databricks"
+  - name: MLFLOW_REGISTRY_URI
+    value: "databricks-uc"
+  - name: API_PROXY
+    value: "http://localhost:8000/invocations"
+  - name: CHAT_APP_PORT
+    value: "3000"
+  - name: CHAT_PROXY_TIMEOUT_SECONDS
+    value: "300"
+  - name: MLFLOW_EXPERIMENT_ID
+    valueFrom: "experiment"
+  - name: LAKEBASE_INSTANCE_NAME
+    valueFrom: "database"
\ No newline at end of file
diff --git a/agent-langgraph-short-term-memory/pyproject.toml b/agent-langgraph-short-term-memory/pyproject.toml
new file mode 100644
index 0000000..f15471f
--- /dev/null
+++ b/agent-langgraph-short-term-memory/pyproject.toml
@@ -0,0 +1,37 @@
+[project]
+name = "agent-server"
+version = "0.1.0"
+description = "MLflow-compatible agent server with FastAPI"
+readme = "README.md"
+authors = [
+    { name = "Agent Developer", email = "developer@example.com" }
+]
+requires-python = ">=3.11"
+dependencies = [
+    
"fastapi>=0.115.12", + "uvicorn>=0.34.2", + "databricks-langchain[memory]>=0.13.0", + "mlflow>=3.8.0rc0", + "langgraph>=1.0.1", + "langchain-mcp-adapters>=0.1.11", + "python-dotenv", +] + +[tool.uv] +prerelease = "allow" + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[dependency-groups] +dev = [ + "hatchling>=1.27.0", + "pytest>=7.0.0", +] + + +[project.scripts] +start-app = "scripts.start_app:main" +start-server = "agent_server.start_server:main" +agent-evaluate = "agent_server.evaluate_agent:evaluate" diff --git a/agent-langgraph-short-term-memory/requirements.txt b/agent-langgraph-short-term-memory/requirements.txt new file mode 100644 index 0000000..60cc5e6 --- /dev/null +++ b/agent-langgraph-short-term-memory/requirements.txt @@ -0,0 +1 @@ +uv diff --git a/agent-langgraph-short-term-memory/scripts/__init__.py b/agent-langgraph-short-term-memory/scripts/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/agent-langgraph-short-term-memory/scripts/quickstart.sh b/agent-langgraph-short-term-memory/scripts/quickstart.sh new file mode 100755 index 0000000..ef78e70 --- /dev/null +++ b/agent-langgraph-short-term-memory/scripts/quickstart.sh @@ -0,0 +1,501 @@ +#!/bin/bash +set -e + +# Helper function to check if a command exists +command_exists() { + command -v "$1" >/dev/null 2>&1 +} + +# Helper function to check if Homebrew is available +has_brew() { + command_exists brew +} + +# Parse command line arguments +PROFILE_ARG="" +HOST_ARG="" +LAKEBASE_ARG="" + +while [[ $# -gt 0 ]]; do + case $1 in + --profile) + PROFILE_ARG="$2" + shift 2 + ;; + --host) + HOST_ARG="$2" + shift 2 + ;; + --lakebase) + LAKEBASE_ARG="$2" + shift 2 + ;; + -h|--help) + echo "Usage: $0 [OPTIONS]" + echo "" + echo "Options:" + echo " --profile NAME Use specified Databricks profile (non-interactive)" + echo " --host URL Databricks workspace URL (for initial setup)" + echo " --lakebase NAME Lakebase instance name for short-term memory" + echo " 
-h, --help Show this help message" + exit 0 + ;; + *) + echo "Error: Unknown option $1" + echo "Use --help for usage information" + exit 1 + ;; + esac +done + +echo "===================================================================" +echo "Agent on Apps - Quickstart Setup" +echo "===================================================================" +echo + +# =================================================================== +# Section 1: Prerequisites Installation +# =================================================================== + +echo "Checking and installing prerequisites..." +echo + +# Check and install UV +if command_exists uv; then + echo "✓ UV is already installed" + uv --version +else + echo "Installing UV..." + if has_brew; then + echo "Using Homebrew to install UV..." + brew install uv + else + echo "Using curl to install UV..." + curl -LsSf https://astral.sh/uv/install.sh | sh + # Add UV to PATH for current session + export PATH="$HOME/.cargo/bin:$PATH" + fi + echo "✓ UV installed successfully" +fi + +# Check and install nvm +if [ -s "$HOME/.nvm/nvm.sh" ]; then + echo "✓ nvm is already installed" + # Load nvm for current session + export NVM_DIR="$HOME/.nvm" + [ -s "$NVM_DIR/nvm.sh" ] && \. "$NVM_DIR/nvm.sh" +else + echo "Installing nvm..." + if has_brew; then + echo "Using Homebrew to install nvm..." + brew install nvm + # Create nvm directory + mkdir -p ~/.nvm + # Add nvm to current session + export NVM_DIR="$HOME/.nvm" + [ -s "/opt/homebrew/opt/nvm/nvm.sh" ] && \. "/opt/homebrew/opt/nvm/nvm.sh" + [ -s "/usr/local/opt/nvm/nvm.sh" ] && \. "/usr/local/opt/nvm/nvm.sh" + else + echo "Using curl to install nvm..." + curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.0/install.sh | bash + # Load nvm for current session + export NVM_DIR="$HOME/.nvm" + [ -s "$NVM_DIR/nvm.sh" ] && \. "$NVM_DIR/nvm.sh" + fi + echo "✓ nvm installed successfully" +fi + +# Use Node 20 +echo "Setting up Node.js 20..." 
+nvm install 20 +nvm use 20 +echo "✓ Node.js 20 is now active" +node --version +npm --version +echo + +# Check and install Databricks CLI +if command_exists databricks; then + echo "✓ Databricks CLI is already installed" + databricks --version +else + echo "Installing Databricks CLI..." + if has_brew; then + echo "Using Homebrew to install Databricks CLI..." + brew tap databricks/tap + brew install databricks + else + echo "Using curl to install Databricks CLI..." + if curl -fsSL https://raw.githubusercontent.com/databricks/setup-cli/main/install.sh | sh; then + echo "✓ Databricks CLI installed successfully" + else + echo "Installation failed, trying with sudo..." + curl -fsSL https://raw.githubusercontent.com/databricks/setup-cli/main/install.sh | sudo sh + fi + fi + echo "✓ Databricks CLI installed successfully" +fi +echo + +# =================================================================== +# Section 2: Configuration Files Setup +# =================================================================== +echo "Setting up configuration files..." + +# Copy .env.example to .env.local if it doesn't exist +if [ ! -f ".env.local" ]; then + echo "Copying .env.example to .env.local..." + cp .env.example .env.local + echo +else + echo ".env.local already exists, skipping copy..." +fi +echo + +# =================================================================== +# Section 3: Databricks Authentication +# =================================================================== + +echo "Setting up Databricks authentication..." + +# If --profile was provided, use it directly +if [ -n "$PROFILE_ARG" ]; then + PROFILE_NAME="$PROFILE_ARG" + echo "Using provided profile: $PROFILE_NAME" + + # Test if the profile works + set +e + DATABRICKS_CONFIG_PROFILE="$PROFILE_NAME" databricks current-user me >/dev/null 2>&1 + PROFILE_TEST=$? 
+    set -e
+
+    if [ $PROFILE_TEST -eq 0 ]; then
+        echo "✓ Successfully validated profile '$PROFILE_NAME'"
+    else
+        echo "Error: Profile '$PROFILE_NAME' is not valid or not authenticated"
+        echo "Please authenticate first with: databricks auth login --profile $PROFILE_NAME"
+        exit 1
+    fi
+
+    # Update .env.local with the profile name
+    if grep -q "DATABRICKS_CONFIG_PROFILE=" .env.local; then
+        sed -i.bak "s/DATABRICKS_CONFIG_PROFILE=.*/DATABRICKS_CONFIG_PROFILE=$PROFILE_NAME/" .env.local; rm -f .env.local.bak  # -i.bak works with both GNU and BSD sed
+    else
+        echo "DATABRICKS_CONFIG_PROFILE=$PROFILE_NAME" >> .env.local
+    fi
+    echo "✓ Databricks profile '$PROFILE_NAME' saved to .env.local"
+
+# If --host was provided, use it for initial setup
+elif [ -n "$HOST_ARG" ]; then
+    echo "Setting up authentication with host: $HOST_ARG"
+    echo "You will be prompted to log in to Databricks in your browser."
+    echo
+
+    set +e
+    AUTH_LOG=$(mktemp)
+    databricks auth login --host "$HOST_ARG" 2>&1 | tee "$AUTH_LOG"
+    AUTH_EXIT_CODE=${PIPESTATUS[0]}  # status of 'databricks auth login'; $? would be tee's
+    set -e
+
+    if [ $AUTH_EXIT_CODE -eq 0 ]; then
+        echo "✓ Successfully authenticated with Databricks"
+        PROFILE_NAME=$(grep -i "Profile .* was successfully saved" "$AUTH_LOG" | sed -E 's/.*Profile ([^ ]+) was successfully saved.*/\1/' | head -1)
+        rm -f "$AUTH_LOG"
+
+        if [ -z "$PROFILE_NAME" ]; then
+            PROFILE_NAME="DEFAULT"
+        fi
+
+        if grep -q "DATABRICKS_CONFIG_PROFILE=" .env.local; then
+            sed -i.bak "s/DATABRICKS_CONFIG_PROFILE=.*/DATABRICKS_CONFIG_PROFILE=$PROFILE_NAME/" .env.local; rm -f .env.local.bak
+        else
+            echo "DATABRICKS_CONFIG_PROFILE=$PROFILE_NAME" >> .env.local
+        fi
+        echo "✓ Databricks profile '$PROFILE_NAME' saved to .env.local"
+    else
+        rm -f "$AUTH_LOG"
+        echo "Databricks authentication failed."
+        exit 1
+    fi
+
+else
+    # Interactive mode - check for existing profiles
+    set +e
+    EXISTING_PROFILES=$(databricks auth profiles 2>/dev/null)
+    PROFILES_EXIT_CODE=$?
+ set -e + + if [ $PROFILES_EXIT_CODE -eq 0 ] && [ -n "$EXISTING_PROFILES" ]; then + # Profiles exist - let user select one + echo "Found existing Databricks profiles:" + echo + + # Parse profiles into an array (compatible with older bash) + # Skip the first line (header row) + PROFILE_ARRAY=() + PROFILE_NAMES=() + LINE_NUM=0 + while IFS= read -r line; do + if [ -n "$line" ]; then + if [ $LINE_NUM -eq 0 ]; then + # Print header without number + echo "$line" + else + # Add full line to display array + PROFILE_ARRAY+=("$line") + # Extract just the profile name (first column) for selection + PROFILE_NAME_ONLY=$(echo "$line" | awk '{print $1}') + PROFILE_NAMES+=("$PROFILE_NAME_ONLY") + fi + LINE_NUM=$((LINE_NUM + 1)) + fi + done <<< "$EXISTING_PROFILES" + echo + + # Display numbered list + for i in "${!PROFILE_ARRAY[@]}"; do + echo "$((i+1))) ${PROFILE_ARRAY[$i]}" + done + echo + + echo "Enter the number of the profile you want to use:" + read -r PROFILE_CHOICE + + if [ -z "$PROFILE_CHOICE" ]; then + echo "Error: Profile selection is required" + exit 1 + fi + + # Validate the choice is a number + if ! [[ "$PROFILE_CHOICE" =~ ^[0-9]+$ ]]; then + echo "Error: Please enter a valid number" + exit 1 + fi + + # Convert to array index (subtract 1) + PROFILE_INDEX=$((PROFILE_CHOICE - 1)) + + # Check if the index is valid + if [ $PROFILE_INDEX -lt 0 ] || [ $PROFILE_INDEX -ge ${#PROFILE_NAMES[@]} ]; then + echo "Error: Invalid selection. Please choose a number between 1 and ${#PROFILE_NAMES[@]}" + exit 1 + fi + + # Get the selected profile name (just the name, not the full line) + PROFILE_NAME="${PROFILE_NAMES[$PROFILE_INDEX]}" + echo "Selected profile: $PROFILE_NAME" + + # Test if the profile works + set +e + DATABRICKS_CONFIG_PROFILE="$PROFILE_NAME" databricks current-user me >/dev/null 2>&1 + PROFILE_TEST=$? 
+        set -e
+
+        if [ $PROFILE_TEST -eq 0 ]; then
+            echo "✓ Successfully validated profile '$PROFILE_NAME'"
+        else
+            # Profile exists but isn't authenticated - prompt to authenticate
+            echo "Profile '$PROFILE_NAME' is not authenticated."
+            echo "Authenticating profile '$PROFILE_NAME'..."
+            echo "You will be prompted to log in to Databricks in your browser."
+            echo
+
+            # Temporarily disable exit on error for the auth command
+            set +e
+
+            # Run auth login with the profile name and capture output while still showing it to the user
+            AUTH_LOG=$(mktemp)
+            databricks auth login --profile "$PROFILE_NAME" 2>&1 | tee "$AUTH_LOG"
+            AUTH_EXIT_CODE=${PIPESTATUS[0]}  # status of 'databricks auth login'; $? would be tee's
+
+            set -e
+
+            if [ $AUTH_EXIT_CODE -eq 0 ]; then
+                echo "✓ Successfully authenticated profile '$PROFILE_NAME'"
+                # Clean up temp file
+                rm -f "$AUTH_LOG"
+            else
+                # Clean up temp file
+                rm -f "$AUTH_LOG"
+                echo "Error: Profile '$PROFILE_NAME' authentication failed"
+                exit 1
+            fi
+        fi
+
+        # Update .env.local with the profile name
+        if grep -q "DATABRICKS_CONFIG_PROFILE=" .env.local; then
+            sed -i.bak "s/DATABRICKS_CONFIG_PROFILE=.*/DATABRICKS_CONFIG_PROFILE=$PROFILE_NAME/" .env.local; rm -f .env.local.bak  # -i.bak works with both GNU and BSD sed
+        else
+            echo "DATABRICKS_CONFIG_PROFILE=$PROFILE_NAME" >> .env.local
+        fi
+        echo "✓ Databricks profile '$PROFILE_NAME' saved to .env.local"
+    else
+        # No profiles exist - create default one
+        echo "No existing profiles found. Setting up Databricks authentication..."
+        echo "Please enter your Databricks host URL (e.g., https://your-workspace.cloud.databricks.com):"
+        read -r DATABRICKS_HOST
+
+        if [ -z "$DATABRICKS_HOST" ]; then
+            echo "Error: Databricks host is required"
+            exit 1
+        fi
+
+        echo "Authenticating with Databricks..."
+        echo "You will be prompted to log in to Databricks in your browser."
+        echo
+
+        # Temporarily disable exit on error for the auth command
+        set +e
+
+        # Run auth login with host parameter and capture output while still showing it to the user
+        AUTH_LOG=$(mktemp)
+        databricks auth login --host "$DATABRICKS_HOST" 2>&1 | tee "$AUTH_LOG"
+        AUTH_EXIT_CODE=${PIPESTATUS[0]}  # status of 'databricks auth login'; $? would be tee's
+
+        set -e
+
+        if [ $AUTH_EXIT_CODE -eq 0 ]; then
+            echo "✓ Successfully authenticated with Databricks"
+
+            # Extract profile name from the captured output
+            # Expected format: "Profile DEFAULT was successfully saved"
+            PROFILE_NAME=$(grep -i "Profile .* was successfully saved" "$AUTH_LOG" | sed -E 's/.*Profile ([^ ]+) was successfully saved.*/\1/' | head -1)
+
+            # Clean up temp file
+            rm -f "$AUTH_LOG"
+
+            # If we couldn't extract the profile name, default to "DEFAULT"
+            if [ -z "$PROFILE_NAME" ]; then
+                PROFILE_NAME="DEFAULT"
+                echo "Note: Could not detect profile name, using 'DEFAULT'"
+            fi
+
+            # Update .env.local with the profile name
+            if grep -q "DATABRICKS_CONFIG_PROFILE=" .env.local; then
+                sed -i.bak "s/DATABRICKS_CONFIG_PROFILE=.*/DATABRICKS_CONFIG_PROFILE=$PROFILE_NAME/" .env.local; rm -f .env.local.bak  # -i.bak works with both GNU and BSD sed
+            else
+                echo "DATABRICKS_CONFIG_PROFILE=$PROFILE_NAME" >> .env.local
+            fi
+
+            echo "✓ Databricks profile '$PROFILE_NAME' saved to .env.local"
+        else
+            # Clean up temp file
+            rm -f "$AUTH_LOG"
+            echo "Databricks authentication was cancelled or failed."
+            echo "Please run this script again when you're ready to authenticate."
+            exit 1
+        fi
+    fi
+fi
+echo
+
+# ===================================================================
+# Section 4: MLflow Experiment Setup
+# ===================================================================
+
+
+# Get current Databricks username
+echo "Getting Databricks username..."
+DATABRICKS_USERNAME=$(databricks -p "$PROFILE_NAME" current-user me | jq -r .userName)
+echo "Username: $DATABRICKS_USERNAME"
+echo
+
+# Create MLflow experiment and capture the experiment ID
+echo "Creating MLflow experiment..."
+EXPERIMENT_NAME="/Users/$DATABRICKS_USERNAME/agents-on-apps"
+
+# Try to create the experiment with the default name first
+if EXPERIMENT_RESPONSE=$(databricks -p "$PROFILE_NAME" experiments create-experiment "$EXPERIMENT_NAME" 2>/dev/null); then
+    EXPERIMENT_ID=$(echo "$EXPERIMENT_RESPONSE" | jq -r .experiment_id)
+    echo "Created experiment '$EXPERIMENT_NAME' with ID: $EXPERIMENT_ID"
+else
+    echo "Experiment name already exists, creating with random suffix..."
+    RANDOM_SUFFIX=$(openssl rand -hex 4)
+    EXPERIMENT_NAME="/Users/$DATABRICKS_USERNAME/agents-on-apps-$RANDOM_SUFFIX"
+    EXPERIMENT_RESPONSE=$(databricks -p "$PROFILE_NAME" experiments create-experiment "$EXPERIMENT_NAME")
+    EXPERIMENT_ID=$(echo "$EXPERIMENT_RESPONSE" | jq -r .experiment_id)
+    echo "Created experiment '$EXPERIMENT_NAME' with ID: $EXPERIMENT_ID"
+fi
+echo
+
+# Update .env.local with the experiment ID
+echo "Updating .env.local with experiment ID..."
+sed -i.bak "s/MLFLOW_EXPERIMENT_ID=.*/MLFLOW_EXPERIMENT_ID=$EXPERIMENT_ID/" .env.local; rm -f .env.local.bak  # -i.bak works with both GNU and BSD sed
+echo
+
+# ===================================================================
+# Section 5: Lakebase Instance Setup
+# ===================================================================
+
+echo "Setting up Lakebase instance for short-term memory..."
+
+# If --lakebase was provided, use it directly
+if [ -n "$LAKEBASE_ARG" ]; then
+    LAKEBASE_INSTANCE_NAME="$LAKEBASE_ARG"
+    echo "Using provided Lakebase instance: $LAKEBASE_INSTANCE_NAME"
+else
+    # Check if already configured in app.yaml (via valueFrom: database)
+    if grep -q "name: LAKEBASE_INSTANCE_NAME" app.yaml 2>/dev/null; then
+        echo "Found LAKEBASE_INSTANCE_NAME configured in app.yaml"
+        # Check if we have the actual instance name in .env.local
+        if grep -q "LAKEBASE_INSTANCE_NAME=." 
.env.local 2>/dev/null; then
+            LAKEBASE_INSTANCE_NAME=$(grep "LAKEBASE_INSTANCE_NAME=" .env.local | cut -d'=' -f2)
+            echo "Current Lakebase instance in .env.local: $LAKEBASE_INSTANCE_NAME"
+            echo "Press Enter to keep this value, or enter a new instance name:"
+            read -r NEW_LAKEBASE
+            if [ -n "$NEW_LAKEBASE" ]; then
+                LAKEBASE_INSTANCE_NAME="$NEW_LAKEBASE"
+            fi
+        else
+            echo "Please enter your Lakebase instance name (must match the database resource in your app):"
+            read -r LAKEBASE_INSTANCE_NAME
+            if [ -z "$LAKEBASE_INSTANCE_NAME" ]; then
+                echo "Error: Lakebase instance name is required for short-term memory"
+                exit 1
+            fi
+        fi
+    # Check if already set in .env.local only
+    elif grep -q "LAKEBASE_INSTANCE_NAME=." .env.local 2>/dev/null; then
+        LAKEBASE_INSTANCE_NAME=$(grep "LAKEBASE_INSTANCE_NAME=" .env.local | cut -d'=' -f2)
+        echo "Found existing Lakebase instance in .env.local: $LAKEBASE_INSTANCE_NAME"
+        echo "Press Enter to keep this value, or enter a new instance name:"
+        read -r NEW_LAKEBASE
+        if [ -n "$NEW_LAKEBASE" ]; then
+            LAKEBASE_INSTANCE_NAME="$NEW_LAKEBASE"
+        fi
+    else
+        # Interactive mode - prompt for instance name
+        echo "Please enter your Lakebase instance name:"
+        read -r LAKEBASE_INSTANCE_NAME
+
+        if [ -z "$LAKEBASE_INSTANCE_NAME" ]; then
+            echo "Error: Lakebase instance name is required for short-term memory"
+            exit 1
+        fi
+    fi
+fi
+
+# Update .env.local with the Lakebase instance name
+if grep -q "LAKEBASE_INSTANCE_NAME=" .env.local; then
+    sed -i.bak "s/LAKEBASE_INSTANCE_NAME=.*/LAKEBASE_INSTANCE_NAME=$LAKEBASE_INSTANCE_NAME/" .env.local; rm -f .env.local.bak  # -i.bak works with both GNU and BSD sed
+else
+    echo "LAKEBASE_INSTANCE_NAME=$LAKEBASE_INSTANCE_NAME" >> .env.local
+fi
+echo "✓ Lakebase instance name '$LAKEBASE_INSTANCE_NAME' saved to .env.local"
+echo
+
+echo "==================================================================="
+echo "Setup Complete!"
+echo "===================================================================" +echo "✓ Prerequisites installed (UV, nvm, Databricks CLI)" +echo "✓ Databricks authenticated with profile: $PROFILE_NAME" +echo "✓ Configuration files created (.env.local)" +echo "✓ MLflow experiment created: $EXPERIMENT_NAME" +echo "✓ Experiment ID: $EXPERIMENT_ID" +echo "✓ Lakebase instance: $LAKEBASE_INSTANCE_NAME" +echo "✓ Configuration updated in .env.local" +echo "===================================================================" +echo +echo "To start the app locally, run:" +echo " uv run start-app" +echo diff --git a/agent-langgraph-short-term-memory/scripts/start_app.py b/agent-langgraph-short-term-memory/scripts/start_app.py new file mode 100644 index 0000000..47b9113 --- /dev/null +++ b/agent-langgraph-short-term-memory/scripts/start_app.py @@ -0,0 +1,218 @@ +#!/usr/bin/env python3 +""" +Start script for running frontend and backend processes concurrently. + +Requirements: +1. Not reporting ready until BOTH frontend and backend processes are ready +2. Exiting as soon as EITHER process fails +3. 
Printing error logs if either process fails +""" + +import re +import shutil +import subprocess +import sys +import threading +import time +from pathlib import Path + +from dotenv import load_dotenv + +# Readiness patterns +BACKEND_READY = [r"Uvicorn running on", r"Application startup complete", r"Started server process"] +FRONTEND_READY = [r"Server is running on http://localhost"] + + +class ProcessManager: + def __init__(self): + self.backend_process = None + self.frontend_process = None + self.backend_ready = False + self.frontend_ready = False + self.failed = threading.Event() + self.backend_log = None + self.frontend_log = None + + def monitor_process(self, process, name, log_file, patterns): + is_ready = False + try: + for line in iter(process.stdout.readline, ""): + if not line: + break + + line = line.rstrip() + log_file.write(line + "\n") + print(f"[{name}] {line}") + + # Check readiness + if not is_ready and any(re.search(p, line, re.IGNORECASE) for p in patterns): + is_ready = True + if name == "backend": + self.backend_ready = True + else: + self.frontend_ready = True + print(f"✓ {name.capitalize()} is ready!") + + if self.backend_ready and self.frontend_ready: + print("\n" + "=" * 50) + print("✓ Both frontend and backend are ready!") + print("✓ Open the frontend at http://localhost:8000") + print("=" * 50 + "\n") + + process.wait() + if process.returncode != 0: + self.failed.set() + + except Exception as e: + print(f"Error monitoring {name}: {e}") + self.failed.set() + + def clone_frontend_if_needed(self): + if Path("e2e-chatbot-app-next").exists(): + return True + + print("Cloning e2e-chatbot-app-next...") + for url in [ + "https://github.com/databricks/app-templates.git", + "git@github.com:databricks/app-templates.git", + ]: + try: + subprocess.run( + ["git", "clone", "--filter=blob:none", "--sparse", url, "temp-app-templates"], + check=True, + capture_output=True, + ) + break + except subprocess.CalledProcessError: + continue + else: + print("ERROR: 
Failed to clone repository.") + print( + "Manually download from: https://download-directory.github.io/?url=https://github.com/databricks/app-templates/tree/main/e2e-chatbot-app-next" + ) + return False + + subprocess.run( + ["git", "sparse-checkout", "set", "e2e-chatbot-app-next"], + cwd="temp-app-templates", + check=True, + ) + Path("temp-app-templates/e2e-chatbot-app-next").rename("e2e-chatbot-app-next") + shutil.rmtree("temp-app-templates", ignore_errors=True) + return True + + def start_process(self, cmd, name, log_file, patterns, cwd=None): + print(f"Starting {name}...") + process = subprocess.Popen( + cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, bufsize=1, cwd=cwd + ) + + thread = threading.Thread( + target=self.monitor_process, args=(process, name, log_file, patterns), daemon=True + ) + thread.start() + return process + + def print_logs(self, log_path): + print(f"\nLast 50 lines of {log_path}:") + print("-" * 40) + try: + lines = Path(log_path).read_text().splitlines() + print("\n".join(lines[-50:])) + except FileNotFoundError: + print(f"(no {log_path} found)") + print("-" * 40) + + def cleanup(self): + print("\n" + "=" * 42) + print("Shutting down both processes...") + print("=" * 42) + + for proc in [self.backend_process, self.frontend_process]: + if proc: + try: + proc.terminate() + proc.wait(timeout=5) + except (subprocess.TimeoutExpired, Exception): + proc.kill() + + if self.backend_log: + self.backend_log.close() + if self.frontend_log: + self.frontend_log.close() + + def run(self): + load_dotenv(dotenv_path=".env.local", override=True) + + if not self.clone_frontend_if_needed(): + return 1 + + # Open log files + self.backend_log = open("backend.log", "w", buffering=1) + self.frontend_log = open("frontend.log", "w", buffering=1) + + try: + # Start backend + self.backend_process = self.start_process( + ["uv", "run", "start-server"], "backend", self.backend_log, BACKEND_READY + ) + + # Setup and start frontend + frontend_dir = 
Path("e2e-chatbot-app-next")
+            for cmd, desc in [("npm install", "install"), ("npm run build", "build")]:
+                print(f"Running npm {desc}...")
+                result = subprocess.run(
+                    cmd.split(), cwd=frontend_dir, capture_output=True, text=True
+                )
+                if result.returncode != 0:
+                    print(f"npm {desc} failed: {result.stderr}")
+                    return 1
+
+            self.frontend_process = self.start_process(
+                ["npm", "run", "start"],
+                "frontend",
+                self.frontend_log,
+                FRONTEND_READY,
+                cwd=frontend_dir,
+            )
+
+            print(
+                f"\nMonitoring processes (Backend PID: {self.backend_process.pid}, Frontend PID: {self.frontend_process.pid})\n"
+            )
+
+            # Wait for failure
+            while not self.failed.is_set():
+                time.sleep(0.1)
+                for proc in [self.backend_process, self.frontend_process]:
+                    if proc.poll() is not None:
+                        self.failed.set()
+                        break
+
+            # Determine which failed
+            failed_name = "backend" if self.backend_process.poll() is not None else "frontend"
+            failed_proc = (
+                self.backend_process if failed_name == "backend" else self.frontend_process
+            )
+            exit_code = failed_proc.returncode if failed_proc else 1
+
+            print(
+                f"\n{'=' * 42}\nERROR: {failed_name} process exited with code {exit_code}\n{'=' * 42}"
+            )
+            self.print_logs("backend.log")
+            self.print_logs("frontend.log")
+            return exit_code or 1  # returncode may be 0 (clean exit) or None (still running); never report success here
+
+        except KeyboardInterrupt:
+            print("\nInterrupted")
+            return 0
+
+        finally:
+            self.cleanup()
+
+
+def main():
+    sys.exit(ProcessManager().run())
+
+
+if __name__ == "__main__":
+    main()