diff --git a/agent-openai-agents-sdk/.env.example b/agent-openai-agents-sdk/.env.example index d6b5a70..7c4ebf9 100644 --- a/agent-openai-agents-sdk/.env.example +++ b/agent-openai-agents-sdk/.env.example @@ -9,10 +9,9 @@ DATABRICKS_CONFIG_PROFILE=DEFAULT # TODO: Update with the MLflow experiment you want to log traces and models to MLFLOW_EXPERIMENT_ID= -# TODO: Update the route to query agent if you used a different port to deploy your agent -API_PROXY=http://localhost:8000/invocations - CHAT_APP_PORT=3000 CHAT_PROXY_TIMEOUT_SECONDS=300 +# IMPORTANT: For local development, use databricks (for default profile) or databricks:// to specify which Databricks CLI profile to use +# This is automatically configured by the quickstart script MLFLOW_TRACKING_URI="databricks" MLFLOW_REGISTRY_URI="databricks-uc" diff --git a/agent-openai-agents-sdk/.gitignore b/agent-openai-agents-sdk/.gitignore index 63c3b98..077d791 100644 --- a/agent-openai-agents-sdk/.gitignore +++ b/agent-openai-agents-sdk/.gitignore @@ -1,8 +1,6 @@ # Created by https://www.toptal.com/developers/gitignore/api/python # Edit at https://www.toptal.com/developers/gitignore?templates=python -databricks.yml - ### Python ### # Byte-compiled / optimized / DLL files __pycache__/ diff --git a/agent-openai-agents-sdk/AGENTS.md b/agent-openai-agents-sdk/AGENTS.md index c885dc1..6e03e94 100644 --- a/agent-openai-agents-sdk/AGENTS.md +++ b/agent-openai-agents-sdk/AGENTS.md @@ -1,272 +1,450 @@ -# Agent OpenAI Agents SDK Development Guide +# Agent Development Guide -## Running the App +## For AI Agents: MANDATORY First Action -**Prerequisites:** uv, nvm (Node 20), Databricks CLI +**BEFORE any other action, run `databricks auth profiles` to check authentication status.** -**Quick Start:** +This helps you understand: +- Which Databricks profiles are configured +- Whether authentication is already set up +- Which profile to use for subsequent commands -```bash -./scripts/quickstart.sh # First-time setup (auth, MLflow experiment, env) -uv run start-app # Start app at http://localhost:8000 -``` +If no profiles exist, guide the user through running `./scripts/quickstart.sh` to set up authentication. -**Advanced Server Options:** +## For AI Agents: Handling Deployment Errors -```bash -uv run start-server --reload # Hot-reload on code changes during development -uv run start-server --port 8001 -uv run start-server --workers 4 -``` +**If `databricks bundle deploy` fails with "An app with the same name already exists":** -**Test API:** +Ask the user: "I see there's an existing app with the same name. Would you like me to bind it to this bundle so we can manage it, or delete it and create a new one?" -```bash -# Streaming request -curl -X POST http://localhost:8000/invocations \ - -H "Content-Type: application/json" \ - -d '{ "input": [{ "role": "user", "content": "hi" }], "stream": true }' +- **If they want to bind**: Follow the steps in the "Binding an existing app to your bundle" section +- **If they want to delete**: Run `databricks apps delete ` then deploy again -# Non-streaming request -curl -X POST http://localhost:8000/invocations \ - -H "Content-Type: application/json" \ - -d '{ "input": [{ "role": "user", "content": "hi" }] }' -``` +--- + +## Getting Started with Your Agent + +This guide walks you through the initial setup of your agent project: installing prerequisites, discovering available tools in your workspace, and testing the baseline template locally. 
+ +**After completing these steps**, see the README.md for information on modifying your agent and deploying to Databricks. --- -## Testing the Agent +## Quick Setup -**Run evaluation:** +**Prerequisites:** uv, nvm (Node 20), Databricks CLI (v0.283.0+) -```bash -uv run agent-evaluate # Uses MLflow scorers (RelevanceToQuery, Safety) -``` +**What you need:** +- uv (Python package manager) +- nvm (Node.js version manager) with Node 20 +- Databricks CLI v0.283.0 or above -**Run unit tests:** +**Quickest path to running:** ```bash -pytest [path] # Standard pytest execution -``` +# 0. Ensure you have the latest Databricks CLI +databricks -v # Should be v0.283.0 or above +brew upgrade databricks # Offer to run if the Databricks CLI version is too old ---- +# 1. Initialize git (recommended for version control) +git init -## Modifying the Agent +# 2. Run quickstart for setup (auth, MLflow experiment) +./scripts/quickstart.sh -Anytime the user wants to modify the agent, look through each of the following resources to help them accomplish their goal: +# Or run non-interactively with a profile +./scripts/quickstart.sh --profile DEFAULT -If the user wants to convert something into Responses API, refer to https://mlflow.org/docs/latest/genai/serving/responses-agent/ for more information. +# Or with a host URL for initial setup +./scripts/quickstart.sh --host https://your-workspace.cloud.databricks.com -1. Look through existing databricks-openai APIs to see if they can use one of these to accomplish their goal. -2. Look through the folders in https://github.com/bbqiu/agent-on-app-prototype to see if there's an existing example similar to what they're looking to do. -3. Reference the documentation available under https://docs.databricks.com/aws/en/generative-ai/agent-framework/ and its subpages. -4. For adding tools and capabilities, refer to: https://docs.databricks.com/aws/en/generative-ai/agent-framework/agent-tool -5. Reference the OpenAI Agents SDK documentation: https://platform.openai.com/docs/guides/agents-sdk +# 3. Discover available tools (IMPORTANT - do this before coding!) +uv run discover-tools -**Main file to modify:** `agent_server/agent.py` +# 4. Start the agent server +uv run start-app +``` + +**Quickstart script handles:** +- Databricks authentication (OAuth) +- MLflow experiment creation +- Environment variable configuration (`.env.local`) + - Sets `DATABRICKS_CONFIG_PROFILE` to your selected profile + - Configures `MLFLOW_TRACKING_URI` as `databricks://` for proper local authentication + - Sets `MLFLOW_EXPERIMENT_ID` to the created experiment + +**Quickstart options:** +- `--profile NAME`: Use specified Databricks profile (non-interactive) +- `--host URL`: Databricks workspace URL (for initial setup) +- `-h, --help`: Show help message --- -## databricks-openai SDK Overview +## Discovering Available Tools -**SDK Location:** `https://github.com/databricks/databricks-ai-bridge/tree/main/integrations/openai` +**⚠️ CRITICAL:** Always run tool discovery BEFORE writing agent code! -**Development Workflow:** +This step helps you understand what resources are already available in your workspace, preventing duplicate work and showing you the best practices for connecting to each resource. ```bash -uv add databricks-openai -``` +# Discover all available resources (recommended) +uv run discover-tools -Before making any changes, ensure that the APIs actually exist in the SDK. 
If something is missing from the documentation here, feel free to look in the venv's `site-packages` directory for the `databricks_openai` package. If it's not installed, run `uv sync` in this folder to create the .venv and install the package. +# Limit to specific catalog/schema +uv run discover-tools --catalog my_catalog --schema my_schema ---- +# Output as JSON for programmatic use +uv run discover-tools --format json --output tools.json -### Key Components +# Save markdown report +uv run discover-tools --output tools.md +``` -#### 1. MCP Servers - Tool Integration +**What gets discovered:** +1. **Unity Catalog Functions** - SQL UDFs usable as agent tools +2. **Unity Catalog Tables** - Structured data for querying +3. **Vector Search Indexes** - For RAG applications +4. **Genie Spaces** - Natural language interface to data +5. **Custom MCP Servers** - Your MCP servers deployed as Databricks Apps +6. **External MCP Servers** - Third-party MCP servers via UC connections -Connect to MCP (Model Context Protocol) servers to get tools for your agent. +**Using discovered tools in your agent:** -**Basic MCP Server:** +After discovering tools, configure your agent to use them: ```python from databricks_openai.agents import McpServer async def init_mcp_server(): return McpServer( - url=f"{host}/api/2.0/mcp/functions/system/ai", - name="system.ai uc function mcp server", + url=f"{host}/api/2.0/mcp/functions/{catalog}/{schema}", + name="my custom tools", ) # Use in agent agent = Agent( - name="code execution agent", - instructions="You are a code execution agent.", + name="my agent", + instructions="You are a helpful agent.", model="databricks-claude-3-7-sonnet", mcp_servers=[mcp_server], ) ``` -#### 2. Sync and Async Databricks OpenAI Clients +See the [MCP documentation](https://docs.databricks.com/aws/en/generative-ai/mcp/) for more details. -Set up Databricks-hosted OpenAI-compatible models: +--- -```python -from databricks_openai import AsyncDatabricksOpenAI, DatabricksOpenAI -from agents import set_default_openai_api, set_default_openai_client +## Granting Access to App Resources -# Async client (recommended for agent servers) -set_default_openai_client(AsyncDatabricksOpenAI()) -set_default_openai_api("chat_completions") +### ⚠️ CRITICAL: Resource Permissions -# Sync client -client = DatabricksOpenAI() -``` +**After adding any MCP server to your agent, you MUST grant the app access to the server's dependent resource(s) in `databricks.yml`.** -**Note:** This works for all Databricks models except GPT-OSS, which uses a slightly different API. +Without this, you'll get permission errors when the agent tries to use the resource. ---- +### Example Workflow -## Agent Development Patterns - -### Creating and Running Agents +**1. Add MCP server in `agent_server/agent.py`:** ```python -from agents import Agent, Runner from databricks_openai.agents import McpServer -# Create agent with MCP servers +genie_server = McpServer( + url=f"{host}/api/2.0/mcp/genie/01234567-89ab-cdef", + name="my genie space", +) + agent = Agent( - name="code execution agent", - instructions="You are a code execution agent.", + name="my agent", + instructions="You are a helpful agent.", model="databricks-claude-3-7-sonnet", - mcp_servers=[mcp_server], + mcp_servers=[genie_server], ) +``` -# Run agent (non-streaming) -messages = [{"role": "user", "content": "hi"}] -result = await Runner.run(agent, messages) +**2. 
Grant access in `databricks.yml`:** + +```yaml +resources: + apps: + agent_openai_agents_sdk: + resources: + - name: 'my_genie_space' + genie_space: + name: 'My Genie Space' + space_id: '01234567-89ab-cdef' + permission: 'CAN_RUN' +``` -# Run agent (streaming) -result = Runner.run_streamed(agent, input=messages) -async for event in result.stream_events(): - # Process stream events - pass +### Resource Type Examples + +```yaml +# Unity Catalog function (for UC functions accessed via MCP) +- name: 'my_uc_function' + uc_securable: + securable_full_name: 'catalog.schema.function_name' + securable_type: 'FUNCTION' + permission: 'EXECUTE' + +# Unity Catalog connection (for external MCP servers via UC connections) +- name: 'my_connection' + uc_securable: + securable_full_name: 'my-connection-name' + securable_type: 'CONNECTION' + permission: 'USE_CONNECTION' + +# Vector search index +- name: 'my_vector_index' + uc_securable: + securable_full_name: 'catalog.schema.index_name' + securable_type: 'TABLE' + permission: 'SELECT' + +# SQL warehouse +- name: 'my_warehouse' + sql_warehouse: + sql_warehouse_id: 'abc123def456' + permission: 'CAN_USE' + +# Model serving endpoint +- name: 'my_endpoint' + serving_endpoint: + name: 'my_endpoint' + permission: 'CAN_QUERY' + +# Genie space +- name: 'my_genie_space' + genie_space: + name: 'My Genie Space' + space_id: '01234567-89ab-cdef' + permission: 'CAN_RUN' + +# MLflow experiment +- name: 'my_experiment' + experiment: + experiment_id: "12349876" + permission: 'CAN_MANAGE' ``` -### MLflow Tracing with Decorators +### Custom MCP Servers (Databricks Apps) -The template uses MLflow's `@invoke()` and `@stream()` decorators for automatic tracing: +If you're using custom MCP servers deployed as Databricks Apps (names starting with `mcp-`), you need to manually grant your agent app's service principal permission to access them: -```python -from mlflow.genai.agent_server import invoke, stream -from mlflow.types.responses import ( - ResponsesAgentRequest, - ResponsesAgentResponse, - ResponsesAgentStreamEvent, -) +1. Find your agent app's service principal name: +```bash +databricks apps get --output json | jq -r '.service_principal_name' +``` -@invoke() -async def invoke(request: ResponsesAgentRequest) -> ResponsesAgentResponse: - # Agent logic here - pass +2. Grant the service principal `CAN_USE` permission on the MCP server app: +```bash +databricks apps update-permissions --service-principal --permission-level CAN_USE +``` + +**Note:** Apps are not yet supported as resource dependencies for other apps in `databricks.yml`, so this manual permission grant is required for now. 
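
Once the permission is granted, wiring the custom MCP server into your agent mirrors the managed MCP examples above. A minimal sketch, assuming a hypothetical app named `mcp-my-tools` and a hypothetical URL (custom MCP server apps serve MCP at `{app_url}/mcp`; look up the real URL with `databricks apps get <mcp-app-name> --output json | jq -r '.url'`):

```python
from agents import Agent
from databricks_openai.agents import McpServer

# Hypothetical URL of a custom MCP server deployed as a Databricks App
# (app names starting with "mcp-"); these apps expose MCP at {app_url}/mcp.
app_url = "https://mcp-my-tools.example.databricksapps.com"

custom_mcp_server = McpServer(
    url=f"{app_url}/mcp",
    name="my custom MCP tools",
)

agent = Agent(
    name="my agent",
    instructions="You are a helpful agent.",
    model="databricks-claude-3-7-sonnet",
    mcp_servers=[custom_mcp_server],
)
```
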
+ +### Important Notes + +- The app automatically has access to the MLflow experiment (already configured in template) +- For all other resources (UC functions, Genie spaces, vector indexes, warehouses, etc.), you MUST add them +- Without proper resource grants, you'll see permission errors at runtime -@stream() -async def stream(request: dict) -> AsyncGenerator[ResponsesAgentStreamEvent, None]: - # Streaming agent logic here - pass +--- + +## Running the App Locally + +**Start the server:** + +```bash +uv run start-app ``` -### Adding Custom Tracing +This starts the agent at http://localhost:8000 -Beyond built-in tracing, you can add additional instrumentation: +**Advanced server options:** -```python -import mlflow +```bash +uv run start-server --reload # Hot-reload on code changes during development +uv run start-server --port 8001 +uv run start-server --workers 4 +``` + +**Test the API:** -mlflow.openai.autolog() # Auto-trace OpenAI calls +```bash +# Streaming request +curl -X POST http://localhost:8000/invocations \ + -H "Content-Type: application/json" \ + -d '{ "input": [{ "role": "user", "content": "hi" }], "stream": true }' -# For more granular tracing, see: -# https://docs.databricks.com/aws/en/mlflow3/genai/tracing/app-instrumentation/ +# Non-streaming request +curl -X POST http://localhost:8000/invocations \ + -H "Content-Type: application/json" \ + -d '{ "input": [{ "role": "user", "content": "hi" }] }' ``` +**Common issues:** +- Port already in use: Use `--port` to specify a different port +- Authentication errors: Verify `.env.local` is correct +- Module not found: Run `uv sync` to install dependencies +- **MLflow experiment not found**: If you see an error like "The provided MLFLOW_EXPERIMENT_ID environment variable value does not exist", ensure your `MLFLOW_TRACKING_URI` in `.env.local` is set to `databricks://` (e.g., `databricks://DEFAULT-testing`). The quickstart script should configure this automatically, but if you manually edit `.env.local`, make sure to include the profile name in the tracking URI. + --- -## Authentication Setup +## Modifying the Agent -**Option 1: OAuth (Recommended)** +**Main file to modify:** `agent_server/agent.py` -```bash -databricks auth login +**Key resources:** +1. [databricks-openai SDK](https://github.com/databricks/databricks-ai-bridge/tree/main/integrations/openai) +2. [Agent examples](https://github.com/bbqiu/agent-on-app-prototype) +3. [Agent Framework docs](https://docs.databricks.com/aws/en/generative-ai/agent-framework/) +4. [Adding tools](https://docs.databricks.com/aws/en/generative-ai/agent-framework/agent-tool) +5. [OpenAI Agents SDK](https://platform.openai.com/docs/guides/agents-sdk) +6. 
[Responses API](https://mlflow.org/docs/latest/genai/serving/responses-agent/) + +**databricks-openai SDK basics:** + +```python +from databricks_openai import AsyncDatabricksOpenAI +from agents import set_default_openai_api, set_default_openai_client + +# Set up async client (recommended for agent servers) +set_default_openai_client(AsyncDatabricksOpenAI()) +set_default_openai_api("chat_completions") ``` -Set in `.env.local`: +--- + +## Testing the Agent ```bash -DATABRICKS_CONFIG_PROFILE=DEFAULT -``` +# Run evaluation +uv run agent-evaluate -**Option 2: Personal Access Token** +# Run unit tests +pytest [path] +``` -Set in `.env.local`: +**Test API locally:** ```bash -DATABRICKS_HOST="https://host.databricks.com" -DATABRICKS_TOKEN="dapi_token" +# Streaming request +curl -X POST http://localhost:8000/invocations \ + -H "Content-Type: application/json" \ + -d '{ "input": [{ "role": "user", "content": "hi" }], "stream": true }' + +# Non-streaming request +curl -X POST http://localhost:8000/invocations \ + -H "Content-Type: application/json" \ + -d '{ "input": [{ "role": "user", "content": "hi" }] }' ``` --- -## MLflow Experiment Setup +## Deploying to Databricks Apps -Create and link an MLflow experiment: +**Deploy using Databricks bundles:** ```bash -DATABRICKS_USERNAME=$(databricks current-user me | jq -r .userName) -databricks experiments create-experiment /Users/$DATABRICKS_USERNAME/agents-on-apps +# Deploy the bundle (creates/updates resources and uploads files) +databricks bundle deploy + +# Run the app (starts/restarts the app with uploaded source code) +databricks bundle run agent_openai_agents_sdk ``` -Add the experiment ID to `.env.local`: +The resource key `agent_openai_agents_sdk` matches the app name defined in `databricks.yml` under `resources.apps.agent_openai_agents_sdk`. + +**Error: "An app with the same name already exists"** + +If you see this error when running `databricks bundle deploy`: -```bash -MLFLOW_EXPERIMENT_ID= ``` +Error: failed to create app ---- +Failed to create app . An app with the same name already exists. +``` -## Key Files +This means you have an existing app that needs to be linked to your bundle. You have two options: -| File | Purpose | -| -------------------------------- | --------------------------------------------- | -| `agent_server/agent.py` | Agent logic, model, instructions, MCP servers | -| `agent_server/start_server.py` | FastAPI server + MLflow setup | -| `agent_server/evaluate_agent.py` | Agent evaluation with MLflow scorers | -| `agent_server/utils.py` | Databricks auth helpers, stream processing | -| `scripts/start_app.py` | Manages backend+frontend startup | +1. **Bind the existing app to your bundle** (recommended if you want to manage the existing app): + - Follow the steps in [Binding an existing app to your bundle](#binding-an-existing-app-to-your-bundle) below + - This will link the existing app to your bundle so future deploys update it ---- +2. 
**Delete the existing app and let the bundle create a new one**: + ```bash + databricks apps delete + databricks bundle deploy + ``` + - ⚠️ This will permanently delete the existing app including its URL, OAuth credentials, and service principal -## Deploying to Databricks Apps +**Binding an existing app to your bundle:** + +If you've already deployed an app from a different directory or through the UI and want to link it to this bundle, follow these steps: -**Create app:** +**Step 1: Update `databricks.yml` to match the existing app name** +⚠️ **CRITICAL**: The app name in your `databricks.yml` **must match** the existing app name exactly, or Terraform will **destroy and recreate** the app (not update it in-place). + +First, find your existing app name: ```bash -databricks apps create agent-openai-agents-sdk +# List existing apps to find the app name +databricks apps list --output json | jq '.[].name' ``` -**Sync files:** +Then update `databricks.yml` to use that exact name: +```yaml +resources: + apps: + agent_openai_agents_sdk: + name: "openai-agents-sdk-agent" # Match your existing app name exactly + description: "OpenAI Agents SDK agent application" + source_code_path: ./ +``` + +The default configuration uses: +```yaml +name: "${bundle.target}-agent-openai-agents-sdk" # Evaluates to "dev-agent-openai-agents-sdk" +``` + +Make sure to replace this with your actual app name. + +**Step 2: Bind the resource to the existing app** ```bash -DATABRICKS_USERNAME=$(databricks current-user me | jq -r .userName) -databricks sync . "/Users/$DATABRICKS_USERNAME/agent-openai-agents-sdk" +# Bind the resource to the existing app +databricks bundle deployment bind agent_openai_agents_sdk + +# Example: +databricks bundle deployment bind agent_openai_agents_sdk openai-agents-sdk-agent + +# If the operation requires confirmation and you want to skip prompts: +databricks bundle deployment bind agent_openai_agents_sdk openai-agents-sdk-agent --auto-approve ``` -**Deploy:** +This links your bundle configuration to the existing deployed app. Future `databricks bundle deploy` commands will update the existing app instead of creating a new one. + +**Important notes about binding:** +- **Remote Terraform state**: Databricks stores Terraform state remotely, so the same app can be detected across different local directories +- **Name is immutable**: The `name` field cannot be changed in-place; changing it forces replacement (destroy + create) +- **Review the plan**: When binding, carefully review the Terraform plan output. Look for `# forces replacement` which indicates the app will be destroyed and recreated +- **Existing binding**: If a resource is already bound to another app, you must unbind it first before binding to a different app + +**Unbinding a resource:** + +To remove the link between your bundle and the deployed app: ```bash -databricks apps deploy agent-openai-agents-sdk --source-code-path /Workspace/Users/$DATABRICKS_USERNAME/agent-openai-agents-sdk +databricks bundle deployment unbind agent_openai_agents_sdk ``` +This is useful when: +- You want to bind to a different app +- You want to let the bundle create a new app on the next deploy +- You're switching between different deployed instances + +Note: Unbinding only removes the link in your bundle state - it does not delete the deployed app. 
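
Whether you bind, unbind, or deploy fresh, it helps to confirm what the bundle currently manages before the next `databricks bundle deploy`. A quick check (the `summary` subcommand is only available on recent CLI versions):

```bash
# Validate the bundle configuration for the active target
databricks bundle validate

# Show the resources (including the app) the bundle currently tracks
databricks bundle summary
```
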
+ **Query deployed app:** Generate OAuth token (PATs are not supported): @@ -284,25 +462,69 @@ curl -X POST /invocations \ -d '{ "input": [{ "role": "user", "content": "hi" }], "stream": true }' ``` +**Debug deployed apps:** + +```bash +# View logs (use the deployed app name from databricks.yml) +databricks apps logs dev-agent-openai-agents-sdk --follow + +# Check status +databricks apps get dev-agent-openai-agents-sdk --output json | jq '{app_status, compute_status}' +``` + --- -## Agent Framework Capabilities +## Key Files -Reference: https://docs.databricks.com/aws/en/generative-ai/agent-framework/ +| File | Purpose | +| -------------------------------- | --------------------------------------------- | +| `agent_server/agent.py` | Agent logic, model, instructions, MCP servers | +| `agent_server/start_server.py` | FastAPI server + MLflow setup | +| `agent_server/evaluate_agent.py` | Agent evaluation with MLflow scorers | +| `agent_server/utils.py` | Databricks auth helpers, stream processing | +| `scripts/start_app.py` | Manages backend+frontend startup | +| `scripts/discover_tools.py` | Discovers available workspace resources | +| `scripts/quickstart.sh` | One-command setup script | -### Tool Types +--- +## Agent Framework Capabilities + +**Tool Types:** 1. **Unity Catalog Function Tools** - SQL UDFs managed in UC with built-in governance 2. **Agent Code Tools** - Defined directly in agent code for REST APIs and low-latency operations 3. **MCP Tools** - Interoperable tools via Model Context Protocol (Databricks-managed, external, or self-hosted) -### Built-in Tools - +**Built-in Tools:** - **system.ai.python_exec** - Execute Python code dynamically within agent queries (code interpreter) -### Common Patterns - +**Common Patterns:** - **Structured data retrieval** - Query SQL tables/databases - **Unstructured data retrieval** - Document search and RAG via Vector Search - **Code interpreter** - Python execution for analysis via system.ai.python_exec - **External connections** - Integrate services like Slack via HTTP connections + +Reference: https://docs.databricks.com/aws/en/generative-ai/agent-framework/ + +--- + +## Next Steps + +✅ **You've completed the initial setup!** + +After running the quickstart script, you have: +- ✅ Installed prerequisites +- ✅ Authenticated with Databricks +- ✅ Created MLflow experiment +- ✅ Discovered available tools in your workspace + +Now you're ready to: +- Start the agent locally: `uv run start-app` +- Modify your agent to use the tools you discovered +- Deploy your agent to Databricks + +**See the README.md** for more information on: +- Modifying the agent and adding tools +- Evaluating your agent +- Deploying to Databricks Apps +- Debugging and monitoring deployed apps diff --git a/agent-openai-agents-sdk/databricks.yml b/agent-openai-agents-sdk/databricks.yml new file mode 100644 index 0000000..48e2794 --- /dev/null +++ b/agent-openai-agents-sdk/databricks.yml @@ -0,0 +1,37 @@ +bundle: + name: agent_openai_agents_sdk + +resources: + # MLflow experiment for agent tracing - automatically created by bundle + experiments: + agent_openai_agents_sdk_experiment: + name: /Users/${workspace.current_user.userName}/${bundle.name}-${bundle.target} + + apps: + agent_openai_agents_sdk: + name: "${bundle.target}-agent-openai-agents-sdk" + description: "OpenAI Agents SDK agent application" + source_code_path: ./ + + # Resources which this app has access to + resources: + - name: 'experiment' + experiment: + experiment_id: 
"${resources.experiments.agent_openai_agents_sdk_experiment.id}" + permission: 'CAN_MANAGE' + +targets: + dev: + mode: development + default: true + # workspace: + # host: https://... + + prod: + mode: production + # workspace: + # host: https://... + resources: + apps: + agent_openai_agents_sdk: + name: agent-openai-agents-sdk diff --git a/agent-openai-agents-sdk/pyproject.toml b/agent-openai-agents-sdk/pyproject.toml index a76a6b7..1bb46f3 100644 --- a/agent-openai-agents-sdk/pyproject.toml +++ b/agent-openai-agents-sdk/pyproject.toml @@ -10,7 +10,7 @@ requires-python = ">=3.11" dependencies = [ "fastapi>=0.115.12", "uvicorn>=0.34.2", - "databricks-openai>=0.8.0", + "databricks-openai>=0.9.0", "mlflow>=3.8.0rc0", "openai-agents>=0.4.1", "python-dotenv", @@ -34,3 +34,4 @@ dev = [ start-app = "scripts.start_app:main" start-server = "agent_server.start_server:main" agent-evaluate = "agent_server.evaluate_agent:evaluate" +discover-tools = "scripts.discover_tools:main" diff --git a/agent-openai-agents-sdk/scripts/discover_tools.py b/agent-openai-agents-sdk/scripts/discover_tools.py new file mode 100755 index 0000000..3eb3796 --- /dev/null +++ b/agent-openai-agents-sdk/scripts/discover_tools.py @@ -0,0 +1,432 @@ +#!/usr/bin/env python3 +""" +Discover available tools and data sources for Databricks agents. + +This script scans for: +- Unity Catalog functions (data retrieval tools e.g. SQL UDFs) +- Unity Catalog tables (data sources) +- Vector search indexes (RAG data sources) +- Genie spaces (conversational interface over structured data) +- Custom MCP servers (Databricks apps with name mcp-*) +- External MCP servers (via Unity Catalog connections) +""" + +import json +import subprocess +import sys +from pathlib import Path +from typing import Any, Dict, List + +from databricks.sdk import WorkspaceClient + +DEFAULT_MAX_RESULTS = 100 +DEFAULT_MAX_SCHEMAS = 25 + +def run_databricks_cli(args: List[str]) -> str: + """Run databricks CLI command and return output.""" + try: + result = subprocess.run( + ["databricks"] + args, + capture_output=True, + text=True, + check=True, + ) + return result.stdout + except subprocess.CalledProcessError as e: + print(f"Error running databricks CLI: {e.stderr}", file=sys.stderr) + return "" + + +def discover_uc_functions(w: WorkspaceClient, catalog: str = None, max_schemas: int = DEFAULT_MAX_SCHEMAS) -> List[Dict[str, Any]]: + """Discover Unity Catalog functions that could be used as tools. 
+ + Args: + w: WorkspaceClient instance + catalog: Optional specific catalog to search + max_schemas: Total number of schemas to search across all catalogs + """ + functions = [] + schemas_searched = 0 + + try: + catalogs = [catalog] if catalog else [c.name for c in w.catalogs.list()] + + for cat in catalogs: + if schemas_searched >= max_schemas: + break + + try: + all_schemas = list(w.schemas.list(catalog_name=cat)) + # Take schemas from this catalog until we hit the global budget + schemas_to_search = all_schemas[:max_schemas - schemas_searched] + + for schema in schemas_to_search: + schema_name = f"{cat}.{schema.name}" + try: + funcs = list(w.functions.list(catalog_name=cat, schema_name=schema.name)) + for func in funcs: + functions.append({ + "type": "uc_function", + "name": func.full_name, + "catalog": cat, + "schema": schema.name, + "function_name": func.name, + "comment": func.comment, + "routine_definition": getattr(func, "routine_definition", None), + }) + except Exception as e: + # Skip schemas we can't access + continue + finally: + schemas_searched += 1 + except Exception as e: + # Skip catalogs we can't access + continue + + except Exception as e: + print(f"Error discovering UC functions: {e}", file=sys.stderr) + + return functions + + +def discover_uc_tables(w: WorkspaceClient, catalog: str = None, schema: str = None, max_schemas: int = DEFAULT_MAX_SCHEMAS) -> List[Dict[str, Any]]: + """Discover Unity Catalog tables that could be queried. + + Args: + w: WorkspaceClient instance + catalog: Optional specific catalog to search + schema: Optional specific schema to search (requires catalog) + max_schemas: Total number of schemas to search across all catalogs + """ + tables = [] + schemas_searched = 0 + + try: + catalogs = [catalog] if catalog else [c.name for c in w.catalogs.list()] + + for cat in catalogs: + if cat in ["__databricks_internal", "system"]: + continue + + if schemas_searched >= max_schemas: + break + + try: + if schema: + schemas_to_search = [schema] + else: + all_schemas = [s.name for s in w.schemas.list(catalog_name=cat)] + # Take schemas from this catalog until we hit the global budget + schemas_to_search = all_schemas[:max_schemas - schemas_searched] + + for sch in schemas_to_search: + if sch == "information_schema": + schemas_searched += 1 + continue + + try: + tbls = list(w.tables.list(catalog_name=cat, schema_name=sch)) + for tbl in tbls: + # Get column info + columns = [] + if hasattr(tbl, "columns") and tbl.columns: + columns = [ + {"name": col.name, "type": col.type_name.value if hasattr(col.type_name, "value") else str(col.type_name)} + for col in tbl.columns + ] + + tables.append({ + "type": "uc_table", + "name": tbl.full_name, + "catalog": cat, + "schema": sch, + "table_name": tbl.name, + "table_type": tbl.table_type.value if tbl.table_type else None, + "comment": tbl.comment, + "columns": columns, + }) + except Exception as e: + # Skip schemas we can't access + pass + finally: + schemas_searched += 1 + except Exception as e: + # Skip catalogs we can't access + continue + + except Exception as e: + print(f"Error discovering UC tables: {e}", file=sys.stderr) + + return tables + + +def discover_vector_search_indexes(w: WorkspaceClient) -> List[Dict[str, Any]]: + """Discover Vector Search indexes for RAG applications.""" + indexes = [] + + try: + # List all vector search endpoints + endpoints = list(w.vector_search_endpoints.list_endpoints()) + + for endpoint in endpoints: + try: + # List indexes for each endpoint + endpoint_indexes = 
list(w.vector_search_indexes.list_indexes(endpoint_name=endpoint.name)) + for idx in endpoint_indexes: + indexes.append({ + "type": "vector_search_index", + "name": idx.name, + "endpoint": endpoint.name, + "primary_key": idx.primary_key, + "index_type": idx.index_type.value if idx.index_type else None, + "status": idx.status.state.value if idx.status and idx.status.state else None, + }) + except Exception as e: + # Skip endpoints we can't access + continue + + except Exception as e: + print(f"Error discovering vector search indexes: {e}", file=sys.stderr) + + return indexes + + +def discover_genie_spaces(w: WorkspaceClient) -> List[Dict[str, Any]]: + """Discover Genie spaces for conversational data access.""" + spaces = [] + + try: + # Use SDK to list genie spaces + response = w.genie.list_spaces() + genie_spaces = response.spaces if hasattr(response, "spaces") else [] + for space in genie_spaces: + spaces.append({ + "type": "genie_space", + "id": space.space_id, + "name": space.title, + "description": space.description, + }) + except Exception as e: + print(f"Error discovering Genie spaces: {e}", file=sys.stderr) + + return spaces + + + +def discover_custom_mcp_servers(w: WorkspaceClient) -> List[Dict[str, Any]]: + """Discover custom MCP servers deployed as Databricks apps.""" + custom_servers = [] + + try: + # List all apps and filter for those starting with mcp- + apps = w.apps.list() + for app in apps: + if app.name and app.name.startswith("mcp-"): + custom_servers.append({ + "type": "custom_mcp_server", + "name": app.name, + "url": app.url, + "status": app.app_status.state.value if app.app_status and app.app_status.state else None, + "description": app.description, + }) + except Exception as e: + print(f"Error discovering custom MCP servers: {e}", file=sys.stderr) + + return custom_servers + + +def discover_external_mcp_servers(w: WorkspaceClient) -> List[Dict[str, Any]]: + """Discover external MCP servers configured via Unity Catalog connections.""" + external_servers = [] + + try: + # List all connections and filter for MCP connections + connections = w.connections.list() + for conn in connections: + # Check if this is an MCP connection + if conn.options and conn.options.get("is_mcp_connection") == "true": + external_servers.append({ + "type": "external_mcp_server", + "name": conn.name, + "connection_type": conn.connection_type.value if hasattr(conn.connection_type, "value") else str(conn.connection_type), + "comment": conn.comment, + "full_name": conn.full_name, + }) + except Exception as e: + print(f"Error discovering external MCP servers: {e}", file=sys.stderr) + + return external_servers + + +def format_output_markdown(results: Dict[str, List[Dict[str, Any]]]) -> str: + """Format discovery results as markdown.""" + lines = ["# Agent Tools and Data Sources Discovery\n"] + + # UC Functions + functions = results.get("uc_functions", []) + if functions: + lines.append(f"## Unity Catalog Functions ({len(functions)})\n") + lines.append("**What they are:** SQL UDFs that can be used as agent tools.\n") + lines.append("**How to use:** Access via UC functions MCP server:") + lines.append("- All functions in a schema: `{workspace_host}/api/2.0/mcp/functions/{catalog}/{schema}`") + lines.append("- Single function: `{workspace_host}/api/2.0/mcp/functions/{catalog}/{schema}/{function_name}`\n") + for func in functions[:10]: # Show first 10 + lines.append(f"- `{func['name']}`") + if func.get("comment"): + lines.append(f" - {func['comment']}") + if len(functions) > 10: + lines.append(f"\n*...and 
{len(functions) - 10} more*\n") + lines.append("") + + # UC Tables + tables = results.get("uc_tables", []) + if tables: + lines.append(f"## Unity Catalog Tables ({len(tables)})\n") + lines.append("Structured data that agents can query via UC SQL functions.\n") + for table in tables[:10]: # Show first 10 + lines.append(f"- `{table['name']}` ({table['table_type']})") + if table.get("comment"): + lines.append(f" - {table['comment']}") + if table.get("columns"): + col_names = [c["name"] for c in table["columns"][:5]] + lines.append(f" - Columns: {', '.join(col_names)}") + if len(tables) > 10: + lines.append(f"\n*...and {len(tables) - 10} more*\n") + lines.append("") + + # Vector Search Indexes + indexes = results.get("vector_search_indexes", []) + if indexes: + lines.append(f"## Vector Search Indexes ({len(indexes)})\n") + lines.append("These can be used for RAG applications with unstructured data.\n") + lines.append("**How to use:** Connect via MCP server at `{workspace_host}/api/2.0/mcp/vector-search/{catalog}/{schema}` or\n") + lines.append("`{workspace_host}/api/2.0/mcp/vector-search/{catalog}/{schema}/{index_name}`\n") + for idx in indexes: + lines.append(f"- `{idx['name']}`") + lines.append(f" - Endpoint: {idx['endpoint']}") + lines.append(f" - Status: {idx['status']}") + lines.append("") + + # Genie Spaces + spaces = results.get("genie_spaces", []) + if spaces: + lines.append(f"## Genie Spaces ({len(spaces)})\n") + lines.append("**What they are:** Natural language interface to your data\n") + lines.append("**How to use:** Connect via Genie MCP server at `{workspace_host}/api/2.0/mcp/genie/{space_id}`\n") + for space in spaces: + lines.append(f"- `{space['name']}` (ID: {space['id']})") + if space.get("description"): + lines.append(f" - {space['description']}") + lines.append("") + + # Custom MCP Servers (Databricks Apps) + custom_servers = results.get("custom_mcp_servers", []) + if custom_servers: + lines.append(f"## Custom MCP Servers ({len(custom_servers)})\n") + lines.append("**What:** Your own MCP servers deployed as Databricks Apps (names starting with mcp-)\n") + lines.append("**How to use:** Access via `{app_url}/mcp`\n") + lines.append("**⚠️ Important:** Custom MCP server apps require manual permission grants:") + lines.append("1. Get your agent app's service principal: `databricks apps get --output json | jq -r '.service_principal_name'`") + lines.append("2. 
Grant permission: `databricks apps update-permissions --service-principal --permission-level CAN_USE`") + lines.append("(Apps are not yet supported as resource dependencies in databricks.yml)\n") + for server in custom_servers: + lines.append(f"- `{server['name']}`") + if server.get("url"): + lines.append(f" - URL: {server['url']}") + if server.get("status"): + lines.append(f" - Status: {server['status']}") + if server.get("description"): + lines.append(f" - {server['description']}") + lines.append("") + + # External MCP Servers (UC Connections) + external_servers = results.get("external_mcp_servers", []) + if external_servers: + lines.append(f"## External MCP Servers ({len(external_servers)})\n") + lines.append("**What:** Third-party MCP servers via Unity Catalog connections\n") + lines.append("**How to use:** Connect via `{workspace_host}/api/2.0/mcp/external/{connection_name}`\n") + lines.append("**Benefits:** Secure access to external APIs through UC governance\n") + for server in external_servers: + lines.append(f"- `{server['name']}`") + if server.get("full_name"): + lines.append(f" - Full name: {server['full_name']}") + if server.get("comment"): + lines.append(f" - {server['comment']}") + lines.append("") + return "\n".join(lines) + + +def main(): + """Main discovery function.""" + import argparse + + parser = argparse.ArgumentParser(description="Discover available agent tools and data sources") + parser.add_argument("--catalog", help="Limit discovery to specific catalog") + parser.add_argument("--schema", help="Limit discovery to specific schema (requires --catalog)") + parser.add_argument("--format", choices=["json", "markdown"], default="markdown", help="Output format") + parser.add_argument("--output", help="Output file (default: stdout)") + parser.add_argument("--profile", help="Databricks CLI profile to use (default: uses default profile)") + parser.add_argument("--max-results", type=int, default=DEFAULT_MAX_RESULTS, help=f"Maximum results per resource type (default: {DEFAULT_MAX_RESULTS})") + parser.add_argument("--max-schemas", type=int, default=DEFAULT_MAX_SCHEMAS, help=f"Total schemas to search across all catalogs (default: {DEFAULT_MAX_SCHEMAS})") + + args = parser.parse_args() + + if args.schema and not args.catalog: + print("Error: --schema requires --catalog", file=sys.stderr) + sys.exit(1) + + print("Discovering available tools and data sources...", file=sys.stderr) + + # Initialize Databricks workspace client + # Only pass profile if specified, otherwise use default + if args.profile: + w = WorkspaceClient(profile=args.profile) + else: + w = WorkspaceClient() + + results = {} + + # Discover each type with configurable limits + print("- UC Functions...", file=sys.stderr) + results["uc_functions"] = discover_uc_functions(w, catalog=args.catalog, max_schemas=args.max_schemas)[:args.max_results] + + print("- UC Tables...", file=sys.stderr) + results["uc_tables"] = discover_uc_tables(w, catalog=args.catalog, schema=args.schema, max_schemas=args.max_schemas)[:args.max_results] + + print("- Vector Search Indexes...", file=sys.stderr) + results["vector_search_indexes"] = discover_vector_search_indexes(w)[:args.max_results] + + print("- Genie Spaces...", file=sys.stderr) + results["genie_spaces"] = discover_genie_spaces(w)[:args.max_results] + + print("- Custom MCP Servers (Apps)...", file=sys.stderr) + results["custom_mcp_servers"] = discover_custom_mcp_servers(w)[:args.max_results] + + print("- External MCP Servers (Connections)...", file=sys.stderr) + 
results["external_mcp_servers"] = discover_external_mcp_servers(w)[:args.max_results] + + # Format output + if args.format == "json": + output = json.dumps(results, indent=2) + else: + output = format_output_markdown(results) + + # Write output + if args.output: + Path(args.output).write_text(output) + print(f"\nResults written to {args.output}", file=sys.stderr) + else: + print("\n" + output) + + # Print summary + print("\n=== Discovery Summary ===", file=sys.stderr) + print(f"UC Functions: {len(results['uc_functions'])}", file=sys.stderr) + print(f"UC Tables: {len(results['uc_tables'])}", file=sys.stderr) + print(f"Vector Search Indexes: {len(results['vector_search_indexes'])}", file=sys.stderr) + print(f"Genie Spaces: {len(results['genie_spaces'])}", file=sys.stderr) + print(f"Custom MCP Servers: {len(results['custom_mcp_servers'])}", file=sys.stderr) + print(f"External MCP Servers: {len(results['external_mcp_servers'])}", file=sys.stderr) + + +if __name__ == "__main__": + main() diff --git a/agent-openai-agents-sdk/scripts/quickstart.sh b/agent-openai-agents-sdk/scripts/quickstart.sh index 84ea6a9..870351c 100755 --- a/agent-openai-agents-sdk/scripts/quickstart.sh +++ b/agent-openai-agents-sdk/scripts/quickstart.sh @@ -1,6 +1,37 @@ #!/bin/bash set -e +# Parse command line arguments +PROFILE_ARG="" +HOST_ARG="" + +while [[ $# -gt 0 ]]; do + case $1 in + --profile) + PROFILE_ARG="$2" + shift 2 + ;; + --host) + HOST_ARG="$2" + shift 2 + ;; + -h|--help) + echo "Usage: $0 [OPTIONS]" + echo "" + echo "Options:" + echo " --profile NAME Use specified Databricks profile (non-interactive)" + echo " --host URL Databricks workspace URL (for initial setup)" + echo " -h, --help Show this help message" + exit 0 + ;; + *) + echo "Error: Unknown option $1" + echo "Use --help for usage information" + exit 1 + ;; + esac +done + # Helper function to check if a command exists command_exists() { command -v "$1" >/dev/null 2>&1 @@ -121,71 +152,81 @@ echo echo "Setting up Databricks authentication..." -# Check if there are existing profiles -set +e -EXISTING_PROFILES=$(databricks auth profiles 2>/dev/null) -PROFILES_EXIT_CODE=$? -set -e +# If --profile flag was provided, use that directly +if [ -n "$PROFILE_ARG" ]; then + PROFILE_NAME="$PROFILE_ARG" + echo "Using specified profile: $PROFILE_NAME" +else + # Check if there are existing profiles + set +e + EXISTING_PROFILES=$(databricks auth profiles 2>/dev/null) + PROFILES_EXIT_CODE=$? 
+ set -e -if [ $PROFILES_EXIT_CODE -eq 0 ] && [ -n "$EXISTING_PROFILES" ]; then - # Profiles exist - let user select one - echo "Found existing Databricks profiles:" - echo + if [ $PROFILES_EXIT_CODE -eq 0 ] && [ -n "$EXISTING_PROFILES" ]; then + # Profiles exist - let user select one + echo "Found existing Databricks profiles:" + echo - # Parse profiles into an array (compatible with older bash) - # Skip the first line (header row) - PROFILE_ARRAY=() - PROFILE_NAMES=() - LINE_NUM=0 - while IFS= read -r line; do - if [ -n "$line" ]; then - if [ $LINE_NUM -eq 0 ]; then - # Print header without number - echo "$line" - else - # Add full line to display array - PROFILE_ARRAY+=("$line") - # Extract just the profile name (first column) for selection - PROFILE_NAME_ONLY=$(echo "$line" | awk '{print $1}') - PROFILE_NAMES+=("$PROFILE_NAME_ONLY") + # Parse profiles into an array (compatible with older bash) + # Skip the first line (header row) + PROFILE_ARRAY=() + PROFILE_NAMES=() + LINE_NUM=0 + while IFS= read -r line; do + if [ -n "$line" ]; then + if [ $LINE_NUM -eq 0 ]; then + # Print header without number + echo "$line" + else + # Add full line to display array + PROFILE_ARRAY+=("$line") + # Extract just the profile name (first column) for selection + PROFILE_NAME_ONLY=$(echo "$line" | awk '{print $1}') + PROFILE_NAMES+=("$PROFILE_NAME_ONLY") + fi + LINE_NUM=$((LINE_NUM + 1)) fi - LINE_NUM=$((LINE_NUM + 1)) - fi - done <<< "$EXISTING_PROFILES" - echo + done <<< "$EXISTING_PROFILES" + echo - # Display numbered list - for i in "${!PROFILE_ARRAY[@]}"; do - echo "$((i+1))) ${PROFILE_ARRAY[$i]}" - done - echo + # Display numbered list + for i in "${!PROFILE_ARRAY[@]}"; do + echo "$((i+1))) ${PROFILE_ARRAY[$i]}" + done + echo - echo "Enter the number of the profile you want to use:" - read -r PROFILE_CHOICE + echo "Enter the number of the profile you want to use:" + read -r PROFILE_CHOICE - if [ -z "$PROFILE_CHOICE" ]; then - echo "Error: Profile selection is required" - exit 1 - fi + if [ -z "$PROFILE_CHOICE" ]; then + echo "Error: Profile selection is required" + exit 1 + fi - # Validate the choice is a number - if ! [[ "$PROFILE_CHOICE" =~ ^[0-9]+$ ]]; then - echo "Error: Please enter a valid number" - exit 1 - fi + # Validate the choice is a number + if ! [[ "$PROFILE_CHOICE" =~ ^[0-9]+$ ]]; then + echo "Error: Please enter a valid number" + exit 1 + fi - # Convert to array index (subtract 1) - PROFILE_INDEX=$((PROFILE_CHOICE - 1)) + # Convert to array index (subtract 1) + PROFILE_INDEX=$((PROFILE_CHOICE - 1)) - # Check if the index is valid - if [ $PROFILE_INDEX -lt 0 ] || [ $PROFILE_INDEX -ge ${#PROFILE_NAMES[@]} ]; then - echo "Error: Invalid selection. Please choose a number between 1 and ${#PROFILE_NAMES[@]}" - exit 1 + # Check if the index is valid + if [ $PROFILE_INDEX -lt 0 ] || [ $PROFILE_INDEX -ge ${#PROFILE_NAMES[@]} ]; then + echo "Error: Invalid selection. 
Please choose a number between 1 and ${#PROFILE_NAMES[@]}" + exit 1 + fi + + # Get the selected profile name (just the name, not the full line) + PROFILE_NAME="${PROFILE_NAMES[$PROFILE_INDEX]}" + echo "Selected profile: $PROFILE_NAME" fi +fi - # Get the selected profile name (just the name, not the full line) - PROFILE_NAME="${PROFILE_NAMES[$PROFILE_INDEX]}" - echo "Selected profile: $PROFILE_NAME" +# Validate the profile if it was specified +if [ -n "$PROFILE_NAME" ]; then # Test if the profile works set +e @@ -230,16 +271,30 @@ if [ $PROFILES_EXIT_CODE -eq 0 ] && [ -n "$EXISTING_PROFILES" ]; then else echo "DATABRICKS_CONFIG_PROFILE=$PROFILE_NAME" >> .env.local fi + + # Update MLFLOW_TRACKING_URI to use the profile for local development + if grep -q "MLFLOW_TRACKING_URI=" .env.local; then + sed -i '' "s|MLFLOW_TRACKING_URI=.*|MLFLOW_TRACKING_URI=\"databricks://$PROFILE_NAME\"|" .env.local + else + echo "MLFLOW_TRACKING_URI=\"databricks://$PROFILE_NAME\"" >> .env.local + fi echo "✓ Databricks profile '$PROFILE_NAME' saved to .env.local" else # No profiles exist - create default one echo "No existing profiles found. Setting up Databricks authentication..." - echo "Please enter your Databricks host URL (e.g., https://your-workspace.cloud.databricks.com):" - read -r DATABRICKS_HOST - if [ -z "$DATABRICKS_HOST" ]; then - echo "Error: Databricks host is required" - exit 1 + # Use --host flag if provided, otherwise prompt + if [ -n "$HOST_ARG" ]; then + DATABRICKS_HOST="$HOST_ARG" + echo "Using specified host: $DATABRICKS_HOST" + else + echo "Please enter your Databricks host URL (e.g., https://your-workspace.cloud.databricks.com):" + read -r DATABRICKS_HOST + + if [ -z "$DATABRICKS_HOST" ]; then + echo "Error: Databricks host is required" + exit 1 + fi fi echo "Authenticating with Databricks..." @@ -249,9 +304,9 @@ else # Temporarily disable exit on error for the auth command set +e - # Run auth login with host parameter and capture output while still showing it to the user + # Run auth login with profile DEFAULT and host parameter AUTH_LOG=$(mktemp) - databricks auth login --host "$DATABRICKS_HOST" 2>&1 | tee "$AUTH_LOG" + databricks auth login --profile DEFAULT --host "$DATABRICKS_HOST" 2>&1 | tee "$AUTH_LOG" AUTH_EXIT_CODE=$? 
set -e @@ -259,19 +314,12 @@ else if [ $AUTH_EXIT_CODE -eq 0 ]; then echo "✓ Successfully authenticated with Databricks" - # Extract profile name from the captured output - # Expected format: "Profile DEFAULT was successfully saved" - PROFILE_NAME=$(grep -i "Profile .* was successfully saved" "$AUTH_LOG" | sed -E 's/.*Profile ([^ ]+) was successfully saved.*/\1/' | head -1) + # Use DEFAULT as the profile name + PROFILE_NAME="DEFAULT" # Clean up temp file rm -f "$AUTH_LOG" - # If we couldn't extract the profile name, default to "DEFAULT" - if [ -z "$PROFILE_NAME" ]; then - PROFILE_NAME="DEFAULT" - echo "Note: Could not detect profile name, using 'DEFAULT'" - fi - # Update .env.local with the profile name if grep -q "DATABRICKS_CONFIG_PROFILE=" .env.local; then sed -i '' "s/DATABRICKS_CONFIG_PROFILE=.*/DATABRICKS_CONFIG_PROFILE=$PROFILE_NAME/" .env.local @@ -279,6 +327,13 @@ else echo "DATABRICKS_CONFIG_PROFILE=$PROFILE_NAME" >> .env.local fi + # Update MLFLOW_TRACKING_URI to use the profile for local development + if grep -q "MLFLOW_TRACKING_URI=" .env.local; then + sed -i '' "s|MLFLOW_TRACKING_URI=.*|MLFLOW_TRACKING_URI=\"databricks://$PROFILE_NAME\"|" .env.local + else + echo "MLFLOW_TRACKING_URI=\"databricks://$PROFILE_NAME\"" >> .env.local + fi + echo "✓ Databricks profile '$PROFILE_NAME' saved to .env.local" else # Clean up temp file diff --git a/agent-openai-agents-sdk/scripts/start_app.py b/agent-openai-agents-sdk/scripts/start_app.py index 47b9113..a812880 100644 --- a/agent-openai-agents-sdk/scripts/start_app.py +++ b/agent-openai-agents-sdk/scripts/start_app.py @@ -6,8 +6,16 @@ 1. Not reporting ready until BOTH frontend and backend processes are ready 2. Exiting as soon as EITHER process fails 3. Printing error logs if either process fails + +Usage: + start-app [OPTIONS] + +All options are passed through to the backend server (start-server). +See 'uv run start-server --help' for available options. 
""" +import argparse +import os import re import shutil import subprocess @@ -24,7 +32,7 @@ class ProcessManager: - def __init__(self): + def __init__(self, port=8000): self.backend_process = None self.frontend_process = None self.backend_ready = False @@ -32,6 +40,7 @@ def __init__(self): self.failed = threading.Event() self.backend_log = None self.frontend_log = None + self.port = port def monitor_process(self, process, name, log_file, patterns): is_ready = False @@ -56,7 +65,7 @@ def monitor_process(self, process, name, log_file, patterns): if self.backend_ready and self.frontend_ready: print("\n" + "=" * 50) print("✓ Both frontend and backend are ready!") - print("✓ Open the frontend at http://localhost:8000") + print(f"✓ Open the frontend at http://localhost:{self.port}") print("=" * 50 + "\n") process.wait() @@ -141,20 +150,28 @@ def cleanup(self): if self.frontend_log: self.frontend_log.close() - def run(self): + def run(self, backend_args=None): load_dotenv(dotenv_path=".env.local", override=True) if not self.clone_frontend_if_needed(): return 1 + # Set API_PROXY environment variable for frontend to connect to backend + os.environ["API_PROXY"] = f"http://localhost:{self.port}/invocations" + # Open log files self.backend_log = open("backend.log", "w", buffering=1) self.frontend_log = open("frontend.log", "w", buffering=1) try: + # Build backend command, passing through all arguments + backend_cmd = ["uv", "run", "start-server"] + if backend_args: + backend_cmd.extend(backend_args) + # Start backend self.backend_process = self.start_process( - ["uv", "run", "start-server"], "backend", self.backend_log, BACKEND_READY + backend_cmd, "backend", self.backend_log, BACKEND_READY ) # Setup and start frontend @@ -211,7 +228,25 @@ def run(self): def main(): - sys.exit(ProcessManager().run()) + parser = argparse.ArgumentParser( + description="Start agent frontend and backend", + usage="%(prog)s [OPTIONS]\n\nAll options are passed through to start-server. " + "Use 'uv run start-server --help' for available options." + ) + # Parse known args (none currently) and pass remaining to backend + _, backend_args = parser.parse_known_args() + + # Extract port from backend_args if specified + port = 8000 + for i, arg in enumerate(backend_args): + if arg == "--port" and i + 1 < len(backend_args): + try: + port = int(backend_args[i + 1]) + except ValueError: + pass + break + + sys.exit(ProcessManager(port=port).run(backend_args)) if __name__ == "__main__":