From 73b84dcda192a7e083de8a7d3b7f277a69418920 Mon Sep 17 00:00:00 2001 From: Nick Galluzzo Date: Thu, 24 Jul 2025 18:37:49 +0700 Subject: [PATCH 1/6] fix: add regional support & refactors for DRY --- .env.example | 3 + README.md | 5 ++ src/notebookllama/processing.py | 17 +++-- src/notebookllama/querying.py | 8 ++- src/notebookllama/utils.py | 107 ++++++++++++++++++++++++++++ tools/create_llama_cloud_index.py | 11 +-- tools/create_llama_extract_agent.py | 4 +- 7 files changed, 138 insertions(+), 17 deletions(-) create mode 100644 src/notebookllama/utils.py diff --git a/.env.example b/.env.example index 34756db..bd3c4b3 100644 --- a/.env.example +++ b/.env.example @@ -1,5 +1,8 @@ OPENAI_API_KEY="sk-***" LLAMACLOUD_API_KEY="llx-***" +# Regional Endpoint Configuration (Uncomment the appropriate line for your region) +# LLAMACLOUD_REGION="us" # North America (default) +# LLAMACLOUD_REGION="eu" # Europe ELEVENLABS_API_KEY="sk_***" pgql_db="postgres" pgql_user="localhost" diff --git a/README.md b/README.md index 8ca3089..537c437 100644 --- a/README.md +++ b/README.md @@ -65,6 +65,11 @@ Next, open the `.env` file and add your API keys: - `ELEVENLABS_API_KEY`: find it [on ElevenLabs Settings](https://elevenlabs.io/app/settings/api-keys) - `LLAMACLOUD_API_KEY`: find it [on LlamaCloud Dashboard](https://cloud.llamaindex.ai?utm_source=demo&utm_medium=notebookLM) +> **🌍 Regional Support**: LlamaCloud operates in multiple regions. If you're using a non-US region, configure it in your `.env` file: +> +> - For **Europe (EU)**: Uncomment and set `LLAMACLOUD_REGION="eu"` +> - For **North America (US)**: Either leave it commented or set `LLAMACLOUD_REGION="us"` + **4. 
Activate the Virtual Environment** (on mac/unix) diff --git a/src/notebookllama/processing.py b/src/notebookllama/processing.py index a11cf25..37b98d6 100644 --- a/src/notebookllama/processing.py +++ b/src/notebookllama/processing.py @@ -7,12 +7,16 @@ from mrkdwn_analysis import MarkdownAnalyzer from mrkdwn_analysis.markdown_analyzer import InlineParser, MarkdownParser -from llama_cloud_services import LlamaExtract, LlamaParse from llama_cloud_services.extract import SourceText -from llama_cloud.client import AsyncLlamaCloud from typing_extensions import override from typing import List, Tuple, Union, Optional, Dict +from .utils import ( + create_llamacloud_client, + create_llama_extract_client, + create_llama_parse_client, +) + load_dotenv() if ( @@ -20,11 +24,10 @@ and os.getenv("EXTRACT_AGENT_ID", None) and os.getenv("LLAMACLOUD_PIPELINE_ID", None) ): - CLIENT = AsyncLlamaCloud(token=os.getenv("LLAMACLOUD_API_KEY")) - EXTRACT_AGENT = LlamaExtract(api_key=os.getenv("LLAMACLOUD_API_KEY")).get_agent( - id=os.getenv("EXTRACT_AGENT_ID") - ) - PARSER = LlamaParse(api_key=os.getenv("LLAMACLOUD_API_KEY"), result_type="markdown") + CLIENT = create_llamacloud_client() + llama_extract_client = create_llama_extract_client() + EXTRACT_AGENT = llama_extract_client.get_agent(id=os.getenv("EXTRACT_AGENT_ID")) + PARSER = create_llama_parse_client(result_type="markdown") PIPELINE_ID = os.getenv("LLAMACLOUD_PIPELINE_ID") diff --git a/src/notebookllama/querying.py b/src/notebookllama/querying.py index aab1596..ac40325 100644 --- a/src/notebookllama/querying.py +++ b/src/notebookllama/querying.py @@ -3,10 +3,11 @@ from llama_index.core.query_engine import CitationQueryEngine from llama_index.core.base.response.schema import Response -from llama_index.indices.managed.llama_cloud import LlamaCloudIndex from llama_index.llms.openai import OpenAIResponses from typing import Union, cast +from .utils import create_llamacloud_index + load_dotenv() if ( @@ -16,9 +17,10 @@ ): LLM = 
OpenAIResponses(model="gpt-4.1", api_key=os.getenv("OPENAI_API_KEY")) PIPELINE_ID = os.getenv("LLAMACLOUD_PIPELINE_ID") - RETR = LlamaCloudIndex( + index = create_llamacloud_index( api_key=os.getenv("LLAMACLOUD_API_KEY"), pipeline_id=PIPELINE_ID - ).as_retriever() + ) + RETR = index.as_retriever() QE = CitationQueryEngine( retriever=RETR, llm=LLM, diff --git a/src/notebookllama/utils.py b/src/notebookllama/utils.py new file mode 100644 index 0000000..182345a --- /dev/null +++ b/src/notebookllama/utils.py @@ -0,0 +1,107 @@ +import os +from typing import Optional, Dict, Any +from llama_cloud.client import AsyncLlamaCloud +from llama_cloud_services import LlamaExtract, LlamaParse +from llama_index.indices.managed.llama_cloud import LlamaCloudIndex + +# LlamaCloud regional endpoints +LLAMACLOUD_REGIONS = { + "us": "https://api.cloud.llamaindex.ai", + "eu": "https://api.cloud.eu.llamaindex.ai", +} + + +def get_llamacloud_base_url() -> Optional[str]: + """ + Get the appropriate LlamaCloud base URL based on region configuration. + + Returns: + str: The base URL for LlamaCloud API, or None if using default + """ + base_url = os.getenv("LLAMACLOUD_BASE_URL") + if base_url: + return base_url + + region = os.getenv("LLAMACLOUD_REGION", "").lower() + if region in LLAMACLOUD_REGIONS: + return LLAMACLOUD_REGIONS[region] + + return None + + +def get_llamacloud_config() -> Dict[str, Any]: + """ + Get LlamaCloud configuration including base URL. + + Returns: + dict: Configuration dictionary with token and optional base_url + """ + config = {"token": os.getenv("LLAMACLOUD_API_KEY")} + + base_url = get_llamacloud_base_url() + if base_url: + config["base_url"] = base_url + + return config + + +def create_llamacloud_client() -> AsyncLlamaCloud: + """ + Create a configured AsyncLlamaCloud client with regional support. 
+ + Returns: + AsyncLlamaCloud: Configured client instance + """ + config = get_llamacloud_config() + return AsyncLlamaCloud(**config) + + +def create_llama_extract_client() -> LlamaExtract: + """ + Create a configured LlamaExtract client with regional support. + + Returns: + LlamaExtract: Configured client instance + """ + config = get_llamacloud_config() + return LlamaExtract(**config) + + +def create_llama_parse_client(result_type: str = "markdown") -> LlamaParse: + """ + Create a configured LlamaParse client with regional support. + + Args: + result_type: The result type for parsing (default: "markdown") + + Returns: + LlamaParse: Configured client instance + """ + config = get_llamacloud_config() + base_url = config.get("base_url") + if base_url: + return LlamaParse( + api_key=config["token"], result_type=result_type, base_url=base_url + ) + else: + return LlamaParse(api_key=config["token"], result_type=result_type) + + +def create_llamacloud_index(api_key: str, pipeline_id: str) -> LlamaCloudIndex: + """ + Create a configured LlamaCloudIndex with regional support. 
+ + Args: + api_key: The API key for authentication + pipeline_id: The pipeline ID to use + + Returns: + LlamaCloudIndex: Configured index instance + """ + base_url = get_llamacloud_base_url() + if base_url: + return LlamaCloudIndex( + api_key=api_key, pipeline_id=pipeline_id, base_url=base_url + ) + else: + return LlamaCloudIndex(api_key=api_key, pipeline_id=pipeline_id) diff --git a/tools/create_llama_cloud_index.py b/tools/create_llama_cloud_index.py index 80a040e..33b0862 100644 --- a/tools/create_llama_cloud_index.py +++ b/tools/create_llama_cloud_index.py @@ -1,6 +1,10 @@ import os +import sys + +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) from dotenv import load_dotenv -from cli.embedding_app import EmbeddingSetupApp +from tools.cli.embedding_app import EmbeddingSetupApp +from src.notebookllama.utils import create_llamacloud_client from llama_cloud import ( PipelineTransformConfig_Advanced, @@ -8,7 +12,6 @@ AdvancedModeTransformConfigSegmentationConfig_Page, PipelineCreate, ) -from llama_cloud.client import LlamaCloud def main(): @@ -16,10 +19,8 @@ def main(): Create a new Llama Cloud index with the given embedding configuration. 
""" load_dotenv() - client = LlamaCloud(token=os.getenv("LLAMACLOUD_API_KEY")) + client = create_llamacloud_client() - # Run the embedding setup app to get the embedding configuration - # This prompts the user to select an embedding provider and configure the embedding model app = EmbeddingSetupApp() embedding_config = app.run() diff --git a/tools/create_llama_extract_agent.py b/tools/create_llama_extract_agent.py index 5908fad..62e688c 100644 --- a/tools/create_llama_extract_agent.py +++ b/tools/create_llama_extract_agent.py @@ -3,15 +3,15 @@ sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) -from llama_cloud_services import LlamaExtract from src.notebookllama.models import Notebook +from src.notebookllama.utils import create_llama_extract_client from dotenv import load_dotenv load_dotenv() def main() -> int: - conn = LlamaExtract(api_key=os.getenv("LLAMACLOUD_API_KEY")) + conn = create_llama_extract_client() agent = conn.create_agent(name="q_and_a_agent", data_schema=Notebook) _id = agent.id with open(".env", "a") as f: From 872af68a39b8b0b5e1cd10f69a6dd0f66d8cd77c Mon Sep 17 00:00:00 2001 From: Nick Galluzzo Date: Thu, 24 Jul 2025 19:13:08 +0700 Subject: [PATCH 2/6] fix: resolve regional API support issues and improve code maintainability - Fix AsyncLlamaCloud initialization to use 'token' parameter instead of 'api_key' - Resolve async event loop conflicts in create_llama_cloud_index.py - Fix relative import issues by converting to absolute imports with proper sys.path handling - Centralize regional support logic in utils.py with dedicated client creation functions - Eliminate DRY violations by removing duplicated base_url handling across files - Improve maintainability with consistent parameter handling and import patterns - Add proper error handling for async operations within existing event loops From 9f1b0b785a6db61b430cd8b151f94f57933dbb46 Mon Sep 17 00:00:00 2001 From: Nick Galluzzo Date: Thu, 24 Jul 2025 19:14:12 +0700 
Subject: [PATCH 3/6] fix: resolve regional API support issues and improve code maintainability --- src/notebookllama/processing.py | 7 +++++-- src/notebookllama/querying.py | 7 +++++-- src/notebookllama/server.py | 5 +++++ src/notebookllama/utils.py | 26 ++++++++++++++++---------- tools/create_llama_cloud_index.py | 8 ++++---- 5 files changed, 35 insertions(+), 18 deletions(-) diff --git a/src/notebookllama/processing.py b/src/notebookllama/processing.py index 37b98d6..c497d99 100644 --- a/src/notebookllama/processing.py +++ b/src/notebookllama/processing.py @@ -1,7 +1,8 @@ +import os +import sys from dotenv import load_dotenv import pandas as pd import json -import os import warnings from datetime import datetime @@ -11,7 +12,9 @@ from typing_extensions import override from typing import List, Tuple, Union, Optional, Dict -from .utils import ( +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) + +from notebookllama.utils import ( create_llamacloud_client, create_llama_extract_client, create_llama_parse_client, diff --git a/src/notebookllama/querying.py b/src/notebookllama/querying.py index ac40325..be636de 100644 --- a/src/notebookllama/querying.py +++ b/src/notebookllama/querying.py @@ -1,12 +1,15 @@ -from dotenv import load_dotenv import os +import sys +from dotenv import load_dotenv from llama_index.core.query_engine import CitationQueryEngine from llama_index.core.base.response.schema import Response from llama_index.llms.openai import OpenAIResponses from typing import Union, cast -from .utils import create_llamacloud_index +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) + +from notebookllama.utils import create_llamacloud_index load_dotenv() diff --git a/src/notebookllama/server.py b/src/notebookllama/server.py index f798384..b5d8ddc 100644 --- a/src/notebookllama/server.py +++ b/src/notebookllama/server.py @@ -1,9 +1,14 @@ +import os +import sys from querying import query_index from 
processing import process_file from mindmap import get_mind_map from fastmcp import FastMCP from typing import List, Union, Literal +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) + + mcp: FastMCP = FastMCP(name="MCP For NotebookLM") diff --git a/src/notebookllama/utils.py b/src/notebookllama/utils.py index 182345a..02768ef 100644 --- a/src/notebookllama/utils.py +++ b/src/notebookllama/utils.py @@ -52,8 +52,12 @@ def create_llamacloud_client() -> AsyncLlamaCloud: Returns: AsyncLlamaCloud: Configured client instance """ - config = get_llamacloud_config() - return AsyncLlamaCloud(**config) + token = os.getenv("LLAMACLOUD_API_KEY") + base_url = get_llamacloud_base_url() + if base_url: + return AsyncLlamaCloud(token=token, base_url=base_url) + else: + return AsyncLlamaCloud(token=token) def create_llama_extract_client() -> LlamaExtract: @@ -63,8 +67,12 @@ def create_llama_extract_client() -> LlamaExtract: Returns: LlamaExtract: Configured client instance """ - config = get_llamacloud_config() - return LlamaExtract(**config) + api_key = os.getenv("LLAMACLOUD_API_KEY") + base_url = get_llamacloud_base_url() + if base_url: + return LlamaExtract(api_key=api_key, base_url=base_url) + else: + return LlamaExtract(api_key=api_key) def create_llama_parse_client(result_type: str = "markdown") -> LlamaParse: @@ -77,14 +85,12 @@ def create_llama_parse_client(result_type: str = "markdown") -> LlamaParse: Returns: LlamaParse: Configured client instance """ - config = get_llamacloud_config() - base_url = config.get("base_url") + api_key = os.getenv("LLAMACLOUD_API_KEY") + base_url = get_llamacloud_base_url() if base_url: - return LlamaParse( - api_key=config["token"], result_type=result_type, base_url=base_url - ) + return LlamaParse(api_key=api_key, result_type=result_type, base_url=base_url) else: - return LlamaParse(api_key=config["token"], result_type=result_type) + return LlamaParse(api_key=api_key, result_type=result_type) def 
create_llamacloud_index(api_key: str, pipeline_id: str) -> LlamaCloudIndex: diff --git a/tools/create_llama_cloud_index.py b/tools/create_llama_cloud_index.py index 33b0862..2f98876 100644 --- a/tools/create_llama_cloud_index.py +++ b/tools/create_llama_cloud_index.py @@ -1,7 +1,5 @@ -import os -import sys +import asyncio -sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) from dotenv import load_dotenv from tools.cli.embedding_app import EmbeddingSetupApp from src.notebookllama.utils import create_llamacloud_client @@ -46,7 +44,9 @@ def main(): transform_config=transform_config, ) - pipeline = client.pipelines.upsert_pipeline(request=pipeline_request) + pipeline = asyncio.run( + client.pipelines.upsert_pipeline(request=pipeline_request) + ) with open(".env", "a") as f: f.write(f'\nLLAMACLOUD_PIPELINE_ID="{pipeline.id}"') From b1bb47cff207521a7256020754220576fb646b53 Mon Sep 17 00:00:00 2001 From: Nick Galluzzo Date: Tue, 29 Jul 2025 11:48:38 +0700 Subject: [PATCH 4/6] fix: change file imports for accuracy in create_llama_cloud_index --- tools/create_llama_cloud_index.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tools/create_llama_cloud_index.py b/tools/create_llama_cloud_index.py index 2f98876..1e3f98f 100644 --- a/tools/create_llama_cloud_index.py +++ b/tools/create_llama_cloud_index.py @@ -1,7 +1,12 @@ import asyncio +import os +import sys + +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) + from dotenv import load_dotenv -from tools.cli.embedding_app import EmbeddingSetupApp +from cli.embedding_app import EmbeddingSetupApp from src.notebookllama.utils import create_llamacloud_client from llama_cloud import ( From a6ca566ca47c1a249f7456a3b9d7f514119d3450 Mon Sep 17 00:00:00 2001 From: Nick Galluzzo Date: Tue, 29 Jul 2025 13:06:07 +0700 Subject: [PATCH 5/6] chore: Change `us` regional definition to `default` --- .env.example | 3 +- README.md | 4 +- 
src/notebookllama/utils.py | 95 ++++++++---- tests/test_utils.py | 303 +++++++++++++++++++++++++++++++++++++ 4 files changed, 374 insertions(+), 31 deletions(-) diff --git a/.env.example b/.env.example index bd3c4b3..55371bc 100644 --- a/.env.example +++ b/.env.example @@ -1,8 +1,9 @@ OPENAI_API_KEY="sk-***" LLAMACLOUD_API_KEY="llx-***" + # Regional Endpoint Configuration (Uncomment the appropriate line for your region) -# LLAMACLOUD_REGION="us" # North America (default) # LLAMACLOUD_REGION="eu" # Europe + ELEVENLABS_API_KEY="sk_***" pgql_db="postgres" pgql_user="localhost" diff --git a/README.md b/README.md index 537c437..c68fe7b 100644 --- a/README.md +++ b/README.md @@ -65,10 +65,10 @@ Next, open the `.env` file and add your API keys: - `ELEVENLABS_API_KEY`: find it [on ElevenLabs Settings](https://elevenlabs.io/app/settings/api-keys) - `LLAMACLOUD_API_KEY`: find it [on LlamaCloud Dashboard](https://cloud.llamaindex.ai?utm_source=demo&utm_medium=notebookLM) -> **🌍 Regional Support**: LlamaCloud operates in multiple regions. If you're using a non-US region, configure it in your `.env` file: +> **🌍 Regional Support**: LlamaCloud operates in multiple regions. If you're using a European region, configure it in your `.env` file: > +> - For **North America**: This is the default region - no configuration necessary. > - For **Europe (EU)**: Uncomment and set `LLAMACLOUD_REGION="eu"` -> - For **North America (US)**: Either leave it commented or set `LLAMACLOUD_REGION="us"` **4. 
Activate the Virtual Environment** diff --git a/src/notebookllama/utils.py b/src/notebookllama/utils.py index 02768ef..e07bbd9 100644 --- a/src/notebookllama/utils.py +++ b/src/notebookllama/utils.py @@ -6,27 +6,44 @@ # LlamaCloud regional endpoints LLAMACLOUD_REGIONS = { - "us": "https://api.cloud.llamaindex.ai", - "eu": "https://api.cloud.eu.llamaindex.ai", + "default": "https://api.cloud.llamaindex.ai", # North America (default) + "eu": "https://api.cloud.eu.llamaindex.ai", # Europe } +class LlamaCloudConfigError(Exception): + """Raised when LlamaCloud configuration is invalid.""" + + pass + + def get_llamacloud_base_url() -> Optional[str]: """ Get the appropriate LlamaCloud base URL based on region configuration. + Defaults to North America region and returns the North America endpoint URL + when no region is specified. + Returns: - str: The base URL for LlamaCloud API, or None if using default + str: The base URL for LlamaCloud API + + Raises: + LlamaCloudConfigError: If an invalid region is specified """ + # Direct base URL override takes precedence base_url = os.getenv("LLAMACLOUD_BASE_URL") if base_url: return base_url - region = os.getenv("LLAMACLOUD_REGION", "").lower() - if region in LLAMACLOUD_REGIONS: - return LLAMACLOUD_REGIONS[region] + region = os.getenv("LLAMACLOUD_REGION", "default").lower().strip() + + if region not in LLAMACLOUD_REGIONS: + valid_regions = ", ".join(LLAMACLOUD_REGIONS.keys()) + raise LlamaCloudConfigError( + f"Invalid LLAMACLOUD_REGION '{region}'. 
Supported regions: {valid_regions}" + ) - return None + return LLAMACLOUD_REGIONS[region] def get_llamacloud_config() -> Dict[str, Any]: @@ -35,8 +52,17 @@ def get_llamacloud_config() -> Dict[str, Any]: Returns: dict: Configuration dictionary with token and optional base_url + + Raises: + LlamaCloudConfigError: If API key is missing or region is invalid """ - config = {"token": os.getenv("LLAMACLOUD_API_KEY")} + token = os.getenv("LLAMACLOUD_API_KEY") + if not token: + raise LlamaCloudConfigError( + "LLAMACLOUD_API_KEY environment variable is required" + ) + + config = {"token": token} base_url = get_llamacloud_base_url() if base_url: @@ -51,13 +77,12 @@ def create_llamacloud_client() -> AsyncLlamaCloud: Returns: AsyncLlamaCloud: Configured client instance + + Raises: + LlamaCloudConfigError: If API key is missing or region is invalid """ - token = os.getenv("LLAMACLOUD_API_KEY") - base_url = get_llamacloud_base_url() - if base_url: - return AsyncLlamaCloud(token=token, base_url=base_url) - else: - return AsyncLlamaCloud(token=token) + config = get_llamacloud_config() + return AsyncLlamaCloud(**config) def create_llama_extract_client() -> LlamaExtract: @@ -66,13 +91,18 @@ def create_llama_extract_client() -> LlamaExtract: Returns: LlamaExtract: Configured client instance + + Raises: + LlamaCloudConfigError: If API key is missing or region is invalid """ api_key = os.getenv("LLAMACLOUD_API_KEY") + if not api_key: + raise LlamaCloudConfigError( + "LLAMACLOUD_API_KEY environment variable is required" + ) + base_url = get_llamacloud_base_url() - if base_url: - return LlamaExtract(api_key=api_key, base_url=base_url) - else: - return LlamaExtract(api_key=api_key) + return LlamaExtract(api_key=api_key, base_url=base_url) def create_llama_parse_client(result_type: str = "markdown") -> LlamaParse: @@ -84,13 +114,18 @@ def create_llama_parse_client(result_type: str = "markdown") -> LlamaParse: Returns: LlamaParse: Configured client instance + + Raises: + 
LlamaCloudConfigError: If API key is missing or region is invalid """ api_key = os.getenv("LLAMACLOUD_API_KEY") + if not api_key: + raise LlamaCloudConfigError( + "LLAMACLOUD_API_KEY environment variable is required" + ) + base_url = get_llamacloud_base_url() - if base_url: - return LlamaParse(api_key=api_key, result_type=result_type, base_url=base_url) - else: - return LlamaParse(api_key=api_key, result_type=result_type) + return LlamaParse(api_key=api_key, result_type=result_type, base_url=base_url) def create_llamacloud_index(api_key: str, pipeline_id: str) -> LlamaCloudIndex: @@ -103,11 +138,15 @@ def create_llamacloud_index(api_key: str, pipeline_id: str) -> LlamaCloudIndex: Returns: LlamaCloudIndex: Configured index instance + + Raises: + LlamaCloudConfigError: If API key or pipeline_id is missing, or region is invalid """ + if not api_key: + raise LlamaCloudConfigError("API key is required") + + if not pipeline_id: + raise LlamaCloudConfigError("Pipeline ID is required") + base_url = get_llamacloud_base_url() - if base_url: - return LlamaCloudIndex( - api_key=api_key, pipeline_id=pipeline_id, base_url=base_url - ) - else: - return LlamaCloudIndex(api_key=api_key, pipeline_id=pipeline_id) + return LlamaCloudIndex(api_key=api_key, pipeline_id=pipeline_id, base_url=base_url) diff --git a/tests/test_utils.py b/tests/test_utils.py index 479b862..3c0b384 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -3,6 +3,7 @@ import pandas as pd from pathlib import Path from dotenv import load_dotenv +from unittest.mock import patch, MagicMock from typing import Callable from pydantic import ValidationError @@ -15,6 +16,16 @@ ) from src.notebookllama.mindmap import get_mind_map from src.notebookllama.models import Notebook +from src.notebookllama.utils import ( + get_llamacloud_base_url, + get_llamacloud_config, + create_llamacloud_client, + create_llama_extract_client, + create_llama_parse_client, + create_llamacloud_index, + LlamaCloudConfigError, + 
LLAMACLOUD_REGIONS, +) load_dotenv() @@ -189,3 +200,295 @@ def test_images_renaming(images_dir: str): with open(images_dir + "image.png", "wb") as wb: wb.write(bts) os.remove(image) + + +# ============================================================================= +# Regional LlamaCloud Utilities Tests +# ============================================================================= + + +class TestLlamaCloudRegionalUtils: + """Test suite for regional LlamaCloud utility functions.""" + + def test_llamacloud_regions_constant(self): + """Test that LLAMACLOUD_REGIONS contains expected regions.""" + assert "default" in LLAMACLOUD_REGIONS + assert "eu" in LLAMACLOUD_REGIONS + assert LLAMACLOUD_REGIONS["default"] == "https://api.cloud.llamaindex.ai" + assert LLAMACLOUD_REGIONS["eu"] == "https://api.cloud.eu.llamaindex.ai" + + @patch.dict(os.environ, {}, clear=True) + def test_get_llamacloud_base_url_no_region(self): + """Test get_llamacloud_base_url with no region set (defaults to North America).""" + result = get_llamacloud_base_url() + assert result == "https://api.cloud.llamaindex.ai" + + @patch.dict(os.environ, {"LLAMACLOUD_REGION": "eu"}) + def test_get_llamacloud_base_url_eu_region(self): + """Test get_llamacloud_base_url with EU region.""" + result = get_llamacloud_base_url() + assert result == "https://api.cloud.eu.llamaindex.ai" + + @patch.dict(os.environ, {"LLAMACLOUD_REGION": "default"}) + def test_get_llamacloud_base_url_default_region(self): + """Test get_llamacloud_base_url with default region.""" + result = get_llamacloud_base_url() + assert result == "https://api.cloud.llamaindex.ai" + + @patch.dict(os.environ, {"LLAMACLOUD_REGION": "DEFAULT"}) + def test_get_llamacloud_base_url_case_insensitive(self): + """Test get_llamacloud_base_url with case insensitive region.""" + result = get_llamacloud_base_url() + assert result == "https://api.cloud.llamaindex.ai" + + @patch.dict(os.environ, {"LLAMACLOUD_REGION": " default "}) + def 
test_get_llamacloud_base_url_strips_whitespace(self): + """Test get_llamacloud_base_url strips whitespace from region.""" + result = get_llamacloud_base_url() + assert result == "https://api.cloud.llamaindex.ai" + + @patch.dict(os.environ, {"LLAMACLOUD_BASE_URL": "https://custom.api.com"}) + def test_get_llamacloud_base_url_custom_override(self): + """Test get_llamacloud_base_url with custom base URL override.""" + result = get_llamacloud_base_url() + assert result == "https://custom.api.com" + + @patch.dict( + os.environ, + {"LLAMACLOUD_BASE_URL": "https://custom.api.com", "LLAMACLOUD_REGION": "eu"}, + ) + def test_get_llamacloud_base_url_custom_override_precedence(self): + """Test that custom base URL takes precedence over region.""" + result = get_llamacloud_base_url() + assert result == "https://custom.api.com" + + @patch.dict(os.environ, {"LLAMACLOUD_REGION": "invalid"}) + def test_get_llamacloud_base_url_invalid_region(self): + """Test get_llamacloud_base_url with invalid region raises error.""" + with pytest.raises(LlamaCloudConfigError) as exc_info: + get_llamacloud_base_url() + assert "Invalid LLAMACLOUD_REGION 'invalid'" in str(exc_info.value) + assert "default, eu" in str(exc_info.value) + + @patch.dict(os.environ, {"LLAMACLOUD_API_KEY": "test-key"}, clear=True) + def test_get_llamacloud_config_valid(self): + """Test get_llamacloud_config with valid API key (defaults to North America).""" + result = get_llamacloud_config() + expected = {"token": "test-key", "base_url": "https://api.cloud.llamaindex.ai"} + assert result == expected + + @patch.dict( + os.environ, + {"LLAMACLOUD_API_KEY": "test-key", "LLAMACLOUD_REGION": "eu"}, + clear=True, + ) + def test_get_llamacloud_config_with_region(self): + """Test get_llamacloud_config with region.""" + result = get_llamacloud_config() + expected = { + "token": "test-key", + "base_url": "https://api.cloud.eu.llamaindex.ai", + } + assert result == expected + + @patch.dict(os.environ, {}, clear=True) + def 
test_get_llamacloud_config_missing_api_key(self): + """Test get_llamacloud_config with missing API key raises error.""" + with pytest.raises(LlamaCloudConfigError): + get_llamacloud_config() + + @patch.dict( + os.environ, + {"LLAMACLOUD_API_KEY": "test-key", "LLAMACLOUD_REGION": "invalid"}, + clear=True, + ) + def test_get_llamacloud_config_invalid_region(self): + """Test get_llamacloud_config with invalid region raises error.""" + with pytest.raises(LlamaCloudConfigError): + get_llamacloud_config() + + @patch("src.notebookllama.utils.AsyncLlamaCloud") + @patch.dict(os.environ, {"LLAMACLOUD_API_KEY": "test-key"}, clear=True) + def test_create_llamacloud_client_valid(self, mock_client_class): + """Test create_llamacloud_client with valid configuration (defaults to North America).""" + mock_instance = MagicMock() + mock_client_class.return_value = mock_instance + + result = create_llamacloud_client() + + mock_client_class.assert_called_once_with( + token="test-key", base_url="https://api.cloud.llamaindex.ai" + ) + assert result == mock_instance + + @patch("src.notebookllama.utils.AsyncLlamaCloud") + @patch.dict( + os.environ, + {"LLAMACLOUD_API_KEY": "test-key", "LLAMACLOUD_REGION": "eu"}, + clear=True, + ) + def test_create_llamacloud_client_with_region(self, mock_client_class): + """Test create_llamacloud_client with region.""" + mock_instance = MagicMock() + mock_client_class.return_value = mock_instance + + result = create_llamacloud_client() + + mock_client_class.assert_called_once_with( + token="test-key", base_url="https://api.cloud.eu.llamaindex.ai" + ) + assert result == mock_instance + + @patch.dict(os.environ, {}, clear=True) + def test_create_llamacloud_client_missing_api_key(self): + """Test create_llamacloud_client with missing API key raises error.""" + with pytest.raises(LlamaCloudConfigError): + create_llamacloud_client() + + @patch("src.notebookllama.utils.LlamaExtract") + @patch.dict(os.environ, {"LLAMACLOUD_API_KEY": "test-key"}, clear=True) + def 
test_create_llama_extract_client_valid(self, mock_extract_class): + """Test create_llama_extract_client with valid configuration (defaults to North America).""" + mock_instance = MagicMock() + mock_extract_class.return_value = mock_instance + + result = create_llama_extract_client() + + mock_extract_class.assert_called_once_with( + api_key="test-key", base_url="https://api.cloud.llamaindex.ai" + ) + assert result == mock_instance + + @patch("src.notebookllama.utils.LlamaExtract") + @patch.dict( + os.environ, + {"LLAMACLOUD_API_KEY": "test-key", "LLAMACLOUD_REGION": "eu"}, + clear=True, + ) + def test_create_llama_extract_client_with_region(self, mock_extract_class): + """Test create_llama_extract_client with region.""" + mock_instance = MagicMock() + mock_extract_class.return_value = mock_instance + + result = create_llama_extract_client() + + mock_extract_class.assert_called_once_with( + api_key="test-key", base_url="https://api.cloud.eu.llamaindex.ai" + ) + assert result == mock_instance + + @patch.dict(os.environ, {}, clear=True) + def test_create_llama_extract_client_missing_api_key(self): + """Test create_llama_extract_client with missing API key raises error.""" + with pytest.raises(LlamaCloudConfigError): + create_llama_extract_client() + + @patch("src.notebookllama.utils.LlamaParse") + @patch.dict(os.environ, {"LLAMACLOUD_API_KEY": "test-key"}, clear=True) + def test_create_llama_parse_client_default(self, mock_parse_class): + """Test create_llama_parse_client with default parameters (defaults to North America).""" + mock_instance = MagicMock() + mock_parse_class.return_value = mock_instance + + result = create_llama_parse_client() + + mock_parse_class.assert_called_once_with( + api_key="test-key", + result_type="markdown", + base_url="https://api.cloud.llamaindex.ai", + ) + assert result == mock_instance + + @patch("src.notebookllama.utils.LlamaParse") + @patch.dict(os.environ, {"LLAMACLOUD_API_KEY": "test-key"}, clear=True) + def 
test_create_llama_parse_client_custom_result_type(self, mock_parse_class): + """Test create_llama_parse_client with custom result type (defaults to North America).""" + mock_instance = MagicMock() + mock_parse_class.return_value = mock_instance + + result = create_llama_parse_client(result_type="text") + + mock_parse_class.assert_called_once_with( + api_key="test-key", + result_type="text", + base_url="https://api.cloud.llamaindex.ai", + ) + assert result == mock_instance + + @patch("src.notebookllama.utils.LlamaParse") + @patch.dict( + os.environ, + {"LLAMACLOUD_API_KEY": "test-key", "LLAMACLOUD_REGION": "eu"}, + clear=True, + ) + def test_create_llama_parse_client_with_region(self, mock_parse_class): + """Test create_llama_parse_client with region.""" + mock_instance = MagicMock() + mock_parse_class.return_value = mock_instance + + result = create_llama_parse_client() + + mock_parse_class.assert_called_once_with( + api_key="test-key", + result_type="markdown", + base_url="https://api.cloud.eu.llamaindex.ai", + ) + assert result == mock_instance + + @patch.dict(os.environ, {}, clear=True) + def test_create_llama_parse_client_missing_api_key(self): + """Test create_llama_parse_client with missing API key raises error.""" + with pytest.raises(LlamaCloudConfigError): + create_llama_parse_client() + + @patch("src.notebookllama.utils.LlamaCloudIndex") + @patch.dict(os.environ, {}, clear=True) + def test_create_llamacloud_index_valid(self, mock_index_class): + """Test create_llamacloud_index with valid parameters (defaults to North America).""" + mock_instance = MagicMock() + mock_index_class.return_value = mock_instance + + result = create_llamacloud_index("test-key", "test-pipeline") + + mock_index_class.assert_called_once_with( + api_key="test-key", + pipeline_id="test-pipeline", + base_url="https://api.cloud.llamaindex.ai", + ) + assert result == mock_instance + + @patch("src.notebookllama.utils.LlamaCloudIndex") + @patch.dict(os.environ, {"LLAMACLOUD_REGION": 
"eu"}, clear=True) + def test_create_llamacloud_index_with_region(self, mock_index_class): + """Test create_llamacloud_index with region.""" + mock_instance = MagicMock() + mock_index_class.return_value = mock_instance + + result = create_llamacloud_index("test-key", "test-pipeline") + + mock_index_class.assert_called_once_with( + api_key="test-key", + pipeline_id="test-pipeline", + base_url="https://api.cloud.eu.llamaindex.ai", + ) + assert result == mock_instance + + @patch.dict(os.environ, {}, clear=True) + def test_create_llamacloud_index_missing_api_key(self): + """Test create_llamacloud_index with missing API key raises error.""" + with pytest.raises(LlamaCloudConfigError) as exc_info: + create_llamacloud_index("", "test-pipeline") + assert "API key is required" in str(exc_info.value) + + @patch.dict(os.environ, {}, clear=True) + def test_create_llamacloud_index_missing_pipeline_id(self): + """Test create_llamacloud_index with missing pipeline ID raises error.""" + with pytest.raises(LlamaCloudConfigError) as exc_info: + create_llamacloud_index("test-key", "") + assert "Pipeline ID is required" in str(exc_info.value) + + @patch.dict(os.environ, {"LLAMACLOUD_REGION": "invalid"}, clear=True) + def test_create_llamacloud_index_invalid_region(self): + """Test create_llamacloud_index with invalid region raises error.""" + with pytest.raises(LlamaCloudConfigError): + create_llamacloud_index("test-key", "test-pipeline") From d181908590a4cd3bac7cdff5994ecbbc7880ec61 Mon Sep 17 00:00:00 2001 From: Nick Galluzzo Date: Tue, 29 Jul 2025 13:21:12 +0700 Subject: [PATCH 6/6] fix: resolve type checking errors in querying.py - Ensure create_llamacloud_index is never passes `None` for `api_key` and `pipeline_id` --- src/notebookllama/querying.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/notebookllama/querying.py b/src/notebookllama/querying.py index be636de..cb31f9d 100644 --- a/src/notebookllama/querying.py +++ 
b/src/notebookllama/querying.py @@ -20,9 +20,12 @@ ): LLM = OpenAIResponses(model="gpt-4.1", api_key=os.getenv("OPENAI_API_KEY")) PIPELINE_ID = os.getenv("LLAMACLOUD_PIPELINE_ID") - index = create_llamacloud_index( - api_key=os.getenv("LLAMACLOUD_API_KEY"), pipeline_id=PIPELINE_ID - ) + API_KEY = os.getenv("LLAMACLOUD_API_KEY") + + if API_KEY is None or PIPELINE_ID is None: + raise ValueError("LLAMACLOUD_API_KEY and LLAMACLOUD_PIPELINE_ID must be set") + + index = create_llamacloud_index(api_key=API_KEY, pipeline_id=PIPELINE_ID) RETR = index.as_retriever() QE = CitationQueryEngine( retriever=RETR,