Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
OPENAI_API_KEY="sk-***"
LLAMACLOUD_API_KEY="llx-***"

# Regional Endpoint Configuration (Uncomment the appropriate line for your region)
# LLAMACLOUD_REGION="eu" # Europe

ELEVENLABS_API_KEY="sk_***"
pgql_db="postgres"
pgql_user="localhost"
Expand Down
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,11 @@ Next, open the `.env` file and add your API keys:
- `ELEVENLABS_API_KEY`: find it [on ElevenLabs Settings](https://elevenlabs.io/app/settings/api-keys)
- `LLAMACLOUD_API_KEY`: find it [on LlamaCloud Dashboard](https://cloud.llamaindex.ai?utm_source=demo&utm_medium=notebookLM)

> **🌍 Regional Support**: LlamaCloud operates in multiple regions. If you're using a European region, configure it in your `.env` file:
>
> - For **North America**: This is the default region - no configuration necessary.
> - For **Europe (EU)**: Uncomment and set `LLAMACLOUD_REGION="eu"`

**4. Activate the Virtual Environment**

(on mac/unix)
Expand Down
22 changes: 14 additions & 8 deletions src/notebookllama/processing.py
Original file line number Diff line number Diff line change
@@ -1,30 +1,36 @@
import os
import sys
from dotenv import load_dotenv
import pandas as pd
import json
import os
import warnings
from datetime import datetime

from mrkdwn_analysis import MarkdownAnalyzer
from mrkdwn_analysis.markdown_analyzer import InlineParser, MarkdownParser
from llama_cloud_services import LlamaExtract, LlamaParse
from llama_cloud_services.extract import SourceText
from llama_cloud.client import AsyncLlamaCloud
from typing_extensions import override
from typing import List, Tuple, Union, Optional, Dict

sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))

from notebookllama.utils import (
create_llamacloud_client,
create_llama_extract_client,
create_llama_parse_client,
)

load_dotenv()

# Module-level LlamaCloud setup: the assignments below are meant to run only
# when LLAMACLOUD_API_KEY, EXTRACT_AGENT_ID and LLAMACLOUD_PIPELINE_ID are all
# set to non-empty values.
# NOTE(review): this text comes from a diff view without +/- markers and with
# indentation stripped. CLIENT, EXTRACT_AGENT and PARSER are each assigned
# twice; the direct AsyncLlamaCloud / LlamaExtract / LlamaParse constructions
# appear to be the removed lines and the create_* helper calls their
# replacements - confirm against the actual file.
if (
os.getenv("LLAMACLOUD_API_KEY", None)
and os.getenv("EXTRACT_AGENT_ID", None)
and os.getenv("LLAMACLOUD_PIPELINE_ID", None)
):
# Presumably the pre-change construction, built directly from the API key.
CLIENT = AsyncLlamaCloud(token=os.getenv("LLAMACLOUD_API_KEY"))
EXTRACT_AGENT = LlamaExtract(api_key=os.getenv("LLAMACLOUD_API_KEY")).get_agent(
id=os.getenv("EXTRACT_AGENT_ID")
)
PARSER = LlamaParse(api_key=os.getenv("LLAMACLOUD_API_KEY"), result_type="markdown")
# Presumably the post-change construction, delegating to notebookllama.utils.
CLIENT = create_llamacloud_client()
llama_extract_client = create_llama_extract_client()
# Look up the extraction agent whose id is given by EXTRACT_AGENT_ID.
EXTRACT_AGENT = llama_extract_client.get_agent(id=os.getenv("EXTRACT_AGENT_ID"))
PARSER = create_llama_parse_client(result_type="markdown")
PIPELINE_ID = os.getenv("LLAMACLOUD_PIPELINE_ID")


Expand Down
18 changes: 13 additions & 5 deletions src/notebookllama/querying.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
from dotenv import load_dotenv
import os
import sys
from dotenv import load_dotenv

from llama_index.core.query_engine import CitationQueryEngine
from llama_index.core.base.response.schema import Response
from llama_index.indices.managed.llama_cloud import LlamaCloudIndex
from llama_index.llms.openai import OpenAIResponses
from typing import Union, cast

sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))

from notebookllama.utils import create_llamacloud_index

load_dotenv()

if (
Expand All @@ -16,9 +20,13 @@
):
LLM = OpenAIResponses(model="gpt-4.1", api_key=os.getenv("OPENAI_API_KEY"))
PIPELINE_ID = os.getenv("LLAMACLOUD_PIPELINE_ID")
RETR = LlamaCloudIndex(
api_key=os.getenv("LLAMACLOUD_API_KEY"), pipeline_id=PIPELINE_ID
).as_retriever()
API_KEY = os.getenv("LLAMACLOUD_API_KEY")

if API_KEY is None or PIPELINE_ID is None:
raise ValueError("LLAMACLOUD_API_KEY and LLAMACLOUD_PIPELINE_ID must be set")

index = create_llamacloud_index(api_key=API_KEY, pipeline_id=PIPELINE_ID)
RETR = index.as_retriever()
QE = CitationQueryEngine(
retriever=RETR,
llm=LLM,
Expand Down
5 changes: 5 additions & 0 deletions src/notebookllama/server.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
import os
import sys
from querying import query_index
from processing import process_file
from mindmap import get_mind_map
from fastmcp import FastMCP
from typing import List, Union, Literal

sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))


mcp: FastMCP = FastMCP(name="MCP For NotebookLM")


Expand Down
152 changes: 152 additions & 0 deletions src/notebookllama/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
import os
from typing import Optional, Dict, Any
from llama_cloud.client import AsyncLlamaCloud
from llama_cloud_services import LlamaExtract, LlamaParse
from llama_index.indices.managed.llama_cloud import LlamaCloudIndex

# LlamaCloud regional endpoints
LLAMACLOUD_REGIONS = {
    "default": "https://api.cloud.llamaindex.ai",  # North America (default)
    "eu": "https://api.cloud.eu.llamaindex.ai",  # Europe
}


class LlamaCloudConfigError(Exception):
    """Raised when LlamaCloud configuration is invalid."""


def get_llamacloud_base_url() -> Optional[str]:
    """
    Get the appropriate LlamaCloud base URL based on region configuration.

    Resolution order:

    1. ``LLAMACLOUD_BASE_URL`` - an explicit endpoint override, returned as-is
       (surrounding whitespace removed) when it is non-empty.
    2. ``LLAMACLOUD_REGION`` - a key of ``LLAMACLOUD_REGIONS``, matched
       case-insensitively and ignoring surrounding whitespace.
    3. The ``"default"`` entry of ``LLAMACLOUD_REGIONS`` when neither variable
       is set, or when they are set but empty / whitespace-only.

    Returns:
        str: The base URL for LlamaCloud API. The function always returns a
        non-empty string or raises.

    Raises:
        LlamaCloudConfigError: If an invalid region is specified
    """
    # Direct base URL override takes precedence. A blank value is treated as
    # "not set" so it cannot be handed to a client as an endpoint.
    base_url = (os.getenv("LLAMACLOUD_BASE_URL") or "").strip()
    if base_url:
        return base_url

    # A bare `LLAMACLOUD_REGION=` line in .env yields an empty string rather
    # than None, so os.getenv's default alone would not cover it; fall back to
    # the default region explicitly.
    region = (os.getenv("LLAMACLOUD_REGION") or "").strip().lower() or "default"

    if region not in LLAMACLOUD_REGIONS:
        valid_regions = ", ".join(LLAMACLOUD_REGIONS)
        raise LlamaCloudConfigError(
            f"Invalid LLAMACLOUD_REGION '{region}'. Supported regions: {valid_regions}"
        )

    return LLAMACLOUD_REGIONS[region]


def get_llamacloud_config() -> Dict[str, Any]:
    """
    Assemble the keyword arguments for a LlamaCloud client.

    The API token is read from ``LLAMACLOUD_API_KEY``; the endpoint comes from
    ``get_llamacloud_base_url()`` and is included only when it is truthy.

    Returns:
        dict: ``{"token": ...}`` plus ``"base_url"`` when one was resolved

    Raises:
        LlamaCloudConfigError: If API key is missing or region is invalid
    """
    api_token = os.getenv("LLAMACLOUD_API_KEY")
    if not api_token:
        raise LlamaCloudConfigError(
            "LLAMACLOUD_API_KEY environment variable is required"
        )

    # Resolved after the token check so a missing key is reported first.
    endpoint = get_llamacloud_base_url()
    if not endpoint:
        return {"token": api_token}
    return {"token": api_token, "base_url": endpoint}


def create_llamacloud_client() -> AsyncLlamaCloud:
    """
    Build an AsyncLlamaCloud client from the environment configuration.

    The client receives whatever keyword arguments ``get_llamacloud_config()``
    produces (the token and, when resolved, the base URL).

    Returns:
        AsyncLlamaCloud: Configured client instance

    Raises:
        LlamaCloudConfigError: If API key is missing or region is invalid
    """
    return AsyncLlamaCloud(**get_llamacloud_config())


def create_llama_extract_client() -> LlamaExtract:
    """
    Build a LlamaExtract client pointed at the configured endpoint.

    Reads the API key from ``LLAMACLOUD_API_KEY`` and the endpoint from
    ``get_llamacloud_base_url()``.

    Returns:
        LlamaExtract: Configured client instance

    Raises:
        LlamaCloudConfigError: If API key is missing or region is invalid
    """
    key = os.getenv("LLAMACLOUD_API_KEY")
    if key:
        # The base URL is resolved only once the key is known to be present.
        return LlamaExtract(api_key=key, base_url=get_llamacloud_base_url())

    raise LlamaCloudConfigError(
        "LLAMACLOUD_API_KEY environment variable is required"
    )


def create_llama_parse_client(result_type: str = "markdown") -> LlamaParse:
    """
    Build a LlamaParse client pointed at the configured endpoint.

    Reads the API key from ``LLAMACLOUD_API_KEY`` and the endpoint from
    ``get_llamacloud_base_url()``.

    Args:
        result_type: Forwarded to LlamaParse as ``result_type``
            (default: "markdown")

    Returns:
        LlamaParse: Configured client instance

    Raises:
        LlamaCloudConfigError: If API key is missing or region is invalid
    """
    key = os.getenv("LLAMACLOUD_API_KEY")
    if key:
        # The base URL is resolved only once the key is known to be present.
        endpoint = get_llamacloud_base_url()
        return LlamaParse(api_key=key, result_type=result_type, base_url=endpoint)

    raise LlamaCloudConfigError(
        "LLAMACLOUD_API_KEY environment variable is required"
    )


def create_llamacloud_index(api_key: str, pipeline_id: str) -> LlamaCloudIndex:
    """
    Build a LlamaCloudIndex pointed at the configured endpoint.

    Args:
        api_key: The API key for authentication
        pipeline_id: The pipeline ID to use

    Returns:
        LlamaCloudIndex: Configured index instance

    Raises:
        LlamaCloudConfigError: If API key or pipeline_id is missing, or region is invalid
    """
    # Checked in this order so a missing API key is reported before a missing
    # pipeline ID.
    required_arguments = (
        (api_key, "API key is required"),
        (pipeline_id, "Pipeline ID is required"),
    )
    for value, message in required_arguments:
        if not value:
            raise LlamaCloudConfigError(message)

    return LlamaCloudIndex(
        api_key=api_key,
        pipeline_id=pipeline_id,
        base_url=get_llamacloud_base_url(),
    )
Loading