137 changes: 134 additions & 3 deletions synthetic_data_kit/cli.py
@@ -99,14 +99,41 @@ def system_check(
console.print("Install with: pip install openai>=1.0.0", style="yellow")
return 1

# Create client
# If no API key is provided but an api_base is configured (common for
# local Ollama or proxies), try a direct HTTP request first so the
# CLI doesn't require the OpenAI SDK or OPENAI_API_KEY.
if api_base and not api_key:
try:
messages = [{"role": "user", "content": "Hello"}]
payload = {
"model": model,
"messages": messages,
"temperature": 0.1,
}
resp = requests.post(f"{api_base}/chat/completions", json=payload, timeout=10)
if resp.status_code == 200:
console.print(f" API endpoint access confirmed via HTTP: {api_base}", style="green")
console.print(f"Default model: {model}", style="green")
# Try to parse response content if present
try:
j = resp.json()
console.print(f"Response from model: {j.get('choices',[{}])[0].get('message',{}).get('content')}", style="green")
except Exception:
pass
return 0
# If not 200, fallthrough to SDK-based attempt for more detailed error
except requests.exceptions.RequestException:
# Fall back to attempting with the SDK for a clearer error message
pass

# Create client (SDK path)
client_kwargs = {}
if api_key:
client_kwargs['api_key'] = api_key
if api_base:
client_kwargs['base_url'] = api_base
# Check API access

# Check API access using the OpenAI SDK
try:
client = OpenAI(**client_kwargs)
# Try a simple models request to check connectivity
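As an aside, the direct-HTTP fallback above can be exercised outside the CLI. A minimal sketch, assuming an OpenAI-compatible endpoint whose api_base already points at the /v1 root (for Ollama's compatibility layer that would be http://localhost:11434/v1); the function name and model string below are placeholders, not project defaults:

import requests

def probe_chat_endpoint(api_base: str, model: str, timeout: int = 10) -> bool:
    # Send one tiny chat completion to verify the endpoint responds without the OpenAI SDK.
    payload = {
        "model": model,
        "messages": [{"role": "user", "content": "Hello"}],
        "temperature": 0.1,
    }
    try:
        resp = requests.post(f"{api_base}/chat/completions", json=payload, timeout=timeout)
    except requests.exceptions.RequestException:
        return False
    if resp.status_code != 200:
        return False
    choices = resp.json().get("choices", [])
    return bool(choices and choices[0].get("message", {}).get("content"))

# Example (hypothetical local endpoint): probe_chat_endpoint("http://localhost:11434/v1", "phi4:latest")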
@@ -134,6 +161,65 @@ def system_check(
        except Exception as e:
            console.print(f"❌ Error: {str(e)}", style="red")
            return 1
    elif selected_provider == "ollama":
        # Get Ollama config
        from synthetic_data_kit.utils.config import get_ollama_config
        ollama_config = get_ollama_config(ctx.config)
        api_base = api_base or ollama_config.get("api_base")
        model = ollama_config.get("model")

        with console.status(f"Checking Ollama server at {api_base}..."):
            try:
                # Check if Ollama server is running by hitting the tags endpoint
                response = requests.get(f"{api_base}/api/tags", timeout=5)
                if response.status_code == 200:
                    console.print(f"✅ Ollama server is running at {api_base}", style="green")
                    tags_data = response.json()
                    if 'models' in tags_data and tags_data['models']:
                        console.print(f"Available models: {[m['name'] for m in tags_data['models']]}")
                    else:
                        console.print("No models found. Install a model with: ollama pull <model-name>", style="yellow")

                    # Test generation with a simple prompt
                    console.print(f"Testing generation with model: {model}", style="blue")
                    test_payload = {
                        "model": model,
                        "prompt": "Hello",
                        "stream": False,
                        "options": {
                            "temperature": 0.1,
                            "num_predict": 50
                        }
                    }

                    try:
                        gen_response = requests.post(f"{api_base}/api/generate", json=test_payload, timeout=30)
                        if gen_response.status_code == 200:
                            gen_data = gen_response.json()
                            if 'response' in gen_data:
                                console.print(f"Response from model: {gen_data['response'][:100]}...", style="green")
                                console.print("✅ Ollama generation test successful", style="green")
                                return 0
                        else:
                            console.print(f"Generation test failed with status: {gen_response.status_code}", style="yellow")
                            console.print(f"Response: {gen_response.text}", style="yellow")
                    except requests.exceptions.RequestException as e:
                        console.print(f"Generation test failed: {str(e)}", style="yellow")

                    # Server is running but generation failed - still consider it partially successful
                    return 0
                else:
                    console.print(f"❌ Ollama server is not available at {api_base}", style="red")
                    console.print(f"Error: Server returned status code: {response.status_code}")
            except requests.exceptions.RequestException as e:
                console.print(f"❌ Ollama server is not available at {api_base}", style="red")
                console.print(f"Error: {str(e)}")

        # Show instructions to start the server
        console.print("\nTo start Ollama, run:", style="yellow")
        console.print("ollama serve", style="bold blue")
        console.print(f"Then install a model: ollama pull {model}", style="bold blue")
        return 1
    else:
        # Default to vLLM
        # Get vLLM server details
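The Ollama branch above leans on two documented Ollama endpoints: GET /api/tags to list installed models and POST /api/generate for a one-off completion. A minimal standalone sketch of the same health check, assuming a local server on the default port; the model name is only a placeholder:

import requests

OLLAMA_BASE = "http://localhost:11434"  # assumed default; adjust if the server runs elsewhere

def ollama_is_healthy(model: str = "phi4:latest") -> bool:
    # 1) Is the server up, and does it have at least one model installed?
    try:
        tags = requests.get(f"{OLLAMA_BASE}/api/tags", timeout=5)
    except requests.exceptions.RequestException:
        return False
    if tags.status_code != 200 or not tags.json().get("models"):
        return False
    # 2) Can the chosen model actually generate a short reply?
    gen = requests.post(
        f"{OLLAMA_BASE}/api/generate",
        json={"model": model, "prompt": "Hello", "stream": False,
              "options": {"temperature": 0.1, "num_predict": 50}},
        timeout=30,
    )
    return gen.status_code == 200 and "response" in gen.json()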
@@ -338,6 +424,26 @@ def create(
        api_base = api_base or api_endpoint_config.get("api_base")
        model = model or api_endpoint_config.get("model")
        # No server check needed for API endpoint
    elif provider == "ollama":
        # Use Ollama config
        from synthetic_data_kit.utils.config import get_ollama_config
        ollama_config = get_ollama_config(ctx.config)
        api_base = api_base or ollama_config.get("api_base")
        model = model or ollama_config.get("model")

        # Check Ollama server availability
        try:
            response = requests.get(f"{api_base}/api/tags", timeout=2)
            if response.status_code != 200:
                console.print(f"❌ Error: Ollama server not available at {api_base}", style="red")
                console.print("Please start the Ollama server with:", style="yellow")
                console.print(f"ollama serve", style="bold blue")
                return 1
        except requests.exceptions.RequestException:
            console.print(f"❌ Error: Ollama server not available at {api_base}", style="red")
            console.print("Please start the Ollama server with:", style="yellow")
            console.print(f"ollama serve", style="bold blue")
            return 1
    else:
        # Use vLLM config
        vllm_config = get_vllm_config(ctx.config)
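The same /api/tags availability probe is duplicated here in create and, below, in curate. A sketch of how it could be factored into a shared helper, assuming no such helper already exists in the codebase (the function name is hypothetical):

import requests
from rich.console import Console

console = Console()

def ensure_ollama_available(api_base: str) -> bool:
    # Return True if an Ollama server answers /api/tags; otherwise print start-up guidance.
    try:
        resp = requests.get(f"{api_base}/api/tags", timeout=2)
        if resp.status_code == 200:
            return True
    except requests.exceptions.RequestException:
        pass
    console.print(f"❌ Error: Ollama server not available at {api_base}", style="red")
    console.print("Please start the Ollama server with:", style="yellow")
    console.print("ollama serve", style="bold blue")
    return False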
@@ -498,6 +604,26 @@ def curate(
        api_base = api_base or api_endpoint_config.get("api_base")
        model = model or api_endpoint_config.get("model")
        # No server check needed for API endpoint
    elif provider == "ollama":
        # Use Ollama config
        from synthetic_data_kit.utils.config import get_ollama_config
        ollama_config = get_ollama_config(ctx.config)
        api_base = api_base or ollama_config.get("api_base")
        model = model or ollama_config.get("model")

        # Check Ollama server availability
        try:
            response = requests.get(f"{api_base}/api/tags", timeout=2)
            if response.status_code != 200:
                console.print(f"❌ Error: Ollama server not available at {api_base}", style="red")
                console.print("Please start the Ollama server with:", style="yellow")
                console.print(f"ollama serve", style="bold blue")
                return 1
        except requests.exceptions.RequestException:
            console.print(f"❌ Error: Ollama server not available at {api_base}", style="red")
            console.print("Please start the Ollama server with:", style="yellow")
            console.print(f"ollama serve", style="bold blue")
            return 1
    else:
        # Use vLLM config
        vllm_config = get_vllm_config(ctx.config)
@@ -517,6 +643,11 @@ def curate(
console.print("Please start the VLLM server with:", style="yellow")
console.print(f"vllm serve {model}", style="bold blue")
return 1
except requests.exceptions.RequestException:
console.print(f"❌ Error: VLLM server not available at {api_base}", style="red")
console.print("Please start the VLLM server with:", style="yellow")
console.print(f"vllm serve {model}", style="bold blue")
return 1
Comment on lines +646 to +650

Copilot AI Oct 25, 2025


This new exception handler for RequestException in the vLLM section will never execute because it appears after the return statement at line 645. The existing try-except block already handles this case earlier in the code, making this block unreachable dead code. Remove lines 646-650.

Suggested change
        except requests.exceptions.RequestException:
            console.print(f"❌ Error: VLLM server not available at {api_base}", style="red")
            console.print("Please start the VLLM server with:", style="yellow")
            console.print(f"vllm serve {model}", style="bold blue")
            return 1


    try:
        # Check if input is a directory
12 changes: 10 additions & 2 deletions synthetic_data_kit/config.yaml
@@ -14,8 +14,8 @@ paths:

# LLM Provider configuration
llm:
  # Provider selection: "vllm" or "api-endpoint"
  provider: "api-endpoint"
  # Provider selection: "vllm", "api-endpoint", or "ollama"
  provider: "ollama"

# VLLM server configuration
vllm:
@@ -33,6 +33,14 @@ api-endpoint:
  max_retries: 3 # Number of retries for API calls
  retry_delay: 1.0 # Initial delay between retries (seconds)

# Ollama configuration
ollama:
api_base: "http://localhost:11434" # Base URL for Ollama API
model: "phi4:latest" # Default model to use
max_retries: 3 # Number of retries for API calls
retry_delay: 1.0 # Initial delay between retries (seconds)
sleep_time: 0.1 # Sleep time between batch requests

# Ingest configuration
ingest:
  default_format: "txt" # Default output format for parsed files
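This new ollama block mirrors the existing api-endpoint block and is read through get_ollama_config, which the CLI changes above import from synthetic_data_kit.utils.config. That helper is not part of this diff; a plausible sketch of what it might look like, assuming the config argument is the parsed YAML as a dict and the defaults simply mirror the values above:

# Hypothetical sketch of get_ollama_config (the real implementation lives in
# synthetic_data_kit/utils/config.py and is not shown in this diff).
DEFAULT_OLLAMA_CONFIG = {
    "api_base": "http://localhost:11434",
    "model": "phi4:latest",
    "max_retries": 3,
    "retry_delay": 1.0,
    "sleep_time": 0.1,
}

def get_ollama_config(config: dict) -> dict:
    # Return the 'ollama' section of the parsed config, falling back to the defaults above.
    return {**DEFAULT_OLLAMA_CONFIG, **(config.get("ollama") or {})}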