Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions .github/workflows/e2e.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ on:
permissions:
contents: read

env:
# Ollama config for AI assistant tests
OLLAMA_MODELS: /home/runner/.ollama/models

jobs:
e2e:
name: Playwright E2E
Expand All @@ -22,10 +26,39 @@ jobs:
NEXT_PUBLIC_OIDC_PROVIDER_ID: oidc
BETTER_AUTH_URL: http://localhost:3000
BETTER_AUTH_SECRET: test-only-not-a-real-better-auth-secret
USE_OLLAMA: "true"
OLLAMA_MODEL: qwen2.5:1.5b
OLLAMA_BASE_URL: http://localhost:11434
steps:
- name: Checkout
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4

- name: Cache Ollama binary
id: cache-ollama
uses: actions/cache@v4
with:
path: /usr/local/bin/ollama
key: ollama-binary-v0.13.5

- name: Cache Ollama models
uses: actions/cache@v4
with:
path: /home/runner/.ollama/models
key: ollama-qwen2.5-1.5b-v3

- name: Install Ollama
if: steps.cache-ollama.outputs.cache-hit != 'true'
run: curl -fsSL https://ollama.com/install.sh | sh

- name: Start Ollama server
run: |
sudo systemctl stop ollama 2>/dev/null || true
OLLAMA_MODELS=/home/runner/.ollama/models ollama serve &
sleep 3

- name: Pull Ollama model
run: OLLAMA_MODELS=/home/runner/.ollama/models ollama pull qwen2.5:1.5b

- name: Setup
uses: ./.github/actions/setup

Expand Down
3 changes: 3 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
"test:e2e": "playwright test",
"test:e2e:ui": "playwright test --ui",
"test:e2e:debug": "playwright test --debug",
"test:e2e:ollama": "USE_OLLAMA=true OLLAMA_MODEL=qwen2.5:1.5b OLLAMA_BASE_URL=http://localhost:11434 playwright test",
"test:e2e:ollama:ui": "USE_OLLAMA=true OLLAMA_MODEL=qwen2.5:1.5b OLLAMA_BASE_URL=http://localhost:11434 playwright test --ui",
"test:coverage": "vitest run --coverage",
"type-check": "tsc --noEmit",
"prepare": "husky",
Expand Down Expand Up @@ -47,6 +49,7 @@
"@radix-ui/react-toggle-group": "^1.1.11",
"@radix-ui/react-tooltip": "^1.2.8",
"ai": "^5.0.114",
"ai-sdk-ollama": "^2.1.0",
"ajv": "^8.17.1",
"ajv-formats": "^3.0.1",
"better-auth": "1.4.6",
Expand Down
5 changes: 5 additions & 0 deletions playwright.config.mts
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,11 @@ export default defineConfig({
NEXT_PUBLIC_OIDC_PROVIDER_ID: "okta",
BETTER_AUTH_URL: "http://localhost:3000",
BETTER_AUTH_SECRET: "e2e-test-secret-at-least-32-chars-long",
// Always use Ollama for E2E tests to avoid needing OpenRouter API keys
USE_OLLAMA: "true",
OLLAMA_MODEL: process.env.OLLAMA_MODEL ?? "qwen2.5:1.5b",
OLLAMA_BASE_URL:
process.env.OLLAMA_BASE_URL ?? "http://localhost:11434",
},
},
});
30 changes: 30 additions & 0 deletions pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

42 changes: 30 additions & 12 deletions src/app/api/chat/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import {
streamText,
type ToolSet,
} from "ai";
import { ollama } from "ai-sdk-ollama";
import { headers } from "next/headers";
import { DEFAULT_MODEL } from "@/app/assistant/constants";
import { getServers } from "@/app/catalog/actions";
Expand All @@ -16,6 +17,8 @@ import {
} from "@/lib/mcp/client";
import { SYSTEM_PROMPT } from "./system-prompt";

const OLLAMA_MODEL = process.env.OLLAMA_MODEL ?? "qwen2.5:1.5b";

export const maxDuration = 60;

interface ConnectionResult {
Expand Down Expand Up @@ -196,19 +199,35 @@ export async function POST(req: Request) {
? requestedModel
: DEFAULT_MODEL;

// Check if we should use Ollama (for E2E testing)
const useOllama = process.env.USE_OLLAMA === "true";

// Validate API key for production mode
const apiKey = process.env.OPENROUTER_API_KEY;
if (!apiKey) {
if (!useOllama && !apiKey) {
console.error("[Chat API] OPENROUTER_API_KEY not configured");
return new Response("Service unavailable", { status: 503 });
}

const { tools, clients, errors } = await getMcpTools({
selectedServers,
enabledTools: enabledToolsFromRequest,
});
// Create model - use Ollama for E2E tests, OpenRouter for production
const model = useOllama
? ollama(OLLAMA_MODEL)
: createOpenRouter({ apiKey: apiKey as string })(modelId);

// If all servers failed to connect, return an error
if (Object.keys(tools).length === 0 && errors.length > 0) {
if (useOllama) {
console.log(`[Chat API] Using Ollama model: ${OLLAMA_MODEL}`);
}

// Skip MCP tool fetching in test mode (Ollama) to avoid connection errors
const { tools, clients, errors } = useOllama
? { tools: {}, clients: [], errors: [] }
: await getMcpTools({
selectedServers,
enabledTools: enabledToolsFromRequest,
});

// If all servers failed to connect, return an error (skip in test mode)
if (!useOllama && Object.keys(tools).length === 0 && errors.length > 0) {
const serverNames = errors.map((err) => err.serverName).join(", ");
return new Response(
`Unable to connect to ${serverNames} MCP servers. Please check that the servers are running and accessible.`,
Expand All @@ -219,9 +238,6 @@ export async function POST(req: Request) {
);
}

const provider = createOpenRouter({ apiKey });
const model = provider(modelId);

const startTime = Date.now();

const result = streamText({
Expand All @@ -231,6 +247,8 @@ export async function POST(req: Request) {
toolChoice: "auto",
stopWhen: stepCountIs(5), // Allow multiple steps for tool execution and response generation
system: SYSTEM_PROMPT,
// Use low temperature for more deterministic responses in test mode
temperature: useOllama ? 0 : undefined,
onFinish: async () => {
// Close MCP clients
for (const client of clients) {
Expand All @@ -248,8 +266,8 @@ export async function POST(req: Request) {
if (part.type === "start") {
return {
createdAt: Date.now(),
model: modelId,
providerId: "openrouter",
model: useOllama ? OLLAMA_MODEL : modelId,
providerId: useOllama ? "ollama" : "openrouter",
};
}
if (part.type === "finish") {
Expand Down
105 changes: 105 additions & 0 deletions tests/e2e/assistant.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
import { expect, test } from "./fixtures";

// Mirror the defaults used by the chat API route (USE_OLLAMA path) so the
// tests hit the same local Ollama server/model when the env vars are unset.
const OLLAMA_BASE_URL = process.env.OLLAMA_BASE_URL ?? "http://localhost:11434";
const OLLAMA_MODEL = process.env.OLLAMA_MODEL ?? "qwen2.5:1.5b";

/**
 * Warms up Ollama by issuing one minimal, non-streaming generation request,
 * so the model is already loaded into memory when the tests start.
 *
 * @throws Error when the Ollama API responds with a non-2xx status.
 */
async function warmupOllama(): Promise<void> {
  // Tiny prompt; we only care that the model gets loaded, not the output.
  const payload = {
    model: OLLAMA_MODEL,
    prompt: "Say hello",
    stream: false,
  };

  const res = await fetch(`${OLLAMA_BASE_URL}/api/generate`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(payload),
  });

  if (!res.ok) {
    throw new Error(
      `Ollama warmup failed: ${res.status} ${res.statusText}`,
    );
  }

  const result = await res.json();
  console.log(`Ollama warmup complete. Model: ${OLLAMA_MODEL}`);
  console.log(`Warmup response: ${result.response?.substring(0, 50)}...`);
}

test.describe("Assistant chat", () => {
// Triple all timeouts for this describe block since LLM operations are slow
test.slow();

// Warmup Ollama before running any tests in this describe block
test.beforeAll(async () => {

Check failure on line 37 in tests/e2e/assistant.spec.ts

View workflow job for this annotation

GitHub Actions / Playwright E2E

[chromium] › tests/e2e/assistant.spec.ts:85:7 › Assistant chat › displays streaming response

2) [chromium] › tests/e2e/assistant.spec.ts:85:7 › Assistant chat › displays streaming response ── Retry #2 ─────────────────────────────────────────────────────────────────────────────────────── "beforeAll" hook timeout of 30000ms exceeded. 35 | 36 | // Warmup Ollama before running any tests in this describe block > 37 | test.beforeAll(async () => { | ^ 38 | console.log("Warming up Ollama..."); 39 | const startTime = Date.now(); 40 | at /home/runner/work/toolhive-cloud-ui/toolhive-cloud-ui/tests/e2e/assistant.spec.ts:37:8

Check failure on line 37 in tests/e2e/assistant.spec.ts

View workflow job for this annotation

GitHub Actions / Playwright E2E

[chromium] › tests/e2e/assistant.spec.ts:50:7 › Assistant chat › responds to user message with expected content

1) [chromium] › tests/e2e/assistant.spec.ts:50:7 › Assistant chat › responds to user message with expected content Retry #2 ─────────────────────────────────────────────────────────────────────────────────────── "beforeAll" hook timeout of 30000ms exceeded. 35 | 36 | // Warmup Ollama before running any tests in this describe block > 37 | test.beforeAll(async () => { | ^ 38 | console.log("Warming up Ollama..."); 39 | const startTime = Date.now(); 40 | at /home/runner/work/toolhive-cloud-ui/toolhive-cloud-ui/tests/e2e/assistant.spec.ts:37:8

Check failure on line 37 in tests/e2e/assistant.spec.ts

View workflow job for this annotation

GitHub Actions / Playwright E2E

[chromium] › tests/e2e/assistant.spec.ts:50:7 › Assistant chat › responds to user message with expected content

1) [chromium] › tests/e2e/assistant.spec.ts:50:7 › Assistant chat › responds to user message with expected content Retry #1 ─────────────────────────────────────────────────────────────────────────────────────── "beforeAll" hook timeout of 30000ms exceeded. 35 | 36 | // Warmup Ollama before running any tests in this describe block > 37 | test.beforeAll(async () => { | ^ 38 | console.log("Warming up Ollama..."); 39 | const startTime = Date.now(); 40 | at /home/runner/work/toolhive-cloud-ui/toolhive-cloud-ui/tests/e2e/assistant.spec.ts:37:8

Check failure on line 37 in tests/e2e/assistant.spec.ts

View workflow job for this annotation

GitHub Actions / Playwright E2E

[chromium] › tests/e2e/assistant.spec.ts:85:7 › Assistant chat › displays streaming response

2) [chromium] › tests/e2e/assistant.spec.ts:85:7 › Assistant chat › displays streaming response ── Retry #2 ─────────────────────────────────────────────────────────────────────────────────────── "beforeAll" hook timeout of 30000ms exceeded. 35 | 36 | // Warmup Ollama before running any tests in this describe block > 37 | test.beforeAll(async () => { | ^ 38 | console.log("Warming up Ollama..."); 39 | const startTime = Date.now(); 40 | at /home/runner/work/toolhive-cloud-ui/toolhive-cloud-ui/tests/e2e/assistant.spec.ts:37:8

Check failure on line 37 in tests/e2e/assistant.spec.ts

View workflow job for this annotation

GitHub Actions / Playwright E2E

[chromium] › tests/e2e/assistant.spec.ts:50:7 › Assistant chat › responds to user message with expected content

1) [chromium] › tests/e2e/assistant.spec.ts:50:7 › Assistant chat › responds to user message with expected content Retry #2 ─────────────────────────────────────────────────────────────────────────────────────── "beforeAll" hook timeout of 30000ms exceeded. 35 | 36 | // Warmup Ollama before running any tests in this describe block > 37 | test.beforeAll(async () => { | ^ 38 | console.log("Warming up Ollama..."); 39 | const startTime = Date.now(); 40 | at /home/runner/work/toolhive-cloud-ui/toolhive-cloud-ui/tests/e2e/assistant.spec.ts:37:8

Check failure on line 37 in tests/e2e/assistant.spec.ts

View workflow job for this annotation

GitHub Actions / Playwright E2E

[chromium] › tests/e2e/assistant.spec.ts:50:7 › Assistant chat › responds to user message with expected content

1) [chromium] › tests/e2e/assistant.spec.ts:50:7 › Assistant chat › responds to user message with expected content Retry #1 ─────────────────────────────────────────────────────────────────────────────────────── "beforeAll" hook timeout of 30000ms exceeded. 35 | 36 | // Warmup Ollama before running any tests in this describe block > 37 | test.beforeAll(async () => { | ^ 38 | console.log("Warming up Ollama..."); 39 | const startTime = Date.now(); 40 | at /home/runner/work/toolhive-cloud-ui/toolhive-cloud-ui/tests/e2e/assistant.spec.ts:37:8
console.log("Warming up Ollama...");
const startTime = Date.now();

try {
await warmupOllama();
console.log(`Ollama warmup took ${Date.now() - startTime}ms`);
} catch (error) {
console.error("Ollama warmup failed:", error);
throw error;
}
});

test("responds to user message with expected content", async ({
authenticatedPage,
}) => {
// Use a unique identifier that we expect to appear in the response
const testUsername = `testuser_${Date.now()}`;

await authenticatedPage.goto("/assistant");

// Wait for the page to load
await expect(
authenticatedPage.getByPlaceholder(/type your message/i),
).toBeVisible({ timeout: 10_000 });

// Type a message that includes the unique identifier
const textarea = authenticatedPage.getByPlaceholder(/type your message/i);
await textarea.fill(
`Reply with a short greeting for the user named '${testUsername}'. Include their exact username in your response.`,
);

// Submit the message
await authenticatedPage.keyboard.press("Enter");

// Wait for the assistant's response to appear
// The response should contain our unique username in a greeting
// Using a generous timeout since model inference can take time
// The regex matches any greeting pattern followed by the username
await expect(
authenticatedPage.getByText(
new RegExp(`(hello|hi|hey|greetings).*${testUsername}`, "i"),
),
).toBeVisible({

Check failure on line 80 in tests/e2e/assistant.spec.ts

View workflow job for this annotation

GitHub Actions / Playwright E2E

[chromium] › tests/e2e/assistant.spec.ts:50:7 › Assistant chat › responds to user message with expected content

1) [chromium] › tests/e2e/assistant.spec.ts:50:7 › Assistant chat › responds to user message with expected content Error: expect(locator).toBeVisible() failed Locator: getByText(/(hello|hi|hey|greetings).*testuser_1766172013708/i) Expected: visible Timeout: 60000ms Error: element(s) not found Call log: - Expect "toBeVisible" with timeout 60000ms - waiting for getByText(/(hello|hi|hey|greetings).*testuser_1766172013708/i) 78 | new RegExp(`(hello|hi|hey|greetings).*${testUsername}`, "i"), 79 | ), > 80 | ).toBeVisible({ | ^ 81 | timeout: 60_000, 82 | }); 83 | }); at /home/runner/work/toolhive-cloud-ui/toolhive-cloud-ui/tests/e2e/assistant.spec.ts:80:7

Check failure on line 80 in tests/e2e/assistant.spec.ts

View workflow job for this annotation

GitHub Actions / Playwright E2E

[chromium] › tests/e2e/assistant.spec.ts:50:7 › Assistant chat › responds to user message with expected content

1) [chromium] › tests/e2e/assistant.spec.ts:50:7 › Assistant chat › responds to user message with expected content Error: expect(locator).toBeVisible() failed Locator: getByText(/(hello|hi|hey|greetings).*testuser_1766172012434/i) Expected: visible Timeout: 60000ms Error: element(s) not found Call log: - Expect "toBeVisible" with timeout 60000ms - waiting for getByText(/(hello|hi|hey|greetings).*testuser_1766172012434/i) 78 | new RegExp(`(hello|hi|hey|greetings).*${testUsername}`, "i"), 79 | ), > 80 | ).toBeVisible({ | ^ 81 | timeout: 60_000, 82 | }); 83 | }); at /home/runner/work/toolhive-cloud-ui/toolhive-cloud-ui/tests/e2e/assistant.spec.ts:80:7
timeout: 60_000,
});
});

test("displays streaming response", async ({ authenticatedPage }) => {
await authenticatedPage.goto("/assistant");

await expect(
authenticatedPage.getByPlaceholder(/type your message/i),
).toBeVisible({ timeout: 10_000 });

const textarea = authenticatedPage.getByPlaceholder(/type your message/i);
await textarea.fill("Count from 1 to 5, one number per line.");

await authenticatedPage.keyboard.press("Enter");

// Wait for the assistant's response containing numbers
// Look for a pattern that indicates the assistant has responded with numbers
await expect(
authenticatedPage.getByText(/[1-5].*[1-5]/), // At least two numbers in the response
).toBeVisible({
timeout: 60_000,
});
});
});
Loading