Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions .github/workflows/e2e.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ on:
permissions:
contents: read

env:
# Ollama config for AI assistant tests
OLLAMA_MODELS: /home/runner/.ollama/models

jobs:
e2e:
name: Playwright E2E
Expand All @@ -22,10 +26,39 @@ jobs:
NEXT_PUBLIC_OIDC_PROVIDER_ID: oidc
BETTER_AUTH_URL: http://localhost:3000
BETTER_AUTH_SECRET: test-only-not-a-real-better-auth-secret
USE_OLLAMA: "true"
OLLAMA_MODEL: qwen2.5:1.5b
OLLAMA_BASE_URL: http://localhost:11434
steps:
- name: Checkout
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4

- name: Cache Ollama binary
id: cache-ollama
uses: actions/cache@v4
with:
path: /usr/local/bin/ollama
key: ollama-binary-v0.13.5

- name: Cache Ollama models
uses: actions/cache@v4
with:
path: /home/runner/.ollama/models
key: ollama-qwen2.5-1.5b-v3

- name: Install Ollama
if: steps.cache-ollama.outputs.cache-hit != 'true'
run: curl -fsSL https://ollama.com/install.sh | sh

- name: Start Ollama server
run: |
sudo systemctl stop ollama 2>/dev/null || true
OLLAMA_MODELS=/home/runner/.ollama/models ollama serve &
sleep 3

- name: Pull Ollama model
run: OLLAMA_MODELS=/home/runner/.ollama/models ollama pull qwen2.5:1.5b

- name: Setup
uses: ./.github/actions/setup

Expand Down
3 changes: 3 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
"test:e2e": "playwright test",
"test:e2e:ui": "playwright test --ui",
"test:e2e:debug": "playwright test --debug",
"test:e2e:ollama": "USE_OLLAMA=true OLLAMA_MODEL=qwen2.5:1.5b OLLAMA_BASE_URL=http://localhost:11434 playwright test",
"test:e2e:ollama:ui": "USE_OLLAMA=true OLLAMA_MODEL=qwen2.5:1.5b OLLAMA_BASE_URL=http://localhost:11434 playwright test --ui",
"test:coverage": "vitest run --coverage",
"type-check": "tsc --noEmit",
"prepare": "husky",
Expand Down Expand Up @@ -47,6 +49,7 @@
"@radix-ui/react-toggle-group": "^1.1.11",
"@radix-ui/react-tooltip": "^1.2.8",
"ai": "^5.0.114",
"ai-sdk-ollama": "^2.1.0",
"ajv": "^8.17.1",
"ajv-formats": "^3.0.1",
"better-auth": "1.4.6",
Expand Down
5 changes: 5 additions & 0 deletions playwright.config.mts
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,11 @@ export default defineConfig({
NEXT_PUBLIC_OIDC_PROVIDER_ID: "okta",
BETTER_AUTH_URL: "http://localhost:3000",
BETTER_AUTH_SECRET: "e2e-test-secret-at-least-32-chars-long",
// Always use Ollama for E2E tests to avoid needing OpenRouter API keys
USE_OLLAMA: "true",
OLLAMA_MODEL: process.env.OLLAMA_MODEL ?? "qwen2.5:1.5b",
OLLAMA_BASE_URL:
process.env.OLLAMA_BASE_URL ?? "http://localhost:11434",
},
},
});
30 changes: 30 additions & 0 deletions pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

42 changes: 30 additions & 12 deletions src/app/api/chat/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import {
streamText,
type ToolSet,
} from "ai";
import { ollama } from "ai-sdk-ollama";
import { headers } from "next/headers";
import { DEFAULT_MODEL } from "@/app/assistant/constants";
import { getServers } from "@/app/catalog/actions";
Expand All @@ -16,6 +17,8 @@ import {
} from "@/lib/mcp/client";
import { SYSTEM_PROMPT } from "./system-prompt";

const OLLAMA_MODEL = process.env.OLLAMA_MODEL ?? "qwen2.5:1.5b";

export const maxDuration = 60;

interface ConnectionResult {
Expand Down Expand Up @@ -196,19 +199,35 @@ export async function POST(req: Request) {
? requestedModel
: DEFAULT_MODEL;

// Check if we should use Ollama (for E2E testing)
const useOllama = process.env.USE_OLLAMA === "true";

// Validate API key for production mode
const apiKey = process.env.OPENROUTER_API_KEY;
if (!apiKey) {
if (!useOllama && !apiKey) {
console.error("[Chat API] OPENROUTER_API_KEY not configured");
return new Response("Service unavailable", { status: 503 });
}

const { tools, clients, errors } = await getMcpTools({
selectedServers,
enabledTools: enabledToolsFromRequest,
});
// Create model - use Ollama for E2E tests, OpenRouter for production
const model = useOllama
? ollama(OLLAMA_MODEL)
: createOpenRouter({ apiKey: apiKey as string })(modelId);

// If all servers failed to connect, return an error
if (Object.keys(tools).length === 0 && errors.length > 0) {
if (useOllama) {
console.log(`[Chat API] Using Ollama model: ${OLLAMA_MODEL}`);
}

// Skip MCP tool fetching in test mode (Ollama) to avoid connection errors
const { tools, clients, errors } = useOllama
? { tools: {}, clients: [], errors: [] }
: await getMcpTools({
selectedServers,
enabledTools: enabledToolsFromRequest,
});

// If all servers failed to connect, return an error (skip in test mode)
if (!useOllama && Object.keys(tools).length === 0 && errors.length > 0) {
const serverNames = errors.map((err) => err.serverName).join(", ");
return new Response(
`Unable to connect to ${serverNames} MCP servers. Please check that the servers are running and accessible.`,
Expand All @@ -219,9 +238,6 @@ export async function POST(req: Request) {
);
}

const provider = createOpenRouter({ apiKey });
const model = provider(modelId);

const startTime = Date.now();

const result = streamText({
Expand All @@ -231,6 +247,8 @@ export async function POST(req: Request) {
toolChoice: "auto",
stopWhen: stepCountIs(5), // Allow multiple steps for tool execution and response generation
system: SYSTEM_PROMPT,
// Use low temperature for more deterministic responses in test mode
temperature: useOllama ? 0 : undefined,
onFinish: async () => {
// Close MCP clients
for (const client of clients) {
Expand All @@ -248,8 +266,8 @@ export async function POST(req: Request) {
if (part.type === "start") {
return {
createdAt: Date.now(),
model: modelId,
providerId: "openrouter",
model: useOllama ? OLLAMA_MODEL : modelId,
providerId: useOllama ? "ollama" : "openrouter",
};
}
if (part.type === "finish") {
Expand Down
105 changes: 105 additions & 0 deletions tests/e2e/assistant.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
import { expect, test } from "./fixtures";

// Mirror the defaults used by the chat API route (USE_OLLAMA path) so the
// tests hit the same local Ollama server/model when the env vars are unset.
const OLLAMA_BASE_URL = process.env.OLLAMA_BASE_URL ?? "http://localhost:11434";
const OLLAMA_MODEL = process.env.OLLAMA_MODEL ?? "qwen2.5:1.5b";

/**
 * Warms up Ollama by issuing one minimal, non-streaming generation request,
 * so the model is already loaded into memory when the tests start.
 *
 * @throws Error when the Ollama API responds with a non-2xx status.
 */
async function warmupOllama(): Promise<void> {
  // Tiny prompt; we only care that the model gets loaded, not the output.
  const payload = {
    model: OLLAMA_MODEL,
    prompt: "Say hello",
    stream: false,
  };

  const res = await fetch(`${OLLAMA_BASE_URL}/api/generate`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(payload),
  });

  if (!res.ok) {
    throw new Error(
      `Ollama warmup failed: ${res.status} ${res.statusText}`,
    );
  }

  const result = await res.json();
  console.log(`Ollama warmup complete. Model: ${OLLAMA_MODEL}`);
  console.log(`Warmup response: ${result.response?.substring(0, 50)}...`);
}

test.describe("Assistant chat", () => {
// Triple all timeouts for this describe block since LLM operations are slow
test.slow();

// Warmup Ollama before running any tests in this describe block
test.beforeAll(async () => {

Check failure on line 37 in tests/e2e/assistant.spec.ts

View workflow job for this annotation

GitHub Actions / Playwright E2E

[chromium] › tests/e2e/assistant.spec.ts:85:7 › Assistant chat › displays streaming response

2) [chromium] › tests/e2e/assistant.spec.ts:85:7 › Assistant chat › displays streaming response ── Retry #2 ─────────────────────────────────────────────────────────────────────────────────────── "beforeAll" hook timeout of 30000ms exceeded. 35 | 36 | // Warmup Ollama before running any tests in this describe block > 37 | test.beforeAll(async () => { | ^ 38 | console.log("Warming up Ollama..."); 39 | const startTime = Date.now(); 40 | at /home/runner/work/toolhive-cloud-ui/toolhive-cloud-ui/tests/e2e/assistant.spec.ts:37:8

Check failure on line 37 in tests/e2e/assistant.spec.ts

View workflow job for this annotation

GitHub Actions / Playwright E2E

[chromium] › tests/e2e/assistant.spec.ts:50:7 › Assistant chat › responds to user message with expected content

1) [chromium] › tests/e2e/assistant.spec.ts:50:7 › Assistant chat › responds to user message with expected content Retry #2 ─────────────────────────────────────────────────────────────────────────────────────── "beforeAll" hook timeout of 30000ms exceeded. 35 | 36 | // Warmup Ollama before running any tests in this describe block > 37 | test.beforeAll(async () => { | ^ 38 | console.log("Warming up Ollama..."); 39 | const startTime = Date.now(); 40 | at /home/runner/work/toolhive-cloud-ui/toolhive-cloud-ui/tests/e2e/assistant.spec.ts:37:8

Check failure on line 37 in tests/e2e/assistant.spec.ts

View workflow job for this annotation

GitHub Actions / Playwright E2E

[chromium] › tests/e2e/assistant.spec.ts:50:7 › Assistant chat › responds to user message with expected content

1) [chromium] › tests/e2e/assistant.spec.ts:50:7 › Assistant chat › responds to user message with expected content Retry #1 ─────────────────────────────────────────────────────────────────────────────────────── "beforeAll" hook timeout of 30000ms exceeded. 35 | 36 | // Warmup Ollama before running any tests in this describe block > 37 | test.beforeAll(async () => { | ^ 38 | console.log("Warming up Ollama..."); 39 | const startTime = Date.now(); 40 | at /home/runner/work/toolhive-cloud-ui/toolhive-cloud-ui/tests/e2e/assistant.spec.ts:37:8

Check failure on line 37 in tests/e2e/assistant.spec.ts

View workflow job for this annotation

GitHub Actions / Playwright E2E

[chromium] › tests/e2e/assistant.spec.ts:85:7 › Assistant chat › displays streaming response

2) [chromium] › tests/e2e/assistant.spec.ts:85:7 › Assistant chat › displays streaming response ── Retry #2 ─────────────────────────────────────────────────────────────────────────────────────── "beforeAll" hook timeout of 30000ms exceeded. 35 | 36 | // Warmup Ollama before running any tests in this describe block > 37 | test.beforeAll(async () => { | ^ 38 | console.log("Warming up Ollama..."); 39 | const startTime = Date.now(); 40 | at /home/runner/work/toolhive-cloud-ui/toolhive-cloud-ui/tests/e2e/assistant.spec.ts:37:8

Check failure on line 37 in tests/e2e/assistant.spec.ts

View workflow job for this annotation

GitHub Actions / Playwright E2E

[chromium] › tests/e2e/assistant.spec.ts:50:7 › Assistant chat › responds to user message with expected content

1) [chromium] › tests/e2e/assistant.spec.ts:50:7 › Assistant chat › responds to user message with expected content Retry #2 ─────────────────────────────────────────────────────────────────────────────────────── "beforeAll" hook timeout of 30000ms exceeded. 35 | 36 | // Warmup Ollama before running any tests in this describe block > 37 | test.beforeAll(async () => { | ^ 38 | console.log("Warming up Ollama..."); 39 | const startTime = Date.now(); 40 | at /home/runner/work/toolhive-cloud-ui/toolhive-cloud-ui/tests/e2e/assistant.spec.ts:37:8

Check failure on line 37 in tests/e2e/assistant.spec.ts

View workflow job for this annotation

GitHub Actions / Playwright E2E

[chromium] › tests/e2e/assistant.spec.ts:50:7 › Assistant chat › responds to user message with expected content

1) [chromium] › tests/e2e/assistant.spec.ts:50:7 › Assistant chat › responds to user message with expected content Retry #1 ─────────────────────────────────────────────────────────────────────────────────────── "beforeAll" hook timeout of 30000ms exceeded. 35 | 36 | // Warmup Ollama before running any tests in this describe block > 37 | test.beforeAll(async () => { | ^ 38 | console.log("Warming up Ollama..."); 39 | const startTime = Date.now(); 40 | at /home/runner/work/toolhive-cloud-ui/toolhive-cloud-ui/tests/e2e/assistant.spec.ts:37:8
console.log("Warming up Ollama...");
const startTime = Date.now();

try {
await warmupOllama();
console.log(`Ollama warmup took ${Date.now() - startTime}ms`);
} catch (error) {
console.error("Ollama warmup failed:", error);
throw error;
}
});

test("responds to user message with expected content", async ({
authenticatedPage,
}) => {
// Use a unique identifier that we expect to appear in the response
const testUsername = `testuser_${Date.now()}`;

await authenticatedPage.goto("/assistant");

// Wait for the page to load
await expect(
authenticatedPage.getByPlaceholder(/type your message/i),
).toBeVisible({ timeout: 10_000 });

// Type a message that includes the unique identifier
const textarea = authenticatedPage.getByPlaceholder(/type your message/i);
await textarea.fill(
`Reply with a short greeting for the user named '${testUsername}'. Include their exact username in your response.`,
);

// Submit the message
await authenticatedPage.keyboard.press("Enter");

// Wait for the assistant's response to appear
// The response should contain our unique username in a greeting
// Using a generous timeout since model inference can take time
// The regex matches any greeting pattern followed by the username
await expect(
authenticatedPage.getByText(
new RegExp(`(hello|hi|hey|greetings).*${testUsername}`, "i"),
),
).toBeVisible({

Check failure on line 80 in tests/e2e/assistant.spec.ts

View workflow job for this annotation

GitHub Actions / Playwright E2E

[chromium] › tests/e2e/assistant.spec.ts:50:7 › Assistant chat › responds to user message with expected content

1) [chromium] › tests/e2e/assistant.spec.ts:50:7 › Assistant chat › responds to user message with expected content Error: expect(locator).toBeVisible() failed Locator: getByText(/(hello|hi|hey|greetings).*testuser_1766172013708/i) Expected: visible Timeout: 60000ms Error: element(s) not found Call log: - Expect "toBeVisible" with timeout 60000ms - waiting for getByText(/(hello|hi|hey|greetings).*testuser_1766172013708/i) 78 | new RegExp(`(hello|hi|hey|greetings).*${testUsername}`, "i"), 79 | ), > 80 | ).toBeVisible({ | ^ 81 | timeout: 60_000, 82 | }); 83 | }); at /home/runner/work/toolhive-cloud-ui/toolhive-cloud-ui/tests/e2e/assistant.spec.ts:80:7

Check failure on line 80 in tests/e2e/assistant.spec.ts

View workflow job for this annotation

GitHub Actions / Playwright E2E

[chromium] › tests/e2e/assistant.spec.ts:50:7 › Assistant chat › responds to user message with expected content

1) [chromium] › tests/e2e/assistant.spec.ts:50:7 › Assistant chat › responds to user message with expected content Error: expect(locator).toBeVisible() failed Locator: getByText(/(hello|hi|hey|greetings).*testuser_1766172012434/i) Expected: visible Timeout: 60000ms Error: element(s) not found Call log: - Expect "toBeVisible" with timeout 60000ms - waiting for getByText(/(hello|hi|hey|greetings).*testuser_1766172012434/i) 78 | new RegExp(`(hello|hi|hey|greetings).*${testUsername}`, "i"), 79 | ), > 80 | ).toBeVisible({ | ^ 81 | timeout: 60_000, 82 | }); 83 | }); at /home/runner/work/toolhive-cloud-ui/toolhive-cloud-ui/tests/e2e/assistant.spec.ts:80:7
timeout: 60_000,
});
});

test("displays streaming response", async ({ authenticatedPage }) => {
await authenticatedPage.goto("/assistant");

await expect(
authenticatedPage.getByPlaceholder(/type your message/i),
).toBeVisible({ timeout: 10_000 });

const textarea = authenticatedPage.getByPlaceholder(/type your message/i);
await textarea.fill("Count from 1 to 5, one number per line.");

await authenticatedPage.keyboard.press("Enter");

// Wait for the assistant's response containing numbers
// Look for a pattern that indicates the assistant has responded with numbers
await expect(
authenticatedPage.getByText(/[1-5].*[1-5]/), // At least two numbers in the response
).toBeVisible({
timeout: 60_000,
});
});
});
Loading