diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml
index ae948b8..6a3a7db 100644
--- a/.github/workflows/e2e.yml
+++ b/.github/workflows/e2e.yml
@@ -9,6 +9,10 @@ on:
 permissions:
   contents: read
 
+env:
+  # Ollama config for AI assistant tests
+  OLLAMA_MODELS: /home/runner/.ollama/models
+
 jobs:
   e2e:
     name: Playwright E2E
@@ -22,10 +26,39 @@ jobs:
       NEXT_PUBLIC_OIDC_PROVIDER_ID: oidc
       BETTER_AUTH_URL: http://localhost:3000
       BETTER_AUTH_SECRET: test-only-not-a-real-better-auth-secret
+      USE_OLLAMA: "true"
+      OLLAMA_MODEL: qwen2.5:1.5b
+      OLLAMA_BASE_URL: http://localhost:11434
     steps:
       - name: Checkout
         uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
 
+      - name: Cache Ollama binary
+        id: cache-ollama
+        uses: actions/cache@v4
+        with:
+          path: /usr/local/bin/ollama
+          key: ollama-binary-v0.13.5
+
+      - name: Cache Ollama models
+        uses: actions/cache@v4
+        with:
+          path: /home/runner/.ollama/models
+          key: ollama-qwen2.5-1.5b-v3
+
+      - name: Install Ollama
+        if: steps.cache-ollama.outputs.cache-hit != 'true'
+        run: curl -fsSL https://ollama.com/install.sh | sh
+
+      - name: Start Ollama server
+        run: |
+          sudo systemctl stop ollama 2>/dev/null || true
+          OLLAMA_MODELS=/home/runner/.ollama/models ollama serve &
+          sleep 3
+
+      - name: Pull Ollama model
+        run: OLLAMA_MODELS=/home/runner/.ollama/models ollama pull qwen2.5:1.5b
+
       - name: Setup
         uses: ./.github/actions/setup
diff --git a/package.json b/package.json
index 6ce1956..6d21cfb 100644
--- a/package.json
+++ b/package.json
@@ -16,6 +16,8 @@
     "test:e2e": "playwright test",
     "test:e2e:ui": "playwright test --ui",
     "test:e2e:debug": "playwright test --debug",
+    "test:e2e:ollama": "USE_OLLAMA=true OLLAMA_MODEL=qwen2.5:1.5b OLLAMA_BASE_URL=http://localhost:11434 playwright test",
+    "test:e2e:ollama:ui": "USE_OLLAMA=true OLLAMA_MODEL=qwen2.5:1.5b OLLAMA_BASE_URL=http://localhost:11434 playwright test --ui",
     "test:coverage": "vitest run --coverage",
     "type-check": "tsc --noEmit",
     "prepare": "husky",
@@ -47,6 +49,7 @@
     "@radix-ui/react-toggle-group": "^1.1.11",
     "@radix-ui/react-tooltip": "^1.2.8",
     "ai": "^5.0.114",
+    "ai-sdk-ollama": "^2.1.0",
     "ajv": "^8.17.1",
     "ajv-formats": "^3.0.1",
     "better-auth": "1.4.6",
diff --git a/playwright.config.mts b/playwright.config.mts
index ddaf463..93789ed 100644
--- a/playwright.config.mts
+++ b/playwright.config.mts
@@ -48,6 +48,11 @@ export default defineConfig({
       NEXT_PUBLIC_OIDC_PROVIDER_ID: "okta",
       BETTER_AUTH_URL: "http://localhost:3000",
       BETTER_AUTH_SECRET: "e2e-test-secret-at-least-32-chars-long",
+      // Always use Ollama for E2E tests to avoid needing OpenRouter API keys
+      USE_OLLAMA: "true",
+      OLLAMA_MODEL: process.env.OLLAMA_MODEL ?? "qwen2.5:1.5b",
+      OLLAMA_BASE_URL:
+        process.env.OLLAMA_BASE_URL ?? "http://localhost:11434",
     },
   },
 });
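One fragile spot in the workflow above: the "Start Ollama server" step waits a fixed `sleep 3` before moving on, so a slow cold start can leave the subsequent `ollama pull` talking to a server that is not listening yet. A polling readiness check is sturdier. Below is a minimal TypeScript sketch; `/api/tags` is a standard Ollama HTTP endpoint, while wiring it into Playwright's `globalSetup` and the 30-second budget are assumptions, not part of this diff:

```ts
// wait-for-ollama.ts - a readiness probe as an alternative to the fixed
// `sleep 3` in the workflow. Polls Ollama's /api/tags endpoint until the
// server answers; the 30 s budget and 500 ms interval are assumed values,
// tune them for your runner.
const OLLAMA_BASE_URL = process.env.OLLAMA_BASE_URL ?? "http://localhost:11434";

export async function waitForOllama(timeoutMs = 30_000): Promise<void> {
  const deadline = Date.now() + timeoutMs;
  while (Date.now() < deadline) {
    try {
      const res = await fetch(`${OLLAMA_BASE_URL}/api/tags`);
      if (res.ok) return; // server is listening and responding
    } catch {
      // connection refused while the server is still starting; keep polling
    }
    await new Promise((resolve) => setTimeout(resolve, 500));
  }
  throw new Error(`Ollama did not become ready within ${timeoutMs}ms`);
}
```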
"http://localhost:11434", }, }, }); diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 4015740..ffe0df6 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -68,6 +68,9 @@ importers: ai: specifier: ^5.0.114 version: 5.0.114(zod@4.1.12) + ai-sdk-ollama: + specifier: ^2.1.0 + version: 2.1.0(ai@5.0.114(zod@4.1.12))(zod@4.1.12) ajv: specifier: ^8.17.1 version: 8.17.1 @@ -2164,6 +2167,12 @@ packages: resolution: {integrity: sha512-MnA+YT8fwfJPgBx3m60MNqakm30XOkyIoH1y6huTQvC0PwZG7ki8NacLBcrPbNoo8vEZy7Jpuk7+jMO+CUovTQ==} engines: {node: '>= 14'} + ai-sdk-ollama@2.1.0: + resolution: {integrity: sha512-VRTT34zt6GG09IxdiO0aqOM6wBMgKj+oUQH2E+28XtPrHL2AV+L87UhU1VnGFfo+eXLMcZKJre5NWM9HMA5L5A==} + engines: {node: '>=22'} + peerDependencies: + ai: ^5.0.115 + ai@5.0.114: resolution: {integrity: sha512-q/lxcJA6avYn/TXTaE41VX6p9lN245mDU9bIGuPpfk6WxDMvmMoUKUIS0/aXAPYN3UmkUn/r9rvq/8C98RoCWw==} engines: {node: '>=18'} @@ -3793,6 +3802,9 @@ packages: oidc-provider@9.6.0: resolution: {integrity: sha512-CCRUYPOumEy/DT+L86H40WgXjXfDHlsJYZdyd4ZKGFxJh/kAd7DxMX3dwpbX0g+WjB+NWU+kla1b/yZmHNcR0Q==} + ollama@0.6.3: + resolution: {integrity: sha512-KEWEhIqE5wtfzEIZbDCLH51VFZ6Z3ZSa6sIOg/E/tBV8S51flyqBOXi+bRxlOYKDf8i327zG9eSTb8IJxvm3Zg==} + on-finished@2.4.1: resolution: {integrity: sha512-oVlzkg3ENAhCk2zdv7IJwd/QUD4z2RxRwpkcGY8psCVcCYZNq4wYnVWALHM+brtuJjePWiYF/ClmuDr8Ch5+kg==} engines: {node: '>= 0.8'} @@ -4641,6 +4653,9 @@ packages: resolution: {integrity: sha512-6qN4hJdMwfYBtE3YBTTHhoeuUrDBPZmbQaxWAqSALV/MeEnR5z1xd8UKud2RAkFoPkmB+hli1TZSnyi84xz1vQ==} engines: {node: '>=18'} + whatwg-fetch@3.6.20: + resolution: {integrity: sha512-EqhiFU6daOA8kpjOWTL0olhVOF3i7OrFzSYiGsEMB8GcXS+RrzauAERX65xMeNWVqxA6HXH2m69Z9LaKKdisfg==} + whatwg-mimetype@4.0.0: resolution: {integrity: sha512-QaKxh0eNIi2mE9p2vEdzfagOKHCcj1pJ56EEHGQOVxp8r9/iszLUUV7v89x9O1p/T+NlTM5W7jW6+cz4Fq1YVg==} engines: {node: '>=18'} @@ -6479,6 +6494,15 @@ snapshots: agent-base@7.1.4: {} + ai-sdk-ollama@2.1.0(ai@5.0.114(zod@4.1.12))(zod@4.1.12): + dependencies: + '@ai-sdk/provider': 2.0.0 + '@ai-sdk/provider-utils': 3.0.19(zod@4.1.12) + ai: 5.0.114(zod@4.1.12) + ollama: 0.6.3 + transitivePeerDependencies: + - zod + ai@5.0.114(zod@4.1.12): dependencies: '@ai-sdk/gateway': 2.0.21(zod@4.1.12) @@ -8472,6 +8496,10 @@ snapshots: transitivePeerDependencies: - supports-color + ollama@0.6.3: + dependencies: + whatwg-fetch: 3.6.20 + on-finished@2.4.1: dependencies: ee-first: 1.1.1 @@ -9415,6 +9443,8 @@ snapshots: dependencies: iconv-lite: 0.6.3 + whatwg-fetch@3.6.20: {} + whatwg-mimetype@4.0.0: {} whatwg-url@15.1.0: diff --git a/src/app/api/chat/route.ts b/src/app/api/chat/route.ts index f839fe9..1bd2804 100644 --- a/src/app/api/chat/route.ts +++ b/src/app/api/chat/route.ts @@ -5,6 +5,7 @@ import { streamText, type ToolSet, } from "ai"; +import { ollama } from "ai-sdk-ollama"; import { headers } from "next/headers"; import { DEFAULT_MODEL } from "@/app/assistant/constants"; import { getServers } from "@/app/catalog/actions"; @@ -16,6 +17,8 @@ import { } from "@/lib/mcp/client"; import { SYSTEM_PROMPT } from "./system-prompt"; +const OLLAMA_MODEL = process.env.OLLAMA_MODEL ?? "qwen2.5:1.5b"; + export const maxDuration = 60; interface ConnectionResult { @@ -196,19 +199,35 @@ export async function POST(req: Request) { ? 
diff --git a/src/app/api/chat/route.ts b/src/app/api/chat/route.ts
index f839fe9..1bd2804 100644
--- a/src/app/api/chat/route.ts
+++ b/src/app/api/chat/route.ts
@@ -5,6 +5,7 @@ import {
   streamText,
   type ToolSet,
 } from "ai";
+import { ollama } from "ai-sdk-ollama";
 import { headers } from "next/headers";
 import { DEFAULT_MODEL } from "@/app/assistant/constants";
 import { getServers } from "@/app/catalog/actions";
@@ -16,6 +17,8 @@ import {
 } from "@/lib/mcp/client";
 import { SYSTEM_PROMPT } from "./system-prompt";
 
+const OLLAMA_MODEL = process.env.OLLAMA_MODEL ?? "qwen2.5:1.5b";
+
 export const maxDuration = 60;
 
 interface ConnectionResult {
@@ -196,19 +199,35 @@ export async function POST(req: Request) {
       ? requestedModel
       : DEFAULT_MODEL;
 
+  // Check if we should use Ollama (for E2E testing)
+  const useOllama = process.env.USE_OLLAMA === "true";
+
+  // Validate the API key when running against OpenRouter (production mode)
   const apiKey = process.env.OPENROUTER_API_KEY;
-  if (!apiKey) {
+  if (!useOllama && !apiKey) {
     console.error("[Chat API] OPENROUTER_API_KEY not configured");
     return new Response("Service unavailable", { status: 503 });
   }
 
-  const { tools, clients, errors } = await getMcpTools({
-    selectedServers,
-    enabledTools: enabledToolsFromRequest,
-  });
+  // Create the model: Ollama for E2E tests, OpenRouter for production
+  const model = useOllama
+    ? ollama(OLLAMA_MODEL)
+    : createOpenRouter({ apiKey: apiKey as string })(modelId);
 
-  // If all servers failed to connect, return an error
-  if (Object.keys(tools).length === 0 && errors.length > 0) {
+  if (useOllama) {
+    console.log(`[Chat API] Using Ollama model: ${OLLAMA_MODEL}`);
+  }
+
+  // Skip MCP tool fetching in test mode (Ollama) to avoid connection errors
+  const { tools, clients, errors } = useOllama
+    ? { tools: {}, clients: [], errors: [] }
+    : await getMcpTools({
+        selectedServers,
+        enabledTools: enabledToolsFromRequest,
+      });
+
+  // If all servers failed to connect, return an error (skipped in test mode)
+  if (!useOllama && Object.keys(tools).length === 0 && errors.length > 0) {
     const serverNames = errors.map((err) => err.serverName).join(", ");
     return new Response(
       `Unable to connect to ${serverNames} MCP servers. Please check that the servers are running and accessible.`,
@@ -219,9 +238,6 @@
     );
   }
 
-  const provider = createOpenRouter({ apiKey });
-  const model = provider(modelId);
-
   const startTime = Date.now();
 
   const result = streamText({
@@ -231,6 +247,8 @@
     toolChoice: "auto",
     stopWhen: stepCountIs(5), // Allow multiple steps for tool execution and response generation
     system: SYSTEM_PROMPT,
+    // Pin temperature to 0 for more deterministic responses in test mode
+    temperature: useOllama ? 0 : undefined,
     onFinish: async () => {
       // Close MCP clients
       for (const client of clients) {
@@ -248,8 +266,8 @@
       if (part.type === "start") {
         return {
           createdAt: Date.now(),
-          model: modelId,
-          providerId: "openrouter",
+          model: useOllama ? OLLAMA_MODEL : modelId,
+          providerId: useOllama ? "ollama" : "openrouter",
         };
       }
       if (part.type === "finish") {
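The Ollama/OpenRouter decision above is woven through the handler, which makes it awkward to cover with a unit test. One way to factor it out is sketched below. This refactor is not part of the diff: `resolveModel` is a hypothetical helper, and the `@openrouter/ai-sdk-provider` import path is an assumption inferred from the `createOpenRouter` call (the route's own import statement is outside the hunks shown):

```ts
// model-factory.ts - hypothetical extraction of the provider switch, so the
// env-driven Ollama/OpenRouter choice can be unit-tested apart from the route.
import type { LanguageModel } from "ai";
import { ollama } from "ai-sdk-ollama";
import { createOpenRouter } from "@openrouter/ai-sdk-provider"; // assumed import path

export function resolveModel(modelId: string): LanguageModel {
  // Mirrors the route: USE_OLLAMA=true short-circuits to the local model
  if (process.env.USE_OLLAMA === "true") {
    return ollama(process.env.OLLAMA_MODEL ?? "qwen2.5:1.5b");
  }
  const apiKey = process.env.OPENROUTER_API_KEY;
  if (!apiKey) {
    throw new Error("OPENROUTER_API_KEY not configured");
  }
  return createOpenRouter({ apiKey })(modelId);
}
```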
diff --git a/tests/e2e/assistant.spec.ts b/tests/e2e/assistant.spec.ts
new file mode 100644
index 0000000..5616bf4
--- /dev/null
+++ b/tests/e2e/assistant.spec.ts
@@ -0,0 +1,105 @@
+import { expect, test } from "./fixtures";
+
+const OLLAMA_BASE_URL = process.env.OLLAMA_BASE_URL ?? "http://localhost:11434";
+const OLLAMA_MODEL = process.env.OLLAMA_MODEL ?? "qwen2.5:1.5b";
+
+/**
+ * Warms up Ollama by making a simple generation request.
+ * This ensures the model is loaded into memory before the tests run.
+ */
+async function warmupOllama(): Promise<void> {
+  const response = await fetch(`${OLLAMA_BASE_URL}/api/generate`, {
+    method: "POST",
+    headers: { "Content-Type": "application/json" },
+    body: JSON.stringify({
+      model: OLLAMA_MODEL,
+      prompt: "Say hello",
+      stream: false,
+    }),
+  });
+
+  if (!response.ok) {
+    throw new Error(
+      `Ollama warmup failed: ${response.status} ${response.statusText}`,
+    );
+  }
+
+  const result = await response.json();
+  console.log(`Ollama warmup complete. Model: ${OLLAMA_MODEL}`);
+  console.log(`Warmup response: ${result.response?.substring(0, 50)}...`);
+}
+
+test.describe("Assistant chat", () => {
+  // test.slow() triples the test timeout for every test in this block, since LLM inference is slow
+  test.slow();
+
+  // Warm up Ollama before any tests in this describe block run
+  test.beforeAll(async () => {
+    console.log("Warming up Ollama...");
+    const startTime = Date.now();
+
+    try {
+      await warmupOllama();
+      console.log(`Ollama warmup took ${Date.now() - startTime}ms`);
+    } catch (error) {
+      console.error("Ollama warmup failed:", error);
+      throw error;
+    }
+  });
+
+  test("responds to user message with expected content", async ({
+    authenticatedPage,
+  }) => {
+    // Use a unique identifier that we expect to appear in the response
+    const testUsername = `testuser_${Date.now()}`;
+
+    await authenticatedPage.goto("/assistant");
+
+    // Wait for the page to load
+    await expect(
+      authenticatedPage.getByPlaceholder(/type your message/i),
+    ).toBeVisible({ timeout: 10_000 });
+
+    // Type a message that includes the unique identifier
+    const textarea = authenticatedPage.getByPlaceholder(/type your message/i);
+    await textarea.fill(
+      `Reply with a short greeting for the user named '${testUsername}'. Include their exact username in your response.`,
+    );
+
+    // Submit the message
+    await authenticatedPage.keyboard.press("Enter");
+
+    // Wait for the assistant's response to appear.
+    // The response should contain the unique username inside a greeting.
+    // A generous timeout is used since model inference can take time.
+    // The regex matches any greeting word followed by the username.
+    await expect(
+      authenticatedPage.getByText(
+        new RegExp(`(hello|hi|hey|greetings).*${testUsername}`, "i"),
+      ),
+    ).toBeVisible({
+      timeout: 60_000,
+    });
+  });
+
+  test("displays streaming response", async ({ authenticatedPage }) => {
+    await authenticatedPage.goto("/assistant");
+
+    await expect(
+      authenticatedPage.getByPlaceholder(/type your message/i),
+    ).toBeVisible({ timeout: 10_000 });
+
+    const textarea = authenticatedPage.getByPlaceholder(/type your message/i);
+    await textarea.fill("Count from 1 to 5, one number per line.");
+
+    await authenticatedPage.keyboard.press("Enter");
+
+    // Wait for the assistant's response containing numbers.
+    // A match with two digits indicates the numbered reply has rendered.
+    await expect(
+      authenticatedPage.getByText(/[1-5].*[1-5]/), // at least two numbers in the response
+    ).toBeVisible({
+      timeout: 60_000,
+    });
+  });
+});
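A possible refinement to the warmup: by default Ollama unloads an idle model after a few minutes, so a long suite can pay the model-load cost twice. The `/api/generate` endpoint accepts a `keep_alive` field that controls how long the model stays resident after a request. A sketch reusing the constants from the spec above; `keep_alive` is a real Ollama API field, but the 10-minute value is an assumption sized to the suite's runtime:

```ts
// Variant of warmupOllama() that pins the model in memory between tests.
// Assumes OLLAMA_BASE_URL and OLLAMA_MODEL as defined in the spec above.
async function warmupOllamaPinned(): Promise<void> {
  const response = await fetch(`${OLLAMA_BASE_URL}/api/generate`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({
      model: OLLAMA_MODEL,
      prompt: "Say hello",
      stream: false,
      keep_alive: "10m", // assumed value: keep the model loaded for the whole suite
    }),
  });
  if (!response.ok) {
    throw new Error(`Ollama warmup failed: ${response.status}`);
  }
}
```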