Commit 07d5aa3

Using smaller container and mock model server
Signed-off-by: Martin Gaievski <[email protected]>
1 parent 3b1ecc0 commit 07d5aa3

File tree

5 files changed: +759 -75 lines changed

.github/workflows/test-with-llm.yml

Lines changed: 151 additions & 69 deletions
@@ -7,85 +7,167 @@ on:
     branches: [ main ]
 
 jobs:
-  test-with-localai:
+  test-with-mock-llm:
     runs-on: ubuntu-latest
-
-    services:
-      localai:
-        image: quay.io/go-skynet/local-ai:v2.23.0
-        ports:
-          - 8080:8080
-        env:
-          MODELS_PATH: /models
-          THREADS: 4
-          CONTEXT_SIZE: 2048
-          DEBUG: "true"
-        options: >-
-          --health-cmd="curl -f http://localhost:8080/readyz || exit 1"
-          --health-interval=30s
-          --health-timeout=10s
-          --health-retries=5
-
+
     steps:
       - uses: actions/checkout@v4
-
-      - name: Set up JDK 17
+
+      - name: Set up JDK 21
         uses: actions/setup-java@v4
         with:
-          java-version: '17'
+          java-version: '21'
           distribution: 'temurin'
-
+
       - name: Setup Gradle
        uses: gradle/gradle-build-action@v2
-
-      - name: Download and configure LLM model
+
+      - name: Create Mock LLM Server
         run: |
-          # Create model configuration for LocalAI
-          mkdir -p models
-
-          # Download a small GGUF model (phi-2)
-          curl -L "https://huggingface.co/TheBloke/phi-2-GGUF/resolve/main/phi-2.Q4_K_M.gguf" \
-            -o models/phi-2.gguf
-
-          # Create model configuration
-          cat > models/phi-2.yaml <<EOF
-          name: phi-2
-          backend: llama-cpp
-          parameters:
-            model: phi-2.gguf
-            temperature: 0.1
-            top_k: 40
-            top_p: 0.95
-            seed: -1
-          mmap: true
-          f16: true
-          threads: 4
-          batch: 512
-          context_size: 2048
-          template:
-            chat_message: |
-              <|im_start|>{role}
-              {content}<|im_end|>
-            chat: |
-              {messages}
-              <|im_start|>assistant
-            completion: |
-              {prompt}
+          # Create a simple mock LLM server using Python
+          cat > mock_llm_server.py <<'EOF'
+          from http.server import HTTPServer, BaseHTTPRequestHandler
+          import json
+          import uuid
+          from datetime import datetime
+
+          class MockLLMHandler(BaseHTTPRequestHandler):
+              def do_GET(self):
+                  if self.path == '/v1/models':
+                      self.send_response(200)
+                      self.send_header('Content-Type', 'application/json')
+                      self.end_headers()
+                      response = {
+                          "data": [
+                              {
+                                  "id": "mock-model",
+                                  "object": "model",
+                                  "created": int(datetime.now().timestamp()),
+                                  "owned_by": "mock"
+                              }
+                          ]
+                      }
+                      self.wfile.write(json.dumps(response).encode())
+                  else:
+                      self.send_response(404)
+                      self.end_headers()
+
+              def do_POST(self):
+                  content_length = int(self.headers['Content-Length'])
+                  post_data = self.rfile.read(content_length)
+
+                  if self.path == '/v1/chat/completions':
+                      self.send_response(200)
+                      self.send_header('Content-Type', 'application/json')
+                      self.end_headers()
+
+                      # Mock response for relevance evaluation
+                      response = {
+                          "id": f"chatcmpl-{uuid.uuid4().hex[:8]}",
+                          "object": "chat.completion",
+                          "created": int(datetime.now().timestamp()),
+                          "model": "mock-model",
+                          "choices": [
+                              {
+                                  "index": 0,
+                                  "message": {
+                                      "role": "assistant",
+                                      "content": "Based on the query and document, I would rate the relevance as 0.85 out of 1.0. The document directly addresses the query about smartphones with good cameras."
+                                  },
+                                  "finish_reason": "stop"
+                              }
+                          ],
+                          "usage": {
+                              "prompt_tokens": 10,
+                              "completion_tokens": 20,
+                              "total_tokens": 30
+                          }
+                      }
+                      self.wfile.write(json.dumps(response).encode())
+                  else:
+                      self.send_response(404)
+                      self.end_headers()
+
+              def log_message(self, format, *args):
+                  # Suppress log messages
+                  pass
+
+          if __name__ == '__main__':
+              server = HTTPServer(('localhost', 8080), MockLLMHandler)
+              print("Mock LLM server started on http://localhost:8080")
+              server.serve_forever()
           EOF
-
-          # Copy model to LocalAI container
-          docker cp models/phi-2.gguf $(docker ps -q -f "ancestor=quay.io/go-skynet/local-ai:v2.23.0"):/models/
-          docker cp models/phi-2.yaml $(docker ps -q -f "ancestor=quay.io/go-skynet/local-ai:v2.23.0"):/models/
-
-          # Wait for model to be loaded
-          sleep 10
-
-          # Test LocalAI API
-          curl -f http://localhost:8080/v1/models || exit 1
-
-      - name: Run integration tests with LLM
+
+          # Start the mock server in the background
+          python3 mock_llm_server.py &
+          MOCK_SERVER_PID=$!
+          echo "MOCK_SERVER_PID=$MOCK_SERVER_PID" >> $GITHUB_ENV
+
+          # Wait for server to start
+          sleep 2
+
+          # Test the mock server
+          curl -s http://localhost:8080/v1/models | jq .
+
+      - name: Run integration tests with Mock LLM
         env:
           LOCALAI_API_URL: http://localhost:8080
-          LLM_MODEL_NAME: phi-2
+          LLM_MODEL_NAME: mock-model
+        run: |
+          ./gradlew integTest --tests "*LLMJudgmentGenerationIT" -Dtests.cluster.llm.enabled=true
+
+      - name: Stop Mock Server
+        if: always()
+        run: |
+          if [ ! -z "$MOCK_SERVER_PID" ]; then
+            kill $MOCK_SERVER_PID || true
+          fi
+
+  # Alternative job using llamafile (single binary, ~4GB but faster than LocalAI)
+  test-with-llamafile:
+    runs-on: ubuntu-latest
+    # if: github.event_name == 'push' && github.ref == 'refs/heads/main'
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up JDK 21
+        uses: actions/setup-java@v4
+        with:
+          java-version: '21'
+          distribution: 'temurin'
+
+      - name: Setup Llamafile
+        run: |
+          # Download TinyLlama llamafile (smallest available, ~600MB)
+          wget -q https://huggingface.co/jartine/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/TinyLlama-1.1B-Chat-v1.0.Q4_K_M.llamafile
+          chmod +x TinyLlama-1.1B-Chat-v1.0.Q4_K_M.llamafile
+
+          # Start llamafile server
+          ./TinyLlama-1.1B-Chat-v1.0.Q4_K_M.llamafile --server --port 8080 --nobrowser &
+          LLAMAFILE_PID=$!
+          echo "LLAMAFILE_PID=$LLAMAFILE_PID" >> $GITHUB_ENV
+
+          # Wait for server to start (may take a minute)
+          echo "Waiting for llamafile to start..."
+          for i in {1..60}; do
+            if curl -s http://localhost:8080/v1/models > /dev/null; then
+              echo "Llamafile started successfully"
+              break
+            fi
+            sleep 2
+          done
+
+      - name: Run integration tests with Llamafile
+        env:
+          LOCALAI_API_URL: http://localhost:8080
+          LLM_MODEL_NAME: TinyLlama-1.1B-Chat-v1.0
+        run: |
+          ./gradlew integTest --tests "*LLMJudgmentGenerationIT" -Dtests.cluster.llm.enabled=true
+
+      - name: Stop Llamafile
+        if: always()
         run: |
-          ./gradlew integTest --tests "*LLMJudgmentIT" -Dtests.cluster.llm.enabled=true
+          if [ ! -z "$LLAMAFILE_PID" ]; then
+            kill $LLAMAFILE_PID || true
+          fi
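
Note: the workflow's built-in smoke test only hits the mock server's /v1/models endpoint with curl. For local debugging of the chat path, a minimal check could look like the sketch below. It assumes mock_llm_server.py from the diff above is already running on http://localhost:8080; the request payload is illustrative, since the mock handler ignores the body content and always returns the same canned relevance answer.

    # check_mock_llm.py - hypothetical local helper, not part of this commit.
    # Assumes mock_llm_server.py (defined in the workflow above) is already
    # running on http://localhost:8080.
    import json
    import urllib.request

    # Any JSON body works: the mock handler only looks at the request path
    # and Content-Length, then returns a fixed chat.completion response.
    payload = {
        "model": "mock-model",
        "messages": [
            {"role": "user", "content": "Rate this document's relevance to the query."}
        ],
    }

    req = urllib.request.Request(
        "http://localhost:8080/v1/chat/completions",
        data=json.dumps(payload).encode(),
        headers={"Content-Type": "application/json"},
        method="POST",
    )

    with urllib.request.urlopen(req) as resp:
        body = json.load(resp)

    # Prints the canned relevance judgment hard-coded in the mock server.
    print(body["choices"][0]["message"]["content"])

The response shape is the standard OpenAI-style chat.completion object, which is presumably what the *LLMJudgmentGenerationIT tests parse when run with -Dtests.cluster.llm.enabled=true.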
