
Commit 4426822

anmarques authored and markurtz committed
Adds aiohttp backend
1 parent 30e874e commit 4426822

File tree

    src/guidellm/backend/__init__.py
    src/guidellm/backend/aiohttp.py
    src/guidellm/backend/base.py
    src/guidellm/config.py

4 files changed: +167 −1 lines changed


src/guidellm/backend/__init__.py (+2)

@@ -1,10 +1,12 @@
 from .base import Backend, BackendEngine, BackendEnginePublic, GenerativeResponse
 from .openai import OpenAIBackend
+from .aiohttp import AiohttpBackend
 
 __all__ = [
     "Backend",
     "BackendEngine",
     "BackendEnginePublic",
     "GenerativeResponse",
     "OpenAIBackend",
+    "AiohttpBackend"
 ]
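
With the class exported here and registered under "aiohttp_server" (see
aiohttp.py below), the backend can be resolved by name through the backend
registry. A minimal usage sketch; the `Backend.create` factory, target URL,
and model name are illustrative assumptions, not part of this diff:

    # Hypothetical usage: resolve the new backend via its registered name.
    # `Backend.create`, the URL, and the model are assumptions for illustration.
    from guidellm.backend import AiohttpBackend, Backend

    backend = Backend.create(
        "aiohttp_server",  # name registered by @Backend.register
        target="http://localhost:8000/v1",
        openai_api_key="dummy-key",
        model="my-model",
    )
    assert isinstance(backend, AiohttpBackend)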

src/guidellm/backend/aiohttp.py (new file, +160)

from typing import AsyncGenerator, Dict, List, Optional

import aiohttp
import json
from loguru import logger

from guidellm.backend.base import Backend, GenerativeResponse
from guidellm.config import settings
from guidellm.core import TextGenerationRequest

__all__ = ["AiohttpBackend"]


@Backend.register("aiohttp_server")
class AiohttpBackend(Backend):
    """
    An aiohttp-based backend implementation for LLM requests.

    This class provides an interface to communicate with a server hosting
    an LLM API using aiohttp for asynchronous requests.
    """

    def __init__(
        self,
        openai_api_key: Optional[str] = None,
        target: Optional[str] = None,
        model: Optional[str] = None,
        timeout: Optional[float] = None,
        **request_args,
    ):
        self._request_args: Dict = request_args
        self._api_key: str = openai_api_key or settings.aiohttp.api_key

        if not self._api_key:
            err = ValueError(
                "`GUIDELLM__AIOHTTP__API_KEY` environment variable or "
                "--openai-api-key CLI parameter must be specified for the "
                "aiohttp backend."
            )
            logger.error("{}", err)
            raise err

        base_url = target or settings.aiohttp.base_url

        if not base_url:
            err = ValueError(
                "`GUIDELLM__AIOHTTP__BASE_URL` environment variable or "
                "target parameter must be specified for the aiohttp backend."
            )
            logger.error("{}", err)
            raise err

        self._api_url = f"{base_url}/chat/completions"
        self._timeout = aiohttp.ClientTimeout(total=timeout or settings.request_timeout)
        self._model = model

        super().__init__(type_="aiohttp_backend", target=base_url, model=self._model)
        logger.info("aiohttp {} Backend listening on {}", self._model, base_url)

    async def make_request(
        self,
        request: TextGenerationRequest,
    ) -> AsyncGenerator[GenerativeResponse, None]:
        """
        Make a request to the aiohttp backend.

        Sends a prompt to the LLM server and streams the response tokens.

        :param request: The text generation request to submit.
        :type request: TextGenerationRequest
        :yield: A stream of GenerativeResponse objects.
        :rtype: AsyncGenerator[GenerativeResponse, None]
        """

        async with aiohttp.ClientSession(timeout=self._timeout) as session:
            logger.debug(
                "Making request to aiohttp backend with prompt: {}", request.prompt
            )

            # Honor an explicit per-request token budget first; otherwise fall
            # back to the configured maximum, if one is set.
            request_args = {}
            if request.output_token_count is not None:
                request_args.update(
                    {
                        "max_completion_tokens": request.output_token_count,
                        "stop": None,
                        "ignore_eos": True,
                    }
                )
            elif settings.aiohttp.max_gen_tokens and settings.aiohttp.max_gen_tokens > 0:
                request_args.update(
                    {
                        "max_tokens": settings.aiohttp.max_gen_tokens,
                    }
                )

            request_args.update(self._request_args)

            payload = {
                "model": self._model,
                "messages": [
                    {"role": "user", "content": request.prompt},
                ],
                "stream": True,
                **request_args,
            }

            headers = {
                "Content-Type": "application/json",
                "Authorization": f"Bearer {self._api_key}",
            }

            try:
                async with session.post(
                    url=self._api_url, json=payload, headers=headers
                ) as response:
                    if response.status != 200:
                        error_message = await response.text()
                        logger.error(
                            "Request failed: {} - {}", response.status, error_message
                        )
                        raise Exception(f"Failed to generate response: {error_message}")

                    # The server streams OpenAI-style server-sent events: each
                    # non-empty line is `data: <json>` until `data: [DONE]`.
                    token_count = 0
                    async for chunk_bytes in response.content:
                        chunk_bytes = chunk_bytes.strip()
                        if not chunk_bytes:
                            continue

                        chunk = chunk_bytes.decode("utf-8").removeprefix("data: ")
                        if chunk == "[DONE]":
                            # Final response
                            yield GenerativeResponse(
                                type_="final",
                                prompt=request.prompt,
                                output_token_count=token_count,
                                prompt_token_count=request.prompt_token_count,
                            )
                        else:
                            # Intermediate token response
                            token_count += 1
                            data = json.loads(chunk)
                            delta = data["choices"][0]["delta"]
                            token = delta["content"]
                            yield GenerativeResponse(
                                type_="token_iter",
                                add_token=token,
                                prompt=request.prompt,
                                output_token_count=token_count,
                                prompt_token_count=request.prompt_token_count,
                            )
            except Exception as e:
                logger.error("Error while making request: {}", e)
                raise

    def available_models(self) -> List[str]:
        """
        Retrieve a list of available models from the server.
        """
        # This could include an API call to `{base_url}/models` if the server
        # supports it.
        logger.warning(
            "Fetching available models is not implemented for aiohttp backend."
        )
        return []

    def validate_connection(self):
        """
        Validate the connection to the backend server.
        """
        logger.info(
            "Connection validation is not explicitly implemented for aiohttp backend."
        )

src/guidellm/backend/base.py (+1 −1)

@@ -15,7 +15,7 @@
 __all__ = ["Backend", "BackendEngine", "BackendEnginePublic", "GenerativeResponse"]
 
 
-BackendEnginePublic = Literal["openai_server"]
+BackendEnginePublic = Literal["openai_server", "aiohttp_server"]
 BackendEngine = Union[BackendEnginePublic, Literal["test"]]

src/guidellm/config.py (+4)

@@ -109,6 +109,9 @@ class OpenAISettings(BaseModel):
     max_gen_tokens: int = 4096
 
 
+class AiohttpSettings(OpenAISettings):
+    pass
+
 class ReportGenerationSettings(BaseModel):
     """
     Report generation settings for the application
@@ -153,6 +156,7 @@ class Settings(BaseSettings):
 
     # Request settings
     openai: OpenAISettings = OpenAISettings()
+    aiohttp: AiohttpSettings = AiohttpSettings()
 
     # Report settings
     report_generation: ReportGenerationSettings = ReportGenerationSettings()
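
Because AiohttpSettings subclasses OpenAISettings and is mounted on Settings
as `aiohttp`, it inherits the same fields (api_key, base_url, max_gen_tokens)
under the `GUIDELLM__AIOHTTP__` environment prefix referenced by the backend's
error messages. A minimal sketch, assuming the usual pydantic BaseSettings
behavior of reading the environment at instantiation (values are placeholders):

    # Illustrative only: configure the new settings branch through the
    # GUIDELLM__<SECTION>__<FIELD> environment variables seen above.
    import os

    os.environ["GUIDELLM__AIOHTTP__API_KEY"] = "dummy-key"
    os.environ["GUIDELLM__AIOHTTP__BASE_URL"] = "http://localhost:8000/v1"

    from guidellm.config import Settings

    settings = Settings()  # fresh instance so the variables above are read
    print(settings.aiohttp.base_url)  # -> http://localhost:8000/v1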
