@@ -47,7 +47,7 @@ def _max_threads():
4747async def initialize_server_endpoint (
4848 threads : int = Query (1 , gt = 0 , le = os .cpu_count ()),
4949 ctx_size : int = Query (2048 , gt = 0 ),
50- port : int = Query (8081 , gt = 8080 , le = 65535 ),
50+ port : int = Query (8081 , gt = 8081 , le = 65535 ),
5151 system_prompt : str = Query ("You are a helpful assistant." , description = "Unique system prompt for this server instance" ),
5252 n_predict : int = Query (256 , gt = 0 , description = "Number of tokens to predict for the server instance." ),
5353 temperature : float = Query (0.8 , gt = 0.0 , le = 2.0 , description = "Temperature for sampling" )
@@ -263,7 +263,7 @@ async def chat_with_bitnet(
263263 proc = server_processes .get (key )
264264 cfg = server_configs .get (key )
265265 if not (proc and proc .poll () is None and cfg ):
266- raise HTTPException (status_code = 503 , detail = f"bitnet server not running on { host } : { chat .port } . Initialize it first ." )
266+ raise HTTPException (status_code = 404 , detail = f"Server on port { chat .port } not running or not configured ." )
267267 server_url = f"http://{ host } :{ chat .port } /completion"
268268 payload = {
269269 "prompt" : chat .message ,
@@ -272,34 +272,32 @@ async def chat_with_bitnet(
272272 "n_predict" : chat .n_predict ,
273273 "temperature" : chat .temperature
274274 }
275+ # Use httpx for async requests
275276 async def _chat ():
276277 async with httpx .AsyncClient () as client :
277278 try :
278- response = await client .post (server_url , json = payload , timeout = 180.0 )
279- response .raise_for_status ()
280- result_data = response .json ()
281- content = result_data .get ("content" , result_data )
282- return {"result" : content }
283- except httpx .TimeoutException :
284- raise HTTPException (status_code = 504 , detail = "Request to bitnet server timed out." )
279+ response = await client .post (server_url , json = payload , timeout = 60.0 ) # Increased timeout
280+ response .raise_for_status () # Raise an exception for bad status codes
281+ return response .json ()
282+ except httpx .ReadTimeout :
283+ raise HTTPException (status_code = 504 , detail = f"Request to BitNet server on port { chat .port } timed out." )
285284 except httpx .ConnectError :
286- raise HTTPException (status_code = 503 , detail = f"Could not connect to bitnet server at { server_url } . Is it running?" )
287- except httpx .RequestError as e :
288- raise HTTPException (status_code = 500 , detail = f"Error during request to bitnet server: { str (e )} " )
285+ raise HTTPException (status_code = 503 , detail = f"Could not connect to BitNet server on port { chat .port } ." )
289286 except httpx .HTTPStatusError as e :
290- error_detail = e .response .text or str (e )
291- raise HTTPException (status_code = e .response .status_code , detail = f"bitnet server returned error: { error_detail } " )
287+ raise HTTPException (status_code = e .response .status_code , detail = f"BitNet server error: { e .response .text } " )
292288 except Exception as e :
293- raise HTTPException (status_code = 500 , detail = f"Unexpected error during chat: { str (e )} " )
294- return _chat
289+ # Catch any other unexpected errors during the chat process
290+ error_detail = f"An unexpected error occurred while communicating with BitNet server on port { chat .port } : { str (e )} "
291+ raise HTTPException (status_code = 500 , detail = error_detail )
292+ return await _chat ()
295293
class MultiChatRequest(BaseModel):
    """Batch of chat requests handled together by ``multichat_with_bitnet``.

    Attributes:
        requests: The individual ``ChatRequest`` items; the multichat
            handler iterates over this list and runs one chat per item.
    """

    requests: List[ChatRequest]
298296
299297async def multichat_with_bitnet (multichat : MultiChatRequest ):
300298 async def run_chat (chat_req : ChatRequest ):
301299 chat_fn = chat_with_bitnet (chat_req )
302- return await chat_fn ()
300+ return await chat_fn
303301 results = await asyncio .gather (* (run_chat (req ) for req in multichat .requests ), return_exceptions = True )
304302 # Format results: if exception, return error message
305303 formatted = []
0 commit comments