
Commit cc0e507

openai compatible chat api (#441)
1 parent 0063302 commit cc0e507

File tree

10 files changed, +350 -187 lines changed

modelscope_agent/agents/role_play.py (+1, -1)

````diff
@@ -153,7 +153,7 @@ def _run(self,
              lang: str = 'zh',
              **kwargs):

-        chat_mode = kwargs.get('chat_mode', False)
+        chat_mode = kwargs.pop('chat_mode', False)
         tools = kwargs.get('tools', None)
         tool_choice = kwargs.get('tool_choice', 'auto')

````

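The switch from `kwargs.get` to `kwargs.pop` means `chat_mode` is consumed rather than left in `kwargs`, so later consumers of the same `**kwargs` no longer see it. A minimal sketch of the difference (illustrative values, not code from this repo):

```Python
kwargs = {'chat_mode': True, 'tools': None}

chat_mode = kwargs.get('chat_mode', False)  # key kept: kwargs still carries chat_mode
chat_mode = kwargs.pop('chat_mode', False)  # key removed: kwargs == {'tools': None}
```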
modelscope_agent/llm/base.py (+15)

````diff
@@ -45,6 +45,8 @@ def __init__(self,
         self.model_server = model_server
         self.max_length = 6000

+        self.last_call_usage_info = {}
+
         # It is okay to use the same code to handle the output
         # regardless of whether stream is True or False, as follows:
         # ```py
@@ -239,3 +241,16 @@ def check_max_length(self, messages: Union[List[Dict], str]) -> bool:

     def get_max_length(self) -> int:
         return self.max_length
+
+    def get_usage(self) -> Dict:
+        return self.last_call_usage_info
+
+    def stat_last_call_token_info(self, response):
+        try:
+            self.last_call_usage_info = response.usage.dict()
+            return response
+        except AttributeError:
+            for chunk in response:
+                if hasattr(chunk, 'usage') and chunk.usage is not None:
+                    self.last_call_usage_info = chunk.usage.dict()
+                yield chunk
````

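These two additions give every LLM backend a common usage-reporting surface: `stat_last_call_token_info` records the last call's token counts and `get_usage` exposes them. A minimal consumption sketch (the `get_chat_model` entry point and the model/server values are assumptions, not part of this diff); note that for streaming calls the usage is only recorded once the wrapped generator has been drained:

```Python
from modelscope_agent.llm import get_chat_model  # assumed entry point

llm = get_chat_model(model='qwen-max', model_server='dashscope')

# Streaming: usage is captured chunk by chunk as the wrapped generator is consumed.
for chunk in llm.chat(messages=[{'role': 'user', 'content': 'hi'}], stream=True):
    pass

# e.g. {'prompt_tokens': 267, 'completion_tokens': 15, 'total_tokens': 282}
print(llm.get_usage())
```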
modelscope_agent/llm/dashscope.py (+21)

````diff
@@ -101,6 +101,8 @@ def _chat_stream(self,
         if kwargs.get('seed', None):
             generation_input['seed'] = kwargs.get('seed')
         response = dashscope.Generation.call(**generation_input)
+        print(response)
+        response = self.stat_last_call_token_info(response)
         return stream_output(response, **kwargs)

     def _chat_no_stream(self,
@@ -119,6 +121,7 @@ def _chat_no_stream(self,
             top_p=top_p,
         )
         if response.status_code == HTTPStatus.OK:
+            self.stat_last_call_token_info(response)
             return response.output.choices[0].message.content
         else:
             err = 'Error code: %s, error message: %s' % (
@@ -127,6 +130,24 @@ def _chat_no_stream(self,
             )
             return err

+    def stat_last_call_token_info(self, response):
+        try:
+            self.last_call_usage_info = {
+                'prompt_tokens': response.usage.input_tokens,
+                'completion_tokens': response.usage.output_tokens,
+                'total_tokens': response.usage.total_tokens
+            }
+            return response
+        except AttributeError:
+            for chunk in response:
+                # if hasattr(chunk.output, 'usage'):
+                self.last_call_usage_info = {
+                    'prompt_tokens': chunk.usage.input_tokens,
+                    'completion_tokens': chunk.usage.output_tokens,
+                    'total_tokens': chunk.usage.total_tokens
+                }
+                yield chunk
+

@register_llm('dashscope_qwen')
@register_llm('dashscope_qwen1.5')
````

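DashScope names its counters `input_tokens`/`output_tokens`, so this override remaps them onto the OpenAI-style keys that `get_usage()` returns instead of relying on the base-class `response.usage.dict()` path. A sketch of the remapping on a hypothetical stand-in for `response.usage` (values illustrative):

```Python
class Usage:  # hypothetical stand-in for a DashScope response.usage object
    input_tokens = 267
    output_tokens = 15
    total_tokens = 282

usage = Usage()
last_call_usage_info = {
    'prompt_tokens': usage.input_tokens,        # DashScope: input_tokens
    'completion_tokens': usage.output_tokens,   # DashScope: output_tokens
    'total_tokens': usage.total_tokens,
}
```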
modelscope_agent/llm/ollama.py (+28)

````diff
@@ -15,6 +15,7 @@ def __init__(self, model: str, model_server: str, **kwargs):
         host = kwargs.get('host', 'http://localhost:11434')
         self.client = ollama.Client(host=host)
         self.model = model
+        self.client.pull(self.model)

     def _chat_stream(self,
                      messages: List[Dict],
@@ -25,6 +26,7 @@ def _chat_stream(self,
             f'stop: {str(stop)}, stream: True, args: {str(kwargs)}')
         stream = self.client.chat(
             model=self.model, messages=messages, stream=True)
+        stream = self.stat_last_call_token_info(stream)
         for chunk in stream:
             tmp_content = chunk['message']['content']
             logger.info(f'call ollama success, output: {tmp_content}')
@@ -40,6 +42,7 @@ def _chat_no_stream(self,
             f'call ollama, model: {self.model}, messages: {str(messages)}, '
             f'stop: {str(stop)}, stream: False, args: {str(kwargs)}')
         response = self.client.chat(model=self.model, messages=messages)
+        self.stat_last_call_token_info(response)
         final_content = response['message']['content']
         logger.info(f'call ollama success, output: {final_content}')
         return final_content
@@ -83,3 +86,28 @@ def chat(self,
         messages = [{'role': 'user', 'content': prompt}]
         return super().chat(
             messages=messages, stop=stop, stream=stream, **kwargs)
+
+    def stat_last_call_token_info(self, response):
+        try:
+            self.last_call_usage_info = {
+                'prompt_tokens':
+                response.get('prompt_eval_count', -1),
+                'completion_tokens':
+                response.get('eval_count', -1),
+                'total_tokens':
+                response.get('prompt_eval_count') + response.get('eval_count')
+            }
+            return response
+        except AttributeError:
+            for chunk in response:
+                # if hasattr(chunk.output, 'usage'):
+                self.last_call_usage_info = {
+                    'prompt_tokens':
+                    chunk.get('prompt_eval_count', -1),
+                    'completion_tokens':
+                    chunk.get('eval_count', -1),
+                    'total_tokens':
+                    chunk.get('prompt_eval_count', -1)
+                    + chunk.get('eval_count', -1)
+                }
+                yield chunk
````

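Ollama exposes its token counters as plain dict fields, `prompt_eval_count` (prompt tokens) and `eval_count` (generated tokens), which this override folds into the same OpenAI-style keys. A sketch against a hypothetical response dict (field values illustrative):

```Python
# Reduced shape of a non-streaming Ollama chat response (illustrative values).
response = {
    'message': {'role': 'assistant', 'content': 'hi there'},
    'prompt_eval_count': 26,  # tokens consumed by the prompt
    'eval_count': 12,         # tokens generated
}

usage = {
    'prompt_tokens': response.get('prompt_eval_count', -1),
    'completion_tokens': response.get('eval_count', -1),
    'total_tokens': (response.get('prompt_eval_count', -1)
                     + response.get('eval_count', -1)),
}
print(usage)  # {'prompt_tokens': 26, 'completion_tokens': 12, 'total_tokens': 38}
```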
modelscope_agent/llm/openai.py (+7, -2)

````diff
@@ -36,17 +36,21 @@ def _chat_stream(self,
         logger.info(
             f'call openai api, model: {self.model}, messages: {str(messages)}, '
             f'stop: {str(stop)}, stream: True, args: {str(kwargs)}')
+        stream_options = {'include_usage': True}
         response = self.client.chat.completions.create(
             model=self.model,
             messages=messages,
             stop=stop,
             stream=True,
+            stream_options=stream_options,
             **kwargs)
+        response = self.stat_last_call_token_info(response)
         # TODO: error handling
         for chunk in response:
             # sometimes delta.content is None by vllm, we should not yield None
-            if hasattr(chunk.choices[0].delta,
-                       'content') and chunk.choices[0].delta.content:
+            if len(chunk.choices) > 0 and hasattr(
+                    chunk.choices[0].delta,
+                    'content') and chunk.choices[0].delta.content:
                 logger.info(
                     f'call openai api success, output: {chunk.choices[0].delta.content}'
                 )
@@ -66,6 +70,7 @@ def _chat_no_stream(self,
             stop=stop,
             stream=False,
             **kwargs)
+        self.stat_last_call_token_info(response)
         logger.info(
             f'call openai api success, output: {response.choices[0].message.content}'
         )
````

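The two streaming changes work together: with `stream_options={'include_usage': True}`, the OpenAI-compatible API appends one final chunk whose `choices` list is empty and which carries the `usage` object, so the loop must check `len(chunk.choices) > 0` before indexing `chunk.choices[0]`. A sketch of consuming such a stream (field values illustrative):

```Python
# With include_usage, every chunk has usage=None except the terminal one,
# whose choices list is empty. Indexing choices[0] there would fail.
for chunk in response:
    if chunk.usage is not None:  # only the terminal chunk carries usage
        print(chunk.usage)       # prompt_tokens=..., completion_tokens=...
    elif len(chunk.choices) > 0 and chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end='')
```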
modelscope_agent_servers/README.md (+71, -88)
````diff
@@ -38,21 +38,60 @@ cd modelscope-agent
 # start the assistant server
 sh scripts/run_assistant_server.sh

+# start the assistant server with a specified backend
+sh scripts/run_assistant_server.sh dashscope
 ```

 ### Use case

 #### Chat

+We provide compatibility with part of the OpenAI API `chat/completions`, especially function calling. Developers can use the `OpenAI` SDK against the local URL. Currently the supported model servers include `dashscope`, `openai` and `ollama`.

-To interact with the chat API, you should construct a object like `ChatRequest` on the client side, and then use the requests library to send it as the request body.
+Here is a code snippet using the `OpenAI` SDK with the `dashscope` model server:

-#### function calling
-An example code snippet is as follows:
+```Python
+api_base = "http://localhost:31512/v1/"
+model = 'qwen-max'
+
+tools = [{
+    "type": "function",
+    "function": {
+        "name": "amap_weather",
+        "description": "amap weather tool",
+        "parameters": [{
+            "name": "location",
+            "type": "string",
+            "description": "城市/区具体名称,如`北京市海淀区`请描述为`海淀区`",
+            "required": True
+        }]
+    }
+}]
+
+tool_choice = 'auto'
+
+client = OpenAI(
+    api_key="YOUR_DASHSCOPE_API_KEY",
+    base_url=api_base,
+)
+chat_completion = client.chat.completions.create(
+    messages=[{
+        "role": "user",
+        "content": "海淀区天气是什么?"
+    }],
+    model=model,
+    tools=tools,
+    tool_choice=tool_choice
+)
+
+```
+
+You can also use `curl` to request this API.

 ```Shell
 curl -X POST 'http://localhost:31512/v1/chat/completions' \
   -H 'Content-Type: application/json' \
+  -H "Authorization: Bearer $DASHSCOPE_API_KEY" \
   -d '{
     "tools": [{
       "type": "function",
````
````diff
@@ -68,108 +107,51 @@ curl -X POST 'http://localhost:31512/v1/chat/completions' \
       }
     }],
     "tool_choice": "auto",
-    "llm_config": {
-        "model": "qwen-max",
-        "model_server": "dashscope",
-        "api_key": "YOUR DASHSCOPE API KEY"
-    },
+    "model": "qwen-max",
     "messages": [
         {"content": "海淀区天气", "role": "user"}
-    ],
-    "uuid_str": "test",
-    "stream": false
+    ]
 }'

 ```

 With the above examples, the output should look like this:
 ```Python
 {
-    "request_id":"xxxxx",
-    "message":"",
-    "output": None,
-    "choices": [{
+    "request_id":"xxx",
+    "id":"xxx",
+    "choices":[{
         "index":0,
-        "message": {
-            "role": "assistant",
-            "content": "Action: amap_weather\nAction Input: {\"location\": \"海淀区\"}\n",
-            "tool_calls": [
-                {
-                    "type": "function",
-                    "function": {
-                        "name": "amap_weather",
-                        "arguments": "{\"location\":\"海淀区\"}"
-                    }
-                }]
+        "message":{
+            "role":"assistant",
+            "content":"Action: amap_weather\nAction Input: {\"location\": \"海淀区\"}\n",
+            "tool_calls":[{
+                "type":"function",
+                "function":{
+                    "name":"amap_weather",
+                    "arguments":"{\"location\": \"海淀区\"}"
+                }
+            }]
         },
-        "finish_reason": "tool_calls"
-    }]
-}
+        "finish_reason":"tool_calls"
+    }],
+    "created":xxx,
+    "model":"qwen-max",
+    "object":"chat.completion",
+    "usage":{"prompt_tokens":267,"completion_tokens":15,"total_tokens":282}}
 ```

+#### Assistant
+
+To interact with the assistant API, you should construct an object like `AgentRequest` on the client side, and then use the `requests` library to send it as the request body.
+
 #### knowledge retrieval

-To enable knowledge retrieval, you'll need to include use_knowledge and files in your configuration settings.
+In the `assistants/lite` API, to enable knowledge retrieval you'll need to include `use_knowledge` and `files` in your configuration settings.

 - `use_knowledge`: Specifies whether knowledge retrieval should be activated.
 - `files`: the file(s) you wish to use during the conversation. By default, all previously uploaded files will be used.

-```Shell
-curl -X POST 'http://localhost:31512/v1/chat/completions' \
-  -H 'Content-Type: application/json' \
-  -d '{
-    "tools": [
-      {
-        "type": "function",
-        "function": {
-            "name": "amap_weather",
-            "description": "amap weather tool",
-            "parameters": [{
-                "name": "location",
-                "type": "string",
-                "description": "城市/区具体名称,如`北京市海淀区`请描述为`海淀区`",
-                "required": true
-            }]
-        }
-      }],
-    "llm_config": {
-        "model": "qwen-max",
-        "model_server": "dashscope",
-        "api_key": "YOUR DASHSCOPE API KEY"
-    },
-    "messages": [
-        {"content": "高德天气api申请", "role": "user"}
-    ],
-    "uuid_str": "test",
-    "stream": false,
-    "use_knowledge": true,
-    "files": ["QA.pdf"]
-}'
-```
-
-With above examples, the output should be like this:
-```Python
-{
-    "request_id":"2bdb05fb-48b6-4ba2-9a38-7c9eb7c5c88e",
-    "message":"",
-    "output": None,
-    "choices": [{
-        "index":0,
-        "message": {
-            "role": "assistant",
-            "content": "Information based on knowledge retrieval.",
-        }
-        "finish_reason": "stop"
-
-    }]
-}
-```
-
-#### Assistant
-
-Like `v1/chat/completions` API, you should construct a `ChatRequest` object when use `v1/assistants/lite`. Here is an example using python `requests` library.
-

 ```Python
 import os
 import requests
````
````diff
@@ -194,10 +176,11 @@ request = {
     'agent_config': agent_cfg,
     'llm_config': llm_cfg,
     'messages': [
-        {'content': '请为我介绍一下modelscope', 'role': 'user'}
+        {'content': '高德天气API申请', 'role': 'user'}
     ],
     'uuid_str': 'test',
     'use_knowledge': True  # whether to use knowledge
+    "files": ["QA.pdf"]
 }

 response = requests.post(url, json=request)
````
````diff
@@ -211,7 +194,7 @@ request = {
     'agent_config': agent_cfg,
     'llm_config': llm_cfg,
     'messages': [
-        {'content': '请为我介绍一下modelscope', 'role': 'user'}
+        {'content': '高德天气API申请', 'role': 'user'}
     ],
     'uuid_str': 'test',
     'stream': True, # whether to use stream
````

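Because the `chat/completions` example above finishes with `finish_reason: "tool_calls"`, the client is expected to execute the named tool itself and continue the conversation with its output. A minimal, hypothetical continuation of the README's `OpenAI` SDK example (the `run_amap_weather` stub is a stand-in, not part of this commit):

```Python
import json

def run_amap_weather(location: str) -> str:
    # Hypothetical local executor standing in for the amap_weather tool.
    return f'{location}: sunny, 25°C'

tool_call = chat_completion.choices[0].message.tool_calls[0]
if tool_call.function.name == 'amap_weather':
    args = json.loads(tool_call.function.arguments)
    print(run_amap_weather(args['location']))
```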