@@ -39,9 +39,11 @@ async def on_valves_updated(self):
        self.set_pipelines()

    async def on_startup(self):
+        # This function is called when the server is started.
        print(f"on_startup:{__name__}")

    async def on_shutdown(self):
+        # This function is called when the server is stopped.
        print(f"on_shutdown:{__name__}")

    def pipe(
@@ -55,32 +57,21 @@ def pipe(
        print(messages)
        print(user_message)

+        user_email = body.get("user", {}).get("email")
+
        headers = {
            "api-key": self.valves.AZURE_OPENAI_API_KEY,
            "Content-Type": "application/json",
        }

-        # 1) Read the 'user' email from body
-        # user_id = body.get("user", {})
-        # user_name = user_id.get("email", "").split("@")[0]
-        user_name = body.get("name", {})
-
-        # 2) Build the base URL *manually* to preserve `@` in the `source`
-        #    This ensures the server sees `source=you@company.com` literally
-        #    instead of `source=you%40company.com`
-        if user_name:
-            full_url = (
-                f"{self.valves.AZURE_OPENAI_ENDPOINT}/openai/deployments/{model_id}/chat/completions"
-                f"?api-version={self.valves.AZURE_OPENAI_API_VERSION}&source={user_name}"
-            )
-        else:
-            # If we have no email, just omit the source from the query string
-            full_url = (
-                f"{self.valves.AZURE_OPENAI_ENDPOINT}/openai/deployments/{model_id}/chat/completions"
-                f"?api-version={self.valves.AZURE_OPENAI_API_VERSION}"
-            )
+        # URL for Chat Completions in Azure OpenAI
+        url = (
+            f"{self.valves.AZURE_OPENAI_ENDPOINT}/openai/deployments/"
+            f"{model_id}/chat/completions?api-version={self.valves.AZURE_OPENAI_API_VERSION}&source={user_email}"
+        )

        # --- Define the allowed parameter sets ---
+        # (1) Default allowed params (non-o1)
        allowed_params_default = {
            "messages",
            "temperature",
@@ -97,7 +88,7 @@ def pipe(
            "presence_penalty",
            "frequency_penalty",
            "logit_bias",
-            "user",
+            "user",
            "function_call",
            "functions",
            "tools",
@@ -109,6 +100,7 @@ def pipe(
            "seed",
        }

+        # (2) o1 models allowed params
        allowed_params_o1 = {
            "model",
            "messages",
@@ -118,31 +110,45 @@ def pipe(
            "presence_penalty",
            "frequency_penalty",
            "logit_bias",
-            "user",  # <--- still here too
+            "user",
        }

+        # Simple helper to detect if it's an o1 model
        def is_o1_model(m: str) -> bool:
-            return "o1" in m or m.endswith("o")
+            # Adjust this check to your naming pattern for o1 models
+            return "o1" in m or m.startswith("o1")
+
+        # Ensure user is a string
+        if "user" in body and not isinstance(body["user"], str):
+            body["user"] = body["user"].get("id", str(body["user"]))

        # If it's an o1 model, do a "fake streaming" approach
        if is_o1_model(model_id):
-            body.pop("stream", None)  # only remove 'stream' if present
+            # We'll remove "stream" from the body if present (since we'll do manual streaming),
+            # then filter to the allowed params for o1 models.
+            body.pop("stream", None)
            filtered_body = {k: v for k, v in body.items() if k in allowed_params_o1}

+            # Log which fields were dropped
            if len(body) != len(filtered_body):
                dropped_keys = set(body.keys()) - set(filtered_body.keys())
                print(f"Dropped params: {', '.join(dropped_keys)}")

            try:
+                # We make a normal request (non-streaming)
                r = requests.post(
-                    url=full_url,
+                    url=url,
                    json=filtered_body,
                    headers=headers,
                    stream=False,
                )
                r.raise_for_status()

+                # Parse the full JSON response
                data = r.json()
+
+                # Typically, the text content is in data["choices"][0]["message"]["content"].
+                # This may vary depending on your actual response shape.
                content = ""
                if (
                    isinstance(data, dict)
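
The user coercion added above matters because the body may carry user as a dict (id, email, and so on) while the upstream API expects a plain string. A quick illustration with a hypothetical body:

# Hypothetical request body, shaped the way a caller might send it.
body = {"user": {"id": "u-123", "email": "you@company.com"}, "stream": True}

# Same coercion as in the diff: prefer the id, fall back to str().
if "user" in body and not isinstance(body["user"], str):
    body["user"] = body["user"].get("id", str(body["user"]))

assert body["user"] == "u-123"
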
@@ -157,6 +163,7 @@ def is_o1_model(m: str) -> bool:
                    content = str(data)

                def chunk_text(text: str, chunk_size: int = 30) -> Generator[str, None, None]:
+                    """Yield text in fixed-size chunks."""
                    for i in range(0, len(text), chunk_size):
                        yield text[i : i + chunk_size]
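
chunk_text is the core of the fake-streaming approach: the o1 response arrives in one piece and is re-emitted in fixed-size slices. A quick check of the helper in isolation:

def chunk_text(text: str, chunk_size: int = 30):
    for i in range(0, len(text), chunk_size):
        yield text[i : i + chunk_size]

print(list(chunk_text("The quick brown fox jumps over the lazy dog", 16)))
# ['The quick brown ', 'fox jumps over t', 'he lazy dog']
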
@@ -173,23 +180,26 @@ def fake_stream() -> Generator[str, None, None]:
                    return f"Error: {e}"

        else:
+            # Normal pipeline for non-o1 models:
            filtered_body = {k: v for k, v in body.items() if k in allowed_params_default}
            if len(body) != len(filtered_body):
                dropped_keys = set(body.keys()) - set(filtered_body.keys())
                print(f"Dropped params: {', '.join(dropped_keys)}")

            try:
                r = requests.post(
-                    url=full_url,
+                    url=url,
                    json=filtered_body,
                    headers=headers,
                    stream=True,
                )
                r.raise_for_status()

                if filtered_body.get("stream"):
+                    # Real streaming
                    return r.iter_lines()
                else:
+                    # Just return the JSON
                    return r.json()

            except Exception as e:
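
When streaming is requested in the non-o1 branch, the pipeline hands back r.iter_lines() untouched. A hedged sketch of how a caller might decode those lines, assuming the usual Chat Completions SSE framing (data: {json} entries terminated by data: [DONE]); exact field names can vary by API version:

import json

def read_stream(lines):
    # `lines` is what r.iter_lines() yields: raw bytes, one SSE line each.
    for raw in lines:
        if not raw:
            continue  # skip keep-alive blanks
        line = raw.decode("utf-8")
        if not line.startswith("data: "):
            continue
        payload = line[len("data: "):]
        if payload == "[DONE]":
            break
        chunk = json.loads(payload)
        for choice in chunk.get("choices", []):
            text = choice.get("delta", {}).get("content")
            if text:
                yield text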