llama_cpp/server/app.py (+17 −4: 17 additions, 4 deletions)
```diff
@@ -152,6 +152,19 @@ def get_llama():
     "Repeat penalty is a hyperparameter used to penalize the repetition of token sequences during text generation. It helps prevent the model from generating repetitive or monotonous text. A higher value (e.g., 1.5) will penalize repetitions more strongly, while a lower value (e.g., 0.9) will be more lenient.",
 )
 
+presence_penalty_field = Field(
+    default=0.0,
+    ge=-2.0,
+    le=2.0,
+    description="Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.",
+)
+
+frequency_penalty_field = Field(
+    default=0.0,
+    ge=-2.0,
+    le=2.0,
+    description="Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.",
+)
 
 class CreateCompletionRequest(BaseModel):
     prompt: Optional[str] = Field(
```
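For orientation, these module-level `Field` objects are designed to be shared by the request models defined further down in `app.py`. Below is a minimal sketch of that pattern; the `presence_penalty` and `frequency_penalty` attribute names mirror OpenAI's API and are assumptions here, since the hunks that wire the fields into the models are not part of this excerpt.

```python
from typing import Optional
from pydantic import BaseModel, Field

# Shared, module-level field definitions, as added in the hunk above.
presence_penalty_field = Field(
    default=0.0,
    ge=-2.0,
    le=2.0,
    description="Positive values penalize new tokens based on whether they appear in the text so far.",
)

frequency_penalty_field = Field(
    default=0.0,
    ge=-2.0,
    le=2.0,
    description="Positive values penalize new tokens based on their existing frequency in the text so far.",
)

class CreateCompletionRequest(BaseModel):
    prompt: Optional[str] = Field(default="", description="The prompt to generate completions for.")
    # Assumed wiring: the same Field object can back attributes on several
    # request models (completion and chat), so bounds and descriptions live
    # in one place.
    presence_penalty: Optional[float] = presence_penalty_field
    frequency_penalty: Optional[float] = frequency_penalty_field
```

Reusing one `Field` across the completion and chat request models keeps the bounds (−2.0 to 2.0) and descriptions in a single place, so the two endpoints cannot drift apart.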
```diff
@@ -175,13 +188,13 @@ class CreateCompletionRequest(BaseModel):
         ge=0,
         description="The number of logprobs to generate. If None, no logprobs are generated.",
```