Add support for fine-tuned models in encoding_for_model

thespino · thespino · commit 07ba41228949 · 2023-06-01T23:06:18.000+02:00
Make encoding_for_model recognize the base models that can be fine-tuned (davinci, curie, babbage, ada), so that fine-tuned model names resolve to an encoding. See https://platform.openai.com/docs/models/model-endpoint-compatibility and https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
diff --git a/tiktoken/model.py b/tiktoken/model.py
@@ -9,6 +9,11 @@
     "gpt-4-": "cl100k_base",  # e.g., gpt-4-0314, etc., plus gpt-4-32k
     "gpt-3.5-turbo-": "cl100k_base",  # e.g, gpt-3.5-turbo-0301, -0401, etc.
     "gpt-35-turbo": "cl100k_base",  # Azure deployment name
+    # base models that can be fine-tuned (fine-tuned model names start with the base model name)
+    "davinci": "r50k_base",
+    "curie": "r50k_base",
+    "babbage": "r50k_base",
+    "ada": "r50k_base",
 }
 
 MODEL_TO_ENCODING: dict[str, str] = {