Skip to content

Commit 446cb49

Browse files
committed
Bump version, sync codebase
1 parent 3e86200 commit 446cb49

File tree

5 files changed

+10
-5
lines changed

5 files changed

+10
-5
lines changed

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@
22

33
This is the changelog for the open source version of tiktoken.
44

5+
## [v0.3.2]
6+
- Add encoding for GPT-4
7+
58
## [v0.3.1]
69
- Build aarch64 wheels
710
- Make `blobfile` an optional dependency

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "tiktoken"
3-
version = "0.3.1"
3+
version = "0.3.2"
44
edition = "2021"
55
rust-version = "1.57.0"
66

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,11 @@ OpenAI's models.
55

66
```python
77
import tiktoken
8-
enc = tiktoken.get_encoding("gpt2")
8+
enc = tiktoken.get_encoding("cl100k_base")
99
assert enc.decode(enc.encode("hello world")) == "hello world"
1010

1111
# To get the tokeniser corresponding to a specific model in the OpenAI API:
12-
enc = tiktoken.encoding_for_model("text-davinci-003")
12+
enc = tiktoken.encoding_for_model("gpt-4")
1313
```
1414

1515
The open source version of `tiktoken` can be installed from PyPI:

src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ fn _byte_pair_merge<T>(
3434
}
3535
};
3636

37-
// We look up the ranks once in the beggining and iteratively update
37+
// We look up the ranks once in the beginning and iteratively update
3838
// them during each merge, which reduces the number of rank lookups.
3939
for i in 0..parts.len() - 2 {
4040
match get_rank(&parts, i, 0) {

tiktoken/model.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,13 @@
66
# TODO: these will likely be replaced by an API endpoint
77
MODEL_PREFIX_TO_ENCODING: dict[str, str] = {
88
# chat
9-
"gpt-3.5-turbo-": "cl100k_base" # e.g, gpt-3.5-turbo-0301, -0401, etc.
9+
"gpt-4-": "cl100k_base", # e.g., gpt-4-0314, etc., plus gpt-4-32k
10+
"gpt-3.5-turbo-": "cl100k_base", # e.g, gpt-3.5-turbo-0301, -0401, etc.
1011
}
1112

1213
MODEL_TO_ENCODING: dict[str, str] = {
1314
# chat
15+
"gpt-4": "cl100k_base",
1416
"gpt-3.5-turbo": "cl100k_base",
1517
# text
1618
"text-davinci-003": "p50k_base",

0 commit comments

Comments (0)