Add figures to README
hendrikvanantwerpen committed Oct 1, 2024
1 parent 81a119f commit eaf4f7f
Showing 7 changed files with 30 additions and 4 deletions.
10 changes: 10 additions & 0 deletions crates/bpe/.gitignore
@@ -0,0 +1,10 @@
# Ignore benchmark results except the figures referenced in the README.
# Negated ignore patterns do not work for files inside a directory that is itself ignored.
# Therefore ignore using `**` and then negate the nested directories (but not the files inside).
/benches/result/**
!/benches/result/*/
!/benches/result/*/*/
# Negate the actual figures we want to keep.
!/benches/result/reports/counting-o200k/lines.svg
!/benches/result/reports/encoding-o200k/lines.svg
!/benches/result/reports/appending-o200k/lines.svg
18 changes: 18 additions & 0 deletions crates/bpe/README.md
@@ -198,3 +198,21 @@ As can be seen, our Backtracking implementation beats the TikToken Rust implementation
And even the fully dynamic programming solution is faster with a more consistent runtime.
The tuned heap implementation is still quite competitive with TikToken (especially for smaller inputs).
If the requirement of correct BPE output can be relaxed, then the Greedy approach or the minimal encoding approach is the clear winner.

### Counting results

Results for counting o200k tokens in random 10000-byte slices. The setup time of the interval encoder is comparable to that of the backtracking encoder. After setup, counting slices of the original data takes approximately constant time.

![Counting o200k tokens for random 10000 byte slices](./benches/result/reports/counting-o200k/lines.svg)
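
As a rough illustration of this counting workflow, the sketch below sets up the encoder once over the full input and then counts many sub-ranges. The `IntervalEncoding` type, its `new` constructor, its `count` method, and the module paths are assumed names for illustration only, not confirmed by this commit; obtaining the o200k `BytePairEncoding` instance is left to the caller.

```rust
use bpe::byte_pair_encoding::BytePairEncoding;
use bpe::interval_encoding::IntervalEncoding; // assumed module path

/// Count tokens for many slices of one document (hypothetical API names).
fn count_slices(bpe: &BytePairEncoding, text: &[u8]) {
    // One-time setup over the full input; cost comparable to a backtracking encode.
    let enc = IntervalEncoding::new(bpe, text);
    // After setup, counting a sub-range takes approximately constant time.
    for start in (0..text.len().saturating_sub(100)).step_by(1000) {
        let count = enc.count(start..start + 100);
        println!("bytes {}..{}: {} tokens", start, start + 100, count);
    }
}
```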

### Encoding results

Results for encoding o200k tokens for 1000 random bytes. The backtracking encoder consistently outperforms tiktoken by a constant factor.

![Encoding o200k tokens for 10000 random bytes](./benches/result/reports/encoding-o200k/lines.svg)
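
A single encode call might look like the following minimal sketch. The `encode_via_backtracking` method name, the `Vec<u32>` token type, and the module path are assumptions for illustration, not part of this commit.

```rust
use bpe::byte_pair_encoding::BytePairEncoding;

/// Encode a byte slice with the backtracking algorithm (assumed method name).
/// The result matches what the original left-to-right BPE merge procedure produces.
fn encode(bpe: &BytePairEncoding, input: &[u8]) -> Vec<u32> {
    bpe.encode_via_backtracking(input)
}
```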

### Incremental encoding results

Results for incrementally encoding o200k tokens by appending 10000 random bytes. The appending encoder is slower by a constant factor, but overall its performance curve is similar to that of the backtracking encoder encoding all the data at once.

![Incrementally encoding o200k tokens by appending 10000 random bytes](./benches/result/reports/appending-o200k/lines.svg)
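
A minimal sketch of this incremental usage, modeled on the `AppendableEncoder::new` and `extend` calls that appear in the benchmark change below; only the module paths are assumed.

```rust
use bpe::appendable_encoder::AppendableEncoder; // assumed module path
use bpe::byte_pair_encoding::BytePairEncoding;

/// Feed input chunk by chunk; per the constant-factor claim above, each
/// `extend` costs only a constant factor more than encoding in one pass.
fn encode_incrementally(bpe: &BytePairEncoding, chunks: &[&[u8]]) {
    let mut enc = AppendableEncoder::new(bpe);
    for chunk in chunks {
        enc.extend(chunk.iter().copied());
    }
}
```
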
4 changes: 1 addition & 3 deletions crates/bpe/benches/performance.rs
@@ -140,9 +140,7 @@ fn appending_benchmark(c: &mut Criterion) {
AppendableEncoder::new(bpe),
)
},
-            |(start, mut enc)| {
-                enc.extend(input[start..start + bytes].into_iter().copied())
-            },
+            |(start, mut enc)| enc.extend(input[start..start + bytes].into_iter().copied()),
criterion::BatchSize::SmallInput,
)
});
[Three SVG figures added under crates/bpe/benches/result/reports/ — appending-o200k/lines.svg, counting-o200k/lines.svg, and encoding-o200k/lines.svg — which the diff view cannot display.]
2 changes: 1 addition & 1 deletion crates/bpe/criterion.toml
@@ -1,2 +1,2 @@
# save report in this directory, even if a custom target directory is set
-criterion_home = "./target/criterion"
+criterion_home = "./benches/result"
