Skip to content

Commit 104fe0c

Browse files
authored
Use pyo3 smd v0.21 (#1574)
1 parent 2048c02 commit 104fe0c

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

48 files changed

+64
-67
lines changed

bindings/python/Cargo.toml

+3-3
Original file line numberDiff line numberDiff line change
@@ -19,15 +19,15 @@ ndarray = "0.15"
1919
onig = { version = "6.4", default-features = false }
2020
itertools = "0.12"
2121
derive_more = "0.99.17"
22-
pyo3 = { version = "0.22", features = ["multiple-pymethods"] }
23-
pyo3_special_method_derive = "0.4"
22+
pyo3 = { version = "0.21", features = ["multiple-pymethods"] }
23+
pyo3_special_method_derive_0_21 = "0.4"
2424

2525
[dependencies.tokenizers]
2626
path = "../../tokenizers"
2727

2828
[dev-dependencies]
2929
tempfile = "3.10"
30-
pyo3 = { version = "0.22", features = ["auto-initialize"] }
30+
pyo3 = { version = "0.21", features = ["auto-initialize"] }
3131

3232
[features]
3333
defaut = ["pyo3/extension-module"]

bindings/python/src/decoders.rs

+16-2
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,12 @@ use std::sync::{Arc, RwLock};
22

33
use crate::pre_tokenizers::from_string;
44
use crate::utils::PyPattern;
5-
use pyo3_special_method_derive::AutoDisplay;
65
use pyo3::exceptions;
76
use pyo3::prelude::*;
87
use pyo3::types::*;
8+
use pyo3_special_method_derive_0_21::AutoDisplay;
9+
use pyo3_special_method_derive_0_21::PyDebug;
10+
use pyo3_special_method_derive_0_21::PyDisplay;
911
use serde::de::Error;
1012
use serde::{Deserialize, Deserializer, Serialize, Serializer};
1113
use tk::decoders::bpe::BPEDecoder;
@@ -487,11 +489,23 @@ impl PySequenceDecoder {
487489
}
488490
}
489491

490-
#[derive(Clone, AutoDisplay)]
492+
#[derive(Clone)]
491493
pub(crate) struct CustomDecoder {
492494
pub inner: PyObject,
493495
}
494496

497+
impl PyDisplay for CustomDecoder {
498+
fn fmt_display(&self) -> String {
499+
"CustomDecoder()".to_string()
500+
}
501+
}
502+
503+
impl PyDebug for CustomDecoder {
504+
fn fmt_debug(&self) -> String {
505+
"CustomDecoder()".to_string()
506+
}
507+
}
508+
495509
impl CustomDecoder {
496510
pub(crate) fn new(inner: PyObject) -> Self {
497511
CustomDecoder { inner }

bindings/python/src/models.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,10 @@ use std::sync::{Arc, RwLock};
55
use super::error::{deprecation_warning, ToPyResult};
66
use crate::token::PyToken;
77
use crate::trainers::PyTrainer;
8-
use pyo3_special_method_derive::AutoDisplay;
98
use pyo3::exceptions;
109
use pyo3::prelude::*;
1110
use pyo3::types::*;
11+
use pyo3_special_method_derive_0_21::AutoDisplay;
1212
use serde::{Deserialize, Serialize};
1313
use tk::models::bpe::{BpeBuilder, Merges, Vocab, BPE};
1414
use tk::models::unigram::Unigram;

bindings/python/src/normalizers.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,10 @@ use std::sync::{Arc, RwLock};
22

33
use crate::error::ToPyResult;
44
use crate::utils::{PyNormalizedString, PyNormalizedStringRefMut, PyPattern};
5-
use pyo3_special_method_derive::AutoDisplay;
65
use pyo3::exceptions;
76
use pyo3::prelude::*;
87
use pyo3::types::*;
8+
use pyo3_special_method_derive_0_21::AutoDisplay;
99
use serde::ser::SerializeStruct;
1010
use serde::{Deserialize, Deserializer, Serialize, Serializer};
1111
use tk::normalizers::{

bindings/python/src/pre_tokenizers.rs

+1-9
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ use tokenizers as tk;
2323

2424
use super::error::ToPyResult;
2525
use super::utils::*;
26-
use pyo3_special_method_derive::AutoDisplay;
26+
use pyo3_special_method_derive_0_21::{AutoDisplay, Dict, Dir, Repr, Str};
2727
/// Base class for all pre-tokenizers
2828
///
2929
/// This class is not supposed to be instantiated directly. Instead, any implementation of a
@@ -181,14 +181,6 @@ impl PyPreTokenizer {
181181
.map(|(s, o, _)| (s.to_owned(), o))
182182
.collect())
183183
}
184-
185-
fn __str__(&self) -> PyResult<String> {
186-
Ok(format!("{}", self.pretok))
187-
}
188-
189-
fn __repr__(&self) -> PyResult<String> {
190-
Ok(format!("{}", self.pretok))
191-
}
192184
}
193185

194186
macro_rules! getter {

bindings/python/src/processors.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,10 @@ use std::sync::Arc;
33

44
use crate::encoding::PyEncoding;
55
use crate::error::ToPyResult;
6-
use pyo3_special_method_derive::AutoDisplay;
76
use pyo3::exceptions;
87
use pyo3::prelude::*;
98
use pyo3::types::*;
9+
use pyo3_special_method_derive_0_21::AutoDisplay;
1010
use serde::{Deserialize, Serialize};
1111
use tk::processors::bert::BertProcessing;
1212
use tk::processors::byte_level::ByteLevel;

bindings/python/src/tokenizer.rs

+1-9
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,13 @@ use super::pre_tokenizers::PyPreTokenizer;
1010
use super::trainers::PyTrainer;
1111
use crate::processors::PyPostProcessor;
1212
use crate::utils::{MaybeSizedIterator, PyBufferedIterator};
13-
use pyo3_special_method_derive::AutoDisplay;
1413
use numpy::{npyffi, PyArray1};
1514
use pyo3::class::basic::CompareOp;
1615
use pyo3::exceptions;
1716
use pyo3::intern;
1817
use pyo3::prelude::*;
1918
use pyo3::types::*;
19+
use pyo3_special_method_derive_0_21::AutoDisplay;
2020
use std::collections::BTreeMap;
2121
use tk::models::bpe::BPE;
2222
use tk::tokenizer::{
@@ -1409,14 +1409,6 @@ impl PyTokenizer {
14091409
fn set_decoder(&mut self, decoder: PyRef<PyDecoder>) {
14101410
self.tokenizer.with_decoder(decoder.clone());
14111411
}
1412-
1413-
fn __str__(&self) -> PyResult<String> {
1414-
Ok(format!("{}", self.tokenizer))
1415-
}
1416-
1417-
fn __repr__(&self) -> PyResult<String> {
1418-
Ok(format!("{}", self.tokenizer))
1419-
}
14201412
}
14211413

14221414
#[cfg(test)]

tokenizers/Cargo.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ fancy-regex = { version = "0.13", optional = true}
6363
getrandom = { version = "0.2.10" }
6464
esaxx-rs = { version = "0.1.10", default-features = false, features=[]}
6565
monostate = "0.1.12"
66-
pyo3_special_method_derive = "0.4"
66+
pyo3_special_method_derive_0_21 = "0.4"
6767

6868
[features]
6969
default = ["progressbar", "onig", "esaxx_fast"]

tokenizers/src/decoders/bpe.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
use crate::tokenizer::{Decoder, Result};
2-
use pyo3_special_method_derive::AutoDisplay;
2+
use pyo3_special_method_derive_0_21::AutoDisplay;
33
use serde::{Deserialize, Serialize};
44
#[derive(Deserialize, Clone, Debug, Serialize, AutoDisplay)]
55
/// Allows decoding Original BPE by joining all the tokens and then replacing

tokenizers/src/decoders/byte_fallback.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
use crate::tokenizer::{Decoder, Result};
22
use monostate::MustBe;
3-
use pyo3_special_method_derive::AutoDisplay;
3+
use pyo3_special_method_derive_0_21::AutoDisplay;
44
use serde::{Deserialize, Serialize};
55
#[derive(Deserialize, Clone, Debug, Serialize, Default, AutoDisplay)]
66
/// ByteFallback is a simple trick which converts tokens looking like `<0x61>`

tokenizers/src/decoders/ctc.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
use crate::decoders::wordpiece;
22
use crate::tokenizer::{Decoder, Result};
33
use itertools::Itertools;
4-
use pyo3_special_method_derive::AutoDisplay;
4+
use pyo3_special_method_derive_0_21::AutoDisplay;
55
use serde::{Deserialize, Serialize};
66

77
#[derive(Debug, Clone, Serialize, Deserialize, AutoDisplay)]

tokenizers/src/decoders/fuse.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
use crate::tokenizer::{Decoder, Result};
22
use monostate::MustBe;
3-
use pyo3_special_method_derive::AutoDisplay;
3+
use pyo3_special_method_derive_0_21::AutoDisplay;
44
use serde::{Deserialize, Serialize};
55
#[derive(Clone, Debug, Serialize, Deserialize, Default, AutoDisplay)]
66
/// Fuse simply fuses all tokens into one big string.

tokenizers/src/decoders/mod.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ use crate::normalizers::replace::Replace;
2121
use crate::pre_tokenizers::byte_level::ByteLevel;
2222
use crate::pre_tokenizers::metaspace::Metaspace;
2323
use crate::{Decoder, Result};
24-
use pyo3_special_method_derive::AutoDisplay;
24+
use pyo3_special_method_derive_0_21::AutoDisplay;
2525
use serde::{Deserialize, Serialize};
2626

2727
#[derive(Serialize, Deserialize, Clone, Debug, AutoDisplay)]

tokenizers/src/decoders/sequence.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
use crate::decoders::DecoderWrapper;
22
use crate::tokenizer::{Decoder, Result};
33
use crate::utils::macro_rules_attribute;
4-
use pyo3_special_method_derive::AutoDisplay;
4+
use pyo3_special_method_derive_0_21::AutoDisplay;
55
use serde::{Deserialize, Serialize};
66

77
#[macro_rules_attribute(impl_serde_type!)]

tokenizers/src/decoders/strip.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
use crate::tokenizer::{Decoder, Result};
22

3-
use pyo3_special_method_derive::AutoDisplay;
3+
use pyo3_special_method_derive_0_21::AutoDisplay;
44
use serde::{Deserialize, Serialize};
55
#[derive(Deserialize, Clone, Debug, Serialize, Default, AutoDisplay)]
66
/// Strip is a simple trick which converts tokens looking like `<0x61>`

tokenizers/src/decoders/wordpiece.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
use crate::tokenizer::{Decoder, Result};
22

3-
use pyo3_special_method_derive::AutoDisplay;
3+
use pyo3_special_method_derive_0_21::AutoDisplay;
44
use serde::{Deserialize, Serialize};
55
#[derive(Deserialize, Clone, Debug, Serialize, AutoDisplay)]
66
/// The WordPiece decoder takes care of decoding a list of wordpiece tokens

tokenizers/src/models/mod.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ pub mod wordpiece;
88
use std::collections::HashMap;
99
use std::path::{Path, PathBuf};
1010

11-
use pyo3_special_method_derive::AutoDisplay;
11+
use pyo3_special_method_derive_0_21::AutoDisplay;
1212
use serde::{Deserialize, Serialize, Serializer};
1313

1414
use crate::models::bpe::{BpeTrainer, BPE};

tokenizers/src/models/unigram/model.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ use super::{
66
use crate::tokenizer::{Model, Result, Token};
77
use crate::utils::cache::Cache;
88

9-
use pyo3_special_method_derive::AutoDisplay;
9+
use pyo3_special_method_derive_0_21::AutoDisplay;
1010
use std::collections::HashMap;
1111
use std::convert::TryInto;
1212
use std::fs::read_to_string;

tokenizers/src/models/wordlevel/mod.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
use super::OrderedVocabIter;
22
use crate::tokenizer::{Model, Result, Token};
3-
use pyo3_special_method_derive::AutoDisplay;
3+
use pyo3_special_method_derive_0_21::AutoDisplay;
44
use serde_json::Value;
55
use std::collections::HashMap;
66
use std::fs::File;

tokenizers/src/models/wordpiece/mod.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
44
use crate::models::bpe::BPE;
55
use crate::tokenizer::{Model, Result, Token};
6-
use pyo3_special_method_derive::AutoDisplay;
6+
use pyo3_special_method_derive_0_21::AutoDisplay;
77
use std::{
88
borrow::Cow,
99
collections::HashMap,

tokenizers/src/normalizers/bert.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
use crate::tokenizer::{NormalizedString, Normalizer, Result};
22

3-
use pyo3_special_method_derive::AutoDisplay;
3+
use pyo3_special_method_derive_0_21::AutoDisplay;
44
use serde::{Deserialize, Serialize};
55
use unicode_categories::UnicodeCategories;
66
/// Checks whether a character is whitespace

tokenizers/src/normalizers/mod.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ pub use crate::normalizers::utils::{Lowercase, Sequence};
1717
use serde::{Deserialize, Serialize};
1818

1919
use crate::{NormalizedString, Normalizer};
20-
use pyo3_special_method_derive::AutoDisplay;
20+
use pyo3_special_method_derive_0_21::AutoDisplay;
2121

2222
/// Wrapper for known Normalizers.
2323
#[derive(Clone, Debug, Deserialize, Serialize, AutoDisplay)]

tokenizers/src/normalizers/prepend.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
use crate::tokenizer::{NormalizedString, Normalizer, Result};
2-
use pyo3_special_method_derive::AutoDisplay;
2+
use pyo3_special_method_derive_0_21::AutoDisplay;
33
use serde::{Deserialize, Serialize};
44

55
#[derive(Clone, Debug, Deserialize, Serialize, AutoDisplay)]

tokenizers/src/normalizers/replace.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ use crate::tokenizer::pattern::Pattern;
22
use crate::tokenizer::Decoder;
33
use crate::tokenizer::{NormalizedString, Normalizer, Result};
44
use crate::utils::SysRegex;
5-
use pyo3_special_method_derive::AutoDisplay;
5+
use pyo3_special_method_derive_0_21::AutoDisplay;
66
use serde::{Deserialize, Serialize};
77
/// Represents the different patterns that `Replace` can use
88
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Eq)]

tokenizers/src/normalizers/strip.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
use crate::tokenizer::{NormalizedString, Normalizer, Result};
22
use crate::utils::macro_rules_attribute;
3-
use pyo3_special_method_derive::AutoDisplay;
3+
use pyo3_special_method_derive_0_21::AutoDisplay;
44
use serde::{Deserialize, Serialize};
55
use unicode_normalization_alignments::char::is_combining_mark;
66
#[derive(Copy, Clone, Debug, Deserialize, Serialize, AutoDisplay)]

tokenizers/src/normalizers/unicode.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
use crate::tokenizer::{NormalizedString, Normalizer, Result};
22
use crate::utils::macro_rules_attribute;
3-
use pyo3_special_method_derive::AutoDisplay;
3+
use pyo3_special_method_derive_0_21::AutoDisplay;
44

55
#[derive(Default, Copy, Clone, Debug, AutoDisplay)]
66
#[macro_rules_attribute(impl_serde_type!)]

tokenizers/src/normalizers/utils.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ use serde::{Deserialize, Serialize};
33
use crate::normalizers::NormalizerWrapper;
44
use crate::tokenizer::{NormalizedString, Normalizer, Result};
55
use crate::utils::macro_rules_attribute;
6-
use pyo3_special_method_derive::AutoDisplay;
6+
use pyo3_special_method_derive_0_21::AutoDisplay;
77
#[derive(Clone, Deserialize, Debug, Serialize, AutoDisplay)]
88
#[serde(tag = "type")]
99
/// Allows concatenating multiple other Normalizer as a Sequence.

tokenizers/src/pre_tokenizers/bert.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
use crate::tokenizer::{PreTokenizedString, PreTokenizer, Result, SplitDelimiterBehavior};
22
use crate::utils::macro_rules_attribute;
3-
use pyo3_special_method_derive::AutoDisplay;
3+
use pyo3_special_method_derive_0_21::AutoDisplay;
44
use unicode_categories::UnicodeCategories;
55

66
fn is_bert_punc(x: char) -> bool {

tokenizers/src/pre_tokenizers/byte_level.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ use crate::tokenizer::{
66
};
77
use crate::utils::macro_rules_attribute;
88
use crate::utils::SysRegex;
9-
use pyo3_special_method_derive::AutoDisplay;
9+
use pyo3_special_method_derive_0_21::AutoDisplay;
1010
use serde::{Deserialize, Serialize};
1111

1212
/// Converts bytes to unicode characters.

tokenizers/src/pre_tokenizers/delimiter.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use pyo3_special_method_derive::AutoDisplay;
1+
use pyo3_special_method_derive_0_21::AutoDisplay;
22
use serde::{Deserialize, Serialize};
33

44
use crate::tokenizer::{PreTokenizedString, PreTokenizer, Result, SplitDelimiterBehavior};

tokenizers/src/pre_tokenizers/digits.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use pyo3_special_method_derive::AutoDisplay;
1+
use pyo3_special_method_derive_0_21::AutoDisplay;
22
use serde::{Deserialize, Serialize};
33

44
use crate::tokenizer::{PreTokenizedString, PreTokenizer, Result, SplitDelimiterBehavior};

tokenizers/src/pre_tokenizers/metaspace.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
use crate::tokenizer::{Decoder, PreTokenizedString, PreTokenizer, Result, SplitDelimiterBehavior};
2-
use pyo3_special_method_derive::AutoDisplay;
2+
use pyo3_special_method_derive_0_21::AutoDisplay;
33
use serde::{de, Deserialize, Deserializer, Serialize};
44
/// Enum representing options for the metaspace prepending scheme.
55
#[derive(Debug, Clone, PartialEq, Serialize, Eq, Deserialize, Copy, AutoDisplay)]

tokenizers/src/pre_tokenizers/mod.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ use crate::pre_tokenizers::split::Split;
2222
use crate::pre_tokenizers::unicode_scripts::UnicodeScripts;
2323
use crate::pre_tokenizers::whitespace::{Whitespace, WhitespaceSplit};
2424
use crate::{PreTokenizedString, PreTokenizer};
25-
use pyo3_special_method_derive::AutoDisplay;
25+
use pyo3_special_method_derive_0_21::AutoDisplay;
2626

2727
#[derive(Deserialize, Serialize, Clone, Debug, PartialEq, AutoDisplay)]
2828
#[auto_display(fmt="pre_tokenizers.{}")]

tokenizers/src/pre_tokenizers/punctuation.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use pyo3_special_method_derive::AutoDisplay;
1+
use pyo3_special_method_derive_0_21::AutoDisplay;
22
use serde::{Deserialize, Serialize};
33

44
use crate::tokenizer::{PreTokenizedString, PreTokenizer, Result, SplitDelimiterBehavior};

tokenizers/src/pre_tokenizers/sequence.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
use crate::pre_tokenizers::PreTokenizerWrapper;
22
use crate::tokenizer::{PreTokenizedString, PreTokenizer, Result};
33
use crate::utils::macro_rules_attribute;
4-
use pyo3_special_method_derive::AutoDisplay;
4+
use pyo3_special_method_derive_0_21::AutoDisplay;
55
use serde::{Deserialize, Serialize};
66

77
#[macro_rules_attribute(impl_serde_type!)]

tokenizers/src/pre_tokenizers/split.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ use crate::tokenizer::{
22
pattern::Invert, PreTokenizedString, PreTokenizer, Result, SplitDelimiterBehavior,
33
};
44
use crate::utils::SysRegex;
5-
use pyo3_special_method_derive::AutoDisplay;
5+
use pyo3_special_method_derive_0_21::AutoDisplay;
66
use serde::{Deserialize, Deserializer, Serialize};
77

88
/// Represents the different patterns that `Split` can use

tokenizers/src/pre_tokenizers/unicode_scripts/pre_tokenizer.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use pyo3_special_method_derive::AutoDisplay;
1+
use pyo3_special_method_derive_0_21::AutoDisplay;
22

33
use crate::pre_tokenizers::unicode_scripts::scripts::{get_script, Script};
44
use crate::tokenizer::{normalizer::Range, PreTokenizedString, PreTokenizer, Result};

tokenizers/src/pre_tokenizers/whitespace.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use pyo3_special_method_derive::AutoDisplay;
1+
use pyo3_special_method_derive_0_21::AutoDisplay;
22
use regex::Regex;
33

44
use crate::tokenizer::{

0 commit comments

Comments
 (0)