-
-
Notifications
You must be signed in to change notification settings - Fork 1.9k
Python 3.14: PEP-784 compression.zstd #14129
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Draft
Rogdham
wants to merge
6
commits into
python:main
Choose a base branch
from
Rogdham:pep-784_compression-zstd
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Draft
Changes from 4 commits
Commits
Show all changes
6 commits
Select commit
Hold shift + click to select a range
219293a
Python 3.14: PEP-784 compression.zstd
Rogdham 3ad00fc
Fix Self import
Rogdham a37a648
Add to allowlist for 3.14 beta2
Rogdham ade5b67
Fix ZstdFile incompatibilities with parent class
Rogdham b29dd87
Use Mapping[int, int] for options
Rogdham 0f99105
Use reference for ZstdCompressor CONTINUE/FLUSH_BLOCK/FLUSH_FRAME
Rogdham File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
from _typeshed import ReadableBuffer | ||
from compression.zstd import _OptionsCompress, _OptionsDecompress | ||
from typing import Any, Final, Literal, final | ||
from typing_extensions import Self | ||
|
||
# Module-level integer constants declared by this stub; every one is typed Final[int]. | ||
# The enum classes in the second diff on this page read the ZSTD_c_* / ZSTD_d_* names and | ||
# the strategy names (ZSTD_fast ... ZSTD_btultra2) as `_zstd.<name>` for their member values. | ||
ZSTD_CLEVEL_DEFAULT: Final[int] | ||
ZSTD_DStreamOutSize: Final[int] | ||
ZSTD_btlazy2: Final[int] | ||
ZSTD_btopt: Final[int] | ||
ZSTD_btultra: Final[int] | ||
ZSTD_btultra2: Final[int] | ||
ZSTD_c_chainLog: Final[int] | ||
ZSTD_c_checksumFlag: Final[int] | ||
ZSTD_c_compressionLevel: Final[int] | ||
ZSTD_c_contentSizeFlag: Final[int] | ||
ZSTD_c_dictIDFlag: Final[int] | ||
ZSTD_c_enableLongDistanceMatching: Final[int] | ||
ZSTD_c_hashLog: Final[int] | ||
ZSTD_c_jobSize: Final[int] | ||
ZSTD_c_ldmBucketSizeLog: Final[int] | ||
ZSTD_c_ldmHashLog: Final[int] | ||
ZSTD_c_ldmHashRateLog: Final[int] | ||
ZSTD_c_ldmMinMatch: Final[int] | ||
ZSTD_c_minMatch: Final[int] | ||
ZSTD_c_nbWorkers: Final[int] | ||
ZSTD_c_overlapLog: Final[int] | ||
ZSTD_c_searchLog: Final[int] | ||
ZSTD_c_strategy: Final[int] | ||
ZSTD_c_targetLength: Final[int] | ||
ZSTD_c_windowLog: Final[int] | ||
ZSTD_d_windowLogMax: Final[int] | ||
ZSTD_dfast: Final[int] | ||
ZSTD_fast: Final[int] | ||
ZSTD_greedy: Final[int] | ||
ZSTD_lazy: Final[int] | ||
ZSTD_lazy2: Final[int] | ||
|
||
# Stub for the compressor class. It is marked @final, so type checkers reject subclasses. | ||
@final | ||
class ZstdCompressor: | ||
# Mode constants. Their values (0, 1, 2) are the same integers that the `mode` | ||
# parameters and `last_mode` below spell out as Literal[...]. | ||
CONTINUE: Final = 0 | ||
FLUSH_BLOCK: Final = 1 | ||
FLUSH_FRAME: Final = 2 | ||
# All three constructor arguments are optional and may be None. | ||
def __init__( | ||
self, level: int | None = ..., options: _OptionsCompress | None = ..., zstd_dict: ZstdDict | None = ... | ||
) -> None: ... | ||
# compress() accepts any of the three modes and returns bytes. | ||
def compress(self, /, data: ReadableBuffer, mode: Literal[0, 1, 2] = ...) -> bytes: ... | ||
# flush() accepts only 1 or 2 (the FLUSH_BLOCK / FLUSH_FRAME values); 0 (CONTINUE) is excluded. | ||
def flush(self, /, mode: Literal[1, 2] = ...) -> bytes: ... | ||
# Read-only property typed as one of the three mode values. | ||
@property | ||
def last_mode(self) -> Literal[0, 1, 2]: ... | ||
|
||
# Stub for the decompressor class. It is marked @final, so type checkers reject subclasses. | ||
@final | ||
class ZstdDecompressor: | ||
# Note the parameter order: zstd_dict comes before options here, whereas | ||
# ZstdCompressor.__init__ on this page takes options before zstd_dict. | ||
def __init__(self, zstd_dict: ZstdDict | None = ..., options: _OptionsDecompress | None = ...) -> None: ... | ||
# Takes a buffer plus an optional integer max_length and returns bytes. | ||
def decompress(self, /, data: ReadableBuffer, max_length: int = ...) -> bytes: ... | ||
# The three attributes below are declared as read-only properties. | ||
@property | ||
def eof(self) -> bool: ... | ||
@property | ||
def needs_input(self) -> bool: ... | ||
@property | ||
def unused_data(self) -> bytes: ... | ||
|
||
# Stub for the dictionary class. It is marked @final, so type checkers reject subclasses. | ||
@final | ||
class ZstdDict: | ||
# dict_content is positional-only (before `/`); is_raw is keyword-only (after `*`). | ||
def __init__(self, dict_content: bytes, /, *, is_raw: bool = ...) -> None: ... | ||
def __len__(self, /) -> int: ... | ||
# The three as_* properties each return a pair of (this dictionary type, int). | ||
@property | ||
def as_digested_dict(self) -> tuple[Self, int]: ... | ||
@property | ||
def as_prefix(self) -> tuple[Self, int]: ... | ||
@property | ||
def as_undigested_dict(self) -> tuple[Self, int]: ... | ||
# Read-only accessors for the content passed to the constructor and for an integer id. | ||
@property | ||
def dict_content(self) -> bytes: ... | ||
@property | ||
def dict_id(self) -> int: ... | ||
|
||
# Exception type declared by this stub; it derives directly from Exception and adds no members. | ||
class ZstdError(Exception): ... | ||
|
||
# Module-level function stubs of the low-level module. They work on raw bytes, ints and tuples; | ||
# the second diff on this page declares same-named get_frame_info / train_dict / finalize_dict | ||
# functions that use ZstdDict and FrameInfo instead. | ||
# finalize_dict: all five parameters are positional-only (trailing `/`). | ||
def finalize_dict( | ||
custom_dict_bytes: bytes, samples_bytes: bytes, samples_sizes: tuple[int, ...], dict_size: int, compression_level: int, / | ||
) -> bytes: ... | ||
# Returns a pair of ints; presumably the two values stored in FrameInfo | ||
# (decompressed_size, dictionary_id) -- TODO confirm the order. | ||
def get_frame_info(frame_buffer: ReadableBuffer) -> tuple[int, int]: ... | ||
def get_frame_size(frame_buffer: ReadableBuffer) -> int: ... | ||
# Returns a pair of ints for the given parameter; is_compress selects between | ||
# compression and decompression parameters (going by the argument name -- TODO confirm). | ||
def get_param_bounds(parameter: int, is_compress: bool) -> tuple[int, int]: ... | ||
def set_parameter_types(c_parameter_type: type[Any], d_parameter_type: type[Any]) -> None: ... | ||
# train_dict: all three parameters are positional-only (trailing `/`). | ||
def train_dict(samples_bytes: bytes, samples_sizes: tuple[int, ...], dict_size: int, /) -> bytes: ... | ||
|
||
# Version constants: one as a string, one as an integer. | ||
zstd_version: Final[str] | ||
zstd_version_number: Final[int] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
import enum | ||
from _typeshed import ReadableBuffer | ||
from collections.abc import Iterable, Mapping | ||
from compression.zstd._zstdfile import ZstdFile, open | ||
from typing import Final | ||
from typing_extensions import TypeAlias | ||
|
||
import _zstd | ||
from _zstd import ZstdCompressor, ZstdDecompressor, ZstdDict, ZstdError, get_frame_size, zstd_version | ||
|
||
# Type aliases for the `options` arguments: a mapping from a parameter enum member to an int value. | ||
# The enum classes are defined further down in this stub; forward references are fine in stubs. | ||
# NOTE(review): the commit list on this page includes "Use Mapping[int, int] for options", | ||
# but this view shows "Changes from 4 commits" -- confirm which key type is current. | ||
_OptionsCompress: TypeAlias = Mapping[CompressionParameter, int] | ||
_OptionsDecompress: TypeAlias = Mapping[DecompressionParameter, int] | ||
|
||
# Public names of this module, grouped by the module that defines each one | ||
# (see the per-group comments inside the tuple). | ||
__all__ = ( | ||
# compression.zstd | ||
"COMPRESSION_LEVEL_DEFAULT", | ||
"compress", | ||
"CompressionParameter", | ||
"decompress", | ||
"DecompressionParameter", | ||
"finalize_dict", | ||
"get_frame_info", | ||
"Strategy", | ||
"train_dict", | ||
# compression.zstd._zstdfile | ||
"open", | ||
"ZstdFile", | ||
# _zstd | ||
"get_frame_size", | ||
"zstd_version", | ||
"zstd_version_info", | ||
"ZstdCompressor", | ||
"ZstdDecompressor", | ||
"ZstdDict", | ||
"ZstdError", | ||
) | ||
|
||
# Module constants: a three-int version tuple and the default compression level as an int. | ||
zstd_version_info: Final[tuple[int, int, int]] | ||
COMPRESSION_LEVEL_DEFAULT: Final[int] | ||
|
||
# Return type of get_frame_info() in this stub: a plain class with two int attributes, | ||
# both of which are also the constructor arguments. | ||
class FrameInfo: | ||
decompressed_size: int | ||
dictionary_id: int | ||
def __init__(self, decompressed_size: int, dictionary_id: int) -> None: ... | ||
|
||
# Module-level function stubs of the public module. | ||
# get_frame_info returns a FrameInfo object. | ||
def get_frame_info(frame_buffer: ReadableBuffer) -> FrameInfo: ... | ||
# train_dict / finalize_dict take an iterable of sample buffers and return a ZstdDict. | ||
def train_dict(samples: Iterable[ReadableBuffer], dict_size: int) -> ZstdDict: ... | ||
# zstd_dict is positional-only here (it precedes `/`). | ||
def finalize_dict(zstd_dict: ZstdDict, /, samples: Iterable[ReadableBuffer], dict_size: int, level: int) -> ZstdDict: ... | ||
# One-shot helpers. Their optional arguments mirror the ZstdCompressor / ZstdDecompressor | ||
# constructors shown earlier on this page, including the differing order of options and zstd_dict. | ||
def compress( | ||
data: ReadableBuffer, level: int | None = ..., options: _OptionsCompress | None = ..., zstd_dict: ZstdDict | None = ... | ||
) -> bytes: ... | ||
def decompress(data: ReadableBuffer, zstd_dict: ZstdDict | None = ..., options: _OptionsDecompress | None = ...) -> bytes: ... | ||
|
||
# IntEnum of compression parameters. Each member takes its value from the matching | ||
# `_zstd.ZSTD_c_*` constant; members are the key type of the _OptionsCompress alias above. | ||
class CompressionParameter(enum.IntEnum): | ||
compression_level = _zstd.ZSTD_c_compressionLevel | ||
window_log = _zstd.ZSTD_c_windowLog | ||
hash_log = _zstd.ZSTD_c_hashLog | ||
chain_log = _zstd.ZSTD_c_chainLog | ||
search_log = _zstd.ZSTD_c_searchLog | ||
min_match = _zstd.ZSTD_c_minMatch | ||
target_length = _zstd.ZSTD_c_targetLength | ||
strategy = _zstd.ZSTD_c_strategy | ||
enable_long_distance_matching = _zstd.ZSTD_c_enableLongDistanceMatching | ||
ldm_hash_log = _zstd.ZSTD_c_ldmHashLog | ||
ldm_min_match = _zstd.ZSTD_c_ldmMinMatch | ||
ldm_bucket_size_log = _zstd.ZSTD_c_ldmBucketSizeLog | ||
ldm_hash_rate_log = _zstd.ZSTD_c_ldmHashRateLog | ||
content_size_flag = _zstd.ZSTD_c_contentSizeFlag | ||
checksum_flag = _zstd.ZSTD_c_checksumFlag | ||
dict_id_flag = _zstd.ZSTD_c_dictIDFlag | ||
nb_workers = _zstd.ZSTD_c_nbWorkers | ||
job_size = _zstd.ZSTD_c_jobSize | ||
overlap_log = _zstd.ZSTD_c_overlapLog | ||
# Returns a pair of ints for this parameter; presumably (lower, upper) -- TODO confirm. | ||
def bounds(self) -> tuple[int, int]: ... | ||
|
||
# IntEnum of decompression parameters. It has a single member, whose value comes from | ||
# `_zstd.ZSTD_d_windowLogMax`; members are the key type of the _OptionsDecompress alias above. | ||
class DecompressionParameter(enum.IntEnum): | ||
window_log_max = _zstd.ZSTD_d_windowLogMax | ||
# Returns a pair of ints for this parameter; presumably (lower, upper) -- TODO confirm. | ||
def bounds(self) -> tuple[int, int]: ... | ||
|
||
# IntEnum of strategy values. Each member takes its value from the same-named | ||
# `_zstd.ZSTD_<name>` constant. | ||
class Strategy(enum.IntEnum): | ||
fast = _zstd.ZSTD_fast | ||
dfast = _zstd.ZSTD_dfast | ||
greedy = _zstd.ZSTD_greedy | ||
lazy = _zstd.ZSTD_lazy | ||
lazy2 = _zstd.ZSTD_lazy2 | ||
btlazy2 = _zstd.ZSTD_btlazy2 | ||
btopt = _zstd.ZSTD_btopt | ||
btultra = _zstd.ZSTD_btultra | ||
btultra2 = _zstd.ZSTD_btultra2 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,109 @@ | ||
from _typeshed import ReadableBuffer, StrOrBytesPath, WriteableBuffer | ||
from compression._common import _streams | ||
from compression.zstd import ZstdDict, _OptionsCompress, _OptionsDecompress | ||
from typing import IO, Literal, TextIO, overload | ||
from typing_extensions import TypeAlias | ||
|
||
from _zstd import ZstdCompressor | ||
|
||
# Public names of this module: the file class and the open() function declared below. | ||
__all__ = ("ZstdFile", "open") | ||
|
||
# Literal aliases for the `mode` strings, split by direction (read / write) and by | ||
# binary / text, so the overloads below can select parameter and return types per mode. | ||
_ReadBinaryMode: TypeAlias = Literal["r", "rb"] | ||
_WriteBinaryMode: TypeAlias = Literal["w", "wb", "x", "xb", "a", "ab"] | ||
_ReadTextMode: TypeAlias = Literal["rt"] | ||
_WriteTextMode: TypeAlias = Literal["wt", "xt", "at"] | ||
# Accepted `file` arguments: a path, or an already-open file object. | ||
_PathOrFileBinary: TypeAlias = StrOrBytesPath | IO[bytes] | ||
# NOTE(review): this alias lets the text-mode open() overloads take an IO[str] object. | ||
# Confirm that is intended and that the underlying file should not be binary in text mode too. | ||
_PathOrFileText: TypeAlias = StrOrBytesPath | IO[str] | ||
|
||
# Stub for the file-object class; it subclasses _streams.BaseStream from compression._common. | ||
class ZstdFile(_streams.BaseStream): | ||
# Re-exposes the two flush-mode constants of ZstdCompressor as class attributes. | ||
FLUSH_BLOCK = ZstdCompressor.FLUSH_BLOCK | ||
FLUSH_FRAME = ZstdCompressor.FLUSH_FRAME | ||
|
||
# Overload for read modes ("r"/"rb", the default): level must be None and | ||
# options is typed as decompression options. | ||
@overload | ||
def __init__( | ||
self, | ||
file: _PathOrFileBinary, | ||
/, | ||
mode: _ReadBinaryMode = ..., | ||
*, | ||
level: None = ..., | ||
options: _OptionsDecompress | None = ..., | ||
zstd_dict: ZstdDict | None = ..., | ||
) -> None: ... | ||
# Overload for write modes (mode is required here): level may be an int and | ||
# options is typed as compression options. | ||
@overload | ||
def __init__( | ||
self, | ||
file: _PathOrFileBinary, | ||
/, | ||
mode: _WriteBinaryMode, | ||
*, | ||
level: int | None = ..., | ||
options: _OptionsCompress | None = ..., | ||
zstd_dict: ZstdDict | None = ..., | ||
) -> None: ... | ||
def write(self, data: ReadableBuffer, /) -> int: ... | ||
# mode accepts only 1 or 2, the same literals as ZstdCompressor.flush on this page. The | ||
# override ignore marks this signature as deliberately incompatible with the parent class. | ||
def flush(self, mode: Literal[1, 2] = ...) -> bytes: ... # type: ignore[override] | ||
def read(self, size: int | None = ...) -> bytes: ... | ||
def read1(self, size: int | None = ...) -> bytes: ... | ||
def readinto(self, b: WriteableBuffer) -> int: ... | ||
def readinto1(self, b: WriteableBuffer) -> int: ... | ||
def readline(self, size: int | None = ...) -> bytes: ... | ||
def seek(self, offset: int, whence: int = ...) -> int: ... | ||
def peek(self, size: int = ...) -> bytes: ... | ||
@property | ||
def name(self) -> str | bytes: ... | ||
# Typed as exactly "rb" or "wb", whichever mode string was passed to the constructor. | ||
@property | ||
def mode(self) -> Literal["rb", "wb"]: ... | ||
|
||
# open() is declared through four overloads keyed on `mode`: | ||
#   binary read (default) and binary write return ZstdFile; | ||
#   text read and text write return TextIO. | ||
# In every overload `file` is positional-only and all arguments after `mode` are keyword-only. | ||
# Overload 1: binary read. mode defaults, level must be None, options are decompression options. | ||
@overload | ||
def open( | ||
file: _PathOrFileBinary, | ||
/, | ||
mode: _ReadBinaryMode = ..., | ||
*, | ||
level: None = ..., | ||
options: _OptionsDecompress | None = ..., | ||
zstd_dict: ZstdDict | None = ..., | ||
encoding: str | None = ..., | ||
errors: str | None = ..., | ||
newline: str | None = ..., | ||
) -> ZstdFile: ... | ||
# Overload 2: binary write. mode is required, level may be an int, options are compression options. | ||
@overload | ||
def open( | ||
file: _PathOrFileBinary, | ||
/, | ||
mode: _WriteBinaryMode, | ||
*, | ||
level: int | None = ..., | ||
options: _OptionsCompress | None = ..., | ||
zstd_dict: ZstdDict | None = ..., | ||
encoding: str | None = ..., | ||
errors: str | None = ..., | ||
newline: str | None = ..., | ||
) -> ZstdFile: ... | ||
# Overload 3: text read ("rt"). Same parameters as binary read, but returns TextIO. | ||
# NOTE(review): `file` is typed with _PathOrFileText (path or IO[str]) -- confirm intended. | ||
@overload | ||
def open( | ||
file: _PathOrFileText, | ||
/, | ||
mode: _ReadTextMode, | ||
*, | ||
level: None = ..., | ||
options: _OptionsDecompress | None = ..., | ||
zstd_dict: ZstdDict | None = ..., | ||
encoding: str | None = ..., | ||
errors: str | None = ..., | ||
newline: str | None = ..., | ||
) -> TextIO: ... | ||
# Overload 4: text write ("wt"/"xt"/"at"). Same parameters as binary write, but returns TextIO. | ||
@overload | ||
def open( | ||
file: _PathOrFileText, | ||
/, | ||
mode: _WriteTextMode, | ||
*, | ||
level: int | None = ..., | ||
options: _OptionsCompress | None = ..., | ||
zstd_dict: ZstdDict | None = ..., | ||
encoding: str | None = ..., | ||
errors: str | None = ..., | ||
newline: str | None = ..., | ||
) -> TextIO: ... |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think it might be useful to express these in terms of the `ZstdCompressor` attributes. I think that would make them easier to understand. If you think it makes the signature too busy then I suppose it is fine as is, but I want to discourage passing bare integers.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
(similarly it'd be good to do the same below with the ZstdFile methods)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I believe the below is invalid:
But maybe I can try to do something like the following:
Is it what you meant?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Couldn't you do `CONTINUE | FLUSH_BLOCK | FLUSH_FRAME`?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think it's possible (playground)
I will go ahead and make the change: define the `Literal` types once and reuse them afterwards.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Okay, I think that is fine then.