Skip to content

Python 3.14: PEP-784 compression.zstd #14129

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 6 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion stdlib/@tests/stubtest_allowlists/py314.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ compression.gzip.GzipFile.readinto
compression.gzip.GzipFile.readinto1
compression.gzip.GzipFile.readinto1
compression.gzip.compress
compression.zstd
ctypes.memoryview_at
ctypes.py_object.__class_getitem__
ctypes.util.dllist
Expand Down Expand Up @@ -83,6 +82,8 @@ turtle.poly
turtle.save
types.CodeType.co_branches
types.FrameType.f_generator
zipfile.__all__
zipfile.ZIP_ZSTANDARD

# =========================
# New errors in Python 3.14
Expand Down
1 change: 1 addition & 0 deletions stdlib/VERSIONS
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ _warnings: 3.0-
_weakref: 3.0-
_weakrefset: 3.0-
_winapi: 3.3-
_zstd: 3.14-
abc: 3.0-
aifc: 3.0-3.12
annotationlib: 3.14-
Expand Down
89 changes: 89 additions & 0 deletions stdlib/_zstd.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
from _typeshed import ReadableBuffer
from compression.zstd import _OptionsCompress, _OptionsDecompress
from typing import Any, Final, Literal, final
from typing_extensions import Self

# Module-level integer constants of ``_zstd``; every one is declared ``Final[int]``.
# The ``ZSTD_c_*`` names supply the member values of ``compression.zstd.CompressionParameter``,
# ``ZSTD_d_windowLogMax`` supplies ``DecompressionParameter.window_log_max``, and the
# strategy names (``ZSTD_fast`` ... ``ZSTD_btultra2``) supply the ``Strategy`` members.
ZSTD_CLEVEL_DEFAULT: Final[int]
ZSTD_DStreamOutSize: Final[int]
ZSTD_btlazy2: Final[int]
ZSTD_btopt: Final[int]
ZSTD_btultra: Final[int]
ZSTD_btultra2: Final[int]
ZSTD_c_chainLog: Final[int]
ZSTD_c_checksumFlag: Final[int]
ZSTD_c_compressionLevel: Final[int]
ZSTD_c_contentSizeFlag: Final[int]
ZSTD_c_dictIDFlag: Final[int]
ZSTD_c_enableLongDistanceMatching: Final[int]
ZSTD_c_hashLog: Final[int]
ZSTD_c_jobSize: Final[int]
ZSTD_c_ldmBucketSizeLog: Final[int]
ZSTD_c_ldmHashLog: Final[int]
ZSTD_c_ldmHashRateLog: Final[int]
ZSTD_c_ldmMinMatch: Final[int]
ZSTD_c_minMatch: Final[int]
ZSTD_c_nbWorkers: Final[int]
ZSTD_c_overlapLog: Final[int]
ZSTD_c_searchLog: Final[int]
ZSTD_c_strategy: Final[int]
ZSTD_c_targetLength: Final[int]
ZSTD_c_windowLog: Final[int]
ZSTD_d_windowLogMax: Final[int]
ZSTD_dfast: Final[int]
ZSTD_fast: Final[int]
ZSTD_greedy: Final[int]
ZSTD_lazy: Final[int]
ZSTD_lazy2: Final[int]

# Compressor object exposed by ``_zstd``. It is ``@final``, so type checkers reject subclasses.
# The remaining members of this class (``flush`` and ``last_mode``) follow the review
# thread that is embedded in the diff right after ``compress``.
@final
class ZstdCompressor:
    # Mode constants; ``compress()`` accepts any of the three values.
    CONTINUE: Final = 0
    FLUSH_BLOCK: Final = 1
    FLUSH_FRAME: Final = 2
    # All three arguments are optional and default to None.
    def __init__(
        self, level: int | None = None, options: _OptionsCompress | None = None, zstd_dict: ZstdDict | None = None
    ) -> None: ...
    # ``mode`` is one of the class constants above and defaults to CONTINUE (0).
    def compress(self, /, data: ReadableBuffer, mode: Literal[0, 1, 2] = 0) -> bytes: ...
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it might be useful to express these in terms of the ZstdCompressor attributes; I think that would make it easier to understand. If you think it makes the signature too busy then I suppose it is fine as is, but I want to discourage passing bare integers.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(similarly it'd be good to do the same below with the ZstdFile methods)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe the below is invalid:

Literal[CONTINUE, FLUSH_BLOCK, FLUSH_FRAME]

But maybe I can try to do something like the following:

_ZstdCompressorContinueType = Literal[0]
_ZstdCompressorFlushBlockType = Literal[1]
_ZstdCompressorFlushFrameType = Literal[2]

@final
class ZstdCompressor:
    CONTINUE: Final[_ZstdCompressorContinueType] = 0
    FLUSH_BLOCK: Final[_ZstdCompressorFlushBlockType] = 1
    FLUSH_FRAME: Final[_ZstdCompressorFlushFrameType] = 2
    def compress(self, /, data: ReadableBuffer, mode: _ZstdCompressorContinueType | _ZstdCompressorFlushBlockType | _ZstdCompressorFlushFrameType = ...) -> bytes: ...

Is that what you meant?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Couldn't you do CONTINUE | FLUSH_BLOCK | FLUSH_FRAME?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think it's possible (playground)

main.py:8: error: Variable "__main__.ZstdCompressor.CONTINUE" is not valid as a type  [valid-type]
main.py:8: note: See https://mypy.readthedocs.io/en/stable/common_issues.html#variables-vs-type-aliases
main.py:8: error: Variable "__main__.ZstdCompressor.FLUSH_BLOCK" is not valid as a type  [valid-type]
main.py:8: error: Variable "__main__.ZstdCompressor.FLUSH_FRAME" is not valid as a type  [valid-type]

I will go ahead and define the Literal types once and reuse them afterwards.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, I think that is fine then.

# The two definitions below are members of ``ZstdCompressor``: its class body resumes
# here after the review thread embedded in the diff.
# ``flush`` accepts only FLUSH_BLOCK (1) or FLUSH_FRAME (2); the default is FLUSH_FRAME (2).
def flush(self, /, mode: Literal[1, 2] = 2) -> bytes: ...
# Read-only property typed as one of the three mode values (0, 1 or 2).
@property
def last_mode(self) -> Literal[0, 1, 2]: ...

# Decompressor object exposed by ``_zstd``. It is ``@final``, so type checkers reject subclasses.
@final
class ZstdDecompressor:
    # Both arguments are optional and default to None.
    def __init__(self, zstd_dict: ZstdDict | None = None, options: _OptionsDecompress | None = None) -> None: ...
    # ``max_length`` defaults to -1.
    # NOTE(review): presumably -1 means "no limit", as in the other stdlib decompressors — confirm.
    def decompress(self, /, data: ReadableBuffer, max_length: int = -1) -> bytes: ...
    # Read-only state flags and leftover input, exposed as properties.
    @property
    def eof(self) -> bool: ...
    @property
    def needs_input(self) -> bool: ...
    @property
    def unused_data(self) -> bytes: ...

# Dictionary object exposed by ``_zstd``. It is ``@final``, so type checkers reject subclasses.
@final
class ZstdDict:
    # ``dict_content`` is positional-only; ``is_raw`` is keyword-only and defaults to False.
    def __init__(self, dict_content: bytes, /, *, is_raw: bool = False) -> None: ...
    def __len__(self, /) -> int: ...
    # Each ``as_*`` property returns a pair of this dictionary and an int.
    # NOTE(review): the int presumably tags how the dictionary should be loaded — confirm.
    @property
    def as_digested_dict(self) -> tuple[Self, int]: ...
    @property
    def as_prefix(self) -> tuple[Self, int]: ...
    @property
    def as_undigested_dict(self) -> tuple[Self, int]: ...
    # Raw content and numeric id of the dictionary.
    @property
    def dict_content(self) -> bytes: ...
    @property
    def dict_id(self) -> int: ...

# Exception type exported by ``_zstd``; a direct subclass of ``Exception`` with no extra members.
class ZstdError(Exception):
    ...

# Low-level dictionary finalisation: five positional-only arguments in, bytes out.
# ``samples_bytes`` / ``samples_sizes`` carry the samples as one bytes object plus a tuple of ints.
def finalize_dict(
    custom_dict_bytes: bytes,
    samples_bytes: bytes,
    samples_sizes: tuple[int, ...],
    dict_size: int,
    compression_level: int,
    /,
) -> bytes: ...
# Takes a readable buffer and returns a pair of ints.
# NOTE(review): presumably (decompressed size, dictionary id), matching FrameInfo's fields — confirm.
def get_frame_info(
    frame_buffer: ReadableBuffer,
) -> tuple[int, int]: ...
# Takes a readable buffer and returns a single int.
def get_frame_size(
    frame_buffer: ReadableBuffer,
) -> int: ...
# Returns a pair of ints for the numeric ``parameter``; ``is_compress`` selects the
# compression or the decompression parameter space.
# NOTE(review): the pair is presumably (lower bound, upper bound) — confirm.
def get_param_bounds(
    parameter: int,
    is_compress: bool,
) -> tuple[int, int]: ...
# Registers two classes with ``_zstd``: one for compression parameters, one for
# decompression parameters. Both are typed loosely as ``type[Any]``; returns nothing.
def set_parameter_types(
    c_parameter_type: type[Any],
    d_parameter_type: type[Any],
) -> None: ...
# Low-level dictionary training: three positional-only arguments in, bytes out.
# The samples arrive as one bytes object plus a tuple of ints.
def train_dict(
    samples_bytes: bytes,
    samples_sizes: tuple[int, ...],
    dict_size: int,
    /,
) -> bytes: ...

# Version constants exposed by ``_zstd``: one as a string, one as an integer.
zstd_version: Final[str]
zstd_version_number: Final[int]
89 changes: 89 additions & 0 deletions stdlib/compression/zstd/__init__.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
import enum
from _typeshed import ReadableBuffer
from collections.abc import Iterable, Mapping
from compression.zstd._zstdfile import ZstdFile, open
from typing import Final
from typing_extensions import TypeAlias

import _zstd
from _zstd import ZstdCompressor, ZstdDecompressor, ZstdDict, ZstdError, get_frame_size, zstd_version

# Option mappings shared by the compressor/decompressor signatures: each maps a
# parameter enum member (defined further down in this stub) to its integer value.
_OptionsCompress: TypeAlias = Mapping[CompressionParameter, int]
_OptionsDecompress: TypeAlias = Mapping[DecompressionParameter, int]

# Names exported by ``compression.zstd``; the inline comments group them into three sections.
__all__ = (
# compression.zstd
"COMPRESSION_LEVEL_DEFAULT",
"compress",
"CompressionParameter",
"decompress",
"DecompressionParameter",
"finalize_dict",
"get_frame_info",
"Strategy",
"train_dict",
# compression.zstd._zstdfile
"open",
"ZstdFile",
# _zstd
"get_frame_size",
"zstd_version",
"zstd_version_info",
"ZstdCompressor",
"ZstdDecompressor",
"ZstdDict",
"ZstdError",
)

# Package-level constants: the zstd version as a three-int tuple, and an integer
# default compression level.
zstd_version_info: Final[tuple[int, int, int]]
COMPRESSION_LEVEL_DEFAULT: Final[int]

# Result type of ``get_frame_info``: a plain record with two integer fields.
class FrameInfo:
    decompressed_size: int
    dictionary_id: int
    # The constructor takes both fields, in declaration order.
    def __init__(self, decompressed_size: int, dictionary_id: int) -> None: ...

# Public wrapper: takes a readable buffer and returns a ``FrameInfo`` record.
def get_frame_info(
    frame_buffer: ReadableBuffer,
) -> FrameInfo: ...
# Public wrapper: takes an iterable of sample buffers plus a target size and
# returns a ``ZstdDict``.
def train_dict(
    samples: Iterable[ReadableBuffer],
    dict_size: int,
) -> ZstdDict: ...
# Public wrapper: takes an existing ``ZstdDict`` (positional-only), an iterable of
# sample buffers, a target size and a level, and returns a ``ZstdDict``.
def finalize_dict(
    zstd_dict: ZstdDict,
    /,
    samples: Iterable[ReadableBuffer],
    dict_size: int,
    level: int,
) -> ZstdDict: ...
# Takes a readable buffer and returns bytes. ``level``, ``options`` and ``zstd_dict``
# are optional and each defaults to None.
def compress(
    data: ReadableBuffer,
    level: int | None = None,
    options: _OptionsCompress | None = None,
    zstd_dict: ZstdDict | None = None,
) -> bytes: ...
# Takes a readable buffer and returns bytes. ``zstd_dict`` and ``options`` are
# optional and each defaults to None.
def decompress(
    data: ReadableBuffer,
    zstd_dict: ZstdDict | None = None,
    options: _OptionsDecompress | None = None,
) -> bytes: ...

# Integer enum of compression parameters; every member takes its value from the
# matching ``ZSTD_c_*`` constant of ``_zstd``. Members are the keys of ``_OptionsCompress``.
class CompressionParameter(enum.IntEnum):
    compression_level = _zstd.ZSTD_c_compressionLevel
    window_log = _zstd.ZSTD_c_windowLog
    hash_log = _zstd.ZSTD_c_hashLog
    chain_log = _zstd.ZSTD_c_chainLog
    search_log = _zstd.ZSTD_c_searchLog
    min_match = _zstd.ZSTD_c_minMatch
    target_length = _zstd.ZSTD_c_targetLength
    strategy = _zstd.ZSTD_c_strategy
    enable_long_distance_matching = _zstd.ZSTD_c_enableLongDistanceMatching
    ldm_hash_log = _zstd.ZSTD_c_ldmHashLog
    ldm_min_match = _zstd.ZSTD_c_ldmMinMatch
    ldm_bucket_size_log = _zstd.ZSTD_c_ldmBucketSizeLog
    ldm_hash_rate_log = _zstd.ZSTD_c_ldmHashRateLog
    content_size_flag = _zstd.ZSTD_c_contentSizeFlag
    checksum_flag = _zstd.ZSTD_c_checksumFlag
    dict_id_flag = _zstd.ZSTD_c_dictIDFlag
    nb_workers = _zstd.ZSTD_c_nbWorkers
    job_size = _zstd.ZSTD_c_jobSize
    overlap_log = _zstd.ZSTD_c_overlapLog
    # Returns a pair of ints for this parameter.
    # NOTE(review): presumably (lower bound, upper bound) — confirm.
    def bounds(self) -> tuple[int, int]: ...

# Integer enum of decompression parameters; its single member takes its value from
# ``_zstd.ZSTD_d_windowLogMax``. Members are the keys of ``_OptionsDecompress``.
class DecompressionParameter(enum.IntEnum):
    window_log_max = _zstd.ZSTD_d_windowLogMax
    # Returns a pair of ints for this parameter.
    # NOTE(review): presumably (lower bound, upper bound) — confirm.
    def bounds(self) -> tuple[int, int]: ...

# Integer enum of strategies; every member takes its value from the ``_zstd``
# constant of the same name (``ZSTD_fast`` ... ``ZSTD_btultra2``).
class Strategy(enum.IntEnum):
    fast = _zstd.ZSTD_fast
    dfast = _zstd.ZSTD_dfast
    greedy = _zstd.ZSTD_greedy
    lazy = _zstd.ZSTD_lazy
    lazy2 = _zstd.ZSTD_lazy2
    btlazy2 = _zstd.ZSTD_btlazy2
    btopt = _zstd.ZSTD_btopt
    btultra = _zstd.ZSTD_btultra
    btultra2 = _zstd.ZSTD_btultra2
109 changes: 109 additions & 0 deletions stdlib/compression/zstd/_zstdfile.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
from _typeshed import ReadableBuffer, StrOrBytesPath, WriteableBuffer
from compression._common import _streams
from compression.zstd import ZstdDict, _OptionsCompress, _OptionsDecompress
from typing import IO, Literal, TextIO, overload
from typing_extensions import TypeAlias

from _zstd import ZstdCompressor

# Public names of this module.
__all__ = ("ZstdFile", "open")

# Mode-string literals, split by direction (read / write) and by binary / text,
# followed by the accepted ``file`` argument types: a path or an already-open stream.
_ReadBinaryMode: TypeAlias = Literal["r", "rb"]
_WriteBinaryMode: TypeAlias = Literal["w", "wb", "x", "xb", "a", "ab"]
_ReadTextMode: TypeAlias = Literal["rt"]
_WriteTextMode: TypeAlias = Literal["wt", "xt", "at"]
_PathOrFileBinary: TypeAlias = StrOrBytesPath | IO[bytes]
_PathOrFileText: TypeAlias = StrOrBytesPath | IO[str]

# Binary file object for zstd streams, built on ``_streams.BaseStream``.
class ZstdFile(_streams.BaseStream):
    # Flush modes re-exported from ``ZstdCompressor``; ``flush()`` accepts exactly these two.
    FLUSH_BLOCK = ZstdCompressor.FLUSH_BLOCK
    FLUSH_FRAME = ZstdCompressor.FLUSH_FRAME

    # Read modes ("r" is the default): ``level`` must be None and ``options`` holds
    # decompression parameters.
    @overload
    def __init__(
        self,
        file: _PathOrFileBinary,
        /,
        mode: _ReadBinaryMode = "r",
        *,
        level: None = None,
        options: _OptionsDecompress | None = None,
        zstd_dict: ZstdDict | None = None,
    ) -> None: ...
    # Write/create/append modes: ``mode`` is required, ``level`` may be set and
    # ``options`` holds compression parameters.
    @overload
    def __init__(
        self,
        file: _PathOrFileBinary,
        /,
        mode: _WriteBinaryMode,
        *,
        level: int | None = None,
        options: _OptionsCompress | None = None,
        zstd_dict: ZstdDict | None = None,
    ) -> None: ...
    def write(self, data: ReadableBuffer, /) -> int: ...
    # Signature differs from the base class, hence the override ignore. Default is FLUSH_BLOCK (1).
    def flush(self, mode: Literal[1, 2] = 1) -> bytes: ...  # type: ignore[override]
    # The size arguments below default to -1.
    def read(self, size: int | None = -1) -> bytes: ...
    def read1(self, size: int | None = -1) -> bytes: ...
    def readinto(self, b: WriteableBuffer) -> int: ...
    def readinto1(self, b: WriteableBuffer) -> int: ...
    def readline(self, size: int | None = -1) -> bytes: ...
    # ``whence`` defaults to 0 (``io.SEEK_SET``).
    def seek(self, offset: int, whence: int = 0) -> int: ...
    def peek(self, size: int = -1) -> bytes: ...
    @property
    def name(self) -> str | bytes: ...
    # Reported mode is typed as "rb" or "wb", whichever mode string was passed in.
    @property
    def mode(self) -> Literal["rb", "wb"]: ...

@overload
def open(
file: _PathOrFileBinary,
/,
mode: _ReadBinaryMode = ...,
*,
level: None = ...,
options: _OptionsDecompress | None = ...,
zstd_dict: ZstdDict | None = ...,
encoding: str | None = ...,
errors: str | None = ...,
newline: str | None = ...,
) -> ZstdFile: ...
@overload
def open(
file: _PathOrFileBinary,
/,
mode: _WriteBinaryMode,
*,
level: int | None = ...,
options: _OptionsCompress | None = ...,
zstd_dict: ZstdDict | None = ...,
encoding: str | None = ...,
errors: str | None = ...,
newline: str | None = ...,
) -> ZstdFile: ...
@overload
def open(
file: _PathOrFileText,
/,
mode: _ReadTextMode,
*,
level: None = ...,
options: _OptionsDecompress | None = ...,
zstd_dict: ZstdDict | None = ...,
encoding: str | None = ...,
errors: str | None = ...,
newline: str | None = ...,
) -> TextIO: ...
@overload
def open(
file: _PathOrFileText,
/,
mode: _WriteTextMode,
*,
level: int | None = ...,
options: _OptionsCompress | None = ...,
zstd_dict: ZstdDict | None = ...,
encoding: str | None = ...,
errors: str | None = ...,
newline: str | None = ...,
) -> TextIO: ...
Loading