Skip to content

Commit a862f69

Browse files
authored
New ops (#14)
* Add grouping and chaining operations
* Fix linting and tests
* Pin Poetry version in GitHub test action
* Update dependencies
* Update dependencies for Python 3.13
* Update dependencies for Python 3.13
* Remove lower dependency testing
* Remove lower dependency testing
* Remove lower dependency testing
* Bump version to 0.4
* Update dependency testing for Python 3.13
* Update dependency testing for Python 3.13
1 parent 8d3888d commit a862f69

File tree

11 files changed

+520
-422
lines changed

11 files changed

+520
-422
lines changed

.github/workflows/publish.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ jobs:
2727
- uses: actions/setup-python@v4
2828
with:
2929
python-version: ${{ matrix.python-version }}
30-
- run: curl -sSL https://install.python-poetry.org | python - -y
30+
- run: curl -sSL https://install.python-poetry.org | POETRY_VERSION=1.8.4 python - -y
3131
- run: poetry config virtualenvs.in-project true
3232
- run: make test
3333
- run: make test-dep-versions

.github/workflows/test.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ jobs:
4040
runs-on: ubuntu-latest
4141
strategy:
4242
matrix:
43-
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
43+
python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
4444
steps:
4545
- uses: actions/checkout@v3
4646

@@ -50,7 +50,7 @@ jobs:
5050
python-version: ${{ matrix.python-version }}
5151

5252
- name: Bootstrap poetry
53-
run: curl -sSL https://install.python-poetry.org | python - -y
53+
run: curl -sSL https://install.python-poetry.org | POETRY_VERSION=1.8.4 python - -y
5454

5555
- name: Configure poetry
5656
run: poetry config virtualenvs.in-project true

Makefile

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,6 @@ test: prepare
2929

3030
.PHONY: test-python-versions
3131
test-python-versions:
32-
poetry env use python3.8
33-
make test
34-
3532
poetry env use python3.9
3633
make test
3734

poetry.lock

Lines changed: 362 additions & 373 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 6 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ build-backend = "poetry.core.masonry.api"
55

66
[tool.poetry]
77
name = "pipedata"
8-
version = "0.3"
8+
version = "0.4"
99
description = "Framework for building pipelines for data processing"
1010
authors = ["Simon Wicks <[email protected]>"]
1111
readme = "README.md"
@@ -22,35 +22,25 @@ classifiers = [
2222
"Programming Language :: Python",
2323
"Programming Language :: Python :: 3",
2424
"Programming Language :: Python :: 3 :: Only",
25-
"Programming Language :: Python :: 3.8",
2625
"Programming Language :: Python :: 3.9",
2726
"Programming Language :: Python :: 3.10",
2827
"Programming Language :: Python :: 3.11",
2928
"Programming Language :: Python :: 3.12",
29+
"Programming Language :: Python :: 3.13",
3030
"Topic :: Software Development :: Libraries :: Python Modules",
3131
"Typing :: Typed",
3232
]
3333
packages = [{include = "pipedata", from = "src"}]
3434

3535
[tool.poetry.dependencies]
36-
python = "^3.8"
36+
python = "^3.9"
3737

3838
[tool.poetry.group.ops.dependencies]
39-
fsspec = [
40-
{ version = ">=0.9.0", python = "<3.12" },
41-
{ version = ">=2022.1.0", python = ">=3.12,<3.13"},
42-
]
39+
fsspec = ">=2022.1.0"
4340
ijson = "^3.0.0"
4441
pyarrow = [
45-
{ version = ">=9.0.0", python = "<3.11" },
46-
{ version = ">=11.0.0", python = ">=3.11,<3.12" },
47-
{ version = ">=14.0.0", python = ">=3.12,<=3.13" },
48-
]
49-
# We don't have a direct numpy dependency, but pyarrow depends on numpy
50-
# and numpy has python version constraints with python 3.12
51-
numpy = [
52-
{ version = "<1.25.0", python = "<3.9" },
53-
{ version = "^1.26.0", python = ">=3.12,<3.13" }
42+
{ version = ">=16.0.0", python = "<3.13" },
43+
{ version = ">=18.0.0", python = ">=3.13" },
5444
]
5545

5646
[tool.poetry.group.lint.dependencies]

scripts/test_dependency_versions.sh

Lines changed: 2 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -8,31 +8,11 @@ main() {
88
PYTHON_MINOR_VERSION=$(poetry run python -c 'import sys; version=sys.version_info[:3]; print("{1}".format(*version))')
99
echo "Python minor version: $PYTHON_MINOR_VERSION"
1010

11-
# The errors are mostly / all installation errors,
12-
# about building from source. Could lower
13-
# the requirements if able to build from source.
14-
if (( $PYTHON_MINOR_VERSION < "11" )); then
15-
poetry run pip install pyarrow==9.0.0
16-
poetry run python -m pytest
17-
18-
poetry run pip install pyarrow==10.0.0
11+
if (( $PYTHON_MINOR_VERSION < "13" )); then
12+
poetry run pip install pyarrow==16.0.0
1913
poetry run python -m pytest
2014
fi
2115

22-
if (( $PYTHON_MINOR_VERSION < "12" )); then
23-
poetry run pip install pyarrow==11.0.0
24-
poetry run python -m pytest
25-
26-
poetry run pip install pyarrow==13.0.0
27-
poetry run python -m pytest
28-
29-
poetry run pip install fsspec==0.9.0
30-
poetry run python -m pytest
31-
fi
32-
33-
poetry run pip install pyarrow==14.0.0
34-
poetry run python -m pytest
35-
3616
poetry run pip install ijson==3.0.0
3717
poetry run python -m pytest
3818

src/pipedata/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
__version__ = "0.3"
1+
__version__ = "0.4"
22

33
__all__ = [
44
"__version__",

src/pipedata/core/links.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,8 @@ def __iter__(self) -> Iterator[TStart]:
2323
def __next__(self) -> TStart:
2424
self._count += 1
2525
try:
26-
return next(self._iterator)
26+
next_value = next(self._iterator)
27+
return next_value
2728
except StopIteration as err:
2829
self._count -= 1
2930
raise StopIteration from err
@@ -45,9 +46,10 @@ def __init__(
4546
def __name__(self) -> str: # noqa: A003
4647
return self._func.__name__
4748

48-
def __call__(self, input_iterator: Iterator[TStart]) -> Iterator[TEnd]:
49+
def __call__(self, input_iterator: Iterator[TStart]) -> CountingIterator[TEnd]:
4950
self._input = CountingIterator(input_iterator)
50-
self._output = CountingIterator(self._func(self._input))
51+
result = self._func(self._input)
52+
self._output = CountingIterator(result)
5153
return self._output
5254

5355
def get_counts(self) -> Tuple[int, int]:

src/pipedata/core/ops.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,3 +46,37 @@ def new_action(previous_step: Iterator[TEnd]) -> Iterator[TOther]:
4646
return (func(elements) for elements in _batched(previous_step, n))
4747

4848
super().__init__(new_action)
49+
50+
51+
class chain_iterables(ChainLink[Iterator[TEnd], TEnd]): # noqa: N801
52+
def __init__(self) -> None:
53+
def chain_iterables_(previous_step: Iterator[Iterator[TEnd]]) -> Iterator[TEnd]:
54+
return itertools.chain.from_iterable(previous_step)
55+
56+
super().__init__(chain_iterables_)
57+
58+
59+
class grouper(ChainLink[TEnd, list[TEnd]]): # noqa: N801
60+
def __init__(
61+
self,
62+
*,
63+
starter: Optional[Callable[[TEnd], bool]] = None,
64+
ender: Optional[Callable[[TEnd], bool]] = None,
65+
) -> None:
66+
def grouper_(previous_step: Iterator[TEnd]) -> Iterator[list[TEnd]]:
67+
group: list[TEnd] = []
68+
for element in previous_step:
69+
if starter is not None and starter(element) and len(group) > 0:
70+
yield group
71+
group = [element]
72+
elif ender is not None and ender(element):
73+
group.append(element)
74+
yield group
75+
group = []
76+
else:
77+
group.append(element)
78+
79+
if len(group) > 0:
80+
yield group
81+
82+
super().__init__(grouper_)

tests/core/test_chain.py

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -226,3 +226,70 @@ def add_values(values: Tuple[int, ...]) -> int:
226226
"outputs": 3,
227227
},
228228
]
229+
230+
231+
def test_chain_iterables() -> None:
232+
# TODO: make typing work with chain_iterables
233+
chain = Chain[int]().then(ops.chain_iterables()) # type: ignore
234+
inputs = iter([iter([0, 1]), iter([2, 3])])
235+
result = list(chain(inputs)) # type: ignore
236+
assert result == [0, 1, 2, 3]
237+
assert chain.get_counts() == [
238+
{
239+
"name": "_identity",
240+
"inputs": 2,
241+
"outputs": 2,
242+
},
243+
{
244+
"name": "chain_iterables_",
245+
"inputs": 2,
246+
"outputs": 4,
247+
},
248+
]
249+
250+
251+
def test_chain_grouper() -> None:
252+
def is_one(val: int) -> bool:
253+
return val == 1
254+
255+
def is_three(val: int) -> bool:
256+
return val == 3 # noqa: PLR2004
257+
258+
chain = Chain[int]().then(ops.grouper(starter=is_one, ender=is_three))
259+
inputs = [1, 2, 3, 4, 1, 2, 3, 4]
260+
result = list(chain(iter(inputs)))
261+
assert result == [[1, 2, 3], [4], [1, 2, 3], [4]]
262+
assert chain.get_counts() == [
263+
{
264+
"name": "_identity",
265+
"inputs": 8,
266+
"outputs": 8,
267+
},
268+
{
269+
"name": "grouper_",
270+
"inputs": 8,
271+
"outputs": 4,
272+
},
273+
]
274+
275+
276+
def test_chain_grouper_no_end() -> None:
277+
def is_four(val: int) -> bool:
278+
return val == 4 # noqa: PLR2004
279+
280+
chain = Chain[int]().then(ops.grouper(ender=is_four))
281+
inputs = [1, 2, 3, 4, 1, 2, 3, 4]
282+
result = list(chain(iter(inputs)))
283+
assert result == [[1, 2, 3, 4], [1, 2, 3, 4]]
284+
assert chain.get_counts() == [
285+
{
286+
"name": "_identity",
287+
"inputs": 8,
288+
"outputs": 8,
289+
},
290+
{
291+
"name": "grouper_",
292+
"inputs": 8,
293+
"outputs": 2,
294+
},
295+
]

0 commit comments

Comments
 (0)