Commits (23)
dfe8df9
pin <1.35
Matt711 Oct 15, 2025
0f715c8
Merge branch 'branch-25.12' into imp/polars/bump-polars-1.34
Matt711 Oct 15, 2025
df9606c
adhere to new polars multiplication scale promotion rules
Matt711 Oct 15, 2025
de6344f
pass more upstream polars tests
Matt711 Oct 15, 2025
d88e08b
Merge branch 'branch-25.12' into imp/polars/bump-polars-1.34
Matt711 Oct 15, 2025
605ab02
pass more upstream polars tests
Matt711 Oct 15, 2025
b452a20
merge conflict
Matt711 Oct 15, 2025
5b82752
add missing xfail
Matt711 Oct 16, 2025
1ac2985
Merge branch 'branch-25.12' into imp/polars/bump-polars-1.34
Matt711 Oct 16, 2025
4d37a68
code coverage
Matt711 Oct 16, 2025
15c559e
Merge branch 'branch-25.12' into imp/polars/bump-polars-1.34
Matt711 Oct 16, 2025
3ed2951
code coverage and pass more polars tests
Matt711 Oct 16, 2025
af4e355
Merge branch 'branch-25.12' into imp/polars/bump-polars-1.34
Matt711 Oct 19, 2025
be68d0c
version guard test
Matt711 Oct 19, 2025
5288cd6
Merge branch 'main' into imp/polars/bump-polars-1.34
Matt711 Oct 21, 2025
a1bdbd4
Merge branch 'main' into imp/polars/bump-polars-1.34
Matt711 Oct 22, 2025
e2dc40b
Merge branch 'main' into imp/polars/bump-polars-1.34
Matt711 Oct 23, 2025
a0848c1
Merge branch 'main' into imp/polars/bump-polars-1.34
Matt711 Oct 23, 2025
1023972
clean up
Matt711 Oct 23, 2025
bba70a5
Merge branch 'main' into imp/polars/bump-polars-1.34
Matt711 Oct 24, 2025
291f6ae
Merge branch 'main' into imp/polars/bump-polars-1.34
Matt711 Oct 25, 2025
cd1b39e
merge conflict
Matt711 Oct 29, 2025
6d78693
Merge branch 'main' into imp/polars/bump-polars-1.34
Matt711 Oct 30, 2025
2 changes: 1 addition & 1 deletion conda/environments/all_cuda-129_arch-aarch64.yaml
@@ -66,7 +66,7 @@ dependencies:
- pandas
- pandas>=2.0,<2.4.0dev0
- pandoc
- polars>=1.29,<1.34
- polars>=1.29,<1.35
- pre-commit
- pyarrow>=15.0.0
- pydata-sphinx-theme>=0.15.4
2 changes: 1 addition & 1 deletion conda/environments/all_cuda-129_arch-x86_64.yaml
@@ -67,7 +67,7 @@ dependencies:
- pandas
- pandas>=2.0,<2.4.0dev0
- pandoc
- polars>=1.29,<1.34
- polars>=1.29,<1.35
- pre-commit
- pyarrow>=15.0.0
- pydata-sphinx-theme>=0.15.4
2 changes: 1 addition & 1 deletion conda/environments/all_cuda-130_arch-aarch64.yaml
@@ -66,7 +66,7 @@ dependencies:
- pandas
- pandas>=2.0,<2.4.0dev0
- pandoc
- polars>=1.29,<1.34
- polars>=1.29,<1.35
- pre-commit
- pyarrow>=15.0.0
- pydata-sphinx-theme>=0.15.4
2 changes: 1 addition & 1 deletion conda/environments/all_cuda-130_arch-x86_64.yaml
@@ -67,7 +67,7 @@ dependencies:
- pandas
- pandas>=2.0,<2.4.0dev0
- pandoc
- polars>=1.29,<1.34
- polars>=1.29,<1.35
- pre-commit
- pyarrow>=15.0.0
- pydata-sphinx-theme>=0.15.4
2 changes: 1 addition & 1 deletion conda/recipes/cudf-polars/recipe.yaml
@@ -52,7 +52,7 @@ requirements:
- nvidia-ml-py>=12
- python
- pylibcudf =${{ version }}
- polars >=1.29,<1.34
- polars >=1.29,<1.35
- ${{ pin_compatible("cuda-version", upper_bound="x", lower_bound="x") }}
- if: python == "3.10"
then: typing_extensions
2 changes: 1 addition & 1 deletion dependencies.yaml
@@ -738,7 +738,7 @@ dependencies:
# 'nvidia-ml-py' provides the 'pynvml' module
- &nvidia_ml_py nvidia-ml-py>=12
- packaging
- polars>=1.29,<1.34
- polars>=1.29,<1.35
specific:
- output_types: [requirements, pyproject]
matrices:
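
The same pin bump (polars>=1.29,<1.35) is applied across the conda environments, the recipe, and dependencies.yaml above. As a rough illustration only, a runtime guard along the same lines could look like the sketch below; the helper and constant names are hypothetical rather than taken from cudf-polars, and it assumes only that polars exposes __version__ and that packaging (already listed in dependencies.yaml) is installed.

import polars as pl
from packaging.version import parse

POLARS_LOWER = parse("1.29")
POLARS_UPPER = parse("1.35")

def polars_version_supported(version: str = pl.__version__) -> bool:
    # True when the installed Polars falls inside the supported window.
    return POLARS_LOWER <= parse(version) < POLARS_UPPER

assert polars_version_supported("1.34.0")
assert not polars_version_supported("1.35.0")
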
2 changes: 1 addition & 1 deletion python/cudf_polars/cudf_polars/containers/dataframe.py
@@ -393,7 +393,7 @@ def rename_columns(self, mapping: Mapping[str, str]) -> Self:
stream=self.stream,
)

def select_columns(self, names: Set[str]) -> list[Column]:
def select_columns(self, names: Iterable[str]) -> list[Column]:
Contributor: I'm afraid I can't quite tell what this link is pointing to. It seems like it's going to dsl/ir.py, but the changes there on the old lines 2428-2429 are removing calls to select_columns, so I'm not sure how this change is relevant.

"""Select columns by name."""
return [c for c in self.columns if c.name in names]

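
As context for the Set[str] to Iterable[str] change, the standalone toy below (not the cudf-polars implementation) shows why the looser bound is sufficient: the body only performs membership tests, so a list works as well as a set, at the cost of a linear membership check per column, while a one-shot iterator would be exhausted after the first test.

from collections.abc import Iterable

class FakeColumn:
    # Hypothetical stand-in for a cudf-polars Column; only the name matters here.
    def __init__(self, name: str) -> None:
        self.name = name

def select_columns(columns: list[FakeColumn], names: Iterable[str]) -> list[FakeColumn]:
    # Same shape as the method above: keep the columns whose name appears in names.
    return [c for c in columns if c.name in names]

cols = [FakeColumn("a"), FakeColumn("b"), FakeColumn("c")]
assert [c.name for c in select_columns(cols, ["a", "c"])] == ["a", "c"]
assert [c.name for c in select_columns(cols, {"b"})] == ["b"]
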
65 changes: 55 additions & 10 deletions python/cudf_polars/cudf_polars/dsl/expressions/aggregation.py
@@ -9,22 +9,30 @@
from functools import partial
from typing import TYPE_CHECKING, Any, ClassVar

import polars as pl

import pylibcudf as plc

from cudf_polars.containers import Column
from cudf_polars.containers import Column, DataType
from cudf_polars.dsl.expressions.base import ExecutionContext, Expr
from cudf_polars.dsl.expressions.literal import Literal

if TYPE_CHECKING:
from rmm.pylibrmm.stream import Stream

from cudf_polars.containers import DataFrame, DataType
from cudf_polars.containers import DataFrame

__all__ = ["Agg"]


class Agg(Expr):
__slots__ = ("context", "name", "op", "options", "request")
__slots__ = (
"context",
"name",
"op",
"options",
"request",
)
_non_child = ("dtype", "name", "options", "context")

def __init__(
@@ -156,22 +164,44 @@ def agg_request(self) -> plc.aggregation.Aggregation: # noqa: D102
def _reduce(
self, column: Column, *, request: plc.aggregation.Aggregation, stream: Stream
) -> Column:
if (
self.name in {"mean", "median"}
and plc.traits.is_fixed_point(column.dtype.plc_type)
and self.dtype.plc_type.id() in {plc.TypeId.FLOAT32, plc.TypeId.FLOAT64}
is_mean_or_median = self.name in {"mean", "median"}
is_quantile = self.name == "quantile"
Contributor: Minor nit: We could inline this one. is_quantile is only used once.


out_dtype = self.dtype
Contributor: So Polars has more nuanced casting rules for different aggregations now than it used to?

if plc.traits.is_fixed_point(column.dtype.plc_type) and (
is_mean_or_median or is_quantile
):
column = column.astype(self.dtype, stream=stream)
cast_to = (
self.dtype
if is_mean_or_median
and plc.traits.is_floating_point(self.dtype.plc_type)
Contributor: This condition seems wrong to me. If we're already inside an is_fixed_point block then is_floating_point must always be false, right?

else DataType(pl.Float64())
)
column = column.astype(cast_to, stream=stream)
out_dtype = cast_to
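
The dtype selection just above is easier to follow outside diff form, so here is a plain standalone restatement (illustrative only, not library code). Note that the two traits checks inspect different dtypes: is_fixed_point looks at the input column's dtype, while is_floating_point looks at the requested output dtype (self.dtype), so both can be true at once.

def reduce_output_dtype(agg: str, input_is_decimal: bool, requested_is_float: bool) -> str:
    # Mirrors the out_dtype / cast_to logic above: only Decimal inputs to
    # mean, median, or quantile are affected; everything else keeps the
    # dtype Polars requested.
    if input_is_decimal and agg in {"mean", "median", "quantile"}:
        if agg in {"mean", "median"} and requested_is_float:
            return "requested dtype (already floating point)"
        return "Float64"
    return "requested dtype"
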
if column.size == 0 or column.null_count == column.size:
res = None
if self.name == "n_unique":
res = 0 if column.size == 0 else 1
return Column(
plc.Column.from_scalar(
plc.Scalar.from_py(res, out_dtype.plc_type, stream=stream),
1,
stream=stream,
),
name=column.name,
dtype=out_dtype,
)
Contributor (on lines +182 to +194): Did Polars not have special handling for all-null columns before?
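
The res = 0 / 1 branch above mirrors what the CPU engine reports for n_unique on degenerate inputs. The snippet below is a quick check of that assumption with plain Polars and is not part of the diff.

import polars as pl

# Assumed CPU-engine semantics: an empty column has zero unique values,
# while an all-null column has exactly one distinct value (the null).
assert pl.Series("x", [], dtype=pl.Int64).n_unique() == 0
assert pl.Series("x", [None, None], dtype=pl.Int64).n_unique() == 1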

return Column(
plc.Column.from_scalar(
plc.reduce.reduce(
column.obj, request, self.dtype.plc_type, stream=stream
column.obj, request, out_dtype.plc_type, stream=stream
),
1,
stream=stream,
),
name=column.name,
dtype=self.dtype,
dtype=out_dtype,
)

def _count(self, column: Column, *, include_nulls: bool, stream: Stream) -> Column:
@@ -199,6 +229,21 @@ def _sum(self, column: Column, stream: Stream) -> Column:
name=column.name,
dtype=self.dtype,
)
if plc.traits.is_fixed_point(column.dtype.plc_type):
Contributor: Why are we special-casing here before calling _reduce? If we have aggregation-specific logic inside _reduce, we could handle this there. OTOH the better option would be to move all aggregation-specific logic outside of _reduce and maybe add some parameters to it to handle e.g. the output dtypes (for mean/median/quantile).
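
One way to see why sum gets its own branch while mean, median, and quantile go through the float cast: adding fixed-point values stays exactly representable at the input scale, whereas averaging generally does not. The pure-Python sketch below illustrates the idea with decimal.Decimal and is not a statement about Polars' exact result dtypes.

from decimal import Decimal

values = [Decimal("1.10"), Decimal("2.20"), Decimal("3.30")]
total = sum(values)            # Decimal('6.60'): same two-decimal scale as the inputs
average = total / len(values)  # exact here, but averaging three rows that sum to 1.00 would not be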

return Column(
plc.Column.from_scalar(
plc.reduce.reduce(
column.obj,
plc.aggregation.sum(),
column.dtype.plc_type,
stream=stream,
),
1,
stream=stream,
),
name=column.name,
dtype=column.dtype,
)
return self._reduce(column, request=plc.aggregation.sum(), stream=stream)

def _min(self, column: Column, *, propagate_nans: bool, stream: Stream) -> Column:
8 changes: 6 additions & 2 deletions python/cudf_polars/cudf_polars/dsl/expressions/binaryop.py
@@ -8,13 +8,15 @@

from typing import TYPE_CHECKING, ClassVar

from polars.polars import _expr_nodes as pl_expr
from polars import polars

import pylibcudf as plc

from cudf_polars.containers import Column
from cudf_polars.dsl.expressions.base import ExecutionContext, Expr

pl_expr = polars._expr_nodes
Contributor: Why this change in the import? AFAICT you just need the same thing below anyway.


if TYPE_CHECKING:
from cudf_polars.containers import DataFrame, DataType

@@ -59,7 +61,9 @@ def __init__(
plc.binaryop.BinaryOperator.LOGICAL_OR: plc.binaryop.BinaryOperator.NULL_LOGICAL_OR,
}

_MAPPING: ClassVar[dict[pl_expr.Operator, plc.binaryop.BinaryOperator]] = {
_MAPPING: ClassVar[
dict[polars._expr_nodes.Operator, plc.binaryop.BinaryOperator]
Contributor: Why isn't this just pl_expr.Operator?

] = {
pl_expr.Operator.Eq: plc.binaryop.BinaryOperator.EQUAL,
pl_expr.Operator.EqValidity: plc.binaryop.BinaryOperator.NULL_EQUALS,
pl_expr.Operator.NotEq: plc.binaryop.BinaryOperator.NOT_EQUAL,
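
On the question above about pl_expr.Operator: pl_expr is just a module-level alias for polars._expr_nodes, so the two spellings name the same class. The short check below is illustrative and assumes only what the diff already imports for the polars versions this PR targets.

from polars import polars

pl_expr = polars._expr_nodes
assert pl_expr.Operator is polars._expr_nodes.Operator
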
4 changes: 2 additions & 2 deletions python/cudf_polars/cudf_polars/dsl/expressions/boolean.py
@@ -22,7 +22,7 @@
from typing_extensions import Self

import polars.type_aliases as pl_types
from polars.polars import _expr_nodes as pl_expr
from polars import polars
Contributor: I guess this import change happened everywhere, why?


from rmm.pylibrmm.stream import Stream

@@ -55,7 +55,7 @@ class Name(IntEnum):
Not = auto()

@classmethod
def from_polars(cls, obj: pl_expr.BooleanFunction) -> Self:
def from_polars(cls, obj: polars._expr_nodes.BooleanFunction) -> Self:
"""Convert from polars' `BooleanFunction`."""
try:
function, name = str(obj).split(".", maxsplit=1)
4 changes: 2 additions & 2 deletions python/cudf_polars/cudf_polars/dsl/expressions/datetime.py
@@ -17,7 +17,7 @@
if TYPE_CHECKING:
from typing_extensions import Self

from polars.polars import _expr_nodes as pl_expr
from polars import polars

from cudf_polars.containers import DataFrame, DataType

@@ -75,7 +75,7 @@ class Name(IntEnum):
Year = auto()

@classmethod
def from_polars(cls, obj: pl_expr.TemporalFunction) -> Self:
def from_polars(cls, obj: polars._expr_nodes.TemporalFunction) -> Self:
"""Convert from polars' `TemporalFunction`."""
try:
function, name = str(obj).split(".", maxsplit=1)
8 changes: 3 additions & 5 deletions python/cudf_polars/cudf_polars/dsl/expressions/string.py
@@ -12,8 +12,8 @@
from enum import IntEnum, auto
from typing import TYPE_CHECKING, Any, ClassVar

from polars import polars
from polars.exceptions import InvalidOperationError
from polars.polars import dtype_str_repr

import pylibcudf as plc

@@ -26,8 +26,6 @@
if TYPE_CHECKING:
from typing_extensions import Self

from polars.polars import _expr_nodes as pl_expr

from cudf_polars.containers import DataFrame, DataType

__all__ = ["StringFunction"]
@@ -101,7 +99,7 @@ class Name(IntEnum):
ZFill = auto()

@classmethod
def from_polars(cls, obj: pl_expr.StringFunction) -> Self:
def from_polars(cls, obj: polars._expr_nodes.StringFunction) -> Self:
"""Convert from polars' `StringFunction`."""
try:
function, name = str(obj).split(".", maxsplit=1)
@@ -283,7 +281,7 @@ def _validate_input(self) -> None:
and width.value is not None
and width.value < 0
): # pragma: no cover
dtypestr = dtype_str_repr(width.dtype.polars_type)
dtypestr = polars.dtype_str_repr(width.dtype.polars_type)
raise InvalidOperationError(
f"conversion from `{dtypestr}` to `u64` "
f"failed in column 'literal' for 1 out of "
4 changes: 2 additions & 2 deletions python/cudf_polars/cudf_polars/dsl/expressions/struct.py
@@ -18,7 +18,7 @@
if TYPE_CHECKING:
from typing_extensions import Self

from polars.polars import _expr_nodes as pl_expr
from polars import polars

from cudf_polars.containers import DataFrame, DataType

@@ -42,7 +42,7 @@ class Name(IntEnum):
) # https://github.com/pola-rs/polars/pull/23022#issuecomment-2933910958

@classmethod
def from_polars(cls, obj: pl_expr.StructFunction) -> Self:
def from_polars(cls, obj: polars._expr_nodes.StructFunction) -> Self:
"""Convert from polars' `StructFunction`."""
try:
function, name = str(obj).split(".", maxsplit=1)
2 changes: 1 addition & 1 deletion python/cudf_polars/cudf_polars/dsl/expressions/unary.py
@@ -150,7 +150,7 @@ def __init__(
)

if self.name not in UnaryFunction._supported_fns:
raise NotImplementedError(f"Unary function {name=}")
raise NotImplementedError(f"Unary function {name=}") # pragma: no cover
if self.name in UnaryFunction._supported_cum_aggs:
(reverse,) = self.options
if reverse: