-
Notifications
You must be signed in to change notification settings - Fork 981
Pin Polars version <1.35 #20266
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Pin Polars version <1.35 #20266
Changes from all commits
dfe8df9
0f715c8
df9606c
de6344f
d88e08b
605ab02
b452a20
5b82752
1ac2985
4d37a68
15c559e
3ed2951
af4e355
be68d0c
5288cd6
a1bdbd4
e2dc40b
a0848c1
1023972
bba70a5
291f6ae
cd1b39e
6d78693
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -9,22 +9,30 @@ | |
| from functools import partial | ||
| from typing import TYPE_CHECKING, Any, ClassVar | ||
|
|
||
| import polars as pl | ||
|
|
||
| import pylibcudf as plc | ||
|
|
||
| from cudf_polars.containers import Column | ||
| from cudf_polars.containers import Column, DataType | ||
| from cudf_polars.dsl.expressions.base import ExecutionContext, Expr | ||
| from cudf_polars.dsl.expressions.literal import Literal | ||
|
|
||
| if TYPE_CHECKING: | ||
| from rmm.pylibrmm.stream import Stream | ||
|
|
||
| from cudf_polars.containers import DataFrame, DataType | ||
| from cudf_polars.containers import DataFrame | ||
|
|
||
| __all__ = ["Agg"] | ||
|
|
||
|
|
||
| class Agg(Expr): | ||
| __slots__ = ("context", "name", "op", "options", "request") | ||
| __slots__ = ( | ||
| "context", | ||
| "name", | ||
| "op", | ||
| "options", | ||
| "request", | ||
| ) | ||
| _non_child = ("dtype", "name", "options", "context") | ||
|
|
||
| def __init__( | ||
|
|
@@ -156,22 +164,44 @@ def agg_request(self) -> plc.aggregation.Aggregation: # noqa: D102 | |
| def _reduce( | ||
| self, column: Column, *, request: plc.aggregation.Aggregation, stream: Stream | ||
| ) -> Column: | ||
| if ( | ||
| self.name in {"mean", "median"} | ||
| and plc.traits.is_fixed_point(column.dtype.plc_type) | ||
| and self.dtype.plc_type.id() in {plc.TypeId.FLOAT32, plc.TypeId.FLOAT64} | ||
| is_mean_or_median = self.name in {"mean", "median"} | ||
| is_quantile = self.name == "quantile" | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Minor nit: We could inline this one. |
||
|
|
||
| out_dtype = self.dtype | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So Polars has more nuanced casting rules for different aggregations now than it used to? |
||
| if plc.traits.is_fixed_point(column.dtype.plc_type) and ( | ||
| is_mean_or_median or is_quantile | ||
| ): | ||
| column = column.astype(self.dtype, stream=stream) | ||
| cast_to = ( | ||
| self.dtype | ||
| if is_mean_or_median | ||
| and plc.traits.is_floating_point(self.dtype.plc_type) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This condition seems wrong to me. If we're already inside an |
||
| else DataType(pl.Float64()) | ||
| ) | ||
| column = column.astype(cast_to, stream=stream) | ||
| out_dtype = cast_to | ||
| if column.size == 0 or column.null_count == column.size: | ||
| res = None | ||
| if self.name == "n_unique": | ||
| res = 0 if column.size == 0 else 1 | ||
| return Column( | ||
| plc.Column.from_scalar( | ||
| plc.Scalar.from_py(res, out_dtype.plc_type, stream=stream), | ||
| 1, | ||
| stream=stream, | ||
| ), | ||
| name=column.name, | ||
| dtype=out_dtype, | ||
| ) | ||
|
Comment on lines
+182
to
+194
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Did Polars not have special handling for all-null columns before? |
||
| return Column( | ||
| plc.Column.from_scalar( | ||
| plc.reduce.reduce( | ||
| column.obj, request, self.dtype.plc_type, stream=stream | ||
| column.obj, request, out_dtype.plc_type, stream=stream | ||
| ), | ||
| 1, | ||
| stream=stream, | ||
| ), | ||
| name=column.name, | ||
| dtype=self.dtype, | ||
| dtype=out_dtype, | ||
| ) | ||
|
|
||
| def _count(self, column: Column, *, include_nulls: bool, stream: Stream) -> Column: | ||
|
|
@@ -199,6 +229,21 @@ def _sum(self, column: Column, stream: Stream) -> Column: | |
| name=column.name, | ||
| dtype=self.dtype, | ||
| ) | ||
| if plc.traits.is_fixed_point(column.dtype.plc_type): | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why are we special-casing here before calling |
||
| return Column( | ||
| plc.Column.from_scalar( | ||
| plc.reduce.reduce( | ||
| column.obj, | ||
| plc.aggregation.sum(), | ||
| column.dtype.plc_type, | ||
| stream=stream, | ||
| ), | ||
| 1, | ||
| stream=stream, | ||
| ), | ||
| name=column.name, | ||
| dtype=column.dtype, | ||
| ) | ||
| return self._reduce(column, request=plc.aggregation.sum(), stream=stream) | ||
|
|
||
| def _min(self, column: Column, *, propagate_nans: bool, stream: Stream) -> Column: | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -8,13 +8,15 @@ | |
|
|
||
| from typing import TYPE_CHECKING, ClassVar | ||
|
|
||
| from polars.polars import _expr_nodes as pl_expr | ||
| from polars import polars | ||
|
|
||
| import pylibcudf as plc | ||
|
|
||
| from cudf_polars.containers import Column | ||
| from cudf_polars.dsl.expressions.base import ExecutionContext, Expr | ||
|
|
||
| pl_expr = polars._expr_nodes | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why this change in the import? AFAICT you just need the same thing below anyway. |
||
|
|
||
| if TYPE_CHECKING: | ||
| from cudf_polars.containers import DataFrame, DataType | ||
|
|
||
|
|
@@ -59,7 +61,9 @@ def __init__( | |
| plc.binaryop.BinaryOperator.LOGICAL_OR: plc.binaryop.BinaryOperator.NULL_LOGICAL_OR, | ||
| } | ||
|
|
||
| _MAPPING: ClassVar[dict[pl_expr.Operator, plc.binaryop.BinaryOperator]] = { | ||
| _MAPPING: ClassVar[ | ||
| dict[polars._expr_nodes.Operator, plc.binaryop.BinaryOperator] | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why isn't this just pl_expr.Operator? |
||
| ] = { | ||
| pl_expr.Operator.Eq: plc.binaryop.BinaryOperator.EQUAL, | ||
| pl_expr.Operator.EqValidity: plc.binaryop.BinaryOperator.NULL_EQUALS, | ||
| pl_expr.Operator.NotEq: plc.binaryop.BinaryOperator.NOT_EQUAL, | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -22,7 +22,7 @@ | |
| from typing_extensions import Self | ||
|
|
||
| import polars.type_aliases as pl_types | ||
| from polars.polars import _expr_nodes as pl_expr | ||
| from polars import polars | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I guess this import change happened everywhere, why? |
||
|
|
||
| from rmm.pylibrmm.stream import Stream | ||
|
|
||
|
|
@@ -55,7 +55,7 @@ class Name(IntEnum): | |
| Not = auto() | ||
|
|
||
| @classmethod | ||
| def from_polars(cls, obj: pl_expr.BooleanFunction) -> Self: | ||
| def from_polars(cls, obj: polars._expr_nodes.BooleanFunction) -> Self: | ||
| """Convert from polars' `BooleanFunction`.""" | ||
| try: | ||
| function, name = str(obj).split(".", maxsplit=1) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I changed the typing for this reason: https://github.com/rapidsai/cudf/pull/20266/files#r2461036513
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm afraid I can't quite tell what this link is pointing to. It seems like it's going to dsl/ir.py, but the changes there on the old lines 2428-2429 are removing calls to
select_columns, so I'm not sure how this change is relevant.