diff --git a/CHANGELOG.md b/CHANGELOG.md index 86d7315896..25205f48d0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -463,7 +463,7 @@ * Address `read_csv` with both `index_col` and `use_cols` behavior inconsistency with pandas ([#1785](https://github.com/googleapis/python-bigquery-dataframes/issues/1785)) ([ba7c313](https://github.com/googleapis/python-bigquery-dataframes/commit/ba7c313c8d308e3ff3f736b60978cb7a51715209)) * Allow KMeans model init parameter as k-means++ alias ([#1790](https://github.com/googleapis/python-bigquery-dataframes/issues/1790)) ([0b59cf1](https://github.com/googleapis/python-bigquery-dataframes/commit/0b59cf1008613770fa1433c6da395e755c86fe22)) -* Replace function now can handle bpd.NA value. ([#1786](https://github.com/googleapis/python-bigquery-dataframes/issues/1786)) ([7269512](https://github.com/googleapis/python-bigquery-dataframes/commit/7269512a28eb42029447d5380c764353278a74e1)) +* Replace function now can handle pd.NA value. ([#1786](https://github.com/googleapis/python-bigquery-dataframes/issues/1786)) ([7269512](https://github.com/googleapis/python-bigquery-dataframes/commit/7269512a28eb42029447d5380c764353278a74e1)) ### Documentation diff --git a/bigframes/bigquery/_operations/ai.py b/bigframes/bigquery/_operations/ai.py index f4302f8ece..e0af130016 100644 --- a/bigframes/bigquery/_operations/ai.py +++ b/bigframes/bigquery/_operations/ai.py @@ -53,7 +53,6 @@ def generate( >>> import bigframes.pandas as bpd >>> import bigframes.bigquery as bbq - >>> bpd.options.display.progress_bar = None >>> country = bpd.Series(["Japan", "Canada"]) >>> bbq.ai.generate(("What's the capital city of ", country, " one word only")) 0 {'result': 'Tokyo\\n', 'full_response': '{"cand... @@ -155,7 +154,6 @@ def generate_bool( >>> import bigframes.pandas as bpd >>> import bigframes.bigquery as bbq - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({ ... "col_1": ["apple", "bear", "pear"], ... "col_2": ["fruit", "animal", "animal"] @@ -240,7 +238,6 @@ def generate_int( >>> import bigframes.pandas as bpd >>> import bigframes.bigquery as bbq - >>> bpd.options.display.progress_bar = None >>> animal = bpd.Series(["Kangaroo", "Rabbit", "Spider"]) >>> bbq.ai.generate_int(("How many legs does a ", animal, " have?")) 0 {'result': 2, 'full_response': '{"candidates":... @@ -322,7 +319,6 @@ def generate_double( >>> import bigframes.pandas as bpd >>> import bigframes.bigquery as bbq - >>> bpd.options.display.progress_bar = None >>> animal = bpd.Series(["Kangaroo", "Rabbit", "Spider"]) >>> bbq.ai.generate_double(("How many legs does a ", animal, " have?")) 0 {'result': 2.0, 'full_response': '{"candidates... @@ -402,7 +398,6 @@ def if_( >>> import bigframes.pandas as bpd >>> import bigframes.bigquery as bbq - >>> bpd.options.display.progress_bar = None >>> us_state = bpd.Series(["Massachusetts", "Illinois", "Hawaii"]) >>> bbq.ai.if_((us_state, " has a city called Springfield")) 0 True @@ -459,7 +454,6 @@ def classify( >>> import bigframes.pandas as bpd >>> import bigframes.bigquery as bbq - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({'creature': ['Cat', 'Salmon']}) >>> df['type'] = bbq.ai.classify(df['creature'], ['Mammal', 'Fish']) >>> df @@ -517,7 +511,6 @@ def score( >>> import bigframes.pandas as bpd >>> import bigframes.bigquery as bbq - >>> bpd.options.display.progress_bar = None >>> animal = bpd.Series(["Tiger", "Rabbit", "Blue Whale"]) >>> bbq.ai.score(("Rank the relative weights of ", animal, " on the scale from 1 to 3")) # doctest: +SKIP 0 2.0 diff --git a/bigframes/bigquery/_operations/approx_agg.py b/bigframes/bigquery/_operations/approx_agg.py index 696f8f5a66..73b6fdbb73 100644 --- a/bigframes/bigquery/_operations/approx_agg.py +++ b/bigframes/bigquery/_operations/approx_agg.py @@ -40,7 +40,6 @@ def approx_top_count( >>> import bigframes.pandas as bpd >>> import bigframes.bigquery as bbq - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series(["apple", "apple", "pear", "pear", "pear", "banana"]) >>> bbq.approx_top_count(s, number=2) [{'value': 'pear', 'count': 3}, {'value': 'apple', 'count': 2}] diff --git a/bigframes/bigquery/_operations/array.py b/bigframes/bigquery/_operations/array.py index 4af1416127..6f9dd20b54 100644 --- a/bigframes/bigquery/_operations/array.py +++ b/bigframes/bigquery/_operations/array.py @@ -40,7 +40,6 @@ def array_length(series: series.Series) -> series.Series: >>> import bigframes.pandas as bpd >>> import bigframes.bigquery as bbq - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series([[1, 2, 8, 3], [], [3, 4]]) >>> bbq.array_length(s) @@ -78,8 +77,6 @@ def array_agg( >>> import bigframes.pandas as bpd >>> import bigframes.bigquery as bbq - >>> import numpy as np - >>> bpd.options.display.progress_bar = None For a SeriesGroupBy object: @@ -128,8 +125,6 @@ def array_to_string(series: series.Series, delimiter: str) -> series.Series: >>> import bigframes.pandas as bpd >>> import bigframes.bigquery as bbq - >>> import numpy as np - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series([["H", "i", "!"], ["Hello", "World"], np.nan, [], ["Hi"]]) >>> bbq.array_to_string(s, delimiter=", ") diff --git a/bigframes/bigquery/_operations/datetime.py b/bigframes/bigquery/_operations/datetime.py index f8767336dd..99467beb06 100644 --- a/bigframes/bigquery/_operations/datetime.py +++ b/bigframes/bigquery/_operations/datetime.py @@ -21,10 +21,8 @@ def unix_seconds(input: series.Series) -> series.Series: **Examples:** - >>> import pandas as pd >>> import bigframes.pandas as bpd >>> import bigframes.bigquery as bbq - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series([pd.Timestamp("1970-01-02", tz="UTC"), pd.Timestamp("1970-01-03", tz="UTC")]) >>> bbq.unix_seconds(s) @@ -48,10 +46,8 @@ def unix_millis(input: series.Series) -> series.Series: **Examples:** - >>> import pandas as pd >>> import bigframes.pandas as bpd >>> import bigframes.bigquery as bbq - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series([pd.Timestamp("1970-01-02", tz="UTC"), pd.Timestamp("1970-01-03", tz="UTC")]) >>> bbq.unix_millis(s) @@ -75,10 +71,8 @@ def unix_micros(input: series.Series) -> series.Series: **Examples:** - >>> import pandas as pd >>> import bigframes.pandas as bpd >>> import bigframes.bigquery as bbq - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series([pd.Timestamp("1970-01-02", tz="UTC"), pd.Timestamp("1970-01-03", tz="UTC")]) >>> bbq.unix_micros(s) diff --git a/bigframes/bigquery/_operations/geo.py b/bigframes/bigquery/_operations/geo.py index 9a92a8960d..254d2ae13f 100644 --- a/bigframes/bigquery/_operations/geo.py +++ b/bigframes/bigquery/_operations/geo.py @@ -53,7 +53,6 @@ def st_area( >>> import bigframes.pandas as bpd >>> import bigframes.bigquery as bbq >>> from shapely.geometry import Polygon, LineString, Point - >>> bpd.options.display.progress_bar = None >>> series = bigframes.geopandas.GeoSeries( ... [ @@ -125,7 +124,6 @@ def st_buffer( >>> import bigframes.pandas as bpd >>> import bigframes.bigquery as bbq >>> from shapely.geometry import Point - >>> bpd.options.display.progress_bar = None >>> series = bigframes.geopandas.GeoSeries( ... [ @@ -195,7 +193,6 @@ def st_centroid( >>> import bigframes.pandas as bpd >>> import bigframes.bigquery as bbq >>> from shapely.geometry import Polygon, LineString, Point - >>> bpd.options.display.progress_bar = None >>> series = bigframes.geopandas.GeoSeries( ... [ @@ -250,7 +247,6 @@ def st_convexhull( >>> import bigframes.pandas as bpd >>> import bigframes.bigquery as bbq >>> from shapely.geometry import Polygon, LineString, Point - >>> bpd.options.display.progress_bar = None >>> series = bigframes.geopandas.GeoSeries( ... [ @@ -312,7 +308,6 @@ def st_difference( >>> import bigframes.bigquery as bbq >>> import bigframes.geopandas >>> from shapely.geometry import Polygon, LineString, Point - >>> bpd.options.display.progress_bar = None We can check two GeoSeries against each other, row by row: @@ -407,7 +402,6 @@ def st_distance( >>> import bigframes.bigquery as bbq >>> import bigframes.geopandas >>> from shapely.geometry import Polygon, LineString, Point - >>> bpd.options.display.progress_bar = None We can check two GeoSeries against each other, row by row. @@ -489,7 +483,6 @@ def st_intersection( >>> import bigframes.bigquery as bbq >>> import bigframes.geopandas >>> from shapely.geometry import Polygon, LineString, Point - >>> bpd.options.display.progress_bar = None We can check two GeoSeries against each other, row by row. @@ -583,7 +576,6 @@ def st_isclosed( >>> import bigframes.bigquery as bbq >>> from shapely.geometry import Point, LineString, Polygon - >>> bpd.options.display.progress_bar = None >>> series = bigframes.geopandas.GeoSeries( ... [ @@ -650,7 +642,6 @@ def st_length( >>> import bigframes.bigquery as bbq >>> from shapely.geometry import Polygon, LineString, Point, GeometryCollection - >>> bpd.options.display.progress_bar = None >>> series = bigframes.geopandas.GeoSeries( ... [ diff --git a/bigframes/bigquery/_operations/json.py b/bigframes/bigquery/_operations/json.py index 656e59af0d..4e1f43aab0 100644 --- a/bigframes/bigquery/_operations/json.py +++ b/bigframes/bigquery/_operations/json.py @@ -49,8 +49,6 @@ def json_set( >>> import bigframes.pandas as bpd >>> import bigframes.bigquery as bbq - >>> import numpy as np - >>> bpd.options.display.progress_bar = None >>> s = bpd.read_gbq("SELECT JSON '{\\\"a\\\": 1}' AS data")["data"] >>> bbq.json_set(s, json_path_value_pairs=[("$.a", 100), ("$.b", "hi")]) @@ -101,7 +99,6 @@ def json_extract( >>> import bigframes.pandas as bpd >>> import bigframes.bigquery as bbq - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series(['{"class": {"students": [{"id": 5}, {"id": 12}]}}']) >>> bbq.json_extract(s, json_path="$.class") @@ -141,7 +138,6 @@ def json_extract_array( >>> import bigframes.pandas as bpd >>> import bigframes.bigquery as bbq - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series(['[1, 2, 3]', '[4, 5]']) >>> bbq.json_extract_array(s) @@ -204,7 +200,6 @@ def json_extract_string_array( >>> import bigframes.pandas as bpd >>> import bigframes.bigquery as bbq - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series(['[1, 2, 3]', '[4, 5]']) >>> bbq.json_extract_string_array(s) @@ -272,7 +267,6 @@ def json_query( >>> import bigframes.pandas as bpd >>> import bigframes.bigquery as bbq - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series(['{"class": {"students": [{"id": 5}, {"id": 12}]}}']) >>> bbq.json_query(s, json_path="$.class") @@ -303,7 +297,6 @@ def json_query_array( >>> import bigframes.pandas as bpd >>> import bigframes.bigquery as bbq - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series(['[1, 2, 3]', '[4, 5]']) >>> bbq.json_query_array(s) @@ -355,7 +348,6 @@ def json_value( >>> import bigframes.pandas as bpd >>> import bigframes.bigquery as bbq - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series(['{"name": "Jakob", "age": "6"}', '{"name": "Jakob", "age": []}']) >>> bbq.json_value(s, json_path="$.age") @@ -392,7 +384,6 @@ def json_value_array( >>> import bigframes.pandas as bpd >>> import bigframes.bigquery as bbq - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series(['[1, 2, 3]', '[4, 5]']) >>> bbq.json_value_array(s) @@ -439,7 +430,6 @@ def to_json( >>> import bigframes.pandas as bpd >>> import bigframes.bigquery as bbq - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series([1, 2, 3]) >>> bbq.to_json(s) @@ -473,7 +463,6 @@ def to_json_string( >>> import bigframes.pandas as bpd >>> import bigframes.bigquery as bbq - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series([1, 2, 3]) >>> bbq.to_json_string(s) @@ -512,7 +501,6 @@ def parse_json( >>> import bigframes.pandas as bpd >>> import bigframes.bigquery as bbq - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series(['{"class": {"students": [{"id": 5}, {"id": 12}]}}']) >>> s diff --git a/bigframes/bigquery/_operations/search.py b/bigframes/bigquery/_operations/search.py index c16c2af1a9..b65eed2475 100644 --- a/bigframes/bigquery/_operations/search.py +++ b/bigframes/bigquery/_operations/search.py @@ -111,7 +111,6 @@ def vector_search( >>> import bigframes.pandas as bpd >>> import bigframes.bigquery as bbq - >>> bpd.options.display.progress_bar = None DataFrame embeddings for which to find nearest neighbors. The ``ARRAY`` column is used as the search query: diff --git a/bigframes/bigquery/_operations/sql.py b/bigframes/bigquery/_operations/sql.py index a2de61fc21..295412fd75 100644 --- a/bigframes/bigquery/_operations/sql.py +++ b/bigframes/bigquery/_operations/sql.py @@ -36,9 +36,6 @@ def sql_scalar( >>> import bigframes.pandas as bpd >>> import bigframes.bigquery as bbq - >>> import pandas as pd - >>> import pyarrow as pa - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series(["1.5", "2.5", "3.5"]) >>> s = s.astype(pd.ArrowDtype(pa.decimal128(38, 9))) diff --git a/bigframes/bigquery/_operations/struct.py b/bigframes/bigquery/_operations/struct.py index 7cb826351c..a6304677ef 100644 --- a/bigframes/bigquery/_operations/struct.py +++ b/bigframes/bigquery/_operations/struct.py @@ -39,7 +39,6 @@ def struct(value: dataframe.DataFrame) -> series.Series: >>> import bigframes.pandas as bpd >>> import bigframes.bigquery as bbq >>> import bigframes.series as series - >>> bpd.options.display.progress_bar = None >>> srs = series.Series([{"version": 1, "project": "pandas"}, {"version": 2, "project": "numpy"},]) >>> df = srs.struct.explode() diff --git a/bigframes/core/compile/polars/compiler.py b/bigframes/core/compile/polars/compiler.py index 059ec72076..acaf1b8f22 100644 --- a/bigframes/core/compile/polars/compiler.py +++ b/bigframes/core/compile/polars/compiler.py @@ -538,362 +538,371 @@ def compile_agg_op( f"Aggregate op {op} not yet supported in polars engine." ) + @dataclasses.dataclass(frozen=True) + class PolarsCompiler: + """ + Compiles ArrayValue to polars LazyFrame and executes. + + This feature is in development and is incomplete. + While most node types are supported, this has the following limitations: + 1. GBQ data sources not supported. + 2. Joins do not order rows correctly + 3. Incomplete scalar op support + 4. Incomplete aggregate op support + 5. Incomplete analytic op support + 6. Some complex windowing types not supported (eg. groupby + rolling) + 7. UDFs are not supported. + 8. Returned types may not be entirely consistent with BigQuery backend + 9. Some operations are not entirely lazy - sampling and somse windowing. + """ -@dataclasses.dataclass(frozen=True) -class PolarsCompiler: - """ - Compiles ArrayValue to polars LazyFrame and executes. - - This feature is in development and is incomplete. - While most node types are supported, this has the following limitations: - 1. GBQ data sources not supported. - 2. Joins do not order rows correctly - 3. Incomplete scalar op support - 4. Incomplete aggregate op support - 5. Incomplete analytic op support - 6. Some complex windowing types not supported (eg. groupby + rolling) - 7. UDFs are not supported. - 8. Returned types may not be entirely consistent with BigQuery backend - 9. Some operations are not entirely lazy - sampling and somse windowing. - """ + expr_compiler = PolarsExpressionCompiler() + agg_compiler = PolarsAggregateCompiler() + + def compile(self, plan: nodes.BigFrameNode) -> pl.LazyFrame: + if not polars_installed: + raise ValueError( + "Polars is not installed, cannot compile to polars engine." + ) + + # TODO: Create standard way to configure BFET -> BFET rewrites + # Polars has incomplete slice support in lazy mode + node = plan + node = bigframes.core.rewrite.column_pruning(node) + node = nodes.bottom_up(node, bigframes.core.rewrite.rewrite_slice) + node = bigframes.core.rewrite.pull_out_window_order(node) + node = bigframes.core.rewrite.schema_binding.bind_schema_to_tree(node) + node = lowering.lower_ops_to_polars(node) + return self.compile_node(node) - expr_compiler = PolarsExpressionCompiler() - agg_compiler = PolarsAggregateCompiler() + @functools.singledispatchmethod + def compile_node(self, node: nodes.BigFrameNode) -> pl.LazyFrame: + """Defines transformation but isn't cached, always use compile_node instead""" + raise ValueError(f"Can't compile unrecognized node: {node}") + + @compile_node.register + def compile_readlocal(self, node: nodes.ReadLocalNode): + cols_to_read = { + scan_item.source_id: scan_item.id.sql + for scan_item in node.scan_list.items + } + lazy_frame = cast( + pl.DataFrame, pl.from_arrow(node.local_data_source.data) + ).lazy() + lazy_frame = lazy_frame.select(cols_to_read.keys()).rename(cols_to_read) + if node.offsets_col: + lazy_frame = lazy_frame.with_columns( + [pl.int_range(pl.len(), dtype=pl.Int64).alias(node.offsets_col.sql)] + ) + return lazy_frame + + @compile_node.register + def compile_filter(self, node: nodes.FilterNode): + return self.compile_node(node.child).filter( + self.expr_compiler.compile_expression(node.predicate) + ) - def compile(self, plan: nodes.BigFrameNode) -> pl.LazyFrame: - if not polars_installed: - raise ValueError( - "Polars is not installed, cannot compile to polars engine." + @compile_node.register + def compile_orderby(self, node: nodes.OrderByNode): + frame = self.compile_node(node.child) + if len(node.by) == 0: + # pragma: no cover + return frame + return self._sort(frame, node.by) + + def _sort( + self, frame: pl.LazyFrame, by: Sequence[ordering.OrderingExpression] + ) -> pl.LazyFrame: + sorted = frame.sort( + [ + self.expr_compiler.compile_expression(by.scalar_expression) + for by in by + ], + descending=[not by.direction.is_ascending for by in by], + nulls_last=[by.na_last for by in by], + maintain_order=True, ) + return sorted + + @compile_node.register + def compile_reversed(self, node: nodes.ReversedNode): + return self.compile_node(node.child).reverse() - # TODO: Create standard way to configure BFET -> BFET rewrites - # Polars has incomplete slice support in lazy mode - node = plan - node = bigframes.core.rewrite.column_pruning(node) - node = nodes.bottom_up(node, bigframes.core.rewrite.rewrite_slice) - node = bigframes.core.rewrite.pull_out_window_order(node) - node = bigframes.core.rewrite.schema_binding.bind_schema_to_tree(node) - node = lowering.lower_ops_to_polars(node) - return self.compile_node(node) - - @functools.singledispatchmethod - def compile_node(self, node: nodes.BigFrameNode) -> pl.LazyFrame: - """Defines transformation but isn't cached, always use compile_node instead""" - raise ValueError(f"Can't compile unrecognized node: {node}") - - @compile_node.register - def compile_readlocal(self, node: nodes.ReadLocalNode): - cols_to_read = { - scan_item.source_id: scan_item.id.sql for scan_item in node.scan_list.items - } - lazy_frame = cast( - pl.DataFrame, pl.from_arrow(node.local_data_source.data) - ).lazy() - lazy_frame = lazy_frame.select(cols_to_read.keys()).rename(cols_to_read) - if node.offsets_col: - lazy_frame = lazy_frame.with_columns( - [pl.int_range(pl.len(), dtype=pl.Int64).alias(node.offsets_col.sql)] + @compile_node.register + def compile_selection(self, node: nodes.SelectionNode): + return self.compile_node(node.child).select( + **{new.sql: orig.id.sql for orig, new in node.input_output_pairs} ) - return lazy_frame - - @compile_node.register - def compile_filter(self, node: nodes.FilterNode): - return self.compile_node(node.child).filter( - self.expr_compiler.compile_expression(node.predicate) - ) - - @compile_node.register - def compile_orderby(self, node: nodes.OrderByNode): - frame = self.compile_node(node.child) - if len(node.by) == 0: - # pragma: no cover - return frame - return self._sort(frame, node.by) - - def _sort( - self, frame: pl.LazyFrame, by: Sequence[ordering.OrderingExpression] - ) -> pl.LazyFrame: - sorted = frame.sort( - [self.expr_compiler.compile_expression(by.scalar_expression) for by in by], - descending=[not by.direction.is_ascending for by in by], - nulls_last=[by.na_last for by in by], - maintain_order=True, - ) - return sorted - - @compile_node.register - def compile_reversed(self, node: nodes.ReversedNode): - return self.compile_node(node.child).reverse() - - @compile_node.register - def compile_selection(self, node: nodes.SelectionNode): - return self.compile_node(node.child).select( - **{new.sql: orig.id.sql for orig, new in node.input_output_pairs} - ) - - @compile_node.register - def compile_projection(self, node: nodes.ProjectionNode): - new_cols = [] - for proj_expr, name in node.assignments: - bound_expr = ex.bind_schema_fields(proj_expr, node.child.field_by_id) - new_col = self.expr_compiler.compile_expression(bound_expr).alias(name.sql) - if bound_expr.output_type is None: - new_col = new_col.cast( - _bigframes_dtype_to_polars_dtype(bigframes.dtypes.DEFAULT_DTYPE) + + @compile_node.register + def compile_projection(self, node: nodes.ProjectionNode): + new_cols = [] + for proj_expr, name in node.assignments: + bound_expr = ex.bind_schema_fields(proj_expr, node.child.field_by_id) + new_col = self.expr_compiler.compile_expression(bound_expr).alias( + name.sql ) - new_cols.append(new_col) - return self.compile_node(node.child).with_columns(new_cols) - - @compile_node.register - def compile_offsets(self, node: nodes.PromoteOffsetsNode): - return self.compile_node(node.child).with_columns( - [pl.int_range(pl.len(), dtype=pl.Int64).alias(node.col_id.sql)] - ) - - @compile_node.register - def compile_join(self, node: nodes.JoinNode): - left = self.compile_node(node.left_child) - right = self.compile_node(node.right_child) - - left_on = [] - right_on = [] - for left_ex, right_ex in node.conditions: - left_ex, right_ex = lowering._coerce_comparables(left_ex, right_ex) - left_on.append(self.expr_compiler.compile_expression(left_ex)) - right_on.append(self.expr_compiler.compile_expression(right_ex)) - - if node.type == "right": + if bound_expr.output_type is None: + new_col = new_col.cast( + _bigframes_dtype_to_polars_dtype(bigframes.dtypes.DEFAULT_DTYPE) + ) + new_cols.append(new_col) + return self.compile_node(node.child).with_columns(new_cols) + + @compile_node.register + def compile_offsets(self, node: nodes.PromoteOffsetsNode): + return self.compile_node(node.child).with_columns( + [pl.int_range(pl.len(), dtype=pl.Int64).alias(node.col_id.sql)] + ) + + @compile_node.register + def compile_join(self, node: nodes.JoinNode): + left = self.compile_node(node.left_child) + right = self.compile_node(node.right_child) + + left_on = [] + right_on = [] + for left_ex, right_ex in node.conditions: + left_ex, right_ex = lowering._coerce_comparables(left_ex, right_ex) + left_on.append(self.expr_compiler.compile_expression(left_ex)) + right_on.append(self.expr_compiler.compile_expression(right_ex)) + + if node.type == "right": + return self._ordered_join( + right, left, "left", right_on, left_on, node.joins_nulls + ).select([id.sql for id in node.ids]) return self._ordered_join( - right, left, "left", right_on, left_on, node.joins_nulls - ).select([id.sql for id in node.ids]) - return self._ordered_join( - left, right, node.type, left_on, right_on, node.joins_nulls - ) - - @compile_node.register - def compile_isin(self, node: nodes.InNode): - left = self.compile_node(node.left_child) - right = self.compile_node(node.right_child).unique(node.right_col.id.sql) - right = right.with_columns(pl.lit(True).alias(node.indicator_col.sql)) - - left_ex, right_ex = lowering._coerce_comparables(node.left_col, node.right_col) - - left_pl_ex = self.expr_compiler.compile_expression(left_ex) - right_pl_ex = self.expr_compiler.compile_expression(right_ex) - - joined = left.join( - right, - how="left", - left_on=left_pl_ex, - right_on=right_pl_ex, - # Note: join_nulls renamed to nulls_equal for polars 1.24 - join_nulls=node.joins_nulls, # type: ignore - coalesce=False, - ) - passthrough = [pl.col(id) for id in left.columns] - indicator = pl.col(node.indicator_col.sql).fill_null(False) - return joined.select((*passthrough, indicator)) - - def _ordered_join( - self, - left_frame: pl.LazyFrame, - right_frame: pl.LazyFrame, - how: Literal["inner", "outer", "left", "cross"], - left_on: Sequence[pl.Expr], - right_on: Sequence[pl.Expr], - join_nulls: bool, - ): - if how == "right": - # seems to cause seg faults as of v1.30 for no apparent reason - raise ValueError("right join not supported") - left = left_frame.with_columns( - [ - pl.int_range(pl.len()).alias("_bf_join_l"), - ] - ) - right = right_frame.with_columns( - [ - pl.int_range(pl.len()).alias("_bf_join_r"), - ] - ) - if how != "cross": + left, right, node.type, left_on, right_on, node.joins_nulls + ) + + @compile_node.register + def compile_isin(self, node: nodes.InNode): + left = self.compile_node(node.left_child) + right = self.compile_node(node.right_child).unique(node.right_col.id.sql) + right = right.with_columns(pl.lit(True).alias(node.indicator_col.sql)) + + left_ex, right_ex = lowering._coerce_comparables( + node.left_col, node.right_col + ) + + left_pl_ex = self.expr_compiler.compile_expression(left_ex) + right_pl_ex = self.expr_compiler.compile_expression(right_ex) + joined = left.join( right, - how=how, - left_on=left_on, - right_on=right_on, + how="left", + left_on=left_pl_ex, + right_on=right_pl_ex, # Note: join_nulls renamed to nulls_equal for polars 1.24 - join_nulls=join_nulls, # type: ignore + join_nulls=node.joins_nulls, # type: ignore coalesce=False, ) - else: - joined = left.join(right, how=how, coalesce=False) - - join_order = ( - ["_bf_join_l", "_bf_join_r"] - if how != "right" - else ["_bf_join_r", "_bf_join_l"] - ) - return joined.sort(join_order, nulls_last=True).drop( - ["_bf_join_l", "_bf_join_r"] - ) - - @compile_node.register - def compile_concat(self, node: nodes.ConcatNode): - child_frames = [self.compile_node(child) for child in node.child_nodes] - child_frames = [ - frame.rename( - {col: id.sql for col, id in zip(frame.columns, node.output_ids)} - ).cast( - { - field.id.sql: _bigframes_dtype_to_polars_dtype(field.dtype) - for field in node.fields - } - ) - for frame in child_frames - ] - df = pl.concat(child_frames) - return df - - @compile_node.register - def compile_agg(self, node: nodes.AggregateNode): - df = self.compile_node(node.child) - if node.dropna and len(node.by_column_ids) > 0: - df = df.filter( - [pl.col(ref.id.sql).is_not_null() for ref in node.by_column_ids] + passthrough = [pl.col(id) for id in left.columns] + indicator = pl.col(node.indicator_col.sql).fill_null(False) + return joined.select((*passthrough, indicator)) + + def _ordered_join( + self, + left_frame: pl.LazyFrame, + right_frame: pl.LazyFrame, + how: Literal["inner", "outer", "left", "cross"], + left_on: Sequence[pl.Expr], + right_on: Sequence[pl.Expr], + join_nulls: bool, + ): + if how == "right": + # seems to cause seg faults as of v1.30 for no apparent reason + raise ValueError("right join not supported") + left = left_frame.with_columns( + [ + pl.int_range(pl.len()).alias("_bf_join_l"), + ] ) - if node.order_by: - df = self._sort(df, node.order_by) - return self._aggregate(df, node.aggregations, node.by_column_ids) - - def _aggregate( - self, - df: pl.LazyFrame, - aggregations: Sequence[ - Tuple[agg_expressions.Aggregation, identifiers.ColumnId] - ], - grouping_keys: Tuple[ex.DerefOp, ...], - ) -> pl.LazyFrame: - # Need to materialize columns to broadcast constants - agg_inputs = [ - list( - map( - lambda x: x.alias(guid.generate_guid()), - self.agg_compiler.get_args(agg), - ) + right = right_frame.with_columns( + [ + pl.int_range(pl.len()).alias("_bf_join_r"), + ] ) - for agg, _ in aggregations - ] - - df_agg_inputs = df.with_columns(itertools.chain(*agg_inputs)) - - agg_exprs = [ - self.agg_compiler.compile_agg_op( - agg.op, list(map(lambda x: x.meta.output_name(), inputs)) - ).alias(id.sql) - for (agg, id), inputs in zip(aggregations, agg_inputs) - ] - - if len(grouping_keys) > 0: - group_exprs = [pl.col(ref.id.sql) for ref in grouping_keys] - grouped_df = df_agg_inputs.group_by(group_exprs) - return grouped_df.agg(agg_exprs).sort(group_exprs, nulls_last=True) - else: - return df_agg_inputs.select(agg_exprs) - - @compile_node.register - def compile_explode(self, node: nodes.ExplodeNode): - assert node.offsets_col is None - df = self.compile_node(node.child) - cols = [col.id.sql for col in node.column_ids] - return df.explode(cols) - - @compile_node.register - def compile_sample(self, node: nodes.RandomSampleNode): - df = self.compile_node(node.child) - # Sample is not available on lazyframe - return df.collect().sample(fraction=node.fraction).lazy() - - @compile_node.register - def compile_window(self, node: nodes.WindowOpNode): - df = self.compile_node(node.child) - - window = node.window_spec - # Should have been handled by reweriter - assert len(window.ordering) == 0 - if window.min_periods > 0: - raise NotImplementedError("min_period not yet supported for polars engine") - - if (window.bounds is None) or (window.is_unbounded): - # polars will automatically broadcast the aggregate to the matching input rows - agg_pl = self.agg_compiler.compile_agg_expr(node.expression) - if window.grouping_keys: - agg_pl = agg_pl.over( - self.expr_compiler.compile_expression(key) - for key in window.grouping_keys + if how != "cross": + joined = left.join( + right, + how=how, + left_on=left_on, + right_on=right_on, + # Note: join_nulls renamed to nulls_equal for polars 1.24 + join_nulls=join_nulls, # type: ignore + coalesce=False, ) - result = df.with_columns(agg_pl.alias(node.output_name.sql)) - else: # row-bounded window - window_result = self._calc_row_analytic_func( - df, node.expression, node.window_spec, node.output_name.sql - ) - result = pl.concat([df, window_result], how="horizontal") + else: + joined = left.join(right, how=how, coalesce=False) - # Probably easier just to pull this out as a rewriter - if ( - node.expression.op.skips_nulls - and not node.never_skip_nulls - and node.expression.column_references - ): - nullity_expr = functools.reduce( - operator.or_, - ( - pl.col(column.sql).is_null() - for column in node.expression.column_references - ), + join_order = ( + ["_bf_join_l", "_bf_join_r"] + if how != "right" + else ["_bf_join_r", "_bf_join_l"] ) - result = result.with_columns( - pl.when(nullity_expr) - .then(None) - .otherwise(pl.col(node.output_name.sql)) - .alias(node.output_name.sql) + return joined.sort(join_order, nulls_last=True).drop( + ["_bf_join_l", "_bf_join_r"] ) - return result - - def _calc_row_analytic_func( - self, - frame: pl.LazyFrame, - agg_expr: agg_expressions.Aggregation, - window: window_spec.WindowSpec, - name: str, - ) -> pl.LazyFrame: - if not isinstance(window.bounds, window_spec.RowsWindowBounds): - raise NotImplementedError("Only row bounds supported by polars engine") - groupby = None - if len(window.grouping_keys) > 0: - groupby = [ - self.expr_compiler.compile_expression(ref) - for ref in window.grouping_keys + + @compile_node.register + def compile_concat(self, node: nodes.ConcatNode): + child_frames = [self.compile_node(child) for child in node.child_nodes] + child_frames = [ + frame.rename( + {col: id.sql for col, id in zip(frame.columns, node.output_ids)} + ).cast( + { + field.id.sql: _bigframes_dtype_to_polars_dtype(field.dtype) + for field in node.fields + } + ) + for frame in child_frames + ] + df = pl.concat(child_frames) + return df + + @compile_node.register + def compile_agg(self, node: nodes.AggregateNode): + df = self.compile_node(node.child) + if node.dropna and len(node.by_column_ids) > 0: + df = df.filter( + [pl.col(ref.id.sql).is_not_null() for ref in node.by_column_ids] + ) + if node.order_by: + df = self._sort(df, node.order_by) + return self._aggregate(df, node.aggregations, node.by_column_ids) + + def _aggregate( + self, + df: pl.LazyFrame, + aggregations: Sequence[ + Tuple[agg_expressions.Aggregation, identifiers.ColumnId] + ], + grouping_keys: Tuple[ex.DerefOp, ...], + ) -> pl.LazyFrame: + # Need to materialize columns to broadcast constants + agg_inputs = [ + list( + map( + lambda x: x.alias(guid.generate_guid()), + self.agg_compiler.get_args(agg), + ) + ) + for agg, _ in aggregations ] - # Polars API semi-bounded, and any grouped rolling window challenging - # https://github.com/pola-rs/polars/issues/4799 - # https://github.com/pola-rs/polars/issues/8976 - pl_agg_expr = self.agg_compiler.compile_agg_expr(agg_expr).alias(name) - index_col_name = "_bf_pl_engine_offsets" - indexed_df = frame.with_row_index(index_col_name) - # https://docs.pola.rs/api/python/stable/reference/dataframe/api/polars.DataFrame.rolling.html - period_n, offset_n = _get_period_and_offset(window.bounds) - return ( - indexed_df.rolling( - index_column=index_col_name, - period=f"{period_n}i", - offset=f"{offset_n}i" if (offset_n is not None) else None, - group_by=groupby, + df_agg_inputs = df.with_columns(itertools.chain(*agg_inputs)) + + agg_exprs = [ + self.agg_compiler.compile_agg_op( + agg.op, list(map(lambda x: x.meta.output_name(), inputs)) + ).alias(id.sql) + for (agg, id), inputs in zip(aggregations, agg_inputs) + ] + + if len(grouping_keys) > 0: + group_exprs = [pl.col(ref.id.sql) for ref in grouping_keys] + grouped_df = df_agg_inputs.group_by(group_exprs) + return grouped_df.agg(agg_exprs).sort(group_exprs, nulls_last=True) + else: + return df_agg_inputs.select(agg_exprs) + + @compile_node.register + def compile_explode(self, node: nodes.ExplodeNode): + assert node.offsets_col is None + df = self.compile_node(node.child) + cols = [col.id.sql for col in node.column_ids] + return df.explode(cols) + + @compile_node.register + def compile_sample(self, node: nodes.RandomSampleNode): + df = self.compile_node(node.child) + # Sample is not available on lazyframe + return df.collect().sample(fraction=node.fraction).lazy() + + @compile_node.register + def compile_window(self, node: nodes.WindowOpNode): + df = self.compile_node(node.child) + + window = node.window_spec + # Should have been handled by reweriter + assert len(window.ordering) == 0 + if window.min_periods > 0: + raise NotImplementedError( + "min_period not yet supported for polars engine" + ) + + if (window.bounds is None) or (window.is_unbounded): + # polars will automatically broadcast the aggregate to the matching input rows + agg_pl = self.agg_compiler.compile_agg_expr(node.expression) + if window.grouping_keys: + agg_pl = agg_pl.over( + self.expr_compiler.compile_expression(key) + for key in window.grouping_keys + ) + result = df.with_columns(agg_pl.alias(node.output_name.sql)) + else: # row-bounded window + window_result = self._calc_row_analytic_func( + df, node.expression, node.window_spec, node.output_name.sql + ) + result = pl.concat([df, window_result], how="horizontal") + + # Probably easier just to pull this out as a rewriter + if ( + node.expression.op.skips_nulls + and not node.never_skip_nulls + and node.expression.column_references + ): + nullity_expr = functools.reduce( + operator.or_, + ( + pl.col(column.sql).is_null() + for column in node.expression.column_references + ), + ) + result = result.with_columns( + pl.when(nullity_expr) + .then(None) + .otherwise(pl.col(node.output_name.sql)) + .alias(node.output_name.sql) + ) + return result + + def _calc_row_analytic_func( + self, + frame: pl.LazyFrame, + agg_expr: agg_expressions.Aggregation, + window: window_spec.WindowSpec, + name: str, + ) -> pl.LazyFrame: + if not isinstance(window.bounds, window_spec.RowsWindowBounds): + raise NotImplementedError("Only row bounds supported by polars engine") + groupby = None + if len(window.grouping_keys) > 0: + groupby = [ + self.expr_compiler.compile_expression(ref) + for ref in window.grouping_keys + ] + + # Polars API semi-bounded, and any grouped rolling window challenging + # https://github.com/pola-rs/polars/issues/4799 + # https://github.com/pola-rs/polars/issues/8976 + pl_agg_expr = self.agg_compiler.compile_agg_expr(agg_expr).alias(name) + index_col_name = "_bf_pl_engine_offsets" + indexed_df = frame.with_row_index(index_col_name) + # https://docs.pola.rs/api/python/stable/reference/dataframe/api/polars.DataFrame.rolling.html + period_n, offset_n = _get_period_and_offset(window.bounds) + return ( + indexed_df.rolling( + index_column=index_col_name, + period=f"{period_n}i", + offset=f"{offset_n}i" if (offset_n is not None) else None, + group_by=groupby, + ) + .agg(pl_agg_expr) + .select(name) ) - .agg(pl_agg_expr) - .select(name) - ) def _get_period_and_offset( diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index bc2bbb963b..ec458cc462 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -1770,8 +1770,6 @@ def to_pandas( **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({'col': [4, 2, 2]}) Download the data from BigQuery and convert it into an in-memory pandas DataFrame. @@ -1892,8 +1890,6 @@ def to_pandas_batches( **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({'col': [4, 3, 2, 2, 3]}) Iterate through the results in batches, limiting the total rows yielded @@ -4252,9 +4248,6 @@ def _resample( **Examples:** >>> import bigframes.pandas as bpd - >>> import pandas as pd - >>> bpd.options.display.progress_bar = None - >>> data = { ... "timestamp_col": pd.date_range( ... start="2021-01-01 13:00:00", periods=30, freq="1s" diff --git a/bigframes/ml/compose.py b/bigframes/ml/compose.py index 92c98695cd..54ce7066cb 100644 --- a/bigframes/ml/compose.py +++ b/bigframes/ml/compose.py @@ -69,7 +69,6 @@ class SQLScalarColumnTransformer: >>> from bigframes.ml.compose import ColumnTransformer, SQLScalarColumnTransformer >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({'name': ["James", None, "Mary"], 'city': ["New York", "Boston", None]}) >>> col_trans = ColumnTransformer([ diff --git a/bigframes/operations/ai.py b/bigframes/operations/ai.py index ac294b0fbd..ad58e8825c 100644 --- a/bigframes/operations/ai.py +++ b/bigframes/operations/ai.py @@ -45,7 +45,6 @@ def filter( **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> bpd.options.experiments.ai_operators = True >>> bpd.options.compute.ai_ops_confirmation_threshold = 25 @@ -115,7 +114,6 @@ def map( **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> bpd.options.experiments.ai_operators = True >>> bpd.options.compute.ai_ops_confirmation_threshold = 25 @@ -134,7 +132,6 @@ def map( >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> bpd.options.experiments.ai_operators = True >>> bpd.options.compute.ai_ops_confirmation_threshold = 25 @@ -266,7 +263,6 @@ def classify( **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> bpd.options.experiments.ai_operators = True >>> bpd.options.compute.ai_ops_confirmation_threshold = 25 @@ -356,7 +352,6 @@ def join( **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> bpd.options.experiments.ai_operators = True >>> bpd.options.compute.ai_ops_confirmation_threshold = 25 @@ -496,7 +491,6 @@ def search( ** Examples: ** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> import bigframes >>> bigframes.options.experiments.ai_operators = True @@ -608,7 +602,6 @@ def sim_join( ** Examples: ** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> bpd.options.experiments.ai_operators = True >>> bpd.options.compute.ai_ops_confirmation_threshold = 25 diff --git a/bigframes/operations/semantics.py b/bigframes/operations/semantics.py index 9fa5450748..2266702d47 100644 --- a/bigframes/operations/semantics.py +++ b/bigframes/operations/semantics.py @@ -52,7 +52,6 @@ def agg( **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> bpd.options.experiments.semantic_operators = True >>> bpd.options.compute.semantic_ops_confirmation_threshold = 25 @@ -247,7 +246,6 @@ def cluster_by( **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> bpd.options.experiments.semantic_operators = True >>> bpd.options.compute.semantic_ops_confirmation_threshold = 25 @@ -321,7 +319,6 @@ def filter(self, instruction: str, model, ground_with_google_search: bool = Fals **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> bpd.options.experiments.semantic_operators = True >>> bpd.options.compute.semantic_ops_confirmation_threshold = 25 @@ -435,7 +432,6 @@ def map( **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> bpd.options.experiments.semantic_operators = True >>> bpd.options.compute.semantic_ops_confirmation_threshold = 25 @@ -558,7 +554,6 @@ def join( **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> bpd.options.experiments.semantic_operators = True >>> bpd.options.compute.semantic_ops_confirmation_threshold = 25 @@ -697,7 +692,6 @@ def search( ** Examples: ** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> import bigframes >>> bigframes.options.experiments.semantic_operators = True @@ -800,7 +794,6 @@ def top_k( **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> bpd.options.experiments.semantic_operators = True >>> bpd.options.compute.semantic_ops_confirmation_threshold = 25 @@ -1001,7 +994,6 @@ def sim_join( ** Examples: ** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> bpd.options.experiments.semantic_operators = True >>> bpd.options.compute.semantic_ops_confirmation_threshold = 25 diff --git a/bigframes/operations/strings.py b/bigframes/operations/strings.py index 4743483954..efbdd865b0 100644 --- a/bigframes/operations/strings.py +++ b/bigframes/operations/strings.py @@ -68,9 +68,7 @@ def reverse(self) -> series.Series: **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None - - >>> s = bpd.Series(["apple", "banana", "", bpd.NA]) + >>> s = bpd.Series(["apple", "banana", "", pd.NA]) >>> s.str.reverse() 0 elppa 1 ananab diff --git a/bigframes/series.py b/bigframes/series.py index 490298d8dd..642e574627 100644 --- a/bigframes/series.py +++ b/bigframes/series.py @@ -532,8 +532,6 @@ def to_pandas( **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series([4, 3, 2]) Download the data from BigQuery and convert it into an in-memory pandas Series. @@ -660,8 +658,6 @@ def to_pandas_batches( **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series([4, 3, 2, 2, 3]) Iterate through the results in batches, limiting the total rows yielded @@ -2421,9 +2417,6 @@ def _resample( **Examples:** >>> import bigframes.pandas as bpd - >>> import pandas as pd - >>> bpd.options.display.progress_bar = None - >>> data = { ... "timestamp_col": pd.date_range( ... start="2021-01-01 13:00:00", periods=30, freq="1s" diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py index 886072b884..6418f2b78f 100644 --- a/bigframes/session/__init__.py +++ b/bigframes/session/__init__.py @@ -617,11 +617,9 @@ def read_gbq_query( **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None - Simple query input: + >>> import bigframes.pandas as bpd >>> df = bpd.read_gbq_query(''' ... SELECT ... pitcherFirstName, @@ -773,11 +771,9 @@ def read_gbq_table( **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None - Read a whole table, with arbitrary ordering or ordering corresponding to the primary key(s). + >>> import bigframes.pandas as bpd >>> df = bpd.read_gbq_table("bigquery-public-data.ml_datasets.penguins") See also: :meth:`Session.read_gbq`. @@ -852,8 +848,6 @@ def read_gbq_table_streaming( **Examples:** >>> import bigframes.streaming as bst - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> sdf = bst.read_gbq_table("bigquery-public-data.ml_datasets.penguins") @@ -881,11 +875,9 @@ def read_gbq_model(self, model_name: str): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None - Read an existing BigQuery ML model. + >>> import bigframes.pandas as bpd >>> model_name = "bigframes-dev.bqml_tutorial.penguins_model" >>> model = bpd.read_gbq_model(model_name) @@ -951,9 +943,6 @@ def read_pandas( **Examples:** - >>> import bigframes.pandas as bpd - >>> import pandas as pd - >>> bpd.options.display.progress_bar = None >>> d = {'col1': [1, 2], 'col2': [3, 4]} >>> pandas_df = pd.DataFrame(data=d) @@ -1829,9 +1818,7 @@ def udf( **Examples:** - >>> import bigframes.pandas as bpd >>> import datetime - >>> bpd.options.display.progress_bar = None Turning an arbitrary python function into a BigQuery managed python udf: @@ -1885,7 +1872,7 @@ def udf( You can clean-up the BigQuery functions created above using the BigQuery client from the BigQuery DataFrames session: - >>> session = bpd.get_global_session() + >>> session = bpd.get_global_session() # doctest: +SKIP >>> session.bqclient.delete_routine(minutes_to_hours.bigframes_bigquery_function) # doctest: +SKIP >>> session.bqclient.delete_routine(get_hash.bigframes_bigquery_function) # doctest: +SKIP @@ -1993,12 +1980,10 @@ def read_gbq_function( **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None - Use the [cw_lower_case_ascii_only](https://github.com/GoogleCloudPlatform/bigquery-utils/blob/master/udfs/community/README.md#cw_lower_case_ascii_onlystr-string) function from Community UDFs. + >>> import bigframes.pandas as bpd >>> func = bpd.read_gbq_function("bqutil.fn.cw_lower_case_ascii_only") You can run it on scalar input. Usually you would do so to verify that @@ -2058,13 +2043,13 @@ def read_gbq_function( Another use case is to define your own remote function and use it later. For example, define the remote function: - >>> @bpd.remote_function(cloud_function_service_account="default") + >>> @bpd.remote_function(cloud_function_service_account="default") # doctest: +SKIP ... def tenfold(num: int) -> float: ... return num * 10 Then, read back the deployed BQ remote function: - >>> tenfold_ref = bpd.read_gbq_function( + >>> tenfold_ref = bpd.read_gbq_function( # doctest: +SKIP ... tenfold.bigframes_remote_function, ... ) @@ -2076,7 +2061,7 @@ def read_gbq_function( [2 rows x 3 columns] - >>> df['a'].apply(tenfold_ref) + >>> df['a'].apply(tenfold_ref) # doctest: +SKIP 0 10.0 1 20.0 Name: a, dtype: Float64 @@ -2085,12 +2070,11 @@ def read_gbq_function( note, row processor implies that the function has only one input parameter. - >>> import pandas as pd - >>> @bpd.remote_function(cloud_function_service_account="default") + >>> @bpd.remote_function(cloud_function_service_account="default") # doctest: +SKIP ... def row_sum(s: pd.Series) -> float: ... return s['a'] + s['b'] + s['c'] - >>> row_sum_ref = bpd.read_gbq_function( + >>> row_sum_ref = bpd.read_gbq_function( # doctest: +SKIP ... row_sum.bigframes_remote_function, ... is_row_processor=True, ... ) @@ -2103,7 +2087,7 @@ def read_gbq_function( [2 rows x 3 columns] - >>> df.apply(row_sum_ref, axis=1) + >>> df.apply(row_sum_ref, axis=1) # doctest: +SKIP 0 9.0 1 12.0 dtype: Float64 diff --git a/conftest.py b/conftest.py index 657a59bc0e..bd2053b092 100644 --- a/conftest.py +++ b/conftest.py @@ -20,11 +20,24 @@ import pytest import bigframes._config -import bigframes.pandas as bpd + + +@pytest.fixture(scope="session") +def polars_session_or_bpd(): + # Since the doctest imports fixture is autouse=True, don't skip if polars + # isn't available. + try: + from bigframes.testing import polars_session + + return polars_session.TestSession() + except ImportError: + import bigframes.pandas as bpd + + return bpd @pytest.fixture(autouse=True) -def default_doctest_imports(doctest_namespace): +def default_doctest_imports(doctest_namespace, polars_session_or_bpd): """ Avoid some boilerplate in pandas-inspired tests. @@ -33,5 +46,10 @@ def default_doctest_imports(doctest_namespace): doctest_namespace["np"] = np doctest_namespace["pd"] = pd doctest_namespace["pa"] = pa - doctest_namespace["bpd"] = bpd + doctest_namespace["bpd"] = polars_session_or_bpd bigframes._config.options.display.progress_bar = None + + # TODO(tswast): Consider setting the numpy printoptions here for better + # compatibility across numpy versions. + # https://numpy.org/doc/stable/release/2.0.0-notes.html#representation-of-numpy-scalars-changed + # https://numpy.org/doc/stable/reference/generated/numpy.set_printoptions.html#numpy-set-printoptions diff --git a/samples/polars/requirements.txt b/samples/polars/requirements.txt index a1d8fbcdac..1626982536 100644 --- a/samples/polars/requirements.txt +++ b/samples/polars/requirements.txt @@ -1,3 +1,3 @@ -bigframes==1.11.1 -polars==1.3.0 -pyarrow==15.0.0 +bigframes==2.25.0 +polars==1.24.0 +pyarrow==21.0.0 diff --git a/tests/system/small/operations/test_strings.py b/tests/system/small/operations/test_strings.py index d3e868db59..6cd6309cbb 100644 --- a/tests/system/small/operations/test_strings.py +++ b/tests/system/small/operations/test_strings.py @@ -288,7 +288,7 @@ def test_strip(scalars_dfs): ], ) def test_strip_w_to_strip(to_strip): - s = bpd.Series(["1. Ant. ", "2. Bee!\n", "3. Cat?\t", bpd.NA]) + s = bpd.Series(["1. Ant. ", "2. Bee!\n", "3. Cat?\t", pd.NA]) pd_s = s.to_pandas() bf_result = s.str.strip(to_strip=to_strip).to_pandas() @@ -434,7 +434,7 @@ def test_rstrip(scalars_dfs): ], ) def test_rstrip_w_to_strip(to_strip): - s = bpd.Series(["1. Ant. ", "2. Bee!\n", "3. Cat?\t", bpd.NA]) + s = bpd.Series(["1. Ant. ", "2. Bee!\n", "3. Cat?\t", pd.NA]) pd_s = s.to_pandas() bf_result = s.str.rstrip(to_strip=to_strip).to_pandas() @@ -469,7 +469,7 @@ def test_lstrip(scalars_dfs): ], ) def test_lstrip_w_to_strip(to_strip): - s = bpd.Series(["1. Ant. ", "2. Bee!\n", "3. Cat?\t", bpd.NA]) + s = bpd.Series(["1. Ant. ", "2. Bee!\n", "3. Cat?\t", pd.NA]) pd_s = s.to_pandas() bf_result = s.str.lstrip(to_strip=to_strip).to_pandas() diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py index 65b170df32..df538329ce 100644 --- a/tests/system/small/test_series.py +++ b/tests/system/small/test_series.py @@ -1979,7 +1979,10 @@ def test_series_small_repr(scalars_dfs): col_name = "int64_col" bf_series = scalars_df[col_name] pd_series = scalars_pandas_df[col_name] - assert repr(bf_series) == pd_series.to_string(length=False, dtype=True, name=True) + with bigframes.pandas.option_context("display.repr_mode", "head"): + assert repr(bf_series) == pd_series.to_string( + length=False, dtype=True, name=True + ) def test_sum(scalars_dfs): diff --git a/tests/unit/test_pandas.py b/tests/unit/test_pandas.py index 5e75e6b20f..e1e713697d 100644 --- a/tests/unit/test_pandas.py +++ b/tests/unit/test_pandas.py @@ -174,7 +174,7 @@ def test_cut_raises_with_invalid_bins(bins: int, error_message: str): def test_pandas_attribute(): - assert bpd.NA is pd.NA + assert pd.NA is pd.NA assert bpd.BooleanDtype is pd.BooleanDtype assert bpd.Float64Dtype is pd.Float64Dtype assert bpd.Int64Dtype is pd.Int64Dtype diff --git a/tests/unit/test_series_polars.py b/tests/unit/test_series_polars.py index ee4ac245d3..e978ed43da 100644 --- a/tests/unit/test_series_polars.py +++ b/tests/unit/test_series_polars.py @@ -2009,7 +2009,10 @@ def test_series_small_repr(scalars_dfs): col_name = "int64_col" bf_series = scalars_df[col_name] pd_series = scalars_pandas_df[col_name] - assert repr(bf_series) == pd_series.to_string(length=False, dtype=True, name=True) + with bigframes.pandas.option_context("display.repr_mode", "head"): + assert repr(bf_series) == pd_series.to_string( + length=False, dtype=True, name=True + ) def test_sum(scalars_dfs): diff --git a/third_party/bigframes_vendored/geopandas/geoseries.py b/third_party/bigframes_vendored/geopandas/geoseries.py index 92a58b3dc6..20587b4d57 100644 --- a/third_party/bigframes_vendored/geopandas/geoseries.py +++ b/third_party/bigframes_vendored/geopandas/geoseries.py @@ -18,7 +18,6 @@ class GeoSeries: >>> import bigframes.geopandas >>> import bigframes.pandas as bpd >>> from shapely.geometry import Point - >>> bpd.options.display.progress_bar = None >>> s = bigframes.geopandas.GeoSeries([Point(1, 1), Point(2, 2), Point(3, 3)]) >>> s @@ -73,7 +72,6 @@ def x(self) -> bigframes.series.Series: >>> import bigframes.pandas as bpd >>> import geopandas.array >>> import shapely.geometry - >>> bpd.options.display.progress_bar = None >>> series = bpd.Series( ... [shapely.geometry.Point(1, 2), shapely.geometry.Point(2, 3), shapely.geometry.Point(3, 4)], @@ -100,7 +98,6 @@ def y(self) -> bigframes.series.Series: >>> import bigframes.pandas as bpd >>> import geopandas.array >>> import shapely.geometry - >>> bpd.options.display.progress_bar = None >>> series = bpd.Series( ... [shapely.geometry.Point(1, 2), shapely.geometry.Point(2, 3), shapely.geometry.Point(3, 4)], @@ -129,7 +126,6 @@ def boundary(self) -> bigframes.geopandas.GeoSeries: >>> import bigframes.pandas as bpd >>> import geopandas.array >>> import shapely.geometry - >>> bpd.options.display.progress_bar = None >>> from shapely.geometry import Polygon, LineString, Point >>> s = geopandas.GeoSeries( @@ -171,7 +167,6 @@ def from_xy(cls, x, y, index=None, **kwargs) -> bigframes.geopandas.GeoSeries: >>> import bigframes.pandas as bpd >>> import bigframes.geopandas - >>> bpd.options.display.progress_bar = None >>> x = [2.5, 5, -3.0] >>> y = [0.5, 1, 1.5] @@ -210,7 +205,6 @@ def from_wkt(cls, data, index=None) -> bigframes.geopandas.GeoSeries: >>> import bigframes as bpd >>> import bigframes.geopandas - >>> bpd.options.display.progress_bar = None >>> wkts = [ ... 'POINT (1 1)', @@ -246,7 +240,6 @@ def to_wkt(self) -> bigframes.series.Series: >>> import bigframes as bpd >>> import bigframes.geopandas >>> from shapely.geometry import Point - >>> bpd.options.display.progress_bar = None >>> s = bigframes.geopandas.GeoSeries([Point(1, 1), Point(2, 2), Point(3, 3)]) >>> s @@ -279,7 +272,6 @@ def difference(self: GeoSeries, other: GeoSeries) -> GeoSeries: # type: ignore >>> import bigframes as bpd >>> import bigframes.geopandas >>> from shapely.geometry import Polygon, LineString, Point - >>> bpd.options.display.progress_bar = None We can check two GeoSeries against each other, row by row: @@ -411,7 +403,6 @@ def intersection(self: GeoSeries, other: GeoSeries) -> GeoSeries: # type: ignor >>> import bigframes as bpd >>> import bigframes.geopandas >>> from shapely.geometry import Polygon, LineString, Point - >>> bpd.options.display.progress_bar = None We can check two GeoSeries against each other, row by row. diff --git a/third_party/bigframes_vendored/ibis/expr/api.py b/third_party/bigframes_vendored/ibis/expr/api.py index 4ef10e449b..fa09e23b75 100644 --- a/third_party/bigframes_vendored/ibis/expr/api.py +++ b/third_party/bigframes_vendored/ibis/expr/api.py @@ -1532,7 +1532,6 @@ def read_parquet( Examples -------- >>> import ibis - >>> import pandas as pd >>> ibis.options.interactive = True >>> df = pd.DataFrame({"a": [1, 2, 3], "b": list("ghi")}) >>> df @@ -1582,7 +1581,6 @@ def read_delta( Examples -------- >>> import ibis - >>> import pandas as pd >>> ibis.options.interactive = True >>> df = pd.DataFrame({"a": [1, 2, 3], "b": list("ghi")}) >>> df diff --git a/third_party/bigframes_vendored/ibis/expr/datatypes/core.py b/third_party/bigframes_vendored/ibis/expr/datatypes/core.py index eb597cfc6a..4bacebd6d7 100644 --- a/third_party/bigframes_vendored/ibis/expr/datatypes/core.py +++ b/third_party/bigframes_vendored/ibis/expr/datatypes/core.py @@ -62,7 +62,6 @@ def dtype(value: Any, nullable: bool = True) -> DataType: Or other type systems, like numpy/pandas/pyarrow types: - >>> import pyarrow as pa >>> ibis.dtype(pa.int32()) Int32(nullable=True) diff --git a/third_party/bigframes_vendored/ibis/expr/types/arrays.py b/third_party/bigframes_vendored/ibis/expr/types/arrays.py index 72f01334c1..47ae997738 100644 --- a/third_party/bigframes_vendored/ibis/expr/types/arrays.py +++ b/third_party/bigframes_vendored/ibis/expr/types/arrays.py @@ -1008,7 +1008,6 @@ def flatten(self) -> ir.ArrayValue: ... "nulls_only": [None, None, None], ... "mixed_nulls": [[], None, [None]], ... } - >>> import pyarrow as pa >>> t = ibis.memtable( ... pa.Table.from_pydict( ... data, diff --git a/third_party/bigframes_vendored/ibis/expr/types/maps.py b/third_party/bigframes_vendored/ibis/expr/types/maps.py index 881f8327d0..65237decc7 100644 --- a/third_party/bigframes_vendored/ibis/expr/types/maps.py +++ b/third_party/bigframes_vendored/ibis/expr/types/maps.py @@ -35,7 +35,6 @@ class MapValue(Value): -------- >>> import ibis >>> ibis.options.interactive = True - >>> import pyarrow as pa >>> tab = pa.table( ... { ... "m": pa.array( @@ -101,7 +100,6 @@ def get(self, key: ir.Value, default: ir.Value | None = None) -> ir.Value: Examples -------- >>> import ibis - >>> import pyarrow as pa >>> ibis.options.interactive = True >>> tab = pa.table( ... { @@ -167,7 +165,6 @@ def length(self) -> ir.IntegerValue: Examples -------- >>> import ibis - >>> import pyarrow as pa >>> ibis.options.interactive = True >>> tab = pa.table( ... { @@ -224,7 +221,6 @@ def __getitem__(self, key: ir.Value) -> ir.Value: Examples -------- >>> import ibis - >>> import pyarrow as pa >>> ibis.options.interactive = True >>> tab = pa.table( ... { @@ -276,7 +272,6 @@ def contains( Examples -------- >>> import ibis - >>> import pyarrow as pa >>> ibis.options.interactive = True >>> tab = pa.table( ... { @@ -321,7 +316,6 @@ def keys(self) -> ir.ArrayValue: Examples -------- >>> import ibis - >>> import pyarrow as pa >>> ibis.options.interactive = True >>> tab = pa.table( ... { diff --git a/third_party/bigframes_vendored/pandas/core/arrays/arrow/accessors.py b/third_party/bigframes_vendored/pandas/core/arrays/arrow/accessors.py index f4244ab499..94319dbc10 100644 --- a/third_party/bigframes_vendored/pandas/core/arrays/arrow/accessors.py +++ b/third_party/bigframes_vendored/pandas/core/arrays/arrow/accessors.py @@ -19,8 +19,6 @@ def len(self): **Examples:** >>> import bigframes.pandas as bpd - >>> import pyarrow as pa - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series( ... [ ... [1, 2, 3], @@ -45,8 +43,6 @@ def __getitem__(self, key: int | slice): **Examples:** >>> import bigframes.pandas as bpd - >>> import pyarrow as pa - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series( ... [ ... [1, 2, 3], @@ -83,8 +79,6 @@ def field(self, name_or_index: str | int): **Examples:** >>> import bigframes.pandas as bpd - >>> import pyarrow as pa - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series( ... [ ... {"version": 1, "project": "pandas"}, @@ -129,8 +123,6 @@ def explode(self): **Examples:** >>> import bigframes.pandas as bpd - >>> import pyarrow as pa - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series( ... [ ... {"version": 1, "project": "pandas"}, @@ -166,8 +158,6 @@ def dtypes(self): **Examples:** >>> import bigframes.pandas as bpd - >>> import pyarrow as pa - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series( ... [ ... {"version": 1, "project": "pandas"}, @@ -201,8 +191,6 @@ def explode(self, column, *, separator: str = "."): **Examples:** >>> import bigframes.pandas as bpd - >>> import pyarrow as pa - >>> bpd.options.display.progress_bar = None >>> countries = bpd.Series(["cn", "es", "us"]) >>> files = bpd.Series( ... [ diff --git a/third_party/bigframes_vendored/pandas/core/arrays/datetimelike.py b/third_party/bigframes_vendored/pandas/core/arrays/datetimelike.py index 22e946edcd..ace91dad1e 100644 --- a/third_party/bigframes_vendored/pandas/core/arrays/datetimelike.py +++ b/third_party/bigframes_vendored/pandas/core/arrays/datetimelike.py @@ -15,7 +15,6 @@ def strftime(self, date_format: str): **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s = bpd.to_datetime( ... ['2014-08-15 08:15:12', '2012-02-29 08:15:12+06:00', '2015-08-15 08:15:12+05:00'], @@ -51,9 +50,7 @@ def normalize(self): **Examples:** - >>> import pandas as pd >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series(pd.date_range( ... start='2014-08-01 10:00', ... freq='h', @@ -86,9 +83,6 @@ def floor(self, freq: str): **Examples:** - >>> import pandas as pd - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> rng = pd.date_range('1/1/2018 11:59:00', periods=3, freq='min') >>> bpd.Series(rng).dt.floor("h") 0 2018-01-01 11:00:00 diff --git a/third_party/bigframes_vendored/pandas/core/computation/eval.py b/third_party/bigframes_vendored/pandas/core/computation/eval.py index d3d11a9c2a..a1809f6cb3 100644 --- a/third_party/bigframes_vendored/pandas/core/computation/eval.py +++ b/third_party/bigframes_vendored/pandas/core/computation/eval.py @@ -172,8 +172,6 @@ def eval( **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({"animal": ["dog", "pig"], "age": [10, 20]}) >>> df diff --git a/third_party/bigframes_vendored/pandas/core/config_init.py b/third_party/bigframes_vendored/pandas/core/config_init.py index 3425674e4f..dc2b11ab94 100644 --- a/third_party/bigframes_vendored/pandas/core/config_init.py +++ b/third_party/bigframes_vendored/pandas/core/config_init.py @@ -49,7 +49,6 @@ or just remove it. - >>> bpd.options.display.progress_bar = None Setting to default value "auto" will detect and show progress bar automatically. diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py index 4c0abff545..8a2570d4c6 100644 --- a/third_party/bigframes_vendored/pandas/core/frame.py +++ b/third_party/bigframes_vendored/pandas/core/frame.py @@ -39,8 +39,6 @@ def shape(self) -> tuple[int, int]: **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({'col1': [1, 2, 3], ... 'col2': [4, 5, 6]}) @@ -63,8 +61,6 @@ def axes(self) -> list: **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) >>> df.axes[1:] @@ -78,8 +74,6 @@ def values(self) -> np.ndarray: **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) >>> df.values @@ -110,8 +104,6 @@ def T(self) -> DataFrame: **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) >>> df col1 col2 @@ -146,8 +138,6 @@ def transpose(self) -> DataFrame: **Square DataFrame with homogeneous dtype** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> d1 = {'col1': [1, 2], 'col2': [3, 4]} >>> df1 = bpd.DataFrame(data=d1) @@ -256,8 +246,6 @@ def select_dtypes(self, include=None, exclude=None) -> DataFrame: **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({'col1': [1, 2], 'col2': ["hello", "world"], 'col3': [True, False]}) >>> df.select_dtypes(include=['Int64']) @@ -380,8 +368,6 @@ def to_numpy( **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) >>> df.to_numpy() @@ -420,7 +406,6 @@ def to_gbq( **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None Write a DataFrame to a BigQuery table. @@ -530,8 +515,6 @@ def to_parquet( **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None - >>> df = bpd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) >>> gcs_bucket = "gs://bigframes-dev-testing/sample_parquet*.parquet" >>> df.to_parquet(path=gcs_bucket) @@ -586,8 +569,6 @@ def to_dict( **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) >>> df.to_dict() @@ -666,9 +647,7 @@ def to_excel( **Examples:** - >>> import bigframes.pandas as bpd >>> import tempfile - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) >>> df.to_excel(tempfile.TemporaryFile()) @@ -703,8 +682,6 @@ def to_latex( **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) >>> print(df.to_latex()) @@ -754,8 +731,6 @@ def to_records( **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) >>> df.to_records() @@ -814,8 +789,6 @@ def to_string( **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) >>> print(df.to_string()) @@ -914,8 +887,6 @@ def to_html( **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) >>> print(df.to_html()) @@ -1024,8 +995,6 @@ def to_markdown( **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) >>> print(df.to_markdown()) @@ -1058,8 +1027,6 @@ def to_pickle(self, path, *, allow_large_results, **kwargs) -> None: **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) >>> gcs_bucket = "gs://bigframes-dev-testing/sample_pickle_gcs.pkl" @@ -1080,8 +1047,6 @@ def to_orc(self, path=None, *, allow_large_results=None, **kwargs) -> bytes | No **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) >>> import tempfile @@ -1190,8 +1155,6 @@ def insert(self, loc, column, value, allow_duplicates=False): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) @@ -1243,8 +1206,6 @@ def drop( **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame(np.arange(12).reshape(3, 4), ... columns=['A', 'B', 'C', 'D']) @@ -1284,7 +1245,6 @@ def drop( Drop columns and/or rows of MultiIndex DataFrame: - >>> import pandas as pd >>> midx = pd.MultiIndex(levels=[['llama', 'cow', 'falcon'], ... ['speed', 'weight', 'length']], ... codes=[[0, 0, 0, 1, 1, 1, 2, 2, 2], @@ -1402,8 +1362,6 @@ def rename( **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) >>> df @@ -1474,8 +1432,6 @@ def set_index( **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({'month': [1, 4, 7, 10], ... 'year': [2012, 2014, 2013, 2014], @@ -1616,10 +1572,7 @@ def reset_index( **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None - >>> import numpy as np >>> df = bpd.DataFrame([('bird', 389.0), ... ('bird', 24.0), ... ('mammal', 80.5), @@ -1659,7 +1612,6 @@ class max_speed You can also use ``reset_index`` with ``MultiIndex``. - >>> import pandas as pd >>> index = pd.MultiIndex.from_tuples([('bird', 'falcon'), ... ('bird', 'parrot'), ... ('mammal', 'lion'), @@ -1795,12 +1747,10 @@ def dropna( **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({"name": ['Alfred', 'Batman', 'Catwoman'], ... "toy": [np.nan, 'Batmobile', 'Bullwhip'], - ... "born": [bpd.NA, "1940-04-25", bpd.NA]}) + ... "born": [pd.NA, "1940-04-25", pd.NA]}) >>> df name toy born 0 Alfred @@ -1908,8 +1858,6 @@ def isin(self, values): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({'num_legs': [2, 4], 'num_wings': [2, 0]}, ... index=['falcon', 'dog']) @@ -1964,8 +1912,6 @@ def keys(self): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({ ... 'A': [1, 2, 3], @@ -1985,8 +1931,6 @@ def iterrows(self): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({ ... 'A': [1, 2, 3], ... 'B': [4, 5, 6], @@ -2011,8 +1955,6 @@ def itertuples(self, index: bool = True, name: str | None = "Pandas"): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({ ... 'A': [1, 2, 3], ... 'B': [4, 5, 6], @@ -2044,8 +1986,6 @@ def items(self): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({'species': ['bear', 'bear', 'marsupial'], ... 'population': [1864, 22000, 80000]}, @@ -2085,8 +2025,6 @@ def where(self, cond, other): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({'a': [20, 10, 0], 'b': [0, 10, 20]}) >>> df @@ -2177,8 +2115,6 @@ def mask(self, cond, other): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({'a': [20, 10, 0], 'b': [0, 10, 20]}) >>> df @@ -2280,11 +2216,9 @@ def sort_values( **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({ - ... 'col1': ['A', 'A', 'B', bpd.NA, 'D', 'C'], + ... 'col1': ['A', 'A', 'B', pd.NA, 'D', 'C'], ... 'col2': [2, 1, 9, 8, 7, 4], ... 'col3': [0, 1, 9, 4, 2, 3], ... 'col4': ['a', 'B', 'c', 'D', 'e', 'F'] @@ -2424,8 +2358,6 @@ def eq(self, other, axis: str | int = "columns") -> DataFrame: **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None You can use method name: @@ -2467,8 +2399,6 @@ def __eq__(self, other): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({ ... 'a': [0, 3, 4], @@ -2498,8 +2428,6 @@ def __invert__(self) -> DataFrame: **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({'a':[True, False, True], 'b':[-1, 0, 1]}) >>> ~df @@ -2527,8 +2455,6 @@ def ne(self, other, axis: str | int = "columns") -> DataFrame: **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None You can use method name: @@ -2569,8 +2495,6 @@ def __ne__(self, other): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({ ... 'a': [0, 3, 4], @@ -2609,8 +2533,6 @@ def le(self, other, axis: str | int = "columns") -> DataFrame: **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None You can use method name: @@ -2652,8 +2574,6 @@ def __le__(self, other): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({ ... 'a': [0, -1, 1], @@ -2692,8 +2612,6 @@ def lt(self, other, axis: str | int = "columns") -> DataFrame: **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None You can use method name: @@ -2735,8 +2653,6 @@ def __lt__(self, other): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({ ... 'a': [0, -1, 1], @@ -2775,8 +2691,6 @@ def ge(self, other, axis: str | int = "columns") -> DataFrame: **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None You can use method name: @@ -2818,8 +2732,6 @@ def __ge__(self, other): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({ ... 'a': [0, -1, 1], @@ -2858,8 +2770,6 @@ def gt(self, other, axis: str | int = "columns") -> DataFrame: **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({'angles': [0, 3, 4], ... 'degrees': [360, 180, 360]}, @@ -2899,8 +2809,6 @@ def __gt__(self, other): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({ ... 'a': [0, -1, 1], @@ -2936,8 +2844,6 @@ def add(self, other, axis: str | int = "columns") -> DataFrame: **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({ ... 'A': [1, 2, 3], @@ -2980,8 +2886,6 @@ def __add__(self, other) -> DataFrame: **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({ ... 'height': [1.5, 2.6], @@ -3055,8 +2959,6 @@ def radd(self, other, axis: str | int = "columns") -> DataFrame: **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({ ... 'A': [1, 2, 3], @@ -3118,8 +3020,6 @@ def sub(self, other, axis: str | int = "columns") -> DataFrame: **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({ ... 'A': [1, 2, 3], @@ -3162,8 +3062,6 @@ def __sub__(self, other): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None You can subtract a scalar: @@ -3210,8 +3108,6 @@ def rsub(self, other, axis: str | int = "columns") -> DataFrame: **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({ ... 'A': [1, 2, 3], @@ -3271,8 +3167,6 @@ def mul(self, other, axis: str | int = "columns") -> DataFrame: **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({ ... 'A': [1, 2, 3], @@ -3315,8 +3209,6 @@ def __mul__(self, other): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None You can multiply with a scalar: @@ -3363,8 +3255,6 @@ def rmul(self, other, axis: str | int = "columns") -> DataFrame: **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({ ... 'A': [1, 2, 3], @@ -3407,8 +3297,6 @@ def __rmul__(self, other): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None You can multiply with a scalar: @@ -3455,8 +3343,6 @@ def truediv(self, other, axis: str | int = "columns") -> DataFrame: **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({ ... 'A': [1, 2, 3], @@ -3499,8 +3385,6 @@ def __truediv__(self, other): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None You can multiply with a scalar: @@ -3547,8 +3431,6 @@ def rtruediv(self, other, axis: str | int = "columns") -> DataFrame: **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({ ... 'A': [1, 2, 3], @@ -3608,8 +3490,6 @@ def floordiv(self, other, axis: str | int = "columns") -> DataFrame: **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({ ... 'A': [1, 2, 3], @@ -3652,8 +3532,6 @@ def __floordiv__(self, other): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None You can divide by a scalar: @@ -3700,8 +3578,6 @@ def rfloordiv(self, other, axis: str | int = "columns") -> DataFrame: **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({ ... 'A': [1, 2, 3], @@ -3761,8 +3637,6 @@ def mod(self, other, axis: str | int = "columns") -> DataFrame: **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({ ... 'A': [1, 2, 3], @@ -3805,8 +3679,6 @@ def __mod__(self, other): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None You can modulo with a scalar: @@ -3853,8 +3725,6 @@ def rmod(self, other, axis: str | int = "columns") -> DataFrame: **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({ ... 'A': [1, 2, 3], @@ -3915,8 +3785,6 @@ def pow(self, other, axis: str | int = "columns") -> DataFrame: **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({ ... 'A': [1, 2, 3], @@ -3960,8 +3828,6 @@ def __pow__(self, other): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None You can exponentiate with a scalar: @@ -4009,8 +3875,6 @@ def rpow(self, other, axis: str | int = "columns") -> DataFrame: **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({ ... 'A': [1, 2, 3], @@ -4105,8 +3969,6 @@ def combine( **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df1 = bpd.DataFrame({'A': [0, 0], 'B': [4, 4]}) >>> df2 = bpd.DataFrame({'A': [1, 1], 'B': [3, 3]}) @@ -4155,8 +4017,6 @@ def combine_first(self, other) -> DataFrame: **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df1 = bpd.DataFrame({'A': [None, 0], 'B': [None, 4]}) >>> df2 = bpd.DataFrame({'A': [1, 1], 'B': [3, 3]}) @@ -4185,9 +4045,6 @@ def explode( **Examples:** - >>> import bigframes.pandas as bpd - >>> import numpy as np - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({'A': [[0, 1, 2], [], [], [3, 4]], ... 'B': 1, @@ -4244,8 +4101,6 @@ def corr(self, method, min_periods, numeric_only) -> DataFrame: **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({'A': [1, 2, 3], ... 'B': [400, 500, 600], @@ -4278,8 +4133,6 @@ def cov(self, *, numeric_only) -> DataFrame: **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({'A': [1, 2, 3], ... 'B': [400, 500, 600], @@ -4317,8 +4170,6 @@ def corrwith( **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> index = ["a", "b", "c", "d", "e"] >>> columns = ["one", "two", "three", "four"] @@ -4353,8 +4204,6 @@ def update( **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({'A': [1, 2, 3], ... 'B': [400, 500, 600]}) @@ -4418,8 +4267,6 @@ def groupby( **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({'Animal': ['Falcon', 'Falcon', ... 'Parrot', 'Parrot'], @@ -4515,15 +4362,12 @@ def map(self, func, na_action: Optional[str] = None) -> DataFrame: **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None - Let's use ``reuse=False`` flag to make sure a new ``remote_function`` is created every time we run the following code, but you can skip it to potentially reuse a previously deployed ``remote_function`` from the same user defined function. - >>> @bpd.remote_function(reuse=False, cloud_function_service_account="default") + >>> @bpd.remote_function(reuse=False, cloud_function_service_account="default") # doctest: +SKIP ... def minutes_to_hours(x: int) -> float: ... return x/60 @@ -4540,8 +4384,8 @@ def map(self, func, na_action: Optional[str] = None) -> DataFrame: [5 rows x 2 columns] - >>> df_hours = df_minutes.map(minutes_to_hours) - >>> df_hours + >>> df_hours = df_minutes.map(minutes_to_hours) # doctest: +SKIP + >>> df_hours # doctest: +SKIP system_minutes user_minutes 0 0.0 0.0 1 0.5 0.25 @@ -4557,11 +4401,11 @@ def map(self, func, na_action: Optional[str] = None) -> DataFrame: >>> df_minutes = bpd.DataFrame( ... { - ... "system_minutes" : [0, 30, 60, None, 90, 120, bpd.NA], - ... "user_minutes" : [0, 15, 75, 90, 6, None, bpd.NA] + ... "system_minutes" : [0, 30, 60, None, 90, 120, pd.NA], + ... "user_minutes" : [0, 15, 75, 90, 6, None, pd.NA] ... }, dtype="Int64") - >>> df_hours = df_minutes.map(minutes_to_hours, na_action='ignore') - >>> df_hours + >>> df_hours = df_minutes.map(minutes_to_hours, na_action='ignore') # doctest: +SKIP + >>> df_hours # doctest: +SKIP system_minutes user_minutes 0 0.0 0.0 1 0.5 0.25 @@ -4612,8 +4456,6 @@ def join( **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None Join two DataFrames by specifying how to handle the operation: @@ -4764,8 +4606,6 @@ def merge( **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None Merge DataFrames df1 and df2 by specifying type of merge: @@ -4897,7 +4737,6 @@ def round(self, decimals): **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame([(.21, .32), (.01, .67), (.66, .03), (.21, .18)], ... columns=['dogs', 'cats']) >>> df @@ -4980,9 +4819,6 @@ def apply(self, func, *, axis=0, args=(), **kwargs): **Examples:** - >>> import bigframes.pandas as bpd - >>> import pandas as pd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) >>> df @@ -5008,14 +4844,14 @@ def apply(self, func, *, axis=0, args=(), **kwargs): to select only the necessary columns before calling `apply()`. Note: This feature is currently in **preview**. - >>> @bpd.remote_function(reuse=False, cloud_function_service_account="default") + >>> @bpd.remote_function(reuse=False, cloud_function_service_account="default") # doctest: +SKIP ... def foo(row: pd.Series) -> int: ... result = 1 ... result += row["col1"] ... result += row["col2"]*row["col2"] ... return result - >>> df[["col1", "col2"]].apply(foo, axis=1) + >>> df[["col1", "col2"]].apply(foo, axis=1) # doctest: +SKIP 0 11 1 19 dtype: Int64 @@ -5023,7 +4859,7 @@ def apply(self, func, *, axis=0, args=(), **kwargs): You could return an array output for every input row from the remote function. - >>> @bpd.remote_function(reuse=False, cloud_function_service_account="default") + >>> @bpd.remote_function(reuse=False, cloud_function_service_account="default") # doctest: +SKIP ... def marks_analyzer(marks: pd.Series) -> list[float]: ... import statistics ... average = marks.mean() @@ -5040,8 +4876,8 @@ def apply(self, func, *, axis=0, args=(), **kwargs): ... "chemistry": [88, 56, 72], ... "algebra": [78, 91, 79] ... }, index=["Alice", "Bob", "Charlie"]) - >>> stats = df.apply(marks_analyzer, axis=1) - >>> stats + >>> stats = df.apply(marks_analyzer, axis=1) # doctest: +SKIP + >>> stats # doctest: +SKIP Alice [77.67 78. 77.19 76.71] Bob [75.67 80. 74.15 72.56] Charlie [75.33 75. 75.28 75.22] @@ -5064,14 +4900,14 @@ def apply(self, func, *, axis=0, args=(), **kwargs): [2 rows x 3 columns] - >>> @bpd.remote_function(reuse=False, cloud_function_service_account="default") + >>> @bpd.remote_function(reuse=False, cloud_function_service_account="default") # doctest: +SKIP ... def foo(x: int, y: int, z: int) -> float: ... result = 1 ... result += x ... result += y/z ... return result - >>> df.apply(foo, axis=1) + >>> df.apply(foo, axis=1) # doctest: +SKIP 0 2.6 1 3.8 dtype: Float64 @@ -5131,8 +4967,6 @@ def any(self, *, axis=0, bool_only: bool = False): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({"A": [True, True], "B": [False, False]}) >>> df @@ -5178,8 +5012,6 @@ def all(self, axis=0, *, bool_only: bool = False): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({"A": [True, True], "B": [False, False]}) >>> df @@ -5222,8 +5054,6 @@ def prod(self, axis=0, *, numeric_only: bool = False): **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None - >>> df = bpd.DataFrame({"A": [1, 2, 3], "B": [4.5, 5.5, 6.5]}) >>> df A B @@ -5268,8 +5098,6 @@ def min(self, axis=0, *, numeric_only: bool = False): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({"A": [1, 3], "B": [2, 4]}) >>> df @@ -5313,8 +5141,6 @@ def max(self, axis=0, *, numeric_only: bool = False): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({"A": [1, 3], "B": [2, 4]}) >>> df @@ -5357,8 +5183,6 @@ def sum(self, axis=0, *, numeric_only: bool = False): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({"A": [1, 3], "B": [2, 4]}) >>> df @@ -5399,8 +5223,6 @@ def mean(self, axis=0, *, numeric_only: bool = False): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({"A": [1, 3], "B": [2, 4]}) >>> df @@ -5442,8 +5264,6 @@ def median(self, *, numeric_only: bool = False, exact: bool = True): **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None - >>> df = bpd.DataFrame({"A": [1, 3], "B": [2, 4]}) >>> df A B @@ -5480,7 +5300,6 @@ def quantile( **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame(np.array([[1, 1], [2, 10], [3, 100], [4, 100]]), ... columns=['a', 'b']) >>> df.quantile(.1) @@ -5517,8 +5336,6 @@ def var(self, axis=0, *, numeric_only: bool = False): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({"A": [1, 3], "B": [2, 4]}) >>> df @@ -5562,8 +5379,6 @@ def skew(self, *, numeric_only: bool = False): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({'A': [1, 2, 3, 4, 5], ... 'B': [5, 4, 3, 2, 1], @@ -5603,8 +5418,6 @@ def kurt(self, *, numeric_only: bool = False): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({"A": [1, 2, 3, 4, 5], ... "B": [3, 4, 3, 2, 1], @@ -5643,8 +5456,6 @@ def std(self, *, numeric_only: bool = False): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({"A": [1, 2, 3, 4, 5], ... "B": [3, 4, 3, 2, 1], @@ -5685,8 +5496,6 @@ def count(self, *, numeric_only: bool = False): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({"A": [1, None, 3, 4, 5], ... "B": [1, 2, 3, 4, 5], @@ -5739,8 +5548,6 @@ def nlargest(self, n: int, columns, keep: str = "first"): **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None - >>> df = bpd.DataFrame({"A": [1, 1, 3, 3, 5, 5], ... "B": [5, 6, 3, 4, 1, 2], ... "C": ['a', 'b', 'a', 'b', 'a', 'b']}) @@ -5831,8 +5638,6 @@ def nsmallest(self, n: int, columns, keep: str = "first"): **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None - >>> df = bpd.DataFrame({"A": [1, 1, 3, 3, 5, 5], ... "B": [5, 6, 3, 4, 1, 2], ... "C": ['a', 'b', 'a', 'b', 'a', 'b']}) @@ -5912,8 +5717,6 @@ def idxmin(self): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({"A": [3, 1, 2], "B": [1, 2, 3]}) >>> df @@ -5942,8 +5745,6 @@ def idxmax(self): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({"A": [3, 1, 2], "B": [1, 2, 3]}) >>> df @@ -5976,8 +5777,6 @@ def melt(self, id_vars, value_vars, var_name, value_name): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({"A": [1, None, 3, 4, 5], ... "B": [1, 2, 3, 4, 5], @@ -6051,8 +5850,6 @@ def nunique(self): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({"A": [3, 1, 2], "B": [1, 2, 2]}) >>> df @@ -6080,8 +5877,6 @@ def cummin(self) -> DataFrame: **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({"A": [3, 1, 2], "B": [1, 2, 3]}) >>> df @@ -6112,8 +5907,6 @@ def cummax(self) -> DataFrame: **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({"A": [3, 1, 2], "B": [1, 2, 3]}) >>> df @@ -6144,8 +5937,6 @@ def cumsum(self) -> DataFrame: **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({"A": [3, 1, 2], "B": [1, 2, 3]}) >>> df @@ -6181,8 +5972,6 @@ def cumprod(self) -> DataFrame: **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({"A": [3, 1, 2], "B": [1, 2, 3]}) >>> df @@ -6222,8 +6011,6 @@ def diff( **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({"A": [3, 1, 2], "B": [1, 2, 3]}) >>> df @@ -6270,8 +6057,6 @@ def agg(self, func): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({"A": [3, 1, 2], "B": [1, 2, 3]}) >>> df @@ -6335,8 +6120,6 @@ def describe(self, include: None | Literal["all"] = None): **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None - >>> df = bpd.DataFrame({"A": [3, 1, 2], "B": [0, 2, 8], "C": ["cat", "cat", "dog"]}) >>> df A B C @@ -6406,8 +6189,6 @@ def pivot(self, *, columns, index=None, values=None): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({ ... "foo": ["one", "one", "one", "two", "two"], @@ -6477,8 +6258,6 @@ def pivot_table(self, values=None, index=None, columns=None, aggfunc="mean"): **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None - >>> df = bpd.DataFrame({ ... 'Product': ['Product A', 'Product B', 'Product A', 'Product B', 'Product A', 'Product B'], ... 'Region': ['East', 'West', 'East', 'West', 'West', 'East'], @@ -6569,8 +6348,6 @@ def stack(self, level=-1): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({'A': [1, 3], 'B': [2, 4]}, index=['foo', 'bar']) >>> df @@ -6608,8 +6385,6 @@ def unstack(self, level=-1): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({'A': [1, 3], 'B': [2, 4]}, index=['foo', 'bar']) >>> df @@ -6649,8 +6424,6 @@ def index(self): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None You can access the index of a DataFrame via ``index`` property. @@ -6702,8 +6475,6 @@ def columns(self): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None You can access the column labels of a DataFrame via ``columns`` property. @@ -6750,11 +6521,9 @@ def value_counts( **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({'num_legs': [2, 4, 4, 6, 7], - ... 'num_wings': [2, 0, 0, 0, bpd.NA]}, + ... 'num_wings': [2, 0, 0, 0, pd.NA]}, ... index=['falcon', 'dog', 'cat', 'ant', 'octopus'], ... dtype='Int64') >>> df @@ -6831,8 +6600,6 @@ def eval(self, expr: str) -> DataFrame: **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({'A': range(1, 6), 'B': range(10, 0, -2)}) >>> df @@ -6907,8 +6674,6 @@ def query(self, expr: str) -> DataFrame | None: **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({'A': range(1, 6), ... 'B': range(10, 0, -2), @@ -6982,8 +6747,6 @@ def interpolate(self, method: str = "linear"): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({ ... 'A': [1, 2, 3, None, None, 6], @@ -7114,8 +6877,6 @@ def replace( **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None - >>> df = bpd.DataFrame({ ... 'int_col': [1, 1, 2, 3], ... 'string_col': ["a", "b", "c", "b"], @@ -7210,8 +6971,6 @@ def iat(self): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame([[0, 2, 3], [0, 4, 1], [10, 20, 30]], ... columns=['A', 'B', 'C']) @@ -7244,8 +7003,6 @@ def at(self): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame([[0, 2, 3], [0, 4, 1], [10, 20, 30]], ... index=[4, 5, 6], columns=['A', 'B', 'C']) @@ -7293,8 +7050,6 @@ def dot(self, other): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> left = bpd.DataFrame([[0, 1, -2, -1], [1, 1, 1, 1]]) >>> left @@ -7387,8 +7142,6 @@ def __matmul__(self, other): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> left = bpd.DataFrame([[0, 1, -2, -1], [1, 1, 1, 1]]) >>> left @@ -7447,8 +7200,6 @@ def __len__(self): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({ ... 'a': [0, 1, 2], @@ -7470,9 +7221,6 @@ def __array__(self, dtype=None, copy: Optional[bool] = None): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None - >>> import numpy as np >>> df = bpd.DataFrame({"a": [1, 2, 3], "b": [11, 22, 33]}) @@ -7505,8 +7253,6 @@ def __getitem__(self, key): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({ ... "name" : ["alpha", "beta", "gamma"], @@ -7551,7 +7297,6 @@ def __getitem__(self, key): You can specify a pandas Index with desired column labels. - >>> import pandas as pd >>> df[pd.Index(["age", "location"])] age location 0 20 WA @@ -7580,8 +7325,6 @@ def __setitem__(self, key, value): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({ ... "name" : ["alpha", "beta", "gamma"], diff --git a/third_party/bigframes_vendored/pandas/core/generic.py b/third_party/bigframes_vendored/pandas/core/generic.py index bf67326025..63b9f8199b 100644 --- a/third_party/bigframes_vendored/pandas/core/generic.py +++ b/third_party/bigframes_vendored/pandas/core/generic.py @@ -38,8 +38,6 @@ def size(self) -> int: **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series({'a': 1, 'b': 2, 'c': 3}) >>> s.size @@ -65,8 +63,6 @@ def __iter__(self) -> Iterator: **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({ ... 'A': [1, 2, 3], @@ -106,9 +102,6 @@ def astype(self, dtype): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None - Create a DataFrame: >>> d = {'col1': [1, 2], 'col2': [3, 4]} @@ -152,7 +145,7 @@ def astype(self, dtype): Note that this is equivalent of using ``to_datetime`` with ``unit='us'``: - >>> bpd.to_datetime(ser, unit='us', utc=True) + >>> bpd.to_datetime(ser, unit='us', utc=True) # doctest: +SKIP 0 2034-02-08 11:13:20.246789+00:00 1 2021-06-19 17:20:44.123101+00:00 2 2003-06-05 17:30:34.120101+00:00 @@ -350,8 +343,6 @@ def get(self, key, default=None): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame( ... [ @@ -461,8 +452,6 @@ def head(self, n: int = 5): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({'animal': ['alligator', 'bee', 'falcon', 'lion', ... 'monkey', 'parrot', 'shark', 'whale', 'zebra']}) @@ -562,8 +551,6 @@ def sample( **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None - >>> df = bpd.DataFrame({'num_legs': [2, 4, 8, 0], ... 'num_wings': [2, 0, 0, 0], ... 'num_specimen_seen': [10, 2, 1, 8]}, @@ -643,8 +630,6 @@ def dtypes(self): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({'float': [1.0], 'int': [1], 'string': ['foo']}) >>> df.dtypes @@ -668,8 +653,6 @@ def copy(self): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None Modification in the original Series will not affect the copy Series: @@ -741,9 +724,6 @@ def ffill(self, *, limit: Optional[int] = None): **Examples:** - >>> import bigframes.pandas as bpd - >>> import numpy as np - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame([[np.nan, 2, np.nan, 0], ... [3, 4, np.nan, 1], @@ -1081,8 +1061,6 @@ def rolling( **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None - >>> s = bpd.Series([0,1,2,3,4]) >>> s.rolling(window=3).min() 0 @@ -1167,9 +1145,6 @@ def pipe( Constructing a income DataFrame from a dictionary. - >>> import bigframes.pandas as bpd - >>> import numpy as np - >>> bpd.options.display.progress_bar = None >>> data = [[8000, 1000], [9500, np.nan], [5000, 2000]] >>> df = bpd.DataFrame(data, columns=['Salary', 'Others']) diff --git a/third_party/bigframes_vendored/pandas/core/groupby/__init__.py b/third_party/bigframes_vendored/pandas/core/groupby/__init__.py index 1e39ec8f94..ba6310507d 100644 --- a/third_party/bigframes_vendored/pandas/core/groupby/__init__.py +++ b/third_party/bigframes_vendored/pandas/core/groupby/__init__.py @@ -45,8 +45,6 @@ def describe(self, include: None | Literal["all"] = None): **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None - >>> df = bpd.DataFrame({"A": [1, 1, 1, 2, 2], "B": [0, 2, 8, 2, 7], "C": ["cat", "cat", "dog", "mouse", "cat"]}) >>> df A B C @@ -85,8 +83,6 @@ def any(self): For SeriesGroupBy: - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> lst = ['a', 'a', 'b'] >>> ser = bpd.Series([1, 2, 0], index=lst) @@ -124,8 +120,6 @@ def all(self): For SeriesGroupBy: - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> lst = ['a', 'a', 'b'] >>> ser = bpd.Series([1, 2, 0], index=lst) @@ -163,9 +157,6 @@ def count(self): For SeriesGroupBy: - >>> import bigframes.pandas as bpd - >>> import numpy as np - >>> bpd.options.display.progress_bar = None >>> lst = ['a', 'a', 'b'] >>> ser = bpd.Series([1, 2, np.nan], index=lst) @@ -202,9 +193,6 @@ def mean( **Examples:** - >>> import bigframes.pandas as bpd - >>> import numpy as np - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({'A': [1, 1, 2, 1, 2], ... 'B': [np.nan, 2, 3, 4, 5], ... 'C': [1, 2, 1, 1, 2]}, columns=['A', 'B', 'C']) @@ -263,9 +251,6 @@ def median( For SeriesGroupBy: >>> import bigframes.pandas as bpd - >>> import numpy as np - >>> bpd.options.display.progress_bar = None - >>> lst = ['a', 'a', 'a', 'b', 'b', 'b'] >>> ser = bpd.Series([7, 2, 8, 4, 3, 3], index=lst) >>> ser.groupby(level=0).median() @@ -304,7 +289,6 @@ def quantile(self, q=0.5, *, numeric_only: bool = False): **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame([ ... ['a', 1], ['a', 2], ['a', 3], ... ['b', 1], ['b', 3], ['b', 5] @@ -343,9 +327,6 @@ def std( For SeriesGroupBy: - >>> import bigframes.pandas as bpd - >>> import numpy as np - >>> bpd.options.display.progress_bar = None >>> lst = ['a', 'a', 'a', 'b', 'b', 'b'] >>> ser = bpd.Series([7, 2, 8, 4, 3, 3], index=lst) @@ -390,9 +371,6 @@ def var( For SeriesGroupBy: - >>> import bigframes.pandas as bpd - >>> import numpy as np - >>> bpd.options.display.progress_bar = None >>> lst = ['a', 'a', 'a', 'b', 'b', 'b'] >>> ser = bpd.Series([7, 2, 8, 4, 3, 3], index=lst) @@ -435,9 +413,6 @@ def rank( **Examples:** >>> import bigframes.pandas as bpd - >>> import numpy as np - >>> bpd.options.display.progress_bar = None - >>> df = bpd.DataFrame( ... { ... "group": ["a", "a", "a", "a", "a", "b", "b", "b", "b", "b"], @@ -510,9 +485,6 @@ def skew( For SeriesGroupBy: - >>> import bigframes.pandas as bpd - >>> import numpy as np - >>> bpd.options.display.progress_bar = None >>> ser = bpd.Series([390., 350., 357., np.nan, 22., 20., 30.], ... index=['Falcon', 'Falcon', 'Falcon', 'Falcon', @@ -546,8 +518,6 @@ def kurt( **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> lst = ['a', 'a', 'a', 'a', 'b', 'b', 'b', 'b', 'b'] >>> ser = bpd.Series([0, 1, 1, 0, 0, 1, 2, 4, 5], index=lst) @@ -579,8 +549,6 @@ def kurtosis( **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> lst = ['a', 'a', 'a', 'a', 'b', 'b', 'b', 'b', 'b'] >>> ser = bpd.Series([0, 1, 1, 0, 0, 1, 2, 4, 5], index=lst) @@ -606,9 +574,8 @@ def first(self, numeric_only: bool = False, min_count: int = -1): Defaults to skipping NA elements. **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None + >>> import bigframes.pandas as bpd >>> df = bpd.DataFrame(dict(A=[1, 1, 3], B=[None, 5, 6], C=[1, 2, 3])) >>> df.groupby("A").first() B C @@ -647,8 +614,6 @@ def last(self, numeric_only: bool = False, min_count: int = -1): Defaults to skipping NA elements. **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame(dict(A=[1, 1, 3], B=[5, None, 6], C=[1, 2, 3])) >>> df.groupby("A").last() @@ -685,8 +650,6 @@ def sum( For SeriesGroupBy: - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> lst = ['a', 'a', 'b', 'b'] >>> ser = bpd.Series([1, 2, 3, 4], index=lst) @@ -730,9 +693,6 @@ def prod(self, numeric_only: bool = False, min_count: int = 0): For SeriesGroupBy: - >>> import bigframes.pandas as bpd - >>> import numpy as np - >>> bpd.options.display.progress_bar = None >>> lst = ['a', 'a', 'b', 'b'] >>> ser = bpd.Series([1, 2, 3, 4], index=lst) @@ -766,9 +726,6 @@ def min( For SeriesGroupBy: - >>> import bigframes.pandas as bpd - >>> import numpy as np - >>> bpd.options.display.progress_bar = None >>> lst = ['a', 'a', 'b', 'b'] >>> ser = bpd.Series([1, 2, 3, 4], index=lst) @@ -815,8 +772,6 @@ def max( For SeriesGroupBy: - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> lst = ['a', 'a', 'b', 'b'] >>> ser = bpd.Series([1, 2, 3, 4], index=lst) @@ -859,8 +814,6 @@ def cumcount(self, ascending: bool = True): For SeriesGroupBy: - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> lst = ['a', 'a', 'b', 'b', 'c'] >>> ser = bpd.Series([5, 1, 2, 3, 4], index=lst) @@ -897,9 +850,6 @@ def cumprod(self, *args, **kwargs): For SeriesGroupBy: - >>> import bigframes.pandas as bpd - >>> import numpy as np - >>> bpd.options.display.progress_bar = None >>> lst = ['a', 'a', 'b'] >>> ser = bpd.Series([6, 2, 0], index=lst) @@ -936,9 +886,6 @@ def cumsum(self, *args, **kwargs): For SeriesGroupBy: - >>> import bigframes.pandas as bpd - >>> import numpy as np - >>> bpd.options.display.progress_bar = None >>> lst = ['a', 'a', 'b'] >>> ser = bpd.Series([6, 2, 0], index=lst) @@ -975,9 +922,6 @@ def cummin(self, *args, numeric_only: bool = False, **kwargs): For SeriesGroupBy: - >>> import bigframes.pandas as bpd - >>> import numpy as np - >>> bpd.options.display.progress_bar = None >>> lst = ['a', 'a', 'b'] >>> ser = bpd.Series([6, 2, 0], index=lst) @@ -1014,9 +958,6 @@ def cummax(self, *args, numeric_only: bool = False, **kwargs): For SeriesGroupBy: - >>> import bigframes.pandas as bpd - >>> import numpy as np - >>> bpd.options.display.progress_bar = None >>> lst = ['a', 'a', 'b'] >>> ser = bpd.Series([6, 2, 0], index=lst) @@ -1055,9 +996,6 @@ def diff(self): For SeriesGroupBy: - >>> import bigframes.pandas as bpd - >>> import numpy as np - >>> bpd.options.display.progress_bar = None >>> lst = ['a', 'a', 'a', 'b', 'b', 'b'] >>> ser = bpd.Series([7, 2, 8, 4, 3, 3], index=lst) @@ -1101,9 +1039,6 @@ def shift(self, periods: int = 1): For SeriesGroupBy: - >>> import bigframes.pandas as bpd - >>> import numpy as np - >>> bpd.options.display.progress_bar = None >>> lst = ['a', 'a', 'b', 'b'] >>> ser = bpd.Series([1, 2, 3, 4], index=lst) @@ -1145,9 +1080,6 @@ def rolling(self, *args, **kwargs): **Examples:** >>> import bigframes.pandas as bpd - >>> import numpy as np - >>> bpd.options.display.progress_bar = None - >>> lst = ['a', 'a', 'a', 'a', 'e'] >>> ser = bpd.Series([1, 0, -2, -1, 2], index=lst) >>> ser.groupby(level=0).rolling(2).min() @@ -1204,9 +1136,6 @@ def expanding(self, *args, **kwargs): **Examples:** >>> import bigframes.pandas as bpd - >>> import numpy as np - >>> bpd.options.display.progress_bar = None - >>> lst = ['a', 'a', 'c', 'c', 'e'] >>> ser = bpd.Series([1, 0, -2, -1, 2], index=lst) >>> ser.groupby(level=0).expanding().min() @@ -1230,8 +1159,6 @@ def head(self, n: int = 5): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame([[1, 2], [1, 4], [5, 6]], ... columns=['A', 'B']) @@ -1259,8 +1186,6 @@ def size(self): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None For SeriesGroupBy: @@ -1313,8 +1238,6 @@ def __iter__(self): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None For SeriesGroupBy: @@ -1377,9 +1300,6 @@ def agg(self, func): **Examples:** - >>> import bigframes.pandas as bpd - >>> import numpy as np - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series([1, 2, 3, 4], index=[1, 1, 2, 2]) >>> s.groupby(level=0).agg(['min', 'max']) @@ -1410,9 +1330,6 @@ def aggregate(self, func): **Examples:** - >>> import bigframes.pandas as bpd - >>> import numpy as np - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series([1, 2, 3, 4], index=[1, 1, 2, 2]) >>> s.groupby(level=0).aggregate(['min', 'max']) @@ -1443,9 +1360,6 @@ def nunique(self): **Examples:** - >>> import bigframes.pandas as bpd - >>> import numpy as np - >>> bpd.options.display.progress_bar = None >>> lst = ['a', 'a', 'b', 'b'] >>> ser = bpd.Series([1, 2, 3, 3], index=lst) @@ -1494,9 +1408,6 @@ def agg(self, func, **kwargs): **Examples:** - >>> import bigframes.pandas as bpd - >>> import numpy as np - >>> bpd.options.display.progress_bar = None >>> data = {"A": [1, 1, 2, 2], ... "B": [1, 2, 3, 4], @@ -1554,9 +1465,6 @@ def aggregate(self, func, **kwargs): **Examples:** - >>> import bigframes.pandas as bpd - >>> import numpy as np - >>> bpd.options.display.progress_bar = None >>> data = {"A": [1, 1, 2, 2], ... "B": [1, 2, 3, 4], @@ -1614,9 +1522,6 @@ def nunique(self): **Examples:** - >>> import bigframes.pandas as bpd - >>> import numpy as np - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({'id': ['spam', 'egg', 'egg', 'spam', ... 'ham', 'ham'], @@ -1650,9 +1555,6 @@ def value_counts( **Examples:** - >>> import bigframes.pandas as bpd - >>> import numpy as np - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({ ... 'gender': ['male', 'male', 'female', 'male', 'female', 'male'], diff --git a/third_party/bigframes_vendored/pandas/core/indexes/accessor.py b/third_party/bigframes_vendored/pandas/core/indexes/accessor.py index 0dd487d056..b9eb363b29 100644 --- a/third_party/bigframes_vendored/pandas/core/indexes/accessor.py +++ b/third_party/bigframes_vendored/pandas/core/indexes/accessor.py @@ -12,9 +12,6 @@ def day(self): **Examples:** - >>> import pandas as pd - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series( ... pd.date_range("2000-01-01", periods=3, freq="D") ... ) @@ -42,9 +39,6 @@ def dayofweek(self): **Examples:** - >>> import pandas as pd - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series( ... pd.date_range('2016-12-31', '2017-01-08', freq='D').to_series() ... ) @@ -76,9 +70,6 @@ def day_of_week(self): **Examples:** - >>> import pandas as pd - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series( ... pd.date_range('2016-12-31', '2017-01-08', freq='D').to_series() ... ) @@ -106,9 +97,7 @@ def dayofyear(self): **Examples:** - >>> import pandas as pd >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series( ... pd.date_range('2016-12-28', '2017-01-03', freq='D').to_series() ... ) @@ -134,9 +123,7 @@ def day_of_year(self): **Examples:** - >>> import pandas as pd >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series( ... pd.date_range('2016-12-28', '2017-01-03', freq='D').to_series() ... ) @@ -168,7 +155,6 @@ def date(self): **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series(["1/1/2020 10:00:00+00:00", "2/1/2020 11:00:00+00:00"]) >>> s = bpd.to_datetime(s, utc=True, format="%d/%m/%Y %H:%M:%S%Ez") >>> s @@ -189,9 +175,7 @@ def hour(self): **Examples:** - >>> import pandas as pd >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series( ... pd.date_range("2000-01-01", periods=3, freq="h") ... ) @@ -215,9 +199,7 @@ def minute(self): **Examples:** - >>> import pandas as pd >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series( ... pd.date_range("2000-01-01", periods=3, freq="min") ... ) @@ -241,9 +223,6 @@ def month(self): **Examples:** - >>> import pandas as pd - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series( ... pd.date_range("2000-01-01", periods=3, freq="M") ... ) @@ -267,9 +246,6 @@ def isocalendar(self): **Examples:** - >>> import pandas as pd - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series( ... pd.date_range('2009-12-27', '2010-01-04', freq='d').to_series() ... ) @@ -300,9 +276,7 @@ def second(self): **Examples:** - >>> import pandas as pd >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series( ... pd.date_range("2000-01-01", periods=3, freq="s") ... ) @@ -331,7 +305,6 @@ def time(self): **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series(["1/1/2020 10:00:00+00:00", "2/1/2020 11:00:00+00:00"]) >>> s = bpd.to_datetime(s, utc=True, format="%m/%d/%Y %H:%M:%S%Ez") >>> s @@ -353,7 +326,6 @@ def quarter(self): **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series(["1/1/2020 10:00:00+00:00", "4/1/2020 11:00:00+00:00"]) >>> s = bpd.to_datetime(s, utc=True, format="%m/%d/%Y %H:%M:%S%Ez") >>> s @@ -374,9 +346,6 @@ def year(self): **Examples:** - >>> import pandas as pd - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series( ... pd.date_range("2000-01-01", periods=3, freq="Y") ... ) @@ -400,9 +369,6 @@ def days(self): **Examples:** - >>> import pandas as pd - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series([pd.Timedelta("4d3m2s1us")]) >>> s 0 4 days 00:03:02.000001 @@ -418,9 +384,6 @@ def seconds(self): **Examples:** - >>> import pandas as pd - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series([pd.Timedelta("4d3m2s1us")]) >>> s 0 4 days 00:03:02.000001 @@ -436,9 +399,6 @@ def microseconds(self): **Examples:** - >>> import pandas as pd - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series([pd.Timedelta("4d3m2s1us")]) >>> s 0 4 days 00:03:02.000001 @@ -453,9 +413,6 @@ def total_seconds(self): **Examples:** - >>> import pandas as pd - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series([pd.Timedelta("1d1m1s1us")]) >>> s 0 1 days 00:01:01.000001 @@ -472,7 +429,6 @@ def tz(self): **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series(["1/1/2020 10:00:00+00:00", "2/1/2020 11:00:00+00:00"]) >>> s = bpd.to_datetime(s, utc=True, format="%m/%d/%Y %H:%M:%S%Ez") >>> s @@ -495,7 +451,6 @@ def unit(self) -> str: **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series(["1/1/2020 10:00:00+00:00", "2/1/2020 11:00:00+00:00"]) >>> s = bpd.to_datetime(s, utc=True, format="%m/%d/%Y %H:%M:%S%Ez") >>> s diff --git a/third_party/bigframes_vendored/pandas/core/indexes/base.py b/third_party/bigframes_vendored/pandas/core/indexes/base.py index 82c0563c25..e120dabc66 100644 --- a/third_party/bigframes_vendored/pandas/core/indexes/base.py +++ b/third_party/bigframes_vendored/pandas/core/indexes/base.py @@ -32,8 +32,6 @@ def name(self): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> idx = bpd.Index([1, 2, 3], name='x') >>> idx @@ -63,8 +61,6 @@ def values(self): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> idx = bpd.Index([1, 2, 3]) >>> idx @@ -86,8 +82,6 @@ def ndim(self): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series(['Ant', 'Bear', 'Cow']) >>> s @@ -121,8 +115,6 @@ def size(self) -> int: **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None For Series: @@ -156,8 +148,6 @@ def is_monotonic_increasing(self) -> bool: **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> bool(bpd.Index([1, 2, 3]).is_monotonic_increasing) True @@ -181,8 +171,6 @@ def is_monotonic_decreasing(self) -> bool: **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> bool(bpd.Index([3, 2, 1]).is_monotonic_decreasing) True @@ -206,8 +194,6 @@ def from_frame(cls, frame) -> Index: **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame([['HI', 'Temp'], ['HI', 'Precip'], ... ['NJ', 'Temp'], ['NJ', 'Precip']], @@ -246,8 +232,6 @@ def shape(self): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> idx = bpd.Index([1, 2, 3]) >>> idx @@ -268,8 +252,6 @@ def nlevels(self) -> int: **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> mi = bpd.MultiIndex.from_arrays([['a'], ['b'], ['c']]) >>> mi @@ -290,8 +272,6 @@ def is_unique(self) -> bool: **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> idx = bpd.Index([1, 5, 7, 7]) >>> idx.is_unique @@ -313,8 +293,6 @@ def has_duplicates(self) -> bool: **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> idx = bpd.Index([1, 5, 7, 7]) >>> bool(idx.has_duplicates) @@ -336,8 +314,6 @@ def dtype(self): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> idx = bpd.Index([1, 2, 3]) >>> idx @@ -364,8 +340,6 @@ def T(self) -> Index: **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series(['Ant', 'Bear', 'Cow']) >>> s @@ -403,8 +377,6 @@ def copy( **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> idx = bpd.Index(['a', 'b', 'c']) >>> new_idx = idx.copy() @@ -438,8 +410,6 @@ def astype(self, dtype): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> idx = bpd.Index([1, 2, 3]) >>> idx @@ -487,8 +457,6 @@ def get_level_values(self, level) -> Index: **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> idx = bpd.Index(list('abc')) >>> idx @@ -517,8 +485,6 @@ def to_series(self): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> idx = bpd.Index(['Ant', 'Bear', 'Cow'], name='animal') @@ -571,8 +537,6 @@ def isin(self, values): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> idx = bpd.Index([1,2,3]) >>> idx @@ -611,8 +575,6 @@ def all(self) -> bool: **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None True, because nonzero integers are considered True. @@ -639,8 +601,6 @@ def any(self) -> bool: **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> index = bpd.Index([0, 1, 2]) >>> bool(index.any()) @@ -665,8 +625,6 @@ def min(self): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> idx = bpd.Index([3, 2, 1]) >>> int(idx.min()) @@ -687,8 +645,6 @@ def max(self): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> idx = bpd.Index([3, 2, 1]) >>> int(idx.max()) @@ -713,8 +669,6 @@ def argmin(self) -> int: **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None Consider dataset containing cereal calories @@ -750,8 +704,6 @@ def get_loc( **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> unique_index = bpd.Index(list('abc')) >>> unique_index.get_loc('b') @@ -794,8 +746,6 @@ def argmax(self) -> int: Consider dataset containing cereal calories - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series({'Corn Flakes': 100.0, 'Almond Delight': 110.0, ... 'Cinnamon Toast Crunch': 120.0, 'Cocoa Puff': 110.0}) @@ -828,8 +778,6 @@ def nunique(self) -> int: **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series([1, 3, 5, 7, 7]) >>> s @@ -860,8 +808,6 @@ def sort_values( **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> idx = bpd.Index([10, 100, 1, 1000]) >>> idx @@ -904,9 +850,6 @@ def value_counts( **Examples:** - >>> import bigframes.pandas as bpd - >>> import numpy as np - >>> bpd.options.display.progress_bar = None >>> index = bpd.Index([3, 1, 2, 3, 4, np.nan]) >>> index.value_counts() @@ -998,8 +941,6 @@ def rename(self, name, *, inplace): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> idx = bpd.Index(['A', 'C', 'A', 'B'], name='score') >>> idx.rename('grade') @@ -1028,8 +969,6 @@ def drop(self, labels) -> Index: **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> idx = bpd.Index(['a', 'b', 'c']) >>> idx.drop(['a']) @@ -1048,9 +987,6 @@ def dropna(self, how: typing.Literal["all", "any"] = "any"): **Examples:** - >>> import bigframes.pandas as bpd - >>> import numpy as np - >>> bpd.options.display.progress_bar = None >>> idx = bpd.Index([1, np.nan, 3]) >>> idx.dropna() @@ -1077,7 +1013,6 @@ def drop_duplicates(self, *, keep: str = "first"): **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None Generate an pandas.Index with duplicate values. @@ -1119,8 +1054,6 @@ def unique(self, level: Hashable | int | None = None): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> idx = bpd.Index([1, 1, 2, 3, 3]) >>> idx.unique() Index([1, 2, 3], dtype='Int64') @@ -1140,8 +1073,6 @@ def item(self, *args, **kwargs): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series([1], index=['a']) >>> s.index.item() 'a' diff --git a/third_party/bigframes_vendored/pandas/core/indexes/datetimes.py b/third_party/bigframes_vendored/pandas/core/indexes/datetimes.py index 105a376728..f22554e174 100644 --- a/third_party/bigframes_vendored/pandas/core/indexes/datetimes.py +++ b/third_party/bigframes_vendored/pandas/core/indexes/datetimes.py @@ -15,9 +15,6 @@ def year(self) -> base.Index: **Examples:** - >>> import bigframes.pandas as bpd - >>> import pandas as pd - >>> bpd.options.display.progress_bar = None >>> idx = bpd.Index([pd.Timestamp("20250215")]) >>> idx.year @@ -31,9 +28,6 @@ def month(self) -> base.Index: **Examples:** - >>> import bigframes.pandas as bpd - >>> import pandas as pd - >>> bpd.options.display.progress_bar = None >>> idx = bpd.Index([pd.Timestamp("20250215")]) >>> idx.month @@ -47,9 +41,6 @@ def day(self) -> base.Index: **Examples:** - >>> import bigframes.pandas as bpd - >>> import pandas as pd - >>> bpd.options.display.progress_bar = None >>> idx = bpd.Index([pd.Timestamp("20250215")]) >>> idx.day @@ -63,9 +54,6 @@ def day_of_week(self) -> base.Index: **Examples:** - >>> import bigframes.pandas as bpd - >>> import pandas as pd - >>> bpd.options.display.progress_bar = None >>> idx = bpd.Index([pd.Timestamp("20250215")]) >>> idx.day_of_week @@ -79,9 +67,6 @@ def dayofweek(self) -> base.Index: **Examples:** - >>> import bigframes.pandas as bpd - >>> import pandas as pd - >>> bpd.options.display.progress_bar = None >>> idx = bpd.Index([pd.Timestamp("20250215")]) >>> idx.dayofweek @@ -95,9 +80,6 @@ def weekday(self) -> base.Index: **Examples:** - >>> import bigframes.pandas as bpd - >>> import pandas as pd - >>> bpd.options.display.progress_bar = None >>> idx = bpd.Index([pd.Timestamp("20250215")]) >>> idx.weekday diff --git a/third_party/bigframes_vendored/pandas/core/indexes/multi.py b/third_party/bigframes_vendored/pandas/core/indexes/multi.py index a882aa40e3..018e638de3 100644 --- a/third_party/bigframes_vendored/pandas/core/indexes/multi.py +++ b/third_party/bigframes_vendored/pandas/core/indexes/multi.py @@ -25,8 +25,6 @@ def from_tuples( **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> tuples = [(1, 'red'), (1, 'blue'), ... (2, 'red'), (2, 'blue')] >>> bpd.MultiIndex.from_tuples(tuples, names=('number', 'color')) @@ -62,8 +60,6 @@ def from_arrays( **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> arrays = [[1, 1, 2, 2], ['red', 'blue', 'red', 'blue']] >>> bpd.MultiIndex.from_arrays(arrays, names=('number', 'color')) MultiIndex([(1, 'red'), diff --git a/third_party/bigframes_vendored/pandas/core/reshape/tile.py b/third_party/bigframes_vendored/pandas/core/reshape/tile.py index 697c17f23c..0f42433384 100644 --- a/third_party/bigframes_vendored/pandas/core/reshape/tile.py +++ b/third_party/bigframes_vendored/pandas/core/reshape/tile.py @@ -34,8 +34,6 @@ def cut( **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None - >>> s = bpd.Series([0, 1, 5, 10]) >>> s 0 0 @@ -73,7 +71,6 @@ def cut( Cut with pd.IntervalIndex, requires importing pandas for IntervalIndex: - >>> import pandas as pd >>> interval_index = pd.IntervalIndex.from_tuples([(0, 1), (1, 5), (5, 20)]) >>> bpd.cut(s, bins=interval_index) 0 diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py index 540a66b595..8de1c10f93 100644 --- a/third_party/bigframes_vendored/pandas/core/series.py +++ b/third_party/bigframes_vendored/pandas/core/series.py @@ -38,9 +38,6 @@ def dt(self): **Examples:** >>> import bigframes.pandas as bpd - >>> import pandas as pd - >>> bpd.options.display.progress_bar = None - >>> seconds_series = bpd.Series(pd.date_range("2000-01-01", periods=3, freq="s")) >>> seconds_series 0 2000-01-01 00:00:00 @@ -110,8 +107,6 @@ def index(self): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None You can access the index of a Series via ``index`` property. @@ -161,13 +156,11 @@ def shape(self): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series([1, 4, 9, 16]) >>> s.shape (4,) - >>> s = bpd.Series(['Alice', 'Bob', bpd.NA]) + >>> s = bpd.Series(['Alice', 'Bob', pd.NA]) >>> s.shape (3,) """ @@ -180,8 +173,6 @@ def dtype(self): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series([1, 2, 3]) >>> s.dtype @@ -200,8 +191,6 @@ def name(self) -> Hashable: **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None For a Series: @@ -248,8 +237,6 @@ def hasnans(self) -> bool: **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series([1, 2, 3, None]) >>> s @@ -272,8 +259,6 @@ def T(self) -> Series: **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series(['Ant', 'Bear', 'Cow']) >>> s @@ -297,8 +282,6 @@ def transpose(self) -> Series: **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series(['Ant', 'Bear', 'Cow']) >>> s @@ -337,9 +320,6 @@ def reset_index( **Examples:** - >>> import bigframes.pandas as bpd - >>> import pandas as pd - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series([1, 2, 3, 4], name='foo', ... index=['a', 'b', 'c', 'd']) @@ -440,8 +420,6 @@ def keys(self): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series([1, 2, 3], index=[0, 1, 2]) >>> s.keys() @@ -522,8 +500,6 @@ def to_markdown( **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series(["elk", "pig", "dog", "quetzal"], name="animal") >>> print(s.to_markdown()) @@ -577,9 +553,7 @@ def to_dict( **Examples:** - >>> import bigframes.pandas as bpd >>> from collections import OrderedDict, defaultdict - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series([1, 2, 3, 4]) >>> s.to_dict() @@ -617,8 +591,6 @@ def to_frame(self, name=None) -> DataFrame: **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series(["a", "b", "c"], ... name="vals") @@ -714,8 +686,6 @@ def tolist(self, *, allow_large_results: Optional[bool] = None) -> list: **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series([1, 2, 3]) >>> s @@ -748,9 +718,6 @@ def to_numpy( **Examples:** - >>> import bigframes.pandas as bpd - >>> import pandas as pd - >>> bpd.options.display.progress_bar = None >>> ser = bpd.Series(pd.Categorical(['a', 'b', 'a'])) >>> ser.to_numpy() @@ -803,8 +770,6 @@ def to_pickle(self, path, *, allow_large_results=None, **kwargs): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> original_df = bpd.DataFrame({"foo": range(5), "bar": range(5, 10)}) >>> original_df @@ -865,8 +830,6 @@ def agg(self, func): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series([1, 2, 3, 4]) >>> s @@ -902,10 +865,8 @@ def count(self): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None - >>> s = bpd.Series([0.0, 1.0, bpd.NA]) + >>> s = bpd.Series([0.0, 1.0, pd.NA]) >>> s 0 0.0 1 1.0 @@ -928,8 +889,6 @@ def nunique(self) -> int: **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series([1, 3, 5, 7, 7]) >>> s @@ -963,8 +922,6 @@ def unique(self, keep_order=True) -> Series: **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series([2, 1, 3, 3], name='A') >>> s @@ -1006,8 +963,6 @@ def mode(self) -> Series: **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series([2, 4, 8, 2, 4, None]) >>> s.mode() @@ -1031,11 +986,9 @@ def drop_duplicates( **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None - Generate a Series with duplicated entries. + >>> import bigframes.pandas as bpd >>> s = bpd.Series(['llama', 'cow', 'llama', 'beetle', 'llama', 'hippo'], ... name='animal') >>> s @@ -1101,7 +1054,6 @@ def duplicated(self, keep="first") -> Series: **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None By default, for each set of duplicated values, the first occurrence is set on False and all others on True: @@ -1172,8 +1124,6 @@ def idxmin(self) -> Hashable: **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series(data=[1, None, 4, 1], ... index=['A', 'B', 'C', 'D']) @@ -1201,8 +1151,6 @@ def idxmax(self) -> Hashable: **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series(data=[1, None, 4, 3, 4], ... index=['A', 'B', 'C', 'D', 'E']) @@ -1229,8 +1177,6 @@ def round(self, decimals: int = 0) -> Series: **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None - >>> s = bpd.Series([0.1, 1.3, 2.7]) >>> s.round() 0 0.0 @@ -1262,8 +1208,6 @@ def explode(self, *, ignore_index: Optional[bool] = False) -> Series: **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series([[1, 2, 3], [], [3, 4]]) >>> s @@ -1301,8 +1245,6 @@ def corr(self, other, method="pearson", min_periods=None) -> float: **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s1 = bpd.Series([.2, .0, .6, .2]) >>> s2 = bpd.Series([.3, .6, .0, .1]) @@ -1339,21 +1281,19 @@ def autocorr(self, lag: int = 1) -> float: **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series([0.25, 0.5, 0.2, -0.05]) - >>> s.autocorr() # doctest: +ELLIPSIS - np.float64(0.10355263309024067) + >>> float(s.autocorr()) # doctest: +ELLIPSIS + 0.1035526330902... - >>> s.autocorr(lag=2) - np.float64(-1.0) + >>> float(s.autocorr(lag=2)) + -1.0 If the Pearson correlation is not well defined, then 'NaN' is returned. >>> s = bpd.Series([1, 0, 0, 0]) - >>> s.autocorr() - np.float64(nan) + >>> float(s.autocorr()) + nan Args: lag (int, default 1): @@ -1377,8 +1317,6 @@ def cov( **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s1 = bpd.Series([0.90010907, 0.13484424, 0.62036035]) >>> s2 = bpd.Series([0.12528585, 0.26962463, 0.51111198]) @@ -1406,8 +1344,6 @@ def diff(self) -> Series: **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None Difference with previous row @@ -1472,8 +1408,6 @@ def dot(self, other) -> Series | np.ndarray: **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series([0, 1, 2, 3]) >>> other = bpd.Series([-1, 2, -3, 4]) @@ -1529,9 +1463,6 @@ def sort_values( **Examples:** - >>> import bigframes.pandas as bpd - >>> import numpy as np - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series([np.nan, 1, 3, 10, 5]) >>> s @@ -1628,9 +1559,6 @@ def sort_index( **Examples:** - >>> import bigframes.pandas as bpd - >>> import numpy as np - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series(['a', 'b', 'c', 'd'], index=[3, 2, 1, 4]) >>> s.sort_index() @@ -1690,8 +1618,6 @@ def nlargest( **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None - >>> countries_population = {"Italy": 59000000, "France": 65000000, ... "Malta": 434000, "Maldives": 434000, ... "Brunei": 434000, "Iceland": 337000, @@ -1776,8 +1702,6 @@ def nsmallest(self, n: int = 5, keep: str = "first") -> Series: **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None - >>> countries_population = {"Italy": 59000000, "France": 65000000, ... "Malta": 434000, "Maldives": 434000, ... "Brunei": 434000, "Iceland": 337000, @@ -1863,16 +1787,47 @@ def apply( **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None + Simple vectorized functions, lambdas or ufuncs can be applied directly + with `by_row=False`. - For applying arbitrary python function a `remote_function` is recommended. - Let's use ``reuse=False`` flag to make sure a new `remote_function` - is created every time we run the following code, but you can skip it - to potentially reuse a previously deployed `remote_function` from - the same user defined function. + >>> nums = bpd.Series([1, 2, 3, 4]) + >>> nums + 0 1 + 1 2 + 2 3 + 3 4 + dtype: Int64 + >>> nums.apply(lambda x: x*x + 2*x + 1, by_row=False) + 0 4 + 1 9 + 2 16 + 3 25 + dtype: Int64 + + >>> def is_odd(num): + ... return num % 2 == 1 + >>> nums.apply(is_odd, by_row=False) + 0 True + 1 False + 2 True + 3 False + dtype: boolean + + >>> nums.apply(np.log, by_row=False) + 0 0.0 + 1 0.693147 + 2 1.098612 + 3 1.386294 + dtype: Float64 + + Use `remote_function` to apply an arbitrary Python function. + Set ``reuse=False`` flag to make sure a new `remote_function` + is created every time you run the following code. Omit it + to reuse a previously deployed `remote_function` from + the same user defined function if the hash of the function definition + hasn't changed. - >>> @bpd.remote_function(reuse=False, cloud_function_service_account="default") + >>> @bpd.remote_function(reuse=False, cloud_function_service_account="default") # doctest: +SKIP ... def minutes_to_hours(x: int) -> float: ... return x/60 @@ -1885,8 +1840,8 @@ def apply( 4 120 dtype: Int64 - >>> hours = minutes.apply(minutes_to_hours) - >>> hours + >>> hours = minutes.apply(minutes_to_hours) # doctest: +SKIP + >>> hours # doctest: +SKIP 0 0.0 1 0.5 2 1.0 @@ -1898,7 +1853,7 @@ def apply( a `remote_function`, you would provide the names of the packages via `packages` param. - >>> @bpd.remote_function( + >>> @bpd.remote_function( # doctest: +SKIP ... reuse=False, ... packages=["cryptography"], ... cloud_function_service_account="default" @@ -1915,11 +1870,11 @@ def apply( ... return f.encrypt(input.encode()).decode() >>> names = bpd.Series(["Alice", "Bob"]) - >>> hashes = names.apply(get_hash) + >>> hashes = names.apply(get_hash) # doctest: +SKIP You could return an array output from the remote function. - >>> @bpd.remote_function(reuse=False, cloud_function_service_account="default") + >>> @bpd.remote_function(reuse=False, cloud_function_service_account="default") # doctest: +SKIP ... def text_analyzer(text: str) -> list[int]: ... words = text.count(" ") + 1 ... periods = text.count(".") @@ -1932,46 +1887,13 @@ def apply( ... "I love this product! It's amazing.", ... "Hungry? Wanna eat? Lets go!" ... ]) - >>> features = texts.apply(text_analyzer) - >>> features + >>> features = texts.apply(text_analyzer) # doctest: +SKIP + >>> features # doctest: +SKIP 0 [9 1 0 0] 1 [6 1 1 0] 2 [5 0 1 2] dtype: list[pyarrow] - Simple vectorized functions, lambdas or ufuncs can be applied directly - with `by_row=False`. - - >>> nums = bpd.Series([1, 2, 3, 4]) - >>> nums - 0 1 - 1 2 - 2 3 - 3 4 - dtype: Int64 - >>> nums.apply(lambda x: x*x + 2*x + 1, by_row=False) - 0 4 - 1 9 - 2 16 - 3 25 - dtype: Int64 - - >>> def is_odd(num): - ... return num % 2 == 1 - >>> nums.apply(is_odd, by_row=False) - 0 True - 1 False - 2 True - 3 False - dtype: boolean - - >>> nums.apply(np.log, by_row=False) - 0 0.0 - 1 0.693147 - 2 1.098612 - 3 1.386294 - dtype: Float64 - Args: func (function): BigFrames DataFrames ``remote_function`` to apply. The function @@ -2005,13 +1927,10 @@ def combine( **Examples:** - >>> import bigframes.pandas as bpd - >>> import numpy as np - >>> bpd.options.display.progress_bar = None - Consider 2 Datasets ``s1`` and ``s2`` containing highest clocked speeds of different birds. + >>> import bigframes.pandas as bpd >>> s1 = bpd.Series({'falcon': 330.0, 'eagle': 160.0}) >>> s1 falcon 330.0 @@ -2065,8 +1984,6 @@ def groupby( **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None You can group by a named index level. @@ -2089,7 +2006,6 @@ def groupby( You can also group by more than one index levels. - >>> import pandas as pd >>> s = bpd.Series([380, 370., 24., 26.], ... index=pd.MultiIndex.from_tuples( ... [("Falcon", "Clear"), @@ -2238,8 +2154,6 @@ def drop( **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series(data=np.arange(3), index=['A', 'B', 'C']) >>> s @@ -2256,7 +2170,6 @@ def drop( Drop 2nd level label in MultiIndex Series: - >>> import pandas as pd >>> midx = pd.MultiIndex(levels=[['llama', 'cow', 'falcon'], ... ['speed', 'weight', 'length']], ... codes=[[0, 0, 0, 1, 1, 1, 2, 2, 2], @@ -2369,9 +2282,6 @@ def interpolate(self, method: str = "linear"): **Examples:** - >>> import bigframes.pandas as bpd - >>> import numpy as np - >>> bpd.options.display.progress_bar = None Filling in NaN in a Series via linear interpolation. @@ -2474,8 +2384,6 @@ def replace( **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None - >>> s = bpd.Series([1, 2, 3, 4, 5]) >>> s 0 1 @@ -2600,9 +2508,6 @@ def dropna(self, *, axis=0, inplace: bool = False, how=None) -> Series: **Examples:** - >>> import bigframes.pandas as bpd - >>> import numpy as np - >>> bpd.options.display.progress_bar = None Drop NA values from a Series: @@ -2620,7 +2525,7 @@ def dropna(self, *, axis=0, inplace: bool = False, how=None) -> Series: Empty strings are not considered NA values. ``None`` is considered an NA value. - >>> ser = bpd.Series(['2', bpd.NA, '', None, 'I stay'], dtype='object') + >>> ser = bpd.Series(['2', pd.NA, '', None, 'I stay'], dtype='object') >>> ser 0 2 1 @@ -2664,9 +2569,6 @@ def between( **Examples:** - >>> import bigframes.pandas as bpd - >>> import numpy as np - >>> bpd.options.display.progress_bar = None Boundary values are included by default: @@ -2723,9 +2625,6 @@ def case_when( **Examples:** - >>> import bigframes.pandas as bpd - >>> import numpy as np - >>> bpd.options.display.progress_bar = None >>> c = bpd.Series([6, 7, 8, 9], name="c") >>> a = bpd.Series([0, 0, 1, 2]) @@ -2793,9 +2692,6 @@ def cumprod(self): **Examples:** >>> import bigframes.pandas as bpd - >>> import numpy as np - >>> bpd.options.display.progress_bar = None - >>> s = bpd.Series([2, np.nan, 5, -1, 0]) >>> s 0 2.0 @@ -2830,9 +2726,6 @@ def cumsum(self): **Examples:** - >>> import bigframes.pandas as bpd - >>> import numpy as np - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series([2, np.nan, 5, -1, 0]) >>> s @@ -2873,9 +2766,6 @@ def cummax(self): **Examples:** - >>> import bigframes.pandas as bpd - >>> import numpy as np - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series([2, np.nan, 5, -1, 0]) >>> s @@ -2912,9 +2802,6 @@ def cummin(self): **Examples:** - >>> import bigframes.pandas as bpd - >>> import numpy as np - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series([2, np.nan, 5, -1, 0]) >>> s @@ -2949,9 +2836,6 @@ def eq(self, other) -> Series: **Examples:** - >>> import bigframes.pandas as bpd - >>> import numpy as np - >>> bpd.options.display.progress_bar = None >>> a = bpd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) >>> a @@ -2994,9 +2878,6 @@ def ne(self, other) -> Series: **Examples:** - >>> import bigframes.pandas as bpd - >>> import numpy as np - >>> bpd.options.display.progress_bar = None >>> a = bpd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) >>> a @@ -3041,9 +2922,6 @@ def le(self, other) -> Series: **Examples:** - >>> import bigframes.pandas as bpd - >>> import numpy as np - >>> bpd.options.display.progress_bar = None >>> a = bpd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) >>> a @@ -3087,9 +2965,6 @@ def lt(self, other) -> Series: **Examples:** - >>> import bigframes.pandas as bpd - >>> import numpy as np - >>> bpd.options.display.progress_bar = None >>> a = bpd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) >>> a @@ -3134,9 +3009,6 @@ def ge(self, other) -> Series: **Examples:** - >>> import bigframes.pandas as bpd - >>> import numpy as np - >>> bpd.options.display.progress_bar = None >>> a = bpd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) >>> a @@ -3181,9 +3053,6 @@ def gt(self, other) -> Series: **Examples:** - >>> import bigframes.pandas as bpd - >>> import numpy as np - >>> bpd.options.display.progress_bar = None >>> a = bpd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) >>> a @@ -3227,10 +3096,8 @@ def add(self, other) -> Series: **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None - >>> a = bpd.Series([1, 2, 3, bpd.NA]) + >>> a = bpd.Series([1, 2, 3, pd.NA]) >>> a 0 1 1 2 @@ -3291,8 +3158,6 @@ def __add__(self, other): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series([1.5, 2.6], index=['elk', 'moose']) >>> s @@ -3343,9 +3208,6 @@ def radd(self, other) -> Series: **Examples:** - >>> import bigframes.pandas as bpd - >>> import numpy as np - >>> bpd.options.display.progress_bar = None >>> a = bpd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) >>> a @@ -3408,9 +3270,6 @@ def sub( **Examples:** - >>> import bigframes.pandas as bpd - >>> import numpy as np - >>> bpd.options.display.progress_bar = None >>> a = bpd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) >>> a @@ -3453,8 +3312,6 @@ def __sub__(self, other): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series([1.5, 2.6], index=['elk', 'moose']) >>> s @@ -3505,9 +3362,6 @@ def rsub(self, other) -> Series: **Examples:** - >>> import bigframes.pandas as bpd - >>> import numpy as np - >>> bpd.options.display.progress_bar = None >>> a = bpd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) >>> a @@ -3567,9 +3421,6 @@ def mul(self, other) -> Series: **Examples:** - >>> import bigframes.pandas as bpd - >>> import numpy as np - >>> bpd.options.display.progress_bar = None >>> a = bpd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) >>> a @@ -3613,8 +3464,6 @@ def __mul__(self, other): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None You can multiply with a scalar: @@ -3653,9 +3502,6 @@ def rmul(self, other) -> Series: **Examples:** - >>> import bigframes.pandas as bpd - >>> import numpy as np - >>> bpd.options.display.progress_bar = None >>> a = bpd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) >>> a @@ -3714,9 +3560,6 @@ def truediv(self, other) -> Series: **Examples:** - >>> import bigframes.pandas as bpd - >>> import numpy as np - >>> bpd.options.display.progress_bar = None >>> a = bpd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) >>> a @@ -3760,8 +3603,6 @@ def __truediv__(self, other): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None You can multiply with a scalar: @@ -3800,9 +3641,6 @@ def rtruediv(self, other) -> Series: **Examples:** - >>> import bigframes.pandas as bpd - >>> import numpy as np - >>> bpd.options.display.progress_bar = None >>> a = bpd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) >>> a @@ -3862,9 +3700,6 @@ def floordiv(self, other) -> Series: **Examples:** - >>> import bigframes.pandas as bpd - >>> import numpy as np - >>> bpd.options.display.progress_bar = None >>> a = bpd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) >>> a @@ -3908,8 +3743,6 @@ def __floordiv__(self, other): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None You can divide by a scalar: @@ -3948,9 +3781,6 @@ def rfloordiv(self, other) -> Series: **Examples:** - >>> import bigframes.pandas as bpd - >>> import numpy as np - >>> bpd.options.display.progress_bar = None >>> a = bpd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) >>> a @@ -4010,9 +3840,6 @@ def mod(self, other) -> Series: **Examples:** - >>> import bigframes.pandas as bpd - >>> import numpy as np - >>> bpd.options.display.progress_bar = None >>> a = bpd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) >>> a @@ -4056,8 +3883,6 @@ def __mod__(self, other): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None You can modulo with a scalar: @@ -4095,9 +3920,6 @@ def rmod(self, other) -> Series: **Examples:** - >>> import bigframes.pandas as bpd - >>> import numpy as np - >>> bpd.options.display.progress_bar = None >>> a = bpd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) >>> a @@ -4159,9 +3981,6 @@ def pow(self, other) -> Series: **Examples:** >>> import bigframes.pandas as bpd - >>> import numpy as np - >>> bpd.options.display.progress_bar = None - >>> a = bpd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) >>> a a 1.0 @@ -4194,6 +4013,7 @@ def pow(self, other) -> Series: The result of the operation. """ + # TODO(b/452366836): adjust sample if needed to match pyarrow semantics. raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) def __pow__(self, other): @@ -4205,8 +4025,6 @@ def __pow__(self, other): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None You can exponentiate with a scalar: @@ -4246,9 +4064,6 @@ def rpow(self, other) -> Series: **Examples:** >>> import bigframes.pandas as bpd - >>> import numpy as np - >>> bpd.options.display.progress_bar = None - >>> a = bpd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) >>> a a 1.0 @@ -4308,9 +4123,6 @@ def divmod(self, other) -> Series: **Examples:** - >>> import bigframes.pandas as bpd - >>> import numpy as np - >>> bpd.options.display.progress_bar = None >>> a = bpd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) >>> a @@ -4360,9 +4172,6 @@ def rdivmod(self, other) -> Series: **Examples:** - >>> import bigframes.pandas as bpd - >>> import numpy as np - >>> bpd.options.display.progress_bar = None >>> a = bpd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) >>> a @@ -4415,9 +4224,6 @@ def combine_first(self, other) -> Series: **Examples:** - >>> import bigframes.pandas as bpd - >>> import numpy as np - >>> bpd.options.display.progress_bar = None >>> s1 = bpd.Series([1, np.nan]) >>> s2 = bpd.Series([3, 4, 5]) @@ -4457,10 +4263,6 @@ def update(self, other) -> None: **Examples:** - >>> import bigframes.pandas as bpd - >>> import pandas as pd - >>> import numpy as np - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series([1, 2, 3]) >>> s.update(bpd.Series([4, 5, 6])) @@ -4551,9 +4353,6 @@ def any( **Examples:** - >>> import bigframes.pandas as bpd - >>> import numpy as np - >>> bpd.options.display.progress_bar = None For Series input, the output is a scalar indicating whether any element is True. @@ -4587,8 +4386,6 @@ def max( **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None Calculating the max of a Series: @@ -4603,7 +4400,7 @@ def max( Calculating the max of a Series containing ``NA`` values: - >>> s = bpd.Series([1, 3, bpd.NA]) + >>> s = bpd.Series([1, 3, pd.NA]) >>> s 0 1 1 3 @@ -4629,8 +4426,6 @@ def min( **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None Calculating the min of a Series: @@ -4645,7 +4440,7 @@ def min( Calculating the min of a Series containing ``NA`` values: - >>> s = bpd.Series([1, 3, bpd.NA]) + >>> s = bpd.Series([1, 3, pd.NA]) >>> s 0 1 1 3 @@ -4670,8 +4465,6 @@ def std( **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({'person_id': [0, 1, 2, 3], ... 'age': [21, 25, 62, 43], @@ -4718,8 +4511,6 @@ def sum(self): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None Calculating the sum of a Series: @@ -4734,7 +4525,7 @@ def sum(self): Calculating the sum of a Series containing ``NA`` values: - >>> s = bpd.Series([1, 3, bpd.NA]) + >>> s = bpd.Series([1, 3, pd.NA]) >>> s 0 1 1 3 @@ -4754,8 +4545,6 @@ def mean(self): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None Calculating the mean of a Series: @@ -4770,7 +4559,7 @@ def mean(self): Calculating the mean of a Series containing ``NA`` values: - >>> s = bpd.Series([1, 3, bpd.NA]) + >>> s = bpd.Series([1, 3, pd.NA]) >>> s 0 1 1 3 @@ -4791,8 +4580,6 @@ def median(self, *, exact: bool = True): **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None - >>> s = bpd.Series([1, 2, 3]) >>> s.median() np.float64(2.0) @@ -4832,8 +4619,6 @@ def quantile( **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None - >>> s = bpd.Series([1, 2, 3, 4]) >>> s.quantile(.5) np.float64(2.5) @@ -4884,8 +4669,6 @@ def describe(self): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series(['A', 'A', 'B']) >>> s @@ -4912,8 +4695,6 @@ def skew(self): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series([1, 2, 3]) >>> s.skew() @@ -4950,8 +4731,6 @@ def kurt(self): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series([1, 2, 2, 3], index=['cat', 'dog', 'dog', 'mouse']) >>> s @@ -4993,9 +4772,6 @@ def item(self: Series, *args, **kwargs): **Examples:** - >>> import bigframes.pandas as bpd - >>> import numpy as np - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series([1]) >>> s.item() np.int64(1) @@ -5017,8 +4793,6 @@ def items(self): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series(['A', 'B', 'C']) >>> for index, value in s.items(): @@ -5039,8 +4813,6 @@ def where(self, cond, other): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series([10, 11, 12, 13, 14]) >>> s @@ -5107,9 +4879,6 @@ def mask(self, cond, other): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None - >>> s = bpd.Series([10, 11, 12, 13, 14]) >>> s 0 10 @@ -5153,7 +4922,7 @@ def mask(self, cond, other): condition is evaluated based on a complicated business logic which cannot be expressed in form of a Series. - >>> @bpd.remote_function(reuse=False, cloud_function_service_account="default") + >>> @bpd.remote_function(reuse=False, cloud_function_service_account="default") # doctest: +SKIP ... def should_mask(name: str) -> bool: ... hash = 0 ... for char_ in name: @@ -5166,12 +4935,12 @@ def mask(self, cond, other): 1 Bob 2 Caroline dtype: string - >>> s.mask(should_mask) + >>> s.mask(should_mask) # doctest: +SKIP 0 1 Bob 2 Caroline dtype: string - >>> s.mask(should_mask, "REDACTED") + >>> s.mask(should_mask, "REDACTED") # doctest: +SKIP 0 REDACTED 1 Bob 2 Caroline @@ -5265,8 +5034,6 @@ def argmax(self): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None Consider dataset containing cereal calories. @@ -5303,8 +5070,6 @@ def argmin(self): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None Consider dataset containing cereal calories. @@ -5344,8 +5109,6 @@ def rename(self, index, *, inplace, **kwargs): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series([1, 2, 3]) >>> s @@ -5396,8 +5159,6 @@ def rename_axis(self, mapper, *, inplace, **kwargs): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None Series @@ -5461,10 +5222,8 @@ def value_counts( **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None - >>> s = bpd.Series([3, 1, 2, 3, 4, bpd.NA], dtype="Int64") + >>> s = bpd.Series([3, 1, 2, 3, 4, pd.NA], dtype="Int64") >>> s 0 3 @@ -5540,8 +5299,6 @@ def str(self): **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None - >>> s = bpd.Series(["A_Str_Series"]) >>> s 0 A_Str_Series @@ -5569,8 +5326,6 @@ def plot(self): **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None - >>> ser = bpd.Series([1, 2, 3, 3]) >>> plot = ser.plot(kind='hist', title="My plot") >>> plot @@ -5596,8 +5351,6 @@ def isin(self, values): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series(['llama', 'cow', 'llama', 'beetle', 'llama', ... 'hippo'], name='animal') @@ -5662,8 +5415,6 @@ def is_monotonic_increasing(self) -> bool: **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series([1, 2, 2]) >>> s.is_monotonic_increasing @@ -5686,8 +5437,6 @@ def is_monotonic_decreasing(self) -> bool: **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series([3, 2, 2, 1]) >>> s.is_monotonic_decreasing @@ -5728,10 +5477,7 @@ def map( **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None - - >>> s = bpd.Series(['cat', 'dog', bpd.NA, 'rabbit']) + >>> s = bpd.Series(['cat', 'dog', pd.NA, 'rabbit']) >>> s 0 cat 1 dog @@ -5751,7 +5497,7 @@ def map( It also accepts a remote function: - >>> @bpd.remote_function(cloud_function_service_account="default") + >>> @bpd.remote_function(cloud_function_service_account="default") # doctest: +SKIP ... def my_mapper(val: str) -> str: ... vowels = ["a", "e", "i", "o", "u"] ... if val: @@ -5760,7 +5506,7 @@ def map( ... ]) ... return "N/A" - >>> s.map(my_mapper) + >>> s.map(my_mapper) # doctest: +SKIP 0 cAt 1 dOg 2 N/A @@ -5794,8 +5540,6 @@ def iloc(self): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> mydict = [{'a': 1, 'b': 2, 'c': 3, 'd': 4}, ... {'a': 100, 'b': 200, 'c': 300, 'd': 400}, @@ -5874,8 +5618,6 @@ def loc(self): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame([[1, 2], [4, 5], [7, 8]], ... index=['cobra', 'viper', 'sidewinder'], @@ -5961,8 +5703,6 @@ def iat(self): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame([[0, 2, 3], [0, 4, 1], [10, 20, 30]], ... columns=['A', 'B', 'C']) @@ -5996,8 +5736,6 @@ def at(self): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame([[0, 2, 3], [0, 4, 1], [10, 20, 30]], ... index=[4, 5, 6], columns=['A', 'B', 'C']) @@ -6032,8 +5770,6 @@ def values(self): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> bpd.Series([1, 2, 3]).values array([1, 2, 3]) @@ -6054,8 +5790,6 @@ def size(self) -> int: **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None For Series: @@ -6091,9 +5825,6 @@ def __array__(self, dtype=None, copy: Optional[bool] = None) -> numpy.ndarray: **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None - >>> import numpy as np >>> ser = bpd.Series([1, 2, 3]) @@ -6119,8 +5850,6 @@ def __len__(self): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series([1, 2, 3]) >>> len(s) @@ -6135,8 +5864,6 @@ def __invert__(self): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> ser = bpd.Series([True, False, True]) >>> ~ser @@ -6156,8 +5883,6 @@ def __and__(self, other): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series([0, 1, 2, 3]) @@ -6195,8 +5920,6 @@ def __or__(self, other): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series([0, 1, 2, 3]) @@ -6234,8 +5957,6 @@ def __xor__(self, other): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series([0, 1, 2, 3]) @@ -6273,8 +5994,6 @@ def __getitem__(self, indexer): **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series([15, 30, 45]) >>> s[1] diff --git a/third_party/bigframes_vendored/pandas/core/strings/accessor.py b/third_party/bigframes_vendored/pandas/core/strings/accessor.py index fe94bf3049..9a72b98aee 100644 --- a/third_party/bigframes_vendored/pandas/core/strings/accessor.py +++ b/third_party/bigframes_vendored/pandas/core/strings/accessor.py @@ -20,7 +20,6 @@ def __getitem__(self, key: typing.Union[int, slice]): **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series(['Alice', 'Bob', 'Charlie']) >>> s.str[0] @@ -54,7 +53,6 @@ def extract(self, pat: str, flags: int = 0): **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None A pattern with two groups will return a DataFrame with two columns. Non-matches will be `NaN`. @@ -115,7 +113,6 @@ def find(self, sub, start: int = 0, end=None): **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> ser = bpd.Series(["cow_", "duck_", "do_ve"]) >>> ser.str.find("_") @@ -146,11 +143,10 @@ def len(self): **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None Returns the length (number of characters) in a string. - >>> s = bpd.Series(['dog', '', bpd.NA]) + >>> s = bpd.Series(['dog', '', pd.NA]) >>> s.str.len() 0 3 1 0 @@ -172,7 +168,6 @@ def lower(self): **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series(['lower', ... 'CAPITALS', @@ -197,7 +192,6 @@ def slice(self, start=None, stop=None): **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series(["koala", "dog", "chameleon"]) >>> s @@ -250,13 +244,12 @@ def strip(self, to_strip: typing.Optional[str] = None): **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series([ ... '1. Ant.', ... ' 2. Bee? ', ... '\\t3. Cat!\\n', - ... bpd.NA, + ... pd.NA, ... ]) >>> s.str.strip() 0 1. Ant. @@ -293,7 +286,6 @@ def upper(self): **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series(['lower', ... 'CAPITALS', @@ -322,7 +314,6 @@ def isnumeric(self): **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s1 = bpd.Series(['one', 'one1', '1', '']) >>> s1.str.isnumeric() @@ -349,7 +340,6 @@ def isalpha(self): **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s1 = bpd.Series(['one', 'one1', '1', '']) >>> s1.str.isalpha() @@ -375,7 +365,6 @@ def isdigit(self): **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series(['23', '1a', '1/5', '']) >>> s.str.isdigit() @@ -401,7 +390,6 @@ def isalnum(self): **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s1 = bpd.Series(['one', 'one1', '1', '']) >>> s1.str.isalnum() @@ -439,7 +427,6 @@ def isspace(self): **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series([' ', '\\t\\r\\n ', '']) >>> s.str.isspace() @@ -465,7 +452,6 @@ def islower(self): **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series(['leopard', 'Golden Eagle', 'SNAKE', '']) >>> s.str.islower() @@ -492,7 +478,6 @@ def isupper(self): **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series(['leopard', 'Golden Eagle', 'SNAKE', '']) >>> s.str.isupper() @@ -519,7 +504,6 @@ def isdecimal(self): **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None The `isdecimal` method checks for characters used to form numbers in base 10. @@ -550,9 +534,8 @@ def rstrip(self, to_strip: typing.Optional[str] = None): **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None - >>> s = bpd.Series(['Ant', ' Bee ', '\tCat\n', bpd.NA]) + >>> s = bpd.Series(['Ant', ' Bee ', '\tCat\n', pd.NA]) >>> s.str.rstrip() 0 Ant 1 Bee @@ -583,9 +566,8 @@ def lstrip(self, to_strip: typing.Optional[str] = None): **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None - >>> s = bpd.Series(['Ant', ' Bee ', '\tCat\n', bpd.NA]) + >>> s = bpd.Series(['Ant', ' Bee ', '\tCat\n', pd.NA]) >>> s.str.lstrip() 0 Ant 1 Bee @@ -611,7 +593,6 @@ def repeat(self, repeats: int): **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series(['a', 'b', 'c']) >>> s @@ -645,7 +626,6 @@ def capitalize(self): **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series(['lower', ... 'CAPITALS', @@ -673,7 +653,6 @@ def cat(self, others, *, join): **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None You can concatenate each string in a Series to another string. @@ -730,7 +709,6 @@ def contains(self, pat, case: bool = True, flags: int = 0, *, regex: bool = True **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None Returning a Series of booleans using only a literal pattern. @@ -834,13 +812,12 @@ def replace( **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None When *pat* is a string and *regex* is True, the given *pat* is compiled as a regex. When *repl* is a string, it replaces matching regex patterns as with `re.sub()`. NaN value(s) in the Series are left as is: - >>> s = bpd.Series(['foo', 'fuz', bpd.NA]) + >>> s = bpd.Series(['foo', 'fuz', pd.NA]) >>> s.str.replace('f.', 'ba', regex=True) 0 bao 1 baz @@ -850,7 +827,7 @@ def replace( When *pat* is a string and *regex* is False, every *pat* is replaced with *repl* as with `str.replace()`: - >>> s = bpd.Series(['f.o', 'fuz', bpd.NA]) + >>> s = bpd.Series(['f.o', 'fuz', pd.NA]) >>> s.str.replace('f.', 'ba', regex=False) 0 bao 1 fuz @@ -896,9 +873,8 @@ def startswith( **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None - >>> s = bpd.Series(['bat', 'Bear', 'caT', bpd.NA]) + >>> s = bpd.Series(['bat', 'Bear', 'caT', pd.NA]) >>> s 0 bat 1 Bear @@ -941,9 +917,8 @@ def endswith( **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None - >>> s = bpd.Series(['bat', 'bear', 'caT', bpd.NA]) + >>> s = bpd.Series(['bat', 'bear', 'caT', pd.NA]) >>> s 0 bat 1 bear @@ -987,8 +962,6 @@ def split( **Examples:** >>> import bigframes.pandas as bpd - >>> import numpy as np - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series( ... [ @@ -1031,7 +1004,6 @@ def match(self, pat: str, case: bool = True, flags: int = 0): **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> ser = bpd.Series(["horse", "eagle", "donkey"]) >>> ser.str.match("e") @@ -1060,7 +1032,6 @@ def fullmatch(self, pat: str, case: bool = True, flags: int = 0): **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> ser = bpd.Series(["cat", "duck", "dove"]) >>> ser.str.fullmatch(r'd.+') @@ -1092,7 +1063,6 @@ def get(self, i: int): **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series(["apple", "banana", "fig"]) >>> s.str.get(3) @@ -1122,7 +1092,6 @@ def pad( **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> s = bpd.Series(["caribou", "tiger"]) >>> s @@ -1170,7 +1139,6 @@ def ljust( **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> ser = bpd.Series(['dog', 'bird', 'mouse']) >>> ser.str.ljust(8, fillchar='.') @@ -1202,7 +1170,6 @@ def rjust( **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> ser = bpd.Series(['dog', 'bird', 'mouse']) >>> ser.str.rjust(8, fillchar='.') @@ -1238,9 +1205,8 @@ def zfill( **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None - >>> s = bpd.Series(['-1', '1', '1000', bpd.NA]) + >>> s = bpd.Series(['-1', '1', '1000', pd.NA]) >>> s 0 -1 1 1 @@ -1278,7 +1244,6 @@ def center( **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> ser = bpd.Series(['dog', 'bird', 'mouse']) >>> ser.str.center(8, fillchar='.') @@ -1310,8 +1275,6 @@ def join(self, sep: str): **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None - >>> import pandas as pd Example with a list that contains non-string elements. diff --git a/third_party/bigframes_vendored/pandas/core/tools/datetimes.py b/third_party/bigframes_vendored/pandas/core/tools/datetimes.py index 9c17b9632e..655f801b3d 100644 --- a/third_party/bigframes_vendored/pandas/core/tools/datetimes.py +++ b/third_party/bigframes_vendored/pandas/core/tools/datetimes.py @@ -38,7 +38,6 @@ def to_datetime( **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None Converting a Scalar to datetime: diff --git a/third_party/bigframes_vendored/pandas/core/tools/timedeltas.py b/third_party/bigframes_vendored/pandas/core/tools/timedeltas.py index 9442e965fa..4e418af406 100644 --- a/third_party/bigframes_vendored/pandas/core/tools/timedeltas.py +++ b/third_party/bigframes_vendored/pandas/core/tools/timedeltas.py @@ -54,11 +54,9 @@ def to_timedelta( **Examples:** - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None - Converting a Scalar to timedelta + >>> import bigframes.pandas as bpd >>> scalar = 2 >>> bpd.to_timedelta(scalar, unit='s') Timedelta('0 days 00:00:02') diff --git a/third_party/bigframes_vendored/pandas/io/gbq.py b/third_party/bigframes_vendored/pandas/io/gbq.py index 0fdca4dde1..3190c92b92 100644 --- a/third_party/bigframes_vendored/pandas/io/gbq.py +++ b/third_party/bigframes_vendored/pandas/io/gbq.py @@ -61,7 +61,6 @@ def read_gbq( **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None If the input is a table ID: diff --git a/third_party/bigframes_vendored/pandas/io/parquet.py b/third_party/bigframes_vendored/pandas/io/parquet.py index aec911d2fe..c02c5e52c5 100644 --- a/third_party/bigframes_vendored/pandas/io/parquet.py +++ b/third_party/bigframes_vendored/pandas/io/parquet.py @@ -27,7 +27,6 @@ def read_parquet( **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> gcs_path = "gs://cloud-samples-data/bigquery/us-states/us-states.parquet" >>> df = bpd.read_parquet(path=gcs_path, engine="bigquery") diff --git a/third_party/bigframes_vendored/pandas/io/parsers/readers.py b/third_party/bigframes_vendored/pandas/io/parsers/readers.py index 4757f5ed9d..5a505c2859 100644 --- a/third_party/bigframes_vendored/pandas/io/parsers/readers.py +++ b/third_party/bigframes_vendored/pandas/io/parsers/readers.py @@ -71,7 +71,6 @@ def read_csv( **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> gcs_path = "gs://cloud-samples-data/bigquery/us-states/us-states.csv" >>> df = bpd.read_csv(filepath_or_buffer=gcs_path) @@ -192,7 +191,6 @@ def read_json( **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> gcs_path = "gs://bigframes-dev-testing/sample1.json" >>> df = bpd.read_json(path_or_buf=gcs_path, lines=True, orient="records") diff --git a/third_party/bigframes_vendored/pandas/io/pickle.py b/third_party/bigframes_vendored/pandas/io/pickle.py index 33088dc019..03f1afe35e 100644 --- a/third_party/bigframes_vendored/pandas/io/pickle.py +++ b/third_party/bigframes_vendored/pandas/io/pickle.py @@ -35,7 +35,6 @@ def read_pickle( **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> gcs_path = "gs://bigframes-dev-testing/test_pickle.pkl" >>> df = bpd.read_pickle(filepath_or_buffer=gcs_path) diff --git a/third_party/bigframes_vendored/pandas/plotting/_core.py b/third_party/bigframes_vendored/pandas/plotting/_core.py index b0c28ddfe9..6c2aed970d 100644 --- a/third_party/bigframes_vendored/pandas/plotting/_core.py +++ b/third_party/bigframes_vendored/pandas/plotting/_core.py @@ -11,7 +11,6 @@ class PlotAccessor: For Series: >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> ser = bpd.Series([1, 2, 3, 3]) >>> plot = ser.plot(kind='hist', title="My plot") @@ -57,9 +56,6 @@ def hist( **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None - >>> import numpy as np - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame(np.random.randint(1, 7, 6000), columns=['one']) >>> df['two'] = np.random.randint(1, 7, 6000) + np.random.randint(1, 7, 6000) >>> ax = df.plot.hist(bins=12, alpha=0.5) @@ -96,7 +92,6 @@ def line( **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame( ... { ... 'one': [1, 2, 3, 4], @@ -164,7 +159,6 @@ def area( Draw an area plot based on basic business metrics: >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame( ... { ... 'sales': [3, 2, 3, 9, 10, 6], @@ -233,7 +227,6 @@ def bar( Basic plot. >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({'lab':['A', 'B', 'C'], 'val':[10, 30, 20]}) >>> ax = df.plot.bar(x='lab', y='val', rot=0) @@ -293,7 +286,6 @@ def barh( Basic plot. >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({'lab':['A', 'B', 'C'], 'val':[10, 30, 20]}) >>> ax = df.plot.barh(x='lab', y='val', rot=0) @@ -356,7 +348,6 @@ def pie( pie function to get a pie plot. >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame({'mass': [0.330, 4.87 , 5.97], ... 'radius': [2439.7, 6051.8, 6378.1]}, @@ -399,7 +390,6 @@ def scatter( in a DataFrame's columns. >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> df = bpd.DataFrame([[5.1, 3.5, 0], [4.9, 3.0, 0], [7.0, 3.2, 1], ... [6.4, 3.2, 1], [5.9, 3.0, 2]], ... columns=['length', 'width', 'species']) diff --git a/third_party/bigframes_vendored/sklearn/cluster/_kmeans.py b/third_party/bigframes_vendored/sklearn/cluster/_kmeans.py index a7344d49d4..44eefeddd7 100644 --- a/third_party/bigframes_vendored/sklearn/cluster/_kmeans.py +++ b/third_party/bigframes_vendored/sklearn/cluster/_kmeans.py @@ -30,7 +30,6 @@ class KMeans(_BaseKMeans): **Examples:** >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> from bigframes.ml.cluster import KMeans >>> X = bpd.DataFrame({"feat0": [1, 1, 1, 10, 10, 10], "feat1": [2, 4, 0, 2, 4, 0]}) diff --git a/third_party/bigframes_vendored/sklearn/decomposition/_mf.py b/third_party/bigframes_vendored/sklearn/decomposition/_mf.py index c3c3a77b71..e487a2e7c1 100644 --- a/third_party/bigframes_vendored/sklearn/decomposition/_mf.py +++ b/third_party/bigframes_vendored/sklearn/decomposition/_mf.py @@ -24,7 +24,6 @@ class MatrixFactorization(BaseEstimator, metaclass=ABCMeta): >>> import bigframes.pandas as bpd >>> from bigframes.ml.decomposition import MatrixFactorization - >>> bpd.options.display.progress_bar = None >>> X = bpd.DataFrame({ ... "row": [0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6], ... "column": [0,1] * 7, diff --git a/third_party/bigframes_vendored/sklearn/decomposition/_pca.py b/third_party/bigframes_vendored/sklearn/decomposition/_pca.py index f13c52bfb6..3535edc8f9 100644 --- a/third_party/bigframes_vendored/sklearn/decomposition/_pca.py +++ b/third_party/bigframes_vendored/sklearn/decomposition/_pca.py @@ -24,7 +24,6 @@ class PCA(BaseEstimator, metaclass=ABCMeta): >>> import bigframes.pandas as bpd >>> from bigframes.ml.decomposition import PCA - >>> bpd.options.display.progress_bar = None >>> X = bpd.DataFrame({"feat0": [-1, -2, -3, 1, 2, 3], "feat1": [-1, -1, -2, 1, 1, 2]}) >>> pca = PCA(n_components=2).fit(X) >>> pca.predict(X) # doctest:+SKIP diff --git a/third_party/bigframes_vendored/sklearn/impute/_base.py b/third_party/bigframes_vendored/sklearn/impute/_base.py index 42eab24c82..175ad86b21 100644 --- a/third_party/bigframes_vendored/sklearn/impute/_base.py +++ b/third_party/bigframes_vendored/sklearn/impute/_base.py @@ -22,7 +22,6 @@ class SimpleImputer(_BaseImputer): >>> import bigframes.pandas as bpd >>> from bigframes.ml.impute import SimpleImputer - >>> bpd.options.display.progress_bar = None >>> X_train = bpd.DataFrame({"feat0": [7.0, 4.0, 10.0], "feat1": [2.0, None, 5.0], "feat2": [3.0, 6.0, 9.0]}) >>> imp_mean = SimpleImputer().fit(X_train) >>> X_test = bpd.DataFrame({"feat0": [None, 4.0, 10.0], "feat1": [2.0, None, None], "feat2": [3.0, 6.0, 9.0]}) diff --git a/third_party/bigframes_vendored/sklearn/linear_model/_base.py b/third_party/bigframes_vendored/sklearn/linear_model/_base.py index 21ba5a3bf8..7543edd10b 100644 --- a/third_party/bigframes_vendored/sklearn/linear_model/_base.py +++ b/third_party/bigframes_vendored/sklearn/linear_model/_base.py @@ -66,7 +66,6 @@ class LinearRegression(RegressorMixin, LinearModel): >>> from bigframes.ml.linear_model import LinearRegression >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> X = bpd.DataFrame({ \ "feature0": [20, 21, 19, 18], \ "feature1": [0, 1, 1, 0], \ diff --git a/third_party/bigframes_vendored/sklearn/linear_model/_logistic.py b/third_party/bigframes_vendored/sklearn/linear_model/_logistic.py index a85c6fae8d..d449a1040c 100644 --- a/third_party/bigframes_vendored/sklearn/linear_model/_logistic.py +++ b/third_party/bigframes_vendored/sklearn/linear_model/_logistic.py @@ -25,7 +25,6 @@ class LogisticRegression(LinearClassifierMixin, BaseEstimator): >>> from bigframes.ml.linear_model import LogisticRegression >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> X = bpd.DataFrame({ \ "feature0": [20, 21, 19, 18], \ "feature1": [0, 1, 1, 0], \ diff --git a/third_party/bigframes_vendored/sklearn/metrics/_classification.py b/third_party/bigframes_vendored/sklearn/metrics/_classification.py index fd6e8678ea..e60cc8cec4 100644 --- a/third_party/bigframes_vendored/sklearn/metrics/_classification.py +++ b/third_party/bigframes_vendored/sklearn/metrics/_classification.py @@ -30,7 +30,6 @@ def accuracy_score(y_true, y_pred, normalize=True) -> float: >>> import bigframes.pandas as bpd >>> import bigframes.ml.metrics - >>> bpd.options.display.progress_bar = None >>> y_true = bpd.DataFrame([0, 2, 1, 3]) >>> y_pred = bpd.DataFrame([0, 1, 2, 3]) @@ -80,7 +79,6 @@ def confusion_matrix( >>> import bigframes.pandas as bpd >>> import bigframes.ml.metrics - >>> bpd.options.display.progress_bar = None >>> y_true = bpd.DataFrame([2, 0, 2, 2, 0, 1]) >>> y_pred = bpd.DataFrame([0, 0, 2, 2, 0, 2]) @@ -132,7 +130,6 @@ def recall_score( >>> import bigframes.pandas as bpd >>> import bigframes.ml.metrics - >>> bpd.options.display.progress_bar = None >>> y_true = bpd.DataFrame([0, 1, 2, 0, 1, 2]) >>> y_pred = bpd.DataFrame([0, 2, 1, 0, 0, 1]) @@ -181,7 +178,6 @@ def precision_score( >>> import bigframes.pandas as bpd >>> import bigframes.ml.metrics - >>> bpd.options.display.progress_bar = None >>> y_true = bpd.DataFrame([0, 1, 2, 0, 1, 2]) >>> y_pred = bpd.DataFrame([0, 2, 1, 0, 0, 1]) @@ -232,7 +228,6 @@ def f1_score( >>> import bigframes.pandas as bpd >>> import bigframes.ml.metrics - >>> bpd.options.display.progress_bar = None >>> y_true = bpd.DataFrame([0, 1, 2, 0, 1, 2]) >>> y_pred = bpd.DataFrame([0, 2, 1, 0, 0, 1]) diff --git a/third_party/bigframes_vendored/sklearn/metrics/_ranking.py b/third_party/bigframes_vendored/sklearn/metrics/_ranking.py index 9262ffbd3d..cd5bd2cbcd 100644 --- a/third_party/bigframes_vendored/sklearn/metrics/_ranking.py +++ b/third_party/bigframes_vendored/sklearn/metrics/_ranking.py @@ -33,7 +33,6 @@ def auc(x, y) -> float: >>> import bigframes.pandas as bpd >>> import bigframes.ml.metrics - >>> bpd.options.display.progress_bar = None >>> x = bpd.DataFrame([1, 1, 2, 2]) >>> y = bpd.DataFrame([2, 3, 4, 5]) @@ -89,7 +88,6 @@ def roc_auc_score(y_true, y_score) -> float: >>> import bigframes.pandas as bpd >>> import bigframes.ml.metrics - >>> bpd.options.display.progress_bar = None >>> y_true = bpd.DataFrame([0, 0, 1, 1, 0, 1, 0, 1, 1, 1]) >>> y_score = bpd.DataFrame([0.1, 0.4, 0.35, 0.8, 0.65, 0.9, 0.5, 0.3, 0.6, 0.45]) @@ -139,7 +137,6 @@ def roc_curve( >>> import bigframes.pandas as bpd >>> import bigframes.ml.metrics - >>> bpd.options.display.progress_bar = None >>> y_true = bpd.DataFrame([1, 1, 2, 2]) >>> y_score = bpd.DataFrame([0.1, 0.4, 0.35, 0.8]) diff --git a/third_party/bigframes_vendored/sklearn/metrics/_regression.py b/third_party/bigframes_vendored/sklearn/metrics/_regression.py index 1c14e8068b..85f0c1ecf9 100644 --- a/third_party/bigframes_vendored/sklearn/metrics/_regression.py +++ b/third_party/bigframes_vendored/sklearn/metrics/_regression.py @@ -46,7 +46,6 @@ def r2_score(y_true, y_pred, force_finite=True) -> float: >>> import bigframes.pandas as bpd >>> import bigframes.ml.metrics - >>> bpd.options.display.progress_bar = None >>> y_true = bpd.DataFrame([3, -0.5, 2, 7]) >>> y_pred = bpd.DataFrame([2.5, 0.0, 2, 8]) @@ -73,7 +72,6 @@ def mean_squared_error(y_true, y_pred) -> float: >>> import bigframes.pandas as bpd >>> import bigframes.ml.metrics - >>> bpd.options.display.progress_bar = None >>> y_true = bpd.DataFrame([3, -0.5, 2, 7]) >>> y_pred = bpd.DataFrame([2.5, 0.0, 2, 8]) @@ -100,7 +98,6 @@ def mean_absolute_error(y_true, y_pred) -> float: >>> import bigframes.pandas as bpd >>> import bigframes.ml.metrics - >>> bpd.options.display.progress_bar = None >>> y_true = bpd.DataFrame([3, -0.5, 2, 7]) >>> y_pred = bpd.DataFrame([2.5, 0.0, 2, 8]) diff --git a/third_party/bigframes_vendored/sklearn/model_selection/_split.py b/third_party/bigframes_vendored/sklearn/model_selection/_split.py index ec16fa8cf9..326589be7d 100644 --- a/third_party/bigframes_vendored/sklearn/model_selection/_split.py +++ b/third_party/bigframes_vendored/sklearn/model_selection/_split.py @@ -69,7 +69,6 @@ class KFold(_BaseKFold): >>> import bigframes.pandas as bpd >>> from bigframes.ml.model_selection import KFold - >>> bpd.options.display.progress_bar = None >>> X = bpd.DataFrame({"feat0": [1, 3, 5], "feat1": [2, 4, 6]}) >>> y = bpd.DataFrame({"label": [1, 2, 3]}) >>> kf = KFold(n_splits=3, random_state=42) @@ -162,7 +161,6 @@ def train_test_split( >>> import bigframes.pandas as bpd >>> from bigframes.ml.model_selection import train_test_split - >>> bpd.options.display.progress_bar = None >>> X = bpd.DataFrame({"feat0": [0, 2, 4, 6, 8], "feat1": [1, 3, 5, 7, 9]}) >>> y = bpd.DataFrame({"label": [0, 1, 2, 3, 4]}) >>> X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42) diff --git a/third_party/bigframes_vendored/sklearn/model_selection/_validation.py b/third_party/bigframes_vendored/sklearn/model_selection/_validation.py index b93c47ea04..6f84018853 100644 --- a/third_party/bigframes_vendored/sklearn/model_selection/_validation.py +++ b/third_party/bigframes_vendored/sklearn/model_selection/_validation.py @@ -19,7 +19,6 @@ def cross_validate(estimator, X, y=None, *, cv=None): >>> import bigframes.pandas as bpd >>> from bigframes.ml.model_selection import cross_validate, KFold >>> from bigframes.ml.linear_model import LinearRegression - >>> bpd.options.display.progress_bar = None >>> X = bpd.DataFrame({"feat0": [1, 3, 5], "feat1": [2, 4, 6]}) >>> y = bpd.DataFrame({"label": [1, 2, 3]}) >>> model = LinearRegression() diff --git a/third_party/bigframes_vendored/sklearn/preprocessing/_encoder.py b/third_party/bigframes_vendored/sklearn/preprocessing/_encoder.py index 5476a9fb3c..64a5786f17 100644 --- a/third_party/bigframes_vendored/sklearn/preprocessing/_encoder.py +++ b/third_party/bigframes_vendored/sklearn/preprocessing/_encoder.py @@ -25,7 +25,6 @@ class OneHotEncoder(BaseEstimator): >>> from bigframes.ml.preprocessing import OneHotEncoder >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None >>> enc = OneHotEncoder() >>> X = bpd.DataFrame({"a": ["Male", "Female", "Female"], "b": ["1", "3", "2"]})