Skip to content

Commit bfb4ca2

Browse files

Committed with message: "do not use numpy for offsets"

1 parent 0a4e987 · commit bfb4ca2

File tree

4 files changed

+15
-17
lines changed

4 files changed

+15
-17
lines changed

bigframes/core/compile/compiler.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,9 @@
2222
import bigframes_vendored.ibis.expr.api as ibis_api
2323
import bigframes_vendored.ibis.expr.datatypes as ibis_dtypes
2424
import bigframes_vendored.ibis.expr.types as ibis_types
25-
import pyarrow as pa
2625

2726
from bigframes import dtypes, operations
28-
from bigframes.core import expression
27+
from bigframes.core import expression, pyarrow_utils
2928
import bigframes.core.compile.compiled as compiled
3029
import bigframes.core.compile.concat as concat_impl
3130
import bigframes.core.compile.configs as configs
@@ -172,9 +171,7 @@ def compile_readlocal(node: nodes.ReadLocalNode, *args):
172171
pa_table = pa_table.rename_columns([item.id.sql for item in node.scan_list.items])
173172

174173
if offsets:
175-
pa_table = pa_table.append_column(
176-
offsets, pa.array(range(pa_table.num_rows), type=pa.int64())
177-
)
174+
pa_table = pyarrow_utils.append_offsets(pa_table, offsets)
178175
return compiled.UnorderedIR.from_polars(pa_table, bq_schema)
179176

180177

bigframes/core/compile/sqlglot/compiler.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,9 @@
1818
import typing
1919

2020
from google.cloud import bigquery
21-
import pyarrow as pa
2221
import sqlglot.expressions as sge
2322

24-
from bigframes.core import expression, guid, identifiers, nodes, rewrite
23+
from bigframes.core import expression, guid, identifiers, nodes, pyarrow_utils, rewrite
2524
from bigframes.core.compile import configs
2625
import bigframes.core.compile.sqlglot.scalar_compiler as scalar_compiler
2726
import bigframes.core.compile.sqlglot.sqlglot_ir as ir
@@ -155,9 +154,7 @@ def compile_readlocal(self, node: nodes.ReadLocalNode, *args) -> ir.SQLGlotIR:
155154

156155
offsets = node.offsets_col.sql if node.offsets_col else None
157156
if offsets:
158-
pa_table = pa_table.append_column(
159-
offsets, pa.array(range(pa_table.num_rows), type=pa.int64())
160-
)
157+
pa_table = pyarrow_utils.append_offsets(pa_table, offsets)
161158

162159
return ir.SQLGlotIR.from_pyarrow(pa_table, node.schema, uid_gen=self.uid_gen)
163160

bigframes/core/pyarrow_utils.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,3 +85,12 @@ def truncate_pyarrow_iterable(
8585
else:
8686
yield batch
8787
total_yielded += batch.num_rows
88+
89+
90+
def append_offsets(
91+
pa_table: pa.Table,
92+
offsets_col: str,
93+
) -> pa.Table:
94+
return pa_table.append_column(
95+
offsets_col, pa.array(range(pa_table.num_rows), type=pa.int64())
96+
)

bigframes/session/_io/bigquery/read_gbq_query.py

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,10 @@
2020

2121
from google.cloud import bigquery
2222
import google.cloud.bigquery.table
23-
import numpy
2423
import pandas
25-
import pyarrow
2624

2725
from bigframes import dataframe
28-
from bigframes.core import local_data
26+
from bigframes.core import local_data, pyarrow_utils
2927
import bigframes.core as core
3028
import bigframes.core.blocks as blocks
3129
import bigframes.core.guid
@@ -67,10 +65,7 @@ def create_dataframe_from_row_iterator(
6765
# TODO(tswast): Use array_value.promote_offsets() instead once that node is
6866
# supported by the local engine.
6967
offsets_col = bigframes.core.guid.generate_guid()
70-
pa_table = pa_table.append_column(
71-
pyarrow.field(offsets_col, pyarrow.int64()),
72-
[numpy.arange(pa_table.num_rows)],
73-
)
68+
pa_table = pyarrow_utils.append_offsets(pa_table, offsets_col=offsets_col)
7469

7570
# We use the ManagedArrowTable constructor directly, because the
7671
# results of to_arrow() should be the source of truth with regards

0 commit comments

Comments (0)