Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions bigframes/bigquery/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
to_json,
to_json_string,
)
from bigframes.bigquery._operations.mathematical import rand
from bigframes.bigquery._operations.search import create_vector_index, vector_search
from bigframes.bigquery._operations.sql import sql_scalar
from bigframes.bigquery._operations.struct import struct
Expand Down Expand Up @@ -97,6 +98,8 @@
parse_json,
to_json,
to_json_string,
# mathematical ops
rand,
# search ops
create_vector_index,
vector_search,
Expand Down Expand Up @@ -148,6 +151,8 @@
"parse_json",
"to_json",
"to_json_string",
# mathematical ops
"rand",
# search ops
"create_vector_index",
"vector_search",
Expand Down
68 changes: 68 additions & 0 deletions bigframes/bigquery/_operations/mathematical.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import annotations

from typing import Union

from bigframes import dataframe
from bigframes import dtypes
from bigframes import operations as ops
from bigframes import series


def rand(input_data: Union[series.Series, dataframe.DataFrame]) -> series.Series:
    """
    Produce a Series of pseudo-random FLOAT64 values drawn from [0, 1).

    Zero is a possible value; one is not.

    .. warning::
        The resulting expression is non-deterministic. Reading the same
        column more than once may give different values each time.

    **Examples:**

        >>> import bigframes.pandas as bpd
        >>> import bigframes.bigquery as bbq
        >>> df = bpd.DataFrame({"a": [1, 2, 3]})
        >>> df['random'] = bbq.rand(df)
        >>> # Column 'random' now holds random floats between 0 and 1.

    Args:
        input_data (bigframes.pandas.Series or bigframes.pandas.DataFrame):
            Series or DataFrame that fixes the row count and index of the
            result. Its values are ignored. When a DataFrame is given, its
            first column is the one the operation is applied to.

    Returns:
        bigframes.pandas.Series: A new Series of random float values.

    Raises:
        TypeError: If ``input_data`` is neither a Series nor a DataFrame.
        ValueError: If ``input_data`` is a DataFrame without any columns.
    """
    # Reject unsupported inputs first so the remaining logic only has to
    # distinguish between the two accepted types.
    if not isinstance(input_data, (dataframe.DataFrame, series.Series)):
        raise TypeError(
            f"Unsupported type {type(input_data)}. "
            "Expected bigframes.pandas.Series or bigframes.pandas.DataFrame."
        )

    if isinstance(input_data, dataframe.DataFrame):
        if len(input_data.columns) == 0:
            raise ValueError("Input DataFrame must have at least one column.")
        # The op has to be applied to a Series, so the first column is used.
        row_source = input_data.iloc[:, 0]
    else:
        row_source = input_data

    random_op = ops.SqlScalarOp(
        _output_type=dtypes.FLOAT_DTYPE,
        sql_template="RAND()",
        is_deterministic=False,
    )
    # No additional input series are passed alongside the anchor.
    return row_source._apply_nary_op(random_op, [])
5 changes: 5 additions & 0 deletions bigframes/operations/generic_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -443,10 +443,15 @@ class SqlScalarOp(base_ops.NaryOp):
name: typing.ClassVar[str] = "sql_scalar"
_output_type: dtypes.ExpressionType
sql_template: str
is_deterministic: bool = True

def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType:
    """Return the configured ``_output_type``; ``input_types`` are not consulted."""
    return self._output_type

@property
def deterministic(self) -> bool:
    """Report whether this op is deterministic.

    Simply exposes the ``is_deterministic`` field, which defaults to ``True``.
    """
    # NOTE(review): presumably overrides a property of the same name on the
    # base op class; the base class is not visible here -- confirm.
    return self.is_deterministic


@dataclasses.dataclass(frozen=True)
class PyUdfOp(base_ops.NaryOp):
Expand Down
36 changes: 36 additions & 0 deletions tests/system/small/bigquery/test_mathematical.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import bigframes.bigquery as bbq


def test_rand(scalars_df_index):
    """Check that ``bbq.rand`` yields one value in [0, 1) per input row."""
    # Build the random column and materialize it locally in one step.
    random_values = bbq.rand(scalars_df_index).to_pandas()

    # One random value per row of the source frame.
    assert len(random_values) == len(scalars_df_index)

    # Lower bound is inclusive, upper bound is exclusive.
    assert random_values.ge(0).all()
    assert random_values.lt(1).all()

    # With more than one row, identical values everywhere would indicate the
    # expression was evaluated once rather than per row (a genuine collision
    # is extremely unlikely).
    assert len(random_values) <= 1 or random_values.nunique() > 1
57 changes: 57 additions & 0 deletions tests/unit/bigquery/test_mathematical.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest.mock as mock

import bigframes.bigquery as bbq
import bigframes.dataframe as dataframe
import bigframes.dtypes as dtypes
import bigframes.operations as ops
import bigframes.series as series


def test_rand_calls_apply_nary_op():
    """Check that ``rand`` on a Series applies a non-deterministic RAND() op."""
    series_double = mock.create_autospec(series.Series, instance=True)

    bbq.rand(series_double)

    apply_op = series_double._apply_nary_op
    apply_op.assert_called_once()

    # Index 0 of call_args holds the positional arguments of the call.
    positional = apply_op.call_args[0]
    scalar_op = positional[0]
    assert isinstance(scalar_op, ops.SqlScalarOp)
    assert scalar_op.sql_template == "RAND()"
    assert scalar_op._output_type == dtypes.FLOAT_DTYPE
    assert scalar_op.deterministic is False
    # No extra input series accompany the anchor.
    assert positional[1] == []


def test_rand_with_dataframe():
    """Check that ``rand`` on a DataFrame anchors on its first column.

    The DataFrame is an autospec double whose ``iloc`` indexer returns a
    Series double, so the test can verify both which column is selected and
    which op is applied to it.
    """
    mock_df = mock.create_autospec(dataframe.DataFrame, instance=True)
    # A non-empty column list lets rand() get past its empty-frame check.
    mock_df.columns = ["col1"]

    mock_series = mock.create_autospec(series.Series, instance=True)
    # iloc is accessed as a property and then subscripted, so it is replaced
    # by a PropertyMock returning an object whose __getitem__ is configured.
    mock_indexer = mock.MagicMock()
    mock_indexer.__getitem__.return_value = mock_series
    type(mock_df).iloc = mock.PropertyMock(return_value=mock_indexer)

    bbq.rand(mock_df)

    # The anchor must be the first column, i.e. iloc[:, 0].
    mock_indexer.__getitem__.assert_called_once_with((slice(None), 0))

    mock_series._apply_nary_op.assert_called_once()
    args, _ = mock_series._apply_nary_op.call_args
    op = args[0]
    assert isinstance(op, ops.SqlScalarOp)
    assert op.sql_template == "RAND()"
    # Same op properties as verified for the Series input path.
    assert op._output_type == dtypes.FLOAT_DTYPE
    assert op.deterministic is False
    assert args[1] == []
Loading