
Commit 6d3e2bb (parent: 81eca03)

Add necessary BigQuery dependencies

File tree: 13 files changed (+894, -74 lines)

benchmarks/benchmark_db_utils.py (46 additions, 63 deletions)
@@ -25,15 +25,12 @@
 import dataclasses
 import getpass
 import os
-import sys
 import uuid
 
 from argparse import Namespace
 
-BQ_WRITER_PATH = "/benchmark-automation/benchmark_db_writer/src"
 temp_dir = gettempdir()
 DEFAULT_LOCAL_DIR = os.path.join(temp_dir, "")
-# bq_writer_repo_root = get_bq_writer_path(DEFAULT_LOCAL_DIR)
 
 DEFAULT_TUNING_PARAMS_FILE = os.path.join(temp_dir, "tuning_params.json")
 
@@ -114,7 +111,6 @@ def write_run(
     dataset: The dataset used in the run.
     num_of_superblock: The number of superblocks in the hardware. (valid for GPUs)
     update_person_ldap: The LDAP ID of the person updating the record (default: current user).
-    is_test: Whether to use the testing project or the production project.
     metrics: Metrics object containing:
       median_step_time: The median step time of the run.
       e2e_step_time: The end-to-end time of the run.
@@ -134,25 +130,20 @@ def write_run(
   Raises:
     ValueError: If any of the IDs are invalid.
   """
-  bq_writer_repo_root = BQ_WRITER_PATH
-  sys.path.append(bq_writer_repo_root)
-
   # pylint: disable=import-outside-toplevel
 
-  from benchmark_db_writer import bq_writer_utils
-  from benchmark_db_writer import dataclass_bigquery_writer
-  from benchmark_db_writer.run_summary_writer import sample_run_summary_writer
-  from benchmark_db_writer.schema.workload_benchmark_v2 import workload_benchmark_v2_schema
+  from benchmarks.benchmark_db_writer import bq_writer_utils
+  from benchmarks.benchmark_db_writer import dataclass_bigquery_writer
+  from benchmarks.benchmark_db_writer.schema.workload_benchmark_v2 import workload_benchmark_v2_schema
 
   def get_db_client(
-      project: str, dataset: str, table: str, dataclass_type: Type, is_test: bool = False
+      project: str, dataset: str, table: str, dataclass_type: Type
   ) -> dataclass_bigquery_writer.DataclassBigQueryWriter:
     """Creates a BigQuery client object.
 
     Args:
       table: The name of the BigQuery table.
       dataclass_type: The dataclass type corresponding to the table schema.
-      is_test: Whether to use the testing project or the production project.
 
     Returns:
       A BigQuery client object.
@@ -167,53 +158,45 @@ def get_db_client(
 
   print(options.model_id)
 
-  if (
-      sample_run_summary_writer.validate_model_id(options.model_id, options.is_test)
-      and sample_run_summary_writer.validate_hardware_id(options.hardware_id, options.is_test)
-      and sample_run_summary_writer.validate_software_id(options.software_id, options.is_test)
-  ):
-    summary = workload_benchmark_v2_schema.WorkloadBenchmarkV2Schema(
-        run_id=f"run-{uuid.uuid4()}",
-        model_id=options.model_id,
-        software_id=options.software_id,
-        hardware_id=options.hardware_id,
-        hardware_num_chips=number_of_chips,
-        hardware_num_nodes=number_of_nodes,
-        result_success=run_success,
-        configs_framework=framework_config_in_json,
-        configs_env=env_variables,
-        configs_container_version=options.container_image_name,
-        configs_xla_flags=options.xla_flags.replace(",", " "),
-        configs_dataset=options.dataset,
-        logs_artifact_directory="",
-        update_person_ldap=getpass.getuser(),
-        run_source="automation",
-        run_type=options.run_type,
-        run_release_status=run_release_status,
-        workload_precision=options.precision,
-        workload_gbs=int(options.global_batch_size),
-        workload_optimizer=options.optimizer,
-        workload_sequence_length=int(options.seq_length),
-        metrics_e2e_time=metrics.e2e_step_time,
-        metrics_mfu=mfu,
-        metrics_step_time=metrics.median_step_time,
-        metrics_tokens_per_second=metrics.avg_tokens_per_sec,
-        metrics_num_steps=number_of_steps,
-        metrics_other=other_metrics_in_json,
-        hardware_nccl_driver_nickname=nccl_driver_nickname,
-        hardware_topology=options.topology,
-        hardware_num_superblocks=0,
-        logs_comments=comment,
-    )
-
-    client = get_db_client(
-        options.db_project,
-        options.db_dataset,
-        "run_summary",
-        workload_benchmark_v2_schema.WorkloadBenchmarkV2Schema,
-        options.is_test,
-    )
-    client.write([summary])
-
-  else:
-    raise ValueError("Could not upload data in run summary table")
+  summary = workload_benchmark_v2_schema.WorkloadBenchmarkV2Schema(
+      run_id=f"run-{uuid.uuid4()}",
+      model_id=options.model_id,
+      software_id=options.software_id,
+      hardware_id=options.hardware_id,
+      hardware_num_chips=number_of_chips,
+      hardware_num_nodes=number_of_nodes,
+      hardware_num_slices=options.hardware_num_slices,
+      result_success=run_success,
+      configs_framework=framework_config_in_json,
+      configs_env=env_variables,
+      configs_container_version=options.container_image_name,
+      configs_xla_flags=options.xla_flags.replace(",", " "),
+      configs_dataset=options.dataset,
+      logs_artifact_directory="",
+      update_person_ldap=getpass.getuser(),
+      run_source="automation",
+      run_type=options.run_type,
+      run_release_status=run_release_status,
+      workload_precision=options.precision,
+      workload_gbs=int(options.global_batch_size),
+      workload_optimizer=options.optimizer,
+      workload_sequence_length=int(options.seq_length),
+      metrics_e2e_time=metrics.e2e_step_time,
+      metrics_mfu=mfu,
+      metrics_step_time=metrics.median_step_time,
+      metrics_tokens_per_second=metrics.avg_tokens_per_sec,
+      metrics_num_steps=number_of_steps,
+      metrics_other=other_metrics_in_json,
+      hardware_nccl_driver_nickname=nccl_driver_nickname,
+      hardware_topology=options.topology,
+      hardware_num_superblocks=0,
+      logs_comments=comment,
+  )
+
+  client = get_db_client(
+      options.db_project,
+      options.db_dataset,
+      "run_summary",
+      workload_benchmark_v2_schema.WorkloadBenchmarkV2Schema,
+  )
+  client.write([summary])
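
In short, this diff drops the sys.path hack and the validate-then-write gate: write_run now imports the vendored benchmarks.benchmark_db_writer package directly, records the new hardware_num_slices field, and writes unconditionally to whatever project and dataset the options carry. A minimal sketch of the resulting upload flow, for orientation only; it is not code from the commit, upload_run_summary is a hypothetical wrapper, and it assumes the imports and the nested get_db_client helper from the hunks above are in scope:

    # Hypothetical sketch of the new write path (not from the commit).
    def upload_run_summary(options, summary):
      # No is_test switch and no validate_*_id pre-checks any more: rows go
      # straight to the project/dataset named in options.
      client = get_db_client(
          options.db_project,
          options.db_dataset,
          "run_summary",
          workload_benchmark_v2_schema.WorkloadBenchmarkV2Schema,
      )
      client.write([summary])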
benchmarks/benchmark_db_writer/bigquery_types.py (new file, 86 additions)
# Copyright 2023–2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
This module defines enumerations for BigQuery data types (e.g., `STRING`,
`INT64`) and field modes (e.g., `NULLABLE`, `REQUIRED`).

It also defines a primary mapping, `TypeMapping`, which translates these
BigQuery types into their corresponding standard Python types (like `str`,
`int`, `datetime.datetime`). Custom types (`TimeStamp`, `Geography`) are
included for specific BQ types not perfectly represented by Python built-ins.

Copied & Modified from
https://github.com/AI-Hypercomputer/aotc/blob/main/src/aotc/benchmark_db_writer/src/benchmark_db_writer/bigquery_types.py
"""
import datetime
import decimal
import enum
from typing import Dict, NewType, Type


class BigQueryFieldModes(str, enum.Enum):
  """Enums for BigQueryFieldModes"""

  NULLABLE = "NULLABLE"
  REQUIRED = "REQUIRED"
  REPEATED = "REPEATED"


class BigQueryTypes(str, enum.Enum):
  """Enums for BigQueryTypes"""

  STRING = "STRING"
  BYTES = "BYTES"
  INTEGER = "INT64"
  INT64 = "INT64"
  FLOAT64 = "FLOAT64"
  FLOAT = "FLOAT64"
  NUMERIC = "NUMERIC"
  BOOL = "BOOL"
  BOOLEAN = "BOOL"
  STRUCT = "STRUCT"
  RECORD = "STRUCT"
  TIMESTAMP = "TIMESTAMP"
  DATE = "DATE"
  TIME = "TIME"
  DATETIME = "DATETIME"
  GEOGRAPHY = "GEOGRAPHY"
  JSON = "JSON"


Geography = NewType("Geography", str)


class TimeStamp(datetime.datetime):
  pass


TypeMapping: Dict[BigQueryTypes, Type] = {
    BigQueryTypes.STRING: str,
    BigQueryTypes.BYTES: bytes,
    BigQueryTypes.INT64: int,
    BigQueryTypes.FLOAT64: float,
    BigQueryTypes.NUMERIC: decimal.Decimal,
    BigQueryTypes.BOOL: bool,
    BigQueryTypes.TIMESTAMP: TimeStamp,
    BigQueryTypes.DATE: datetime.date,
    BigQueryTypes.TIME: datetime.time,
    BigQueryTypes.DATETIME: datetime.datetime,
    BigQueryTypes.GEOGRAPHY: Geography,
    BigQueryTypes.JSON: dict,
}
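
A quick illustrative check of how these definitions behave (not part of the commit; the import path is an assumption inferred from this commit's other imports):

    # Hypothetical usage sketch; the module path is assumed.
    from benchmarks.benchmark_db_writer.bigquery_types import (
        BigQueryTypes,
        TimeStamp,
        TypeMapping,
    )

    # Duplicate enum values make INT64 an alias of INTEGER, so both names
    # resolve to the same member and therefore to the same Python type.
    assert BigQueryTypes.INT64 is BigQueryTypes.INTEGER
    assert TypeMapping[BigQueryTypes.INTEGER] is int

    # The str mixin lets members compare (and hash) like their raw string
    # values, which is convenient when a schema arrives as plain JSON text.
    assert BigQueryTypes("TIMESTAMP") == "TIMESTAMP"
    assert TypeMapping[BigQueryTypes("TIMESTAMP")] is TimeStamp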
benchmarks/benchmark_db_writer/bq_writer_utils.py (new file, 57 additions)
# Copyright 2023–2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Utilities and factory functions for creating BigQuery writer clients.

This module provides helper functions to simplify the instantiation of the
`DataclassBigQueryWriter`. It centralizes the configuration, such as
project and dataset IDs, making it easier to create database clients
for specific tables.

Copied & Modified from
https://github.com/AI-Hypercomputer/aotc/blob/main/src/aotc/benchmark_db_writer/src/benchmark_db_writer/bigquery_types.py
"""
from typing import Type

from benchmarks.benchmark_db_writer import dataclass_bigquery_writer


def create_bq_writer_object(project, dataset, table, dataclass_type):
  """Creates a BQ writer config and uses it to create a BQ writer object."""
  config = dataclass_bigquery_writer.BigqueryWriterConfig(project, dataset, table)
  writer = dataclass_bigquery_writer.DataclassBigQueryWriter(dataclass_type, config)
  return writer


def get_db_client(
    table: str, dataclass_type: Type
) -> dataclass_bigquery_writer.DataclassBigQueryWriter:
  """Creates a BigQuery client object.

  Args:
    table: The name of the BigQuery table.
    dataclass_type: The dataclass type corresponding to the table schema.

  Returns:
    A BigQuery client object.
  """
  project = "ml-workload-benchmarks"
  dataset = "benchmark_dataset_v2"
  return create_bq_writer_object(
      project=project,
      dataset=dataset,
      table=table,
      dataclass_type=dataclass_type,
  )
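
For reference, a hedged usage sketch (not part of the commit; the module path is inferred from this commit's import style, and the schema dataclass comes from the first diff above):

    # Hypothetical call site; import paths are assumptions.
    from benchmarks.benchmark_db_writer import bq_writer_utils
    from benchmarks.benchmark_db_writer.schema.workload_benchmark_v2 import (
        workload_benchmark_v2_schema,
    )

    # Project and dataset are hard-coded inside get_db_client
    # ("ml-workload-benchmarks" / "benchmark_dataset_v2"), so callers only
    # choose the table and its schema dataclass.
    client = bq_writer_utils.get_db_client(
        table="run_summary",
        dataclass_type=workload_benchmark_v2_schema.WorkloadBenchmarkV2Schema,
    )
    rows = []  # list of WorkloadBenchmarkV2Schema instances to upload
    client.write(rows)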
