ClickHouse · aniket486 · Oct 29, 2025 · Oct 29, 2025 · Oct 29, 2025 · Nov 14, 2025
diff --git a/bigquery/README.md b/bigquery/README.md
@@ -1,38 +1,41 @@
-As of 2025, Google Bigquery allow publishing benchmark results, which was not the case earlier.
-
-It's very difficult to find, how to create a database.
-Databases are named "datasets". You need to press on `⋮` near project.
+Download Google Cloud CLI and configure your project settings using the commands below.
+You can skip this step if you are using [Cloud Shell](https://docs.cloud.google.com/shell/docs/launching-cloud-shell), which already comes with gcloud preinstalled:
+```
+curl https://sdk.cloud.google.com | bash
+exec -l $SHELL
+gcloud init
+```
 
-Create dataset `test`.
-Go to the query editor and paste the contents of `create.sql`.
-It will take two seconds to create a table.
+Enable BigQuery permissions for this project if they haven't been enabled already:
+```
+# 1. Store the active project ID and authenticated email in variables for convenience
+export PROJECT_ID=$(gcloud config get-value project)
+export USER_EMAIL=$(gcloud config get-value account)
+
+# 2. Grant the BigQuery User role (grants the datasets.create and jobs.create permissions)
+gcloud projects add-iam-policy-binding $PROJECT_ID \
+    --member="user:$USER_EMAIL" \
+    --role="roles/bigquery.user"
+```
 
-Download Google Cloud CLI:
+Create the dataset and table in BigQuery:
 ```
-wget --continue --progress=dot:giga https://dl.google.com/dl/cloudsdk/channels/rapid/downloads/google-cloud-cli-linux-x86_64.tar.gz
-tar -xf google-cloud-cli-linux-x86_64.tar.gz
-./google-cloud-sdk/install.sh
-source .bashrc
-./google-cloud-sdk/bin/gcloud init
+bq mk --dataset test
+
+bq query --use_legacy_sql=false < create.sql
 ```
 
-Load the data:
+Load the data into the table:
 ```
 wget --continue --progress=dot:giga 'https://datasets.clickhouse.com/hits_compatible/hits.csv.gz'
-gzip -d -f hits.csv.gz
 
+# No need to unzip: BigQuery can load directly from a GZIP-compressed CSV file.
 echo -n "Load time: "
-command time -f '%e' bq load --source_format CSV --allow_quoted_newlines=1 test.hits hits.csv
+command time -f '%e' bq load --source_format CSV --allow_quoted_newlines=1 test.hits hits.csv.gz
 ```
 
 Run the benchmark:
-
 ```
-./run.sh 2>&1 | tee log.txt
-
-cat log.txt |
-  grep -P '^real|^Error' |
-  sed -r -e 's/^Error.*$/null/; s/^real\s*([0-9.]+)m([0-9.]+)s$/\1 \2/' |
-  awk '{ if ($2 != "") { print $1 * 60 + $2 } else { print $1 } }' |
-  awk '{ if ($1 == "null") { skip = 1 } else { if (i % 3 == 0) { printf "[" }; printf skip ? "null" : $1; if (i % 3 != 2) { printf "," } else { print "]," }; ++i; skip = 0; } }'
+pip install google-cloud-bigquery
+python3 run_queries.py > results.txt 2> log.txt
 ```
diff --git a/bigquery/create.sh b/bigquery/create.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+# Create the BigQuery dataset named `test`.
+bq mk --dataset test
+# Run the statements in create.sql with legacy SQL disabled.
+bq query --use_legacy_sql=false < create.sql
diff --git a/bigquery/create.sql b/bigquery/create.sql
@@ -104,5 +104,6 @@ CREATE TABLE test.hits
     HasGCLID SMALLINT NOT NULL,
     RefererHash BIGINT NOT NULL,
     URLHash BIGINT NOT NULL,
-    CLID INTEGER NOT NULL
-);
+    CLID INTEGER NOT NULL,
+)
+CLUSTER BY CounterID, EventDate, UserID, EventTime;
diff --git a/bigquery/queries.sql b/bigquery/queries.sql
@@ -26,7 +26,7 @@ SELECT SearchPhrase FROM test.hits WHERE SearchPhrase <> '' ORDER BY EventTime L
 SELECT SearchPhrase FROM test.hits WHERE SearchPhrase <> '' ORDER BY SearchPhrase LIMIT 10;
 SELECT SearchPhrase FROM test.hits WHERE SearchPhrase <> '' ORDER BY EventTime, SearchPhrase LIMIT 10;
 SELECT CounterID, AVG(length(URL)) AS l, COUNT(*) AS c FROM test.hits WHERE URL <> '' GROUP BY CounterID HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25;
-SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(length(Referer)) AS l, COUNT(*) AS c, MIN(Referer) FROM test.hits WHERE Referer <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25;
+SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\\.)?([^/]+)/.*$', '\\1') AS k, AVG(length(Referer)) AS l, COUNT(*) AS c, MIN(Referer) FROM test.hits WHERE Referer <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25;
 SELECT SUM(ResolutionWidth), SUM(ResolutionWidth + 1), SUM(ResolutionWidth + 2), SUM(ResolutionWidth + 3), SUM(ResolutionWidth + 4), SUM(ResolutionWidth + 5), SUM(ResolutionWidth + 6), SUM(ResolutionWidth + 7), SUM(ResolutionWidth + 8), SUM(ResolutionWidth + 9), SUM(ResolutionWidth + 10), SUM(ResolutionWidth + 11), SUM(ResolutionWidth + 12), SUM(ResolutionWidth + 13), SUM(ResolutionWidth + 14), SUM(ResolutionWidth + 15), SUM(ResolutionWidth + 16), SUM(ResolutionWidth + 17), SUM(ResolutionWidth + 18), SUM(ResolutionWidth + 19), SUM(ResolutionWidth + 20), SUM(ResolutionWidth + 21), SUM(ResolutionWidth + 22), SUM(ResolutionWidth + 23), SUM(ResolutionWidth + 24), SUM(ResolutionWidth + 25), SUM(ResolutionWidth + 26), SUM(ResolutionWidth + 27), SUM(ResolutionWidth + 28), SUM(ResolutionWidth + 29), SUM(ResolutionWidth + 30), SUM(ResolutionWidth + 31), SUM(ResolutionWidth + 32), SUM(ResolutionWidth + 33), SUM(ResolutionWidth + 34), SUM(ResolutionWidth + 35), SUM(ResolutionWidth + 36), SUM(ResolutionWidth + 37), SUM(ResolutionWidth + 38), SUM(ResolutionWidth + 39), SUM(ResolutionWidth + 40), SUM(ResolutionWidth + 41), SUM(ResolutionWidth + 42), SUM(ResolutionWidth + 43), SUM(ResolutionWidth + 44), SUM(ResolutionWidth + 45), SUM(ResolutionWidth + 46), SUM(ResolutionWidth + 47), SUM(ResolutionWidth + 48), SUM(ResolutionWidth + 49), SUM(ResolutionWidth + 50), SUM(ResolutionWidth + 51), SUM(ResolutionWidth + 52), SUM(ResolutionWidth + 53), SUM(ResolutionWidth + 54), SUM(ResolutionWidth + 55), SUM(ResolutionWidth + 56), SUM(ResolutionWidth + 57), SUM(ResolutionWidth + 58), SUM(ResolutionWidth + 59), SUM(ResolutionWidth + 60), SUM(ResolutionWidth + 61), SUM(ResolutionWidth + 62), SUM(ResolutionWidth + 63), SUM(ResolutionWidth + 64), SUM(ResolutionWidth + 65), SUM(ResolutionWidth + 66), SUM(ResolutionWidth + 67), SUM(ResolutionWidth + 68), SUM(ResolutionWidth + 69), SUM(ResolutionWidth + 70), SUM(ResolutionWidth + 71), SUM(ResolutionWidth + 72), SUM(ResolutionWidth + 73), 
SUM(ResolutionWidth + 74), SUM(ResolutionWidth + 75), SUM(ResolutionWidth + 76), SUM(ResolutionWidth + 77), SUM(ResolutionWidth + 78), SUM(ResolutionWidth + 79), SUM(ResolutionWidth + 80), SUM(ResolutionWidth + 81), SUM(ResolutionWidth + 82), SUM(ResolutionWidth + 83), SUM(ResolutionWidth + 84), SUM(ResolutionWidth + 85), SUM(ResolutionWidth + 86), SUM(ResolutionWidth + 87), SUM(ResolutionWidth + 88), SUM(ResolutionWidth + 89) FROM test.hits;
 SELECT SearchEngineID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM test.hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10;
 SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM test.hits WHERE SearchPhrase <> '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;

diff --git a/bigquery/results/result.json b/bigquery/results/result.json
@@ -1,6 +1,6 @@
 {
     "system": "Bigquery",
-    "date": "2025-04-09",
+    "date": "2025-10-28",
     "machine": "serverless",
     "cluster_size": "serverless",
     "proprietary": "yes",
@@ -9,52 +9,52 @@
 
     "tags": ["serverless", "column-oriented", "gcp", "managed"],
 
-    "load_time": 1146,
+    "load_time": 776.91,
     "data_size": 8760000000,
 
     "result": [
-[4.862,4.001,3.921],
-[4.268,4.113,4.467],
-[4.341,4.15,4.219],
-[4.124,3.996,4.337],
-[4.553,4.36,4.349],
-[4.565,4.4,4.661],
-[4.089,4.132,3.974],
-[4.514,4.296,4.312],
-[6.183,6.155,4.557],
-[6.068,6.106,6.259],
-[4.109,4.082,4.165],
-[4.24,3.981,4.054],
-[4.295,4.301,4.283],
-[6.03,6.079,6.094],
-[4.383,4.399,4.218],
-[4.304,4.23,4.189],
-[4.849,4.86,4.62],
-[4.309,4.371,4.393],
-[6.096,6.109,6.071],
-[3.838,3.89,3.938],
-[4.249,4.037,4.136],
-[4.337,4.196,4.264],
-[4.493,4.603,4.435],
-[6.125,4.667,4.559],
-[4.039,4.039,3.942],
-[3.903,4.239,4.003],
-[4.013,4.108,4.073],
-[4.524,4.474,4.498],
-[null,null,null],
-[4.866,4.862,6.063],
-[4.271,4.403,4.34],
-[4.39,4.314,4.566],
-[7.233,7.322,7.241],
-[7.39,7.382,7.298],
-[6.05,6.084,6.362],
-[4.31,4.222,4.254],
-[4.181,4.003,3.95],
-[3.98,3.988,3.982],
-[4.017,4.004,3.987],
-[4.334,4.322,4.445],
-[4.126,3.853,3.982],
-[4.214,3.931,3.921],
-[4.033,3.913,3.866]
+[0.383933,0.402355,0.370758],
+[0.334439,0.433776,0.416341],
+[0.469506,0.359557,0.386433],
+[0.491417,0.333208,0.4758],
+[0.552464,0.652322,0.555889],
+[0.581302,0.603089,0.674999],
+[1.087835,0.639649,0.360542],
+[0.438221,0.759105,0.497731],
+[0.702109,0.712533,0.678109],
+[0.857454,0.968303,0.995039],
+[0.547042,0.479513,0.475109],
+[0.547026,0.549529,0.614708],
+[0.686315,0.580551,0.630673],
+[1.792573,2.034019,1.845895],
+[0.610674,0.677655,0.643796],
+[0.580303,0.729024,0.622044],
+[0.760401,0.809858,0.822725],
+[0.721757,0.611165,0.744566],
+[1.49368,1.372045,1.498892],
+[0.363523,0.383959,0.366856],
+[0.625735,0.49802,0.473233],
+[0.513777,0.508772,0.527258],
+[0.895406,0.874879,0.799704],
+[0.909036,0.679151,0.730413],
+[0.358434,0.509104,0.467827],
+[0.421586,0.428603,0.33761],
+[0.54752,0.364919,0.444499],
+[0.691434,0.674469,0.930067],
+[1.143579,1.034013,1.105913],
+[0.569294,0.444362,0.463864],
+[0.517151,0.53565,0.523663],
+[0.56208,0.573,0.543899],
+[1.409102,1.116484,1.295522],
+[1.413902,1.346194,1.406088],
+[1.068575,0.985308,1.194028],
+[0.781501,0.524615,0.664192],
+[0.678144,0.666519,0.548661],
+[0.477265,0.445584,0.469621],
+[0.554599,0.530927,0.551336],
+[0.777017,0.696796,0.810055],
+[0.427604,0.43113,0.449339],
+[0.434927,0.407959,0.435918],
+[0.478507,0.425838,0.541504]
 ]
 }
diff --git a/bigquery/run_queries.py b/bigquery/run_queries.py
@@ -0,0 +1,71 @@
+from google.cloud import bigquery
+from google.cloud.bigquery.enums import JobCreationMode
+
+import sys
+from typing import TextIO, Any
+from datetime import datetime
+
+def log(*objects: Any, sep: str = ' ', end: str = '\n', file: TextIO = sys.stderr, severity: str = 'INFO') -> None:
+    """
+    Write one timestamped, severity-tagged record in the manner of print().
+
+    The record has the form ``[YYYY-MM-DD HH:MM:SS] [SEVERITY]: message``
+    followed by ``end``; the timestamp comes from datetime.now().
+
+    Args:
+        *objects: Values to log; each one is converted with str().
+        sep (str): Separator placed between values, default a space.
+        end (str): Text appended after the message, default a newline.
+        file (TextIO): Stream providing write() and flush(), default sys.stderr.
+        severity (str): Level label such as "INFO", "WARNING" or "ERROR";
+            it is upper-cased before being written.
+    """
+    # Render the message first, then stamp it with the current time.
+    rendered = sep.join(map(str, objects))
+    stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+    header = f"[{stamp}] [{severity.upper()}]: "
+
+    # A single write() call carries the header, the message and the terminator.
+    file.write("".join((header, rendered, end)))
+
+    # The interpreter's own stdout/stderr objects are not flushed here;
+    # any other stream is flushed after each record.
+    is_std_stream = file is sys.stdout or file is sys.stderr
+    if is_std_stream:
+        return
+
+    file.flush()
+
+
+# Bypass the query cache so that every try is executed rather than served from cache.
+job_config = bigquery.QueryJobConfig()
+job_config.use_query_cache = False
+client = bigquery.Client(default_job_creation_mode=JobCreationMode.JOB_CREATION_OPTIONAL)
+
+TRIES = 3
+query_num = 0
+# queries.sql holds one query per line. Each query produces one stdout row of the form
+# "[t1,t2,t3]," with client-side seconds per try ("null" for a failed try).
+with open('queries.sql', 'r') as file:
+    for query in file:
+        query = query.strip()
+        if not query:
+            continue  # A blank line is not a query; emit no row for it.
+        query_num += 1
+        timings = []
+        for i in range(TRIES):
+            log(f"[q{query_num}: {i}]: {query}")
+            try:
+                client_start_time = datetime.now()
+                results = client.query_and_wait(query, job_config=job_config)
+                client_time = datetime.now() - client_start_time
+            except Exception as e:
+                log(f"Job failed with error: {e}", severity="ERROR")
+                timings.append("null")  # Keep the row at TRIES entries.
+                continue
+            timings.append(f"{client_time.total_seconds()}")
+            log(f"Job ID: **{results.job_id}**")
+            log(f"Query ID: **{results.query_id}**")
+            log(f"Client time: **{client_time}**")
+        print("[" + ",".join(timings) + "],")
+