You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
Presto is a distributed SQL query engine for big data.
4
+
- [GitHub](https://github.com/prestodb/presto)
5
+
- [Homepage](https://prestodb.io)
6
+
7
+
The benchmarks are based on Presto version `0.287`.
8
+
9
+
We assume that a Presto cluster is already running. For more information, visit [Getting Started](https://prestodb.io/getting-started/).
10
+
11
+
----------
12
+
## Steps
13
+
14
+
1. Download the Parquet file and upload it to an S3 bucket, e.g. `s3://your-bucket/clickbench-parquet/hits/hits.parquet`.
15
+
2. Create a new schema named `clickbench_parquet` in the Hive metastore (Hive catalog), then create the `hits` table in that schema using the `create.sql` file. Modify the end of the table creation statement so that it points to the Parquet file on S3.
SELECT SUM(AdvEngineID), COUNT(*), AVG(ResolutionWidth) FROM hits;
4
+
SELECT AVG(UserID) FROM hits;
5
+
SELECT COUNT(DISTINCT UserID) FROM hits;
6
+
SELECT COUNT(DISTINCT SearchPhrase) FROM hits;
7
+
SELECT MIN(EventDate), MAX(EventDate) FROM hits;
8
+
SELECT AdvEngineID, COUNT(*) FROM hits WHERE AdvEngineID <> 0 GROUP BY AdvEngineID ORDER BY COUNT(*) DESC;
9
+
SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM hits GROUP BY RegionID ORDER BY u DESC LIMIT 10;
10
+
SELECT RegionID, SUM(AdvEngineID), COUNT(*) AS c, AVG(ResolutionWidth), COUNT(DISTINCT UserID) FROM hits GROUP BY RegionID ORDER BY c DESC LIMIT 10;
11
+
SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10;
12
+
SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10;
13
+
SELECT SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
14
+
SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
15
+
SELECT SearchEngineID, SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10;
16
+
SELECT UserID, COUNT(*) FROM hits GROUP BY UserID ORDER BY COUNT(*) DESC LIMIT 10;
17
+
SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10;
18
+
SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase LIMIT 10;
19
+
SELECT UserID, extract(minute FROM EventTime), SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, extract(minute FROM EventTime), SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10;
20
+
SELECT UserID FROM hits WHERE UserID = 435090932899640449;
21
+
SELECT COUNT(*) FROM hits WHERE URL LIKE '%google%';
22
+
SELECT SearchPhrase, MIN(URL), COUNT(*) AS c FROM hits WHERE URL LIKE '%google%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
23
+
SELECT SearchPhrase, MIN(URL), MIN(Title), COUNT(*) AS c, COUNT(DISTINCT UserID) FROM hits WHERE Title LIKE '%Google%' AND URL NOT LIKE '%.google.%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
24
+
SELECT * FROM hits WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10;
25
+
SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime LIMIT 10;
26
+
SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY SearchPhrase LIMIT 10;
27
+
SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime, SearchPhrase LIMIT 10;
28
+
SELECT CounterID, AVG(length(URL)) AS l, COUNT(*) AS c FROM hits WHERE URL <> '' GROUP BY CounterID HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25;
29
+
SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '$1') AS k, AVG(length(Referer)) AS l, COUNT(*) AS c, MIN(Referer) FROM hits WHERE Referer <> '' GROUP BY REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '$1') HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25;
SELECT SearchEngineID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10;
32
+
SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
33
+
SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
34
+
SELECT URL, COUNT(*) AS c FROM hits GROUP BY URL ORDER BY c DESC LIMIT 10;
35
+
SELECT 1, URL, COUNT(*) AS c FROM hits GROUP BY 1, URL ORDER BY c DESC LIMIT 10;
36
+
SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, COUNT(*) AS c FROM hits GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10;
37
+
SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-31') AND DontCountHits = 0 AND IsRefresh = 0 AND URL <> '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10;
38
+
SELECT Title, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-31') AND DontCountHits = 0 AND IsRefresh = 0 AND Title <> '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10;
39
+
SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-31') AND IsRefresh = 0 AND IsLink <> 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC OFFSET 1000 LIMIT 10;
40
+
SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-31') AND IsRefresh = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END, URL ORDER BY PageViews DESC OFFSET 1000 LIMIT 10;
41
+
SELECT URLHash, EventDate, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-31') AND IsRefresh = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 3594120000172545465 GROUP BY URLHash, EventDate ORDER BY PageViews DESC OFFSET 100 LIMIT 10;
42
+
SELECT WindowClientWidth, WindowClientHeight, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= DATE('2013-07-01') AND EventDate <= DATE('2013-07-31') AND IsRefresh = 0 AND DontCountHits = 0 AND URLHash = 2868770270353813622 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC OFFSET 10000 LIMIT 10;
43
+
SELECT DATE_FORMAT(EventTime, '%Y-%m-%d %H:%i:00') AS M, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= DATE('2013-07-14') AND EventDate <= DATE('2013-07-15') AND IsRefresh = 0 AND DontCountHits = 0 GROUP BY DATE_FORMAT(EventTime, '%Y-%m-%d %H:%i:00') ORDER BY DATE_FORMAT(EventTime, '%Y-%m-%d %H:%i:00') OFFSET 1000 LIMIT 10;
# Ask Presto (as clickbench_manager) for the elapsed time in milliseconds between
# `started` and `end` of the most recently created query issued by user
# clickbench_runner, strip the double quotes from the CLI output, and print the
# result only if the lookup pipeline succeeded.
execution_time=$(/opt/presto-cli --server 127.0.0.1:8080 --user clickbench_manager --execute "SELECT date_diff('millisecond',started,\"end\") FROM system.runtime.queries WHERE user='clickbench_runner' ORDER BY created DESC LIMIT 1" | tr -d '"') && echo "Execution time: ${execution_time}ms"
0 commit comments