Skip to content

Conversation

@pzuev
Copy link
Collaborator

@pzuev pzuev commented Oct 24, 2025

Changelog category

  • Not for changelog (changelog entry is not required)

Description for reviewers

Use with kqprun: ./kqprun -c dq.conf -s s1.sql -p query_dq_hash_combine_q10.sql -C query --log KQP_YQL=trace -T script --result-file result_dq_hash_combine_q10.json

Query SQL (tpch q10 + DqHashCombine):

-- Session settings for this run: ydb.UseDqHashCombine is set to "true"
-- and kikimr.UseLlvm is set to "false".
PRAGMA ydb.UseDqHashCombine = "true";
PRAGMA kikimr.UseLlvm = "false";

-- TPC-H/TPC-R Returned Item Reporting Query (Q10)
-- TPC TPC-H Parameter Substitution (Version 2.17.2 build 0)
-- using 1680793381 as a seed to the RNG

-- Start date referenced by the order-date filter further down in this query.
$border = Date("1993-12-01");
-- $join1: customers paired with their orders whose o_orderdate falls in the
-- half-open range [$border, $border + 90 days). Only the customer columns
-- needed downstream and the order key are kept.
$join1 = (
    SELECT
        cust.c_custkey   AS c_custkey,
        cust.c_name      AS c_name,
        cust.c_acctbal   AS c_acctbal,
        cust.c_address   AS c_address,
        cust.c_phone     AS c_phone,
        cust.c_comment   AS c_comment,
        cust.c_nationkey AS c_nationkey,
        ords.o_orderkey  AS o_orderkey
    FROM `customer` AS cust
    INNER JOIN `orders` AS ords
        ON cust.c_custkey = ords.o_custkey
    WHERE
        ords.o_orderdate >= $border
        AND ords.o_orderdate < ($border + Interval("P90D"))
);
-- $join2: rows of $join1 matched by order key to line items whose
-- l_returnflag equals 'R'. The order key is dropped from the output and the
-- line item's price and discount are added.
$join2 = (
    SELECT
        co.c_custkey       AS c_custkey,
        co.c_name          AS c_name,
        co.c_acctbal       AS c_acctbal,
        co.c_address       AS c_address,
        co.c_phone         AS c_phone,
        co.c_comment       AS c_comment,
        co.c_nationkey     AS c_nationkey,
        li.l_extendedprice AS l_extendedprice,
        li.l_discount      AS l_discount
    FROM $join1 AS co
    INNER JOIN `lineitem` AS li
        ON li.l_orderkey = co.o_orderkey
    WHERE
        li.l_returnflag = 'R'
);
-- $join3: rows of $join2 matched to the nation table on the customer's
-- nation key, adding the nation name (n_name) to every row.
$join3 = (
    SELECT
        ret.c_custkey       AS c_custkey,
        ret.c_name          AS c_name,
        ret.c_acctbal       AS c_acctbal,
        ret.c_address       AS c_address,
        ret.c_phone         AS c_phone,
        ret.c_comment       AS c_comment,
        ret.c_nationkey     AS c_nationkey,
        ret.l_extendedprice AS l_extendedprice,
        ret.l_discount      AS l_discount,
        nat.n_name          AS n_name
    FROM $join2 AS ret
    INNER JOIN `nation` AS nat
        ON nat.n_nationkey = ret.c_nationkey
);
-- Final result: one row per distinct combination of the customer attributes
-- and nation name listed in GROUP BY, with revenue computed as the sum of
-- l_extendedprice * (1 - l_discount). Only the 20 rows with the highest
-- revenue are returned, in descending revenue order.
SELECT
    c_custkey,
    c_name,
    SUM(l_extendedprice * (1 - l_discount)) AS revenue,
    c_acctbal,
    n_name,
    c_address,
    c_phone,
    c_comment
FROM $join3
GROUP BY
    c_custkey,
    c_name,
    c_acctbal,
    c_phone,
    n_name,
    c_address,
    c_comment
ORDER BY
    revenue DESC
LIMIT 20;

Schema SQL:

-- External data source "tpc": an ObjectStorage location accessed with
-- AUTH_METHOD "NONE".
CREATE EXTERNAL DATA SOURCE tpc
WITH (
    SOURCE_TYPE = "ObjectStorage",
    LOCATION = "https://storage.yandexcloud.net/tpc/",
    AUTH_METHOD = "NONE"
);

-- External table customer: parquet files under /h/s1/parquet/customer/
-- in data source "tpc". All columns are declared NOT NULL.
CREATE EXTERNAL TABLE customer (
    c_acctbal     Double NOT NULL,
    c_mktsegment  String NOT NULL,
    c_phone       String NOT NULL,
    c_nationkey   Int32  NOT NULL,
    c_custkey     Int32  NOT NULL,
    c_name        String NOT NULL,
    c_comment     String NOT NULL,
    c_address     String NOT NULL
)
WITH (
    DATA_SOURCE = "tpc",
    LOCATION = "/h/s1/parquet/customer/",
    FORMAT = "parquet"
);

-- External table lineitem: parquet files under /h/s1/parquet/lineitem/
-- in data source "tpc". All columns are declared NOT NULL.
CREATE EXTERNAL TABLE lineitem (
    l_orderkey       Int64  NOT NULL,
    l_partkey        Int32  NOT NULL,
    l_suppkey        Int32  NOT NULL,
    l_linenumber     Int32  NOT NULL,
    l_quantity       Double NOT NULL,
    l_extendedprice  Double NOT NULL,
    l_discount       Double NOT NULL,
    l_tax            Double NOT NULL,
    l_returnflag     String NOT NULL,
    l_linestatus     String NOT NULL,
    l_shipdate       Date   NOT NULL,
    l_commitdate     Date   NOT NULL,
    l_receiptdate    Date   NOT NULL,
    l_shipinstruct   String NOT NULL,
    l_shipmode       String NOT NULL,
    l_comment        String NOT NULL
)
WITH (
    DATA_SOURCE = "tpc",
    LOCATION = "/h/s1/parquet/lineitem/",
    FORMAT = "parquet"
);

-- External table nation: parquet files under /h/s1/parquet/nation/
-- in data source "tpc". All columns are declared NOT NULL.
CREATE EXTERNAL TABLE nation (
    n_nationkey  Int32  NOT NULL,
    n_name       String NOT NULL,
    n_regionkey  Int32  NOT NULL,
    n_comment    String NOT NULL
)
WITH (
    DATA_SOURCE = "tpc",
    LOCATION = "/h/s1/parquet/nation/",
    FORMAT = "parquet"
);

-- External table orders: parquet files under /h/s1/parquet/orders/
-- in data source "tpc". All columns are declared NOT NULL.
CREATE EXTERNAL TABLE orders (
    o_orderkey       Int64  NOT NULL,
    o_custkey        Int32  NOT NULL,
    o_orderstatus    String NOT NULL,
    o_totalprice     Double NOT NULL,
    o_orderdate      Date   NOT NULL,
    o_orderpriority  String NOT NULL,
    o_clerk          String NOT NULL,
    o_shippriority   Int32  NOT NULL,
    o_comment        String NOT NULL
)
WITH (
    DATA_SOURCE = "tpc",
    LOCATION = "/h/s1/parquet/orders/",
    FORMAT = "parquet"
);

-- External table part: parquet files under /h/s1/parquet/part/
-- in data source "tpc". All columns are declared NOT NULL.
CREATE EXTERNAL TABLE part (
    p_partkey      Int32  NOT NULL,
    p_name         String NOT NULL,
    p_mfgr         String NOT NULL,
    p_brand        String NOT NULL,
    p_type         String NOT NULL,
    p_size         Int32  NOT NULL,
    p_container    String NOT NULL,
    p_retailprice  Double NOT NULL,
    p_comment      String NOT NULL
)
WITH (
    DATA_SOURCE = "tpc",
    LOCATION = "/h/s1/parquet/part/",
    FORMAT = "parquet"
);

-- External table partsupp: parquet files under /h/s1/parquet/partsupp/
-- in data source "tpc". All columns are declared NOT NULL.
CREATE EXTERNAL TABLE partsupp (
    ps_partkey     Int32  NOT NULL,
    ps_suppkey     Int32  NOT NULL,
    ps_availqty    Int32  NOT NULL,
    ps_supplycost  Double NOT NULL,
    ps_comment     String NOT NULL
)
WITH (
    DATA_SOURCE = "tpc",
    LOCATION = "/h/s1/parquet/partsupp/",
    FORMAT = "parquet"
);

-- External table region: parquet files under /h/s1/parquet/region/
-- in data source "tpc". All columns are declared NOT NULL.
CREATE EXTERNAL TABLE region (
    r_regionkey  Int32  NOT NULL,
    r_name       String NOT NULL,
    r_comment    String NOT NULL
)
WITH (
    DATA_SOURCE = "tpc",
    LOCATION = "/h/s1/parquet/region/",
    FORMAT = "parquet"
);

-- External table supplier: parquet files under /h/s1/parquet/supplier/
-- in data source "tpc". All columns are declared NOT NULL.
CREATE EXTERNAL TABLE supplier (
    s_suppkey    Int32  NOT NULL,
    s_name       String NOT NULL,
    s_address    String NOT NULL,
    s_nationkey  Int32  NOT NULL,
    s_phone      String NOT NULL,
    s_acctbal    Double NOT NULL,
    s_comment    String NOT NULL
)
WITH (
    DATA_SOURCE = "tpc",
    LOCATION = "/h/s1/parquet/supplier/",
    FORMAT = "parquet"
);

@github-actions
Copy link

github-actions bot commented Oct 24, 2025

2025-10-24 15:11:00 UTC Pre-commit check linux-x86_64-release-asan for 0238831 has started.
2025-10-24 15:11:15 UTC Artifacts will be uploaded here
2025-10-24 15:14:13 UTC ya make is running...
🟡 2025-10-24 17:21:08 UTC Some tests failed, follow the links below. This failure is not in the blocking policy yet

Ya make output | Test bloat

TESTS PASSED ERRORS FAILED SKIPPED MUTED?
16201 15727 0 253 203 18

🟢 2025-10-24 17:21:13 UTC Build successful.
🟢 2025-10-24 17:21:38 UTC ydbd size 3.8 GiB changed* by +65.2 KiB, which is < 100.0 KiB vs main: OK

ydbd size dash main: 03c8f25 merge: 0238831 diff diff %
ydbd size 4 055 667 448 Bytes 4 055 734 264 Bytes +65.2 KiB +0.002%
ydbd stripped size 1 506 010 176 Bytes 1 506 037 216 Bytes +26.4 KiB +0.002%

*please be aware that the difference is based on comparing your commit and the last completed build from the post-commit, check comparison

@github-actions
Copy link

github-actions bot commented Oct 24, 2025

2025-10-24 15:11:10 UTC Pre-commit check linux-x86_64-relwithdebinfo for 0238831 has started.
2025-10-24 15:11:25 UTC Artifacts will be uploaded here
2025-10-24 15:14:54 UTC ya make is running...
🟡 2025-10-24 17:08:53 UTC Some tests failed, follow the links below. Going to retry failed tests...

Ya make output | Test bloat

TESTS PASSED ERRORS FAILED SKIPPED MUTED?
39736 36694 0 220 2798 24

2025-10-24 17:09:03 UTC ya make is running... (failed tests rerun, try 2)
🟡 2025-10-24 17:41:22 UTC Some tests failed, follow the links below. Going to retry failed tests...

Ya make output | Test bloat | Test bloat

TESTS PASSED ERRORS FAILED SKIPPED MUTED?
2352 (only retried tests) 2087 0 203 40 22

2025-10-24 17:41:27 UTC ya make is running... (failed tests rerun, try 3)
🔴 2025-10-24 18:19:28 UTC Some tests failed, follow the links below.

Ya make output | Test bloat | Test bloat | Test bloat

TESTS PASSED ERRORS FAILED SKIPPED MUTED?
1982 (only retried tests) 1729 0 190 41 22

🟢 2025-10-24 18:19:30 UTC Build successful.
🟢 2025-10-24 18:20:00 UTC ydbd size 2.3 GiB changed* by +9.8 KiB, which is < 100.0 KiB vs main: OK

ydbd size dash main: 7090690 merge: 0238831 diff diff %
ydbd size 2 437 327 344 Bytes 2 437 337 400 Bytes +9.8 KiB +0.000%
ydbd stripped size 518 128 456 Bytes 518 131 336 Bytes +2.8 KiB +0.001%

*please be aware that the difference is based on comparing your commit and the last completed build from the post-commit, check comparison

@github-actions
Copy link

🟢 2025-10-24 15:12:08 UTC The validation of the Pull Request description is successful.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

1 participant