Skip to content

Commit 64b5be5

Browse files
committed
Add support for multilabels input in DqJoin
1 parent 61aa5ba commit 64b5be5

File tree

3 files changed

+52
-21
lines changed

3 files changed

+52
-21
lines changed

ydb/library/yql/dq/opt/dq_opt_join.cpp

+47-16
Original file line numberDiff line numberDiff line change
@@ -15,17 +15,17 @@ using namespace NYql::NNodes;
1515
namespace {
1616

1717
struct TJoinInputDesc {
18-
TJoinInputDesc(TMaybe<TStringBuf> label, const TExprBase& input,
18+
TJoinInputDesc(TMaybe<THashSet<TStringBuf>> labels, const TExprBase& input,
1919
TSet<std::pair<TStringBuf, TStringBuf>>&& keys)
20-
: Label(label)
20+
: Labels(labels)
2121
, Input(input)
2222
, Keys(std::move(keys)) {}
2323

2424
bool IsRealTable() const {
25-
return Label.Defined();
25+
return Labels.Defined();
2626
}
2727

28-
TMaybe<TStringBuf> Label; // defined for real table input only, empty otherwise
28+
TMaybe<THashSet<TStringBuf>> Labels; // defined for real table input only, empty otherwise
2929
TExprBase Input;
3030
TSet<std::pair<TStringBuf, TStringBuf>> Keys; // set of (label, column_name) pairs in this input
3131
};
@@ -129,9 +129,12 @@ TMaybe<TJoinInputDesc> BuildDqJoin(
129129
{
130130
TMaybe<TJoinInputDesc> left;
131131
TVector<TString> lhsLabels;
132+
TStringBuf leftLabel;
133+
TStringBuf rightLabel;
132134
if (joinTuple.LeftScope().Maybe<TCoAtom>()) {
133135
lhsLabels.push_back(joinTuple.LeftScope().Cast<TCoAtom>().StringValue());
134136
left = inputs.at(joinTuple.LeftScope().Cast<TCoAtom>().Value());
137+
leftLabel = joinTuple.LeftScope().Cast<TCoAtom>().Value();
135138
YQL_ENSURE(left, "unknown scope " << joinTuple.LeftScope().Cast<TCoAtom>().Value());
136139
} else {
137140
left = BuildDqJoin(joinTuple.LeftScope().Cast<TCoEquiJoinTuple>(), inputs, mode, ctx, typeCtx, lhsLabels, hints, useCBO);
@@ -145,6 +148,7 @@ TMaybe<TJoinInputDesc> BuildDqJoin(
145148
if (joinTuple.RightScope().Maybe<TCoAtom>()) {
146149
rhsLabels.push_back(joinTuple.RightScope().Cast<TCoAtom>().StringValue());
147150
right = inputs.at(joinTuple.RightScope().Cast<TCoAtom>().Value());
151+
rightLabel = joinTuple.RightScope().Cast<TCoAtom>().Value();
148152
YQL_ENSURE(right, "unknown scope " << joinTuple.RightScope().Cast<TCoAtom>().Value());
149153
} else {
150154
right = BuildDqJoin(joinTuple.RightScope().Cast<TCoEquiJoinTuple>(), inputs, mode, ctx, typeCtx, rhsLabels, hints, useCBO);
@@ -188,10 +192,10 @@ TMaybe<TJoinInputDesc> BuildDqJoin(
188192
}
189193

190194
auto leftTableLabel = left->IsRealTable()
191-
? BuildAtom(*left->Label, left->Input.Pos(), ctx).Ptr()
195+
? BuildAtom(leftLabel, left->Input.Pos(), ctx).Ptr()
192196
: Build<TCoVoid>(ctx, left->Input.Pos()).Done().Ptr();
193197
auto rightTableLabel = right->IsRealTable()
194-
? BuildAtom(*right->Label, right->Input.Pos(), ctx).Ptr()
198+
? BuildAtom(rightLabel, right->Input.Pos(), ctx).Ptr()
195199
: Build<TCoVoid>(ctx, right->Input.Pos()).Done().Ptr();
196200

197201
size_t joinKeysCount = joinTuple.LeftKeys().Size() / 2;
@@ -353,21 +357,37 @@ TMaybe<TJoinInputDesc> BuildDqJoin(
353357
}
354358

355359
// Builds a TJoinInputDesc for one EquiJoin input.
//
// The input scope is either a single atom (one label) or a list of atoms
// (several labels). For a single label every struct member is registered as
// (label, member_name). For a label list every member name must contain a '.',
// and is split into (table, column) with SplitTableName.
//
// Returns an empty TMaybe when the scope is neither an atom nor an atom list,
// or when a multi-label input has a member name without a '.'.
TMaybe<TJoinInputDesc> PrepareJoinInput(const TCoEquiJoinInput& input) {
    const auto scope = input.Scope();

    THashSet<TStringBuf> labels;
    TMaybe<TStringBuf> singleLabel; // defined only when the scope is a single atom
    if (scope.Maybe<TCoAtom>()) {
        singleLabel = scope.Cast<TCoAtom>().Value();
        labels.insert(*singleLabel);
    } else if (scope.Maybe<TCoAtomList>()) {
        for (auto atomLabel : scope.Cast<TCoAtomList>()) {
            labels.insert(atomLabel.Value());
        }
    } else {
        // Bail out instead of performing an unchecked cast on an unexpected scope shape.
        YQL_CLOG(TRACE, CoreDq) << "EquiJoin input scope is neither an Atom nor an AtomList";
        return {};
    }

    auto listType = input.List().Ref().GetTypeAnn()->Cast<TListExprType>();
    auto resultStructType = listType->GetItemType()->Cast<TStructExprType>();

    TSet<std::pair<TStringBuf, TStringBuf>> keys;
    for (auto member : resultStructType->GetItems()) {
        const auto fullMemberName = member->GetName();

        if (singleLabel.Defined()) {
            keys.emplace(*singleLabel, fullMemberName);
            continue;
        }

        // Multi-label input: the owning table is taken from the member name itself.
        if (fullMemberName.find('.') == TString::npos) {
            YQL_CLOG(TRACE, CoreDq) << "EquiJoin multi-label input has a member without table prefix: " << fullMemberName;
            return {};
        }

        TStringBuf table;
        TStringBuf column;
        SplitTableName(fullMemberName, table, column);
        keys.emplace(table, column);
    }

    return TJoinInputDesc(std::move(labels), input.List(), std::move(keys));
}
372392

373393
TStringBuf RotateRightJoinType(TStringBuf joinType) {
@@ -383,6 +403,14 @@ TStringBuf RotateRightJoinType(TStringBuf joinType) {
383403
YQL_ENSURE(false, "unexpected right join type " << joinType);
384404
}
385405

406+
// Returns true when the first member of the input's row struct has a '.' in
// its name. Only the first member is inspected; an empty struct yields false.
// The input's type annotation is cast to a list of structs.
bool IsMultiLabelInput(TExprNode::TPtr input) {
    const auto* rowType = input->GetTypeAnn()->Cast<TListExprType>()->GetItemType()->Cast<TStructExprType>();
    const auto& members = rowType->GetItems();
    if (members.empty()) {
        return false;
    }
    // Search the name in place; no temporary TString is needed for the lookup.
    return members.front()->GetName().find('.') != TStringBuf::npos;
}
413+
386414
std::pair<TVector<TCoAtom>, TVector<TCoAtom>> GetJoinKeys(const TDqJoin& join, TExprContext& ctx) {
387415
TVector<TCoAtom> leftJoinKeys;
388416
TVector<TCoAtom> rightJoinKeys;
@@ -394,15 +422,17 @@ std::pair<TVector<TCoAtom>, TVector<TCoAtom>> GetJoinKeys(const TDqJoin& join, T
394422
for (const auto& keyTuple : join.JoinKeys()) {
395423
auto leftLabel = keyTuple.LeftLabel().Value();
396424
auto rightLabel = keyTuple.RightLabel().Value();
425+
bool leftMultiLabel = IsMultiLabelInput(join.LeftInput().Ptr());
426+
bool rightMultiLabel = IsMultiLabelInput(join.RightInput().Ptr());
397427

398428
auto leftKey = Build<TCoAtom>(ctx, join.Pos())
399-
.Value(join.LeftLabel().Maybe<TCoAtom>() || keyTuple.LeftColumn().Value().starts_with("_yql_dq_key_left_")
429+
.Value((join.LeftLabel().Maybe<TCoAtom>() || keyTuple.LeftColumn().Value().starts_with("_yql_dq_key_left_")) && !leftMultiLabel
400430
? keyTuple.LeftColumn().StringValue()
401431
: FullColumnName(leftLabel, keyTuple.LeftColumn().Value()))
402432
.Done();
403433

404434
auto rightKey = Build<TCoAtom>(ctx, join.Pos())
405-
.Value(join.RightLabel().Maybe<TCoAtom>() || keyTuple.RightColumn().Value().starts_with("_yql_dq_key_right_")
435+
.Value((join.RightLabel().Maybe<TCoAtom>() || keyTuple.RightColumn().Value().starts_with("_yql_dq_key_right_")) && !rightMultiLabel
406436
? keyTuple.RightColumn().StringValue()
407437
: FullColumnName(rightLabel, keyTuple.RightColumn().Value()))
408438
.Done();
@@ -414,7 +444,6 @@ std::pair<TVector<TCoAtom>, TVector<TCoAtom>> GetJoinKeys(const TDqJoin& join, T
414444
return std::make_pair(std::move(leftJoinKeys), std::move(rightJoinKeys));
415445
}
416446

417-
418447
TDqJoinBase DqMakePhyMapJoin(const TDqJoin& join, const TExprBase& leftInput, const TExprBase& rightInput,
419448
TExprContext& ctx, bool useGraceCore)
420449
{
@@ -521,7 +550,9 @@ TExprBase DqRewriteEquiJoin(
521550
THashMap<TStringBuf, TJoinInputDesc> inputs;
522551
for (size_t i = 0; i < equiJoin.ArgCount() - 2; ++i) {
523552
if (auto input = PrepareJoinInput(equiJoin.Arg(i).Cast<TCoEquiJoinInput>())) {
524-
inputs.emplace(*input->Label, std::move(*input));
553+
for (auto label : *(input->Labels)) {
554+
inputs.emplace(label, *input);
555+
}
525556
} else {
526557
return node;
527558
}

ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp

+4-4
Original file line numberDiff line numberDiff line change
@@ -30,13 +30,13 @@ bool DqCollectJoinRelationsWithStats(
3030

3131
auto stats = typesCtx.GetStats(joinArg.Raw());
3232

33-
if (!stats) {
34-
YQL_CLOG(TRACE, CoreDq) << "Didn't find statistics for scope " << input.Scope().Cast<TCoAtom>().StringValue() << "\n";
33+
auto scope = input.Scope();
34+
if (!scope.Maybe<TCoAtom>()){
3535
return false;
3636
}
3737

38-
auto scope = input.Scope();
39-
if (!scope.Maybe<TCoAtom>()){
38+
if (!stats) {
39+
YQL_CLOG(TRACE, CoreDq) << "Didn't find statistics for scope " << input.Scope().Cast<TCoAtom>().StringValue() << "\n";
4040
return false;
4141
}
4242

ydb/library/yql/dq/type_ann/dq_type_ann.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -277,7 +277,7 @@ ParseJoinInputType(const TStructExprType& rowType, TStringBuf tableLabel, TExprC
277277
if (optional && !memberType->IsOptionalOrNull()) {
278278
memberType = ctx.MakeType<TOptionalExprType>(memberType);
279279
}
280-
if (!tableLabel.empty()) {
280+
if (!tableLabel.empty() && label.empty()) {
281281
result[tableLabel][member->GetName()] = memberType;
282282
} else {
283283
result[label][column] = memberType;

0 commit comments

Comments
 (0)