Skip to content

Commit ec5d1ba

Browse files
committed
Address review comments
1 parent 1c2f6a9 commit ec5d1ba

File tree

3 files changed

+792
-48
lines changed

3 files changed

+792
-48
lines changed

ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java

Lines changed: 54 additions & 48 deletions
Original file line number | Diff line number | Diff line change
@@ -1426,27 +1426,10 @@ private static SharedResult extractSharedOptimizationInfoForRoot(ParseContext pc
14261426
if (equalOp1.getNumChild() > 1 || equalOp2.getNumChild() > 1) {
14271427
// TODO: Support checking multiple child operators to merge further.
14281428
discardableInputOps.addAll(gatherDPPBranchOps(pctx, optimizerCache, discardableOps));
1429-
1430-
// Accumulate InMemoryDataSize of unmerged MapJoin operators.
1431-
Set<Operator<?>> opsWork1 = findWorkOperators(optimizerCache, retainableTsOp);
1432-
for (Operator<?> op : opsWork1) {
1433-
if (op instanceof MapJoinOperator) {
1434-
MapJoinOperator mop = (MapJoinOperator) op;
1435-
dataSize = StatsUtils.safeAdd(dataSize, mop.getConf().getInMemoryDataSize());
1436-
maxDataSize = mop.getConf().getMemoryMonitorInfo().getAdjustedNoConditionalTaskSize();
1437-
}
1438-
}
1439-
Set<Operator<?>> opsWork2 = findWorkOperators(optimizerCache, discardableTsOp);
1440-
for (Operator<?> op : opsWork2) {
1441-
if (op instanceof MapJoinOperator) {
1442-
MapJoinOperator mop = (MapJoinOperator) op;
1443-
dataSize = StatsUtils.safeAdd(dataSize, mop.getConf().getInMemoryDataSize());
1444-
maxDataSize = mop.getConf().getMemoryMonitorInfo().getAdjustedNoConditionalTaskSize();
1445-
}
1446-
}
1447-
1448-
return new SharedResult(retainableOps, discardableOps, discardableInputOps, dataSize, maxDataSize);
1429+
return createSharedResultForRoot(optimizerCache, retainableTsOp, discardableTsOp,
1430+
retainableOps, discardableOps, discardableInputOps);
14491431
}
1432+
14501433
if (retainableTsOp.getChildOperators().size() == 0 || discardableTsOp.getChildOperators().size() == 0) {
14511434
return new SharedResult(retainableOps, discardableOps, discardableInputOps, dataSize, maxDataSize);
14521435
}
@@ -1469,18 +1452,18 @@ private static SharedResult extractSharedOptimizationInfoForRoot(ParseContext pc
14691452
}
14701453
}
14711454

1472-
boolean bailOut = false;
14731455
if (equalFilters) {
14741456
equalOp1 = currentOp1;
14751457
equalOp2 = currentOp2;
14761458
retainableOps.add(equalOp1);
14771459
discardableOps.add(equalOp2);
1478-
if (currentOp1.getChildOperators().size() > 1 || currentOp2.getChildOperators().size() > 1) {
1460+
if (currentOp1.getNumChild() > 1 || currentOp2.getNumChild() > 1) {
14791461
// TODO: Support checking multiple child operators to merge further.
14801462
discardableInputOps.addAll(gatherDPPBranchOps(pctx, optimizerCache, discardableOps));
14811463
discardableInputOps.addAll(gatherDPPBranchOps(pctx, optimizerCache, retainableOps,
14821464
discardableInputOps));
1483-
bailOut = true;
1465+
return createSharedResultForRoot(optimizerCache, retainableTsOp, discardableTsOp,
1466+
retainableOps, discardableOps, discardableInputOps);
14841467
}
14851468
currentOp1 = currentOp1.getChildOperators().get(0);
14861469
currentOp2 = currentOp2.getChildOperators().get(0);
@@ -1489,37 +1472,54 @@ private static SharedResult extractSharedOptimizationInfoForRoot(ParseContext pc
14891472
discardableInputOps.addAll(gatherDPPBranchOps(pctx, optimizerCache, discardableOps));
14901473
discardableInputOps.addAll(gatherDPPBranchOps(pctx, optimizerCache, retainableOps,
14911474
discardableInputOps));
1492-
bailOut = true;
1475+
return createSharedResultForRoot(optimizerCache, retainableTsOp, discardableTsOp,
1476+
retainableOps, discardableOps, discardableInputOps);
14931477
}
1478+
}
14941479

1495-
if (bailOut) {
1496-
// Accumulate InMemoryDataSize of unmerged MapJoin operators.
1497-
Set<Operator<?>> opsWork1 = findWorkOperators(optimizerCache, currentOp1);
1498-
for (Operator<?> op : opsWork1) {
1499-
if (op instanceof MapJoinOperator) {
1500-
MapJoinOperator mop = (MapJoinOperator) op;
1501-
dataSize = StatsUtils.safeAdd(dataSize, mop.getConf().getInMemoryDataSize());
1502-
maxDataSize = mop.getConf().getMemoryMonitorInfo().getAdjustedNoConditionalTaskSize();
1503-
}
1480+
return extractSharedOptimizationInfo(pctx, optimizerCache, equalOp1, equalOp2,
1481+
currentOp1, currentOp2, retainableOps, discardableOps, discardableInputOps, mayRemoveDownStreamOperators,
1482+
mayRemoveInputOps);
1483+
}
1484+
1485+
private static SharedResult createSharedResultForRoot(
1486+
SharedWorkOptimizerCache optimizerCache,
1487+
Operator<?> retainableOp,
1488+
Operator<?> discardableOp,
1489+
LinkedHashSet<Operator<?>> retainableOps,
1490+
LinkedHashSet<Operator<?>> discardableOps,
1491+
Set<Operator<?>> discardableInputOps) {
1492+
// Assertion: retainableOps and discardableOps do not contain MapJoinOperator.
1493+
1494+
// Accumulate InMemoryDataSize of unmerged MapJoin operators.
1495+
long dataSize = 0L;
1496+
long maxDataSize = 0L;
1497+
1498+
Set<Operator<?>> opsWork1 = findWorkOperators(optimizerCache, retainableOp);
1499+
for (Operator<?> op : opsWork1) {
1500+
if (op instanceof MapJoinOperator) {
1501+
MapJoinOperator mop = (MapJoinOperator) op;
1502+
dataSize = StatsUtils.safeAdd(dataSize, mop.getConf().getInMemoryDataSize());
1503+
if (maxDataSize < mop.getConf().getMemoryMonitorInfo().getAdjustedNoConditionalTaskSize()) {
1504+
maxDataSize = mop.getConf().getMemoryMonitorInfo().getAdjustedNoConditionalTaskSize();
15041505
}
1505-
Set<Operator<?>> opsWork2 = findWorkOperators(optimizerCache, currentOp2);
1506-
for (Operator<?> op : opsWork2) {
1507-
if (op instanceof MapJoinOperator) {
1508-
MapJoinOperator mop = (MapJoinOperator) op;
1509-
dataSize = StatsUtils.safeAdd(dataSize, mop.getConf().getInMemoryDataSize());
1510-
maxDataSize = mop.getConf().getMemoryMonitorInfo().getAdjustedNoConditionalTaskSize();
1511-
}
1506+
}
1507+
}
1508+
Set<Operator<?>> opsWork2 = findWorkOperators(optimizerCache, discardableOp);
1509+
for (Operator<?> op : opsWork2) {
1510+
if (op instanceof MapJoinOperator) {
1511+
MapJoinOperator mop = (MapJoinOperator) op;
1512+
dataSize = StatsUtils.safeAdd(dataSize, mop.getConf().getInMemoryDataSize());
1513+
if (maxDataSize < mop.getConf().getMemoryMonitorInfo().getAdjustedNoConditionalTaskSize()) {
1514+
maxDataSize = mop.getConf().getMemoryMonitorInfo().getAdjustedNoConditionalTaskSize();
15121515
}
1513-
1514-
return new SharedResult(retainableOps, discardableOps, discardableInputOps, dataSize, maxDataSize);
15151516
}
15161517
}
15171518

1518-
return extractSharedOptimizationInfo(pctx, optimizerCache, equalOp1, equalOp2,
1519-
currentOp1, currentOp2, retainableOps, discardableOps, discardableInputOps, mayRemoveDownStreamOperators,
1520-
mayRemoveInputOps);
1519+
return new SharedResult(retainableOps, discardableOps, discardableInputOps, dataSize, maxDataSize);
15211520
}
15221521

1522+
15231523
private static SharedResult extractSharedOptimizationInfo(ParseContext pctx,
15241524
SharedWorkOptimizerCache optimizerCache,
15251525
Operator<?> retainableOpEqualParent,
@@ -1597,7 +1597,9 @@ private static SharedResult extractSharedOptimizationInfo(ParseContext pctx,
15971597
if (equalOp1 instanceof MapJoinOperator) {
15981598
MapJoinOperator mop = (MapJoinOperator) equalOp1;
15991599
dataSize = StatsUtils.safeAdd(dataSize, mop.getConf().getInMemoryDataSize());
1600-
maxDataSize = mop.getConf().getMemoryMonitorInfo().getAdjustedNoConditionalTaskSize();
1600+
if (maxDataSize < mop.getConf().getMemoryMonitorInfo().getAdjustedNoConditionalTaskSize()) {
1601+
maxDataSize = mop.getConf().getMemoryMonitorInfo().getAdjustedNoConditionalTaskSize();
1602+
}
16011603
}
16021604
if (currentOp1.getChildOperators().size() > 1 ||
16031605
currentOp2.getChildOperators().size() > 1) {
@@ -1615,15 +1617,19 @@ private static SharedResult extractSharedOptimizationInfo(ParseContext pctx,
16151617
if (op instanceof MapJoinOperator && !retainableOps.contains(op)) {
16161618
MapJoinOperator mop = (MapJoinOperator) op;
16171619
dataSize = StatsUtils.safeAdd(dataSize, mop.getConf().getInMemoryDataSize());
1618-
maxDataSize = mop.getConf().getMemoryMonitorInfo().getAdjustedNoConditionalTaskSize();
1620+
if (maxDataSize < mop.getConf().getMemoryMonitorInfo().getAdjustedNoConditionalTaskSize()) {
1621+
maxDataSize = mop.getConf().getMemoryMonitorInfo().getAdjustedNoConditionalTaskSize();
1622+
}
16191623
}
16201624
}
16211625
Set<Operator<?>> opsWork2 = findWorkOperators(optimizerCache, currentOp2);
16221626
for (Operator<?> op : opsWork2) {
16231627
if (op instanceof MapJoinOperator && !discardableOps.contains(op)) {
16241628
MapJoinOperator mop = (MapJoinOperator) op;
16251629
dataSize = StatsUtils.safeAdd(dataSize, mop.getConf().getInMemoryDataSize());
1626-
maxDataSize = mop.getConf().getMemoryMonitorInfo().getAdjustedNoConditionalTaskSize();
1630+
if (maxDataSize < mop.getConf().getMemoryMonitorInfo().getAdjustedNoConditionalTaskSize()) {
1631+
maxDataSize = mop.getConf().getMemoryMonitorInfo().getAdjustedNoConditionalTaskSize();
1632+
}
16271633
}
16281634
}
16291635

ql/src/test/queries/clientpositive/sharedwork_mapjoin_datasize_check.q

Lines changed: 42 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@ set hive.auto.convert.join.noconditionaltask.size=500;
77

88
-- The InMemoryDataSize of MapJoin is 280. Therefore, SWO should not merge 2 TSs reading src
99
-- as the sum of InMemoryDataSize of 2 unmerged MapJoin exceeds 500.
10+
-- TSs are identical and FILs are not identical.
1011
explain extended
1112
with
1213
a as (
@@ -27,7 +28,48 @@ bb as (
2728
)
2829
select * from aa join bb on aa.a = bb.a;
2930

31+
32+
-- The InMemoryDataSize of MapJoin is 280. Since the limit is 1000, SWO should not merge 4 TSs into a single TS.
33+
-- TSs are identical and TS.getNumChild() > 1.
34+
set hive.auto.convert.join.noconditionaltask.size=1000;
35+
explain extended
36+
with
37+
a as (
38+
select src.key a, src.value b, src1.value c
39+
from src, src1
40+
where src.key = src1.key and src.value > 1000000 and src1.value > 1000000
41+
),
42+
b as (
43+
select src.key a, 2 * src.value b, src1.value c
44+
from src, src1
45+
where src.key = src1.key and src.value > 1000001 and src1.value > 1000001
46+
),
47+
c as (
48+
select src.key a, 3 * src.value b, src1.value c
49+
from src, src1
50+
where src.key = src1.key and src.value > 1000002 and src1.value > 1000002
51+
),
52+
d as (
53+
select src.key a, 4 * src.value b, src1.value c
54+
from src, src1
55+
where src.key = src1.key and src.value > 1000003 and src1.value > 1000003
56+
),
57+
aa as (
58+
select a, avg(b) as b, sum(c) as c from a group by a
59+
),
60+
bb as (
61+
select a, avg(b) as b, sum(c) as c from b group by a
62+
),
63+
cc as (
64+
select a, avg(b) as b, sum(c) as c from c group by a
65+
),
66+
dd as (
67+
select a, avg(b) as b, sum(c) as c from d group by a
68+
)
69+
select * from aa join bb join cc join dd on aa.a = bb.a and aa.a = cc.a and aa.a = dd.a;
70+
3071
-- The InMemoryDataSize of MapJoin is 280. Since the limit is 1000, SWO should not merge 4 TSs into a single TS.
72+
-- TSs, FILs are identical and FIL.getNumChild() > 1.
3173
set hive.auto.convert.join.noconditionaltask.size=1000;
3274
explain extended
3375
with

0 commit comments

Comments
 (0)