Skip to content

Commit 9e872cf

Browse files
committed
feat: support IN clause optimization for sharding key
1 parent 8bab753 commit 9e872cf

File tree

8 files changed

+1373
-21
lines changed

8 files changed

+1373
-21
lines changed

features/sharding/core/src/main/java/org/apache/shardingsphere/sharding/rewrite/token/ShardingTokenGenerateBuilder.java

Lines changed: 4 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -28,19 +28,7 @@
2828
import org.apache.shardingsphere.infra.rewrite.sql.token.keygen.generator.GeneratedKeyInsertValuesTokenGenerator;
2929
import org.apache.shardingsphere.infra.route.context.RouteContext;
3030
import org.apache.shardingsphere.sharding.rewrite.token.generator.IgnoreForSingleRoute;
31-
import org.apache.shardingsphere.sharding.rewrite.token.generator.impl.ShardingAggregationDistinctTokenGenerator;
32-
import org.apache.shardingsphere.sharding.rewrite.token.generator.impl.ShardingConstraintTokenGenerator;
33-
import org.apache.shardingsphere.sharding.rewrite.token.generator.impl.ShardingCursorTokenGenerator;
34-
import org.apache.shardingsphere.sharding.rewrite.token.generator.impl.ShardingDistinctProjectionPrefixTokenGenerator;
35-
import org.apache.shardingsphere.sharding.rewrite.token.generator.impl.ShardingFetchDirectionTokenGenerator;
36-
import org.apache.shardingsphere.sharding.rewrite.token.generator.impl.ShardingIndexTokenGenerator;
37-
import org.apache.shardingsphere.sharding.rewrite.token.generator.impl.ShardingInsertValuesTokenGenerator;
38-
import org.apache.shardingsphere.sharding.rewrite.token.generator.impl.ShardingOffsetTokenGenerator;
39-
import org.apache.shardingsphere.sharding.rewrite.token.generator.impl.ShardingOrderByTokenGenerator;
40-
import org.apache.shardingsphere.sharding.rewrite.token.generator.impl.ShardingProjectionsTokenGenerator;
41-
import org.apache.shardingsphere.sharding.rewrite.token.generator.impl.ShardingRemoveTokenGenerator;
42-
import org.apache.shardingsphere.sharding.rewrite.token.generator.impl.ShardingRowCountTokenGenerator;
43-
import org.apache.shardingsphere.sharding.rewrite.token.generator.impl.ShardingTableTokenGenerator;
31+
import org.apache.shardingsphere.sharding.rewrite.token.generator.impl.*;
4432
import org.apache.shardingsphere.sharding.rule.ShardingRule;
4533

4634
import java.util.Collection;
@@ -78,6 +66,9 @@ public Collection<SQLTokenGenerator> getSQLTokenGenerators() {
7866
addSQLTokenGenerator(result, new ShardingRemoveTokenGenerator());
7967
addSQLTokenGenerator(result, new ShardingCursorTokenGenerator(rule));
8068
addSQLTokenGenerator(result, new ShardingFetchDirectionTokenGenerator());
69+
70+
// Register the token generator that optimizes IN predicates on sharding keys
71+
addSQLTokenGenerator(result, new ShardingInPredicateTokenGenerator(rule));
8172
return result;
8273
}
8374

features/sharding/core/src/main/java/org/apache/shardingsphere/sharding/rewrite/token/generator/impl/ShardingInPredicateTokenGenerator.java

Lines changed: 748 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 255 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,255 @@
1+
package org.apache.shardingsphere.sharding.rewrite.token.pojo;
2+
3+
import lombok.Getter;
4+
import org.apache.shardingsphere.infra.rewrite.sql.token.common.pojo.ParameterFilterable;
5+
import org.apache.shardingsphere.infra.rewrite.sql.token.common.pojo.RouteUnitAware;
6+
import org.apache.shardingsphere.infra.rewrite.sql.token.common.pojo.SQLToken;
7+
import org.apache.shardingsphere.infra.rewrite.sql.token.common.pojo.Substitutable;
8+
import org.apache.shardingsphere.infra.route.context.RouteUnit;
9+
10+
import java.util.*;
11+
import java.util.stream.Collectors;
12+
13+
/**
14+
* SQL rewrite token for optimizing sharding IN predicates across multiple columns and route units.
15+
*
16+
* <p>This token handles the complex rewriting of IN predicates in sharded environments where:
17+
* <ul>
18+
* <li>Different route units (database shards) may need different subsets of IN values</li>
19+
* <li>Multiple sharding columns may be involved in complex sharding strategies</li>
20+
* <li>Both parameter markers and literal values need proper handling</li>
21+
* <li>SQL syntax optimization (converting single-value IN to equality) is applied</li>
22+
* </ul>
23+
*
24+
* <p>The token supports both standard and complex sharding strategies:
25+
* <ul>
26+
* <li><strong>Standard sharding</strong>: Single sharding column, values distributed by algorithm</li>
27+
* <li><strong>Complex sharding</strong>: Multiple sharding columns, cartesian product distribution</li>
28+
* </ul>
29+
*
30+
* <p>Key optimization features:
31+
* <ul>
32+
* <li>Route-specific value filtering: Only relevant values sent to each shard</li>
33+
* <li>SQL syntax optimization: Single values converted from IN to equality conditions</li>
34+
* <li>Parameter index management: Maintains correct parameter positions</li>
35+
* <li>Empty clause handling: Generates "IN (NULL) AND 1 = 0" for impossible conditions</li>
36+
* </ul>
37+
*
38+
* @author yinh
39+
* @see org.apache.shardingsphere.sharding.rewrite.token.generator.impl.ShardingInPredicateTokenGenerator
40+
* @see org.apache.shardingsphere.sharding.rewrite.token.pojo.ShardingInPredicateValue
41+
*/
42+
public final class ShardingInPredicateToken extends SQLToken implements Substitutable, RouteUnitAware, ParameterFilterable {
43+
44+
@Getter
45+
private final int stopIndex;
46+
47+
/**
48+
* Maps route units to their respective column parameters for optimized IN predicate rewriting.
49+
*
50+
* <p>Structure: RouteUnit -> ColumnName -> List of Values
51+
* <p>This nested mapping enables:
52+
* <ul>
53+
* <li>Route-specific value filtering: Each route unit only gets relevant values</li>
54+
* <li>Multi-column support: Complex sharding strategies with multiple columns</li>
55+
* <li>Efficient lookup: Direct access to values for specific route and column</li>
56+
* </ul>
57+
*
58+
* <p>Example for a query "WHERE user_id IN (1,2,3) AND tenant_id IN ('a','b')":
59+
* <pre>
60+
* RouteUnit1 -> {
61+
* "user_id" -> [Value(1), Value(3)],
62+
* "tenant_id" -> [Value('a')]
63+
* }
64+
* RouteUnit2 -> {
65+
* "user_id" -> [Value(2)],
66+
* "tenant_id" -> [Value('b')]
67+
* }
68+
* </pre>
69+
*/
70+
private final Map<RouteUnit, Map<String, List<ShardingInPredicateValue>>> columnParameterMap;
71+
72+
/**
73+
* Constructs a new sharding IN predicate token with distributed parameters.
74+
*
75+
* @param startIndex the start position of the original IN predicate in the SQL
76+
* @param stopIndex the end position of the original IN predicate in the SQL
77+
* @param columnParameterMap the distributed parameters mapped by route unit and column name
78+
*/
79+
public ShardingInPredicateToken(final int startIndex,
80+
final int stopIndex,
81+
final Map<RouteUnit, Map<String, List<ShardingInPredicateValue>>> columnParameterMap) {
82+
super(startIndex);
83+
this.stopIndex = stopIndex;
84+
this.columnParameterMap = columnParameterMap;
85+
}
86+
87+
/**
88+
* Generates the optimized SQL fragment for the specified route unit.
89+
*
90+
* <p>This method creates route-specific IN clauses by:
91+
* <ul>
92+
* <li>Retrieving only the values relevant to the given route unit</li>
93+
* <li>Building separate IN clauses for each column</li>
94+
* <li>Optimizing single-value IN clauses to equality conditions</li>
95+
* <li>Handling empty conditions with "IN (NULL) AND 1 = 0"</li>
96+
* <li>Combining multiple column conditions with AND</li>
97+
* </ul>
98+
*
99+
* @param routeUnit the target route unit (database shard) for SQL generation
100+
* @return the optimized SQL fragment, or empty string if no conditions apply
101+
*/
102+
@Override
103+
public String toString(final RouteUnit routeUnit) {
104+
Map<String, List<ShardingInPredicateValue>> routeUnitParams = columnParameterMap.get(routeUnit);
105+
if (routeUnitParams == null || routeUnitParams.isEmpty()) {
106+
return "";
107+
}
108+
109+
return buildOptimizedClauses(routeUnitParams);
110+
}
111+
112+
/**
113+
* Identifies parameter indices that should be removed for the specified route unit.
114+
*
115+
* <p>In sharded environments, each route unit only needs a subset of the original parameters.
116+
* This method calculates which parameter indices from the original SQL should be filtered out
117+
* for the given route unit, enabling proper parameter binding in prepared statements.
118+
*
119+
* <p>The calculation process:
120+
* <ol>
121+
* <li>Collect all parameter indices from all route units</li>
122+
* <li>Identify which parameter indices are kept for the specified route unit</li>
123+
* <li>Return the difference as indices to be removed</li>
124+
* </ol>
125+
*
126+
* @param routeUnit the route unit to calculate removed parameters for
127+
* @return a set of parameter indices that should be removed for this route unit
128+
*/
129+
@Override
130+
public Set<Integer> getRemovedParameterIndices(final RouteUnit routeUnit) {
131+
Map<String, List<ShardingInPredicateValue>> routeUnitParams = columnParameterMap.get(routeUnit);
132+
if (routeUnitParams == null) {
133+
return Collections.emptySet();
134+
}
135+
136+
Set<Integer> allIndices = getAllParameterIndices();
137+
Set<Integer> keptIndices = getKeptParameterIndices(routeUnitParams);
138+
Set<Integer> result = new HashSet<>(allIndices);
139+
result.removeAll(keptIndices);
140+
return result;
141+
}
142+
143+
/**
144+
* Determines if this token involves parameter filtering.
145+
*
146+
* <p>Returns true if any of the predicate values are parameter markers (?),
147+
* indicating that this token will affect parameter binding and requires
148+
* parameter index management during SQL rewriting.
149+
*
150+
* @return true if parameter filtering is needed, false otherwise
151+
*/
152+
@Override
153+
public boolean isParameterFilterable() {
154+
return columnParameterMap.values().stream()
155+
.flatMap(map -> map.values().stream())
156+
.flatMap(List::stream)
157+
.anyMatch(ShardingInPredicateValue::isParameter);
158+
}
159+
160+
/**
161+
* Collects all parameter indices from across all route units and columns.
162+
*
163+
* @return set of all parameter indices that appear in any route unit
164+
*/
165+
private Set<Integer> getAllParameterIndices() {
166+
return columnParameterMap.values().stream()
167+
.flatMap(map -> map.values().stream())
168+
.flatMap(List::stream)
169+
.filter(ShardingInPredicateValue::isParameter)
170+
.map(ShardingInPredicateValue::getParameterIndex)
171+
.collect(Collectors.toSet());
172+
}
173+
174+
/**
175+
* Collects parameter indices that should be kept for the specified parameter map.
176+
*
177+
* @param parameterMap the parameter map for a specific route unit
178+
* @return set of parameter indices that should be retained
179+
*/
180+
private Set<Integer> getKeptParameterIndices(Map<String, List<ShardingInPredicateValue>> parameterMap) {
181+
return parameterMap.values().stream()
182+
.flatMap(List::stream)
183+
.filter(ShardingInPredicateValue::isParameter)
184+
.map(ShardingInPredicateValue::getParameterIndex)
185+
.collect(Collectors.toSet());
186+
}
187+
188+
/**
189+
* Builds optimized WHERE clauses by combining multiple column conditions.
190+
*
191+
* <p>Each column with non-empty values gets its own IN clause (or equality condition),
192+
* and all column conditions are combined with AND operators.
193+
*
194+
* @param parameterMap mapping of column names to their respective values
195+
* @return combined SQL conditions, or empty string if no valid conditions
196+
*/
197+
private String buildOptimizedClauses(Map<String, List<ShardingInPredicateValue>> parameterMap) {
198+
return parameterMap.entrySet().stream()
199+
.filter(entry -> !entry.getValue().isEmpty())
200+
.map(entry -> buildInClause(entry.getKey(), entry.getValue()))
201+
.collect(Collectors.joining(" AND "));
202+
}
203+
204+
/**
205+
* Builds an optimized IN clause for a specific column and its values.
206+
*
207+
* <p>Optimization logic:
208+
* <ul>
209+
* <li>Empty list: "column IN (NULL) AND 1 = 0" (impossible condition)</li>
210+
* <li>Single value: "column = value" (equality is more efficient than IN)</li>
211+
* <li>Multiple values: "column IN (value1, value2, ...)" (standard IN clause)</li>
212+
* </ul>
213+
*
214+
* @param column the column name for the condition
215+
* @param infos the list of values for this column
216+
* @return the optimized SQL condition for this column
217+
*/
218+
private String buildInClause(final String column, final List<ShardingInPredicateValue> infos) {
219+
if (infos.isEmpty()) {
220+
return column + " IN (NULL) AND 1 = 0";
221+
}
222+
if (infos.size() == 1) {
223+
ShardingInPredicateValue single = infos.get(0);
224+
return String.format("%s = %s", column,
225+
single.isParameter() ? "?" : formatValue(single.getValue()));
226+
}
227+
String values = infos.stream()
228+
.map(info -> info.isParameter() ? "?" : formatValue(info.getValue()))
229+
.collect(Collectors.joining(", "));
230+
return String.format("%s IN (%s)", column, values);
231+
}
232+
233+
/**
234+
* Formats a literal value for SQL generation with proper quoting and escaping.
235+
*
236+
* <p>Formatting rules:
237+
* <ul>
238+
* <li>null values: "NULL"</li>
239+
* <li>String values: Single-quoted with internal quotes escaped</li>
240+
* <li>Other types: toString() representation</li>
241+
* </ul>
242+
*
243+
* @param value the value to format
244+
* @return the SQL-safe string representation of the value
245+
*/
246+
private String formatValue(final Comparable<?> value) {
247+
if (null == value) {
248+
return "NULL";
249+
}
250+
if (value instanceof String) {
251+
return "'" + value.toString().replace("'", "''") + "'";
252+
}
253+
return value.toString();
254+
}
255+
}
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
package org.apache.shardingsphere.sharding.rewrite.token.pojo;
2+
3+
import lombok.Getter;
4+
import lombok.RequiredArgsConstructor;
5+
6+
/**
7+
* Represents a value within a sharding IN predicate, which can be either a parameter marker or a literal value.
8+
* This class encapsulates the information needed to identify and process values in IN clauses during SQL rewriting.
9+
*
10+
* <p>In ShardingSphere's SQL rewriting process, IN predicates need special handling for sharding optimization.
11+
* Each value in an IN clause can be either:
12+
* <ul>
13+
* <li>A parameter marker (?): represented with parameterIndex and isParameter=true</li>
14+
* <li>A literal value: represented with the actual value and isParameter=false</li>
15+
* </ul>
16+
*
17+
* <p>This distinction is crucial for:
18+
* <ul>
19+
* <li>Parameter filtering during route-specific SQL generation</li>
20+
* <li>Correct SQL syntax generation (with or without quotes)</li>
21+
* <li>Maintaining parameter index mapping for prepared statements</li>
22+
* </ul>
23+
*
24+
* @author yinh
25+
* @see org.apache.shardingsphere.sharding.rewrite.token.pojo.ShardingInPredicateToken
26+
* @see org.apache.shardingsphere.sharding.rewrite.token.generator.impl.ShardingInPredicateTokenGenerator
27+
*/
28+
@RequiredArgsConstructor
29+
@Getter
30+
public final class ShardingInPredicateValue {
31+
32+
/**
33+
* The index of the parameter marker in the original prepared statement.
34+
* Only meaningful when {@link #isParameter} is true.
35+
*/
36+
private final int parameterIndex;
37+
38+
/**
39+
* The actual value of this predicate component.
40+
* For parameter markers, this represents the bound parameter value.
41+
* For literals, this represents the literal value from the SQL.
42+
* Must implement Comparable for sharding algorithm processing.
43+
*/
44+
private final Comparable<?> value;
45+
46+
/**
47+
* Indicates whether this value originates from a parameter marker (?) in the SQL.
48+
* When true, this value should be represented as "?" in rewritten SQL.
49+
* When false, this value should be formatted as a literal in rewritten SQL.
50+
*/
51+
private final boolean isParameter;
52+
}

0 commit comments

Comments
 (0)