-
Notifications
You must be signed in to change notification settings - Fork 405
fix: non-breaking detection for redundant cast projections #5851
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1588,7 +1588,7 @@ def is_breaking_change(self, previous: Model) -> t.Optional[bool]: | |
|
|
||
| for edit in edits: | ||
| if not isinstance(edit, Insert): | ||
| return None | ||
| return _additive_projection_change(previous_query, this_query, self.dialect) | ||
|
|
||
| expr = edit.expression | ||
| if isinstance(expr, exp.UDTF): | ||
|
|
@@ -1602,7 +1602,7 @@ def is_breaking_change(self, previous: Model) -> t.Optional[bool]: | |
| expr = parent | ||
|
|
||
| if not _is_projection(expr) and expr.parent not in inserted_expressions: | ||
| return None | ||
| return _additive_projection_change(previous_query, this_query, self.dialect) | ||
|
|
||
| return False | ||
|
|
||
|
|
@@ -2907,6 +2907,75 @@ def _is_projection(expr: exp.Expr) -> bool: | |
| return isinstance(parent, exp.Select) and expr.arg_key == "expressions" | ||
|
|
||
|
|
||
| def _additive_projection_change( | ||
| previous_query: exp.Query, | ||
| this_query: exp.Query, | ||
| dialect: DialectType, | ||
| ) -> t.Optional[bool]: | ||
| """Fallback for when SQLGlot's tree diff can't express an additive projection change. | ||
|
|
||
| SQLGlot's diff matches nodes by structural similarity, so interchangeable leaves (e.g. two | ||
| identical ``CAST(... AS T)`` target types) can be cross-matched. Inserting a same-type cast | ||
| above an existing one therefore yields spurious ``Move`` / ``Update`` edits even though a | ||
| column was simply added to the SELECT list. In that case the edit-based check above is | ||
| inconclusive, so we verify additivity directly against the output projections. | ||
|
|
||
| Returns ``False`` (non-breaking) only when the change is provably additive: | ||
| * both queries are simple ``SELECT`` statements, | ||
| * everything other than the projection list is structurally identical, | ||
| * no added projection is a (potentially cardinality-changing) ``UDTF``, and | ||
| * every previous projection is preserved, in order, within the new projection list. | ||
|
|
||
| Otherwise returns ``None`` (undetermined), preserving the conservative default. | ||
| """ | ||
| # UNIONs or other query expressions, are left to the caller's conservative diff result. | ||
| if not isinstance(previous_query, exp.Select) or not isinstance(this_query, exp.Select): | ||
| return None | ||
|
|
||
| previous_projections = previous_query.expressions | ||
| this_projections = this_query.expressions | ||
| # If the new query has not gained any projections, this cannot be an additive projection-only | ||
| # change, so there is nothing for this fallback to prove. | ||
| if len(this_projections) <= len(previous_projections): | ||
| return None | ||
|
|
||
| # Adding a UDTF projection (e.g. EXPLODE / UNNEST) can change row cardinality, so such a | ||
| # change is not safely non-breaking even when it appears as an extra SELECT item. | ||
| for projection in this_projections: | ||
| if isinstance(projection, exp.UDTF) and not projection.find_ancestor(exp.Subquery): | ||
| return None | ||
|
|
||
| # Everything other than the projection list must be structurally identical. Replacing each | ||
| # SELECT list with the same dummy literal lets the expression equality check focus on the | ||
| # FROM / WHERE / GROUP BY / ORDER BY / etc. parts of the query. | ||
| previous_skeleton = previous_query.copy() | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This fallback can incorrectly treat projection inserts as non-breaking when the query uses positional references outside the SELECT list. For example, this is currently categorized as NON_BREAKING on this branch:
But |
||
| this_skeleton = this_query.copy() | ||
| previous_skeleton.set("expressions", [exp.Literal.number(1)]) | ||
| this_skeleton.set("expressions", [exp.Literal.number(1)]) | ||
| if previous_skeleton != this_skeleton: | ||
| return None | ||
|
|
||
| # Every previous projection must appear, in order, within the new projection list. Comparing | ||
| # dialect-normalized SQL makes semantically equivalent projection nodes match even when the | ||
| # parser built distinct object identities. | ||
| this_projection_sql = [p.sql(dialect=dialect, comments=False) for p in this_projections] | ||
| search_start = 0 | ||
| for projection in previous_projections: | ||
| target_sql = projection.sql(dialect=dialect, comments=False) | ||
| # Continue after the previous match so added columns can appear before, between, or after | ||
| # the original projections, but existing projections cannot be reordered or rewritten. | ||
| for index in range(search_start, len(this_projection_sql)): | ||
| if this_projection_sql[index] == target_sql: | ||
| search_start = index + 1 | ||
| break | ||
| else: | ||
| return None | ||
|
|
||
| # At this point the query shape is unchanged and all prior outputs are preserved, so the only | ||
| # remaining difference is one or more additional, non-UDTF projections. | ||
| return False | ||
|
|
||
|
|
||
| def _single_expr_or_tuple(values: t.Sequence[exp.Expr]) -> exp.Expr | exp.Tuple: | ||
| return values[0] if len(values) == 1 else exp.Tuple(expressions=values) | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This UDTF guard misses aliased UDTF projections because
EXPLODE(y) AS yis anexp.Alias, not anexp.UDTF.When the fallback is triggered by same-type casts, this branch can categorize a change like this as NON_BREAKING:
SELECT a::DATE AS a, s::TEXT AS s FROM t->
SELECT a::DATE AS a, x::TEXT AS x, EXPLODE(y) AS y, s::TEXT AS s FROM tUpstream returns
Nonefor this case. The check probably needs to inspect newly added projections for nested UDTFs, while still allowing UDTFs inside scalar subqueries as the existing tests expect.