diff --git a/sqlglot/dialects/duckdb.py b/sqlglot/dialects/duckdb.py
index 10d490934c..2ad4628d0a 100644
--- a/sqlglot/dialects/duckdb.py
+++ b/sqlglot/dialects/duckdb.py
@@ -1203,6 +1203,37 @@ def objectinsert_sql(self, expression: exp.ObjectInsert) -> str:
 
         return self.func("STRUCT_INSERT", this, kv_sql)
 
+    def startswith_sql(self, expression: exp.StartsWith) -> str:
+        """Generate DuckDB's `STARTS_WITH`, coercing non-text operands to VARCHAR.
+
+        A byte-string literal operand is emitted as a plain string literal. Any
+        other operand whose type is known and is neither VARCHAR nor UNKNOWN is
+        wrapped in a cast to VARCHAR. Replacement operands are built as new nodes
+        and passed to `self.func`; nothing is swapped into `expression` itself.
+        """
+        from sqlglot.optimizer.annotate_types import annotate_types
+
+        def _as_text(operand: exp.Expression) -> exp.Expression:
+            if isinstance(operand, exp.ByteString):
+                return exp.Literal.string(operand.this)
+
+            # An operand that arrives without a type is annotated on demand.
+            if not operand.type:
+                operand = annotate_types(operand, dialect=self.dialect)
+
+            if operand.type and not operand.is_type(
+                exp.DataType.Type.VARCHAR, exp.DataType.Type.UNKNOWN
+            ):
+                return exp.cast(operand, exp.DataType.Type.VARCHAR)
+
+            return operand
+
+        return self.func(
+            "STARTS_WITH",
+            _as_text(expression.this),
+            _as_text(expression.expression),
+        )
+
     def unnest_sql(self, expression: exp.Unnest) -> str:
         explode_array = expression.args.get("explode_array")
         if explode_array:
diff --git a/tests/dialects/test_bigquery.py b/tests/dialects/test_bigquery.py
index cee8a47f9b..d637a90693 100644
--- a/tests/dialects/test_bigquery.py
+++ b/tests/dialects/test_bigquery.py
@@ -1205,6 +1205,21 @@ def test_bigquery(self):
                 "spark": "CAST(a AS BINARY)",
             },
         )
+        # Test STARTS_WITH with BYTES/BLOB handling from BigQuery to DuckDB
+        self.validate_all(
+            "STARTS_WITH(CAST('foo' AS BYTES), CAST('f' AS BYTES))",
+            write={
+                "bigquery": "STARTS_WITH(CAST('foo' AS BYTES), CAST('f' AS BYTES))",
+                "duckdb": "STARTS_WITH(CAST(CAST('foo' AS BLOB) AS TEXT), CAST(CAST('f' AS BLOB) AS TEXT))",
+            },
+        )
+        self.validate_all(
+            "STARTS_WITH(CAST('foo' AS BYTES), b'f')",
+            write={
+                "bigquery": "STARTS_WITH(CAST('foo' AS BYTES), b'f')",
+                "duckdb": "STARTS_WITH(CAST(CAST('foo' AS BLOB) AS TEXT), 'f')",
+            },
+        )
         self.validate_all(
             "CAST(a AS NUMERIC)",
             write={