Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
f588e12
extracts adbc parquet load job with file format selector
rudolfix Nov 18, 2025
f2ffc42
ports postgres parquet job to base job
rudolfix Nov 18, 2025
bee3720
implements mssql adbc job
rudolfix Nov 18, 2025
9934559
adds pickle test for all destination caps
rudolfix Nov 18, 2025
2d6380a
adds dbc to adbc group, updates test workflow
rudolfix Nov 18, 2025
302907f
fixes sqlglot from find
rudolfix Nov 19, 2025
2786892
fixes docs
rudolfix Nov 19, 2025
517afaa
adds sqlalchemy adbc docs
rudolfix Nov 20, 2025
c92c4eb
adds support for sqlite and mysql in sqlalchemy
rudolfix Nov 20, 2025
6a9aa93
fixes and tests str annotation resolving
rudolfix Nov 22, 2025
5839cf1
allows to disable adbc and does that in tests
rudolfix Nov 22, 2025
9bbc1a0
Merge branch 'devel' into feat/mssql-adbc-parquet-ingestion
rudolfix Nov 22, 2025
806f7f2
fixes imports
rudolfix Nov 22, 2025
2ecf82d
docs lock bump
rudolfix Nov 22, 2025
506d894
fixes globalns extraction
rudolfix Nov 22, 2025
3b55171
clarifies how adbc drivers are installed, implements fallback for pos…
rudolfix Nov 23, 2025
503f87d
improves dashboard multi schema test
rudolfix Nov 23, 2025
03add05
fixes followup jobs
rudolfix Nov 23, 2025
330675b
Fix: The child table column remains in the schema as a partial column…
anuunchin Nov 23, 2025
3fc3114
Updated the DltResourceHints class to set a default boundary timestam…
alkaline-0 Nov 14, 2025
a559c0c
Added specific exception
alkaline-0 Nov 14, 2025
c32df02
UPD: changing the code according to feedback
alkaline-0 Nov 18, 2025
80277b1
Update documentation to clarify usage of boundary_timestamp and add t…
alkaline-0 Nov 18, 2025
6eaa149
UPD: Merged Sqlglot into the branch and addressed feedback
alkaline-0 Nov 24, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions .github/workflows/test_destinations_local.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ jobs:
destinations: "[\"postgres\", \"duckdb\", \"ducklake\", \"dummy\"]"
filesystem_drivers: "[\"memory\", \"file\"]"
extras: "--group adbc --extra postgres --extra postgis --extra parquet --extra duckdb --extra cli --extra filesystem"
post_install_commands: "uv run dbc install postgresql"
needs_postgres: true

# Clickhouse OSS (TODO: test with minio s3)
Expand All @@ -60,17 +61,17 @@ jobs:
- name: sqlalchemy
destinations: "[\"sqlalchemy\"]"
filesystem_drivers: "[\"memory\", \"file\"]"
extras: "--extra sqlalchemy --extra filesystem --extra parquet"
extras: "--extra sqlalchemy --extra filesystem --extra parquet --group adbc"
needs_mysql: true
post_install_commands: "uv run pip install pymysql && uv run pip install sqlalchemy==1.4"
post_install_commands: "uv run pip install pymysql && uv run pip install sqlalchemy==1.4 && uv run dbc install mysql && uv run dbc install sqlite"

# SQLAlchemy 2.0 (same as above but with sqlalchemy 2.0)
- name: sqlalchemy
destinations: "[\"sqlalchemy\"]"
filesystem_drivers: "[\"memory\", \"file\"]"
extras: "--extra sqlalchemy --extra filesystem --extra parquet"
extras: "--extra sqlalchemy --extra filesystem --extra parquet --group adbc"
needs_mysql: true
post_install_commands: "uv run pip install pymysql && uv run pip install sqlalchemy==2.0"
post_install_commands: "uv run pip install pymysql && uv run pip install sqlalchemy==2.0 && uv run dbc install mysql && uv run dbc install sqlite"

env:
ACTIVE_DESTINATIONS: ${{ matrix.destinations }}
Expand Down
4 changes: 3 additions & 1 deletion .github/workflows/test_destinations_remote.yml
Original file line number Diff line number Diff line change
Expand Up @@ -112,8 +112,9 @@ jobs:
- name: mssql
destinations: "[\"mssql\"]"
filesystem_drivers: "[\"memory\"]"
extras: "--extra mssql --extra s3 --extra gs --extra az --extra parquet"
extras: "--extra mssql --extra s3 --extra gs --extra az --extra parquet --group adbc"
pre_install_commands: "sudo ACCEPT_EULA=Y apt-get install --yes msodbcsql18"
post_install_commands: "uv run dbc install mssql"
always_run_all_tests: true

# Synapse
Expand All @@ -133,6 +134,7 @@ jobs:
destinations: "[\"postgres\"]"
filesystem_drivers: "[\"memory\", \"file\"]"
extras: "--group adbc --extra postgres --extra postgis --extra parquet --extra duckdb"
post_install_commands: "uv pip install adbc-driver-postgresql" # use adbc driver installation
always_run_all_tests: true

# Qdrant (disabled, because we do not have a test account atm, qdrant is tested with local version)
Expand Down
13 changes: 10 additions & 3 deletions dlt/common/configuration/inject.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,13 @@
from typing import Callable, Dict, Type, Any, Optional, Union, Tuple, TypeVar, overload, cast
from inspect import Signature, Parameter, unwrap

from dlt.common.typing import DictStrAny, TFun, AnyFun
from dlt.common.typing import (
DictStrAny,
TFun,
AnyFun,
get_type_globals,
resolve_single_annotation,
)
from dlt.common.configuration.resolve import resolve_configuration, inject_section
from dlt.common.configuration.specs.base_configuration import BaseConfiguration
from dlt.common.configuration.specs.config_section_context import ConfigSectionContext
Expand Down Expand Up @@ -128,12 +134,13 @@ def decorator(f: TFun) -> TFun:

spec_arg: Parameter = None
section_name_arg: Parameter = None
globalns = get_type_globals(f)

for p in sig.parameters.values():
# for all positional parameters that do not have default value, set default
# if hasattr(SPEC, p.name) and p.default == Parameter.empty:
# p._default = None # type: ignore
if p.annotation is SPEC:
if resolve_single_annotation(p.annotation, globalns=globalns) is SPEC:
# if any argument has type SPEC then use it to take initial value
spec_arg = p
if p.name == section_arg_name:
Expand Down Expand Up @@ -209,7 +216,7 @@ def update_bound_args(
for p in sig.parameters.values():
if p.name in resolved_params:
bound_args.arguments[p.name] = resolved_params.pop(p.name)
if p.annotation is SPEC:
if resolve_single_annotation(p.annotation, globalns=globalns) is SPEC:
bound_args.arguments[p.name] = config
# pass all other config parameters into kwargs if present
if kwargs_arg is not None:
Expand Down
11 changes: 7 additions & 4 deletions dlt/common/configuration/specs/base_configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
Annotated,
Self,
extract_inner_type,
get_type_globals,
is_annotated,
is_any_type,
is_final_type,
Expand All @@ -46,6 +47,7 @@
is_union_type,
get_args,
get_origin,
resolve_single_annotation,
)
from dlt.common.data_types import py_type_to_sc_type
from dlt.common.configuration.exceptions import (
Expand Down Expand Up @@ -190,6 +192,7 @@ def configspec(
def wrap(cls: Type[TAnyClass]) -> Type[TAnyClass]:
cls.__hint_resolvers__ = {} # type: ignore[attr-defined]
is_context = issubclass(cls, _F_ContainerInjectableContext)

# if type does not derive from BaseConfiguration then derive it
with contextlib.suppress(NameError):
if not issubclass(cls, BaseConfiguration):
Expand All @@ -211,6 +214,7 @@ def wrap(cls: Type[TAnyClass]) -> Type[TAnyClass]:
)
setattr(cls, ann, None)
# get all attributes without corresponding annotations
globalns = get_type_globals(cls)
for att_name, att_value in list(cls.__dict__.items()):
# skip callables, dunder names, class variables and some special names
if callable(att_value):
Expand All @@ -233,9 +237,7 @@ def wrap(cls: Type[TAnyClass]) -> Type[TAnyClass]:
# resolve the annotation as per PEP 563
# NOTE: we do not use get_type_hints because at this moment cls is an unknown name
# (ie. used as decorator and module is being imported)
if isinstance(hint, str):
hint = eval(hint)

hint = resolve_single_annotation(hint, globalns=globalns, raise_on_error=True)
# context can have any type
if not is_valid_hint(hint) and not is_context:
raise ConfigFieldTypeHintNotSupported(att_name, cls, hint)
Expand Down Expand Up @@ -376,8 +378,9 @@ def _get_resolvable_dataclass_fields(cls) -> Iterator[TDtcField]:
@classmethod
def get_resolvable_fields(cls) -> Dict[str, type]:
"""Returns a mapping of fields to their type hints. Dunders should not be resolved and are not returned"""
globalns = get_type_globals(cls)
return {
f.name: eval(f.type) if isinstance(f.type, str) else f.type
f.name: resolve_single_annotation(f.type, globalns=globalns)
for f in cls._get_resolvable_dataclass_fields()
}

Expand Down
14 changes: 12 additions & 2 deletions dlt/common/reflection/spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
TSecretValue,
Annotated,
SecretSentinel,
get_type_globals,
resolve_single_annotation,
)
from dlt.common.configuration import configspec, is_valid_hint, is_secret_hint
from dlt.common.configuration.specs import BaseConfiguration
Expand Down Expand Up @@ -56,14 +58,19 @@ def spec_from_signature(
new_fields: Dict[str, Any] = {}
sig_base_fields: Dict[str, Any] = {}
annotations: Dict[str, Any] = {}
globalns = get_type_globals(f)

for p in sig.parameters.values():
# skip *args and **kwargs, skip typical method params
if p.kind not in (Parameter.VAR_KEYWORD, Parameter.VAR_POSITIONAL) and p.name not in [
"self",
"cls",
]:
field_type = AnyType if p.annotation == Parameter.empty else p.annotation
field_type = (
AnyType
if p.annotation == Parameter.empty
else resolve_single_annotation(p.annotation, globalns=globalns)
)
# keep the base fields if sig not annotated
if (
p.name in base_fields
Expand Down Expand Up @@ -101,7 +108,10 @@ def spec_from_signature(
annotations[p.name] = field_type
# set field with default value
new_fields[p.name] = p.default
# print(f"Param {p.name} is {field_type}: {p.default} due to {include_defaults} or {type_from_literal}")
# print(
# f"Param {p.name} is {field_type}: {p.default} due to {include_defaults} or"
# f" {type_from_literal}"
# )

signature_fields = {**sig_base_fields, **new_fields}

Expand Down
17 changes: 17 additions & 0 deletions dlt/common/schema/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -696,6 +696,23 @@ def remove_processing_hints(tables: TSchemaTables) -> TSchemaTables:
return tables


def has_seen_null_first_hint(column_schema: TColumnSchema) -> bool:
    """Returns True if the x-normalizer hints of `column_schema` mark the column as seen-null-first."""
    normalizer_hints = column_schema.get("x-normalizer", {})
    return bool(normalizer_hints.get("seen-null-first"))


def remove_seen_null_first_hint(column_schema: TColumnSchema) -> TColumnSchema:
    """Removes the seen-null-first hint from the x-normalizer hints of `column_schema` in place.

    If the x-normalizer section becomes empty after removing the hint, the section itself
    is removed as well. Columns without an x-normalizer section (or without the hint)
    are left untouched. Returns the modified input for convenience.
    """
    # use get() here: setdefault() would insert an empty x-normalizer section
    # into columns that never had one, mutating the schema as a side effect
    x_normalizer = column_schema.get("x-normalizer")
    if x_normalizer and x_normalizer.get("seen-null-first"):
        x_normalizer.pop("seen-null-first", None)
        # drop the section when removing the hint emptied it
        if not x_normalizer:
            column_schema.pop("x-normalizer", None)
    return column_schema


def get_processing_hints(
tables: TSchemaTables,
) -> Tuple[Dict[str, List[str]], Dict[str, Dict[str, List[str]]]]:
Expand Down
72 changes: 71 additions & 1 deletion dlt/common/typing.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@
from datetime import datetime, date # noqa: I251
import inspect
import os
import sys
from re import Pattern as _REPattern
from types import FunctionType
from types import FunctionType, ModuleType
from typing import (
Callable,
ClassVar,
Expand Down Expand Up @@ -535,3 +536,72 @@ def add_value_to_literal(literal: Any, value: Any) -> None:
if value not in type_args:
type_args += (value,)
literal.__args__ = type_args


def get_type_globals(obj: Any) -> Dict[str, Any]:
    """
    Best-effort extraction of globals() associated with a type. If an object is passed,
    we fall back to its __class__.

    Handles:
    - functions and bound methods (method objects proxy `__globals__` of `__func__`)
    - classes (including TypedDict, dataclasses, Pydantic models, etc.)
    - modules (returns their __dict__)

    Returns an empty dict when the defining module cannot be found in sys.modules.
    """

    # 1. Module: just return its dict
    if isinstance(obj, ModuleType):
        return obj.__dict__

    # 2. Function or bound method. NOTE: inspect.isfunction is False for bound
    # methods, so without ismethod they would fall through to the class branch
    # and resolve via MethodType.__module__ ("builtins") instead of the
    # function's defining module
    if inspect.isfunction(obj) or inspect.ismethod(obj):
        return obj.__globals__

    # 3. Class (includes TypedDict, dataclasses, normal classes, etc.)
    if not inspect.isclass(obj):
        obj = obj.__class__

    if mod := sys.modules.get(obj.__module__):
        return mod.__dict__
    return {}


def resolve_single_annotation(
    ann: Any,
    *,
    globalns: Optional[Dict[str, Any]] = None,
    localns: Optional[Dict[str, Any]] = None,
    raise_on_error: bool = False,
) -> Any:
    """
    Resolves annotation `ann` if it is a str and/or ForwardRef.
    - If `ann` is not a str or ForwardRef, it's returned unchanged.
    - If it *is* a str/ForwardRef, we eval it in an appropriate namespace.

    Args:
        ann: the annotation to resolve.
        globalns: globals used for the eval; when None and `ann` is a ForwardRef
            carrying a `__forward_module__`, that module's globals are used instead.
        localns: locals used for the eval.
        raise_on_error: when False (default), a failed eval is swallowed and the
            unresolved str/ForwardRef is returned as-is; when True, the eval
            exception propagates.

    NOTE: `eval` is used on annotation text, which is assumed to come from trusted
    source code (PEP 563 style annotations), never from external input.
    """

    # fast path: already a real type
    if not isinstance(ann, (str, ForwardRef)):
        return ann

    # extract the expression and module from ForwardRef if needed
    expr: str
    if isinstance(ann, ForwardRef):
        expr = ann.__forward_arg__
        # prefer the globals of the module the ForwardRef was created in, but only
        # when the caller did not pin globalns explicitly
        # NOTE(review): `__forward_module__` may hold a module object rather than a
        # module name depending on Python version; sys.modules.get then simply
        # misses and globalns stays None — confirm this is the intended fallback
        if (
            module := sys.modules.get(getattr(ann, "__forward_module__", None))
        ) and globalns is None:
            globalns = module.__dict__
    else:
        expr = ann

    try:
        ann = eval(expr, globalns, localns)
        # eval may itself produce a ForwardRef (e.g. nested refs); resolve recursively
        if isinstance(ann, ForwardRef):
            ann = resolve_single_annotation(
                ann, globalns=globalns, localns=localns, raise_on_error=raise_on_error
            )
    except Exception:
        # swallow unless asked to raise; note that if the recursive call raised,
        # `ann` was already rebound to eval's result, so the partially resolved
        # ForwardRef (not the original input) is returned
        if raise_on_error:
            raise

    return ann
6 changes: 5 additions & 1 deletion dlt/common/validation.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import sys
import functools
import inspect
from typing import Callable, Any, List, Type
Expand Down Expand Up @@ -61,7 +62,10 @@ def validate_dict(
# TODO: get_type_hints is very slow and we possibly should cache the result
# even better option is to rewrite "verify_prop" so we can cache annotations mapper to validators
# so we do not check the types of typeddict keys all the time
allowed_props = get_type_hints(spec)
allowed_props = get_type_hints(
spec,
globalns=sys.modules[spec.__module__].__dict__ if spec.__module__ in sys.modules else None,
)
required_props = {k: v for k, v in allowed_props.items() if not is_optional_type(v)}
# remove optional props
props = {k: v for k, v in doc.items() if filter_f(k)}
Expand Down
Loading
Loading