|
14 | 14 | IncrementalByTimeRangeKind, |
15 | 15 | ModelKind, |
16 | 16 | ModelKindName, |
| 17 | + PythonModel, |
17 | 18 | SqlModel, |
| 19 | + TimeColumn, |
18 | 20 | load_model, |
19 | 21 | ) |
20 | 22 | from sqlmesh.core.model.meta import IntervalUnit |
|
26 | 28 | SnapshotTableInfo, |
27 | 29 | ) |
28 | 30 | from sqlmesh.utils.errors import ConfigError, SQLMeshError |
| 31 | +from sqlmesh.utils.metaprogramming import Executable |
29 | 32 |
|
30 | 33 |
|
31 | 34 | @pytest.fixture |
@@ -337,3 +340,97 @@ def test_audit_unversioned(mocker: MockerFixture, adapter_mock, make_snapshot): |
337 | 340 | match="Cannot audit 'db.model' because it has not been versioned yet. Apply a plan first.", |
338 | 341 | ): |
339 | 342 | evaluator.audit(snapshot=snapshot, snapshots={}) |
| 343 | + |
| 344 | + |
| 345 | +@pytest.mark.parametrize( |
| 346 | + "input_dfs, output_dict", |
| 347 | + [ |
| 348 | + ( |
| 349 | + """pd.DataFrame({"a": [1, 2, 3], "ds": ["2023-01-01", "2023-01-02", "2023-01-03"]}), |
| 350 | + pd.DataFrame({"a": [4, 5, 6], "ds": ["2023-01-04", "2023-01-05", "2023-01-06"]}), |
| 351 | + pd.DataFrame({"a": [7, 8, 9], "ds": ["2023-01-07", "2023-01-08", "2023-01-09"]})""", |
| 352 | + { |
| 353 | + "a": { |
| 354 | + 0: 1, |
| 355 | + 1: 2, |
| 356 | + 2: 3, |
| 357 | + 3: 4, |
| 358 | + 4: 5, |
| 359 | + 5: 6, |
| 360 | + 6: 7, |
| 361 | + 7: 8, |
| 362 | + 8: 9, |
| 363 | + }, |
| 364 | + "ds": { |
| 365 | + 0: "2023-01-01", |
| 366 | + 1: "2023-01-02", |
| 367 | + 2: "2023-01-03", |
| 368 | + 3: "2023-01-04", |
| 369 | + 4: "2023-01-05", |
| 370 | + 5: "2023-01-06", |
| 371 | + 6: "2023-01-07", |
| 372 | + 7: "2023-01-08", |
| 373 | + 8: "2023-01-09", |
| 374 | + }, |
| 375 | + }, |
| 376 | + ), |
| 377 | + ( |
| 378 | + """pd.DataFrame({"a": [1, 2, 3], "ds": ["2023-01-01", "2023-01-02", "2023-01-03"]})""", |
| 379 | + { |
| 380 | + "a": { |
| 381 | + 0: 1, |
| 382 | + 1: 2, |
| 383 | + 2: 3, |
| 384 | + }, |
| 385 | + "ds": { |
| 386 | + 0: "2023-01-01", |
| 387 | + 1: "2023-01-02", |
| 388 | + 2: "2023-01-03", |
| 389 | + }, |
| 390 | + }, |
| 391 | + ), |
| 392 | + ], |
| 393 | +) |
| 394 | +def test_snapshot_evaluator_yield_pd(adapter_mock, make_snapshot, input_dfs, output_dict): |
| 395 | + adapter_mock.is_pyspark_df.return_value = False |
| 396 | + adapter_mock.SUPPORTS_INSERT_OVERWRITE = True |
| 397 | + adapter_mock.try_get_df = lambda x: x |
| 398 | + evaluator = SnapshotEvaluator(adapter_mock) |
| 399 | + |
| 400 | + snapshot = make_snapshot( |
| 401 | + PythonModel( |
| 402 | + name="db.model", |
| 403 | + entrypoint="python_func", |
| 404 | + kind=IncrementalByTimeRangeKind(time_column=TimeColumn(column="ds", format="%Y-%m-%d")), |
| 405 | + columns={ |
| 406 | + "a": "INT", |
| 407 | + "ds": "STRING", |
| 408 | + }, |
| 409 | + python_env={ |
| 410 | + "python_func": Executable( |
| 411 | + name="python_func", |
| 412 | + alias="python_func", |
| 413 | + path="test_snapshot_evaluator.py", |
| 414 | + payload=f"""import pandas as pd |
| 415 | +def python_func(**kwargs): |
| 416 | + for df in [ |
| 417 | + {input_dfs} |
| 418 | + ]: |
| 419 | + yield df""", |
| 420 | + ) |
| 421 | + }, |
| 422 | + ) |
| 423 | + ) |
| 424 | + |
| 425 | + snapshot.categorize_as(SnapshotChangeCategory.BREAKING) |
| 426 | + evaluator.create([snapshot], {}) |
| 427 | + |
| 428 | + evaluator.evaluate( |
| 429 | + snapshot, |
| 430 | + "2023-01-01", |
| 431 | + "2023-01-09", |
| 432 | + "2023-01-09", |
| 433 | + snapshots={}, |
| 434 | + ) |
| 435 | + |
| 436 | + assert adapter_mock.insert_overwrite_by_time_partition.call_args[0][1].to_dict() == output_dict |
0 commit comments