|
| 1 | +from pathlib import Path |
| 2 | + |
1 | 3 | import pytest |
2 | 4 | |
3 | 5 | from pydatalab.apps import BLOCK_TYPES, BLOCKS |
@@ -544,3 +546,113 @@ def test_create_sample_with_example_files( |
544 | 546 | if block_type == "xrd": |
545 | 547 | doc = database.items.find_one({"item_id": sample_id}, projection={"blocks_obj": 1}) |
546 | 548 | assert doc["blocks_obj"][block_id]["computed"]["peak_data"] is not None |
| 549 | + |
| 550 | + |
| 551 | +@pytest.fixture() |
| 552 | +def create_large_xye_file(tmpdir): |
| 553 | + """Create a relatively large .xye file for testing tabular block serialization and memory usage, |
| 554 | + kept as a separate fixture so that creating the file is not counted in the memray profile.""" |
| 555 | + |
| 556 | + fname = Path(tmpdir / "large_table.xye") |
| 557 | + |
| 558 | + # Make a dataframe of 3 columns and 50,000 rows, |
| 559 | + # totalling ~1.2 MB of raw floats, so a little more as a dataframe |
| 560 | + import numpy as np |
| 561 | + import pandas as pd |
| 562 | + |
| 563 | + N = 50_000 |
| 564 | + |
| 565 | + pd.DataFrame( |
| 566 | + { |
| 567 | + "two_theta": np.array(np.linspace(5, 85, N), dtype=np.float64), |
| 568 | + "intensity": np.array(np.random.rand(N), dtype=np.float64), |
| 569 | + "error": np.array(0.1 * np.random.rand(N), dtype=np.float64), |
| 570 | + } |
| 571 | + ).to_csv(fname, sep=",", index=False) |
| 572 | + |
| 573 | + yield fname |
| 574 | + |
| 575 | + |
| 576 | +@pytest.mark.limit_memory("130MB") |
| 577 | +def test_large_fake_xrd_data_block_serialization( |
| 578 | + admin_client, default_sample_dict, tmpdir, create_large_xye_file |
| 579 | +): |
| 580 | + """Make a fake xye file with relatively large data and test serialization |
| 581 | + memory usage in particular. |
| 582 | + |
| 583 | + As of the time of writing, we get a breakdown like: |
| 584 | + |
| 585 | + > Allocation results for tests/server/test_blocks.py::test_large_fake_xrd_data_block_serialization at the high watermark |
| 586 | + > |
| 587 | + > 📦 Total memory allocated: 128.4MiB |
| 588 | + > 📏 Total allocations: 382 |
| 589 | + > 📊 Histogram of allocation sizes: |▁▃█ | |
| 590 | + > 🥇 Biggest allocating functions: |
| 591 | + > - lstsq:./pydatalab/.venv/lib/python3.11/site-packages/numpy/linalg/linalg.py:2326 -> 32.0MiB |
| 592 | + > - raw_decode:/home/mevans/.local/share/uv/python/cpython-3.11.10-linux-x86_64-gnu/lib/python3.11/json/decoder.py:353 -> 20.3MiB |
| 593 | + > - raw_decode:/home/mevans/.local/share/uv/python/cpython-3.11.10-linux-x86_64-gnu/lib/python3.11/json/decoder.py:353 -> 19.3MiB |
| 594 | + > - encode:/home/mevans/.local/share/uv/python/cpython-3.11.10-linux-x86_64-gnu/lib/python3.11/json/encoder.py:203 -> 14.0MiB |
| 595 | + > - _iterencode_list:/home/mevans/.local/share/uv/python/cpython-3.11.10-linux-x86_64-gnu/lib/python3.11/json/encoder.py:303 -> 14.0MiB |
| 596 | + |
| 597 | + """ |
| 598 | + import gc |
| 599 | + |
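| | + # Collect any garbage left over from earlier tests and fixtures so that it |
| | + # is not attributed to this test's memory high watermark. |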
| 600 | + gc.collect() |
| 601 | + gc.collect() |
| 602 | + |
| 603 | + block_type = "xrd" |
| 604 | + |
| 605 | + sample_id = "test_sample_with_large_table" |
| 606 | + sample_data = default_sample_dict.copy() |
| 607 | + sample_data["item_id"] = sample_id |
| 608 | + |
| 609 | + response = admin_client.post("/new-sample/", json=sample_data) |
| 610 | + assert response.status_code == 201, f"Failed to create sample for {block_type}: {response.json}" |
| 611 | + assert response.json["status"] == "success" |
| 612 | + |
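| | + # Upload the large .xye file produced by the fixture to the new sample. |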
| 613 | + with open(create_large_xye_file, "rb") as f: |
| 614 | + response = admin_client.post( |
| 615 | + "/upload-file/", |
| 616 | + buffered=True, |
| 617 | + content_type="multipart/form-data", |
| 618 | + data={ |
| 619 | + "item_id": sample_id, |
| 620 | + "file": [(f, create_large_xye_file.name)], |
| 621 | + "type": "application/octet-stream", |
| 622 | + "replace_file": "null", |
| 623 | + "relativePath": "null", |
| 624 | + }, |
| 625 | + ) |
| 626 | + assert response.status_code == 201, f"Failed to upload {create_large_xye_file}" |
| 627 | + assert response.json["status"] == "success" |
| 628 | + file_id = response.json["file_id"] |
| 629 | + |
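| | + # Attach an XRD data block to the sample. |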
| 630 | + response = admin_client.post( |
| 631 | + "/add-data-block/", |
| 632 | + json={ |
| 633 | + "block_type": block_type, |
| 634 | + "item_id": sample_id, |
| 635 | + "index": 0, |
| 636 | + }, |
| 637 | + ) |
| 638 | + |
| 639 | + block_id = response.json["new_block_obj"]["block_id"] |
| 640 | + |
| 641 | + gc.collect() |
| 642 | + |
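| | + # Point the block at the uploaded file; the update triggers parsing of the |
| | + # large table and serialization of the resulting plot data in the response. |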
| 643 | + response = admin_client.post( |
| 644 | + "/update-block/", |
| 645 | + json={ |
| 646 | + "block_data": { |
| 647 | + "blocktype": "tabular", |
| 648 | + "item_id": sample_id, |
| 649 | + "file_id": file_id, |
| 650 | + "block_id": block_id, |
| 651 | + }, |
| 652 | + }, |
| 653 | + ) |
| 654 | + |
| 655 | + assert response.status_code == 200, f"Failed to update tabular block: {response.json}" |
| 656 | + assert response.json["new_block_data"]["bokeh_plot_data"] |
| 657 | + |
| 658 | + gc.collect() |