From 3f4c4e3e3a84db729e0d6d6be962e99cea49f7c6 Mon Sep 17 00:00:00 2001 From: Kristin Cowalcijk Date: Tue, 3 Mar 2026 20:46:57 +0800 Subject: [PATCH 1/7] docs: add memory management guide Add documentation covering memory limits, memory pool types (greedy vs fair), temporary spill directory configuration, operators that support spilling, advanced DataFusion spill settings, and system-level file descriptor tuning. --- docs/memory-management.md | 218 ++++++++++++++++++++++++++++++++++++++ mkdocs.yml | 1 + 2 files changed, 219 insertions(+) create mode 100644 docs/memory-management.md diff --git a/docs/memory-management.md b/docs/memory-management.md new file mode 100644 index 000000000..fb8f62384 --- /dev/null +++ b/docs/memory-management.md @@ -0,0 +1,218 @@ + + +# Memory Management + +SedonaDB supports memory-limited execution with automatic spill-to-disk, +allowing you to process datasets that are larger than available memory. When a +memory limit is configured, operators that exceed their memory budget +automatically spill intermediate data to temporary files on disk and read them +back as needed. + +## Configuring Memory Limits + +Set `memory_limit` on the context options to cap the total memory available for +query execution. The limit accepts an integer (bytes) or a human-readable string +such as `"4gb"`, `"512m"`, or `"1.5g"`. + +```python +import sedona.db + +sd = sedona.db.connect() +sd.options.memory_limit = "4gb" +``` + +Without a memory limit, SedonaDB uses an unbounded memory pool and operators +will not spill to disk. + +!!! note + All runtime options (`memory_limit`, `memory_pool_type`, `temp_dir`, + `unspillable_reserve_ratio`) must be set **before** the first query is + executed. Once the first query runs, the internal execution context is + created and these options become read-only. + +## Memory Pool Types + +The `memory_pool_type` option controls how the memory budget is distributed +among concurrent operators. 
Two pool types are available: + +- **`"greedy"`** -- Grants memory reservations on a first-come-first-served + basis. This is the default when no pool type is specified. Simple, but can + lead to memory reservation failures under pressure -- one consumer may + exhaust the pool before others get a chance to reserve memory. + +- **`"fair"` (recommended)** -- Distributes memory fairly among spillable + consumers and reserves a fraction of the pool for unspillable consumers. + More stable under memory pressure and significantly less likely to cause + reservation failures, at the cost of slightly lower utilization of the total + reserved memory. + +We recommend using `"fair"` whenever a memory limit is configured: + +```python +sd = sedona.db.connect() +sd.options.memory_limit = "4gb" +sd.options.memory_pool_type = "fair" +``` + +### Unspillable reserve ratio + +When using the `"fair"` pool, the `unspillable_reserve_ratio` option controls +the fraction of the memory pool reserved for unspillable consumers (operators +that cannot spill their memory to disk). It accepts a float between `0.0` and +`1.0` and defaults to `0.2` (20%) when not explicitly set. + +```python +sd = sedona.db.connect() +sd.options.memory_limit = "8gb" +sd.options.memory_pool_type = "fair" +sd.options.unspillable_reserve_ratio = 0.3 # reserve 30% for unspillable consumers +``` + +## Temporary Directory for Spill Files + +By default, DataFusion uses the system temporary directory for spill files. 
You +can override this with `temp_dir` to control where spill data is written -- for +example, to point to a larger or faster disk: + +```python +sd = sedona.db.connect() +sd.options.memory_limit = "4gb" +sd.options.memory_pool_type = "fair" +sd.options.temp_dir = "/mnt/fast-ssd/sedona-spill" +``` + +## Full Example + +```python +import sedona.db + +sd = sedona.db.connect() + +# Cap execution memory at 4 GB +sd.options.memory_limit = "4gb" + +# Use the fair pool for stable memory distribution (recommended) +sd.options.memory_pool_type = "fair" + +# Reserve 20% of the pool for unspillable consumers (default) +sd.options.unspillable_reserve_ratio = 0.2 + +# Write spill files to a dedicated directory +sd.options.temp_dir = "/tmp/sedona-spill" + +# Now execute queries -- options are frozen after the first query runs +df = sd.sql("SELECT ...") +``` + +## Operators Supporting Memory Limits + +When a memory limit is configured, the following operators automatically spill +intermediate data to disk when they exceed their memory budget: + +**SedonaDB operators:** + +- **Spatial joins** -- Both the build-side (index construction, partition + collection) and probe-side (stream repartitioning) of SedonaDB's spatial + joins support memory-pressure-driven spilling. + +**DataFusion operators:** + +- **`SortExec`** -- External sort that spills sorted runs to disk when memory + is exhausted, then merges them. +- **`HashJoinExec`** -- Hash join that spills hash table partitions to disk + under memory pressure. +- **`SortMergeJoinExec`** -- Sort-merge join that spills buffered batches to + disk when the memory limit is exceeded. +- **`AggregateExec`** -- Grouped aggregation that spills intermediate + aggregation state to sorted spill files when memory is exhausted. + +## Advanced DataFusion Configurations + +DataFusion provides additional execution configurations that affect spill +behavior. 
These can be set via SQL `SET` statements after connecting: + +### Spill compression + +By default, data is written to spill files uncompressed. Enabling compression +reduces the amount of disk I/O and disk space used at the cost of additional +CPU work. This is beneficial when disk I/O throughput is low or when disk space +is not large enough to hold uncompressed spill data. + +```python +sd = sedona.db.connect() +sd.options.memory_limit = "4gb" +sd.options.memory_pool_type = "fair" + +# Enable LZ4 compression for spill files +sd.sql("SET datafusion.execution.spill_compression = 'lz4_frame'").execute() +``` + +### Maximum temporary directory size + +DataFusion limits the total size of temporary spill files to prevent unbounded +disk usage. The default limit is **100 GB**. If your workload needs to spill +more data than this, increase the limit: + +```python +# Increase the spill directory size limit to 500 GB +sd.sql("SET datafusion.runtime.max_temp_directory_size = '500gb'").execute() +``` + +## System Configuration + +### Maximum number of open files + +Large workloads that spill heavily can create a large number of temporary files. +During a spatial join, each parallel execution thread may create one spill file +per spatial partition. The total number of open spill files can therefore reach +**parallelism x number of spatial partitions**. For example, on an 8-CPU host +running a spatial join that produces 500 spatial partitions, up to +**8 x 500 = 4,000** spill files may be open simultaneously -- far exceeding the +default per-process file descriptor limit. + +The operating system's per-process file descriptor limit must be high enough to +accommodate this, otherwise queries will fail with "too many open files" errors. + +**Linux:** + +The default limit is typically 1024, which is easily exceeded by spill-heavy +workloads like the example above. 
+ +To raise the limit permanently, add the following to `/etc/security/limits.conf`: + +``` +* soft nofile 65535 +* hard nofile 65535 +``` + +Then log out and back in (or reboot) for the change to take effect. Verify with: + +```bash +ulimit -n +``` + +**macOS:** + +```bash +ulimit -n 65535 +``` + +To make this persistent across sessions, refer to your shell's configuration +(e.g., add `ulimit -n 65535` to `~/.zshrc` or `~/.bashrc`). diff --git a/mkdocs.yml b/mkdocs.yml index 33c405a51..07d6305ff 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -37,6 +37,7 @@ nav: - Home: index.md - Quickstart: quickstart-python.md - Programming Guide: programming-guide.md + - Memory Management: memory-management.md - API: - Python: reference/python.md - SQL: reference/sql/index.md From e487fdf7bcc95294452a221f96b29cd7f566044e Mon Sep 17 00:00:00 2001 From: Kristin Cowalcijk Date: Tue, 3 Mar 2026 22:04:13 +0800 Subject: [PATCH 2/7] docs: refine memory and spill configuration guide Clarify memory pool behavior without a limit, add guidance on fair pool usage, use SQL-centric operator descriptions for spill-capable plans, and document DataFusion spill settings and file descriptor requirements more precisely. --- docs/memory-management.md | 59 +++++++++++++++++++++++++++------------ 1 file changed, 41 insertions(+), 18 deletions(-) diff --git a/docs/memory-management.md b/docs/memory-management.md index fb8f62384..fbc00f8f4 100644 --- a/docs/memory-management.md +++ b/docs/memory-management.md @@ -39,7 +39,9 @@ sd.options.memory_limit = "4gb" ``` Without a memory limit, SedonaDB uses an unbounded memory pool and operators -will not spill to disk. +can use as much memory as needed (until the process hits system limits). In +this mode, operators typically won't spill to disk because there is no memory +budget to enforce. !!! note All runtime options (`memory_limit`, `memory_pool_type`, `temp_dir`, @@ -71,6 +73,9 @@ sd.options.memory_limit = "4gb" sd.options.memory_pool_type = "fair" ``` +!!! 
note + `memory_pool_type` only takes effect when `memory_limit` is set. + ### Unspillable reserve ratio When using the `"fair"` pool, the `unspillable_reserve_ratio` option controls @@ -118,7 +123,15 @@ sd.options.unspillable_reserve_ratio = 0.2 sd.options.temp_dir = "/tmp/sedona-spill" # Now execute queries -- options are frozen after the first query runs -df = sd.sql("SELECT ...") +# Example: configure DataFusion settings and then run your workload +sd.sql("SET datafusion.execution.spill_compression = 'lz4_frame'").execute() + +df = sd.sql(""" +SELECT a.id, b.id +FROM a +JOIN b + ON ST_Intersects(a.geom, b.geom) +""") ``` ## Operators Supporting Memory Limits @@ -126,28 +139,41 @@ df = sd.sql("SELECT ...") When a memory limit is configured, the following operators automatically spill intermediate data to disk when they exceed their memory budget: -**SedonaDB operators:** +In practice, this means memory limits and spilling can apply to both SedonaDB's +spatial operators and DataFusion's general-purpose operators used by common SQL +constructs: + +**SedonaDB:** - **Spatial joins** -- Both the build-side (index construction, partition collection) and probe-side (stream repartitioning) of SedonaDB's spatial joins support memory-pressure-driven spilling. -**DataFusion operators:** +**DataFusion (physical operators):** + +This list is not exhaustive. Many other DataFusion physical operators and +execution strategies may allocate memory through the same runtime memory pool +and may spill to disk when memory limits are enforced. -- **`SortExec`** -- External sort that spills sorted runs to disk when memory - is exhausted, then merges them. -- **`HashJoinExec`** -- Hash join that spills hash table partitions to disk - under memory pressure. -- **`SortMergeJoinExec`** -- Sort-merge join that spills buffered batches to - disk when the memory limit is exceeded. 
-- **`AggregateExec`** -- Grouped aggregation that spills intermediate - aggregation state to sorted spill files when memory is exhausted. +- **`ORDER BY` / sorted Top-K** (`SortExec`) -- External sort that + spills sorted runs to disk when memory is exhausted, then merges them. +- **Many joins** (`HashJoinExec`) -- Hash join that spills hash table partitions + to disk under memory pressure. +- **Sort-merge joins** (`SortMergeJoinExec`) -- Sort-merge join that spills + buffered batches to disk when the memory limit is exceeded. +- **`GROUP BY` aggregations** (`AggregateExec`) -- Grouped aggregation that + spills intermediate aggregation state to sorted spill files when memory is + exhausted. ## Advanced DataFusion Configurations DataFusion provides additional execution configurations that affect spill behavior. These can be set via SQL `SET` statements after connecting: +!!! note + `SET` is executed as a query. Configure `sd.options.*` runtime options (like + `memory_limit` and `temp_dir`) before running any `SET` statements. + ### Spill compression By default, data is written to spill files uncompressed. Enabling compression @@ -156,10 +182,6 @@ CPU work. This is beneficial when disk I/O throughput is low or when disk space is not large enough to hold uncompressed spill data. ```python -sd = sedona.db.connect() -sd.options.memory_limit = "4gb" -sd.options.memory_pool_type = "fair" - # Enable LZ4 compression for spill files sd.sql("SET datafusion.execution.spill_compression = 'lz4_frame'").execute() ``` @@ -214,5 +236,6 @@ ulimit -n ulimit -n 65535 ``` -To make this persistent across sessions, refer to your shell's configuration -(e.g., add `ulimit -n 65535` to `~/.zshrc` or `~/.bashrc`). +This affects the current shell session. Persistent/system-wide limits are OS +and configuration dependent; consult your macOS configuration and documentation +if you need to raise the hard limit. 
From cdd7b8731428a1f095e3f3da47ea83643e4b30ac Mon Sep 17 00:00:00 2001 From: Kristin Cowalcijk Date: Wed, 4 Mar 2026 00:17:02 +0800 Subject: [PATCH 3/7] docs: rename section to Configurations with Memory Management and Spilling subsection --- docs/memory-management.md | 2 +- mkdocs.yml | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/memory-management.md b/docs/memory-management.md index fbc00f8f4..a0ee7a04b 100644 --- a/docs/memory-management.md +++ b/docs/memory-management.md @@ -17,7 +17,7 @@ under the License. --> -# Memory Management +# Memory Management and Spilling SedonaDB supports memory-limited execution with automatic spill-to-disk, allowing you to process datasets that are larger than available memory. When a diff --git a/mkdocs.yml b/mkdocs.yml index 07d6305ff..63a307cea 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -37,7 +37,8 @@ nav: - Home: index.md - Quickstart: quickstart-python.md - Programming Guide: programming-guide.md - - Memory Management: memory-management.md + - Configurations: + - Memory Management and Spilling: memory-management.md - API: - Python: reference/python.md - SQL: reference/sql/index.md From 17e6514d012927b7dc003f6248fb45544e39bcee Mon Sep 17 00:00:00 2001 From: Kristin Cowalcijk Date: Wed, 4 Mar 2026 00:18:39 +0800 Subject: [PATCH 4/7] Apply suggestions from code review Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- docs/memory-management.md | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/docs/memory-management.md b/docs/memory-management.md index a0ee7a04b..004396229 100644 --- a/docs/memory-management.md +++ b/docs/memory-management.md @@ -45,9 +45,10 @@ budget to enforce. !!! note All runtime options (`memory_limit`, `memory_pool_type`, `temp_dir`, - `unspillable_reserve_ratio`) must be set **before** the first query is - executed. Once the first query runs, the internal execution context is - created and these options become read-only. 
+ `unspillable_reserve_ratio`) must be set **before** the internal context + is initialized — that is, before the first call to `sd.sql(...)` or any + read method (for example, `sd.read_parquet(...)`). Once the internal + context is created, these options become read-only. ## Memory Pool Types @@ -171,8 +172,10 @@ DataFusion provides additional execution configurations that affect spill behavior. These can be set via SQL `SET` statements after connecting: !!! note - `SET` is executed as a query. Configure `sd.options.*` runtime options (like - `memory_limit` and `temp_dir`) before running any `SET` statements. + Calling `sd.sql("SET ...")` initializes the internal context and freezes + runtime options immediately, before `.execute()` is run. Configure + `sd.options.*` runtime options (like `memory_limit` and `temp_dir`) before + calling any `sd.sql(...)`, including `SET` statements. ### Spill compression From 5feae5d4ca85e08bdf21b30714c6da80533c26e3 Mon Sep 17 00:00:00 2001 From: Kristin Cowalcijk Date: Wed, 4 Mar 2026 00:20:28 +0800 Subject: [PATCH 5/7] Fix description for HashJoinExec --- docs/memory-management.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/memory-management.md b/docs/memory-management.md index 004396229..e8157a390 100644 --- a/docs/memory-management.md +++ b/docs/memory-management.md @@ -158,8 +158,9 @@ and may spill to disk when memory limits are enforced. - **`ORDER BY` / sorted Top-K** (`SortExec`) -- External sort that spills sorted runs to disk when memory is exhausted, then merges them. -- **Many joins** (`HashJoinExec`) -- Hash join that spills hash table partitions - to disk under memory pressure. +- **Hash joins** (`HashJoinExec`) -- Hash join does not support spilling yet. + The query will fail with a memory reservation error if the hash table exceeds + the memory limit. - **Sort-merge joins** (`SortMergeJoinExec`) -- Sort-merge join that spills buffered batches to disk when the memory limit is exceeded. 
- **`GROUP BY` aggregations** (`AggregateExec`) -- Grouped aggregation that From a2ae59e32a2d396b7f4c0a712b51c0c341e8e6ff Mon Sep 17 00:00:00 2001 From: Kristin Cowalcijk Date: Wed, 4 Mar 2026 12:50:32 +0800 Subject: [PATCH 6/7] docs: add memory management notebook and regenerate markdown --- docs/memory-management.ipynb | 380 +++++++++++++++++++++++++++++++++++ docs/memory-management.md | 214 ++++++++++---------- 2 files changed, 485 insertions(+), 109 deletions(-) create mode 100644 docs/memory-management.ipynb diff --git a/docs/memory-management.ipynb b/docs/memory-management.ipynb new file mode 100644 index 000000000..8a4f94e53 --- /dev/null +++ b/docs/memory-management.ipynb @@ -0,0 +1,380 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "a2e6cc7b", + "metadata": {}, + "source": [ + "\n", + "\n", + "# Memory Management and Spilling\n", + "\n", + "SedonaDB supports memory-limited execution with automatic spill-to-disk, allowing you to process datasets that are larger than available memory. When a memory limit is configured, operators that exceed their memory budget automatically spill intermediate data to temporary files on disk and read them back as needed." + ] + }, + { + "cell_type": "markdown", + "id": "e3a30ea9", + "metadata": {}, + "source": [ + "## Configuring Memory Limits\n", + "\n", + "Set `memory_limit` on the context options to cap the total memory available for query execution. The limit accepts an integer (bytes) or a human-readable string such as `\"4gb\"`, `\"512m\"`, or `\"1.5g\"`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d1f99fcf", + "metadata": {}, + "outputs": [], + "source": [ + "import sedona.db\n", + "\n", + "sd = sedona.db.connect()\n", + "sd.options.memory_limit = \"4gb\"" + ] + }, + { + "cell_type": "markdown", + "id": "1fdc73aa", + "metadata": {}, + "source": [ + "Without a memory limit, SedonaDB uses an unbounded memory pool and operators can use as much memory as needed (until the process hits system limits). In this mode, operators typically won't spill to disk because there is no memory budget to enforce.\n", + "\n", + "> **Note:** All runtime options (`memory_limit`, `memory_pool_type`, `temp_dir`, `unspillable_reserve_ratio`) must be set before the internal context is initialized. The internal context is created on the first call to `sd.sql(...)` (including `SET` statements) or any read method (for example, `sd.read_parquet(...)`) -- not when you call `.execute()` on the returned DataFrame. Once the internal context is created, these runtime options become read-only." + ] + }, + { + "cell_type": "markdown", + "id": "c0f91c15", + "metadata": {}, + "source": [ + "## Memory Pool Types\n", + "\n", + "The `memory_pool_type` option controls how the memory budget is distributed among concurrent operators. Two pool types are available:\n", + "\n", + "- **`\"greedy\"`** -- Grants memory reservations on a first-come-first-served basis. This is the default when no pool type is specified. Simple, but can lead to memory reservation failures under pressure -- one consumer may exhaust the pool before others get a chance to reserve memory.\n", + "- **`\"fair\"` (recommended)** -- Distributes memory fairly among spillable consumers and reserves a fraction of the pool for unspillable consumers. 
More stable under memory pressure and significantly less likely to cause reservation failures, at the cost of slightly lower utilization of the total reserved memory.\n", + "\n", + "We recommend using `\"fair\"` whenever a memory limit is configured." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b1dff726", + "metadata": {}, + "outputs": [], + "source": [ + "import sedona.db\n", + "\n", + "sd = sedona.db.connect()\n", + "sd.options.memory_limit = \"4gb\"\n", + "sd.options.memory_pool_type = \"fair\"" + ] + }, + { + "cell_type": "markdown", + "id": "bd4c0a76", + "metadata": {}, + "source": [ + "> **Note:** `memory_pool_type` only takes effect when `memory_limit` is set." + ] + }, + { + "cell_type": "markdown", + "id": "969f4fe0", + "metadata": {}, + "source": [ + "### Unspillable reserve ratio\n", + "\n", + "When using the `\"fair\"` pool, the `unspillable_reserve_ratio` option controls the fraction of the memory pool reserved for unspillable consumers (operators that cannot spill their memory to disk). It accepts a float between `0.0` and `1.0` and defaults to `0.2` (20%) when not explicitly set." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dc0718cf", + "metadata": {}, + "outputs": [], + "source": [ + "import sedona.db\n", + "\n", + "sd = sedona.db.connect()\n", + "sd.options.memory_limit = \"8gb\"\n", + "sd.options.memory_pool_type = \"fair\"\n", + "sd.options.unspillable_reserve_ratio = 0.3 # reserve 30% for unspillable consumers" + ] + }, + { + "cell_type": "markdown", + "id": "fb1c9e39", + "metadata": {}, + "source": [ + "## Temporary Directory for Spill Files\n", + "\n", + "By default, DataFusion uses the system temporary directory for spill files. You can override this with `temp_dir` to control where spill data is written -- for example, to point to a larger or faster disk." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c8d7a5c9", + "metadata": {}, + "outputs": [], + "source": [ + "import sedona.db\n", + "\n", + "sd = sedona.db.connect()\n", + "sd.options.memory_limit = \"4gb\"\n", + "sd.options.memory_pool_type = \"fair\"\n", + "sd.options.temp_dir = \"/mnt/fast-ssd/sedona-spill\"" + ] + }, + { + "cell_type": "markdown", + "id": "5d318b8f", + "metadata": {}, + "source": [ + "## Example: Spatial Join with Limited Memory\n", + "\n", + "This example performs a spatial join between Overture addresses (points) and Overture buildings (polygons) using `ST_Contains`. Spatial joins are one of the most common workloads that benefit from memory limits and spill-to-disk.\n", + "\n", + "> **Note:** Overture removes old releases. See https://docs.overturemaps.org/release-calendar/#current-release for the latest version number and replace the relevant portion of the URLs below." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1ed77d58", + "metadata": {}, + "outputs": [], + "source": [ + "import sedona.db\n", + "\n", + "sd = sedona.db.connect()\n", + "\n", + "# Configure runtime options before any sd.sql(...) 
or sd.read_* call.\n", + "sd.options.memory_limit = \"4gb\"\n", + "sd.options.memory_pool_type = \"fair\"\n", + "sd.options.unspillable_reserve_ratio = 0.2\n", + "sd.options.temp_dir = \"/tmp/sedona-spill\"\n", + "\n", + "# Overture buildings metadata is large; increase the remote metadata cache.\n", + "sd.sql(\"SET datafusion.runtime.metadata_cache_limit = '900M'\").execute()\n", + "\n", + "aws = {\"aws.skip_signature\": True, \"aws.region\": \"us-west-2\"}\n", + "\n", + "sd.read_parquet(\n", + " \"s3://overturemaps-us-west-2/release/2026-02-18.0/theme=buildings/type=building/\",\n", + " options=aws,\n", + ").to_view(\"buildings\")\n", + "\n", + "sd.read_parquet(\n", + " \"s3://overturemaps-us-west-2/release/2026-02-18.0/theme=addresses/type=address/\",\n", + " options=aws,\n", + ").to_view(\"addresses\")\n", + "\n", + "# Limit the scan to a region of interest using the bbox columns (helps avoid scanning the full tables).\n", + "west, south, east, north = -74.05, 40.68, -73.90, 40.88\n", + "\n", + "sd.sql(\n", + " \"\"\"\n", + " WITH buildings_roi AS (\n", + " SELECT id AS building_id, geometry AS building_geom\n", + " FROM buildings\n", + " WHERE bbox.xmin < $east\n", + " AND bbox.xmax > $west\n", + " AND bbox.ymin < $north\n", + " AND bbox.ymax > $south\n", + " ),\n", + " addresses_roi AS (\n", + " SELECT id AS address_id, geometry AS address_geom\n", + " FROM addresses\n", + " WHERE bbox.xmin < $east\n", + " AND bbox.xmax > $west\n", + " AND bbox.ymin < $north\n", + " AND bbox.ymax > $south\n", + " )\n", + " SELECT\n", + " a.address_id,\n", + " b.building_id\n", + " FROM addresses_roi a\n", + " JOIN buildings_roi b\n", + " ON ST_Contains(b.building_geom, a.address_geom)\n", + " \"\"\",\n", + " params={\"west\": west, \"south\": south, \"east\": east, \"north\": north},\n", + ").show(10)" + ] + }, + { + "cell_type": "markdown", + "id": "b97c0e8b", + "metadata": {}, + "source": [ + "## Operators Supporting Memory Limits\n", + "\n", + "When a memory limit is 
configured, the following operators are subject to the memory limit. Most of them automatically spill intermediate data to disk when they exceed their memory budget; operators that do not yet support spilling (noted below) instead fail with a memory reservation error.
Configure `sd.options.*` runtime options (like `memory_limit` and `temp_dir`) before calling any `sd.sql(...)`, including `SET` statements." + ] + }, + { + "cell_type": "markdown", + "id": "6dd9f5b7", + "metadata": {}, + "source": [ + "### Spill compression\n", + "\n", + "By default, data is written to spill files uncompressed. Enabling compression reduces the amount of disk I/O and disk space used at the cost of additional CPU work. This is beneficial when disk I/O throughput is low or when disk space is not large enough to hold uncompressed spill data." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "10cf38cb", + "metadata": {}, + "outputs": [], + "source": [ + "import sedona.db\n", + "\n", + "sd = sedona.db.connect()\n", + "sd.options.memory_limit = \"4gb\"\n", + "sd.options.memory_pool_type = \"fair\"\n", + "\n", + "# Enable LZ4 compression for spill files.\n", + "sd.sql(\"SET datafusion.execution.spill_compression = 'lz4_frame'\").execute()" + ] + }, + { + "cell_type": "markdown", + "id": "1a3ce16c", + "metadata": {}, + "source": [ + "### Maximum temporary directory size\n", + "\n", + "DataFusion limits the total size of temporary spill files to prevent unbounded disk usage. The default limit is **100 GB**. If your workload needs to spill more data than this, increase the limit." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "eac829ad", + "metadata": {}, + "outputs": [], + "source": [ + "import sedona.db\n", + "\n", + "sd = sedona.db.connect()\n", + "sd.options.memory_limit = \"4gb\"\n", + "sd.options.memory_pool_type = \"fair\"\n", + "\n", + "# Increase the spill directory size limit to 500 GB.\n", + "sd.sql(\"SET datafusion.runtime.max_temp_directory_size = '500gb'\").execute()" + ] + }, + { + "cell_type": "markdown", + "id": "dd8c2c23", + "metadata": {}, + "source": [ + "## System Configuration\n", + "\n", + "### Maximum number of open files\n", + "\n", + "Large workloads that spill heavily can create a large number of temporary files. During a spatial join, each parallel execution thread may create one spill file per spatial partition. The total number of open spill files can therefore reach **parallelism x number of spatial partitions**. For example, on an 8-CPU host running a spatial join that produces 500 spatial partitions, up to **8 x 500 = 4,000** spill files may be open simultaneously -- far exceeding the default per-process file descriptor limit.\n", + "\n", + "The operating system's per-process file descriptor limit must be high enough to accommodate this, otherwise queries will fail with \"too many open files\" errors.\n", + "\n", + "**Linux:**\n", + "\n", + "The default limit is typically 1024, which is easily exceeded by spill-heavy workloads like the example above.\n", + "\n", + "To raise the limit permanently, add the following to `/etc/security/limits.conf`:\n", + "\n", + "```\n", + "* soft nofile 65535\n", + "* hard nofile 65535\n", + "```\n", + "\n", + "Then log out and back in (or reboot) for the change to take effect. Verify with:\n", + "\n", + "```bash\n", + "ulimit -n\n", + "```\n", + "\n", + "**macOS:**\n", + "\n", + "```bash\n", + "ulimit -n 65535\n", + "```\n", + "\n", + "This affects the current shell session. 
Persistent/system-wide limits are OS and configuration dependent; consult your macOS configuration and documentation if you need to raise the hard limit." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "python3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/memory-management.md b/docs/memory-management.md index e8157a390..4b485a011 100644 --- a/docs/memory-management.md +++ b/docs/memory-management.md @@ -19,17 +19,12 @@ # Memory Management and Spilling -SedonaDB supports memory-limited execution with automatic spill-to-disk, -allowing you to process datasets that are larger than available memory. When a -memory limit is configured, operators that exceed their memory budget -automatically spill intermediate data to temporary files on disk and read them -back as needed. +SedonaDB supports memory-limited execution with automatic spill-to-disk, allowing you to process datasets that are larger than available memory. When a memory limit is configured, operators that exceed their memory budget automatically spill intermediate data to temporary files on disk and read them back as needed. ## Configuring Memory Limits -Set `memory_limit` on the context options to cap the total memory available for -query execution. The limit accepts an integer (bytes) or a human-readable string -such as `"4gb"`, `"512m"`, or `"1.5g"`. +Set `memory_limit` on the context options to cap the total memory available for query execution. The limit accepts an integer (bytes) or a human-readable string such as `"4gb"`, `"512m"`, or `"1.5g"`. 
+ ```python import sedona.db @@ -38,53 +33,38 @@ sd = sedona.db.connect() sd.options.memory_limit = "4gb" ``` -Without a memory limit, SedonaDB uses an unbounded memory pool and operators -can use as much memory as needed (until the process hits system limits). In -this mode, operators typically won't spill to disk because there is no memory -budget to enforce. +Without a memory limit, SedonaDB uses an unbounded memory pool and operators can use as much memory as needed (until the process hits system limits). In this mode, operators typically won't spill to disk because there is no memory budget to enforce. -!!! note - All runtime options (`memory_limit`, `memory_pool_type`, `temp_dir`, - `unspillable_reserve_ratio`) must be set **before** the internal context - is initialized — that is, before the first call to `sd.sql(...)` or any - read method (for example, `sd.read_parquet(...)`). Once the internal - context is created, these options become read-only. +> **Note:** All runtime options (`memory_limit`, `memory_pool_type`, `temp_dir`, `unspillable_reserve_ratio`) must be set before the internal context is initialized. The internal context is created on the first call to `sd.sql(...)` (including `SET` statements) or any read method (for example, `sd.read_parquet(...)`) -- not when you call `.execute()` on the returned DataFrame. Once the internal context is created, these runtime options become read-only. ## Memory Pool Types -The `memory_pool_type` option controls how the memory budget is distributed -among concurrent operators. Two pool types are available: +The `memory_pool_type` option controls how the memory budget is distributed among concurrent operators. Two pool types are available: -- **`"greedy"`** -- Grants memory reservations on a first-come-first-served - basis. This is the default when no pool type is specified. 
Simple, but can - lead to memory reservation failures under pressure -- one consumer may - exhaust the pool before others get a chance to reserve memory. +- **`"greedy"`** -- Grants memory reservations on a first-come-first-served basis. This is the default when no pool type is specified. Simple, but can lead to memory reservation failures under pressure -- one consumer may exhaust the pool before others get a chance to reserve memory. +- **`"fair"` (recommended)** -- Distributes memory fairly among spillable consumers and reserves a fraction of the pool for unspillable consumers. More stable under memory pressure and significantly less likely to cause reservation failures, at the cost of slightly lower utilization of the total reserved memory. -- **`"fair"` (recommended)** -- Distributes memory fairly among spillable - consumers and reserves a fraction of the pool for unspillable consumers. - More stable under memory pressure and significantly less likely to cause - reservation failures, at the cost of slightly lower utilization of the total - reserved memory. +We recommend using `"fair"` whenever a memory limit is configured. -We recommend using `"fair"` whenever a memory limit is configured: ```python +import sedona.db + sd = sedona.db.connect() sd.options.memory_limit = "4gb" sd.options.memory_pool_type = "fair" ``` -!!! note - `memory_pool_type` only takes effect when `memory_limit` is set. +> **Note:** `memory_pool_type` only takes effect when `memory_limit` is set. ### Unspillable reserve ratio -When using the `"fair"` pool, the `unspillable_reserve_ratio` option controls -the fraction of the memory pool reserved for unspillable consumers (operators -that cannot spill their memory to disk). It accepts a float between `0.0` and -`1.0` and defaults to `0.2` (20%) when not explicitly set. 
+When using the `"fair"` pool, the `unspillable_reserve_ratio` option controls the fraction of the memory pool reserved for unspillable consumers (operators that cannot spill their memory to disk). It accepts a float between `0.0` and `1.0` and defaults to `0.2` (20%) when not explicitly set. + ```python +import sedona.db + sd = sedona.db.connect() sd.options.memory_limit = "8gb" sd.options.memory_pool_type = "fair" @@ -93,111 +73,137 @@ sd.options.unspillable_reserve_ratio = 0.3 # reserve 30% for unspillable consum ## Temporary Directory for Spill Files -By default, DataFusion uses the system temporary directory for spill files. You -can override this with `temp_dir` to control where spill data is written -- for -example, to point to a larger or faster disk: +By default, DataFusion uses the system temporary directory for spill files. You can override this with `temp_dir` to control where spill data is written -- for example, to point to a larger or faster disk. + ```python +import sedona.db + sd = sedona.db.connect() sd.options.memory_limit = "4gb" sd.options.memory_pool_type = "fair" sd.options.temp_dir = "/mnt/fast-ssd/sedona-spill" ``` -## Full Example +## Example: Spatial Join with Limited Memory + +This example performs a spatial join between Overture addresses (points) and Overture buildings (polygons) using `ST_Contains`. Spatial joins are one of the most common workloads that benefit from memory limits and spill-to-disk. + +> **Note:** Overture removes old releases. See https://docs.overturemaps.org/release-calendar/#current-release for the latest version number and replace the relevant portion of the URLs below. + ```python import sedona.db sd = sedona.db.connect() -# Cap execution memory at 4 GB +# Configure runtime options before any sd.sql(...) or sd.read_* call. 
sd.options.memory_limit = "4gb" - -# Use the fair pool for stable memory distribution (recommended) sd.options.memory_pool_type = "fair" - -# Reserve 20% of the pool for unspillable consumers (default) sd.options.unspillable_reserve_ratio = 0.2 - -# Write spill files to a dedicated directory sd.options.temp_dir = "/tmp/sedona-spill" -# Now execute queries -- options are frozen after the first query runs -# Example: configure DataFusion settings and then run your workload -sd.sql("SET datafusion.execution.spill_compression = 'lz4_frame'").execute() - -df = sd.sql(""" -SELECT a.id, b.id -FROM a -JOIN b - ON ST_Intersects(a.geom, b.geom) -""") +# Overture buildings metadata is large; increase the remote metadata cache. +sd.sql("SET datafusion.runtime.metadata_cache_limit = '900M'").execute() + +aws = {"aws.skip_signature": True, "aws.region": "us-west-2"} + +sd.read_parquet( + "s3://overturemaps-us-west-2/release/2026-02-18.0/theme=buildings/type=building/", + options=aws, +).to_view("buildings") + +sd.read_parquet( + "s3://overturemaps-us-west-2/release/2026-02-18.0/theme=addresses/type=address/", + options=aws, +).to_view("addresses") + +# Limit the scan to a region of interest using the bbox columns (helps avoid scanning the full tables). 
+west, south, east, north = -74.05, 40.68, -73.90, 40.88 + +sd.sql( + """ + WITH buildings_roi AS ( + SELECT id AS building_id, geometry AS building_geom + FROM buildings + WHERE bbox.xmin < $east + AND bbox.xmax > $west + AND bbox.ymin < $north + AND bbox.ymax > $south + ), + addresses_roi AS ( + SELECT id AS address_id, geometry AS address_geom + FROM addresses + WHERE bbox.xmin < $east + AND bbox.xmax > $west + AND bbox.ymin < $north + AND bbox.ymax > $south + ) + SELECT + a.address_id, + b.building_id + FROM addresses_roi a + JOIN buildings_roi b + ON ST_Contains(b.building_geom, a.address_geom) + """, + params={"west": west, "south": south, "east": east, "north": north}, +).show(10) ``` ## Operators Supporting Memory Limits -When a memory limit is configured, the following operators automatically spill -intermediate data to disk when they exceed their memory budget: +When a memory limit is configured, the following operators automatically spill intermediate data to disk when they exceed their memory budget. -In practice, this means memory limits and spilling can apply to both SedonaDB's -spatial operators and DataFusion's general-purpose operators used by common SQL -constructs: +In practice, this means memory limits and spilling can apply to both SedonaDB's spatial operators and DataFusion's general-purpose operators used by common SQL constructs. **SedonaDB:** -- **Spatial joins** -- Both the build-side (index construction, partition - collection) and probe-side (stream repartitioning) of SedonaDB's spatial - joins support memory-pressure-driven spilling. +- **Spatial joins** -- Both the build-side (index construction, partition collection) and probe-side (stream repartitioning) of SedonaDB's spatial joins support memory-pressure-driven spilling. **DataFusion (physical operators):** -This list is not exhaustive. 
Many other DataFusion physical operators and -execution strategies may allocate memory through the same runtime memory pool -and may spill to disk when memory limits are enforced. - -- **`ORDER BY` / sorted Top-K** (`SortExec`) -- External sort that - spills sorted runs to disk when memory is exhausted, then merges them. -- **Hash joins** (`HashJoinExec`) -- Hash join does not support spilling yet. - The query will fail with a memory reservation error if the hash table exceeds - the memory limit. -- **Sort-merge joins** (`SortMergeJoinExec`) -- Sort-merge join that spills - buffered batches to disk when the memory limit is exceeded. -- **`GROUP BY` aggregations** (`AggregateExec`) -- Grouped aggregation that - spills intermediate aggregation state to sorted spill files when memory is - exhausted. +This list is not exhaustive. Many other DataFusion physical operators and execution strategies may allocate memory through the same runtime memory pool and may spill to disk when memory limits are enforced. + +- **`ORDER BY` / sorted Top-K** (`SortExec`) -- External sort that spills sorted runs to disk when memory is exhausted, then merges them. +- **Hash joins** (`HashJoinExec`) -- Hash join does not support spilling yet. The query will fail with a memory reservation error if the hash table exceeds the memory limit. +- **Sort-merge joins** (`SortMergeJoinExec`) -- Sort-merge join that spills buffered batches to disk when the memory limit is exceeded. +- **`GROUP BY` aggregations** (`AggregateExec`) -- Grouped aggregation that spills intermediate aggregation state to sorted spill files when memory is exhausted. ## Advanced DataFusion Configurations -DataFusion provides additional execution configurations that affect spill -behavior. These can be set via SQL `SET` statements after connecting: +DataFusion provides additional execution configurations that affect spill behavior. These can be set via SQL `SET` statements after connecting. -!!! 
 note
-    Calling `sd.sql("SET ...")` initializes the internal context and freezes
-    runtime options immediately, before `.execute()` is run. Configure
-    `sd.options.*` runtime options (like `memory_limit` and `temp_dir`) before
-    calling any `sd.sql(...)`, including `SET` statements.
+> **Note:** Calling `sd.sql(...)` — including `sd.sql("SET ...")` — initializes the internal context and freezes runtime options immediately, before `.execute()` is run. Configure `sd.options.*` runtime options (like `memory_limit` and `temp_dir`) before calling any `sd.sql(...)`, including `SET` statements.
 
 ### Spill compression
 
-By default, data is written to spill files uncompressed. Enabling compression
-reduces the amount of disk I/O and disk space used at the cost of additional
-CPU work. This is beneficial when disk I/O throughput is low or when disk space
-is not large enough to hold uncompressed spill data.
+By default, data is written to spill files uncompressed. Enabling compression reduces the amount of disk I/O and disk space used at the cost of additional CPU work. This is beneficial when disk I/O throughput is low or when disk space is not large enough to hold uncompressed spill data.
+
 
 ```python
-# Enable LZ4 compression for spill files
+import sedona.db
+
+sd = sedona.db.connect()
+sd.options.memory_limit = "4gb"
+sd.options.memory_pool_type = "fair"
+
+# Enable LZ4 compression for spill files.
 sd.sql("SET datafusion.execution.spill_compression = 'lz4_frame'").execute()
 ```
 
 ### Maximum temporary directory size
 
-DataFusion limits the total size of temporary spill files to prevent unbounded
-disk usage. The default limit is **100 GB**. If your workload needs to spill
-more data than this, increase the limit:
+DataFusion limits the total size of temporary spill files to prevent unbounded disk usage. The default limit is **100 GB**. If your workload needs to spill more data than this, increase the limit.
+ ```python -# Increase the spill directory size limit to 500 GB +import sedona.db + +sd = sedona.db.connect() +sd.options.memory_limit = "4gb" +sd.options.memory_pool_type = "fair" + +# Increase the spill directory size limit to 500 GB. sd.sql("SET datafusion.runtime.max_temp_directory_size = '500gb'").execute() ``` @@ -205,21 +211,13 @@ sd.sql("SET datafusion.runtime.max_temp_directory_size = '500gb'").execute() ### Maximum number of open files -Large workloads that spill heavily can create a large number of temporary files. -During a spatial join, each parallel execution thread may create one spill file -per spatial partition. The total number of open spill files can therefore reach -**parallelism x number of spatial partitions**. For example, on an 8-CPU host -running a spatial join that produces 500 spatial partitions, up to -**8 x 500 = 4,000** spill files may be open simultaneously -- far exceeding the -default per-process file descriptor limit. +Large workloads that spill heavily can create a large number of temporary files. During a spatial join, each parallel execution thread may create one spill file per spatial partition. The total number of open spill files can therefore reach **parallelism x number of spatial partitions**. For example, on an 8-CPU host running a spatial join that produces 500 spatial partitions, up to **8 x 500 = 4,000** spill files may be open simultaneously -- far exceeding the default per-process file descriptor limit. -The operating system's per-process file descriptor limit must be high enough to -accommodate this, otherwise queries will fail with "too many open files" errors. +The operating system's per-process file descriptor limit must be high enough to accommodate this, otherwise queries will fail with "too many open files" errors. **Linux:** -The default limit is typically 1024, which is easily exceeded by spill-heavy -workloads like the example above. 
+The default limit is typically 1024, which is easily exceeded by spill-heavy workloads like the example above. To raise the limit permanently, add the following to `/etc/security/limits.conf`: @@ -240,6 +238,4 @@ ulimit -n ulimit -n 65535 ``` -This affects the current shell session. Persistent/system-wide limits are OS -and configuration dependent; consult your macOS configuration and documentation -if you need to raise the hard limit. +This affects the current shell session. Persistent/system-wide limits are OS and configuration dependent; consult your macOS configuration and documentation if you need to raise the hard limit. From b91357ace505de01856a0d07ca62d4db1f374c9a Mon Sep 17 00:00:00 2001 From: Kristin Cowalcijk Date: Wed, 4 Mar 2026 22:41:09 +0800 Subject: [PATCH 7/7] Refined the doc and make the example runnable and can finish quickly --- docs/memory-management.ipynb | 100 ++++++++++++++++++----------------- docs/memory-management.md | 87 +++++++++++++++--------------- 2 files changed, 95 insertions(+), 92 deletions(-) diff --git a/docs/memory-management.ipynb b/docs/memory-management.ipynb index 8a4f94e53..0c338d4c9 100644 --- a/docs/memory-management.ipynb +++ b/docs/memory-management.ipynb @@ -156,17 +156,46 @@ "source": [ "## Example: Spatial Join with Limited Memory\n", "\n", - "This example performs a spatial join between Overture addresses (points) and Overture buildings (polygons) using `ST_Contains`. Spatial joins are one of the most common workloads that benefit from memory limits and spill-to-disk.\n", - "\n", - "> **Note:** Overture removes old releases. See https://docs.overturemaps.org/release-calendar/#current-release for the latest version number and replace the relevant portion of the URLs below." + "This example performs a spatial join between Natural Earth cities (points) and Natural Earth countries (polygons) using `ST_Contains`. Spatial joins are one of the most common workloads that benefit from memory limits and spill-to-disk." 
] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 21, "id": "1ed77d58", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "┌───────────────┬─────────────────────────────┐\n", + "│ city_name ┆ country_name │\n", + "│ utf8 ┆ utf8 │\n", + "╞═══════════════╪═════════════════════════════╡\n", + "│ Suva ┆ Fiji │\n", + "├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n", + "│ Dodoma ┆ United Republic of Tanzania │\n", + "├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n", + "│ Dar es Salaam ┆ United Republic of Tanzania │\n", + "├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n", + "│ Bir Lehlou ┆ Western Sahara │\n", + "├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n", + "│ Ottawa ┆ Canada │\n", + "├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n", + "│ Vancouver ┆ Canada │\n", + "├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n", + "│ Toronto ┆ Canada │\n", + "├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n", + "│ San Francisco ┆ United States of America │\n", + "├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n", + "│ Denver ┆ United States of America │\n", + "├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n", + "│ Houston ┆ United States of America │\n", + "└───────────────┴─────────────────────────────┘\n" + ] + } + ], "source": [ "import sedona.db\n", "\n", @@ -178,50 +207,25 @@ "sd.options.unspillable_reserve_ratio = 0.2\n", "sd.options.temp_dir = \"/tmp/sedona-spill\"\n", "\n", - "# Overture buildings metadata is large; increase the remote metadata cache.\n", - "sd.sql(\"SET datafusion.runtime.metadata_cache_limit = '900M'\").execute()\n", - "\n", - "aws = {\"aws.skip_signature\": True, \"aws.region\": \"us-west-2\"}\n", + "cities = sd.read_parquet(\n", + " \"https://raw.githubusercontent.com/geoarrow/geoarrow-data/v0.2.0/natural-earth/files/natural-earth_cities_geo.parquet\"\n", + ")\n", + "cities.to_view(\"cities\", overwrite=True)\n", "\n", - "sd.read_parquet(\n", - 
" \"s3://overturemaps-us-west-2/release/2026-02-18.0/theme=buildings/type=building/\",\n", - " options=aws,\n", - ").to_view(\"buildings\")\n", - "\n", - "sd.read_parquet(\n", - " \"s3://overturemaps-us-west-2/release/2026-02-18.0/theme=addresses/type=address/\",\n", - " options=aws,\n", - ").to_view(\"addresses\")\n", - "\n", - "# Limit the scan to a region of interest using the bbox columns (helps avoid scanning the full tables).\n", - "west, south, east, north = -74.05, 40.68, -73.90, 40.88\n", + "countries = sd.read_parquet(\n", + " \"https://raw.githubusercontent.com/geoarrow/geoarrow-data/v0.2.0/natural-earth/files/natural-earth_countries_geo.parquet\"\n", + ")\n", + "countries.to_view(\"countries\", overwrite=True)\n", "\n", "sd.sql(\n", " \"\"\"\n", - " WITH buildings_roi AS (\n", - " SELECT id AS building_id, geometry AS building_geom\n", - " FROM buildings\n", - " WHERE bbox.xmin < $east\n", - " AND bbox.xmax > $west\n", - " AND bbox.ymin < $north\n", - " AND bbox.ymax > $south\n", - " ),\n", - " addresses_roi AS (\n", - " SELECT id AS address_id, geometry AS address_geom\n", - " FROM addresses\n", - " WHERE bbox.xmin < $east\n", - " AND bbox.xmax > $west\n", - " AND bbox.ymin < $north\n", - " AND bbox.ymax > $south\n", - " )\n", " SELECT\n", - " a.address_id,\n", - " b.building_id\n", - " FROM addresses_roi a\n", - " JOIN buildings_roi b\n", - " ON ST_Contains(b.building_geom, a.address_geom)\n", - " \"\"\",\n", - " params={\"west\": west, \"south\": south, \"east\": east, \"north\": north},\n", + " cities.name city_name,\n", + " countries.name country_name\n", + " FROM cities\n", + " JOIN countries\n", + " ON ST_Contains(countries.geometry, cities.geometry)\n", + " \"\"\"\n", ").show(10)" ] }, @@ -296,7 +300,7 @@ "source": [ "### Maximum temporary directory size\n", "\n", - "DataFusion limits the total size of temporary spill files to prevent unbounded disk usage. The default limit is **100 GB**. 
 If your workload needs to spill more data than this, increase the limit." + "DataFusion limits the total size of temporary spill files to prevent unbounded disk usage. The default limit is **100 GB**. If your workload needs to spill more data than this, increase the limit."
   ]
  },
  {
@@ -313,7 +317,7 @@
    "sd.options.memory_pool_type = \"fair\"\n",
    "\n",
    "# Increase the spill directory size limit to 500 GB.\n",
-    "sd.sql(\"SET datafusion.runtime.max_temp_directory_size = '500gb'\").execute()"
+    "sd.sql(\"SET datafusion.runtime.max_temp_directory_size = '500G'\").execute()"
   ]
  },
  {
@@ -358,7 +362,7 @@
 ],
 "metadata": {
   "kernelspec": {
-   "display_name": "python3 (ipykernel)",
+   "display_name": "sedonadb",
    "language": "python",
    "name": "python3"
   },
@@ -372,7 +376,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.13.3"
+   "version": "3.13.4"
   }
 },
 "nbformat": 4,
diff --git a/docs/memory-management.md b/docs/memory-management.md
index 4b485a011..0318e8070 100644
--- a/docs/memory-management.md
+++ b/docs/memory-management.md
@@ -87,9 +87,7 @@
 
 ## Example: Spatial Join with Limited Memory
 
-This example performs a spatial join between Overture addresses (points) and Overture buildings (polygons) using `ST_Contains`. Spatial joins are one of the most common workloads that benefit from memory limits and spill-to-disk.
-
-> **Note:** Overture removes old releases. See https://docs.overturemaps.org/release-calendar/#current-release for the latest version number and replace the relevant portion of the URLs below.
+This example performs a spatial join between Natural Earth cities (points) and Natural Earth countries (polygons) using `ST_Contains`. Spatial joins are one of the most common workloads that benefit from memory limits and spill-to-disk.
```python @@ -103,53 +101,54 @@ sd.options.memory_pool_type = "fair" sd.options.unspillable_reserve_ratio = 0.2 sd.options.temp_dir = "/tmp/sedona-spill" -# Overture buildings metadata is large; increase the remote metadata cache. -sd.sql("SET datafusion.runtime.metadata_cache_limit = '900M'").execute() - -aws = {"aws.skip_signature": True, "aws.region": "us-west-2"} - -sd.read_parquet( - "s3://overturemaps-us-west-2/release/2026-02-18.0/theme=buildings/type=building/", - options=aws, -).to_view("buildings") - -sd.read_parquet( - "s3://overturemaps-us-west-2/release/2026-02-18.0/theme=addresses/type=address/", - options=aws, -).to_view("addresses") +cities = sd.read_parquet( + "https://raw.githubusercontent.com/geoarrow/geoarrow-data/v0.2.0/natural-earth/files/natural-earth_cities_geo.parquet" +) +cities.to_view("cities", overwrite=True) -# Limit the scan to a region of interest using the bbox columns (helps avoid scanning the full tables). -west, south, east, north = -74.05, 40.68, -73.90, 40.88 +countries = sd.read_parquet( + "https://raw.githubusercontent.com/geoarrow/geoarrow-data/v0.2.0/natural-earth/files/natural-earth_countries_geo.parquet" +) +countries.to_view("countries", overwrite=True) sd.sql( """ - WITH buildings_roi AS ( - SELECT id AS building_id, geometry AS building_geom - FROM buildings - WHERE bbox.xmin < $east - AND bbox.xmax > $west - AND bbox.ymin < $north - AND bbox.ymax > $south - ), - addresses_roi AS ( - SELECT id AS address_id, geometry AS address_geom - FROM addresses - WHERE bbox.xmin < $east - AND bbox.xmax > $west - AND bbox.ymin < $north - AND bbox.ymax > $south - ) SELECT - a.address_id, - b.building_id - FROM addresses_roi a - JOIN buildings_roi b - ON ST_Contains(b.building_geom, a.address_geom) - """, - params={"west": west, "south": south, "east": east, "north": north}, + cities.name city_name, + countries.name country_name + FROM cities + JOIN countries + ON ST_Contains(countries.geometry, cities.geometry) + """ ).show(10) ``` 
+    ┌───────────────┬─────────────────────────────┐
+    │ city_name     ┆ country_name                │
+    │ utf8          ┆ utf8                        │
+    ╞═══════════════╪═════════════════════════════╡
+    │ Suva          ┆ Fiji                        │
+    ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+    │ Dodoma        ┆ United Republic of Tanzania │
+    ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+    │ Dar es Salaam ┆ United Republic of Tanzania │
+    ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+    │ Bir Lehlou    ┆ Western Sahara              │
+    ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+    │ Ottawa        ┆ Canada                      │
+    ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+    │ Vancouver     ┆ Canada                      │
+    ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+    │ Toronto       ┆ Canada                      │
+    ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+    │ San Francisco ┆ United States of America    │
+    ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+    │ Denver        ┆ United States of America    │
+    ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+    │ Houston       ┆ United States of America    │
+    └───────────────┴─────────────────────────────┘
+
+
 ## Operators Supporting Memory Limits
 
 When a memory limit is configured, the following operators automatically spill intermediate data to disk when they exceed their memory budget.
@@ -193,7 +192,7 @@
 sd.sql("SET datafusion.execution.spill_compression = 'lz4_frame'").execute()
 
 ### Maximum temporary directory size
 
-DataFusion limits the total size of temporary spill files to prevent unbounded disk usage. The default limit is **100 GB**. If your workload needs to spill more data than this, increase the limit.
+DataFusion limits the total size of temporary spill files to prevent unbounded disk usage. The default limit is **100 GB**. If your workload needs to spill more data than this, increase the limit.
 
 
 ```python
@@ -204,7 +203,7 @@
 sd.options.memory_limit = "4gb"
 sd.options.memory_pool_type = "fair"
 
 # Increase the spill directory size limit to 500 GB.
-sd.sql("SET datafusion.runtime.max_temp_directory_size = '500gb'").execute() +sd.sql("SET datafusion.runtime.max_temp_directory_size = '500G'").execute() ``` ## System Configuration