Commit 0ac4d2f

Merge pull request #218 from stac-utils/readme-update
Add notes to help new users get started / update data loader tool
2 parents 987f0b9 + 47b77e5 commit 0ac4d2f

7 files changed: +145 additions, −80 deletions

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
@@ -15,6 +15,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
 ### Changed
 
 - Updated the pip_docker example to use stac-fastapi.elasticsearch 2.1.0 and the elasticsearch 8.11.0 docker image. [#216](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/216)
+- Updated the Data Loader CLI tool to accept a base_url, a data directory, a custom collection id, and an option to use bulk insert. [#218](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/218)
 
 ### Fixed

Makefile

Lines changed: 0 additions & 4 deletions
@@ -104,7 +104,3 @@ install-es: pybase-install
 .PHONY: install-os
 install-os: pybase-install
 	pip install -e ./stac_fastapi/opensearch[dev,server]
-
-.PHONY: ingest
-ingest:
-	python3 data_loader/data_loader.py
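Note: with the `ingest` target removed here (and `data_loader/data_loader.py` deleted below), the equivalent step is to call the new top-level script directly. A minimal sketch, assuming the API is running on localhost:8080 as in the README:

```shell
python3 data_loader.py --base-url http://localhost:8080
```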

README.md

Lines changed: 22 additions & 6 deletions
@@ -6,6 +6,8 @@
 
 - Our Api core library can be used to create custom backends. See [stac-fastapi-mongo](https://github.com/Healy-Hyperspatial/stac-fastapi-mongo) for a working example.
 - Reach out on our [Gitter](https://app.gitter.im/#/room/#stac-fastapi-elasticsearch_community:gitter.im) channel or feel free to add to our [Discussions](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/discussions) page here on github.
+- There is [Postman](https://documenter.getpostman.com/view/12888943/2s8ZDSdRHA) documentation here for examples on how to run some of the API routes locally - after starting the elasticsearch backend via the docker-compose.yml file.
+- The `/examples` folder shows an example of running stac-fastapi-elasticsearch from PyPI in docker without needing any code from the repository. There is also a Postman collection here that you can load into Postman for testing the API routes.
 
 ### To install from PyPI:
 
@@ -91,6 +93,26 @@ get the next page of results.
 curl -X "GET" "http://localhost:8080/collections?limit=1&token=example_token"
 ```
 
+## Ingesting Sample Data CLI Tool
+
+```shell
+Usage: data_loader.py [OPTIONS]
+
+  Load STAC items into the database.
+
+Options:
+  --base-url TEXT       Base URL of the STAC API  [required]
+  --collection-id TEXT  ID of the collection to which items are added
+  --use-bulk            Use bulk insert method for items
+  --data-dir PATH       Directory containing collection.json and feature
+                        collection file
+  --help                Show this message and exit.
+```
+
+```shell
+python3 data_loader.py --base-url http://localhost:8080
+```
+
 ## Testing
 
 ```shell
@@ -108,12 +130,6 @@ Test against Elasticsearch only
 make test-elasticsearch
 ```
 
-## Ingest sample data
-
-```shell
-make ingest
-```
-
 ## Elasticsearch Mappings
 
 Mappings apply to search index, not source. The mappings are stored in index templates on application startup.
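The options in the help text added above can be combined. A hedged example of a bulk ingest into a custom collection (the collection id `my-test-collection` is purely illustrative; `sample_data/` is the documented default for `--data-dir`):

```shell
python3 data_loader.py \
  --base-url http://localhost:8080 \
  --collection-id my-test-collection \
  --data-dir sample_data/ \
  --use-bulk
```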

data_loader.py

Lines changed: 121 additions & 0 deletions
@@ -0,0 +1,121 @@
+"""Data Loader CLI STAC_API Ingestion Tool."""
+import json
+import os
+
+import click
+import requests
+
+
+def load_data(data_dir, filename):
+    """Load json data from a file within the specified data directory."""
+    filepath = os.path.join(data_dir, filename)
+    if not os.path.exists(filepath):
+        click.secho(f"File not found: {filepath}", fg="red", err=True)
+        raise click.Abort()
+    with open(filepath) as file:
+        return json.load(file)
+
+
+def load_collection(base_url, collection_id, data_dir):
+    """Load a STAC collection into the database."""
+    collection = load_data(data_dir, "collection.json")
+    collection["id"] = collection_id
+    try:
+        resp = requests.post(f"{base_url}/collections", json=collection)
+        if resp.status_code == 200:
+            click.echo(f"Status code: {resp.status_code}")
+            click.echo(f"Added collection: {collection['id']}")
+        elif resp.status_code == 409:
+            click.echo(f"Status code: {resp.status_code}")
+            click.echo(f"Collection: {collection['id']} already exists")
+    except requests.ConnectionError:
+        click.secho("Failed to connect", fg="red", err=True)
+
+
+def load_items(base_url, collection_id, use_bulk, data_dir):
+    """Load STAC items into the database based on the method selected."""
+    # Attempt to dynamically find a suitable feature collection file
+    feature_files = [
+        file
+        for file in os.listdir(data_dir)
+        if file.endswith(".json") and file != "collection.json"
+    ]
+    if not feature_files:
+        click.secho(
+            "No feature collection files found in the specified directory.",
+            fg="red",
+            err=True,
+        )
+        raise click.Abort()
+    feature_collection_file = feature_files[
+        0
+    ]  # Use the first found feature collection file
+    feature_collection = load_data(data_dir, feature_collection_file)
+
+    load_collection(base_url, collection_id, data_dir)
+    if use_bulk:
+        load_items_bulk_insert(base_url, collection_id, feature_collection, data_dir)
+    else:
+        load_items_one_by_one(base_url, collection_id, feature_collection, data_dir)
+
+
+def load_items_one_by_one(base_url, collection_id, feature_collection, data_dir):
+    """Load STAC items into the database one by one."""
+    for feature in feature_collection["features"]:
+        try:
+            feature["collection"] = collection_id
+            resp = requests.post(
+                f"{base_url}/collections/{collection_id}/items", json=feature
+            )
+            if resp.status_code == 200:
+                click.echo(f"Status code: {resp.status_code}")
+                click.echo(f"Added item: {feature['id']}")
+            elif resp.status_code == 409:
+                click.echo(f"Status code: {resp.status_code}")
+                click.echo(f"Item: {feature['id']} already exists")
+        except requests.ConnectionError:
+            click.secho("Failed to connect", fg="red", err=True)
+
+
+def load_items_bulk_insert(base_url, collection_id, feature_collection, data_dir):
+    """Load STAC items into the database via bulk insert."""
+    try:
+        for i, _ in enumerate(feature_collection["features"]):
+            feature_collection["features"][i]["collection"] = collection_id
+        resp = requests.post(
+            f"{base_url}/collections/{collection_id}/items", json=feature_collection
+        )
+        if resp.status_code == 200:
+            click.echo(f"Status code: {resp.status_code}")
+            click.echo("Bulk inserted items successfully.")
+        elif resp.status_code == 204:
+            click.echo(f"Status code: {resp.status_code}")
+            click.echo("Bulk update successful, no content returned.")
+        elif resp.status_code == 409:
+            click.echo(f"Status code: {resp.status_code}")
+            click.echo("Conflict detected, some items might already exist.")
+    except requests.ConnectionError:
+        click.secho("Failed to connect", fg="red", err=True)
+
+
+@click.command()
+@click.option("--base-url", required=True, help="Base URL of the STAC API")
+@click.option(
+    "--collection-id",
+    default="test-collection",
+    help="ID of the collection to which items are added",
+)
+@click.option("--use-bulk", is_flag=True, help="Use bulk insert method for items")
+@click.option(
+    "--data-dir",
+    type=click.Path(exists=True),
+    default="sample_data/",
+    help="Directory containing collection.json and feature collection file",
+)
+def main(base_url, collection_id, use_bulk, data_dir):
+    """Load STAC items into the database."""
+    load_items(base_url, collection_id, use_bulk, data_dir)
+
+
+if __name__ == "__main__":
+    main()
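As a usage note: `load_items` above loads `collection.json` as the collection and treats the first other `*.json` file it finds as the item FeatureCollection, so the `--data-dir` is expected to look roughly like the sketch below (`items.json` is only a placeholder name, not a file shipped by this commit):

```shell
ls sample_data/
# collection.json   <- STAC Collection; its "id" is overwritten by --collection-id
# items.json        <- placeholder; any other .json file is loaded as the item FeatureCollection
```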

data_loader/data_loader.py

Lines changed: 0 additions & 69 deletions
This file was deleted.

data_loader/setup_data/collection.json renamed to sample_data/collection.json

Lines changed: 1 addition & 1 deletion
@@ -1,5 +1,5 @@
 {
-  "id":"sentinel-s2-l2a-cogs",
+  "id":"sentinel-s2-l2a-cogs-test",
   "stac_version":"1.0.0",
   "description":"Sentinel-2a and Sentinel-2b imagery, processed to Level 2A (Surface Reflectance) and converted to Cloud-Optimized GeoTIFFs",
   "links":[
