Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add geoparquet output to /items and /search endpoints #29

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions runtimes/eoapi/stac/eoapi/stac/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,7 @@ def register_get_item_collection(self):
MimeTypes.html.value: {},
MimeTypes.csv.value: {},
MimeTypes.geojsonseq.value: {},
MimeTypes.parquet.value: {},
},
"model": api.ItemCollection,
},
Expand Down Expand Up @@ -191,6 +192,7 @@ def register_get_search(self):
MimeTypes.html.value: {},
MimeTypes.csv.value: {},
MimeTypes.geojsonseq.value: {},
MimeTypes.parquet.value: {},
},
"model": api.ItemCollection,
},
Expand Down Expand Up @@ -222,6 +224,7 @@ def register_post_search(self):
MimeTypes.geojson.value: {},
MimeTypes.csv.value: {},
MimeTypes.geojsonseq.value: {},
MimeTypes.parquet.value: {},
},
"model": api.ItemCollection,
},
Expand Down
57 changes: 54 additions & 3 deletions runtimes/eoapi/stac/eoapi/stac/client.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
"""eoapi-devseed: Custom pgstac client."""

import csv
import os
import re
import tempfile
from typing import (
Any,
Dict,
Expand All @@ -18,6 +20,7 @@
import attr
import jinja2
import orjson
import stacrs
from fastapi import Request
from geojson_pydantic.geometries import parse_geometry_obj
from stac_fastapi.api.models import JSONResponse
Expand All @@ -37,14 +40,14 @@
)
from stac_pydantic.links import Relations
from stac_pydantic.shared import BBox, MimeTypes
from starlette.responses import StreamingResponse
from starlette.responses import Response, StreamingResponse
from starlette.templating import Jinja2Templates, _TemplateResponse

ResponseType = Literal["json", "html"]
GeoResponseType = Literal["geojson", "html"]
QueryablesResponseType = Literal["jsonschema", "html"]
GeoMultiResponseType = Literal["geojson", "html", "geojsonseq", "csv"]
PostMultiResponseType = Literal["geojson", "geojsonseq", "csv"]
GeoMultiResponseType = Literal["geojson", "html", "geojsonseq", "csv", "parquet"]
PostMultiResponseType = Literal["geojson", "geojsonseq", "csv", "parquet"]


jinja2_env = jinja2.Environment(
Expand Down Expand Up @@ -206,6 +209,24 @@ def items_to_csv_rows(items: Iterable[Dict]) -> Generator[str, None, None]:
return _create_csv_rows(rows)


def create_parquet(items: Dict) -> bytes:
    """Serialize ``items`` to parquet and return the encoded bytes.

    ``stacrs.write`` is handed a file path, so the payload is written to a
    temporary ``.parquet`` file, read back into memory, and the file is
    removed afterwards whether or not the write/read succeeded.

    Args:
        items: the object passed through unchanged to ``stacrs.write``.

    Returns:
        The raw content of the parquet file produced by ``stacrs.write``.
    """
    # Reserve a uniquely named file, then release our descriptor so that
    # ``stacrs.write`` can open the path on its own.
    handle, parquet_path = tempfile.mkstemp(suffix=".parquet")
    os.close(handle)

    try:
        stacrs.write(parquet_path, items)
        with open(parquet_path, "rb") as stream:
            return stream.read()
    finally:
        # Always clean up the temporary file, even when writing fails.
        os.remove(parquet_path)


@attr.s
class FiltersClient(PgSTACFiltersClient):
async def get_queryables(
Expand Down Expand Up @@ -529,6 +550,16 @@ async def item_collection(
},
)

elif output_type == MimeTypes.parquet:
return Response(
create_parquet(item_collection),
media_type=MimeTypes.parquet,
headers={
"Content-Disposition": "attachment;filename=items.parquet",
**additional_headers,
},
)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Q: What happens when we're using the fields extension? The feature collection won't contain valid STAC items ...

Copy link

@gadomski gadomski Feb 19, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No issues from the stacrs side; we don't do any validation. I think if you rip out the geometry it might blow up, though. Now that I think about it more, I think it should be fine even w/o geometry ... we should try though.


# If we have the `fields` extension enabled
# we need to avoid Pydantic validation because the
# Items might not be valid STAC Item objects
Expand Down Expand Up @@ -671,6 +702,16 @@ async def get_search(
},
)

elif output_type == MimeTypes.parquet:
return Response(
create_parquet(item_collection),
media_type=MimeTypes.parquet,
headers={
"Content-Disposition": "attachment;filename=items.parquet",
**additional_headers,
},
)

if fields := getattr(search_request, "fields", None):
if fields.include or fields.exclude:
return JSONResponse(item_collection) # type: ignore
Expand Down Expand Up @@ -726,6 +767,16 @@ async def post_search(
},
)

elif output_type == MimeTypes.parquet:
return Response(
create_parquet(item_collection),
media_type=MimeTypes.parquet,
headers={
"Content-Disposition": "attachment;filename=items.parquet",
**additional_headers,
},
)

if fields := getattr(search_request, "fields", None):
if fields.include or fields.exclude:
return JSONResponse(item_collection) # type: ignore
Expand Down
2 changes: 1 addition & 1 deletion runtimes/eoapi/stac/eoapi/stac/extensions.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ class HTMLorGeoGetRequestMulti(APIRequest):
"""HTML, GeoJSON, GeoJSONSeq, CSV or Parquet output."""

f: Annotated[
Optional[Literal["geojson", "html", "csv", "geojsonseq"]],
Optional[Literal["geojson", "html", "csv", "geojsonseq", "parquet"]],
Query(description="Response MediaType."),
] = attr.ib(default=None)

Expand Down
1 change: 1 addition & 0 deletions runtimes/eoapi/stac/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ classifiers = [
dynamic = ["version"]
dependencies = [
"stac-fastapi.pgstac>=4.0.2,<4.1",
"stacrs",
"jinja2>=2.11.2,<4.0.0",
"starlette-cramjam>=0.4,<0.5",
"psycopg_pool",
Expand Down