Skip to content

Commit b5ec2b0

Browse files
committed
Merge branch 'main' into fix-get-sort
2 parents 6674fc2 + 8b12370 commit b5ec2b0

File tree

11 files changed

+311
-72
lines changed

11 files changed

+311
-72
lines changed

.github/workflows/cicd.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ jobs:
1616
services:
1717

1818
elasticsearch_8_svc:
19-
image: docker.elastic.co/elasticsearch/elasticsearch:8.1.3
19+
image: docker.elastic.co/elasticsearch/elasticsearch:8.10.4
2020
env:
2121
cluster.name: stac-cluster
2222
node.name: es01

CHANGELOG.md

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,17 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
1010
### Added
1111

1212
- Collection-level Assets to the CollectionSerializer [#148](https://github.com/stac-utils/stac-fastapi-elasticsearch/issues/148)
13-
14-
### Added
15-
13+
- Pagination for the GET /collections (all collections) route [#164](https://github.com/stac-utils/stac-fastapi-elasticsearch/pull/164)
1614
- Examples folder with example docker setup for running sfes from pip [#147](https://github.com/stac-utils/stac-fastapi-elasticsearch/pull/147)
15+
- GET /search filter extension queries [#163](https://github.com/stac-utils/stac-fastapi-elasticsearch/pull/163)
16+
- Support for GET /search intersection queries [#158](https://github.com/stac-utils/stac-fastapi-elasticsearch/issues/158)
1717

1818
### Changed
19+
20+
- Updated Elasticsearch version from 8.1.3 to 8.10.4 in CI/CD (GitHub Actions) [#164](https://github.com/stac-utils/stac-fastapi-elasticsearch/pull/164)
21+
- Updated core stac-fastapi libraries to 2.4.8 from 2.4.3 [#151](https://github.com/stac-utils/stac-fastapi-elasticsearch/pull/151)
22+
- Use aliases on Elasticsearch indices, add number suffix in index name. [#152](https://github.com/stac-utils/stac-fastapi-elasticsearch/pull/152)
23+
1924
### Fixed
2025

2126
- Corrected the closing of client connections in ES index management functions [#132](https://github.com/stac-utils/stac-fastapi-elasticsearch/issues/132)

README.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,17 @@ curl -X "POST" "http://localhost:8080/collections" \
6161
```
6262

6363
Note: this "Collections Transaction" behavior is not part of the STAC API, but may be soon.
64+
65+
66+
## Collection pagination
67+
68+
The collections route handles optional `limit` and `token` parameters. The `links` field that is
69+
returned from the `/collections` route contains a `next` link with the token that can be used to
70+
get the next page of results.
6471

72+
```shell
73+
curl -X "GET" "http://localhost:8080/collections?limit=1&token=example_token"
74+
```
6575

6676
## Testing
6777

docker-compose.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ services:
3131

3232
elasticsearch:
3333
container_name: es-container
34-
image: docker.elastic.co/elasticsearch/elasticsearch:${ELASTICSEARCH_VERSION:-8.1.3}
34+
image: docker.elastic.co/elasticsearch/elasticsearch:${ELASTICSEARCH_VERSION:-8.10.4}
3535
environment:
3636
ES_JAVA_OPTS: -Xms512m -Xmx1g
3737
volumes:

stac_fastapi/elasticsearch/setup.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
"elasticsearch-dsl==7.4.1",
1818
"pystac[validation]",
1919
"uvicorn",
20+
"orjson",
2021
"overrides",
2122
"starlette",
2223
"geojson-pydantic",

stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/core.py

Lines changed: 77 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,22 @@
11
"""Item crud client."""
2-
import json
32
import logging
3+
import re
4+
from base64 import urlsafe_b64encode
45
from datetime import datetime as datetime_type
56
from datetime import timezone
67
from typing import Any, Dict, List, Optional, Set, Type, Union
7-
from urllib.parse import urljoin
8+
from urllib.parse import unquote_plus, urljoin
89

910
import attr
11+
import orjson
1012
import stac_pydantic
11-
from fastapi import HTTPException
13+
from fastapi import HTTPException, Request
1214
from overrides import overrides
1315
from pydantic import ValidationError
16+
from pygeofilter.backends.cql2_json import to_cql2
17+
from pygeofilter.parsers.cql2_text import parse as parse_cql2_text
1418
from stac_pydantic.links import Relations
1519
from stac_pydantic.shared import MimeTypes
16-
from starlette.requests import Request
1720

1821
from stac_fastapi.elasticsearch import serializers
1922
from stac_fastapi.elasticsearch.config import ElasticsearchSettings
@@ -80,30 +83,58 @@ async def all_collections(self, **kwargs) -> Collections:
8083
Raises:
8184
Exception: If any error occurs while reading the collections from the database.
8285
"""
86+
request: Request = kwargs["request"]
8387
base_url = str(kwargs["request"].base_url)
8488

89+
limit = (
90+
int(request.query_params["limit"])
91+
if "limit" in request.query_params
92+
else 10
93+
)
94+
token = (
95+
request.query_params["token"] if "token" in request.query_params else None
96+
)
97+
98+
hits = await self.database.get_all_collections(limit=limit, token=token)
99+
100+
next_search_after = None
101+
next_link = None
102+
if len(hits) == limit:
103+
last_hit = hits[-1]
104+
next_search_after = last_hit["sort"]
105+
next_token = urlsafe_b64encode(
106+
",".join(map(str, next_search_after)).encode()
107+
).decode()
108+
paging_links = PagingLinks(next=next_token, request=request)
109+
next_link = paging_links.link_next()
110+
111+
links = [
112+
{
113+
"rel": Relations.root.value,
114+
"type": MimeTypes.json,
115+
"href": base_url,
116+
},
117+
{
118+
"rel": Relations.parent.value,
119+
"type": MimeTypes.json,
120+
"href": base_url,
121+
},
122+
{
123+
"rel": Relations.self.value,
124+
"type": MimeTypes.json,
125+
"href": urljoin(base_url, "collections"),
126+
},
127+
]
128+
129+
if next_link:
130+
links.append(next_link)
131+
85132
return Collections(
86133
collections=[
87-
self.collection_serializer.db_to_stac(c, base_url=base_url)
88-
for c in await self.database.get_all_collections()
89-
],
90-
links=[
91-
{
92-
"rel": Relations.root.value,
93-
"type": MimeTypes.json,
94-
"href": base_url,
95-
},
96-
{
97-
"rel": Relations.parent.value,
98-
"type": MimeTypes.json,
99-
"href": base_url,
100-
},
101-
{
102-
"rel": Relations.self.value,
103-
"type": MimeTypes.json,
104-
"href": urljoin(base_url, "collections"),
105-
},
134+
self.collection_serializer.db_to_stac(c["_source"], base_url=base_url)
135+
for c in hits
106136
],
137+
links=links,
107138
)
108139

109140
@overrides
@@ -274,9 +305,9 @@ def _return_date(interval_str):
274305

275306
return {"lte": end_date, "gte": start_date}
276307

277-
@overrides
278308
async def get_search(
279309
self,
310+
request: Request,
280311
collections: Optional[List[str]] = None,
281312
ids: Optional[List[str]] = None,
282313
bbox: Optional[List[NumType]] = None,
@@ -287,8 +318,8 @@ async def get_search(
287318
fields: Optional[List[str]] = None,
288319
sortby: Optional[str] = None,
289320
intersects: Optional[str] = None,
290-
# filter: Optional[str] = None, # todo: requires fastapi > 2.3 unreleased
291-
# filter_lang: Optional[str] = None, # todo: requires fastapi > 2.3 unreleased
321+
filter: Optional[str] = None,
322+
filter_lang: Optional[str] = None,
292323
**kwargs,
293324
) -> ItemCollection:
294325
"""Get search results from the database.
@@ -318,17 +349,24 @@ async def get_search(
318349
"bbox": bbox,
319350
"limit": limit,
320351
"token": token,
321-
"query": json.loads(query) if query else query,
352+
"query": orjson.loads(query) if query else query,
322353
}
323354

355+
# this is borrowed from stac-fastapi-pgstac
356+
# Kludgy fix because using factory does not allow alias for filter-lang
357+
query_params = str(request.query_params)
358+
if filter_lang is None:
359+
match = re.search(r"filter-lang=([a-z0-9-]+)", query_params, re.IGNORECASE)
360+
if match:
361+
filter_lang = match.group(1)
362+
324363
if datetime:
325364
base_args["datetime"] = datetime
326365

327366
if intersects:
328-
base_args["intersects"] = intersects
367+
base_args["intersects"] = orjson.loads(unquote_plus(intersects))
329368

330369
if sortby:
331-
# https://github.com/radiantearth/stac-spec/tree/master/api-spec/extensions/sort#http-get-or-post-form
332370
sort_param = []
333371
for sort in sortby:
334372
sort_param.append(
@@ -340,12 +378,13 @@ async def get_search(
340378
print(sort_param)
341379
base_args["sortby"] = sort_param
342380

343-
# todo: requires fastapi > 2.3 unreleased
344-
# if filter:
345-
# if filter_lang == "cql2-text":
346-
# base_args["filter-lang"] = "cql2-json"
347-
# base_args["filter"] = orjson.loads(to_cql2(parse_cql2_text(filter)))
348-
# print(f'>>> {base_args["filter"]}')
381+
if filter:
382+
if filter_lang == "cql2-text":
383+
base_args["filter-lang"] = "cql2-json"
384+
base_args["filter"] = orjson.loads(to_cql2(parse_cql2_text(filter)))
385+
else:
386+
base_args["filter-lang"] = "cql2-json"
387+
base_args["filter"] = orjson.loads(unquote_plus(filter))
349388

350389
if fields:
351390
includes = set()
@@ -364,13 +403,12 @@ async def get_search(
364403
search_request = self.post_request_model(**base_args)
365404
except ValidationError:
366405
raise HTTPException(status_code=400, detail="Invalid parameters provided")
367-
resp = await self.post_search(search_request, request=kwargs["request"])
406+
resp = await self.post_search(search_request=search_request, request=request)
368407

369408
return resp
370409

371-
@overrides
372410
async def post_search(
373-
self, search_request: BaseSearchPostRequest, **kwargs
411+
self, search_request: BaseSearchPostRequest, request: Request
374412
) -> ItemCollection:
375413
"""
376414
Perform a POST search on the catalog.
@@ -385,7 +423,6 @@ async def post_search(
385423
Raises:
386424
HTTPException: If there is an error with the cql2_json filter.
387425
"""
388-
request: Request = kwargs["request"]
389426
base_url = str(request.base_url)
390427

391428
search = self.database.make_search()
@@ -472,7 +509,7 @@ async def post_search(
472509
filter_kwargs = search_request.fields.filter_fields
473510

474511
items = [
475-
json.loads(stac_pydantic.Item(**feat).json(**filter_kwargs))
512+
orjson.loads(stac_pydantic.Item(**feat).json(**filter_kwargs))
476513
for feat in items
477514
]
478515

stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py

Lines changed: 40 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939
":",
4040
}
4141

42-
DEFAULT_INDICES = f"*,-*kibana*,-{COLLECTIONS_INDEX}"
42+
ITEM_INDICES = f"{ITEMS_INDEX_PREFIX}*,-*kibana*,-{COLLECTIONS_INDEX}*"
4343

4444
DEFAULT_SORT = {
4545
"properties.datetime": {"order": "desc"},
@@ -164,7 +164,7 @@ def indices(collection_ids: Optional[List[str]]) -> str:
164164
A string of comma-separated index names. If `collection_ids` is None, returns the default indices.
165165
"""
166166
if collection_ids is None:
167-
return DEFAULT_INDICES
167+
return ITEM_INDICES
168168
else:
169169
return ",".join([index_by_collection_id(c) for c in collection_ids])
170170

@@ -178,7 +178,8 @@ async def create_collection_index() -> None:
178178
client = AsyncElasticsearchSettings().create_client
179179

180180
await client.indices.create(
181-
index=COLLECTIONS_INDEX,
181+
index=f"{COLLECTIONS_INDEX}-000001",
182+
aliases={COLLECTIONS_INDEX: {}},
182183
mappings=ES_COLLECTIONS_MAPPINGS,
183184
ignore=400, # ignore 400 already exists code
184185
)
@@ -197,9 +198,11 @@ async def create_item_index(collection_id: str):
197198
198199
"""
199200
client = AsyncElasticsearchSettings().create_client
201+
index_name = index_by_collection_id(collection_id)
200202

201203
await client.indices.create(
202-
index=index_by_collection_id(collection_id),
204+
index=f"{index_by_collection_id(collection_id)}-000001",
205+
aliases={index_name: {}},
203206
mappings=ES_ITEMS_MAPPINGS,
204207
settings=ES_ITEMS_SETTINGS,
205208
ignore=400, # ignore 400 already exists code
@@ -215,7 +218,14 @@ async def delete_item_index(collection_id: str):
215218
"""
216219
client = AsyncElasticsearchSettings().create_client
217220

218-
await client.indices.delete(index=index_by_collection_id(collection_id))
221+
name = index_by_collection_id(collection_id)
222+
resolved = await client.indices.resolve_index(name=name)
223+
if "aliases" in resolved and resolved["aliases"]:
224+
[alias] = resolved["aliases"]
225+
await client.indices.delete_alias(index=alias["indices"], name=alias["name"])
226+
await client.indices.delete(index=alias["indices"])
227+
else:
228+
await client.indices.delete(index=name)
219229
await client.close()
220230

221231

@@ -295,21 +305,34 @@ class DatabaseLogic:
295305

296306
"""CORE LOGIC"""
297307

298-
async def get_all_collections(self) -> Iterable[Dict[str, Any]]:
308+
async def get_all_collections(
309+
self, token: Optional[str], limit: int
310+
) -> Iterable[Dict[str, Any]]:
299311
"""Retrieve a list of all collections from the database.
300312
313+
Args:
314+
token (Optional[str]): The token used to return the next set of results.
315+
limit (int): Number of results to return
316+
301317
Returns:
302318
collections (Iterable[Dict[str, Any]]): A list of Elasticsearch hit dictionaries; the source data for each collection is under the hit's `_source` key.
303319
304320
Notes:
305321
The collections are retrieved from the Elasticsearch database using the `client.search` method,
306-
with the `COLLECTIONS_INDEX` as the target index and `size=1000` to retrieve up to 1000 records.
322+
with the `COLLECTIONS_INDEX` as the target index and `size=limit` to retrieve records.
307323
The result is the list of raw hits from the search response; each hit carries the collection under `_source` and its sort values under `sort`.
308324
"""
309-
# https://github.com/stac-utils/stac-fastapi-elasticsearch/issues/65
310-
# collections should be paginated, but at least return more than the default 10 for now
311-
collections = await self.client.search(index=COLLECTIONS_INDEX, size=1000)
312-
return (c["_source"] for c in collections["hits"]["hits"])
325+
search_after = None
326+
if token:
327+
search_after = urlsafe_b64decode(token.encode()).decode().split(",")
328+
collections = await self.client.search(
329+
index=COLLECTIONS_INDEX,
330+
search_after=search_after,
331+
size=limit,
332+
sort={"id": {"order": "asc"}},
333+
)
334+
hits = collections["hits"]["hits"]
335+
return hits
313336

314337
async def get_one_item(self, collection_id: str, item_id: str) -> Dict:
315338
"""Retrieve a single item from the database.
@@ -773,14 +796,11 @@ async def bulk_async(
773796
`mk_actions` function is called to generate a list of actions for the bulk insert. If `refresh` is set to True, the
774797
index is refreshed after the bulk insert. The function does not return any value.
775798
"""
776-
await asyncio.get_event_loop().run_in_executor(
777-
None,
778-
lambda: helpers.bulk(
779-
self.sync_client,
780-
mk_actions(collection_id, processed_items),
781-
refresh=refresh,
782-
raise_on_error=False,
783-
),
799+
await helpers.async_bulk(
800+
self.client,
801+
mk_actions(collection_id, processed_items),
802+
refresh=refresh,
803+
raise_on_error=False,
784804
)
785805

786806
def bulk_sync(
@@ -811,7 +831,7 @@ def bulk_sync(
811831
async def delete_items(self) -> None:
812832
"""Danger. this is only for tests."""
813833
await self.client.delete_by_query(
814-
index=DEFAULT_INDICES,
834+
index=ITEM_INDICES,
815835
body={"query": {"match_all": {}}},
816836
wait_for_completion=True,
817837
)

0 commit comments

Comments
 (0)