Skip to content

Commit caac373

Browse files
Allow capitals in item index aliases (#329)
**Related Issue(s):** - #328 **Description:** Separating alias and index names to allow for aliases to include capitalisation. Index names now include the lowered and hex encoded collection ID to prevent clashes of indices. **PR Checklist:** - [x] Code is formatted and linted (run `pre-commit run --all-files`) - [x] Tests pass (run `make test`) - [x] Documentation has been updated to reflect changes, if applicable - [x] Changes are added to the changelog --------- Co-authored-by: Jonathan Healy <[email protected]>
1 parent 125baff commit caac373

File tree

5 files changed

+58
-31
lines changed

5 files changed

+58
-31
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
1010
### Changed
1111

1212
- Added note on the use of the default `*` use in route authentication dependecies. [#325](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/325)
13+
- Update item index naming and aliasing to allow capitalisation of collection ids [#329](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/329)
1314
- Bugfixes for the `IsNull` operator and datetime filtering [#330](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/330)
1415

1516
## [v3.2.2] - 2024-12-15

README.md

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -267,17 +267,17 @@ A reindex operation might be useful to apply changes to documents or to correct
267267

268268
The index templates will make sure that manually created indices will also have the correct mappings and settings.
269269

270-
In this example, we will make a copy of an existing Item index `items_my-collection-000001` but change the Item identifier to be lowercase.
270+
In this example, we will make a copy of an existing Item index `items_my-collection-lower_my-collection-hex-000001` but change the Item identifier to be lowercase.
271271

272272
```shell
273273
curl -X "POST" "http://localhost:9200/_reindex" \
274274
-H 'Content-Type: application/json' \
275275
-d $'{
276276
"source": {
277-
"index": "items_my-collection-000001"
277+
"index": "items_my-collection-lower_my-collection-hex-000001"
278278
},
279279
"dest": {
280-
"index": "items_my-collection-000002"
280+
"index": "items_my-collection-lower_my-collection-hex-000002"
281281
},
282282
"script": {
283283
"source": "ctx._source.id = ctx._source.id.toLowerCase()",
@@ -286,7 +286,7 @@ curl -X "POST" "http://localhost:9200/_reindex" \
286286
}'
287287
```
288288

289-
If we are happy with the data in the newly created index, we can move the alias `items_my-collection` to the new index `items_my-collection-000002`.
289+
If we are happy with the data in the newly created index, we can move the alias `items_my-collection` to the new index `items_my-collection-lower_my-collection-hex-000002`.
290290
```shell
291291
curl -X "POST" "http://localhost:9200/_aliases" \
292292
-h 'Content-Type: application/json' \
@@ -300,7 +300,7 @@ curl -X "POST" "http://localhost:9200/_aliases" \
300300
},
301301
{
302302
"add": {
303-
"index": "items_my-collection-000002",
303+
"index": "items_my-collection-lower_my-collection-hex-000002",
304304
"alias": "items_my-collection"
305305
}
306306
}

stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py

Lines changed: 23 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,20 @@ def index_by_collection_id(collection_id: str) -> str:
156156
Returns:
157157
str: The index name derived from the collection id.
158158
"""
159-
return f"{ITEMS_INDEX_PREFIX}{''.join(c for c in collection_id.lower() if c not in ES_INDEX_NAME_UNSUPPORTED_CHARS)}"
159+
return f"{ITEMS_INDEX_PREFIX}{''.join(c for c in collection_id.lower() if c not in ES_INDEX_NAME_UNSUPPORTED_CHARS)}_{collection_id.encode('utf-8').hex()}"
160+
161+
162+
def index_alias_by_collection_id(collection_id: str) -> str:
163+
"""
164+
Translate a collection id into an Elasticsearch index alias.
165+
166+
Args:
167+
collection_id (str): The collection id to translate into an index alias.
168+
169+
Returns:
170+
str: The index alias derived from the collection id.
171+
"""
172+
return f"{ITEMS_INDEX_PREFIX}{''.join(c for c in collection_id if c not in ES_INDEX_NAME_UNSUPPORTED_CHARS)}"
160173

161174

162175
def indices(collection_ids: Optional[List[str]]) -> str:
@@ -172,7 +185,7 @@ def indices(collection_ids: Optional[List[str]]) -> str:
172185
if collection_ids is None or collection_ids == []:
173186
return ITEM_INDICES
174187
else:
175-
return ",".join([index_by_collection_id(c) for c in collection_ids])
188+
return ",".join([index_alias_by_collection_id(c) for c in collection_ids])
176189

177190

178191
async def create_index_templates() -> None:
@@ -231,11 +244,10 @@ async def create_item_index(collection_id: str):
231244
232245
"""
233246
client = AsyncElasticsearchSettings().create_client
234-
index_name = index_by_collection_id(collection_id)
235247

236248
await client.options(ignore_status=400).indices.create(
237249
index=f"{index_by_collection_id(collection_id)}-000001",
238-
aliases={index_name: {}},
250+
aliases={index_alias_by_collection_id(collection_id): {}},
239251
)
240252
await client.close()
241253

@@ -248,7 +260,7 @@ async def delete_item_index(collection_id: str):
248260
"""
249261
client = AsyncElasticsearchSettings().create_client
250262

251-
name = index_by_collection_id(collection_id)
263+
name = index_alias_by_collection_id(collection_id)
252264
resolved = await client.indices.resolve_index(name=name)
253265
if "aliases" in resolved and resolved["aliases"]:
254266
[alias] = resolved["aliases"]
@@ -288,7 +300,7 @@ def mk_actions(collection_id: str, processed_items: List[Item]):
288300
"""
289301
return [
290302
{
291-
"_index": index_by_collection_id(collection_id),
303+
"_index": index_alias_by_collection_id(collection_id),
292304
"_id": mk_item_id(item["id"], item["collection"]),
293305
"_source": item,
294306
}
@@ -449,7 +461,7 @@ async def get_one_item(self, collection_id: str, item_id: str) -> Dict:
449461
"""
450462
try:
451463
item = await self.client.get(
452-
index=index_by_collection_id(collection_id),
464+
index=index_alias_by_collection_id(collection_id),
453465
id=mk_item_id(item_id, collection_id),
454466
)
455467
except exceptions.NotFoundError:
@@ -808,7 +820,7 @@ async def prep_create_item(
808820
await self.check_collection_exists(collection_id=item["collection"])
809821

810822
if not exist_ok and await self.client.exists(
811-
index=index_by_collection_id(item["collection"]),
823+
index=index_alias_by_collection_id(item["collection"]),
812824
id=mk_item_id(item["id"], item["collection"]),
813825
):
814826
raise ConflictError(
@@ -845,7 +857,7 @@ def sync_prep_create_item(
845857
raise NotFoundError(f"Collection {collection_id} does not exist")
846858

847859
if not exist_ok and self.sync_client.exists(
848-
index=index_by_collection_id(collection_id),
860+
index=index_alias_by_collection_id(collection_id),
849861
id=mk_item_id(item_id, collection_id),
850862
):
851863
raise ConflictError(
@@ -871,7 +883,7 @@ async def create_item(self, item: Item, refresh: bool = False):
871883
item_id = item["id"]
872884
collection_id = item["collection"]
873885
es_resp = await self.client.index(
874-
index=index_by_collection_id(collection_id),
886+
index=index_alias_by_collection_id(collection_id),
875887
id=mk_item_id(item_id, collection_id),
876888
document=item,
877889
refresh=refresh,
@@ -897,7 +909,7 @@ async def delete_item(
897909
"""
898910
try:
899911
await self.client.delete(
900-
index=index_by_collection_id(collection_id),
912+
index=index_alias_by_collection_id(collection_id),
901913
id=mk_item_id(item_id, collection_id),
902914
refresh=refresh,
903915
)

stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py

Lines changed: 26 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,20 @@ def index_by_collection_id(collection_id: str) -> str:
158158
Returns:
159159
str: The index name derived from the collection id.
160160
"""
161-
return f"{ITEMS_INDEX_PREFIX}{''.join(c for c in collection_id.lower() if c not in ES_INDEX_NAME_UNSUPPORTED_CHARS)}"
161+
return f"{ITEMS_INDEX_PREFIX}{''.join(c for c in collection_id.lower() if c not in ES_INDEX_NAME_UNSUPPORTED_CHARS)}_{collection_id.encode('utf-8').hex()}"
162+
163+
164+
def index_alias_by_collection_id(collection_id: str) -> str:
165+
"""
166+
Translate a collection id into an Elasticsearch index alias.
167+
168+
Args:
169+
collection_id (str): The collection id to translate into an index alias.
170+
171+
Returns:
172+
str: The index alias derived from the collection id.
173+
"""
174+
return f"{ITEMS_INDEX_PREFIX}{''.join(c for c in collection_id if c not in ES_INDEX_NAME_UNSUPPORTED_CHARS)}"
162175

163176

164177
def indices(collection_ids: Optional[List[str]]) -> str:
@@ -174,7 +187,7 @@ def indices(collection_ids: Optional[List[str]]) -> str:
174187
if collection_ids is None or collection_ids == []:
175188
return ITEM_INDICES
176189
else:
177-
return ",".join([index_by_collection_id(c) for c in collection_ids])
190+
return ",".join([index_alias_by_collection_id(c) for c in collection_ids])
178191

179192

180193
async def create_index_templates() -> None:
@@ -243,13 +256,14 @@ async def create_item_index(collection_id: str):
243256
244257
"""
245258
client = AsyncSearchSettings().create_client
246-
index_name = index_by_collection_id(collection_id)
247259
search_body: Dict[str, Any] = {
248-
"aliases": {index_name: {}},
260+
"aliases": {index_alias_by_collection_id(collection_id): {}},
249261
}
250262

251263
try:
252-
await client.indices.create(index=f"{index_name}-000001", body=search_body)
264+
await client.indices.create(
265+
index=f"{index_by_collection_id(collection_id)}-000001", body=search_body
266+
)
253267
except TransportError as e:
254268
if e.status_code == 400:
255269
pass # Ignore 400 status codes
@@ -267,7 +281,7 @@ async def delete_item_index(collection_id: str):
267281
"""
268282
client = AsyncSearchSettings().create_client
269283

270-
name = index_by_collection_id(collection_id)
284+
name = index_alias_by_collection_id(collection_id)
271285
resolved = await client.indices.resolve_index(name=name)
272286
if "aliases" in resolved and resolved["aliases"]:
273287
[alias] = resolved["aliases"]
@@ -307,7 +321,7 @@ def mk_actions(collection_id: str, processed_items: List[Item]):
307321
"""
308322
return [
309323
{
310-
"_index": index_by_collection_id(collection_id),
324+
"_index": index_alias_by_collection_id(collection_id),
311325
"_id": mk_item_id(item["id"], item["collection"]),
312326
"_source": item,
313327
}
@@ -476,7 +490,7 @@ async def get_one_item(self, collection_id: str, item_id: str) -> Dict:
476490
"""
477491
try:
478492
item = await self.client.get(
479-
index=index_by_collection_id(collection_id),
493+
index=index_alias_by_collection_id(collection_id),
480494
id=mk_item_id(item_id, collection_id),
481495
)
482496
except exceptions.NotFoundError:
@@ -838,7 +852,7 @@ async def prep_create_item(
838852
await self.check_collection_exists(collection_id=item["collection"])
839853

840854
if not exist_ok and await self.client.exists(
841-
index=index_by_collection_id(item["collection"]),
855+
index=index_alias_by_collection_id(item["collection"]),
842856
id=mk_item_id(item["id"], item["collection"]),
843857
):
844858
raise ConflictError(
@@ -875,7 +889,7 @@ def sync_prep_create_item(
875889
raise NotFoundError(f"Collection {collection_id} does not exist")
876890

877891
if not exist_ok and self.sync_client.exists(
878-
index=index_by_collection_id(collection_id),
892+
index=index_alias_by_collection_id(collection_id),
879893
id=mk_item_id(item_id, collection_id),
880894
):
881895
raise ConflictError(
@@ -901,7 +915,7 @@ async def create_item(self, item: Item, refresh: bool = False):
901915
item_id = item["id"]
902916
collection_id = item["collection"]
903917
es_resp = await self.client.index(
904-
index=index_by_collection_id(collection_id),
918+
index=index_alias_by_collection_id(collection_id),
905919
id=mk_item_id(item_id, collection_id),
906920
body=item,
907921
refresh=refresh,
@@ -927,7 +941,7 @@ async def delete_item(
927941
"""
928942
try:
929943
await self.client.delete(
930-
index=index_by_collection_id(collection_id),
944+
index=index_alias_by_collection_id(collection_id),
931945
id=mk_item_id(item_id, collection_id),
932946
refresh=refresh,
933947
)

stac_fastapi/tests/database/test_database.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,14 +11,14 @@
1111
COLLECTIONS_INDEX,
1212
ES_COLLECTIONS_MAPPINGS,
1313
ES_ITEMS_MAPPINGS,
14-
index_by_collection_id,
14+
index_alias_by_collection_id,
1515
)
1616
else:
1717
from stac_fastapi.elasticsearch.database_logic import (
1818
COLLECTIONS_INDEX,
1919
ES_COLLECTIONS_MAPPINGS,
2020
ES_ITEMS_MAPPINGS,
21-
index_by_collection_id,
21+
index_alias_by_collection_id,
2222
)
2323

2424

@@ -42,7 +42,7 @@ async def test_index_mapping_items(txn_client, load_test_data):
4242
api.Collection(**collection), request=MockRequest
4343
)
4444
response = await database.client.indices.get_mapping(
45-
index=index_by_collection_id(collection["id"])
45+
index=index_alias_by_collection_id(collection["id"])
4646
)
4747
if not isinstance(response, dict):
4848
response = response.body

0 commit comments

Comments
 (0)