From 3ba9250acb0b5b94a821aed9032f4922f4a3596e Mon Sep 17 00:00:00 2001 From: Mathieu Leplatre Date: Thu, 25 May 2017 17:07:01 +0200 Subject: [PATCH 1/2] WIP mapping from jsonschema --- kinto_elasticsearch/indexer.py | 21 +++++++- kinto_elasticsearch/listener.py | 3 +- kinto_elasticsearch/views.py | 2 +- setup.py | 1 + tests/test_elasticsearch.py | 87 +++++++++++++++++++++++++++++++++ 5 files changed, 110 insertions(+), 4 deletions(-) diff --git a/kinto_elasticsearch/indexer.py b/kinto_elasticsearch/indexer.py index c5f1a69..cdc8b66 100644 --- a/kinto_elasticsearch/indexer.py +++ b/kinto_elasticsearch/indexer.py @@ -18,11 +18,13 @@ def __init__(self, hosts, prefix="kinto", force_refresh=False): def indexname(self, bucket_id, collection_id): return "{}-{}-{}".format(self.prefix, bucket_id, collection_id) - def create_index(self, bucket_id, collection_id): + def create_index(self, bucket_id, collection_id, schema=None): indexname = self.indexname(bucket_id, collection_id) # Only if necessary. if not self.client.indices.exists(index=indexname): - self.client.indices.create(index=indexname) + mapping = jsonschema_to_mapping(schema) + body = {"mappings": {indexname: mapping}} + self.client.indices.create(index=indexname, body=body) def delete_index(self, bucket_id, collection_id=None): if collection_id is None: @@ -102,3 +104,18 @@ def load_from_config(config): force_refresh = asbool(settings.get('elasticsearch.force_refresh', 'false')) indexer = Indexer(hosts=hosts, prefix=prefix, force_refresh=force_refresh) return indexer + + +def jsonschema_to_mapping(json_schema): + if json_schema is None: + return None + + # XXX: domapping was a quick way to get started, but not sure if it's the + # lightest solution. + # See https://github.com/inveniosoftware/domapping/issues/created_by/leplatrem + from domapping.mapping import ElasticMappingGeneratorConfig, schema_to_mapping + + root_url = json_schema.get("id", "") + mapping = schema_to_mapping(json_schema, root_url, {}, ElasticMappingGeneratorConfig()) + mapping = {"properties": mapping["properties"]} + return mapping diff --git a/kinto_elasticsearch/listener.py b/kinto_elasticsearch/listener.py index 06929e3..6e10b27 100644 --- a/kinto_elasticsearch/listener.py +++ b/kinto_elasticsearch/listener.py @@ -12,7 +12,8 @@ def on_collection_created(event): bucket_id = event.payload["bucket_id"] for created in event.impacted_records: collection_id = created["new"]["id"] - indexer.create_index(bucket_id, collection_id) + schema = created["new"].get("schema") + indexer.create_index(bucket_id, collection_id, schema=schema) def on_collection_deleted(event): diff --git a/kinto_elasticsearch/views.py b/kinto_elasticsearch/views.py index ce31282..8f5c96b 100644 --- a/kinto_elasticsearch/views.py +++ b/kinto_elasticsearch/views.py @@ -36,5 +36,5 @@ def get_search(request): results = indexer.search(bucket_id, collection_id, query) except elasticsearch.ElasticsearchException as e: logger.exception("Index query failed.") - results = {} + raise return results diff --git a/setup.py b/setup.py index 87ee213..04a7dbf 100755 --- a/setup.py +++ b/setup.py @@ -11,6 +11,7 @@ requirements = [ 'elasticsearch', + 'domapping', # XXX: validate more thoroughly. 'kinto>=6.0.0' ] diff --git a/tests/test_elasticsearch.py b/tests/test_elasticsearch.py index d4b262a..38e6dfb 100644 --- a/tests/test_elasticsearch.py +++ b/tests/test_elasticsearch.py @@ -162,3 +162,90 @@ def test_search_is_not_allowed_if_only_read_on_certain_records(self): headers=headers) self.app.post("/buckets/bid/collections/cid/search", status=403, headers=headers) + + +class SchemaSupport(BaseWebTest, unittest.TestCase): + + schema = { + "title": "Builds", + "description": "Mozilla software builds.", + "type": "object", + "properties": { + "id": { + "type": "string", + }, + "last_modified": { + "type": "number", + }, + "build": { + "type": "object", + "properties": { + "id": { + "type": "string", + "title": "Build ID", + "description": "Build ID" + }, + "date": { + "type": "string", + "title": "Build date", + "description": "i.e: 2017-05-11", + "pattern": "^(19|2[0-5])[0-9]{2}-((0[0-9])|(1[0-2]))-(([0-2][0-9])|(3[0-1]))$" + } + } + } + } + } + + def setUp(self): + self.app.put("/buckets/bid", headers=self.headers) + body = {"data": {"schema": self.schema}} + self.app.put_json("/buckets/bid/collections/cid", body, headers=self.headers) + self.app.post_json("/buckets/bid/collections/cid/records", + {"data": {"build": {"id": "abc", "date": "2017-05-24"}}}, + headers=self.headers) + self.app.post_json("/buckets/bid/collections/cid/records", + {"data": {"build": {"id": "efg", "date": "2017-02-01"}}}, + headers=self.headers) + + def get_mapping(self, bucket_id, collection_id): + indexer = self.app.app.registry.indexer + indexname = indexer.indexname(bucket_id, collection_id) + index_mapping = indexer.client.indices.get_mapping(indexname) + return index_mapping[indexname]["mappings"][indexname] + + def test_index_has_mapping_if_collection_has_schema(self): + mapping = self.get_mapping("bid", "cid") + assert mapping == { + "properties": { + "id": {"type": "text"}, + "last_modified": {"type": "double"}, + "build": { + "properties": { + "date": {"type": "text"}, + "id": {"type": "text"} + } + } + } + } + + def test_can_search_for_subproperties(self): + body = { + "query": { + "bool" : { + "must" : { + "term" : { "build.id" : "abc" } + } + } + } + } + resp = self.app.post_json("/buckets/bid/collections/cid/search", body, + headers=self.headers) + result = resp.json + assert len(result["hits"]["hits"]) == 1 + assert result["hits"]["hits"][0]["_source"]["build"]["id"] == "abc" + + def test_mapping_is_updated_on_collection_update(self): + pass + + def test_schema_can_contain_references(self): + pass From 3f7b0cb21823aefd30d8c06ed25d1e6ce6042356 Mon Sep 17 00:00:00 2001 From: Mathieu Leplatre Date: Thu, 25 May 2017 17:33:48 +0200 Subject: [PATCH 2/2] Add test for agregates --- kinto_elasticsearch/views.py | 1 - tests/test_elasticsearch.py | 20 ++++++++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/kinto_elasticsearch/views.py b/kinto_elasticsearch/views.py index 8f5c96b..2edd6d7 100644 --- a/kinto_elasticsearch/views.py +++ b/kinto_elasticsearch/views.py @@ -36,5 +36,4 @@ def get_search(request): results = indexer.search(bucket_id, collection_id, query) except elasticsearch.ElasticsearchException as e: logger.exception("Index query failed.") - raise return results diff --git a/tests/test_elasticsearch.py b/tests/test_elasticsearch.py index 38e6dfb..6479a3f 100644 --- a/tests/test_elasticsearch.py +++ b/tests/test_elasticsearch.py @@ -244,6 +244,26 @@ def test_can_search_for_subproperties(self): assert len(result["hits"]["hits"]) == 1 assert result["hits"]["hits"][0]["_source"]["build"]["id"] == "abc" + def test_can_aggregate_values(self): + body = { + "aggs" : { + "build_dates" : { + "terms": { + "field" : "build.id", + "size" : 1000 + } + } + } + } + resp = self.app.post_json("/buckets/bid/collections/cid/search", body, + headers=self.headers) + result = resp.json + print(result) + assert result["aggregations"]["build_dates"]["buckets"] == [ + {"key": "abc", "doc_count": 1}, + {"key": "efg", "doc_count": 1}, + ] + def test_mapping_is_updated_on_collection_update(self): pass