This repository has been archived by the owner on Aug 5, 2024. It is now read-only.

[WIP] mapping from jsonschema #36

Open · wants to merge 2 commits into master

21 changes: 19 additions & 2 deletions kinto_elasticsearch/indexer.py
@@ -18,11 +18,13 @@ def __init__(self, hosts, prefix="kinto", force_refresh=False):
     def indexname(self, bucket_id, collection_id):
         return "{}-{}-{}".format(self.prefix, bucket_id, collection_id)

-    def create_index(self, bucket_id, collection_id):
+    def create_index(self, bucket_id, collection_id, schema=None):
         indexname = self.indexname(bucket_id, collection_id)
         # Only if necessary.
         if not self.client.indices.exists(index=indexname):
-            self.client.indices.create(index=indexname)
+            mapping = jsonschema_to_mapping(schema)
+            body = {"mappings": {indexname: mapping}}
+            self.client.indices.create(index=indexname, body=body)

     def delete_index(self, bucket_id, collection_id=None):
         if collection_id is None:
@@ -102,3 +104,18 @@ def load_from_config(config):
     force_refresh = asbool(settings.get('elasticsearch.force_refresh', 'false'))
     indexer = Indexer(hosts=hosts, prefix=prefix, force_refresh=force_refresh)
     return indexer
+
+
+def jsonschema_to_mapping(json_schema):
+    if json_schema is None:
+        return None
+
+    # XXX: domapping was a quick way to get started, but not sure if it's the
+    # lightest solution.
+    # See https://github.com/inveniosoftware/domapping/issues/created_by/leplatrem
+    from domapping.mapping import ElasticMappingGeneratorConfig, schema_to_mapping
+
+    root_url = json_schema.get("id", "")
+    mapping = schema_to_mapping(json_schema, root_url, {}, ElasticMappingGeneratorConfig())
+    mapping = {"properties": mapping["properties"]}
+    return mapping
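
For concreteness, here is a small illustration (not part of the patch) of what the new create_index() path ends up sending to Elasticsearch. The string-to-text and number-to-double conversions are taken from the mapping asserted in the tests below, not guaranteed by domapping; "kinto-bid-cid" is the index name Indexer.indexname("bid", "cid") produces with the default "kinto" prefix.

    from kinto_elasticsearch.indexer import jsonschema_to_mapping

    # Hypothetical collection schema attached to a Kinto collection record.
    schema = {
        "type": "object",
        "properties": {
            "title": {"type": "string"},
            "last_modified": {"type": "number"},
        },
    }

    mapping = jsonschema_to_mapping(schema)
    # Expected to look roughly like:
    # {"properties": {"title": {"type": "text"}, "last_modified": {"type": "double"}}}

    body = {"mappings": {"kinto-bid-cid": mapping}}
    # indexer.client.indices.create(index="kinto-bid-cid", body=body)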
3 changes: 2 additions & 1 deletion kinto_elasticsearch/listener.py
@@ -12,7 +12,8 @@ def on_collection_created(event):
     bucket_id = event.payload["bucket_id"]
     for created in event.impacted_records:
         collection_id = created["new"]["id"]
-        indexer.create_index(bucket_id, collection_id)
+        schema = created["new"].get("schema")
+        indexer.create_index(bucket_id, collection_id, schema=schema)


 def on_collection_deleted(event):
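
For reference, an illustration of the values this handler pulls out of a collection-created event when the collection carries an inline JSON Schema; only the keys actually read by the code above are shown, everything else is omitted rather than guessed.

    # Illustrative event data, mirroring the keys read by on_collection_created().
    payload = {"bucket_id": "bid"}
    impacted_records = [
        {"new": {"id": "cid",
                 "schema": {"type": "object",
                            "properties": {"title": {"type": "string"}}}}},
    ]

    for created in impacted_records:
        collection_id = created["new"]["id"]    # "cid"
        schema = created["new"].get("schema")   # the JSON Schema dict, or None
        # indexer.create_index("bid", collection_id, schema=schema)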
1 change: 0 additions & 1 deletion kinto_elasticsearch/views.py
@@ -36,5 +36,4 @@ def get_search(request):
         results = indexer.search(bucket_id, collection_id, query)
     except elasticsearch.ElasticsearchException as e:
         logger.exception("Index query failed.")
-        results = {}
     return results
1 change: 1 addition & 0 deletions setup.py
@@ -11,6 +11,7 @@

 requirements = [
     'elasticsearch',
+    'domapping', # XXX: validate more thoroughly.
     'kinto>=6.0.0'
 ]

107 changes: 107 additions & 0 deletions tests/test_elasticsearch.py
@@ -162,3 +162,110 @@ def test_search_is_not_allowed_if_only_read_on_certain_records(self):
                           headers=headers)

         self.app.post("/buckets/bid/collections/cid/search", status=403, headers=headers)
+
+
+class SchemaSupport(BaseWebTest, unittest.TestCase):
+
+    schema = {
+        "title": "Builds",
+        "description": "Mozilla software builds.",
+        "type": "object",
+        "properties": {
+            "id": {
+                "type": "string",
+            },
+            "last_modified": {
+                "type": "number",
+            },
+            "build": {
+                "type": "object",
+                "properties": {
+                    "id": {
+                        "type": "string",
+                        "title": "Build ID",
+                        "description": "Build ID"
+                    },
+                    "date": {
+                        "type": "string",
+                        "title": "Build date",
+                        "description": "i.e: 2017-05-11",
+                        "pattern": "^(19|2[0-5])[0-9]{2}-((0[0-9])|(1[0-2]))-(([0-2][0-9])|(3[0-1]))$"
+                    }
+                }
+            }
+        }
+    }
+
+    def setUp(self):
+        self.app.put("/buckets/bid", headers=self.headers)
+        body = {"data": {"schema": self.schema}}
+        self.app.put_json("/buckets/bid/collections/cid", body, headers=self.headers)
+        self.app.post_json("/buckets/bid/collections/cid/records",
+                           {"data": {"build": {"id": "abc", "date": "2017-05-24"}}},
+                           headers=self.headers)
+        self.app.post_json("/buckets/bid/collections/cid/records",
+                           {"data": {"build": {"id": "efg", "date": "2017-02-01"}}},
+                           headers=self.headers)
+
+    def get_mapping(self, bucket_id, collection_id):
+        indexer = self.app.app.registry.indexer
+        indexname = indexer.indexname(bucket_id, collection_id)
+        index_mapping = indexer.client.indices.get_mapping(indexname)
+        return index_mapping[indexname]["mappings"][indexname]
+
+    def test_index_has_mapping_if_collection_has_schema(self):
+        mapping = self.get_mapping("bid", "cid")
+        assert mapping == {
+            "properties": {
+                "id": {"type": "text"},
+                "last_modified": {"type": "double"},
+                "build": {
+                    "properties": {
+                        "date": {"type": "text"},
+                        "id": {"type": "text"}
+                    }
+                }
+            }
+        }
+
+    def test_can_search_for_subproperties(self):
+        body = {
+            "query": {
+                "bool" : {
+                    "must" : {
+                        "term" : { "build.id" : "abc" }
+                    }
+                }
+            }
+        }
+        resp = self.app.post_json("/buckets/bid/collections/cid/search", body,
+                                  headers=self.headers)
+        result = resp.json
+        assert len(result["hits"]["hits"]) == 1
+        assert result["hits"]["hits"][0]["_source"]["build"]["id"] == "abc"
+
+    def test_can_aggregate_values(self):
+        body = {
+            "aggs" : {
+                "build_dates" : {
+                    "terms": {
+                        "field" : "build.id",
+                        "size" : 1000
+                    }
+                }
+            }
+        }
+        resp = self.app.post_json("/buckets/bid/collections/cid/search", body,
+                                  headers=self.headers)
+        result = resp.json
+        print(result)
+        assert result["aggregations"]["build_dates"]["buckets"] == [
+            {"key": "abc", "doc_count": 1},
+            {"key": "efg", "doc_count": 1},
+        ]
+
+    def test_mapping_is_updated_on_collection_update(self):
+        pass
+
+    def test_schema_can_contain_references(self):
+        pass
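
One possible shape for the pending test_mapping_is_updated_on_collection_update stub, using only the helpers defined above. It assumes the plugin will eventually refresh the mapping when a collection's schema changes, which this WIP patch does not implement yet, so treat it as a sketch rather than a drop-in test; the "channel" property is an invented example field.

    # Hypothetical sketch, not part of the diff: add a property to the collection
    # schema and check that the Elasticsearch mapping picks it up.
    def test_mapping_is_updated_on_collection_update(self):
        new_schema = dict(self.schema)
        new_schema["properties"] = dict(self.schema["properties"],
                                        channel={"type": "string"})
        body = {"data": {"schema": new_schema}}
        self.app.put_json("/buckets/bid/collections/cid", body,
                          headers=self.headers)

        mapping = self.get_mapping("bid", "cid")
        assert "channel" in mapping["properties"]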