PilotDataPlatform
diff --git a/‎.dockerignore
+14 b/‎.dockerignore
+14
diff --git a/‎.env.schema
+12 b/‎.env.schema
+12
diff --git a/‎.gitattributes
+2 b/‎.gitattributes
+2
diff --git a/‎.gitignore
+156 b/‎.gitignore
+156
diff --git a/‎.pre-commit-config.yaml
+77 b/‎.pre-commit-config.yaml
+77
diff --git a/‎COPYRIGHT
+4 b/‎COPYRIGHT
+4
diff --git a/‎LICENSE
+3-1 b/‎LICENSE
+3-1
diff --git a/‎README.md
+16 b/‎README.md
+16
diff --git a/‎commons/__init__.py
+5 b/‎commons/__init__.py
+5
diff --git a/‎commons/base_consumer.py
+69 b/‎commons/base_consumer.py
+69
diff --git a/‎commons/encoders.py
+20 b/‎commons/encoders.py
+20
@@ -0,0 +1,14 @@
+# Folders
+.git
+.venv
+venv
+kubernetes
+
+# Files
+.DS_Store
+.gitignore
+.gitlab-ci*
+gitlab-ci*
+.github
+Dockerfile*
+Jenkinsfile
@@ -0,0 +1,12 @@
+KAFKA_SERVICE=
+KAFKA_TOPICS=[]
+ELASTICSEARCH_SERVICE=
+APP_NAME=
+VERSION=
+HOST=
+PORT=
+WORKERS=
+METADATA_SERVICE=
+PROJECT_SERVICE=
+METADATA_SERVICE_PAGE_SIZE=
+PROJECT_SERVICE_PAGE_SIZE=
@@ -0,0 +1,2 @@
+# Auto detect text files and perform LF normalization
+* text=auto
@@ -0,0 +1,156 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+
+.idea/
+
+.DS_Store
+logs/
@@ -0,0 +1,77 @@
+repos:
+
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.3.0
+    hooks:
+      - id: check-added-large-files
+      - id: check-docstring-first
+      - id: check-merge-conflict
+      - id: check-toml
+      - id: check-yaml
+      - id: double-quote-string-fixer
+      - id: end-of-file-fixer
+      - id: trailing-whitespace
+
+  - repo: https://github.com/psf/black
+    rev: 22.8.0
+    hooks:
+      - id: black
+        args: [
+          '--line-length=120',
+          '--skip-string-normalization',
+        ]
+
+  - repo: https://github.com/PyCQA/isort
+    rev: 5.12.0
+    hooks:
+      - id: isort
+        args: [
+          '--line-length=120',
+          '--profile=black',
+          '--filter-files',
+          '--force-single-line-imports',
+          '--reverse-relative',
+        ]
+
+  - repo: https://github.com/PyCQA/flake8
+    rev: 5.0.4
+    hooks:
+      - id: flake8
+        additional_dependencies: [
+          'pycodestyle==2.9.1',  # E,W
+          'pyflakes==2.5.0',  # F
+          'mccabe==0.7.0',  # C
+          'flake8-bugbear==22.9.11',  # B
+          'flake8-builtins==1.5.3',  # A
+          'flake8-comprehensions==3.10.0',  # C4
+          'flake8-debugger==4.1.2',  # T1
+          'flake8-logging-format==0.7.5',  # G
+          'flake8-print==5.0.0',  # T2
+        ]
+        args: [
+          '--select=E,W,F,C,B,A,C4,T1,G,T2',
+          '--ignore=E203,W503,B008,B305,A003,G004',
+          '--max-complexity=10',
+          '--max-line-length=120',
+        ]
+
+  - repo: https://github.com/myint/docformatter
+    rev: v1.5.0
+    hooks:
+      - id: docformatter
+        args: [
+          '--wrap-summaries=120',
+          '--wrap-descriptions=120',
+          '--in-place',
+        ]
+
+  - repo: https://github.com/Lucas-C/pre-commit-hooks
+    rev: v1.4.2
+    hooks:
+      - id: insert-license
+        files: \.py$
+        args: [
+          '--license-filepath=COPYRIGHT',
+          '--comment-style=#',
+          '--use-current-year',
+        ]
@@ -0,0 +1,4 @@
+Copyright (C) 2022-2023 Indoc Systems
+
+Licensed under the GNU AFFERO GENERAL PUBLIC LICENSE, Version 3.0 (the "License") available at https://www.gnu.org/licenses/agpl-3.0.en.html. 
+You may not use this file except in compliance with the License.
@@ -1,3 +1,5 @@
+Copyright (C) 2022-2023 Indoc Systems
+
                     GNU AFFERO GENERAL PUBLIC LICENSE
                        Version 3, 19 November 2007
 
@@ -658,4 +660,4 @@ specific requirements.
   You should also get your employer (if you work as a programmer) or school,
 if any, to sign a "copyright disclaimer" for the program, if necessary.
 For more information on this, and how to apply and follow the GNU AGPL, see
-<https://www.gnu.org/licenses/>.
+<https://www.gnu.org/licenses/>.
@@ -0,0 +1,16 @@
+# Metadata event handler
+
+[![Python](https://img.shields.io/badge/python-3.10-brightgreen.svg)](https://www.python.org/)
+
+## About
+
+Consumes Metadata events produced by the Postgres connector and writes data into Elasticsearch.
+
+### Built With
+
+- Python
+    - [elasticsearch](https://pypi.org/project/elasticsearch/)
+    - [kafka-python3](https://pypi.org/project/kafka-python3/)
+
+## Acknowledgements
+The development of the HealthDataCloud open source software was supported by the EBRAINS research infrastructure, funded from the European Union's Horizon 2020 Framework Programme for Research and Innovation under the Specific Grant Agreement No. 945539 (Human Brain Project SGA3) and H2020 Research and Innovation Action Grant Interactive Computing E-Infrastructure for the Human Brain Project ICEI 800858.
@@ -0,0 +1,5 @@
+# Copyright (C) 2022-2023 Indoc Systems
+#
+# Licensed under the GNU AFFERO GENERAL PUBLIC LICENSE, Version 3.0 (the "License") available at https://www.gnu.org/licenses/agpl-3.0.en.html.
+# You may not use this file except in compliance with the License.
+
@@ -0,0 +1,69 @@
+# Copyright (C) 2022-2023 Indoc Systems
+#
+# Licensed under the GNU AFFERO GENERAL PUBLIC LICENSE, Version 3.0 (the "License") available at https://www.gnu.org/licenses/agpl-3.0.en.html.
+# You may not use this file except in compliance with the License.
+
+import base64
+import io
+import logging
+import math
+from datetime import datetime
+from typing import Any
+
+from aiokafka import ConsumerRecord
+from fastavro import schema
+from fastavro import schemaless_reader
+from fastavro import validate
+
+# Module-level logger named after this module; BaseConsumer uses it for decode diagnostics.
+logger = logging.getLogger(__name__)
+
+
+class BaseConsumer:
+    """Base class for consumers that decode Avro messages and dispatch them by topic.
+
+    The base class sets no instance state itself. ``decode_message`` reads ``self.KAFKA_SCHEMAS_PATH`` and
+    ``process_event`` uses ``self.producer``; both are presumably supplied by subclasses (TODO confirm against
+    the concrete consumers).
+    """
+
+    def __init__(self) -> None:
+        pass
+
+    def decode_label_from_ltree(self, encoded_string: str) -> str:
+        """Decode a base32-encoded label into its UTF-8 text.
+
+        :param encoded_string: Base32 text, with or without trailing ``=`` padding
+        :return: Decoded UTF-8 string
+        """
+        # base32 input has to be a multiple of 8 characters long, so restore any stripped padding.
+        missing_padding = -len(encoded_string) % 8
+        padded_string = encoded_string + '=' * missing_padding
+        return base64.b32decode(padded_string.encode('utf-8')).decode('utf-8')
+
+    def convert_timestamp_millis_to_second(self, timestamp: int) -> int:
+        """Convert a timestamp in milliseconds to whole seconds (floor division).
+
+        :param timestamp: Timestamp in milliseconds
+        :return: Timestamp in seconds
+        """
+        return timestamp // 1000
+
+    def convert_datetime_to_timestamp(self, date: datetime) -> int:
+        """Translate datetime to timestamp in seconds, dropping the fractional part.
+
+        :param date: Date
+        :return: Timestamp in seconds
+        """
+        return int(date.timestamp())
+
+    def decode_message(self, message: bytes, topic: str) -> dict[str, Any]:
+        """Decode and validate a schemaless Avro message with the schema named after the topic.
+
+        :param message: Raw Avro-encoded payload
+        :param topic: Topic name; the schema is loaded from ``KAFKA_SCHEMAS_PATH / '<topic>.avsc'``
+        :return: Decoded message, or an empty dict when decoding or validation fails
+        """
+        logger.info(f'Starting to decode message from topic "{topic}".')
+        try:
+            imported_schema = schema.load_schema(self.KAFKA_SCHEMAS_PATH / f'{topic}.avsc')
+            message_decoded = schemaless_reader(io.BytesIO(message), imported_schema)
+            is_valid = validate(message_decoded, imported_schema, raise_errors=False)
+        except Exception:
+            # Any failure while loading the schema or reading the payload is logged with its traceback and
+            # reported to the caller as an empty dict.
+            logger.exception(f'Unable to decode message from topic "{topic}".')
+            return {}
+
+        # Log the payload exactly once, before the validity check, so rejected messages are visible too.
+        logger.info(f'Decoded a message from a topic "{topic}": {message_decoded}')
+        if not is_valid:
+            logger.warning(f'Unable to validate decoded message from topic "{topic}".')
+            return {}
+
+        return message_decoded
+
+    async def process_event(self, event: ConsumerRecord) -> None:
+        """Decode a consumed record and dispatch it, or forward its raw value to the ``metadata.dlq`` topic.
+
+        :param event: Consumed record; its ``topic`` and ``value`` attributes are read
+        """
+        topic = event.topic
+        message = self.decode_message(message=event.value, topic=topic)
+        if not message:
+            # An empty result means decoding or validation failed (or the payload decoded to an empty dict);
+            # the untouched raw value is sent to the dead letter topic instead of being processed.
+            await self.producer.send_and_wait('metadata.dlq', event.value)
+            return
+
+        await self.process_topic_message(topic, message)
+
+    async def process_topic_message(self, topic: str, message: dict[str, Any]) -> None:
+        """Handle a successfully decoded message; a no-op in the base class.
+
+        :param topic: Topic the message was consumed from
+        :param message: Decoded message
+        """
+        pass
+
+    async def run(self) -> None:
+        """Entry point of the consumer; the base implementation only raises.
+
+        :raises NotImplementedError: Always, the base class provides no implementation
+        """
+        raise NotImplementedError('The class is missing the entry function `run()`!')
@@ -0,0 +1,20 @@
+# Copyright (C) 2022-2023 Indoc Systems
+#
+# Licensed under the GNU AFFERO GENERAL PUBLIC LICENSE, Version 3.0 (the "License") available at https://www.gnu.org/licenses/agpl-3.0.en.html.
+# You may not use this file except in compliance with the License.
+
+from datetime import datetime
+
+
+def convert_datetime_to_timestamp_millisecond(date: datetime) -> int:
+    """Express a datetime as an integer number of milliseconds since the epoch.
+
+    The fractional millisecond part is truncated by the ``int`` conversion.
+
+    :param date: Date
+    :return: Timestamp in milliseconds
+    """
+    seconds = date.timestamp()
+    milliseconds = seconds * 1000
+    return int(milliseconds)
+
+
+# Maps the datetime type to its millisecond-timestamp converter; presumably handed to a serializer as a
+# custom-encoder mapping (confirm at the call sites).
+datetime_as_timestamp_milli_encoder = {datetime: convert_datetime_to_timestamp_millisecond}
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+# Auto detect text files and perform LF normalization`
	`2`	`+* text=auto`