diff --git a/.copier-answers.yml b/.copier-answers.yml new file mode 100644 index 0000000..c4f4749 --- /dev/null +++ b/.copier-answers.yml @@ -0,0 +1,12 @@ +# Changes here will be overwritten by Copier; NEVER EDIT MANUALLY +_commit: 2023.10.27 +_src_path: gh:scientific-python/cookie +backend: hatch +email: rkansal@cern.ch +full_name: Raghav Kansal +license: MIT +org: jet-net +project_name: cookiecutter-test +project_short_description: 2023 Jet Simulation Challenge +url: https://github.com/jet-net/simulation-challenge +vcs: true diff --git a/.git_archival.txt b/.git_archival.txt new file mode 100644 index 0000000..8fb235d --- /dev/null +++ b/.git_archival.txt @@ -0,0 +1,4 @@ +node: $Format:%H$ +node-date: $Format:%cI$ +describe-name: $Format:%(describe:tags=true,match=*[0-9]*)$ +ref-names: $Format:%D$ diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..00a7b00 --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +.git_archival.txt export-subst diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md new file mode 100644 index 0000000..1470b00 --- /dev/null +++ b/.github/CONTRIBUTING.md @@ -0,0 +1,101 @@ +See the [Scientific Python Developer Guide][spc-dev-intro] for a detailed +description of best practices for developing scientific packages. + +[spc-dev-intro]: https://learn.scientific-python.org/development/ + +# Quick development + +The fastest way to start with development is to use nox. If you don't have nox, +you can use `pipx run nox` to run it without installing, or `pipx install nox`. +If you don't have pipx (pip for applications), then you can install with +`pip install pipx` (the only case where installing an application with regular +pip is reasonable). If you use macOS, then pipx and nox are both in brew, use +`brew install pipx nox`. + +To use, run `nox`. This will lint and test using every installed version of +Python on your system, skipping ones that are not installed. 
You can also run +specific jobs: + +```console +$ nox -s lint # Lint only +$ nox -s tests # Python tests +$ nox -s docs -- serve # Build and serve the docs +$ nox -s build # Make an SDist and wheel +``` + +Nox handles everything for you, including setting up a temporary virtual +environment for each run. + +# Setting up a development environment manually + +You can set up a development environment by running: + +```bash +python3 -m venv .venv +source ./.venv/bin/activate +pip install -v -e .[dev] +``` + +If you have the +[Python Launcher for Unix](https://github.com/brettcannon/python-launcher), you +can instead do: + +```bash +py -m venv .venv +py -m pip install -v -e .[dev] +``` + +# Post setup + +You should prepare pre-commit, which will help you by checking that commits pass +required checks: + +```bash +pip install pre-commit # or brew install pre-commit on macOS +pre-commit install # Will install a pre-commit hook into the git repo +``` + +You can also/alternatively run `pre-commit run` (changes only) or +`pre-commit run --all-files` to check even without installing the hook. + +# Testing + +Use pytest to run the unit checks: + +```bash +pytest +``` + +# Coverage + +Use pytest-cov to generate coverage reports: + +```bash +pytest --cov=cookiecutter-test +``` + +# Building docs + +You can build the docs using: + +```bash +nox -s docs +``` + +You can see a preview with: + +```bash +nox -s docs -- serve +``` + +# Pre-commit + +This project uses pre-commit for all style checking. While you can run it with +nox, this is such an important tool that it deserves to be installed on its own. +Install pre-commit and run: + +```bash +pre-commit run -a +``` + +to check all files. 
diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..f9ecf57 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,7 @@ +version: 2 +updates: + # Maintain dependencies for GitHub Actions + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "monthly" diff --git a/.github/matchers/pylint.json b/.github/matchers/pylint.json new file mode 100644 index 0000000..e3a6bd1 --- /dev/null +++ b/.github/matchers/pylint.json @@ -0,0 +1,32 @@ +{ + "problemMatcher": [ + { + "severity": "warning", + "pattern": [ + { + "regexp": "^([^:]+):(\\d+):(\\d+): ([A-DF-Z]\\d+): \\033\\[[\\d;]+m([^\\033]+).*$", + "file": 1, + "line": 2, + "column": 3, + "code": 4, + "message": 5 + } + ], + "owner": "pylint-warning" + }, + { + "severity": "error", + "pattern": [ + { + "regexp": "^([^:]+):(\\d+):(\\d+): (E\\d+): \\033\\[[\\d;]+m([^\\033]+).*$", + "file": 1, + "line": 2, + "column": 3, + "code": 4, + "message": 5 + } + ], + "owner": "pylint-error" + } + ] +} diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml new file mode 100644 index 0000000..5100345 --- /dev/null +++ b/.github/workflows/cd.yml @@ -0,0 +1,60 @@ +name: CD + +on: + workflow_dispatch: + pull_request: + push: + branches: + - main + release: + types: + - published + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +env: + FORCE_COLOR: 3 + +jobs: + dist: + name: Distribution build + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Build sdist and wheel + run: pipx run build + + - uses: actions/upload-artifact@v3 + with: + path: dist + + - name: Check products + run: pipx run twine check dist/* + + publish: + needs: [dist] + name: Publish to PyPI + environment: pypi + permissions: + id-token: write + runs-on: ubuntu-latest + if: github.event_name == 'release' && github.event.action == 'published' + + steps: + - uses: actions/download-artifact@v3 + with: + 
name: artifact + path: dist + + - uses: pypa/gh-action-pypi-publish@release/v1 + if: github.event_name == 'release' && github.event.action == 'published' + with: + # Remember to tell (test-)pypi about this repo before publishing + # Remove this line to publish to PyPI + repository-url: https://test.pypi.org/legacy/ diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..2ce9bc0 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,69 @@ +name: CI + +on: + workflow_dispatch: + pull_request: + push: + branches: + - main + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +env: + FORCE_COLOR: 3 + +jobs: + pre-commit: + name: Format + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + - uses: actions/setup-python@v4 + with: + python-version: "3.x" + - uses: pre-commit/action@v3.0.0 + with: + extra_args: --hook-stage manual --all-files + - name: Run PyLint + run: | + echo "::add-matcher::$GITHUB_WORKSPACE/.github/matchers/pylint.json" + pipx run nox -s pylint + + checks: + name: Check Python ${{ matrix.python-version }} on ${{ matrix.runs-on }} + runs-on: ${{ matrix.runs-on }} + needs: [pre-commit] + strategy: + fail-fast: false + matrix: + python-version: ["3.8", "3.11"] + runs-on: [ubuntu-latest, macos-latest, windows-latest] + + include: + - python-version: pypy-3.10 + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + allow-prereleases: true + + - name: Install package + run: python -m pip install .[test] + + - name: Test package + run: >- + python -m pytest -ra --cov --cov-report=xml --cov-report=term + --durations=20 + + - name: Upload coverage report + uses: codecov/codecov-action@v3.1.4 diff --git a/.github/workflows/evaluate.yml b/.github/workflows/evaluate.yml new file mode 100644 index 0000000..ae7f2f0 
--- /dev/null +++ b/.github/workflows/evaluate.yml @@ -0,0 +1,33 @@ +name: evaluate + +on: + push: + paths: + - "submissions/**" + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +env: + FORCE_COLOR: 3 + +jobs: + evaluate: + name: Evaluate submissions + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - uses: actions/setup-python@v4 + with: + python-version: "3.10" + + - name: Install package + run: python -m pip install . + + - name: Evaluate + run: >- + python src/run.py --evaluate --submission all diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml deleted file mode 100644 index a4f9366..0000000 --- a/.github/workflows/lint.yml +++ /dev/null @@ -1,27 +0,0 @@ -name: Lint - -on: [push, pull_request] - -jobs: - run-linters: - name: Run linters - runs-on: ubuntu-latest - - steps: - - name: Check out Git repository - uses: actions/checkout@v2 - - - name: Set up Python - uses: actions/setup-python@v1 - with: - python-version: 3.9 - - - name: Install Python dependencies - run: pip install black - - - name: Run linters - uses: wearerequired/lint-action@v2 - with: - auto_fix: true - black: true - black_auto_fix: true diff --git a/.gitignore b/.gitignore index fdf6c99..c94994c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,163 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# setuptools_scm +src/*/_version.py + + +# ruff +.ruff_cache/ + +# OS specific stuff .DS_Store -**/.DS_Store -**/__pycache__ +.DS_Store? 
+._* +.Spotlight-V100 +.Trashes +ehthumbs.db +Thumbs.db + +# Common editor files +*~ +*.swp + +/*test.ipynb +_site + +/datasets diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8973106..b7e13ea 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,5 +1,90 @@ +ci: + autoupdate_commit_msg: "chore: update pre-commit hooks" + autofix_commit_msg: "style: pre-commit fixes" + repos: - repo: https://github.com/psf/black-pre-commit-mirror - rev: "23.7.0" + rev: "23.10.1" hooks: - id: black-jupyter + + - repo: https://github.com/adamchainz/blacken-docs + rev: "1.16.0" + hooks: + - id: blacken-docs + additional_dependencies: [black==23.*] + + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: "v4.5.0" + hooks: + - id: check-added-large-files + - id: check-case-conflict + - id: check-merge-conflict + - id: check-symlinks + - id: check-yaml + - id: debug-statements + - id: end-of-file-fixer + - id: mixed-line-ending + - id: name-tests-test + args: ["--pytest-test-first"] + - id: requirements-txt-fixer + - id: trailing-whitespace + + - repo: https://github.com/pre-commit/pygrep-hooks + rev: "v1.10.0" + hooks: + - id: rst-backticks + - id: rst-directive-colons + - id: rst-inline-touching-normal + + - repo: https://github.com/pre-commit/mirrors-prettier + rev: "v3.0.3" + hooks: + - id: prettier + types_or: [yaml, markdown, html, css, scss, javascript, json] + args: [--prose-wrap=always] + + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: "v0.1.3" + hooks: + - id: ruff + args: ["--fix", "--show-fixes"] + + # - repo: https://github.com/pre-commit/mirrors-mypy + # rev: "v1.6.1" + # hooks: + # - id: mypy + # files: src|tests + # args: [] + # additional_dependencies: + # - pytest + + - repo: https://github.com/codespell-project/codespell + rev: "v2.2.6" + hooks: + - id: codespell + + - repo: https://github.com/shellcheck-py/shellcheck-py + rev: "v0.9.0.6" + hooks: + - id: shellcheck + + - repo: local + hooks: + - id: 
disallow-caps + name: Disallow improper capitalization + language: pygrep + entry: PyBind|Numpy|Cmake|CCache|Github|PyTest + exclude: .pre-commit-config.yaml + + - repo: https://github.com/abravalheri/validate-pyproject + rev: v0.15 + hooks: + - id: validate-pyproject + + - repo: https://github.com/python-jsonschema/check-jsonschema + rev: 0.27.0 + hooks: + - id: check-dependabot + - id: check-github-workflows + - id: check-readthedocs diff --git a/.readthedocs.yml b/.readthedocs.yml new file mode 100644 index 0000000..7e49657 --- /dev/null +++ b/.readthedocs.yml @@ -0,0 +1,18 @@ +# Read the Docs configuration file +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +version: 2 + +build: + os: ubuntu-22.04 + tools: + python: "3.11" +sphinx: + configuration: docs/conf.py + +python: + install: + - method: pip + path: . + extra_requirements: + - docs diff --git a/LICENSE b/LICENSE index d417869..6f770bb 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2022 Raghav Kansal +Copyright (c) 2023 Raghav Kansal Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md index ded9b20..320eb12 100644 --- a/README.md +++ b/README.md @@ -1,18 +1,45 @@ # HEP Simulation Challenge +[![Actions Status][actions-badge]][actions-link] + + + + + + + + +[actions-badge]: https://github.com/jet-net/simulation-challenge/workflows/CI/badge.svg +[actions-link]: https://github.com/jet-net/simulation-challenge/actions +[conda-badge]: https://img.shields.io/conda/vn/conda-forge/cookiecutter-test +[conda-link]: https://github.com/conda-forge/cookiecutter-test-feedstock +[github-discussions-badge]: https://img.shields.io/static/v1?label=Discussions&message=Ask&color=blue&logo=github +[github-discussions-link]: https://github.com/jet-net/simulation-challenge/discussions +[pypi-link]: 
https://pypi.org/project/cookiecutter-test/ +[pypi-platforms]: https://img.shields.io/pypi/pyversions/cookiecutter-test +[pypi-version]: https://img.shields.io/pypi/v/cookiecutter-test +[rtd-badge]: https://readthedocs.org/projects/cookiecutter-test/badge/?version=latest +[rtd-link]: https://cookiecutter-test.readthedocs.io/en/latest/?badge=latest + + + [![Codestyle](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) [![pre-commit.ci status](https://results.pre-commit.ci/badge/github/jet-net/simulation-challenge/main.svg)](https://results.pre-commit.ci/latest/github/jet-net/simulation-challenge/main) ## Planning - - [ ] Continuous Integration - - [ ] Make PR template - - [ ] Calculate metrics - - [ ] Make plots - - [ ] Measure timing? Need a way to: - - [ ] Provide and run an environemnt - - [ ] Run the model (GPU + CPU?) - - [ ] Push to webpage - - [ ] Website for results - - [ ] Initial paper / advertisement - - [ ] Journal paper +- [ ] Continuous Integration + - [x] Make PR template + - [x] Calculate metrics + - [ ] Make plots + - [ ] Measure timing? Need a way to: + - [ ] Provide and run an environment + - [ ] Run the model (GPU + CPU?) 
+ - [ ] Push to webpage +- [ ] Website for results +- [ ] Initial paper / advertisement +- [ ] Journal paper diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 0000000..4bb8328 --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,45 @@ +from __future__ import annotations + +import importlib.metadata + +project = "cookiecutter-test" +copyright = "2023, Raghav Kansal" +author = "Raghav Kansal" +version = release = importlib.metadata.version("cookiecutter_test") + +extensions = [ + "myst_parser", + "sphinx.ext.autodoc", + "sphinx.ext.intersphinx", + "sphinx.ext.mathjax", + "sphinx.ext.napoleon", + "sphinx_autodoc_typehints", + "sphinx_copybutton", +] + +source_suffix = [".rst", ".md"] +exclude_patterns = [ + "_build", + "**.ipynb_checkpoints", + "Thumbs.db", + ".DS_Store", + ".env", + ".venv", +] + +html_theme = "furo" + +myst_enable_extensions = [ + "colon_fence", +] + +intersphinx_mapping = { + "python": ("https://docs.python.org/3", None), +} + +nitpick_ignore = [ + ("py:class", "_io.StringIO"), + ("py:class", "_io.BytesIO"), +] + +always_document_param_types = True diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 0000000..6dae1f1 --- /dev/null +++ b/docs/index.md @@ -0,0 +1,17 @@ +# cookiecutter-test + +```{toctree} +:maxdepth: 2 +:hidden: + +``` + +```{include} ../README.md +:start-after: +``` + +## Indices and tables + +- {ref}`genindex` +- {ref}`modindex` +- {ref}`search` diff --git a/noxfile.py b/noxfile.py new file mode 100644 index 0000000..c348843 --- /dev/null +++ b/noxfile.py @@ -0,0 +1,111 @@ +from __future__ import annotations + +import argparse +import shutil +from pathlib import Path + +import nox + +DIR = Path(__file__).parent.resolve() + +nox.options.sessions = ["lint", "pylint", "tests"] + + +@nox.session +def lint(session: nox.Session) -> None: + """ + Run the linter. 
+ """ + session.install("pre-commit") + session.run("pre-commit", "run", "--all-files", "--show-diff-on-failure", *session.posargs) + + +@nox.session +def pylint(session: nox.Session) -> None: + """ + Run PyLint. + """ + # This needs to be installed into the package environment, and is slower + # than a pre-commit check + session.install(".", "pylint") + session.run("pylint", "cookiecutter_test", *session.posargs) + + +@nox.session +def tests(session: nox.Session) -> None: + """ + Run the unit and regular tests. + """ + session.install(".[test]") + session.run("pytest", *session.posargs) + + +@nox.session(reuse_venv=True) +def docs(session: nox.Session) -> None: + """ + Build the docs. Pass "--serve" to serve. Pass "-b linkcheck" to check links. + """ + + parser = argparse.ArgumentParser() + parser.add_argument("--serve", action="store_true", help="Serve after building") + parser.add_argument("-b", dest="builder", default="html", help="Build target (default: html)") + args, posargs = parser.parse_known_args(session.posargs) + + if args.builder != "html" and args.serve: + session.error("Must not specify non-HTML builder with --serve") + + extra_installs = ["sphinx-autobuild"] if args.serve else [] + + session.install("-e.[docs]", *extra_installs) + session.chdir("docs") + + if args.builder == "linkcheck": + session.run("sphinx-build", "-b", "linkcheck", ".", "_build/linkcheck", *posargs) + return + + shared_args = ( + "-n", # nitpicky mode + "-T", # full tracebacks + f"-b={args.builder}", + ".", + f"_build/{args.builder}", + *posargs, + ) + + if args.serve: + session.run("sphinx-autobuild", *shared_args) + else: + session.run("sphinx-build", "--keep-going", *shared_args) + + +@nox.session +def build_api_docs(session: nox.Session) -> None: + """ + Build (regenerate) API docs. 
+ """ + + session.install("sphinx") + session.chdir("docs") + session.run( + "sphinx-apidoc", + "-o", + "api/", + "--module-first", + "--no-toc", + "--force", + "../src/cookiecutter_test", + ) + + +@nox.session +def build(session: nox.Session) -> None: + """ + Build an SDist and wheel. + """ + + build_path = DIR.joinpath("build") + if build_path.exists(): + shutil.rmtree(build_path) + + session.install("build") + session.run("python", "-m", "build") diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..73ef9be --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,163 @@ +[build-system] +requires = ["hatchling", "hatch-vcs"] +build-backend = "hatchling.build" + + +[project] +name = "simulation-challenge" +authors = [ + { name = "Raghav Kansal", email = "rkansal@cern.ch" }, +] +description = "Jet Simulation Challenge" +readme = "README.md" +requires-python = ">=3.8" +classifiers = [ + "Development Status :: 1 - Planning", + "Intended Audience :: Science/Research", + "Intended Audience :: Developers", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3 :: Only", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Topic :: Scientific/Engineering", + "Typing :: Typed", +] +dynamic = ["version"] +dependencies = [ + "jetnet >= 0.2.4", + "pyyaml", +] + +[project.optional-dependencies] +test = [ + "pytest >=6", + "pytest-cov >=3", +] +dev = [ + "pytest >=6", + "pytest-cov >=3", +] +docs = [ + "sphinx>=7.0", + "myst_parser>=0.13", + "sphinx_copybutton", + "sphinx_autodoc_typehints", + "furo>=2023.08.17", +] + +[project.urls] +Homepage = "https://github.com/jet-net/simulation-challenge" +"Bug Tracker" = 
"https://github.com/jet-net/simulation-challenge/issues" +Discussions = "https://github.com/jet-net/simulation-challenge/discussions" +Changelog = "https://github.com/jet-net/simulation-challenge/releases" + + +[tool.hatch] +version.source = "vcs" +build.hooks.vcs.version-file = "src/simulation_challenge/_version.py" +envs.default.dependencies = [ + "pytest", + "pytest-cov", +] + + +[tool.pytest.ini_options] +minversion = "6.0" +addopts = ["-ra", "--showlocals", "--strict-markers", "--strict-config"] +xfail_strict = true +filterwarnings = [ + "error", +] +log_cli_level = "INFO" +testpaths = [ + "tests", +] + +[tool.black] +line-length = 100 + +[tool.coverage] +run.source = ["simulation_challenge"] +report.exclude_lines = [ + 'pragma: no cover', + '\.\.\.', + 'if typing.TYPE_CHECKING:', +] + +[tool.mypy] +files = ["src", "tests"] +python_version = "3.8" +warn_unused_configs = true +strict = true +show_error_codes = true +enable_error_code = ["ignore-without-code", "redundant-expr", "truthy-bool"] +warn_unreachable = true +disallow_untyped_defs = false +disallow_incomplete_defs = false + +[[tool.mypy.overrides]] +module = "simulation_challenge.*" +disallow_untyped_defs = true +disallow_incomplete_defs = true + + +[tool.ruff] +src = ["src"] + +[tool.ruff.lint] +extend-select = [ + "B", # flake8-bugbear + "I", # isort + "ARG", # flake8-unused-arguments + "C4", # flake8-comprehensions + "EM", # flake8-errmsg + "ICN", # flake8-import-conventions + "G", # flake8-logging-format + "PGH", # pygrep-hooks + "PIE", # flake8-pie + "PL", # pylint + "PT", # flake8-pytest-style + "PTH", # flake8-use-pathlib + "RET", # flake8-return + "RUF", # Ruff-specific + "SIM", # flake8-simplify + "T20", # flake8-print + "UP", # pyupgrade + "YTT", # flake8-2020 + "EXE", # flake8-executable + "NPY", # NumPy specific rules + "PD", # pandas-vet +] +ignore = [ + "PLR", # Design related pylint codes + "T201", + "EM101", + "EM102", +] +isort.required-imports = ["from __future__ import annotations"] +# 
Uncomment if using a _compat.typing backport +# typing-modules = ["simulation_challenge._compat.typing"] + +[tool.ruff.lint.per-file-ignores] +"tests/**" = ["T20"] +"noxfile.py" = ["T20"] + + +[tool.pylint] +py-version = "3.8" +ignore-paths = [".*/_version.py"] +reports.output-format = "colorized" +similarities.ignore-imports = "yes" +messages_control.disable = [ + "design", + "fixme", + "line-too-long", + "missing-module-docstring", + "wrong-import-position", +] diff --git a/src/run.py b/src/run.py new file mode 100644 index 0000000..d17dd8b --- /dev/null +++ b/src/run.py @@ -0,0 +1,93 @@ +from __future__ import annotations + +import argparse +import os + +import simulation_challenge as sc +from simulation_challenge.data import Samples +from simulation_challenge.submission import Submission + + +def evaluate_submission(sub: Submission, real_datasets_dir: str): + """Evaluates the given submission for all samples provided. + + Args: + sub (Submission): submission. + real_datasets_dir (str): Path to directory containing real datasets. 
+ """ + + print(f"Evaluating submission {sub.name}...") + + results = {} + + # go through each dataset and class in submission + for dataset, data_classes in sub.samples.items(): + results[dataset] = {} + for data_class, gen_samples in data_classes.items(): + # load real samples + real_samples: Samples = sc.data.get_real_samples(dataset, data_class, real_datasets_dir) + # download generated samples + gen_samples.download() + + print(f"Evaluating {dataset} {data_class}...") + results[dataset][data_class] = sc.evaluate.evaluate(real_samples, gen_samples) + + print(results) + + # TODO: save results to file + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + + parser.add_argument( + "--real-datasets-dir", + default="./datasets/", + help="path to directory containing real datasets", + type=str, + ) + + parser.add_argument( + "--gen-datasets-dir", + default="./datasets/", + help="path to directory containing generated samples", + type=str, + ) + + parser.add_argument( + "--submission-dir", + default="./submissions/", + help="path to directory containing submissions", + type=str, + ) + + parser.add_argument( + "--submission", + help="submission name", + type=str, + ) + + parser.add_argument("--evaluate", action=argparse.BooleanOptionalAction, default=False) + + args = parser.parse_args() + + subs = [] + # load all submissions in submissions directory + if args.submission == "all": + for submission in os.listdir(args.submission_dir): + subs.append( + sc.submission.load_submission( + args.submission_dir, submission, args.gen_datasets_dir + ) + ) + # load specific submission + else: + subs.append( + sc.submission.load_submission( + args.submission_dir, args.submission, args.gen_datasets_dir + ) + ) + + if args.evaluate: + for sub in subs: + evaluate_submission(sub, args.real_datasets_dir) diff --git a/src/simulation_challenge/__init__.py b/src/simulation_challenge/__init__.py new file mode 100644 index 0000000..5fe3929 --- /dev/null +++ 
b/src/simulation_challenge/__init__.py @@ -0,0 +1,13 @@ +""" +Copyright (c) 2023 Raghav Kansal. All rights reserved. + +simulation-challenge: Jet Simulation Challenge +""" + + +from __future__ import annotations + +from . import data, evaluate, submission +from ._version import version as __version__ + +__all__ = ["__version__", "data", "evaluate", "submission"] diff --git a/src/simulation_challenge/_version.pyi b/src/simulation_challenge/_version.pyi new file mode 100644 index 0000000..91744f9 --- /dev/null +++ b/src/simulation_challenge/_version.pyi @@ -0,0 +1,4 @@ +from __future__ import annotations + +version: str +version_tuple: tuple[int, int, int] | tuple[int, int, int, str, str] diff --git a/src/simulation_challenge/data.py b/src/simulation_challenge/data.py new file mode 100644 index 0000000..6b8b5bc --- /dev/null +++ b/src/simulation_challenge/data.py @@ -0,0 +1,146 @@ +""" +Methods for accessing real and generated data. +""" +from __future__ import annotations + +from dataclasses import dataclass +from pathlib import Path + +import h5py +import jetnet +import numpy as np + +_valid_datasets = {"jetnet": ["g30"]} # TODO: more datasets + + +@dataclass +class Samples: + """ + Class for jet samples. Downloads, loads, and stores the samples. Also calculates and stores EFPs. + + Args: + dataset (str): dataset of the samples, e.g. "jetnet". + data_class (str): jet type of the sample, e.g. "g30". + download_url (str): URL to download samples. + download_path (str): Path to downloaded samples. + downloaded (bool): Whether the samples have been downloaded. + md5 (optional, str): MD5 hash of the downloaded samples, to verify integrity of download. + samples (np.ndarray): Array of shape ``(num_samples, num_particles, num_features)`` containing the samples. + efps_path (str): Path to EFPs numpy file. + efps (np.ndarray): Array of shape ``(num_samples, num_efps)`` containing the calculated EFPs. 
+ """ + + dataset: str = None + data_class: str = None + + download_url: str = None + download_path: str = None + downloaded: bool = False + md5: str = None + samples: np.ndarray = None + + efps_path: str = None + efps: np.ndarray = None + + def __post_init__(self): + """Loads EFPs if they exist.""" + if Path(self.download_path).exists(): + self.downloaded = True + + self.efps_path = Path(self.download_path).parent / f"{self.data_class}_efps.npy" + + if Path(self.efps_path).exists(): + self.efps = np.load(self.efps_path) + + def load_samples(self, num_samples: int = 50_000): + """ + Verifies integrity of downloaded file (if md5 hash provided) + and loads the samples from the HDF5 file. + """ + if self.downloaded is False: + raise RuntimeError("Samples need to be downloaded before loading.") + + if self.md5 is not None: + match_md5, fmd5 = jetnet.datasets.utils._check_md5(self.download_path, self.md5) + if not match_md5: + raise RuntimeError("Downloaded file MD5 does not match expected MD5.") + + with h5py.File(self.download_path, "r") as f: + self.samples = np.array(f["particle_features"])[:num_samples] + + def download(self, overwrite: bool = False): + """Downloads the samples if they do not exist and loads them.""" + if self.downloaded: + print(f"File exists: {self.download_path}.") + if not overwrite: + print("Skipping download.") + self.load_samples() + return + + print(f"Downloading to {self.download_path}.") + + Path.mkdir(Path(self.download_path).parent, exist_ok=True, parents=True) + jetnet.datasets.utils.download_progress_bar(self.download_url, self.download_path) + self.downloaded = True + + self.load_samples() + + def get_efps(self): + """Calculates the EFPs if not already cached and returns them.""" + if self.samples is None: + raise RuntimeError("Samples need to be loaded before calculating EFPs.") + + if self.efps is None: + print("Calculating EFPs...") + self.efps = jetnet.utils.efps(self.samples) + np.save(self.efps_path, self.efps) + + return 
self.efps + + +def get_real_samples( + dataset: str, data_class: str, data_dir: str, num_samples: int = 50_000 +) -> np.ndarray: + """Downloads, if necessary, and loads the real data for the given dataset and data class. + + Args: + dataset (str): Choices are ["jetnet"]. + data_class (str): Choices are ["g30"] for jetnet. + data_dir (str): Directory in which data is located, or in which to download the dataset. + num_samples (int, optional): Number of samples to return. Defaults to 50,000. + + Returns: + Samples: Samples object whose ``samples`` array has shape ``(num_samples, num_particles, num_features)``. + """ + if dataset not in _valid_datasets: + raise ValueError(f"Invalid dataset: {dataset}") + + if data_class not in _valid_datasets[dataset]: + raise ValueError(f"Invalid data class {data_class} for dataset {dataset}") + + data_args = { + "data_dir": f"{data_dir}/{dataset}", + "jet_features": None, + "download": True, + } + + if dataset == "jetnet": + # dataset-specific args + data_args |= {"particle_features": ["etarel", "phirel", "ptrel"]} + + if data_class.endswith("30"): + data_args |= {"num_particles": 30, "jet_type": data_class[:-2]} + elif data_class.endswith("150"): + data_args |= {"num_particles": 150, "jet_type": data_class[:-3]} + + pf, _ = jetnet.datasets.JetNet.getData(**data_args) + pf = pf[-num_samples:] + real_samples = Samples( + dataset=dataset, + # remove "30" to match jetnet file naming convention + data_class=data_class if data_class.endswith("150") else data_class[:-2], + samples=pf, + download_path=f"{data_dir}/{dataset}/{data_class}.hdf5", + ) + + return real_samples diff --git a/src/simulation_challenge/evaluate.py b/src/simulation_challenge/evaluate.py new file mode 100644 index 0000000..da3638a --- /dev/null +++ b/src/simulation_challenge/evaluate.py @@ -0,0 +1,49 @@ +from __future__ import annotations + +from jetnet import evaluation + +from .data import Samples + + +def evaluate( + real_samples: Samples, + gen_samples: Samples, + 
num_w1_eval_samples: int = 50_000,
+    num_w1_batches: int = 5,
+) -> dict:
+    """Evaluates the given generated samples against the real samples using the JetNet library.
+
+    Args:
+        real_samples (Samples): ``Samples`` object holding the real data; its ``samples`` array has shape ``(num_samples, num_particles, num_features)``.
+        gen_samples (Samples): ``Samples`` object holding the generated data; its ``samples`` array has the same shape.
+
+    Returns:
+        dict: Dictionary of metrics.
+    """
+
+    scores = {}
+
+    # W1 distance between mass distributions
+    scores["w1m"] = evaluation.w1m(
+        real_samples.samples,
+        gen_samples.samples,
+        num_eval_samples=num_w1_eval_samples,
+        num_batches=num_w1_batches,
+        return_std=True,
+    )
+
+    # W1 distance between particle feature distributions
+    scores["w1p"] = evaluation.w1p(
+        real_samples.samples,
+        gen_samples.samples,
+        exclude_zeros=True,
+        num_eval_samples=num_w1_eval_samples,
+        num_batches=num_w1_batches,
+        return_std=True,
+    )
+
+    # FPD and KPD using EFPs
+    scores["fpd"] = evaluation.fpd(real_samples.get_efps(), gen_samples.get_efps())
+    scores["kpd"] = evaluation.kpd(real_samples.get_efps(), gen_samples.get_efps())
+
+    return scores
diff --git a/src/simulation_challenge/py.typed b/src/simulation_challenge/py.typed
new file mode 100644
index 0000000..e69de29
diff --git a/src/simulation_challenge/submission.py b/src/simulation_challenge/submission.py
new file mode 100644
index 0000000..dad1e8c
--- /dev/null
+++ b/src/simulation_challenge/submission.py
@@ -0,0 +1,62 @@
+"""
+Methods to handle submissions and their metadata.
+"""
+from __future__ import annotations

+from dataclasses import dataclass
+from pathlib import Path

+import yaml

+from .data import Samples


+@dataclass
+class Submission:
+    """Class storing information about submissions."""

+    name: str = None
+    authors: list[str] = None
+    affiliations: list[str] = None
+    gen_samples: dict = None
+    gen_datasets_dir: str = None
+    container_path: str = None
+    inference_command: str = None
+    model_repository: str = None

+    def __post_init__(self):
+        """Convert gen_samples dict to Samples objects and metadata for downloading."""
+        self.samples = {}
+        for dataset in self.gen_samples:
+            # TODO: add checks for valid datasets?
+            self.samples[dataset] = {}
+            for data_class in self.gen_samples[dataset]:
+                self.samples[dataset][data_class] = Samples(
+                    dataset=dataset,
+                    data_class=data_class,
+                    download_url=self.gen_samples[dataset][data_class]["url"],
+                    md5=self.gen_samples[dataset][data_class].get("md5", None),
+                    download_path=f"{self.gen_datasets_dir}/{self.name}/{dataset}/{data_class}.hdf5",
+                )


+def load_submission(submission_dir: str, submission_name: str, gen_datasets_dir: str) -> Submission:
+    """Loads the metadata for the given submission.

+    Args:
+        submission_dir (str): Directory containing the submission.
+        submission_name (str): Name of the submission.

+    Returns:
+        Submission: ``Submission`` object created from the parsed metadata.
+ """ + submission_path = Path(submission_dir) / submission_name + metadata_path = submission_path / "metadata.yml" + + if not Path(metadata_path).exists(): + raise FileNotFoundError(f"Metadata file not found: {metadata_path}") + + with metadata_path.open() as f: + metadata = yaml.safe_load(f) + + return Submission(**metadata, gen_datasets_dir=gen_datasets_dir) diff --git a/submissions/template/metadata.yml b/submissions/template/metadata.yml new file mode 100644 index 0000000..ebcd28a --- /dev/null +++ b/submissions/template/metadata.yml @@ -0,0 +1,17 @@ +name: Template +authors: Author 1, Author 2 +affiliations: Affiliation 1, Affiliation 2 + +gen_samples: + # paths to samples for the different datasets and classes + jetnet: + g30: + url: https://zenodo.org/api/records/6975118/files/g.hdf5/content + md5: 4f537329da897a22a59762c9fd8b74da + +# path to container with trained model +container_path: "" +# command to run inference of the model, will need some argument to specify the dataset +inference_command: "" + +model_repository: "" diff --git a/tests/test_package.py b/tests/test_package.py new file mode 100644 index 0000000..3ccd132 --- /dev/null +++ b/tests/test_package.py @@ -0,0 +1,9 @@ +from __future__ import annotations + +import importlib.metadata + +import simulation_challenge as m + + +def test_version(): + assert importlib.metadata.version("simulation_challenge") == m.__version__