Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 6 additions & 46 deletions dandischema/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
ALLOWED_VALIDATION_SCHEMAS,
DANDI_SCHEMA_VERSION,
)
from .exceptions import JsonschemaValidationError, PydanticValidationError
from .exceptions import PydanticValidationError
from . import models
from .utils import (
TransitionalGenerateJsonSchema,
Expand Down Expand Up @@ -151,47 +151,16 @@ def publish_model_schemata(releasedir: Union[str, Path]) -> Path:
return vdir


def _validate_obj_json(
    instance: Any, validator: JsonschemaValidator, *, missing_ok: bool = False
) -> None:
    """
    Run `validate_json` on a data instance, optionally tolerating errors about
    missing required properties

    :param instance: The data instance to validate
    :param validator: The JSON schema validator to validate the instance with
    :param missing_ok: If true, errors whose message contains
        "is a required property" are discarded before deciding whether to raise
    :raises JsonschemaValidationError: If validation reports errors. With
        `missing_ok` false, the exception raised by `validate_json` is re-raised
        as is. With `missing_ok` true, a new exception holding only the errors
        that are not about a missing required property is raised (chained to the
        original one), and nothing is raised if no such errors remain.
    """
    try:
        validate_json(instance, validator)
    except JsonschemaValidationError as exc:
        # Strict mode: surface the validation failure untouched
        if not missing_ok:
            raise exc

        # Lenient mode: ignore "missing required property" errors and fail only
        # if something else is wrong with the instance
        non_missing_errs = [
            err for err in exc.errors if "is a required property" not in err.message
        ]
        if non_missing_errs:
            raise JsonschemaValidationError(non_missing_errs) from exc


def _validate_dandiset_json(data: dict, schema_dir: Union[str, Path]) -> None:
    """
    Validate Dandiset metadata against the `dandiset.json` schema in a directory

    :param data: The Dandiset metadata instance to validate
    :param schema_dir: Directory containing the `dandiset.json` JSON schema file
    :raises JsonschemaValidationError: Propagated from `validate_json` when the
        instance does not conform to the schema
    """
    # Load the schema first so the file is closed before validation starts
    with Path(schema_dir, "dandiset.json").open() as fp:
        schema = json.load(fp)
    # Validate exactly once: no errors are filtered here, so calling
    # `validate_json` directly is sufficient and avoids building the validator
    # and validating the same data a second time
    validate_json(data, dandi_jsonschema_validator(schema))


def _validate_asset_json(data: dict, schema_dir: Union[str, Path]) -> None:
    """
    Validate asset metadata against the `asset.json` schema in a directory

    :param data: The asset metadata instance to validate
    :param schema_dir: Directory containing the `asset.json` JSON schema file
    :raises JsonschemaValidationError: Propagated from `validate_json` when the
        instance does not conform to the schema
    """
    # Load the schema first so the file is closed before validation starts
    with Path(schema_dir, "asset.json").open() as fp:
        schema = json.load(fp)
    # Validate exactly once: no errors are filtered here, so calling
    # `validate_json` directly is sufficient and avoids building the validator
    # and validating the same data a second time
    validate_json(data, dandi_jsonschema_validator(schema))


@cache
Expand Down Expand Up @@ -273,7 +242,6 @@ def validate(
obj: dict,
schema_version: Optional[str] = None,
schema_key: Optional[str] = None,
missing_ok: bool = False,
json_validation: bool = False,
) -> None:
"""Validate object using pydantic
Expand All @@ -287,9 +255,6 @@ def validate(
schema_key: str, optional
Name of the schema key to be used, if not specified, `schemaKey` of the
object will be consulted
missing_ok: bool, optional
This flag allows checking if all fields have appropriate values but ignores
missing fields. A `ValueError` is raised with the list of all errors.
json_validation: bool, optional
If set to True, `obj` is first validated against the corresponding jsonschema.

Expand Down Expand Up @@ -324,17 +289,12 @@ def validate(
"using json schema for older versions"
)
jvalidator = _get_jsonschema_validator(schema_version, schema_key)
_validate_obj_json(obj, jvalidator, missing_ok=missing_ok)
validate_json(obj, jvalidator)
klass = getattr(models, schema_key)
try:
klass(**obj)
except pydantic.ValidationError as exc:
messages = []
for el in exc.errors():
if not missing_ok or el["type"] != "missing":
messages.append(el)
if messages:
raise PydanticValidationError(messages) # type: ignore[arg-type]
raise PydanticValidationError(exc.errors()) # type: ignore[arg-type]


def migrate(
Expand Down Expand Up @@ -432,7 +392,7 @@ def migrate(
# Optionally validate the instance against the DANDI schema it specifies
# before migration
if not skip_validation:
_validate_obj_json(obj, _get_jsonschema_validator(obj_ver, "Dandiset"))
validate_json(obj, _get_jsonschema_validator(obj_ver, "Dandiset"))

obj_migrated = deepcopy(obj)

Expand Down
201 changes: 1 addition & 200 deletions dandischema/tests/test_metadata.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from contextlib import nullcontext
from hashlib import md5, sha256
import json
from pathlib import Path
Expand All @@ -10,7 +9,7 @@
import pytest

from dandischema.models import Asset, Dandiset, PublishedAsset, PublishedDandiset
from dandischema.utils import TransitionalGenerateJsonSchema, jsonschema_validator
from dandischema.utils import TransitionalGenerateJsonSchema

from .utils import (
DANDISET_METADATA_DIR,
Expand All @@ -28,7 +27,6 @@
_get_jsonschema_validator_local,
_validate_asset_json,
_validate_dandiset_json,
_validate_obj_json,
aggregate_assets_summary,
migrate,
publish_model_schemata,
Expand Down Expand Up @@ -319,66 +317,6 @@ def test_requirements(
assert set([el["loc"][0] for el in exc.value.errors]) == missingfields


@pytest.mark.parametrize(
    "obj, schema_key, errors, num_errors",
    [
        (
            {"schemaKey": "Dandiset", "schemaVersion": "0.4.4"},
            None,
            {"Field required"},
            10,
        ),
        (
            {
                "schemaKey": "Dandiset",
                "identifier": f"{INSTANCE_NAME}:000000",
                "schemaVersion": "0.4.4",
            },
            None,
            {"Field required"},
            9,
        ),
    ],
)
def test_missing_ok(
    obj: Dict[str, Any], schema_key: Optional[str], errors: Set[str], num_errors: int
) -> None:
    """
    Check that `missing_ok=True` lets an incomplete object pass, while the default
    validation reports the expected number and kind of error messages
    """
    # Lenient validation must not raise for objects that only lack fields
    validate(
        obj, schema_version=DANDI_SCHEMA_VERSION, schema_key=schema_key, missing_ok=True
    )

    # Strict validation must report every missing field
    with pytest.raises(PydanticValidationError) as excinfo:
        validate(obj, schema_version=DANDI_SCHEMA_VERSION, schema_key=schema_key)

    reported_msgs = [err["msg"] for err in excinfo.value.errors]
    assert len(reported_msgs) == num_errors
    assert set(reported_msgs) == errors


@skipif_no_network
def test_missing_ok_error() -> None:
    """
    Check that `missing_ok=True` still lets errors other than missing fields
    surface, for both JSON schema validation and Pydantic validation
    """
    incomplete_dandiset = {
        "schemaKey": "Dandiset",
        "identifier": "000000",
        "schemaVersion": "0.4.4",
    }

    # The JSON schema check is only exercised when the tested instance is named
    # `DANDI`: the JSON schema at version `0.4.4` is hardcoded for an instance
    # with that name
    if INSTANCE_NAME == "DANDI":
        with pytest.raises(JsonschemaValidationError):
            validate(
                dict(incomplete_dandiset),
                json_validation=True,
                missing_ok=True,
            )

    with pytest.raises(PydanticValidationError):
        validate(
            dict(incomplete_dandiset),
            missing_ok=True,
        )


@pytest.mark.parametrize(
"obj, target, msg",
[
Expand Down Expand Up @@ -758,143 +696,6 @@ def test_aggregation_bids() -> None:
) # only a single entry so we do not duplicate them


class TestValidateObjJson:
    """
    Unit tests covering `_validate_obj_json()`
    """

    @pytest.fixture
    def dummy_jvalidator(self) -> JsonschemaValidator:
        """Provide a jsonschema validator built from a minimal dummy schema."""
        dummy_schema = {
            "type": "object",
            "properties": {"name": {"type": "string"}},
            "required": ["name"],
        }
        return jsonschema_validator(dummy_schema, check_format=True)

    @pytest.fixture
    def dummy_instance(self) -> dict:
        """Provide a dummy instance that carries a `name`."""
        return {"name": "Example"}

    def test_valid_obj_no_errors(
        self,
        monkeypatch: pytest.MonkeyPatch,
        dummy_jvalidator: JsonschemaValidator,
        dummy_instance: dict,
    ) -> None:
        """
        `_validate_obj_json` must return quietly when `validate_json` reports
        no errors
        """

        def always_valid(_instance: dict, _validator: dict) -> None:
            """Stand-in for `validate_json` that never raises."""
            return None

        # Swap out the `validate_json` that `_validate_obj_json` looks up
        from dandischema import metadata

        monkeypatch.setattr(metadata, "validate_json", always_valid)

        # No exception is expected here
        _validate_obj_json(dummy_instance, dummy_jvalidator)

    def test_raises_error_without_missing_ok(
        self,
        monkeypatch: pytest.MonkeyPatch,
        dummy_jvalidator: JsonschemaValidator,
        dummy_instance: dict,
    ) -> None:
        """
        With `missing_ok=False`, `_validate_obj_json` must let the
        JsonschemaValidationError from `validate_json` through.
        """

        def always_invalid(_instance: dict, _validator: dict) -> None:
            """Stand-in for `validate_json` that reports a missing property."""
            missing_name_err = MagicMock(message="`name` is a required property")
            raise JsonschemaValidationError(errors=[missing_name_err])

        from dandischema import metadata

        monkeypatch.setattr(metadata, "validate_json", always_invalid)

        # Nothing is filtered when `missing_ok=False`, so the error must surface
        with pytest.raises(JsonschemaValidationError) as excinfo:
            _validate_obj_json(dummy_instance, dummy_jvalidator, missing_ok=False)
        first_err = excinfo.value.errors[0]
        assert first_err.message == "`name` is a required property"

    @pytest.mark.parametrize(
        ("validation_errs", "expect_raises", "expected_remaining_errs_count"),
        [
            pytest.param(
                [
                    MagicMock(message="`name` is a required property"),
                    MagicMock(message="`title` is a required property ..."),
                ],
                False,
                None,
                id="no_remaining_errors",
            ),
            pytest.param(
                [
                    MagicMock(message="`name` is a required property"),
                    MagicMock(message="Some other validation error"),
                ],
                True,
                1,
                id="one_remaining_error",
            ),
        ],
    )
    def test_raises_only_nonmissing_errors_with_missing_ok(
        self,
        monkeypatch: pytest.MonkeyPatch,
        dummy_jvalidator: JsonschemaValidator,
        dummy_instance: dict,
        validation_errs: list[MagicMock],
        expect_raises: bool,
        expected_remaining_errs_count: Optional[int],
    ) -> None:
        """
        With `missing_ok=True`, `_validate_obj_json` must drop the
        'is a required property' errors and raise only if others remain.
        """

        def raise_given_errors(_instance: dict, _validator: dict) -> None:
            """
            Stand-in for `validate_json` that reports the parametrized errors.
            """
            raise JsonschemaValidationError(
                errors=validation_errs  # type: ignore[arg-type]
            )

        from dandischema import metadata

        monkeypatch.setattr(metadata, "validate_json", raise_given_errors)

        # Expect a JsonschemaValidationError only when some errors should survive
        # the filtering; otherwise run inside a no-op context
        expectation = (
            pytest.raises(JsonschemaValidationError) if expect_raises else nullcontext()
        )

        with expectation as excinfo:
            _validate_obj_json(dummy_instance, dummy_jvalidator, missing_ok=True)

        # The no-op context yields `None`: nothing was raised, nothing to inspect
        if excinfo is None:
            return

        # Only the errors unrelated to missing required properties may remain
        surviving_errs = excinfo.value.errors
        assert len(surviving_errs) == expected_remaining_errs_count


class TestGetJsonschemaValidator:
@pytest.mark.parametrize(
"schema_version, schema_key, expected_error_msg",
Expand Down
Loading