Skip to content

Commit

Permalink
Start working on mutation issues in validate.
Browse files Browse the repository at this point in the history
We change the validation logic and separate the normalisation from
the validation step.

We make sure that if a notebook is normalized, it emits a warning.
In the future we will turn the warning into an error.

We add a test for the current behavior and an xfail test for the future behavior.
  • Loading branch information
Carreau committed Nov 23, 2021
1 parent f101cd2 commit 43ea02c
Show file tree
Hide file tree
Showing 4 changed files with 135 additions and 13 deletions.
2 changes: 2 additions & 0 deletions nbformat/json_compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,8 @@ def _validator_for_name(validator_name):
for (name, module, validator_cls) in _VALIDATOR_MAP:
if module and validator_name == name:
return validator_cls
# we always return something.
raise ValueError(f"Missing validator for {validator_name!r}")


def get_current_validator():
Expand Down
53 changes: 51 additions & 2 deletions nbformat/tests/test_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@
import os
import re

from nbformat.warnings import MissingIDFieldWarning
from copy import deepcopy

from .base import TestsBase
from jsonschema import ValidationError
from nbformat import read
Expand All @@ -14,6 +17,8 @@

import pytest

nb4 = ("test4.ipynb", "test4.5.ipynb")


# Fixtures
@pytest.fixture(autouse=True)
Expand All @@ -29,6 +34,49 @@ def set_validator(validator_name):
os.environ["NBFORMAT_VALIDATOR"] = validator_name


@pytest.mark.parametrize("validator_name", VALIDATORS)
def test_should_warn(validator_name):
    """Test that validating a v4 notebook with a missing cell id emits a warning."""
    set_validator(validator_name)
    with TestsBase.fopen("test4.5.ipynb", "r") as f:
        nb = read(f, as_version=4)

    # Remove the id from one cell so validation has something to complain about.
    del nb.cells[3]["id"]
    assert nb.cells[3].get("id") is None
    assert nb.cells[3]["cell_type"] == "code"

    with pytest.warns(MissingIDFieldWarning):
        validate(nb)
    assert isvalid(nb)


@pytest.mark.xfail(reason="In the future we want to stop warning, and raise an error")
@pytest.mark.parametrize("validator_name", VALIDATORS)
def test_should_not_mutate(validator_name):
    """Test that a v4 notebook without a cell id raises an error and is not mutated.

    Probably should be two tests. Marked xfail for now; to be enabled in the
    future, once validation raises instead of warning.
    """
    # Local import so this test does not depend on the module's import block.
    import warnings

    set_validator(validator_name)
    with TestsBase.fopen("test4.5.ipynb", "r") as f:
        nb = read(f, as_version=4)

    del nb.cells[3]["id"]
    assert nb.cells[3].get("id") is None
    assert nb.cells[3]["cell_type"] == "code"

    # Snapshot taken before validation, used to detect in-place mutation.
    nb_deep_copy = deepcopy(nb)

    # The comma form works on every Python 3 version, unlike a parenthesised
    # group of context managers. ``catch_warnings(record=True)`` with the
    # "always" filter records warnings without asserting on them, which is
    # what ``pytest.warns(None)`` used to do before it was deprecated.
    with pytest.raises(MissingIDFieldWarning), warnings.catch_warnings(record=True):
        warnings.simplefilter("always")
        validate(nb)

    assert nb == nb_deep_copy

    assert isvalid(nb)


@pytest.mark.parametrize("validator_name", VALIDATORS)
def test_nb2(validator_name):
"""Test that a v2 notebook converted to current passes validation"""
Expand All @@ -50,10 +98,11 @@ def test_nb3(validator_name):


@pytest.mark.parametrize("validator_name", VALIDATORS)
def test_nb4(validator_name):
@pytest.mark.parametrize("nbfile", nb4)
def test_nb4(validator_name, nbfile):
"""Test that a v4 notebook passes validation"""
set_validator(validator_name)
with TestsBase.fopen(u'test4.ipynb', u'r') as f:
with TestsBase.fopen(nbfile, u"r") as f:
nb = read(f, as_version=4)
validate(nb)
assert isvalid(nb) == True
Expand Down
75 changes: 64 additions & 11 deletions nbformat/validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,13 @@
import pprint
import sys
import warnings
from copy import deepcopy

from ipython_genutils.importstring import import_item
from .json_compat import get_current_validator, ValidationError
from .reader import get_version, reads
from .corpus.words import generate_corpus_id
from .warnings import MissingIDFieldWarning

validators = {}

Expand Down Expand Up @@ -229,15 +231,71 @@ def better_validation_error(error, version, version_minor):
return NotebookValidationError(error, ref)


def validate(nbdict=None, ref=None, version=None, version_minor=None,
relax_add_props=False, nbjson=None):
def normalize(nbdict, version, version_minor):
"""
EXPERIMENTAL
normalise a notebook prior to validation.
This tries to implement a couple of normalisation steps to standardise
notebooks and make validation easier.
You should in general not rely on this function and make sure the notebooks
that reach nbformat are already in a normal form.
Parameters
----------
nbdict : dict
notebook document
version : int
version_minor : int
Returns
-------
changes : int
number of changes in the notebooks
notebook : dict
deep-copy of the original object with relevant changes.
"""
nbdict = deepcopy(nbdict)
return _normalize(deepcopy(nbdict))

def _normalize(nbdict, version, version_minor):
changes = 0

if version >= 4 and version_minor >= 5:
# if we support cell ids ensure default ids are provided
for cell in nbdict["cells"]:
if "id" not in cell:
changes +=1
warnings.warn(
"Code cell is missing an id field, this will become"
" a hard error in future nbformat versions. You may want"
" to use `normalize()` on your notebooks before validations"
" (available since nbformat 5.1.4). Previous of nbformat"
" are also mutating their arguments, and will stop to do so"
" in the future.",
MissingIDFieldWarning,
stacklevel=3,
)
# Generate cell ids if any are missing
cell['id'] = generate_corpus_id()
return changes, nbdict

def validate(nbdict=None, ref:str=None, version=None, version_minor=None,
relax_add_props=False, nbjson=None) -> None:
"""Checks whether the given notebook dict-like object
conforms to the relevant notebook format schema.
Parameters
----------
ref : optional, str
reference to the subset of the schema we want to validate against.
for example ``"markdown_cell"``, `"code_cell"` ....
Raises ValidationError if not valid.
"""

assert isinstance(ref, str) or ref is None
# backwards compatibility for nbjson argument
if nbdict is not None:
pass
Expand All @@ -257,13 +315,8 @@ def validate(nbdict=None, ref=None, version=None, version_minor=None,
# if ref is specified, and we don't have a version number, assume we're validating against 1.0
if version is None:
version, version_minor = 1, 0

if ref is None and version >= 4 and version_minor >= 5:
# if we support cell ids ensure default ids are provided
for cell in nbdict['cells']:
if 'id' not in cell:
# Generate cell ids if any are missing
cell['id'] = generate_corpus_id()
if ref is None:
_normalize(nbdict, version, version_minor)

for error in iter_validate(nbdict, ref=ref, version=version,
version_minor=version_minor,
Expand Down
18 changes: 18 additions & 0 deletions nbformat/warnings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
"""
Warnings that can be emitted by nbformat.
"""


class MissingIDFieldWarning(FutureWarning):
    """
    This warning is emitted in the validation step of nbformat, because
    validation used to mutate the notebook structure, which causes
    signature issues.

    It will be turned into an error at a later point.

    We subclass FutureWarning because the behavior will change in the future.
    """

0 comments on commit 43ea02c

Please sign in to comment.