Skip to content

Commit 43ea02c

Browse files
committed
Start working on mutation issues in validate.
We change the validation logic and separate the normalisation from the validation step. We make sure that if a notebook is normalized, it emits a warning. In the future we will turn the warning in to an Error. We add test for the current and an xfail test for the future behavior
1 parent f101cd2 commit 43ea02c

File tree

4 files changed

+135
-13
lines changed

4 files changed

+135
-13
lines changed

nbformat/json_compat.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,8 @@ def _validator_for_name(validator_name):
7676
for (name, module, validator_cls) in _VALIDATOR_MAP:
7777
if module and validator_name == name:
7878
return validator_cls
79+
# we always return something.
80+
raise ValueError(f"Missing validator for {validator_name!r}")
7981

8082

8183
def get_current_validator():

nbformat/tests/test_validator.py

Lines changed: 51 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,9 @@
66
import os
77
import re
88

9+
from nbformat.warnings import MissingIDFieldWarning
10+
from copy import deepcopy
11+
912
from .base import TestsBase
1013
from jsonschema import ValidationError
1114
from nbformat import read
@@ -14,6 +17,8 @@
1417

1518
import pytest
1619

20+
nb4 = ("test4.ipynb", "test4.5.ipynb")
21+
1722

1823
# Fixtures
1924
@pytest.fixture(autouse=True)
@@ -29,6 +34,49 @@ def set_validator(validator_name):
2934
os.environ["NBFORMAT_VALIDATOR"] = validator_name
3035

3136

37+
@pytest.mark.parametrize("validator_name", VALIDATORS)
38+
def test_should_warn(validator_name):
39+
"""Test that a v4 notebook witout id emit a warning"""
40+
set_validator(validator_name)
41+
with TestsBase.fopen(u"test4.5.ipynb", u"r") as f:
42+
nb = read(f, as_version=4)
43+
44+
del nb.cells[3]["id"]
45+
assert nb.cells[3].get("id") is None
46+
assert nb.cells[3]["cell_type"] == "code"
47+
48+
nb_copy = deepcopy(nb)
49+
50+
with pytest.warns(MissingIDFieldWarning):
51+
validate(nb)
52+
assert isvalid(nb) == True
53+
54+
55+
@pytest.mark.xfail(reason="In the future we want to stop warning, and raise an error")
56+
@pytest.mark.parametrize("validator_name", VALIDATORS)
57+
def test_should_not_mutate(validator_name):
58+
"""Test that a v4 notebook without id raise an error and does/not mutate
59+
60+
Probably should be 2 test. To enable in the future.
61+
"""
62+
set_validator(validator_name)
63+
with TestsBase.fopen(u"test4.5.ipynb", u"r") as f:
64+
nb = read(f, as_version=4)
65+
66+
del nb.cells[3]["id"]
67+
assert nb.cells[3].get("id") is None
68+
assert nb.cells[3]["cell_type"] == "code"
69+
70+
nb_deep_copy = deepcopy(nb)
71+
72+
with (pytest.raises(MissingIDFieldWarning), pytest.warns(None)):
73+
validate(nb)
74+
75+
assert nb == nb_deep_copy
76+
77+
assert isvalid(nb) == True
78+
79+
3280
@pytest.mark.parametrize("validator_name", VALIDATORS)
3381
def test_nb2(validator_name):
3482
"""Test that a v2 notebook converted to current passes validation"""
@@ -50,10 +98,11 @@ def test_nb3(validator_name):
5098

5199

52100
@pytest.mark.parametrize("validator_name", VALIDATORS)
53-
def test_nb4(validator_name):
101+
@pytest.mark.parametrize("nbfile", nb4)
102+
def test_nb4(validator_name, nbfile):
54103
"""Test that a v4 notebook passes validation"""
55104
set_validator(validator_name)
56-
with TestsBase.fopen(u'test4.ipynb', u'r') as f:
105+
with TestsBase.fopen(nbfile, u"r") as f:
57106
nb = read(f, as_version=4)
58107
validate(nb)
59108
assert isvalid(nb) == True

nbformat/validator.py

Lines changed: 64 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,13 @@
77
import pprint
88
import sys
99
import warnings
10+
from copy import deepcopy
1011

1112
from ipython_genutils.importstring import import_item
1213
from .json_compat import get_current_validator, ValidationError
1314
from .reader import get_version, reads
1415
from .corpus.words import generate_corpus_id
16+
from .warnings import MissingIDFieldWarning
1517

1618
validators = {}
1719

@@ -229,15 +231,71 @@ def better_validation_error(error, version, version_minor):
229231
return NotebookValidationError(error, ref)
230232

231233

232-
def validate(nbdict=None, ref=None, version=None, version_minor=None,
233-
relax_add_props=False, nbjson=None):
234+
def normalize(nbdict, version, version_minor):
235+
"""
236+
EXPERIMENTAL
237+
238+
normalise a notebook prior to validation.
239+
240+
This tries to implement a couple of normalisation steps to standardise
241+
notebooks and make validation easier.
242+
243+
You should in general not rely on this function and make sure the notebooks
244+
that reach nbformat are already in a normal form.
245+
246+
Parameters
247+
----------
248+
nbdict : dict
249+
notebook document
250+
version : int
251+
version_minor : int
252+
253+
Returns
254+
-------
255+
changes : int
256+
number of changes in the notebooks
257+
notebook : dict
258+
deep-copy of the original object with relevant changes.
259+
260+
"""
261+
nbdict = deepcopy(nbdict)
262+
return _normalize(deepcopy(nbdict))
263+
264+
def _normalize(nbdict, version, version_minor):
265+
changes = 0
266+
267+
if version >= 4 and version_minor >= 5:
268+
# if we support cell ids ensure default ids are provided
269+
for cell in nbdict["cells"]:
270+
if "id" not in cell:
271+
changes +=1
272+
warnings.warn(
273+
"Code cell is missing an id field, this will become"
274+
" a hard error in future nbformat versions. You may want"
275+
" to use `normalize()` on your notebooks before validations"
276+
" (available since nbformat 5.1.4). Previous of nbformat"
277+
" are also mutating their arguments, and will stop to do so"
278+
" in the future.",
279+
MissingIDFieldWarning,
280+
stacklevel=3,
281+
)
282+
# Generate cell ids if any are missing
283+
cell['id'] = generate_corpus_id()
284+
return changes, nbdict
285+
286+
def validate(nbdict=None, ref:str=None, version=None, version_minor=None,
287+
relax_add_props=False, nbjson=None) -> None:
234288
"""Checks whether the given notebook dict-like object
235289
conforms to the relevant notebook format schema.
236290
237-
291+
Parameters
292+
----------
293+
ref : optional, str
294+
reference to the subset of the schema we want to validate against.
295+
for example ``"markdown_cell"``, `"code_cell"` ....
238296
Raises ValidationError if not valid.
239297
"""
240-
298+
assert isinstance(ref, str) or ref is None
241299
# backwards compatibility for nbjson argument
242300
if nbdict is not None:
243301
pass
@@ -257,13 +315,8 @@ def validate(nbdict=None, ref=None, version=None, version_minor=None,
257315
# if ref is specified, and we don't have a version number, assume we're validating against 1.0
258316
if version is None:
259317
version, version_minor = 1, 0
260-
261-
if ref is None and version >= 4 and version_minor >= 5:
262-
# if we support cell ids ensure default ids are provided
263-
for cell in nbdict['cells']:
264-
if 'id' not in cell:
265-
# Generate cell ids if any are missing
266-
cell['id'] = generate_corpus_id()
318+
if ref is None:
319+
_normalize(nbdict, version, version_minor)
267320

268321
for error in iter_validate(nbdict, ref=ref, version=version,
269322
version_minor=version_minor,

nbformat/warnings.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
"""
2+
Warnings that can be emitted by nbformat.
3+
"""
4+
5+
6+
class MissingIDFieldWarning(FutureWarning):
7+
"""
8+
9+
This warning is emitted in the validation step of nbformat as we used to
10+
mutate the structure which is cause signature issues.
11+
12+
This will be turned into an error at later point.
13+
14+
We subclass FutureWarning as we will change the behavior in the future.
15+
16+
"""
17+
18+
pass

0 commit comments

Comments
 (0)