Skip to content

Commit 5806234

Browse files
authored
Merge pull request #412 from common-workflow-language/class-vocab
Class vocab
2 parents f537afd + f0fcc54 commit 5806234

File tree

5 files changed

+95
-60
lines changed

5 files changed

+95
-60
lines changed

schema_salad/avro/schema.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -579,10 +579,13 @@ def make_avsc_object(json_data: JsonDataType, names: Optional[Names] = None) ->
579579
if names is None:
580580
names = Names()
581581

582-
if isinstance(json_data, Dict) and json_data.get("name") == "Any":
583-
del names.names["Any"]
584-
elif not names.has_name("Any", None):
585-
EnumSchema("Any", None, ["Any"], names=names)
582+
if (
583+
isinstance(json_data, Dict)
584+
and json_data.get("name") == "org.w3id.cwl.salad.Any"
585+
):
586+
del names.names["org.w3id.cwl.salad.Any"]
587+
elif not names.has_name("org.w3id.cwl.salad.Any", None):
588+
EnumSchema("org.w3id.cwl.salad.Any", None, ["Any"], names=names)
586589

587590
# JSON object (non-union)
588591
if isinstance(json_data, dict):

schema_salad/makedoc.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import mistune
2525

2626
from . import schema
27+
from .validate import avro_type_name
2728
from .exceptions import SchemaSaladException, ValidationException
2829
from .utils import add_dictlist, aslist
2930

@@ -315,7 +316,7 @@ def typefmt(
315316
"https://w3id.org/cwl/salad#record",
316317
"https://w3id.org/cwl/salad#enum",
317318
):
318-
frg = schema.avro_type_name(tp["name"])
319+
frg = avro_type_name(tp["name"])
319320
if tp["name"] in redirects:
320321
return """<a href="{}">{}</a>""".format(redirects[tp["name"]], frg)
321322
if tp["name"] in self.typemap:
@@ -335,7 +336,7 @@ def typefmt(
335336
return """<a href="{}">{}</a>""".format(redirects[tp], redirects[tp])
336337
if str(tp) in basicTypes:
337338
return """<a href="{}">{}</a>""".format(
338-
self.primitiveType, schema.avro_type_name(str(tp))
339+
self.primitiveType, avro_type_name(str(tp))
339340
)
340341
frg2 = urldefrag(tp)[1]
341342
if frg2 != "":

schema_salad/schema.py

Lines changed: 30 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,7 @@ def get_metaschema() -> Tuple[Names, List[Dict[str, str]], Loader]:
192192
_logger.error("%s", j2)
193193
raise SchemaParseException(f"Not a list: {j2}")
194194
else:
195-
sch_obj = make_avro(j2, loader)
195+
sch_obj = make_avro(j2, loader, loader.vocab)
196196
try:
197197
sch_names = make_avro_schema_from_avro(sch_obj)
198198
except SchemaParseException:
@@ -258,7 +258,7 @@ def load_schema(
258258

259259
# Make the Avro validation that will be used to validate the target
260260
# document
261-
avsc_names = make_avro_schema(schema_doc, document_loader)
261+
avsc_names = make_avro_schema(schema_doc, document_loader, metaschema_loader.vocab)
262262

263263
return document_loader, avsc_names, schema_metadata, metaschema_loader
264264

@@ -350,6 +350,7 @@ def validate_doc(
350350
raise_ex=False,
351351
skip_foreign_properties=loader.skip_schemas,
352352
strict_foreign_properties=strict_foreign_properties,
353+
vocab=loader.vocab,
353354
)
354355
if success:
355356
break
@@ -372,6 +373,7 @@ def validate_doc(
372373
raise_ex=True,
373374
skip_foreign_properties=loader.skip_schemas,
374375
strict_foreign_properties=strict_foreign_properties,
376+
vocab=loader.vocab,
375377
)
376378
except ClassValidationException as exc1:
377379
errors = [
@@ -493,42 +495,6 @@ def replace_type(
493495
return items
494496

495497

496-
primitives = {
497-
"http://www.w3.org/2001/XMLSchema#string": "string",
498-
"http://www.w3.org/2001/XMLSchema#boolean": "boolean",
499-
"http://www.w3.org/2001/XMLSchema#int": "int",
500-
"http://www.w3.org/2001/XMLSchema#long": "long",
501-
"http://www.w3.org/2001/XMLSchema#float": "float",
502-
"http://www.w3.org/2001/XMLSchema#double": "double",
503-
saladp + "null": "null",
504-
saladp + "enum": "enum",
505-
saladp + "array": "array",
506-
saladp + "record": "record",
507-
}
508-
509-
510-
def avro_type_name(url: str) -> str:
511-
"""
512-
Turn a URL into an Avro-safe name.
513-
514-
If the URL has no fragment, return this plain URL.
515-
516-
Extract either the last part of the URL fragment past the slash, otherwise
517-
the whole fragment.
518-
"""
519-
global primitives
520-
521-
if url in primitives:
522-
return primitives[url]
523-
524-
if url.startswith("http://"):
525-
url = url[7:]
526-
elif url.startswith("https://"):
527-
url = url[8:]
528-
url = url.replace("/", ".").replace("#", ".")
529-
return url
530-
531-
532498
def avro_field_name(url: str) -> str:
533499
"""
534500
Turn a URL into an Avro-safe name.
@@ -553,18 +519,24 @@ def make_valid_avro(
553519
found: Set[str],
554520
union: bool = False,
555521
fielddef: bool = False,
522+
vocab: Optional[Dict[str, str]] = None,
556523
) -> Union[
557524
Avro, MutableMapping[str, str], str, List[Union[Any, MutableMapping[str, str], str]]
558525
]:
559526
"""Convert our schema to be more avro like."""
527+
528+
if vocab is None:
529+
_, _, metaschema_loader = get_metaschema()
530+
vocab = metaschema_loader.vocab
531+
560532
# Possibly could be integrated into our fork of avro/schema.py?
561533
if isinstance(items, MutableMapping):
562534
avro = copy.copy(items)
563535
if avro.get("name") and avro.get("inVocab", True):
564536
if fielddef:
565537
avro["name"] = avro_field_name(avro["name"])
566538
else:
567-
avro["name"] = avro_type_name(avro["name"])
539+
avro["name"] = validate.avro_type_name(avro["name"])
568540

569541
if "type" in avro and avro["type"] in (
570542
saladp + "record",
@@ -585,6 +557,7 @@ def make_valid_avro(
585557
found,
586558
union=True,
587559
fielddef=(field == "fields"),
560+
vocab=vocab,
588561
)
589562
if "symbols" in avro:
590563
avro["symbols"] = [avro_field_name(sym) for sym in avro["symbols"]]
@@ -593,13 +566,20 @@ def make_valid_avro(
593566
ret = []
594567
for i in items:
595568
ret.append(
596-
make_valid_avro(i, alltypes, found, union=union, fielddef=fielddef)
569+
make_valid_avro(
570+
i, alltypes, found, union=union, fielddef=fielddef, vocab=vocab
571+
)
597572
)
598573
return ret
599574
if union and isinstance(items, str):
600-
if items in alltypes and avro_type_name(items) not in found:
601-
return make_valid_avro(alltypes[items], alltypes, found, union=union)
602-
return avro_type_name(items)
575+
if items in alltypes and validate.avro_type_name(items) not in found:
576+
return make_valid_avro(
577+
alltypes[items], alltypes, found, union=union, vocab=vocab
578+
)
579+
if items in vocab:
580+
return validate.avro_type_name(vocab[items])
581+
else:
582+
return validate.avro_type_name(items)
603583
else:
604584
return items
605585

@@ -697,7 +677,7 @@ def extend_and_specialize(
697677
for ex in aslist(result["extends"]):
698678
if ex_types[ex].get("abstract"):
699679
add_dictlist(extended_by, ex, ex_types[result["name"]])
700-
add_dictlist(extended_by, avro_type_name(ex), ex_types[ex])
680+
add_dictlist(extended_by, validate.avro_type_name(ex), ex_types[ex])
701681

702682
for result in results:
703683
if result.get("abstract") and result["name"] not in extended_by:
@@ -717,27 +697,28 @@ def extend_and_specialize(
717697
def make_avro(
718698
i: List[Dict[str, Any]],
719699
loader: Loader,
700+
metaschema_vocab: Optional[Dict[str, str]] = None,
720701
) -> List[Any]:
721702

722703
j = extend_and_specialize(i, loader)
723704

724705
name_dict = {} # type: Dict[str, Dict[str, Any]]
725706
for entry in j:
726707
name_dict[entry["name"]] = entry
727-
avro = make_valid_avro(j, name_dict, set())
708+
709+
avro = make_valid_avro(j, name_dict, set(), vocab=metaschema_vocab)
728710

729711
return [
730712
t
731713
for t in avro
732714
if isinstance(t, MutableMapping)
733715
and not t.get("abstract")
734-
and t.get("type") != "documentation"
716+
and t.get("type") != "org.w3id.cwl.salad.documentation"
735717
]
736718

737719

738720
def make_avro_schema(
739-
i: List[Any],
740-
loader: Loader,
721+
i: List[Any], loader: Loader, metaschema_vocab: Optional[Dict[str, str]] = None
741722
) -> Names:
742723
"""
743724
All in one convenience function.
@@ -746,7 +727,7 @@ def make_avro_schema(
746727
the intermediate result for diagnostic output.
747728
"""
748729
names = Names()
749-
avro = make_avro(i, loader)
730+
avro = make_avro(i, loader, metaschema_vocab)
750731
make_avsc_object(convert_to_dict(avro), names)
751732
return names
752733

schema_salad/validate.py

Lines changed: 54 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import logging
22
import pprint
3-
from typing import Any, List, MutableMapping, MutableSequence, Optional, Set
3+
from typing import Any, List, Mapping, MutableMapping, MutableSequence, Optional, Set
44
from urllib.parse import urlsplit
55

66
from . import avro
@@ -21,6 +21,7 @@ def validate(
2121
identifiers: Optional[List[str]] = None,
2222
strict: bool = False,
2323
foreign_properties: Optional[Set[str]] = None,
24+
vocab: Optional[Mapping[str, str]] = None,
2425
) -> bool:
2526
if not identifiers:
2627
identifiers = []
@@ -33,6 +34,7 @@ def validate(
3334
strict=strict,
3435
foreign_properties=foreign_properties,
3536
raise_ex=False,
37+
vocab=vocab,
3638
)
3739

3840

@@ -47,6 +49,43 @@ def avro_shortname(name: str) -> str:
4749
return name.split(".")[-1]
4850

4951

52+
saladp = "https://w3id.org/cwl/salad#"

# Type URIs that avro_type_name() maps to a fixed short name instead of
# deriving a dotted name from the URL.
primitives = {
    "http://www.w3.org/2001/XMLSchema#string": "string",
    "http://www.w3.org/2001/XMLSchema#boolean": "boolean",
    "http://www.w3.org/2001/XMLSchema#int": "int",
    "http://www.w3.org/2001/XMLSchema#long": "long",
    "http://www.w3.org/2001/XMLSchema#float": "float",
    "http://www.w3.org/2001/XMLSchema#double": "double",
    saladp + "null": "null",
    saladp + "enum": "enum",
    saladp + "array": "array",
    saladp + "record": "record",
}


def avro_type_name(url: str) -> str:
    """
    Turn a URL into an Avro-safe, dot-separated name.

    URLs listed in ``primitives`` are returned as their short type name.

    Any other URL is rewritten as the host name with its dot-separated
    labels reversed, followed by the path segments and then the fragment
    segments (both split on "/"), all joined with ".".  Empty segments are
    dropped.  For example ``https://w3id.org/cwl/salad#Any`` becomes
    ``org.w3id.cwl.salad.Any``.

    A string with no scheme or host (e.g. ``"Any"``) is treated as a bare
    path, so it comes back with only its slashes turned into dots.
    """
    if url in primitives:
        return primitives[url]

    parts = urlsplit(url)
    segments = (
        # Reverse the host labels so the result reads like a namespace
        # ("w3id.org" -> "org.w3id").
        list(reversed(parts.netloc.split(".")))
        + parts.path.split("/")
        + parts.fragment.split("/")
    )
    # Skip the empty strings produced by leading slashes or absent components.
    return ".".join(segment for segment in segments if segment)
87+
88+
5089
def friendly(v): # type: (Any) -> Any
5190
if isinstance(v, avro.schema.NamedSchema):
5291
return avro_shortname(v.name)
@@ -77,6 +116,7 @@ def validate_ex(
77116
strict_foreign_properties=False, # type: bool
78117
logger=_logger, # type: logging.Logger
79118
skip_foreign_properties=False, # type: bool
119+
vocab=None, # type: Optional[Mapping[str, str]]
80120
):
81121
# type: (...) -> bool
82122
"""Determine if a python datum is an instance of a schema."""
@@ -87,6 +127,9 @@ def validate_ex(
87127
if not foreign_properties:
88128
foreign_properties = set()
89129

130+
if vocab is None:
131+
raise Exception("vocab must be provided")
132+
90133
schema_type = expected_schema.type
91134

92135
if schema_type == "null":
@@ -132,7 +175,7 @@ def validate_ex(
132175
)
133176
return False
134177
elif isinstance(expected_schema, avro.schema.EnumSchema):
135-
if expected_schema.name == "w3id.org.cwl.salad.Any":
178+
if expected_schema.name in ("org.w3id.cwl.salad.Any", "Any"):
136179
if datum is not None:
137180
return True
138181
if raise_ex:
@@ -144,7 +187,7 @@ def validate_ex(
144187
"value is a {} but expected a string".format(type(datum).__name__)
145188
)
146189
return False
147-
if expected_schema.name == "w3id.org.cwl.cwl.Expression":
190+
if expected_schema.name == "org.w3id.cwl.cwl.Expression":
148191
if "$(" in datum or "${" in datum:
149192
return True
150193
if raise_ex:
@@ -182,6 +225,7 @@ def validate_ex(
182225
strict_foreign_properties=strict_foreign_properties,
183226
logger=logger,
184227
skip_foreign_properties=skip_foreign_properties,
228+
vocab=vocab,
185229
):
186230
return False
187231
except ValidationException as v:
@@ -208,6 +252,7 @@ def validate_ex(
208252
strict_foreign_properties=strict_foreign_properties,
209253
logger=logger,
210254
skip_foreign_properties=skip_foreign_properties,
255+
vocab=vocab,
211256
):
212257
return True
213258

@@ -244,6 +289,7 @@ def validate_ex(
244289
strict_foreign_properties=strict_foreign_properties,
245290
logger=logger,
246291
skip_foreign_properties=skip_foreign_properties,
292+
vocab=vocab,
247293
)
248294
except ClassValidationException:
249295
raise
@@ -284,7 +330,10 @@ def validate_ex(
284330
raise ValidationException(f"Missing '{f.name}' field")
285331
else:
286332
return False
287-
if avro_shortname(expected_schema.name) != d:
333+
avroname = None
334+
if d in vocab:
335+
avroname = avro_type_name(vocab[d])
336+
if expected_schema.name != d and expected_schema.name != avroname:
288337
if raise_ex:
289338
raise ValidationException(
290339
"Expected class '{}' but this is '{}'".format(
@@ -321,6 +370,7 @@ def validate_ex(
321370
strict_foreign_properties=strict_foreign_properties,
322371
logger=logger,
323372
skip_foreign_properties=skip_foreign_properties,
373+
vocab=vocab,
324374
):
325375
return False
326376
except ValidationException as v:

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@
9898

9999
setup(
100100
name="schema-salad",
101-
version="8.0", # update the VERSION prefix in the Makefile as well 🙂
101+
version="8.1", # update the VERSION prefix in the Makefile as well 🙂
102102
description="Schema Annotations for Linked Avro Data (SALAD)",
103103
long_description=open(README).read(),
104104
long_description_content_type="text/x-rst",

0 commit comments

Comments
 (0)