Skip to content

Commit dea0b8e

Browse files
committed
store label & doc fields as prospective provenance
TODO: fix intent list add/amend tests
1 parent 8edabf8 commit dea0b8e

File tree

4 files changed: +46 additions, -36 deletions

build-cwltool-docker.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,4 +8,4 @@ ${engine} run -t -v /var/run/docker.sock:/var/run/docker.sock \
88
-v /tmp:/tmp \
99
-v "$PWD":/tmp/cwltool \
1010
quay.io/commonwl/cwltool_module /bin/sh -c \
11-
"apk add gcc bash git && pip install -r/tmp/cwltool/test-requirements.txt ; pytest -k 'not (test_bioconda or test_double_overwrite or test_env_filtering or test_biocontainers or test_disable_file_overwrite_without_ext or test_disable_file_creation_in_outdir_with_ext or test_write_write_conflict or test_directory_literal_with_real_inputs_inside or test_revsort_workflow or test_stdin_with_id_preset or test_no_compute_chcksum or test_packed_workflow_execution[tests/wf/count-lines1-wf.cwl-tests/wf/wc-job.json-False] or test_sequential_workflow or test_single_process_subwf_subwf_inline_step)' --ignore-glob '*test_udocker.py' -n 0 -v -rs --pyargs cwltool"
11+
"apk add gcc bash git && pip install -r/tmp/cwltool/test-requirements.txt ; pytest -k 'not (test_bioconda or test_double_overwrite or test_env_filtering or test_biocontainers or test_disable_file_overwrite_without_ext or test_disable_file_creation_in_outdir_with_ext or test_write_write_conflict or test_directory_literal_with_real_inputs_inside or test_revsort_workflow or test_revsort_label_annotations or test_stdin_with_id_preset or test_no_compute_chcksum or test_packed_workflow_execution[tests/wf/count-lines1-wf.cwl-tests/wf/wc-job.json-False] or test_sequential_workflow or test_single_process_subwf_subwf_inline_step)' --ignore-glob '*test_udocker.py' -n 0 -v -rs --pyargs cwltool"

cwltool/cwlprov/provenance_profile.py

Lines changed: 41 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -51,9 +51,14 @@
5151
)
5252
from .writablebagfile import create_job, write_bag_file # change this later
5353

54+
# from schema_salad.utils import convert_to_dict
55+
56+
5457
if TYPE_CHECKING:
5558
from .ro import ResearchObject
5659

60+
ProvType = Dict[Union[str, Identifier], Any]
61+
5762

5863
def copy_job_order(job: Union[Process, JobsType], job_order_object: CWLObjectType) -> CWLObjectType:
5964
"""Create copy of job object for provenance."""
@@ -177,14 +182,14 @@ def host_provenance(document: ProvDocument) -> None:
177182
# by a user account, as cwltool is a command line tool
178183
account = self.document.agent(ACCOUNT_UUID)
179184
if self.orcid or self.full_name:
180-
person: Dict[Union[str, Identifier], Any] = {
185+
person: ProvType = {
181186
PROV_TYPE: PROV["Person"],
182187
"prov:type": SCHEMA["Person"],
183188
}
184189
if self.full_name:
185190
person["prov:label"] = self.full_name
186191
person["foaf:name"] = self.full_name
187-
person["schema:name"] = self.full_name
192+
person[SCHEMA["name"]] = self.full_name
188193
else:
189194
# TODO: Look up name from ORCID API?
190195
pass
@@ -235,13 +240,13 @@ def evaluate(
235240
"""Evaluate the nature of job."""
236241
if not hasattr(process, "steps"):
237242
# record provenance of independent commandline tool executions
238-
self.prospective_prov(job)
243+
self.prospective_prov(job, process)
239244
customised_job = copy_job_order(job, job_order_object)
240245
self.used_artefacts(customised_job, self.workflow_run_uri)
241246
create_job(research_obj, customised_job)
242247
elif hasattr(job, "workflow"):
243248
# record provenance of workflow executions
244-
self.prospective_prov(job)
249+
self.prospective_prov(job, process)
245250
customised_job = copy_job_order(job, job_order_object)
246251
self.used_artefacts(customised_job, self.workflow_run_uri)
247252
# if CWLPROV['prov'].uri in job_order_object: # maybe move this to another place
@@ -306,8 +311,7 @@ def _add_nested_annotations(
306311
) -> ProvEntity:
307312
"""Propagate input data annotations to provenance."""
308313
# Change https:// into http:// first
309-
schema2_uri = "https://schema.org/"
310-
if schema2_uri in annotation_key:
314+
if (schema2_uri := "https://schema.org/") in annotation_key:
311315
annotation_key = SCHEMA[annotation_key.replace(schema2_uri, "")].uri
312316

313317
if not isinstance(annotation_value, (MutableSequence, MutableMapping)):
@@ -377,9 +381,9 @@ def declare_file(self, value: CWLObjectType) -> Tuple[ProvEntity, ProvEntity, st
377381
self.document.specializationOf(file_entity, entity)
378382

379383
# Identify all schema annotations
380-
schema_annotations = dict(
381-
[(v, value[v]) for v in value.keys() if v.startswith("https://schema.org")]
382-
)
384+
schema_annotations = {
385+
v: value[v] for v in value.keys() if v.startswith("https://schema.org")
386+
}
383387

384388
# Transfer SCHEMA annotations to provenance
385389
for s in schema_annotations:
@@ -509,9 +513,9 @@ def declare_directory(self, value: CWLObjectType) -> ProvEntity:
509513
coll_b.add_attributes(coll_b_attribs)
510514

511515
# Identify all schema annotations
512-
schema_annotations = dict(
513-
[(v, value[v]) for v in value.keys() if v.startswith("https://schema.org")]
514-
)
516+
schema_annotations = {
517+
v: value[v] for v in value.keys() if v.startswith("https://schema.org")
518+
}
515519

516520
# Transfer SCHEMA annotations to provenance
517521
for s in schema_annotations:
@@ -571,7 +575,7 @@ def declare_artefact(self, value: Any) -> ProvEntity:
571575
self.research_object.add_uri(entity.identifier.uri)
572576
return entity
573577

574-
if isinstance(value, (str, str)):
578+
if isinstance(value, str):
575579
(entity, _) = self.declare_string(value)
576580
return entity
577581

@@ -734,35 +738,39 @@ def generate_output_prov(
734738
entity, process_run_id, timestamp, None, {"prov:role": role}
735739
)
736740

737-
def prospective_prov(self, job: JobsType) -> None:
741+
def prospective_prov(self, job: JobsType, process: Process) -> None:
738742
"""Create prospective prov recording as wfdesc prov:Plan."""
743+
prov_items: ProvType = {
744+
PROV_TYPE: WFDESC["Workflow"] if isinstance(job, WorkflowJob) else WFDESC["Process"],
745+
"prov:type": PROV["Plan"],
746+
"prov:label": "Prospective provenance",
747+
}
748+
if "doc" in process.tool:
749+
prov_items[SCHEMA["description"]] = process.tool["doc"]
750+
if "label" in process.tool:
751+
prov_items[SCHEMA["name"]] = process.tool["label"]
752+
# # TypeError: unhashable type: 'list'
753+
# if "intent" in process.tool:
754+
# prov_items[SCHEMA["featureList"]] = convert_to_dict(process.tool["intent"])
755+
self.document.entity("wf:main", prov_items)
739756
if not isinstance(job, WorkflowJob):
740-
# direct command line tool execution
741-
self.document.entity(
742-
"wf:main",
743-
{
744-
PROV_TYPE: WFDESC["Process"],
745-
"prov:type": PROV["Plan"],
746-
"prov:label": "Prospective provenance",
747-
},
748-
)
749757
return
750758

751-
self.document.entity(
752-
"wf:main",
753-
{
754-
PROV_TYPE: WFDESC["Workflow"],
755-
"prov:type": PROV["Plan"],
756-
"prov:label": "Prospective provenance",
757-
},
758-
)
759-
760759
for step in job.steps:
761760
stepnametemp = "wf:main/" + str(step.name)[5:]
762761
stepname = urllib.parse.quote(stepnametemp, safe=":/,#")
762+
provstep_items: ProvType = {
763+
PROV_TYPE: WFDESC["Process"],
764+
"prov:type": PROV["Plan"],
765+
}
766+
# WorkflowStep level annotations
767+
if "doc" in step.tool:
768+
provstep_items[SCHEMA["description"]] = step.tool["doc"]
769+
if "label" in step.tool:
770+
provstep_items[SCHEMA["name"]] = step.tool["label"]
763771
provstep = self.document.entity(
764772
stepname,
765-
{PROV_TYPE: WFDESC["Process"], "prov:type": PROV["Plan"]},
773+
provstep_items,
766774
)
767775
self.document.entity(
768776
"wf:main",

cwltool/singularity.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -369,7 +369,7 @@ def add_writable_file_volume(
369369
if self.inplace_update:
370370
try:
371371
os.link(os.path.realpath(volume.resolved), host_outdir_tgt)
372-
except os.error:
372+
except OSError:
373373
shutil.copy(volume.resolved, host_outdir_tgt)
374374
else:
375375
shutil.copy(volume.resolved, host_outdir_tgt)

tests/test_provenance.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,11 @@
3434

3535

3636
def cwltool(tmp_path: Path, *args: Any) -> Path:
37+
out_folder = tmp_path / "out"
38+
out_folder.mkdir()
3739
prov_folder = tmp_path / "provenance"
3840
prov_folder.mkdir()
39-
new_args = ["--provenance", str(prov_folder)]
41+
new_args = ["--provenance", str(prov_folder), "--outdir", str(out_folder)]
4042
new_args.extend(args)
4143
# Run within a temporary directory to not pollute git checkout
4244
tmp_dir = tmp_path / "cwltool-run"

0 commit comments

Comments
 (0)