Skip to content

Commit 51144f9

Browse files
Merge pull request #80 from apdavison/group-in-subdirectories
Add the option to group files into subdirectories by schema when saving
2 parents 8541bf0 + 381955f commit 51144f9

File tree

3 files changed

+46
-11
lines changed

3 files changed

+46
-11
lines changed

pipeline/src/base.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -240,7 +240,9 @@ def __init__(self, id=None, **properties):
240240

241241
def save(self, file_path, indent=2):
242242
"""
243-
Save this object to a file in JSON-LD format
243+
Save this object to a file in JSON-LD format.
244+
245+
It is recommended to use the extension ".jsonld".
244246
"""
245247
with open(file_path, "w") as output_file:
246248
json.dump(self.to_jsonld(), output_file, indent=indent)

pipeline/src/collection.py

Lines changed: 23 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
The collection can be saved to and loaded from disk, in JSON-LD format.
66
"""
77

8+
from glob import glob
89
import json
910
import os
1011
from .registry import lookup_type
@@ -69,7 +70,7 @@ def _sort_nodes_by_id(self):
6970
sorted_nodes = dict(sorted(self.nodes.items()))
7071
self.nodes = sorted_nodes
7172

72-
def save(self, path, individual_files=False, include_empty_properties=False):
73+
def save(self, path, individual_files=False, include_empty_properties=False, group_by_schema=False):
7374
"""
7475
Save the node collection to disk in JSON-LD format.
7576
@@ -78,10 +79,18 @@ def save(self, path, individual_files=False, include_empty_properties=False):
7879
7980
path (str):
8081
either a file or a directory into which the metadata will be written.
82+
It is recommended to use the extension ".jsonld".
8183
individual_files (bool):
8284
if False (default), save the entire collection into a single file.
8385
if True, `path` must be a directory, and each node is saved into a
8486
separate file within that directory.
87+
include_empty_properties (bool):
88+
if False (default), do not include properties with value None.
89+
if True, include all properties.
90+
group_by_schema (bool):
91+
Only applies if `individual_files` is True.
92+
If False (default), save all files in a single directory.
93+
If True, save into subdirectories according to the schema name.
8594
8695
Returns
8796
-------
@@ -137,7 +146,12 @@ def save(self, path, individual_files=False, include_empty_properties=False):
137146
else:
138147
assert node.id.startswith("_:")
139148
file_identifier = node.id[2:]
140-
file_path = os.path.join(path, f"{file_identifier}.jsonld")
149+
if group_by_schema:
150+
dir_path = os.path.join(path, node.__class__.__name__)
151+
os.makedirs(dir_path, exist_ok=True)
152+
file_path = os.path.join(dir_path, f"{file_identifier}.jsonld")
153+
else:
154+
file_path = os.path.join(path, f"{file_identifier}.jsonld")
141155
with open(file_path, "w") as fp:
142156
data = node.to_jsonld(embed_linked_nodes=False, include_empty_properties=include_empty_properties)
143157
json.dump(data, fp, indent=2)
@@ -150,9 +164,9 @@ def load(self, *paths, version=DEFAULT_VERSION):
150164
151165
`*paths` may contain either:
152166
153-
1) a single directory, in which case
154-
all JSON-LD files all the top level of this directory will be loaded
155-
(but without descending into subdirectories)
167+
1) a single directory, in which case all JSON-LD files in this directory
168+
and any non-hidden subdirectories will be loaded
169+
(where hidden subdirectories are those whose name starts with ".").
156170
157171
2) one or more JSON-LD files, which will all be loaded.
158172
@@ -161,11 +175,10 @@ def load(self, *paths, version=DEFAULT_VERSION):
161175
"""
162176
if len(paths) == 1 and os.path.isdir(paths[0]):
163177
data_dir = paths[0]
164-
json_paths = [
165-
os.path.join(data_dir, item)
166-
for item in os.listdir(data_dir)
167-
if os.path.splitext(item)[1] in (".json", ".jsonld")
168-
]
178+
json_paths = (
179+
glob(f"{data_dir}/**/*.jsonld", recursive=True)
180+
+ glob(f"{data_dir}/**/*.json", recursive=True)
181+
)
169182
else:
170183
json_paths = paths
171184

pipeline/tests/test_collections.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,26 @@ def test_round_trip_multi_file():
7777
assert p == np
7878

7979

80+
def test_round_trip_multi_file_group_by_schema():
    """
    Check that a collection survives a save/load round trip when each node
    is written to its own file and the files are grouped into
    subdirectories by schema (`group_by_schema=True`).
    """
    # Start from a clean output directory so stale files from earlier runs
    # cannot be picked up by the recursive load below.
    shutil.rmtree(test_output_dir, ignore_errors=True)
    person = build_fake_node(omcore.Person)
    collection = Collection(person)
    collection.save(test_output_dir, individual_files=True, include_empty_properties=False, group_by_schema=True)
    new_collection = Collection()
    new_collection.load(test_output_dir)

    assert len(collection) == len(new_collection)

    # Pick the node out of the *reloaded* collection. Comparing `person`
    # with itself would pass even if saving or loading were broken.
    for node in new_collection:
        if node.id == person.id:
            new_person = node
            break
    else:
        raise AssertionError(f"node with id {person.id!r} not found in reloaded collection")

    p = person.to_jsonld(include_empty_properties=False, embed_linked_nodes=True)
    np = new_person.to_jsonld(include_empty_properties=False, embed_linked_nodes=True)
    assert p == np
98+
99+
80100
def test_collection_sort_by_id():
81101
person = omcore.Person(given_name="A", family_name="Professor", id="_:004")
82102
uni1 = omcore.Organization(full_name="University of This Place", id="_:002")

0 commit comments

Comments
 (0)