Skip to content

Commit d059bf9

Browse files
authored
Support accessing remote registries via ssh (#589)
* Support accessing remote registries via ssh * Factored out a function that tells whether a registry path is local * Make sure the URL is used, not self.source which could be a local path The URL has to be given as "ssh://[user@]host.xz[:port]/path/to/repo.git" rather than the shorter version "[user@]host.xz:path/to/repo.git" * Make self.source include the subdir from the start. Allows implementing iter_modules in the base class * The check already happens in _update_cache() * Moved is_path_local to shpc.utils * Added a safeguard to prevent cloning multiple times * clone() is actually only supported by VersionControl * No need to yield self.source in iter_modules since it's constant and accessible from outside (and not all callers want it!) * It's more practical to yield the registry object (provider) rather than using the "source" path (which is undefined for remote registries anyway) * Optimised the "update all" mode by directly using Result objects from the registries. Otherwise, it wastes time finding modules that we know are there * Clones only ever exist within a function * Optimised iter_modules method for remote registries (using the cache) * Moved back iter_modules to Filesystem since VersionControl has its own, optimised, version * Stopped using self.source in VersionControl, to avoid confusion with Filesystem * url, not source, is to be used for remote registries * Cannot do these heuristics as we need to report nonexistent local paths * str.split can limit the number of splits * The main Registry object, not the settings, should decide whether there is a local registry or not * To avoid duplicating the code that assesses whether a path is local or not, check which sub-class of Provider is used * The parent class shouldn't know that much about the subclasses * Restored back the automatic addition of https:// * Restructured to avoid an unnecessary else * shpc convention: no else when the then ends with a return * Unnecessary due to operator 
precedence rule * Added a cache in `library_url` * Fixed the implementation of the cache in VersionControl.exists * exists has its own implementation in VersionControl, so this implementation is in fact specific to Filesystem * iter_registry is basically iter_modules with an extra filter * Yield relative paths rather than full paths since *all* consumers need relative paths * Proper method to cleanup a clone * Removed a cleanup() call that was expected to do nothing * Increased the symmetry to simplify the maintainability * NotImplementedError is more useful than pass * The tuplized version is not the preference here * Easier to understand * Made the clone return a Filesystem object independent from VersionControl * Extra comment * Back to a subclass of VersionControl for each forge * Pre-parse the URL * VersionControl should not be directly used * Renamed the variable for clarity * Removing yaml because it's the only file we have for a container * Defensive programming: local could still be None * bugfix: iter_modules needs to yield paths to container.yaml * Moved the cleanup call up to _sync() * bugfix: iter_modules now returns path to the container.yaml * Need to check here too that the clone still exists * Also need to reset self._clone if the directory is gone * More checks on local and remote * The temp directory may have been deleted in the meantime * It makes more sense to cleanup tmplocal than self, and it works because self expects the cleanup may happen * Moved this to the parent class * Another implementation that doesn't make it too obvious the base-class knows about GitHub and GitLab * Silly typo: self._clone is a Filesystem object, not a string * No colon * You shall use American spelling * Added a test to showcase ssh * Revert "bugfix: iter_modules needs to yield paths to container.yaml" This reverts commit f069f48. * Revert "bugfix: iter_modules now returns path to the container.yaml" This reverts commit c5b4cb9.
1 parent c035442 commit d059bf9

File tree

9 files changed

+225
-200
lines changed

9 files changed

+225
-200
lines changed

shpc/main/client.py

+9-5
Original file line numberDiff line numberDiff line change
@@ -124,12 +124,16 @@ def update(self, name=None, dryrun=False, filters=None):
124124
"""
125125
# No name provided == "update all"
126126
if name:
127-
modules = [name]
127+
# find the module in the registries. _load_container
128+
# calls `container.ContainerConfig(result)` like below
129+
configs = [self._load_container(name)]
128130
else:
129-
modules = [x[1] for x in list(self.registry.iter_modules())]
130-
131-
for module_name in modules:
132-
config = self._load_container(module_name)
131+
# directly iterate over the content of the registry
132+
configs = []
133+
for result in self.registry.iter_registry():
134+
configs.append(container.ContainerConfig(result))
135+
# do the update
136+
for config in configs:
133137
config.update(dryrun=dryrun, filters=filters)
134138

135139
def test(

shpc/main/modules/base.py

+13-10
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,12 @@ def add(self, image, module_name=None, **kwargs):
172172
"""
173173
Add a container to the registry to enable install.
174174
"""
175-
self.settings.ensure_filesystem_registry()
175+
local_registry = self.registry.filesystem_registry
176+
177+
if not local_registry:
178+
logger.exit(
179+
"This command is only supported for a filesystem registry! Add one or use --registry."
180+
)
176181

177182
# Docker module name is always the same namespace as the image
178183
if image.startswith("docker"):
@@ -185,7 +190,7 @@ def add(self, image, module_name=None, **kwargs):
185190

186191
# Assume adding to default registry
187192
dest = os.path.join(
188-
self.settings.filesystem_registry,
193+
local_registry.source,
189194
module_name.split(":")[0],
190195
"container.yaml",
191196
)
@@ -235,10 +240,9 @@ def docgen(self, module_name, registry=None, out=None, branch="main"):
235240
aliases = config.get_aliases()
236241
template = self.template.load("docs.md")
237242
registry = registry or defaults.github_url
238-
github_url = "%s/blob/%s/%s/container.yaml" % (registry, branch, module_name)
239-
raw_github_url = shpc.main.registry.get_module_config_url(
240-
registry, module_name, branch
241-
)
243+
remote = self.registry.get_registry(registry, tag=branch)
244+
github_url = remote.get_container_url(module_name)
245+
raw_github_url = remote.get_raw_container_url(module_name)
242246

243247
# Currently one doc is rendered for all containers
244248
result = template.render(
@@ -306,10 +310,9 @@ def _get_module_lookup(self, base, filename, pattern=None):
306310
A shared function to get a lookup of installed modules or registry entries
307311
"""
308312
modules = {}
309-
for fullpath in utils.recursive_find(base, pattern):
310-
if fullpath.endswith(filename):
311-
module_name, version = os.path.dirname(fullpath).rsplit(os.sep, 1)
312-
module_name = module_name.replace(base, "").strip(os.sep)
313+
for relpath in utils.recursive_find(base, pattern):
314+
if relpath.endswith(filename):
315+
module_name, version = os.path.dirname(relpath).rsplit(os.sep, 1)
313316
if module_name not in modules:
314317
modules[module_name] = set()
315318
modules[module_name].add(version)

shpc/main/registry/__init__.py

+47-33
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
from shpc.main.settings import SettingsBase
1515

1616
from .filesystem import Filesystem, FilesystemResult
17-
from .remote import GitHub, GitLab, get_module_config_url
17+
from .remote import GitHub, GitLab
1818

1919

2020
def update_container_module(module, from_path, existing_path):
@@ -23,13 +23,12 @@ def update_container_module(module, from_path, existing_path):
2323
"""
2424
if not os.path.exists(existing_path):
2525
shpc.utils.mkdir_p(existing_path)
26-
for filename in shpc.utils.recursive_find(from_path):
27-
relative_path = filename.replace(from_path, "").strip("/")
26+
for relative_path in shpc.utils.recursive_find(from_path):
2827
to_path = os.path.join(existing_path, relative_path)
2928
if os.path.exists(to_path):
3029
shutil.rmtree(to_path)
3130
shpc.utils.mkdir_p(os.path.dirname(to_path))
32-
shutil.copy2(filename, to_path)
31+
shutil.copy2(os.path.join(from_path, relative_path), to_path)
3332

3433

3534
class Registry:
@@ -44,21 +43,29 @@ def __init__(self, settings=None):
4443
# and they must exist.
4544
self.registries = [self.get_registry(r) for r in self.settings.registry]
4645

46+
@property
47+
def filesystem_registry(self):
48+
"""
49+
Return the first found filesystem registry.
50+
"""
51+
for registry in self.registries:
52+
if isinstance(registry, Filesystem):
53+
return registry
54+
4755
def exists(self, name):
4856
"""
49-
Determine if a module name *exists* in any local registry, return path
57+
Determine if a module name *exists* in any registry, return the first one
5058
"""
5159
for reg in self.registries:
5260
if reg.exists(name):
53-
return os.path.join(reg.source, name)
61+
return reg
5462

5563
def iter_registry(self, filter_string=None):
5664
"""
5765
Iterate over all known registries defined in settings.
5866
"""
5967
for reg in self.registries:
60-
for entry in reg.iter_registry(filter_string=filter_string):
61-
yield entry
68+
yield from reg.iter_registry(filter_string=filter_string)
6269

6370
def find(self, name, path=None):
6471
"""
@@ -80,19 +87,19 @@ def iter_modules(self):
8087
"""
8188
Iterate over modules found across the registry
8289
"""
83-
for reg in self.registries:
84-
for registry, module in reg.iter_modules():
90+
for registry in self.registries:
91+
for module in registry.iter_modules():
8592
yield registry, module
8693

87-
def get_registry(self, source):
94+
def get_registry(self, source, **kwargs):
8895
"""
8996
A registry is a local or remote registry.
9097
9198
We can upgrade from, or otherwise list
9299
"""
93100
for Registry in PROVIDERS:
94101
if Registry.matches(source):
95-
return Registry(source)
102+
return Registry(source, **kwargs)
96103
raise ValueError("No matching registry provider for %s" % source)
97104

98105
def sync(
@@ -128,20 +135,10 @@ def _sync(
128135
local=None,
129136
sync_registry=None,
130137
):
131-
# Registry to sync from
132-
sync_registry = sync_registry or self.settings.sync_registry
133-
134138
# Create a remote registry with settings preference
135-
Remote = GitHub if "github.com" in sync_registry else GitLab
136-
remote = Remote(sync_registry, tag=tag)
137-
local = self.get_registry(local or self.settings.filesystem_registry)
138-
139-
# We sync to our first registry - if not filesystem, no go
140-
if not local.is_filesystem_registry:
141-
logger.exit(
142-
"sync is only supported for a remote to a filesystem registry: %s"
143-
% sync_registry.source
144-
)
139+
remote = self.get_registry(
140+
sync_registry or self.settings.sync_registry, tag=tag
141+
)
145142

146143
# Upgrade the current registry from the remote
147144
self.sync_from_remote(
@@ -152,6 +149,8 @@ def _sync(
152149
add_new=add_new,
153150
local=local,
154151
)
152+
153+
# Clean up the remote once we've done the sync
155154
remote.cleanup()
156155

157156
def sync_from_remote(
@@ -163,26 +162,41 @@ def sync_from_remote(
163162
If the registry module is not installed, we install to the first
164163
filesystem registry found in the list.
165164
"""
166-
updates = False
167165

166+
## First get a valid local Registry
168167
# A local (string) path provided
169-
if local and isinstance(local, str) and os.path.exists(local):
168+
if local and isinstance(local, str):
169+
if not os.path.exists(local):
170+
logger.exit("The path %s doesn't exist." % local)
170171
local = Filesystem(local)
171172

172173
# No local registry provided, use default
173174
if not local:
174-
local = Filesystem(self.settings.filesystem_registry)
175+
local = self.filesystem_registry
176+
# We sync to our first registry - if not filesystem, no go
177+
if not local:
178+
logger.exit("No local registry to sync to. Check the shpc settings.")
179+
180+
if not isinstance(local, Filesystem):
181+
logger.exit(
182+
"Can only synchronize to a local file system, not to %s." % local
183+
)
175184

176-
tmpdir = remote.source
177-
if tmpdir.startswith("http") or not os.path.exists(tmpdir):
178-
tmpdir = remote.clone()
185+
## Then a valid remote Registry
186+
if not remote:
187+
logger.exit("No remote provided. Cannot sync.")
188+
189+
if not isinstance(remote, Filesystem):
190+
# Instantiate a local registry, which will have to be cleaned up
191+
remote = remote.clone()
179192

180193
# These are modules to update
181-
for regpath, module in remote.iter_modules():
194+
updates = False
195+
for module in remote.iter_modules():
182196
if name and module != name:
183197
continue
184198

185-
from_path = os.path.join(regpath, module)
199+
from_path = os.path.join(remote.source, module)
186200
existing_path = local.exists(module)
187201

188202
# If we have an existing module and we want to replace all files

shpc/main/registry/filesystem.py

+18-12
Original file line numberDiff line numberDiff line change
@@ -75,20 +75,31 @@ def override_exists(self, tag):
7575

7676

7777
class Filesystem(Provider):
78-
def __init__(self, *args, **kwargs):
79-
super().__init__(*args, **kwargs)
80-
self.source = os.path.abspath(self.source)
78+
def __init__(self, source):
79+
if not self.matches(source):
80+
raise ValueError(
81+
"Filesystem registry source must exist on the filesystem. Got %s"
82+
% source
83+
)
84+
self.source = os.path.abspath(source)
8185

8286
@classmethod
8387
def matches(cls, source):
8488
return os.path.exists(source) or source == "."
8589

90+
def exists(self, name):
91+
return os.path.exists(os.path.join(self.source, name))
92+
8693
def iter_modules(self):
94+
"""
95+
yield module names
96+
"""
97+
# Find modules based on container.yaml
8798
for filename in shpc.utils.recursive_find(self.source, "container.yaml"):
88-
module = os.path.dirname(filename).replace(self.source, "").strip(os.sep)
99+
module = os.path.dirname(filename)
89100
if not module:
90101
continue
91-
yield self.source, module
102+
yield module
92103

93104
def find(self, name):
94105
"""
@@ -110,14 +121,9 @@ def iter_registry(self, filter_string=None):
110121
"""
111122
Iterate over content in filesystem registry.
112123
"""
113-
for filename in shpc.utils.recursive_find(self.source):
114-
if not filename.endswith("container.yaml"):
115-
continue
116-
module_name = (
117-
os.path.dirname(filename).replace(self.source, "").strip(os.sep)
118-
)
119-
124+
for module_name in self.iter_modules():
120125
# If the user has provided a filter, honor it
121126
if filter_string and not re.search(filter_string, module_name):
122127
continue
128+
filename = os.path.join(self.source, module_name)
123129
yield FilesystemResult(module_name, filename)

shpc/main/registry/provider.py

+31-25
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55

66
import os
77

8+
import shpc.utils
9+
810

911
class Result:
1012
@property
@@ -32,36 +34,40 @@ class Provider:
3234
A general provider should retrieve and provide registry files.
3335
"""
3436

35-
def __init__(self, source, *args, **kwargs):
36-
if not (source.startswith("https://") or os.path.exists(source)):
37-
raise ValueError(
38-
"Registry source must exist on the filesystem or be given as https://."
39-
)
40-
self.source = source
41-
42-
def exists(self, name):
43-
return os.path.exists(os.path.join(self.source, name))
44-
45-
@property
46-
def is_filesystem_registry(self):
47-
return not self.source.startswith("http") and os.path.exists(self.source)
48-
49-
@property
50-
def name(self):
51-
return self.__class__.__name__.lower()
52-
5337
@classmethod
54-
def matches(cls, source_url: str):
55-
pass
38+
def matches(cls, source):
39+
"""
40+
Returns true if this class understands the source
41+
"""
42+
raise NotImplementedError
5643

5744
def find(self, name):
58-
pass
45+
"""
46+
Returns a Result object if the module can be found in the registry
47+
"""
48+
raise NotImplementedError
49+
50+
def exists(self, name):
51+
"""
52+
Returns true if the module can be found in the registry
53+
"""
54+
raise NotImplementedError
5955

6056
def cleanup(self):
61-
pass
57+
"""
58+
Cleanup the registry
59+
"""
60+
raise NotImplementedError
6261

63-
def iter_registry(self):
64-
pass
62+
def iter_registry(self, filter_string=None):
63+
"""
64+
Iterates over the modules of this registry (that match the filter, if
65+
provided) as Result instances
66+
"""
67+
raise NotImplementedError
6568

6669
def iter_modules(self):
67-
pass
70+
"""
71+
Iterates over the module names of this registry
72+
"""
73+
raise NotImplementedError

0 commit comments

Comments
 (0)