-
Notifications
You must be signed in to change notification settings - Fork 218
/
Copy pathload.py
203 lines (166 loc) · 5.58 KB
/
load.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
"""Provide a function ``load_model`` to access remote model repositories."""
import gzip
import logging
from typing import TYPE_CHECKING, Iterable
import diskcache
import httpx
import libsbml
from ...core import Configuration
from ..sbml import _sbml_to_model
from .abstract_model_repository import AbstractModelRepository
from .bigg_models_repository import BiGGModels
from .biomodels_repository import BioModels
from .cobrapy_repository import Cobrapy
from .embl_gems_repository import EMBLGems
if TYPE_CHECKING:
from cobra.core import Model
# Module-level logger named after this module; ``_fetch_model`` reports its
# repository lookups through it.
logger = logging.getLogger(__name__)
# Configuration object from which ``_cached_load`` reads ``cache_directory``,
# ``max_cache_size`` and ``cache_expiration``.
configuration = Configuration()
# Repository accessors that ``load_model`` uses when the caller does not pass
# ``repositories``. They are instantiated once, when this module is imported,
# and ``_fetch_model`` queries them in the order listed here.
DEFAULT_REPOSITORIES = (
Cobrapy(),
BiGGModels(),
BioModels(),
EMBLGems()
)
def load_model(
    model_id: str,
    repositories: Iterable[AbstractModelRepository] = DEFAULT_REPOSITORIES,
    cache: bool = True,
) -> "Model":
    """
    Download an SBML model from a remote repository.

    The gzip-compressed SBML document is obtained either through the on-disk
    cache (the default, which makes repeated access much faster) or directly
    from the repositories, and is then converted into a model instance.

    When ``repositories`` is not given, the accessors in
    ``DEFAULT_REPOSITORIES`` are used, in this order:

    * Cobrapy
    * BiGGModels
    * BioModels
    * EMBLGems

    You can use the ``AbstractModelRepository`` class as a parent to implement
    your own repository accessor which you pass to the ``load_model`` function.
    In case you implement a new interface, please consider submitting a pull
    request to COBRApy.

    Parameters
    ----------
    model_id : str
        The identifier of the desired metabolic model. This is typically
        repository specific.
    repositories : iterable, optional
        An iterable of repository accessor instances. The model_id is searched
        in order (default ``DEFAULT_REPOSITORIES``).
    cache : bool, optional
        Whether or not to use the local caching mechanism (default yes).

    Returns
    -------
    Model
        A model instance generated from the SBML document.

    Raises
    ------
    RuntimeError
        As with any internet connection, there are multiple errors that can
        occur. Also raised when no repository provides the model.

    Examples
    --------
    Most of the time calling `load_model` with an identifier should be enough.

    >>> print(load_model("e_coli_core"))
    e_coli_core
    >>> print(load_model("MODEL1510010000"))
    MODEL1510010000

    See Also
    --------
    BiGGModels
    BioModels

    """
    # Choose the retrieval strategy once; both helpers accept the same
    # keyword arguments and return the same kind of payload.
    retrieve = _cached_load if cache else _fetch_model
    compressed_sbml = retrieve(model_id=model_id, repositories=repositories)
    return get_model_from_gzip_sbml(compressed_sbml)
def _cached_load(
    model_id: str,
    repositories: Iterable[AbstractModelRepository],
) -> bytes:
    """
    Return a gzip-compressed SBML document, preferring the on-disk cache.

    The cache is opened with the directory and size limit taken from the
    module-level ``configuration``. If ``model_id`` is not a key in the cache,
    the document is fetched from the repositories and stored under
    ``model_id`` with the configured expiration before it is returned.

    Parameters
    ----------
    model_id : str
        The identifier of the desired metabolic model. This is typically
        repository specific.
    repositories : iterable
        An iterable of repository accessor instances. The model_id is searched
        in order.

    Returns
    -------
    bytes
        A gzip-compressed, UTF-8 encoded SBML document.

    """
    cache_dir = str(configuration.cache_directory)
    size_limit = configuration.max_cache_size
    with diskcache.Cache(directory=cache_dir, size_limit=size_limit) as store:
        try:
            payload = store[model_id]
        except KeyError:
            # Cache miss: download the document and remember it for later calls.
            payload = _fetch_model(model_id=model_id, repositories=repositories)
            store.set(
                key=model_id,
                value=payload,
                expire=configuration.cache_expiration,
            )
        return payload
def _fetch_model(
    model_id: str,
    repositories: Iterable[AbstractModelRepository],
) -> bytes:
    """
    Attempt to load a gzip-compressed SBML document from the given repositories.

    The repositories are queried one after another in iteration order and the
    first document that is returned wins. A repository that raises ``OSError``
    or answers with HTTP status 404 is treated as not containing the model and
    the search moves on to the next one. Any other HTTP error status or request
    failure aborts the search.

    Parameters
    ----------
    model_id : str
        The identifier of the desired metabolic model. This is typically
        repository specific.
    repositories : iterable
        An iterable of repository accessor instances. The model_id is searched
        in order.

    Returns
    -------
    bytes
        A gzip-compressed, UTF-8 encoded SBML document.

    Raises
    ------
    RuntimeError
        If a repository responds with an HTTP error status other than 404, if
        the request to a repository fails, or if none of the repositories
        provides the model.

    """
    for repository in repositories:
        logger.info(
            f"Attempting to fetch '{model_id}' from the {repository.name} repository."
        )
        try:
            return repository.get_sbml(model_id=model_id)
        except OSError:
            # Not available from this repository; try the next one.
            logger.debug(
                f"Model '{model_id}' not found in the local "
                f"repository {repository.name}."
            )
            continue
        except httpx.HTTPStatusError as error:
            # Only a 404 means "model unknown here"; every other status is a
            # genuine failure that should not be masked by later repositories.
            if error.response.status_code == 404:
                logger.debug(
                    f"Model '{model_id}' not found in the {repository.name} repository."
                )
                continue
            raise RuntimeError(
                f"The connection to the {repository.name} repository failed."
            ) from error
        except httpx.RequestError as error:
            raise RuntimeError(
                f"The connection to the {repository.name} repository failed."
            ) from error
    # Every repository was tried (or none was given) without success.
    raise RuntimeError(
        f"The model '{model_id}' could not be found in any of the repositories."
    )
def get_model_from_gzip_sbml(stream: bytes) -> "Model":
    """
    Build a model instance from a gzip-compressed, UTF-8 encoded SBML document.

    The bytes are decompressed, decoded as UTF-8, parsed by libSBML, and the
    resulting document is converted into a model.

    Parameters
    ----------
    stream : bytes
        A gzip-compressed, UTF-8 encoded SBML document.

    Returns
    -------
    Model
        A model instance generated from the SBML document.

    """
    sbml_text = gzip.decompress(stream).decode("utf-8")
    sbml_document = libsbml.readSBMLFromString(sbml_text)
    return _sbml_to_model(sbml_document)