Skip to content

Commit 00439a9

Browse files
authored
Merge pull request #835 from opencobra/sbml-parser-bugfixes
Sbml parser bugfixes
2 parents 988811b + 897ccfa commit 00439a9

File tree

3 files changed

+418
-36
lines changed

3 files changed

+418
-36
lines changed

cobra/io/sbml.py

+58-36
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,8 @@ def _f_reaction_rev(sid, prefix="R_"):
148148
# -----------------------------------------------------------------------------
149149
# Read SBML
150150
# -----------------------------------------------------------------------------
151-
def read_sbml_model(filename, number=float, f_replace=F_REPLACE, **kwargs):
151+
def read_sbml_model(filename, number=float, f_replace=F_REPLACE,
152+
set_missing_bounds=False, **kwargs):
152153
"""Reads SBML model from given filename.
153154
154155
If the given filename ends with the suffix ''.gz'' (for example,
@@ -184,6 +185,8 @@ def read_sbml_model(filename, number=float, f_replace=F_REPLACE, **kwargs):
184185
By default the following id changes are performed on import:
185186
clip G_ from genes, clip M_ from species, clip R_ from reactions
186187
If no replacements should be performed, set f_replace={}, None
188+
set_missing_bounds : boolean flag to set missing bounds
189+
Missing bounds are set to default bounds in configuration.
187190
188191
Returns
189192
-------
@@ -198,8 +201,11 @@ def read_sbml_model(filename, number=float, f_replace=F_REPLACE, **kwargs):
198201
"""
199202
try:
200203
doc = _get_doc_from_filename(filename)
201-
return _sbml_to_model(doc, number=number,
202-
f_replace=f_replace, **kwargs)
204+
return _sbml_to_model(doc,
205+
number=number,
206+
f_replace=f_replace,
207+
set_missing_bounds=set_missing_bounds,
208+
**kwargs)
203209
except IOError as e:
204210
raise e
205211

@@ -255,7 +261,7 @@ def _get_doc_from_filename(filename):
255261
return doc
256262

257263

258-
def _sbml_to_model(doc, number=float, f_replace=None, skip_annotations=False,
264+
def _sbml_to_model(doc, number=float, f_replace=None, set_missing_bounds=False,
259265
**kwargs):
260266
"""Creates cobra model from SBMLDocument.
261267
@@ -265,6 +271,7 @@ def _sbml_to_model(doc, number=float, f_replace=None, skip_annotations=False,
265271
number: data type of stoichiometry: {float, int}
266272
In which data type should the stoichiometry be parsed.
267273
f_replace : dict of replacement functions for id replacement
274+
set_missing_bounds : flag to set missing bounds
268275
269276
Returns
270277
-------
@@ -529,28 +536,32 @@ def process_association(ass):
529536
p_lb = klaw.getParameter("LOWER_BOUND") # noqa: E501 type: libsbml.LocalParameter
530537
if p_lb:
531538
cobra_reaction.lower_bound = p_lb.getValue()
532-
else:
533-
raise CobraSBMLError("Missing flux bounds on reaction: %s",
534-
reaction)
535539
p_ub = klaw.getParameter("UPPER_BOUND") # noqa: E501 type: libsbml.LocalParameter
536540
if p_ub:
537541
cobra_reaction.upper_bound = p_ub.getValue()
538-
else:
539-
raise CobraSBMLError("Missing flux bounds on reaction %s",
540-
reaction)
541542

542-
LOGGER.warning("Encoding LOWER_BOUND and UPPER_BOUND in "
543-
"KineticLaw is discouraged, "
544-
"use fbc:fluxBounds instead: %s", reaction)
543+
if p_ub is not None or p_lb is not None:
544+
LOGGER.warning("Encoding LOWER_BOUND and UPPER_BOUND in "
545+
"KineticLaw is discouraged, "
546+
"use fbc:fluxBounds instead: %s", reaction)
545547

546548
if p_lb is None:
547549
LOGGER.error("Missing lower flux bound for reaction: "
548550
"%s", reaction)
549551
missing_bounds = True
552+
if set_missing_bounds:
553+
cobra_reaction.lower_bound = config.lower_bound
554+
LOGGER.warning("Set lower flux bound to default for reaction: "
555+
"%s", reaction)
556+
550557
if p_ub is None:
551558
LOGGER.error("Missing upper flux bound for reaction: "
552559
"%s", reaction)
553560
missing_bounds = True
561+
if set_missing_bounds:
562+
cobra_reaction.upper_bound = config.upper_bound
563+
LOGGER.warning("Set upper flux bound to default for reaction: "
564+
"%s", reaction)
554565

555566
# add reaction
556567
reactions.append(cobra_reaction)
@@ -613,8 +624,6 @@ def process_association(ass):
613624
cobra_reaction.gene_reaction_rule = gpr
614625

615626
cobra_model.add_reactions(reactions)
616-
if missing_bounds:
617-
raise CobraSBMLError("Missing flux bounds on reactions.")
618627

619628
# Objective
620629
obj_direction = "max"
@@ -647,18 +656,15 @@ def process_association(ass):
647656
)
648657
except ValueError as e:
649658
LOGGER.warning(str(e))
650-
set_objective(cobra_model, coefficients)
651-
cobra_model.solver.objective.direction = obj_direction
652659
else:
653660
# some legacy models encode objective coefficients in kinetic laws
654-
for cobra_reaction in model.getListOfReactions(): # noqa: E501 type: libsbml.Reaction
655-
if cobra_reaction.isSetKineticLaw():
656-
661+
for reaction in model.getListOfReactions(): # noqa: E501 type: libsbml.Reaction
662+
if reaction.isSetKineticLaw():
657663
klaw = reaction.getKineticLaw() # type: libsbml.KineticLaw
658664
p_oc = klaw.getParameter(
659665
"OBJECTIVE_COEFFICIENT") # noqa: E501 type: libsbml.LocalParameter
660666
if p_oc:
661-
rid = cobra_reaction.getId()
667+
rid = reaction.getId()
662668
if f_replace and F_REACTION in f_replace:
663669
rid = f_replace[F_REACTION](rid)
664670
try:
@@ -762,6 +768,11 @@ def process_association(ass):
762768

763769
cobra_model.add_groups(groups)
764770

771+
# run the complete parsing to get all warnings and errors before
772+
# raising errors
773+
if missing_bounds and not set_missing_bounds:
774+
raise CobraSBMLError("Missing flux bounds on reactions.")
775+
765776
return cobra_model
766777

767778

@@ -847,6 +858,8 @@ def _model_to_sbml(cobra_model, f_replace=None, units=True):
847858
if cobra_model.name is not None:
848859
model.setName(cobra_model.name)
849860

861+
_sbase_annotations(model, cobra_model.annotation)
862+
850863
# Meta information (ModelHistory)
851864
if hasattr(cobra_model, "_sbml"):
852865
meta = cobra_model._sbml
@@ -1011,16 +1024,19 @@ def _model_to_sbml(cobra_model, f_replace=None, units=True):
10111024
# GPR
10121025
gpr = cobra_reaction.gene_reaction_rule
10131026
if gpr is not None and len(gpr) > 0:
1014-
gpa = r_fbc.createGeneProductAssociation() # noqa: E501 type: libsbml.GeneProductAssociation
1015-
# replace ids
1027+
1028+
# replace ids in string
10161029
if f_replace and F_GENE_REV in f_replace:
1030+
gpr = gpr.replace('(', '( ')
1031+
gpr = gpr.replace(')', ' )')
10171032
tokens = gpr.split(' ')
10181033
for k in range(len(tokens)):
1019-
if tokens[k] not in ['and', 'or', '(', ')']:
1034+
if tokens[k] not in [' ', 'and', 'or', '(', ')']:
10201035
tokens[k] = f_replace[F_GENE_REV](tokens[k])
1021-
gpr = " ".join(tokens)
1036+
gpr_new = " ".join(tokens)
10221037

1023-
gpa.setAssociation(gpr)
1038+
gpa = r_fbc.createGeneProductAssociation() # noqa: E501 type: libsbml.GeneProductAssociation
1039+
gpa.setAssociation(gpr_new)
10241040

10251041
# objective coefficients
10261042
if reaction_coefficients.get(cobra_reaction, 0) != 0:
@@ -1238,8 +1254,8 @@ def _sbase_notes_dict(sbase, notes):
12381254
12391255
In the current stage the new annotation format is not completely supported yet.
12401256
"""
1241-
URL_IDENTIFIERS_PATTERN = r"^http[s]{0,1}://identifiers.org/(.+)/(.+)"
1242-
URL_IDENTIFIERS_PREFIX = r"https://identifiers.org"
1257+
URL_IDENTIFIERS_PATTERN = re.compile(r"^http[s]{0,1}://identifiers.org/(.+?)/(.+)") # noqa: E501
1258+
URL_IDENTIFIERS_PREFIX = "https://identifiers.org"
12431259
QUALIFIER_TYPES = {
12441260
"is": libsbml.BQB_IS,
12451261
"hasPart": libsbml.BQB_HAS_PART,
@@ -1278,8 +1294,8 @@ def _parse_annotations(sbase):
12781294
-------
12791295
dict (annotation dictionary)
12801296
1281-
FIXME: annotation format must be updated
1282-
(https://github.com/opencobra/cobrapy/issues/684)
1297+
FIXME: annotation format must be updated (this is a big collection of
1298+
fixes) - see: https://github.com/opencobra/cobrapy/issues/684)
12831299
"""
12841300
annotation = {}
12851301

@@ -1295,20 +1311,22 @@ def _parse_annotations(sbase):
12951311

12961312
for cvterm in cvterms: # type: libsbml.CVTerm
12971313
for k in range(cvterm.getNumResources()):
1298-
uri = cvterm.getResourceURI(k)
1299-
13001314
# FIXME: read and store the qualifier
1301-
tokens = uri.split('/')
1302-
if len(tokens) != 5 or not tokens[2] == "identifiers.org":
1315+
1316+
uri = cvterm.getResourceURI(k)
1317+
match = URL_IDENTIFIERS_PATTERN.match(uri)
1318+
if not match:
13031319
LOGGER.warning("%s does not conform to "
13041320
"http(s)://identifiers.org/collection/id", uri)
13051321
continue
13061322

1307-
provider, identifier = tokens[3], tokens[4]
1323+
provider, identifier = match.group(1), match.group(2)
13081324
if provider in annotation:
13091325
if isinstance(annotation[provider], string_types):
13101326
annotation[provider] = [annotation[provider]]
1311-
annotation[provider].append(identifier)
1327+
# FIXME: use a list
1328+
if identifier not in annotation[provider]:
1329+
annotation[provider].append(identifier)
13121330
else:
13131331
# FIXME: always in list
13141332
annotation[provider] = identifier
@@ -1335,7 +1353,11 @@ def _sbase_annotations(sbase, annotation):
13351353

13361354
# standardize annotations
13371355
annotation_data = deepcopy(annotation)
1356+
13381357
for key, value in annotation_data.items():
1358+
# handling of non-string annotations (e.g. integers)
1359+
if isinstance(value, (float, int)):
1360+
value = str(value)
13391361
if isinstance(value, string_types):
13401362
annotation_data[key] = [("is", value)]
13411363

0 commit comments

Comments
 (0)