Skip to content

Commit 3f3305c

Browse files
author
uri.akavia
committed
add compare.py which contains functions for comparing.
1 parent c3376da commit 3f3305c

File tree

2 files changed

+305
-0
lines changed

2 files changed

+305
-0
lines changed

src/cobra/util/__init__.py

+1
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,4 @@
33
from cobra.util.solver import *
44
from cobra.util.util import *
55
from cobra.util.process_pool import *
6+
from cobra.util.compare import *

src/cobra/util/compare.py

+304
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,304 @@
1+
"""Comparing models, reactions, metabolites, genes and groups."""
2+
3+
from typing import Dict, Optional, Tuple, TypeVar
4+
5+
from cobra import Model, Object, Reaction
6+
from cobra.core import Group
7+
8+
9+
TObject = TypeVar("TObject", bound=Object)
10+
11+
12+
def dict_compare(d1: Dict, d2: Dict, _dont_compare: Optional[set] = None):
    """Report how the keys and values of two dictionaries differ.

    Every key that is not excluded is sorted into one of four buckets: found
    only in ``d1`` ("added"), found only in ``d2`` ("removed"), found in both
    with unequal values ("modified"), or found in both with equal values
    ("same").

    Parameters
    ----------
    d1: dict
        First dictionary to compare.
    d2: dict
        Second dictionary to compare.
    _dont_compare: set
        Keys that are left out of the comparison. Optional. Default None
        (every key is compared).

    Returns
    -------
    dict
        A dictionary with the keys "added", "removed" and "same" (each a set of
        keys) and "modified" (a dict mapping a key to the tuple
        ``(d1[key], d2[key])``).
    """
    skipped = set() if _dont_compare is None else _dont_compare
    keys_1 = set(d1).difference(skipped)
    keys_2 = set(d2).difference(skipped)
    common = keys_1 & keys_2

    changed = {}
    for key in common:
        if d1[key] != d2[key]:
            changed[key] = (d1[key], d2[key])
    unchanged = {key for key in common if d1[key] == d2[key]}

    return {
        "added": keys_1.difference(keys_2),
        "removed": keys_2.difference(keys_1),
        "modified": changed,
        "same": unchanged,
    }
38+
39+
40+
def compare_state(
    obj1: TObject, obj2: TObject, ignore_keys: Optional[set] = None
) -> Tuple[bool, Dict]:
    """Compare the pickling state of two cobra Objects (or derived classes).

    The dictionaries returned by ``__getstate__()`` of both objects are handed
    to ``dict_compare``.

    Not useful for comparing GPRs(). Use the equality in GPRs() directly.
    For Reaction and Group, use the specific functions which do some processing.

    Parameters
    ----------
    obj1: Object, Metabolite, Gene
    obj2: Object, Metabolite, Gene
    ignore_keys: Set, optional
        A set of keys to ignore. Default None (empty set - all keys will be
        compared).

    Returns
    -------
    Tuple - bool, Dict
        A tuple of a boolean (True when no key was added, removed or modified,
        i.e. the two states are equivalent) and the dictionary produced by
        ``dict_compare`` specifying how they differed.
    """
    skipped = set() if ignore_keys is None else ignore_keys
    report = dict_compare(obj1.__getstate__(), obj2.__getstate__(), skipped)
    has_difference = bool(
        report["added"] or report["removed"] or report["modified"]
    )
    return not has_difference, report
71+
72+
73+
def compare_reaction_state(
    rxn1: Reaction, rxn2: Reaction, ignore_keys: Optional[set] = None
) -> Tuple[bool, Dict]:
    """Compare the state of two cobra Reactions.

    Before the states are compared, the "_metabolites" entry is replaced by a
    dict of metabolite id to stoichiometric coefficient and the "_genes" entry
    by a set of gene ids. This avoids recursion and disagreement on memory
    address.

    Parameters
    ----------
    rxn1: Reaction
    rxn2: Reaction
    ignore_keys: Set, optional
        A set of keys to ignore. Default None (empty set - all keys will be
        compared).

    Returns
    -------
    Tuple - bool, Dict
        A tuple of a boolean (True when no key was added, removed or modified,
        i.e. the two states are equivalent) and the dictionary produced by
        ``dict_compare`` specifying how they differed.
    """

    def _comparable_state(rxn):
        # Swap object references for ids so that equal content compares equal.
        state = rxn.__getstate__()
        state["_metabolites"] = {
            met.id: coefficient for met, coefficient in rxn.metabolites.items()
        }
        state["_genes"] = {gene.id for gene in rxn.genes}
        return state

    report = dict_compare(
        _comparable_state(rxn1), _comparable_state(rxn2), ignore_keys
    )
    if report["added"] or report["removed"] or report["modified"]:
        return False, report
    return True, report
106+
107+
108+
def compare_group_state(
    group1: Group, group2: Group, ignore_keys: Optional[set] = None
) -> Tuple[bool, Dict]:
    """Compare the state of two cobra Groups.

    Before the states are compared, the "_members" entry is replaced by the
    list of member ids, in order to avoid differences in memory address
    leading to false positives.

    Parameters
    ----------
    group1: Group
    group2: Group
    ignore_keys: Set, optional
        A set of keys to ignore. Default None (empty set - all keys will be
        compared).

    Returns
    -------
    Tuple - bool, Dict
        A tuple of a boolean (True when no key was added, removed or modified,
        i.e. the two states are equivalent) and the dictionary produced by
        ``dict_compare`` specifying how they differed.
    """
    first_state, second_state = group1.__getstate__(), group2.__getstate__()
    first_state["_members"] = group1.members.list_attr("id")
    second_state["_members"] = group2.members.list_attr("id")
    report = dict_compare(first_state, second_state, ignore_keys)
    differs = any(report[bucket] for bucket in ("added", "removed", "modified"))
    return not differs, report
138+
139+
140+
def _compare_model_objects(
    objects1, objects2, compare, ignore_keys: Optional[set]
) -> Tuple[bool, Dict, list]:
    """Compare two collections of model objects id by id.

    Parameters
    ----------
    objects1, objects2
        Collections offering ``list_attr("id")`` and ``get_by_id`` (for example
        ``model.metabolites``).
    compare
        Function called as ``compare(obj1, obj2, ignore_keys=ignore_keys)`` for
        every id found in both collections; must return ``(bool, dict)``.
    ignore_keys: set
        Keys handed through to ``compare``.

    Returns
    -------
    Tuple - bool, Dict, list
        Whether the collections are equivalent, the per-id comparison results
        (plus "added"/"removed" sets for ids found in only one collection), and
        the ids whose comparison was not equivalent.
    """
    ids1 = set(objects1.list_attr("id"))
    ids2 = set(objects2.list_attr("id"))
    only_in_1 = ids1 - ids2
    only_in_2 = ids2 - ids1

    comparison = {}
    if only_in_1:
        comparison["added"] = only_in_1
    if only_in_2:
        comparison["removed"] = only_in_2
    # An object that exists in only one of the models is itself a difference.
    equivalent = not (only_in_1 or only_in_2)

    different = []
    for _id in ids1 & ids2:
        _eq, _comparison = compare(
            objects1.get_by_id(_id), objects2.get_by_id(_id), ignore_keys=ignore_keys
        )
        if not _eq:
            equivalent = False
            different.append(_id)
        comparison[_id] = _comparison
    return equivalent, comparison, different


def compare_model_state(
    model1: Model,
    model2: Model,
    ignore_notes: bool = True,
    ignore_keys: Optional[set] = None,
) -> Tuple[bool, Dict]:
    """Recursively compare model states.

    Will compare the model and then compare metabolites, reactions, genes, groups in
    the model. Models will be considered different if any of the objects within the
    cobra model are different, or if an object exists in only one of the models.

    Parameters
    ----------
    model1: cobra.Model
        Model to compare.
    model2: cobra.Model
        Other Model to compare.
    ignore_notes: bool, optional
        Whether or not to add "notes" to the keys that are ignored when
        comparing metabolites, reactions, genes and groups. Default True.
    ignore_keys: set, optional
        Keys to ignore when comparing metabolites, reactions, genes and groups.
        Default None (empty set). The model itself is always compared with a
        fixed set of ignored keys.

    Returns
    -------
    Tuple - bool, Dict
        A tuple of a boolean (True when the two models are equivalent)
        and a dictionary specifying how they differed. In addition to the
        comparison of the models themselves, the dictionary contains
        different_x as a list and x for metabolites, reactions, genes, groups.
        The different_x specifies which comparisons were not equivalent, while the
        x contains the full dictionary of comparing each element (each group,
        metabolite, reaction, gene) keyed by id, plus "added"/"removed" sets of
        ids that exist only in model1/only in model2.
    """
    # Copy into a fresh set so the caller's argument is never mutated and any
    # iterable of keys is accepted.
    ignore_keys = set() if ignore_keys is None else set(ignore_keys)
    if ignore_notes:
        ignore_keys.add("notes")
    do_not_compare_models = {
        "metabolites",
        "reactions",
        "genes",
        "notes",
        "annotation",
        "_annotation",
        "groups",
        "_sbml",  # don't care about SBML properties of the file, just how it is read
        "_id",  # Will often be different based on different files
        "_solver",  # Will be different memory locations
    }
    _is_equivalent, model_comparison = compare_state(
        model1, model2, do_not_compare_models
    )

    # (model attribute / result key, key of the "different" list, comparison function)
    sections = (
        ("metabolites", "different_mets", compare_state),
        ("reactions", "different_rxns", compare_reaction_state),
        ("genes", "different_genes", compare_state),
        # Groups need their members reduced to ids before comparing.
        ("groups", "different_groups", compare_group_state),
    )
    for attribute, different_key, compare in sections:
        _eq, comparison, different = _compare_model_objects(
            getattr(model1, attribute),
            getattr(model2, attribute),
            compare,
            ignore_keys,
        )
        model_comparison[attribute] = comparison
        model_comparison[different_key] = different
        _is_equivalent = _is_equivalent and _eq

    return _is_equivalent, model_comparison
261+
262+
263+
def _fix_xml_annotation_to_identifiers(model: "Model") -> None:
    """Fix XML models which have annotations that do not match identifiers.org.

    This function will fix the dict keys of annotations to match identifiers.org.
    Eventually, the XML models should be fixed and cobrapy should be strict, but this is
    part of SBML rewriting of annotations
    see: https://github.com/opencobra/cobrapy/issues/684

    Useful for comparing matlab models with XML models, otherwise the difference in
    annotation behavior confuses the function.

    The model is modified in place: metabolite formulas that are empty strings
    become None, reaction names are stripped of surrounding whitespace, the
    annotation keys "chebi", "sbo" and "ncbigi" are renamed, and annotation
    values that are plain strings are wrapped in a list.

    Parameters
    ----------
    model: Model
        A model to fix
    """

    def _rename_key(annotation, old_key, new_key):
        # Move the value stored under old_key to new_key when old_key exists.
        if old_key in annotation:
            annotation[new_key] = annotation.pop(old_key)

    def _wrap_string_values(annotation):
        # Turn every bare string value into a one-element list.
        wrapped = {
            key: [value]
            for key, value in annotation.items()
            if isinstance(value, str)
        }
        annotation.update(wrapped)

    for metabolite in model.metabolites:
        if metabolite.formula == "":
            metabolite.formula = None
        annotation = metabolite.annotation
        if not len(annotation):
            continue
        _rename_key(annotation, "chebi", "CHEBI")
        _rename_key(annotation, "sbo", "SBO")
        _wrap_string_values(annotation)

    for reaction in model.reactions:
        reaction.name = reaction.name.strip()
        annotation = reaction.annotation
        _rename_key(annotation, "sbo", "SBO")
        if not len(annotation):
            continue
        _wrap_string_values(annotation)

    for gene in model.genes:
        annotation = gene.annotation
        if not len(annotation):
            continue
        _rename_key(annotation, "ncbigi", "ncbiprotein")
        _wrap_string_values(annotation)

0 commit comments

Comments
 (0)