|
| 1 | +"""Comparing models, reactions, metabolites, genes and groups.""" |
| 2 | + |
| 3 | +from typing import Dict, Optional, Tuple, TypeVar |
| 4 | + |
| 5 | +from cobra import Model, Object, Reaction |
| 6 | +from cobra.core import Group |
| 7 | + |
| 8 | + |
# Type variable bound to cobra's ``Object``; ``compare_state`` uses it to annotate
# both of its arguments with the same type variable.
TObject = TypeVar("TObject", bound=Object)
| 10 | + |
| 11 | + |
def dict_compare(d1: Dict, d2: Dict, _dont_compare: Optional[set] = None):
    """Report how the keys and values of two dictionaries differ.

    Keys listed in ``_dont_compare`` are dropped from both dictionaries before
    anything else happens. The remaining keys are sorted into four buckets.

    Parameters
    ----------
    d1: dict
        First dictionary.
    d2: dict
        Second dictionary.
    _dont_compare: set, optional
        Keys to leave out of the comparison. Default None (compare all keys).

    Returns
    -------
    dict
        A dictionary with four entries:

        - ``"added"``: set of keys found in ``d1`` but not in ``d2``.
        - ``"removed"``: set of keys found in ``d2`` but not in ``d1``.
        - ``"modified"``: dict mapping each shared key whose values are unequal
          to the tuple ``(d1[key], d2[key])``.
        - ``"same"``: set of shared keys whose values are equal.
    """
    skipped = set() if _dont_compare is None else _dont_compare
    left_keys = set(d1).difference(skipped)
    right_keys = set(d2).difference(skipped)
    common = left_keys & right_keys

    changed = {}
    for key in common:
        if d1[key] != d2[key]:
            changed[key] = (d1[key], d2[key])
    unchanged = {key for key in common if d1[key] == d2[key]}

    return {
        "added": left_keys - right_keys,
        "removed": right_keys - left_keys,
        "modified": changed,
        "same": unchanged,
    }
| 38 | + |
| 39 | + |
def compare_state(
    obj1: TObject, obj2: TObject, ignore_keys: Optional[set] = None
) -> Tuple[bool, Dict]:
    """Compare the state of two cobra Objects (or classes derived from them).

    The state of each object is obtained via ``__getstate__()`` and the two state
    dictionaries are compared with ``dict_compare``.

    Not useful for comparing GPRs(). Use the equality in GPRs() directly.
    For Reaction and Group, use the specific functions which do some processing.

    Parameters
    ----------
    obj1: Object, Metabolite, Gene
        First object to compare.
    obj2: Object, Metabolite, Gene
        Second object to compare.
    ignore_keys: set, optional
        A set of state keys to ignore. Default None (empty set - all keys will be
        compared).

    Returns
    -------
    Tuple - bool, Dict
        A tuple of a boolean and a dictionary. The boolean is True when the two
        states are equivalent, i.e. no key was added, removed or modified, and
        False otherwise. The dictionary is the output of ``dict_compare`` and
        specifies how the states differed.
    """
    if ignore_keys is None:
        ignore_keys = set()
    _comparison = dict_compare(obj1.__getstate__(), obj2.__getstate__(), ignore_keys)
    # Any non-empty "added", "removed" or "modified" bucket means a difference.
    _is_equivalent = not (
        _comparison["added"] or _comparison["removed"] or _comparison["modified"]
    )
    return _is_equivalent, _comparison
| 71 | + |
| 72 | + |
def compare_reaction_state(
    rxn1: Reaction, rxn2: Reaction, ignore_keys: Optional[set] = None
) -> Tuple[bool, Dict]:
    """Compare the state of two cobra Reactions.

    Before the states are compared, the ``"_metabolites"`` entry of each state is
    replaced by a dict of metabolite id to stoichiometric coefficient, and the
    ``"_genes"`` entry by a set of gene ids. Comparing ids instead of the objects
    themselves avoids recursion and avoids disagreement on memory address.

    Parameters
    ----------
    rxn1: Reaction
        First reaction to compare.
    rxn2: Reaction
        Second reaction to compare.
    ignore_keys: set, optional
        A set of state keys to ignore. Default None (all keys will be compared).

    Returns
    -------
    Tuple - bool, Dict
        A tuple of a boolean and a dictionary. The boolean is True when no state
        key was added, removed or modified, and False otherwise. The dictionary
        is the output of ``dict_compare``.
    """

    def _id_based_state(rxn: Reaction) -> Dict:
        # NOTE(review): the returned state dict is overwritten in place; this
        # assumes __getstate__() hands back a fresh dict - confirm.
        state = rxn.__getstate__()
        state["_metabolites"] = {
            met.id: stoic for met, stoic in rxn.metabolites.items()
        }
        state["_genes"] = {gene.id for gene in rxn.genes}
        return state

    diff = dict_compare(_id_based_state(rxn1), _id_based_state(rxn2), ignore_keys)
    has_difference = bool(diff["added"] or diff["removed"] or diff["modified"])
    return not has_difference, diff
| 106 | + |
| 107 | + |
def compare_group_state(
    group1: Group, group2: Group, ignore_keys: Optional[set] = None
) -> Tuple[bool, Dict]:
    """Compare the state of two cobra Groups.

    The ``"_members"`` entry of each state is replaced by the list of member ids
    (``group.members.list_attr("id")``), so that differing memory addresses of
    the member objects do not lead to false positives.

    Parameters
    ----------
    group1: Group
        First group to compare.
    group2: Group
        Second group to compare.
    ignore_keys: set, optional
        A set of state keys to ignore. Default None (all keys will be compared).

    Returns
    -------
    Tuple - bool, Dict
        A tuple of a boolean and a dictionary. The boolean is True when no state
        key was added, removed or modified, and False otherwise. The dictionary
        is the output of ``dict_compare``.
    """
    states = []
    for group in (group1, group2):
        # NOTE(review): the state dict is overwritten in place; this assumes
        # __getstate__() hands back a fresh dict - confirm.
        state = group.__getstate__()
        # The id lists are compared with ==, so member order is significant.
        state["_members"] = group.members.list_attr("id")
        states.append(state)

    diff = dict_compare(states[0], states[1], ignore_keys)
    if diff["added"] or diff["removed"] or diff["modified"]:
        return False, diff
    return True, diff
| 138 | + |
| 139 | + |
def _compare_dictlists(dictlist1, dictlist2, compare_func, ignore_keys):
    """Compare two collections of cobra objects id by id.

    Parameters
    ----------
    dictlist1, dictlist2
        Collections offering ``list_attr("id")`` and ``get_by_id(id)``, such as
        ``model.metabolites`` or ``model.reactions``.
    compare_func
        Function called as ``compare_func(obj1, obj2, ignore_keys=ignore_keys)``
        for every id present in both collections; it must return a tuple of
        ``(is_equivalent, comparison)``.
    ignore_keys: set
        Forwarded to ``compare_func``.

    Returns
    -------
    Tuple - bool, Dict, list
        Whether the collections are equivalent, the comparison dictionary and
        the list of shared ids whose objects were not equivalent.
    """
    ids1 = set(dictlist1.list_attr("id"))
    ids2 = set(dictlist2.list_attr("id"))
    only_in_first = ids1 - ids2
    only_in_second = ids2 - ids1

    comparison = dict()
    if only_in_first:
        comparison["added"] = only_in_first
    if only_in_second:
        comparison["removed"] = only_in_second
    # Objects that exist in only one collection make the collections different.
    is_equivalent = not (only_in_first or only_in_second)

    different = list()
    # Sorted so that the order of ``different`` is reproducible between runs.
    for _id in sorted(ids1 & ids2):
        _eq, _comparison = compare_func(
            dictlist1.get_by_id(_id),
            dictlist2.get_by_id(_id),
            ignore_keys=ignore_keys,
        )
        if not _eq:
            is_equivalent = False
            different.append(_id)
        # NOTE: an object whose id is literally "added" or "removed" overwrites
        # the corresponding summary entry; kept for backward compatibility.
        comparison[_id] = _comparison
    return is_equivalent, comparison, different


def compare_model_state(
    model1: Model,
    model2: Model,
    ignore_notes: bool = True,
    ignore_keys: Optional[set] = None,
) -> Tuple[bool, Dict]:
    """Recursively compare model states.

    Will compare the model and then compare metabolites, reactions, genes, groups in
    the model. Models will be considered different if any of the objects within the
    cobra model are different, or if an object exists in only one of the models.

    Parameters
    ----------
    model1: cobra.Model
        Model to compare.
    model2: cobra.Model
        Other Model to compare.
    ignore_notes: bool, optional
        Whether or not to ignore the ``"notes"`` key when comparing metabolites,
        reactions, genes and groups. Default True. The ``"notes"`` key of the
        models themselves is never compared.
    ignore_keys: set, optional
        State keys to ignore when comparing metabolites, reactions, genes and
        groups. Default None (no further keys are ignored). The passed set is not
        modified.

    Returns
    -------
    Tuple - bool, Dict
        A tuple of a boolean (True when the two models are equivalent, False
        otherwise) and a dictionary specifying how they differed. Besides the
        model level output of ``dict_compare``, the dictionary contains
        different_x as a list and x as a dictionary for metabolites (different_mets),
        reactions (different_rxns), genes (different_genes) and groups
        (different_groups). The different_x lists the ids whose comparisons were
        not equivalent, while x contains the full comparison of each element
        (each group, metabolite, reaction, gene) keyed by id, plus "added"
        (ids only in model1) and "removed" (ids only in model2) when not empty.
    """
    # Work on a copy so that the caller's set is never mutated.
    ignore_keys = set() if ignore_keys is None else set(ignore_keys)
    if ignore_notes:
        ignore_keys.add("notes")
    do_not_compare_models = {
        "metabolites",
        "reactions",
        "genes",
        "notes",
        "annotation",
        "_annotation",
        "groups",
        "_sbml",  # don't care about SBML properties of the file, just how it is read
        "_id",  # Will often be different based on different files
        "_solver",  # Will be different memory locations
    }
    _is_equivalent, model_comparison = compare_state(
        model1, model2, do_not_compare_models
    )

    # (model attribute / result key, key of the "different" list, comparison function)
    sections = (
        ("metabolites", "different_mets", compare_state),
        ("reactions", "different_rxns", compare_reaction_state),
        ("genes", "different_genes", compare_state),
        # Groups need the group specific comparison, which compares member ids
        # instead of the member objects themselves.
        ("groups", "different_groups", compare_group_state),
    )
    for attribute, different_key, compare_func in sections:
        _eq, _comparison, _different = _compare_dictlists(
            getattr(model1, attribute),
            getattr(model2, attribute),
            compare_func,
            ignore_keys,
        )
        model_comparison[attribute] = _comparison
        model_comparison[different_key] = _different
        if not _eq:
            _is_equivalent = False

    return _is_equivalent, model_comparison
| 261 | + |
| 262 | + |
def _rename_annotation_key(annotation: Dict, old_key: str, new_key: str) -> None:
    """Move the value stored under ``old_key`` to ``new_key``, if ``old_key`` exists."""
    if old_key in annotation:
        annotation[new_key] = annotation.pop(old_key)


def _wrap_str_annotation_values(annotation: Dict) -> None:
    """Replace every bare string value of ``annotation`` by a one-element list."""
    # Only values of existing keys are reassigned, so the dict does not change
    # size while it is being iterated.
    for key, value in annotation.items():
        if isinstance(value, str):
            annotation[key] = [value]


def _fix_xml_annotation_to_identifiers(model: "Model") -> None:
    """Fix XML models which have annotations that do not match identifiers.org.

    This function will fix the dict keys of annotations to match identifiers.org.
    Eventually, the XML models should be fixed and cobrapy should be strict, but this is
    part of SBML rewriting of annotations
    see: https://github.com/opencobra/cobrapy/issues/684

    Useful for comparing matlab models with XML models, otherwise the difference in
    annotation behavior confuses the function.

    The model is modified in place:

    - metabolites: an empty-string formula becomes None, the annotation keys
      "chebi" and "sbo" are renamed to "CHEBI" and "SBO";
    - reactions: the name is stripped of surrounding whitespace (a name of None
      is left untouched), the annotation key "sbo" is renamed to "SBO";
    - genes: the annotation key "ncbigi" is renamed to "ncbiprotein";
    - for all three, annotation values that are a single string are wrapped in a
      list.

    Parameters
    ----------
    model: Model
        A model to fix
    """
    for met in model.metabolites:
        if met.formula == "":
            met.formula = None
        _rename_annotation_key(met.annotation, "chebi", "CHEBI")
        _rename_annotation_key(met.annotation, "sbo", "SBO")
        _wrap_str_annotation_values(met.annotation)
    for rxn in model.reactions:
        # A reaction without a name must not abort the whole clean-up.
        if rxn.name is not None:
            rxn.name = rxn.name.strip()
        _rename_annotation_key(rxn.annotation, "sbo", "SBO")
        _wrap_str_annotation_values(rxn.annotation)
    for gene in model.genes:
        _rename_annotation_key(gene.annotation, "ncbigi", "ncbiprotein")
        _wrap_str_annotation_values(gene.annotation)
0 commit comments