44import math
55import random
66import string
7+ import warnings
78from typing import TYPE_CHECKING
89
910if TYPE_CHECKING :
@@ -38,6 +39,9 @@ def _reduce_csd_formula(formula: str) -> str:
3839 f"{ e } { formula_dct [e ] // reducer if formula_dct [e ] != reducer else '' } "
3940 )
4041
42+ if not formula_str :
43+ raise RuntimeError (f"Unable to create formula for { formula } " )
44+
4145 return formula_str
4246
4347
@@ -49,7 +53,15 @@ def from_csd_entry_directly(
4953
5054 """
5155 asym_unit = entry .crystal .asymmetric_unit_molecule
56+
5257 elements = {d .atomic_symbol for d in asym_unit .atoms }
58+
59+ optimade_elements = elements .copy ()
60+ # Replace deuterium with H
61+ if "D" in elements :
62+ optimade_elements .remove ("D" )
63+ optimade_elements .add ("H" )
64+
5365 try :
5466 positions = [
5567 [atom .coordinates .x , atom .coordinates .y , atom .coordinates .z ]
@@ -110,6 +122,14 @@ def _get_citations(entry) -> list[ReferenceResource]:
110122 if not inchi .success :
111123 inchi = None
112124
125+ try :
126+ reduced_formula = _reduce_csd_formula (asym_unit .formula )
127+ except Exception :
128+ warnings .warn (
129+ f"Unable to reduce formula for { entry .identifier } : { entry .formula } "
130+ )
131+ reduced_formula = None
132+
113133 resource = StructureResource (
114134 ** {
115135 "id" : entry .identifier ,
@@ -121,14 +141,19 @@ def _get_citations(entry) -> list[ReferenceResource]:
121141 "attributes" : StructureResourceAttributes (
122142 last_modified = now ,
123143 chemical_formula_descriptive = entry .formula ,
124- chemical_formula_reduced = _reduce_csd_formula ( asym_unit . formula ) ,
125- elements = sorted (list (elements )),
144+ chemical_formula_reduced = reduced_formula ,
145+ elements = sorted (list (optimade_elements )),
126146 dimension_types = (1 , 1 , 1 ),
127147 nperiodic_dimensions = 3 ,
128- nelements = len (elements ),
148+ nelements = len (optimade_elements ),
129149 nsites = len (positions ) if positions else None ,
150+ # Make sure the "D" is remapped to "H" in the species list, but continue using it in the sites list
130151 species = [
131- Species (chemical_symbols = [e ], name = e , concentration = [1.0 ])
152+ Species (
153+ chemical_symbols = [e if e != "D" else "H" ],
154+ name = e ,
155+ concentration = [1.0 ],
156+ )
132157 for e in elements
133158 ]
134159 if positions
0 commit comments