-
Notifications
You must be signed in to change notification settings - Fork 25
Open
Description
Here's a demo of H2O, which works fine:
julia> mol = smilestomol("O")
{1, 0} simple molecular graph SMILESMolGraph
julia> inchitosdf(inchi(mol)) |> println
Structure #1.
InChIV10
1 0 0 0 0 0 0 0 0 0 1 V2000
0.0000 0.0000 0.0000 O 0 0 0 0 0 0 0 0 0
M END
$$$$
but D2O fails:
julia> mol = smilestomol("[D]O[D]")
{3, 2} simple molecular graph SMILESMolGraph
julia> inchitosdf(inchi(mol)) |> println
Structure #1.
InChIV10
1 0 0 0 0 0 0 0 0 0 3 V2000
0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0
A 1
OD2
M END
$$$$
The D#s are omitted and the O is replaced by a C - very strange.
Together with ChatGPT I arrived at a different implementation, that doesn't use the OutputSDF flag, but handles the output generation in julia:
using Printf
using MolecularGraph
import MolecularGraph: libinchi, inchi_OutputStructEx, inchi_InputINCHI, inchi_Atom, NUM_H_ISOTOPES
function atom_symbol(a::inchi_Atom)
# extract main atom symbol
s = String(reinterpret(UInt8, collect(Iterators.takewhile(!=(0x00), a.elname))))
return s
end
function bonds_from_atoms(atoms::Vector{inchi_Atom})
bonds = Set{Tuple{Int,Int,Int}}()
for (i, a) in enumerate(atoms)
for k in 1:a.num_bonds
j = a.neighbor[k] + 1 # C -> Julia 1-based
btype = a.bond_type[k]
if j > 0 # skip invalid neighbor
push!(bonds, (min(i,j), max(i,j), btype))
end
end
end
return collect(bonds)
end
function expand_atoms(atoms::Vector{inchi_Atom})
expanded_atoms = Vector{String}()
bonds = Vector{Tuple{Int,Int,Int}}()
for (i, a) in enumerate(atoms)
# Add main atom
push!(expanded_atoms, atom_symbol(a))
# Keep track of next atom index
parent_idx = length(expanded_atoms)
# Add implicit H/D/T as explicit atoms
for iso_index in 1:NUM_H_ISOTOPES
n = a.num_iso_H[iso_index+1]
iso_symbol = ["H", "D", "T"][iso_index]
for _ in 1:n
child_idx = length(expanded_atoms) + 1
push!(expanded_atoms, iso_symbol)
push!(bonds, (parent_idx, child_idx, 1)) # single bond to parent
end
end
end
# Add original bonds between heavy atoms
heavy_bonds = bonds_from_atoms(atoms)
append!(bonds, heavy_bonds)
return expanded_atoms, bonds
end
function expand_atoms(atoms::Vector{inchi_Atom})
expanded_atoms = Vector{String}()
bonds = Vector{Tuple{Int,Int,Int}}()
for (i, a) in enumerate(atoms)
# Add main atom
push!(expanded_atoms, atom_symbol(a))
# Keep track of next atom index
parent_idx = length(expanded_atoms)
# Add implicit H/D/T as explicit atoms
for iso_index in 1:NUM_H_ISOTOPES
n = a.num_iso_H[iso_index+1]
iso_symbol = ["H", "D", "T"][iso_index]
for _ in 1:n
child_idx = length(expanded_atoms) + 1
push!(expanded_atoms, iso_symbol)
push!(bonds, (parent_idx, child_idx, 1)) # single bond to parent
end
end
end
# Add original bonds between heavy atoms
heavy_bonds = bonds_from_atoms(atoms)
append!(bonds, heavy_bonds)
return expanded_atoms, bonds
end
function struct_to_sdf(structure::inchi_OutputStructEx)
atoms = unsafe_wrap(Vector{inchi_Atom}, structure.atom, structure.num_atoms)
expanded_atoms, bonds = expand_atoms(atoms)
io = IOBuffer()
println(io, "Generated by libinchi + Julia")
println(io)
println(io, " Julia libinchi export")
@printf(io, "%3d%3d 0 0 0 0 999 V2000\n",
length(expanded_atoms), length(bonds))
# Dummy coordinates (can use atom coords for heavy atoms, 0 for H/D/T)
for (i, sym) in enumerate(expanded_atoms)
x = y = z = 0.0
if i <= length(atoms) # heavy atoms
x, y, z = atoms[i].x, atoms[i].y, atoms[i].z
end
@printf(io, "%10.4f%10.4f%10.4f %-3s 0 0 0 0 0 0 0 0 0 0\n",
x, y, z, sym)
end
# Write bonds
for (i,j,btype) in bonds
@printf(io, "%3d%3d%3d 0 0 0 0\n", i, j, btype)
end
println(io, "M END")
println(io, raw"$$$$")
return String(take!(io))
end
function inchitosdf2(inchi::String; options::String = "")
structure = inchi_OutputStructEx()
inchi_input = inchi_InputINCHI(Base.unsafe_convert(Cstring, inchi),
Base.unsafe_convert(Cstring, options))
ret = @ccall libinchi.GetStructFromINCHIEx(
inchi_input::Ref{inchi_InputINCHI},
structure::Ref{inchi_OutputStructEx}
)::Int32
if ret < 0
error("libinchi failed with code $ret")
end
sdf = struct_to_sdf(structure)
# cleanup
@ccall libinchi.FreeStructFromINCHIEx(structure::Ref{inchi_OutputStructEx})::Cvoid
return sdf
end
mol = smilestomol("[D]O[D]")
i = inchi(mol)
inchitosdf(i) |> println
inchitosdf2(i) |> println
Is that interesting for you?
I'm not sure whether the algorithm covers all special cases that your might cover. In that case one could test for existence of isotopes and only in that case apply this method. Happy to hear from you.
Metadata
Metadata
Assignees
Labels
No labels