Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Add a warning to Topology.from_pdb if the stereo of the molecule in the PDB is different from the substructure template or unique mol #1583

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions openff/toolkit/topology/topology.py
Original file line number Diff line number Diff line change
Expand Up @@ -1531,6 +1531,7 @@ def from_pdb(
file_path: Union[str, TextIO],
unique_molecules: Optional[Iterable[Molecule]] = None,
toolkit_registry=GLOBAL_TOOLKIT_REGISTRY,
_ignore_stereo: bool = False,
):
"""
Loads supported or user-specified molecules from a PDB file.
Expand Down Expand Up @@ -1688,8 +1689,10 @@ def from_pdb(
pdb,
substructure_dictionary,
coords_angstrom,
_ignore_stereo=_ignore_stereo,
)

# TODO: This is redundant with the RDKit backend - see RDKTKW._get_connectivity_from_openmm_top
for off_atom, atom in zip([*topology.atoms], pdb.topology.atoms()):
off_atom.metadata["residue_name"] = atom.residue.name
off_atom.metadata["residue_number"] = atom.residue.id
Expand Down
66 changes: 63 additions & 3 deletions openff/toolkit/utils/rdkit_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import logging
import pathlib
import tempfile
import warnings
from collections import defaultdict
from typing import TYPE_CHECKING, Dict, List, Optional, Tuple

Expand Down Expand Up @@ -266,7 +267,12 @@ def _polymer_openmm_topology_to_offmol(
return offmol

def _polymer_openmm_pdbfile_to_offtop(
self, topology_class, pdbfile, substructure_dictionary, coords_angstrom
self,
topology_class,
pdbfile,
substructure_dictionary,
coords_angstrom,
_ignore_stereo: bool = False,
):
from openff.units.openmm import from_openmm
from rdkit import Chem, Geometry
Expand All @@ -276,6 +282,11 @@ def _polymer_openmm_pdbfile_to_offtop(
omm_top, substructure_dictionary
)

# The above method will have set stereo on the atoms directly from the substructures and unique_molecules.
# In order to check that the same stereo is perceived from 3D as is assigned from the template, make
# a copy here, then assign stereo from 3D, and then raise an error if there's a disagreement.
mol_with_stereo_from_substr = Chem.Mol(rdkit_mol)
Chem.AssignStereochemistry(mol_with_stereo_from_substr)
rdmol_conformer = Chem.Conformer()
for atom_idx in range(rdkit_mol.GetNumAtoms()):
x, y, z = coords_angstrom[atom_idx, :]
Expand All @@ -285,9 +296,49 @@ def _polymer_openmm_pdbfile_to_offtop(
rdkit_mol,
Chem.SANITIZE_ALL ^ Chem.SANITIZE_ADJUSTHS,
)
Chem.AssignStereochemistryFrom3D(rdkit_mol)
Chem.Kekulize(rdkit_mol, clearAromaticFlags=True)
Chem.SetAromaticity(rdkit_mol, Chem.AromaticityModel.AROMATICITY_MDL)
Chem.AssignStereochemistryFrom3D(rdkit_mol)

# Check stereo assigned from 3D against the stereo assigned by substructures.

# Note that this DOES NOT compare global stereo (R vs. S) because the global stereo
# of a substructure may be undefined (it could change depending on which substructures connect to it).
# Instead, since we know that the order of atoms and bonds are the same between
# mol_with_stereo_from_substr and rdkit_mol are the same, we compare "local stereo" (CW vs CCW).
msg = ""
for orig_atom, new_atom in zip(
mol_with_stereo_from_substr.GetAtoms(), rdkit_mol.GetAtoms()
):
if _ignore_stereo:
continue
orig_chi = orig_atom.GetChiralTag()
orig_abs_chi = orig_atom.GetPropsAsDict().get("_CIPCode", "")
new_chi = new_atom.GetChiralTag()
new_abs_chi = new_atom.GetPropsAsDict().get("_CIPCode", "")

# If either the original atom or the newly perceived atom isn't a chiral center, skip this check.
# This means that users can avoid this check by adding unique molecules with undefined stereo
if (orig_chi == Chem.CHI_UNSPECIFIED) or (new_chi == Chem.CHI_UNSPECIFIED):
continue
if orig_chi != new_chi:
print(f"{orig_chi=} {new_chi=} {orig_abs_chi=} {new_abs_chi=}")
residue = orig_atom.GetPDBResidueInfo()
atom_name = orig_atom.GetPropsAsDict().get("_Name", "")
atom_symbol = orig_atom.GetSymbol()
atom_index = orig_atom.GetIdx()
residue_name = residue.GetResidueName()
residue_number = residue.GetResidueNumber()
chain_id = residue.GetChainId()

msg += f"{atom_index=} {atom_symbol=} {atom_name=} {residue_name=} {residue_number=} {chain_id=} \n"
if msg != "":
msg = (
"Some atoms loaded from PDB have a 3D geometry that corresponds to a different "
"stereochemistry than the substructure template or unique molecule: \n"
+ msg
)
warnings.warn(msg)

# Don't sanitize or we risk assigning non-MDL aromaticity
rdmols = Chem.GetMolFrags(rdkit_mol, asMols=True, sanitizeFrags=False)
Expand Down Expand Up @@ -401,8 +452,16 @@ def _polymer_openmm_topology_to_rdmol(
for atom_i, j in zip(ref.GetAtoms(), match):
atom_j = mol.GetAtomWithIdx(j)
# copy over chirality
# TODO: This is most likely wrong. This will copy over CW/CCW tags but there's no
# checking that the substituent order in the molecule and substructure are the same,
# so this is basically random assignment. Thankfully Molecule.from_polymer_pdb and
# Topology.from_pdb both overwrite this assignment based on 3D coordinates. The only
# way I can currently think of doing this right would be to arbitrarily assign CW,
# then try to run a strict substructure match on it, and if that fails then assign
# CCW. This would be monstrously inefficient and all it would let us do is compare
# the substructure stereo to 3D stereo, which is of dubious value anyway.
Comment on lines +455 to +462
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Big blocker here, this basically summarizes it. This will need a lot more work and I don't think it's worth holding up the release for.

if atom_i.GetChiralTag():
mol.GetAtomWithIdx(j).SetChiralTag(atom_i.GetChiralTag())
atom_j.SetChiralTag(atom_i.GetChiralTag())
atom_j.SetFormalCharge(atom_i.GetFormalCharge())

for b in ref.GetBonds():
Expand Down Expand Up @@ -463,6 +522,7 @@ def _get_connectivity_from_openmm_top(self, omm_top):
res.SetChainId(atom.residue.chain.id)
rwatom = rwmol.GetAtomWithIdx(idx)
rwatom.SetPDBResidueInfo(res)
rwatom.SetProp("_Name", atom.name)
# we're fully explicit
for atom in rwmol.GetAtoms():
atom.SetNoImplicit(True)
Expand Down