diff --git a/package/MDAnalysis/coordinates/CIF.py b/package/MDAnalysis/coordinates/CIF.py new file mode 100644 index 00000000000..b22221b1b62 --- /dev/null +++ b/package/MDAnalysis/coordinates/CIF.py @@ -0,0 +1,89 @@ + +# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 +# +# MDAnalysis --- https://www.mdanalysis.org +# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors +# (see the file AUTHORS for the full list of names) +# +# Released under the GNU Public Licence, v2 or any higher version +# +# Please cite your use of MDAnalysis in published work: +# +# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler, +# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein. +# MDAnalysis: A Python package for the rapid analysis of molecular dynamics +# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th +# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy. +# doi: 10.25080/majora-629e541a-00e +# +# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein. +# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. +# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 +# + +""" +PDBx (mmcif) files in MDAnalysis --- :mod:`MDAnalysis.coordinates.PDBx` +======================================================================= + +Reads coordinates from a PDBx_ (mmcif) format file. Will populate the Universe positions from the +``_atom_site.Cartn_x`` field in the PDBx file. Will populate the unitcell dimensions from the ``_cell`` section. + + +.. _PDBx: + https://pdb101.rcsb.org/learn/guide-to-understanding-pdb-data/beginner’s-guide-to-pdb-structures-and-the-pdbx-mmcif-format +""" +import gemmi +import numpy as np + +from . import base + + + + +class PDBxReader(base.SingleFrameReaderBase): + format = ['cif', 'pdbx'] + units = {'time': None, 'length': 'Angstrom'} + + def _read_first_frame(self): + doc = gemmi.cif.read(self.filename) + + block = doc.sole_block() + + coords = block.find('_atom_site.', ['Cartn_x', 'Cartn_y', 'Cartn_z']) + self.natoms = len(coords) + + xyz = np.zeros((self.natoms, 3), dtype=np.float32) + + for i, (x, y, z) in enumerate(coords): + xyz[i, :] = x, y, z + + ts = self.ts = base.Timestep.from_coordinates(xyz, **self._ts_kwargs) + ts.frame = 0 + + box = block.find('_cell.', ['length_a', 'length_b', 'length_c', + 'angle_alpha', 'angle_beta', 'angle_gamma']) + if box: + unitcell = np.zeros(6, dtype=np.float64) + unitcell[:] = box[0] + + ts.dimensions = unitcell + + if self.convert_units: + # in-place ! + self.convert_pos_from_native(self.ts._pos) + if self.ts.dimensions is not None: + self.convert_pos_from_native(self.ts.dimensions[:3]) + + + + @staticmethod + def parse_n_atoms(filename, **kwargs): + doc = gemmi.cif.read(self.filename) + block = doc.sole_block() + coords = block.find('_atom_site.', ['Cartn_x', 'Cartn_y', 'Cartn_z']) + natoms = len(coords) + del doc + return n_atoms + + diff --git a/package/MDAnalysis/coordinates/PDBx.py b/package/MDAnalysis/coordinates/PDBx.py new file mode 100644 index 00000000000..2c163eef5a3 --- /dev/null +++ b/package/MDAnalysis/coordinates/PDBx.py @@ -0,0 +1,101 @@ +# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 +# +# MDAnalysis --- https://www.mdanalysis.org +# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors +# (see the file AUTHORS for the full list of names) +# +# Released under the GNU Public Licence, v2 or any higher version +# +# Please cite your use of MDAnalysis in published work: +# +# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler, +# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein. +# MDAnalysis: A Python package for the rapid analysis of molecular dynamics +# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th +# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy. +# doi: 10.25080/majora-629e541a-00e +# +# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein. +# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. +# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 +# + +""" +PDBx (mmcif) files in MDAnalysis --- :mod:`MDAnalysis.coordinates.PDBx` +======================================================================= + +Reads coordinates from a PDBx_ (mmcif) format file. Will populate the Universe positions from the +``_atom_site.Cartn_x`` field in the PDBx file. Will populate the unitcell dimensions from the ``_cell`` section. + + +.. _PDBx: + https://pdb101.rcsb.org/learn/guide-to-understanding-pdb-data/beginner’s-guide-to-pdb-structures-and-the-pdbx-mmcif-format +""" +import gemmi +import numpy as np +from gemmi import FractionalBox + +from . import base + + +class PDBxReader(base.SingleFrameReaderBase): + format = ['cif', 'pdbx'] + units = {'time': None, 'length': 'Angstrom'} + + + def __init__(self, filename, convert_units=True, **kwargs): + super().__init__(filename, convert_units=convert_units, **kwargs) + + def _read_first_frame(self): + doc = gemmi.cif.read(self.filename) + + block = doc.sole_block() + + # PDBx/mmCIF with _cell. sections + box = block.find('_cell.', ['length_a', 'length_b', 'length_c', + 'angle_alpha', 'angle_beta', 'angle_gamma']) + # CIF file with _cell_ sections + if not box: + box = block.find('_cell_', ['length_a', 'length_b', 'length_c', + 'angle_alpha', 'angle_beta', 'angle_gamma']) + + if box: + unitcell = np.zeros(6, dtype=np.float64) + unitcell[:] = box[0] + + # PDBx/mmCIF with _cell. sections + coords = block.find('_atom_site.', ['Cartn_x', 'Cartn_y', 'Cartn_z']) + fractional = lambda xyz: xyz + + # CIF file with _cell_ sections + if not coords: + coords = block.find('_atom_site_', ['fract_x', 'fract_y', 'fract_z']) + fractional = lambda xyz: np.multiply(xyz, unitcell[:3]) + + if not coords: + raise ValueError("No coordinates found in the file") + + self.n_atoms = len(coords) + + xyz = np.zeros((self.n_atoms, 3), dtype=np.float32) + + for i, (x, y, z) in enumerate(coords): + xyz[i, :] = x, y, z + + # for CIF: multiply fractional coordinates by unitcell lengths + xyz = fractional(xyz) + + ts = self.ts = base.Timestep.from_coordinates(xyz, **self._ts_kwargs) + ts.frame = 0 + ts.dimensions = unitcell + + if self.convert_units: + # in-place ! + self.convert_pos_from_native(self.ts._pos) + if self.ts.dimensions is not None: + self.convert_pos_from_native(self.ts.dimensions[:3]) + + return ts + + diff --git a/package/MDAnalysis/coordinates/__init__.py b/package/MDAnalysis/coordinates/__init__.py index 9b6a7121bc9..ad1bc5f70f6 100644 --- a/package/MDAnalysis/coordinates/__init__.py +++ b/package/MDAnalysis/coordinates/__init__.py @@ -791,3 +791,4 @@ class can choose an appropriate reader automatically. from . import NAMDBIN from . import FHIAIMS from . import TNG +from . import PDBx diff --git a/package/MDAnalysis/topology/PDBxParser.py b/package/MDAnalysis/topology/PDBxParser.py new file mode 100644 index 00000000000..8c84465692f --- /dev/null +++ b/package/MDAnalysis/topology/PDBxParser.py @@ -0,0 +1,124 @@ +# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 +# +# MDAnalysis --- https://www.mdanalysis.org +# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors +# (see the file AUTHORS for the full list of names) +# +# Released under the GNU Public Licence, v2 or any higher version +# +# Please cite your use of MDAnalysis in published work: +# +# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler, +# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein. +# MDAnalysis: A Python package for the rapid analysis of molecular dynamics +# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th +# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy. +# doi: 10.25080/majora-629e541a-00e +# +# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein. +# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. +# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 +# +""" +PDBx topology parser +==================== + + +See Also +-------- +:class:`MDAnalysis.coordinates.PDBx` + +""" +import gemmi +import numpy as np + +from .base import TopologyReaderBase, change_squash +from ..core.topology import Topology +from ..core.topologyattrs import ( + Atomnames, + Atomids, + AltLocs, + Elements, + ICodes, + RecordTypes, + Resids, + Resnames, + Segids, +) + + +class PDBxParser(TopologyReaderBase): + """Read a Topology from a PDBx file + + Creates the following attributes from these "_atom_site" PDBx loop entries + - "group_PDB" RecordType + - "id" AtomId + - "label_alt_id" AltLoc + - "label_type_symbol" Element + - "label_atom_id" AtomName + - "auth_seq_id" Resid + - "auth_comp_id" Resname + - "pdbx_PDB_ins_code" ICode + - "auth_asym_id" ChainID + """ + format = ['PDBx', 'cif'] + + def parse(self, **kwargs) -> Topology: + doc = gemmi.cif.read(self.filename) + block = doc.sole_block() + + attrs = [] + + def objarr(x): + return np.array(x, dtype=object) + + # hierarchy correspondence: + # seq_id -> residues + # entity_id -> chains + if recordtypes := block.find('_atom_site.', ['group_PDB']): + attrs.append(RecordTypes(recordtypes)) + ids = block.find_loop('_atom_site.id') + n_atoms = len(ids) + attrs.append(Atomids(ids)) + if altlocs := block.find_loop('_atom_site.label_alt_id'): + altlocs = np.array(altlocs, dtype=object) + altlocs[altlocs == '.'] = '' + attrs.append(AltLocs(altlocs)) + if elements_loop := block.find_loop('_atom_site.type_symbol'): + attrs.append(Elements(objarr(elements_loop))) + if names_loop := block.find_loop('_atom_site.label_atom_id'): + attrs.append(Atomnames(objarr(names_loop))) + + # sort out residues/segments + # label_seq_id seems to not cover entire model unlike author versions + resids = np.array(block.find_loop('_entity_poly_seq.num')) + resnames = np.array(block.find_loop('_entity_poly_seq.mon_id')) + icodes = np.array(block.find_loop('_atom_site.pdbx_PDB_ins_code')) + chainids = np.array(block.find_loop('_atom_site.auth_asym_id')) + + try: + residx, (resids, icodes, resnames, chainids) = change_squash( + (resids, icodes), (resids, icodes, resnames, chainids) + ) + segidx, (chainids,) = change_squash((chainids,), (chainids,)) + except IndexError: + ... + attrs.extend(( + Resids(resids), + Resnames(objarr(resnames)), + ICodes(objarr(icodes)), + Segids(chainids), + )) + + n_residues = len(resids) + n_segments = len(chainids) + + return Topology( + n_atoms=n_atoms, + n_res=n_residues, + n_seg=n_segments, + attrs=attrs, + atom_resindex=residx, + residue_segindex=segidx, + ) diff --git a/package/MDAnalysis/topology/__init__.py b/package/MDAnalysis/topology/__init__.py index 32df510f47e..2313c48ce4d 100644 --- a/package/MDAnalysis/topology/__init__.py +++ b/package/MDAnalysis/topology/__init__.py @@ -333,3 +333,4 @@ from . import MinimalParser from . import ITPParser from . import FHIAIMSParser +from . import PDBxParser diff --git a/package/doc/sphinx/source/documentation_pages/coordinates/PDBx.rst b/package/doc/sphinx/source/documentation_pages/coordinates/PDBx.rst new file mode 100644 index 00000000000..50c9c53bd79 --- /dev/null +++ b/package/doc/sphinx/source/documentation_pages/coordinates/PDBx.rst @@ -0,0 +1,2 @@ +.. automodule:: MDAnalysis.coordinates.PDBx + :members: diff --git a/testsuite/MDAnalysisTests/coordinates/test_cif.py b/testsuite/MDAnalysisTests/coordinates/test_cif.py new file mode 100644 index 00000000000..c54252f8901 --- /dev/null +++ b/testsuite/MDAnalysisTests/coordinates/test_cif.py @@ -0,0 +1,8 @@ +import pytest +from MDAnalysisTests.datafiles import PDBX, CIF, MMCIF + + + +def test_pdbx() + +def test_ \ No newline at end of file