From 46d174ff78d352c2dbc3ba8b420fda75ff721500 Mon Sep 17 00:00:00 2001 From: Richard Gowers Date: Fri, 29 Sep 2023 14:46:07 +0100 Subject: [PATCH 01/11] first draft of PDBx Reader --- package/MDAnalysis/coordinates/PDBx.py | 58 ++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 package/MDAnalysis/coordinates/PDBx.py diff --git a/package/MDAnalysis/coordinates/PDBx.py b/package/MDAnalysis/coordinates/PDBx.py new file mode 100644 index 00000000000..ad7f29dcfec --- /dev/null +++ b/package/MDAnalysis/coordinates/PDBx.py @@ -0,0 +1,58 @@ +# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 +# +# MDAnalysis --- https://www.mdanalysis.org +# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors +# (see the file AUTHORS for the full list of names) +# +# Released under the GNU Public Licence, v2 or any higher version +# +# Please cite your use of MDAnalysis in published work: +# +# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler, +# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein. +# MDAnalysis: A Python package for the rapid analysis of molecular dynamics +# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th +# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy. +# doi: 10.25080/majora-629e541a-00e +# +# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein. +# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. +# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 +# + +""" + +""" +import gemmi +import numpy as np + +from . import base + + +class PDBxReader(base.SingleFrameReaderBase): + format = ['cif', 'pdbx'] + units = {'time': None, 'length': 'Angstrom'} + + def _read_first_frame(self): + doc = gemmi.cif.read(self.filename) + + block = doc.sole_block() + + coords = block.find('_atom_site.', ['Cartn_x', 'Cartn_y', 'Cartn_z']) + self.natoms = len(coords) + + xyz = np.zeros((self.natoms, 3), dtype=np.float32) + + ts = self.ts = base.Timestep.from_coordinates(xyz, **self._ts_kwargs) + ts.frame = 0 + + # todo: unit cell + + if self.convert_units: + # in-place ! + self.convert_pos_from_native(self.ts._pos) + if self.ts.dimensions is not None: + self.convert_pos_from_native(self.ts.dimensions[:3]) + + return ts From 63f3f49b96922a03c59af626dde8205b2c94c1de Mon Sep 17 00:00:00 2001 From: Richard Gowers Date: Fri, 29 Sep 2023 14:54:47 +0100 Subject: [PATCH 02/11] unpacked coords in mmcif Reader --- package/MDAnalysis/coordinates/PDBx.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/package/MDAnalysis/coordinates/PDBx.py b/package/MDAnalysis/coordinates/PDBx.py index ad7f29dcfec..5434f56c3dc 100644 --- a/package/MDAnalysis/coordinates/PDBx.py +++ b/package/MDAnalysis/coordinates/PDBx.py @@ -44,6 +44,9 @@ def _read_first_frame(self): xyz = np.zeros((self.natoms, 3), dtype=np.float32) + for i, (x, y, z) in enumerate(coords): + xyz[i, :] = x, y, z + ts = self.ts = base.Timestep.from_coordinates(xyz, **self._ts_kwargs) ts.frame = 0 From 4000fe8cf2bde326151db1dd2dac1f3fb183213a Mon Sep 17 00:00:00 2001 From: Richard Gowers Date: Fri, 29 Sep 2023 15:02:13 +0100 Subject: [PATCH 03/11] add box reading to PDBx Reader --- package/MDAnalysis/coordinates/PDBx.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/package/MDAnalysis/coordinates/PDBx.py b/package/MDAnalysis/coordinates/PDBx.py index 5434f56c3dc..d40236fd9d2 100644 --- a/package/MDAnalysis/coordinates/PDBx.py +++ b/package/MDAnalysis/coordinates/PDBx.py @@ -50,7 +50,13 @@ def _read_first_frame(self): ts = self.ts = base.Timestep.from_coordinates(xyz, **self._ts_kwargs) ts.frame = 0 - # todo: unit cell + box = block.find('_cell.', ['length_a', 'length_b', 'length_c', + 'angle_alpha', 'angle_beta', 'angle_gamma']) + if box: + unitcell = np.zeros(6, dtype=np.float64) + unitcell[:] = box[0] + + ts.dimensions = unitcell if self.convert_units: # in-place ! From ad243719bbdb4d5cd0646bbb91f89b21b93e4ac7 Mon Sep 17 00:00:00 2001 From: Richard Gowers Date: Fri, 29 Sep 2023 15:04:30 +0100 Subject: [PATCH 04/11] pdbx Reader docstring --- package/MDAnalysis/coordinates/PDBx.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/package/MDAnalysis/coordinates/PDBx.py b/package/MDAnalysis/coordinates/PDBx.py index d40236fd9d2..b14cbffa627 100644 --- a/package/MDAnalysis/coordinates/PDBx.py +++ b/package/MDAnalysis/coordinates/PDBx.py @@ -22,7 +22,14 @@ # """ +PDBx (mmcif) files in MDAnalysis --- :mod:`MDAnalysis.coordinates.PDBx` +======================================================================= +Reads coordinates from a PDBx_ (mmcif) format file. + + +.. _PDBx: + https://pdb101.rcsb.org/learn/guide-to-understanding-pdb-data/beginner’s-guide-to-pdb-structures-and-the-pdbx-mmcif-format """ import gemmi import numpy as np From 9f42c5f13acf47e837bc350c635b7a264079c99b Mon Sep 17 00:00:00 2001 From: Richard Gowers Date: Fri, 29 Sep 2023 16:35:59 +0100 Subject: [PATCH 05/11] added PDBx doc stub --- .../doc/sphinx/source/documentation_pages/coordinates/PDBx.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 package/doc/sphinx/source/documentation_pages/coordinates/PDBx.rst diff --git a/package/doc/sphinx/source/documentation_pages/coordinates/PDBx.rst b/package/doc/sphinx/source/documentation_pages/coordinates/PDBx.rst new file mode 100644 index 00000000000..50c9c53bd79 --- /dev/null +++ b/package/doc/sphinx/source/documentation_pages/coordinates/PDBx.rst @@ -0,0 +1,2 @@ +.. automodule:: MDAnalysis.coordinates.PDBx + :members: From f063b14e2e39f3d654795d32b7c52962d8c0623c Mon Sep 17 00:00:00 2001 From: Richard Gowers Date: Fri, 29 Sep 2023 16:37:36 +0100 Subject: [PATCH 06/11] work on PDBx coordinates docstring --- package/MDAnalysis/coordinates/PDBx.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/package/MDAnalysis/coordinates/PDBx.py b/package/MDAnalysis/coordinates/PDBx.py index b14cbffa627..d9b4500f7bc 100644 --- a/package/MDAnalysis/coordinates/PDBx.py +++ b/package/MDAnalysis/coordinates/PDBx.py @@ -25,7 +25,8 @@ PDBx (mmcif) files in MDAnalysis --- :mod:`MDAnalysis.coordinates.PDBx` ======================================================================= -Reads coordinates from a PDBx_ (mmcif) format file. +Reads coordinates from a PDBx_ (mmcif) format file. Will populate the Universe positions from the +``_atom_site.Cartn_x`` field in the PDBx file. Will populate the unitcell dimensions from the ``_cell`` section. .. _PDBx: From 837b861fe4c20a3cd0a09a761b03887f49e05895 Mon Sep 17 00:00:00 2001 From: Richard Gowers Date: Fri, 6 Oct 2023 09:06:45 +0100 Subject: [PATCH 07/11] first pass at PDBx topology parser --- package/MDAnalysis/topology/PDBxParser.py | 122 ++++++++++++++++++++++ 1 file changed, 122 insertions(+) create mode 100644 package/MDAnalysis/topology/PDBxParser.py diff --git a/package/MDAnalysis/topology/PDBxParser.py b/package/MDAnalysis/topology/PDBxParser.py new file mode 100644 index 00000000000..a586b577af5 --- /dev/null +++ b/package/MDAnalysis/topology/PDBxParser.py @@ -0,0 +1,122 @@ +# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 +# +# MDAnalysis --- https://www.mdanalysis.org +# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors +# (see the file AUTHORS for the full list of names) +# +# Released under the GNU Public Licence, v2 or any higher version +# +# Please cite your use of MDAnalysis in published work: +# +# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler, +# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein. +# MDAnalysis: A Python package for the rapid analysis of molecular dynamics +# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th +# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy. +# doi: 10.25080/majora-629e541a-00e +# +# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein. +# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. +# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 +# +""" +PDBx topology parser +==================== + + +See Also +-------- +:class:`MDAnalysis.coordinates.PDBx` + +""" +import gemmi +import numpy as np + +from .base import TopologyReaderBase, change_squash +from ..core.topology import Topology +from ..core.topologyattrs import ( + Atomnames, + Atomids, + AltLocs, + Elements, + ICodes, + RecordTypes, + Resids, + Resnames, + Segids, +) + + +class PDBxParser(TopologyReaderBase): + """Read a Topology from a PDBx file + + Creates the following attributes from these "_atom_site" PDBx loop entries + - "group_PDB" RecordType + - "id" AtomId + - "label_alt_id" AltLoc + - "label_type_symbol" Element + - "label_atom_id" AtomName + - "auth_seq_id" Resid + - "auth_comp_id" Resname + - "pdbx_PDB_ins_code" ICode + - "auth_asym_id" ChainID + """ + format = ['PBDx', 'cif'] + + def parse(self, **kwargs) -> Topology: + doc = gemmi.cif.read(self.filename) + block = doc.sole_block() + + attrs = [] + + def objarr(x): + return np.array(x, dtype=object) + + # hierarchy correspondence: + # seq_id -> residues + # entity_id -> chains + if recordtypes := block.find('_atom_site.group_PDB'): + attrs.append(RecordTypes(recordtypes)) + ids = block.find_loop('_atom_site.id') + n_atoms = len(ids) + attrs.append(Atomids(ids)) + if altlocs := block.find_loop('_atom_site.label_alt_id'): + altlocs = np.array(altlocs, dtype=object) + altlocs[altlocs == '.'] = '' + attrs.append(AltLocs(altlocs)) + if elements_loop := block.find_loop('_atom_site.type_symbol'): + attrs.append(Elements(objarr(elements_loop))) + if names_loop := block.find_loop('_atom_site.label_atom_id'): + attrs.append(Atomnames(objarr(names_loop))) + + # sort out residues/segments + # label_seq_id seems to not cover entire model unlike author versions + resids = block.find_loop('_atom_site.auth_seq_id') + resnames = block.find_loop('_atom_site.auth_comp_id') + icodes = block.find_loop('_atom_site.pdbx_PDB_ins_code') + chainids = block.find_loop('_atom_site.auth_asym_id') + + residx, (resids, icodes, resnames, chainids) = change_squash( + (resids, icodes), (resids, icodes, resnames, chainids) + ) + segidx, (chainids,) = change_squash((chainids,), (chainids,)) + + attrs.extend(( + Resids(resids), + Resnames(objarr(resnames)), + ICodes(objarr(icodes)), + Segids(chainids), + )) + + n_residues = len(resids) + n_segments = len(chainids) + + return Topology( + n_atoms=n_atoms, + n_res=n_residues, + n_seg=n_segments, + attrs=attrs, + atom_resindex=residx, + residue_segindex=segidx, + ) From aeefd5680a0379a7a3855687cfd291537224482e Mon Sep 17 00:00:00 2001 From: Hugo MacDermott-Opeskin Date: Fri, 23 Aug 2024 12:27:39 +0100 Subject: [PATCH 08/11] fix import --- package/MDAnalysis/coordinates/CIF.py | 89 +++++++++++++++++++ package/MDAnalysis/coordinates/__init__.py | 1 + .../MDAnalysisTests/coordinates/test_cif.py | 8 ++ 3 files changed, 98 insertions(+) create mode 100644 package/MDAnalysis/coordinates/CIF.py create mode 100644 testsuite/MDAnalysisTests/coordinates/test_cif.py diff --git a/package/MDAnalysis/coordinates/CIF.py b/package/MDAnalysis/coordinates/CIF.py new file mode 100644 index 00000000000..b22221b1b62 --- /dev/null +++ b/package/MDAnalysis/coordinates/CIF.py @@ -0,0 +1,89 @@ + +# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 +# +# MDAnalysis --- https://www.mdanalysis.org +# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors +# (see the file AUTHORS for the full list of names) +# +# Released under the GNU Public Licence, v2 or any higher version +# +# Please cite your use of MDAnalysis in published work: +# +# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler, +# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein. +# MDAnalysis: A Python package for the rapid analysis of molecular dynamics +# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th +# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy. +# doi: 10.25080/majora-629e541a-00e +# +# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein. +# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. +# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 +# + +""" +PDBx (mmcif) files in MDAnalysis --- :mod:`MDAnalysis.coordinates.PDBx` +======================================================================= + +Reads coordinates from a PDBx_ (mmcif) format file. Will populate the Universe positions from the +``_atom_site.Cartn_x`` field in the PDBx file. Will populate the unitcell dimensions from the ``_cell`` section. + + +.. _PDBx: + https://pdb101.rcsb.org/learn/guide-to-understanding-pdb-data/beginner’s-guide-to-pdb-structures-and-the-pdbx-mmcif-format +""" +import gemmi +import numpy as np + +from . import base + + + + +class PDBxReader(base.SingleFrameReaderBase): + format = ['cif', 'pdbx'] + units = {'time': None, 'length': 'Angstrom'} + + def _read_first_frame(self): + doc = gemmi.cif.read(self.filename) + + block = doc.sole_block() + + coords = block.find('_atom_site.', ['Cartn_x', 'Cartn_y', 'Cartn_z']) + self.natoms = len(coords) + + xyz = np.zeros((self.natoms, 3), dtype=np.float32) + + for i, (x, y, z) in enumerate(coords): + xyz[i, :] = x, y, z + + ts = self.ts = base.Timestep.from_coordinates(xyz, **self._ts_kwargs) + ts.frame = 0 + + box = block.find('_cell.', ['length_a', 'length_b', 'length_c', + 'angle_alpha', 'angle_beta', 'angle_gamma']) + if box: + unitcell = np.zeros(6, dtype=np.float64) + unitcell[:] = box[0] + + ts.dimensions = unitcell + + if self.convert_units: + # in-place ! + self.convert_pos_from_native(self.ts._pos) + if self.ts.dimensions is not None: + self.convert_pos_from_native(self.ts.dimensions[:3]) + + + + @staticmethod + def parse_n_atoms(filename, **kwargs): + doc = gemmi.cif.read(self.filename) + block = doc.sole_block() + coords = block.find('_atom_site.', ['Cartn_x', 'Cartn_y', 'Cartn_z']) + natoms = len(coords) + del doc + return n_atoms + + diff --git a/package/MDAnalysis/coordinates/__init__.py b/package/MDAnalysis/coordinates/__init__.py index 9b6a7121bc9..ad1bc5f70f6 100644 --- a/package/MDAnalysis/coordinates/__init__.py +++ b/package/MDAnalysis/coordinates/__init__.py @@ -791,3 +791,4 @@ class can choose an appropriate reader automatically. from . import NAMDBIN from . import FHIAIMS from . import TNG +from . import PDBx diff --git a/testsuite/MDAnalysisTests/coordinates/test_cif.py b/testsuite/MDAnalysisTests/coordinates/test_cif.py new file mode 100644 index 00000000000..c54252f8901 --- /dev/null +++ b/testsuite/MDAnalysisTests/coordinates/test_cif.py @@ -0,0 +1,8 @@ +import pytest +from MDAnalysisTests.datafiles import PDBX, CIF, MMCIF + + + +def test_pdbx() + +def test_ \ No newline at end of file From 30e4ffa9924d74f5daf0c0ddb174149356fe5d8c Mon Sep 17 00:00:00 2001 From: Hugo MacDermott-Opeskin Date: Fri, 23 Aug 2024 13:42:11 +0100 Subject: [PATCH 09/11] fix n_atoms parsing --- package/MDAnalysis/coordinates/PDBx.py | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/package/MDAnalysis/coordinates/PDBx.py b/package/MDAnalysis/coordinates/PDBx.py index d9b4500f7bc..1d3308bdef3 100644 --- a/package/MDAnalysis/coordinates/PDBx.py +++ b/package/MDAnalysis/coordinates/PDBx.py @@ -42,15 +42,21 @@ class PDBxReader(base.SingleFrameReaderBase): format = ['cif', 'pdbx'] units = {'time': None, 'length': 'Angstrom'} + + # def __init__(self, filename, convert_units=True, **kwargs): + # super().__init__(filename, convert_units=convert_units, **kwargs) + # # set n_atoms + # self.natoms = self.parse_n_atoms(filename) + def _read_first_frame(self): doc = gemmi.cif.read(self.filename) block = doc.sole_block() coords = block.find('_atom_site.', ['Cartn_x', 'Cartn_y', 'Cartn_z']) - self.natoms = len(coords) + self.n_atoms = len(coords) - xyz = np.zeros((self.natoms, 3), dtype=np.float32) + xyz = np.zeros((self.n_atoms, 3), dtype=np.float32) for i, (x, y, z) in enumerate(coords): xyz[i, :] = x, y, z @@ -73,3 +79,15 @@ def _read_first_frame(self): self.convert_pos_from_native(self.ts.dimensions[:3]) return ts + + + # @staticmethod + # def parse_n_atoms(filename, **kwargs): + # with open(filename, 'r') as f: + # doc = gemmi.cif.read(filename) + + # block = doc.sole_block() + + # coords = block.find('_atom_site.', ['Cartn_x', 'Cartn_y', 'Cartn_z']) + # n_atoms = len(coords) + # return n_atoms \ No newline at end of file From b60b57bbe9fcadb76a2abe1c617ac111816f5b70 Mon Sep 17 00:00:00 2001 From: s2123329 Date: Fri, 23 Aug 2024 15:44:55 +0100 Subject: [PATCH 10/11] Add PDBx and CIF file coordinate reading --- package/MDAnalysis/coordinates/PDBx.py | 52 +++++++++++++---------- package/MDAnalysis/topology/PDBxParser.py | 2 +- 2 files changed, 31 insertions(+), 23 deletions(-) diff --git a/package/MDAnalysis/coordinates/PDBx.py b/package/MDAnalysis/coordinates/PDBx.py index 1d3308bdef3..2c163eef5a3 100644 --- a/package/MDAnalysis/coordinates/PDBx.py +++ b/package/MDAnalysis/coordinates/PDBx.py @@ -34,6 +34,7 @@ """ import gemmi import numpy as np +from gemmi import FractionalBox from . import base @@ -43,17 +44,38 @@ class PDBxReader(base.SingleFrameReaderBase): units = {'time': None, 'length': 'Angstrom'} - # def __init__(self, filename, convert_units=True, **kwargs): - # super().__init__(filename, convert_units=convert_units, **kwargs) - # # set n_atoms - # self.natoms = self.parse_n_atoms(filename) + def __init__(self, filename, convert_units=True, **kwargs): + super().__init__(filename, convert_units=convert_units, **kwargs) def _read_first_frame(self): doc = gemmi.cif.read(self.filename) block = doc.sole_block() + # PDBx/mmCIF with _cell. sections + box = block.find('_cell.', ['length_a', 'length_b', 'length_c', + 'angle_alpha', 'angle_beta', 'angle_gamma']) + # CIF file with _cell_ sections + if not box: + box = block.find('_cell_', ['length_a', 'length_b', 'length_c', + 'angle_alpha', 'angle_beta', 'angle_gamma']) + + if box: + unitcell = np.zeros(6, dtype=np.float64) + unitcell[:] = box[0] + + # PDBx/mmCIF with _cell. sections coords = block.find('_atom_site.', ['Cartn_x', 'Cartn_y', 'Cartn_z']) + fractional = lambda xyz: xyz + + # CIF file with _cell_ sections + if not coords: + coords = block.find('_atom_site_', ['fract_x', 'fract_y', 'fract_z']) + fractional = lambda xyz: np.multiply(xyz, unitcell[:3]) + + if not coords: + raise ValueError("No coordinates found in the file") + self.n_atoms = len(coords) xyz = np.zeros((self.n_atoms, 3), dtype=np.float32) @@ -61,16 +83,12 @@ def _read_first_frame(self): for i, (x, y, z) in enumerate(coords): xyz[i, :] = x, y, z + # for CIF: multiply fractional coordinates by unitcell lengths + xyz = fractional(xyz) + ts = self.ts = base.Timestep.from_coordinates(xyz, **self._ts_kwargs) ts.frame = 0 - - box = block.find('_cell.', ['length_a', 'length_b', 'length_c', - 'angle_alpha', 'angle_beta', 'angle_gamma']) - if box: - unitcell = np.zeros(6, dtype=np.float64) - unitcell[:] = box[0] - - ts.dimensions = unitcell + ts.dimensions = unitcell if self.convert_units: # in-place ! @@ -81,13 +99,3 @@ def _read_first_frame(self): return ts - # @staticmethod - # def parse_n_atoms(filename, **kwargs): - # with open(filename, 'r') as f: - # doc = gemmi.cif.read(filename) - - # block = doc.sole_block() - - # coords = block.find('_atom_site.', ['Cartn_x', 'Cartn_y', 'Cartn_z']) - # n_atoms = len(coords) - # return n_atoms \ No newline at end of file diff --git a/package/MDAnalysis/topology/PDBxParser.py b/package/MDAnalysis/topology/PDBxParser.py index a586b577af5..d3194ee6fa7 100644 --- a/package/MDAnalysis/topology/PDBxParser.py +++ b/package/MDAnalysis/topology/PDBxParser.py @@ -62,7 +62,7 @@ class PDBxParser(TopologyReaderBase): - "pdbx_PDB_ins_code" ICode - "auth_asym_id" ChainID """ - format = ['PBDx', 'cif'] + format = ['PDBx', 'cif'] def parse(self, **kwargs) -> Topology: doc = gemmi.cif.read(self.filename) From 5cdcf920770e5f0040d5bcd5b14d789563d92462 Mon Sep 17 00:00:00 2001 From: s2123329 Date: Fri, 23 Aug 2024 16:11:10 +0100 Subject: [PATCH 11/11] Add PDBx parser to guessing --- package/MDAnalysis/topology/PDBxParser.py | 18 ++++++++++-------- package/MDAnalysis/topology/__init__.py | 1 + 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/package/MDAnalysis/topology/PDBxParser.py b/package/MDAnalysis/topology/PDBxParser.py index d3194ee6fa7..8c84465692f 100644 --- a/package/MDAnalysis/topology/PDBxParser.py +++ b/package/MDAnalysis/topology/PDBxParser.py @@ -76,7 +76,7 @@ def objarr(x): # hierarchy correspondence: # seq_id -> residues # entity_id -> chains - if recordtypes := block.find('_atom_site.group_PDB'): + if recordtypes := block.find('_atom_site.', ['group_PDB']): attrs.append(RecordTypes(recordtypes)) ids = block.find_loop('_atom_site.id') n_atoms = len(ids) @@ -92,16 +92,18 @@ def objarr(x): # sort out residues/segments # label_seq_id seems to not cover entire model unlike author versions - resids = block.find_loop('_atom_site.auth_seq_id') - resnames = block.find_loop('_atom_site.auth_comp_id') - icodes = block.find_loop('_atom_site.pdbx_PDB_ins_code') - chainids = block.find_loop('_atom_site.auth_asym_id') + resids = np.array(block.find_loop('_entity_poly_seq.num')) + resnames = np.array(block.find_loop('_entity_poly_seq.mon_id')) + icodes = np.array(block.find_loop('_atom_site.pdbx_PDB_ins_code')) + chainids = np.array(block.find_loop('_atom_site.auth_asym_id')) - residx, (resids, icodes, resnames, chainids) = change_squash( + try: + residx, (resids, icodes, resnames, chainids) = change_squash( (resids, icodes), (resids, icodes, resnames, chainids) ) - segidx, (chainids,) = change_squash((chainids,), (chainids,)) - + segidx, (chainids,) = change_squash((chainids,), (chainids,)) + except IndexError: + ... attrs.extend(( Resids(resids), Resnames(objarr(resnames)), diff --git a/package/MDAnalysis/topology/__init__.py b/package/MDAnalysis/topology/__init__.py index 32df510f47e..2313c48ce4d 100644 --- a/package/MDAnalysis/topology/__init__.py +++ b/package/MDAnalysis/topology/__init__.py @@ -333,3 +333,4 @@ from . import MinimalParser from . import ITPParser from . import FHIAIMSParser +from . import PDBxParser