Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add zone selection to support a box like selection from given selection #4324

Open
wants to merge 15 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions package/AUTHORS
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,7 @@ Chronological list of authors
- Valerij Talagayev
- Kurt McKee
- Fabian Zills
- Yun-Pei Liu



Expand Down
3 changes: 2 additions & 1 deletion package/CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ The rules for this file:
??/??/?? IAlibay, HeetVekariya, marinegor, lilyminium, RMeli,
ljwoods2, aditya292002, pstaerk, PicoCentauri, BFedder,
tyler.je.reddy, SampurnaM, leonwehrhan, kainszs, orionarcher,
yuxuanzhuang, PythonFZ
yuxuanzhuang, PythonFZ, Cloudac7

* 2.8.0

Expand Down Expand Up @@ -52,6 +52,7 @@ Fixes
* Fix groups.py doctests using sphinx directives (Issue #3925, PR #4374)

Enhancements
* Add a new geometric selection: box (Issue #4323, PR #4324)
* Add `analysis.DSSP` module for protein secondary structure assignment, based on [pydssp](https://github.com/ShintaroMinami/PyDSSP)
* Added a tqdm progress bar for `MDAnalysis.analysis.pca.PCA.transform()`
(PR #4531)
Expand Down
19 changes: 19 additions & 0 deletions package/MDAnalysis/core/groups.py
Original file line number Diff line number Diff line change
Expand Up @@ -3271,6 +3271,25 @@ def select_atoms(self, sel, *othersel, periodic=True, rtol=1e-05,
radius 5, external radius 10 centered on the COG. In z, the
cylinder extends from 10 above the COG to 8 below. Positive
values for *zMin*, or negative ones for *zMax*, are allowed.
box *dimensions* *dN_min* *dN_max* [*dN_min* *dN_max*] [*dN_min* *dN_max*] *selection*
Cloudac7 marked this conversation as resolved.
Show resolved Hide resolved
Select all atoms within a box region centered
on the center of geometry (COG) of a given selection.
*dimensions* Specifies which dimension(s) to apply
the box selection on. Can be ``x``, ``y``, ``z``, ``xy``,
``yz``, ``xz``, or ``xyz`. *dN_min*, *dN_max* are the minimum
and maximum bounds along each specified dimension.
Positive values are above/right/front of the COG,
negatives are below/left/behind, and should be specified
for each dimension. *selection* specifies the selection
to center the box on. e.g. ``box x -5 10 protein``
selects a 15 Angstrom box along x centered
on the COG of protein, extending 5 Angstroms
below to 10 Angstroms above. ``box yz -8 10 -10 6 protein``
selects a box with y extending 8 below to 10 above the COG,
and z extending 10 below to 6 above.
``box xyz -5 10 -8 6 -7 9 protein`` selects
a 3D box with x -5 to 10, y -8 to 6, and z -7 to 9 relative
to the protein COG.

**Connectivity**

Expand Down
90 changes: 90 additions & 0 deletions package/MDAnalysis/core/selection.py
Original file line number Diff line number Diff line change
Expand Up @@ -537,6 +537,96 @@ def _apply(self, group):
return group[np.asarray(indices, dtype=np.int64)]


class BoxSelection(Selection):
token = "box"
precedence = 1
axis_map = ["x", "y", "z"]
axis_set = {"x", "y", "z", "xy", "xz", "yz", "xyz"}

def __init__(self, parser, tokens):
super().__init__(parser, tokens)
self.periodic = parser.periodic
self.direction = tokens.popleft()
self.xmin, self.xmax = None, None
self.ymin, self.ymax = None, None
self.zmin, self.zmax = None, None

if self.direction in self.axis_set:
for d in self.direction:
if d == "x":
self.xmin = float(tokens.popleft())
self.xmax = float(tokens.popleft())
if self.xmin > self.xmax:
raise ValueError("xmin must be less than or equal to xmax")
elif d == "y":
self.ymin = float(tokens.popleft())
self.ymax = float(tokens.popleft())
if self.ymin > self.ymax:
raise ValueError("ymin must be less than or equal to ymax")
elif d == "z":
self.zmin = float(tokens.popleft())
self.zmax = float(tokens.popleft())
if self.zmin > self.zmax:
raise ValueError("zmin must be less than or equal to zmax")
else:
raise ValueError(
"The direction '{}' is not valid. "
"Must be one of {}"
"".format(self.direction, ", ".join(self.axis_set))
)

self.sel = parser.parse_expression(self.precedence)

@return_empty_on_apply
def _apply(self, group):
sel = self.sel.apply(group)
if len(sel) == 0:
return group[[]]
# Calculate vectors between point of interest and our group
vecs = group.positions - sel.center_of_geometry()
range_map = {
0: (self.xmin, self.xmax),
1: (self.ymin, self.ymax),
2: (self.zmin, self.zmax),
}

if self.periodic and group.dimensions is not None:
box = group.dimensions[:3]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure this is correct for triclinic boxes, I think instead you want the trace of the 3x3 vector representation

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In fact, I'm not sure if the check necessary. In my opinion, it might be nature to select a zone a little larger than the unit cell, yielding all of the atoms in the unit cell on the corresponding direction. Although it would be misleading because of PBC.


for idx, limits in range_map.items():
axis_index = idx
axis_min, axis_max = limits[0], limits[1]
if axis_min is None or axis_max is None:
continue
axis_height = axis_max - axis_min
if axis_height > box[axis_index]:
raise NotImplementedError(
"The total length of the box selection in {} ({:.3f}) "
"is larger than the unit cell's {} dimension ({:.3f}). "
"Can only do selections where it is smaller or equal."
"".format(
self.axis_map[axis_index],
axis_height,
self.axis_map[axis_index],
box[axis_index],
)
)

vecs = distances.minimize_vectors(vecs, group.dimensions)

# Deal with each dimension criteria
mask = None
for idx, limits in range_map.items():
if limits[0] is None or limits[1] is None:
continue
if mask is None:
mask = (vecs[:, idx] > limits[0]) & (vecs[:, idx] < limits[1])
else:
mask &= (vecs[:, idx] > limits[0]) & (vecs[:, idx] < limits[1])

return group[mask]


class AtomSelection(Selection):
token = 'atom'

Expand Down
1 change: 1 addition & 0 deletions package/doc/sphinx/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import sys
import os
import datetime

import MDAnalysis as mda
# Custom MDA Formating
from pybtex.style.formatting.unsrt import Style as UnsrtStyle
Expand Down
16 changes: 16 additions & 0 deletions package/doc/sphinx/source/documentation_pages/selections.rst
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,22 @@ cyzone *externalRadius* *zMax* *zMin* *selection*
relative to the COG of *selection*, instead of absolute z-values
in the box.

box *dimensions* *dN_min* *dN_max* *selection*
Select all atoms within a box region centered on the center of
geometry (COG) of a given selection. *dimensions* Specifies
which dimension(s) to apply the box selection on.
Can be ``x``, ``y``, ``z``, ``xy``, ``yz``, ``xz``, or ``xyz`.
*dN_min*, *dN_max* are the minimum and maximum
bounds along the first specified dimension. Positive values are
above/right/front of the COG, negatives are below/left/behind.
Should be specified for each dimension. *selection* specifies the selection
to center the box on. e.g. ``box x -5 10 protein`` selects a 15 Angstrom
box along x centered on the COG of protein, extending 5 Angstroms below to
10 Angstroms above. ``box yz -8 10 -10 6 protein`` selects a box with
y extending 8 below to 10 above the COG, and z extending 10 below to 6 above.
``box xyz -5 10 -8 6 -7 9 protein`` selects a 3D box with x -5 to 10,
y -8 to 6, and z -7 to 9 relative to the protein COG.

point *x* *y* *z* *distance*
selects all atoms within a cutoff of a point in space, make sure
coordinate is separated by spaces, e.g. ``point 5.0 5.0 5.0 3.5``
Expand Down
54 changes: 54 additions & 0 deletions testsuite/MDAnalysisTests/core/test_atomselections.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,22 @@ def test_point(self, universe):

assert_equal(set(ag.indices), set(idx))

@pytest.mark.parametrize(
"selstr, expected_value",
[
("box x -2.0 2.0 index 1281", 418),
("box yz -2.0 2.0 -2.0 2.0 index 1280", 58),
("box xyz -2.0 2.0 -2.0 2.0 -2.0 2.0 index 1279", 10),
],
)
def test_box(self, universe, selstr, expected_value):
sel = universe.select_atoms(selstr)
assert_equal(len(sel), expected_value)

def test_empty_box(self, universe):
empty = universe.select_atoms("box y 10 -10 name NOT_A_NAME")
assert_equal(len(empty), 0)

def test_prop(self, universe):
sel = universe.select_atoms('prop y <= 16')
sel2 = universe.select_atoms('prop abs z < 8')
Expand Down Expand Up @@ -802,6 +818,34 @@ def test_sphzone(self, u, periodic, expected):

assert len(sel) == expected

@pytest.mark.parametrize("periodic,expected", ([True, 29], [False, 17]))
def test_box(self, u, periodic, expected):
sel = u.select_atoms("box xyz 2 5 -5 10 -2 6 resid 1", periodic=periodic)

assert len(sel) == expected

@pytest.mark.parametrize(
"selection,error,expected",
(
[
"box xyz -5 10 -90 90 -2 6 resid 1",
NotImplementedError,
"The total length of the box selection in y",
],
[
"box yyy -5 10 -7 7 -2 6 resid 1",
SelectionError,
"Must be combination of",
],
["box a 10 -5 resid 1", SelectionError, "Must be combination of"],
),
)
def test_box_error(self, u, selection, error, expected):
with pytest.raises(error) as excinfo:
u.select_atoms(selection)
exec_msg = str(excinfo.value)
assert expected in exec_msg


class TestTriclinicDistanceSelections(BaseDistanceSelection):
@pytest.fixture()
Expand Down Expand Up @@ -865,6 +909,14 @@ def test_empty_sphzone(self, u):
empty = u.select_atoms('sphzone 5.0 name NOT_A_NAME')
assert len(empty) == 0

def test_box(self, u):
ag = u.select_atoms("box z -2.5 2.5 resid 1")
assert len(ag) == 4237

def test_empty_box(self, u):
ag = u.select_atoms("box z -2.5 2.5 name NOT_A_NAME")
assert len(ag) == 0

def test_point_1(self, u):
# The example selection
ag = u.select_atoms('point 5.0 5.0 5.0 3.5')
Expand Down Expand Up @@ -1274,6 +1326,7 @@ def test_similarity_selection_icodes(u_pdb_icodes, selection, n_atoms):

assert len(sel.atoms) == n_atoms


@pytest.mark.parametrize('selection', [
'all', 'protein', 'backbone', 'nucleic', 'nucleicbackbone',
'name O', 'name N*', 'resname stuff', 'resname ALA', 'type O',
Expand All @@ -1283,6 +1336,7 @@ def test_similarity_selection_icodes(u_pdb_icodes, selection, n_atoms):
'sphlayer 0 10 index 1', 'cyzone 15 4 -8 index 0',
'cylayer 5 10 10 -8 index 1', 'prop abs z <= 100',
'byres index 0', 'same resid as index 0',
'box xz 3 2 4 -5 index 0',
])
def test_selections_on_empty_group(u_pdb_icodes, selection):
ag = u_pdb_icodes.atoms[[]].select_atoms(selection)
Expand Down
Loading