Skip to content

Commit

Permalink
Merge pull request #389 from nickjcroucher/add_seed
Browse files Browse the repository at this point in the history
Enable seed specification
  • Loading branch information
nickjcroucher authored Nov 30, 2023
2 parents 6d5f0af + b8dbdb4 commit 89a1e55
Show file tree
Hide file tree
Showing 7 changed files with 96 additions and 19 deletions.
13 changes: 12 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ autoreconf -i
make
[sudo] make install
cd python
[sudo] python3 -m pip install .
[sudo] python3 -m pip install [--prefix=$PREFIX] .
```
Use `sudo` to install Gubbins system-wide. If you don't have the permissions, run `configure` with a prefix to install Gubbins in your home directory.

Expand Down Expand Up @@ -132,6 +132,17 @@ Gubbins is free software, licensed under [GPLv2](https://github.com/nickjcrouche
## Feedback/Issues
There is no specific support for development or maintenance of Gubbins. However, we will try to help you out if you report any issues about usage of the software to the [issues page](https://github.com/nickjcroucher/gubbins/issues).

## Development plan
Version 3 incorporates a number of features that were explicitly requested by users (e.g. plotting functions), improved the algorithm's accuracy (e.g. using joint ancestral reconstruction) and were commonly used in published analyses (e.g. using IQTREE2 for phylogeny construction).

Future development will prioritise:
- More efficient phylogenetic processing with modern python libraries
- Parallelisation of recombination searches
- Faster sequence reconstruction through hardware acceleration
- Extension of existing analyses using phylogenetic placement

If you believe there are other improvements that could be added, please describe them on the [issues page](https://github.com/nickjcroucher/gubbins/issues) and tag the suggestion as an "enhancement".

## Citation
If you use this software please cite:
[Croucher N. J., Page A. J., Connor T. R., Delaney A. J., Keane J. A., Bentley S. D., Parkhill J., Harris S.R.
Expand Down
8 changes: 4 additions & 4 deletions python/gubbins/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -744,13 +744,13 @@ def return_algorithm_choices(args,i):
def return_algorithm(algorithm_choice, model, input_args, node_labels = None, extra = None):
initialised_algorithm = None
if algorithm_choice == "fasttree":
initialised_algorithm = FastTree(threads = input_args.threads, model = model, bootstrap = input_args.bootstrap, verbose = input_args.verbose, additional_args = extra)
initialised_algorithm = FastTree(threads = input_args.threads, model = model, seed = input_args.seed, bootstrap = input_args.bootstrap, verbose = input_args.verbose, additional_args = extra)
elif algorithm_choice == "raxml":
initialised_algorithm = RAxML(threads = input_args.threads, model = model, bootstrap = input_args.bootstrap, internal_node_prefix = node_labels, verbose = input_args.verbose, additional_args = extra)
initialised_algorithm = RAxML(threads = input_args.threads, model = model, seed = input_args.seed, bootstrap = input_args.bootstrap, internal_node_prefix = node_labels, verbose = input_args.verbose, additional_args = extra)
elif algorithm_choice == "raxmlng":
initialised_algorithm = RAxMLNG(threads = input_args.threads, model = model, bootstrap = input_args.bootstrap, internal_node_prefix = node_labels, verbose = input_args.verbose, additional_args = extra)
initialised_algorithm = RAxMLNG(threads = input_args.threads, model = model, seed = input_args.seed, bootstrap = input_args.bootstrap, internal_node_prefix = node_labels, verbose = input_args.verbose, additional_args = extra)
elif algorithm_choice == "iqtree":
initialised_algorithm = IQTree(threads = input_args.threads, model = model, bootstrap = input_args.bootstrap, internal_node_prefix = node_labels, verbose = input_args.verbose, use_best = (model is None and input_args.best_model), additional_args = extra)
initialised_algorithm = IQTree(threads = input_args.threads, model = model, seed = input_args.seed, bootstrap = input_args.bootstrap, internal_node_prefix = node_labels, verbose = input_args.verbose, use_best = (model is None and input_args.best_model), additional_args = extra)
elif algorithm_choice == "rapidnj":
initialised_algorithm = RapidNJ(threads = input_args.threads, model = model, bootstrap = input_args.bootstrap, verbose = input_args.verbose, additional_args = extra)
elif algorithm_choice == "star":
Expand Down
2 changes: 2 additions & 0 deletions python/gubbins/run_gubbins.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,8 @@ def parse_input_args():
default = False, action = 'store_true')
treeGroup.add_argument('--sh-test', help='Perform an SH test of node likelihoods', default = False,
action = 'store_true')
treeGroup.add_argument('--seed', help='Set seed for reproducibility of analysis',
default = None, type = int)

modelGroup = parser.add_argument_group('Nucleotide substitution model options')
modelGroup.add_argument('--model', '-M', help='Nucleotide substitution model (not all available for all '
Expand Down
49 changes: 49 additions & 0 deletions python/gubbins/tests/test_dependencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,18 @@ def test_fasttree(self):
self.cleanup('multiple_recombinations')
assert exit_code == 0

def test_fasttree_seed(self):
exit_code = 1
parser = run_gubbins.parse_input_args()
common.parse_and_run(parser.parse_args(["--tree-builder", "fasttree",
"--verbose", "--iterations", "3",
"--seed","42",
"--threads", "1",
os.path.join(data_dir, 'multiple_recombinations.aln')]))
exit_code = self.check_for_output_files('multiple_recombinations')
self.cleanup('multiple_recombinations')
assert exit_code == 0

# Test resuming a default analysis
def test_fasttree_resume(self):
exit_code = 1
Expand Down Expand Up @@ -122,6 +134,18 @@ def test_iqtree(self):
self.cleanup('multiple_recombinations')
assert exit_code == 0

def test_iqtree_seed(self):
exit_code = 1
parser = run_gubbins.parse_input_args()
common.parse_and_run(parser.parse_args(["--tree-builder", "iqtree",
"--verbose", "--iterations", "3",
"--seed","42",
"--threads", "1",
os.path.join(data_dir, 'multiple_recombinations.aln')]))
exit_code = self.check_for_output_files('multiple_recombinations')
self.cleanup('multiple_recombinations')
assert exit_code == 0

def test_iqtree_custom_model(self):
exit_code = 1
parser = run_gubbins.parse_input_args()
Expand Down Expand Up @@ -246,6 +270,18 @@ def test_raxml(self):
self.cleanup('multiple_recombinations')
assert exit_code == 0

def test_raxml_seed(self):
exit_code = 1
parser = run_gubbins.parse_input_args()
common.parse_and_run(parser.parse_args(["--tree-builder", "raxml",
"--verbose", "--iterations", "3",
"--seed","42",
"--threads", "1",
os.path.join(data_dir, 'multiple_recombinations.aln')]))
exit_code = self.check_for_output_files('multiple_recombinations')
self.cleanup('multiple_recombinations')
assert exit_code == 0

def test_raxml_custom_model(self):
exit_code = 1
parser = run_gubbins.parse_input_args()
Expand Down Expand Up @@ -301,6 +337,19 @@ def test_raxmlng(self):
self.cleanup('multiple_recombinations')
assert exit_code == 0

def test_raxmlng_seed(self):
exit_code = 1
parser = run_gubbins.parse_input_args()
common.parse_and_run(parser.parse_args(["--tree-builder", "raxmlng",
"--model","GTR",
"--verbose", "--iterations", "3",
"--seed","42",
"--threads", "1",
os.path.join(data_dir, 'multiple_recombinations.aln')]))
exit_code = self.check_for_output_files('multiple_recombinations')
self.cleanup('multiple_recombinations')
assert exit_code == 0

def test_raxmlng_custom_model(self):
exit_code = 1
parser = run_gubbins.parse_input_args()
Expand Down
6 changes: 6 additions & 0 deletions python/gubbins/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,3 +107,9 @@ def test_verbose_printer(self):
printer.print(["AAA", "BBB"])
printed = f.getvalue()
assert printed == "AAA-BBB\n"

def test_seed(self):
set_seed_val = utils.set_seed(42)
assert set_seed_val == "42"
random_seed_val = utils.set_seed(None)
assert(int(random_seed_val) < 10001)
28 changes: 14 additions & 14 deletions python/gubbins/treebuilders.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
import sys
import os
import subprocess
from random import randint

from Bio import SeqIO

Expand Down Expand Up @@ -129,7 +128,7 @@ def bootstrapping_command(self, alignment_filename: str, input_tree: str, basena
class FastTree:
"""Class for operations with the FastTree executable"""

def __init__(self, threads: int, bootstrap = 0, model='GTRCAT', verbose=False, additional_args = None):
def __init__(self, threads: int, bootstrap = 0, model='GTRCAT', seed = None, verbose=False, additional_args = None):
"""Initialises the object"""
self.verbose = verbose
self.threads = threads
Expand All @@ -139,9 +138,10 @@ def __init__(self, threads: int, bootstrap = 0, model='GTRCAT', verbose=False, a
self.alignment_suffix = ".snp_sites.aln"
self.bootstrap = bootstrap
self.additional_args = additional_args
self.seed = utils.set_seed(seed)

# Identify executable
self.potential_executables = ["FastTree", "fasttree"]
self.potential_executables = ["FastTreeMP","fasttreeMP","FastTree", "fasttree"]
self.executable = utils.choose_executable(self.potential_executables)
if self.executable is None:
sys.exit("No usable version of FastTree could be found.")
Expand All @@ -164,6 +164,7 @@ def __init__(self, threads: int, bootstrap = 0, model='GTRCAT', verbose=False, a
command.extend(["-gtr"])
else:
command.extend([self.model])
command.extend(["-seed",self.seed])
# Additional arguments
if self.additional_args is not None:
command.extend([self.additional_args])
Expand Down Expand Up @@ -256,7 +257,7 @@ def get_bootstrapped_trees_file(self, tmp: str, basename: str) -> str:
class IQTree:
"""Class for operations with the IQTree executable"""

def __init__(self, threads: 1, model: str, bootstrap = 0, internal_node_prefix="", verbose=False, use_best=False, additional_args = None):
def __init__(self, threads: 1, model: str, bootstrap = 0, seed = None, internal_node_prefix="", verbose=False, use_best=False, additional_args = None):
"""Initialises the object"""
self.verbose = verbose
self.threads = threads
Expand All @@ -271,6 +272,7 @@ def __init__(self, threads: 1, model: str, bootstrap = 0, internal_node_prefix="
self.internal_node_prefix = internal_node_prefix
self.bootstrap = bootstrap
self.use_best = use_best
self.seed = utils.set_seed(seed)
self.additional_args = additional_args

# Construct base command
Expand Down Expand Up @@ -303,6 +305,7 @@ def __init__(self, threads: 1, model: str, bootstrap = 0, internal_node_prefix="
command.extend(["-m","GTR+G4"])
else:
command.extend(["-m",self.model])
command.extend(["-seed",self.seed])
# Additional arguments
if self.additional_args is not None:
command.extend([self.additional_args])
Expand Down Expand Up @@ -432,7 +435,7 @@ def run_model_comparison(self, alignment_filename: str, basename: str) -> str:
class RAxML:
"""Class for operations with the RAxML executable"""

def __init__(self, threads: 1, model='GTRCAT', bootstrap = 0, internal_node_prefix="", verbose=False, additional_args = None):
def __init__(self, threads: 1, model='GTRCAT', bootstrap = 0, seed = None, internal_node_prefix="", verbose=False, additional_args = None):
"""Initialises the object"""
self.verbose = verbose
self.threads = threads
Expand All @@ -446,6 +449,7 @@ def __init__(self, threads: 1, model='GTRCAT', bootstrap = 0, internal_node_pref
self.alignment_suffix = ".phylip"
self.internal_node_prefix = internal_node_prefix
self.bootstrap = bootstrap
self.seed = utils.set_seed(seed)
self.additional_args = additional_args

self.single_threaded_executables = ['raxmlHPC-AVX2', 'raxmlHPC-AVX', 'raxmlHPC-SSE3', 'raxmlHPC']
Expand All @@ -465,9 +469,6 @@ def __init__(self, threads: 1, model='GTRCAT', bootstrap = 0, internal_node_pref
if self.threads > 1:
command.extend(["-T", str(self.threads)])

# Set a seed
command.extend(["-p",str(randint(0, 10000))])

# Add flags
command.extend(["-safe"])
if self.model == 'JC':
Expand All @@ -482,6 +483,7 @@ def __init__(self, threads: 1, model='GTRCAT', bootstrap = 0, internal_node_pref
command.extend(["-m","GTRGAMMA"])
else:
command.extend(["-m", self.model])
command.extend(["-p",self.seed])
# Additional arguments
if self.additional_args is not None:
command.extend([self.additional_args])
Expand Down Expand Up @@ -579,9 +581,7 @@ def bootstrapping_command(self, alignment_filename: str, input_tree: str, basena
command = self.base_command.copy()
command.extend(["-s", alignment_filename, "-n", basename + ".bootstrapped_trees"])
command.extend(["-w",tmp])
p_seed = str(randint(0, 10000))
command.extend(["-p",p_seed])
command.extend(["-x",p_seed])
command.extend(["-x",self.seed])
command.extend(["-#",str(self.bootstrap)])
# Output
if not self.verbose:
Expand All @@ -592,8 +592,6 @@ def bootstrapping_command(self, alignment_filename: str, input_tree: str, basena
def sh_test(self, alignment_filename: str, input_tree: str, basename: str, tmp: str) -> str:
"""Runs a single branch support test"""
command = self.base_command.copy()
p_seed = str(randint(0, 10000))
command.extend(["-p",p_seed])
command.extend(["-f", "J"])
command.extend(["-s", alignment_filename, "-n", input_tree + ".sh_support"])
command.extend(["-t", input_tree])
Expand All @@ -610,7 +608,7 @@ def get_bootstrapped_trees_file(self, tmp: str, basename: str) -> str:
class RAxMLNG:
"""Class for operations with the RAxML executable"""

def __init__(self, threads: 1, model: str, bootstrap = 0, internal_node_prefix = "", verbose = False, additional_args = None):
def __init__(self, threads: 1, model: str, bootstrap = 0, seed = None, internal_node_prefix = "", verbose = False, additional_args = None):
"""Initialises the object"""
self.verbose = verbose
self.threads = threads
Expand All @@ -624,6 +622,7 @@ def __init__(self, threads: 1, model: str, bootstrap = 0, internal_node_prefix =
self.alignment_suffix = ".phylip"
self.internal_node_prefix = internal_node_prefix
self.bootstrap = bootstrap
self.seed = utils.set_seed(seed)
self.additional_args = additional_args

self.single_threaded_executables = ['raxml-ng']
Expand Down Expand Up @@ -655,6 +654,7 @@ def __init__(self, threads: 1, model: str, bootstrap = 0, internal_node_prefix =
command.extend(["GTR+G"])
else:
command.extend([self.model])
command.extend(["--seed",self.seed])
# Additional arguments
if self.additional_args is not None:
command.extend([self.additional_args])
Expand Down
9 changes: 9 additions & 0 deletions python/gubbins/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import re
import numpy as np
import collections
from random import randint
try:
from multiprocessing.managers import SharedMemoryManager
NumpyShared = collections.namedtuple('NumpyShared', ('name', 'shape', 'dtype'))
Expand Down Expand Up @@ -195,3 +196,11 @@ def extend_args(var,add):
var.extend([add])
var = " ".join(var)
return var

def set_seed(seed):
"""Set seed when specified"""
if seed is None:
seed = str(randint(0, 10000))
else:
seed = str(seed)
return seed

0 comments on commit 89a1e55

Please sign in to comment.