Skip to content

Commit

Permalink
Merge pull request #30 from martinghunt/flag_hit_both_strands
Browse files: browse the repository at this point in the history
Flag hit both strands
  • Loading branch information
bewt85 committed Jun 10, 2015
2 parents d8a21ef + 5faa9ff commit 8b5b95a
Show file tree
Hide file tree
Showing 7 changed files with 20 additions and 16 deletions.
3 changes: 2 additions & 1 deletion ariba/cluster.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -434,8 +434,9 @@ def _fix_contig_orientation(self):
os.unlink(tmp_coords)
in_both = to_revcomp.intersection(not_revcomp)
for name in in_both:
print('WARNING: hits to both strands of gene for scaffold. Interpretation of any variants cannot be trusted', name, file=sys.stderr)
print('WARNING: hits to both strands of gene for scaffold. Interpretation of any variants cannot be trusted for this scaffold:', name, file=sys.stderr)
to_revcomp.remove(name)
self.status_flag.add('hit_both_strands')

f = pyfastaq.utils.open_file_write(self.final_assembly_fa)
seq_reader = pyfastaq.sequences.file_reader(self.gapfilled_scaffolds)
Expand Down
2 changes: 1 addition & 1 deletion ariba/common.py
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
import sys
import subprocess

version = '0.3.2'
version = '0.4.0'

def syscall(cmd, allow_fail=False, verbose=False):
if verbose:
Expand Down
3 changes: 2 additions & 1 deletion ariba/flag.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -10,6 +10,7 @@ class Error (Exception): pass
'scaffold_graph_bad',
'assembly_fail',
'variants_suggest_collapsed_repeat',
'hit_both_strands',
]


Expand All @@ -26,7 +27,7 @@ def set_flag(self, n):
for f in self.flags:
if flag_bits[f] & n != 0:
self.flags[f] = True


def add(self, f):
self.flags[f] = True
Expand Down
8 changes: 4 additions & 4 deletions ariba/summary.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -70,7 +70,7 @@ def _load_fofn(self, fofn):
filenames = [x.rstrip() for x in f.readlines()]
pyfastaq.utils.close(f)
return filenames


def _check_files_exist(self):
for fname in self.filenames:
Expand Down Expand Up @@ -119,7 +119,7 @@ def _to_summary_number(self, l):
if f.has('assembly_fail') or not f.has('gene_assembled') or self._pc_id_of_longest(l) <= self.min_id:
return 0

if not f.has('complete_orf'):
if f.has('hit_both_strands') or (not f.has('complete_orf')):
return 1

if f.has('unique_contig') and f.has('gene_assembled_into_one_contig'):
Expand Down Expand Up @@ -185,11 +185,11 @@ def _write_tsv(self):
for row in self.rows_out:
print('\t'.join([str(x) for x in row]), file=f)
pyfastaq.utils.close(f)


def _write_xls(self):
workbook = openpyxl.Workbook()
worksheet = workbook.worksheets[0]
worksheet = workbook.worksheets[0]
worksheet.title = 'ARIBA_summary'
for row in self.rows_out:
worksheet.append(row)
Expand Down
9 changes: 5 additions & 4 deletions ariba/tests/flag_test.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -8,14 +8,14 @@
class TestFlag(unittest.TestCase):
def test_init_and_to_number(self):
'''Test __init__ and to_number'''
for i in range(128):
for i in range(512):
f = flag.Flag(i)
self.assertEqual(f.to_number(), i)


def test_set_flag(self):
'''Test set_flag'''
for i in range(128):
for i in range(512):
f = flag.Flag()
f.set_flag(i)
self.assertEqual(f.to_number(), i)
Expand All @@ -24,15 +24,15 @@ def test_set_flag(self):
def test_add(self):
'''Test add'''
f = flag.Flag()
expected = [1, 3, 7, 15, 31, 63, 127, 255]
expected = [1, 3, 7, 15, 31, 63, 127, 255, 511]
for i in range(len(flag.flags_in_order)):
f.add(flag.flags_in_order[i])
self.assertEqual(f.to_number(), expected[i])


def test_str(self):
'''Test __str__'''
for i in range(256):
for i in range(512):
f = flag.Flag(i)
self.assertEqual(str(f), str(i))

Expand All @@ -49,6 +49,7 @@ def test_to_long_str(self):
'[ ] scaffold_graph_bad',
'[ ] assembly_fail',
'[ ] variants_suggest_collapsed_repeat',
'[ ] hit_both_strands',
])

self.assertEqual(expected, f.to_long_string())
Expand Down
7 changes: 4 additions & 3 deletions ariba/tests/summary_test.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -62,7 +62,7 @@ def test_load_file(self):
]
dicts = [s._line2dict('\t'.join(x)) for x in lines]
expected = {'gene1': [dicts[0]], 'gene2': dicts[1:3], 'gene3': [dicts[3]]}
got = s._load_file(infile)
got = s._load_file(infile)
self.assertEqual(expected, got)


Expand All @@ -73,6 +73,7 @@ def test_to_summary_number(self):
(0, 0),
(64, 0),
(7, 1),
(259, 1),
(15, 2),
(27, 3),
]
Expand All @@ -97,7 +98,7 @@ def test_gather_output_rows(self):
['filename', 'gene1', 'gene2', 'gene3'],
[infiles[0], 3, 2, 0],
[infiles[1], 3, 0, 3],
]
]
self.assertEqual(expected, s.rows_out)


Expand All @@ -110,7 +111,7 @@ def test_filter_output_rows(self):
['file2', 1, 0, 3],
['file3', 2, 0, 4],
]

expected = [
['filename', 'gene1', 'gene3'],
['file2', 1, 3],
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -7,16 +7,16 @@

setup(
name='ariba',
version='0.3.2',
version='0.4.0',
description='ARIBA: Antibiotic Resistance Identification By Assembly',
packages = find_packages(),
author='Martin Hunt',
author_email='[email protected]',
url='https://github.com/sanger-pathogens/ariba',
scripts=glob.glob('scripts/*'),
test_suite='nose.collector',
tests_require=['nose >= 1.3'],
install_requires=[
'nose >= 1.3',
'openpyxl',
'pyfastaq >= 3.0.1',
'pysam >= 0.8.1',
Expand Down

0 comments on commit 8b5b95a

Please sign in to comment.