Merge pull request #30 from martinghunt/flag_hit_both_strands

Flag hit both strands
sanger-pathogens · Jun 10, 2015 · 8b5b95a
2 parents d8a21ef + 5faa9ff
commit 8b5b95a
Show file tree

Hide file tree

Showing 7 changed files with 20 additions and 16 deletions.
diff --git a/ariba/cluster.py b/ariba/cluster.py
@@ -434,8 +434,9 @@ def _fix_contig_orientation(self):
         os.unlink(tmp_coords)
         in_both = to_revcomp.intersection(not_revcomp)
         for name in in_both:
-            print('WARNING: hits to both strands of gene for scaffold. Interpretation of any variants cannot be trusted', name, file=sys.stderr)
+            print('WARNING: hits to both strands of gene for scaffold. Interpretation of any variants cannot be trusted for this scaffold:', name, file=sys.stderr)
             to_revcomp.remove(name)
+            self.status_flag.add('hit_both_strands')
 
         f = pyfastaq.utils.open_file_write(self.final_assembly_fa)
         seq_reader = pyfastaq.sequences.file_reader(self.gapfilled_scaffolds)

diff --git a/ariba/common.py b/ariba/common.py
@@ -1,7 +1,7 @@
 import sys
 import subprocess
 
-version = '0.3.2'
+version = '0.4.0'
 
 def syscall(cmd, allow_fail=False, verbose=False):
     if verbose:

diff --git a/ariba/flag.py b/ariba/flag.py
@@ -10,6 +10,7 @@ class Error (Exception): pass
     'scaffold_graph_bad',
     'assembly_fail',
     'variants_suggest_collapsed_repeat',
+    'hit_both_strands',
 ]
 
 
@@ -26,7 +27,7 @@ def set_flag(self, n):
         for f in self.flags:
             if flag_bits[f] & n != 0:
                 self.flags[f] = True
-                
+
 
     def add(self, f):
         self.flags[f] = True

diff --git a/ariba/summary.py b/ariba/summary.py
@@ -70,7 +70,7 @@ def _load_fofn(self, fofn):
         filenames = [x.rstrip() for x in f.readlines()]
         pyfastaq.utils.close(f)
         return filenames
-    
+
 
     def _check_files_exist(self):
         for fname in self.filenames:
@@ -119,7 +119,7 @@ def _to_summary_number(self, l):
         if f.has('assembly_fail') or not f.has('gene_assembled') or self._pc_id_of_longest(l) <= self.min_id:
             return 0
 
-        if not f.has('complete_orf'):
+        if f.has('hit_both_strands') or (not f.has('complete_orf')):
             return 1
 
         if f.has('unique_contig') and f.has('gene_assembled_into_one_contig'):
@@ -185,11 +185,11 @@ def _write_tsv(self):
         for row in self.rows_out:
             print('\t'.join([str(x) for x in row]), file=f)
         pyfastaq.utils.close(f)
-        
+
 
     def _write_xls(self):
         workbook = openpyxl.Workbook()
-        worksheet = workbook.worksheets[0] 
+        worksheet = workbook.worksheets[0]
         worksheet.title = 'ARIBA_summary'
         for row in self.rows_out:
             worksheet.append(row)

diff --git a/ariba/tests/flag_test.py b/ariba/tests/flag_test.py
@@ -8,14 +8,14 @@
 class TestFlag(unittest.TestCase):
     def test_init_and_to_number(self):
         '''Test __init__ and to_number'''
-        for i in range(128):
+        for i in range(512):
             f = flag.Flag(i)
             self.assertEqual(f.to_number(), i)
 
 
     def test_set_flag(self):
         '''Test set_flag'''
-        for i in range(128):
+        for i in range(512):
             f = flag.Flag()
             f.set_flag(i)
             self.assertEqual(f.to_number(), i)
@@ -24,15 +24,15 @@ def test_set_flag(self):
     def test_add(self):
         '''Test add'''
         f = flag.Flag()
-        expected = [1, 3, 7, 15, 31, 63, 127, 255]
+        expected = [1, 3, 7, 15, 31, 63, 127, 255, 511]
         for i in range(len(flag.flags_in_order)):
             f.add(flag.flags_in_order[i])
             self.assertEqual(f.to_number(), expected[i])
 
 
     def test_str(self):
         '''Test __str__'''
-        for i in range(256):
+        for i in range(512):
             f = flag.Flag(i)
             self.assertEqual(str(f), str(i))
 
@@ -49,6 +49,7 @@ def test_to_long_str(self):
             '[ ] scaffold_graph_bad',
             '[ ] assembly_fail',
             '[ ] variants_suggest_collapsed_repeat',
+            '[ ] hit_both_strands',
         ])
 
         self.assertEqual(expected, f.to_long_string())

diff --git a/ariba/tests/summary_test.py b/ariba/tests/summary_test.py
@@ -62,7 +62,7 @@ def test_load_file(self):
 ]
         dicts = [s._line2dict('\t'.join(x)) for x in lines]
         expected = {'gene1': [dicts[0]], 'gene2': dicts[1:3], 'gene3': [dicts[3]]}
-        got = s._load_file(infile) 
+        got = s._load_file(infile)
         self.assertEqual(expected, got)
 
 
@@ -73,6 +73,7 @@ def test_to_summary_number(self):
             (0, 0),
             (64, 0),
             (7, 1),
+            (259, 1),
             (15, 2),
             (27, 3),
         ]
@@ -97,7 +98,7 @@ def test_gather_output_rows(self):
             ['filename', 'gene1', 'gene2', 'gene3'],
             [infiles[0], 3, 2, 0],
             [infiles[1], 3, 0, 3],
-        ]    
+        ]
         self.assertEqual(expected, s.rows_out)
 
 
@@ -110,7 +111,7 @@ def test_filter_output_rows(self):
             ['file2', 1, 0, 3],
             ['file3', 2, 0, 4],
         ]
-        
+
         expected = [
             ['filename', 'gene1', 'gene3'],
             ['file2', 1, 3],

diff --git a/setup.py b/setup.py
@@ -7,16 +7,16 @@
 
 setup(
     name='ariba',
-    version='0.3.2',
+    version='0.4.0',
     description='ARIBA: Antibiotic Resistance Identification By Assembly',
     packages = find_packages(),
     author='Martin Hunt',
     author_email='[email protected]',
     url='https://github.com/sanger-pathogens/ariba',
     scripts=glob.glob('scripts/*'),
     test_suite='nose.collector',
+    tests_require=['nose >= 1.3'],
     install_requires=[
-        'nose >= 1.3',
         'openpyxl',
         'pyfastaq >= 3.0.1',
         'pysam >= 0.8.1',