Tests #95

Closed
wants to merge 2 commits into from
140 changes: 140 additions & 0 deletions bin/primers_to_amplicons.py
@@ -0,0 +1,140 @@
#!/usr/bin/env python
'''Script to create amplicon bed file from primer bed file with minimal dependencies'''

import argparse
import csv
import re
from collections import defaultdict
from typing import Generator

def init_parser() -> argparse.ArgumentParser:
"""
Specify command line arguments
Returns command line parser with inputs
"""
parser = argparse.ArgumentParser()
parser.add_argument(
'-b',
'--bed',
required=True,
type=str,
help='Path to input primer bed file'
)
parser.add_argument(
'-o',
'--outfile',
required=False,
default='amplicon.bed',
type=str,
help='Name for output amplicon bed file. Default: "amplicon.bed"'
)
return parser

def primer_pair_generator(bed: str, primer_rgx: str = r'^(.*)_(LEFT|RIGHT).*$') -> Generator[tuple, None, None]:
    '''
    Yield (primer_prefix, left_primer_row, right_primer_row) tuples from the input bed file
    Primers are paired through primer_rgx, which requires the names to contain:
        _LEFT or _RIGHT
    '''
pair_dict = defaultdict(lambda: [None, None])
with open(bed, 'r') as handle:
reader = csv.reader(handle, delimiter='\t')
for row in reader:
            # Bed file needs at least 5 columns (chrom, start, stop, name, pool)
if len(row) < 5:
continue
name = str(row[3])

# Pair based on if LEFT or RIGHT match and yield upon a pair
primer_match = re.search(primer_rgx, name)
if primer_match:
if primer_match.group(2) == 'LEFT':
primer_prefix = primer_match.group(1)
if primer_prefix not in pair_dict:
pair_dict[primer_prefix][0] = row
                    # In case of alt primers, don't want to yield yet
                    elif pair_dict[primer_prefix][0] is not None:
                        print(f'Skipping {name} - already has a LEFT primer in dict')
                        continue
                    else:
                        # RIGHT primer was stored first; yield (prefix, left_row, right_row)
                        yield primer_prefix, row, pair_dict[primer_prefix][1]
                        del pair_dict[primer_prefix]
elif primer_match.group(2) == 'RIGHT':
primer_prefix = primer_match.group(1)
if primer_prefix not in pair_dict:
pair_dict[primer_prefix][1] = row
# In case of alt primers, don't want to yield yet
                    elif pair_dict[primer_prefix][1] is not None:
print(f'Skipping {name} - already has a primer in dict')
continue
else:
yield primer_prefix, pair_dict[primer_prefix][0], row
del pair_dict[primer_prefix]

def find_primer_prefix(primer_names: list) -> str:
    '''
    Return the longest common prefix shared by all primer names,
    trimming a trailing '_' (e.g. ['scheme_1', 'scheme_2'] -> 'scheme')
    '''
prefix = ""

    # Iterate through characters (bounded by the shortest name) and check that they all match
    for i in range(min(len(s) for s in primer_names)):
if all(s[i] == primer_names[0][i] for s in primer_names):
prefix += primer_names[0][i]
else:
break

# Don't end with an '_'
if prefix.endswith('_'):
prefix = prefix[0:-1]

return prefix


def main() -> None:
'''Run the program'''
# Init Parser and set arguments
parser = init_parser()
args = parser.parse_args()

# Use variables to find the starting and end position of the scheme
scheme_start = 1000000000
scheme_end = 0
primer_names = []

# Write amplicon bed file
with open(args.outfile, 'w') as f:
for ppair in primer_pair_generator(args.bed):
            # ppair = (primer_name, left_primer_row, right_primer_row)
            # Check starts and stops to make sure that start < stop; if not, swap them
start_l, stop_l, start_r, stop_r = int(ppair[1][1]), int(ppair[1][2]), int(ppair[2][1]), int(ppair[2][2])
if start_l > stop_l:
start_l, stop_l = stop_l, start_l
if start_r > stop_r:
start_r, stop_r = stop_r, start_r

            # Track the overall tiled region (first left-primer end to last right-primer start)
if scheme_start > stop_l:
scheme_start = stop_l
if scheme_end < start_r:
scheme_end = start_r

# Track primer names
primer_names.append(ppair[0])

# Write output
chrom = ppair[1][0]
pname = ppair[0]
pool = ppair[1][4]
f.write(f'{chrom}\t{stop_l}\t{start_r}\t{pname}\t{pool}\t+\n')

# Write the region tiled to file in case it is needed
    with open('tiling_region.bed', 'w') as f:
f.write(f'{chrom}\t{scheme_start}\t{scheme_end}\n')

# Get prefix for the primers in case it is needed
primer_prefix = find_primer_prefix(primer_names)
    with open('primer_prefix.txt', 'w') as f:
f.write(primer_prefix)

if __name__ == '__main__':
main()
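
A minimal usage sketch of the pairing generator (illustrative only: the bed rows, coordinates, and temporary-file handling below are invented for this example and are not part of the PR; it also assumes bin/ is importable as a module):

# illustrate_pairing.py -- hypothetical example, not part of this PR
import tempfile

from primers_to_amplicons import primer_pair_generator  # assumes bin/ is on PYTHONPATH

# Toy primer bed rows: chrom, start, stop, name, pool (values invented)
rows = [
    "MN908947.3\t30\t54\tscheme_1_LEFT\t1",
    "MN908947.3\t1183\t1205\tscheme_1_RIGHT\t1",
    "MN908947.3\t1100\t1128\tscheme_2_LEFT\t2",
    "MN908947.3\t2244\t2266\tscheme_2_RIGHT\t2",
]
with tempfile.NamedTemporaryFile('w', suffix='.bed', delete=False) as tmp:
    tmp.write('\n'.join(rows) + '\n')

for prefix, left, right in primer_pair_generator(tmp.name):
    # Each amplicon spans from the end of the left primer to the start of the right primer
    print(prefix, left[2], right[1])  # scheme_1 54 1183 / scheme_2 1128 2244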
Empty file modified bin/run_ncovtools.sh
100644 → 100755
Empty file.
2 changes: 1 addition & 1 deletion extra_data/config.yaml
@@ -39,7 +39,7 @@ variants_pattern: "{data_root}/{sample}.pass.vcf.gz"
# in the tree by providing a fasta file here
#tree_include_consensus: some_genomes_from_gisaid.fasta

# some plots can by annotated with external metadata. this
# some plots can be annotated with external metadata. This
# file contains the metadata in simple tab-delimited format
# one column must be 'sample'
metadata: metadata.tsv
86 changes: 86 additions & 0 deletions modules/schemes.nf
@@ -0,0 +1,86 @@
// Scheme related processes to go with scheme subworkflow
process downloadScheme {
// Download primer scheme from given repo URL to primer-schemes directory
tag { params.schemeRepoURL }

output:
path "primer-schemes", emit: scheme

script:
"""
git clone ${params.schemeRepoURL} primer-schemes
"""
}
process validateScheme {
publishDir "${params.outdir}/${task.process.replaceAll(":","_")}", pattern: "primer-schemes", mode: "copy"

input:
path scheme

output:
path "primer-schemes/${params.scheme}/${schemeFolder}/*.reference.fasta", emit: reference
path "primer-schemes/${params.scheme}/${schemeFolder}/*.primer.bed", emit: primer_bed // primer.bed for now as the repos have some conflicting scheme.bed
tuple val(schemeVersion), path("primer-schemes"), emit: scheme

script:
/*
    ARTIC requires the following:
- Scheme folder starting with a V (ex. Vfreed/)
- Scheme input also starting with a V (ex. Vfreed)

    So a few checks are done to attempt to meet those requirements.
    Note that 'def' is not used for schemeVersion/schemeFolder as they are needed in the outputs
*/
schemeVersion = params.schemeVersion
schemeFolder = params.schemeVersion
if ( ! schemeVersion.startsWith("V") ) {
schemeVersion = "V" + schemeVersion
schemeFolder = "V" + schemeFolder
}
"""
# Check for directory being called primer-schemes to match everything up
if [[ "$scheme" != "primer-schemes" ]]; then
mv $scheme primer-schemes
fi

# Adjust folder to make sure it starts with a V
# If we can't find the V{schemeVersion} folder, then check for just {schemeVersion} folder and add the V
if [ ! -d primer-schemes/${params.scheme}/${schemeFolder} ]; then
if [ -d primer-schemes/${params.scheme}/${params.schemeVersion} ]; then
mv primer-schemes/${params.scheme}/${params.schemeVersion} primer-schemes/${params.scheme}/${schemeFolder}
else
echo "ERROR: Cannot find input scheme version ${params.schemeVersion} or adjusted ${schemeVersion}"
exit 1
fi
fi

# File checks
if [ ! -f primer-schemes/${params.scheme}/${schemeFolder}/*reference.fasta ]; then
echo "ERROR: Reference Fasta not found in 'primer-schemes/${params.scheme}/${schemeFolder}/*reference.fasta'"
exit 1
    elif [ ! -f primer-schemes/${params.scheme}/${schemeFolder}/*primer.bed ]; then
        echo "ERROR: Primer bed file not found in 'primer-schemes/${params.scheme}/${schemeFolder}/*primer.bed'"
exit 1
fi
"""
}
process generateAmpliconBed {
publishDir "${params.outdir}/amplicon_info", pattern: "*.bed", mode: "copy"
publishDir "${params.outdir}/amplicon_info", pattern: "primer_prefix.txt", mode: "copy"

input:
path primer_bed

output:
path "amplicon.bed", emit: amplicon_bed
path "tiling_region.bed", emit: tiling_bed
env PREFIX, emit: primer_prefix

script:
"""
primers_to_amplicons.py \\
--bed $primer_bed

PREFIX=\$(cat primer_prefix.txt)
"""
}
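
For context, a minimal nextflow.config sketch of the parameters referenced by these processes (and by the schemeValidate subworkflow later in this PR). The parameter names come from the code above; every value shown is a placeholder for illustration, not a default introduced by the PR:

// Hypothetical parameter values -- illustration only
params {
    schemeRepoURL = 'https://github.com/<org>/primer-schemes.git' // repo cloned by downloadScheme
    scheme        = 'nCoV-2019'                                   // sub-folder of primer-schemes/
    schemeVersion = 'freed'                                       // validateScheme prepends a 'V' if missing
    local_scheme  = null                                          // optional local scheme dir; skips downloadScheme
    outdir        = 'results'                                     // base directory for publishDir
}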
17 changes: 14 additions & 3 deletions workflows/articNcovNanopore.nf
@@ -40,6 +40,7 @@ include {collateSamples} from '../modules/upload.nf'

// import subworkflows
include {CLIMBrsync} from './upload.nf'
include { schemeValidate } from './schemeValidate.nf'
include {Genotyping} from './typing.nf'

// workflow component for artic pipeline with nanopolish
@@ -57,6 +58,16 @@ workflow sequenceAnalysisNanopolish {

articDownloadScheme()

// =============================== //
// Scheme and Reference
// =============================== //
schemeValidate()
ch_scheme = schemeValidate.out.scheme // channel: [ val(scheme_version), path(scheme) ]
ch_reference = schemeValidate.out.reference // channel: [ path(reference.fasta) ]
ch_primer_bed = schemeValidate.out.primer_bed // channel: [ path(primer_bed) ]
ch_amplicon = schemeValidate.out.amplicon_bed // channel: [ path(amplicon_bed) ]
ch_primer_prefix = schemeValidate.out.primer_prefix // channel: [ val(primer_prefix) ]

// Tracking barcodes that fail initial read count checks
accountNoReadsInput(ch_badFastqDirs.collect(),
ch_irida)
@@ -129,10 +140,10 @@
.set{ ch_ncov }

runNcovTools(ch_ncov,
articDownloadScheme.out.reffasta,
articDownloadScheme.out.ncov_amplicon,
ch_reference,
ch_amplicon,
articMinIONNanopolish.out[0].collect(),
articDownloadScheme.out.bed,
ch_primer_bed,
ch_irida,
ch_corrected)
ch_versions = ch_versions.mix(runNcovTools.out.versions.first())
41 changes: 41 additions & 0 deletions workflows/schemeValidate.nf
@@ -0,0 +1,41 @@
/*
Workflow to check, validate, and fix input primer schemes
*/
// Modules to include
include {
downloadScheme ;
validateScheme ;
generateAmpliconBed
} from '../modules/schemes.nf'

/*
Initialize channels from params
*/
ch_scheme = params.local_scheme ? file(params.local_scheme, type: 'dir', checkIfExists: true) : []

// Workflow
workflow schemeValidate {
main:
// =============================== //
// Pull scheme from online
// =============================== //
if ( ! ch_scheme ) {
downloadScheme()
ch_scheme = downloadScheme.out.scheme
}

validateScheme(
ch_scheme
)

generateAmpliconBed(
validateScheme.out.primer_bed
)

emit:
scheme = validateScheme.out.scheme
reference = validateScheme.out.reference
primer_bed = validateScheme.out.primer_bed
amplicon_bed = generateAmpliconBed.out.amplicon_bed
primer_prefix = generateAmpliconBed.out.primer_prefix
}