Skip to content

Commit

Permalink
Merge pull request #201 from UPHL-BioNGS/update20240520
Browse files Browse the repository at this point in the history
Update 20240520
  • Loading branch information
erinyoung committed May 21, 2024
2 parents e773ce5 + cc0a6da commit 582df26
Show file tree
Hide file tree
Showing 17 changed files with 99 additions and 80 deletions.
21 changes: 12 additions & 9 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -336,23 +336,26 @@ workflow {
if ( params.sample_sheet || params.reads || params.sra_accessions ) {
de_novo_alignment(ch_raw_reads)

ch_assembled = de_novo_alignment.out.contigs
ch_contigs = ch_fastas.mix(de_novo_alignment.out.contigs)
ch_clean_reads = de_novo_alignment.out.clean_reads
ch_for_multiqc = ch_for_multiqc.mix(de_novo_alignment.out.for_multiqc)
ch_versions = ch_versions.mix(de_novo_alignment.out.versions)
ch_assembled = de_novo_alignment.out.contigs
ch_contigs = ch_fastas.mix(de_novo_alignment.out.contigs)
ch_reads_contigs = ch_fastas.map{it -> tuple{it[0], it[1], null}}.mix(de_novo_alignment.out.reads_contigs)
ch_clean_reads = de_novo_alignment.out.clean_reads
ch_for_multiqc = ch_for_multiqc.mix(de_novo_alignment.out.for_multiqc)
ch_versions = ch_versions.mix(de_novo_alignment.out.versions)

} else {
ch_contigs = ch_fastas
ch_clean_reads = Channel.empty()
ch_assembled = Channel.empty()
ch_contigs = ch_fastas
ch_reads_contigs = Channel.empty()
ch_clean_reads = Channel.empty()
ch_assembled = Channel.empty()
}

// getting a summary of everything
if ( ! params.skip_extras ) {
quality_assessment(
ch_raw_reads,
ch_contigs,
ch_reads_contigs,
summfle_script)

ch_for_multiqc = ch_for_multiqc.mix(quality_assessment.out.for_multiqc)
Expand All @@ -362,7 +365,7 @@ workflow {

// optional subworkflow blobtools (useful for interspecies contamination)
if ( params.blast_db && ( params.sample_sheet || params.reads || params.sra_accessions )) {
blobtools(ch_clean_reads, ch_assembled, quality_assessment.out.bams, ch_blast_db )
blobtools(quality_assessment.out.bams, ch_blast_db )

ch_for_summary = ch_for_summary.mix(blobtools.out.for_summary)
ch_for_flag = ch_for_flag.mix(blobtools.out.for_flag)
Expand Down
2 changes: 1 addition & 1 deletion modules/local/amrfinderplus.nf
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ process amrfinderplus {
tag "${meta.id}"
label "process_high"
publishDir params.outdir, mode: 'copy', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
container 'staphb/ncbi-amrfinderplus:3.12.8-2024-01-31.1_2'
container 'staphb/ncbi-amrfinderplus:3.12.8-2024-05-02.2'
errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'}
time '30m'

Expand Down
2 changes: 1 addition & 1 deletion modules/local/blobtools.nf
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ process blobtools_create {
errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'}

input:
tuple val(meta), file(contig), file(blastn), file(bam)
tuple val(meta), file(contig), file(bam), file(blastn)

output:
tuple val(meta), file("blobtools/*.blobDB.json"), emit: json
Expand Down
7 changes: 5 additions & 2 deletions modules/local/circulocov.nf
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,19 @@ process circulocov {
tag "${meta.id}"
label "process_medium"
stageInMode "copy"
publishDir path: params.outdir, mode: 'copy', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
publishDir path: params.outdir, mode: 'copy', pattern: 'logs/*/*log'
publishDir path: params.outdir, mode: 'copy', pattern: 'circulocov/*'
publishDir path: params.outdir, mode: 'copy', pattern: 'circulocov/*/*'
container 'staphb/circulocov:0.1.20240104'
time '30m'
errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'}

input:
tuple val(meta), file(fastqs), file(contigs)
tuple val(meta), file(contigs), file(fastqs)

output:
tuple val(meta), file("circulocov/*/*sr.bam*"), emit: bam
tuple val(meta), file(contigs), file("circulocov/*/*sr.bam*"), emit: contig_bam
path "circulocov/*/overall_summary.txt", emit: collect
path "circulocov/*", emit: everything
path "circulocov/fastq/*", emit: fastq, optional: true
Expand Down
2 changes: 1 addition & 1 deletion modules/local/datasets.nf
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ process datasets_summary {
tag "${taxon}"
label "process_single"
publishDir params.outdir, mode: 'copy', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
container 'staphb/ncbi-datasets:16.10.3'
container 'staphb/ncbi-datasets:16.15.0'
time '1h'
errorStrategy { task.attempt < 2 ? 'retry' : 'ignore' }

Expand Down
5 changes: 2 additions & 3 deletions modules/local/fastani.nf
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,11 @@ process fastani {
shell:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def ref = genomes.join(",")
def ends = genomes.collect { it.Name[-6..-1] }.flatten().unique().join(' *')
"""
mkdir -p fastani logs/${task.process}
log_file=logs/${task.process}/${prefix}.${workflow.sessionId}.log
echo ${ref} | tr "," "\\n" | sort > reference_list.txt
ls *${ends} | grep -v ${contigs} | sort > reference_list.txt
fastANI ${args} \
--threads ${task.cpus} \
Expand Down
10 changes: 5 additions & 5 deletions modules/local/fastp.nf
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,11 @@ process fastp {

output:
tuple val(meta), file("fastp/*_fastp_R{1,2}.fastq.gz"), emit: fastq, optional: true
path "fastp/*_fastp.html", emit: html, optional: true
path "fastp/*_fastp.json", emit: fastp_files, optional: true
path "logs/${task.process}/*.{log,err}", emit: log
tuple val(meta), env(passed_reads), emit: fastp_results
path "versions.yml", emit: versions
path "fastp/*_fastp.html", emit: html, optional: true
path "fastp/*_fastp.json", emit: fastp_files, optional: true
path "logs/${task.process}/*.{log,err}", emit: log
tuple val(meta), file("fastp/*_fastp_R{1,2}.fastq.gz"), env(passed_reads), emit: fastp_results
path "versions.yml", emit: versions

when:
task.ext.when == null || task.ext.when
Expand Down
2 changes: 1 addition & 1 deletion modules/local/iqtree2.nf
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ process iqtree2 {
tag "Phylogenetic analysis"
label "process_high"
publishDir params.outdir, mode: 'copy', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
container 'staphb/iqtree2:2.2.2.7'
container 'staphb/iqtree2:2.3.1'
time '24h'
errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'}

Expand Down
24 changes: 12 additions & 12 deletions modules/local/local.nf
Original file line number Diff line number Diff line change
Expand Up @@ -77,18 +77,18 @@ process flag {
tuple val(meta), file(files)

output:
tuple val(meta), env(salmonella_flag) , emit: salmonella_flag
tuple val(meta), env(klebsiella_flag) , emit: klebsiella_flag
tuple val(meta), env(ecoli_flag) , emit: ecoli_flag
tuple val(meta), env(streppneu_flag) , emit: streppneu_flag
tuple val(meta), env(legionella_flag) , emit: legionella_flag
tuple val(meta), env(klebacin_flag) , emit: klebacin_flag
tuple val(meta), env(strepa_flag) , emit: strepa_flag
tuple val(meta), env(vibrio_flag) , emit: vibrio_flag
tuple val(meta), env(myco_flag) , emit: myco_flag
tuple val(meta), env(genus), env(species), emit: organism
path "flag/*_flag.csv" , emit: collect
path "logs/${task.process}/*.log" , emit: log_files
tuple val(meta), file("${files[0]}"), env(salmonella_flag) , emit: salmonella_flag
tuple val(meta), file("${files[0]}"), env(klebsiella_flag) , emit: klebsiella_flag
tuple val(meta), file("${files[0]}"), env(ecoli_flag) , emit: ecoli_flag
tuple val(meta), file("${files[0]}"), env(streppneu_flag) , emit: streppneu_flag
tuple val(meta), file("${files[0]}"), env(legionella_flag) , emit: legionella_flag
tuple val(meta), file("${files[0]}"), env(klebacin_flag) , emit: klebacin_flag
tuple val(meta), file("${files[0]}"), env(strepa_flag) , emit: strepa_flag
tuple val(meta), file("${files[0]}"), env(vibrio_flag) , emit: vibrio_flag
tuple val(meta), file("${files[0]}"), env(myco_flag) , emit: myco_flag
tuple val(meta), file("${files[0]}"), env(genus), env(species), emit: organism
path "flag/*_flag.csv", emit: collect
path "logs/${task.process}/*.log", emit: log_files

shell:
def prefix = task.ext.prefix ?: "${meta.id}"
Expand Down
2 changes: 1 addition & 1 deletion modules/local/mlst.nf
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ process mlst {
tag "${meta.id}"
label "process_medium"
publishDir params.outdir, mode: 'copy', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
container 'staphb/mlst:2.23.0-2024-04-01'
container 'staphb/mlst:2.23.0-2024-05-01'
maxForks 10
errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'}
time '10m'
Expand Down
2 changes: 1 addition & 1 deletion modules/local/panaroo.nf
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ process panaroo {
tag "Core Genome Alignment"
label "process_high"
publishDir params.outdir, mode: 'copy', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
container 'staphb/panaroo:1.3.4'
container 'staphb/panaroo:1.5.0'
errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'}
time '10h'

Expand Down
12 changes: 8 additions & 4 deletions modules/local/spades.nf
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
process spades {
tag "${meta.id}"
label "process_high"
publishDir params.outdir, mode: 'copy', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
publishDir path: params.outdir, mode: 'copy', pattern: 'logs/*/*log'
publishDir path: params.outdir, mode: 'copy', pattern: 'spades/*'
publishDir path: params.outdir, mode: 'copy', pattern: 'spades/*/*'
publishDir path: params.outdir, mode: 'copy', pattern: 'contigs/*'
container 'staphb/spades:3.15.5'
errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'}
time '5h'
Expand All @@ -10,10 +13,11 @@ process spades {
tuple val(meta), file(reads)

output:
path "spades/*/*", emit: files
path "spades/*/*", emit: files
tuple val(meta), file("contigs/*_contigs.fa"), optional: true, emit: contigs
path "logs/${task.process}/*.log", emit: log
path "versions.yml", emit: versions
tuple val(meta), file("contigs/*_contigs.fa"), file(reads), optional: true, emit: reads_contigs
path "logs/${task.process}/*.log", emit: log
path "versions.yml", emit: versions

when:
task.ext.when == null || task.ext.when
Expand Down
2 changes: 1 addition & 1 deletion nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ manifest {
author = 'Erin Young'
homePage = 'https://github.com/UPHL-BioNGS/Grandeur'
mainScript = 'main.nf'
version = '4.2.20240425'
version = '4.4.240521'
defaultBranch = 'main'
description = 'Grandeur is short-read de novo assembly pipeline with serotyping.'
nextflowVersion = '!>=22.10.1'
Expand Down
10 changes: 5 additions & 5 deletions subworkflows/blobtools.nf
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,14 @@ include { blobtools_view } from '../modules/local/blobtools' addParams(param

workflow blobtools {
take:
ch_clean_reads
ch_contigs
ch_bams
ch_contig_bams
ch_blast_db

main:
blastn(ch_clean_reads.join(ch_contigs, by: 0).map{it -> tuple(it[0],it[2])}.combine(ch_blast_db))
blobtools_create(ch_contigs.join(blastn.out.blastn, by: 0).join(ch_bams, by: 0))
ch_contigs = ch_contig_bams.filter{it[1]}.map{it -> tuple(it[0], it[1])}

blastn(ch_contigs.combine(ch_blast_db))
blobtools_create(ch_contig_bams.join(blastn.out.blastn, by: 0, failOnMismatch: false, remainder: false))
blobtools_view(blobtools_create.out.json)
blobtools_plot(blobtools_create.out.json)

Expand Down
10 changes: 5 additions & 5 deletions subworkflows/de_novo_alignment.nf
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,7 @@ workflow de_novo_alignment {
bbduk(reads)
fastp(bbduk.out.fastq)

fastp.out.fastq
.join(fastp.out.fastp_results)
fastp.out.fastp_results
.filter ({ it[2] as int >= params.minimum_reads })
.map ( it -> tuple (it[0], it[1]))
.set{ read_check }
Expand All @@ -20,10 +19,11 @@ workflow de_novo_alignment {

emit:
// for downstream analyses
clean_reads = fastp.out.fastq
contigs = spades.out.contigs
reads_contigs = spades.out.reads_contigs
clean_reads = fastp.out.fastq
contigs = spades.out.contigs.filter{it[1] != null}

// for multiqc
for_multiqc = fastp.out.fastp_files.mix(bbduk.out.stats)
versions = bbduk.out.versions.mix(fastp.out.versions).mix(spades.out.versions)
versions = bbduk.out.versions.first().mix(fastp.out.versions.first()).mix(spades.out.versions.first())
}
52 changes: 34 additions & 18 deletions subworkflows/information.nf
Original file line number Diff line number Diff line change
Expand Up @@ -20,25 +20,41 @@ workflow information {
jsoncon_script

main:

// species specific
// TODO : add blobtools
int grouptuplesize = 2
if ( params.kraken2_db && ( params.sample_sheet || params.reads )) { grouptuplesize = grouptuplesize +1 }

//flag(ch_flag.groupTuple(size : grouptuplesize, remainder: true ))
flag(ch_flag.groupTuple())

amrfinderplus(ch_contigs.join(flag.out.organism, by:0))
drprg(ch_contigs.join(flag.out.myco_flag, by:0))
emmtyper(ch_contigs.join(flag.out.strepa_flag, by:0).combine(summfle_script))
kaptive(ch_contigs.join(flag.out.vibrio_flag, by:0))
kleborate(ch_contigs.join(flag.out.klebsiella_flag, by:0).combine(summfle_script))
elgato(ch_contigs.join(flag.out.legionella_flag, by:0))
mykrobe(ch_contigs.join(flag.out.myco_flag, by:0))
pbptyper(ch_contigs.join(flag.out.streppneu_flag, by:0))
seqsero2(ch_contigs.join(flag.out.salmonella_flag, by:0))
serotypefinder(ch_contigs.join(flag.out.ecoli_flag, by:0).combine(summfle_script))
shigatyper(ch_contigs.join(flag.out.ecoli_flag, by:0).combine(summfle_script))
// branch + join = faster than groupTuple
ch_flag
.branch {
blobtools: it[1] =~ /blobtools.txt/
kraken2: it[1] =~ /kraken2.csv/
mash: it[1] =~ /mash.csv/
fastani: it[1] =~ /fastani.csv/
}
.set { ch_flag_branch }

ch_contigs
.filter{it[1] != null}
.join(ch_flag_branch.blobtools, by:0, failOnMismatch: false, remainder: true)
.join(ch_flag_branch.kraken2, by:0, failOnMismatch: false, remainder: true)
.join(ch_flag_branch.mash, by:0, failOnMismatch: false, remainder: true)
.join(ch_flag_branch.fastani, by:0, failOnMismatch: false, remainder: true)
.filter{it[1] != null}
.map{ it -> tuple(it[0],[it[1], it[2], it[3], it[4], it[5]])}
.set {ch_for_flag}

flag(ch_for_flag)

amrfinderplus(flag.out.organism)
drprg(flag.out.myco_flag)
emmtyper(flag.out.strepa_flag.combine(summfle_script))
kaptive(flag.out.vibrio_flag)
kleborate(flag.out.klebsiella_flag.combine(summfle_script))
elgato(flag.out.legionella_flag)
mykrobe(flag.out.myco_flag)
pbptyper(flag.out.streppneu_flag)
seqsero2(flag.out.salmonella_flag)
serotypefinder(flag.out.ecoli_flag.combine(summfle_script))
shigatyper(flag.out.ecoli_flag.combine(summfle_script))

json_convert(drprg.out.json.combine(jsoncon_script))

Expand Down
14 changes: 4 additions & 10 deletions subworkflows/quality_assessment.nf
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ workflow quality_assessment {
take:
ch_reads
ch_contigs
ch_reads_contigs
summfle_script

main:
Expand All @@ -20,13 +21,7 @@ workflow quality_assessment {
if ( params.sample_sheet || params.reads || params.sra_accessions ) {
fastqc(ch_reads)

ch_reads
.join(ch_contigs, by: 0, remainder: true)
.filter {it[1]}
.filter {it[2]}
.set { for_circulocov }

circulocov(for_circulocov)
circulocov(ch_reads_contigs.filter{it[1]}.filter{it[2]})

for_multiqc = for_multiqc.mix(fastqc.out.for_multiqc)

Expand All @@ -46,12 +41,11 @@ workflow quality_assessment {

ch_summary = ch_summary.mix(circulocov_summary).mix(fastqc_summary)
ch_versions = ch_versions.mix(fastqc.out.versions.first()).mix(circulocov.out.versions.first())
ch_bams = ch_bams.mix(circulocov.out.bam)

ch_bams = ch_bams.mix(circulocov.out.contig_bam)
}

// contigs
quast(ch_contigs.join(ch_reads, by: 0, remainder: true ))
quast(ch_reads_contigs.filter{it[2]})
mlst(ch_contigs.combine(summfle_script))
plasmidfinder(ch_contigs.combine(summfle_script))

Expand Down

0 comments on commit 582df26

Please sign in to comment.