Skip to content

Commit

Permalink
Dev0420 (#6)
Browse files Browse the repository at this point in the history
* virsorter, bacphlip and mmseqs taxa fix
  • Loading branch information
rujinlong committed Apr 21, 2023
1 parent 2768259 commit ab403d4
Show file tree
Hide file tree
Showing 11 changed files with 50 additions and 23 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/docker.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ name: Docker
on:
push:
tags:
# - 'v*'
- 'v*'
- 'docker*'

jobs:
Expand Down
4 changes: 2 additions & 2 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -57,11 +57,11 @@ process {
}

withLabel: viroprofiler_replicyc {
container = 'denglab/viroprofiler-replicyc:v0.2'
container = 'denglab/viroprofiler-replicyc:v0.1'
}

withLabel: viroprofiler_taxa {
container = 'denglab/viroprofiler-taxa:v0.2'
container = 'denglab/viroprofiler-taxa:base'
}

withLabel: viroprofiler_virsorter2 {
Expand Down
4 changes: 2 additions & 2 deletions custom.config
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ process {
cpus = { check_max( 1 * task.attempt, 'cpus') }
memory = { check_max( 4.GB * task.attempt, 'memory') }
}

withName: CHECKV {
cpus = { check_max( 1 * task.attempt, 'cpus') }
memory = { check_max( 20.GB * task.attempt, 'memory') }
Expand Down Expand Up @@ -245,4 +245,4 @@ def check_max(obj, type) {
return obj
}
}
}
}
2 changes: 1 addition & 1 deletion docker/viroprofiler-base/env_base.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,5 @@ dependencies:
- wget
- pigz
- coverm
- mmseqs2=13
- mmseqs2=13.45111
- unzip
25 changes: 18 additions & 7 deletions docker/viroprofiler-taxa/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -10,21 +10,32 @@ RUN --mount=type=cache,target=/opt/conda/pkgs micromamba install -n base -f /tmp

# Activate conda env during docker build
ARG MAMBA_DOCKERFILE_ACTIVATE=1


RUN cd /opt/conda/lib/python3.8/site-packages/vcontact2/data && \
/opt/conda/bin/wget https://bitbucket.org/MAVERICLab/vcontact2/raw/6089ed83fd03a844de3140c442863c7b29881d43/vcontact2/data/ViralRefSeq-prokaryotes-v211.Merged-reference.csv && \
/opt/conda/bin/wget https://bitbucket.org/MAVERICLab/vcontact2/raw/6089ed83fd03a844de3140c442863c7b29881d43/vcontact2/data/ViralRefSeq-prokaryotes-v211.faa.gz && \
/opt/conda/bin/wget https://bitbucket.org/MAVERICLab/vcontact2/raw/6089ed83fd03a844de3140c442863c7b29881d43/vcontact2/data/ViralRefSeq-prokaryotes-v211.protein2contig.csv

# Set PATH manually, because nextflow doesn't activate base env by default.
ENV PATH=/opt/MMseqs2/build/bin:/opt/conda/bin:$PATH
# ENV PATH=/opt/MMseqs2/build/bin:/opt/conda/bin:$PATH
ENV PATH=/opt/mmseqs/bin:/opt/MMseqs2/build/bin:/opt/conda/bin:$PATH

RUN cd /opt && \
git clone --depth=1 https://github.com/soedinglab/MMseqs2.git && \
git clone https://github.com/soedinglab/MMseqs2.git && \
cd /opt/MMseqs2 && \
git checkout 3b9cf88 && \
mkdir build && \
cd build && \
cmake -DHAVE_SANITIZER=1 -DCMAKE_BUILD_TYPE=ASan -DCMAKE_INSTALL_PREFIX=. .. && \
make -j 4 && \
make -j 1 && \
make install

RUN micromamba clean --all --yes

# Download the three ViralRefSeq-prokaryotes-v211 files into the vcontact2 package's data directory.
# The URLs reference a fixed commit (6089ed8...) of the MAVERICLab/vcontact2 Bitbucket repository.
# NOTE(review): the target path hard-codes python3.8 — confirm it matches the Python version in the conda env.
RUN cd /opt/conda/lib/python3.8/site-packages/vcontact2/data && \
/opt/conda/bin/wget https://bitbucket.org/MAVERICLab/vcontact2/raw/6089ed83fd03a844de3140c442863c7b29881d43/vcontact2/data/ViralRefSeq-prokaryotes-v211.Merged-reference.csv && \
/opt/conda/bin/wget https://bitbucket.org/MAVERICLab/vcontact2/raw/6089ed83fd03a844de3140c442863c7b29881d43/vcontact2/data/ViralRefSeq-prokaryotes-v211.faa.gz && \
/opt/conda/bin/wget https://bitbucket.org/MAVERICLab/vcontact2/raw/6089ed83fd03a844de3140c442863c7b29881d43/vcontact2/data/ViralRefSeq-prokaryotes-v211.protein2contig.csv

# COPY ./docker/viroprofiler-taxa/mmseqs /opt/conda/bin
# RUN chmod +x /opt/conda/bin/mmseqs

# Download and unpack the prebuilt MMseqs2 AVX2 binary under /opt, then remove the archive.
# The steps are chained with && so that a failed download or extraction aborts the
# image build; with ';' separators only the exit status of the final rm was checked.
# NOTE(review): presumably the archive unpacks to /opt/mmseqs, matching the /opt/mmseqs/bin PATH entry — confirm.
# NOTE(review): the "latest" URL is unpinned, so rebuilds may pick up a different MMseqs2 version.
RUN cd /opt && \
wget https://mmseqs.com/latest/mmseqs-linux-avx2.tar.gz && \
tar xvfz mmseqs-linux-avx2.tar.gz && \
rm mmseqs-linux-avx2.tar.gz
2 changes: 1 addition & 1 deletion docker/viroprofiler-taxa/env_taxa.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ dependencies:
- conda-forge::cmake
- conda-forge::gcc
- conda-forge::cxx-compiler
- bioconda::mmseqs2
# - bioconda::mmseqs2=13.45111
- bioconda::vcontact2=0.11.1
- bioconda::diamond=2.0.6
- bioconda::mcl=14.137
Expand Down
4 changes: 3 additions & 1 deletion docker/viroprofiler-virsorter2/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,7 @@ COPY ./docker/viroprofiler-virsorter2/env_*.yml /tmp
# Install packages
RUN --mount=type=cache,target=/opt/conda/pkgs micromamba install -n base -f /tmp/env_virsorter2.yml -y

# Install git via apt, clone VirSorter2 into /VirSorter2 and install it with pip in editable mode.
# NOTE(review): the clone tracks the default branch with no tag or commit pinned — confirm this is intended.
RUN apt-get update && \
    apt-get install git -y && \
    cd / && \
    git clone https://github.com/jiarong/VirSorter2.git && \
    cd VirSorter2 && \
    pip install -e .

# clean
RUN micromamba clean --all --yes
#RUN micromamba clean --all --yes
16 changes: 14 additions & 2 deletions docker/viroprofiler-virsorter2/env_virsorter2.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,20 @@ channels:
- conda-forge
- bioconda
dependencies:
- python>=3.6
- scikit-learn=0.22.1
- imbalanced-learn
- pandas
- seaborn
- hmmer==3.3
- prodigal
- screed
- ruamel.yaml
- snakemake>=5.18,<=5.26
- click
- mamba
- conda-forge::procps-ng
- virsorter=2.2.4
- seqkit
- csvtk
- conda-forge::glpk
- conda-forge::glpk

4 changes: 2 additions & 2 deletions modules/local/replicyc.nf
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ process BACPHLIP {

output:
// TODO nf-core: Named file extensions MUST be emitted for ALL output channels
path "*.bacphlip", emit: repcyc_ch
path "*.bacphlip", emit: bacphlip_ch
path "*.hmmsearch.tsv"
path "versions.yml", emit: versions
// TODO nf-core: List additional required output channels/values here
Expand Down Expand Up @@ -73,7 +73,7 @@ process REPLIDEC {
path(contigs)

output:
path("out_replidec.tsv"), emit: gene_fna_ch
path("out_replidec.tsv"), emit: replidec_ch
path "versions.yml", emit: versions

when:
Expand Down
6 changes: 3 additions & 3 deletions modules/local/taxonomy.nf
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ process TAXONOMY_VCONTACT {
}

process TAXONOMY_MMSEQS {
label "viroprofiler_taxa"
label "viroprofiler_base"

input:
path contigs
Expand All @@ -46,7 +46,7 @@ process TAXONOMY_MMSEQS {
"""
# Run mmseqs taxonomy
mmseqs createdb $contigs qry
mmseqs taxonomy qry ${params.db}/taxonomy/mmseqs_vrefseq/refseq_viral mmseqsTaxaRst tmp --tax-lineage 1 --majority 0.4 --vote-mode 1 --lca-mode 3 --orf-filter 1 --threads $task.cpus
mmseqs taxonomy qry ${params.db}/taxonomy/mmseqs_vrefseq/refseq_viral mmseqsTaxaRst tmp --tax-lineage 1 --majority 0.4 --vote-mode 1 --lca-mode 3 --orf-filter 0 --threads $task.cpus
# report
mmseqs createtsv qry mmseqsTaxaRst mmseqsTaxaRst.tsv
Expand Down Expand Up @@ -74,7 +74,7 @@ process TAXONOMY_MERGE {

when:
task.ext.when == null || task.ext.when

"""
parse_vContact2_vc.py -i $taxa_vc -o taxa_vc2 -a $params.assembler
parse_mmseqsTaxa.py -i $taxa_mmseqs -o taxa_mmseqs -u "" -s $params.taxa_db_source
Expand Down
4 changes: 3 additions & 1 deletion workflows/viroprofiler.nf
Original file line number Diff line number Diff line change
Expand Up @@ -272,13 +272,15 @@ workflow VIROPROFILER {
// Run the tool selected by params.replicyc ("bacphlip" or "replidec"), add its
// versions output to ch_versions, and store its result channel in ch_replicyc.
if ( params.replicyc == "bacphlip" ) {
BACPHLIP (vContigs_and_vMAGs)
ch_versions = ch_versions.mix(BACPHLIP.out.versions)
ch_replicyc = BACPHLIP.out.bacphlip_ch
} else if ( params.replicyc == "replidec" ) {
REPLIDEC (vContigs_and_vMAGs)
ch_versions = ch_versions.mix(REPLIDEC.out.versions)
ch_replicyc = REPLIDEC.out.replidec_ch
}
// NOTE(review): ch_replicyc is not assigned in this block for any other params.replicyc value — confirm the parameter is validated elsewhere.

// TreeSummarizedExperiment
RESULTS_TSE (ABUNDANCE.out.ab_count_ch, ABUNDANCE.out.ab_tpm_ch, ABUNDANCE.out.ab_covfrac_ch, TAXONOMY_MERGE.out.taxa_mmseqs_ch, CHECKV.out.checkv2vContigs_ch, VIRSORTER2.out.vs2_score_ch, VIBRANT.out.vibrant_quality_ch, DVF.out.dvf2vContigs_ch, BACPHLIP.out.repcyc_ch)
RESULTS_TSE (ABUNDANCE.out.ab_count_ch, ABUNDANCE.out.ab_tpm_ch, ABUNDANCE.out.ab_covfrac_ch, TAXONOMY_MERGE.out.taxa_mmseqs_ch, CHECKV.out.checkv2vContigs_ch, VIRSORTER2.out.vs2_score_ch, VIBRANT.out.vibrant_quality_ch, DVF.out.dvf2vContigs_ch, ch_replicyc)

CUSTOM_DUMPSOFTWAREVERSIONS (
ch_versions.unique().collectFile(name: 'collated_versions.yml')
Expand Down

0 comments on commit ab403d4

Please sign in to comment.