diff --git a/CITATIONS.md b/CITATIONS.md index 867bde34..8e02b93d 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -18,6 +18,14 @@ > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. +* [Alevin-fry](https://doi.org/10.1038/s41592-022-01408-3) + + > He, D., Zakeri, M., Sarkar, H. et al. Alevin-fry unlocks rapid, accurate and memory-frugal quantification of single-cell RNA-seq data. Nat Methods 19, 316–322 (2022). + +* [Simpleaf](https://doi.org/10.1093/bioinformatics/btad614) + + > He, D., Patro, R. simpleaf: a simple, flexible, and scalable framework for single-cell data processing using alevin-fry, Bioinformatics, Volume 39, Issue 10, October 2023, btad614. + * [Alevin](https://doi.org/10.1186/s13059-019-1670-y) > Srivastava, A., Malik, L., Smith, T. et al. Alevin efficiently estimates accurate gene abundances from dscRNA-seq data. Genome Biol 20, 65 (2019). 
diff --git a/docs/images/nf-core-scrnaseq_logo_light.png b/docs/images/nf-core-scrnaseq_logo_light.png index dee21bdd..0c1078c4 100644 Binary files a/docs/images/nf-core-scrnaseq_logo_light.png and b/docs/images/nf-core-scrnaseq_logo_light.png differ diff --git a/docs/output.md b/docs/output.md index 3ab87625..d4575ffb 100644 --- a/docs/output.md +++ b/docs/output.md @@ -86,13 +86,13 @@ For details on how to load these into R and perform further downstream analysis, **Output directory: `results/alevin`** - `alevin` - - Contains the created Salmon Alevin pseudo-aligned output + - Contains the created alevin-fry pseudo-aligned output - `alevinqc` - Contains the QC report for the aforementioned Salmon Alevin output data **Output directory: `results/reference_genome`** -- `salmon_index` +- `simpleaf_index` - Contains the indexed reference transcriptome for Salmon Alevin - `alevin/txp2gene.tsv` - The transcriptome to gene mapping TSV file utilized by Salmon Alevin diff --git a/docs/usage.md b/docs/usage.md index 499e404d..d38ea553 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -39,7 +39,7 @@ An [example samplesheet](../assets/samplesheet.csv) has been provided with the p This parameter is currently supported by -- [Salmon Alevin](https://salmon.readthedocs.io/en/latest/alevin.html#expectcells) +- [Alevin-fry](https://alevin-fry.readthedocs.io/en/latest/generate_permit_list.html#:~:text=%2D%2Dexpect%2Dcells%20%3Cncells%3E) - [STARsolo](https://github.com/alexdobin/STAR/blob/master/docs/STARsolo.md) - [Cellranger](https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/what-is-cell-ranger) @@ -47,7 +47,7 @@ Note that since cellranger v7, it is **not recommended** anymore to supply the ` ## Aligning options -By default, the pipeline uses [Salmon Alevin](https://salmon.readthedocs.io/en/latest/alevin.html) (i.e. 
--aligner alevin) to perform pseudo-alignment of reads to the reference genome and to perform the downstream BAM-level quantification. Then QC reports are generated with AlevinQC. +By default, the pipeline uses [Alevin-fry](https://alevin-fry.readthedocs.io/en/latest/) (i.e. --aligner alevin) via [Simpleaf](https://simpleaf.readthedocs.io/en/latest/) to perform pseudo-alignment of reads to the reference genome and to perform the downstream BAM-level quantification. Then QC reports are generated with AlevinQC. Other aligner options for running the pipeline are: @@ -100,11 +100,11 @@ The command `kb --list` shows all supported, preconfigured protocols. Additional For more details, please refer to the [Kallisto/bustools documentation](https://pachterlab.github.io/kallisto/manual#bus). -#### Alevin/fry +#### Alevin-fry -Alevin/fry also supports custom chemistries in a slighly different format, e.g. `1{b[16]u[12]x:}2{r:}`. +Alevin-fry also supports custom chemistries in a slightly different format, e.g. `1{b[16]u[12]x:}2{r:}`. -For more details, see the [simpleaf documentation](https://simpleaf.readthedocs.io/en/latest/quant-command.html#a-note-on-the-chemistry-flag) +For more details, see the [simpleaf documentation](https://simpleaf.readthedocs.io/en/latest/quant-command.html#a-note-on-the-chemistry-flag) and the [language specification](https://hackmd.io/@PI7Og0l1ReeBZu_pjQGUQQ/rJMgmvr13). #### UniverSC diff --git a/modules/local/alevinqc.nf b/modules/local/alevinqc.nf index 9000d79e..19853687 100644 --- a/modules/local/alevinqc.nf +++ b/modules/local/alevinqc.nf @@ -3,10 +3,10 @@ process ALEVINQC { label 'process_low' //The alevinqc 1.14.0 container is broken, missing some libraries - thus reverting this to previous 1.12.1 version - conda "bioconda::bioconductor-alevinqc=1.12.1" + conda "bioconda::bioconductor-alevinqc=1.18.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/bioconductor-alevinqc:1.12.1--r41h9f5acd7_0' : - 'biocontainers/bioconductor-alevinqc:1.12.1--r41h9f5acd7_0' }" + 'https://depot.galaxyproject.org/singularity/bioconductor-alevinqc:1.18.0--r43hf17093f_0' : + 'biocontainers/bioconductor-alevinqc:1.18.0--r43hf17093f_0' }" input: tuple val(meta), path(alevin_results) @@ -43,4 +43,4 @@ process ALEVINQC { "versions.yml" ) """ -} +} \ No newline at end of file diff --git a/modules/local/simpleaf_index.nf b/modules/local/simpleaf_index.nf index 8e8bd519..f5dbc381 100644 --- a/modules/local/simpleaf_index.nf +++ b/modules/local/simpleaf_index.nf @@ -2,10 +2,10 @@ process SIMPLEAF_INDEX { tag "$transcript_gtf" label "process_medium" - conda 'bioconda::simpleaf=0.10.0-1' + conda 'bioconda::simpleaf=0.17.2-0' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/simpleaf:0.10.0--h9f5acd7_1' : - 'biocontainers/simpleaf:0.10.0--h9f5acd7_1' }" + 'https://depot.galaxyproject.org/singularity/simpleaf:0.17.2--h919a2d8_0' : + 'biocontainers/simpleaf:0.17.2--h919a2d8_0' }" input: path genome_fasta @@ -14,7 +14,7 @@ process SIMPLEAF_INDEX { output: path "salmon/index" , emit: index - path "salmon/ref/*_t2g_3col.tsv" , emit: transcript_tsv + path "salmon/ref/*t2g_3col.tsv" , emit: transcript_tsv path "versions.yml" , emit: versions path "salmon" , emit: salmon @@ -23,7 +23,8 @@ process SIMPLEAF_INDEX { script: def args = task.ext.args ?: '' - def seq_inputs = (params.transcript_fasta) ? "--refseq $transcript_fasta" : "--gtf $transcript_gtf" + def seq_inputs = (params.transcript_fasta) ? "--refseq $transcript_fasta" : "--fasta $genome_fasta --gtf $transcript_gtf" + def no_piscem = (params.no_piscem) ? '--no-piscem' : '' """ # export required var export ALEVIN_FRY_HOME=. 
@@ -36,8 +37,8 @@ process SIMPLEAF_INDEX { simpleaf \\ index \\ --threads $task.cpus \\ - --fasta $genome_fasta \\ $seq_inputs \\ + $no_piscem \\ $args \\ -o salmon diff --git a/modules/local/simpleaf_quant.nf b/modules/local/simpleaf_quant.nf index abb58404..441efaca 100644 --- a/modules/local/simpleaf_quant.nf +++ b/modules/local/simpleaf_quant.nf @@ -2,10 +2,10 @@ process SIMPLEAF_QUANT { tag "$meta.id" label 'process_high' - conda 'bioconda::simpleaf=0.10.0-1' + conda 'bioconda::simpleaf=0.17.2-0' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/simpleaf:0.10.0--h9f5acd7_1' : - 'biocontainers/simpleaf:0.10.0--h9f5acd7_1' }" + 'https://depot.galaxyproject.org/singularity/simpleaf:0.17.2--h919a2d8_0' : + 'biocontainers/simpleaf:0.17.2--h919a2d8_0' }" input: // @@ -29,6 +29,8 @@ process SIMPLEAF_QUANT { def args = task.ext.args ?: '' def args_list = args.tokenize() def prefix = task.ext.prefix ?: "${meta.id}" + // selective alignment is only available in salmon + def use_selective_alignment = (params.no_piscem && params.use_selective_alignment) ? 
'-s' : '' // // check if users are using one of the mutually excludable parameters: @@ -70,6 +72,7 @@ process SIMPLEAF_QUANT { -c "$protocol" \\ $expect_cells \\ $unfiltered_command \\ + $use_selective_alignment \\ $args $save_whitelist diff --git a/nextflow.config b/nextflow.config index e1b608d2..4d615374 100644 --- a/nextflow.config +++ b/nextflow.config @@ -25,7 +25,9 @@ params { // salmon alevin parameters (simpleaf) simpleaf_rlen = 91 barcode_whitelist = null - salmon_index = null + simpleaf_index = null + no_piscem = false + use_selective_alignment = false // kallisto bustools parameters kallisto_index = null diff --git a/nextflow_schema.json b/nextflow_schema.json index e5fb71b5..9077f4a5 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -158,9 +158,9 @@ "description": "", "default": "", "properties": { - "salmon_index": { + "simpleaf_index": { "type": "string", - "description": "This can be used to specify a precomputed Salmon index in the pipeline, in order to skip the generation of required indices by Salmon itself.", + "description": "This can be used to specify a precomputed Simpleaf index in the pipeline, in order to skip the generation of required indices by Simpleaf itself.", "fa_icon": "fas fa-fish", "format": "path", "exists": true @@ -178,6 +178,16 @@ "default": 91, "description": "It is the target read length the index will be built for, using simpleaf.", "fa_icon": "fas fa-map-marked-alt" + }, + "no_piscem": { + "type": "boolean", + "fa_icon": "fas fa-map-marked-alt", + "description": "Do not use the default piscem mapper; use salmon alevin instead." + }, + "use_selective_alignment": { + "type": "boolean", + "fa_icon": "fas fa-map-marked-alt", + "description": "Use selective-alignment for mapping instead of pseudoalignment with structural constraints (only if using salmon alevin as the underlying mapper)."
} } }, diff --git a/subworkflows/local/alevin.nf b/subworkflows/local/alevin.nf index 764c08f8..45b342dd 100644 --- a/subworkflows/local/alevin.nf +++ b/subworkflows/local/alevin.nf @@ -16,7 +16,7 @@ workflow SCRNASEQ_ALEVIN { genome_fasta gtf transcript_fasta - salmon_index + simpleaf_index txp2gene barcode_whitelist protocol @@ -26,16 +26,16 @@ workflow SCRNASEQ_ALEVIN { main: ch_versions = Channel.empty() - assert (genome_fasta && gtf && salmon_index && txp2gene) || (genome_fasta && gtf) || (genome_fasta && gtf && transcript_fasta && txp2gene): + assert (genome_fasta && gtf && simpleaf_index && txp2gene) || (genome_fasta && gtf) || (genome_fasta && gtf && transcript_fasta && txp2gene): """Must provide a genome fasta file ('--fasta') and a gtf file ('--gtf'), or a genome fasta file and a transcriptome fasta file ('--transcript_fasta`) if no index and txp2gene is given!""".stripIndent() /* * Build salmon index */ - if (!salmon_index) { + if (!simpleaf_index) { SIMPLEAF_INDEX( genome_fasta, transcript_fasta, gtf ) - salmon_index = SIMPLEAF_INDEX.out.index.collect() + simpleaf_index = SIMPLEAF_INDEX.out.index.collect() transcript_tsv = SIMPLEAF_INDEX.out.transcript_tsv.collect() ch_versions = ch_versions.mix(SIMPLEAF_INDEX.out.versions) @@ -51,7 +51,7 @@ workflow SCRNASEQ_ALEVIN { */ SIMPLEAF_QUANT ( ch_fastq, - salmon_index, + simpleaf_index, txp2gene, protocol, barcode_whitelist diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf index 10ced221..28ae0b5b 100644 --- a/workflows/scrnaseq.nf +++ b/workflows/scrnaseq.nf @@ -68,7 +68,7 @@ workflow SCRNASEQ { kb_workflow = params.kb_workflow //salmon params - ch_salmon_index = params.salmon_index ? file(params.salmon_index) : [] + ch_simpleaf_index = params.simpleaf_index ? file(params.simpleaf_index) : [] //star params star_index = params.star_index ? 
file(params.star_index, checkIfExists: true) : null @@ -147,7 +147,7 @@ workflow SCRNASEQ { ch_genome_fasta, ch_filter_gtf, ch_transcript_fasta, - ch_salmon_index, + ch_simpleaf_index, ch_txp2gene, ch_barcode_whitelist, protocol_config['protocol'],