From 012f6083801dd71693ee53c7d239b0fbd915c6c9 Mon Sep 17 00:00:00 2001 From: an-altosian Date: Mon, 16 Sep 2024 03:10:46 +0000 Subject: [PATCH 1/5] update doc related to alevin-fry, simpleaf and salmon --- CITATIONS.md | 8 ++++++++ bin/emptydrops_cell_calling.R | 4 ++-- docs/output.md | 20 +++++++++++--------- docs/usage.md | 10 +++++----- nextflow.config | 2 +- nextflow_schema.json | 7 +++---- 6 files changed, 30 insertions(+), 21 deletions(-) diff --git a/CITATIONS.md b/CITATIONS.md index 867bde34..9a6c64de 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -18,6 +18,14 @@ > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. +- [Simpleaf](https://doi.org/10.1093/bioinformatics/btad614) + + > Dongze He, Rob Patro, simpleaf: a simple, flexible, and scalable framework for single-cell data processing using alevin-fry, Bioinformatics 39, 10 (2023). + +* [Alevin-fry](https://doi.org/10.1038/s41592-022-01408-3) + + > He, D., Zakeri, M., Sarkar, H. et al. Alevin-fry unlocks rapid, accurate and memory-frugal quantification of single-cell RNA-seq data. Nat Methods 19, 316–322 (2022). + * [Alevin](https://doi.org/10.1186/s13059-019-1670-y) > Srivastava, A., Malik, L., Smith, T. et al. Alevin efficiently estimates accurate gene abundances from dscRNA-seq data. Genome Biol 20, 65 (2019). 
diff --git a/bin/emptydrops_cell_calling.R b/bin/emptydrops_cell_calling.R index 23a45267..d21cba61 100755 --- a/bin/emptydrops_cell_calling.R +++ b/bin/emptydrops_cell_calling.R @@ -22,8 +22,8 @@ get_name <- function(file) { } # transpose matrices when required -# based on code of 'mtx_to_seurat.R', only the data from kallisto and alevin would require transposition -print("Only kallisto and alevin have transposed matrices.") +# based on code of 'mtx_to_seurat.R', only the data from kallisto and alevin-fry would require transposition +print("Only kallisto and alevin-fry have transposed matrices.") if (aligner %in% c( "kallisto", "alevin" )) { is_transposed <- TRUE mtx<-t(mtx) diff --git a/docs/output.md b/docs/output.md index 3ab87625..a5292336 100644 --- a/docs/output.md +++ b/docs/output.md @@ -9,20 +9,20 @@ This document describes the output produced by the pipeline. Most of the plots a The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps: - [nf-core/scrnaseq: Output](#nf-corescrnaseq-output) - - [:warning: Please read this documentation on the nf-core website: https://nf-co.re/scrnaseq/output](#warning-please-read-this-documentation-on-the-nf-core-website-httpsnf-corescrnaseqoutput) - [Introduction](#introduction) - [Pipeline overview](#pipeline-overview) - [FastQC](#fastqc) - - [Kallisto & Bustools Results](#kallisto--bustools-results) + - [Kallisto \& Bustools Results](#kallisto--bustools-results) - [STARsolo](#starsolo) - - [Salmon Alevin & AlevinQC](#salmon-alevin--alevinqc) + - [Salmon \& Alevin-fry \& AlevinQC](#salmon--alevin-fry--alevinqc) - [Cellranger](#cellranger) - [Cellranger ARC](#cellranger-arc) + - [Cellranger multi](#cellranger-multi) - [UniverSC](#universc) - [Custom emptydrops filter](#custom-emptydrops-filter) - [Other output data](#other-output-data) - [MultiQC](#multiqc) - - [Pipeline information](#pipeline-information) + - [Pipeline information](#pipeline-information) ## FastQC @@ 
-81,21 +81,23 @@ For details on how to load these into R and perform further downstream analysis, - `star_index` - Contains the index of the supplied genome fasta file -## Salmon Alevin & AlevinQC +## Salmon & Alevin-fry & AlevinQC + +This pipeline uses the simplified and flexible modules in [Simpleaf](https://simpleaf.readthedocs.io/en/latest/) for processing single-cell data with [Salmon](https://salmon.readthedocs.io/en/latest/) as the underlying mapper and [Alevin-fry](https://alevin-fry.readthedocs.io/en/latest/) as the quantification tool. For detailed examples of using the quantification results generated by Alevin-fry in downstream analyses, such as RNA-velocity, please refer to [Alevin-fry/simpleaf tutorials](https://combine-lab.github.io/alevin-fry-tutorials/#blog). **Output directory: `results/alevin`** - `alevin` - - Contains the created Salmon Alevin pseudo-aligned output + - Contains the count matrix created by Alevin-fry - `alevinqc` - - Contains the QC report for the aforementioned Salmon Alevin output data + - Contains the QC report for the aforementioned Alevin-fry output data **Output directory: `results/reference_genome`** - `salmon_index` - - Contains the indexed reference transcriptome for Salmon Alevin + - Contains the indexed reference transcriptome for the Salmon mapper - `alevin/txp2gene.tsv` - - The transcriptome to gene mapping TSV file utilized by Salmon Alevin + - The transcriptome to gene mapping TSV file utilized by Alevin-fry ## Cellranger diff --git a/docs/usage.md b/docs/usage.md index 499e404d..1c8be75b 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -39,7 +39,7 @@ An [example samplesheet](../assets/samplesheet.csv) has been provided with the p This parameter is currently supported by -- [Salmon Alevin](https://salmon.readthedocs.io/en/latest/alevin.html#expectcells) +- 
[Alevin-fry](https://alevin-fry.readthedocs.io/en/latest/generate_permit_list.html#:~:text=procedure%20described%20above.-,%2D%2Dexpect%2Dcells,-%3Cncells%3E%3A%20This) - [STARsolo](https://github.com/alexdobin/STAR/blob/master/docs/STARsolo.md) - [Cellranger](https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/what-is-cell-ranger) @@ -47,7 +47,7 @@ Note that since cellranger v7, it is **not recommended** anymore to supply the ` ## Aligning options -By default, the pipeline uses [Salmon Alevin](https://salmon.readthedocs.io/en/latest/alevin.html) (i.e. --aligner alevin) to perform pseudo-alignment of reads to the reference genome and to perform the downstream BAM-level quantification. Then QC reports are generated with AlevinQC. +By default (i.e. `--aligner alevin`), the pipeline uses [Salmon](https://salmon.readthedocs.io/en/latest/) to perform pseudo-alignment of reads to the reference genome and [Alevin-fry](https://alevin-fry.readthedocs.io/en/latest/) to perform the downstream BAM-level quantification. Then QC reports are generated with AlevinQC. Other aligner options for running the pipeline are: @@ -100,11 +100,11 @@ The command `kb --list` shows all supported, preconfigured protocols. Additional For more details, please refer to the [Kallisto/bustools documentation](https://pachterlab.github.io/kallisto/manual#bus). -#### Alevin/fry +#### Alevin-fry -Alevin/fry also supports custom chemistries in a slighly different format, e.g. `1{b[16]u[12]x:}2{r:}`. +Simpleaf has the ability to pass custom chemistries to Alevin-fry, in a slightly different format, e.g. `1{b[16]u[12]x:}2{r:}`. -For more details, see the [simpleaf documentation](https://simpleaf.readthedocs.io/en/latest/quant-command.html#a-note-on-the-chemistry-flag) +For more details, see Simpleaf's paper, [He _et al._ 2023](https://doi.org/10.1093/bioinformatics/btad614). 
#### UniverSC diff --git a/nextflow.config b/nextflow.config index 70753bea..c130fcc7 100644 --- a/nextflow.config +++ b/nextflow.config @@ -22,7 +22,7 @@ params { fasta = null gtf = null - // salmon alevin parameters (simpleaf) + // alevin-fry parameters (simpleaf) simpleaf_rlen = 91 barcode_whitelist = null salmon_index = null diff --git a/nextflow_schema.json b/nextflow_schema.json index e5fb71b5..811efdde 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -59,7 +59,7 @@ "type": "string", "description": "Name of the tool to use for scRNA (pseudo-) alignment.", "default": "alevin", - "help_text": "The workflow can handle three types of methods:\n\n- Kallisto/Bustools\n- Salmon Alevin + AlevinQC\n- STARsolo\n\nTo choose which one to use, please specify either `alevin`, `star` or `kallisto` as a parameter option for `--aligner`. By default, the pipeline runs the `alevin` option. Note that specifying another aligner option also requires choosing appropriate parameters (see below) for the selected option.", + "help_text": "The workflow can handle three types of methods:\n\n- Kallisto/Bustools\n- Salmon + Alevin-fry + AlevinQC\n- STARsolo\n\nTo choose which one to use, please specify either `alevin`, `star` or `kallisto` as a parameter option for `--aligner`. By default, the pipeline runs the `alevin` option. Note that specifying another aligner option also requires choosing appropriate parameters (see below) for the selected option.", "fa_icon": "fas fa-align-center", "enum": ["kallisto", "star", "alevin", "cellranger", "universc", "cellrangerarc", "cellrangermulti"] }, @@ -153,7 +153,7 @@ } }, "alevin_options": { - "title": "Alevin Options", + "title": "Alevin-fry Options", "type": "object", "description": "", "default": "", @@ -167,8 +167,7 @@ }, "txp2gene": { "type": "string", - "description": "Path to transcript to gene mapping file. 
This allows the specification of a transcript to gene mapping file for Salmon Alevin and AlevinQC.", - "help_text": "> This is only used by the Salmon Alevin workflow.", + "description": "Path to transcript to gene mapping file. This allows the specification of a transcript to gene mapping file for Alevin-fry and AlevinQC.", "fa_icon": "fas fa-map-marked-alt", "format": "file-path", "exists": true From c02a0545f13c6c579f1e2b3716999458a3d1e252 Mon Sep 17 00:00:00 2001 From: an-altosian Date: Mon, 16 Sep 2024 03:22:40 +0000 Subject: [PATCH 2/5] update simpleaf reference --- CITATIONS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CITATIONS.md b/CITATIONS.md index 9a6c64de..8c2045bf 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -20,7 +20,7 @@ - [Simpleaf](https://doi.org/10.1093/bioinformatics/btad614) - > Dongze He, Rob Patro, simpleaf: a simple, flexible, and scalable framework for single-cell data processing using alevin-fry, Bioinformatics 39, 10 (2023). + > He, D., Patro, R. simpleaf: a simple, flexible, and scalable framework for single-cell data processing using alevin-fry, Bioinformatics 39, 10 (2023). 
* [Alevin-fry](https://doi.org/10.1038/s41592-022-01408-3) From f2c1a242d76902caad33fe629c02d15fdbf464e9 Mon Sep 17 00:00:00 2001 From: an-altosian Date: Mon, 16 Sep 2024 03:28:12 +0000 Subject: [PATCH 3/5] change aligner name back to alevin --- bin/emptydrops_cell_calling.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/emptydrops_cell_calling.R b/bin/emptydrops_cell_calling.R index d21cba61..5c406369 100755 --- a/bin/emptydrops_cell_calling.R +++ b/bin/emptydrops_cell_calling.R @@ -23,7 +23,7 @@ get_name <- function(file) { # transpose matrices when required # based on code of 'mtx_to_seurat.R', only the data from kallisto and alevin-fry would require transposition -print("Only kallisto and alevin-fry have transposed matrices.") +print("Only kallisto and alevin have transposed matrices.") if (aligner %in% c( "kallisto", "alevin" )) { is_transposed <- TRUE mtx<-t(mtx) From 77f91f770fe7585593ef9367d74d664a8192229e Mon Sep 17 00:00:00 2001 From: an-altosian Date: Mon, 16 Sep 2024 03:28:40 +0000 Subject: [PATCH 4/5] change aligner name back to alevin --- bin/emptydrops_cell_calling.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/emptydrops_cell_calling.R b/bin/emptydrops_cell_calling.R index 5c406369..23a45267 100755 --- a/bin/emptydrops_cell_calling.R +++ b/bin/emptydrops_cell_calling.R @@ -22,7 +22,7 @@ get_name <- function(file) { } # transpose matrices when required -# based on code of 'mtx_to_seurat.R', only the data from kallisto and alevin-fry would require transposition +# based on code of 'mtx_to_seurat.R', only the data from kallisto and alevin would require transposition print("Only kallisto and alevin have transposed matrices.") if (aligner %in% c( "kallisto", "alevin" )) { is_transposed <- TRUE From 973508b2abec263d43006ac9e6574a43f65ef124 Mon Sep 17 00:00:00 2001 From: an-altosian Date: Mon, 16 Sep 2024 03:35:26 +0000 Subject: [PATCH 5/5] make prettier happy --- CITATIONS.md | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/CITATIONS.md b/CITATIONS.md index 8c2045bf..a9fa4e2f 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -19,7 +19,7 @@ > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. - [Simpleaf](https://doi.org/10.1093/bioinformatics/btad614) - + > He, D., Patro, R. simpleaf: a simple, flexible, and scalable framework for single-cell data processing using alevin-fry, Bioinformatics 39, 10 (2023). * [Alevin-fry](https://doi.org/10.1038/s41592-022-01408-3)