From 1dbfc3d3e0a67605ddada87da5db2555f80ad3b1 Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Fri, 17 Jun 2022 09:17:19 +0200 Subject: [PATCH 001/165] Create mtx_to_h5ad.py Script to convert matrix --- bin/mtx_to_h5ad.py | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100755 bin/mtx_to_h5ad.py diff --git a/bin/mtx_to_h5ad.py b/bin/mtx_to_h5ad.py new file mode 100755 index 00000000..5055885c --- /dev/null +++ b/bin/mtx_to_h5ad.py @@ -0,0 +1,39 @@ +#!/usr/bin/env python3 +import scanpy as sc +import argparse + +def mtx_to_adata( + mtx_file: str, barcode_file: str, feature_file: str, verbose: bool = False +): + + if verbose: + print("Reading in {}".format(mtx_file)) + + adata = sc.read_mtx(mtx_file) + adata.obs_names = pd.read_csv(barcode_file, header=None)[0].values + adata.var_names = pd.read_csv(feature_file, header=None)[0].values + + return adata + + +if __name__ == "__main__": + + parser = argparse.ArgumentParser(description="Converts mtx output to h5ad.") + + parser.add_argument("-m", "--mtx", dest="mtx", help="Path to mtx file.") + parser.add_argument( + "-v", "--verbose", dest="verbose", help="Toggle verbose messages", default=False + ) + parser.add_argument("-f", "--feature", dest="feature", help="Path to feature file.") + parser.add_argument("-b", "--barcode", dest="barcode", help="Path to barcode file.") + parser.add_argument("-o", "--out", dest="out", help="Output path.") + + args = vars(parser.parse_args()) + + adata = mtx_to_adata( + args["mtx"], args["barcode"], args["feature"], verbose=args["verbose"] + ) + + adata.write_h5ad(args["out"]) + + print("Wrote h5ad file to {}".format(args["out"])) \ No newline at end of file From 733ef50acd2971a6069e0f22945fdee24f058941 Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Fri, 17 Jun 2022 09:56:22 +0200 Subject: [PATCH 002/165] adding convertion module --- modules/local/mtx_to_h5ad.nf | 25 +++++++++++++++++++++++++ 
subworkflows/local/align_cellranger.nf | 6 ++++++ 2 files changed, 31 insertions(+) create mode 100644 modules/local/mtx_to_h5ad.nf diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf new file mode 100644 index 00000000..995005c8 --- /dev/null +++ b/modules/local/mtx_to_h5ad.nf @@ -0,0 +1,25 @@ +process MTX_TO_H5AD { + tag "$prefix" + label 'process_medium' + + conda (params.enable_conda ? "conda-forge::scanpy conda-forge::python-igraph conda-forge::leidenalg" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'docker://gcfntnu/scanpy:1.7.0' : + 'gcfntnu/scanpy:1.7.0' }" + + input: + path cellranger_outdir + + output: + path "matrix.h5ad", emit: h5ad + + script: + def prefix = cellranger_outdir.getName().toString() + """ + mtx_to_h5ad.py \\ + -m \$(find ${cellranger_outdir} -wholename "*filtered_feature_bc_matrix/matrix.mtx.gz") \\ + -f \$(find ${cellranger_outdir} -wholename "*filtered_feature_bc_matrix/features.tsv.gz") \\ + -b \$(find ${cellranger_outdir} -wholename "*filtered_feature_bc_matrix/barcodes.tsv.gz") \\ + -o matrix.h5ad + """ +} diff --git a/subworkflows/local/align_cellranger.nf b/subworkflows/local/align_cellranger.nf index 7513935d..05bf0e4d 100644 --- a/subworkflows/local/align_cellranger.nf +++ b/subworkflows/local/align_cellranger.nf @@ -5,6 +5,7 @@ include {CELLRANGER_MKGTF} from "../../modules/nf-core/modules/cellranger/mkgtf/main.nf" include {CELLRANGER_MKREF} from "../../modules/nf-core/modules/cellranger/mkref/main.nf" include {CELLRANGER_COUNT} from "../../modules/nf-core/modules/cellranger/count/main.nf" +include {MTX_TO_H5AD } from "../../modules/local/mtx_to_h5ad.nf" // Define workflow to subset and index a genome region fasta file workflow CELLRANGER_ALIGN { @@ -40,6 +41,11 @@ workflow CELLRANGER_ALIGN { ) ch_versions = ch_versions.mix(CELLRANGER_COUNT.out.versions) + // Convert matrix do h5ad + MTX_TO_H5AD ( + CELLRANGER_COUNT.out.outs + ) + emit: 
ch_versions cellranger_out = CELLRANGER_COUNT.out.outs From 7c1ba78dcaf58f76b51f27399a0f3164ad7d4f46 Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Fri, 17 Jun 2022 10:25:29 +0200 Subject: [PATCH 003/165] Update modules.config add publish dir directive to new module --- conf/modules.config | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/conf/modules.config b/conf/modules.config index 5c78b365..c1d00e4b 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -55,6 +55,12 @@ if(params.aligner == "cellranger") { mode: params.publish_dir_mode ] } + withName: MTX_TO_H5AD { + publishDir = [ + path: "${params.outdir}/cellranger/mtx_to_h5ad", + mode: params.publish_dir_mode + ] + } } } From e06d4888cf0262598a1f969d5543150d9a511abc Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Fri, 17 Jun 2022 13:30:04 +0200 Subject: [PATCH 004/165] Update mtx_to_h5ad.py adding pandas import --- bin/mtx_to_h5ad.py | 1 + 1 file changed, 1 insertion(+) diff --git a/bin/mtx_to_h5ad.py b/bin/mtx_to_h5ad.py index 5055885c..0868eb37 100755 --- a/bin/mtx_to_h5ad.py +++ b/bin/mtx_to_h5ad.py @@ -1,5 +1,6 @@ #!/usr/bin/env python3 import scanpy as sc +import pandas as pd import argparse def mtx_to_adata( From 336ea7941ffe6546fe1a1cff8564a016fe1c67fd Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Fri, 17 Jun 2022 13:55:05 +0200 Subject: [PATCH 005/165] Bump dev --- CHANGELOG.md | 2 +- nextflow.config | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ae4f9149..92e1d6fc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
-## v2.0dev - +## v2.0.0 - 2022-06-17 "Gray Nickel Beagle" - Pipeline ported to dsl2 - Template update with latest nf-core/tools v2.1 diff --git a/nextflow.config b/nextflow.config index ad7d4ba6..c986adb8 100644 --- a/nextflow.config +++ b/nextflow.config @@ -198,7 +198,7 @@ manifest { description = 'Pipeline for processing of 10xGenomics single cell rnaseq data' mainScript = 'main.nf' nextflowVersion = '!>=21.10.3' - version = '2.0.0' + version = '2.0.1dev' } // Load modules.config for DSL2 module specific options From ab35513172027336f45f1e02e03cc477ba925f0b Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Fri, 17 Jun 2022 13:56:40 +0200 Subject: [PATCH 006/165] Adjust changelog for dev --- CHANGELOG.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 92e1d6fc..e60b3b56 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,13 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
+## v2.0.1dev + + +### Fixes + + + ## v2.0.0 - 2022-06-17 "Gray Nickel Beagle" - Pipeline ported to dsl2 From bd2c1e5dc8b65ee7a03931d57ebc75df3902e2c4 Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Fri, 17 Jun 2022 14:01:42 +0200 Subject: [PATCH 007/165] Update mtx_to_h5ad.py made it simpler --- bin/mtx_to_h5ad.py | 25 +++++++------------------ 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/bin/mtx_to_h5ad.py b/bin/mtx_to_h5ad.py index 0868eb37..95390271 100755 --- a/bin/mtx_to_h5ad.py +++ b/bin/mtx_to_h5ad.py @@ -1,18 +1,13 @@ #!/usr/bin/env python3 import scanpy as sc -import pandas as pd import argparse -def mtx_to_adata( - mtx_file: str, barcode_file: str, feature_file: str, verbose: bool = False -): +def mtx_to_adata( mtx_dir: str, verbose: bool = False ): if verbose: - print("Reading in {}".format(mtx_file)) + print("Reading in {}".format(mtx_dir)) - adata = sc.read_mtx(mtx_file) - adata.obs_names = pd.read_csv(barcode_file, header=None)[0].values - adata.var_names = pd.read_csv(feature_file, header=None)[0].values + adata = sc.read_10x_mtx(mtx_dir) return adata @@ -21,19 +16,13 @@ def mtx_to_adata( parser = argparse.ArgumentParser(description="Converts mtx output to h5ad.") - parser.add_argument("-m", "--mtx", dest="mtx", help="Path to mtx file.") - parser.add_argument( - "-v", "--verbose", dest="verbose", help="Toggle verbose messages", default=False - ) - parser.add_argument("-f", "--feature", dest="feature", help="Path to feature file.") - parser.add_argument("-b", "--barcode", dest="barcode", help="Path to barcode file.") - parser.add_argument("-o", "--out", dest="out", help="Output path.") + parser.add_argument("-m", "--mtx", dest="mtx", help="Path to mtx directory." ) + parser.add_argument("-v", "--verbose", dest="verbose", help="Toggle verbose messages", default=False ) + parser.add_argument("-o", "--out", dest="out", help="Output path." 
) args = vars(parser.parse_args()) - adata = mtx_to_adata( - args["mtx"], args["barcode"], args["feature"], verbose=args["verbose"] - ) + adata = mtx_to_adata(args["mtx"], verbose=args["verbose"]) adata.write_h5ad(args["out"]) From bba06408ce7499c0d698b1b3ffc38cf9cf8c4189 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Fri, 17 Jun 2022 14:02:52 +0200 Subject: [PATCH 008/165] Add grst, fix badges --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index aec34ff7..184a0d38 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ [![GitHub Actions CI Status](https://github.com/nf-core/scrnaseq/workflows/nf-core%20CI/badge.svg)](https://github.com/nf-core/scrnaseq/actions?query=workflow%3A%22nf-core+CI%22) [![GitHub Actions Linting Status](https://github.com/nf-core/scrnaseq/workflows/nf-core%20linting/badge.svg)](https://github.com/nf-core/scrnaseq/actions?query=workflow%3A%22nf-core+linting%22) [![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?logo=Amazon%20AWS)](https://nf-co.re/scrnaseq/results) -[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8)](https://doi.org/10.5281/zenodo.XXXXXXX) +[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.6656322)](https://doi.org/10.5281/zenodo.6656322) [![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A521.10.3-23aa62.svg)](https://www.nextflow.io/) [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?logo=anaconda)](https://docs.conda.io/en/latest/) @@ -71,6 +71,7 @@ We thank the following people for their extensive assistance in the development - @KevinMenden - @ggabernet +- @grst - @FloWuenne - @fmalmeida From b1d4a45b0dab2d903dc438a99df9100ff4aa0dde Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Fri, 17 Jun 2022 14:03:15 +0200 Subject: [PATCH 009/165] Update mtx_to_h5ad.nf fit in to new script --- modules/local/mtx_to_h5ad.nf | 4 +--- 1 file changed, 1 
insertion(+), 3 deletions(-) diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf index 995005c8..f32d3bc8 100644 --- a/modules/local/mtx_to_h5ad.nf +++ b/modules/local/mtx_to_h5ad.nf @@ -17,9 +17,7 @@ process MTX_TO_H5AD { def prefix = cellranger_outdir.getName().toString() """ mtx_to_h5ad.py \\ - -m \$(find ${cellranger_outdir} -wholename "*filtered_feature_bc_matrix/matrix.mtx.gz") \\ - -f \$(find ${cellranger_outdir} -wholename "*filtered_feature_bc_matrix/features.tsv.gz") \\ - -b \$(find ${cellranger_outdir} -wholename "*filtered_feature_bc_matrix/barcodes.tsv.gz") \\ + -m ${cellranger_outdir}/outs/filtered_feature_bc_matrix/matrix.mtx.gz") \\ -o matrix.h5ad """ } From df36e97099efd7e09169d5937c8d32af7f2a13c6 Mon Sep 17 00:00:00 2001 From: nf-core-bot Date: Fri, 17 Jun 2022 12:04:39 +0000 Subject: [PATCH 010/165] [automated] Fix linting with Prettier --- CHANGELOG.md | 3 --- 1 file changed, 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e60b3b56..68229e01 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,11 +5,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## v2.0.1dev - ### Fixes - - ## v2.0.0 - 2022-06-17 "Gray Nickel Beagle" - Pipeline ported to dsl2 From 5a38db3cd70d51bd5587d1712f8615115f11bdbe Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Fri, 17 Jun 2022 17:08:25 +0200 Subject: [PATCH 011/165] Update mtx_to_h5ad.nf --- modules/local/mtx_to_h5ad.nf | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf index f32d3bc8..16abbf45 100644 --- a/modules/local/mtx_to_h5ad.nf +++ b/modules/local/mtx_to_h5ad.nf @@ -1,5 +1,5 @@ process MTX_TO_H5AD { - tag "$prefix" + //tag "$prefix" label 'process_medium' conda (params.enable_conda ? 
"conda-forge::scanpy conda-forge::python-igraph conda-forge::leidenalg" : null) @@ -14,8 +14,9 @@ process MTX_TO_H5AD { path "matrix.h5ad", emit: h5ad script: - def prefix = cellranger_outdir.getName().toString() + //def prefix = cellranger_outdir.getName().toString() """ + ls ${cellranger_outdir} > ls.txt mtx_to_h5ad.py \\ -m ${cellranger_outdir}/outs/filtered_feature_bc_matrix/matrix.mtx.gz") \\ -o matrix.h5ad From bc80ba39a5a001b37bd5959a2f909b1646a0ed57 Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Fri, 17 Jun 2022 17:14:11 +0200 Subject: [PATCH 012/165] Update align_cellranger.nf --- subworkflows/local/align_cellranger.nf | 1 + 1 file changed, 1 insertion(+) diff --git a/subworkflows/local/align_cellranger.nf b/subworkflows/local/align_cellranger.nf index 05bf0e4d..de459ba3 100644 --- a/subworkflows/local/align_cellranger.nf +++ b/subworkflows/local/align_cellranger.nf @@ -42,6 +42,7 @@ workflow CELLRANGER_ALIGN { ch_versions = ch_versions.mix(CELLRANGER_COUNT.out.versions) // Convert matrix do h5ad + CELLRANGER_COUNT.out.outs.collect().view() MTX_TO_H5AD ( CELLRANGER_COUNT.out.outs ) From 350ef5da28dc58bfe9383a64df83ed524bafff80 Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Fri, 17 Jun 2022 19:58:45 +0200 Subject: [PATCH 013/165] trying to get prefix --- modules/local/mtx_to_h5ad.nf | 7 +++---- subworkflows/local/align_cellranger.nf | 1 - 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf index 16abbf45..77b68cc5 100644 --- a/modules/local/mtx_to_h5ad.nf +++ b/modules/local/mtx_to_h5ad.nf @@ -1,5 +1,5 @@ process MTX_TO_H5AD { - //tag "$prefix" + tag "$prefix" label 'process_medium' conda (params.enable_conda ? 
"conda-forge::scanpy conda-forge::python-igraph conda-forge::leidenalg" : null) @@ -14,11 +14,10 @@ process MTX_TO_H5AD { path "matrix.h5ad", emit: h5ad script: - //def prefix = cellranger_outdir.getName().toString() + def prefix = cellranger_outdir[0].getName().toString()[-3] """ - ls ${cellranger_outdir} > ls.txt mtx_to_h5ad.py \\ - -m ${cellranger_outdir}/outs/filtered_feature_bc_matrix/matrix.mtx.gz") \\ + -m filtered_feature_bc_matrix \\ -o matrix.h5ad """ } diff --git a/subworkflows/local/align_cellranger.nf b/subworkflows/local/align_cellranger.nf index de459ba3..05bf0e4d 100644 --- a/subworkflows/local/align_cellranger.nf +++ b/subworkflows/local/align_cellranger.nf @@ -42,7 +42,6 @@ workflow CELLRANGER_ALIGN { ch_versions = ch_versions.mix(CELLRANGER_COUNT.out.versions) // Convert matrix do h5ad - CELLRANGER_COUNT.out.outs.collect().view() MTX_TO_H5AD ( CELLRANGER_COUNT.out.outs ) From 840f35f22459fcd04f63dad71269c47d87911336 Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Fri, 17 Jun 2022 20:04:28 +0200 Subject: [PATCH 014/165] Update align_cellranger.nf checking map definition --- subworkflows/local/align_cellranger.nf | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/subworkflows/local/align_cellranger.nf b/subworkflows/local/align_cellranger.nf index 05bf0e4d..19cb6e47 100644 --- a/subworkflows/local/align_cellranger.nf +++ b/subworkflows/local/align_cellranger.nf @@ -42,6 +42,10 @@ workflow CELLRANGER_ALIGN { ch_versions = ch_versions.mix(CELLRANGER_COUNT.out.versions) // Convert matrix do h5ad + CELLRANGER_COUNT.out.outs.map{ inputs -> + prefix = inputs[0].toString()[-3] + [ prefix, inputs ] + }.view() MTX_TO_H5AD ( CELLRANGER_COUNT.out.outs ) From 8099ce88090a29cdab8930577377b9a1c36f591e Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Fri, 17 Jun 2022 20:12:16 +0200 Subject: [PATCH 015/165] input receives a prefix value --- modules/local/mtx_to_h5ad.nf | 3 +-- subworkflows/local/align_cellranger.nf | 9 ++++----- 2 
files changed, 5 insertions(+), 7 deletions(-) diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf index 77b68cc5..15cf2021 100644 --- a/modules/local/mtx_to_h5ad.nf +++ b/modules/local/mtx_to_h5ad.nf @@ -8,13 +8,12 @@ process MTX_TO_H5AD { 'gcfntnu/scanpy:1.7.0' }" input: - path cellranger_outdir + tuple val(prefix), path(cellranger_outdir) output: path "matrix.h5ad", emit: h5ad script: - def prefix = cellranger_outdir[0].getName().toString()[-3] """ mtx_to_h5ad.py \\ -m filtered_feature_bc_matrix \\ diff --git a/subworkflows/local/align_cellranger.nf b/subworkflows/local/align_cellranger.nf index 19cb6e47..cc12f251 100644 --- a/subworkflows/local/align_cellranger.nf +++ b/subworkflows/local/align_cellranger.nf @@ -42,12 +42,11 @@ workflow CELLRANGER_ALIGN { ch_versions = ch_versions.mix(CELLRANGER_COUNT.out.versions) // Convert matrix do h5ad - CELLRANGER_COUNT.out.outs.map{ inputs -> - prefix = inputs[0].toString()[-3] - [ prefix, inputs ] - }.view() MTX_TO_H5AD ( - CELLRANGER_COUNT.out.outs + CELLRANGER_COUNT.out.outs.map{ inputs -> + prefix = inputs[0].toString()[-3] + [ prefix, inputs ] + } ) emit: From ef6e8b300ea5ef38d13fa43af961f47453353bf1 Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Fri, 17 Jun 2022 20:13:17 +0200 Subject: [PATCH 016/165] Update align_cellranger.nf add tokenize --- subworkflows/local/align_cellranger.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/align_cellranger.nf b/subworkflows/local/align_cellranger.nf index cc12f251..504049ac 100644 --- a/subworkflows/local/align_cellranger.nf +++ b/subworkflows/local/align_cellranger.nf @@ -44,7 +44,7 @@ workflow CELLRANGER_ALIGN { // Convert matrix do h5ad MTX_TO_H5AD ( CELLRANGER_COUNT.out.outs.map{ inputs -> - prefix = inputs[0].toString()[-3] + prefix = inputs[0].toString().tokenize('/')[-3] [ prefix, inputs ] } ) From af186a49ead42fe309d3bc3292a3075fc93d8031 Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida 
Date: Fri, 17 Jun 2022 20:14:39 +0200 Subject: [PATCH 017/165] uses prefix from module --- conf/modules.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index c1d00e4b..ede9a3d7 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -57,7 +57,7 @@ if(params.aligner == "cellranger") { } withName: MTX_TO_H5AD { publishDir = [ - path: "${params.outdir}/cellranger/mtx_to_h5ad", + path: "${params.outdir}/cellranger/count/${prefix}", mode: params.publish_dir_mode ] } From 6bcaea181ea4df980ba97eacde04788849e0e644 Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Fri, 17 Jun 2022 20:18:31 +0200 Subject: [PATCH 018/165] fix publishing dir --- conf/modules.config | 2 +- modules/local/mtx_to_h5ad.nf | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index ede9a3d7..7478e76f 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -57,7 +57,7 @@ if(params.aligner == "cellranger") { } withName: MTX_TO_H5AD { publishDir = [ - path: "${params.outdir}/cellranger/count/${prefix}", + path: "${params.outdir}/cellranger/count", mode: params.publish_dir_mode ] } diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf index 15cf2021..77d52084 100644 --- a/modules/local/mtx_to_h5ad.nf +++ b/modules/local/mtx_to_h5ad.nf @@ -15,8 +15,9 @@ process MTX_TO_H5AD { script: """ + mkdir -p ${prefix} mtx_to_h5ad.py \\ -m filtered_feature_bc_matrix \\ - -o matrix.h5ad + -o ${prefix}/matrix.h5ad """ } From c47b0dacafac55b7fe32d526b9f7aed3b08bae9d Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Fri, 17 Jun 2022 20:20:56 +0200 Subject: [PATCH 019/165] differentiate prefixes --- modules/local/mtx_to_h5ad.nf | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf index 77d52084..57500486 100644 --- a/modules/local/mtx_to_h5ad.nf +++ 
b/modules/local/mtx_to_h5ad.nf @@ -8,16 +8,17 @@ process MTX_TO_H5AD { 'gcfntnu/scanpy:1.7.0' }" input: - tuple val(prefix), path(cellranger_outdir) + tuple val(cellranger_prefix), path(cellranger_outdir) output: path "matrix.h5ad", emit: h5ad script: + def prefix = cellranger_prefix.tokenize('-')[1] """ - mkdir -p ${prefix} + mkdir -p ${cellranger_prefix} mtx_to_h5ad.py \\ -m filtered_feature_bc_matrix \\ - -o ${prefix}/matrix.h5ad + -o ${cellranger_prefix}/matrix.h5ad """ } From 1814f4f46f74924be09f2203e07b25b08f789c2d Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Fri, 17 Jun 2022 20:23:06 +0200 Subject: [PATCH 020/165] saving values in meta map --- modules/local/mtx_to_h5ad.nf | 11 +++++------ subworkflows/local/align_cellranger.nf | 6 ++++-- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf index 57500486..781fffa9 100644 --- a/modules/local/mtx_to_h5ad.nf +++ b/modules/local/mtx_to_h5ad.nf @@ -1,5 +1,5 @@ process MTX_TO_H5AD { - tag "$prefix" + tag "$meta.id" label 'process_medium' conda (params.enable_conda ? 
"conda-forge::scanpy conda-forge::python-igraph conda-forge::leidenalg" : null) @@ -8,17 +8,16 @@ process MTX_TO_H5AD { 'gcfntnu/scanpy:1.7.0' }" input: - tuple val(cellranger_prefix), path(cellranger_outdir) + tuple val(meta), path(cellranger_outdir) output: - path "matrix.h5ad", emit: h5ad + path "${meta.cellranger_prefix}", emit: h5ad script: - def prefix = cellranger_prefix.tokenize('-')[1] """ - mkdir -p ${cellranger_prefix} + mkdir -p ${meta.cellranger_prefix} mtx_to_h5ad.py \\ -m filtered_feature_bc_matrix \\ - -o ${cellranger_prefix}/matrix.h5ad + -o ${meta.cellranger_prefix}/matrix.h5ad """ } diff --git a/subworkflows/local/align_cellranger.nf b/subworkflows/local/align_cellranger.nf index 504049ac..ac00659f 100644 --- a/subworkflows/local/align_cellranger.nf +++ b/subworkflows/local/align_cellranger.nf @@ -44,8 +44,10 @@ workflow CELLRANGER_ALIGN { // Convert matrix do h5ad MTX_TO_H5AD ( CELLRANGER_COUNT.out.outs.map{ inputs -> - prefix = inputs[0].toString().tokenize('/')[-3] - [ prefix, inputs ] + meta = [:] + meta.cellranger_prefix = inputs[0].toString().tokenize('/')[-3] + meta.id = meta.cellranger_prefix.tokenize('-')[1] + [ meta, inputs ] } ) From 220300927dc6eb6383880eb375daebfee7c9d6f1 Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Fri, 17 Jun 2022 20:34:45 +0200 Subject: [PATCH 021/165] changing other images --- modules/local/kallistobustools_count.nf | 2 +- modules/local/mtx_to_h5ad.nf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/local/kallistobustools_count.nf b/modules/local/kallistobustools_count.nf index 7f3dabff..382de329 100644 --- a/modules/local/kallistobustools_count.nf +++ b/modules/local/kallistobustools_count.nf @@ -4,7 +4,7 @@ process KALLISTOBUSTOOLS_COUNT { conda (params.enable_conda ? "bioconda::kb-python=0.25.1" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/kb-python:0.25.1--py_0' : + 'https://depot.galaxyproject.org/singularity/scanpy-scripts:1.1.6--pypyhdfd78af_0' : 'quay.io/biocontainers/kb-python:0.25.1--py_0' }" input: diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf index 781fffa9..58bf8f57 100644 --- a/modules/local/mtx_to_h5ad.nf +++ b/modules/local/mtx_to_h5ad.nf @@ -5,7 +5,7 @@ process MTX_TO_H5AD { conda (params.enable_conda ? "conda-forge::scanpy conda-forge::python-igraph conda-forge::leidenalg" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'docker://gcfntnu/scanpy:1.7.0' : - 'gcfntnu/scanpy:1.7.0' }" + 'quay.io/biocontainers/scanpy-scripts:1.1.6--pypyhdfd78af_0' }" input: tuple val(meta), path(cellranger_outdir) From d7ad56dadaebfab093a0513054fd8d3f0e686901 Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Sun, 19 Jun 2022 12:39:44 +0200 Subject: [PATCH 022/165] Update mtx_to_h5ad.nf changing to biocontainers images --- modules/local/mtx_to_h5ad.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf index 58bf8f57..4e7bd535 100644 --- a/modules/local/mtx_to_h5ad.nf +++ b/modules/local/mtx_to_h5ad.nf @@ -4,8 +4,8 @@ process MTX_TO_H5AD { conda (params.enable_conda ? "conda-forge::scanpy conda-forge::python-igraph conda-forge::leidenalg" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'docker://gcfntnu/scanpy:1.7.0' : - 'quay.io/biocontainers/scanpy-scripts:1.1.6--pypyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/scanpy:1.7.2--pyhdfd78af_0' : + 'quay.io/biocontainers/scanpy:1.7.2--pyhdfd78af_0' }" input: tuple val(meta), path(cellranger_outdir) From fac8fcaae4a21ad0f9d3caca7478afb914260f62 Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Sun, 19 Jun 2022 13:49:05 +0200 Subject: [PATCH 023/165] updating publish dir directive --- conf/modules.config | 2 +- modules/local/mtx_to_h5ad.nf | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 7478e76f..fad74332 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -57,7 +57,7 @@ if(params.aligner == "cellranger") { } withName: MTX_TO_H5AD { publishDir = [ - path: "${params.outdir}/cellranger/count", + path: "${params.outdir}/cellranger/count/${meta.cellranger_prefix}/filtered_feature_bc_matrix", mode: params.publish_dir_mode ] } diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf index 4e7bd535..3c7631fd 100644 --- a/modules/local/mtx_to_h5ad.nf +++ b/modules/local/mtx_to_h5ad.nf @@ -11,13 +11,14 @@ process MTX_TO_H5AD { tuple val(meta), path(cellranger_outdir) output: - path "${meta.cellranger_prefix}", emit: h5ad + path "matrix.h5ad.gz", emit: h5ad script: """ - mkdir -p ${meta.cellranger_prefix} mtx_to_h5ad.py \\ -m filtered_feature_bc_matrix \\ - -o ${meta.cellranger_prefix}/matrix.h5ad + -o matrix.h5ad + + gzip -c matrix.h5ad > matrix.h5ad.gz """ } From 6f66870d36a9d0165dee000c78948fafef88b664 Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Mon, 20 Jun 2022 09:41:40 +0200 Subject: [PATCH 024/165] single quoting it to check if var can be used --- conf/modules.config | 2 +- modules/local/mtx_to_h5ad.nf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index fad74332..47aaa942 100644 --- 
a/conf/modules.config +++ b/conf/modules.config @@ -57,7 +57,7 @@ if(params.aligner == "cellranger") { } withName: MTX_TO_H5AD { publishDir = [ - path: "${params.outdir}/cellranger/count/${meta.cellranger_prefix}/filtered_feature_bc_matrix", + path: '${params.outdir}/cellranger/count/${meta.cellranger_prefix}/filtered_feature_bc_matrix', mode: params.publish_dir_mode ] } diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf index 3c7631fd..f6ac0319 100644 --- a/modules/local/mtx_to_h5ad.nf +++ b/modules/local/mtx_to_h5ad.nf @@ -8,7 +8,7 @@ process MTX_TO_H5AD { 'quay.io/biocontainers/scanpy:1.7.2--pyhdfd78af_0' }" input: - tuple val(meta), path(cellranger_outdir) + tuple val(meta), path(inputs) output: path "matrix.h5ad.gz", emit: h5ad From 11225fc3c5603f9f167af7d06998388414ccc7ba Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Mon, 20 Jun 2022 10:02:57 +0200 Subject: [PATCH 025/165] trying to publish files in fixed place --- conf/modules.config | 2 +- modules/local/mtx_to_h5ad.nf | 10 ++++++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 47aaa942..7478e76f 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -57,7 +57,7 @@ if(params.aligner == "cellranger") { } withName: MTX_TO_H5AD { publishDir = [ - path: '${params.outdir}/cellranger/count/${meta.cellranger_prefix}/filtered_feature_bc_matrix', + path: "${params.outdir}/cellranger/count", mode: params.publish_dir_mode ] } diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf index f6ac0319..b81309d0 100644 --- a/modules/local/mtx_to_h5ad.nf +++ b/modules/local/mtx_to_h5ad.nf @@ -8,17 +8,23 @@ process MTX_TO_H5AD { 'quay.io/biocontainers/scanpy:1.7.2--pyhdfd78af_0' }" input: + // inputs from cellranger nf-core module does not come in a single sample dir + // for each sample, the sub-folders and files come directly in array. 
tuple val(meta), path(inputs) output: - path "matrix.h5ad.gz", emit: h5ad + path "${meta.cellranger_prefix}/outs/filtered_feature_bc_matrix/matrix.h5ad.gz", emit: h5ad script: """ + # create dir to mirror cellranger output organisation to have results published in the same place + mkdir ${meta.cellranger_prefix}/outs/filtered_feature_bc_matrix ; + + # convert file types mtx_to_h5ad.py \\ -m filtered_feature_bc_matrix \\ -o matrix.h5ad - gzip -c matrix.h5ad > matrix.h5ad.gz + gzip -c matrix.h5ad > ${meta.cellranger_prefix}/outs/filtered_feature_bc_matrix/matrix.h5ad.gz """ } From 7ee1e71322a57c30fef41d17b216bcdce4a77d85 Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Mon, 20 Jun 2022 10:59:54 +0200 Subject: [PATCH 026/165] added stub-run and fixed how pipeline pulishes files --- modules/local/mtx_to_h5ad.nf | 11 ++++++++++- subworkflows/local/align_cellranger.nf | 5 ++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf index b81309d0..774a2686 100644 --- a/modules/local/mtx_to_h5ad.nf +++ b/modules/local/mtx_to_h5ad.nf @@ -18,7 +18,7 @@ process MTX_TO_H5AD { script: """ # create dir to mirror cellranger output organisation to have results published in the same place - mkdir ${meta.cellranger_prefix}/outs/filtered_feature_bc_matrix ; + mkdir -p ${meta.cellranger_prefix}/outs/filtered_feature_bc_matrix ; # convert file types mtx_to_h5ad.py \\ @@ -27,4 +27,13 @@ process MTX_TO_H5AD { gzip -c matrix.h5ad > ${meta.cellranger_prefix}/outs/filtered_feature_bc_matrix/matrix.h5ad.gz """ + + stub: + """ + # create dir to mirror cellranger output organisation to have results published in the same place + mkdir -p ${meta.cellranger_prefix}/outs/filtered_feature_bc_matrix ; + + # create dummy + touch ${meta.cellranger_prefix}/outs/filtered_feature_bc_matrix/matrix.h5ad.gz + """ } diff --git a/subworkflows/local/align_cellranger.nf b/subworkflows/local/align_cellranger.nf index 
ac00659f..31724d88 100644 --- a/subworkflows/local/align_cellranger.nf +++ b/subworkflows/local/align_cellranger.nf @@ -45,8 +45,11 @@ workflow CELLRANGER_ALIGN { MTX_TO_H5AD ( CELLRANGER_COUNT.out.outs.map{ inputs -> meta = [:] - meta.cellranger_prefix = inputs[0].toString().tokenize('/')[-3] + // in stub-run variable is string and not an array + if (inputs.getName() ==~ 'fake_file.txt') { meta.cellranger_prefix = [inputs][0].toString().tokenize('/')[-3] } + else { meta.cellranger_prefix = inputs[0].toString().tokenize('/')[-3] } meta.id = meta.cellranger_prefix.tokenize('-')[1] + [ meta, inputs ] } ) From eeb46aeb5bb55b10149a0d9bb14332cbadf3b696 Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Mon, 20 Jun 2022 12:35:54 +0200 Subject: [PATCH 027/165] remove trailling whitespace --- modules/local/mtx_to_h5ad.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf index 774a2686..8daf17c8 100644 --- a/modules/local/mtx_to_h5ad.nf +++ b/modules/local/mtx_to_h5ad.nf @@ -24,7 +24,7 @@ process MTX_TO_H5AD { mtx_to_h5ad.py \\ -m filtered_feature_bc_matrix \\ -o matrix.h5ad - + gzip -c matrix.h5ad > ${meta.cellranger_prefix}/outs/filtered_feature_bc_matrix/matrix.h5ad.gz """ From 17cd392bdcb9abeff210191612fca20d3021a7d2 Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Mon, 20 Jun 2022 14:57:14 +0200 Subject: [PATCH 028/165] fixed stub-run check --- subworkflows/local/align_cellranger.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/align_cellranger.nf b/subworkflows/local/align_cellranger.nf index 31724d88..cd22c42b 100644 --- a/subworkflows/local/align_cellranger.nf +++ b/subworkflows/local/align_cellranger.nf @@ -46,7 +46,7 @@ workflow CELLRANGER_ALIGN { CELLRANGER_COUNT.out.outs.map{ inputs -> meta = [:] // in stub-run variable is string and not an array - if (inputs.getName() ==~ 'fake_file.txt') { meta.cellranger_prefix = 
[inputs][0].toString().tokenize('/')[-3] } + if (workflow.stubRun) { meta.cellranger_prefix = [inputs][0].toString().tokenize('/')[-3] } else { meta.cellranger_prefix = inputs[0].toString().tokenize('/')[-3] } meta.id = meta.cellranger_prefix.tokenize('-')[1] From 3959cd6e7d8ce63224a6a0b5e0bb5d9b1a96ef9c Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Mon, 20 Jun 2022 15:19:45 +0200 Subject: [PATCH 029/165] Adding `MTX_TO_H5AD` module (#1) Created module to automatically convert `.mtx` file into `.h5ad` using scanpy. --- bin/mtx_to_h5ad.py | 29 ++++++++++++++++++ conf/modules.config | 6 ++++ modules/local/kallistobustools_count.nf | 2 +- modules/local/mtx_to_h5ad.nf | 39 +++++++++++++++++++++++++ subworkflows/local/align_cellranger.nf | 14 +++++++++ 5 files changed, 89 insertions(+), 1 deletion(-) create mode 100755 bin/mtx_to_h5ad.py create mode 100644 modules/local/mtx_to_h5ad.nf diff --git a/bin/mtx_to_h5ad.py b/bin/mtx_to_h5ad.py new file mode 100755 index 00000000..95390271 --- /dev/null +++ b/bin/mtx_to_h5ad.py @@ -0,0 +1,29 @@ +#!/usr/bin/env python3 +import scanpy as sc +import argparse + +def mtx_to_adata( mtx_dir: str, verbose: bool = False ): + + if verbose: + print("Reading in {}".format(mtx_dir)) + + adata = sc.read_10x_mtx(mtx_dir) + + return adata + + +if __name__ == "__main__": + + parser = argparse.ArgumentParser(description="Converts mtx output to h5ad.") + + parser.add_argument("-m", "--mtx", dest="mtx", help="Path to mtx directory." ) + parser.add_argument("-v", "--verbose", dest="verbose", help="Toggle verbose messages", default=False ) + parser.add_argument("-o", "--out", dest="out", help="Output path." 
) + + args = vars(parser.parse_args()) + + adata = mtx_to_adata(args["mtx"], verbose=args["verbose"]) + + adata.write_h5ad(args["out"]) + + print("Wrote h5ad file to {}".format(args["out"])) \ No newline at end of file diff --git a/conf/modules.config b/conf/modules.config index 5c78b365..7478e76f 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -55,6 +55,12 @@ if(params.aligner == "cellranger") { mode: params.publish_dir_mode ] } + withName: MTX_TO_H5AD { + publishDir = [ + path: "${params.outdir}/cellranger/count", + mode: params.publish_dir_mode + ] + } } } diff --git a/modules/local/kallistobustools_count.nf b/modules/local/kallistobustools_count.nf index 7f3dabff..382de329 100644 --- a/modules/local/kallistobustools_count.nf +++ b/modules/local/kallistobustools_count.nf @@ -4,7 +4,7 @@ process KALLISTOBUSTOOLS_COUNT { conda (params.enable_conda ? "bioconda::kb-python=0.25.1" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/kb-python:0.25.1--py_0' : + 'https://depot.galaxyproject.org/singularity/scanpy-scripts:1.1.6--pypyhdfd78af_0' : 'quay.io/biocontainers/kb-python:0.25.1--py_0' }" input: diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf new file mode 100644 index 00000000..8daf17c8 --- /dev/null +++ b/modules/local/mtx_to_h5ad.nf @@ -0,0 +1,39 @@ +process MTX_TO_H5AD { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? "conda-forge::scanpy conda-forge::python-igraph conda-forge::leidenalg" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/scanpy:1.7.2--pyhdfd78af_0' : + 'quay.io/biocontainers/scanpy:1.7.2--pyhdfd78af_0' }" + + input: + // inputs from cellranger nf-core module does not come in a single sample dir + // for each sample, the sub-folders and files come directly in array. + tuple val(meta), path(inputs) + + output: + path "${meta.cellranger_prefix}/outs/filtered_feature_bc_matrix/matrix.h5ad.gz", emit: h5ad + + script: + """ + # create dir to mirror cellranger output organisation to have results published in the same place + mkdir -p ${meta.cellranger_prefix}/outs/filtered_feature_bc_matrix ; + + # convert file types + mtx_to_h5ad.py \\ + -m filtered_feature_bc_matrix \\ + -o matrix.h5ad + + gzip -c matrix.h5ad > ${meta.cellranger_prefix}/outs/filtered_feature_bc_matrix/matrix.h5ad.gz + """ + + stub: + """ + # create dir to mirror cellranger output organisation to have results published in the same place + mkdir -p ${meta.cellranger_prefix}/outs/filtered_feature_bc_matrix ; + + # create dummy + touch ${meta.cellranger_prefix}/outs/filtered_feature_bc_matrix/matrix.h5ad.gz + """ +} diff --git a/subworkflows/local/align_cellranger.nf b/subworkflows/local/align_cellranger.nf index 7513935d..cd22c42b 100644 --- a/subworkflows/local/align_cellranger.nf +++ b/subworkflows/local/align_cellranger.nf @@ -5,6 +5,7 @@ include {CELLRANGER_MKGTF} from "../../modules/nf-core/modules/cellranger/mkgtf/main.nf" include {CELLRANGER_MKREF} from "../../modules/nf-core/modules/cellranger/mkref/main.nf" include {CELLRANGER_COUNT} from "../../modules/nf-core/modules/cellranger/count/main.nf" +include {MTX_TO_H5AD } from "../../modules/local/mtx_to_h5ad.nf" // Define workflow to subset and index a genome region fasta file workflow CELLRANGER_ALIGN { @@ -40,6 +41,19 @@ workflow CELLRANGER_ALIGN { ) ch_versions = ch_versions.mix(CELLRANGER_COUNT.out.versions) + // Convert matrix do h5ad + MTX_TO_H5AD ( + CELLRANGER_COUNT.out.outs.map{ inputs -> + meta = [:] + // in 
stub-run variable is string and not an array + if (workflow.stubRun) { meta.cellranger_prefix = [inputs][0].toString().tokenize('/')[-3] } + else { meta.cellranger_prefix = inputs[0].toString().tokenize('/')[-3] } + meta.id = meta.cellranger_prefix.tokenize('-')[1] + + [ meta, inputs ] + } + ) + emit: ch_versions cellranger_out = CELLRANGER_COUNT.out.outs From 3f6ea9afd4677199f9b34708ef65aa373b512090 Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Mon, 20 Jun 2022 17:12:27 +0200 Subject: [PATCH 030/165] made summary more generic --- conf/modules.config | 2 +- modules/local/mtx_to_h5ad.nf | 18 +++++++----------- 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 7478e76f..8ca6a93b 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -57,7 +57,7 @@ if(params.aligner == "cellranger") { } withName: MTX_TO_H5AD { publishDir = [ - path: "${params.outdir}/cellranger/count", + path: { "${params.outdir}/cellranger/count/${meta.cellranger_prefix}/outs/filtered_feature_bc_matrix" }, mode: params.publish_dir_mode ] } diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf index 8daf17c8..4bd649d0 100644 --- a/modules/local/mtx_to_h5ad.nf +++ b/modules/local/mtx_to_h5ad.nf @@ -13,27 +13,23 @@ process MTX_TO_H5AD { tuple val(meta), path(inputs) output: - path "${meta.cellranger_prefix}/outs/filtered_feature_bc_matrix/matrix.h5ad.gz", emit: h5ad + path "matrix.h5ad.gz", emit: h5ad script: + if (params.aligner == 'cellranger') { + matrix_directory = "filtered_feature_bc_matrix" + } """ - # create dir to mirror cellranger output organisation to have results published in the same place - mkdir -p ${meta.cellranger_prefix}/outs/filtered_feature_bc_matrix ; - # convert file types mtx_to_h5ad.py \\ - -m filtered_feature_bc_matrix \\ + -m ${matrix_directory} \\ -o matrix.h5ad - gzip -c matrix.h5ad > ${meta.cellranger_prefix}/outs/filtered_feature_bc_matrix/matrix.h5ad.gz + gzip -c 
matrix.h5ad > matrix.h5ad.gz """ stub: """ - # create dir to mirror cellranger output organisation to have results published in the same place - mkdir -p ${meta.cellranger_prefix}/outs/filtered_feature_bc_matrix ; - - # create dummy - touch ${meta.cellranger_prefix}/outs/filtered_feature_bc_matrix/matrix.h5ad.gz + touch matrix.h5ad.gz """ } From 1f3d828b362211b7def45f37deda41a4d55b5512 Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Tue, 21 Jun 2022 10:05:38 +0200 Subject: [PATCH 031/165] fixed singularity image --- modules/local/kallistobustools_count.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/kallistobustools_count.nf b/modules/local/kallistobustools_count.nf index 382de329..7f3dabff 100644 --- a/modules/local/kallistobustools_count.nf +++ b/modules/local/kallistobustools_count.nf @@ -4,7 +4,7 @@ process KALLISTOBUSTOOLS_COUNT { conda (params.enable_conda ? "bioconda::kb-python=0.25.1" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/scanpy-scripts:1.1.6--pypyhdfd78af_0' : + 'https://depot.galaxyproject.org/singularity/kb-python:0.25.1--py_0' : 'quay.io/biocontainers/kb-python:0.25.1--py_0' }" input: From 7078858921b9f7748aba1acf9fc257d6f7865b32 Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Tue, 21 Jun 2022 10:56:35 +0200 Subject: [PATCH 032/165] added conversion module for kallistobustools --- bin/cellranger_mtx_to_h5ad.py | 29 +++++++++++++++++++++++ bin/mtx_to_h5ad.py | 31 ++++++++++++++++++------- conf/modules.config | 11 +++++++++ modules/local/mtx_to_h5ad.nf | 18 ++++++++++---- subworkflows/local/kallisto_bustools.nf | 6 +++++ 5 files changed, 83 insertions(+), 12 deletions(-) create mode 100755 bin/cellranger_mtx_to_h5ad.py diff --git a/bin/cellranger_mtx_to_h5ad.py b/bin/cellranger_mtx_to_h5ad.py new file mode 100755 index 00000000..95390271 --- /dev/null +++ b/bin/cellranger_mtx_to_h5ad.py @@ -0,0 +1,29 @@ +#!/usr/bin/env python3 +import scanpy as sc +import argparse + +def mtx_to_adata( mtx_dir: str, verbose: bool = False ): + + if verbose: + print("Reading in {}".format(mtx_dir)) + + adata = sc.read_10x_mtx(mtx_dir) + + return adata + + +if __name__ == "__main__": + + parser = argparse.ArgumentParser(description="Converts mtx output to h5ad.") + + parser.add_argument("-m", "--mtx", dest="mtx", help="Path to mtx directory." ) + parser.add_argument("-v", "--verbose", dest="verbose", help="Toggle verbose messages", default=False ) + parser.add_argument("-o", "--out", dest="out", help="Output path." 
) + + args = vars(parser.parse_args()) + + adata = mtx_to_adata(args["mtx"], verbose=args["verbose"]) + + adata.write_h5ad(args["out"]) + + print("Wrote h5ad file to {}".format(args["out"])) \ No newline at end of file diff --git a/bin/mtx_to_h5ad.py b/bin/mtx_to_h5ad.py index 95390271..cb03e387 100755 --- a/bin/mtx_to_h5ad.py +++ b/bin/mtx_to_h5ad.py @@ -1,13 +1,22 @@ #!/usr/bin/env python3 +import sys +import os import scanpy as sc +import pandas as pd +import typing import argparse -def mtx_to_adata( mtx_dir: str, verbose: bool = False ): + +def mtx_to_adata( + mtx_file: str, barcode_file: str, feature_file: str, verbose: bool = False +): if verbose: - print("Reading in {}".format(mtx_dir)) + print("Reading in {}".format(mtx_file)) - adata = sc.read_10x_mtx(mtx_dir) + adata = sc.read_mtx(mtx_file) + adata.obs_names = pd.read_csv(barcode_file, header=None)[0].values + adata.var_names = pd.read_csv(feature_file, header=None)[0].values return adata @@ -16,14 +25,20 @@ def mtx_to_adata( mtx_dir: str, verbose: bool = False ): parser = argparse.ArgumentParser(description="Converts mtx output to h5ad.") - parser.add_argument("-m", "--mtx", dest="mtx", help="Path to mtx directory." ) - parser.add_argument("-v", "--verbose", dest="verbose", help="Toggle verbose messages", default=False ) - parser.add_argument("-o", "--out", dest="out", help="Output path." 
) + parser.add_argument("-m", "--mtx", dest="mtx", help="Path to mtx file.") + parser.add_argument( + "-v", "--verbose", dest="verbose", help="Toggle verbose messages", default=False + ) + parser.add_argument("-f", "--feature", dest="feature", help="Path to feature file.") + parser.add_argument("-b", "--barcode", dest="barcode", help="Path to barcode file.") + parser.add_argument("-o", "--out", dest="out", help="Output path.") args = vars(parser.parse_args()) - adata = mtx_to_adata(args["mtx"], verbose=args["verbose"]) + adata = mtx_to_adata( + args["mtx"], args["barcode"], args["feature"], verbose=args["verbose"] + ) adata.write_h5ad(args["out"]) - print("Wrote h5ad file to {}".format(args["out"])) \ No newline at end of file + print("Wrote h5ad file to {}".format(args["out"])) diff --git a/conf/modules.config b/conf/modules.config index 8ca6a93b..9484a21f 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -80,3 +80,14 @@ if (params.aligner == "star") { } } } + +if (params.aligner == "kallisto") { + process { + withName: MTX_TO_H5AD { + publishDir = [ + path: { "${params.outdir}/kallistobustools/${meta.id}_kallistobustools_count/counts_unfiltered" }, + mode: params.publish_dir_mode + ] + } + } +} diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf index 4bd649d0..7292feaa 100644 --- a/modules/local/mtx_to_h5ad.nf +++ b/modules/local/mtx_to_h5ad.nf @@ -16,13 +16,23 @@ process MTX_TO_H5AD { path "matrix.h5ad.gz", emit: h5ad script: - if (params.aligner == 'cellranger') { - matrix_directory = "filtered_feature_bc_matrix" - } + if (params.aligner == 'cellranger') + """ + # convert file types + cellranger_mtx_to_h5ad.py \\ + -m filtered_feature_bc_matrix \\ + -o matrix.h5ad + + gzip -c matrix.h5ad > matrix.h5ad.gz + """ + + else if (params.aligner == 'kallisto') """ # convert file types mtx_to_h5ad.py \\ - -m ${matrix_directory} \\ + -m ${meta.id}_kallistobustools_count/counts_unfiltered/*.mtx \\ + -b 
${meta.id}_kallistobustools_count/counts_unfiltered/*.barcodes.txt \\ + -f ${meta.id}_kallistobustools_count/counts_unfiltered/*.genes.txt \\ -o matrix.h5ad gzip -c matrix.h5ad > matrix.h5ad.gz diff --git a/subworkflows/local/kallisto_bustools.nf b/subworkflows/local/kallisto_bustools.nf index 7503f85a..5d99d864 100644 --- a/subworkflows/local/kallisto_bustools.nf +++ b/subworkflows/local/kallisto_bustools.nf @@ -1,6 +1,7 @@ /* -- IMPORT LOCAL MODULES/SUBWORKFLOWS -- */ include { GENE_MAP } from '../../modules/local/gene_map' include { KALLISTOBUSTOOLS_COUNT } from '../../modules/local/kallistobustools_count' +include {MTX_TO_H5AD } from "../../modules/local/mtx_to_h5ad.nf" /* -- IMPORT NF-CORE MODULES/SUBWORKFLOWS -- */ include { GUNZIP } from '../../modules/nf-core/modules/gunzip/main' @@ -64,6 +65,11 @@ workflow KALLISTO_BUSTOOLS { ) ch_versions = ch_versions.mix(KALLISTOBUSTOOLS_COUNT.out.versions) + // Convert matrix do h5ad + MTX_TO_H5AD ( + KALLISTOBUSTOOLS_COUNT.out.counts + ) + emit: ch_versions From 68e3a4942b3f2aed6ab434f0e3ccfafafe9f5c54 Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Tue, 21 Jun 2022 12:35:51 +0200 Subject: [PATCH 033/165] added conversion module for alevin --- bin/mtx_to_h5ad.py | 2 +- conf/modules.config | 6 ++++++ modules/local/mtx_to_h5ad.nf | 24 ++++++++++++++++++------ subworkflows/local/alevin.nf | 8 ++++++++ subworkflows/local/kallisto_bustools.nf | 2 +- 5 files changed, 34 insertions(+), 8 deletions(-) diff --git a/bin/mtx_to_h5ad.py b/bin/mtx_to_h5ad.py index cb03e387..f006f8a2 100755 --- a/bin/mtx_to_h5ad.py +++ b/bin/mtx_to_h5ad.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python import sys import os import scanpy as sc diff --git a/conf/modules.config b/conf/modules.config index 9484a21f..1f891688 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -70,6 +70,12 @@ if (params.aligner == "alevin") { ext.args = "--table transcript_id,gene_id" ext.prefix = { "${gff.baseName}_gffread" } } 
+ withName: MTX_TO_H5AD { + publishDir = [ + path: { "${params.outdir}/salmon/${meta.id}_alevin_results/alevin" }, + mode: params.publish_dir_mode + ] + } } } diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf index 7292feaa..2fd7474f 100644 --- a/modules/local/mtx_to_h5ad.nf +++ b/modules/local/mtx_to_h5ad.nf @@ -13,7 +13,7 @@ process MTX_TO_H5AD { tuple val(meta), path(inputs) output: - path "matrix.h5ad.gz", emit: h5ad + path "*.h5ad.gz", emit: h5ad script: if (params.aligner == 'cellranger') @@ -30,12 +30,24 @@ process MTX_TO_H5AD { """ # convert file types mtx_to_h5ad.py \\ - -m ${meta.id}_kallistobustools_count/counts_unfiltered/*.mtx \\ - -b ${meta.id}_kallistobustools_count/counts_unfiltered/*.barcodes.txt \\ - -f ${meta.id}_kallistobustools_count/counts_unfiltered/*.genes.txt \\ - -o matrix.h5ad + -m *_kallistobustools_count/counts_unfiltered/*.mtx \\ + -b *_kallistobustools_count/counts_unfiltered/*.barcodes.txt \\ + -f *_kallistobustools_count/counts_unfiltered/*.genes.txt \\ + -o cells_x_genes.h5ad - gzip -c matrix.h5ad > matrix.h5ad.gz + gzip -c cells_x_genes.h5ad > cells_x_genes.h5ad.gz + """ + + else if (params.aligner == 'alevin') + """ + # convert file types + mtx_to_h5ad.py \\ + -m *_alevin_results/alevin/quants_mat.mtx.gz \\ + -b *_alevin_results/alevin/quants_mat_rows.txt \\ + -f *_alevin_results/alevin/quants_mat_cols.txt \\ + -o quants_mat.h5ad + + gzip -c quants_mat.h5ad > quants_mat.h5ad.gz """ stub: diff --git a/subworkflows/local/alevin.nf b/subworkflows/local/alevin.nf index a5cf6607..abaa6efc 100644 --- a/subworkflows/local/alevin.nf +++ b/subworkflows/local/alevin.nf @@ -2,6 +2,7 @@ include { GFFREAD_TRANSCRIPTOME } from '../../modules/local/gffread_transcriptome' include { SALMON_ALEVIN } from '../../modules/local/salmon_alevin' include { ALEVINQC } from '../../modules/local/alevinqc' +include {MTX_TO_H5AD } from '../../modules/local/mtx_to_h5ad.nf' /* -- IMPORT NF-CORE MODULES/SUBWORKFLOWS -- */ include { GUNZIP 
} from '../../modules/nf-core/modules/gunzip/main' @@ -72,6 +73,13 @@ workflow SCRNASEQ_ALEVIN { ) ch_versions = ch_versions.mix(SALMON_ALEVIN.out.versions) + /* + * Convert matrix do h5ad + */ + MTX_TO_H5AD ( + SALMON_ALEVIN.out.alevin_results + ) + /* * Run alevinQC */ diff --git a/subworkflows/local/kallisto_bustools.nf b/subworkflows/local/kallisto_bustools.nf index 5d99d864..3b8cd968 100644 --- a/subworkflows/local/kallisto_bustools.nf +++ b/subworkflows/local/kallisto_bustools.nf @@ -1,7 +1,7 @@ /* -- IMPORT LOCAL MODULES/SUBWORKFLOWS -- */ include { GENE_MAP } from '../../modules/local/gene_map' include { KALLISTOBUSTOOLS_COUNT } from '../../modules/local/kallistobustools_count' -include {MTX_TO_H5AD } from "../../modules/local/mtx_to_h5ad.nf" +include {MTX_TO_H5AD } from '../../modules/local/mtx_to_h5ad.nf' /* -- IMPORT NF-CORE MODULES/SUBWORKFLOWS -- */ include { GUNZIP } from '../../modules/nf-core/modules/gunzip/main' From 0e02363babd80e61863a8f1823f811d755afa737 Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Tue, 21 Jun 2022 12:36:50 +0200 Subject: [PATCH 034/165] adding conversion module (#3) * added conversion module for kallistobustools * added conversion module for alevin --- bin/cellranger_mtx_to_h5ad.py | 29 +++++++++++++++++++++ bin/mtx_to_h5ad.py | 33 +++++++++++++++++------- conf/modules.config | 17 +++++++++++++ modules/local/mtx_to_h5ad.nf | 34 ++++++++++++++++++++----- subworkflows/local/alevin.nf | 8 ++++++ subworkflows/local/kallisto_bustools.nf | 6 +++++ 6 files changed, 112 insertions(+), 15 deletions(-) create mode 100755 bin/cellranger_mtx_to_h5ad.py diff --git a/bin/cellranger_mtx_to_h5ad.py b/bin/cellranger_mtx_to_h5ad.py new file mode 100755 index 00000000..95390271 --- /dev/null +++ b/bin/cellranger_mtx_to_h5ad.py @@ -0,0 +1,29 @@ +#!/usr/bin/env python3 +import scanpy as sc +import argparse + +def mtx_to_adata( mtx_dir: str, verbose: bool = False ): + + if verbose: + print("Reading in {}".format(mtx_dir)) + + adata 
= sc.read_10x_mtx(mtx_dir) + + return adata + + +if __name__ == "__main__": + + parser = argparse.ArgumentParser(description="Converts mtx output to h5ad.") + + parser.add_argument("-m", "--mtx", dest="mtx", help="Path to mtx directory." ) + parser.add_argument("-v", "--verbose", dest="verbose", help="Toggle verbose messages", default=False ) + parser.add_argument("-o", "--out", dest="out", help="Output path." ) + + args = vars(parser.parse_args()) + + adata = mtx_to_adata(args["mtx"], verbose=args["verbose"]) + + adata.write_h5ad(args["out"]) + + print("Wrote h5ad file to {}".format(args["out"])) \ No newline at end of file diff --git a/bin/mtx_to_h5ad.py b/bin/mtx_to_h5ad.py index 95390271..f006f8a2 100755 --- a/bin/mtx_to_h5ad.py +++ b/bin/mtx_to_h5ad.py @@ -1,13 +1,22 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python +import sys +import os import scanpy as sc +import pandas as pd +import typing import argparse -def mtx_to_adata( mtx_dir: str, verbose: bool = False ): + +def mtx_to_adata( + mtx_file: str, barcode_file: str, feature_file: str, verbose: bool = False +): if verbose: - print("Reading in {}".format(mtx_dir)) + print("Reading in {}".format(mtx_file)) - adata = sc.read_10x_mtx(mtx_dir) + adata = sc.read_mtx(mtx_file) + adata.obs_names = pd.read_csv(barcode_file, header=None)[0].values + adata.var_names = pd.read_csv(feature_file, header=None)[0].values return adata @@ -16,14 +25,20 @@ def mtx_to_adata( mtx_dir: str, verbose: bool = False ): parser = argparse.ArgumentParser(description="Converts mtx output to h5ad.") - parser.add_argument("-m", "--mtx", dest="mtx", help="Path to mtx directory." ) - parser.add_argument("-v", "--verbose", dest="verbose", help="Toggle verbose messages", default=False ) - parser.add_argument("-o", "--out", dest="out", help="Output path." 
) + parser.add_argument("-m", "--mtx", dest="mtx", help="Path to mtx file.") + parser.add_argument( + "-v", "--verbose", dest="verbose", help="Toggle verbose messages", default=False + ) + parser.add_argument("-f", "--feature", dest="feature", help="Path to feature file.") + parser.add_argument("-b", "--barcode", dest="barcode", help="Path to barcode file.") + parser.add_argument("-o", "--out", dest="out", help="Output path.") args = vars(parser.parse_args()) - adata = mtx_to_adata(args["mtx"], verbose=args["verbose"]) + adata = mtx_to_adata( + args["mtx"], args["barcode"], args["feature"], verbose=args["verbose"] + ) adata.write_h5ad(args["out"]) - print("Wrote h5ad file to {}".format(args["out"])) \ No newline at end of file + print("Wrote h5ad file to {}".format(args["out"])) diff --git a/conf/modules.config b/conf/modules.config index 8ca6a93b..1f891688 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -70,6 +70,12 @@ if (params.aligner == "alevin") { ext.args = "--table transcript_id,gene_id" ext.prefix = { "${gff.baseName}_gffread" } } + withName: MTX_TO_H5AD { + publishDir = [ + path: { "${params.outdir}/salmon/${meta.id}_alevin_results/alevin" }, + mode: params.publish_dir_mode + ] + } } } @@ -80,3 +86,14 @@ if (params.aligner == "star") { } } } + +if (params.aligner == "kallisto") { + process { + withName: MTX_TO_H5AD { + publishDir = [ + path: { "${params.outdir}/kallistobustools/${meta.id}_kallistobustools_count/counts_unfiltered" }, + mode: params.publish_dir_mode + ] + } + } +} diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf index 4bd649d0..2fd7474f 100644 --- a/modules/local/mtx_to_h5ad.nf +++ b/modules/local/mtx_to_h5ad.nf @@ -13,21 +13,43 @@ process MTX_TO_H5AD { tuple val(meta), path(inputs) output: - path "matrix.h5ad.gz", emit: h5ad + path "*.h5ad.gz", emit: h5ad script: - if (params.aligner == 'cellranger') { - matrix_directory = "filtered_feature_bc_matrix" - } + if (params.aligner == 'cellranger') """ # 
convert file types - mtx_to_h5ad.py \\ - -m ${matrix_directory} \\ + cellranger_mtx_to_h5ad.py \\ + -m filtered_feature_bc_matrix \\ -o matrix.h5ad gzip -c matrix.h5ad > matrix.h5ad.gz """ + else if (params.aligner == 'kallisto') + """ + # convert file types + mtx_to_h5ad.py \\ + -m *_kallistobustools_count/counts_unfiltered/*.mtx \\ + -b *_kallistobustools_count/counts_unfiltered/*.barcodes.txt \\ + -f *_kallistobustools_count/counts_unfiltered/*.genes.txt \\ + -o cells_x_genes.h5ad + + gzip -c cells_x_genes.h5ad > cells_x_genes.h5ad.gz + """ + + else if (params.aligner == 'alevin') + """ + # convert file types + mtx_to_h5ad.py \\ + -m *_alevin_results/alevin/quants_mat.mtx.gz \\ + -b *_alevin_results/alevin/quants_mat_rows.txt \\ + -f *_alevin_results/alevin/quants_mat_cols.txt \\ + -o quants_mat.h5ad + + gzip -c quants_mat.h5ad > quants_mat.h5ad.gz + """ + stub: """ touch matrix.h5ad.gz diff --git a/subworkflows/local/alevin.nf b/subworkflows/local/alevin.nf index a5cf6607..abaa6efc 100644 --- a/subworkflows/local/alevin.nf +++ b/subworkflows/local/alevin.nf @@ -2,6 +2,7 @@ include { GFFREAD_TRANSCRIPTOME } from '../../modules/local/gffread_transcriptome' include { SALMON_ALEVIN } from '../../modules/local/salmon_alevin' include { ALEVINQC } from '../../modules/local/alevinqc' +include {MTX_TO_H5AD } from '../../modules/local/mtx_to_h5ad.nf' /* -- IMPORT NF-CORE MODULES/SUBWORKFLOWS -- */ include { GUNZIP } from '../../modules/nf-core/modules/gunzip/main' @@ -72,6 +73,13 @@ workflow SCRNASEQ_ALEVIN { ) ch_versions = ch_versions.mix(SALMON_ALEVIN.out.versions) + /* + * Convert matrix do h5ad + */ + MTX_TO_H5AD ( + SALMON_ALEVIN.out.alevin_results + ) + /* * Run alevinQC */ diff --git a/subworkflows/local/kallisto_bustools.nf b/subworkflows/local/kallisto_bustools.nf index 7503f85a..3b8cd968 100644 --- a/subworkflows/local/kallisto_bustools.nf +++ b/subworkflows/local/kallisto_bustools.nf @@ -1,6 +1,7 @@ /* -- IMPORT LOCAL MODULES/SUBWORKFLOWS -- */ include { 
GENE_MAP } from '../../modules/local/gene_map' include { KALLISTOBUSTOOLS_COUNT } from '../../modules/local/kallistobustools_count' +include {MTX_TO_H5AD } from '../../modules/local/mtx_to_h5ad.nf' /* -- IMPORT NF-CORE MODULES/SUBWORKFLOWS -- */ include { GUNZIP } from '../../modules/nf-core/modules/gunzip/main' @@ -64,6 +65,11 @@ workflow KALLISTO_BUSTOOLS { ) ch_versions = ch_versions.mix(KALLISTOBUSTOOLS_COUNT.out.versions) + // Convert matrix do h5ad + MTX_TO_H5AD ( + KALLISTOBUSTOOLS_COUNT.out.counts + ) + emit: ch_versions From 49613da4d346fd1b76bf5e8ac9ac6e2e7e7ea348 Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Tue, 21 Jun 2022 13:37:22 +0200 Subject: [PATCH 035/165] now using hdf5's internal compression --- bin/cellranger_mtx_to_h5ad.py | 4 ++-- bin/mtx_to_h5ad.py | 2 +- modules/local/mtx_to_h5ad.nf | 8 +------- 3 files changed, 4 insertions(+), 10 deletions(-) diff --git a/bin/cellranger_mtx_to_h5ad.py b/bin/cellranger_mtx_to_h5ad.py index 95390271..88c189d5 100755 --- a/bin/cellranger_mtx_to_h5ad.py +++ b/bin/cellranger_mtx_to_h5ad.py @@ -24,6 +24,6 @@ def mtx_to_adata( mtx_dir: str, verbose: bool = False ): adata = mtx_to_adata(args["mtx"], verbose=args["verbose"]) - adata.write_h5ad(args["out"]) + adata.write_h5ad(args["out"], compression="gzip") - print("Wrote h5ad file to {}".format(args["out"])) \ No newline at end of file + print("Wrote h5ad file to {}".format(args["out"])) diff --git a/bin/mtx_to_h5ad.py b/bin/mtx_to_h5ad.py index f006f8a2..eaf91cd0 100755 --- a/bin/mtx_to_h5ad.py +++ b/bin/mtx_to_h5ad.py @@ -39,6 +39,6 @@ def mtx_to_adata( args["mtx"], args["barcode"], args["feature"], verbose=args["verbose"] ) - adata.write_h5ad(args["out"]) + adata.write_h5ad(args["out"], compression="gzip") print("Wrote h5ad file to {}".format(args["out"])) diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf index 2fd7474f..4578b898 100644 --- a/modules/local/mtx_to_h5ad.nf +++ b/modules/local/mtx_to_h5ad.nf @@ -13,7 +13,7 
@@ process MTX_TO_H5AD { tuple val(meta), path(inputs) output: - path "*.h5ad.gz", emit: h5ad + path "*.h5ad", emit: h5ad script: if (params.aligner == 'cellranger') @@ -22,8 +22,6 @@ process MTX_TO_H5AD { cellranger_mtx_to_h5ad.py \\ -m filtered_feature_bc_matrix \\ -o matrix.h5ad - - gzip -c matrix.h5ad > matrix.h5ad.gz """ else if (params.aligner == 'kallisto') @@ -34,8 +32,6 @@ process MTX_TO_H5AD { -b *_kallistobustools_count/counts_unfiltered/*.barcodes.txt \\ -f *_kallistobustools_count/counts_unfiltered/*.genes.txt \\ -o cells_x_genes.h5ad - - gzip -c cells_x_genes.h5ad > cells_x_genes.h5ad.gz """ else if (params.aligner == 'alevin') @@ -46,8 +42,6 @@ process MTX_TO_H5AD { -b *_alevin_results/alevin/quants_mat_rows.txt \\ -f *_alevin_results/alevin/quants_mat_cols.txt \\ -o quants_mat.h5ad - - gzip -c quants_mat.h5ad > quants_mat.h5ad.gz """ stub: From 50d2e68c279692cd049e1d7e598a140d911b66a3 Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Tue, 21 Jun 2022 13:43:49 +0200 Subject: [PATCH 036/165] fixed stub --- modules/local/mtx_to_h5ad.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf index 4578b898..82b51ec9 100644 --- a/modules/local/mtx_to_h5ad.nf +++ b/modules/local/mtx_to_h5ad.nf @@ -46,6 +46,6 @@ process MTX_TO_H5AD { stub: """ - touch matrix.h5ad.gz + touch matrix.h5ad """ } From f1cd97fb66024e246372cfeefee6367272be6963 Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Wed, 22 Jun 2022 09:53:04 +0200 Subject: [PATCH 037/165] conversion to H5AD is now a subworkflow * Added concatenation of h5ad to alevin pipeline --- bin/cellranger_mtx_to_h5ad.py | 2 +- bin/concat_h5ad.py | 44 ++++++++++++++++++++++++ bin/mtx_to_h5ad.py | 3 -- conf/modules.config | 6 ++++ modules/local/concat_h5ad.nf | 25 ++++++++++++++ modules/local/mtx_to_h5ad.nf | 8 ++--- subworkflows/local/alevin.nf | 12 ++----- subworkflows/local/conversion_to_h5ad.nf | 27 +++++++++++++++ 
workflows/scrnaseq.nf | 11 +++++- 9 files changed, 119 insertions(+), 19 deletions(-) create mode 100755 bin/concat_h5ad.py create mode 100644 modules/local/concat_h5ad.nf create mode 100644 subworkflows/local/conversion_to_h5ad.nf diff --git a/bin/cellranger_mtx_to_h5ad.py b/bin/cellranger_mtx_to_h5ad.py index 88c189d5..d58be151 100755 --- a/bin/cellranger_mtx_to_h5ad.py +++ b/bin/cellranger_mtx_to_h5ad.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python import scanpy as sc import argparse diff --git a/bin/concat_h5ad.py b/bin/concat_h5ad.py new file mode 100755 index 00000000..066c956d --- /dev/null +++ b/bin/concat_h5ad.py @@ -0,0 +1,44 @@ +#!/usr/bin/env python +import scanpy as sc, anndata as ad, pandas as pd +from pathlib import Path +import argparse + +# empty list to hold sample datasets +list_of_h5ad = [] + +def read_samplesheet(samplesheet): + df = pd.read_csv(samplesheet) + return(df) + +# find available h5ad files and append to list +def append_h5ad_files(): + for path in Path(".").rglob('*.h5ad'): + adata = sc.read_h5ad(path.name) + list_of_h5ad.append(adata) + +# combine and write +# combination without inner or out join, just a simple concatenation. +def concat_h5ad(outfile): + combined = ad.concat(list_of_h5ad) + return(combined) + +if __name__ == "__main__": + + parser = argparse.ArgumentParser(description="Concatenates h5ad files and merge metadata from samplesheet") + + parser.add_argument("-i", "--input", dest="input", help="Path to samplesheet.csv") + parser.add_argument("-o", "--out", dest="out", help="Output path.") + + args = vars(parser.parse_args()) + + # how to merge this on adata.obs? 
+ df_samplesheet = read_samplesheet(args["input"]) + + # find all and append to list + append_h5ad_files() + + # concat and write + adata = concat_h5ad(args["out"]) + adata.write_h5ad(args["out"], compression="gzip") + + print("Wrote h5ad file to {}".format(args["out"])) \ No newline at end of file diff --git a/bin/mtx_to_h5ad.py b/bin/mtx_to_h5ad.py index eaf91cd0..09013994 100755 --- a/bin/mtx_to_h5ad.py +++ b/bin/mtx_to_h5ad.py @@ -1,9 +1,6 @@ #!/usr/bin/env python -import sys -import os import scanpy as sc import pandas as pd -import typing import argparse diff --git a/conf/modules.config b/conf/modules.config index 1f891688..0dd6fefb 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -76,6 +76,12 @@ if (params.aligner == "alevin") { mode: params.publish_dir_mode ] } + withName: CONCAT_H5AD { + publishDir = [ + path: { "${params.outdir}/salmon/concat_h5ad" }, + mode: params.publish_dir_mode + ] + } } } diff --git a/modules/local/concat_h5ad.nf b/modules/local/concat_h5ad.nf new file mode 100644 index 00000000..616e16d8 --- /dev/null +++ b/modules/local/concat_h5ad.nf @@ -0,0 +1,25 @@ +process CONCAT_H5AD { + label 'process_medium' + + conda (params.enable_conda ? "conda-forge::scanpy conda-forge::python-igraph conda-forge::leidenalg" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/scanpy:1.7.2--pyhdfd78af_0' : + 'quay.io/biocontainers/scanpy:1.7.2--pyhdfd78af_0' }" + + input: + path h5ad + path samplesheet + + output: + path "*.h5ad", emit: h5ad + + script: + """ + concat_h5ad.py -i $samplesheet -o combined_matrix.h5ad + """ + + stub: + """ + touch combined_matrix.h5ad + """ +} diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf index 82b51ec9..88b0e1c7 100644 --- a/modules/local/mtx_to_h5ad.nf +++ b/modules/local/mtx_to_h5ad.nf @@ -21,7 +21,7 @@ process MTX_TO_H5AD { # convert file types cellranger_mtx_to_h5ad.py \\ -m filtered_feature_bc_matrix \\ - -o matrix.h5ad + -o ${meta.id}_matrix.h5ad """ else if (params.aligner == 'kallisto') @@ -31,7 +31,7 @@ process MTX_TO_H5AD { -m *_kallistobustools_count/counts_unfiltered/*.mtx \\ -b *_kallistobustools_count/counts_unfiltered/*.barcodes.txt \\ -f *_kallistobustools_count/counts_unfiltered/*.genes.txt \\ - -o cells_x_genes.h5ad + -o ${meta.id}_matrix.h5ad """ else if (params.aligner == 'alevin') @@ -41,11 +41,11 @@ process MTX_TO_H5AD { -m *_alevin_results/alevin/quants_mat.mtx.gz \\ -b *_alevin_results/alevin/quants_mat_rows.txt \\ -f *_alevin_results/alevin/quants_mat_cols.txt \\ - -o quants_mat.h5ad + -o ${meta.id}_matrix.h5ad """ stub: """ - touch matrix.h5ad + touch ${meta.id}_matrix.h5ad """ } diff --git a/subworkflows/local/alevin.nf b/subworkflows/local/alevin.nf index abaa6efc..c1b122e1 100644 --- a/subworkflows/local/alevin.nf +++ b/subworkflows/local/alevin.nf @@ -1,8 +1,7 @@ /* -- IMPORT LOCAL MODULES/SUBWORKFLOWS -- */ include { GFFREAD_TRANSCRIPTOME } from '../../modules/local/gffread_transcriptome' -include { SALMON_ALEVIN } from '../../modules/local/salmon_alevin' -include { ALEVINQC } from '../../modules/local/alevinqc' -include {MTX_TO_H5AD } from '../../modules/local/mtx_to_h5ad.nf' +include { SALMON_ALEVIN } from '../../modules/local/salmon_alevin' +include { ALEVINQC } from '../../modules/local/alevinqc' /* 
-- IMPORT NF-CORE MODULES/SUBWORKFLOWS -- */ include { GUNZIP } from '../../modules/nf-core/modules/gunzip/main' @@ -73,13 +72,6 @@ workflow SCRNASEQ_ALEVIN { ) ch_versions = ch_versions.mix(SALMON_ALEVIN.out.versions) - /* - * Convert matrix do h5ad - */ - MTX_TO_H5AD ( - SALMON_ALEVIN.out.alevin_results - ) - /* * Run alevinQC */ diff --git a/subworkflows/local/conversion_to_h5ad.nf b/subworkflows/local/conversion_to_h5ad.nf new file mode 100644 index 00000000..67b100d5 --- /dev/null +++ b/subworkflows/local/conversion_to_h5ad.nf @@ -0,0 +1,27 @@ +/* -- IMPORT LOCAL MODULES/SUBWORKFLOWS -- */ +include { MTX_TO_H5AD } from '../../modules/local/mtx_to_h5ad.nf' +include { CONCAT_H5AD } from '../../modules/local/concat_h5ad.nf' + +workflow H5AD_CONVERSION { + + take: + mtx_matrices + samplesheet + + main: + // + // Convert matrix do h5ad + // + MTX_TO_H5AD ( + mtx_matrices + ) + + // + // Concat sample-specific h5ad in one + // + CONCAT_H5AD ( + MTX_TO_H5AD.out.h5ad.collect(), // gather all sample-specific files + samplesheet + ) + +} diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf index 3b54ced9..f0c9bb00 100644 --- a/workflows/scrnaseq.nf +++ b/workflows/scrnaseq.nf @@ -40,6 +40,7 @@ include { KALLISTO_BUSTOOLS } from '../subworkflows/local/kallisto_bustools' include { SCRNASEQ_ALEVIN } from '../subworkflows/local/alevin' include { STARSOLO } from '../subworkflows/local/starsolo' include { CELLRANGER_ALIGN } from "../subworkflows/local/align_cellranger" +include { H5AD_CONVERSION } from "../subworkflows/local/conversion_to_h5ad" /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -98,7 +99,8 @@ ch_cellranger_index = params.cellranger_index ? 
file(params.cellranger_index) : workflow SCRNASEQ { - ch_versions = Channel.empty() + ch_versions = Channel.empty() + ch_mtx_matrices = Channel.empty() // Check input files and stage input data ch_fastq = INPUT_CHECK( ch_input ).reads @@ -135,6 +137,7 @@ workflow SCRNASEQ { ) ch_versions = ch_versions.mix(SCRNASEQ_ALEVIN.out.ch_versions) ch_multiqc_alevin = SCRNASEQ_ALEVIN.out.for_multiqc + ch_mtx_matrices = ch_mtx_matrices.mix(SCRNASEQ_ALEVIN.out.alevin_results) } // Run STARSolo pipeline @@ -163,6 +166,12 @@ workflow SCRNASEQ { ch_versions = ch_versions.mix(CELLRANGER_ALIGN.out.ch_versions) } + // Run mtx to h5ad conversion subworkflow + H5AD_CONVERSION ( + ch_mtx_matrices, + ch_input + ) + // collect software versions CUSTOM_DUMPSOFTWAREVERSIONS ( ch_versions.unique().collectFile(name: 'collated_versions.yml') From 68c0dac751b85f9f74a9498a32d23e7d34620e75 Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Wed, 22 Jun 2022 10:02:32 +0200 Subject: [PATCH 038/165] added concatenation module for kallisto --- conf/modules.config | 6 ++++++ subworkflows/local/kallisto_bustools.nf | 7 ------- workflows/scrnaseq.nf | 1 + 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 0dd6fefb..6ccb5af8 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -101,5 +101,11 @@ if (params.aligner == "kallisto") { mode: params.publish_dir_mode ] } + withName: CONCAT_H5AD { + publishDir = [ + path: { "${params.outdir}/kallistobustools/concat_h5ad" }, + mode: params.publish_dir_mode + ] + } } } diff --git a/subworkflows/local/kallisto_bustools.nf b/subworkflows/local/kallisto_bustools.nf index 3b8cd968..204852da 100644 --- a/subworkflows/local/kallisto_bustools.nf +++ b/subworkflows/local/kallisto_bustools.nf @@ -1,7 +1,6 @@ /* -- IMPORT LOCAL MODULES/SUBWORKFLOWS -- */ include { GENE_MAP } from '../../modules/local/gene_map' include { KALLISTOBUSTOOLS_COUNT } from '../../modules/local/kallistobustools_count' 
-include {MTX_TO_H5AD } from '../../modules/local/mtx_to_h5ad.nf' /* -- IMPORT NF-CORE MODULES/SUBWORKFLOWS -- */ include { GUNZIP } from '../../modules/nf-core/modules/gunzip/main' @@ -65,12 +64,6 @@ workflow KALLISTO_BUSTOOLS { ) ch_versions = ch_versions.mix(KALLISTOBUSTOOLS_COUNT.out.versions) - // Convert matrix do h5ad - MTX_TO_H5AD ( - KALLISTOBUSTOOLS_COUNT.out.counts - ) - - emit: ch_versions counts = KALLISTOBUSTOOLS_COUNT.out.counts diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf index f0c9bb00..f7c2180f 100644 --- a/workflows/scrnaseq.nf +++ b/workflows/scrnaseq.nf @@ -120,6 +120,7 @@ workflow SCRNASEQ { ch_fastq ) ch_versions = ch_versions.mix(KALLISTO_BUSTOOLS.out.ch_versions) + ch_mtx_matrices = ch_mtx_matrices.mix(KALLISTO_BUSTOOLS.out.counts) } // Run salmon alevin pipeline From 24d52178a076ef4547a06dd9786f207f5faa1e28 Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Wed, 22 Jun 2022 10:08:06 +0200 Subject: [PATCH 039/165] added concatenation module for cellranger --- conf/modules.config | 10 ++++++++-- subworkflows/local/align_cellranger.nf | 13 +++++++------ workflows/scrnaseq.nf | 1 + 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 6ccb5af8..eb61cd75 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -61,6 +61,12 @@ if(params.aligner == "cellranger") { mode: params.publish_dir_mode ] } + withName: CONCAT_H5AD { + publishDir = [ + path: { "${params.outdir}/cellranger/count/concatenated_h5ad" }, + mode: params.publish_dir_mode + ] + } } } @@ -78,7 +84,7 @@ if (params.aligner == "alevin") { } withName: CONCAT_H5AD { publishDir = [ - path: { "${params.outdir}/salmon/concat_h5ad" }, + path: { "${params.outdir}/salmon/concatenated_h5ad" }, mode: params.publish_dir_mode ] } @@ -103,7 +109,7 @@ if (params.aligner == "kallisto") { } withName: CONCAT_H5AD { publishDir = [ - path: { "${params.outdir}/kallistobustools/concat_h5ad" }, + path: { 
"${params.outdir}/kallistobustools/concatenated_h5ad" }, mode: params.publish_dir_mode ] } diff --git a/subworkflows/local/align_cellranger.nf b/subworkflows/local/align_cellranger.nf index cd22c42b..cda067ff 100644 --- a/subworkflows/local/align_cellranger.nf +++ b/subworkflows/local/align_cellranger.nf @@ -41,20 +41,21 @@ workflow CELLRANGER_ALIGN { ) ch_versions = ch_versions.mix(CELLRANGER_COUNT.out.versions) - // Convert matrix do h5ad - MTX_TO_H5AD ( - CELLRANGER_COUNT.out.outs.map{ inputs -> + // rebuild out channel to be in compliance with what is required for h5ad conversion modules + // out channel comes without meta map from nf-core module + ch_count_outputs_rebuilt = CELLRANGER_COUNT.out.outs.map{ inputs -> meta = [:] + // in stub-run variable is string and not an array if (workflow.stubRun) { meta.cellranger_prefix = [inputs][0].toString().tokenize('/')[-3] } else { meta.cellranger_prefix = inputs[0].toString().tokenize('/')[-3] } meta.id = meta.cellranger_prefix.tokenize('-')[1] - [ meta, inputs ] - } - ) + [ meta, inputs ] + } emit: ch_versions cellranger_out = CELLRANGER_COUNT.out.outs + cellranger_out_rebuilt = ch_count_outputs_rebuilt } diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf index f7c2180f..ef55aa45 100644 --- a/workflows/scrnaseq.nf +++ b/workflows/scrnaseq.nf @@ -165,6 +165,7 @@ workflow SCRNASEQ { ch_fastq ) ch_versions = ch_versions.mix(CELLRANGER_ALIGN.out.ch_versions) + ch_mtx_matrices = ch_mtx_matrices.mix(CELLRANGER_ALIGN.out.cellranger_out_rebuilt) } // Run mtx to h5ad conversion subworkflow From 16d19f1e778f7a69ebd2c534928bb73117421112 Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Wed, 22 Jun 2022 14:14:40 +0200 Subject: [PATCH 040/165] Simplify function to find and store h5ad files --- bin/concat_h5ad.py | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/bin/concat_h5ad.py b/bin/concat_h5ad.py index 066c956d..318672cc 100755 --- a/bin/concat_h5ad.py +++ b/bin/concat_h5ad.py 
@@ -3,19 +3,10 @@ from pathlib import Path import argparse -# empty list to hold sample datasets -list_of_h5ad = [] - def read_samplesheet(samplesheet): df = pd.read_csv(samplesheet) return(df) -# find available h5ad files and append to list -def append_h5ad_files(): - for path in Path(".").rglob('*.h5ad'): - adata = sc.read_h5ad(path.name) - list_of_h5ad.append(adata) - # combine and write # combination without inner or out join, just a simple concatenation. def concat_h5ad(outfile): @@ -35,7 +26,7 @@ def concat_h5ad(outfile): df_samplesheet = read_samplesheet(args["input"]) # find all and append to list - append_h5ad_files() + list_of_h5ad = [sc.read_h5ad(path) for path in Path(".").rglob('*.h5ad')] # concat and write adata = concat_h5ad(args["out"]) From 010154c3862bae709cf9b31d9eeb27b918e6178b Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Wed, 22 Jun 2022 19:10:37 +0200 Subject: [PATCH 041/165] added sample information and metadata --- bin/cellranger_mtx_to_h5ad.py | 6 ++++-- bin/concat_h5ad.py | 35 +++++++++++++++++++++++++---------- bin/mtx_to_h5ad.py | 6 ++++-- modules/local/mtx_to_h5ad.nf | 3 +++ 4 files changed, 36 insertions(+), 14 deletions(-) diff --git a/bin/cellranger_mtx_to_h5ad.py b/bin/cellranger_mtx_to_h5ad.py index d58be151..40e365b6 100755 --- a/bin/cellranger_mtx_to_h5ad.py +++ b/bin/cellranger_mtx_to_h5ad.py @@ -2,12 +2,13 @@ import scanpy as sc import argparse -def mtx_to_adata( mtx_dir: str, verbose: bool = False ): +def mtx_to_adata( mtx_dir: str, sample: str, verbose: bool = False ): if verbose: print("Reading in {}".format(mtx_dir)) adata = sc.read_10x_mtx(mtx_dir) + adata.obs["sample"] = sample return adata @@ -18,11 +19,12 @@ def mtx_to_adata( mtx_dir: str, verbose: bool = False ): parser.add_argument("-m", "--mtx", dest="mtx", help="Path to mtx directory." 
) parser.add_argument("-v", "--verbose", dest="verbose", help="Toggle verbose messages", default=False ) + parser.add_argument("-s", "--sample", dest="sample", help="Sample name" ) parser.add_argument("-o", "--out", dest="out", help="Output path." ) args = vars(parser.parse_args()) - adata = mtx_to_adata(args["mtx"], verbose=args["verbose"]) + adata = mtx_to_adata(args["mtx"], args["sample"], verbose=args["verbose"]) adata.write_h5ad(args["out"], compression="gzip") diff --git a/bin/concat_h5ad.py b/bin/concat_h5ad.py index 318672cc..57a1969e 100755 --- a/bin/concat_h5ad.py +++ b/bin/concat_h5ad.py @@ -5,12 +5,21 @@ def read_samplesheet(samplesheet): df = pd.read_csv(samplesheet) + df.set_index("sample") + + # samplesheet may contain replicates, when it has, + # group information from replicates and collapse with commas + # only keep unique values using set() + df = df.groupby(["sample"]).agg(lambda column: ",".join(set(column))) + + # return return(df) -# combine and write -# combination without inner or out join, just a simple concatenation. -def concat_h5ad(outfile): - combined = ad.concat(list_of_h5ad) +# combine and write, just a simple concatenation. +def concat_h5ad(adatas): + combined = ad.concat(adatas, label="sample", merge="unique") + + # return return(combined) if __name__ == "__main__": @@ -18,18 +27,24 @@ def concat_h5ad(outfile): parser = argparse.ArgumentParser(description="Concatenates h5ad files and merge metadata from samplesheet") parser.add_argument("-i", "--input", dest="input", help="Path to samplesheet.csv") - parser.add_argument("-o", "--out", dest="out", help="Output path.") + parser.add_argument("-o", "--out", dest="out", help="Output path.") args = vars(parser.parse_args()) - # how to merge this on adata.obs? 
+ # Open samplesheet as dataframe df_samplesheet = read_samplesheet(args["input"]) - # find all and append to list - list_of_h5ad = [sc.read_h5ad(path) for path in Path(".").rglob('*.h5ad')] + # find all h5ad and append to dict + dict_of_h5ad = { + str(path).replace("_matrix.h5ad", ""): sc.read_h5ad(path) + for path in Path(".").rglob('*.h5ad') + } + + # concat h5ad files + adata = concat_h5ad(dict_of_h5ad) - # concat and write - adata = concat_h5ad(args["out"]) + # merge with data.frame, on sample information + adata.obs = adata.obs.join(df_samplesheet, on="sample") adata.write_h5ad(args["out"], compression="gzip") print("Wrote h5ad file to {}".format(args["out"])) \ No newline at end of file diff --git a/bin/mtx_to_h5ad.py b/bin/mtx_to_h5ad.py index 09013994..78116f58 100755 --- a/bin/mtx_to_h5ad.py +++ b/bin/mtx_to_h5ad.py @@ -5,7 +5,7 @@ def mtx_to_adata( - mtx_file: str, barcode_file: str, feature_file: str, verbose: bool = False + mtx_file: str, barcode_file: str, feature_file: str, sample: str, verbose: bool = False ): if verbose: @@ -14,6 +14,7 @@ def mtx_to_adata( adata = sc.read_mtx(mtx_file) adata.obs_names = pd.read_csv(barcode_file, header=None)[0].values adata.var_names = pd.read_csv(feature_file, header=None)[0].values + adata.obs["sample"] = sample return adata @@ -28,12 +29,13 @@ def mtx_to_adata( ) parser.add_argument("-f", "--feature", dest="feature", help="Path to feature file.") parser.add_argument("-b", "--barcode", dest="barcode", help="Path to barcode file.") + parser.add_argument("-s", "--sample", dest="sample", help="Sample name") parser.add_argument("-o", "--out", dest="out", help="Output path.") args = vars(parser.parse_args()) adata = mtx_to_adata( - args["mtx"], args["barcode"], args["feature"], verbose=args["verbose"] + args["mtx"], args["barcode"], args["feature"], args["sample"], verbose=args["verbose"] ) adata.write_h5ad(args["out"], compression="gzip") diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf index 
88b0e1c7..37321bf4 100644 --- a/modules/local/mtx_to_h5ad.nf +++ b/modules/local/mtx_to_h5ad.nf @@ -21,6 +21,7 @@ process MTX_TO_H5AD { # convert file types cellranger_mtx_to_h5ad.py \\ -m filtered_feature_bc_matrix \\ + -s ${meta.id} \\ -o ${meta.id}_matrix.h5ad """ @@ -28,6 +29,7 @@ process MTX_TO_H5AD { """ # convert file types mtx_to_h5ad.py \\ + -s ${meta.id} \\ -m *_kallistobustools_count/counts_unfiltered/*.mtx \\ -b *_kallistobustools_count/counts_unfiltered/*.barcodes.txt \\ -f *_kallistobustools_count/counts_unfiltered/*.genes.txt \\ @@ -38,6 +40,7 @@ process MTX_TO_H5AD { """ # convert file types mtx_to_h5ad.py \\ + -s ${meta.id} \\ -m *_alevin_results/alevin/quants_mat.mtx.gz \\ -b *_alevin_results/alevin/quants_mat_rows.txt \\ -f *_alevin_results/alevin/quants_mat_cols.txt \\ From 8d2cfa1febcc96fcc2393988ae1b82effa93dbf1 Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Thu, 23 Jun 2022 08:12:53 +0200 Subject: [PATCH 042/165] suffix not hard-coded in py script --- bin/concat_h5ad.py | 7 ++++--- modules/local/concat_h5ad.nf | 5 ++++- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/bin/concat_h5ad.py b/bin/concat_h5ad.py index 57a1969e..d6c197ca 100755 --- a/bin/concat_h5ad.py +++ b/bin/concat_h5ad.py @@ -26,8 +26,9 @@ def concat_h5ad(adatas): parser = argparse.ArgumentParser(description="Concatenates h5ad files and merge metadata from samplesheet") - parser.add_argument("-i", "--input", dest="input", help="Path to samplesheet.csv") - parser.add_argument("-o", "--out", dest="out", help="Output path.") + parser.add_argument("-i", "--input", dest="input", help="Path to samplesheet.csv") + parser.add_argument("-o", "--out", dest="out", help="Output path.") + parser.add_argument("-s", "--suffix", dest="suffix", help="Suffix of matrices to remove and get sample name") args = vars(parser.parse_args()) @@ -36,7 +37,7 @@ def concat_h5ad(adatas): # find all h5ad and append to dict dict_of_h5ad = { - str(path).replace("_matrix.h5ad", 
""): sc.read_h5ad(path) + str(path).replace(args["suffix"], ""): sc.read_h5ad(path) for path in Path(".").rglob('*.h5ad') } diff --git a/modules/local/concat_h5ad.nf b/modules/local/concat_h5ad.nf index 616e16d8..a201d9dd 100644 --- a/modules/local/concat_h5ad.nf +++ b/modules/local/concat_h5ad.nf @@ -15,7 +15,10 @@ process CONCAT_H5AD { script: """ - concat_h5ad.py -i $samplesheet -o combined_matrix.h5ad + concat_h5ad.py \\ + -i $samplesheet \\ + -o combined_matrix.h5ad \\ + -s "_matrix.h5ad" """ stub: From b1a5f1de518ce9d729e71af61f8515a656ff572d Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Thu, 23 Jun 2022 11:11:14 +0200 Subject: [PATCH 043/165] Update bin/concat_h5ad.py Not having one line command as separate function Co-authored-by: Isaac Virshup --- bin/concat_h5ad.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/bin/concat_h5ad.py b/bin/concat_h5ad.py index d6c197ca..bb5ac2bf 100755 --- a/bin/concat_h5ad.py +++ b/bin/concat_h5ad.py @@ -15,13 +15,6 @@ def read_samplesheet(samplesheet): # return return(df) -# combine and write, just a simple concatenation. 
-def concat_h5ad(adatas): - combined = ad.concat(adatas, label="sample", merge="unique") - - # return - return(combined) - if __name__ == "__main__": parser = argparse.ArgumentParser(description="Concatenates h5ad files and merge metadata from samplesheet") @@ -42,7 +35,7 @@ def concat_h5ad(adatas): } # concat h5ad files - adata = concat_h5ad(dict_of_h5ad) + adata = ad.concat(dict_of_h5ad, label="sample", merge="unique") # merge with data.frame, on sample information adata.obs = adata.obs.join(df_samplesheet, on="sample") From 7c20c5bebf60fc2688e1b652345f3a9afd0b7feb Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Thu, 23 Jun 2022 12:33:12 +0200 Subject: [PATCH 044/165] Update bin/concat_h5ad.py Co-authored-by: Gregor Sturm --- bin/concat_h5ad.py | 1 - 1 file changed, 1 deletion(-) diff --git a/bin/concat_h5ad.py b/bin/concat_h5ad.py index bb5ac2bf..1c6290e3 100755 --- a/bin/concat_h5ad.py +++ b/bin/concat_h5ad.py @@ -12,7 +12,6 @@ def read_samplesheet(samplesheet): # only keep unique values using set() df = df.groupby(["sample"]).agg(lambda column: ",".join(set(column))) - # return return(df) if __name__ == "__main__": From acbfba455cfab01f343cee1ea1ab65c5cd96eee2 Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Thu, 23 Jun 2022 12:44:08 +0200 Subject: [PATCH 045/165] updated nf-core/cellranger module --- modules.json | 2 +- modules/nf-core/modules/cellranger/count/main.nf | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/modules.json b/modules.json index b0f8bb72..96dba9a3 100644 --- a/modules.json +++ b/modules.json @@ -4,7 +4,7 @@ "repos": { "nf-core/modules": { "cellranger/count": { - "git_sha": "8032833e6e0fd3edeb20d4121885112249f273c4" + "git_sha": "e1a3ae6bf5afce6b23f580e3225e7bdbe1388cd0" }, "cellranger/mkgtf": { "git_sha": "e499abe572bc7c1322dfa66b767b0e65fe8c62ca" diff --git a/modules/nf-core/modules/cellranger/count/main.nf b/modules/nf-core/modules/cellranger/count/main.nf index 7413c990..84e2d921 
100644 --- a/modules/nf-core/modules/cellranger/count/main.nf +++ b/modules/nf-core/modules/cellranger/count/main.nf @@ -12,8 +12,8 @@ process CELLRANGER_COUNT { path reference output: - path("sample-${meta.gem}/outs/*"), emit: outs - path "versions.yml" , emit: versions + tuple val(meta), path("sample-${meta.gem}/outs/*"), emit: outs + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when From 2edec15de7c6cb552728e27e974df9a31b8283bf Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Thu, 23 Jun 2022 12:59:02 +0200 Subject: [PATCH 046/165] cellranger directly using meta map from channel --- conf/modules.config | 2 +- subworkflows/local/align_cellranger.nf | 14 -------------- workflows/scrnaseq.nf | 2 +- 3 files changed, 2 insertions(+), 16 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index eb61cd75..c0fcb25b 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -57,7 +57,7 @@ if(params.aligner == "cellranger") { } withName: MTX_TO_H5AD { publishDir = [ - path: { "${params.outdir}/cellranger/count/${meta.cellranger_prefix}/outs/filtered_feature_bc_matrix" }, + path: { "${params.outdir}/cellranger/count/sample-${meta.id}/outs/filtered_feature_bc_matrix" }, mode: params.publish_dir_mode ] } diff --git a/subworkflows/local/align_cellranger.nf b/subworkflows/local/align_cellranger.nf index cda067ff..744215e0 100644 --- a/subworkflows/local/align_cellranger.nf +++ b/subworkflows/local/align_cellranger.nf @@ -41,21 +41,7 @@ workflow CELLRANGER_ALIGN { ) ch_versions = ch_versions.mix(CELLRANGER_COUNT.out.versions) - // rebuild out channel to be in compliance with what is required for h5ad conversion modules - // out channel comes without meta map from nf-core module - ch_count_outputs_rebuilt = CELLRANGER_COUNT.out.outs.map{ inputs -> - meta = [:] - - // in stub-run variable is string and not an array - if (workflow.stubRun) { meta.cellranger_prefix = [inputs][0].toString().tokenize('/')[-3] } - else { 
meta.cellranger_prefix = inputs[0].toString().tokenize('/')[-3] } - meta.id = meta.cellranger_prefix.tokenize('-')[1] - - [ meta, inputs ] - } - emit: ch_versions cellranger_out = CELLRANGER_COUNT.out.outs - cellranger_out_rebuilt = ch_count_outputs_rebuilt } diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf index ef55aa45..3c014ff5 100644 --- a/workflows/scrnaseq.nf +++ b/workflows/scrnaseq.nf @@ -165,7 +165,7 @@ workflow SCRNASEQ { ch_fastq ) ch_versions = ch_versions.mix(CELLRANGER_ALIGN.out.ch_versions) - ch_mtx_matrices = ch_mtx_matrices.mix(CELLRANGER_ALIGN.out.cellranger_out_rebuilt) + ch_mtx_matrices = ch_mtx_matrices.mix(CELLRANGER_ALIGN.out.cellranger_out) } // Run mtx to h5ad conversion subworkflow From 9ca1ee7a4557562a4e5f3e7c75a991781a149fd7 Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Thu, 23 Jun 2022 13:03:02 +0200 Subject: [PATCH 047/165] updating to long version parameters --- modules/local/concat_h5ad.nf | 6 +++--- modules/local/mtx_to_h5ad.nf | 26 +++++++++++++------------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/modules/local/concat_h5ad.nf b/modules/local/concat_h5ad.nf index a201d9dd..3bcf1755 100644 --- a/modules/local/concat_h5ad.nf +++ b/modules/local/concat_h5ad.nf @@ -16,9 +16,9 @@ process CONCAT_H5AD { script: """ concat_h5ad.py \\ - -i $samplesheet \\ - -o combined_matrix.h5ad \\ - -s "_matrix.h5ad" + --input $samplesheet \\ + --out combined_matrix.h5ad \\ + --suffix "_matrix.h5ad" """ stub: diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf index 37321bf4..57f9c8e3 100644 --- a/modules/local/mtx_to_h5ad.nf +++ b/modules/local/mtx_to_h5ad.nf @@ -20,31 +20,31 @@ process MTX_TO_H5AD { """ # convert file types cellranger_mtx_to_h5ad.py \\ - -m filtered_feature_bc_matrix \\ - -s ${meta.id} \\ - -o ${meta.id}_matrix.h5ad + --mtx filtered_feature_bc_matrix \\ + --sample ${meta.id} \\ + --out ${meta.id}_matrix.h5ad """ else if (params.aligner == 'kallisto') """ # convert 
file types mtx_to_h5ad.py \\ - -s ${meta.id} \\ - -m *_kallistobustools_count/counts_unfiltered/*.mtx \\ - -b *_kallistobustools_count/counts_unfiltered/*.barcodes.txt \\ - -f *_kallistobustools_count/counts_unfiltered/*.genes.txt \\ - -o ${meta.id}_matrix.h5ad + --sample ${meta.id} \\ + --mtx *_kallistobustools_count/counts_unfiltered/*.mtx \\ + --barcode *_kallistobustools_count/counts_unfiltered/*.barcodes.txt \\ + --feature *_kallistobustools_count/counts_unfiltered/*.genes.txt \\ + --out ${meta.id}_matrix.h5ad """ else if (params.aligner == 'alevin') """ # convert file types mtx_to_h5ad.py \\ - -s ${meta.id} \\ - -m *_alevin_results/alevin/quants_mat.mtx.gz \\ - -b *_alevin_results/alevin/quants_mat_rows.txt \\ - -f *_alevin_results/alevin/quants_mat_cols.txt \\ - -o ${meta.id}_matrix.h5ad + --sample ${meta.id} \\ + --mtx *_alevin_results/alevin/quants_mat.mtx.gz \\ + --barcode *_alevin_results/alevin/quants_mat_rows.txt \\ + --feature *_alevin_results/alevin/quants_mat_cols.txt \\ + --out ${meta.id}_matrix.h5ad """ stub: From b1a2f2c07feec58b73cf565bed59c70a906bc37f Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Fri, 24 Jun 2022 08:19:06 +0200 Subject: [PATCH 048/165] Updating MTX conversion code (#4) * conversion to H5AD is now a subworkflow * Added concatenation of h5ad to alevin pipeline * added concatenation module for kallisto * added concatenation module for cellranger * Simplify function to find and store h5ad files * added sample information and metadata * suffix not hard-coded in py script * Update bin/concat_h5ad.py Not having one line command as separate function Co-authored-by: Isaac Virshup * Update bin/concat_h5ad.py Co-authored-by: Gregor Sturm * updated nf-core/cellranger module * cellranger directly using meta map from channel * updating to long version parameters Co-authored-by: Isaac Virshup Co-authored-by: Gregor Sturm --- bin/cellranger_mtx_to_h5ad.py | 8 ++-- bin/concat_h5ad.py | 43 +++++++++++++++++++ bin/mtx_to_h5ad.py 
| 9 ++-- conf/modules.config | 20 ++++++++- modules.json | 2 +- modules/local/concat_h5ad.nf | 28 ++++++++++++ modules/local/mtx_to_h5ad.nf | 25 ++++++----- .../nf-core/modules/cellranger/count/main.nf | 4 +- subworkflows/local/alevin.nf | 12 +----- subworkflows/local/align_cellranger.nf | 13 ------ subworkflows/local/conversion_to_h5ad.nf | 27 ++++++++++++ subworkflows/local/kallisto_bustools.nf | 7 --- workflows/scrnaseq.nf | 13 +++++- 13 files changed, 157 insertions(+), 54 deletions(-) create mode 100755 bin/concat_h5ad.py create mode 100644 modules/local/concat_h5ad.nf create mode 100644 subworkflows/local/conversion_to_h5ad.nf diff --git a/bin/cellranger_mtx_to_h5ad.py b/bin/cellranger_mtx_to_h5ad.py index 88c189d5..40e365b6 100755 --- a/bin/cellranger_mtx_to_h5ad.py +++ b/bin/cellranger_mtx_to_h5ad.py @@ -1,13 +1,14 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python import scanpy as sc import argparse -def mtx_to_adata( mtx_dir: str, verbose: bool = False ): +def mtx_to_adata( mtx_dir: str, sample: str, verbose: bool = False ): if verbose: print("Reading in {}".format(mtx_dir)) adata = sc.read_10x_mtx(mtx_dir) + adata.obs["sample"] = sample return adata @@ -18,11 +19,12 @@ def mtx_to_adata( mtx_dir: str, verbose: bool = False ): parser.add_argument("-m", "--mtx", dest="mtx", help="Path to mtx directory." ) parser.add_argument("-v", "--verbose", dest="verbose", help="Toggle verbose messages", default=False ) + parser.add_argument("-s", "--sample", dest="sample", help="Sample name" ) parser.add_argument("-o", "--out", dest="out", help="Output path." 
) args = vars(parser.parse_args()) - adata = mtx_to_adata(args["mtx"], verbose=args["verbose"]) + adata = mtx_to_adata(args["mtx"], args["sample"], verbose=args["verbose"]) adata.write_h5ad(args["out"], compression="gzip") diff --git a/bin/concat_h5ad.py b/bin/concat_h5ad.py new file mode 100755 index 00000000..1c6290e3 --- /dev/null +++ b/bin/concat_h5ad.py @@ -0,0 +1,43 @@ +#!/usr/bin/env python +import scanpy as sc, anndata as ad, pandas as pd +from pathlib import Path +import argparse + +def read_samplesheet(samplesheet): + df = pd.read_csv(samplesheet) + df.set_index("sample") + + # samplesheet may contain replicates, when it has, + # group information from replicates and collapse with commas + # only keep unique values using set() + df = df.groupby(["sample"]).agg(lambda column: ",".join(set(column))) + + return(df) + +if __name__ == "__main__": + + parser = argparse.ArgumentParser(description="Concatenates h5ad files and merge metadata from samplesheet") + + parser.add_argument("-i", "--input", dest="input", help="Path to samplesheet.csv") + parser.add_argument("-o", "--out", dest="out", help="Output path.") + parser.add_argument("-s", "--suffix", dest="suffix", help="Suffix of matrices to remove and get sample name") + + args = vars(parser.parse_args()) + + # Open samplesheet as dataframe + df_samplesheet = read_samplesheet(args["input"]) + + # find all h5ad and append to dict + dict_of_h5ad = { + str(path).replace(args["suffix"], ""): sc.read_h5ad(path) + for path in Path(".").rglob('*.h5ad') + } + + # concat h5ad files + adata = ad.concat(dict_of_h5ad, label="sample", merge="unique") + + # merge with data.frame, on sample information + adata.obs = adata.obs.join(df_samplesheet, on="sample") + adata.write_h5ad(args["out"], compression="gzip") + + print("Wrote h5ad file to {}".format(args["out"])) \ No newline at end of file diff --git a/bin/mtx_to_h5ad.py b/bin/mtx_to_h5ad.py index eaf91cd0..78116f58 100755 --- a/bin/mtx_to_h5ad.py +++ b/bin/mtx_to_h5ad.py 
@@ -1,14 +1,11 @@ #!/usr/bin/env python -import sys -import os import scanpy as sc import pandas as pd -import typing import argparse def mtx_to_adata( - mtx_file: str, barcode_file: str, feature_file: str, verbose: bool = False + mtx_file: str, barcode_file: str, feature_file: str, sample: str, verbose: bool = False ): if verbose: @@ -17,6 +14,7 @@ def mtx_to_adata( adata = sc.read_mtx(mtx_file) adata.obs_names = pd.read_csv(barcode_file, header=None)[0].values adata.var_names = pd.read_csv(feature_file, header=None)[0].values + adata.obs["sample"] = sample return adata @@ -31,12 +29,13 @@ def mtx_to_adata( ) parser.add_argument("-f", "--feature", dest="feature", help="Path to feature file.") parser.add_argument("-b", "--barcode", dest="barcode", help="Path to barcode file.") + parser.add_argument("-s", "--sample", dest="sample", help="Sample name") parser.add_argument("-o", "--out", dest="out", help="Output path.") args = vars(parser.parse_args()) adata = mtx_to_adata( - args["mtx"], args["barcode"], args["feature"], verbose=args["verbose"] + args["mtx"], args["barcode"], args["feature"], args["sample"], verbose=args["verbose"] ) adata.write_h5ad(args["out"], compression="gzip") diff --git a/conf/modules.config b/conf/modules.config index 1f891688..c0fcb25b 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -57,7 +57,13 @@ if(params.aligner == "cellranger") { } withName: MTX_TO_H5AD { publishDir = [ - path: { "${params.outdir}/cellranger/count/${meta.cellranger_prefix}/outs/filtered_feature_bc_matrix" }, + path: { "${params.outdir}/cellranger/count/sample-${meta.id}/outs/filtered_feature_bc_matrix" }, + mode: params.publish_dir_mode + ] + } + withName: CONCAT_H5AD { + publishDir = [ + path: { "${params.outdir}/cellranger/count/concatenated_h5ad" }, mode: params.publish_dir_mode ] } @@ -76,6 +82,12 @@ if (params.aligner == "alevin") { mode: params.publish_dir_mode ] } + withName: CONCAT_H5AD { + publishDir = [ + path: { 
"${params.outdir}/salmon/concatenated_h5ad" }, + mode: params.publish_dir_mode + ] + } } } @@ -95,5 +107,11 @@ if (params.aligner == "kallisto") { mode: params.publish_dir_mode ] } + withName: CONCAT_H5AD { + publishDir = [ + path: { "${params.outdir}/kallistobustools/concatenated_h5ad" }, + mode: params.publish_dir_mode + ] + } } } diff --git a/modules.json b/modules.json index b0f8bb72..96dba9a3 100644 --- a/modules.json +++ b/modules.json @@ -4,7 +4,7 @@ "repos": { "nf-core/modules": { "cellranger/count": { - "git_sha": "8032833e6e0fd3edeb20d4121885112249f273c4" + "git_sha": "e1a3ae6bf5afce6b23f580e3225e7bdbe1388cd0" }, "cellranger/mkgtf": { "git_sha": "e499abe572bc7c1322dfa66b767b0e65fe8c62ca" diff --git a/modules/local/concat_h5ad.nf b/modules/local/concat_h5ad.nf new file mode 100644 index 00000000..3bcf1755 --- /dev/null +++ b/modules/local/concat_h5ad.nf @@ -0,0 +1,28 @@ +process CONCAT_H5AD { + label 'process_medium' + + conda (params.enable_conda ? "conda-forge::scanpy conda-forge::python-igraph conda-forge::leidenalg" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/scanpy:1.7.2--pyhdfd78af_0' : + 'quay.io/biocontainers/scanpy:1.7.2--pyhdfd78af_0' }" + + input: + path h5ad + path samplesheet + + output: + path "*.h5ad", emit: h5ad + + script: + """ + concat_h5ad.py \\ + --input $samplesheet \\ + --out combined_matrix.h5ad \\ + --suffix "_matrix.h5ad" + """ + + stub: + """ + touch combined_matrix.h5ad + """ +} diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf index 45d86e9f..3feca494 100644 --- a/modules/local/mtx_to_h5ad.nf +++ b/modules/local/mtx_to_h5ad.nf @@ -20,28 +20,31 @@ process MTX_TO_H5AD { """ # convert file types cellranger_mtx_to_h5ad.py \\ - -m filtered_feature_bc_matrix \\ - -o matrix.h5ad + --mtx filtered_feature_bc_matrix \\ + --sample ${meta.id} \\ + --out ${meta.id}_matrix.h5ad """ else if (params.aligner == 'kallisto') """ # convert file types mtx_to_h5ad.py \\ - -m *_kallistobustools_count/counts_unfiltered/*.mtx \\ - -b *_kallistobustools_count/counts_unfiltered/*.barcodes.txt \\ - -f *_kallistobustools_count/counts_unfiltered/*.genes.txt \\ - -o cells_x_genes.h5ad + --sample ${meta.id} \\ + --mtx *_kallistobustools_count/counts_unfiltered/*.mtx \\ + --barcode *_kallistobustools_count/counts_unfiltered/*.barcodes.txt \\ + --feature *_kallistobustools_count/counts_unfiltered/*.genes.txt \\ + --out ${meta.id}_matrix.h5ad """ else if (params.aligner == 'alevin') """ # convert file types mtx_to_h5ad.py \\ - -m *_alevin_results/alevin/quants_mat.mtx.gz \\ - -b *_alevin_results/alevin/quants_mat_rows.txt \\ - -f *_alevin_results/alevin/quants_mat_cols.txt \\ - -o quants_mat.h5ad + --sample ${meta.id} \\ + --mtx *_alevin_results/alevin/quants_mat.mtx.gz \\ + --barcode *_alevin_results/alevin/quants_mat_rows.txt \\ + --feature *_alevin_results/alevin/quants_mat_cols.txt \\ + --out ${meta.id}_matrix.h5ad """ else if (params.aligner == 'kallisto') @@ -70,6 +73,6 @@ process MTX_TO_H5AD { stub: """ - touch matrix.h5ad + touch ${meta.id}_matrix.h5ad 
""" } diff --git a/modules/nf-core/modules/cellranger/count/main.nf b/modules/nf-core/modules/cellranger/count/main.nf index 7413c990..84e2d921 100644 --- a/modules/nf-core/modules/cellranger/count/main.nf +++ b/modules/nf-core/modules/cellranger/count/main.nf @@ -12,8 +12,8 @@ process CELLRANGER_COUNT { path reference output: - path("sample-${meta.gem}/outs/*"), emit: outs - path "versions.yml" , emit: versions + tuple val(meta), path("sample-${meta.gem}/outs/*"), emit: outs + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when diff --git a/subworkflows/local/alevin.nf b/subworkflows/local/alevin.nf index abaa6efc..c1b122e1 100644 --- a/subworkflows/local/alevin.nf +++ b/subworkflows/local/alevin.nf @@ -1,8 +1,7 @@ /* -- IMPORT LOCAL MODULES/SUBWORKFLOWS -- */ include { GFFREAD_TRANSCRIPTOME } from '../../modules/local/gffread_transcriptome' -include { SALMON_ALEVIN } from '../../modules/local/salmon_alevin' -include { ALEVINQC } from '../../modules/local/alevinqc' -include {MTX_TO_H5AD } from '../../modules/local/mtx_to_h5ad.nf' +include { SALMON_ALEVIN } from '../../modules/local/salmon_alevin' +include { ALEVINQC } from '../../modules/local/alevinqc' /* -- IMPORT NF-CORE MODULES/SUBWORKFLOWS -- */ include { GUNZIP } from '../../modules/nf-core/modules/gunzip/main' @@ -73,13 +72,6 @@ workflow SCRNASEQ_ALEVIN { ) ch_versions = ch_versions.mix(SALMON_ALEVIN.out.versions) - /* - * Convert matrix do h5ad - */ - MTX_TO_H5AD ( - SALMON_ALEVIN.out.alevin_results - ) - /* * Run alevinQC */ diff --git a/subworkflows/local/align_cellranger.nf b/subworkflows/local/align_cellranger.nf index cd22c42b..744215e0 100644 --- a/subworkflows/local/align_cellranger.nf +++ b/subworkflows/local/align_cellranger.nf @@ -41,19 +41,6 @@ workflow CELLRANGER_ALIGN { ) ch_versions = ch_versions.mix(CELLRANGER_COUNT.out.versions) - // Convert matrix do h5ad - MTX_TO_H5AD ( - CELLRANGER_COUNT.out.outs.map{ inputs -> - meta = [:] - // in stub-run variable is 
string and not an array - if (workflow.stubRun) { meta.cellranger_prefix = [inputs][0].toString().tokenize('/')[-3] } - else { meta.cellranger_prefix = inputs[0].toString().tokenize('/')[-3] } - meta.id = meta.cellranger_prefix.tokenize('-')[1] - - [ meta, inputs ] - } - ) - emit: ch_versions cellranger_out = CELLRANGER_COUNT.out.outs diff --git a/subworkflows/local/conversion_to_h5ad.nf b/subworkflows/local/conversion_to_h5ad.nf new file mode 100644 index 00000000..67b100d5 --- /dev/null +++ b/subworkflows/local/conversion_to_h5ad.nf @@ -0,0 +1,27 @@ +/* -- IMPORT LOCAL MODULES/SUBWORKFLOWS -- */ +include { MTX_TO_H5AD } from '../../modules/local/mtx_to_h5ad.nf' +include { CONCAT_H5AD } from '../../modules/local/concat_h5ad.nf' + +workflow H5AD_CONVERSION { + + take: + mtx_matrices + samplesheet + + main: + // + // Convert matrix do h5ad + // + MTX_TO_H5AD ( + mtx_matrices + ) + + // + // Concat sample-specific h5ad in one + // + CONCAT_H5AD ( + MTX_TO_H5AD.out.h5ad.collect(), // gather all sample-specific files + samplesheet + ) + +} diff --git a/subworkflows/local/kallisto_bustools.nf b/subworkflows/local/kallisto_bustools.nf index 3b8cd968..204852da 100644 --- a/subworkflows/local/kallisto_bustools.nf +++ b/subworkflows/local/kallisto_bustools.nf @@ -1,7 +1,6 @@ /* -- IMPORT LOCAL MODULES/SUBWORKFLOWS -- */ include { GENE_MAP } from '../../modules/local/gene_map' include { KALLISTOBUSTOOLS_COUNT } from '../../modules/local/kallistobustools_count' -include {MTX_TO_H5AD } from '../../modules/local/mtx_to_h5ad.nf' /* -- IMPORT NF-CORE MODULES/SUBWORKFLOWS -- */ include { GUNZIP } from '../../modules/nf-core/modules/gunzip/main' @@ -65,12 +64,6 @@ workflow KALLISTO_BUSTOOLS { ) ch_versions = ch_versions.mix(KALLISTOBUSTOOLS_COUNT.out.versions) - // Convert matrix do h5ad - MTX_TO_H5AD ( - KALLISTOBUSTOOLS_COUNT.out.counts - ) - - emit: ch_versions counts = KALLISTOBUSTOOLS_COUNT.out.counts diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf index 
3b54ced9..3c014ff5 100644 --- a/workflows/scrnaseq.nf +++ b/workflows/scrnaseq.nf @@ -40,6 +40,7 @@ include { KALLISTO_BUSTOOLS } from '../subworkflows/local/kallisto_bustools' include { SCRNASEQ_ALEVIN } from '../subworkflows/local/alevin' include { STARSOLO } from '../subworkflows/local/starsolo' include { CELLRANGER_ALIGN } from "../subworkflows/local/align_cellranger" +include { H5AD_CONVERSION } from "../subworkflows/local/conversion_to_h5ad" /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -98,7 +99,8 @@ ch_cellranger_index = params.cellranger_index ? file(params.cellranger_index) : workflow SCRNASEQ { - ch_versions = Channel.empty() + ch_versions = Channel.empty() + ch_mtx_matrices = Channel.empty() // Check input files and stage input data ch_fastq = INPUT_CHECK( ch_input ).reads @@ -118,6 +120,7 @@ workflow SCRNASEQ { ch_fastq ) ch_versions = ch_versions.mix(KALLISTO_BUSTOOLS.out.ch_versions) + ch_mtx_matrices = ch_mtx_matrices.mix(KALLISTO_BUSTOOLS.out.counts) } // Run salmon alevin pipeline @@ -135,6 +138,7 @@ workflow SCRNASEQ { ) ch_versions = ch_versions.mix(SCRNASEQ_ALEVIN.out.ch_versions) ch_multiqc_alevin = SCRNASEQ_ALEVIN.out.for_multiqc + ch_mtx_matrices = ch_mtx_matrices.mix(SCRNASEQ_ALEVIN.out.alevin_results) } // Run STARSolo pipeline @@ -161,8 +165,15 @@ workflow SCRNASEQ { ch_fastq ) ch_versions = ch_versions.mix(CELLRANGER_ALIGN.out.ch_versions) + ch_mtx_matrices = ch_mtx_matrices.mix(CELLRANGER_ALIGN.out.cellranger_out) } + // Run mtx to h5ad conversion subworkflow + H5AD_CONVERSION ( + ch_mtx_matrices, + ch_input + ) + // collect software versions CUSTOM_DUMPSOFTWAREVERSIONS ( ch_versions.unique().collectFile(name: 'collated_versions.yml') From 81c766b3abff5c927f2aad6c2a2369c8cca6b1c4 Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Fri, 24 Jun 2022 09:57:33 +0200 Subject: [PATCH 049/165] fixed singularity image --- modules/local/kallistobustools_count.nf | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/modules/local/kallistobustools_count.nf b/modules/local/kallistobustools_count.nf index 382de329..7f3dabff 100644 --- a/modules/local/kallistobustools_count.nf +++ b/modules/local/kallistobustools_count.nf @@ -4,7 +4,7 @@ process KALLISTOBUSTOOLS_COUNT { conda (params.enable_conda ? "bioconda::kb-python=0.25.1" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/scanpy-scripts:1.1.6--pypyhdfd78af_0' : + 'https://depot.galaxyproject.org/singularity/kb-python:0.25.1--py_0' : 'quay.io/biocontainers/kb-python:0.25.1--py_0' }" input: From acde6e32d14673dcc11aa854d569c6e3e8a27eca Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Fri, 24 Jun 2022 11:05:56 +0200 Subject: [PATCH 050/165] solved existing conflicts --- modules/local/mtx_to_h5ad.nf | 24 ------------------------ 1 file changed, 24 deletions(-) diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf index 3feca494..57f9c8e3 100644 --- a/modules/local/mtx_to_h5ad.nf +++ b/modules/local/mtx_to_h5ad.nf @@ -47,30 +47,6 @@ process MTX_TO_H5AD { --out ${meta.id}_matrix.h5ad """ - else if (params.aligner == 'kallisto') - """ - # convert file types - mtx_to_h5ad.py \\ - -m *_kallistobustools_count/counts_unfiltered/*.mtx \\ - -b *_kallistobustools_count/counts_unfiltered/*.barcodes.txt \\ - -f *_kallistobustools_count/counts_unfiltered/*.genes.txt \\ - -o cells_x_genes.h5ad - - gzip -c cells_x_genes.h5ad > cells_x_genes.h5ad.gz - """ - - else if (params.aligner == 'alevin') - """ - # convert file types - mtx_to_h5ad.py \\ - -m *_alevin_results/alevin/quants_mat.mtx.gz \\ - -b *_alevin_results/alevin/quants_mat_rows.txt \\ - -f *_alevin_results/alevin/quants_mat_cols.txt \\ - -o quants_mat.h5ad - - gzip -c quants_mat.h5ad > quants_mat.h5ad.gz - """ - stub: """ touch ${meta.id}_matrix.h5ad From e03d606686089f412344dc4e9170db0b9858f6c9 Mon Sep 17 
00:00:00 2001 From: Harshil Patel Date: Fri, 24 Jun 2022 10:52:30 +0100 Subject: [PATCH 051/165] Update nextflow_schema.json --- nextflow_schema.json | 1 + 1 file changed, 1 insertion(+) diff --git a/nextflow_schema.json b/nextflow_schema.json index db3b679f..1d9b1ffb 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -14,6 +14,7 @@ "properties": { "input": { "type": "string", + "mimetype": "text/csv", "fa_icon": "fas fa-dna", "description": "Input FastQ files", "help_text": "Use this to specify the location of your input FastQ files. For example:\n\n```bash\n--input 'path/to/data/sample_*_{1,2}.fastq'\n```\n\nPlease note the following requirements:\n\n1. The path must be enclosed in quotes\n2. The path must have at least one `*` wildcard character\n3. When using the pipeline with paired end data, the path must use `{1,2}` notation to specify read pairs.\n\nIf left unspecified, a default pattern is used: `data/*{1,2}.fastq.gz`" From ef06fd882b22dc419374868707fcca6c94a02943 Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Mon, 27 Jun 2022 13:05:46 +0200 Subject: [PATCH 052/165] added function to convert mtx to Seurat objects --- bin/mtx_to_seurat.R | 25 ++++++++++ conf/modules.config | 6 +-- modules/local/mtx_to_seurat.nf | 46 +++++++++++++++++++ ...onversion_to_h5ad.nf => mtx_conversion.nf} | 14 ++++-- workflows/scrnaseq.nf | 4 +- 5 files changed, 87 insertions(+), 8 deletions(-) create mode 100755 bin/mtx_to_seurat.R create mode 100644 modules/local/mtx_to_seurat.nf rename subworkflows/local/{conversion_to_h5ad.nf => mtx_conversion.nf} (51%) diff --git a/bin/mtx_to_seurat.R b/bin/mtx_to_seurat.R new file mode 100755 index 00000000..b221835d --- /dev/null +++ b/bin/mtx_to_seurat.R @@ -0,0 +1,25 @@ +#!/usr/bin/env Rscript +library(Seurat) + +args <- commandArgs(trailingOnly=TRUE) + +mtx_file <- args[1] +barcode_file <- args[2] +feature_file <- args[3] +out.file <- args[4] +aligner <- args[5] + +if(aligner %in% c("kallisto", "alevin")) { + 
# for kallisto and alevin, the features file contains only one column and matrix needs to be transposed + expression.matrix <- ReadMtx( + mtx = mtx_file, features = feature_file, cells = barcode_file, feature.column = 1, mtx.transpose = TRUE + ) +} else { + expression.matrix <- ReadMtx( + mtx = mtx_file, features = feature_file, cells = barcode_file + ) +} + +seurat.object <- CreateSeuratObject(counts = expression.matrix) + +saveRDS(seurat.object, file = out.file) diff --git a/conf/modules.config b/conf/modules.config index c0fcb25b..1ddec147 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -55,7 +55,7 @@ if(params.aligner == "cellranger") { mode: params.publish_dir_mode ] } - withName: MTX_TO_H5AD { + withName: 'MTX_TO_H5AD|MTX_TO_SEURAT' { publishDir = [ path: { "${params.outdir}/cellranger/count/sample-${meta.id}/outs/filtered_feature_bc_matrix" }, mode: params.publish_dir_mode @@ -76,7 +76,7 @@ if (params.aligner == "alevin") { ext.args = "--table transcript_id,gene_id" ext.prefix = { "${gff.baseName}_gffread" } } - withName: MTX_TO_H5AD { + withName: 'MTX_TO_H5AD|MTX_TO_SEURAT' { publishDir = [ path: { "${params.outdir}/salmon/${meta.id}_alevin_results/alevin" }, mode: params.publish_dir_mode @@ -101,7 +101,7 @@ if (params.aligner == "star") { if (params.aligner == "kallisto") { process { - withName: MTX_TO_H5AD { + withName: 'MTX_TO_H5AD|MTX_TO_SEURAT' { publishDir = [ path: { "${params.outdir}/kallistobustools/${meta.id}_kallistobustools_count/counts_unfiltered" }, mode: params.publish_dir_mode diff --git a/modules/local/mtx_to_seurat.nf b/modules/local/mtx_to_seurat.nf new file mode 100644 index 00000000..8269ce2b --- /dev/null +++ b/modules/local/mtx_to_seurat.nf @@ -0,0 +1,46 @@ +process MTX_TO_SEURAT { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? "seurat-scripts" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'docker://satijalab/seurat:4.1.0' : + 'satijalab/seurat:4.1.0' }" + + input: + // inputs from cellranger nf-core module does not come in a single sample dir + // for each sample, the sub-folders and files come directly in array. + tuple val(meta), path(inputs) + + output: + path "*.seurat", emit: h5ad + + script: + def aligner = params.aligner + if (params.aligner == "cellranger") { + matrix = "filtered_feature_bc_matrix/matrix.mtx.gz" + barcodes = "filtered_feature_bc_matrix/barcodes.tsv.gz" + features = "filtered_feature_bc_matrix/features.tsv.gz" + } else if (params.aligner == "kallisto") { + matrix = "*_kallistobustools_count/counts_unfiltered/*.mtx" + barcodes = "*_kallistobustools_count/counts_unfiltered/*.barcodes.txt" + features = "*_kallistobustools_count/counts_unfiltered/*.genes.txt" + } else if (params.aligner == "alevin") { + matrix = "*_alevin_results/alevin/quants_mat.mtx.gz" + barcodes = "*_alevin_results/alevin/quants_mat_rows.txt" + features = "*_alevin_results/alevin/quants_mat_cols.txt" + } + """ + mtx_to_seurat.R \\ + $matrix \\ + $barcodes \\ + $features \\ + ${meta.id}_matrix.seurat \\ + ${aligner} + """ + + stub: + """ + touch ${meta.id}_matrix.h5ad + """ +} diff --git a/subworkflows/local/conversion_to_h5ad.nf b/subworkflows/local/mtx_conversion.nf similarity index 51% rename from subworkflows/local/conversion_to_h5ad.nf rename to subworkflows/local/mtx_conversion.nf index 67b100d5..731842c8 100644 --- a/subworkflows/local/conversion_to_h5ad.nf +++ b/subworkflows/local/mtx_conversion.nf @@ -1,8 +1,9 @@ /* -- IMPORT LOCAL MODULES/SUBWORKFLOWS -- */ -include { MTX_TO_H5AD } from '../../modules/local/mtx_to_h5ad.nf' -include { CONCAT_H5AD } from '../../modules/local/concat_h5ad.nf' +include { MTX_TO_H5AD } from '../../modules/local/mtx_to_h5ad.nf' +include { CONCAT_H5AD } from '../../modules/local/concat_h5ad.nf' +include { MTX_TO_SEURAT } from '../../modules/local/mtx_to_seurat.nf' -workflow H5AD_CONVERSION { +workflow MTX_CONVERSION { 
take: mtx_matrices @@ -24,4 +25,11 @@ workflow H5AD_CONVERSION { samplesheet ) + // + // Convert matrix do seurat + // + MTX_TO_SEURAT ( + mtx_matrices + ) + } diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf index 3c014ff5..bac21eb7 100644 --- a/workflows/scrnaseq.nf +++ b/workflows/scrnaseq.nf @@ -40,7 +40,7 @@ include { KALLISTO_BUSTOOLS } from '../subworkflows/local/kallisto_bustools' include { SCRNASEQ_ALEVIN } from '../subworkflows/local/alevin' include { STARSOLO } from '../subworkflows/local/starsolo' include { CELLRANGER_ALIGN } from "../subworkflows/local/align_cellranger" -include { H5AD_CONVERSION } from "../subworkflows/local/conversion_to_h5ad" +include { MTX_CONVERSION } from "../subworkflows/local/mtx_conversion" /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -169,7 +169,7 @@ workflow SCRNASEQ { } // Run mtx to h5ad conversion subworkflow - H5AD_CONVERSION ( + MTX_CONVERSION ( ch_mtx_matrices, ch_input ) From 5a6ec1a7415075e7ea455f3db82123f482078ff1 Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Mon, 27 Jun 2022 13:10:46 +0200 Subject: [PATCH 053/165] conversions are outputted together --- conf/modules.config | 48 ++++++++++++++++++++++----------------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 1ddec147..f0a48713 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -55,15 +55,15 @@ if(params.aligner == "cellranger") { mode: params.publish_dir_mode ] } - withName: 'MTX_TO_H5AD|MTX_TO_SEURAT' { + // withName: 'MTX_TO_H5AD|MTX_TO_SEURAT' { + // publishDir = [ + // path: { "${params.outdir}/cellranger/count/sample-${meta.id}/outs/filtered_feature_bc_matrix" }, + // mode: params.publish_dir_mode + // ] + // } + withName: 'CONCAT_H5AD|MTX_TO_H5AD|MTX_TO_SEURAT' { publishDir = [ - path: { "${params.outdir}/cellranger/count/sample-${meta.id}/outs/filtered_feature_bc_matrix" }, - mode: 
params.publish_dir_mode - ] - } - withName: CONCAT_H5AD { - publishDir = [ - path: { "${params.outdir}/cellranger/count/concatenated_h5ad" }, + path: { "${params.outdir}/cellranger/count/h5ad_conversions" }, mode: params.publish_dir_mode ] } @@ -76,15 +76,15 @@ if (params.aligner == "alevin") { ext.args = "--table transcript_id,gene_id" ext.prefix = { "${gff.baseName}_gffread" } } - withName: 'MTX_TO_H5AD|MTX_TO_SEURAT' { - publishDir = [ - path: { "${params.outdir}/salmon/${meta.id}_alevin_results/alevin" }, - mode: params.publish_dir_mode - ] - } - withName: CONCAT_H5AD { + // withName: 'MTX_TO_H5AD|MTX_TO_SEURAT' { + // publishDir = [ + // path: { "${params.outdir}/salmon/${meta.id}_alevin_results/alevin" }, + // mode: params.publish_dir_mode + // ] + // } + withName: 'CONCAT_H5AD|MTX_TO_H5AD|MTX_TO_SEURAT' { publishDir = [ - path: { "${params.outdir}/salmon/concatenated_h5ad" }, + path: { "${params.outdir}/salmon/h5ad_conversions" }, mode: params.publish_dir_mode ] } @@ -101,15 +101,15 @@ if (params.aligner == "star") { if (params.aligner == "kallisto") { process { - withName: 'MTX_TO_H5AD|MTX_TO_SEURAT' { - publishDir = [ - path: { "${params.outdir}/kallistobustools/${meta.id}_kallistobustools_count/counts_unfiltered" }, - mode: params.publish_dir_mode - ] - } - withName: CONCAT_H5AD { + // withName: 'MTX_TO_H5AD|MTX_TO_SEURAT' { + // publishDir = [ + // path: { "${params.outdir}/kallistobustools/${meta.id}_kallistobustools_count/counts_unfiltered" }, + // mode: params.publish_dir_mode + // ] + // } + withName: 'CONCAT_H5AD|MTX_TO_H5AD|MTX_TO_SEURAT' { publishDir = [ - path: { "${params.outdir}/kallistobustools/concatenated_h5ad" }, + path: { "${params.outdir}/kallistobustools/h5ad_conversions" }, mode: params.publish_dir_mode ] } From 3d079f880cdcc1c3d26b5f1aabf7e4c718265ed7 Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Mon, 27 Jun 2022 13:14:40 +0200 Subject: [PATCH 054/165] Update mtx_to_seurat.R fixing linting --- bin/mtx_to_seurat.R | 14 
+++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/bin/mtx_to_seurat.R b/bin/mtx_to_seurat.R index b221835d..f4ef6b8e 100755 --- a/bin/mtx_to_seurat.R +++ b/bin/mtx_to_seurat.R @@ -10,14 +10,14 @@ out.file <- args[4] aligner <- args[5] if(aligner %in% c("kallisto", "alevin")) { - # for kallisto and alevin, the features file contains only one column and matrix needs to be transposed - expression.matrix <- ReadMtx( - mtx = mtx_file, features = feature_file, cells = barcode_file, feature.column = 1, mtx.transpose = TRUE - ) + # for kallisto and alevin, the features file contains only one column and matrix needs to be transposed + expression.matrix <- ReadMtx( + mtx = mtx_file, features = feature_file, cells = barcode_file, feature.column = 1, mtx.transpose = TRUE + ) } else { - expression.matrix <- ReadMtx( - mtx = mtx_file, features = feature_file, cells = barcode_file - ) + expression.matrix <- ReadMtx( + mtx = mtx_file, features = feature_file, cells = barcode_file + ) } seurat.object <- CreateSeuratObject(counts = expression.matrix) From 017141708ae2ff306a60957acc605f9d1bda8626 Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Mon, 27 Jun 2022 18:05:00 +0200 Subject: [PATCH 055/165] fixing stub definition --- modules/local/mtx_to_seurat.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/mtx_to_seurat.nf b/modules/local/mtx_to_seurat.nf index 8269ce2b..9ffeb883 100644 --- a/modules/local/mtx_to_seurat.nf +++ b/modules/local/mtx_to_seurat.nf @@ -41,6 +41,6 @@ process MTX_TO_SEURAT { stub: """ - touch ${meta.id}_matrix.h5ad + touch ${meta.id}_matrix.seurat """ } From 1ee8d3e03a9f4d5edb20509481a27430f8b259b7 Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Wed, 29 Jun 2022 11:19:43 +0200 Subject: [PATCH 056/165] cellranger conversion reads .h5 file --- bin/cellranger_mtx_to_h5ad.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git 
a/bin/cellranger_mtx_to_h5ad.py b/bin/cellranger_mtx_to_h5ad.py index 40e365b6..e8eb5b23 100755 --- a/bin/cellranger_mtx_to_h5ad.py +++ b/bin/cellranger_mtx_to_h5ad.py @@ -2,12 +2,14 @@ import scanpy as sc import argparse -def mtx_to_adata( mtx_dir: str, sample: str, verbose: bool = False ): +def mtx_to_adata( mtx_h5: str, sample: str, verbose: bool = False ): if verbose: - print("Reading in {}".format(mtx_dir)) + print("Reading in {}".format(mtx_h5)) - adata = sc.read_10x_mtx(mtx_dir) + adata = sc.read_10x_h5(mtx_h5) + adata.var["gene_symbols"] = adata.var_names + adata.var.set_index("gene_ids", inplace=True) adata.obs["sample"] = sample return adata @@ -17,7 +19,7 @@ def mtx_to_adata( mtx_dir: str, sample: str, verbose: bool = False ): parser = argparse.ArgumentParser(description="Converts mtx output to h5ad.") - parser.add_argument("-m", "--mtx", dest="mtx", help="Path to mtx directory." ) + parser.add_argument("-m", "--mtx", dest="mtx", help="Path to mtx h5 file." ) parser.add_argument("-v", "--verbose", dest="verbose", help="Toggle verbose messages", default=False ) parser.add_argument("-s", "--sample", dest="sample", help="Sample name" ) parser.add_argument("-o", "--out", dest="out", help="Output path." 
) From 73bb8f8368e893c42e3bfd74b06acbe21f8719a8 Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Wed, 29 Jun 2022 12:29:35 +0200 Subject: [PATCH 057/165] added "index_unique" argument --- bin/concat_h5ad.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/concat_h5ad.py b/bin/concat_h5ad.py index 1c6290e3..29d0037a 100755 --- a/bin/concat_h5ad.py +++ b/bin/concat_h5ad.py @@ -34,7 +34,7 @@ def read_samplesheet(samplesheet): } # concat h5ad files - adata = ad.concat(dict_of_h5ad, label="sample", merge="unique") + adata = ad.concat(dict_of_h5ad, label="sample", merge="unique", index_unique="_") # merge with data.frame, on sample information adata.obs = adata.obs.join(df_samplesheet, on="sample") From 88289cca2fea04aed10fe00786ade69c83202613 Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Wed, 29 Jun 2022 12:34:07 +0200 Subject: [PATCH 058/165] added an option to convert star count matrix --- bin/mtx_to_h5ad.py | 7 ++++-- modules/local/mtx_to_h5ad.nf | 40 ++++++++++++++++++++-------------- modules/local/star_align.nf | 1 + subworkflows/local/starsolo.nf | 1 + workflows/scrnaseq.nf | 1 + 5 files changed, 32 insertions(+), 18 deletions(-) diff --git a/bin/mtx_to_h5ad.py b/bin/mtx_to_h5ad.py index 78116f58..96384e37 100755 --- a/bin/mtx_to_h5ad.py +++ b/bin/mtx_to_h5ad.py @@ -5,13 +5,15 @@ def mtx_to_adata( - mtx_file: str, barcode_file: str, feature_file: str, sample: str, verbose: bool = False + mtx_file: str, barcode_file: str, feature_file: str, sample: str, aligner: str, verbose: bool = False ): if verbose: print("Reading in {}".format(mtx_file)) adata = sc.read_mtx(mtx_file) + if aligner == 'star': # for some reason star matrix comes transposed and doesn't fit when values are appended directly + adata = adata.transpose() adata.obs_names = pd.read_csv(barcode_file, header=None)[0].values adata.var_names = pd.read_csv(feature_file, header=None)[0].values adata.obs["sample"] = sample @@ -31,11 +33,12 @@ def 
mtx_to_adata( parser.add_argument("-b", "--barcode", dest="barcode", help="Path to barcode file.") parser.add_argument("-s", "--sample", dest="sample", help="Sample name") parser.add_argument("-o", "--out", dest="out", help="Output path.") + parser.add_argument("-a", "--aligner", dest="aligner", help="Which aligner has been used?") args = vars(parser.parse_args()) adata = mtx_to_adata( - args["mtx"], args["barcode"], args["feature"], args["sample"], verbose=args["verbose"] + args["mtx"], args["barcode"], args["feature"], args["sample"], args["aligner"],verbose=args["verbose"] ) adata.write_h5ad(args["out"], compression="gzip") diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf index 57f9c8e3..c0c24890 100644 --- a/modules/local/mtx_to_h5ad.nf +++ b/modules/local/mtx_to_h5ad.nf @@ -16,34 +16,42 @@ process MTX_TO_H5AD { path "*.h5ad", emit: h5ad script: + // def file paths for aligners (except cellranger) + if (params.aligner == 'kallisto') { + mtx_matrix = "*_kallistobustools_count/counts_unfiltered/*.mtx" + barcodes_tsv = "*_kallistobustools_count/counts_unfiltered/*.barcodes.txt" + features_tsv = "*_kallistobustools_count/counts_unfiltered/*.genes.txt" + } else if (params.aligner == 'alevin') { + mtx_matrix = "*_alevin_results/alevin/quants_mat.mtx.gz" + barcodes_tsv = "*_alevin_results/alevin/quants_mat_rows.txt" + features_tsv = "*_alevin_results/alevin/quants_mat_cols.txt" + } else if (params.aligner == 'star') { + mtx_matrix = "*.Solo.out/Gene/filtered/matrix.mtx" + barcodes_tsv = "*.Solo.out/Gene/filtered/barcodes.tsv" + features_tsv = "*.Solo.out/Gene/filtered/features.tsv" + } + + // + // run script + // if (params.aligner == 'cellranger') """ # convert file types cellranger_mtx_to_h5ad.py \\ - --mtx filtered_feature_bc_matrix \\ - --sample ${meta.id} \\ - --out ${meta.id}_matrix.h5ad - """ - - else if (params.aligner == 'kallisto') - """ - # convert file types - mtx_to_h5ad.py \\ + --mtx filtered_feature_bc_matrix.h5 \\ --sample 
${meta.id} \\ - --mtx *_kallistobustools_count/counts_unfiltered/*.mtx \\ - --barcode *_kallistobustools_count/counts_unfiltered/*.barcodes.txt \\ - --feature *_kallistobustools_count/counts_unfiltered/*.genes.txt \\ --out ${meta.id}_matrix.h5ad """ - else if (params.aligner == 'alevin') + else """ # convert file types mtx_to_h5ad.py \\ + --aligner ${params.aligner} \\ --sample ${meta.id} \\ - --mtx *_alevin_results/alevin/quants_mat.mtx.gz \\ - --barcode *_alevin_results/alevin/quants_mat_rows.txt \\ - --feature *_alevin_results/alevin/quants_mat_cols.txt \\ + --mtx $mtx_matrix \\ + --barcode $barcodes_tsv \\ + --feature $features_tsv \\ --out ${meta.id}_matrix.h5ad """ diff --git a/modules/local/star_align.nf b/modules/local/star_align.nf index 75dac643..d544224f 100644 --- a/modules/local/star_align.nf +++ b/modules/local/star_align.nf @@ -21,6 +21,7 @@ process STAR_ALIGN { output: tuple val(meta), path('*d.out.bam') , emit: bam + tuple val(meta), path('*.Solo.out') , emit: counts tuple val(meta), path('*Log.final.out') , emit: log_final tuple val(meta), path('*Log.out') , emit: log_out tuple val(meta), path('*Log.progress.out'), emit: log_progress diff --git a/subworkflows/local/starsolo.nf b/subworkflows/local/starsolo.nf index 6a7e9c38..2c2f57dd 100644 --- a/subworkflows/local/starsolo.nf +++ b/subworkflows/local/starsolo.nf @@ -52,6 +52,7 @@ workflow STARSOLO { emit: ch_versions star_result = STAR_ALIGN.out.tab + star_counts = STAR_ALIGN.out.counts for_multiqc = STAR_ALIGN.out.log_final.collect{it[1]}.ifEmpty([]) diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf index 3c014ff5..5d2ce825 100644 --- a/workflows/scrnaseq.nf +++ b/workflows/scrnaseq.nf @@ -153,6 +153,7 @@ workflow SCRNASEQ { other_parameters ) ch_versions = ch_versions.mix(STARSOLO.out.ch_versions) + ch_mtx_matrices = ch_mtx_matrices.mix(STARSOLO.out.star_counts) ch_multiqc_star = STARSOLO.out.for_multiqc } From cc7d84079b6d6a91370f1ebfe5914211aa10833f Mon Sep 17 00:00:00 2001 From: 
Florian Date: Wed, 29 Jun 2022 15:06:17 +0200 Subject: [PATCH 059/165] Adding subworkflow for FastQC --- subworkflows/local/fastqc.nf | 45 ++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 subworkflows/local/fastqc.nf diff --git a/subworkflows/local/fastqc.nf b/subworkflows/local/fastqc.nf new file mode 100644 index 00000000..bd65cf1f --- /dev/null +++ b/subworkflows/local/fastqc.nf @@ -0,0 +1,45 @@ +// +// Check input samplesheet and get read channels +// + +//TODO --> add skip_fastqc to params + +include { FASTQC } from '../../modules/nf-core/modules/fastqc/main' + +workflow FASTQC_CHECK { + take: + ch_fastq + + main: + ch_fastq + .map { ch -> [ ch[0], ch[1] ] } + .set { ch_fastq } + + /* + * FastQ QC using FASTQC + */ + fastqc_zip = Channel.empty() + fastqc_html = Channel.empty() + fastqc_multiqc = Channel.empty() + fastqc_version = Channel.empty() + + FASTQC ( ch_fastq ) + fastqc_zip = FASTQC.out.zip + fastqc_html = FASTQC.out.html + + fastqc_zip + .map { it -> [ it[1] ] } + .set { fastqc_zip_only } + fastqc_html + .map { it -> [ it[1] ] } + .set { fastqc_html_only } + + fastqc_multiqc = fastqc_multiqc.mix( fastqc_zip_only, fastqc_html_only ) + fastqc_version = FASTQC.out.versions + + emit: + fastqc_zip + fastqc_html + fastqc_version + fastqc_multiqc +} From f6800bde699494ec230a0ffc54307d3f4a2b7125 Mon Sep 17 00:00:00 2001 From: Florian Date: Wed, 29 Jun 2022 15:07:00 +0200 Subject: [PATCH 060/165] Integrating fastqc into the scrnaseq pipeline and into multiqc --- modules/local/multiqc.nf | 35 ++++++++++++++++++++++------------- workflows/scrnaseq.nf | 13 ++++++++++++- 2 files changed, 34 insertions(+), 14 deletions(-) diff --git a/modules/local/multiqc.nf b/modules/local/multiqc.nf index 5939624f..f7e745a0 100644 --- a/modules/local/multiqc.nf +++ b/modules/local/multiqc.nf @@ -1,30 +1,39 @@ process MULTIQC { label 'process_medium' - conda (params.enable_conda ? 
"bioconda::multiqc=1.10.1" : null) + conda (params.enable_conda ? 'bioconda::multiqc=1.11' : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.10.1--py_0' : - 'quay.io/biocontainers/multiqc:1.10.1--py_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.11--pyhdfd78af_0' : + 'quay.io/biocontainers/multiqc:1.11--pyhdfd78af_0' }" input: - path 'multiqc_config.yaml' - path multiqc_custom_config - path software_versions + path ch_multiqc_config + path ch_multiqc_custom_config + path software_versions_yaml path workflow_summary + path ('fastqc/*') path ("STAR/*") path ("salmon_alevin/*") output: - path "*multiqc_report.html" , emit: report - path "*_data" , emit: data - path "*variants_metrics_mqc.csv", optional:true, emit: csv_variants - path "*assembly_metrics_mqc.csv", optional:true, emit: csv_assembly - path "*_plots" , optional:true, emit: plots + path "*multiqc_report.html", emit: report + path "*_data" , emit: data + path "*_plots" , optional:true, emit: plots + path "versions.yml" , emit: versions script: - def custom_config = params.multiqc_config ? "--config $multiqc_custom_config" : '' def args = task.ext.args ?: '' + def custom_config = params.multiqc_config ? "--config $multiqc_custom_config" : '' """ - multiqc -f $args $custom_config . + multiqc \\ + -f \\ + $args \\ + $custom_config \\ + . + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" ) + END_VERSIONS """ } diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf index 3b54ced9..fa37596a 100644 --- a/workflows/scrnaseq.nf +++ b/workflows/scrnaseq.nf @@ -36,6 +36,7 @@ ch_multiqc_custom_config = params.multiqc_config ? 
Channel.fromPath(params.multi // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules // include { INPUT_CHECK } from '../subworkflows/local/input_check' +include { FASTQC_CHECK } from '../subworkflows/local/fastqc' include { KALLISTO_BUSTOOLS } from '../subworkflows/local/kallisto_bustools' include { SCRNASEQ_ALEVIN } from '../subworkflows/local/alevin' include { STARSOLO } from '../subworkflows/local/starsolo' @@ -105,6 +106,14 @@ workflow SCRNASEQ { ch_versions = ch_versions.mix(INPUT_CHECK.out.versions) + // Run FastQC + ch_multiqc_fastqc = Channel.empty() + if (!params.skip_fastqc){ + FASTQC_CHECK ( ch_fastq ) + ch_versions = ch_versions.mix(FASTQC_CHECK.out.fastqc_version.first().ifEmpty(null)) + ch_multiqc_fastqc = FASTQC_CHECK.out.fastqc_multiqc.ifEmpty([]) + } + // Run kallisto bustools pipeline if (params.aligner == "kallisto") { KALLISTO_BUSTOOLS( @@ -169,15 +178,17 @@ workflow SCRNASEQ { ) if (!params.skip_multiqc) { + ch_workflow_summary = Channel.value( WorkflowScrnaseq.paramsSummaryMultiqc(workflow, summary_params) ).collectFile(name: 'workflow_summary_mqc.yaml') MULTIQC( ch_multiqc_config, - ch_multiqc_custom_config, + ch_multiqc_custom_config.collect().ifEmpty([]), CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect(), ch_workflow_summary, + ch_multiqc_fastqc.collect{it[0]}.ifEmpty([]), ch_multiqc_alevin, ch_multiqc_star ) From 092136e7e52624aea1604ce5664b4ec4c9db7fa5 Mon Sep 17 00:00:00 2001 From: Florian Date: Wed, 29 Jun 2022 15:07:27 +0200 Subject: [PATCH 061/165] adding skip_fastqc parameter to pipeline --- nextflow.config | 3 ++- nextflow_schema.json | 4 ++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index c986adb8..09b80901 100644 --- a/nextflow.config +++ b/nextflow.config @@ -48,7 +48,8 @@ params { igenomes_base = 's3://ngi-igenomes/igenomes' igenomes_ignore = false - // MultiQC options + // QC and MultiQC options + skip_fastqc = false multiqc_config = null multiqc_title = null 
max_multiqc_email_size = '25.MB' diff --git a/nextflow_schema.json b/nextflow_schema.json index 1d9b1ffb..7f1091d3 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -373,6 +373,10 @@ "hidden": true, "fa_icon": "fas fa-bacon" }, + "skip_fastqc": { + "type": "boolean", + "description": "Skip the FastQC reporting feature in the pipeline." + }, "skip_multiqc": { "type": "boolean", "description": "Skip the MultiQC reporting feature in the pipeline." From 8f4b34fde04d4dca4b69907e0a278f9f682f0d88 Mon Sep 17 00:00:00 2001 From: Florian Date: Wed, 29 Jun 2022 15:07:44 +0200 Subject: [PATCH 062/165] adding fastqc log to output.md --- docs/output.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/docs/output.md b/docs/output.md index 8d6bcf5a..3ced038e 100644 --- a/docs/output.md +++ b/docs/output.md @@ -12,6 +12,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d - [:warning: Please read this documentation on the nf-core website: https://nf-co.re/scrnaseq/output](#warning-please-read-this-documentation-on-the-nf-core-website-httpsnf-corescrnaseqoutput) - [Introduction](#introduction) - [Pipeline overview](#pipeline-overview) + - [FastQC](#fastqc) - [Kallisto & Bustools Results](#kallisto--bustools-results) - [STARsolo](#starsolo) - [Salmon Alevin & AlevinQC](#salmon-alevin--alevinqc) @@ -20,6 +21,19 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d - [MultiQC](#multiqc) - [Pipeline information](#pipeline-information) +## FastQC + +See [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc) for details about FastQC. + +The pipeline analyzes the raw data and generates for each file a FastQC report. All report are collected in MultiQC. + +**Output directory: `results/fastqc`** + +- `.html` + - Contains the FastQC report. +- `.zip` + - Contains additional information, such as individual plots, and FastQC raw data. 
+ ## Kallisto & Bustools Results See [Kallisto](https://pachterlab.github.io/kallisto/about) for details about Kallisto and [Bustools](https://bustools.github.io/) for more information on BusTools. From 3259830153e339384c0f5bc1f9bc97330ff91955 Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Wed, 29 Jun 2022 19:37:32 +0200 Subject: [PATCH 063/165] adding sep="\t" argument input is a tsv and not a csv --- bin/mtx_to_h5ad.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bin/mtx_to_h5ad.py b/bin/mtx_to_h5ad.py index 96384e37..2885886e 100755 --- a/bin/mtx_to_h5ad.py +++ b/bin/mtx_to_h5ad.py @@ -14,8 +14,8 @@ def mtx_to_adata( adata = sc.read_mtx(mtx_file) if aligner == 'star': # for some reason star matrix comes transposed and doesn't fit when values are appended directly adata = adata.transpose() - adata.obs_names = pd.read_csv(barcode_file, header=None)[0].values - adata.var_names = pd.read_csv(feature_file, header=None)[0].values + adata.obs_names = pd.read_csv(barcode_file, header=None, sep="\t")[0].values + adata.var_names = pd.read_csv(feature_file, header=None, sep="\t")[0].values adata.obs["sample"] = sample return adata From 893ae789a36aa83136742cc6674f0ac7c376117d Mon Sep 17 00:00:00 2001 From: nf-core-bot Date: Wed, 29 Jun 2022 19:22:43 +0000 Subject: [PATCH 064/165] [automated] Fix linting with Prettier --- nextflow_schema.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 7f1091d3..91757ae4 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -376,7 +376,7 @@ "skip_fastqc": { "type": "boolean", "description": "Skip the FastQC reporting feature in the pipeline." - }, + }, "skip_multiqc": { "type": "boolean", "description": "Skip the MultiQC reporting feature in the pipeline." 
From 1c94c908706d8d317edbe85d760e88e30c051a76 Mon Sep 17 00:00:00 2001 From: Florian Date: Thu, 30 Jun 2022 10:22:01 +0200 Subject: [PATCH 065/165] Changes to fastqc.nf, cleaning up code --- subworkflows/local/fastqc.nf | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/subworkflows/local/fastqc.nf b/subworkflows/local/fastqc.nf index bd65cf1f..50f55d5b 100644 --- a/subworkflows/local/fastqc.nf +++ b/subworkflows/local/fastqc.nf @@ -1,9 +1,6 @@ // // Check input samplesheet and get read channels // - -//TODO --> add skip_fastqc to params - include { FASTQC } from '../../modules/nf-core/modules/fastqc/main' workflow FASTQC_CHECK { @@ -18,11 +15,6 @@ workflow FASTQC_CHECK { /* * FastQ QC using FASTQC */ - fastqc_zip = Channel.empty() - fastqc_html = Channel.empty() - fastqc_multiqc = Channel.empty() - fastqc_version = Channel.empty() - FASTQC ( ch_fastq ) fastqc_zip = FASTQC.out.zip fastqc_html = FASTQC.out.html @@ -34,6 +26,7 @@ workflow FASTQC_CHECK { .map { it -> [ it[1] ] } .set { fastqc_html_only } + fastqc_multiqc = Channel.empty() fastqc_multiqc = fastqc_multiqc.mix( fastqc_zip_only, fastqc_html_only ) fastqc_version = FASTQC.out.versions From ab4b073dea308f450d23aec999871d7dca3b4967 Mon Sep 17 00:00:00 2001 From: Florian Date: Thu, 30 Jun 2022 10:22:39 +0200 Subject: [PATCH 066/165] Removing unnecessary code in scrnaseq.nf --- workflows/scrnaseq.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf index fa37596a..8e5b9fbe 100644 --- a/workflows/scrnaseq.nf +++ b/workflows/scrnaseq.nf @@ -110,7 +110,7 @@ workflow SCRNASEQ { ch_multiqc_fastqc = Channel.empty() if (!params.skip_fastqc){ FASTQC_CHECK ( ch_fastq ) - ch_versions = ch_versions.mix(FASTQC_CHECK.out.fastqc_version.first().ifEmpty(null)) + ch_versions = ch_versions.mix(FASTQC_CHECK.out.fastqc_version) ch_multiqc_fastqc = FASTQC_CHECK.out.fastqc_multiqc.ifEmpty([]) } From 67cc34cde96b950f78ce8305ce717e66dbc96b9a Mon Sep 
17 00:00:00 2001 From: Felipe Marques de Almeida Date: Thu, 30 Jun 2022 16:15:02 +0200 Subject: [PATCH 067/165] Update modules.config --- conf/modules.config | 58 ++++++++++++++++++++------------------------- 1 file changed, 26 insertions(+), 32 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index c0fcb25b..c1351b49 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -31,13 +31,19 @@ process { pattern: '*_versions.yml' ] } + withName: 'MTX_TO_H5AD|CONCAT_H5AD' { + publishDir = [ + path: { "${params.outdir}/${params.aligner}/mtx_conversions" }, + mode: params.publish_dir_mode + ] + } } if(params.aligner == "cellranger") { process { withName: CELLRANGER_MKGTF { publishDir = [ - path: "${params.outdir}/cellranger/mkgtf", + path: "${params.outdir}/${params.aligner}/mkgtf", mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -45,25 +51,13 @@ if(params.aligner == "cellranger") { } withName: CELLRANGER_MKREF { publishDir = [ - path: "${params.outdir}/cellranger/mkref", + path: "${params.outdir}/${params.aligner}/mkref", mode: params.publish_dir_mode ] } withName: CELLRANGER_COUNT { publishDir = [ - path: "${params.outdir}/cellranger/count", - mode: params.publish_dir_mode - ] - } - withName: MTX_TO_H5AD { - publishDir = [ - path: { "${params.outdir}/cellranger/count/sample-${meta.id}/outs/filtered_feature_bc_matrix" }, - mode: params.publish_dir_mode - ] - } - withName: CONCAT_H5AD { - publishDir = [ - path: { "${params.outdir}/cellranger/count/concatenated_h5ad" }, + path: "${params.outdir}/${params.aligner}/count", mode: params.publish_dir_mode ] } @@ -76,15 +70,9 @@ if (params.aligner == "alevin") { ext.args = "--table transcript_id,gene_id" ext.prefix = { "${gff.baseName}_gffread" } } - withName: MTX_TO_H5AD { + withName: 'SALMON_INDEX|SALMON_ALEVIN' { publishDir = [ - path: { "${params.outdir}/salmon/${meta.id}_alevin_results/alevin" }, - mode: params.publish_dir_mode - ] - } - 
withName: CONCAT_H5AD { - publishDir = [ - path: { "${params.outdir}/salmon/concatenated_h5ad" }, + path: { "${params.outdir}/${params.aligner}" }, mode: params.publish_dir_mode ] } @@ -96,22 +84,28 @@ if (params.aligner == "star") { withName: STAR_ALIGN { ext.args = "--readFilesCommand zcat --runDirPerm All_RWX --outWigType bedGraph --twopassMode Basic --outSAMtype BAM SortedByCoordinate" } - } -} - -if (params.aligner == "kallisto") { - process { - withName: MTX_TO_H5AD { + withName: STAR_GENOMEGENERATE { publishDir = [ - path: { "${params.outdir}/kallistobustools/${meta.id}_kallistobustools_count/counts_unfiltered" }, + path: { "${params.outdir}/${params.aligner}/genome_generate" }, mode: params.publish_dir_mode ] } - withName: CONCAT_H5AD { + withName: STAR_ALIGN { publishDir = [ - path: { "${params.outdir}/kallistobustools/concatenated_h5ad" }, + path: { "${params.outdir}/${params.aligner}/${meta.id}" }, mode: params.publish_dir_mode ] } } } + +if (params.aligner == 'kallisto') { + process { + withName: 'KALLISTOBUSTOOLS_REF|KALLISTOBUSTOOLS_COUNT' { + publishDir = [ + path: { "${params.outdir}/${params.aligner}" }, + mode: params.publish_dir_mode + ] + } + } +} \ No newline at end of file From 4d008da24009e78dd52adca57ce4bd85708f6acf Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Fri, 1 Jul 2022 11:30:24 +0200 Subject: [PATCH 068/165] add conversion info to docs --- conf/modules.config | 2 +- docs/output.md | 9 ++++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index c1351b49..19b9dabf 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -108,4 +108,4 @@ if (params.aligner == 'kallisto') { ] } } -} \ No newline at end of file +} diff --git a/docs/output.md b/docs/output.md index 8d6bcf5a..71a9be32 100644 --- a/docs/output.md +++ b/docs/output.md @@ -49,8 +49,9 @@ For details on how to load these into R and perform further downstream analysis, ## STARsolo -**Output 
directory: `results/STAR`** +**Output directory: `results/star`** +- Files will be organized in one directory per sample - Contains the mapped BAM files and output metrics created by STARsolo **Output directory: `results/reference_genome`** @@ -92,6 +93,12 @@ Cell Ranger is a set of analysis scripts that processes 10X Chromium single cell - When supplied with a `--fasta` genome fasta, this contains the extracted transcriptome - The GTF file supplied with `--gtf` is used to extract the transcriptome positions appropriately +**Output directory: `results/${params.aligner}/mtx_conversions` + +- `*_matrix.h5ad` + - `.mtx` files converted to [AnnData](https://anndata.readthedocs.io/en/latest/) in `.h5ad` format, using [scanpy package](https://scanpy.readthedocs.io/en/stable/). + - One per sample and a single one with all samples concatenated together `combined_matrix.h5ad` + ## MultiQC
From 77bffa3458ac603e73990974e40d7aa35fd5272a Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Fri, 1 Jul 2022 11:32:30 +0200 Subject: [PATCH 069/165] Update output.md --- docs/output.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/output.md b/docs/output.md index 71a9be32..d3c57caf 100644 --- a/docs/output.md +++ b/docs/output.md @@ -93,7 +93,7 @@ Cell Ranger is a set of analysis scripts that processes 10X Chromium single cell - When supplied with a `--fasta` genome fasta, this contains the extracted transcriptome - The GTF file supplied with `--gtf` is used to extract the transcriptome positions appropriately -**Output directory: `results/${params.aligner}/mtx_conversions` +**Output directory: `results/${params.aligner}/mtx_conversions`** - `*_matrix.h5ad` - `.mtx` files converted to [AnnData](https://anndata.readthedocs.io/en/latest/) in `.h5ad` format, using [scanpy package](https://scanpy.readthedocs.io/en/stable/). From 4ccbf1adfb08f42087282565748cdb1588d338d1 Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Mon, 4 Jul 2022 09:27:37 +0200 Subject: [PATCH 070/165] added variables for star files --- modules/local/mtx_to_seurat.nf | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/modules/local/mtx_to_seurat.nf b/modules/local/mtx_to_seurat.nf index 9ffeb883..7a122834 100644 --- a/modules/local/mtx_to_seurat.nf +++ b/modules/local/mtx_to_seurat.nf @@ -29,7 +29,12 @@ process MTX_TO_SEURAT { matrix = "*_alevin_results/alevin/quants_mat.mtx.gz" barcodes = "*_alevin_results/alevin/quants_mat_rows.txt" features = "*_alevin_results/alevin/quants_mat_cols.txt" + } else if (params.aligner == 'star') { + matrix = "*.Solo.out/Gene/filtered/matrix.mtx" + barcodes = "*.Solo.out/Gene/filtered/barcodes.tsv" + features = "*.Solo.out/Gene/filtered/features.tsv" } + """ mtx_to_seurat.R \\ $matrix \\ From 1456f452dd7744b44b23f3d9424e65665e79c0bc Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Mon, 4 Jul 2022 
10:48:33 +0200 Subject: [PATCH 071/165] Add seurat conversion (#5) * added function to convert mtx to Seurat objects * conversions are outputted together * Update mtx_to_seurat.R fixing linting * fixing stub definition * added variables for star files --- bin/mtx_to_seurat.R | 25 ++++++++++++++ conf/modules.config | 2 +- modules/local/mtx_to_seurat.nf | 51 ++++++++++++++++++++++++++++ subworkflows/local/mtx_conversion.nf | 35 +++++++++++++++++++ workflows/scrnaseq.nf | 4 +-- 5 files changed, 114 insertions(+), 3 deletions(-) create mode 100755 bin/mtx_to_seurat.R create mode 100644 modules/local/mtx_to_seurat.nf create mode 100644 subworkflows/local/mtx_conversion.nf diff --git a/bin/mtx_to_seurat.R b/bin/mtx_to_seurat.R new file mode 100755 index 00000000..f4ef6b8e --- /dev/null +++ b/bin/mtx_to_seurat.R @@ -0,0 +1,25 @@ +#!/usr/bin/env Rscript +library(Seurat) + +args <- commandArgs(trailingOnly=TRUE) + +mtx_file <- args[1] +barcode_file <- args[2] +feature_file <- args[3] +out.file <- args[4] +aligner <- args[5] + +if(aligner %in% c("kallisto", "alevin")) { + # for kallisto and alevin, the features file contains only one column and matrix needs to be transposed + expression.matrix <- ReadMtx( + mtx = mtx_file, features = feature_file, cells = barcode_file, feature.column = 1, mtx.transpose = TRUE + ) +} else { + expression.matrix <- ReadMtx( + mtx = mtx_file, features = feature_file, cells = barcode_file + ) +} + +seurat.object <- CreateSeuratObject(counts = expression.matrix) + +saveRDS(seurat.object, file = out.file) diff --git a/conf/modules.config b/conf/modules.config index 19b9dabf..e75c76e8 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -31,7 +31,7 @@ process { pattern: '*_versions.yml' ] } - withName: 'MTX_TO_H5AD|CONCAT_H5AD' { + withName: 'MTX_TO_H5AD|CONCAT_H5AD|MTX_TO_SEURAT' { publishDir = [ path: { "${params.outdir}/${params.aligner}/mtx_conversions" }, mode: params.publish_dir_mode diff --git a/modules/local/mtx_to_seurat.nf 
b/modules/local/mtx_to_seurat.nf new file mode 100644 index 00000000..7a122834 --- /dev/null +++ b/modules/local/mtx_to_seurat.nf @@ -0,0 +1,51 @@ +process MTX_TO_SEURAT { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? "seurat-scripts" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'docker://satijalab/seurat:4.1.0' : + 'satijalab/seurat:4.1.0' }" + + input: + // inputs from cellranger nf-core module does not come in a single sample dir + // for each sample, the sub-folders and files come directly in array. + tuple val(meta), path(inputs) + + output: + path "*.seurat", emit: h5ad + + script: + def aligner = params.aligner + if (params.aligner == "cellranger") { + matrix = "filtered_feature_bc_matrix/matrix.mtx.gz" + barcodes = "filtered_feature_bc_matrix/barcodes.tsv.gz" + features = "filtered_feature_bc_matrix/features.tsv.gz" + } else if (params.aligner == "kallisto") { + matrix = "*_kallistobustools_count/counts_unfiltered/*.mtx" + barcodes = "*_kallistobustools_count/counts_unfiltered/*.barcodes.txt" + features = "*_kallistobustools_count/counts_unfiltered/*.genes.txt" + } else if (params.aligner == "alevin") { + matrix = "*_alevin_results/alevin/quants_mat.mtx.gz" + barcodes = "*_alevin_results/alevin/quants_mat_rows.txt" + features = "*_alevin_results/alevin/quants_mat_cols.txt" + } else if (params.aligner == 'star') { + matrix = "*.Solo.out/Gene/filtered/matrix.mtx" + barcodes = "*.Solo.out/Gene/filtered/barcodes.tsv" + features = "*.Solo.out/Gene/filtered/features.tsv" + } + + """ + mtx_to_seurat.R \\ + $matrix \\ + $barcodes \\ + $features \\ + ${meta.id}_matrix.seurat \\ + ${aligner} + """ + + stub: + """ + touch ${meta.id}_matrix.seurat + """ +} diff --git a/subworkflows/local/mtx_conversion.nf b/subworkflows/local/mtx_conversion.nf new file mode 100644 index 00000000..731842c8 --- /dev/null +++ b/subworkflows/local/mtx_conversion.nf @@ -0,0 +1,35 @@ +/* -- 
IMPORT LOCAL MODULES/SUBWORKFLOWS -- */ +include { MTX_TO_H5AD } from '../../modules/local/mtx_to_h5ad.nf' +include { CONCAT_H5AD } from '../../modules/local/concat_h5ad.nf' +include { MTX_TO_SEURAT } from '../../modules/local/mtx_to_seurat.nf' + +workflow MTX_CONVERSION { + + take: + mtx_matrices + samplesheet + + main: + // + // Convert matrix do h5ad + // + MTX_TO_H5AD ( + mtx_matrices + ) + + // + // Concat sample-specific h5ad in one + // + CONCAT_H5AD ( + MTX_TO_H5AD.out.h5ad.collect(), // gather all sample-specific files + samplesheet + ) + + // + // Convert matrix do seurat + // + MTX_TO_SEURAT ( + mtx_matrices + ) + +} diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf index 5d2ce825..018f8b49 100644 --- a/workflows/scrnaseq.nf +++ b/workflows/scrnaseq.nf @@ -40,7 +40,7 @@ include { KALLISTO_BUSTOOLS } from '../subworkflows/local/kallisto_bustools' include { SCRNASEQ_ALEVIN } from '../subworkflows/local/alevin' include { STARSOLO } from '../subworkflows/local/starsolo' include { CELLRANGER_ALIGN } from "../subworkflows/local/align_cellranger" -include { H5AD_CONVERSION } from "../subworkflows/local/conversion_to_h5ad" +include { MTX_CONVERSION } from "../subworkflows/local/mtx_conversion" /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -170,7 +170,7 @@ workflow SCRNASEQ { } // Run mtx to h5ad conversion subworkflow - H5AD_CONVERSION ( + MTX_CONVERSION ( ch_mtx_matrices, ch_input ) From 2ccdc1159600510ef34329d4afed363424ea4ea0 Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Mon, 4 Jul 2022 15:40:14 +0200 Subject: [PATCH 072/165] fixed suffixes for seurat objects --- modules/local/mtx_to_seurat.nf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/local/mtx_to_seurat.nf b/modules/local/mtx_to_seurat.nf index 7a122834..47be3ce9 100644 --- a/modules/local/mtx_to_seurat.nf +++ b/modules/local/mtx_to_seurat.nf @@ -13,7 +13,7 @@ process MTX_TO_SEURAT { tuple 
val(meta), path(inputs) output: - path "*.seurat", emit: h5ad + path "*.rds", emit: seuratObjects script: def aligner = params.aligner @@ -40,12 +40,12 @@ process MTX_TO_SEURAT { $matrix \\ $barcodes \\ $features \\ - ${meta.id}_matrix.seurat \\ + ${meta.id}_matrix.rds \\ ${aligner} """ stub: """ - touch ${meta.id}_matrix.seurat + touch ${meta.id}_matrix.rds """ } From a561f4b48d95ecab00869ab058d1db32d4ad5583 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Tue, 5 Jul 2022 11:37:55 +0000 Subject: [PATCH 073/165] Drop local Kbtools/count, update to latest module version --- modules.json | 9 ++- modules/local/kallistobustools_count.nf | 52 -------------- modules/nf-core/modules/gunzip/main.nf | 10 +++ .../modules/kallistobustools/count/main.nf | 49 ++++++++++++++ .../modules/kallistobustools/count/meta.yml | 67 +++++++++++++++++++ .../modules/kallistobustools/ref/main.nf | 6 +- modules/nf-core/modules/multiqc/main.nf | 4 +- subworkflows/local/kallisto_bustools.nf | 13 ++-- 8 files changed, 144 insertions(+), 66 deletions(-) delete mode 100644 modules/local/kallistobustools_count.nf create mode 100644 modules/nf-core/modules/kallistobustools/count/main.nf create mode 100644 modules/nf-core/modules/kallistobustools/count/meta.yml diff --git a/modules.json b/modules.json index 96dba9a3..2c9b7639 100644 --- a/modules.json +++ b/modules.json @@ -22,13 +22,16 @@ "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" }, "gunzip": { - "git_sha": "9aadd9a6d3f5964476582319b3a1c54a3e3fe7c9" + "git_sha": "fa37e0662690c4ec4260dae282fbce08777503e6" + }, + "kallistobustools/count": { + "git_sha": "ed5594bee3eb38874cb282d288bc22ab6262a73e" }, "kallistobustools/ref": { - "git_sha": "e20e57f90b6787ac9a010a980cf6ea98bd990046" + "git_sha": "0f1e736212e5ae37036ac7e32b225087a8a9b154" }, "multiqc": { - "git_sha": "e5f8924fabf4c8380f55fb7aee89fd2c268161b1" + "git_sha": "08376da6843b14c82d84d444784c0b3635bb7fd5" }, "salmon/index": { "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" 
diff --git a/modules/local/kallistobustools_count.nf b/modules/local/kallistobustools_count.nf deleted file mode 100644 index 7f3dabff..00000000 --- a/modules/local/kallistobustools_count.nf +++ /dev/null @@ -1,52 +0,0 @@ -process KALLISTOBUSTOOLS_COUNT { - tag "$meta.id" - label 'process_medium' - - conda (params.enable_conda ? "bioconda::kb-python=0.25.1" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/kb-python:0.25.1--py_0' : - 'quay.io/biocontainers/kb-python:0.25.1--py_0' }" - - input: - // - // Input reads are expected to come as: [ meta, [ pair1_read1, pair1_read2, pair2_read1, pair2_read2 ] ] - // Input array for a sample is created in the same order reads appear in samplesheet as pairs from replicates are appended to array. - // - tuple val(meta), path(reads) - path index - path t2g - path t1c - path t2c - val use_t1c - val use_t2c - val sc_workflow - val technology - - output: - tuple val(meta), path ("*_kallistobustools_count*") , emit: counts - path "versions.yml" , emit: versions - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def cdna = use_t1c ? "-c1 $t1c" : '' - def introns = use_t2c ? 
"-c2 $t2c" : '' - """ - kb count \\ - -t $task.cpus \\ - -i $index \\ - -g $t2g \\ - $cdna \\ - $introns \\ - --workflow $sc_workflow \\ - -x $technology \\ - $args \\ - -o ${prefix}_kallistobustools_count \\ - ${reads.join( " " )} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - kallistobustools: \$(echo \$(kb --version 2>&1) | sed 's/^.*kb_python //;s/positional arguments.*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/gunzip/main.nf b/modules/nf-core/modules/gunzip/main.nf index 61bf1afa..70367049 100644 --- a/modules/nf-core/modules/gunzip/main.nf +++ b/modules/nf-core/modules/gunzip/main.nf @@ -31,4 +31,14 @@ process GUNZIP { gunzip: \$(echo \$(gunzip --version 2>&1) | sed 's/^.*(gzip) //; s/ Copyright.*\$//') END_VERSIONS """ + + stub: + gunzip = archive.toString() - '.gz' + """ + touch $gunzip + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gunzip: \$(echo \$(gunzip --version 2>&1) | sed 's/^.*(gzip) //; s/ Copyright.*\$//') + END_VERSIONS + """ } diff --git a/modules/nf-core/modules/kallistobustools/count/main.nf b/modules/nf-core/modules/kallistobustools/count/main.nf new file mode 100644 index 00000000..10840da6 --- /dev/null +++ b/modules/nf-core/modules/kallistobustools/count/main.nf @@ -0,0 +1,49 @@ +process KALLISTOBUSTOOLS_COUNT { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? 'bioconda::kb-python=0.27.2' : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/kb-python:0.27.2--pyhdfd78af_0' : + 'quay.io/biocontainers/kb-python:0.27.2--pyhdfd78af_0' }" + + input: + tuple val(meta), path(reads) + path index + path t2g + path t1c + path t2c + val technology + + output: + tuple val(meta), path ("*.count"), emit: count + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def cdna = t1c ? "-c1 $t1c" : '' + def introns = t2c ? "-c2 $t2c" : '' + """ + kb \\ + count \\ + -t $task.cpus \\ + -i $index \\ + -g $t2g \\ + $cdna \\ + $introns \\ + -x $technology \\ + $args \\ + -o ${prefix}.count \\ + ${reads.join( " " )} \\ + -m ${task.memory.toGiga()}G + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + kallistobustools: \$(echo \$(kb --version 2>&1) | sed 's/^.*kb_python //;s/positional arguments.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/kallistobustools/count/meta.yml b/modules/nf-core/modules/kallistobustools/count/meta.yml new file mode 100644 index 00000000..911697d2 --- /dev/null +++ b/modules/nf-core/modules/kallistobustools/count/meta.yml @@ -0,0 +1,67 @@ +name: kallistobustools_count +description: quantifies scRNA-seq data from fastq files using kb-python. +keywords: + - scRNA-seq + - count +tools: + - kb: + description: kallisto and bustools are wrapped in an easy-to-use program called kb + homepage: https://www.kallistobus.tools/ + documentation: https://kb-python.readthedocs.io/en/latest/index.html + tool_dev_url: https://github.com/pachterlab/kb_python + doi: "" + licence: MIT License + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. 
+ - index: + type: file + description: kb-ref index file (.idx) + pattern: "*.{idx}" + - t2g: + type: file + description: t2g file from kallisto + pattern: "*t2g.txt" + - t1c: + type: file + description: kb ref's c1 spliced_t2c file + pattern: "*.{cdna_t2c.txt}" + - t2c: + type: file + description: kb ref's c2 unspliced_t2c file + pattern: "*.{introns_t2c.txt}" + - workflow_mode: + type: value + description: String value defining workflow to use, can be one of "standard", "lamanno", "nucleus" + pattern: "{standard,lamanno,nucleus,kite}" + - technology: + type: value + description: String value defining the sequencing technology used. + pattern: "{10XV1,10XV2,10XV3,CELSEQ,CELSEQ2,DROPSEQ,INDROPSV1,INDROPSV2,INDROPSV3,SCRUBSEQ,SURECELL,SMARTSEQ}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test'] + - count: + type: file + description: kb count output folder + pattern: "*.{count}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@flowuenne" diff --git a/modules/nf-core/modules/kallistobustools/ref/main.nf b/modules/nf-core/modules/kallistobustools/ref/main.nf index 1e789615..89943ec9 100644 --- a/modules/nf-core/modules/kallistobustools/ref/main.nf +++ b/modules/nf-core/modules/kallistobustools/ref/main.nf @@ -2,10 +2,10 @@ process KALLISTOBUSTOOLS_REF { tag "$fasta" label 'process_medium' - conda (params.enable_conda ? 'bioconda::kb-python=0.26.3' : null) + conda (params.enable_conda ? 'bioconda::kb-python=0.27.2' : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/kb-python:0.26.3--pyhdfd78af_0' : - 'quay.io/biocontainers/kb-python:0.26.3--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/kb-python:0.27.2--pyhdfd78af_0' : + 'quay.io/biocontainers/kb-python:0.27.2--pyhdfd78af_0' }" input: path fasta diff --git a/modules/nf-core/modules/multiqc/main.nf b/modules/nf-core/modules/multiqc/main.nf index b0831b5d..3c3517bf 100644 --- a/modules/nf-core/modules/multiqc/main.nf +++ b/modules/nf-core/modules/multiqc/main.nf @@ -3,8 +3,8 @@ process MULTIQC { conda (params.enable_conda ? 'bioconda::multiqc=1.12' : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.12--pyhdfd78af_0' : - 'quay.io/biocontainers/multiqc:1.12--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.13a--pyhdfd78af_1' : + 'quay.io/biocontainers/multiqc:1.13a--pyhdfd78af_1' }" input: path multiqc_files, stageAs: "?/*" diff --git a/subworkflows/local/kallisto_bustools.nf b/subworkflows/local/kallisto_bustools.nf index 204852da..2b0b7f52 100644 --- a/subworkflows/local/kallisto_bustools.nf +++ b/subworkflows/local/kallisto_bustools.nf @@ -1,6 +1,6 @@ /* -- IMPORT LOCAL MODULES/SUBWORKFLOWS -- */ include { GENE_MAP } from '../../modules/local/gene_map' -include { KALLISTOBUSTOOLS_COUNT } from '../../modules/local/kallistobustools_count' +include {KALLISTOBUSTOOLS_COUNT } from '../../modules/nf-core/modules/kallistobustools/count/main' /* -- IMPORT NF-CORE MODULES/SUBWORKFLOWS -- */ include { GUNZIP } from '../../modules/nf-core/modules/gunzip/main' @@ -46,6 +46,8 @@ workflow KALLISTO_BUSTOOLS { txp2gene = KALLISTOBUSTOOLS_REF.out.t2g.collect() kallisto_index = KALLISTOBUSTOOLS_REF.out.index.collect() ch_versions = ch_versions.mix(KALLISTOBUSTOOLS_REF.out.versions) + t1c = KALLISTOBUSTOOLS_REF.out.cdna_t2c.ifEmpty{ [] } + t2c = KALLISTOBUSTOOLS_REF.out.intron_t2c.ifEmpty{ 
[] } } /* @@ -55,18 +57,17 @@ workflow KALLISTO_BUSTOOLS { ch_fastq, kallisto_index, txp2gene, - [], - [], - false, - false, + t1c, + t2c, kb_workflow, protocol ) + ch_versions = ch_versions.mix(KALLISTOBUSTOOLS_COUNT.out.versions) emit: ch_versions - counts = KALLISTOBUSTOOLS_COUNT.out.counts + counts = KALLISTOBUSTOOLS_COUNT.out.count } From 9290e4c5439c3f73ce025a47e8d6c1e0b74b21b5 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Tue, 5 Jul 2022 11:39:58 +0000 Subject: [PATCH 074/165] Changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 68229e01..78ef8355 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixes +- Fixed Kallistobustools workflow [#123](https://github.com/nf-core/scrnaseq/issues/123) by upgrading to nf-core/modules module + ## v2.0.0 - 2022-06-17 "Gray Nickel Beagle" - Pipeline ported to dsl2 From aa7e256b7d545a3666386e6ec5b50eaef7059b35 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Tue, 5 Jul 2022 11:42:10 +0000 Subject: [PATCH 075/165] Wrong module call --- subworkflows/local/kallisto_bustools.nf | 1 - 1 file changed, 1 deletion(-) diff --git a/subworkflows/local/kallisto_bustools.nf b/subworkflows/local/kallisto_bustools.nf index 2b0b7f52..bc958b47 100644 --- a/subworkflows/local/kallisto_bustools.nf +++ b/subworkflows/local/kallisto_bustools.nf @@ -59,7 +59,6 @@ workflow KALLISTO_BUSTOOLS { txp2gene, t1c, t2c, - kb_workflow, protocol ) From f3ce79ce91843470f1e638e17aae3c043846e53c Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Tue, 5 Jul 2022 12:02:12 +0000 Subject: [PATCH 076/165] Should fix things --- modules/local/mtx_to_h5ad.nf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf index c0c24890..d5a7941b 100644 --- a/modules/local/mtx_to_h5ad.nf +++ b/modules/local/mtx_to_h5ad.nf @@ 
-18,9 +18,9 @@ process MTX_TO_H5AD { script: // def file paths for aligners (except cellranger) if (params.aligner == 'kallisto') { - mtx_matrix = "*_kallistobustools_count/counts_unfiltered/*.mtx" - barcodes_tsv = "*_kallistobustools_count/counts_unfiltered/*.barcodes.txt" - features_tsv = "*_kallistobustools_count/counts_unfiltered/*.genes.txt" + mtx_matrix = "*count/counts_unfiltered/*.mtx" + barcodes_tsv = "*count/counts_unfiltered/*.barcodes.txt" + features_tsv = "*count/counts_unfiltered/*.genes.txt" } else if (params.aligner == 'alevin') { mtx_matrix = "*_alevin_results/alevin/quants_mat.mtx.gz" barcodes_tsv = "*_alevin_results/alevin/quants_mat_rows.txt" From 70efe3f7b1bd4992adb5870e4f0f3f5138438f09 Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Tue, 5 Jul 2022 21:42:52 +0200 Subject: [PATCH 077/165] fixing paths after kallisto update --- modules/local/mtx_to_seurat.nf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/local/mtx_to_seurat.nf b/modules/local/mtx_to_seurat.nf index 47be3ce9..9a71ac8a 100644 --- a/modules/local/mtx_to_seurat.nf +++ b/modules/local/mtx_to_seurat.nf @@ -22,9 +22,9 @@ process MTX_TO_SEURAT { barcodes = "filtered_feature_bc_matrix/barcodes.tsv.gz" features = "filtered_feature_bc_matrix/features.tsv.gz" } else if (params.aligner == "kallisto") { - matrix = "*_kallistobustools_count/counts_unfiltered/*.mtx" - barcodes = "*_kallistobustools_count/counts_unfiltered/*.barcodes.txt" - features = "*_kallistobustools_count/counts_unfiltered/*.genes.txt" + matrix = "*count/counts_unfiltered/*.mtx" + barcodes = "*count/counts_unfiltered/*.barcodes.txt" + features = "*count/counts_unfiltered/*.genes.txt" } else if (params.aligner == "alevin") { matrix = "*_alevin_results/alevin/quants_mat.mtx.gz" barcodes = "*_alevin_results/alevin/quants_mat_rows.txt" From 5c79f7781ce8685b9293a829060cc3ad26acfd83 Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Tue, 12 Jul 2022 13:44:49 +0200 
Subject: [PATCH 078/165] Update modules.config adding kallisto workflow definition for kallistobustools/count module --- conf/modules.config | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index e75c76e8..e9992e7e 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -101,11 +101,18 @@ if (params.aligner == "star") { if (params.aligner == 'kallisto') { process { - withName: 'KALLISTOBUSTOOLS_REF|KALLISTOBUSTOOLS_COUNT' { + withName: KALLISTOBUSTOOLS_REF { publishDir = [ path: { "${params.outdir}/${params.aligner}" }, mode: params.publish_dir_mode ] } + withName: KALLISTOBUSTOOLS_COUNT { + publishDir = [ + path: { "${params.outdir}/${params.aligner}" }, + mode: params.publish_dir_mode + ] + ext.args = "--workflow ${params.kb_workflow}" + } } } From fd3b49055eac6615f6c1d2c9cb5fa227bd7ab598 Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Tue, 12 Jul 2022 15:55:15 +0200 Subject: [PATCH 079/165] Updating nf-core/modules --- modules.json | 4 ++-- modules/nf-core/modules/kallistobustools/count/main.nf | 3 ++- modules/nf-core/modules/multiqc/main.nf | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/modules.json b/modules.json index 2c9b7639..b21e1449 100644 --- a/modules.json +++ b/modules.json @@ -25,13 +25,13 @@ "git_sha": "fa37e0662690c4ec4260dae282fbce08777503e6" }, "kallistobustools/count": { - "git_sha": "ed5594bee3eb38874cb282d288bc22ab6262a73e" + "git_sha": "ec806cebf121767b95ad492b0d0f93dbdc2f33da" }, "kallistobustools/ref": { "git_sha": "0f1e736212e5ae37036ac7e32b225087a8a9b154" }, "multiqc": { - "git_sha": "08376da6843b14c82d84d444784c0b3635bb7fd5" + "git_sha": "5138acca0985ca01c38a1c4fba917d83772b1106" }, "salmon/index": { "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" diff --git a/modules/nf-core/modules/kallistobustools/count/main.nf b/modules/nf-core/modules/kallistobustools/count/main.nf index 10840da6..ab61597c 100644 --- 
a/modules/nf-core/modules/kallistobustools/count/main.nf +++ b/modules/nf-core/modules/kallistobustools/count/main.nf @@ -27,6 +27,7 @@ process KALLISTOBUSTOOLS_COUNT { def prefix = task.ext.prefix ?: "${meta.id}" def cdna = t1c ? "-c1 $t1c" : '' def introns = t2c ? "-c2 $t2c" : '' + def memory = task.memory.toGiga() - 1 """ kb \\ count \\ @@ -39,7 +40,7 @@ process KALLISTOBUSTOOLS_COUNT { $args \\ -o ${prefix}.count \\ ${reads.join( " " )} \\ - -m ${task.memory.toGiga()}G + -m ${memory}G cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/modules/multiqc/main.nf b/modules/nf-core/modules/multiqc/main.nf index 3c3517bf..1e7d6afe 100644 --- a/modules/nf-core/modules/multiqc/main.nf +++ b/modules/nf-core/modules/multiqc/main.nf @@ -1,7 +1,7 @@ process MULTIQC { label 'process_medium' - conda (params.enable_conda ? 'bioconda::multiqc=1.12' : null) + conda (params.enable_conda ? 'bioconda::multiqc=1.13a' : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/multiqc:1.13a--pyhdfd78af_1' : 'quay.io/biocontainers/multiqc:1.13a--pyhdfd78af_1' }" From bef2d22c451d37d461f1cf84e16ff70dcfd5efd2 Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Wed, 13 Jul 2022 10:14:19 +0200 Subject: [PATCH 080/165] add output directive to ensure success run --- modules/nf-core/modules/kallistobustools/count/main.nf | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/nf-core/modules/kallistobustools/count/main.nf b/modules/nf-core/modules/kallistobustools/count/main.nf index ab61597c..1dde0f9d 100644 --- a/modules/nf-core/modules/kallistobustools/count/main.nf +++ b/modules/nf-core/modules/kallistobustools/count/main.nf @@ -18,6 +18,7 @@ process KALLISTOBUSTOOLS_COUNT { output: tuple val(meta), path ("*.count"), emit: count path "versions.yml" , emit: versions + path "*.count/*/*.mtx" // ensure that kallisto finished and produced outputs when: task.ext.when == null || task.ext.when From 8416bb1c46c5bc15e0c2c6b89a9244e68cd1b7d5 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Thu, 14 Jul 2022 11:27:34 +0000 Subject: [PATCH 081/165] Small update of custom dumpsoftware versions --- modules.json | 2 +- modules/nf-core/modules/custom/dumpsoftwareversions/main.nf | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/modules.json b/modules.json index b21e1449..ed503883 100644 --- a/modules.json +++ b/modules.json @@ -13,7 +13,7 @@ "git_sha": "61d87bb2bcbcd058e1af6e5f14bb821b8ae4ed3e" }, "custom/dumpsoftwareversions": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "682f789f93070bd047868300dd018faf3d434e7c" }, "fastqc": { "git_sha": "49b18b1639f4f7104187058866a8fab33332bdfe" diff --git a/modules/nf-core/modules/custom/dumpsoftwareversions/main.nf b/modules/nf-core/modules/custom/dumpsoftwareversions/main.nf index 327d5100..12293efc 100644 --- a/modules/nf-core/modules/custom/dumpsoftwareversions/main.nf +++ 
b/modules/nf-core/modules/custom/dumpsoftwareversions/main.nf @@ -2,10 +2,10 @@ process CUSTOM_DUMPSOFTWAREVERSIONS { label 'process_low' // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container - conda (params.enable_conda ? "bioconda::multiqc=1.11" : null) + conda (params.enable_conda ? "bioconda::multiqc=1.12" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.11--pyhdfd78af_0' : - 'quay.io/biocontainers/multiqc:1.11--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.12--pyhdfd78af_0' : + 'quay.io/biocontainers/multiqc:1.12--pyhdfd78af_0' }" input: path versions From 773b615451623903032a544251913451b4f1236c Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Thu, 14 Jul 2022 11:37:32 +0000 Subject: [PATCH 082/165] Yeah revert module --- modules/nf-core/modules/kallistobustools/count/main.nf | 1 - 1 file changed, 1 deletion(-) diff --git a/modules/nf-core/modules/kallistobustools/count/main.nf b/modules/nf-core/modules/kallistobustools/count/main.nf index 1dde0f9d..ab61597c 100644 --- a/modules/nf-core/modules/kallistobustools/count/main.nf +++ b/modules/nf-core/modules/kallistobustools/count/main.nf @@ -18,7 +18,6 @@ process KALLISTOBUSTOOLS_COUNT { output: tuple val(meta), path ("*.count"), emit: count path "versions.yml" , emit: versions - path "*.count/*/*.mtx" // ensure that kallisto finished and produced outputs when: task.ext.when == null || task.ext.when From d758d8cb25d5dfe2aca3306b9a4a39fed09b75c8 Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Thu, 14 Jul 2022 14:29:19 +0200 Subject: [PATCH 083/165] update nf-core/kallistobustools/count module --- modules.json | 2 +- modules/nf-core/modules/kallistobustools/count/main.nf | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/modules.json b/modules.json index ed503883..5468da87 100644 --- 
a/modules.json +++ b/modules.json @@ -25,7 +25,7 @@ "git_sha": "fa37e0662690c4ec4260dae282fbce08777503e6" }, "kallistobustools/count": { - "git_sha": "ec806cebf121767b95ad492b0d0f93dbdc2f33da" + "git_sha": "013035eb5c80c9e3f37f2c89c92a1ae7925df8ea" }, "kallistobustools/ref": { "git_sha": "0f1e736212e5ae37036ac7e32b225087a8a9b154" diff --git a/modules/nf-core/modules/kallistobustools/count/main.nf b/modules/nf-core/modules/kallistobustools/count/main.nf index ab61597c..52accd31 100644 --- a/modules/nf-core/modules/kallistobustools/count/main.nf +++ b/modules/nf-core/modules/kallistobustools/count/main.nf @@ -18,6 +18,7 @@ process KALLISTOBUSTOOLS_COUNT { output: tuple val(meta), path ("*.count"), emit: count path "versions.yml" , emit: versions + path "*.count/*/*.mtx" , emit: matrix //Ensure that kallisto finished and produced outputs when: task.ext.when == null || task.ext.when From 1cdd81bb85a960c3451ddeb8d0506a86921b93c6 Mon Sep 17 00:00:00 2001 From: Marcel Ribeiro-Dantas Date: Fri, 15 Jul 2022 11:17:12 +0200 Subject: [PATCH 084/165] Fix zenodo shield and link --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index aec34ff7..69d94fd9 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ [![GitHub Actions CI Status](https://github.com/nf-core/scrnaseq/workflows/nf-core%20CI/badge.svg)](https://github.com/nf-core/scrnaseq/actions?query=workflow%3A%22nf-core+CI%22) [![GitHub Actions Linting Status](https://github.com/nf-core/scrnaseq/workflows/nf-core%20linting/badge.svg)](https://github.com/nf-core/scrnaseq/actions?query=workflow%3A%22nf-core+linting%22) [![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?logo=Amazon%20AWS)](https://nf-co.re/scrnaseq/results) -[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8)](https://doi.org/10.5281/zenodo.XXXXXXX) +[![Cite with 
Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.3568187-1073c8)](https://doi.org/10.5281/zenodo.3568187) [![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A521.10.3-23aa62.svg)](https://www.nextflow.io/) [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?logo=anaconda)](https://docs.conda.io/en/latest/) @@ -82,7 +82,7 @@ For further information or help, don't hesitate to get in touch on the [Slack `# ## Citations -If you use nf-core/scrnaseq for your analysis, please cite it using the following doi: [10.5281/zenodo.3568187](https://doi.org/10.5281/10.5281/zenodo.3568187) +If you use nf-core/scrnaseq for your analysis, please cite it using the following doi: [10.5281/zenodo.3568187](https://doi.org/10.5281/zenodo.3568187) The basic benchmarks that were used as motivation for incorporating the three available modular workflows can be found in [this publication](https://www.biorxiv.org/content/10.1101/673285v2). From 69f2730afd32a6d0336b6dd4eb54f871f8851352 Mon Sep 17 00:00:00 2001 From: Marcel Ribeiro-Dantas Date: Fri, 15 Jul 2022 11:21:24 +0200 Subject: [PATCH 085/165] Remove extra slack shield I removed the extra slack shield, and chose to stay the one that is used in other nf-core GitHub repositories such as rnaseq and eager. --- README.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/README.md b/README.md index 69d94fd9..20f51240 100644 --- a/README.md +++ b/README.md @@ -15,8 +15,6 @@ [![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?logo=twitter)](https://twitter.com/nf_core) [![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?logo=youtube)](https://www.youtube.com/c/nf-core) -[![Join us on Slack](https://img.shields.io/badge/slack-nfcore/scrnaseq-blue.svg)](https://nfcore.slack.com/channels/scrnaseq) - ## Introduction **nf-core/scrnaseq** is a bioinformatics best-practice analysis pipeline for processing 10x Genomics single-cell RNA-seq data. 
From 6a276d2a4a90953d18b46571a15f2447836dccc0 Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Thu, 21 Jul 2022 14:37:48 +0200 Subject: [PATCH 086/165] adding option to parse non-standard kallisto outputs --- modules/local/mtx_to_h5ad.nf | 14 ++++++++++++ modules/local/mtx_to_seurat.nf | 14 ++++++++++++ subworkflows/local/conversion_to_h5ad.nf | 27 ------------------------ 3 files changed, 28 insertions(+), 27 deletions(-) delete mode 100644 subworkflows/local/conversion_to_h5ad.nf diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf index d5a7941b..a4916c6f 100644 --- a/modules/local/mtx_to_h5ad.nf +++ b/modules/local/mtx_to_h5ad.nf @@ -43,6 +43,20 @@ process MTX_TO_H5AD { --out ${meta.id}_matrix.h5ad """ + if (params.aligner == 'kallisto' && params.kb_workflow != 'standard') + """ + # convert file types + for input_type in spliced unspliced ; do + mtx_to_h5ad.py \\ + --aligner ${params.aligner} \\ + --sample ${meta.id} \\ + --mtx *count/counts_unfiltered/\${input_type}.mtx \\ + --barcode *count/counts_unfiltered/\${input_type}.barcodes.txt \\ + --feature *count/counts_unfiltered/\${input_type}.genes.txt \\ + --out ${meta.id}_\${input_type}_matrix.h5ad ; + done + """ + else """ # convert file types diff --git a/modules/local/mtx_to_seurat.nf b/modules/local/mtx_to_seurat.nf index 9a71ac8a..04b3f72c 100644 --- a/modules/local/mtx_to_seurat.nf +++ b/modules/local/mtx_to_seurat.nf @@ -35,6 +35,20 @@ process MTX_TO_SEURAT { features = "*.Solo.out/Gene/filtered/features.tsv" } + if (params.aligner == 'kallisto' && params.kb_workflow != 'standard') + """ + # convert file types + for input_type in spliced unspliced ; do + mtx_to_seurat.R \\ + *count/counts_unfiltered/\${input_type}.mtx \\ + *count/counts_unfiltered/\${input_type}.barcodes.txt \\ + *count/counts_unfiltered/\${input_type}.genes.txt \\ + ${meta.id}_\${input_type}_matrix.rds \\ + ${aligner} + done + """ + + else """ mtx_to_seurat.R \\ $matrix \\ diff --git 
a/subworkflows/local/conversion_to_h5ad.nf b/subworkflows/local/conversion_to_h5ad.nf deleted file mode 100644 index 67b100d5..00000000 --- a/subworkflows/local/conversion_to_h5ad.nf +++ /dev/null @@ -1,27 +0,0 @@ -/* -- IMPORT LOCAL MODULES/SUBWORKFLOWS -- */ -include { MTX_TO_H5AD } from '../../modules/local/mtx_to_h5ad.nf' -include { CONCAT_H5AD } from '../../modules/local/concat_h5ad.nf' - -workflow H5AD_CONVERSION { - - take: - mtx_matrices - samplesheet - - main: - // - // Convert matrix do h5ad - // - MTX_TO_H5AD ( - mtx_matrices - ) - - // - // Concat sample-specific h5ad in one - // - CONCAT_H5AD ( - MTX_TO_H5AD.out.h5ad.collect(), // gather all sample-specific files - samplesheet - ) - -} From 370e26881e1ed475579718da706ddb9ce028f85d Mon Sep 17 00:00:00 2001 From: RHReynolds Date: Fri, 29 Jul 2022 14:25:11 +0100 Subject: [PATCH 087/165] fix: STAR mtx conversion when using GeneFull When STAR is run with the flag `--soloFeatures GeneFull` (permits counting of exonic and intronic reads), the output is stored in `*.Solo.out/GeneFull/` and not `*.Solo.out/Gene`. As a result, matrix conversion results in an error, as matrix, barcodes and features cannot be found. This error can be fixed by adding an asterisk in the file path provided to the mtx conversion modules. 
--- modules/local/mtx_to_h5ad.nf | 6 +++--- modules/local/mtx_to_seurat.nf | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf index d5a7941b..41be94a6 100644 --- a/modules/local/mtx_to_h5ad.nf +++ b/modules/local/mtx_to_h5ad.nf @@ -26,9 +26,9 @@ process MTX_TO_H5AD { barcodes_tsv = "*_alevin_results/alevin/quants_mat_rows.txt" features_tsv = "*_alevin_results/alevin/quants_mat_cols.txt" } else if (params.aligner == 'star') { - mtx_matrix = "*.Solo.out/Gene/filtered/matrix.mtx" - barcodes_tsv = "*.Solo.out/Gene/filtered/barcodes.tsv" - features_tsv = "*.Solo.out/Gene/filtered/features.tsv" + mtx_matrix = "*.Solo.out/Gene*/filtered/matrix.mtx" + barcodes_tsv = "*.Solo.out/Gene*/filtered/barcodes.tsv" + features_tsv = "*.Solo.out/Gene*/filtered/features.tsv" } // diff --git a/modules/local/mtx_to_seurat.nf b/modules/local/mtx_to_seurat.nf index 9a71ac8a..c4a44db9 100644 --- a/modules/local/mtx_to_seurat.nf +++ b/modules/local/mtx_to_seurat.nf @@ -30,9 +30,9 @@ process MTX_TO_SEURAT { barcodes = "*_alevin_results/alevin/quants_mat_rows.txt" features = "*_alevin_results/alevin/quants_mat_cols.txt" } else if (params.aligner == 'star') { - matrix = "*.Solo.out/Gene/filtered/matrix.mtx" - barcodes = "*.Solo.out/Gene/filtered/barcodes.tsv" - features = "*.Solo.out/Gene/filtered/features.tsv" + matrix = "*.Solo.out/Gene*/filtered/matrix.mtx" + barcodes = "*.Solo.out/Gene*/filtered/barcodes.tsv" + features = "*.Solo.out/Gene*/filtered/features.tsv" } """ From 09b9f636610511a37df12bed7b55a3cc49b9ce7c Mon Sep 17 00:00:00 2001 From: RHReynolds Date: Fri, 29 Jul 2022 14:39:19 +0100 Subject: [PATCH 088/165] docs: update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 78ef8355..54741744 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixes - 
Fixed Kallistobustools workflow [#123](https://github.com/nf-core/scrnaseq/issues/123) by upgrading to nf-core/modules module +- Fixed matrix conversion error when running STAR with --soloFeatures GeneFull [#135](https://github.com/nf-core/scrnaseq/pull/135) ## v2.0.0 - 2022-06-17 "Gray Nickel Beagle" From f15d9583c4569218b96ae0874177b5b5828c55b3 Mon Sep 17 00:00:00 2001 From: RHReynolds Date: Fri, 29 Jul 2022 18:04:12 +0100 Subject: [PATCH 089/165] feat: gzip starsolo outputs Gzipping outputs for file compression and downstream compatibility with scflow (which requires zipped format, as outputted by cellranger). --- modules/local/mtx_to_h5ad.nf | 6 +++--- modules/local/mtx_to_seurat.nf | 6 +++--- modules/local/star_align.nf | 5 +++++ 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf index 41be94a6..22a04191 100644 --- a/modules/local/mtx_to_h5ad.nf +++ b/modules/local/mtx_to_h5ad.nf @@ -26,9 +26,9 @@ process MTX_TO_H5AD { barcodes_tsv = "*_alevin_results/alevin/quants_mat_rows.txt" features_tsv = "*_alevin_results/alevin/quants_mat_cols.txt" } else if (params.aligner == 'star') { - mtx_matrix = "*.Solo.out/Gene*/filtered/matrix.mtx" - barcodes_tsv = "*.Solo.out/Gene*/filtered/barcodes.tsv" - features_tsv = "*.Solo.out/Gene*/filtered/features.tsv" + mtx_matrix = "*.Solo.out/Gene*/filtered/matrix.mtx.gz" + barcodes_tsv = "*.Solo.out/Gene*/filtered/barcodes.tsv.gz" + features_tsv = "*.Solo.out/Gene*/filtered/features.tsv.gz" } // diff --git a/modules/local/mtx_to_seurat.nf b/modules/local/mtx_to_seurat.nf index c4a44db9..33fe0830 100644 --- a/modules/local/mtx_to_seurat.nf +++ b/modules/local/mtx_to_seurat.nf @@ -30,9 +30,9 @@ process MTX_TO_SEURAT { barcodes = "*_alevin_results/alevin/quants_mat_rows.txt" features = "*_alevin_results/alevin/quants_mat_cols.txt" } else if (params.aligner == 'star') { - matrix = "*.Solo.out/Gene*/filtered/matrix.mtx" - barcodes = 
"*.Solo.out/Gene*/filtered/barcodes.tsv" - features = "*.Solo.out/Gene*/filtered/features.tsv" + matrix = "*.Solo.out/Gene*/filtered/matrix.mtx.gz" + barcodes = "*.Solo.out/Gene*/filtered/barcodes.tsv.gz" + features = "*.Solo.out/Gene*/filtered/features.tsv.gz" } """ diff --git a/modules/local/star_align.nf b/modules/local/star_align.nf index d544224f..c489d0b8 100644 --- a/modules/local/star_align.nf +++ b/modules/local/star_align.nf @@ -69,6 +69,11 @@ process STAR_ALIGN { gzip ${prefix}.unmapped_2.fastq fi + if [ -d ${prefix}.Solo.out ]; then + # Backslashes still need to be escaped (https://github.com/nextflow-io/nextflow/issues/67) + find ${prefix}.Solo.out \\( -name "*.tsv" -o -name "*.mtx" \\) -exec gzip {} \\; + fi + cat <<-END_VERSIONS > versions.yml "${task.process}": star: \$(STAR --version | sed -e "s/STAR_//g") From bf5aaede10d29b03afc924fbca5b1848612b4775 Mon Sep 17 00:00:00 2001 From: Regina H Reynolds Date: Mon, 8 Aug 2022 18:26:40 +0100 Subject: [PATCH 090/165] fix(mtx_to_seurat): could not find ReadMtx using conda profile The conda package, `seurat-scripts`, does not contain the function, `ReadMtx()`. Thus, running the test and conda profile resulting in the error, `"Error in ReadMtx(mtx = mtx_file, features = feature_file, cells = barcode_file, : could not find function "ReadMtx"`. This can be resolved by using the conda package, `r-seurat`. --- modules/local/mtx_to_seurat.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/mtx_to_seurat.nf b/modules/local/mtx_to_seurat.nf index 33fe0830..e2aa8217 100644 --- a/modules/local/mtx_to_seurat.nf +++ b/modules/local/mtx_to_seurat.nf @@ -2,7 +2,7 @@ process MTX_TO_SEURAT { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "seurat-scripts" : null) + conda (params.enable_conda ? "r-seurat" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'docker://satijalab/seurat:4.1.0' : 'satijalab/seurat:4.1.0' }" From 6f0457e5b2958e1a04c61d608bad1db03bfc246a Mon Sep 17 00:00:00 2001 From: Regina H Reynolds Date: Mon, 8 Aug 2022 18:37:24 +0100 Subject: [PATCH 091/165] docs: update CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 54741744..953bcdf0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Fixed Kallistobustools workflow [#123](https://github.com/nf-core/scrnaseq/issues/123) by upgrading to nf-core/modules module - Fixed matrix conversion error when running STAR with --soloFeatures GeneFull [#135](https://github.com/nf-core/scrnaseq/pull/135) +- Fixed seurat matrix conversion error when running with conda profile [#136](https://github.com/nf-core/scrnaseq/pull/136) ## v2.0.0 - 2022-06-17 "Gray Nickel Beagle" From d6a5d52a38b5e1e485015ef1024e5cdba95894be Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Mon, 15 Aug 2022 10:10:16 +0000 Subject: [PATCH 092/165] added line to changelog --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 78ef8355..af431a2d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixes - Fixed Kallistobustools workflow [#123](https://github.com/nf-core/scrnaseq/issues/123) by upgrading to nf-core/modules module +- Fixed Kallistobustools module [#116](https://github.com/nf-core/scrnaseq/issues/116). Module was asking the same amount of memory that was being set as limit and not taking into account the different outputs produced by kallisto standard and non-standard workflows. 
+ + ## v2.0.0 - 2022-06-17 "Gray Nickel Beagle" From 9fd36495dc1f28ee76f3072eeeac0948af73b4cb Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Thu, 18 Aug 2022 11:28:59 +0200 Subject: [PATCH 093/165] added simpleaf index module --- conf/modules.config | 7 +++ modules/local/simpleaf_index.nf | 46 +++++++++++++++++++ modules/nf-core/modules/salmon/index/main.nf | 46 ------------------- modules/nf-core/modules/salmon/index/meta.yml | 36 --------------- nextflow.config | 3 +- subworkflows/local/alevin.nf | 14 ++---- 6 files changed, 59 insertions(+), 93 deletions(-) create mode 100644 modules/local/simpleaf_index.nf delete mode 100644 modules/nf-core/modules/salmon/index/main.nf delete mode 100644 modules/nf-core/modules/salmon/index/meta.yml diff --git a/conf/modules.config b/conf/modules.config index e9992e7e..a60a4645 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -70,6 +70,13 @@ if (params.aligner == "alevin") { ext.args = "--table transcript_id,gene_id" ext.prefix = { "${gff.baseName}_gffread" } } + withName: 'SIMPLEAF_INDEX' { + publishDir = [ + path: { "${params.outdir}/${params.aligner}" }, + mode: params.publish_dir_mode + ] + ext.args = "--rlen ${params.simpleaf_rlen}" + } withName: 'SALMON_INDEX|SALMON_ALEVIN' { publishDir = [ path: { "${params.outdir}/${params.aligner}" }, diff --git a/modules/local/simpleaf_index.nf b/modules/local/simpleaf_index.nf new file mode 100644 index 00000000..8f1edc06 --- /dev/null +++ b/modules/local/simpleaf_index.nf @@ -0,0 +1,46 @@ +process SIMPLEAF_INDEX { + tag "$transcript_gtf" + label "process_medium" + + conda (params.enable_conda ? 'bioconda::simpleaf=0.4.0' : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/simpleaf:0.4.0--h9f5acd7_0' : + 'quay.io/biocontainers/simpleaf:0.4.0--h9f5acd7_0' }" + + input: + path genome_fasta + path transcript_fasta + path transcript_gtf + + output: + path "salmon" , emit: index + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def seq_inputs = (params.transcript_fasta) ? "--refseq $transcript_fasta" : "--gtf $transcript_gtf" + """ + # export required var + export ALEVIN_FRY_HOME=. + + # prep simpleaf + simpleaf set-paths + + # run simpleaf index + simpleaf \\ + index \\ + --threads $task.cpus \\ + --fasta $genome_fasta \\ + $seq_inputs \\ + $args \\ + -o salmon + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + simpleaf: 0.4.0 + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/salmon/index/main.nf b/modules/nf-core/modules/salmon/index/main.nf deleted file mode 100644 index 737087f9..00000000 --- a/modules/nf-core/modules/salmon/index/main.nf +++ /dev/null @@ -1,46 +0,0 @@ -process SALMON_INDEX { - tag "$transcript_fasta" - label "process_medium" - - conda (params.enable_conda ? 'bioconda::salmon=1.5.2' : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/salmon:1.5.2--h84f40af_0' : - 'quay.io/biocontainers/salmon:1.5.2--h84f40af_0' }" - - input: - path genome_fasta - path transcript_fasta - - output: - path "salmon" , emit: index - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def get_decoy_ids = "grep '^>' $genome_fasta | cut -d ' ' -f 1 > decoys.txt" - def gentrome = "gentrome.fa" - if (genome_fasta.endsWith('.gz')) { - get_decoy_ids = "grep '^>' <(gunzip -c $genome_fasta) | cut -d ' ' -f 1 > decoys.txt" - gentrome = "gentrome.fa.gz" - } - """ - $get_decoy_ids - sed -i.bak -e 's/>//g' decoys.txt - cat $transcript_fasta $genome_fasta > $gentrome - - salmon \\ - index \\ - --threads $task.cpus \\ - -t $gentrome \\ - -d decoys.txt \\ - $args \\ - -i salmon - cat <<-END_VERSIONS > versions.yml - "${task.process}": - salmon: \$(echo \$(salmon --version) | sed -e "s/salmon //g") - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/salmon/index/meta.yml b/modules/nf-core/modules/salmon/index/meta.yml deleted file mode 100644 index 53c64152..00000000 --- a/modules/nf-core/modules/salmon/index/meta.yml +++ /dev/null @@ -1,36 +0,0 @@ -name: salmon_index -description: Create index for salmon -keywords: - - index - - fasta - - genome - - reference -tools: - - salmon: - description: | - Salmon is a tool for wicked-fast transcript quantification from RNA-seq data - homepage: https://salmon.readthedocs.io/en/latest/salmon.html - manual: https://salmon.readthedocs.io/en/latest/salmon.html - doi: 10.1038/nmeth.4197 - licence: ["GPL-3.0-or-later"] -input: - - genome_fasta: - type: file - description: Fasta file of the reference genome - - transcriptome_fasta: - type: file - description: Fasta file of the reference transcriptome - -output: - - index: - type: directory - description: Folder containing the star index files - pattern: "salmon" - - versions: - type: file - description: File 
containing software versions - pattern: "versions.yml" - -authors: - - "@kevinmenden" - - "@drpatelh" diff --git a/nextflow.config b/nextflow.config index 09b80901..9e1b43ac 100644 --- a/nextflow.config +++ b/nextflow.config @@ -22,7 +22,8 @@ params { gtf = null transcript_fasta = null - // salmon alevin parameters + // salmon alevin parameters (simpleaf) + simpleaf_rlen = 91 barcode_whitelist = null txp2gene = null salmon_index = null diff --git a/subworkflows/local/alevin.nf b/subworkflows/local/alevin.nf index c1b122e1..92198123 100644 --- a/subworkflows/local/alevin.nf +++ b/subworkflows/local/alevin.nf @@ -2,11 +2,11 @@ include { GFFREAD_TRANSCRIPTOME } from '../../modules/local/gffread_transcriptome' include { SALMON_ALEVIN } from '../../modules/local/salmon_alevin' include { ALEVINQC } from '../../modules/local/alevinqc' +include { SIMPLEAF_INDEX } from '../../modules/local/simpleaf_index' /* -- IMPORT NF-CORE MODULES/SUBWORKFLOWS -- */ include { GUNZIP } from '../../modules/nf-core/modules/gunzip/main' include { GFFREAD as GFFREAD_TXP2GENE } from '../../modules/nf-core/modules/gffread/main' -include { SALMON_INDEX } from '../../modules/nf-core/modules/salmon/index/main' def multiqc_report = [] @@ -39,15 +39,9 @@ workflow SCRNASEQ_ALEVIN { * Build salmon index */ if (!salmon_index) { - // Preprocessing - Extract transcriptome fasta from genome fasta - if (!transcript_fasta) { - GFFREAD_TRANSCRIPTOME( genome_fasta, gtf ) - transcript_fasta = GFFREAD_TRANSCRIPTOME.out.transcriptome_extracted - ch_versions = ch_versions.mix(GFFREAD_TRANSCRIPTOME.out.versions) - } - SALMON_INDEX( genome_fasta, transcript_fasta ) - salmon_index = SALMON_INDEX.out.index.collect() - ch_versions = ch_versions.mix(SALMON_INDEX.out.versions) + SIMPLEAF_INDEX( genome_fasta, transcript_fasta, gtf ) + salmon_index = SIMPLEAF_INDEX.out.index.collect() + ch_versions = ch_versions.mix(SIMPLEAF_INDEX.out.versions) } /* From 36f14ed995bd69107a63054e1c211def1519baa4 Mon Sep 17 00:00:00 2001 
From: Felipe Marques de Almeida Date: Tue, 23 Aug 2022 08:22:38 +0200 Subject: [PATCH 094/165] FIX: add "else if" instead of if-if --- modules/local/mtx_to_h5ad.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf index a79e6ca4..96e0b4a3 100644 --- a/modules/local/mtx_to_h5ad.nf +++ b/modules/local/mtx_to_h5ad.nf @@ -43,7 +43,7 @@ process MTX_TO_H5AD { --out ${meta.id}_matrix.h5ad """ - if (params.aligner == 'kallisto' && params.kb_workflow != 'standard') + else if (params.aligner == 'kallisto' && params.kb_workflow != 'standard') """ # convert file types for input_type in spliced unspliced ; do From 07136b05d5b5e9a60ef9568d495ed18dec8ef31d Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Tue, 23 Aug 2022 15:54:31 +0200 Subject: [PATCH 095/165] add alevin quantification module with simpleaf --- conf/modules.config | 3 +- lib/WorkflowScrnaseq.groovy | 10 +-- modules/local/simpleaf_index.nf | 6 +- .../{salmon_alevin.nf => simpleaf_quant.nf} | 44 +++++++----- modules/nf-core/modules/salmon/quant/main.nf | 72 ------------------- modules/nf-core/modules/salmon/quant/meta.yml | 56 --------------- subworkflows/local/alevin.nf | 14 ++-- 7 files changed, 45 insertions(+), 160 deletions(-) rename modules/local/{salmon_alevin.nf => simpleaf_quant.nf} (56%) delete mode 100644 modules/nf-core/modules/salmon/quant/main.nf delete mode 100644 modules/nf-core/modules/salmon/quant/meta.yml diff --git a/conf/modules.config b/conf/modules.config index a60a4645..2b05f1e7 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -77,11 +77,12 @@ if (params.aligner == "alevin") { ] ext.args = "--rlen ${params.simpleaf_rlen}" } - withName: 'SALMON_INDEX|SALMON_ALEVIN' { + withName: 'SIMPLEAF_QUANT' { publishDir = [ path: { "${params.outdir}/${params.aligner}" }, mode: params.publish_dir_mode ] + ext.args = "-r cr-like" } } } diff --git a/lib/WorkflowScrnaseq.groovy b/lib/WorkflowScrnaseq.groovy 
index 364b6b19..4f84555d 100755 --- a/lib/WorkflowScrnaseq.groovy +++ b/lib/WorkflowScrnaseq.groovy @@ -83,19 +83,19 @@ class WorkflowScrnaseq { if (aligner == 'alevin') { switch (protocol) { case '10XV1': - new_protocol = 'chromium' + new_protocol = '10xv1' chemistry = 'V1' break case '10XV2': - new_protocol = 'chromium' + new_protocol = '10xv2' chemistry = 'V2' break case '10XV3': - new_protocol = 'chromiumV3' + new_protocol = '10xv3' chemistry = 'V3' break - case 'dropseq': - new_protocol = 'dropseq' + // case 'dropseq': + // new_protocol = 'dropseq' } } diff --git a/modules/local/simpleaf_index.nf b/modules/local/simpleaf_index.nf index 8f1edc06..110f3464 100644 --- a/modules/local/simpleaf_index.nf +++ b/modules/local/simpleaf_index.nf @@ -13,8 +13,10 @@ process SIMPLEAF_INDEX { path transcript_gtf output: - path "salmon" , emit: index - path "versions.yml" , emit: versions + path "salmon/index" , emit: index + path "salmon/ref/*_t2g_3col.tsv" , emit: transcript_tsv + path "versions.yml" , emit: versions + path "salmon" when: task.ext.when == null || task.ext.when diff --git a/modules/local/salmon_alevin.nf b/modules/local/simpleaf_quant.nf similarity index 56% rename from modules/local/salmon_alevin.nf rename to modules/local/simpleaf_quant.nf index 5ca6dd1e..ad97518f 100644 --- a/modules/local/salmon_alevin.nf +++ b/modules/local/simpleaf_quant.nf @@ -1,11 +1,11 @@ -process SALMON_ALEVIN { +process SIMPLEAF_QUANT { tag "$meta.id" - label 'process_medium' + label 'process_high' - conda (params.enable_conda ? "bioconda::salmon=1.4.0" : null) + conda (params.enable_conda ? 'bioconda::simpleaf=0.4.0' : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/salmon:1.4.0--h84f40af_1' : - 'quay.io/biocontainers/salmon:1.4.0--h84f40af_1' }" + 'https://depot.galaxyproject.org/singularity/simpleaf:0.4.0--h9f5acd7_0' : + 'quay.io/biocontainers/simpleaf:0.4.0--h9f5acd7_0' }" input: // @@ -14,6 +14,7 @@ process SALMON_ALEVIN { // tuple val(meta), path(reads) path index + path transcript_tsv path txp2gene val protocol path whitelist @@ -29,19 +30,26 @@ process SALMON_ALEVIN { // separate forward from reverse pairs def (forward, reverse) = reads.collate(2).transpose() """ - salmon alevin \\ - -l ISR \\ - -p $task.cpus \\ - -1 ${forward.join( " " )} \\ - -2 ${reverse.join( " " )} \\ - --${protocol} \\ - -i $index \\ - --tgMap $txp2gene \\ - --dumpFeatures --dumpMtx \\ - $args \\ - -o ${prefix}_alevin_results - - gzip -cdf ${whitelist} > ${prefix}_alevin_results/alevin/whitelist.txt + # export required var + export ALEVIN_FRY_HOME=. + + # prep simpleaf + simpleaf set-paths + + # run simpleaf quant + gzip -dcf $whitelist > whitelist.txt + simpleaf quant \\ + -1 ${forward.join( "," )} \\ + -2 ${reverse.join( "," )} \\ + -i ${index} \\ + -o ${prefix}_alevin_results \\ + -m $transcript_tsv \\ + -t $task.cpus \\ + -c $protocol \\ + -u whitelist.txt \\ + $args + + mv whitelist.txt ${prefix}_alevin_results/ cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/modules/salmon/quant/main.nf b/modules/nf-core/modules/salmon/quant/main.nf deleted file mode 100644 index bd4792c5..00000000 --- a/modules/nf-core/modules/salmon/quant/main.nf +++ /dev/null @@ -1,72 +0,0 @@ -process SALMON_QUANT { - tag "$meta.id" - label "process_medium" - - conda (params.enable_conda ? 'bioconda::salmon=1.5.2' : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/salmon:1.5.2--h84f40af_0' : - 'quay.io/biocontainers/salmon:1.5.2--h84f40af_0' }" - - input: - tuple val(meta), path(reads) - path index - path gtf - path transcript_fasta - val alignment_mode - val lib_type - - output: - tuple val(meta), path("${prefix}"), emit: results - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - prefix = task.ext.prefix ?: "${meta.id}" - - def reference = "--index $index" - def input_reads = meta.single_end ? "-r $reads" : "-1 ${reads[0]} -2 ${reads[1]}" - if (alignment_mode) { - reference = "-t $transcript_fasta" - input_reads = "-a $reads" - } - - def strandedness_opts = [ - 'A', 'U', 'SF', 'SR', - 'IS', 'IU' , 'ISF', 'ISR', - 'OS', 'OU' , 'OSF', 'OSR', - 'MS', 'MU' , 'MSF', 'MSR' - ] - def strandedness = 'A' - if (lib_type) { - if (strandedness_opts.contains(lib_type)) { - strandedness = lib_type - } else { - log.info "[Salmon Quant] Invalid library type specified '--libType=${lib_type}', defaulting to auto-detection with '--libType=A'." - } - } else { - strandedness = meta.single_end ? 'U' : 'IU' - if (meta.strandedness == 'forward') { - strandedness = meta.single_end ? 'SF' : 'ISF' - } else if (meta.strandedness == 'reverse') { - strandedness = meta.single_end ? 
'SR' : 'ISR' - } - } - """ - salmon quant \\ - --geneMap $gtf \\ - --threads $task.cpus \\ - --libType=$strandedness \\ - $reference \\ - $input_reads \\ - $args \\ - -o $prefix - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - salmon: \$(echo \$(salmon --version) | sed -e "s/salmon //g") - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/salmon/quant/meta.yml b/modules/nf-core/modules/salmon/quant/meta.yml deleted file mode 100644 index 109109d8..00000000 --- a/modules/nf-core/modules/salmon/quant/meta.yml +++ /dev/null @@ -1,56 +0,0 @@ -name: salmon_quant -description: gene/transcript quantification with Salmon -keywords: - - index - - fasta - - genome - - reference -tools: - - salmon: - description: | - Salmon is a tool for wicked-fast transcript quantification from RNA-seq data - homepage: https://salmon.readthedocs.io/en/latest/salmon.html - manual: https://salmon.readthedocs.io/en/latest/salmon.html - doi: 10.1038/nmeth.4197 - licence: ["GPL-3.0-or-later"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - reads: - type: file - description: | - List of input FastQ files of size 1 and 2 for single-end and paired-end data, - respectively. 
- - index: - type: directory - description: Folder containing the star index files - - gtf: - type: file - description: GTF of the reference transcriptome - - transcriptome_fasta: - type: file - description: Fasta file of the reference transcriptome - - alignment_mode: - type: boolean - description: whether to run salmon in alignment mode - - lib_type: - type: string - description: | - Override library type inferred based on strandedness defined in meta object - -output: - - sample_output: - type: directory - description: Folder containing the quantification results for a specific sample - pattern: "${prefix}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - -authors: - - "@kevinmenden" - - "@drpatelh" diff --git a/subworkflows/local/alevin.nf b/subworkflows/local/alevin.nf index 92198123..49874e2f 100644 --- a/subworkflows/local/alevin.nf +++ b/subworkflows/local/alevin.nf @@ -1,8 +1,8 @@ /* -- IMPORT LOCAL MODULES/SUBWORKFLOWS -- */ include { GFFREAD_TRANSCRIPTOME } from '../../modules/local/gffread_transcriptome' -include { SALMON_ALEVIN } from '../../modules/local/salmon_alevin' include { ALEVINQC } from '../../modules/local/alevinqc' include { SIMPLEAF_INDEX } from '../../modules/local/simpleaf_index' +include { SIMPLEAF_QUANT } from '../../modules/local/simpleaf_quant' /* -- IMPORT NF-CORE MODULES/SUBWORKFLOWS -- */ include { GUNZIP } from '../../modules/nf-core/modules/gunzip/main' @@ -41,6 +41,7 @@ workflow SCRNASEQ_ALEVIN { if (!salmon_index) { SIMPLEAF_INDEX( genome_fasta, transcript_fasta, gtf ) salmon_index = SIMPLEAF_INDEX.out.index.collect() + transcript_tsv = SIMPLEAF_INDEX.out.transcript_tsv.collect() ch_versions = ch_versions.mix(SIMPLEAF_INDEX.out.versions) } @@ -57,25 +58,26 @@ workflow SCRNASEQ_ALEVIN { /* * Perform quantification with salmon alevin */ - SALMON_ALEVIN ( + SIMPLEAF_QUANT ( ch_fastq, salmon_index, + transcript_tsv, txp2gene, protocol, barcode_whitelist ) - ch_versions = 
ch_versions.mix(SALMON_ALEVIN.out.versions) + ch_versions = ch_versions.mix(SIMPLEAF_QUANT.out.versions) /* * Run alevinQC */ - ALEVINQC( SALMON_ALEVIN.out.alevin_results ) + ALEVINQC( SIMPLEAF_QUANT.out.alevin_results ) ch_versions = ch_versions.mix(ALEVINQC.out.versions) emit: ch_versions - alevin_results = SALMON_ALEVIN.out.alevin_results + alevin_results = SIMPLEAF_QUANT.out.alevin_results alevinqc = ALEVINQC.out.report - for_multiqc = SALMON_ALEVIN.out.alevin_results.collect{it[1]}.ifEmpty([]) + for_multiqc = SIMPLEAF_QUANT.out.alevin_results.collect{it[1]}.ifEmpty([]) } From b5234c01af98e650ffa4f6a96e080b18f4987f4d Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Wed, 24 Aug 2022 07:20:33 +0200 Subject: [PATCH 096/165] update simpleaf versions --- modules/local/simpleaf_index.nf | 4 ++-- modules/local/simpleaf_quant.nf | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/local/simpleaf_index.nf b/modules/local/simpleaf_index.nf index 110f3464..853f1f95 100644 --- a/modules/local/simpleaf_index.nf +++ b/modules/local/simpleaf_index.nf @@ -4,8 +4,8 @@ process SIMPLEAF_INDEX { conda (params.enable_conda ? 'bioconda::simpleaf=0.4.0' : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/simpleaf:0.4.0--h9f5acd7_0' : - 'quay.io/biocontainers/simpleaf:0.4.0--h9f5acd7_0' }" + 'https://depot.galaxyproject.org/singularity/simpleaf:0.5.1--h9f5acd7_0' : + 'quay.io/biocontainers/simpleaf:0.5.1--h9f5acd7_0' }" input: path genome_fasta diff --git a/modules/local/simpleaf_quant.nf b/modules/local/simpleaf_quant.nf index ad97518f..6d408fcc 100644 --- a/modules/local/simpleaf_quant.nf +++ b/modules/local/simpleaf_quant.nf @@ -4,8 +4,8 @@ process SIMPLEAF_QUANT { conda (params.enable_conda ? 'bioconda::simpleaf=0.4.0' : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/simpleaf:0.4.0--h9f5acd7_0' : - 'quay.io/biocontainers/simpleaf:0.4.0--h9f5acd7_0' }" + 'https://depot.galaxyproject.org/singularity/simpleaf:0.5.1--h9f5acd7_0' : + 'quay.io/biocontainers/simpleaf:0.5.1--h9f5acd7_0' }" input: // From 4d3c5c520321a44775d8e30ac82e2db483284f4a Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Mon, 29 Aug 2022 11:02:40 +0200 Subject: [PATCH 097/165] update how txp2gene file is loaded --- modules/local/simpleaf_quant.nf | 3 +-- subworkflows/local/alevin.nf | 16 ++++++---------- 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/modules/local/simpleaf_quant.nf b/modules/local/simpleaf_quant.nf index 6d408fcc..7f14048e 100644 --- a/modules/local/simpleaf_quant.nf +++ b/modules/local/simpleaf_quant.nf @@ -14,7 +14,6 @@ process SIMPLEAF_QUANT { // tuple val(meta), path(reads) path index - path transcript_tsv path txp2gene val protocol path whitelist @@ -43,7 +42,7 @@ process SIMPLEAF_QUANT { -2 ${reverse.join( "," )} \\ -i ${index} \\ -o ${prefix}_alevin_results \\ - -m $transcript_tsv \\ + -m $txp2gene \\ -t $task.cpus \\ -c $protocol \\ -u whitelist.txt \\ diff --git a/subworkflows/local/alevin.nf b/subworkflows/local/alevin.nf index 49874e2f..7db784ff 100644 --- a/subworkflows/local/alevin.nf +++ b/subworkflows/local/alevin.nf @@ -31,8 +31,10 @@ workflow SCRNASEQ_ALEVIN { """Must provide a genome fasta file ('--genome_fasta') and a gtf file ('--gtf'), or a genome fasta file and a transcriptome fasta file ('--transcript_fasta`) if no index is given!""".stripIndent() - assert txp2gene || gtf: - "Must provide either a GTF file ('--gtf') or kallisto gene map ('--kallisto_gene_map') to align with kallisto bustools!" + if (transcript_fasta) { + assert txp2gene: + "Since a built transcript was provided ('--transcript_fasta'), must also provide a simpleaf gene map ('--txp2gene') to use with simpleaf quant!" 
+ } /* @@ -46,14 +48,9 @@ workflow SCRNASEQ_ALEVIN { } /* - * Build txp2gene map + * Select txp2gene map */ - if (!txp2gene){ - GFFREAD_TXP2GENE( gtf ) - txp2gene = GFFREAD_TXP2GENE.out.gtf - // Only collect version if not already done for gffread - ch_versions = ch_versions.mix(GFFREAD_TXP2GENE.out.versions) - } + if (!txp2gene) { txp2gene = SIMPLEAF_INDEX.out.transcript_tsv } /* * Perform quantification with salmon alevin @@ -61,7 +58,6 @@ workflow SCRNASEQ_ALEVIN { SIMPLEAF_QUANT ( ch_fastq, salmon_index, - transcript_tsv, txp2gene, protocol, barcode_whitelist From 351c56cfbe8ca1237505f818251d45db1c3c9a56 Mon Sep 17 00:00:00 2001 From: nf-core-bot Date: Tue, 30 Aug 2022 13:40:26 +0000 Subject: [PATCH 098/165] Template update for nf-core/tools version 2.5 --- .editorconfig | 2 +- .github/PULL_REQUEST_TEMPLATE.md | 3 +- .github/workflows/ci.yml | 23 ++------ .github/workflows/linting.yml | 38 +++++++++++-- CHANGELOG.md | 2 +- CITATION.cff | 56 +++++++++++++++++++ README.md | 21 +++---- assets/email_template.txt | 1 - bin/check_samplesheet.py | 41 +++++++------- conf/base.config | 5 ++ docs/usage.md | 12 ++-- lib/WorkflowMain.groovy | 9 ++- lib/WorkflowScrnaseq.groovy | 5 +- main.nf | 2 +- modules.json | 22 +++++--- .../templates/dumpsoftwareversions.py | 14 +++-- nextflow.config | 23 +++++++- 17 files changed, 186 insertions(+), 93 deletions(-) create mode 100644 CITATION.cff diff --git a/.editorconfig b/.editorconfig index b6b31907..b78de6e6 100644 --- a/.editorconfig +++ b/.editorconfig @@ -8,7 +8,7 @@ trim_trailing_whitespace = true indent_size = 4 indent_style = space -[*.{md,yml,yaml,html,css,scss,js}] +[*.{md,yml,yaml,html,css,scss,js,cff}] indent_size = 2 # These files are edited and tested upstream in nf-core/modules diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 4b79f9bf..9d907223 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -15,8 +15,7 @@ Learn more about contributing: 
[CONTRIBUTING.md](https://github.com/nf-core/scrn - [ ] This comment contains a description of changes (with reason). - [ ] If you've fixed a bug or added code that should be tested, add tests! - - [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/scrnaseq/tree/master/.github/CONTRIBUTING.md) - - [ ] If necessary, also make a PR on the nf-core/scrnaseq _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. +- [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/scrnaseq/tree/master/.github/CONTRIBUTING.md)- [ ] If necessary, also make a PR on the nf-core/scrnaseq _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. - [ ] Make sure your code lints (`nf-core lint`). - [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir `). - [ ] Usage Documentation in `docs/usage.md` is updated. 
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 03c26dea..3afad6a3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -10,7 +10,6 @@ on: env: NXF_ANSI_LOG: false - CAPSULE_LOG: none jobs: test: @@ -20,27 +19,17 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - # Nextflow versions - include: - # Test pipeline minimum Nextflow version - - NXF_VER: "21.10.3" - NXF_EDGE: "" - # Test latest edge release of Nextflow - - NXF_VER: "" - NXF_EDGE: "1" + NXF_VER: + - "21.10.3" + - "latest-everything" steps: - name: Check out pipeline code uses: actions/checkout@v2 - name: Install Nextflow - env: - NXF_VER: ${{ matrix.NXF_VER }} - # Uncomment only if the edge release is more recent than the latest stable release - # See https://github.com/nextflow-io/nextflow/issues/2467 - # NXF_EDGE: ${{ matrix.NXF_EDGE }} - run: | - wget -qO- get.nextflow.io | bash - sudo mv nextflow /usr/local/bin/ + uses: nf-core/setup-nextflow@v1 + with: + version: "${{ matrix.NXF_VER }}" - name: Run pipeline with test data # TODO nf-core: You can customise CI pipeline run tests as required diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 77358dee..8a5ce69b 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -35,6 +35,36 @@ jobs: - name: Run Prettier --check run: prettier --check ${GITHUB_WORKSPACE} + PythonBlack: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + + - name: Check code lints with Black + uses: psf/black@stable + + # If the above check failed, post a comment on the PR explaining the failure + - name: Post PR comment + if: failure() + uses: mshick/add-pr-comment@v1 + with: + message: | + ## Python linting (`black`) is failing + + To keep the code consistent with lots of contributors, we run automated code consistency checks. 
+ To fix this CI test, please run: + + * Install [`black`](https://black.readthedocs.io/en/stable/): `pip install black` + * Fix formatting errors in your pipeline: `black .` + + Once you push these changes the test should pass, and you can hide this comment :+1: + + We highly recommend setting up Black in your code editor so that this formatting is done automatically on save. Ask about it on Slack for help! + + Thanks again for your contribution! + repo-token: ${{ secrets.GITHUB_TOKEN }} + allow-repeats: false + nf-core: runs-on: ubuntu-latest steps: @@ -42,15 +72,11 @@ jobs: uses: actions/checkout@v2 - name: Install Nextflow - env: - CAPSULE_LOG: none - run: | - wget -qO- get.nextflow.io | bash - sudo mv nextflow /usr/local/bin/ + uses: nf-core/setup-nextflow@v1 - uses: actions/setup-python@v3 with: - python-version: "3.6" + python-version: "3.7" architecture: "x64" - name: Install dependencies diff --git a/CHANGELOG.md b/CHANGELOG.md index 7fff64fc..af3ca32a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## v2.0dev - [date] +## v2.0.1dev - [date] Initial release of nf-core/scrnaseq, created with the [nf-core](https://nf-co.re/) template. 
diff --git a/CITATION.cff b/CITATION.cff new file mode 100644 index 00000000..4533e2f2 --- /dev/null +++ b/CITATION.cff @@ -0,0 +1,56 @@ +cff-version: 1.2.0 +message: "If you use `nf-core tools` in your work, please cite the `nf-core` publication" +authors: + - family-names: Ewels + given-names: Philip + - family-names: Peltzer + given-names: Alexander + - family-names: Fillinger + given-names: Sven + - family-names: Patel + given-names: Harshil + - family-names: Alneberg + given-names: Johannes + - family-names: Wilm + given-names: Andreas + - family-names: Ulysse Garcia + given-names: Maxime + - family-names: Di Tommaso + given-names: Paolo + - family-names: Nahnsen + given-names: Sven +title: "The nf-core framework for community-curated bioinformatics pipelines." +version: 2.4.1 +doi: 10.1038/s41587-020-0439-x +date-released: 2022-05-16 +url: https://github.com/nf-core/tools +prefered-citation: + type: article + authors: + - family-names: Ewels + given-names: Philip + - family-names: Peltzer + given-names: Alexander + - family-names: Fillinger + given-names: Sven + - family-names: Patel + given-names: Harshil + - family-names: Alneberg + given-names: Johannes + - family-names: Wilm + given-names: Andreas + - family-names: Ulysse Garcia + given-names: Maxime + - family-names: Di Tommaso + given-names: Paolo + - family-names: Nahnsen + given-names: Sven + doi: 10.1038/s41587-020-0439-x + journal: nature biotechnology + start: 276 + end: 278 + title: "The nf-core framework for community-curated bioinformatics pipelines." 
+ issue: 3 + volume: 38 + year: 2020 + url: https://dx.doi.org/10.1038/s41587-020-0439-x diff --git a/README.md b/README.md index e0861259..804539ce 100644 --- a/README.md +++ b/README.md @@ -1,19 +1,14 @@ # ![nf-core/scrnaseq](docs/images/nf-core-scrnaseq_logo_light.png#gh-light-mode-only) ![nf-core/scrnaseq](docs/images/nf-core-scrnaseq_logo_dark.png#gh-dark-mode-only) -[![GitHub Actions CI Status](https://github.com/nf-core/scrnaseq/workflows/nf-core%20CI/badge.svg)](https://github.com/nf-core/scrnaseq/actions?query=workflow%3A%22nf-core+CI%22) -[![GitHub Actions Linting Status](https://github.com/nf-core/scrnaseq/workflows/nf-core%20linting/badge.svg)](https://github.com/nf-core/scrnaseq/actions?query=workflow%3A%22nf-core+linting%22) -[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?logo=Amazon%20AWS)](https://nf-co.re/scrnaseq/results) -[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8)](https://doi.org/10.5281/zenodo.XXXXXXX) +[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/scrnaseq/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX) [![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A521.10.3-23aa62.svg)](https://www.nextflow.io/) -[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?logo=anaconda)](https://docs.conda.io/en/latest/) -[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?logo=docker)](https://www.docker.com/) -[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg)](https://sylabs.io/docs/) +[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) +[![run with 
docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) +[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) [![Launch on Nextflow Tower](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Nextflow%20Tower-%234256e7)](https://tower.nf/launch?pipeline=https://github.com/nf-core/scrnaseq) -[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23scrnaseq-4A154B?logo=slack)](https://nfcore.slack.com/channels/scrnaseq) -[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?logo=twitter)](https://twitter.com/nf_core) -[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?logo=youtube)](https://www.youtube.com/c/nf-core) +[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23scrnaseq-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/scrnaseq)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core) ## Introduction @@ -25,7 +20,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool -On release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources. The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/scrnaseq/results). +On release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. 
This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources.The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/scrnaseq/results). ## Pipeline summary @@ -42,7 +37,7 @@ On release, automated continuous integration tests run the pipeline on a full-si 3. Download the pipeline and test it on a minimal dataset with a single command: - ```console + ```bash nextflow run nf-core/scrnaseq -profile test,YOURPROFILE --outdir ``` @@ -57,7 +52,7 @@ On release, automated continuous integration tests run the pipeline on a full-si - ```console + ```bash nextflow run nf-core/scrnaseq --input samplesheet.csv --outdir --genome GRCh37 -profile ``` diff --git a/assets/email_template.txt b/assets/email_template.txt index 8513ca81..9065d5c2 100644 --- a/assets/email_template.txt +++ b/assets/email_template.txt @@ -6,7 +6,6 @@ `._,._,' nf-core/scrnaseq v${version} ---------------------------------------------------- - Run Name: $runName <% if (success){ diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py index 3652c63c..9a8b8962 100755 --- a/bin/check_samplesheet.py +++ b/bin/check_samplesheet.py @@ -11,7 +11,6 @@ from collections import Counter from pathlib import Path - logger = logging.getLogger() @@ -79,13 +78,15 @@ def validate_and_transform(self, row): def _validate_sample(self, row): """Assert that the sample name exists and convert spaces to underscores.""" - assert len(row[self._sample_col]) > 0, "Sample input is required." + if len(row[self._sample_col]) <= 0: + raise AssertionError("Sample input is required.") # Sanitize samples slightly. 
row[self._sample_col] = row[self._sample_col].replace(" ", "_") def _validate_first(self, row): """Assert that the first FASTQ entry is non-empty and has the right format.""" - assert len(row[self._first_col]) > 0, "At least the first FASTQ file is required." + if len(row[self._first_col]) <= 0: + raise AssertionError("At least the first FASTQ file is required.") self._validate_fastq_format(row[self._first_col]) def _validate_second(self, row): @@ -97,36 +98,34 @@ def _validate_pair(self, row): """Assert that read pairs have the same file extension. Report pair status.""" if row[self._first_col] and row[self._second_col]: row[self._single_col] = False - assert ( - Path(row[self._first_col]).suffixes[-2:] == Path(row[self._second_col]).suffixes[-2:] - ), "FASTQ pairs must have the same file extensions." + if Path(row[self._first_col]).suffixes[-2:] != Path(row[self._second_col]).suffixes[-2:]: + raise AssertionError("FASTQ pairs must have the same file extensions.") else: row[self._single_col] = True def _validate_fastq_format(self, filename): """Assert that a given filename has one of the expected FASTQ extensions.""" - assert any(filename.endswith(extension) for extension in self.VALID_FORMATS), ( - f"The FASTQ file has an unrecognized extension: {filename}\n" - f"It should be one of: {', '.join(self.VALID_FORMATS)}" - ) + if not any(filename.endswith(extension) for extension in self.VALID_FORMATS): + raise AssertionError( + f"The FASTQ file has an unrecognized extension: {filename}\n" + f"It should be one of: {', '.join(self.VALID_FORMATS)}" + ) def validate_unique_samples(self): """ Assert that the combination of sample name and FASTQ filename is unique. - In addition to the validation, also rename the sample if more than one sample, - FASTQ file combination exists. 
+ In addition to the validation, also rename all samples to have a suffix of _T{n}, where n is the + number of times the same sample exist, but with different FASTQ files, e.g., multiple runs per experiment. """ - assert len(self._seen) == len(self.modified), "The pair of sample name and FASTQ must be unique." - if len({pair[0] for pair in self._seen}) < len(self._seen): - counts = Counter(pair[0] for pair in self._seen) - seen = Counter() - for row in self.modified: - sample = row[self._sample_col] - seen[sample] += 1 - if counts[sample] > 1: - row[self._sample_col] = f"{sample}_T{seen[sample]}" + if len(self._seen) != len(self.modified): + raise AssertionError("The pair of sample name and FASTQ must be unique.") + seen = Counter() + for row in self.modified: + sample = row[self._sample_col] + seen[sample] += 1 + row[self._sample_col] = f"{sample}_T{seen[sample]}" def read_head(handle, num_lines=10): diff --git a/conf/base.config b/conf/base.config index e18cde2e..938227e2 100644 --- a/conf/base.config +++ b/conf/base.config @@ -26,6 +26,11 @@ process { // adding in your local modules too. // TODO nf-core: Customise requirements for specific processes. // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors + withLabel:process_single { + cpus = { check_max( 1 , 'cpus' ) } + memory = { check_max( 6.GB * task.attempt, 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } + } withLabel:process_low { cpus = { check_max( 2 * task.attempt, 'cpus' ) } memory = { check_max( 12.GB * task.attempt, 'memory' ) } diff --git a/docs/usage.md b/docs/usage.md index 60c6a74c..c1b85e7a 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -12,7 +12,7 @@ You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row as shown in the examples below. 
-```console +```bash --input '[path to samplesheet file]' ``` @@ -56,7 +56,7 @@ An [example samplesheet](../assets/samplesheet.csv) has been provided with the p The typical command for running the pipeline is as follows: -```console +```bash nextflow run nf-core/scrnaseq --input samplesheet.csv --outdir --genome GRCh37 -profile docker ``` @@ -64,9 +64,9 @@ This will launch the pipeline with the `docker` configuration profile. See below Note that the pipeline will create the following files in your working directory: -```console +```bash work # Directory containing the nextflow working files - # Finished results in specified location (defined with --outdir) + # Finished results in specified location (defined with --outdir) .nextflow_log # Log file from Nextflow # Other nextflow hidden files, eg. history of pipeline runs and old logs. ``` @@ -75,7 +75,7 @@ work # Directory containing the nextflow working files When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline: -```console +```bash nextflow pull nf-core/scrnaseq ``` @@ -251,6 +251,6 @@ Some HPC setups also allow you to run nextflow within a cluster job submitted yo In some cases, the Nextflow Java virtual machines can start to request a large amount of memory. 
We recommend adding the following line to your environment to limit this (typically in `~/.bashrc` or `~./bash_profile`): -```console +```bash NXF_OPTS='-Xms1g -Xmx4g' ``` diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy index 0569eece..b8c1f19d 100755 --- a/lib/WorkflowMain.groovy +++ b/lib/WorkflowMain.groovy @@ -59,6 +59,7 @@ class WorkflowMain { } // Print parameter summary log to screen + log.info paramsSummaryLog(workflow, params, log) // Check that a -profile or Nextflow config has been provided to run the pipeline @@ -78,17 +79,15 @@ class WorkflowMain { System.exit(1) } } - // // Get attribute from genome config file e.g. fasta // - public static String getGenomeAttribute(params, attribute) { - def val = '' + public static Object getGenomeAttribute(params, attribute) { if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { if (params.genomes[ params.genome ].containsKey(attribute)) { - val = params.genomes[ params.genome ][ attribute ] + return params.genomes[ params.genome ][ attribute ] } } - return val + return null } } diff --git a/lib/WorkflowScrnaseq.groovy b/lib/WorkflowScrnaseq.groovy index 290990ae..db31a702 100755 --- a/lib/WorkflowScrnaseq.groovy +++ b/lib/WorkflowScrnaseq.groovy @@ -10,6 +10,7 @@ class WorkflowScrnaseq { public static void initialise(params, log) { genomeExistsError(params, log) + if (!params.fasta) { log.error "Genome fasta file not specified with e.g. '--fasta genome.fa' or via a detectable config file." 
System.exit(1) @@ -41,9 +42,7 @@ class WorkflowScrnaseq { yaml_file_text += "data: |\n" yaml_file_text += "${summary_section}" return yaml_file_text - } - - // + }// // Exit pipeline if incorrect --genome key provided // private static void genomeExistsError(params, log) { diff --git a/main.nf b/main.nf index 4d1eb0ea..c2fe7ccf 100644 --- a/main.nf +++ b/main.nf @@ -4,7 +4,7 @@ nf-core/scrnaseq ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Github : https://github.com/nf-core/scrnaseq - Website: https://nf-co.re/scrnaseq +Website: https://nf-co.re/scrnaseq Slack : https://nfcore.slack.com/channels/scrnaseq ---------------------------------------------------------------------------------------- */ diff --git a/modules.json b/modules.json index 5bd4d31a..a865b2cd 100644 --- a/modules.json +++ b/modules.json @@ -3,14 +3,20 @@ "homePage": "https://github.com/nf-core/scrnaseq", "repos": { "nf-core/modules": { - "custom/dumpsoftwareversions": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" - }, - "fastqc": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" - }, - "multiqc": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_url": "https://github.com/nf-core/modules.git", + "modules": { + "custom/dumpsoftwareversions": { + "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d", + "branch": "master" + }, + "fastqc": { + "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d", + "branch": "master" + }, + "multiqc": { + "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d", + "branch": "master" + } } } } diff --git a/modules/nf-core/modules/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/modules/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py index d1390392..787bdb7b 100644 --- a/modules/nf-core/modules/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py +++ b/modules/nf-core/modules/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py @@ 
-1,9 +1,10 @@ #!/usr/bin/env python -import yaml import platform from textwrap import dedent +import yaml + def _make_versions_html(versions): html = [ @@ -58,11 +59,12 @@ def _make_versions_html(versions): for process, process_versions in versions_by_process.items(): module = process.split(":")[-1] try: - assert versions_by_module[module] == process_versions, ( - "We assume that software versions are the same between all modules. " - "If you see this error-message it means you discovered an edge-case " - "and should open an issue in nf-core/tools. " - ) + if versions_by_module[module] != process_versions: + raise AssertionError( + "We assume that software versions are the same between all modules. " + "If you see this error-message it means you discovered an edge-case " + "and should open an issue in nf-core/tools. " + ) except KeyError: versions_by_module[module] = process_versions diff --git a/nextflow.config b/nextflow.config index 74187da1..a6e37e2a 100644 --- a/nextflow.config +++ b/nextflow.config @@ -13,11 +13,11 @@ params { // Input options input = null + // References genome = null igenomes_base = 's3://ngi-igenomes/igenomes' igenomes_ignore = false - // MultiQC options multiqc_config = null multiqc_title = null @@ -37,6 +37,7 @@ params { schema_ignore_params = 'genomes' enable_conda = false + // Config options custom_config_version = 'master' custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" @@ -45,6 +46,7 @@ params { config_profile_url = null config_profile_name = null + // Max resource options // Defaults only, expecting to be overwritten max_memory = '128.GB' @@ -72,6 +74,7 @@ try { // } + profiles { debug { process.beforeScript = 'echo $HOSTNAME' } conda { @@ -82,6 +85,15 @@ profiles { shifter.enabled = false charliecloud.enabled = false } + mamba { + params.enable_conda = true + conda.useMamba = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + 
shifter.enabled = false + charliecloud.enabled = false + } docker { docker.enabled = true docker.userEmulation = true @@ -119,10 +131,16 @@ profiles { podman.enabled = false shifter.enabled = false } + gitpod { + executor.name = 'local' + executor.cpus = 16 + executor.memory = 60.GB + } test { includeConfig 'conf/test.config' } test_full { includeConfig 'conf/test_full.config' } } + // Load igenomes.config if required if (!params.igenomes_ignore) { includeConfig 'conf/igenomes.config' @@ -130,6 +148,7 @@ if (!params.igenomes_ignore) { params.genomes = [:] } + // Export these variables to prevent local Python/R libraries from conflicting with those in the container // The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container. // See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable. @@ -169,7 +188,7 @@ manifest { description = 'Pipeline for processing of 10xGenomics single cell rnaseq data' mainScript = 'main.nf' nextflowVersion = '!>=21.10.3' - version = '2.0dev' + version = '2.0.1dev' } // Load modules.config for DSL2 module specific options From 08b26fb87a91879e019b0e13c96f21806f55bdc7 Mon Sep 17 00:00:00 2001 From: nf-core-bot Date: Thu, 1 Sep 2022 13:32:32 +0000 Subject: [PATCH 099/165] Template update for nf-core/tools version 2.5.1 --- bin/check_samplesheet.py | 9 ++++++--- pyproject.toml | 10 ++++++++++ 2 files changed, 16 insertions(+), 3 deletions(-) create mode 100644 pyproject.toml diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py index 9a8b8962..11b15572 100755 --- a/bin/check_samplesheet.py +++ b/bin/check_samplesheet.py @@ -98,7 +98,9 @@ def _validate_pair(self, row): """Assert that read pairs have the same file extension. 
Report pair status.""" if row[self._first_col] and row[self._second_col]: row[self._single_col] = False - if Path(row[self._first_col]).suffixes[-2:] != Path(row[self._second_col]).suffixes[-2:]: + first_col_suffix = Path(row[self._first_col]).suffixes[-2:] + second_col_suffix = Path(row[self._second_col]).suffixes[-2:] + if first_col_suffix != second_col_suffix: raise AssertionError("FASTQ pairs must have the same file extensions.") else: row[self._single_col] = True @@ -157,7 +159,7 @@ def sniff_format(handle): handle.seek(0) sniffer = csv.Sniffer() if not sniffer.has_header(peek): - logger.critical(f"The given sample sheet does not appear to contain a header.") + logger.critical("The given sample sheet does not appear to contain a header.") sys.exit(1) dialect = sniffer.sniff(peek) return dialect @@ -195,7 +197,8 @@ def check_samplesheet(file_in, file_out): reader = csv.DictReader(in_handle, dialect=sniff_format(in_handle)) # Validate the existence of the expected header columns. if not required_columns.issubset(reader.fieldnames): - logger.critical(f"The sample sheet **must** contain the column headers: {', '.join(required_columns)}.") + req_cols = ", ".join(required_columns) + logger.critical(f"The sample sheet **must** contain these column headers: {req_cols}.") sys.exit(1) # Validate each row. checker = RowChecker() diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..0d62beb6 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,10 @@ +# Config file for Python. Mostly used to configure linting of bin/check_samplesheet.py with Black. +# Should be kept the same as nf-core/tools to avoid fighting with template synchronisation. 
+[tool.black] +line-length = 120 +target_version = ["py37", "py38", "py39", "py310"] + +[tool.isort] +profile = "black" +known_first_party = ["nf_core"] +multi_line_output = 3 From 4d7cbef8c59dbcb2f7b2ab6db776c6f13550369f Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Mon, 12 Sep 2022 13:58:31 +0100 Subject: [PATCH 100/165] Recreate modules.json --- modules.json | 92 ++++++++++++++++++++++++++++++---------------------- 1 file changed, 54 insertions(+), 38 deletions(-) diff --git a/modules.json b/modules.json index 5468da87..0b04f799 100644 --- a/modules.json +++ b/modules.json @@ -3,44 +3,60 @@ "homePage": "https://github.com/nf-core/scrnaseq", "repos": { "nf-core/modules": { - "cellranger/count": { - "git_sha": "e1a3ae6bf5afce6b23f580e3225e7bdbe1388cd0" - }, - "cellranger/mkgtf": { - "git_sha": "e499abe572bc7c1322dfa66b767b0e65fe8c62ca" - }, - "cellranger/mkref": { - "git_sha": "61d87bb2bcbcd058e1af6e5f14bb821b8ae4ed3e" - }, - "custom/dumpsoftwareversions": { - "git_sha": "682f789f93070bd047868300dd018faf3d434e7c" - }, - "fastqc": { - "git_sha": "49b18b1639f4f7104187058866a8fab33332bdfe" - }, - "gffread": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" - }, - "gunzip": { - "git_sha": "fa37e0662690c4ec4260dae282fbce08777503e6" - }, - "kallistobustools/count": { - "git_sha": "013035eb5c80c9e3f37f2c89c92a1ae7925df8ea" - }, - "kallistobustools/ref": { - "git_sha": "0f1e736212e5ae37036ac7e32b225087a8a9b154" - }, - "multiqc": { - "git_sha": "5138acca0985ca01c38a1c4fba917d83772b1106" - }, - "salmon/index": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" - }, - "salmon/quant": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" - }, - "star/genomegenerate": { - "git_sha": "fb6c7bca3d55c19a793372513395e3a567bdd7ba" + "git_url": "https://github.com/nf-core/modules.git", + "modules": { + "cellranger/count": { + "branch": "master", + "git_sha": "e1a3ae6bf5afce6b23f580e3225e7bdbe1388cd0" + }, + "cellranger/mkgtf": { + "branch": "master", + 
"git_sha": "e499abe572bc7c1322dfa66b767b0e65fe8c62ca" + }, + "cellranger/mkref": { + "branch": "master", + "git_sha": "61d87bb2bcbcd058e1af6e5f14bb821b8ae4ed3e" + }, + "custom/dumpsoftwareversions": { + "branch": "master", + "git_sha": "682f789f93070bd047868300dd018faf3d434e7c" + }, + "fastqc": { + "branch": "master", + "git_sha": "49b18b1639f4f7104187058866a8fab33332bdfe" + }, + "gffread": { + "branch": "master", + "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + }, + "gunzip": { + "branch": "master", + "git_sha": "fa37e0662690c4ec4260dae282fbce08777503e6" + }, + "kallistobustools/count": { + "branch": "master", + "git_sha": "013035eb5c80c9e3f37f2c89c92a1ae7925df8ea" + }, + "kallistobustools/ref": { + "branch": "master", + "git_sha": "0f1e736212e5ae37036ac7e32b225087a8a9b154" + }, + "multiqc": { + "branch": "master", + "git_sha": "5138acca0985ca01c38a1c4fba917d83772b1106" + }, + "salmon/index": { + "branch": "master", + "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + }, + "salmon/quant": { + "branch": "master", + "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + }, + "star/genomegenerate": { + "branch": "master", + "git_sha": "fb6c7bca3d55c19a793372513395e3a567bdd7ba" + } } } } From 65082e572224bd1d88ae6990bc0819f491b74b26 Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Mon, 12 Sep 2022 13:59:29 +0100 Subject: [PATCH 101/165] Fix linting --- assets/email_template.html | 2 +- bin/cellranger_mtx_to_h5ad.py | 11 +++--- bin/concat_h5ad.py | 15 ++++----- bin/mtx_to_h5ad.py | 14 ++++---- bin/t2g.py | 63 ++++++++++++++++++----------------- 5 files changed, 52 insertions(+), 53 deletions(-) diff --git a/assets/email_template.html b/assets/email_template.html index b5c9a7b9..2ff8db51 100644 --- a/assets/email_template.html +++ b/assets/email_template.html @@ -4,7 +4,7 @@ - + nf-core/scrnaseq Pipeline Report diff --git a/bin/cellranger_mtx_to_h5ad.py b/bin/cellranger_mtx_to_h5ad.py index e8eb5b23..84305fa3 100755 --- a/bin/cellranger_mtx_to_h5ad.py 
+++ b/bin/cellranger_mtx_to_h5ad.py @@ -2,7 +2,8 @@ import scanpy as sc import argparse -def mtx_to_adata( mtx_h5: str, sample: str, verbose: bool = False ): + +def mtx_to_adata(mtx_h5: str, sample: str, verbose: bool = False): if verbose: print("Reading in {}".format(mtx_h5)) @@ -19,10 +20,10 @@ def mtx_to_adata( mtx_h5: str, sample: str, verbose: bool = False ): parser = argparse.ArgumentParser(description="Converts mtx output to h5ad.") - parser.add_argument("-m", "--mtx", dest="mtx", help="Path to mtx h5 file." ) - parser.add_argument("-v", "--verbose", dest="verbose", help="Toggle verbose messages", default=False ) - parser.add_argument("-s", "--sample", dest="sample", help="Sample name" ) - parser.add_argument("-o", "--out", dest="out", help="Output path." ) + parser.add_argument("-m", "--mtx", dest="mtx", help="Path to mtx h5 file.") + parser.add_argument("-v", "--verbose", dest="verbose", help="Toggle verbose messages", default=False) + parser.add_argument("-s", "--sample", dest="sample", help="Sample name") + parser.add_argument("-o", "--out", dest="out", help="Output path.") args = vars(parser.parse_args()) diff --git a/bin/concat_h5ad.py b/bin/concat_h5ad.py index 29d0037a..9c40ec6f 100755 --- a/bin/concat_h5ad.py +++ b/bin/concat_h5ad.py @@ -3,6 +3,7 @@ from pathlib import Path import argparse + def read_samplesheet(samplesheet): df = pd.read_csv(samplesheet) df.set_index("sample") @@ -12,14 +13,15 @@ def read_samplesheet(samplesheet): # only keep unique values using set() df = df.groupby(["sample"]).agg(lambda column: ",".join(set(column))) - return(df) + return df + if __name__ == "__main__": parser = argparse.ArgumentParser(description="Concatenates h5ad files and merge metadata from samplesheet") - parser.add_argument("-i", "--input", dest="input", help="Path to samplesheet.csv") - parser.add_argument("-o", "--out", dest="out", help="Output path.") + parser.add_argument("-i", "--input", dest="input", help="Path to samplesheet.csv") + 
parser.add_argument("-o", "--out", dest="out", help="Output path.") parser.add_argument("-s", "--suffix", dest="suffix", help="Suffix of matrices to remove and get sample name") args = vars(parser.parse_args()) @@ -28,10 +30,7 @@ def read_samplesheet(samplesheet): df_samplesheet = read_samplesheet(args["input"]) # find all h5ad and append to dict - dict_of_h5ad = { - str(path).replace(args["suffix"], ""): sc.read_h5ad(path) - for path in Path(".").rglob('*.h5ad') - } + dict_of_h5ad = {str(path).replace(args["suffix"], ""): sc.read_h5ad(path) for path in Path(".").rglob("*.h5ad")} # concat h5ad files adata = ad.concat(dict_of_h5ad, label="sample", merge="unique", index_unique="_") @@ -40,4 +39,4 @@ def read_samplesheet(samplesheet): adata.obs = adata.obs.join(df_samplesheet, on="sample") adata.write_h5ad(args["out"], compression="gzip") - print("Wrote h5ad file to {}".format(args["out"])) \ No newline at end of file + print("Wrote h5ad file to {}".format(args["out"])) diff --git a/bin/mtx_to_h5ad.py b/bin/mtx_to_h5ad.py index 2885886e..37d7c1ec 100755 --- a/bin/mtx_to_h5ad.py +++ b/bin/mtx_to_h5ad.py @@ -4,15 +4,15 @@ import argparse -def mtx_to_adata( - mtx_file: str, barcode_file: str, feature_file: str, sample: str, aligner: str, verbose: bool = False -): +def mtx_to_adata(mtx_file: str, barcode_file: str, feature_file: str, sample: str, aligner: str, verbose: bool = False): if verbose: print("Reading in {}".format(mtx_file)) adata = sc.read_mtx(mtx_file) - if aligner == 'star': # for some reason star matrix comes transposed and doesn't fit when values are appended directly + if ( + aligner == "star" + ): # for some reason star matrix comes transposed and doesn't fit when values are appended directly adata = adata.transpose() adata.obs_names = pd.read_csv(barcode_file, header=None, sep="\t")[0].values adata.var_names = pd.read_csv(feature_file, header=None, sep="\t")[0].values @@ -26,9 +26,7 @@ def mtx_to_adata( parser = 
argparse.ArgumentParser(description="Converts mtx output to h5ad.") parser.add_argument("-m", "--mtx", dest="mtx", help="Path to mtx file.") - parser.add_argument( - "-v", "--verbose", dest="verbose", help="Toggle verbose messages", default=False - ) + parser.add_argument("-v", "--verbose", dest="verbose", help="Toggle verbose messages", default=False) parser.add_argument("-f", "--feature", dest="feature", help="Path to feature file.") parser.add_argument("-b", "--barcode", dest="barcode", help="Path to barcode file.") parser.add_argument("-s", "--sample", dest="sample", help="Sample name") @@ -38,7 +36,7 @@ def mtx_to_adata( args = vars(parser.parse_args()) adata = mtx_to_adata( - args["mtx"], args["barcode"], args["feature"], args["sample"], args["aligner"],verbose=args["verbose"] + args["mtx"], args["barcode"], args["feature"], args["sample"], args["aligner"], verbose=args["verbose"] ) adata.write_h5ad(args["out"], compression="gzip") diff --git a/bin/t2g.py b/bin/t2g.py index 6419dd1d..38930cea 100755 --- a/bin/t2g.py +++ b/bin/t2g.py @@ -1,6 +1,6 @@ #!/usr/bin/env python -#This was downloaded on 2019-06-23 from https://github.com/bustools/getting_started/releases/ -#All credit goes to the original authors from the Kallisto/BUStools team! +# This was downloaded on 2019-06-23 from https://github.com/bustools/getting_started/releases/ +# All credit goes to the original authors from the Kallisto/BUStools team! 
# BSD 2-Clause License # # Copyright (c) 2017, Nicolas Bray, Harold Pimentel, Páll Melsted and Lior Pachter @@ -29,43 +29,43 @@ import sys, argparse -def create_transcript_list(input, use_name = True, use_version = False): + +def create_transcript_list(input, use_name=True, use_version=False): r = {} for line in input: - if len(line) == 0 or line[0] == '#': + if len(line) == 0 or line[0] == "#": continue - l = line.strip().split('\t') - if l[2] == 'transcript': + l = line.strip().split("\t") + if l[2] == "transcript": info = l[8] d = {} - for x in info.split('; '): + for x in info.split("; "): x = x.strip() - p = x.find(' ') + p = x.find(" ") if p == -1: continue k = x[:p] - p = x.find('"',p) - p2 = x.find('"',p+1) - v = x[p+1:p2] + p = x.find('"', p) + p2 = x.find('"', p + 1) + v = x[p + 1 : p2] d[k] = v - - if 'transcript_id' not in d or 'gene_id' not in d: + if "transcript_id" not in d or "gene_id" not in d: continue - tid = d['transcript_id'].split(".")[0] - gid = d['gene_id'].split(".")[0] + tid = d["transcript_id"].split(".")[0] + gid = d["gene_id"].split(".")[0] if use_version: - if 'transcript_version' not in d or 'gene_version' not in d: + if "transcript_version" not in d or "gene_version" not in d: continue - tid += '.' + d['transcript_version'] - gid += '.' + d['gene_version'] + tid += "." + d["transcript_version"] + gid += "." 
+ d["gene_version"] gname = None if use_name: - if 'gene_name' not in d: + if "gene_name" not in d: continue - gname = d['gene_name'] + gname = d["gene_name"] if tid in r: continue @@ -74,26 +74,27 @@ def create_transcript_list(input, use_name = True, use_version = False): return r - -def print_output(output, r, use_name = True): +def print_output(output, r, use_name=True): for tid in r: if use_name: - output.write("%s\t%s\t%s\n"%(tid, r[tid][0], r[tid][1])) + output.write("%s\t%s\t%s\n" % (tid, r[tid][0], r[tid][1])) else: - output.write("%s\t%s\n"%(tid, r[tid][0])) + output.write("%s\t%s\n" % (tid, r[tid][0])) if __name__ == "__main__": - - parser = argparse.ArgumentParser(add_help=True, description='Creates transcript to gene info from GTF files\nreads from standard input and writes to standard output') - parser.add_argument('--use_version', '-v', action='store_true', help='Use version numbers in transcript and gene ids') - parser.add_argument('--skip_gene_names', '-s', action='store_true', help='Do not output gene names') + parser = argparse.ArgumentParser( + add_help=True, + description="Creates transcript to gene info from GTF files\nreads from standard input and writes to standard output", + ) + parser.add_argument( + "--use_version", "-v", action="store_true", help="Use version numbers in transcript and gene ids" + ) + parser.add_argument("--skip_gene_names", "-s", action="store_true", help="Do not output gene names") args = parser.parse_args() - - input = sys.stdin - r = create_transcript_list(input, use_name = not args.skip_gene_names, use_version = args.use_version) + r = create_transcript_list(input, use_name=not args.skip_gene_names, use_version=args.use_version) output = sys.stdout print_output(output, r) From e000e29482ce4b1eab924b1136ae6edc7c25089e Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Mon, 12 Sep 2022 14:01:30 +0100 Subject: [PATCH 102/165] Update CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md 
b/CHANGELOG.md index 745a987b..a3fd4dab 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Fixed matrix conversion error when running STAR with --soloFeatures GeneFull [#135](https://github.com/nf-core/scrnaseq/pull/135) - Fixed seurat matrix conversion error when running with conda profile [#136](https://github.com/nf-core/scrnaseq/pull/136) - Fixed Kallistobustools module [#116](https://github.com/nf-core/scrnaseq/issues/116). By updating nf-core module and making sure conversion modules take into account the different outputs produced by kallisto standard and non-standard workflows. +- Updated pipeline template to [nf-core/tools 2.5.1](https://github.com/nf-core/tools/releases/tag/2.5.1) ## v2.0.0 - 2022-06-17 "Gray Nickel Beagle" From ad4250d1819b459319d45e94203f67224d36ecd0 Mon Sep 17 00:00:00 2001 From: Rob Syme Date: Mon, 12 Sep 2022 17:14:09 -0400 Subject: [PATCH 103/165] Add missing python function in samplesheet check --- bin/check_samplesheet.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py index 58a13e4a..cf567698 100755 --- a/bin/check_samplesheet.py +++ b/bin/check_samplesheet.py @@ -14,6 +14,15 @@ logger = logging.getLogger() +def print_error(error, context="Line", context_str=""): + error_str = "ERROR: Please check samplesheet -> {}".format(error) + if context != "" and context_str != "": + error_str = "ERROR: Please check samplesheet -> {}\n{}: '{}'".format( + error, context.strip(), context_str.strip() + ) + print(error_str) + sys.exit(1) + class RowChecker: """ From 88711d9334430e893d8171dd9de63469558df18f Mon Sep 17 00:00:00 2001 From: Rob Syme Date: Mon, 12 Sep 2022 17:14:48 -0400 Subject: [PATCH 104/165] Correct input parameter help text. 
Input is samplesheet not fastq --- nextflow_schema.json | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 91757ae4..98f5421f 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -15,9 +15,10 @@ "input": { "type": "string", "mimetype": "text/csv", + "pattern": "^\\S+\\.csv$", "fa_icon": "fas fa-dna", - "description": "Input FastQ files", - "help_text": "Use this to specify the location of your input FastQ files. For example:\n\n```bash\n--input 'path/to/data/sample_*_{1,2}.fastq'\n```\n\nPlease note the following requirements:\n\n1. The path must be enclosed in quotes\n2. The path must have at least one `*` wildcard character\n3. When using the pipeline with paired end data, the path must use `{1,2}` notation to specify read pairs.\n\nIf left unspecified, a default pattern is used: `data/*{1,2}.fastq.gz`" + "description": "Path to comma-separated file containing information about the samples in the experiment.", + "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 4 columns, and a header row. See [usage docs](https://nf-co.re/rnaseq/usage#samplesheet-input)." }, "outdir": { "type": "string", From 54730cbc8de62759104368ccc6730ffedaa1009b Mon Sep 17 00:00:00 2001 From: Rob Syme Date: Mon, 12 Sep 2022 22:29:50 -0400 Subject: [PATCH 105/165] Add GTF_GENE_FILTER to remove extraneous features It is possible to use a GTF file that has annotations on non-chromosomal sequences. If these non-chromosomal features are absent from the genome_fasta provided to the pipeline, some downstream tools will break. Here we remove any features that do not have a matching source sequence in the genome_fasta. 
--- bin/filter_gtf_for_genes_in_genome.py | 86 +++++++++++++++++++++++++++ modules/local/gtf_gene_filter.nf | 31 ++++++++++ workflows/scrnaseq.nf | 12 ++-- 3 files changed, 124 insertions(+), 5 deletions(-) create mode 100755 bin/filter_gtf_for_genes_in_genome.py create mode 100644 modules/local/gtf_gene_filter.nf diff --git a/bin/filter_gtf_for_genes_in_genome.py b/bin/filter_gtf_for_genes_in_genome.py new file mode 100755 index 00000000..c7111c90 --- /dev/null +++ b/bin/filter_gtf_for_genes_in_genome.py @@ -0,0 +1,86 @@ +#!/usr/bin/env python +from __future__ import print_function +import logging +from itertools import groupby +import argparse + +# Create a logger +logging.basicConfig(format="%(name)s - %(asctime)s %(levelname)s: %(message)s") +logger = logging.getLogger(__file__) +logger.setLevel(logging.INFO) + + +def is_header(line): + return line[0] == ">" + + +def extract_fasta_seq_names(fasta_name): + """ + modified from Brent Pedersen + Correct Way To Parse A Fasta File In Python + given a fasta file. yield tuples of header, sequence + from https://www.biostars.org/p/710/ + """ + # first open the file outside + fh = open(fasta_name) + + # ditch the boolean (x[0]) and just keep the header or sequence since + # we know they alternate. 
+ faiter = (x[1] for x in groupby(fh, is_header)) + + for i, header in enumerate(faiter): + line = next(header) + if is_header(line): + # drop the ">" + headerStr = line[1:].strip().split()[0] + yield headerStr + + +def extract_genes_in_genome(fasta, gtf_in, gtf_out): + seq_names_in_genome = set(extract_fasta_seq_names(fasta)) + logger.info("Extracted chromosome sequence names from : %s" % fasta) + logger.info( + "All chromosome names: " + ", ".join(sorted(x for x in seq_names_in_genome)) + ) + seq_names_in_gtf = set([]) + + n_total_lines = 0 + n_lines_in_genome = 0 + with open(gtf_out, "w") as f: + with open(gtf_in) as g: + + for line in g.readlines(): + n_total_lines += 1 + seq_name_gtf = line.split("\t")[0] + seq_names_in_gtf.add(seq_name_gtf) + if seq_name_gtf in seq_names_in_genome: + n_lines_in_genome += 1 + f.write(line) + logger.info( + "Extracted %d / %d lines from %s matching sequences in %s" + % (n_lines_in_genome, n_total_lines, gtf_in, fasta) + ) + logger.info( + "All sequence IDs from GTF: " + ", ".join(sorted(x for x in seq_name_gtf)) + ) + + logger.info("Wrote matching lines to %s" % gtf_out) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="""Filter GTF only for features in the genome""" + ) + parser.add_argument("--gtf", type=str, help="GTF file") + parser.add_argument("--fasta", type=str, help="Genome fasta file") + parser.add_argument( + "-o", + "--output", + dest="output", + default="genes_in_genome.gtf", + type=str, + help="GTF features on fasta genome sequences", + ) + + args = parser.parse_args() + extract_genes_in_genome(args.fasta, args.gtf, args.output) \ No newline at end of file diff --git a/modules/local/gtf_gene_filter.nf b/modules/local/gtf_gene_filter.nf new file mode 100644 index 00000000..7f1a6aa1 --- /dev/null +++ b/modules/local/gtf_gene_filter.nf @@ -0,0 +1,31 @@ +process GTF_GENE_FILTER { + tag "$fasta" + + conda (params.enable_conda ? 
"conda-forge::python=3.9.5" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/python:3.9--1' : + 'quay.io/biocontainers/python:3.9--1' }" + + input: + path fasta + path gtf + + output: + path "*.gtf" , emit: gtf + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: // filter_gtf_for_genes_in_genome.py is bundled with the pipeline, in nf-core/rnaseq/bin/ + """ + filter_gtf_for_genes_in_genome.py \\ + --gtf $gtf \\ + --fasta $fasta \\ + -o ${fasta.baseName}_genes.gtf + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version | sed 's/Python //g') + END_VERSIONS + """ +} diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf index e377465f..3e557eb1 100644 --- a/workflows/scrnaseq.nf +++ b/workflows/scrnaseq.nf @@ -42,7 +42,7 @@ include { SCRNASEQ_ALEVIN } from '../subworkflows/local/alevin' include { STARSOLO } from '../subworkflows/local/starsolo' include { CELLRANGER_ALIGN } from "../subworkflows/local/align_cellranger" include { MTX_CONVERSION } from "../subworkflows/local/mtx_conversion" - +include { GTF_GENE_FILTER } from '../modules/local/gtf_gene_filter' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IMPORT NF-CORE MODULES/SUBWORKFLOWS @@ -116,11 +116,13 @@ workflow SCRNASEQ { ch_multiqc_fastqc = FASTQC_CHECK.out.fastqc_multiqc.ifEmpty([]) } + ch_filter_gtf = GTF_GENE_FILTER ( ch_genome_fasta, ch_gtf ).gtf + // Run kallisto bustools pipeline if (params.aligner == "kallisto") { KALLISTO_BUSTOOLS( ch_genome_fasta, - ch_gtf, + ch_filter_gtf, ch_kallisto_index, ch_txp2gene, protocol, @@ -136,7 +138,7 @@ workflow SCRNASEQ { if (params.aligner == "alevin") { SCRNASEQ_ALEVIN( ch_genome_fasta, - ch_gtf, + ch_filter_gtf, ch_transcript_fasta, ch_salmon_index, ch_txp2gene, @@ -154,7 +156,7 @@ workflow SCRNASEQ { if (params.aligner == 
"star") { STARSOLO( ch_genome_fasta, - ch_gtf, + ch_filter_gtf, ch_star_index, protocol, ch_barcode_whitelist, @@ -170,7 +172,7 @@ workflow SCRNASEQ { if (params.aligner == "cellranger") { CELLRANGER_ALIGN( ch_genome_fasta, - ch_gtf, + ch_filter_gtf, ch_cellranger_index, ch_fastq ) From 2235b9bb38a9adfd98a8db65fde1741f9c7c36c2 Mon Sep 17 00:00:00 2001 From: Rob Syme Date: Mon, 12 Sep 2022 22:32:43 -0400 Subject: [PATCH 106/165] Patch to remove transcript small sequences Salmon index will fail if there are transcripts provided that are smaller than k. --- modules.json | 3 +- modules/nf-core/modules/salmon/index/main.nf | 11 ++++-- .../modules/salmon/index/salmon-index.diff | 36 +++++++++++++++++++ 3 files changed, 47 insertions(+), 3 deletions(-) create mode 100644 modules/nf-core/modules/salmon/index/salmon-index.diff diff --git a/modules.json b/modules.json index 0b04f799..e72473dd 100644 --- a/modules.json +++ b/modules.json @@ -47,7 +47,8 @@ }, "salmon/index": { "branch": "master", - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d", + "patch": "modules/nf-core/modules/salmon/index/salmon-index.diff" }, "salmon/quant": { "branch": "master", diff --git a/modules/nf-core/modules/salmon/index/main.nf b/modules/nf-core/modules/salmon/index/main.nf index 737087f9..d875a345 100644 --- a/modules/nf-core/modules/salmon/index/main.nf +++ b/modules/nf-core/modules/salmon/index/main.nf @@ -20,21 +20,28 @@ process SALMON_INDEX { script: def args = task.ext.args ?: '' + def kmer_argmatch = args =~ /\-k *(\d+)/ + def k = kmer_argmatch ? 
kmer_argmatch[0][1] : 31 def get_decoy_ids = "grep '^>' $genome_fasta | cut -d ' ' -f 1 > decoys.txt" def gentrome = "gentrome.fa" + def maybe_unzip = "cat" if (genome_fasta.endsWith('.gz')) { get_decoy_ids = "grep '^>' <(gunzip -c $genome_fasta) | cut -d ' ' -f 1 > decoys.txt" gentrome = "gentrome.fa.gz" + maybe_unzip = "gunzip -c" } """ $get_decoy_ids sed -i.bak -e 's/>//g' decoys.txt - cat $transcript_fasta $genome_fasta > $gentrome + cat $transcript_fasta $genome_fasta \\ + | $maybe_unzip \\ + | awk '!/^>/ { next } { getline seq } length(seq) >= $k { print \$0 "\\n" seq }' \\ + | gzip -c > gentrome.filtered.fasta.gz salmon \\ index \\ --threads $task.cpus \\ - -t $gentrome \\ + -t gentrome.filtered.fasta.gz \\ -d decoys.txt \\ $args \\ -i salmon diff --git a/modules/nf-core/modules/salmon/index/salmon-index.diff b/modules/nf-core/modules/salmon/index/salmon-index.diff new file mode 100644 index 00000000..87f976cd --- /dev/null +++ b/modules/nf-core/modules/salmon/index/salmon-index.diff @@ -0,0 +1,36 @@ +Changes in module 'nf-core/modules/salmon/index' +--- modules/nf-core/modules/salmon/index/main.nf ++++ modules/nf-core/modules/salmon/index/main.nf +@@ -20,21 +20,28 @@ + + script: + def args = task.ext.args ?: '' ++ def kmer_argmatch = args =~ /\-k *(\d+)/ ++ def k = kmer_argmatch ? 
kmer_argmatch[0][1] : 31 + def get_decoy_ids = "grep '^>' $genome_fasta | cut -d ' ' -f 1 > decoys.txt" + def gentrome = "gentrome.fa" ++ def maybe_unzip = "cat" + if (genome_fasta.endsWith('.gz')) { + get_decoy_ids = "grep '^>' <(gunzip -c $genome_fasta) | cut -d ' ' -f 1 > decoys.txt" + gentrome = "gentrome.fa.gz" ++ maybe_unzip = "gunzip -c" + } + """ + $get_decoy_ids + sed -i.bak -e 's/>//g' decoys.txt +- cat $transcript_fasta $genome_fasta > $gentrome ++ cat $transcript_fasta $genome_fasta \\ ++ | $maybe_unzip \\ ++ | awk '!/^>/ { next } { getline seq } length(seq) >= $k { print \$0 "\\n" seq }' \\ ++ | gzip -c > gentrome.filtered.fasta.gz + + salmon \\ + index \\ + --threads $task.cpus \\ +- -t $gentrome \\ ++ -t gentrome.filtered.fasta.gz \\ + -d decoys.txt \\ + $args \\ + -i salmon + +************************************************************ From 66d2e2f5bd1818693d5023f4799651956467442b Mon Sep 17 00:00:00 2001 From: vjmarteau Date: Thu, 22 Sep 2022 10:24:13 +0200 Subject: [PATCH 107/165] Add print_error function --- bin/check_samplesheet.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py index d98bdaa3..4a6496bb 100755 --- a/bin/check_samplesheet.py +++ b/bin/check_samplesheet.py @@ -139,6 +139,12 @@ def read_head(handle, num_lines=10): lines.append(line) return "".join(lines) +def print_error(error, context="Line", context_str=""): + error_str = f"ERROR: Please check samplesheet -> {error}" + if context != "" and context_str != "": + error_str = f"ERROR: Please check samplesheet -> {error}\n{context.strip()}: '{context_str.strip()}'" + print(error_str) + sys.exit(1) def sniff_format(handle): """ From 4835d9de4dd04464d462daea8f6a28aa236c6e62 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Thu, 22 Sep 2022 10:45:47 +0000 Subject: [PATCH 108/165] Fix linting --- bin/check_samplesheet.py | 31 +++++++++++---- bin/t2g.py | 70 ++++++++++++++++++--------------- modules.json | 85 
+++++++++++++++++++++++----------------- 3 files changed, 113 insertions(+), 73 deletions(-) diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py index d98bdaa3..4e72568f 100755 --- a/bin/check_samplesheet.py +++ b/bin/check_samplesheet.py @@ -86,7 +86,9 @@ def _validate_sample(self, row): def _validate_first(self, row): """Assert that the first FASTQ entry is non-empty and has the right format.""" - assert len(row[self._first_col]) > 0, "At least the first FASTQ file is required." + assert ( + len(row[self._first_col]) > 0 + ), "At least the first FASTQ file is required." self._validate_fastq_format(row[self._first_col]) def _validate_second(self, row): @@ -99,7 +101,8 @@ def _validate_pair(self, row): if row[self._first_col] and row[self._second_col]: row[self._single_col] = False assert ( - Path(row[self._first_col]).suffixes[-2:] == Path(row[self._second_col]).suffixes[-2:] + Path(row[self._first_col]).suffixes[-2:] + == Path(row[self._second_col]).suffixes[-2:] ), "FASTQ pairs must have the same file extensions." else: row[self._single_col] = True @@ -119,7 +122,9 @@ def validate_unique_samples(self): FASTQ file combination exists. """ - assert len(self._seen) == len(self.modified), "The pair of sample name and FASTQ must be unique." + assert len(self._seen) == len( + self.modified + ), "The pair of sample name and FASTQ must be unique." 
if len({pair[0] for pair in self._seen}) < len(self._seen): counts = Counter(pair[0] for pair in self._seen) seen = Counter() @@ -200,7 +205,11 @@ def check_samplesheet(file_in, file_out): HEADER = ["sample", "fastq_1", "fastq_2"] header = [x.strip('"') for x in fin.readline().strip().split(",")] if header[: len(HEADER)] != HEADER: - print("ERROR: Please check samplesheet header -> {} != {}".format(",".join(header), ",".join(HEADER))) + print( + "ERROR: Please check samplesheet header -> {} != {}".format( + ",".join(header), ",".join(HEADER) + ) + ) sys.exit(1) ## Check sample entries @@ -217,7 +226,9 @@ def check_samplesheet(file_in, file_out): num_cols = len([x for x in lspl if x]) if num_cols < MIN_COLS: print_error( - "Invalid number of populated columns (minimum = {})!".format(MIN_COLS), + "Invalid number of populated columns (minimum = {})!".format( + MIN_COLS + ), "Line", line, ) @@ -266,8 +277,14 @@ def check_samplesheet(file_in, file_out): for sample in sorted(sample_mapping_dict.keys()): ## Check that multiple runs of the same sample are of the same datatype - if not all(x[0] == sample_mapping_dict[sample][0][0] for x in sample_mapping_dict[sample]): - print_error("Multiple runs of a sample must be of the same datatype!", "Sample: {}".format(sample)) + if not all( + x[0] == sample_mapping_dict[sample][0][0] + for x in sample_mapping_dict[sample] + ): + print_error( + "Multiple runs of a sample must be of the same datatype!", + "Sample: {}".format(sample), + ) for idx, val in enumerate(sample_mapping_dict[sample]): fout.write(",".join(["{}".format(sample)] + val) + "\n") diff --git a/bin/t2g.py b/bin/t2g.py index 6419dd1d..5daf3df5 100755 --- a/bin/t2g.py +++ b/bin/t2g.py @@ -1,6 +1,6 @@ #!/usr/bin/env python -#This was downloaded on 2019-06-23 from https://github.com/bustools/getting_started/releases/ -#All credit goes to the original authors from the Kallisto/BUStools team! 
+# This was downloaded on 2019-06-23 from https://github.com/bustools/getting_started/releases/ +# All credit goes to the original authors from the Kallisto/BUStools team! # BSD 2-Clause License # # Copyright (c) 2017, Nicolas Bray, Harold Pimentel, Páll Melsted and Lior Pachter @@ -29,43 +29,43 @@ import sys, argparse -def create_transcript_list(input, use_name = True, use_version = False): + +def create_transcript_list(input, use_name=True, use_version=False): r = {} for line in input: - if len(line) == 0 or line[0] == '#': + if len(line) == 0 or line[0] == "#": continue - l = line.strip().split('\t') - if l[2] == 'transcript': + l = line.strip().split("\t") + if l[2] == "transcript": info = l[8] d = {} - for x in info.split('; '): + for x in info.split("; "): x = x.strip() - p = x.find(' ') + p = x.find(" ") if p == -1: continue k = x[:p] - p = x.find('"',p) - p2 = x.find('"',p+1) - v = x[p+1:p2] + p = x.find('"', p) + p2 = x.find('"', p + 1) + v = x[p + 1 : p2] d[k] = v - - if 'transcript_id' not in d or 'gene_id' not in d: + if "transcript_id" not in d or "gene_id" not in d: continue - tid = d['transcript_id'].split(".")[0] - gid = d['gene_id'].split(".")[0] + tid = d["transcript_id"].split(".")[0] + gid = d["gene_id"].split(".")[0] if use_version: - if 'transcript_version' not in d or 'gene_version' not in d: + if "transcript_version" not in d or "gene_version" not in d: continue - tid += '.' + d['transcript_version'] - gid += '.' + d['gene_version'] + tid += "." + d["transcript_version"] + gid += "." 
+ d["gene_version"] gname = None if use_name: - if 'gene_name' not in d: + if "gene_name" not in d: continue - gname = d['gene_name'] + gname = d["gene_name"] if tid in r: continue @@ -74,26 +74,34 @@ def create_transcript_list(input, use_name = True, use_version = False): return r - -def print_output(output, r, use_name = True): +def print_output(output, r, use_name=True): for tid in r: if use_name: - output.write("%s\t%s\t%s\n"%(tid, r[tid][0], r[tid][1])) + output.write("%s\t%s\t%s\n" % (tid, r[tid][0], r[tid][1])) else: - output.write("%s\t%s\n"%(tid, r[tid][0])) + output.write("%s\t%s\n" % (tid, r[tid][0])) if __name__ == "__main__": - - parser = argparse.ArgumentParser(add_help=True, description='Creates transcript to gene info from GTF files\nreads from standard input and writes to standard output') - parser.add_argument('--use_version', '-v', action='store_true', help='Use version numbers in transcript and gene ids') - parser.add_argument('--skip_gene_names', '-s', action='store_true', help='Do not output gene names') + parser = argparse.ArgumentParser( + add_help=True, + description="Creates transcript to gene info from GTF files\nreads from standard input and writes to standard output", + ) + parser.add_argument( + "--use_version", + "-v", + action="store_true", + help="Use version numbers in transcript and gene ids", + ) + parser.add_argument( + "--skip_gene_names", "-s", action="store_true", help="Do not output gene names" + ) args = parser.parse_args() - - input = sys.stdin - r = create_transcript_list(input, use_name = not args.skip_gene_names, use_version = args.use_version) + r = create_transcript_list( + input, use_name=not args.skip_gene_names, use_version=args.use_version + ) output = sys.stdout print_output(output, r) diff --git a/modules.json b/modules.json index b0f8bb72..e18acd52 100644 --- a/modules.json +++ b/modules.json @@ -3,41 +3,56 @@ "homePage": "https://github.com/nf-core/scrnaseq", "repos": { "nf-core/modules": { - 
"cellranger/count": { - "git_sha": "8032833e6e0fd3edeb20d4121885112249f273c4" - }, - "cellranger/mkgtf": { - "git_sha": "e499abe572bc7c1322dfa66b767b0e65fe8c62ca" - }, - "cellranger/mkref": { - "git_sha": "61d87bb2bcbcd058e1af6e5f14bb821b8ae4ed3e" - }, - "custom/dumpsoftwareversions": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" - }, - "fastqc": { - "git_sha": "49b18b1639f4f7104187058866a8fab33332bdfe" - }, - "gffread": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" - }, - "gunzip": { - "git_sha": "9aadd9a6d3f5964476582319b3a1c54a3e3fe7c9" - }, - "kallistobustools/ref": { - "git_sha": "e20e57f90b6787ac9a010a980cf6ea98bd990046" - }, - "multiqc": { - "git_sha": "e5f8924fabf4c8380f55fb7aee89fd2c268161b1" - }, - "salmon/index": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" - }, - "salmon/quant": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" - }, - "star/genomegenerate": { - "git_sha": "fb6c7bca3d55c19a793372513395e3a567bdd7ba" + "git_url": "https://github.com/nf-core/modules.git", + "modules": { + "cellranger/count": { + "branch": "master", + "git_sha": "8032833e6e0fd3edeb20d4121885112249f273c4" + }, + "cellranger/mkgtf": { + "branch": "master", + "git_sha": "e499abe572bc7c1322dfa66b767b0e65fe8c62ca" + }, + "cellranger/mkref": { + "branch": "master", + "git_sha": "61d87bb2bcbcd058e1af6e5f14bb821b8ae4ed3e" + }, + "custom/dumpsoftwareversions": { + "branch": "master", + "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + }, + "fastqc": { + "branch": "master", + "git_sha": "49b18b1639f4f7104187058866a8fab33332bdfe" + }, + "gffread": { + "branch": "master", + "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + }, + "gunzip": { + "branch": "master", + "git_sha": "9aadd9a6d3f5964476582319b3a1c54a3e3fe7c9" + }, + "kallistobustools/ref": { + "branch": "master", + "git_sha": "e20e57f90b6787ac9a010a980cf6ea98bd990046" + }, + "multiqc": { + "branch": "master", + "git_sha": "e5f8924fabf4c8380f55fb7aee89fd2c268161b1" + }, + 
"salmon/index": { + "branch": "master", + "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + }, + "salmon/quant": { + "branch": "master", + "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + }, + "star/genomegenerate": { + "branch": "master", + "git_sha": "fb6c7bca3d55c19a793372513395e3a567bdd7ba" + } } } } From ba45b13bfe405f192735fed3499074665e37cb91 Mon Sep 17 00:00:00 2001 From: EC2 Default User Date: Thu, 22 Sep 2022 14:47:00 +0000 Subject: [PATCH 109/165] Linting fix --- bin/check_samplesheet.py | 23 +++++++++++++---------- bin/filter_gtf_for_genes_in_genome.py | 17 +++++------------ 2 files changed, 18 insertions(+), 22 deletions(-) diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py index cf567698..77a22f17 100755 --- a/bin/check_samplesheet.py +++ b/bin/check_samplesheet.py @@ -14,15 +14,6 @@ logger = logging.getLogger() -def print_error(error, context="Line", context_str=""): - error_str = "ERROR: Please check samplesheet -> {}".format(error) - if context != "" and context_str != "": - error_str = "ERROR: Please check samplesheet -> {}\n{}: '{}'".format( - error, context.strip(), context_str.strip() - ) - print(error_str) - sys.exit(1) - class RowChecker: """ @@ -150,6 +141,16 @@ def read_head(handle, num_lines=10): return "".join(lines) +def print_error(error, context="Line", context_str=""): + error_str = "ERROR: Please check samplesheet -> {}".format(error) + if context != "" and context_str != "": + error_str = "ERROR: Please check samplesheet -> {}\n{}: '{}'".format( + error, context.strip(), context_str.strip() + ) + print(error_str) + sys.exit(1) + + def sniff_format(handle): """ Detect the tabular format. 
@@ -210,7 +211,9 @@ def check_samplesheet(file_in, file_out): HEADER = ["sample", "fastq_1", "fastq_2"] header = [x.strip('"') for x in fin.readline().strip().split(",")] if header[: len(HEADER)] != HEADER: - print("ERROR: Please check samplesheet header -> {} != {}".format(",".join(header), ",".join(HEADER))) + given = ",".join(header) + wanted = ",".join(HEADER) + print(f"ERROR: Please check samplesheet header -> {given} != {wanted}") sys.exit(1) ## Check sample entries diff --git a/bin/filter_gtf_for_genes_in_genome.py b/bin/filter_gtf_for_genes_in_genome.py index c7111c90..ef4c87cd 100755 --- a/bin/filter_gtf_for_genes_in_genome.py +++ b/bin/filter_gtf_for_genes_in_genome.py @@ -39,9 +39,7 @@ def extract_fasta_seq_names(fasta_name): def extract_genes_in_genome(fasta, gtf_in, gtf_out): seq_names_in_genome = set(extract_fasta_seq_names(fasta)) logger.info("Extracted chromosome sequence names from : %s" % fasta) - logger.info( - "All chromosome names: " + ", ".join(sorted(x for x in seq_names_in_genome)) - ) + logger.info("All chromosome names: " + ", ".join(sorted(x for x in seq_names_in_genome))) seq_names_in_gtf = set([]) n_total_lines = 0 @@ -57,20 +55,15 @@ def extract_genes_in_genome(fasta, gtf_in, gtf_out): n_lines_in_genome += 1 f.write(line) logger.info( - "Extracted %d / %d lines from %s matching sequences in %s" - % (n_lines_in_genome, n_total_lines, gtf_in, fasta) - ) - logger.info( - "All sequence IDs from GTF: " + ", ".join(sorted(x for x in seq_name_gtf)) + "Extracted %d / %d lines from %s matching sequences in %s" % (n_lines_in_genome, n_total_lines, gtf_in, fasta) ) + logger.info("All sequence IDs from GTF: " + ", ".join(sorted(x for x in seq_name_gtf))) logger.info("Wrote matching lines to %s" % gtf_out) if __name__ == "__main__": - parser = argparse.ArgumentParser( - description="""Filter GTF only for features in the genome""" - ) + parser = argparse.ArgumentParser(description="""Filter GTF only for features in the genome""") 
parser.add_argument("--gtf", type=str, help="GTF file") parser.add_argument("--fasta", type=str, help="Genome fasta file") parser.add_argument( @@ -83,4 +76,4 @@ def extract_genes_in_genome(fasta, gtf_in, gtf_out): ) args = parser.parse_args() - extract_genes_in_genome(args.fasta, args.gtf, args.output) \ No newline at end of file + extract_genes_in_genome(args.fasta, args.gtf, args.output) From 1ec928a28d0f9b33db1bd1d05a82806470607ad2 Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Mon, 26 Sep 2022 09:01:33 +0000 Subject: [PATCH 110/165] update alevinQC packages and module --- modules/local/alevinqc.nf | 18 ++++++++++++------ modules/local/mtx_to_h5ad.nf | 6 +++--- modules/local/mtx_to_seurat.nf | 6 +++--- .../execution_trace_2022-09-26_07-01-32.txt | 1 + 4 files changed, 19 insertions(+), 12 deletions(-) create mode 100644 test/null/pipeline_info/execution_trace_2022-09-26_07-01-32.txt diff --git a/modules/local/alevinqc.nf b/modules/local/alevinqc.nf index 1f37d209..c56eb6a6 100644 --- a/modules/local/alevinqc.nf +++ b/modules/local/alevinqc.nf @@ -4,8 +4,8 @@ process ALEVINQC { conda (params.enable_conda ? "bioconda::bioconductor-alevinqc=1.6.1" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/bioconductor-alevinqc:1.10.0--r41hdfd78af_0' : - 'quay.io/biocontainers/bioconductor-alevinqc:1.10.0--r41hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/bioconductor-alevinqc:1.12.1--r41h9f5acd7_0' : + 'quay.io/biocontainers/bioconductor-alevinqc:1.12.1--r41h9f5acd7_0' }" input: tuple val(meta), path(alevin_results) @@ -19,10 +19,16 @@ process ALEVINQC { """ #!/usr/bin/env Rscript require(alevinQC) - alevinQCReport(baseDir = "${alevin_results}", sampleId = "${prefix}", - outputFile = "alevin_report_${meta.id}.html", - outputFormat = "html_document", - outputDir = "./", forceOverwrite = TRUE) + alevinFryQCReport( + mapDir = "${alevin_results}/af_map", + quantDir = "${alevin_results}/af_quant", + permitDir= "${alevin_results}", + sampleId = "${prefix}", + outputFile = "alevin_report_${meta.id}.html", + outputFormat = "html_document", + outputDir = "./", + forceOverwrite = TRUE + ) yaml::write_yaml( list( diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf index 22a04191..cf753f30 100644 --- a/modules/local/mtx_to_h5ad.nf +++ b/modules/local/mtx_to_h5ad.nf @@ -22,9 +22,9 @@ process MTX_TO_H5AD { barcodes_tsv = "*count/counts_unfiltered/*.barcodes.txt" features_tsv = "*count/counts_unfiltered/*.genes.txt" } else if (params.aligner == 'alevin') { - mtx_matrix = "*_alevin_results/alevin/quants_mat.mtx.gz" - barcodes_tsv = "*_alevin_results/alevin/quants_mat_rows.txt" - features_tsv = "*_alevin_results/alevin/quants_mat_cols.txt" + mtx_matrix = "*_alevin_results/af_quant/alevin/quants_mat.mtx" + barcodes_tsv = "*_alevin_results/af_quant/alevin/quants_mat_rows.txt" + features_tsv = "*_alevin_results/af_quant/alevin/quants_mat_cols.txt" } else if (params.aligner == 'star') { mtx_matrix = "*.Solo.out/Gene*/filtered/matrix.mtx.gz" barcodes_tsv = "*.Solo.out/Gene*/filtered/barcodes.tsv.gz" diff --git a/modules/local/mtx_to_seurat.nf b/modules/local/mtx_to_seurat.nf index e2aa8217..3d834a2f 100644 
--- a/modules/local/mtx_to_seurat.nf +++ b/modules/local/mtx_to_seurat.nf @@ -26,9 +26,9 @@ process MTX_TO_SEURAT { barcodes = "*count/counts_unfiltered/*.barcodes.txt" features = "*count/counts_unfiltered/*.genes.txt" } else if (params.aligner == "alevin") { - matrix = "*_alevin_results/alevin/quants_mat.mtx.gz" - barcodes = "*_alevin_results/alevin/quants_mat_rows.txt" - features = "*_alevin_results/alevin/quants_mat_cols.txt" + matrix = "*_alevin_results/af_quant/alevin/quants_mat.mtx" + barcodes = "*_alevin_results/af_quant/alevin/quants_mat_rows.txt" + features = "*_alevin_results/af_quant/alevin/quants_mat_cols.txt" } else if (params.aligner == 'star') { matrix = "*.Solo.out/Gene*/filtered/matrix.mtx.gz" barcodes = "*.Solo.out/Gene*/filtered/barcodes.tsv.gz" diff --git a/test/null/pipeline_info/execution_trace_2022-09-26_07-01-32.txt b/test/null/pipeline_info/execution_trace_2022-09-26_07-01-32.txt new file mode 100644 index 00000000..6b739acd --- /dev/null +++ b/test/null/pipeline_info/execution_trace_2022-09-26_07-01-32.txt @@ -0,0 +1 @@ +task_id hash native_id name status exit submit duration realtime %cpu peak_rss peak_vmem rchar wchar From 73bfc063e85b16e1256d8fab678c2af6c4ee5adf Mon Sep 17 00:00:00 2001 From: Khajidu Date: Thu, 29 Sep 2022 12:56:53 +0200 Subject: [PATCH 111/165] Repair inconsistency --- modules/local/simpleaf_index.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/simpleaf_index.nf b/modules/local/simpleaf_index.nf index 853f1f95..ef05c426 100644 --- a/modules/local/simpleaf_index.nf +++ b/modules/local/simpleaf_index.nf @@ -2,7 +2,7 @@ process SIMPLEAF_INDEX { tag "$transcript_gtf" label "process_medium" - conda (params.enable_conda ? 'bioconda::simpleaf=0.4.0' : null) + conda (params.enable_conda ? 'bioconda::simpleaf=0.5.1--h9f5acd7_0' : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/simpleaf:0.5.1--h9f5acd7_0' : 'quay.io/biocontainers/simpleaf:0.5.1--h9f5acd7_0' }" From b753ab170b024f9635aab5feed21b33c4ce00c60 Mon Sep 17 00:00:00 2001 From: Khajidu Date: Thu, 29 Sep 2022 12:59:06 +0200 Subject: [PATCH 112/165] repair inconsistency --- modules/local/simpleaf_quant.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/simpleaf_quant.nf b/modules/local/simpleaf_quant.nf index 7f14048e..3540a877 100644 --- a/modules/local/simpleaf_quant.nf +++ b/modules/local/simpleaf_quant.nf @@ -2,7 +2,7 @@ process SIMPLEAF_QUANT { tag "$meta.id" label 'process_high' - conda (params.enable_conda ? 'bioconda::simpleaf=0.4.0' : null) + conda (params.enable_conda ? 'bioconda::simpleaf=0.5.1--h9f5acd7_0' : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/simpleaf:0.5.1--h9f5acd7_0' : 'quay.io/biocontainers/simpleaf:0.5.1--h9f5acd7_0' }" From c31554f856fd9a5cc793b676ffcbdda5ab1668b9 Mon Sep 17 00:00:00 2001 From: Khajidu Date: Thu, 29 Sep 2022 13:20:02 +0200 Subject: [PATCH 113/165] Update simpleaf_quant.nf --- modules/local/simpleaf_quant.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/simpleaf_quant.nf b/modules/local/simpleaf_quant.nf index 3540a877..e1178598 100644 --- a/modules/local/simpleaf_quant.nf +++ b/modules/local/simpleaf_quant.nf @@ -2,7 +2,7 @@ process SIMPLEAF_QUANT { tag "$meta.id" label 'process_high' - conda (params.enable_conda ? 'bioconda::simpleaf=0.5.1--h9f5acd7_0' : null) + conda (params.enable_conda ? 'bioconda::simpleaf=0.5.1' : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/simpleaf:0.5.1--h9f5acd7_0' : 'quay.io/biocontainers/simpleaf:0.5.1--h9f5acd7_0' }" From 13472570da5a657733e9dd86ebd4c7de0fd65494 Mon Sep 17 00:00:00 2001 From: Khajidu Date: Thu, 29 Sep 2022 13:20:30 +0200 Subject: [PATCH 114/165] Update simpleaf_index.nf --- modules/local/simpleaf_index.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/simpleaf_index.nf b/modules/local/simpleaf_index.nf index ef05c426..2d3d7cb2 100644 --- a/modules/local/simpleaf_index.nf +++ b/modules/local/simpleaf_index.nf @@ -2,7 +2,7 @@ process SIMPLEAF_INDEX { tag "$transcript_gtf" label "process_medium" - conda (params.enable_conda ? 'bioconda::simpleaf=0.5.1--h9f5acd7_0' : null) + conda (params.enable_conda ? 'bioconda::simpleaf=0.5.1' : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/simpleaf:0.5.1--h9f5acd7_0' : 'quay.io/biocontainers/simpleaf:0.5.1--h9f5acd7_0' }" From e172ecc16688f4422b07b906787aaa2a241c15f0 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Thu, 29 Sep 2022 18:16:43 +0000 Subject: [PATCH 115/165] Also update conda for alevinqc ! --- modules/local/alevinqc.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/alevinqc.nf b/modules/local/alevinqc.nf index c56eb6a6..be8ae8cc 100644 --- a/modules/local/alevinqc.nf +++ b/modules/local/alevinqc.nf @@ -2,7 +2,7 @@ process ALEVINQC { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::bioconductor-alevinqc=1.6.1" : null) + conda (params.enable_conda ? "bioconda::bioconductor-alevinqc=1.12.1" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/bioconductor-alevinqc:1.12.1--r41h9f5acd7_0' : 'quay.io/biocontainers/bioconductor-alevinqc:1.12.1--r41h9f5acd7_0' }" From 9f62dbafa65775ec69f84337429c35ba36878065 Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Thu, 29 Sep 2022 19:40:32 +0000 Subject: [PATCH 116/165] add orientation to module --- conf/modules.config | 2 +- modules/local/alevinqc.nf | 2 +- modules/local/simpleaf_quant.nf | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 2b05f1e7..66584811 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -82,7 +82,7 @@ if (params.aligner == "alevin") { path: { "${params.outdir}/${params.aligner}" }, mode: params.publish_dir_mode ] - ext.args = "-r cr-like" + ext.args = "-r cr-like -d fw" } } } diff --git a/modules/local/alevinqc.nf b/modules/local/alevinqc.nf index be8ae8cc..4832a2d6 100644 --- a/modules/local/alevinqc.nf +++ b/modules/local/alevinqc.nf @@ -22,7 +22,7 @@ process ALEVINQC { alevinFryQCReport( mapDir = "${alevin_results}/af_map", quantDir = "${alevin_results}/af_quant", - permitDir= "${alevin_results}", + permitDir= "${alevin_results}/af_quant", sampleId = "${prefix}", outputFile = "alevin_report_${meta.id}.html", outputFormat = "html_document", diff --git a/modules/local/simpleaf_quant.nf b/modules/local/simpleaf_quant.nf index e1178598..827e8991 100644 --- a/modules/local/simpleaf_quant.nf +++ b/modules/local/simpleaf_quant.nf @@ -23,7 +23,7 @@ process SIMPLEAF_QUANT { path "versions.yml" , emit: versions script: - def args = task.ext.args ?: '' + def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" // separate forward from reverse pairs From 520bc15da9f0025094936456d905afea7389b66e Mon Sep 17 00:00:00 2001 From: Rob Syme Date: Fri, 30 Sep 2022 03:02:29 +0000 Subject: [PATCH 117/165] Use iGenome for fasta and gtf when available --- README.md | 2 +- conf/test.config | 6 +++++- 
conf/test_full.config | 6 ++++-- docs/usage.md | 2 +- lib/WorkflowMain.groovy | 6 +++++- lib/WorkflowScrnaseq.groovy | 5 +++++ main.nf | 3 +++ nextflow.config | 3 +-- nextflow_schema.json | 2 +- subworkflows/local/alevin.nf | 2 +- subworkflows/local/align_cellranger.nf | 2 +- subworkflows/local/kallisto_bustools.nf | 2 +- subworkflows/local/starsolo.nf | 2 +- workflows/scrnaseq.nf | 4 ++-- 14 files changed, 32 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 5bd4ca6f..64e7419a 100644 --- a/README.md +++ b/README.md @@ -53,7 +53,7 @@ The nf-core/scrnaseq pipeline comes with documentation about the pipeline [usage 4. Start running your own analysis! ```bash - nextflow run nf-core/scrnaseq --input samplesheet.csv --outdir --genome_fasta GRCm38.p6.genome.chr19.fa --gtf gencode.vM19.annotation.chr19.gtf --protocol 10XV2 --aligner -profile + nextflow run nf-core/scrnaseq --input samplesheet.csv --outdir --fasta GRCm38.p6.genome.chr19.fa --gtf gencode.vM19.annotation.chr19.gtf --protocol 10XV2 --aligner -profile ``` ## Credits diff --git a/conf/test.config b/conf/test.config index 95d1ed1c..34111c6c 100644 --- a/conf/test.config +++ b/conf/test.config @@ -21,9 +21,13 @@ params { // Input data input = 'https://github.com/nf-core/test-datasets/raw/scrnaseq/samplesheet-2-0.csv' - genome_fasta = 'https://github.com/nf-core/test-datasets/raw/scrnaseq/reference/GRCm38.p6.genome.chr19.fa' + + // Genome references + fasta = 'https://github.com/nf-core/test-datasets/raw/scrnaseq/reference/GRCm38.p6.genome.chr19.fa' gtf = 'https://github.com/nf-core/test-datasets/raw/scrnaseq/reference/gencode.vM19.annotation.chr19.gtf' + aligner = 'star' protocol = '10XV2' + // Ignore `--input` as otherwise the parameter validation will throw an error schema_ignore_params = 'genomes,input_paths,input' } diff --git a/conf/test_full.config b/conf/test_full.config index 033450cc..13c716d8 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -16,8 +16,10 @@ params { 
// Input data input = 'https://raw.githubusercontent.com/nf-core/test-datasets/scrnaseq/samplesheet_2.0_full.csv' - genome_fasta = 's3://nf-core-awsmegatests/scrnaseq/input_data/Homo_sapiens.GRCh38.dna.primary_assembly.fa' - gtf = 's3://nf-core-awsmegatests/scrnaseq/input_data/Homo_sapiens.GRCh38.106.gtf' + + // Genome references + genome = 'GRCh38' + aligner = 'star' protocol = '10XV2' schema_ignore_params = 'genomes' } diff --git a/docs/usage.md b/docs/usage.md index f0215070..a566804f 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -77,7 +77,7 @@ TEST1,TEST1_S1_L001_R1_001.fastq.gz,TEST1_S1_L001_R2_001.fastq.gz The minimum typical command for running the pipeline is as follows: ```bash -nextflow run nf-core/scrnaseq --input 'samplesheet.csv' --genome_fasta human.fasta --gtf human.gtf -profile docker +nextflow run nf-core/scrnaseq --input 'samplesheet.csv' --genome GRCh38 -profile docker ``` This will launch the pipeline with the `docker` configuration profile and default `--type` and `--barcode_whitelist`. See below for more information about profiles and these options. 
diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy index f81790bf..627cb63c 100755 --- a/lib/WorkflowMain.groovy +++ b/lib/WorkflowMain.groovy @@ -21,7 +21,7 @@ class WorkflowMain { // Print help to screen if required // public static String help(workflow, params, log) { - def command = "nextflow run ${workflow.manifest.name} --input samplesheet.csv --genome_fasta human.fasta --gtf human.gtf -profile docker" + def command = "nextflow run ${workflow.manifest.name} --input samplesheet.csv --fasta human.fasta --gtf human.gtf -profile docker" def help_string = '' help_string += NfcoreTemplate.logo(workflow, params.monochrome_logs) help_string += NfcoreSchema.paramsHelp(workflow, params, command) @@ -85,7 +85,11 @@ class WorkflowMain { if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { if (params.genomes[ params.genome ].containsKey(attribute)) { return params.genomes[ params.genome ][ attribute ] + } else { + println "Could not find attribute '$attribute'" } + } else { + println "Could not find genome" } return null } diff --git a/lib/WorkflowScrnaseq.groovy b/lib/WorkflowScrnaseq.groovy index af239a67..25c2b120 100755 --- a/lib/WorkflowScrnaseq.groovy +++ b/lib/WorkflowScrnaseq.groovy @@ -14,6 +14,11 @@ class WorkflowScrnaseq { log.error "Please provide an input samplesheet with --input" System.exit(1) } + + if (!params.fasta) { + log.error "Genome fasta file not specified with e.g. '--fasta genome.fa' or via a detectable config file." 
+ System.exit(1) + } } // diff --git a/main.nf b/main.nf index 20a8d1fc..fe560c6d 100644 --- a/main.nf +++ b/main.nf @@ -17,6 +17,9 @@ nextflow.enable.dsl = 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ +params.fasta = WorkflowMain.getGenomeAttribute(params, 'fasta') +params.gtf = WorkflowMain.getGenomeAttribute(params, 'gtf') + WorkflowMain.initialise(workflow, params, log) /* diff --git a/nextflow.config b/nextflow.config index f4bfc50a..e6d6bef8 100644 --- a/nextflow.config +++ b/nextflow.config @@ -18,8 +18,7 @@ params { protocol = '10XV3' // reference files - genome_fasta = null - gtf = null + genome = null transcript_fasta = null // salmon alevin parameters diff --git a/nextflow_schema.json b/nextflow_schema.json index 98f5421f..783ecf98 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -81,7 +81,7 @@ "fa_icon": "fas fa-book", "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details." 
}, - "genome_fasta": { + "fasta": { "type": "string", "format": "file-path", "mimetype": "text/plain", diff --git a/subworkflows/local/alevin.nf b/subworkflows/local/alevin.nf index c1b122e1..7ec0c57e 100644 --- a/subworkflows/local/alevin.nf +++ b/subworkflows/local/alevin.nf @@ -28,7 +28,7 @@ workflow SCRNASEQ_ALEVIN { ch_versions = Channel.empty() assert salmon_index || (genome_fasta && gtf) || (genome_fasta && transcript_fasta): - """Must provide a genome fasta file ('--genome_fasta') and a gtf file ('--gtf'), or a genome fasta file + """Must provide a genome fasta file ('--fasta') and a gtf file ('--gtf'), or a genome fasta file and a transcriptome fasta file ('--transcript_fasta`) if no index is given!""".stripIndent() assert txp2gene || gtf: diff --git a/subworkflows/local/align_cellranger.nf b/subworkflows/local/align_cellranger.nf index 744215e0..99e89656 100644 --- a/subworkflows/local/align_cellranger.nf +++ b/subworkflows/local/align_cellranger.nf @@ -19,7 +19,7 @@ workflow CELLRANGER_ALIGN { ch_versions = Channel.empty() assert cellranger_index || (fasta && gtf): - "Must provide either a cellranger index or both a fasta file ('--genome_fasta') and a gtf file ('--gtf')." + "Must provide either a cellranger index or both a fasta file ('--fasta') and a gtf file ('--gtf')." if (!cellranger_index) { // Filter GTF based on gene biotypes passed in params.modules diff --git a/subworkflows/local/kallisto_bustools.nf b/subworkflows/local/kallisto_bustools.nf index bc958b47..356378c3 100644 --- a/subworkflows/local/kallisto_bustools.nf +++ b/subworkflows/local/kallisto_bustools.nf @@ -23,7 +23,7 @@ workflow KALLISTO_BUSTOOLS { ch_versions = Channel.empty() assert kallisto_index || (genome_fasta && gtf): - "Must provide a genome fasta file ('--genome_fasta') and a gtf file ('--gtf') if no index is given!" + "Must provide a genome fasta file ('--fasta') and a gtf file ('--gtf') if no index is given!" 
assert txp2gene || gtf: "Must provide either a GTF file ('--gtf') or kallisto gene map ('--kallisto_gene_map') to align with kallisto bustools!" diff --git a/subworkflows/local/starsolo.nf b/subworkflows/local/starsolo.nf index 2c2f57dd..94b4c976 100644 --- a/subworkflows/local/starsolo.nf +++ b/subworkflows/local/starsolo.nf @@ -22,7 +22,7 @@ workflow STARSOLO { ch_versions = Channel.empty() assert star_index || (genome_fasta && gtf): - "Must provide a genome fasta file ('--genome_fasta') and a gtf file ('--gtf') if no index is given!" + "Must provide a genome fasta file ('--fasta') and a gtf file ('--gtf') if no index is given!" assert gtf: "Must provide a gtf file ('--gtf') for STARSOLO" diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf index 3e557eb1..3e2e5fe8 100644 --- a/workflows/scrnaseq.nf +++ b/workflows/scrnaseq.nf @@ -10,7 +10,7 @@ def summary_params = NfcoreSchema.paramsSummaryMap(workflow, params) WorkflowScrnaseq.initialise(params, log) def checkPathParamList = [ - params.input, params.multiqc_config, params.genome_fasta, params.gtf, + params.input, params.multiqc_config, params.fasta, params.gtf, params.transcript_fasta, params.salmon_index, params.kallisto_index, params.star_index, params.txp2gene, params.barcode_whitelist, params.cellranger_index ] @@ -69,7 +69,7 @@ ch_output_docs_images = file("$projectDir/docs/images/", checkIfExists: true) // general input and params ch_input = file(params.input) -ch_genome_fasta = params.genome_fasta ? file(params.genome_fasta) : [] +ch_genome_fasta = params.fasta ? file(params.fasta) : [] ch_gtf = params.gtf ? file(params.gtf) : [] ch_transcript_fasta = params.transcript_fasta ? file(params.transcript_fasta): [] ch_txp2gene = params.txp2gene ? file(params.txp2gene) : [] From 4077c970222f6ab8e484b65d6ace4a12c671bc50 Mon Sep 17 00:00:00 2001 From: Rob Syme Date: Fri, 30 Sep 2022 03:06:16 +0000 Subject: [PATCH 118/165] Typo and linting fix for schema. 
--- nextflow_schema.json | 38 ++++++++++++++++++++++++++++++++------ 1 file changed, 32 insertions(+), 6 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 783ecf98..88f1efa1 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -10,7 +10,10 @@ "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": ["input", "outdir"], + "required": [ + "input", + "outdir" + ], "properties": { "input": { "type": "string", @@ -57,14 +60,24 @@ "default": "alevin", "help_text": "The workflow can handle three types of methods:\n\n- Kallisto/Bustools\n- Salmon Alevin + AlevinQC\n- STARsolo\n\nTo choose which one to use, please specify either `alevin`, `star` or `kallisto` as a parameter option for `--aligner`. By default, the pipeline runs the `alevin` option. Note that specifying another aligner option also requires choosing appropriate parameters (see below) for the selected option.", "fa_icon": "fas fa-align-center", - "enum": ["kallisto", "star", "alevin", "cellranger"] + "enum": [ + "kallisto", + "star", + "alevin", + "cellranger" + ] }, "protocol": { "type": "string", "description": "The protocol that was used to generate the single cell data, e.g. 10XV2 (default).", "default": "10XV2", "fa_icon": "fas fa-cogs", - "enum": ["10XV3", "10XV2", "10XV1", "dropseq"] + "enum": [ + "10XV3", + "10XV2", + "10XV1", + "dropseq" + ] } }, "fa_icon": "fas fa-terminal" @@ -107,7 +120,7 @@ }, "transcript_fasta": { "type": "string", - "description": "A cDNA FASTQ file", + "description": "A cDNA FASTA file", "fa_icon": "fas fa-dna" }, "gtf": { @@ -192,7 +205,13 @@ "default": "standard", "description": "Type of workflow. Use `lamanno` for RNA velocity based on La Manno et al. 2018 logic. Use `nucleus` for RNA velocity on single-nucleus RNA-seq reads. Use `kite` for feature barcoding. Use `kite: 10xFB` for 10x Genomics Feature Barcoding technology. 
(default: standard)", "fa_icon": "fas fa-fish", - "enum": ["standard", "lamanno", "nucleus", "kite", "kite: 10xFB"] + "enum": [ + "standard", + "lamanno", + "nucleus", + "kite", + "kite: 10xFB" + ] } } }, @@ -310,7 +329,14 @@ "description": "Method used to save pipeline results to output directory.", "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", "fa_icon": "fas fa-copy", - "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], + "enum": [ + "symlink", + "rellink", + "link", + "copy", + "copyNoFollow", + "move" + ], "hidden": true }, "email_on_fail": { From 711a8051207eda0298e97862e5430e60aac71de4 Mon Sep 17 00:00:00 2001 From: Rob Syme Date: Fri, 30 Sep 2022 03:11:12 +0000 Subject: [PATCH 119/165] Set seq_center param default for STARsolo aligner. --- nextflow.config | 1 + 1 file changed, 1 insertion(+) diff --git a/nextflow.config b/nextflow.config index e6d6bef8..2761e80f 100644 --- a/nextflow.config +++ b/nextflow.config @@ -35,6 +35,7 @@ params { // STARsolo parameters star_index = null star_ignore_sjdbgtf = null + seq_center = null // Cellranger parameters cellranger_index = null From adb9cf0e03b748df09378e3437e1d5164f5c899f Mon Sep 17 00:00:00 2001 From: Rob Syme Date: Fri, 30 Sep 2022 03:17:50 +0000 Subject: [PATCH 120/165] Whitespace fix --- lib/WorkflowScrnaseq.groovy | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/WorkflowScrnaseq.groovy b/lib/WorkflowScrnaseq.groovy index 25c2b120..73998b0c 100755 --- a/lib/WorkflowScrnaseq.groovy +++ b/lib/WorkflowScrnaseq.groovy @@ -14,7 +14,7 @@ class WorkflowScrnaseq { log.error "Please provide an input samplesheet with --input" System.exit(1) } - + if (!params.fasta) { log.error "Genome fasta file not specified with e.g. 
'--fasta genome.fa' or via a detectable config file." System.exit(1) From 4a200466b858bd2655beed0d6f80920cc0c895e0 Mon Sep 17 00:00:00 2001 From: Rob Syme Date: Fri, 30 Sep 2022 03:35:13 +0000 Subject: [PATCH 121/165] Include seq_center in schema --- nextflow_schema.json | 43 +++++++++++-------------------------------- 1 file changed, 11 insertions(+), 32 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 88f1efa1..963abc19 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -2,7 +2,7 @@ "$schema": "http://json-schema.org/draft-07/schema", "$id": "https://raw.githubusercontent.com/nf-core/scrnaseq/master/nextflow_schema.json", "title": "nf-core/scrnaseq pipeline parameters", - "description": "Pipeline for processing of 10xGenomics single cell rnaseq data", + "description": "Pipeline for processing 10x Genomics single cell rnaseq data", "type": "object", "definitions": { "input_output_options": { @@ -10,10 +10,7 @@ "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": [ - "input", - "outdir" - ], + "required": ["input", "outdir"], "properties": { "input": { "type": "string", @@ -60,24 +57,14 @@ "default": "alevin", "help_text": "The workflow can handle three types of methods:\n\n- Kallisto/Bustools\n- Salmon Alevin + AlevinQC\n- STARsolo\n\nTo choose which one to use, please specify either `alevin`, `star` or `kallisto` as a parameter option for `--aligner`. By default, the pipeline runs the `alevin` option. Note that specifying another aligner option also requires choosing appropriate parameters (see below) for the selected option.", "fa_icon": "fas fa-align-center", - "enum": [ - "kallisto", - "star", - "alevin", - "cellranger" - ] + "enum": ["kallisto", "star", "alevin", "cellranger"] }, "protocol": { "type": "string", "description": "The protocol that was used to generate the single cell data, e.g. 
10XV2 (default).", "default": "10XV2", "fa_icon": "fas fa-cogs", - "enum": [ - "10XV3", - "10XV2", - "10XV1", - "dropseq" - ] + "enum": ["10XV3", "10XV2", "10XV1", "dropseq"] } }, "fa_icon": "fas fa-terminal" @@ -169,6 +156,11 @@ "star_ignore_sjdbgtf": { "type": "string", "description": "Ignore the SJDB GTF file." + }, + "seq_center": { + "type": "string", + "description": "Name of sequencing center for BAM read group tag.", + "default": null } }, "fa_icon": "fas fa-star" @@ -205,13 +197,7 @@ "default": "standard", "description": "Type of workflow. Use `lamanno` for RNA velocity based on La Manno et al. 2018 logic. Use `nucleus` for RNA velocity on single-nucleus RNA-seq reads. Use `kite` for feature barcoding. Use `kite: 10xFB` for 10x Genomics Feature Barcoding technology. (default: standard)", "fa_icon": "fas fa-fish", - "enum": [ - "standard", - "lamanno", - "nucleus", - "kite", - "kite: 10xFB" - ] + "enum": ["standard", "lamanno", "nucleus", "kite", "kite: 10xFB"] } } }, @@ -329,14 +315,7 @@ "description": "Method used to save pipeline results to output directory.", "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. 
See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", "fa_icon": "fas fa-copy", - "enum": [ - "symlink", - "rellink", - "link", - "copy", - "copyNoFollow", - "move" - ], + "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], "hidden": true }, "email_on_fail": { From f2d4c9383a25775f62aff8c33f8cd284867ecc57 Mon Sep 17 00:00:00 2001 From: Rob Syme Date: Fri, 30 Sep 2022 03:37:40 +0000 Subject: [PATCH 122/165] Python black linting suggestions --- bin/check_samplesheet.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py index 77a22f17..38db2319 100755 --- a/bin/check_samplesheet.py +++ b/bin/check_samplesheet.py @@ -142,11 +142,9 @@ def read_head(handle, num_lines=10): def print_error(error, context="Line", context_str=""): - error_str = "ERROR: Please check samplesheet -> {}".format(error) + error_str = f"ERROR: Please check samplesheet -> {error}" if context != "" and context_str != "": - error_str = "ERROR: Please check samplesheet -> {}\n{}: '{}'".format( - error, context.strip(), context_str.strip() - ) + error_str = f"ERROR: Please check samplesheet -> {error}\n{context.strip()}: '{context_str.strip()}'" print(error_str) sys.exit(1) From b1640220cc4aadd1c894e611555a725dd0ce7734 Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Fri, 30 Sep 2022 14:53:34 +0000 Subject: [PATCH 123/165] fixing simple_af results --- conf/modules.config | 2 +- modules/local/alevinqc.nf | 2 +- modules/local/simpleaf_quant.nf | 3 ++- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 66584811..2b05f1e7 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -82,7 +82,7 @@ if (params.aligner == "alevin") { path: { "${params.outdir}/${params.aligner}" }, mode: params.publish_dir_mode ] - ext.args = "-r cr-like -d fw" + ext.args = "-r cr-like" } } } diff --git 
a/modules/local/alevinqc.nf b/modules/local/alevinqc.nf index 4832a2d6..4e1a7d7e 100644 --- a/modules/local/alevinqc.nf +++ b/modules/local/alevinqc.nf @@ -12,7 +12,7 @@ process ALEVINQC { output: tuple val(meta), path("alevin_report_${meta.id}.html"), emit: report - path "versions.yml" , emit: versions + path "versions.yml", emit: versions script: def prefix = task.ext.prefix ?: "${meta.id}" diff --git a/modules/local/simpleaf_quant.nf b/modules/local/simpleaf_quant.nf index 827e8991..fb485df8 100644 --- a/modules/local/simpleaf_quant.nf +++ b/modules/local/simpleaf_quant.nf @@ -47,8 +47,9 @@ process SIMPLEAF_QUANT { -c $protocol \\ -u whitelist.txt \\ $args - + mv whitelist.txt ${prefix}_alevin_results/ + cp ${prefix}_alevin_results/af_quant/permit_freq.bin ${prefix}_alevin_results/af_quant/all_freq.bin cat <<-END_VERSIONS > versions.yml "${task.process}": From 001f456c24b1588f651e14823600c6efc323b0df Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Fri, 30 Sep 2022 15:00:24 +0000 Subject: [PATCH 124/165] remove wrong file --- test/null/pipeline_info/execution_trace_2022-09-26_07-01-32.txt | 1 - 1 file changed, 1 deletion(-) delete mode 100644 test/null/pipeline_info/execution_trace_2022-09-26_07-01-32.txt diff --git a/test/null/pipeline_info/execution_trace_2022-09-26_07-01-32.txt b/test/null/pipeline_info/execution_trace_2022-09-26_07-01-32.txt deleted file mode 100644 index 6b739acd..00000000 --- a/test/null/pipeline_info/execution_trace_2022-09-26_07-01-32.txt +++ /dev/null @@ -1 +0,0 @@ -task_id hash native_id name status exit submit duration realtime %cpu peak_rss peak_vmem rchar wchar From 69074dc6c58fa4b7d86e5da08a10ba628ea0fffe Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Fri, 30 Sep 2022 15:58:31 +0000 Subject: [PATCH 125/165] detect the use of incompatible parameters --- conf/modules.config | 2 +- modules/local/simpleaf_quant.nf | 23 ++++++++++++++++++----- 2 files changed, 19 insertions(+), 6 deletions(-) diff --git 
a/conf/modules.config b/conf/modules.config index 2b05f1e7..9b3a7887 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -82,7 +82,7 @@ if (params.aligner == "alevin") { path: { "${params.outdir}/${params.aligner}" }, mode: params.publish_dir_mode ] - ext.args = "-r cr-like" + ext.args = "-r cr-like -k" } } } diff --git a/modules/local/simpleaf_quant.nf b/modules/local/simpleaf_quant.nf index fb485df8..5cd3a8a4 100644 --- a/modules/local/simpleaf_quant.nf +++ b/modules/local/simpleaf_quant.nf @@ -23,8 +23,21 @@ process SIMPLEAF_QUANT { path "versions.yml" , emit: versions script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" + def args = task.ext.args ?: '' + def args_list = args.tokenize() + def prefix = task.ext.prefix ?: "${meta.id}" + + // + // check if users are using one of the mutually excludable parameters: + // e.g -k,--knee | -e,--expect-cells | -f, --forced-cells + // + if (args_list.any { it in ['-k', '--knee', '-e', '--expect-cells', '-f', '--forced-cells']}) { + unfiltered_command = "" + save_whitelist = "" + } else { + unfiltered_command = "-u whitelist.txt" + save_whitelist = "mv whitelist.txt ${prefix}_alevin_results/" + } // separate forward from reverse pairs def (forward, reverse) = reads.collate(2).transpose() @@ -45,11 +58,11 @@ process SIMPLEAF_QUANT { -m $txp2gene \\ -t $task.cpus \\ -c $protocol \\ - -u whitelist.txt \\ + $unfiltered_command \\ $args - mv whitelist.txt ${prefix}_alevin_results/ - cp ${prefix}_alevin_results/af_quant/permit_freq.bin ${prefix}_alevin_results/af_quant/all_freq.bin + $save_whitelist + [[ ! 
-f ${prefix}_alevin_results/af_quant/all_freq.bin ]] && cp ${prefix}_alevin_results/af_quant/permit_freq.bin ${prefix}_alevin_results/af_quant/all_freq.bin cat <<-END_VERSIONS > versions.yml "${task.process}": From f3b072f5f8fb8b7dffe81b13218b67d47aaa5ca9 Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Fri, 30 Sep 2022 15:58:42 +0000 Subject: [PATCH 126/165] remove hard-coded version --- modules/local/simpleaf_index.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/local/simpleaf_index.nf b/modules/local/simpleaf_index.nf index 2d3d7cb2..c81c5a51 100644 --- a/modules/local/simpleaf_index.nf +++ b/modules/local/simpleaf_index.nf @@ -39,10 +39,10 @@ process SIMPLEAF_INDEX { $seq_inputs \\ $args \\ -o salmon - + cat <<-END_VERSIONS > versions.yml "${task.process}": - simpleaf: 0.4.0 + simpleaf: TODO: find a way to grab version END_VERSIONS """ } From 3766144ca37c3628d3f780fd9d26d7cacc535c51 Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Fri, 30 Sep 2022 16:05:18 +0000 Subject: [PATCH 127/165] turning back to default option --- conf/modules.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index 9b3a7887..2b05f1e7 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -82,7 +82,7 @@ if (params.aligner == "alevin") { path: { "${params.outdir}/${params.aligner}" }, mode: params.publish_dir_mode ] - ext.args = "-r cr-like -k" + ext.args = "-r cr-like" } } } From b5de9008314216265cea479a4114416d76d66d35 Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Fri, 30 Sep 2022 16:11:44 +0000 Subject: [PATCH 128/165] add suffix to uncompressed whitelist --- modules/local/simpleaf_quant.nf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/local/simpleaf_quant.nf b/modules/local/simpleaf_quant.nf index 5cd3a8a4..a95b6e68 100644 --- a/modules/local/simpleaf_quant.nf +++ b/modules/local/simpleaf_quant.nf @@ 
-35,8 +35,8 @@ process SIMPLEAF_QUANT { unfiltered_command = "" save_whitelist = "" } else { - unfiltered_command = "-u whitelist.txt" - save_whitelist = "mv whitelist.txt ${prefix}_alevin_results/" + unfiltered_command = "-u whitelist.uncompressed.txt" + save_whitelist = "mv whitelist.uncompressed.txt ${prefix}_alevin_results/" } // separate forward from reverse pairs @@ -49,7 +49,7 @@ process SIMPLEAF_QUANT { simpleaf set-paths # run simpleaf quant - gzip -dcf $whitelist > whitelist.txt + gzip -dcf $whitelist > whitelist.uncompressed.txt simpleaf quant \\ -1 ${forward.join( "," )} \\ -2 ${reverse.join( "," )} \\ From 16ce654b2b6e4fbf25ec4b248a86c92d3300466c Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Fri, 30 Sep 2022 16:24:45 +0000 Subject: [PATCH 129/165] adding simpleaf version as variable and getting salmon version --- modules/local/simpleaf_index.nf | 5 ++++- modules/local/simpleaf_quant.nf | 3 +++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/modules/local/simpleaf_index.nf b/modules/local/simpleaf_index.nf index c81c5a51..f1837a1d 100644 --- a/modules/local/simpleaf_index.nf +++ b/modules/local/simpleaf_index.nf @@ -24,6 +24,8 @@ process SIMPLEAF_INDEX { script: def args = task.ext.args ?: '' def seq_inputs = (params.transcript_fasta) ? "--refseq $transcript_fasta" : "--gtf $transcript_gtf" + // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. + def VERSION = '0.5.1' """ # export required var export ALEVIN_FRY_HOME=. 
@@ -42,7 +44,8 @@ process SIMPLEAF_INDEX { cat <<-END_VERSIONS > versions.yml "${task.process}": - simpleaf: TODO: find a way to grab version + simpleaf: $VERSION + salmon: \$(salmon --version | sed -e "s/salmon //g") END_VERSIONS """ } diff --git a/modules/local/simpleaf_quant.nf b/modules/local/simpleaf_quant.nf index a95b6e68..8385a803 100644 --- a/modules/local/simpleaf_quant.nf +++ b/modules/local/simpleaf_quant.nf @@ -26,6 +26,8 @@ process SIMPLEAF_QUANT { def args = task.ext.args ?: '' def args_list = args.tokenize() def prefix = task.ext.prefix ?: "${meta.id}" + // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. + def VERSION = '0.5.1' // // check if users are using one of the mutually excludable parameters: @@ -66,6 +68,7 @@ process SIMPLEAF_QUANT { cat <<-END_VERSIONS > versions.yml "${task.process}": + simpleaf: $VERSION salmon: \$(salmon --version | sed -e "s/salmon //g") END_VERSIONS """ From 518ee5d854f430336e2c7e6a56f848413a5cbcd9 Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Sat, 1 Oct 2022 20:44:49 +0000 Subject: [PATCH 130/165] bump simpleaf version --- modules/local/simpleaf_index.nf | 8 +++----- modules/local/simpleaf_quant.nf | 8 +++----- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/modules/local/simpleaf_index.nf b/modules/local/simpleaf_index.nf index f1837a1d..bf31285f 100644 --- a/modules/local/simpleaf_index.nf +++ b/modules/local/simpleaf_index.nf @@ -4,8 +4,8 @@ process SIMPLEAF_INDEX { conda (params.enable_conda ? 'bioconda::simpleaf=0.5.1' : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/simpleaf:0.5.1--h9f5acd7_0' : - 'quay.io/biocontainers/simpleaf:0.5.1--h9f5acd7_0' }" + 'https://depot.galaxyproject.org/singularity/simpleaf:0.5.2--h9f5acd7_0' : + 'quay.io/biocontainers/simpleaf:0.5.2--h9f5acd7_0' }" input: path genome_fasta @@ -24,8 +24,6 @@ process SIMPLEAF_INDEX { script: def args = task.ext.args ?: '' def seq_inputs = (params.transcript_fasta) ? "--refseq $transcript_fasta" : "--gtf $transcript_gtf" - // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. - def VERSION = '0.5.1' """ # export required var export ALEVIN_FRY_HOME=. @@ -44,7 +42,7 @@ process SIMPLEAF_INDEX { cat <<-END_VERSIONS > versions.yml "${task.process}": - simpleaf: $VERSION + simpleaf: \$(simpleaf -V | tr -d '\n') salmon: \$(salmon --version | sed -e "s/salmon //g") END_VERSIONS """ diff --git a/modules/local/simpleaf_quant.nf b/modules/local/simpleaf_quant.nf index 8385a803..e2e47ccd 100644 --- a/modules/local/simpleaf_quant.nf +++ b/modules/local/simpleaf_quant.nf @@ -4,8 +4,8 @@ process SIMPLEAF_QUANT { conda (params.enable_conda ? 'bioconda::simpleaf=0.5.1' : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/simpleaf:0.5.1--h9f5acd7_0' : - 'quay.io/biocontainers/simpleaf:0.5.1--h9f5acd7_0' }" + 'https://depot.galaxyproject.org/singularity/simpleaf:0.5.2--h9f5acd7_0' : + 'quay.io/biocontainers/simpleaf:0.5.2--h9f5acd7_0' }" input: // @@ -26,8 +26,6 @@ process SIMPLEAF_QUANT { def args = task.ext.args ?: '' def args_list = args.tokenize() def prefix = task.ext.prefix ?: "${meta.id}" - // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. 
- def VERSION = '0.5.1' // // check if users are using one of the mutually excludable parameters: @@ -68,7 +66,7 @@ process SIMPLEAF_QUANT { cat <<-END_VERSIONS > versions.yml "${task.process}": - simpleaf: $VERSION + simpleaf: \$(simpleaf -V | tr -d '\n') salmon: \$(salmon --version | sed -e "s/salmon //g") END_VERSIONS """ From 908370682fe13d142e57c4465980bfe04f340207 Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Sat, 1 Oct 2022 20:57:11 +0000 Subject: [PATCH 131/165] fix simpleaf get version command --- modules/local/simpleaf_index.nf | 2 +- modules/local/simpleaf_quant.nf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/local/simpleaf_index.nf b/modules/local/simpleaf_index.nf index bf31285f..5bf54e7d 100644 --- a/modules/local/simpleaf_index.nf +++ b/modules/local/simpleaf_index.nf @@ -42,7 +42,7 @@ process SIMPLEAF_INDEX { cat <<-END_VERSIONS > versions.yml "${task.process}": - simpleaf: \$(simpleaf -V | tr -d '\n') + simpleaf: \$(simpleaf -V | tr -d '\\n' | cut -d ' ' -f 2) salmon: \$(salmon --version | sed -e "s/salmon //g") END_VERSIONS """ diff --git a/modules/local/simpleaf_quant.nf b/modules/local/simpleaf_quant.nf index e2e47ccd..8f7b91c6 100644 --- a/modules/local/simpleaf_quant.nf +++ b/modules/local/simpleaf_quant.nf @@ -66,7 +66,7 @@ process SIMPLEAF_QUANT { cat <<-END_VERSIONS > versions.yml "${task.process}": - simpleaf: \$(simpleaf -V | tr -d '\n') + simpleaf: \$(simpleaf -V | tr -d '\\n' | cut -d ' ' -f 2) salmon: \$(salmon --version | sed -e "s/salmon //g") END_VERSIONS """ From db77e65e4b572e733a97f599cbbbe9b08344b575 Mon Sep 17 00:00:00 2001 From: Rob Syme Date: Sun, 2 Oct 2022 01:07:18 +0000 Subject: [PATCH 132/165] Cleanup --- lib/WorkflowMain.groovy | 4 ---- subworkflows/local/align_cellranger.nf | 1 - 2 files changed, 5 deletions(-) diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy index 627cb63c..9c3250c6 100755 --- a/lib/WorkflowMain.groovy +++ b/lib/WorkflowMain.groovy @@ 
-85,11 +85,7 @@ class WorkflowMain { if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { if (params.genomes[ params.genome ].containsKey(attribute)) { return params.genomes[ params.genome ][ attribute ] - } else { - println "Could not find attribute '$attribute'" } - } else { - println "Could not find genome" } return null } diff --git a/subworkflows/local/align_cellranger.nf b/subworkflows/local/align_cellranger.nf index 99e89656..ff994b6c 100644 --- a/subworkflows/local/align_cellranger.nf +++ b/subworkflows/local/align_cellranger.nf @@ -26,7 +26,6 @@ workflow CELLRANGER_ALIGN { CELLRANGER_MKGTF( gtf ) ch_versions = ch_versions.mix(CELLRANGER_MKGTF.out.versions) - CELLRANGER_MKGTF.out.gtf.view() // Make reference genome CELLRANGER_MKREF( fasta, CELLRANGER_MKGTF.out.gtf, "cellranger_reference" ) ch_versions = ch_versions.mix(CELLRANGER_MKREF.out.versions) From 2a113742c9958595aa17cb969f90410d4090f3b0 Mon Sep 17 00:00:00 2001 From: Rob Syme Date: Sun, 2 Oct 2022 01:09:19 +0000 Subject: [PATCH 133/165] No longer allow gzipped fasta from SALMON_INDEX Previously, I introduced some unnecessary complexity to the SALMON_INDEX process to automatically decompress the genome fasta file. This is unnecessary because the pipeline has a strict requirement for uncompressed fasta elsewhere in the pipeline. --- modules/nf-core/modules/salmon/index/main.nf | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/modules/nf-core/modules/salmon/index/main.nf b/modules/nf-core/modules/salmon/index/main.nf index d875a345..87005fdd 100644 --- a/modules/nf-core/modules/salmon/index/main.nf +++ b/modules/nf-core/modules/salmon/index/main.nf @@ -22,19 +22,12 @@ process SALMON_INDEX { def args = task.ext.args ?: '' def kmer_argmatch = args =~ /\-k *(\d+)/ def k = kmer_argmatch ?
kmer_argmatch[0][1] : 31 - def get_decoy_ids = "grep '^>' $genome_fasta | cut -d ' ' -f 1 > decoys.txt" - def gentrome = "gentrome.fa" - def maybe_unzip = "cat" - if (genome_fasta.endsWith('.gz')) { - get_decoy_ids = "grep '^>' <(gunzip -c $genome_fasta) | cut -d ' ' -f 1 > decoys.txt" - gentrome = "gentrome.fa.gz" - maybe_unzip = "gunzip -c" - } """ - $get_decoy_ids - sed -i.bak -e 's/>//g' decoys.txt - cat $transcript_fasta $genome_fasta \\ - | $maybe_unzip \\ + grep '^>' $genome_fasta \\ + | cut -d ' ' -f 1 \\ + | sed 's/>//g' > decoys.txt + + cat $genome_fasta \\ | awk '!/^>/ { next } { getline seq } length(seq) >= $k { print \$0 "\\n" seq }' \\ | gzip -c > gentrome.filtered.fasta.gz From d55ffd5e5f24ef062b15a1c5503dbc0b1cefa27c Mon Sep 17 00:00:00 2001 From: Rob Syme Date: Sun, 2 Oct 2022 01:19:57 +0000 Subject: [PATCH 134/165] Include attribution for gtf_for_genes script --- bin/filter_gtf_for_genes_in_genome.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/bin/filter_gtf_for_genes_in_genome.py b/bin/filter_gtf_for_genes_in_genome.py index ef4c87cd..2ec44b9f 100755 --- a/bin/filter_gtf_for_genes_in_genome.py +++ b/bin/filter_gtf_for_genes_in_genome.py @@ -1,4 +1,7 @@ #!/usr/bin/env python +# Script originally written by Pranathi Vemuri (github.com/pranathivemuri) +# modified by Harshil Patel (github.com/drpatelh) + from __future__ import print_function import logging from itertools import groupby From 10ab4d21eb9bad4e1c1a2712fff39c83463cb7c8 Mon Sep 17 00:00:00 2001 From: Rob Syme Date: Mon, 3 Oct 2022 00:46:53 +0000 Subject: [PATCH 135/165] Turn off filtered gtf publishing and add tower.yml --- conf/modules.config | 8 ++++++++ tower.yml | 11 +++++++++++ 2 files changed, 19 insertions(+) create mode 100644 tower.yml diff --git a/conf/modules.config b/conf/modules.config index e9992e7e..0ff786c2 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -37,6 +37,14 @@ process { mode: params.publish_dir_mode ] } + withName: 'GTF_GENE_FILTER' { + 
publishDir = [ + path: { "${params.outdir}/gtf_filter" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: false + ] + } } if(params.aligner == "cellranger") { diff --git a/tower.yml b/tower.yml new file mode 100644 index 00000000..999e82d3 --- /dev/null +++ b/tower.yml @@ -0,0 +1,11 @@ +reports: + multiqc_report.html: + display: "MultiQC HTML report" + "**/fastqc/*_fastqc.html": + display: "FastQC HTML report" + "**/pipeline_info/execution_timeline_*.html": + display: "Execution timeline report" + "**/pipeline_info/execution_report_*.html": + display: "Execution overview report" + "**/star/**/*.Log.final.out": + display: "Star per-sample report" From 5035d9e27d70cd8524f4eec73eed9507bdddb014 Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Mon, 3 Oct 2022 09:38:15 +0100 Subject: [PATCH 136/165] Update check_samplesheet.py --- bin/check_samplesheet.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py index 38db2319..51f9be10 100755 --- a/bin/check_samplesheet.py +++ b/bin/check_samplesheet.py @@ -228,7 +228,9 @@ def check_samplesheet(file_in, file_out): num_cols = len([x for x in lspl if x]) if num_cols < MIN_COLS: print_error( - "Invalid number of populated columns (minimum = {})!".format(MIN_COLS), + "Invalid number of populated columns (minimum = {})!".format( + MIN_COLS + ), "Line", line, ) @@ -277,8 +279,14 @@ def check_samplesheet(file_in, file_out): for sample in sorted(sample_mapping_dict.keys()): ## Check that multiple runs of the same sample are of the same datatype - if not all(x[0] == sample_mapping_dict[sample][0][0] for x in sample_mapping_dict[sample]): - print_error("Multiple runs of a sample must be of the same datatype!", "Sample: {}".format(sample)) + if not all( + x[0] == sample_mapping_dict[sample][0][0] + for x in sample_mapping_dict[sample] + ): + print_error( + "Multiple runs of a sample 
must be of the same datatype!", + "Sample: {}".format(sample), + ) for idx, val in enumerate(sample_mapping_dict[sample]): fout.write(",".join(["{}".format(sample)] + val) + "\n") From 8bc49f5cac68e617e7ecc791f16f6b377f63931d Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Mon, 3 Oct 2022 09:40:39 +0100 Subject: [PATCH 137/165] How the hell do you run BLACK??!! --- bin/check_samplesheet.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py index 51f9be10..7e2d7e91 100755 --- a/bin/check_samplesheet.py +++ b/bin/check_samplesheet.py @@ -228,9 +228,7 @@ def check_samplesheet(file_in, file_out): num_cols = len([x for x in lspl if x]) if num_cols < MIN_COLS: print_error( - "Invalid number of populated columns (minimum = {})!".format( - MIN_COLS - ), + "Invalid number of populated columns (minimum = {})!".format(MIN_COLS), "Line", line, ) @@ -279,10 +277,7 @@ def check_samplesheet(file_in, file_out): for sample in sorted(sample_mapping_dict.keys()): ## Check that multiple runs of the same sample are of the same datatype - if not all( - x[0] == sample_mapping_dict[sample][0][0] - for x in sample_mapping_dict[sample] - ): + if not all(x[0] == sample_mapping_dict[sample][0][0] for x in sample_mapping_dict[sample]): print_error( "Multiple runs of a sample must be of the same datatype!", "Sample: {}".format(sample), From b0f7979f7769a6778aaf6f998a6998fc0b8e8977 Mon Sep 17 00:00:00 2001 From: nf-core-bot Date: Tue, 4 Oct 2022 22:05:05 +0000 Subject: [PATCH 138/165] Template update for nf-core/tools version 2.6 --- .github/workflows/awsfulltest.yml | 4 ++ .github/workflows/awstest.yml | 4 ++ .prettierignore | 1 + CITATION.cff | 8 +-- LICENSE | 2 +- README.md | 4 +- assets/adaptivecard.json | 67 +++++++++++++++++++ assets/email_template.html | 2 +- assets/methods_description_template.yml | 25 +++++++ assets/multiqc_config.yml | 6 +- docs/usage.md | 8 +++ lib/NfcoreTemplate.groovy | 55 +++++++++++++++ 
lib/Utils.groovy | 21 ++++-- lib/WorkflowScrnaseq.groovy | 19 ++++++ main.nf | 3 +- modules.json | 27 ++++---- .../custom/dumpsoftwareversions/main.nf | 8 +-- .../custom/dumpsoftwareversions/meta.yml | 0 .../templates/dumpsoftwareversions.py | 0 modules/nf-core/{modules => }/fastqc/main.nf | 12 ++++ modules/nf-core/{modules => }/fastqc/meta.yml | 0 modules/nf-core/modules/multiqc/main.nf | 31 --------- modules/nf-core/multiqc/main.nf | 53 +++++++++++++++ .../nf-core/{modules => }/multiqc/meta.yml | 15 +++++ nextflow.config | 9 ++- nextflow_schema.json | 20 +++++- workflows/scrnaseq.nf | 26 ++++--- 27 files changed, 352 insertions(+), 78 deletions(-) create mode 100644 assets/adaptivecard.json create mode 100644 assets/methods_description_template.yml mode change 100755 => 100644 lib/Utils.groovy rename modules/nf-core/{modules => }/custom/dumpsoftwareversions/main.nf (79%) rename modules/nf-core/{modules => }/custom/dumpsoftwareversions/meta.yml (100%) rename modules/nf-core/{modules => }/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py (100%) rename modules/nf-core/{modules => }/fastqc/main.nf (85%) rename modules/nf-core/{modules => }/fastqc/meta.yml (100%) delete mode 100644 modules/nf-core/modules/multiqc/main.nf create mode 100644 modules/nf-core/multiqc/main.nf rename modules/nf-core/{modules => }/multiqc/meta.yml (73%) diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml index 16970f5e..3aa2c0c7 100644 --- a/.github/workflows/awsfulltest.yml +++ b/.github/workflows/awsfulltest.yml @@ -28,3 +28,7 @@ jobs: "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/scrnaseq/results-${{ github.sha }}" } profiles: test_full,aws_tower + - uses: actions/upload-artifact@v3 + with: + name: Tower debug log file + path: tower_action_*.log diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml index 5fa5d0d0..562e0983 100644 --- a/.github/workflows/awstest.yml +++ b/.github/workflows/awstest.yml @@ -23,3 +23,7 @@ jobs: 
"outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/scrnaseq/results-test-${{ github.sha }}" } profiles: test,aws_tower + - uses: actions/upload-artifact@v3 + with: + name: Tower debug log file + path: tower_action_*.log diff --git a/.prettierignore b/.prettierignore index d0e7ae58..eb74a574 100644 --- a/.prettierignore +++ b/.prettierignore @@ -1,4 +1,5 @@ email_template.html +adaptivecard.json .nextflow* work/ data/ diff --git a/CITATION.cff b/CITATION.cff index 4533e2f2..017666c0 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -13,8 +13,8 @@ authors: given-names: Johannes - family-names: Wilm given-names: Andreas - - family-names: Ulysse Garcia - given-names: Maxime + - family-names: Garcia + given-names: Maxime Ulysse - family-names: Di Tommaso given-names: Paolo - family-names: Nahnsen @@ -39,8 +39,8 @@ prefered-citation: given-names: Johannes - family-names: Wilm given-names: Andreas - - family-names: Ulysse Garcia - given-names: Maxime + - family-names: Garcia + given-names: Maxime Ulysse - family-names: Di Tommaso given-names: Paolo - family-names: Nahnsen diff --git a/LICENSE b/LICENSE index 989b1dbc..b7fcc0cd 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) Peter J Bailey, Alexander Peltzer, Olga Botvinnik +Copyright (c) Bailey PJ, Botvinnik O, Marques de Almeida F, Peltzer A, Sturm G Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md index 804539ce..3025748a 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ -**nf-core/scrnaseq** is a bioinformatics best-practice analysis pipeline for Pipeline for processing of 10xGenomics single cell rnaseq data. +**nf-core/scrnaseq** is a bioinformatics best-practice analysis pipeline for Pipeline for processing 10x Genomics single cell rnaseq data. 
The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers making installation trivial and results highly reproducible. The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. Where possible, these processes have been submitted to and installed from [nf-core/modules](https://github.com/nf-core/modules) in order to make them available to all nf-core pipelines, and to everyone within the Nextflow community! @@ -62,7 +62,7 @@ The nf-core/scrnaseq pipeline comes with documentation about the pipeline [usage ## Credits -nf-core/scrnaseq was originally written by Peter J Bailey, Alexander Peltzer, Olga Botvinnik. +nf-core/scrnaseq was originally written by Bailey PJ, Botvinnik O, Marques de Almeida F, Peltzer A, Sturm G. 
We thank the following people for their extensive assistance in the development of this pipeline: diff --git a/assets/adaptivecard.json b/assets/adaptivecard.json new file mode 100644 index 00000000..03078a14 --- /dev/null +++ b/assets/adaptivecard.json @@ -0,0 +1,67 @@ +{ + "type": "message", + "attachments": [ + { + "contentType": "application/vnd.microsoft.card.adaptive", + "contentUrl": null, + "content": { + "\$schema": "http://adaptivecards.io/schemas/adaptive-card.json", + "msteams": { + "width": "Full" + }, + "type": "AdaptiveCard", + "version": "1.2", + "body": [ + { + "type": "TextBlock", + "size": "Large", + "weight": "Bolder", + "color": "<% if (success) { %>Good<% } else { %>Attention<%} %>", + "text": "nf-core/scrnaseq v${version} - ${runName}", + "wrap": true + }, + { + "type": "TextBlock", + "spacing": "None", + "text": "Completed at ${dateComplete} (duration: ${duration})", + "isSubtle": true, + "wrap": true + }, + { + "type": "TextBlock", + "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors. 
The full error message was: ${errorReport}.<% } %>", + "wrap": true + }, + { + "type": "TextBlock", + "text": "The command used to launch the workflow was as follows:", + "wrap": true + }, + { + "type": "TextBlock", + "text": "${commandLine}", + "isSubtle": true, + "wrap": true + } + ], + "actions": [ + { + "type": "Action.ShowCard", + "title": "Pipeline Configuration", + "card": { + "type": "AdaptiveCard", + "\$schema": "http://adaptivecards.io/schemas/adaptive-card.json", + "body": [ + { + "type": "FactSet", + "facts": [<% out << summary.collect{ k,v -> "{\"title\": \"$k\", \"value\" : \"$v\"}"}.join(",\n") %> + ] + } + ] + } + } + ] + } + } + ] +} diff --git a/assets/email_template.html b/assets/email_template.html index b5c9a7b9..2ff8db51 100644 --- a/assets/email_template.html +++ b/assets/email_template.html @@ -4,7 +4,7 @@ - + nf-core/scrnaseq Pipeline Report diff --git a/assets/methods_description_template.yml b/assets/methods_description_template.yml new file mode 100644 index 00000000..1e84fd63 --- /dev/null +++ b/assets/methods_description_template.yml @@ -0,0 +1,25 @@ +id: "nf-core-scrnaseq-methods-description" +description: "Suggested text and references to use when describing pipeline usage within the methods section of a publication." +section_name: "nf-core/scrnaseq Methods Description" +section_href: "https://github.com/nf-core/scrnaseq" +plot_type: "html" +## TODO nf-core: Update the HTML below to your prefered methods description, e.g. add publication citation for this pipeline +## You inject any metadata in the Nextflow '${workflow}' object +data: | +

Methods

+

Data was processed using nf-core/scrnaseq v${workflow.manifest.version} ${doi_text} of the nf-core collection of workflows (Ewels et al., 2020).

+

The pipeline was executed with Nextflow v${workflow.nextflow.version} (Di Tommaso et al., 2017) with the following command:

+
${workflow.commandLine}
+

References

+
    +
  • Di Tommaso, P., Chatzou, M., Floden, E. W., Barja, P. P., Palumbo, E., & Notredame, C. (2017). Nextflow enables reproducible computational workflows. Nature Biotechnology, 35(4), 316-319. https://doi.org/10.1038/nbt.3820
  • +
  • Ewels, P. A., Peltzer, A., Fillinger, S., Patel, H., Alneberg, J., Wilm, A., Garcia, M. U., Di Tommaso, P., & Nahnsen, S. (2020). The nf-core framework for community-curated bioinformatics pipelines. Nature Biotechnology, 38(3), 276-278. https://doi.org/10.1038/s41587-020-0439-x
  • +
+
+
Notes:
+
    + ${nodoi_text} +
  • The command above does not include parameters contained in any configs or profiles that may have been used. Ensure the config file is also uploaded with your publication!
  • +
  • You should also cite all software used within this run. Check the "Software Versions" of this report to get version information.
  • +
+
diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index a646be0b..3679a380 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -3,9 +3,11 @@ report_comment: > analysis pipeline. For information about how to interpret these results, please see the documentation. report_section_order: - software_versions: + "nf-core-scrnaseq-methods-description": order: -1000 - "nf-core-scrnaseq-summary": + software_versions: order: -1001 + "nf-core-scrnaseq-summary": + order: -1002 export_plots: true diff --git a/docs/usage.md b/docs/usage.md index c1b85e7a..8040b626 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -237,6 +237,14 @@ See the main [Nextflow documentation](https://www.nextflow.io/docs/latest/config If you have any questions or issues please send us a message on [Slack](https://nf-co.re/join/slack) on the [`#configs` channel](https://nfcore.slack.com/channels/configs). +## Azure Resource Requests + +To be used with the `azurebatch` profile by specifying the `-profile azurebatch`. +We recommend providing a compute `params.vm_type` of `Standard_D16_v3` VMs by default but these options can be changed if required. + +Note that the choice of VM size depends on your quota and the overall workload during the analysis. +For a thorough list, please refer to the [Azure Sizes for virtual machines in Azure](https://docs.microsoft.com/en-us/azure/virtual-machines/sizes). + ## Running in the background Nextflow handles job submissions and supervises the running jobs. The Nextflow process must run until the pipeline is finished. 
diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy index 2fc0a9b9..27feb009 100755 --- a/lib/NfcoreTemplate.groovy +++ b/lib/NfcoreTemplate.groovy @@ -145,6 +145,61 @@ class NfcoreTemplate { output_tf.withWriter { w -> w << email_txt } } + // + // Construct and send adaptive card + // https://adaptivecards.io + // + public static void adaptivecard(workflow, params, summary_params, projectDir, log) { + def hook_url = params.hook_url + + def summary = [:] + for (group in summary_params.keySet()) { + summary << summary_params[group] + } + + def misc_fields = [:] + misc_fields['start'] = workflow.start + misc_fields['complete'] = workflow.complete + misc_fields['scriptfile'] = workflow.scriptFile + misc_fields['scriptid'] = workflow.scriptId + if (workflow.repository) misc_fields['repository'] = workflow.repository + if (workflow.commitId) misc_fields['commitid'] = workflow.commitId + if (workflow.revision) misc_fields['revision'] = workflow.revision + misc_fields['nxf_version'] = workflow.nextflow.version + misc_fields['nxf_build'] = workflow.nextflow.build + misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp + + def msg_fields = [:] + msg_fields['version'] = workflow.manifest.version + msg_fields['runName'] = workflow.runName + msg_fields['success'] = workflow.success + msg_fields['dateComplete'] = workflow.complete + msg_fields['duration'] = workflow.duration + msg_fields['exitStatus'] = workflow.exitStatus + msg_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + msg_fields['errorReport'] = (workflow.errorReport ?: 'None') + msg_fields['commandLine'] = workflow.commandLine + msg_fields['projectDir'] = workflow.projectDir + msg_fields['summary'] = summary << misc_fields + + // Render the JSON template + def engine = new groovy.text.GStringTemplateEngine() + def hf = new File("$projectDir/assets/adaptivecard.json") + def json_template = engine.createTemplate(hf).make(msg_fields) + def json_message = json_template.toString() + + // 
POST + def post = new URL(hook_url).openConnection(); + post.setRequestMethod("POST") + post.setDoOutput(true) + post.setRequestProperty("Content-Type", "application/json") + post.getOutputStream().write(json_message.getBytes("UTF-8")); + def postRC = post.getResponseCode(); + if (! postRC.equals(200)) { + log.warn(post.getErrorStream().getText()); + } + } + // // Print pipeline summary on completion // diff --git a/lib/Utils.groovy b/lib/Utils.groovy old mode 100755 new mode 100644 index 28567bd7..8d030f4e --- a/lib/Utils.groovy +++ b/lib/Utils.groovy @@ -21,19 +21,26 @@ class Utils { } // Check that all channels are present - def required_channels = ['conda-forge', 'bioconda', 'defaults'] - def conda_check_failed = !required_channels.every { ch -> ch in channels } + // This channel list is ordered by required channel priority. + def required_channels_in_order = ['conda-forge', 'bioconda', 'defaults'] + def channels_missing = ((required_channels_in_order as Set) - (channels as Set)) as Boolean // Check that they are in the right order - conda_check_failed |= !(channels.indexOf('conda-forge') < channels.indexOf('bioconda')) - conda_check_failed |= !(channels.indexOf('bioconda') < channels.indexOf('defaults')) + def channel_priority_violation = false + def n = required_channels_in_order.size() + for (int i = 0; i < n - 1; i++) { + channel_priority_violation |= !(channels.indexOf(required_channels_in_order[i]) < channels.indexOf(required_channels_in_order[i+1])) + } - if (conda_check_failed) { + if (channels_missing | channel_priority_violation) { log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + " There is a problem with your Conda configuration!\n\n" + " You will need to set-up the conda-forge and bioconda channels correctly.\n" + - " Please refer to https://bioconda.github.io/user/install.html#set-up-channels\n" + - " NB: The order of the channels matters!\n" + + " Please refer to https://bioconda.github.io/\n" + + " The 
observed channel order is \n" + + " ${channels}\n" + + " but the following channel order is required:\n" + + " ${required_channels_in_order}\n" + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" } } diff --git a/lib/WorkflowScrnaseq.groovy b/lib/WorkflowScrnaseq.groovy index db31a702..3efe1b86 100755 --- a/lib/WorkflowScrnaseq.groovy +++ b/lib/WorkflowScrnaseq.groovy @@ -2,6 +2,8 @@ // This file holds several functions specific to the workflow/scrnaseq.nf in the nf-core/scrnaseq pipeline // +import groovy.text.SimpleTemplateEngine + class WorkflowScrnaseq { // @@ -42,6 +44,23 @@ class WorkflowScrnaseq { yaml_file_text += "data: |\n" yaml_file_text += "${summary_section}" return yaml_file_text + } + + public static String methodsDescriptionText(run_workflow, mqc_methods_yaml) { + // Convert to a named map so can be used as with familar NXF ${workflow} variable syntax in the MultiQC YML file + def meta = [:] + meta.workflow = run_workflow.toMap() + meta["manifest_map"] = run_workflow.manifest.toMap() + + meta["doi_text"] = meta.manifest_map.doi ? "(doi: ${meta.manifest_map.doi})" : "" + meta["nodoi_text"] = meta.manifest_map.doi ? "": "
  • If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used.
  • " + + def methods_text = mqc_methods_yaml.text + + def engine = new SimpleTemplateEngine() + def description_html = engine.createTemplate(methods_text).make(meta) + + return description_html }// // Exit pipeline if incorrect --genome key provided // diff --git a/main.nf b/main.nf index c2fe7ccf..e2ce158b 100644 --- a/main.nf +++ b/main.nf @@ -4,7 +4,8 @@ nf-core/scrnaseq ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Github : https://github.com/nf-core/scrnaseq -Website: https://nf-co.re/scrnaseq + + Website: https://nf-co.re/scrnaseq Slack : https://nfcore.slack.com/channels/scrnaseq ---------------------------------------------------------------------------------------- */ diff --git a/modules.json b/modules.json index a865b2cd..4465eece 100644 --- a/modules.json +++ b/modules.json @@ -2,20 +2,21 @@ "name": "nf-core/scrnaseq", "homePage": "https://github.com/nf-core/scrnaseq", "repos": { - "nf-core/modules": { - "git_url": "https://github.com/nf-core/modules.git", + "https://github.com/nf-core/modules.git": { "modules": { - "custom/dumpsoftwareversions": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d", - "branch": "master" - }, - "fastqc": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d", - "branch": "master" - }, - "multiqc": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d", - "branch": "master" + "nf-core": { + "custom/dumpsoftwareversions": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "fastqc": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "multiqc": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + } } } } diff --git a/modules/nf-core/modules/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf similarity index 79% rename from modules/nf-core/modules/custom/dumpsoftwareversions/main.nf rename to 
modules/nf-core/custom/dumpsoftwareversions/main.nf index 327d5100..cebb6e05 100644 --- a/modules/nf-core/modules/custom/dumpsoftwareversions/main.nf +++ b/modules/nf-core/custom/dumpsoftwareversions/main.nf @@ -1,11 +1,11 @@ process CUSTOM_DUMPSOFTWAREVERSIONS { - label 'process_low' + label 'process_single' // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container - conda (params.enable_conda ? "bioconda::multiqc=1.11" : null) + conda (params.enable_conda ? 'bioconda::multiqc=1.13' : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.11--pyhdfd78af_0' : - 'quay.io/biocontainers/multiqc:1.11--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.13--pyhdfd78af_0' : + 'quay.io/biocontainers/multiqc:1.13--pyhdfd78af_0' }" input: path versions diff --git a/modules/nf-core/modules/custom/dumpsoftwareversions/meta.yml b/modules/nf-core/custom/dumpsoftwareversions/meta.yml similarity index 100% rename from modules/nf-core/modules/custom/dumpsoftwareversions/meta.yml rename to modules/nf-core/custom/dumpsoftwareversions/meta.yml diff --git a/modules/nf-core/modules/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py similarity index 100% rename from modules/nf-core/modules/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py rename to modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py diff --git a/modules/nf-core/modules/fastqc/main.nf b/modules/nf-core/fastqc/main.nf similarity index 85% rename from modules/nf-core/modules/fastqc/main.nf rename to modules/nf-core/fastqc/main.nf index ed6b8c50..05730368 100644 --- a/modules/nf-core/modules/fastqc/main.nf +++ b/modules/nf-core/fastqc/main.nf @@ -44,4 +44,16 @@ process FASTQC { END_VERSIONS """ } + + stub: + def prefix = 
task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.html + touch ${prefix}.zip + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) + END_VERSIONS + """ } diff --git a/modules/nf-core/modules/fastqc/meta.yml b/modules/nf-core/fastqc/meta.yml similarity index 100% rename from modules/nf-core/modules/fastqc/meta.yml rename to modules/nf-core/fastqc/meta.yml diff --git a/modules/nf-core/modules/multiqc/main.nf b/modules/nf-core/modules/multiqc/main.nf deleted file mode 100644 index 1264aac1..00000000 --- a/modules/nf-core/modules/multiqc/main.nf +++ /dev/null @@ -1,31 +0,0 @@ -process MULTIQC { - label 'process_medium' - - conda (params.enable_conda ? 'bioconda::multiqc=1.12' : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.12--pyhdfd78af_0' : - 'quay.io/biocontainers/multiqc:1.12--pyhdfd78af_0' }" - - input: - path multiqc_files - - output: - path "*multiqc_report.html", emit: report - path "*_data" , emit: data - path "*_plots" , optional:true, emit: plots - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - """ - multiqc -f $args . - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" ) - END_VERSIONS - """ -} diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf new file mode 100644 index 00000000..a8159a57 --- /dev/null +++ b/modules/nf-core/multiqc/main.nf @@ -0,0 +1,53 @@ +process MULTIQC { + label 'process_single' + + conda (params.enable_conda ? 'bioconda::multiqc=1.13' : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/multiqc:1.13--pyhdfd78af_0' : + 'quay.io/biocontainers/multiqc:1.13--pyhdfd78af_0' }" + + input: + path multiqc_files, stageAs: "?/*" + path(multiqc_config) + path(extra_multiqc_config) + path(multiqc_logo) + + output: + path "*multiqc_report.html", emit: report + path "*_data" , emit: data + path "*_plots" , optional:true, emit: plots + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def config = multiqc_config ? "--config $multiqc_config" : '' + def extra_config = extra_multiqc_config ? "--config $extra_multiqc_config" : '' + """ + multiqc \\ + --force \\ + $args \\ + $config \\ + $extra_config \\ + . + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" ) + END_VERSIONS + """ + + stub: + """ + touch multiqc_data + touch multiqc_plots + touch multiqc_report.html + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml similarity index 73% rename from modules/nf-core/modules/multiqc/meta.yml rename to modules/nf-core/multiqc/meta.yml index 6fa891ef..ebc29b27 100644 --- a/modules/nf-core/modules/multiqc/meta.yml +++ b/modules/nf-core/multiqc/meta.yml @@ -12,11 +12,25 @@ tools: homepage: https://multiqc.info/ documentation: https://multiqc.info/docs/ licence: ["GPL-3.0-or-later"] + input: - multiqc_files: type: file description: | List of reports / files recognised by MultiQC, for example the html and zip output of FastQC + - multiqc_config: + type: file + description: Optional config yml for MultiQC + pattern: "*.{yml,yaml}" + - extra_multiqc_config: + type: file + description: Second optional config yml for MultiQC. Will override common sections in multiqc_config. 
+ pattern: "*.{yml,yaml}" + - multiqc_logo: + type: file + description: Optional logo file for MultiQC + pattern: "*.{png}" + output: - report: type: file @@ -38,3 +52,4 @@ authors: - "@abhi18av" - "@bunop" - "@drpatelh" + - "@jfy133" diff --git a/nextflow.config b/nextflow.config index a6e37e2a..3c013c11 100644 --- a/nextflow.config +++ b/nextflow.config @@ -21,7 +21,9 @@ params { // MultiQC options multiqc_config = null multiqc_title = null + multiqc_logo = null max_multiqc_email_size = '25.MB' + multiqc_methods_description = null // Boilerplate options outdir = null @@ -31,6 +33,7 @@ params { email_on_fail = null plaintext_email = false monochrome_logs = false + hook_url = null help = false validate_params = true show_hidden_params = false @@ -74,7 +77,6 @@ try { // } - profiles { debug { process.beforeScript = 'echo $HOSTNAME' } conda { @@ -183,12 +185,13 @@ dag { manifest { name = 'nf-core/scrnaseq' - author = 'Peter J Bailey, Alexander Peltzer, Olga Botvinnik' + author = 'Bailey PJ, Botvinnik O, Marques de Almeida F, Peltzer A, Sturm G' homePage = 'https://github.com/nf-core/scrnaseq' - description = 'Pipeline for processing of 10xGenomics single cell rnaseq data' + description = 'Pipeline for processing 10x Genomics single cell rnaseq data' mainScript = 'main.nf' nextflowVersion = '!>=21.10.3' version = '2.0.1dev' + doi = '' } // Load modules.config for DSL2 module specific options diff --git a/nextflow_schema.json b/nextflow_schema.json index 1cc1997c..22c7c3ef 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -2,7 +2,7 @@ "$schema": "http://json-schema.org/draft-07/schema", "$id": "https://raw.githubusercontent.com/nf-core/scrnaseq/master/nextflow_schema.json", "title": "nf-core/scrnaseq pipeline parameters", - "description": "Pipeline for processing of 10xGenomics single cell rnaseq data", + "description": "Pipeline for processing 10x Genomics single cell rnaseq data", "type": "object", "definitions": { "input_output_options": { @@ -213,12 
+213,30 @@ "fa_icon": "fas fa-palette", "hidden": true }, + "hook_url": { + "type": "string", + "description": "Incoming hook URL for messaging service", + "fa_icon": "fas fa-people-group", + "help_text": "Incoming hook URL for messaging service. Currently, only MS Teams is supported.", + "hidden": true + }, "multiqc_config": { "type": "string", "description": "Custom config file to supply to MultiQC.", "fa_icon": "fas fa-cog", "hidden": true }, + "multiqc_logo": { + "type": "string", + "description": "Custom logo file to supply to MultiQC. File name must also be set in the MultiQC config file", + "fa_icon": "fas fa-image", + "hidden": true + }, + "multiqc_methods_description": { + "type": "string", + "description": "Custom MultiQC yaml file containing HTML including a methods description.", + "fa_icon": "fas fa-cog" + }, "tracedir": { "type": "string", "description": "Directory to keep pipeline Nextflow logs and reports.", diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf index 69888824..f37c6f89 100644 --- a/workflows/scrnaseq.nf +++ b/workflows/scrnaseq.nf @@ -23,8 +23,10 @@ if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input sample ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -ch_multiqc_config = file("$projectDir/assets/multiqc_config.yml", checkIfExists: true) -ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multiqc_config) : Channel.empty() +ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) +ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config, checkIfExists: true ) : Channel.empty() +ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo, checkIfExists: true ) : Channel.empty() +ch_multiqc_custom_methods_description = params.multiqc_methods_description ? 
file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -46,9 +48,9 @@ include { INPUT_CHECK } from '../subworkflows/local/input_check' // // MODULE: Installed directly from nf-core/modules // -include { FASTQC } from '../modules/nf-core/modules/fastqc/main' -include { MULTIQC } from '../modules/nf-core/modules/multiqc/main' -include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/modules/custom/dumpsoftwareversions/main' +include { FASTQC } from '../modules/nf-core/fastqc/main' +include { MULTIQC } from '../modules/nf-core/multiqc/main' +include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -89,15 +91,20 @@ workflow SCRNASEQ { workflow_summary = WorkflowScrnaseq.paramsSummaryMultiqc(workflow, summary_params) ch_workflow_summary = Channel.value(workflow_summary) + methods_description = WorkflowScrnaseq.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description) + ch_methods_description = Channel.value(methods_description) + ch_multiqc_files = Channel.empty() - ch_multiqc_files = ch_multiqc_files.mix(Channel.from(ch_multiqc_config)) - ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_custom_config.collect().ifEmpty([])) ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) + ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml')) ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect()) ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([])) MULTIQC ( - ch_multiqc_files.collect() + ch_multiqc_files.collect(), + ch_multiqc_config.collect().ifEmpty([]), + 
ch_multiqc_custom_config.collect().ifEmpty([]), + ch_multiqc_logo.collect().ifEmpty([]) ) multiqc_report = MULTIQC.out.report.toList() ch_versions = ch_versions.mix(MULTIQC.out.versions) @@ -114,6 +121,9 @@ workflow.onComplete { NfcoreTemplate.email(workflow, params, summary_params, projectDir, log, multiqc_report) } NfcoreTemplate.summary(workflow, params, log) + if (params.hook_url) { + NfcoreTemplate.adaptivecard(workflow, params, summary_params, projectDir, log) + } } /* From 1600e3bd8985b43b4ca474c6077ef3cbcaf417eb Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Wed, 5 Oct 2022 07:16:47 +0000 Subject: [PATCH 139/165] add new command to schema --- nextflow_schema.json | 42 +++++++++++++++++++++++++++++++++++++----- 1 file changed, 37 insertions(+), 5 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 91757ae4..c8f0608f 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -10,7 +10,10 @@ "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": ["input", "outdir"], + "required": [ + "input", + "outdir" + ], "properties": { "input": { "type": "string", @@ -56,14 +59,24 @@ "default": "alevin", "help_text": "The workflow can handle three types of methods:\n\n- Kallisto/Bustools\n- Salmon Alevin + AlevinQC\n- STARsolo\n\nTo choose which one to use, please specify either `alevin`, `star` or `kallisto` as a parameter option for `--aligner`. By default, the pipeline runs the `alevin` option. Note that specifying another aligner option also requires choosing appropriate parameters (see below) for the selected option.", "fa_icon": "fas fa-align-center", - "enum": ["kallisto", "star", "alevin", "cellranger"] + "enum": [ + "kallisto", + "star", + "alevin", + "cellranger" + ] }, "protocol": { "type": "string", "description": "The protocol that was used to generate the single cell data, e.g. 
10XV2 (default).", "default": "10XV2", "fa_icon": "fas fa-cogs", - "enum": ["10XV3", "10XV2", "10XV1", "dropseq"] + "enum": [ + "10XV3", + "10XV2", + "10XV1", + "dropseq" + ] } }, "fa_icon": "fas fa-terminal" @@ -137,6 +150,12 @@ "description": "Path to transcript to gene mapping file. This allows the specification of a transcript to gene mapping file for Salmon Alevin and AlevinQC.", "help_text": "> This is not the same as the `kallisto_gene_map` parameter down below and is only used by the Salmon Alevin workflow.", "fa_icon": "fas fa-map-marked-alt" + }, + "simpleaf_rlen": { + "type": "integer", + "default": 91, + "description": "It is the target read length the index will be built for, using simpleaf.", + "fa_icon": "fas fa-map-marked-alt" } } }, @@ -191,7 +210,13 @@ "default": "standard", "description": "Type of workflow. Use `lamanno` for RNA velocity based on La Manno et al. 2018 logic. Use `nucleus` for RNA velocity on single-nucleus RNA-seq reads. Use `kite` for feature barcoding. Use `kite: 10xFB` for 10x Genomics Feature Barcoding technology. (default: standard)", "fa_icon": "fas fa-fish", - "enum": ["standard", "lamanno", "nucleus", "kite", "kite: 10xFB"] + "enum": [ + "standard", + "lamanno", + "nucleus", + "kite", + "kite: 10xFB" + ] } } }, @@ -309,7 +334,14 @@ "description": "Method used to save pipeline results to output directory.", "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. 
See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", "fa_icon": "fas fa-copy", - "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], + "enum": [ + "symlink", + "rellink", + "link", + "copy", + "copyNoFollow", + "move" + ], "hidden": true }, "email_on_fail": { From ba5228fe7c283bc17d362593f5b3390ecf01ac10 Mon Sep 17 00:00:00 2001 From: nf-core-bot Date: Wed, 5 Oct 2022 08:11:06 +0000 Subject: [PATCH 140/165] [automated] Fix linting with Prettier --- nextflow_schema.json | 36 +++++------------------------------- 1 file changed, 5 insertions(+), 31 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index c8f0608f..c6621d70 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -10,10 +10,7 @@ "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": [ - "input", - "outdir" - ], + "required": ["input", "outdir"], "properties": { "input": { "type": "string", @@ -59,24 +56,14 @@ "default": "alevin", "help_text": "The workflow can handle three types of methods:\n\n- Kallisto/Bustools\n- Salmon Alevin + AlevinQC\n- STARsolo\n\nTo choose which one to use, please specify either `alevin`, `star` or `kallisto` as a parameter option for `--aligner`. By default, the pipeline runs the `alevin` option. Note that specifying another aligner option also requires choosing appropriate parameters (see below) for the selected option.", "fa_icon": "fas fa-align-center", - "enum": [ - "kallisto", - "star", - "alevin", - "cellranger" - ] + "enum": ["kallisto", "star", "alevin", "cellranger"] }, "protocol": { "type": "string", "description": "The protocol that was used to generate the single cell data, e.g. 
10XV2 (default).", "default": "10XV2", "fa_icon": "fas fa-cogs", - "enum": [ - "10XV3", - "10XV2", - "10XV1", - "dropseq" - ] + "enum": ["10XV3", "10XV2", "10XV1", "dropseq"] } }, "fa_icon": "fas fa-terminal" @@ -210,13 +197,7 @@ "default": "standard", "description": "Type of workflow. Use `lamanno` for RNA velocity based on La Manno et al. 2018 logic. Use `nucleus` for RNA velocity on single-nucleus RNA-seq reads. Use `kite` for feature barcoding. Use `kite: 10xFB` for 10x Genomics Feature Barcoding technology. (default: standard)", "fa_icon": "fas fa-fish", - "enum": [ - "standard", - "lamanno", - "nucleus", - "kite", - "kite: 10xFB" - ] + "enum": ["standard", "lamanno", "nucleus", "kite", "kite: 10xFB"] } } }, @@ -334,14 +315,7 @@ "description": "Method used to save pipeline results to output directory.", "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. 
See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", "fa_icon": "fas fa-copy", - "enum": [ - "symlink", - "rellink", - "link", - "copy", - "copyNoFollow", - "move" - ], + "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], "hidden": true }, "email_on_fail": { From 7315db5dcb069fbd54313bf756f32641cfa27046 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Wed, 5 Oct 2022 08:18:24 +0000 Subject: [PATCH 141/165] Fix black linting --- bin/cellranger_mtx_to_h5ad.py | 13 ++++--- bin/check_samplesheet.py | 31 ++++++++++++---- bin/concat_h5ad.py | 25 +++++++++---- bin/mtx_to_h5ad.py | 22 +++++++++-- bin/t2g.py | 70 +++++++++++++++++++---------------- 5 files changed, 106 insertions(+), 55 deletions(-) diff --git a/bin/cellranger_mtx_to_h5ad.py b/bin/cellranger_mtx_to_h5ad.py index e8eb5b23..ca21f50d 100755 --- a/bin/cellranger_mtx_to_h5ad.py +++ b/bin/cellranger_mtx_to_h5ad.py @@ -2,7 +2,8 @@ import scanpy as sc import argparse -def mtx_to_adata( mtx_h5: str, sample: str, verbose: bool = False ): + +def mtx_to_adata(mtx_h5: str, sample: str, verbose: bool = False): if verbose: print("Reading in {}".format(mtx_h5)) @@ -19,10 +20,12 @@ def mtx_to_adata( mtx_h5: str, sample: str, verbose: bool = False ): parser = argparse.ArgumentParser(description="Converts mtx output to h5ad.") - parser.add_argument("-m", "--mtx", dest="mtx", help="Path to mtx h5 file." ) - parser.add_argument("-v", "--verbose", dest="verbose", help="Toggle verbose messages", default=False ) - parser.add_argument("-s", "--sample", dest="sample", help="Sample name" ) - parser.add_argument("-o", "--out", dest="out", help="Output path." 
) + parser.add_argument("-m", "--mtx", dest="mtx", help="Path to mtx h5 file.") + parser.add_argument( + "-v", "--verbose", dest="verbose", help="Toggle verbose messages", default=False + ) + parser.add_argument("-s", "--sample", dest="sample", help="Sample name") + parser.add_argument("-o", "--out", dest="out", help="Output path.") args = vars(parser.parse_args()) diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py index d98bdaa3..4e72568f 100755 --- a/bin/check_samplesheet.py +++ b/bin/check_samplesheet.py @@ -86,7 +86,9 @@ def _validate_sample(self, row): def _validate_first(self, row): """Assert that the first FASTQ entry is non-empty and has the right format.""" - assert len(row[self._first_col]) > 0, "At least the first FASTQ file is required." + assert ( + len(row[self._first_col]) > 0 + ), "At least the first FASTQ file is required." self._validate_fastq_format(row[self._first_col]) def _validate_second(self, row): @@ -99,7 +101,8 @@ def _validate_pair(self, row): if row[self._first_col] and row[self._second_col]: row[self._single_col] = False assert ( - Path(row[self._first_col]).suffixes[-2:] == Path(row[self._second_col]).suffixes[-2:] + Path(row[self._first_col]).suffixes[-2:] + == Path(row[self._second_col]).suffixes[-2:] ), "FASTQ pairs must have the same file extensions." else: row[self._single_col] = True @@ -119,7 +122,9 @@ def validate_unique_samples(self): FASTQ file combination exists. """ - assert len(self._seen) == len(self.modified), "The pair of sample name and FASTQ must be unique." + assert len(self._seen) == len( + self.modified + ), "The pair of sample name and FASTQ must be unique." 
if len({pair[0] for pair in self._seen}) < len(self._seen): counts = Counter(pair[0] for pair in self._seen) seen = Counter() @@ -200,7 +205,11 @@ def check_samplesheet(file_in, file_out): HEADER = ["sample", "fastq_1", "fastq_2"] header = [x.strip('"') for x in fin.readline().strip().split(",")] if header[: len(HEADER)] != HEADER: - print("ERROR: Please check samplesheet header -> {} != {}".format(",".join(header), ",".join(HEADER))) + print( + "ERROR: Please check samplesheet header -> {} != {}".format( + ",".join(header), ",".join(HEADER) + ) + ) sys.exit(1) ## Check sample entries @@ -217,7 +226,9 @@ def check_samplesheet(file_in, file_out): num_cols = len([x for x in lspl if x]) if num_cols < MIN_COLS: print_error( - "Invalid number of populated columns (minimum = {})!".format(MIN_COLS), + "Invalid number of populated columns (minimum = {})!".format( + MIN_COLS + ), "Line", line, ) @@ -266,8 +277,14 @@ def check_samplesheet(file_in, file_out): for sample in sorted(sample_mapping_dict.keys()): ## Check that multiple runs of the same sample are of the same datatype - if not all(x[0] == sample_mapping_dict[sample][0][0] for x in sample_mapping_dict[sample]): - print_error("Multiple runs of a sample must be of the same datatype!", "Sample: {}".format(sample)) + if not all( + x[0] == sample_mapping_dict[sample][0][0] + for x in sample_mapping_dict[sample] + ): + print_error( + "Multiple runs of a sample must be of the same datatype!", + "Sample: {}".format(sample), + ) for idx, val in enumerate(sample_mapping_dict[sample]): fout.write(",".join(["{}".format(sample)] + val) + "\n") diff --git a/bin/concat_h5ad.py b/bin/concat_h5ad.py index 29d0037a..5d235eac 100755 --- a/bin/concat_h5ad.py +++ b/bin/concat_h5ad.py @@ -3,6 +3,7 @@ from pathlib import Path import argparse + def read_samplesheet(samplesheet): df = pd.read_csv(samplesheet) df.set_index("sample") @@ -12,15 +13,23 @@ def read_samplesheet(samplesheet): # only keep unique values using set() df = 
df.groupby(["sample"]).agg(lambda column: ",".join(set(column))) - return(df) + return df + if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Concatenates h5ad files and merge metadata from samplesheet") + parser = argparse.ArgumentParser( + description="Concatenates h5ad files and merge metadata from samplesheet" + ) - parser.add_argument("-i", "--input", dest="input", help="Path to samplesheet.csv") - parser.add_argument("-o", "--out", dest="out", help="Output path.") - parser.add_argument("-s", "--suffix", dest="suffix", help="Suffix of matrices to remove and get sample name") + parser.add_argument("-i", "--input", dest="input", help="Path to samplesheet.csv") + parser.add_argument("-o", "--out", dest="out", help="Output path.") + parser.add_argument( + "-s", + "--suffix", + dest="suffix", + help="Suffix of matrices to remove and get sample name", + ) args = vars(parser.parse_args()) @@ -29,8 +38,8 @@ def read_samplesheet(samplesheet): # find all h5ad and append to dict dict_of_h5ad = { - str(path).replace(args["suffix"], ""): sc.read_h5ad(path) - for path in Path(".").rglob('*.h5ad') + str(path).replace(args["suffix"], ""): sc.read_h5ad(path) + for path in Path(".").rglob("*.h5ad") } # concat h5ad files @@ -40,4 +49,4 @@ def read_samplesheet(samplesheet): adata.obs = adata.obs.join(df_samplesheet, on="sample") adata.write_h5ad(args["out"], compression="gzip") - print("Wrote h5ad file to {}".format(args["out"])) \ No newline at end of file + print("Wrote h5ad file to {}".format(args["out"])) diff --git a/bin/mtx_to_h5ad.py b/bin/mtx_to_h5ad.py index 2885886e..9d3346a9 100755 --- a/bin/mtx_to_h5ad.py +++ b/bin/mtx_to_h5ad.py @@ -5,14 +5,21 @@ def mtx_to_adata( - mtx_file: str, barcode_file: str, feature_file: str, sample: str, aligner: str, verbose: bool = False + mtx_file: str, + barcode_file: str, + feature_file: str, + sample: str, + aligner: str, + verbose: bool = False, ): if verbose: print("Reading in {}".format(mtx_file)) adata = 
sc.read_mtx(mtx_file) - if aligner == 'star': # for some reason star matrix comes transposed and doesn't fit when values are appended directly + if ( + aligner == "star" + ): # for some reason star matrix comes transposed and doesn't fit when values are appended directly adata = adata.transpose() adata.obs_names = pd.read_csv(barcode_file, header=None, sep="\t")[0].values adata.var_names = pd.read_csv(feature_file, header=None, sep="\t")[0].values @@ -33,12 +40,19 @@ def mtx_to_adata( parser.add_argument("-b", "--barcode", dest="barcode", help="Path to barcode file.") parser.add_argument("-s", "--sample", dest="sample", help="Sample name") parser.add_argument("-o", "--out", dest="out", help="Output path.") - parser.add_argument("-a", "--aligner", dest="aligner", help="Which aligner has been used?") + parser.add_argument( + "-a", "--aligner", dest="aligner", help="Which aligner has been used?" + ) args = vars(parser.parse_args()) adata = mtx_to_adata( - args["mtx"], args["barcode"], args["feature"], args["sample"], args["aligner"],verbose=args["verbose"] + args["mtx"], + args["barcode"], + args["feature"], + args["sample"], + args["aligner"], + verbose=args["verbose"], ) adata.write_h5ad(args["out"], compression="gzip") diff --git a/bin/t2g.py b/bin/t2g.py index 6419dd1d..5daf3df5 100755 --- a/bin/t2g.py +++ b/bin/t2g.py @@ -1,6 +1,6 @@ #!/usr/bin/env python -#This was downloaded on 2019-06-23 from https://github.com/bustools/getting_started/releases/ -#All credit goes to the original authors from the Kallisto/BUStools team! +# This was downloaded on 2019-06-23 from https://github.com/bustools/getting_started/releases/ +# All credit goes to the original authors from the Kallisto/BUStools team! 
# BSD 2-Clause License # # Copyright (c) 2017, Nicolas Bray, Harold Pimentel, Páll Melsted and Lior Pachter @@ -29,43 +29,43 @@ import sys, argparse -def create_transcript_list(input, use_name = True, use_version = False): + +def create_transcript_list(input, use_name=True, use_version=False): r = {} for line in input: - if len(line) == 0 or line[0] == '#': + if len(line) == 0 or line[0] == "#": continue - l = line.strip().split('\t') - if l[2] == 'transcript': + l = line.strip().split("\t") + if l[2] == "transcript": info = l[8] d = {} - for x in info.split('; '): + for x in info.split("; "): x = x.strip() - p = x.find(' ') + p = x.find(" ") if p == -1: continue k = x[:p] - p = x.find('"',p) - p2 = x.find('"',p+1) - v = x[p+1:p2] + p = x.find('"', p) + p2 = x.find('"', p + 1) + v = x[p + 1 : p2] d[k] = v - - if 'transcript_id' not in d or 'gene_id' not in d: + if "transcript_id" not in d or "gene_id" not in d: continue - tid = d['transcript_id'].split(".")[0] - gid = d['gene_id'].split(".")[0] + tid = d["transcript_id"].split(".")[0] + gid = d["gene_id"].split(".")[0] if use_version: - if 'transcript_version' not in d or 'gene_version' not in d: + if "transcript_version" not in d or "gene_version" not in d: continue - tid += '.' + d['transcript_version'] - gid += '.' + d['gene_version'] + tid += "." + d["transcript_version"] + gid += "." 
+ d["gene_version"] gname = None if use_name: - if 'gene_name' not in d: + if "gene_name" not in d: continue - gname = d['gene_name'] + gname = d["gene_name"] if tid in r: continue @@ -74,26 +74,34 @@ def create_transcript_list(input, use_name = True, use_version = False): return r - -def print_output(output, r, use_name = True): +def print_output(output, r, use_name=True): for tid in r: if use_name: - output.write("%s\t%s\t%s\n"%(tid, r[tid][0], r[tid][1])) + output.write("%s\t%s\t%s\n" % (tid, r[tid][0], r[tid][1])) else: - output.write("%s\t%s\n"%(tid, r[tid][0])) + output.write("%s\t%s\n" % (tid, r[tid][0])) if __name__ == "__main__": - - parser = argparse.ArgumentParser(add_help=True, description='Creates transcript to gene info from GTF files\nreads from standard input and writes to standard output') - parser.add_argument('--use_version', '-v', action='store_true', help='Use version numbers in transcript and gene ids') - parser.add_argument('--skip_gene_names', '-s', action='store_true', help='Do not output gene names') + parser = argparse.ArgumentParser( + add_help=True, + description="Creates transcript to gene info from GTF files\nreads from standard input and writes to standard output", + ) + parser.add_argument( + "--use_version", + "-v", + action="store_true", + help="Use version numbers in transcript and gene ids", + ) + parser.add_argument( + "--skip_gene_names", "-s", action="store_true", help="Do not output gene names" + ) args = parser.parse_args() - - input = sys.stdin - r = create_transcript_list(input, use_name = not args.skip_gene_names, use_version = args.use_version) + r = create_transcript_list( + input, use_name=not args.skip_gene_names, use_version=args.use_version + ) output = sys.stdout print_output(output, r) From 9aec18c5532cbb774ffdc44abdaec40fdef85343 Mon Sep 17 00:00:00 2001 From: Felipe Marques de Almeida Date: Wed, 5 Oct 2022 11:01:08 +0200 Subject: [PATCH 142/165] Update conf/modules.config Co-authored-by: Gregor Sturm --- 
conf/modules.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index 2b05f1e7..197e8266 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -75,7 +75,7 @@ if (params.aligner == "alevin") { path: { "${params.outdir}/${params.aligner}" }, mode: params.publish_dir_mode ] - ext.args = "--rlen ${params.simpleaf_rlen}" + ext.args = { "--rlen ${params.simpleaf_rlen}" } } withName: 'SIMPLEAF_QUANT' { publishDir = [ From 11a8de57b3fa6240088c4d08a24c510b622eae34 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Wed, 5 Oct 2022 09:44:18 +0000 Subject: [PATCH 143/165] Python 3.7 please --- .github/workflows/linting.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 77358dee..f8fa2595 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -50,7 +50,7 @@ jobs: - uses: actions/setup-python@v3 with: - python-version: "3.6" + python-version: "3.7" architecture: "x64" - name: Install dependencies From e7b79e2355e57f5fe598f815da2ecf55696728d6 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Wed, 5 Oct 2022 11:19:02 +0000 Subject: [PATCH 144/165] Fixed black linting --- bin/cellranger_mtx_to_h5ad.py | 4 +--- bin/check_samplesheet.py | 26 ++++++-------------------- bin/concat_h5ad.py | 9 ++------- bin/mtx_to_h5ad.py | 8 ++------ bin/t2g.py | 8 ++------ 5 files changed, 13 insertions(+), 42 deletions(-) diff --git a/bin/cellranger_mtx_to_h5ad.py b/bin/cellranger_mtx_to_h5ad.py index ca21f50d..84305fa3 100755 --- a/bin/cellranger_mtx_to_h5ad.py +++ b/bin/cellranger_mtx_to_h5ad.py @@ -21,9 +21,7 @@ def mtx_to_adata(mtx_h5: str, sample: str, verbose: bool = False): parser = argparse.ArgumentParser(description="Converts mtx output to h5ad.") parser.add_argument("-m", "--mtx", dest="mtx", help="Path to mtx h5 file.") - parser.add_argument( - "-v", "--verbose", dest="verbose", help="Toggle 
verbose messages", default=False - ) + parser.add_argument("-v", "--verbose", dest="verbose", help="Toggle verbose messages", default=False) parser.add_argument("-s", "--sample", dest="sample", help="Sample name") parser.add_argument("-o", "--out", dest="out", help="Output path.") diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py index 9f6a37c3..1e35db6f 100755 --- a/bin/check_samplesheet.py +++ b/bin/check_samplesheet.py @@ -86,9 +86,7 @@ def _validate_sample(self, row): def _validate_first(self, row): """Assert that the first FASTQ entry is non-empty and has the right format.""" - assert ( - len(row[self._first_col]) > 0 - ), "At least the first FASTQ file is required." + assert len(row[self._first_col]) > 0, "At least the first FASTQ file is required." self._validate_fastq_format(row[self._first_col]) def _validate_second(self, row): @@ -101,8 +99,7 @@ def _validate_pair(self, row): if row[self._first_col] and row[self._second_col]: row[self._single_col] = False assert ( - Path(row[self._first_col]).suffixes[-2:] - == Path(row[self._second_col]).suffixes[-2:] + Path(row[self._first_col]).suffixes[-2:] == Path(row[self._second_col]).suffixes[-2:] ), "FASTQ pairs must have the same file extensions." else: row[self._single_col] = True @@ -123,9 +120,7 @@ def validate_unique_samples(self): number of times the same sample exist, but with different FASTQ files, e.g., multiple runs per experiment. """ - assert len(self._seen) == len( - self.modified - ), "The pair of sample name and FASTQ must be unique." + assert len(self._seen) == len(self.modified), "The pair of sample name and FASTQ must be unique." 
if len({pair[0] for pair in self._seen}) < len(self._seen): counts = Counter(pair[0] for pair in self._seen) seen = Counter() @@ -206,11 +201,7 @@ def check_samplesheet(file_in, file_out): HEADER = ["sample", "fastq_1", "fastq_2"] header = [x.strip('"') for x in fin.readline().strip().split(",")] if header[: len(HEADER)] != HEADER: - print( - "ERROR: Please check samplesheet header -> {} != {}".format( - ",".join(header), ",".join(HEADER) - ) - ) + print("ERROR: Please check samplesheet header -> {} != {}".format(",".join(header), ",".join(HEADER))) sys.exit(1) ## Check sample entries @@ -227,9 +218,7 @@ def check_samplesheet(file_in, file_out): num_cols = len([x for x in lspl if x]) if num_cols < MIN_COLS: print_error( - "Invalid number of populated columns (minimum = {})!".format( - MIN_COLS - ), + "Invalid number of populated columns (minimum = {})!".format(MIN_COLS), "Line", line, ) @@ -278,10 +267,7 @@ def check_samplesheet(file_in, file_out): for sample in sorted(sample_mapping_dict.keys()): ## Check that multiple runs of the same sample are of the same datatype - if not all( - x[0] == sample_mapping_dict[sample][0][0] - for x in sample_mapping_dict[sample] - ): + if not all(x[0] == sample_mapping_dict[sample][0][0] for x in sample_mapping_dict[sample]): print_error( "Multiple runs of a sample must be of the same datatype!", "Sample: {}".format(sample), diff --git a/bin/concat_h5ad.py b/bin/concat_h5ad.py index 5d235eac..9be60b13 100755 --- a/bin/concat_h5ad.py +++ b/bin/concat_h5ad.py @@ -18,9 +18,7 @@ def read_samplesheet(samplesheet): if __name__ == "__main__": - parser = argparse.ArgumentParser( - description="Concatenates h5ad files and merge metadata from samplesheet" - ) + parser = argparse.ArgumentParser(description="Concatenates h5ad files and merge metadata from samplesheet") parser.add_argument("-i", "--input", dest="input", help="Path to samplesheet.csv") parser.add_argument("-o", "--out", dest="out", help="Output path.") @@ -37,10 +35,7 @@ def 
read_samplesheet(samplesheet): df_samplesheet = read_samplesheet(args["input"]) # find all h5ad and append to dict - dict_of_h5ad = { - str(path).replace(args["suffix"], ""): sc.read_h5ad(path) - for path in Path(".").rglob("*.h5ad") - } + dict_of_h5ad = {str(path).replace(args["suffix"], ""): sc.read_h5ad(path) for path in Path(".").rglob("*.h5ad")} # concat h5ad files adata = ad.concat(dict_of_h5ad, label="sample", merge="unique", index_unique="_") diff --git a/bin/mtx_to_h5ad.py b/bin/mtx_to_h5ad.py index 9d3346a9..1e79aa22 100755 --- a/bin/mtx_to_h5ad.py +++ b/bin/mtx_to_h5ad.py @@ -33,16 +33,12 @@ def mtx_to_adata( parser = argparse.ArgumentParser(description="Converts mtx output to h5ad.") parser.add_argument("-m", "--mtx", dest="mtx", help="Path to mtx file.") - parser.add_argument( - "-v", "--verbose", dest="verbose", help="Toggle verbose messages", default=False - ) + parser.add_argument("-v", "--verbose", dest="verbose", help="Toggle verbose messages", default=False) parser.add_argument("-f", "--feature", dest="feature", help="Path to feature file.") parser.add_argument("-b", "--barcode", dest="barcode", help="Path to barcode file.") parser.add_argument("-s", "--sample", dest="sample", help="Sample name") parser.add_argument("-o", "--out", dest="out", help="Output path.") - parser.add_argument( - "-a", "--aligner", dest="aligner", help="Which aligner has been used?" 
- ) + parser.add_argument("-a", "--aligner", dest="aligner", help="Which aligner has been used?") args = vars(parser.parse_args()) diff --git a/bin/t2g.py b/bin/t2g.py index 5daf3df5..efa9f0f7 100755 --- a/bin/t2g.py +++ b/bin/t2g.py @@ -94,14 +94,10 @@ def print_output(output, r, use_name=True): action="store_true", help="Use version numbers in transcript and gene ids", ) - parser.add_argument( - "--skip_gene_names", "-s", action="store_true", help="Do not output gene names" - ) + parser.add_argument("--skip_gene_names", "-s", action="store_true", help="Do not output gene names") args = parser.parse_args() input = sys.stdin - r = create_transcript_list( - input, use_name=not args.skip_gene_names, use_version=args.use_version - ) + r = create_transcript_list(input, use_name=not args.skip_gene_names, use_version=args.use_version) output = sys.stdout print_output(output, r) From 5c9c9ade5346917f62f1acb7bec37a57a67d8f26 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Wed, 5 Oct 2022 11:31:54 +0000 Subject: [PATCH 145/165] Fix linting, new module structure --- modules.json | 95 ++++++++-------- modules/local/simpleaf_index.nf | 2 +- modules/local/simpleaf_quant.nf | 2 +- .../{modules => }/cellranger/count/main.nf | 0 .../{modules => }/cellranger/count/meta.yml | 0 .../{modules => }/cellranger/mkgtf/main.nf | 0 .../{modules => }/cellranger/mkgtf/meta.yml | 0 .../{modules => }/cellranger/mkref/main.nf | 2 +- .../{modules => }/cellranger/mkref/meta.yml | 0 .../templates/dumpsoftwareversions.py | 102 ++++++++++-------- modules/nf-core/{modules => }/gffread/main.nf | 0 .../nf-core/{modules => }/gffread/meta.yml | 0 modules/nf-core/{modules => }/gunzip/main.nf | 2 +- modules/nf-core/{modules => }/gunzip/meta.yml | 0 .../kallistobustools/count/main.nf | 0 .../kallistobustools/count/meta.yml | 0 .../kallistobustools/ref/main.nf | 0 .../kallistobustools/ref/meta.yml | 0 .../{modules => }/star/genomegenerate/main.nf | 28 +++++ .../star/genomegenerate/meta.yml | 0 
subworkflows/local/alevin.nf | 4 +- subworkflows/local/align_cellranger.nf | 6 +- subworkflows/local/fastqc.nf | 2 +- subworkflows/local/kallisto_bustools.nf | 6 +- subworkflows/local/starsolo.nf | 4 +- 25 files changed, 145 insertions(+), 110 deletions(-) rename modules/nf-core/{modules => }/cellranger/count/main.nf (100%) rename modules/nf-core/{modules => }/cellranger/count/meta.yml (100%) rename modules/nf-core/{modules => }/cellranger/mkgtf/main.nf (100%) rename modules/nf-core/{modules => }/cellranger/mkgtf/meta.yml (100%) rename modules/nf-core/{modules => }/cellranger/mkref/main.nf (98%) rename modules/nf-core/{modules => }/cellranger/mkref/meta.yml (100%) mode change 100644 => 100755 modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py rename modules/nf-core/{modules => }/gffread/main.nf (100%) rename modules/nf-core/{modules => }/gffread/meta.yml (100%) rename modules/nf-core/{modules => }/gunzip/main.nf (97%) rename modules/nf-core/{modules => }/gunzip/meta.yml (100%) rename modules/nf-core/{modules => }/kallistobustools/count/main.nf (100%) rename modules/nf-core/{modules => }/kallistobustools/count/meta.yml (100%) rename modules/nf-core/{modules => }/kallistobustools/ref/main.nf (100%) rename modules/nf-core/{modules => }/kallistobustools/ref/meta.yml (100%) rename modules/nf-core/{modules => }/star/genomegenerate/main.nf (75%) rename modules/nf-core/{modules => }/star/genomegenerate/meta.yml (100%) diff --git a/modules.json b/modules.json index e18acd52..4b49090a 100644 --- a/modules.json +++ b/modules.json @@ -2,56 +2,53 @@ "name": "nf-core/scrnaseq", "homePage": "https://github.com/nf-core/scrnaseq", "repos": { - "nf-core/modules": { - "git_url": "https://github.com/nf-core/modules.git", + "https://github.com/nf-core/modules.git": { "modules": { - "cellranger/count": { - "branch": "master", - "git_sha": "8032833e6e0fd3edeb20d4121885112249f273c4" - }, - "cellranger/mkgtf": { - "branch": "master", - "git_sha": 
"e499abe572bc7c1322dfa66b767b0e65fe8c62ca" - }, - "cellranger/mkref": { - "branch": "master", - "git_sha": "61d87bb2bcbcd058e1af6e5f14bb821b8ae4ed3e" - }, - "custom/dumpsoftwareversions": { - "branch": "master", - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" - }, - "fastqc": { - "branch": "master", - "git_sha": "49b18b1639f4f7104187058866a8fab33332bdfe" - }, - "gffread": { - "branch": "master", - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" - }, - "gunzip": { - "branch": "master", - "git_sha": "9aadd9a6d3f5964476582319b3a1c54a3e3fe7c9" - }, - "kallistobustools/ref": { - "branch": "master", - "git_sha": "e20e57f90b6787ac9a010a980cf6ea98bd990046" - }, - "multiqc": { - "branch": "master", - "git_sha": "e5f8924fabf4c8380f55fb7aee89fd2c268161b1" - }, - "salmon/index": { - "branch": "master", - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" - }, - "salmon/quant": { - "branch": "master", - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" - }, - "star/genomegenerate": { - "branch": "master", - "git_sha": "fb6c7bca3d55c19a793372513395e3a567bdd7ba" + "nf-core": { + "cellranger/count": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "cellranger/mkgtf": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "cellranger/mkref": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "custom/dumpsoftwareversions": { + "branch": "master", + "git_sha": "8022c68e7403eecbd8ba9c49496f69f8c49d50f0" + }, + "fastqc": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "gffread": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "gunzip": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "kallistobustools/count": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "kallistobustools/ref": { + "branch": "master", + 
"git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "multiqc": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, + "star/genomegenerate": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + } } } } diff --git a/modules/local/simpleaf_index.nf b/modules/local/simpleaf_index.nf index 5bf54e7d..939e294d 100644 --- a/modules/local/simpleaf_index.nf +++ b/modules/local/simpleaf_index.nf @@ -2,7 +2,7 @@ process SIMPLEAF_INDEX { tag "$transcript_gtf" label "process_medium" - conda (params.enable_conda ? 'bioconda::simpleaf=0.5.1' : null) + conda (params.enable_conda ? 'bioconda::simpleaf=0.5.2' : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/simpleaf:0.5.2--h9f5acd7_0' : 'quay.io/biocontainers/simpleaf:0.5.2--h9f5acd7_0' }" diff --git a/modules/local/simpleaf_quant.nf b/modules/local/simpleaf_quant.nf index 8f7b91c6..31dc26c4 100644 --- a/modules/local/simpleaf_quant.nf +++ b/modules/local/simpleaf_quant.nf @@ -2,7 +2,7 @@ process SIMPLEAF_QUANT { tag "$meta.id" label 'process_high' - conda (params.enable_conda ? 'bioconda::simpleaf=0.5.1' : null) + conda (params.enable_conda ? 'bioconda::simpleaf=0.5.2' : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/simpleaf:0.5.2--h9f5acd7_0' : 'quay.io/biocontainers/simpleaf:0.5.2--h9f5acd7_0' }" diff --git a/modules/nf-core/modules/cellranger/count/main.nf b/modules/nf-core/cellranger/count/main.nf similarity index 100% rename from modules/nf-core/modules/cellranger/count/main.nf rename to modules/nf-core/cellranger/count/main.nf diff --git a/modules/nf-core/modules/cellranger/count/meta.yml b/modules/nf-core/cellranger/count/meta.yml similarity index 100% rename from modules/nf-core/modules/cellranger/count/meta.yml rename to modules/nf-core/cellranger/count/meta.yml diff --git a/modules/nf-core/modules/cellranger/mkgtf/main.nf b/modules/nf-core/cellranger/mkgtf/main.nf similarity index 100% rename from modules/nf-core/modules/cellranger/mkgtf/main.nf rename to modules/nf-core/cellranger/mkgtf/main.nf diff --git a/modules/nf-core/modules/cellranger/mkgtf/meta.yml b/modules/nf-core/cellranger/mkgtf/meta.yml similarity index 100% rename from modules/nf-core/modules/cellranger/mkgtf/meta.yml rename to modules/nf-core/cellranger/mkgtf/meta.yml diff --git a/modules/nf-core/modules/cellranger/mkref/main.nf b/modules/nf-core/cellranger/mkref/main.nf similarity index 98% rename from modules/nf-core/modules/cellranger/mkref/main.nf rename to modules/nf-core/cellranger/mkref/main.nf index e1bfebde..df27d2bd 100644 --- a/modules/nf-core/modules/cellranger/mkref/main.nf +++ b/modules/nf-core/cellranger/mkref/main.nf @@ -1,5 +1,5 @@ process CELLRANGER_MKREF { - tag 'mkref' + tag "$fasta" label 'process_high' if (params.enable_conda) { diff --git a/modules/nf-core/modules/cellranger/mkref/meta.yml b/modules/nf-core/cellranger/mkref/meta.yml similarity index 100% rename from modules/nf-core/modules/cellranger/mkref/meta.yml rename to modules/nf-core/cellranger/mkref/meta.yml diff --git a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py 
old mode 100644 new mode 100755 index 787bdb7b..da033408 --- a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py +++ b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py @@ -1,12 +1,16 @@ #!/usr/bin/env python -import platform -from textwrap import dedent + +"""Provide functions to merge multiple versions.yml files.""" + import yaml +import platform +from textwrap import dedent def _make_versions_html(versions): + """Generate a tabular HTML output of all versions for MultiQC.""" html = [ dedent( """\\ @@ -45,47 +49,53 @@ def _make_versions_html(versions): return "\\n".join(html) -versions_this_module = {} -versions_this_module["${task.process}"] = { - "python": platform.python_version(), - "yaml": yaml.__version__, -} - -with open("$versions") as f: - versions_by_process = yaml.load(f, Loader=yaml.BaseLoader) | versions_this_module - -# aggregate versions by the module name (derived from fully-qualified process name) -versions_by_module = {} -for process, process_versions in versions_by_process.items(): - module = process.split(":")[-1] - try: - if versions_by_module[module] != process_versions: - raise AssertionError( - "We assume that software versions are the same between all modules. " - "If you see this error-message it means you discovered an edge-case " - "and should open an issue in nf-core/tools. 
" - ) - except KeyError: - versions_by_module[module] = process_versions - -versions_by_module["Workflow"] = { - "Nextflow": "$workflow.nextflow.version", - "$workflow.manifest.name": "$workflow.manifest.version", -} - -versions_mqc = { - "id": "software_versions", - "section_name": "${workflow.manifest.name} Software Versions", - "section_href": "https://github.com/${workflow.manifest.name}", - "plot_type": "html", - "description": "are collected at run time from the software output.", - "data": _make_versions_html(versions_by_module), -} - -with open("software_versions.yml", "w") as f: - yaml.dump(versions_by_module, f, default_flow_style=False) -with open("software_versions_mqc.yml", "w") as f: - yaml.dump(versions_mqc, f, default_flow_style=False) - -with open("versions.yml", "w") as f: - yaml.dump(versions_this_module, f, default_flow_style=False) +def main(): + """Load all version files and generate merged output.""" + versions_this_module = {} + versions_this_module["${task.process}"] = { + "python": platform.python_version(), + "yaml": yaml.__version__, + } + + with open("$versions") as f: + versions_by_process = yaml.load(f, Loader=yaml.BaseLoader) | versions_this_module + + # aggregate versions by the module name (derived from fully-qualified process name) + versions_by_module = {} + for process, process_versions in versions_by_process.items(): + module = process.split(":")[-1] + try: + if versions_by_module[module] != process_versions: + raise AssertionError( + "We assume that software versions are the same between all modules. " + "If you see this error-message it means you discovered an edge-case " + "and should open an issue in nf-core/tools. 
" + ) + except KeyError: + versions_by_module[module] = process_versions + + versions_by_module["Workflow"] = { + "Nextflow": "$workflow.nextflow.version", + "$workflow.manifest.name": "$workflow.manifest.version", + } + + versions_mqc = { + "id": "software_versions", + "section_name": "${workflow.manifest.name} Software Versions", + "section_href": "https://github.com/${workflow.manifest.name}", + "plot_type": "html", + "description": "are collected at run time from the software output.", + "data": _make_versions_html(versions_by_module), + } + + with open("software_versions.yml", "w") as f: + yaml.dump(versions_by_module, f, default_flow_style=False) + with open("software_versions_mqc.yml", "w") as f: + yaml.dump(versions_mqc, f, default_flow_style=False) + + with open("versions.yml", "w") as f: + yaml.dump(versions_this_module, f, default_flow_style=False) + + +if __name__ == "__main__": + main() diff --git a/modules/nf-core/modules/gffread/main.nf b/modules/nf-core/gffread/main.nf similarity index 100% rename from modules/nf-core/modules/gffread/main.nf rename to modules/nf-core/gffread/main.nf diff --git a/modules/nf-core/modules/gffread/meta.yml b/modules/nf-core/gffread/meta.yml similarity index 100% rename from modules/nf-core/modules/gffread/meta.yml rename to modules/nf-core/gffread/meta.yml diff --git a/modules/nf-core/modules/gunzip/main.nf b/modules/nf-core/gunzip/main.nf similarity index 97% rename from modules/nf-core/modules/gunzip/main.nf rename to modules/nf-core/gunzip/main.nf index 70367049..fa6ba26a 100644 --- a/modules/nf-core/modules/gunzip/main.nf +++ b/modules/nf-core/gunzip/main.nf @@ -1,6 +1,6 @@ process GUNZIP { tag "$archive" - label 'process_low' + label 'process_single' conda (params.enable_conda ? "conda-forge::sed=4.7" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
diff --git a/modules/nf-core/modules/gunzip/meta.yml b/modules/nf-core/gunzip/meta.yml similarity index 100% rename from modules/nf-core/modules/gunzip/meta.yml rename to modules/nf-core/gunzip/meta.yml diff --git a/modules/nf-core/modules/kallistobustools/count/main.nf b/modules/nf-core/kallistobustools/count/main.nf similarity index 100% rename from modules/nf-core/modules/kallistobustools/count/main.nf rename to modules/nf-core/kallistobustools/count/main.nf diff --git a/modules/nf-core/modules/kallistobustools/count/meta.yml b/modules/nf-core/kallistobustools/count/meta.yml similarity index 100% rename from modules/nf-core/modules/kallistobustools/count/meta.yml rename to modules/nf-core/kallistobustools/count/meta.yml diff --git a/modules/nf-core/modules/kallistobustools/ref/main.nf b/modules/nf-core/kallistobustools/ref/main.nf similarity index 100% rename from modules/nf-core/modules/kallistobustools/ref/main.nf rename to modules/nf-core/kallistobustools/ref/main.nf diff --git a/modules/nf-core/modules/kallistobustools/ref/meta.yml b/modules/nf-core/kallistobustools/ref/meta.yml similarity index 100% rename from modules/nf-core/modules/kallistobustools/ref/meta.yml rename to modules/nf-core/kallistobustools/ref/meta.yml diff --git a/modules/nf-core/modules/star/genomegenerate/main.nf b/modules/nf-core/star/genomegenerate/main.nf similarity index 75% rename from modules/nf-core/modules/star/genomegenerate/main.nf rename to modules/nf-core/star/genomegenerate/main.nf index 3c298016..6ec634a1 100644 --- a/modules/nf-core/modules/star/genomegenerate/main.nf +++ b/modules/nf-core/star/genomegenerate/main.nf @@ -65,4 +65,32 @@ process STAR_GENOMEGENERATE { END_VERSIONS """ } + + stub: + """ + mkdir star + touch star/Genome + touch star/Log.out + touch star/SA + touch star/SAindex + touch star/chrLength.txt + touch star/chrName.txt + touch star/chrNameLength.txt + touch star/chrStart.txt + touch star/exonGeTrInfo.tab + touch star/exonInfo.tab + touch 
star/geneInfo.tab + touch star/genomeParameters.txt + touch star/sjdbInfo.txt + touch star/sjdbList.fromGTF.out.tab + touch star/sjdbList.out.tab + touch star/transcriptInfo.tab + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + star: \$(STAR --version | sed -e "s/STAR_//g") + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//') + END_VERSIONS + """ } diff --git a/modules/nf-core/modules/star/genomegenerate/meta.yml b/modules/nf-core/star/genomegenerate/meta.yml similarity index 100% rename from modules/nf-core/modules/star/genomegenerate/meta.yml rename to modules/nf-core/star/genomegenerate/meta.yml diff --git a/subworkflows/local/alevin.nf b/subworkflows/local/alevin.nf index 7db784ff..0eabc519 100644 --- a/subworkflows/local/alevin.nf +++ b/subworkflows/local/alevin.nf @@ -5,8 +5,8 @@ include { SIMPLEAF_INDEX } from '../../modules/local/simpleaf include { SIMPLEAF_QUANT } from '../../modules/local/simpleaf_quant' /* -- IMPORT NF-CORE MODULES/SUBWORKFLOWS -- */ -include { GUNZIP } from '../../modules/nf-core/modules/gunzip/main' -include { GFFREAD as GFFREAD_TXP2GENE } from '../../modules/nf-core/modules/gffread/main' +include { GUNZIP } from '../../modules/nf-core/gunzip/main' +include { GFFREAD as GFFREAD_TXP2GENE } from '../../modules/nf-core/gffread/main' def multiqc_report = [] diff --git a/subworkflows/local/align_cellranger.nf b/subworkflows/local/align_cellranger.nf index 744215e0..5452dc1e 100644 --- a/subworkflows/local/align_cellranger.nf +++ b/subworkflows/local/align_cellranger.nf @@ -2,9 +2,9 @@ * Alignment with Cellranger */ -include {CELLRANGER_MKGTF} from "../../modules/nf-core/modules/cellranger/mkgtf/main.nf" -include {CELLRANGER_MKREF} from "../../modules/nf-core/modules/cellranger/mkref/main.nf" -include {CELLRANGER_COUNT} from "../../modules/nf-core/modules/cellranger/count/main.nf" +include {CELLRANGER_MKGTF} from 
"../../modules/nf-core/cellranger/mkgtf/main.nf" +include {CELLRANGER_MKREF} from "../../modules/nf-core/cellranger/mkref/main.nf" +include {CELLRANGER_COUNT} from "../../modules/nf-core/cellranger/count/main.nf" include {MTX_TO_H5AD } from "../../modules/local/mtx_to_h5ad.nf" // Define workflow to subset and index a genome region fasta file diff --git a/subworkflows/local/fastqc.nf b/subworkflows/local/fastqc.nf index 50f55d5b..f18214a1 100644 --- a/subworkflows/local/fastqc.nf +++ b/subworkflows/local/fastqc.nf @@ -1,7 +1,7 @@ // // Check input samplesheet and get read channels // -include { FASTQC } from '../../modules/nf-core/modules/fastqc/main' +include { FASTQC } from '../../modules/nf-core/fastqc/main' workflow FASTQC_CHECK { take: diff --git a/subworkflows/local/kallisto_bustools.nf b/subworkflows/local/kallisto_bustools.nf index bc958b47..1e5318d1 100644 --- a/subworkflows/local/kallisto_bustools.nf +++ b/subworkflows/local/kallisto_bustools.nf @@ -1,10 +1,10 @@ /* -- IMPORT LOCAL MODULES/SUBWORKFLOWS -- */ include { GENE_MAP } from '../../modules/local/gene_map' -include {KALLISTOBUSTOOLS_COUNT } from '../../modules/nf-core/modules/kallistobustools/count/main' +include {KALLISTOBUSTOOLS_COUNT } from '../../modules/nf-core/kallistobustools/count/main' /* -- IMPORT NF-CORE MODULES/SUBWORKFLOWS -- */ -include { GUNZIP } from '../../modules/nf-core/modules/gunzip/main' -include { KALLISTOBUSTOOLS_REF } from '../../modules/nf-core/modules/kallistobustools/ref/main' +include { GUNZIP } from '../../modules/nf-core/gunzip/main' +include { KALLISTOBUSTOOLS_REF } from '../../modules/nf-core/kallistobustools/ref/main' def multiqc_report = [] diff --git a/subworkflows/local/starsolo.nf b/subworkflows/local/starsolo.nf index 2c2f57dd..73a7d55a 100644 --- a/subworkflows/local/starsolo.nf +++ b/subworkflows/local/starsolo.nf @@ -2,8 +2,8 @@ include { STAR_ALIGN } from '../../modules/local/star_align' /* -- IMPORT NF-CORE MODULES/SUBWORKFLOWS -- */ -include { GUNZIP } 
from '../../modules/nf-core/modules/gunzip/main' -include { STAR_GENOMEGENERATE } from '../../modules/nf-core/modules/star/genomegenerate/main' +include { GUNZIP } from '../../modules/nf-core/gunzip/main' +include { STAR_GENOMEGENERATE } from '../../modules/nf-core/star/genomegenerate/main' def multiqc_report = [] From 84e41062f30a82c68db4b05cf4ce116f448754a2 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Wed, 5 Oct 2022 11:34:24 +0000 Subject: [PATCH 146/165] Use public multiqc + add module for nf-core dumpsoftware versions --- modules/local/multiqc.nf | 39 --------------------------------------- workflows/scrnaseq.nf | 4 ++-- 2 files changed, 2 insertions(+), 41 deletions(-) delete mode 100644 modules/local/multiqc.nf diff --git a/modules/local/multiqc.nf b/modules/local/multiqc.nf deleted file mode 100644 index f7e745a0..00000000 --- a/modules/local/multiqc.nf +++ /dev/null @@ -1,39 +0,0 @@ -process MULTIQC { - label 'process_medium' - - conda (params.enable_conda ? 'bioconda::multiqc=1.11' : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.11--pyhdfd78af_0' : - 'quay.io/biocontainers/multiqc:1.11--pyhdfd78af_0' }" - - input: - path ch_multiqc_config - path ch_multiqc_custom_config - path software_versions_yaml - path workflow_summary - path ('fastqc/*') - path ("STAR/*") - path ("salmon_alevin/*") - - output: - path "*multiqc_report.html", emit: report - path "*_data" , emit: data - path "*_plots" , optional:true, emit: plots - path "versions.yml" , emit: versions - - script: - def args = task.ext.args ?: '' - def custom_config = params.multiqc_config ? "--config $multiqc_custom_config" : '' - """ - multiqc \\ - -f \\ - $args \\ - $custom_config \\ - . 
- - cat <<-END_VERSIONS > versions.yml - "${task.process}": - multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" ) - END_VERSIONS - """ -} diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf index 9f1df868..8407c6f9 100644 --- a/workflows/scrnaseq.nf +++ b/workflows/scrnaseq.nf @@ -54,8 +54,8 @@ include { MTX_CONVERSION } from "../subworkflows/local/mtx_conversion" // // MODULE: Installed directly from nf-core/modules // -include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/modules/custom/dumpsoftwareversions/main' -include { MULTIQC } from "../modules/local/multiqc" +include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' +include { MULTIQC } from "../modules/nf-core/multiqc" /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ From 2dc2c0dbd18eb4c3e916d99757f439de147983c5 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Wed, 5 Oct 2022 11:42:04 +0000 Subject: [PATCH 147/165] Some more fixes for new module structure --- workflows/scrnaseq.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf index 8407c6f9..6df59c3b 100644 --- a/workflows/scrnaseq.nf +++ b/workflows/scrnaseq.nf @@ -55,7 +55,7 @@ include { MTX_CONVERSION } from "../subworkflows/local/mtx_conversion" // MODULE: Installed directly from nf-core/modules // include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' -include { MULTIQC } from "../modules/nf-core/multiqc" +include { MULTIQC } from "../modules/nf-core/multiqc/main" /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -204,7 +204,7 @@ workflow SCRNASEQ { ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml')) 
ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect()) - ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(FASTQC_CHECK.out.fastqc_zip.collect{it[1]}.ifEmpty([])) ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_alevin) ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_star) From eb3a09f30fae82110b555d97bdd43c34c5969bc4 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Wed, 5 Oct 2022 11:48:25 +0000 Subject: [PATCH 148/165] Add more stuff to get lint warnings down --- modules/local/alevinqc.nf | 3 +++ modules/local/concat_h5ad.nf | 3 +++ modules/local/gene_map.nf | 5 +++++ modules/local/gffread_transcriptome.nf | 3 +++ modules/local/mtx_to_h5ad.nf | 3 +++ modules/local/mtx_to_seurat.nf | 3 +++ modules/local/samplesheet_check.nf | 4 ++++ modules/local/simpleaf_index.nf | 2 +- modules/local/simpleaf_quant.nf | 3 +++ modules/local/star_align.nf | 3 +++ 10 files changed, 31 insertions(+), 1 deletion(-) diff --git a/modules/local/alevinqc.nf b/modules/local/alevinqc.nf index 4e1a7d7e..0d9ef0b0 100644 --- a/modules/local/alevinqc.nf +++ b/modules/local/alevinqc.nf @@ -14,6 +14,9 @@ process ALEVINQC { tuple val(meta), path("alevin_report_${meta.id}.html"), emit: report path "versions.yml", emit: versions + when: + task.ext.when == null || task.ext.when + script: def prefix = task.ext.prefix ?: "${meta.id}" """ diff --git a/modules/local/concat_h5ad.nf b/modules/local/concat_h5ad.nf index 3bcf1755..c753bce8 100644 --- a/modules/local/concat_h5ad.nf +++ b/modules/local/concat_h5ad.nf @@ -13,6 +13,9 @@ process CONCAT_H5AD { output: path "*.h5ad", emit: h5ad + when: + task.ext.when == null || task.ext.when + script: """ concat_h5ad.py \\ diff --git a/modules/local/gene_map.nf b/modules/local/gene_map.nf index af1acee4..9a2a1ad3 100644 --- a/modules/local/gene_map.nf +++ b/modules/local/gene_map.nf @@ -3,6 +3,8 @@ */ process GENE_MAP { tag "$gtf" + label 
'process_low' + conda (params.enable_conda ? "conda-forge::python=3.8.3" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? @@ -15,6 +17,9 @@ process GENE_MAP { output: path "transcripts_to_genes.txt" , emit: gene_map + when: + task.ext.when == null || task.ext.when + script: if("${gtf}".endsWith('.gz')){ name = "${gtf.baseName}" diff --git a/modules/local/gffread_transcriptome.nf b/modules/local/gffread_transcriptome.nf index 77a17440..6e2a9ba4 100644 --- a/modules/local/gffread_transcriptome.nf +++ b/modules/local/gffread_transcriptome.nf @@ -15,6 +15,9 @@ process GFFREAD_TRANSCRIPTOME { path "${genome_fasta}.transcriptome.fa", emit: transcriptome_extracted path "versions.yml" , emit: versions + when: + task.ext.when == null || task.ext.when + script: """ gffread -F $gtf -w "${genome_fasta}.transcriptome.fa" -g $genome_fasta diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf index cf753f30..10bf0c88 100644 --- a/modules/local/mtx_to_h5ad.nf +++ b/modules/local/mtx_to_h5ad.nf @@ -15,6 +15,9 @@ process MTX_TO_H5AD { output: path "*.h5ad", emit: h5ad + when: + task.ext.when == null || task.ext.when + script: // def file paths for aligners (except cellranger) if (params.aligner == 'kallisto') { diff --git a/modules/local/mtx_to_seurat.nf b/modules/local/mtx_to_seurat.nf index 3d834a2f..54208fdd 100644 --- a/modules/local/mtx_to_seurat.nf +++ b/modules/local/mtx_to_seurat.nf @@ -15,6 +15,9 @@ process MTX_TO_SEURAT { output: path "*.rds", emit: seuratObjects + when: + task.ext.when == null || task.ext.when + script: def aligner = params.aligner if (params.aligner == "cellranger") { diff --git a/modules/local/samplesheet_check.nf b/modules/local/samplesheet_check.nf index f0a0b1f9..06595615 100644 --- a/modules/local/samplesheet_check.nf +++ b/modules/local/samplesheet_check.nf @@ -1,5 +1,6 @@ process SAMPLESHEET_CHECK { tag "$samplesheet" + label 'process_low' conda (params.enable_conda 
? "conda-forge::python=3.8.3" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? @@ -13,6 +14,9 @@ process SAMPLESHEET_CHECK { path '*.csv' , emit: csv path "versions.yml", emit: versions + when: + task.ext.when == null || task.ext.when + script: // This script is bundled with the pipeline, in nf-core/scrnaseq/bin/ """ check_samplesheet.py \\ diff --git a/modules/local/simpleaf_index.nf b/modules/local/simpleaf_index.nf index 939e294d..f8f35c43 100644 --- a/modules/local/simpleaf_index.nf +++ b/modules/local/simpleaf_index.nf @@ -16,7 +16,7 @@ process SIMPLEAF_INDEX { path "salmon/index" , emit: index path "salmon/ref/*_t2g_3col.tsv" , emit: transcript_tsv path "versions.yml" , emit: versions - path "salmon" + path "salmon" , emit: salmon when: task.ext.when == null || task.ext.when diff --git a/modules/local/simpleaf_quant.nf b/modules/local/simpleaf_quant.nf index 31dc26c4..8fb6139b 100644 --- a/modules/local/simpleaf_quant.nf +++ b/modules/local/simpleaf_quant.nf @@ -22,6 +22,9 @@ process SIMPLEAF_QUANT { tuple val(meta), path("*_alevin_results"), emit: alevin_results path "versions.yml" , emit: versions + when: + task.ext.when == null || task.ext.when + script: def args = task.ext.args ?: '' def args_list = args.tokenize() diff --git a/modules/local/star_align.nf b/modules/local/star_align.nf index c489d0b8..36f26bfe 100644 --- a/modules/local/star_align.nf +++ b/modules/local/star_align.nf @@ -33,6 +33,9 @@ process STAR_ALIGN { tuple val(meta), path('*fastq.gz') , optional:true, emit: fastq tuple val(meta), path('*.tab') , optional:true, emit: tab + when: + task.ext.when == null || task.ext.when + script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" From 71423454e4e6db3ee8bcfcdf9a8efe0546cafdff Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Wed, 5 Oct 2022 12:03:45 +0000 Subject: [PATCH 149/165] Added colelct and ifEmpty construct --- workflows/scrnaseq.nf | 4 
++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf index 6df59c3b..93d2a3ca 100644 --- a/workflows/scrnaseq.nf +++ b/workflows/scrnaseq.nf @@ -205,8 +205,8 @@ workflow SCRNASEQ { ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml')) ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect()) ch_multiqc_files = ch_multiqc_files.mix(FASTQC_CHECK.out.fastqc_zip.collect{it[1]}.ifEmpty([])) - ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_alevin) - ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_star) + ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_alevin.collect().ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_star.collect().ifEmpty([])) MULTIQC ( ch_multiqc_files.collect(), From e20e37993b0e293202cbb69306ec7a13cd178ef8 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Wed, 5 Oct 2022 12:08:58 +0000 Subject: [PATCH 150/165] Fix mixing stuff --- workflows/scrnaseq.nf | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf index 93d2a3ca..a2421246 100644 --- a/workflows/scrnaseq.nf +++ b/workflows/scrnaseq.nf @@ -75,8 +75,8 @@ ch_genome_fasta = params.genome_fasta ? file(params.genome_fasta) : [] ch_gtf = params.gtf ? file(params.gtf) : [] ch_transcript_fasta = params.transcript_fasta ? file(params.transcript_fasta): [] ch_txp2gene = params.txp2gene ? 
file(params.txp2gene) : [] -ch_multiqc_alevin = [] -ch_multiqc_star = [] +ch_multiqc_alevin = Channel.empty() +ch_multiqc_star = Channel.empty() if (params.barcode_whitelist) { ch_barcode_whitelist = file(params.barcode_whitelist) } else if (params.protocol.contains("10X")) { @@ -205,8 +205,8 @@ workflow SCRNASEQ { ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml')) ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect()) ch_multiqc_files = ch_multiqc_files.mix(FASTQC_CHECK.out.fastqc_zip.collect{it[1]}.ifEmpty([])) - ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_alevin.collect().ifEmpty([])) - ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_star.collect().ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_alevin.collect{it[1]}.ifEmpty([])), + ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_star.collect{it[1]}.ifEmpty([])), MULTIQC ( ch_multiqc_files.collect(), From 298233d17d428be262ff4497f0730cb06e1a52e4 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Wed, 5 Oct 2022 12:43:17 +0000 Subject: [PATCH 151/165] Fix black --- bin/check_samplesheet.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py index 03541ad4..8551a45e 100755 --- a/bin/check_samplesheet.py +++ b/bin/check_samplesheet.py @@ -140,6 +140,7 @@ def read_head(handle, num_lines=10): lines.append(line) return "".join(lines) + def print_error(error, context="Line", context_str=""): error_str = f"ERROR: Please check samplesheet -> {error}" if context != "" and context_str != "": @@ -147,6 +148,7 @@ def print_error(error, context="Line", context_str=""): print(error_str) sys.exit(1) + def sniff_format(handle): """ Detect the tabular format. 
From ccd798c3605c244e0e87f7a69c1dfdc580585d6b Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Wed, 5 Oct 2022 18:33:38 +0000 Subject: [PATCH 152/165] Remove diff file --- .../modules/salmon/index/salmon-index.diff | 36 ------------------- 1 file changed, 36 deletions(-) delete mode 100644 modules/nf-core/modules/salmon/index/salmon-index.diff diff --git a/modules/nf-core/modules/salmon/index/salmon-index.diff b/modules/nf-core/modules/salmon/index/salmon-index.diff deleted file mode 100644 index 87f976cd..00000000 --- a/modules/nf-core/modules/salmon/index/salmon-index.diff +++ /dev/null @@ -1,36 +0,0 @@ -Changes in module 'nf-core/modules/salmon/index' ---- modules/nf-core/modules/salmon/index/main.nf -+++ modules/nf-core/modules/salmon/index/main.nf -@@ -20,21 +20,28 @@ - - script: - def args = task.ext.args ?: '' -+ def kmer_argmatch = args =~ /\-k *(\d+)/ -+ def k = kmer_argmatch ? kmer_argmatch[0][1] : 31 - def get_decoy_ids = "grep '^>' $genome_fasta | cut -d ' ' -f 1 > decoys.txt" - def gentrome = "gentrome.fa" -+ def maybe_unzip = "cat" - if (genome_fasta.endsWith('.gz')) { - get_decoy_ids = "grep '^>' <(gunzip -c $genome_fasta) | cut -d ' ' -f 1 > decoys.txt" - gentrome = "gentrome.fa.gz" -+ maybe_unzip = "gunzip -c" - } - """ - $get_decoy_ids - sed -i.bak -e 's/>//g' decoys.txt -- cat $transcript_fasta $genome_fasta > $gentrome -+ cat $transcript_fasta $genome_fasta \\ -+ | $maybe_unzip \\ -+ | awk '!/^>/ { next } { getline seq } length(seq) >= $k { print \$0 "\\n" seq }' \\ -+ | gzip -c > gentrome.filtered.fasta.gz - - salmon \\ - index \\ - --threads $task.cpus \\ -- -t $gentrome \\ -+ -t gentrome.filtered.fasta.gz \\ - -d decoys.txt \\ - $args \\ - -i salmon - -************************************************************ From 8dd62e8b9f6c213f33d7441729ac67b5a28082d9 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Wed, 5 Oct 2022 18:40:35 +0000 Subject: [PATCH 153/165] Chore --- modules/local/gene_map.nf | 1 - 
modules/local/gffread_transcriptome.nf | 6 +++--- modules/local/star_align.nf | 6 +++--- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/modules/local/gene_map.nf b/modules/local/gene_map.nf index 9a2a1ad3..beca82a1 100644 --- a/modules/local/gene_map.nf +++ b/modules/local/gene_map.nf @@ -5,7 +5,6 @@ process GENE_MAP { tag "$gtf" label 'process_low' - conda (params.enable_conda ? "conda-forge::python=3.8.3" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/python:3.8.3' : diff --git a/modules/local/gffread_transcriptome.nf b/modules/local/gffread_transcriptome.nf index 6e2a9ba4..04d6b412 100644 --- a/modules/local/gffread_transcriptome.nf +++ b/modules/local/gffread_transcriptome.nf @@ -2,10 +2,10 @@ process GFFREAD_TRANSCRIPTOME { tag "${genome_fasta}" label 'process_low' - conda (params.enable_conda ? "bioconda::gffread=0.12.1" : null) + conda (params.enable_conda ? "bioconda::gffread=0.12.7" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gffread:0.12.1--h2e03b76_1' : - 'quay.io/biocontainers/gffread:0.12.1--h2e03b76_1' }" + 'https://depot.galaxyproject.org/singularity/gffread:0.12.7--hd03093a_1' : + 'quay.io/biocontainers/gffread:0.12.7--hd03093a_1' }" input: path genome_fasta diff --git a/modules/local/star_align.nf b/modules/local/star_align.nf index 36f26bfe..da8246a4 100644 --- a/modules/local/star_align.nf +++ b/modules/local/star_align.nf @@ -2,10 +2,10 @@ process STAR_ALIGN { tag "$meta.id" label 'process_high' - conda (params.enable_conda ? 'bioconda::star=2.7.8a' : null) + conda (params.enable_conda ? 'bioconda::star=2.7.10a' : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/star:2.7.8a--h9ee0642_1' : - 'quay.io/biocontainers/star:2.7.8a--h9ee0642_1' }" + 'https://depot.galaxyproject.org/singularity/star:2.7.10a--h9ee0642_0' : + 'quay.io/biocontainers/star:2.7.10a--h9ee0642_0' }" input: // From 55fab735603f6cd88bdb9886ec6a2712b9db274e Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Wed, 5 Oct 2022 18:41:11 +0000 Subject: [PATCH 154/165] Gene Filter label added --- modules/local/gtf_gene_filter.nf | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/local/gtf_gene_filter.nf b/modules/local/gtf_gene_filter.nf index 7f1a6aa1..82a0e092 100644 --- a/modules/local/gtf_gene_filter.nf +++ b/modules/local/gtf_gene_filter.nf @@ -1,5 +1,6 @@ process GTF_GENE_FILTER { tag "$fasta" + label 'process_low' conda (params.enable_conda ? "conda-forge::python=3.9.5" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? From 3d64158e310488cc91b52a55d2bc2f2305d811aa Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Thu, 6 Oct 2022 08:40:29 +0000 Subject: [PATCH 155/165] Bump to 2.1.0 --- CHANGELOG.md | 5 +++-- nextflow.config | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a3fd4dab..7faf2e7a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,15 +3,16 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
-## v2.0.1dev +## v2.1.0 - 2022-10-06 "Green Mercury Siberian Husky" +- Alevin workflow updated to use Alevin-Fry via simpleaf - thanks to @rob-p for supporting this and @fmalmeida implementing the support ### Fixes - Fixed Kallistobustools workflow [#123](https://github.com/nf-core/scrnaseq/issues/123) by upgrading to nf-core/modules module - Fixed matrix conversion error when running STAR with --soloFeatures GeneFull [#135](https://github.com/nf-core/scrnaseq/pull/135) - Fixed seurat matrix conversion error when running with conda profile [#136](https://github.com/nf-core/scrnaseq/pull/136) - Fixed Kallistobustools module [#116](https://github.com/nf-core/scrnaseq/issues/116). By updating nf-core module and making sure conversion modules take into account the different outputs produced by kallisto standard and non-standard workflows. -- Updated pipeline template to [nf-core/tools 2.5.1](https://github.com/nf-core/tools/releases/tag/2.5.1) +- Updated pipeline template to [nf-core/tools 2.6](https://github.com/nf-core/tools/releases/tag/2.6) ## v2.0.0 - 2022-06-17 "Gray Nickel Beagle" diff --git a/nextflow.config b/nextflow.config index 06b271a6..3c51af8c 100644 --- a/nextflow.config +++ b/nextflow.config @@ -217,7 +217,7 @@ manifest { description = 'Pipeline for processing 10x Genomics single cell rnaseq data' mainScript = 'main.nf' nextflowVersion = '!>=21.10.3' - version = '2.0.1dev' + version = '2.1.0' doi = '' } From 8adc51c1433cd9baa405690e40b65b67c508699f Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Thu, 6 Oct 2022 08:40:37 +0000 Subject: [PATCH 156/165] Add Versions to Seurat --- modules/local/mtx_to_seurat.nf | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/modules/local/mtx_to_seurat.nf b/modules/local/mtx_to_seurat.nf index 1c316ab8..0c3260e3 100644 --- a/modules/local/mtx_to_seurat.nf +++ b/modules/local/mtx_to_seurat.nf @@ -49,6 +49,15 @@ process MTX_TO_SEURAT { ${meta.id}_\${input_type}_matrix.rds \\ ${aligner} done + + 
yaml::write_yaml( + list( + '${task.process}'=list( + 'Seurat' = paste(packageVersion('Seurat'), collapse='.') + ) + ), + "versions.yml" + ) """ else @@ -59,6 +68,15 @@ process MTX_TO_SEURAT { $features \\ ${meta.id}_matrix.rds \\ ${aligner} + + yaml::write_yaml( + list( + '${task.process}'=list( + 'Seurat' = paste(packageVersion('Seurat'), collapse='.') + ) + ), + "versions.yml" + ) """ stub: From d4eba286e1137038cb6c54f19e16fdd4389aac79 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Thu, 6 Oct 2022 08:48:22 +0000 Subject: [PATCH 157/165] Add versions to seurat --- modules/local/mtx_to_seurat.nf | 1 + subworkflows/local/mtx_conversion.nf | 3 +++ workflows/scrnaseq.nf | 3 +++ 3 files changed, 7 insertions(+) diff --git a/modules/local/mtx_to_seurat.nf b/modules/local/mtx_to_seurat.nf index 0c3260e3..82e45594 100644 --- a/modules/local/mtx_to_seurat.nf +++ b/modules/local/mtx_to_seurat.nf @@ -14,6 +14,7 @@ process MTX_TO_SEURAT { output: path "*.rds", emit: seuratObjects + path "versions.yml", emit: versions when: task.ext.when == null || task.ext.when diff --git a/subworkflows/local/mtx_conversion.nf b/subworkflows/local/mtx_conversion.nf index 731842c8..2f0b3887 100644 --- a/subworkflows/local/mtx_conversion.nf +++ b/subworkflows/local/mtx_conversion.nf @@ -32,4 +32,7 @@ workflow MTX_CONVERSION { mtx_matrices ) + emit: + MTX_TO_SEURAT.out.versions + } diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf index 64aba674..6dae8eb9 100644 --- a/workflows/scrnaseq.nf +++ b/workflows/scrnaseq.nf @@ -188,6 +188,9 @@ workflow SCRNASEQ { ch_input ) + //Add Versions from MTX Conversion workflow too + ch_versions.mix(MTX_CONVERSION.out.versions) + // collect software versions CUSTOM_DUMPSOFTWAREVERSIONS ( ch_versions.unique().collectFile(name: 'collated_versions.yml') From d8279f842504b0c92dfcb74699d904d67a7c6ee9 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Thu, 6 Oct 2022 08:50:32 +0000 Subject: [PATCH 158/165] Add DOI to manifest --- nextflow.config 
| 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index 3c51af8c..1ca66cae 100644 --- a/nextflow.config +++ b/nextflow.config @@ -218,7 +218,7 @@ manifest { mainScript = 'main.nf' nextflowVersion = '!>=21.10.3' version = '2.1.0' - doi = '' + doi = '10.5281/zenodo.3568187' } // Load modules.config for DSL2 module specific options From cb9e21c364746646402c4671e9b2ce542d760916 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Thu, 6 Oct 2022 08:53:39 +0000 Subject: [PATCH 159/165] Remove TODO --- assets/methods_description_template.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/assets/methods_description_template.yml b/assets/methods_description_template.yml index 1e84fd63..1be6ed21 100644 --- a/assets/methods_description_template.yml +++ b/assets/methods_description_template.yml @@ -3,7 +3,6 @@ description: "Suggested text and references to use when describing pipeline usag section_name: "nf-core/scrnaseq Methods Description" section_href: "https://github.com/nf-core/scrnaseq" plot_type: "html" -## TODO nf-core: Update the HTML below to your prefered methods description, e.g. add publication citation for this pipeline ## You inject any metadata in the Nextflow '${workflow}' object data: |

    Methods

    From 4c9f99b9dca6c215b1372dac2c8c1fc053355d10 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Thu, 6 Oct 2022 08:56:00 +0000 Subject: [PATCH 160/165] Better todo + small fix for versiosn --- subworkflows/local/mtx_conversion.nf | 5 ++++- workflows/scrnaseq.nf | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/mtx_conversion.nf b/subworkflows/local/mtx_conversion.nf index 2f0b3887..924dc836 100644 --- a/subworkflows/local/mtx_conversion.nf +++ b/subworkflows/local/mtx_conversion.nf @@ -32,7 +32,10 @@ workflow MTX_CONVERSION { mtx_matrices ) + //TODO CONCAT h5ad and MTX to h5ad should also have versions.yaml output + ch_version = MTX_TO_SEURAT.out.versions + emit: - MTX_TO_SEURAT.out.versions + ch_versions } diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf index 6dae8eb9..2f469526 100644 --- a/workflows/scrnaseq.nf +++ b/workflows/scrnaseq.nf @@ -189,7 +189,7 @@ workflow SCRNASEQ { ) //Add Versions from MTX Conversion workflow too - ch_versions.mix(MTX_CONVERSION.out.versions) + ch_versions.mix(MTX_CONVERSION.out.ch_versions) // collect software versions CUSTOM_DUMPSOFTWAREVERSIONS ( From b6d0d7596f50139af14b284cc98670f1ab22c87c Mon Sep 17 00:00:00 2001 From: nf-core-bot Date: Thu, 6 Oct 2022 08:59:15 +0000 Subject: [PATCH 161/165] [automated] Fix linting with Prettier --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7faf2e7a..485ee9f5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## v2.1.0 - 2022-10-06 "Green Mercury Siberian Husky" - Alevin workflow updated to use Alevin-Fry via simpleaf - thanks to @rob-p for supporting this and @fmalmeida implementing the support + ### Fixes - Fixed Kallistobustools workflow [#123](https://github.com/nf-core/scrnaseq/issues/123) by upgrading to nf-core/modules module From a182c389bfb1a7c36fbf2ab35bc7f26fc041df1e Mon Sep 17 
00:00:00 2001 From: Alexander Peltzer Date: Thu, 6 Oct 2022 09:02:03 +0000 Subject: [PATCH 162/165] Small fix for versions --- modules/local/mtx_to_seurat.nf | 2 +- subworkflows/local/mtx_conversion.nf | 48 ++++++++++++++-------------- 2 files changed, 25 insertions(+), 25 deletions(-) diff --git a/modules/local/mtx_to_seurat.nf b/modules/local/mtx_to_seurat.nf index 82e45594..d3053b83 100644 --- a/modules/local/mtx_to_seurat.nf +++ b/modules/local/mtx_to_seurat.nf @@ -14,7 +14,7 @@ process MTX_TO_SEURAT { output: path "*.rds", emit: seuratObjects - path "versions.yml", emit: versions + path "versions.yml", emit: versions when: task.ext.when == null || task.ext.when diff --git a/subworkflows/local/mtx_conversion.nf b/subworkflows/local/mtx_conversion.nf index 924dc836..c1be4c70 100644 --- a/subworkflows/local/mtx_conversion.nf +++ b/subworkflows/local/mtx_conversion.nf @@ -10,30 +10,30 @@ workflow MTX_CONVERSION { samplesheet main: - // - // Convert matrix do h5ad - // - MTX_TO_H5AD ( - mtx_matrices - ) - - // - // Concat sample-specific h5ad in one - // - CONCAT_H5AD ( - MTX_TO_H5AD.out.h5ad.collect(), // gather all sample-specific files - samplesheet - ) - - // - // Convert matrix do seurat - // - MTX_TO_SEURAT ( - mtx_matrices - ) - - //TODO CONCAT h5ad and MTX to h5ad should also have versions.yaml output - ch_version = MTX_TO_SEURAT.out.versions + ch_versions = Channel.empty() + // Convert matrix do h5ad + // + MTX_TO_H5AD ( + mtx_matrices + ) + + // + // Concat sample-specific h5ad in one + // + CONCAT_H5AD ( + MTX_TO_H5AD.out.h5ad.collect(), // gather all sample-specific files + samplesheet + ) + + // + // Convert matrix do seurat + // + MTX_TO_SEURAT ( + mtx_matrices + ) + + //TODO CONCAT h5ad and MTX to h5ad should also have versions.yaml output + ch_version = ch_versions.mix(MTX_TO_SEURAT.out.versions) emit: ch_versions From e4f1460c7587862e38fec9d7574ebff64ab34794 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Thu, 6 Oct 2022 09:07:41 +0000 
Subject: [PATCH 163/165] [skip ci] Add Rob to Readme as contributor --- README.md | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 85bb84e6..5449c3ba 100644 --- a/README.md +++ b/README.md @@ -22,14 +22,10 @@ The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool On release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources. The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/scrnaseq/results). -- Alevin + AlevinQC -- STARSolo -- Kallisto + BUStools -- Cellranger This is a community effort in building a pipeline capable to support: -- Alevin + AlevinQC +- Alevin-Fry + AlevinQC - STARSolo - Kallisto + BUStools - Cellranger @@ -71,6 +67,7 @@ We thank the following people for their extensive assistance in the development - @KevinMenden - @FloWuenne +- @rob-p ## Contributions and Support From 7ad294a9c8fb1e062a72dfb44e51348485d661e2 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Thu, 6 Oct 2022 09:08:43 +0000 Subject: [PATCH 164/165] [skip ci] prettier --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index 5449c3ba..562e8acd 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,6 @@ The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool On release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. 
This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources. The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/scrnaseq/results). - This is a community effort in building a pipeline capable to support: - Alevin-Fry + AlevinQC From c2bd430d1f6ee4153b8629bf600d9c81ed0385cb Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Thu, 6 Oct 2022 09:22:02 +0000 Subject: [PATCH 165/165] Move seurat to R script --- bin/mtx_to_seurat.R | 10 ++++++++++ modules/local/mtx_to_seurat.nf | 18 ------------------ 2 files changed, 10 insertions(+), 18 deletions(-) diff --git a/bin/mtx_to_seurat.R b/bin/mtx_to_seurat.R index f4ef6b8e..c5bada16 100755 --- a/bin/mtx_to_seurat.R +++ b/bin/mtx_to_seurat.R @@ -23,3 +23,13 @@ if(aligner %in% c("kallisto", "alevin")) { seurat.object <- CreateSeuratObject(counts = expression.matrix) saveRDS(seurat.object, file = out.file) + + +yaml::write_yaml( +list( + 'MTX_TO_SEURAT'=list( + 'Seurat' = paste(packageVersion('Seurat'), collapse='.') + ) +), +"versions.yml" +) diff --git a/modules/local/mtx_to_seurat.nf b/modules/local/mtx_to_seurat.nf index d3053b83..fc452bcf 100644 --- a/modules/local/mtx_to_seurat.nf +++ b/modules/local/mtx_to_seurat.nf @@ -50,15 +50,6 @@ process MTX_TO_SEURAT { ${meta.id}_\${input_type}_matrix.rds \\ ${aligner} done - - yaml::write_yaml( - list( - '${task.process}'=list( - 'Seurat' = paste(packageVersion('Seurat'), collapse='.') - ) - ), - "versions.yml" - ) """ else @@ -69,15 +60,6 @@ process MTX_TO_SEURAT { $features \\ ${meta.id}_matrix.rds \\ ${aligner} - - yaml::write_yaml( - list( - '${task.process}'=list( - 'Seurat' = paste(packageVersion('Seurat'), collapse='.') - ) - ), - "versions.yml" - ) """ stub: