From 1dbfc3d3e0a67605ddada87da5db2555f80ad3b1 Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <almeidafmarques@gmail.com>
Date: Fri, 17 Jun 2022 09:17:19 +0200
Subject: [PATCH 001/165] Create mtx_to_h5ad.py

Script to convert matrix
---
 bin/mtx_to_h5ad.py | 39 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)
 create mode 100755 bin/mtx_to_h5ad.py

diff --git a/bin/mtx_to_h5ad.py b/bin/mtx_to_h5ad.py
new file mode 100755
index 00000000..5055885c
--- /dev/null
+++ b/bin/mtx_to_h5ad.py
@@ -0,0 +1,39 @@
+#!/usr/bin/env python3
+import scanpy as sc
+import argparse
+
+def mtx_to_adata(
+    mtx_file: str, barcode_file: str, feature_file: str, verbose: bool = False
+):
+
+    if verbose:
+        print("Reading in {}".format(mtx_file))
+
+    adata = sc.read_mtx(mtx_file)
+    adata.obs_names = pd.read_csv(barcode_file, header=None)[0].values
+    adata.var_names = pd.read_csv(feature_file, header=None)[0].values
+
+    return adata
+
+
+if __name__ == "__main__":
+
+    parser = argparse.ArgumentParser(description="Converts mtx output to h5ad.")
+
+    parser.add_argument("-m", "--mtx", dest="mtx", help="Path to mtx file.")
+    parser.add_argument(
+        "-v", "--verbose", dest="verbose", help="Toggle verbose messages", default=False
+    )
+    parser.add_argument("-f", "--feature", dest="feature", help="Path to feature file.")
+    parser.add_argument("-b", "--barcode", dest="barcode", help="Path to barcode file.")
+    parser.add_argument("-o", "--out", dest="out", help="Output path.")
+
+    args = vars(parser.parse_args())
+
+    adata = mtx_to_adata(
+        args["mtx"], args["barcode"], args["feature"], verbose=args["verbose"]
+    )
+
+    adata.write_h5ad(args["out"])
+
+    print("Wrote h5ad file to {}".format(args["out"]))
\ No newline at end of file

From 733ef50acd2971a6069e0f22945fdee24f058941 Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <almeidafmarques@gmail.com>
Date: Fri, 17 Jun 2022 09:56:22 +0200
Subject: [PATCH 002/165] adding conversion module

---
 modules/local/mtx_to_h5ad.nf           | 25 +++++++++++++++++++++++++
 subworkflows/local/align_cellranger.nf |  6 ++++++
 2 files changed, 31 insertions(+)
 create mode 100644 modules/local/mtx_to_h5ad.nf

diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf
new file mode 100644
index 00000000..995005c8
--- /dev/null
+++ b/modules/local/mtx_to_h5ad.nf
@@ -0,0 +1,25 @@
+process MTX_TO_H5AD {
+    tag "$prefix"
+    label 'process_medium'
+
+    conda (params.enable_conda ? "conda-forge::scanpy conda-forge::python-igraph conda-forge::leidenalg" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'docker://gcfntnu/scanpy:1.7.0' :
+        'gcfntnu/scanpy:1.7.0' }"
+
+    input:
+    path cellranger_outdir
+
+    output:
+    path "matrix.h5ad", emit: h5ad
+
+    script:
+    def prefix = cellranger_outdir.getName().toString()
+    """
+    mtx_to_h5ad.py \\
+        -m \$(find ${cellranger_outdir} -wholename "*filtered_feature_bc_matrix/matrix.mtx.gz") \\
+        -f \$(find ${cellranger_outdir} -wholename "*filtered_feature_bc_matrix/features.tsv.gz") \\
+        -b \$(find ${cellranger_outdir} -wholename "*filtered_feature_bc_matrix/barcodes.tsv.gz") \\
+        -o matrix.h5ad
+    """
+}
diff --git a/subworkflows/local/align_cellranger.nf b/subworkflows/local/align_cellranger.nf
index 7513935d..05bf0e4d 100644
--- a/subworkflows/local/align_cellranger.nf
+++ b/subworkflows/local/align_cellranger.nf
@@ -5,6 +5,7 @@
 include {CELLRANGER_MKGTF} from "../../modules/nf-core/modules/cellranger/mkgtf/main.nf"
 include {CELLRANGER_MKREF} from "../../modules/nf-core/modules/cellranger/mkref/main.nf"
 include {CELLRANGER_COUNT} from "../../modules/nf-core/modules/cellranger/count/main.nf"
+include {MTX_TO_H5AD     } from "../../modules/local/mtx_to_h5ad.nf"
 
 // Define workflow to subset and index a genome region fasta file
 workflow CELLRANGER_ALIGN {
@@ -40,6 +41,11 @@ workflow CELLRANGER_ALIGN {
         )
         ch_versions = ch_versions.mix(CELLRANGER_COUNT.out.versions)
 
+        // Convert matrix do h5ad
+        MTX_TO_H5AD (
+            CELLRANGER_COUNT.out.outs
+        )
+
     emit:
         ch_versions
         cellranger_out  = CELLRANGER_COUNT.out.outs

From 7c1ba78dcaf58f76b51f27399a0f3164ad7d4f46 Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <almeidafmarques@gmail.com>
Date: Fri, 17 Jun 2022 10:25:29 +0200
Subject: [PATCH 003/165] Update modules.config

add publish dir directive to new module
---
 conf/modules.config | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/conf/modules.config b/conf/modules.config
index 5c78b365..c1d00e4b 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -55,6 +55,12 @@ if(params.aligner == "cellranger") {
                 mode: params.publish_dir_mode
             ]
         }
+        withName: MTX_TO_H5AD {
+            publishDir = [
+                path: "${params.outdir}/cellranger/mtx_to_h5ad",
+                mode: params.publish_dir_mode
+            ]
+        }
     }
 }
 

From e06d4888cf0262598a1f969d5543150d9a511abc Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <almeidafmarques@gmail.com>
Date: Fri, 17 Jun 2022 13:30:04 +0200
Subject: [PATCH 004/165] Update mtx_to_h5ad.py

adding pandas import
---
 bin/mtx_to_h5ad.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/bin/mtx_to_h5ad.py b/bin/mtx_to_h5ad.py
index 5055885c..0868eb37 100755
--- a/bin/mtx_to_h5ad.py
+++ b/bin/mtx_to_h5ad.py
@@ -1,5 +1,6 @@
 #!/usr/bin/env python3
 import scanpy as sc
+import pandas as pd
 import argparse
 
 def mtx_to_adata(

From 336ea7941ffe6546fe1a1cff8564a016fe1c67fd Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alexander.peltzer@boehringer-ingelheim.com>
Date: Fri, 17 Jun 2022 13:55:05 +0200
Subject: [PATCH 005/165] Bump dev

---
 CHANGELOG.md    | 2 +-
 nextflow.config | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index ae4f9149..92e1d6fc 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,7 +3,7 @@
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
-## v2.0dev -
+## v2.0.0 - 2022-06-17 "Gray Nickel Beagle"
 
 - Pipeline ported to dsl2
 - Template update with latest nf-core/tools v2.1
diff --git a/nextflow.config b/nextflow.config
index ad7d4ba6..c986adb8 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -198,7 +198,7 @@ manifest {
     description     = 'Pipeline for processing of 10xGenomics single cell rnaseq data'
     mainScript      = 'main.nf'
     nextflowVersion = '!>=21.10.3'
-    version = '2.0.0'
+    version = '2.0.1dev'
 }
 
 // Load modules.config for DSL2 module specific options

From ab35513172027336f45f1e02e03cc477ba925f0b Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alexander.peltzer@boehringer-ingelheim.com>
Date: Fri, 17 Jun 2022 13:56:40 +0200
Subject: [PATCH 006/165] Adjust changelog for dev

---
 CHANGELOG.md | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 92e1d6fc..e60b3b56 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,6 +3,13 @@
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## v2.0.1dev
+
+
+### Fixes
+
+
+
 ## v2.0.0 - 2022-06-17 "Gray Nickel Beagle"
 
 - Pipeline ported to dsl2

From bd2c1e5dc8b65ee7a03931d57ebc75df3902e2c4 Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <almeidafmarques@gmail.com>
Date: Fri, 17 Jun 2022 14:01:42 +0200
Subject: [PATCH 007/165] Update mtx_to_h5ad.py

made it simpler
---
 bin/mtx_to_h5ad.py | 25 +++++++------------------
 1 file changed, 7 insertions(+), 18 deletions(-)

diff --git a/bin/mtx_to_h5ad.py b/bin/mtx_to_h5ad.py
index 0868eb37..95390271 100755
--- a/bin/mtx_to_h5ad.py
+++ b/bin/mtx_to_h5ad.py
@@ -1,18 +1,13 @@
 #!/usr/bin/env python3
 import scanpy as sc
-import pandas as pd
 import argparse
 
-def mtx_to_adata(
-    mtx_file: str, barcode_file: str, feature_file: str, verbose: bool = False
-):
+def mtx_to_adata( mtx_dir: str, verbose: bool = False ):
 
     if verbose:
-        print("Reading in {}".format(mtx_file))
+        print("Reading in {}".format(mtx_dir))
 
-    adata = sc.read_mtx(mtx_file)
-    adata.obs_names = pd.read_csv(barcode_file, header=None)[0].values
-    adata.var_names = pd.read_csv(feature_file, header=None)[0].values
+    adata = sc.read_10x_mtx(mtx_dir)
 
     return adata
 
@@ -21,19 +16,13 @@ def mtx_to_adata(
 
     parser = argparse.ArgumentParser(description="Converts mtx output to h5ad.")
 
-    parser.add_argument("-m", "--mtx", dest="mtx", help="Path to mtx file.")
-    parser.add_argument(
-        "-v", "--verbose", dest="verbose", help="Toggle verbose messages", default=False
-    )
-    parser.add_argument("-f", "--feature", dest="feature", help="Path to feature file.")
-    parser.add_argument("-b", "--barcode", dest="barcode", help="Path to barcode file.")
-    parser.add_argument("-o", "--out", dest="out", help="Output path.")
+    parser.add_argument("-m", "--mtx",     dest="mtx",     help="Path to mtx directory."                 )
+    parser.add_argument("-v", "--verbose", dest="verbose", help="Toggle verbose messages", default=False )
+    parser.add_argument("-o", "--out",     dest="out",     help="Output path."                           )
 
     args = vars(parser.parse_args())
 
-    adata = mtx_to_adata(
-        args["mtx"], args["barcode"], args["feature"], verbose=args["verbose"]
-    )
+    adata = mtx_to_adata(args["mtx"], verbose=args["verbose"])
 
     adata.write_h5ad(args["out"])
 

From bba06408ce7499c0d698b1b3ffc38cf9cf8c4189 Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alexander.peltzer@boehringer-ingelheim.com>
Date: Fri, 17 Jun 2022 14:02:52 +0200
Subject: [PATCH 008/165] Add grst, fix badges

---
 README.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index aec34ff7..184a0d38 100644
--- a/README.md
+++ b/README.md
@@ -3,7 +3,7 @@
 [![GitHub Actions CI Status](https://github.com/nf-core/scrnaseq/workflows/nf-core%20CI/badge.svg)](https://github.com/nf-core/scrnaseq/actions?query=workflow%3A%22nf-core+CI%22)
 [![GitHub Actions Linting Status](https://github.com/nf-core/scrnaseq/workflows/nf-core%20linting/badge.svg)](https://github.com/nf-core/scrnaseq/actions?query=workflow%3A%22nf-core+linting%22)
 [![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?logo=Amazon%20AWS)](https://nf-co.re/scrnaseq/results)
-[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8)](https://doi.org/10.5281/zenodo.XXXXXXX)
+[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.6656322)](https://doi.org/10.5281/zenodo.6656322)
 
 [![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A521.10.3-23aa62.svg)](https://www.nextflow.io/)
 [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?logo=anaconda)](https://docs.conda.io/en/latest/)
@@ -71,6 +71,7 @@ We thank the following people for their extensive assistance in the development
 
 - @KevinMenden
 - @ggabernet
+- @grst
 - @FloWuenne
 - @fmalmeida
 

From b1d4a45b0dab2d903dc438a99df9100ff4aa0dde Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <almeidafmarques@gmail.com>
Date: Fri, 17 Jun 2022 14:03:15 +0200
Subject: [PATCH 009/165] Update mtx_to_h5ad.nf

fit in to new script
---
 modules/local/mtx_to_h5ad.nf | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf
index 995005c8..f32d3bc8 100644
--- a/modules/local/mtx_to_h5ad.nf
+++ b/modules/local/mtx_to_h5ad.nf
@@ -17,9 +17,7 @@ process MTX_TO_H5AD {
     def prefix = cellranger_outdir.getName().toString()
     """
     mtx_to_h5ad.py \\
-        -m \$(find ${cellranger_outdir} -wholename "*filtered_feature_bc_matrix/matrix.mtx.gz") \\
-        -f \$(find ${cellranger_outdir} -wholename "*filtered_feature_bc_matrix/features.tsv.gz") \\
-        -b \$(find ${cellranger_outdir} -wholename "*filtered_feature_bc_matrix/barcodes.tsv.gz") \\
+        -m ${cellranger_outdir}/outs/filtered_feature_bc_matrix/matrix.mtx.gz") \\
         -o matrix.h5ad
     """
 }

From df36e97099efd7e09169d5937c8d32af7f2a13c6 Mon Sep 17 00:00:00 2001
From: nf-core-bot <core@nf-co.re>
Date: Fri, 17 Jun 2022 12:04:39 +0000
Subject: [PATCH 010/165] [automated] Fix linting with Prettier

---
 CHANGELOG.md | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index e60b3b56..68229e01 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,11 +5,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## v2.0.1dev
 
-
 ### Fixes
 
-
-
 ## v2.0.0 - 2022-06-17 "Gray Nickel Beagle"
 
 - Pipeline ported to dsl2

From 5a38db3cd70d51bd5587d1712f8615115f11bdbe Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <almeidafmarques@gmail.com>
Date: Fri, 17 Jun 2022 17:08:25 +0200
Subject: [PATCH 011/165] Update mtx_to_h5ad.nf

---
 modules/local/mtx_to_h5ad.nf | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf
index f32d3bc8..16abbf45 100644
--- a/modules/local/mtx_to_h5ad.nf
+++ b/modules/local/mtx_to_h5ad.nf
@@ -1,5 +1,5 @@
 process MTX_TO_H5AD {
-    tag "$prefix"
+    //tag "$prefix"
     label 'process_medium'
 
     conda (params.enable_conda ? "conda-forge::scanpy conda-forge::python-igraph conda-forge::leidenalg" : null)
@@ -14,8 +14,9 @@ process MTX_TO_H5AD {
     path "matrix.h5ad", emit: h5ad
 
     script:
-    def prefix = cellranger_outdir.getName().toString()
+    //def prefix = cellranger_outdir.getName().toString()
     """
+    ls ${cellranger_outdir} > ls.txt
     mtx_to_h5ad.py \\
         -m ${cellranger_outdir}/outs/filtered_feature_bc_matrix/matrix.mtx.gz") \\
         -o matrix.h5ad

From bc80ba39a5a001b37bd5959a2f909b1646a0ed57 Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <almeidafmarques@gmail.com>
Date: Fri, 17 Jun 2022 17:14:11 +0200
Subject: [PATCH 012/165] Update align_cellranger.nf

---
 subworkflows/local/align_cellranger.nf | 1 +
 1 file changed, 1 insertion(+)

diff --git a/subworkflows/local/align_cellranger.nf b/subworkflows/local/align_cellranger.nf
index 05bf0e4d..de459ba3 100644
--- a/subworkflows/local/align_cellranger.nf
+++ b/subworkflows/local/align_cellranger.nf
@@ -42,6 +42,7 @@ workflow CELLRANGER_ALIGN {
         ch_versions = ch_versions.mix(CELLRANGER_COUNT.out.versions)
 
         // Convert matrix do h5ad
+        CELLRANGER_COUNT.out.outs.collect().view()
         MTX_TO_H5AD (
             CELLRANGER_COUNT.out.outs
         )

From 350ef5da28dc58bfe9383a64df83ed524bafff80 Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <almeidafmarques@gmail.com>
Date: Fri, 17 Jun 2022 19:58:45 +0200
Subject: [PATCH 013/165] trying to get prefix

---
 modules/local/mtx_to_h5ad.nf           | 7 +++----
 subworkflows/local/align_cellranger.nf | 1 -
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf
index 16abbf45..77b68cc5 100644
--- a/modules/local/mtx_to_h5ad.nf
+++ b/modules/local/mtx_to_h5ad.nf
@@ -1,5 +1,5 @@
 process MTX_TO_H5AD {
-    //tag "$prefix"
+    tag "$prefix"
     label 'process_medium'
 
     conda (params.enable_conda ? "conda-forge::scanpy conda-forge::python-igraph conda-forge::leidenalg" : null)
@@ -14,11 +14,10 @@ process MTX_TO_H5AD {
     path "matrix.h5ad", emit: h5ad
 
     script:
-    //def prefix = cellranger_outdir.getName().toString()
+    def prefix = cellranger_outdir[0].getName().toString()[-3]
     """
-    ls ${cellranger_outdir} > ls.txt
     mtx_to_h5ad.py \\
-        -m ${cellranger_outdir}/outs/filtered_feature_bc_matrix/matrix.mtx.gz") \\
+        -m filtered_feature_bc_matrix \\
         -o matrix.h5ad
     """
 }
diff --git a/subworkflows/local/align_cellranger.nf b/subworkflows/local/align_cellranger.nf
index de459ba3..05bf0e4d 100644
--- a/subworkflows/local/align_cellranger.nf
+++ b/subworkflows/local/align_cellranger.nf
@@ -42,7 +42,6 @@ workflow CELLRANGER_ALIGN {
         ch_versions = ch_versions.mix(CELLRANGER_COUNT.out.versions)
 
         // Convert matrix do h5ad
-        CELLRANGER_COUNT.out.outs.collect().view()
         MTX_TO_H5AD (
             CELLRANGER_COUNT.out.outs
         )

From 840f35f22459fcd04f63dad71269c47d87911336 Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <almeidafmarques@gmail.com>
Date: Fri, 17 Jun 2022 20:04:28 +0200
Subject: [PATCH 014/165] Update align_cellranger.nf

checking map definition
---
 subworkflows/local/align_cellranger.nf | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/subworkflows/local/align_cellranger.nf b/subworkflows/local/align_cellranger.nf
index 05bf0e4d..19cb6e47 100644
--- a/subworkflows/local/align_cellranger.nf
+++ b/subworkflows/local/align_cellranger.nf
@@ -42,6 +42,10 @@ workflow CELLRANGER_ALIGN {
         ch_versions = ch_versions.mix(CELLRANGER_COUNT.out.versions)
 
         // Convert matrix do h5ad
+        CELLRANGER_COUNT.out.outs.map{ inputs ->
+            prefix = inputs[0].toString()[-3]
+            [ prefix, inputs ]
+        }.view()
         MTX_TO_H5AD (
             CELLRANGER_COUNT.out.outs
         )

From 8099ce88090a29cdab8930577377b9a1c36f591e Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <almeidafmarques@gmail.com>
Date: Fri, 17 Jun 2022 20:12:16 +0200
Subject: [PATCH 015/165] input receives a prefix value

---
 modules/local/mtx_to_h5ad.nf           | 3 +--
 subworkflows/local/align_cellranger.nf | 9 ++++-----
 2 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf
index 77b68cc5..15cf2021 100644
--- a/modules/local/mtx_to_h5ad.nf
+++ b/modules/local/mtx_to_h5ad.nf
@@ -8,13 +8,12 @@ process MTX_TO_H5AD {
         'gcfntnu/scanpy:1.7.0' }"
 
     input:
-    path cellranger_outdir
+    tuple val(prefix), path(cellranger_outdir)
 
     output:
     path "matrix.h5ad", emit: h5ad
 
     script:
-    def prefix = cellranger_outdir[0].getName().toString()[-3]
     """
     mtx_to_h5ad.py \\
         -m filtered_feature_bc_matrix \\
diff --git a/subworkflows/local/align_cellranger.nf b/subworkflows/local/align_cellranger.nf
index 19cb6e47..cc12f251 100644
--- a/subworkflows/local/align_cellranger.nf
+++ b/subworkflows/local/align_cellranger.nf
@@ -42,12 +42,11 @@ workflow CELLRANGER_ALIGN {
         ch_versions = ch_versions.mix(CELLRANGER_COUNT.out.versions)
 
         // Convert matrix do h5ad
-        CELLRANGER_COUNT.out.outs.map{ inputs ->
-            prefix = inputs[0].toString()[-3]
-            [ prefix, inputs ]
-        }.view()
         MTX_TO_H5AD (
-            CELLRANGER_COUNT.out.outs
+            CELLRANGER_COUNT.out.outs.map{ inputs ->
+                prefix = inputs[0].toString()[-3]
+                [ prefix, inputs ]
+            }
         )
 
     emit:

From ef6e8b300ea5ef38d13fa43af961f47453353bf1 Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <almeidafmarques@gmail.com>
Date: Fri, 17 Jun 2022 20:13:17 +0200
Subject: [PATCH 016/165] Update align_cellranger.nf

add tokenize
---
 subworkflows/local/align_cellranger.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/subworkflows/local/align_cellranger.nf b/subworkflows/local/align_cellranger.nf
index cc12f251..504049ac 100644
--- a/subworkflows/local/align_cellranger.nf
+++ b/subworkflows/local/align_cellranger.nf
@@ -44,7 +44,7 @@ workflow CELLRANGER_ALIGN {
         // Convert matrix do h5ad
         MTX_TO_H5AD (
             CELLRANGER_COUNT.out.outs.map{ inputs ->
-                prefix = inputs[0].toString()[-3]
+                prefix = inputs[0].toString().tokenize('/')[-3]
                 [ prefix, inputs ]
             }
         )

From af186a49ead42fe309d3bc3292a3075fc93d8031 Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <almeidafmarques@gmail.com>
Date: Fri, 17 Jun 2022 20:14:39 +0200
Subject: [PATCH 017/165] uses prefix from module

---
 conf/modules.config | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/conf/modules.config b/conf/modules.config
index c1d00e4b..ede9a3d7 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -57,7 +57,7 @@ if(params.aligner == "cellranger") {
         }
         withName: MTX_TO_H5AD {
             publishDir = [
-                path: "${params.outdir}/cellranger/mtx_to_h5ad",
+                path: "${params.outdir}/cellranger/count/${prefix}",
                 mode: params.publish_dir_mode
             ]
         }

From 6bcaea181ea4df980ba97eacde04788849e0e644 Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <almeidafmarques@gmail.com>
Date: Fri, 17 Jun 2022 20:18:31 +0200
Subject: [PATCH 018/165] fix publishing dir

---
 conf/modules.config          | 2 +-
 modules/local/mtx_to_h5ad.nf | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/conf/modules.config b/conf/modules.config
index ede9a3d7..7478e76f 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -57,7 +57,7 @@ if(params.aligner == "cellranger") {
         }
         withName: MTX_TO_H5AD {
             publishDir = [
-                path: "${params.outdir}/cellranger/count/${prefix}",
+                path: "${params.outdir}/cellranger/count",
                 mode: params.publish_dir_mode
             ]
         }
diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf
index 15cf2021..77d52084 100644
--- a/modules/local/mtx_to_h5ad.nf
+++ b/modules/local/mtx_to_h5ad.nf
@@ -15,8 +15,9 @@ process MTX_TO_H5AD {
 
     script:
     """
+    mkdir -p ${prefix}
     mtx_to_h5ad.py \\
         -m filtered_feature_bc_matrix \\
-        -o matrix.h5ad
+        -o ${prefix}/matrix.h5ad
     """
 }

From c47b0dacafac55b7fe32d526b9f7aed3b08bae9d Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <almeidafmarques@gmail.com>
Date: Fri, 17 Jun 2022 20:20:56 +0200
Subject: [PATCH 019/165] differentiate prefixes

---
 modules/local/mtx_to_h5ad.nf | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf
index 77d52084..57500486 100644
--- a/modules/local/mtx_to_h5ad.nf
+++ b/modules/local/mtx_to_h5ad.nf
@@ -8,16 +8,17 @@ process MTX_TO_H5AD {
         'gcfntnu/scanpy:1.7.0' }"
 
     input:
-    tuple val(prefix), path(cellranger_outdir)
+    tuple val(cellranger_prefix), path(cellranger_outdir)
 
     output:
     path "matrix.h5ad", emit: h5ad
 
     script:
+    def prefix = cellranger_prefix.tokenize('-')[1]
     """
-    mkdir -p ${prefix}
+    mkdir -p ${cellranger_prefix}
     mtx_to_h5ad.py \\
         -m filtered_feature_bc_matrix \\
-        -o ${prefix}/matrix.h5ad
+        -o ${cellranger_prefix}/matrix.h5ad
     """
 }

From 1814f4f46f74924be09f2203e07b25b08f789c2d Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <almeidafmarques@gmail.com>
Date: Fri, 17 Jun 2022 20:23:06 +0200
Subject: [PATCH 020/165] saving values in meta map

---
 modules/local/mtx_to_h5ad.nf           | 11 +++++------
 subworkflows/local/align_cellranger.nf |  6 ++++--
 2 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf
index 57500486..781fffa9 100644
--- a/modules/local/mtx_to_h5ad.nf
+++ b/modules/local/mtx_to_h5ad.nf
@@ -1,5 +1,5 @@
 process MTX_TO_H5AD {
-    tag "$prefix"
+    tag "$meta.id"
     label 'process_medium'
 
     conda (params.enable_conda ? "conda-forge::scanpy conda-forge::python-igraph conda-forge::leidenalg" : null)
@@ -8,17 +8,16 @@ process MTX_TO_H5AD {
         'gcfntnu/scanpy:1.7.0' }"
 
     input:
-    tuple val(cellranger_prefix), path(cellranger_outdir)
+    tuple val(meta), path(cellranger_outdir)
 
     output:
-    path "matrix.h5ad", emit: h5ad
+    path "${meta.cellranger_prefix}", emit: h5ad
 
     script:
-    def prefix = cellranger_prefix.tokenize('-')[1]
     """
-    mkdir -p ${cellranger_prefix}
+    mkdir -p ${meta.cellranger_prefix}
     mtx_to_h5ad.py \\
         -m filtered_feature_bc_matrix \\
-        -o ${cellranger_prefix}/matrix.h5ad
+        -o ${meta.cellranger_prefix}/matrix.h5ad
     """
 }
diff --git a/subworkflows/local/align_cellranger.nf b/subworkflows/local/align_cellranger.nf
index 504049ac..ac00659f 100644
--- a/subworkflows/local/align_cellranger.nf
+++ b/subworkflows/local/align_cellranger.nf
@@ -44,8 +44,10 @@ workflow CELLRANGER_ALIGN {
         // Convert matrix do h5ad
         MTX_TO_H5AD (
             CELLRANGER_COUNT.out.outs.map{ inputs ->
-                prefix = inputs[0].toString().tokenize('/')[-3]
-                [ prefix, inputs ]
+            meta = [:]
+            meta.cellranger_prefix = inputs[0].toString().tokenize('/')[-3]
+            meta.id = meta.cellranger_prefix.tokenize('-')[1]
+                [ meta, inputs ]
             }
         )
 

From 220300927dc6eb6383880eb375daebfee7c9d6f1 Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <almeidafmarques@gmail.com>
Date: Fri, 17 Jun 2022 20:34:45 +0200
Subject: [PATCH 021/165] changing other images

---
 modules/local/kallistobustools_count.nf | 2 +-
 modules/local/mtx_to_h5ad.nf            | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/modules/local/kallistobustools_count.nf b/modules/local/kallistobustools_count.nf
index 7f3dabff..382de329 100644
--- a/modules/local/kallistobustools_count.nf
+++ b/modules/local/kallistobustools_count.nf
@@ -4,7 +4,7 @@ process KALLISTOBUSTOOLS_COUNT {
 
     conda (params.enable_conda ? "bioconda::kb-python=0.25.1" : null)
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/kb-python:0.25.1--py_0' :
+        'https://depot.galaxyproject.org/singularity/scanpy-scripts:1.1.6--pypyhdfd78af_0' :
         'quay.io/biocontainers/kb-python:0.25.1--py_0' }"
 
     input:
diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf
index 781fffa9..58bf8f57 100644
--- a/modules/local/mtx_to_h5ad.nf
+++ b/modules/local/mtx_to_h5ad.nf
@@ -5,7 +5,7 @@ process MTX_TO_H5AD {
     conda (params.enable_conda ? "conda-forge::scanpy conda-forge::python-igraph conda-forge::leidenalg" : null)
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
         'docker://gcfntnu/scanpy:1.7.0' :
-        'gcfntnu/scanpy:1.7.0' }"
+        'quay.io/biocontainers/scanpy-scripts:1.1.6--pypyhdfd78af_0' }"
 
     input:
     tuple val(meta), path(cellranger_outdir)

From d7ad56dadaebfab093a0513054fd8d3f0e686901 Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <almeidafmarques@gmail.com>
Date: Sun, 19 Jun 2022 12:39:44 +0200
Subject: [PATCH 022/165] Update mtx_to_h5ad.nf

changing to biocontainers images
---
 modules/local/mtx_to_h5ad.nf | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf
index 58bf8f57..4e7bd535 100644
--- a/modules/local/mtx_to_h5ad.nf
+++ b/modules/local/mtx_to_h5ad.nf
@@ -4,8 +4,8 @@ process MTX_TO_H5AD {
 
     conda (params.enable_conda ? "conda-forge::scanpy conda-forge::python-igraph conda-forge::leidenalg" : null)
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'docker://gcfntnu/scanpy:1.7.0' :
-        'quay.io/biocontainers/scanpy-scripts:1.1.6--pypyhdfd78af_0' }"
+        'https://depot.galaxyproject.org/singularity/scanpy:1.7.2--pyhdfd78af_0' :
+        'quay.io/biocontainers/scanpy:1.7.2--pyhdfd78af_0' }"
 
     input:
     tuple val(meta), path(cellranger_outdir)

From fac8fcaae4a21ad0f9d3caca7478afb914260f62 Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <almeidafmarques@gmail.com>
Date: Sun, 19 Jun 2022 13:49:05 +0200
Subject: [PATCH 023/165] updating publish dir directive

---
 conf/modules.config          | 2 +-
 modules/local/mtx_to_h5ad.nf | 7 ++++---
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/conf/modules.config b/conf/modules.config
index 7478e76f..fad74332 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -57,7 +57,7 @@ if(params.aligner == "cellranger") {
         }
         withName: MTX_TO_H5AD {
             publishDir = [
-                path: "${params.outdir}/cellranger/count",
+                path: "${params.outdir}/cellranger/count/${meta.cellranger_prefix}/filtered_feature_bc_matrix",
                 mode: params.publish_dir_mode
             ]
         }
diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf
index 4e7bd535..3c7631fd 100644
--- a/modules/local/mtx_to_h5ad.nf
+++ b/modules/local/mtx_to_h5ad.nf
@@ -11,13 +11,14 @@ process MTX_TO_H5AD {
     tuple val(meta), path(cellranger_outdir)
 
     output:
-    path "${meta.cellranger_prefix}", emit: h5ad
+    path "matrix.h5ad.gz", emit: h5ad
 
     script:
     """
-    mkdir -p ${meta.cellranger_prefix}
     mtx_to_h5ad.py \\
         -m filtered_feature_bc_matrix \\
-        -o ${meta.cellranger_prefix}/matrix.h5ad
+        -o matrix.h5ad
+    
+    gzip -c matrix.h5ad > matrix.h5ad.gz
     """
 }

From 6f66870d36a9d0165dee000c78948fafef88b664 Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <almeidafmarques@gmail.com>
Date: Mon, 20 Jun 2022 09:41:40 +0200
Subject: [PATCH 024/165] single quoting it to check if var can be used

---
 conf/modules.config          | 2 +-
 modules/local/mtx_to_h5ad.nf | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/conf/modules.config b/conf/modules.config
index fad74332..47aaa942 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -57,7 +57,7 @@ if(params.aligner == "cellranger") {
         }
         withName: MTX_TO_H5AD {
             publishDir = [
-                path: "${params.outdir}/cellranger/count/${meta.cellranger_prefix}/filtered_feature_bc_matrix",
+                path: '${params.outdir}/cellranger/count/${meta.cellranger_prefix}/filtered_feature_bc_matrix',
                 mode: params.publish_dir_mode
             ]
         }
diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf
index 3c7631fd..f6ac0319 100644
--- a/modules/local/mtx_to_h5ad.nf
+++ b/modules/local/mtx_to_h5ad.nf
@@ -8,7 +8,7 @@ process MTX_TO_H5AD {
         'quay.io/biocontainers/scanpy:1.7.2--pyhdfd78af_0' }"
 
     input:
-    tuple val(meta), path(cellranger_outdir)
+    tuple val(meta), path(inputs)
 
     output:
     path "matrix.h5ad.gz", emit: h5ad

From 11225fc3c5603f9f167af7d06998388414ccc7ba Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <almeidafmarques@gmail.com>
Date: Mon, 20 Jun 2022 10:02:57 +0200
Subject: [PATCH 025/165] trying to publish files in fixed place

---
 conf/modules.config          |  2 +-
 modules/local/mtx_to_h5ad.nf | 10 ++++++++--
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/conf/modules.config b/conf/modules.config
index 47aaa942..7478e76f 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -57,7 +57,7 @@ if(params.aligner == "cellranger") {
         }
         withName: MTX_TO_H5AD {
             publishDir = [
-                path: '${params.outdir}/cellranger/count/${meta.cellranger_prefix}/filtered_feature_bc_matrix',
+                path: "${params.outdir}/cellranger/count",
                 mode: params.publish_dir_mode
             ]
         }
diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf
index f6ac0319..b81309d0 100644
--- a/modules/local/mtx_to_h5ad.nf
+++ b/modules/local/mtx_to_h5ad.nf
@@ -8,17 +8,23 @@ process MTX_TO_H5AD {
         'quay.io/biocontainers/scanpy:1.7.2--pyhdfd78af_0' }"
 
     input:
+    // inputs from cellranger nf-core module does not come in a single sample dir
+    // for each sample, the sub-folders and files come directly in array.
     tuple val(meta), path(inputs)
 
     output:
-    path "matrix.h5ad.gz", emit: h5ad
+    path "${meta.cellranger_prefix}/outs/filtered_feature_bc_matrix/matrix.h5ad.gz", emit: h5ad
 
     script:
     """
+    # create dir to mirror cellranger output organisation to have results published in the same place
+    mkdir ${meta.cellranger_prefix}/outs/filtered_feature_bc_matrix ;
+
+    # convert file types
     mtx_to_h5ad.py \\
         -m filtered_feature_bc_matrix \\
         -o matrix.h5ad
     
-    gzip -c matrix.h5ad > matrix.h5ad.gz
+    gzip -c matrix.h5ad > ${meta.cellranger_prefix}/outs/filtered_feature_bc_matrix/matrix.h5ad.gz
     """
 }

From 7ee1e71322a57c30fef41d17b216bcdce4a77d85 Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <almeidafmarques@gmail.com>
Date: Mon, 20 Jun 2022 10:59:54 +0200
Subject: [PATCH 026/165] added stub-run and fixed how pipeline publishes files

---
 modules/local/mtx_to_h5ad.nf           | 11 ++++++++++-
 subworkflows/local/align_cellranger.nf |  5 ++++-
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf
index b81309d0..774a2686 100644
--- a/modules/local/mtx_to_h5ad.nf
+++ b/modules/local/mtx_to_h5ad.nf
@@ -18,7 +18,7 @@ process MTX_TO_H5AD {
     script:
     """
     # create dir to mirror cellranger output organisation to have results published in the same place
-    mkdir ${meta.cellranger_prefix}/outs/filtered_feature_bc_matrix ;
+    mkdir -p ${meta.cellranger_prefix}/outs/filtered_feature_bc_matrix ;
 
     # convert file types
     mtx_to_h5ad.py \\
@@ -27,4 +27,13 @@ process MTX_TO_H5AD {
     
     gzip -c matrix.h5ad > ${meta.cellranger_prefix}/outs/filtered_feature_bc_matrix/matrix.h5ad.gz
     """
+
+    stub:
+    """
+    # create dir to mirror cellranger output organisation to have results published in the same place
+    mkdir -p ${meta.cellranger_prefix}/outs/filtered_feature_bc_matrix ;
+
+    # create dummy
+    touch ${meta.cellranger_prefix}/outs/filtered_feature_bc_matrix/matrix.h5ad.gz
+    """
 }
diff --git a/subworkflows/local/align_cellranger.nf b/subworkflows/local/align_cellranger.nf
index ac00659f..31724d88 100644
--- a/subworkflows/local/align_cellranger.nf
+++ b/subworkflows/local/align_cellranger.nf
@@ -45,8 +45,11 @@ workflow CELLRANGER_ALIGN {
         MTX_TO_H5AD (
             CELLRANGER_COUNT.out.outs.map{ inputs ->
             meta = [:]
-            meta.cellranger_prefix = inputs[0].toString().tokenize('/')[-3]
+            // in stub-run variable is string and not an array
+            if (inputs.getName() ==~ 'fake_file.txt') { meta.cellranger_prefix = [inputs][0].toString().tokenize('/')[-3] } 
+            else { meta.cellranger_prefix = inputs[0].toString().tokenize('/')[-3] }
             meta.id = meta.cellranger_prefix.tokenize('-')[1]
+            
                 [ meta, inputs ]
             }
         )

From eeb46aeb5bb55b10149a0d9bb14332cbadf3b696 Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <almeidafmarques@gmail.com>
Date: Mon, 20 Jun 2022 12:35:54 +0200
Subject: [PATCH 027/165] remove trailing whitespace

---
 modules/local/mtx_to_h5ad.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf
index 774a2686..8daf17c8 100644
--- a/modules/local/mtx_to_h5ad.nf
+++ b/modules/local/mtx_to_h5ad.nf
@@ -24,7 +24,7 @@ process MTX_TO_H5AD {
     mtx_to_h5ad.py \\
         -m filtered_feature_bc_matrix \\
         -o matrix.h5ad
-    
+
     gzip -c matrix.h5ad > ${meta.cellranger_prefix}/outs/filtered_feature_bc_matrix/matrix.h5ad.gz
     """
 

From 17cd392bdcb9abeff210191612fca20d3021a7d2 Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <almeidafmarques@gmail.com>
Date: Mon, 20 Jun 2022 14:57:14 +0200
Subject: [PATCH 028/165] fixed stub-run check

---
 subworkflows/local/align_cellranger.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/subworkflows/local/align_cellranger.nf b/subworkflows/local/align_cellranger.nf
index 31724d88..cd22c42b 100644
--- a/subworkflows/local/align_cellranger.nf
+++ b/subworkflows/local/align_cellranger.nf
@@ -46,7 +46,7 @@ workflow CELLRANGER_ALIGN {
             CELLRANGER_COUNT.out.outs.map{ inputs ->
             meta = [:]
             // in stub-run variable is string and not an array
-            if (inputs.getName() ==~ 'fake_file.txt') { meta.cellranger_prefix = [inputs][0].toString().tokenize('/')[-3] } 
+            if (workflow.stubRun) { meta.cellranger_prefix = [inputs][0].toString().tokenize('/')[-3] } 
             else { meta.cellranger_prefix = inputs[0].toString().tokenize('/')[-3] }
             meta.id = meta.cellranger_prefix.tokenize('-')[1]
             

From 3959cd6e7d8ce63224a6a0b5e0bb5d9b1a96ef9c Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <felipemarques89@gmail.com>
Date: Mon, 20 Jun 2022 15:19:45 +0200
Subject: [PATCH 029/165] Adding `MTX_TO_H5AD` module (#1)

Created module to automatically convert `.mtx` file into `.h5ad` using scanpy.
---
 bin/mtx_to_h5ad.py                      | 29 ++++++++++++++++++
 conf/modules.config                     |  6 ++++
 modules/local/kallistobustools_count.nf |  2 +-
 modules/local/mtx_to_h5ad.nf            | 39 +++++++++++++++++++++++++
 subworkflows/local/align_cellranger.nf  | 14 +++++++++
 5 files changed, 89 insertions(+), 1 deletion(-)
 create mode 100755 bin/mtx_to_h5ad.py
 create mode 100644 modules/local/mtx_to_h5ad.nf

diff --git a/bin/mtx_to_h5ad.py b/bin/mtx_to_h5ad.py
new file mode 100755
index 00000000..95390271
--- /dev/null
+++ b/bin/mtx_to_h5ad.py
@@ -0,0 +1,29 @@
+#!/usr/bin/env python3
+import scanpy as sc
+import argparse
+
+def mtx_to_adata( mtx_dir: str, verbose: bool = False ):
+
+    if verbose:
+        print("Reading in {}".format(mtx_dir))
+
+    adata = sc.read_10x_mtx(mtx_dir)
+
+    return adata
+
+
+if __name__ == "__main__":
+
+    parser = argparse.ArgumentParser(description="Converts mtx output to h5ad.")
+
+    parser.add_argument("-m", "--mtx",     dest="mtx",     help="Path to mtx directory."                 )
+    parser.add_argument("-v", "--verbose", dest="verbose", help="Toggle verbose messages", default=False )
+    parser.add_argument("-o", "--out",     dest="out",     help="Output path."                           )
+
+    args = vars(parser.parse_args())
+
+    adata = mtx_to_adata(args["mtx"], verbose=args["verbose"])
+
+    adata.write_h5ad(args["out"])
+
+    print("Wrote h5ad file to {}".format(args["out"]))
\ No newline at end of file
diff --git a/conf/modules.config b/conf/modules.config
index 5c78b365..7478e76f 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -55,6 +55,12 @@ if(params.aligner == "cellranger") {
                 mode: params.publish_dir_mode
             ]
         }
+        withName: MTX_TO_H5AD {
+            publishDir = [
+                path: "${params.outdir}/cellranger/count",
+                mode: params.publish_dir_mode
+            ]
+        }
     }
 }
 
diff --git a/modules/local/kallistobustools_count.nf b/modules/local/kallistobustools_count.nf
index 7f3dabff..382de329 100644
--- a/modules/local/kallistobustools_count.nf
+++ b/modules/local/kallistobustools_count.nf
@@ -4,7 +4,7 @@ process KALLISTOBUSTOOLS_COUNT {
 
     conda (params.enable_conda ? "bioconda::kb-python=0.25.1" : null)
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/kb-python:0.25.1--py_0' :
+        'https://depot.galaxyproject.org/singularity/scanpy-scripts:1.1.6--pypyhdfd78af_0' :
         'quay.io/biocontainers/kb-python:0.25.1--py_0' }"
 
     input:
diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf
new file mode 100644
index 00000000..8daf17c8
--- /dev/null
+++ b/modules/local/mtx_to_h5ad.nf
@@ -0,0 +1,39 @@
+process MTX_TO_H5AD {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda (params.enable_conda ? "conda-forge::scanpy conda-forge::python-igraph conda-forge::leidenalg" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/scanpy:1.7.2--pyhdfd78af_0' :
+        'quay.io/biocontainers/scanpy:1.7.2--pyhdfd78af_0' }"
+
+    input:
+    // inputs from cellranger nf-core module does not come in a single sample dir
+    // for each sample, the sub-folders and files come directly in array.
+    tuple val(meta), path(inputs)
+
+    output:
+    path "${meta.cellranger_prefix}/outs/filtered_feature_bc_matrix/matrix.h5ad.gz", emit: h5ad
+
+    script:
+    """
+    # create dir to mirror cellranger output organisation to have results published in the same place
+    mkdir -p ${meta.cellranger_prefix}/outs/filtered_feature_bc_matrix ;
+
+    # convert file types
+    mtx_to_h5ad.py \\
+        -m filtered_feature_bc_matrix \\
+        -o matrix.h5ad
+
+    gzip -c matrix.h5ad > ${meta.cellranger_prefix}/outs/filtered_feature_bc_matrix/matrix.h5ad.gz
+    """
+
+    stub:
+    """
+    # create dir to mirror cellranger output organisation to have results published in the same place
+    mkdir -p ${meta.cellranger_prefix}/outs/filtered_feature_bc_matrix ;
+
+    # create dummy
+    touch ${meta.cellranger_prefix}/outs/filtered_feature_bc_matrix/matrix.h5ad.gz
+    """
+}
diff --git a/subworkflows/local/align_cellranger.nf b/subworkflows/local/align_cellranger.nf
index 7513935d..cd22c42b 100644
--- a/subworkflows/local/align_cellranger.nf
+++ b/subworkflows/local/align_cellranger.nf
@@ -5,6 +5,7 @@
 include {CELLRANGER_MKGTF} from "../../modules/nf-core/modules/cellranger/mkgtf/main.nf"
 include {CELLRANGER_MKREF} from "../../modules/nf-core/modules/cellranger/mkref/main.nf"
 include {CELLRANGER_COUNT} from "../../modules/nf-core/modules/cellranger/count/main.nf"
+include {MTX_TO_H5AD     } from "../../modules/local/mtx_to_h5ad.nf"
 
 // Define workflow to subset and index a genome region fasta file
 workflow CELLRANGER_ALIGN {
@@ -40,6 +41,19 @@ workflow CELLRANGER_ALIGN {
         )
         ch_versions = ch_versions.mix(CELLRANGER_COUNT.out.versions)
 
+        // Convert matrix do h5ad
+        MTX_TO_H5AD (
+            CELLRANGER_COUNT.out.outs.map{ inputs ->
+            meta = [:]
+            // in stub-run variable is string and not an array
+            if (workflow.stubRun) { meta.cellranger_prefix = [inputs][0].toString().tokenize('/')[-3] } 
+            else { meta.cellranger_prefix = inputs[0].toString().tokenize('/')[-3] }
+            meta.id = meta.cellranger_prefix.tokenize('-')[1]
+            
+                [ meta, inputs ]
+            }
+        )
+
     emit:
         ch_versions
         cellranger_out  = CELLRANGER_COUNT.out.outs

From 3f6ea9afd4677199f9b34708ef65aa373b512090 Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <almeidafmarques@gmail.com>
Date: Mon, 20 Jun 2022 17:12:27 +0200
Subject: [PATCH 030/165] made summary more generic

---
 conf/modules.config          |  2 +-
 modules/local/mtx_to_h5ad.nf | 18 +++++++-----------
 2 files changed, 8 insertions(+), 12 deletions(-)

diff --git a/conf/modules.config b/conf/modules.config
index 7478e76f..8ca6a93b 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -57,7 +57,7 @@ if(params.aligner == "cellranger") {
         }
         withName: MTX_TO_H5AD {
             publishDir = [
-                path: "${params.outdir}/cellranger/count",
+                path: { "${params.outdir}/cellranger/count/${meta.cellranger_prefix}/outs/filtered_feature_bc_matrix" },
                 mode: params.publish_dir_mode
             ]
         }
diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf
index 8daf17c8..4bd649d0 100644
--- a/modules/local/mtx_to_h5ad.nf
+++ b/modules/local/mtx_to_h5ad.nf
@@ -13,27 +13,23 @@ process MTX_TO_H5AD {
     tuple val(meta), path(inputs)
 
     output:
-    path "${meta.cellranger_prefix}/outs/filtered_feature_bc_matrix/matrix.h5ad.gz", emit: h5ad
+    path "matrix.h5ad.gz", emit: h5ad
 
     script:
+    if (params.aligner == 'cellranger') {
+        matrix_directory = "filtered_feature_bc_matrix"
+    }
     """
-    # create dir to mirror cellranger output organisation to have results published in the same place
-    mkdir -p ${meta.cellranger_prefix}/outs/filtered_feature_bc_matrix ;
-
     # convert file types
     mtx_to_h5ad.py \\
-        -m filtered_feature_bc_matrix \\
+        -m ${matrix_directory} \\
         -o matrix.h5ad
 
-    gzip -c matrix.h5ad > ${meta.cellranger_prefix}/outs/filtered_feature_bc_matrix/matrix.h5ad.gz
+    gzip -c matrix.h5ad > matrix.h5ad.gz
     """
 
     stub:
     """
-    # create dir to mirror cellranger output organisation to have results published in the same place
-    mkdir -p ${meta.cellranger_prefix}/outs/filtered_feature_bc_matrix ;
-
-    # create dummy
-    touch ${meta.cellranger_prefix}/outs/filtered_feature_bc_matrix/matrix.h5ad.gz
+    touch matrix.h5ad.gz
     """
 }

From 1f3d828b362211b7def45f37deda41a4d55b5512 Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <almeidafmarques@gmail.com>
Date: Tue, 21 Jun 2022 10:05:38 +0200
Subject: [PATCH 031/165] fixed singularity image

---
 modules/local/kallistobustools_count.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/local/kallistobustools_count.nf b/modules/local/kallistobustools_count.nf
index 382de329..7f3dabff 100644
--- a/modules/local/kallistobustools_count.nf
+++ b/modules/local/kallistobustools_count.nf
@@ -4,7 +4,7 @@ process KALLISTOBUSTOOLS_COUNT {
 
     conda (params.enable_conda ? "bioconda::kb-python=0.25.1" : null)
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/scanpy-scripts:1.1.6--pypyhdfd78af_0' :
+        'https://depot.galaxyproject.org/singularity/kb-python:0.25.1--py_0' :
         'quay.io/biocontainers/kb-python:0.25.1--py_0' }"
 
     input:

From 7078858921b9f7748aba1acf9fc257d6f7865b32 Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <almeidafmarques@gmail.com>
Date: Tue, 21 Jun 2022 10:56:35 +0200
Subject: [PATCH 032/165] added conversion module for kallistobustools

---
 bin/cellranger_mtx_to_h5ad.py           | 29 +++++++++++++++++++++++
 bin/mtx_to_h5ad.py                      | 31 ++++++++++++++++++-------
 conf/modules.config                     | 11 +++++++++
 modules/local/mtx_to_h5ad.nf            | 18 ++++++++++----
 subworkflows/local/kallisto_bustools.nf |  6 +++++
 5 files changed, 83 insertions(+), 12 deletions(-)
 create mode 100755 bin/cellranger_mtx_to_h5ad.py

diff --git a/bin/cellranger_mtx_to_h5ad.py b/bin/cellranger_mtx_to_h5ad.py
new file mode 100755
index 00000000..95390271
--- /dev/null
+++ b/bin/cellranger_mtx_to_h5ad.py
@@ -0,0 +1,29 @@
+#!/usr/bin/env python3
+import scanpy as sc
+import argparse
+
+def mtx_to_adata( mtx_dir: str, verbose: bool = False ):
+
+    if verbose:
+        print("Reading in {}".format(mtx_dir))
+
+    adata = sc.read_10x_mtx(mtx_dir)
+
+    return adata
+
+
+if __name__ == "__main__":
+
+    parser = argparse.ArgumentParser(description="Converts mtx output to h5ad.")
+
+    parser.add_argument("-m", "--mtx",     dest="mtx",     help="Path to mtx directory."                 )
+    parser.add_argument("-v", "--verbose", dest="verbose", help="Toggle verbose messages", default=False )
+    parser.add_argument("-o", "--out",     dest="out",     help="Output path."                           )
+
+    args = vars(parser.parse_args())
+
+    adata = mtx_to_adata(args["mtx"], verbose=args["verbose"])
+
+    adata.write_h5ad(args["out"])
+
+    print("Wrote h5ad file to {}".format(args["out"]))
\ No newline at end of file
diff --git a/bin/mtx_to_h5ad.py b/bin/mtx_to_h5ad.py
index 95390271..cb03e387 100755
--- a/bin/mtx_to_h5ad.py
+++ b/bin/mtx_to_h5ad.py
@@ -1,13 +1,22 @@
 #!/usr/bin/env python3
+import sys
+import os
 import scanpy as sc
+import pandas as pd
+import typing
 import argparse
 
-def mtx_to_adata( mtx_dir: str, verbose: bool = False ):
+
+def mtx_to_adata(
+    mtx_file: str, barcode_file: str, feature_file: str, verbose: bool = False
+):
 
     if verbose:
-        print("Reading in {}".format(mtx_dir))
+        print("Reading in {}".format(mtx_file))
 
-    adata = sc.read_10x_mtx(mtx_dir)
+    adata = sc.read_mtx(mtx_file)
+    adata.obs_names = pd.read_csv(barcode_file, header=None)[0].values
+    adata.var_names = pd.read_csv(feature_file, header=None)[0].values
 
     return adata
 
@@ -16,14 +25,20 @@ def mtx_to_adata( mtx_dir: str, verbose: bool = False ):
 
     parser = argparse.ArgumentParser(description="Converts mtx output to h5ad.")
 
-    parser.add_argument("-m", "--mtx",     dest="mtx",     help="Path to mtx directory."                 )
-    parser.add_argument("-v", "--verbose", dest="verbose", help="Toggle verbose messages", default=False )
-    parser.add_argument("-o", "--out",     dest="out",     help="Output path."                           )
+    parser.add_argument("-m", "--mtx", dest="mtx", help="Path to mtx file.")
+    parser.add_argument(
+        "-v", "--verbose", dest="verbose", help="Toggle verbose messages", default=False
+    )
+    parser.add_argument("-f", "--feature", dest="feature", help="Path to feature file.")
+    parser.add_argument("-b", "--barcode", dest="barcode", help="Path to barcode file.")
+    parser.add_argument("-o", "--out", dest="out", help="Output path.")
 
     args = vars(parser.parse_args())
 
-    adata = mtx_to_adata(args["mtx"], verbose=args["verbose"])
+    adata = mtx_to_adata(
+        args["mtx"], args["barcode"], args["feature"], verbose=args["verbose"]
+    )
 
     adata.write_h5ad(args["out"])
 
-    print("Wrote h5ad file to {}".format(args["out"]))
\ No newline at end of file
+    print("Wrote h5ad file to {}".format(args["out"]))
diff --git a/conf/modules.config b/conf/modules.config
index 8ca6a93b..9484a21f 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -80,3 +80,14 @@ if (params.aligner == "star") {
         }
     }
 }
+
+if (params.aligner == "kallisto") {
+    process {
+        withName: MTX_TO_H5AD {
+            publishDir = [
+                path: { "${params.outdir}/kallistobustools/${meta.id}_kallistobustools_count/counts_unfiltered" },
+                mode: params.publish_dir_mode
+            ]
+        }
+    }
+}
diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf
index 4bd649d0..7292feaa 100644
--- a/modules/local/mtx_to_h5ad.nf
+++ b/modules/local/mtx_to_h5ad.nf
@@ -16,13 +16,23 @@ process MTX_TO_H5AD {
     path "matrix.h5ad.gz", emit: h5ad
 
     script:
-    if (params.aligner == 'cellranger') {
-        matrix_directory = "filtered_feature_bc_matrix"
-    }
+    if (params.aligner == 'cellranger')
+    """
+    # convert file types
+    cellranger_mtx_to_h5ad.py \\
+        -m filtered_feature_bc_matrix \\
+        -o matrix.h5ad
+
+    gzip -c matrix.h5ad > matrix.h5ad.gz
+    """
+
+    else if (params.aligner == 'kallisto')
     """
     # convert file types
     mtx_to_h5ad.py \\
-        -m ${matrix_directory} \\
+        -m ${meta.id}_kallistobustools_count/counts_unfiltered/*.mtx \\
+        -b ${meta.id}_kallistobustools_count/counts_unfiltered/*.barcodes.txt \\
+        -f ${meta.id}_kallistobustools_count/counts_unfiltered/*.genes.txt \\
         -o matrix.h5ad
 
     gzip -c matrix.h5ad > matrix.h5ad.gz
diff --git a/subworkflows/local/kallisto_bustools.nf b/subworkflows/local/kallisto_bustools.nf
index 7503f85a..5d99d864 100644
--- a/subworkflows/local/kallisto_bustools.nf
+++ b/subworkflows/local/kallisto_bustools.nf
@@ -1,6 +1,7 @@
 /* --    IMPORT LOCAL MODULES/SUBWORKFLOWS     -- */
 include { GENE_MAP }                          from '../../modules/local/gene_map'
 include { KALLISTOBUSTOOLS_COUNT }            from '../../modules/local/kallistobustools_count'
+include {MTX_TO_H5AD     }                    from "../../modules/local/mtx_to_h5ad.nf"
 
 /* --    IMPORT NF-CORE MODULES/SUBWORKFLOWS   -- */
 include { GUNZIP }                      from '../../modules/nf-core/modules/gunzip/main'
@@ -64,6 +65,11 @@ workflow KALLISTO_BUSTOOLS {
     )
     ch_versions = ch_versions.mix(KALLISTOBUSTOOLS_COUNT.out.versions)
 
+    // Convert matrix do h5ad
+    MTX_TO_H5AD (
+        KALLISTOBUSTOOLS_COUNT.out.counts
+    )
+
 
     emit:
     ch_versions

From 68e3a4942b3f2aed6ab434f0e3ccfafafe9f5c54 Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <almeidafmarques@gmail.com>
Date: Tue, 21 Jun 2022 12:35:51 +0200
Subject: [PATCH 033/165] added conversion module for alevin

---
 bin/mtx_to_h5ad.py                      |  2 +-
 conf/modules.config                     |  6 ++++++
 modules/local/mtx_to_h5ad.nf            | 24 ++++++++++++++++++------
 subworkflows/local/alevin.nf            |  8 ++++++++
 subworkflows/local/kallisto_bustools.nf |  2 +-
 5 files changed, 34 insertions(+), 8 deletions(-)

diff --git a/bin/mtx_to_h5ad.py b/bin/mtx_to_h5ad.py
index cb03e387..f006f8a2 100755
--- a/bin/mtx_to_h5ad.py
+++ b/bin/mtx_to_h5ad.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3
+#!/usr/bin/env python
 import sys
 import os
 import scanpy as sc
diff --git a/conf/modules.config b/conf/modules.config
index 9484a21f..1f891688 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -70,6 +70,12 @@ if (params.aligner == "alevin") {
             ext.args = "--table transcript_id,gene_id"
             ext.prefix = { "${gff.baseName}_gffread" }
         }
+        withName: MTX_TO_H5AD {
+            publishDir = [
+                path: { "${params.outdir}/salmon/${meta.id}_alevin_results/alevin" },
+                mode: params.publish_dir_mode
+            ]
+        }
     }
 }
 
diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf
index 7292feaa..2fd7474f 100644
--- a/modules/local/mtx_to_h5ad.nf
+++ b/modules/local/mtx_to_h5ad.nf
@@ -13,7 +13,7 @@ process MTX_TO_H5AD {
     tuple val(meta), path(inputs)
 
     output:
-    path "matrix.h5ad.gz", emit: h5ad
+    path "*.h5ad.gz", emit: h5ad
 
     script:
     if (params.aligner == 'cellranger')
@@ -30,12 +30,24 @@ process MTX_TO_H5AD {
     """
     # convert file types
     mtx_to_h5ad.py \\
-        -m ${meta.id}_kallistobustools_count/counts_unfiltered/*.mtx \\
-        -b ${meta.id}_kallistobustools_count/counts_unfiltered/*.barcodes.txt \\
-        -f ${meta.id}_kallistobustools_count/counts_unfiltered/*.genes.txt \\
-        -o matrix.h5ad
+        -m *_kallistobustools_count/counts_unfiltered/*.mtx \\
+        -b *_kallistobustools_count/counts_unfiltered/*.barcodes.txt \\
+        -f *_kallistobustools_count/counts_unfiltered/*.genes.txt \\
+        -o cells_x_genes.h5ad
 
-    gzip -c matrix.h5ad > matrix.h5ad.gz
+    gzip -c cells_x_genes.h5ad > cells_x_genes.h5ad.gz
+    """
+
+    else if (params.aligner == 'alevin')
+    """
+    # convert file types
+    mtx_to_h5ad.py \\
+        -m *_alevin_results/alevin/quants_mat.mtx.gz \\
+        -b *_alevin_results/alevin/quants_mat_rows.txt \\
+        -f *_alevin_results/alevin/quants_mat_cols.txt \\
+        -o quants_mat.h5ad
+
+    gzip -c quants_mat.h5ad > quants_mat.h5ad.gz
     """
 
     stub:
diff --git a/subworkflows/local/alevin.nf b/subworkflows/local/alevin.nf
index a5cf6607..abaa6efc 100644
--- a/subworkflows/local/alevin.nf
+++ b/subworkflows/local/alevin.nf
@@ -2,6 +2,7 @@
 include { GFFREAD_TRANSCRIPTOME }             from '../../modules/local/gffread_transcriptome'
 include { SALMON_ALEVIN }                     from '../../modules/local/salmon_alevin'
 include { ALEVINQC }                          from '../../modules/local/alevinqc'
+include {MTX_TO_H5AD     }                    from '../../modules/local/mtx_to_h5ad.nf'
 
 /* --    IMPORT NF-CORE MODULES/SUBWORKFLOWS   -- */
 include { GUNZIP }                      from '../../modules/nf-core/modules/gunzip/main'
@@ -72,6 +73,13 @@ workflow SCRNASEQ_ALEVIN {
     )
     ch_versions = ch_versions.mix(SALMON_ALEVIN.out.versions)
 
+    /*
+    * Convert matrix do h5ad
+    */
+    MTX_TO_H5AD (
+        SALMON_ALEVIN.out.alevin_results
+    )
+
     /*
     * Run alevinQC
     */
diff --git a/subworkflows/local/kallisto_bustools.nf b/subworkflows/local/kallisto_bustools.nf
index 5d99d864..3b8cd968 100644
--- a/subworkflows/local/kallisto_bustools.nf
+++ b/subworkflows/local/kallisto_bustools.nf
@@ -1,7 +1,7 @@
 /* --    IMPORT LOCAL MODULES/SUBWORKFLOWS     -- */
 include { GENE_MAP }                          from '../../modules/local/gene_map'
 include { KALLISTOBUSTOOLS_COUNT }            from '../../modules/local/kallistobustools_count'
-include {MTX_TO_H5AD     }                    from "../../modules/local/mtx_to_h5ad.nf"
+include {MTX_TO_H5AD     }                    from '../../modules/local/mtx_to_h5ad.nf'
 
 /* --    IMPORT NF-CORE MODULES/SUBWORKFLOWS   -- */
 include { GUNZIP }                      from '../../modules/nf-core/modules/gunzip/main'

From 0e02363babd80e61863a8f1823f811d755afa737 Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <felipemarques89@gmail.com>
Date: Tue, 21 Jun 2022 12:36:50 +0200
Subject: [PATCH 034/165] adding conversion module (#3)

* added conversion module for kallistobustools

* added conversion module for alevin
---
 bin/cellranger_mtx_to_h5ad.py           | 29 +++++++++++++++++++++
 bin/mtx_to_h5ad.py                      | 33 +++++++++++++++++-------
 conf/modules.config                     | 17 +++++++++++++
 modules/local/mtx_to_h5ad.nf            | 34 ++++++++++++++++++++-----
 subworkflows/local/alevin.nf            |  8 ++++++
 subworkflows/local/kallisto_bustools.nf |  6 +++++
 6 files changed, 112 insertions(+), 15 deletions(-)
 create mode 100755 bin/cellranger_mtx_to_h5ad.py

diff --git a/bin/cellranger_mtx_to_h5ad.py b/bin/cellranger_mtx_to_h5ad.py
new file mode 100755
index 00000000..95390271
--- /dev/null
+++ b/bin/cellranger_mtx_to_h5ad.py
@@ -0,0 +1,29 @@
+#!/usr/bin/env python3
+import scanpy as sc
+import argparse
+
+def mtx_to_adata( mtx_dir: str, verbose: bool = False ):
+
+    if verbose:
+        print("Reading in {}".format(mtx_dir))
+
+    adata = sc.read_10x_mtx(mtx_dir)
+
+    return adata
+
+
+if __name__ == "__main__":
+
+    parser = argparse.ArgumentParser(description="Converts mtx output to h5ad.")
+
+    parser.add_argument("-m", "--mtx",     dest="mtx",     help="Path to mtx directory."                 )
+    parser.add_argument("-v", "--verbose", dest="verbose", help="Toggle verbose messages", default=False )
+    parser.add_argument("-o", "--out",     dest="out",     help="Output path."                           )
+
+    args = vars(parser.parse_args())
+
+    adata = mtx_to_adata(args["mtx"], verbose=args["verbose"])
+
+    adata.write_h5ad(args["out"])
+
+    print("Wrote h5ad file to {}".format(args["out"]))
\ No newline at end of file
diff --git a/bin/mtx_to_h5ad.py b/bin/mtx_to_h5ad.py
index 95390271..f006f8a2 100755
--- a/bin/mtx_to_h5ad.py
+++ b/bin/mtx_to_h5ad.py
@@ -1,13 +1,22 @@
-#!/usr/bin/env python3
+#!/usr/bin/env python
+import sys
+import os
 import scanpy as sc
+import pandas as pd
+import typing
 import argparse
 
-def mtx_to_adata( mtx_dir: str, verbose: bool = False ):
+
+def mtx_to_adata(
+    mtx_file: str, barcode_file: str, feature_file: str, verbose: bool = False
+):
 
     if verbose:
-        print("Reading in {}".format(mtx_dir))
+        print("Reading in {}".format(mtx_file))
 
-    adata = sc.read_10x_mtx(mtx_dir)
+    adata = sc.read_mtx(mtx_file)
+    adata.obs_names = pd.read_csv(barcode_file, header=None)[0].values
+    adata.var_names = pd.read_csv(feature_file, header=None)[0].values
 
     return adata
 
@@ -16,14 +25,20 @@ def mtx_to_adata( mtx_dir: str, verbose: bool = False ):
 
     parser = argparse.ArgumentParser(description="Converts mtx output to h5ad.")
 
-    parser.add_argument("-m", "--mtx",     dest="mtx",     help="Path to mtx directory."                 )
-    parser.add_argument("-v", "--verbose", dest="verbose", help="Toggle verbose messages", default=False )
-    parser.add_argument("-o", "--out",     dest="out",     help="Output path."                           )
+    parser.add_argument("-m", "--mtx", dest="mtx", help="Path to mtx file.")
+    parser.add_argument(
+        "-v", "--verbose", dest="verbose", help="Toggle verbose messages", default=False
+    )
+    parser.add_argument("-f", "--feature", dest="feature", help="Path to feature file.")
+    parser.add_argument("-b", "--barcode", dest="barcode", help="Path to barcode file.")
+    parser.add_argument("-o", "--out", dest="out", help="Output path.")
 
     args = vars(parser.parse_args())
 
-    adata = mtx_to_adata(args["mtx"], verbose=args["verbose"])
+    adata = mtx_to_adata(
+        args["mtx"], args["barcode"], args["feature"], verbose=args["verbose"]
+    )
 
     adata.write_h5ad(args["out"])
 
-    print("Wrote h5ad file to {}".format(args["out"]))
\ No newline at end of file
+    print("Wrote h5ad file to {}".format(args["out"]))
diff --git a/conf/modules.config b/conf/modules.config
index 8ca6a93b..1f891688 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -70,6 +70,12 @@ if (params.aligner == "alevin") {
             ext.args = "--table transcript_id,gene_id"
             ext.prefix = { "${gff.baseName}_gffread" }
         }
+        withName: MTX_TO_H5AD {
+            publishDir = [
+                path: { "${params.outdir}/salmon/${meta.id}_alevin_results/alevin" },
+                mode: params.publish_dir_mode
+            ]
+        }
     }
 }
 
@@ -80,3 +86,14 @@ if (params.aligner == "star") {
         }
     }
 }
+
+if (params.aligner == "kallisto") {
+    process {
+        withName: MTX_TO_H5AD {
+            publishDir = [
+                path: { "${params.outdir}/kallistobustools/${meta.id}_kallistobustools_count/counts_unfiltered" },
+                mode: params.publish_dir_mode
+            ]
+        }
+    }
+}
diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf
index 4bd649d0..2fd7474f 100644
--- a/modules/local/mtx_to_h5ad.nf
+++ b/modules/local/mtx_to_h5ad.nf
@@ -13,21 +13,43 @@ process MTX_TO_H5AD {
     tuple val(meta), path(inputs)
 
     output:
-    path "matrix.h5ad.gz", emit: h5ad
+    path "*.h5ad.gz", emit: h5ad
 
     script:
-    if (params.aligner == 'cellranger') {
-        matrix_directory = "filtered_feature_bc_matrix"
-    }
+    if (params.aligner == 'cellranger')
     """
     # convert file types
-    mtx_to_h5ad.py \\
-        -m ${matrix_directory} \\
+    cellranger_mtx_to_h5ad.py \\
+        -m filtered_feature_bc_matrix \\
         -o matrix.h5ad
 
     gzip -c matrix.h5ad > matrix.h5ad.gz
     """
 
+    else if (params.aligner == 'kallisto')
+    """
+    # convert file types
+    mtx_to_h5ad.py \\
+        -m *_kallistobustools_count/counts_unfiltered/*.mtx \\
+        -b *_kallistobustools_count/counts_unfiltered/*.barcodes.txt \\
+        -f *_kallistobustools_count/counts_unfiltered/*.genes.txt \\
+        -o cells_x_genes.h5ad
+
+    gzip -c cells_x_genes.h5ad > cells_x_genes.h5ad.gz
+    """
+
+    else if (params.aligner == 'alevin')
+    """
+    # convert file types
+    mtx_to_h5ad.py \\
+        -m *_alevin_results/alevin/quants_mat.mtx.gz \\
+        -b *_alevin_results/alevin/quants_mat_rows.txt \\
+        -f *_alevin_results/alevin/quants_mat_cols.txt \\
+        -o quants_mat.h5ad
+
+    gzip -c quants_mat.h5ad > quants_mat.h5ad.gz
+    """
+
     stub:
     """
     touch matrix.h5ad.gz
diff --git a/subworkflows/local/alevin.nf b/subworkflows/local/alevin.nf
index a5cf6607..abaa6efc 100644
--- a/subworkflows/local/alevin.nf
+++ b/subworkflows/local/alevin.nf
@@ -2,6 +2,7 @@
 include { GFFREAD_TRANSCRIPTOME }             from '../../modules/local/gffread_transcriptome'
 include { SALMON_ALEVIN }                     from '../../modules/local/salmon_alevin'
 include { ALEVINQC }                          from '../../modules/local/alevinqc'
+include {MTX_TO_H5AD     }                    from '../../modules/local/mtx_to_h5ad.nf'
 
 /* --    IMPORT NF-CORE MODULES/SUBWORKFLOWS   -- */
 include { GUNZIP }                      from '../../modules/nf-core/modules/gunzip/main'
@@ -72,6 +73,13 @@ workflow SCRNASEQ_ALEVIN {
     )
     ch_versions = ch_versions.mix(SALMON_ALEVIN.out.versions)
 
+    /*
+    * Convert matrix do h5ad
+    */
+    MTX_TO_H5AD (
+        SALMON_ALEVIN.out.alevin_results
+    )
+
     /*
     * Run alevinQC
     */
diff --git a/subworkflows/local/kallisto_bustools.nf b/subworkflows/local/kallisto_bustools.nf
index 7503f85a..3b8cd968 100644
--- a/subworkflows/local/kallisto_bustools.nf
+++ b/subworkflows/local/kallisto_bustools.nf
@@ -1,6 +1,7 @@
 /* --    IMPORT LOCAL MODULES/SUBWORKFLOWS     -- */
 include { GENE_MAP }                          from '../../modules/local/gene_map'
 include { KALLISTOBUSTOOLS_COUNT }            from '../../modules/local/kallistobustools_count'
+include {MTX_TO_H5AD     }                    from '../../modules/local/mtx_to_h5ad.nf'
 
 /* --    IMPORT NF-CORE MODULES/SUBWORKFLOWS   -- */
 include { GUNZIP }                      from '../../modules/nf-core/modules/gunzip/main'
@@ -64,6 +65,11 @@ workflow KALLISTO_BUSTOOLS {
     )
     ch_versions = ch_versions.mix(KALLISTOBUSTOOLS_COUNT.out.versions)
 
+    // Convert matrix do h5ad
+    MTX_TO_H5AD (
+        KALLISTOBUSTOOLS_COUNT.out.counts
+    )
+
 
     emit:
     ch_versions

From 49613da4d346fd1b76bf5e8ac9ac6e2e7e7ea348 Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <almeidafmarques@gmail.com>
Date: Tue, 21 Jun 2022 13:37:22 +0200
Subject: [PATCH 035/165] now using hdf5's internal compression

---
 bin/cellranger_mtx_to_h5ad.py | 4 ++--
 bin/mtx_to_h5ad.py            | 2 +-
 modules/local/mtx_to_h5ad.nf  | 8 +-------
 3 files changed, 4 insertions(+), 10 deletions(-)

diff --git a/bin/cellranger_mtx_to_h5ad.py b/bin/cellranger_mtx_to_h5ad.py
index 95390271..88c189d5 100755
--- a/bin/cellranger_mtx_to_h5ad.py
+++ b/bin/cellranger_mtx_to_h5ad.py
@@ -24,6 +24,6 @@ def mtx_to_adata( mtx_dir: str, verbose: bool = False ):
 
     adata = mtx_to_adata(args["mtx"], verbose=args["verbose"])
 
-    adata.write_h5ad(args["out"])
+    adata.write_h5ad(args["out"], compression="gzip")
 
-    print("Wrote h5ad file to {}".format(args["out"]))
\ No newline at end of file
+    print("Wrote h5ad file to {}".format(args["out"]))
diff --git a/bin/mtx_to_h5ad.py b/bin/mtx_to_h5ad.py
index f006f8a2..eaf91cd0 100755
--- a/bin/mtx_to_h5ad.py
+++ b/bin/mtx_to_h5ad.py
@@ -39,6 +39,6 @@ def mtx_to_adata(
         args["mtx"], args["barcode"], args["feature"], verbose=args["verbose"]
     )
 
-    adata.write_h5ad(args["out"])
+    adata.write_h5ad(args["out"], compression="gzip")
 
     print("Wrote h5ad file to {}".format(args["out"]))
diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf
index 2fd7474f..4578b898 100644
--- a/modules/local/mtx_to_h5ad.nf
+++ b/modules/local/mtx_to_h5ad.nf
@@ -13,7 +13,7 @@ process MTX_TO_H5AD {
     tuple val(meta), path(inputs)
 
     output:
-    path "*.h5ad.gz", emit: h5ad
+    path "*.h5ad", emit: h5ad
 
     script:
     if (params.aligner == 'cellranger')
@@ -22,8 +22,6 @@ process MTX_TO_H5AD {
     cellranger_mtx_to_h5ad.py \\
         -m filtered_feature_bc_matrix \\
         -o matrix.h5ad
-
-    gzip -c matrix.h5ad > matrix.h5ad.gz
     """
 
     else if (params.aligner == 'kallisto')
@@ -34,8 +32,6 @@ process MTX_TO_H5AD {
         -b *_kallistobustools_count/counts_unfiltered/*.barcodes.txt \\
         -f *_kallistobustools_count/counts_unfiltered/*.genes.txt \\
         -o cells_x_genes.h5ad
-
-    gzip -c cells_x_genes.h5ad > cells_x_genes.h5ad.gz
     """
 
     else if (params.aligner == 'alevin')
@@ -46,8 +42,6 @@ process MTX_TO_H5AD {
         -b *_alevin_results/alevin/quants_mat_rows.txt \\
         -f *_alevin_results/alevin/quants_mat_cols.txt \\
         -o quants_mat.h5ad
-
-    gzip -c quants_mat.h5ad > quants_mat.h5ad.gz
     """
 
     stub:

From 50d2e68c279692cd049e1d7e598a140d911b66a3 Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <almeidafmarques@gmail.com>
Date: Tue, 21 Jun 2022 13:43:49 +0200
Subject: [PATCH 036/165] fixed stub

---
 modules/local/mtx_to_h5ad.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf
index 4578b898..82b51ec9 100644
--- a/modules/local/mtx_to_h5ad.nf
+++ b/modules/local/mtx_to_h5ad.nf
@@ -46,6 +46,6 @@ process MTX_TO_H5AD {
 
     stub:
     """
-    touch matrix.h5ad.gz
+    touch matrix.h5ad
     """
 }

From f1cd97fb66024e246372cfeefee6367272be6963 Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <almeidafmarques@gmail.com>
Date: Wed, 22 Jun 2022 09:53:04 +0200
Subject: [PATCH 037/165] conversion to H5AD is now a subworkflow

* Added concatenation of h5ad to alevin pipeline
---
 bin/cellranger_mtx_to_h5ad.py            |  2 +-
 bin/concat_h5ad.py                       | 44 ++++++++++++++++++++++++
 bin/mtx_to_h5ad.py                       |  3 --
 conf/modules.config                      |  6 ++++
 modules/local/concat_h5ad.nf             | 25 ++++++++++++++
 modules/local/mtx_to_h5ad.nf             |  8 ++---
 subworkflows/local/alevin.nf             | 12 ++-----
 subworkflows/local/conversion_to_h5ad.nf | 27 +++++++++++++++
 workflows/scrnaseq.nf                    | 11 +++++-
 9 files changed, 119 insertions(+), 19 deletions(-)
 create mode 100755 bin/concat_h5ad.py
 create mode 100644 modules/local/concat_h5ad.nf
 create mode 100644 subworkflows/local/conversion_to_h5ad.nf

diff --git a/bin/cellranger_mtx_to_h5ad.py b/bin/cellranger_mtx_to_h5ad.py
index 88c189d5..d58be151 100755
--- a/bin/cellranger_mtx_to_h5ad.py
+++ b/bin/cellranger_mtx_to_h5ad.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3
+#!/usr/bin/env python
 import scanpy as sc
 import argparse
 
diff --git a/bin/concat_h5ad.py b/bin/concat_h5ad.py
new file mode 100755
index 00000000..066c956d
--- /dev/null
+++ b/bin/concat_h5ad.py
@@ -0,0 +1,44 @@
+#!/usr/bin/env python
+import scanpy as sc, anndata as ad, pandas as pd
+from pathlib import Path
+import argparse
+
+# empty list to hold sample datasets
+list_of_h5ad = []
+
+def read_samplesheet(samplesheet):
+    df = pd.read_csv(samplesheet)
+    return(df)
+
+# find available h5ad files and append to list
+def append_h5ad_files():
+    for path in Path(".").rglob('*.h5ad'):
+        adata = sc.read_h5ad(path.name)
+        list_of_h5ad.append(adata)
+
+# combine and write
+# combination without inner or out join, just a simple concatenation.
+def concat_h5ad(outfile):
+    combined = ad.concat(list_of_h5ad)
+    return(combined)
+
+if __name__ == "__main__":
+
+    parser = argparse.ArgumentParser(description="Concatenates h5ad files and merge metadata from samplesheet")
+
+    parser.add_argument("-i", "--input", dest="input", help="Path to samplesheet.csv")
+    parser.add_argument("-o", "--out", dest="out", help="Output path.")
+
+    args = vars(parser.parse_args())
+
+    # how to merge this on adata.obs?
+    df_samplesheet = read_samplesheet(args["input"])
+
+    # find all and append to list
+    append_h5ad_files()
+
+    # concat and write
+    adata = concat_h5ad(args["out"])
+    adata.write_h5ad(args["out"], compression="gzip")
+
+    print("Wrote h5ad file to {}".format(args["out"]))
\ No newline at end of file
diff --git a/bin/mtx_to_h5ad.py b/bin/mtx_to_h5ad.py
index eaf91cd0..09013994 100755
--- a/bin/mtx_to_h5ad.py
+++ b/bin/mtx_to_h5ad.py
@@ -1,9 +1,6 @@
 #!/usr/bin/env python
-import sys
-import os
 import scanpy as sc
 import pandas as pd
-import typing
 import argparse
 
 
diff --git a/conf/modules.config b/conf/modules.config
index 1f891688..0dd6fefb 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -76,6 +76,12 @@ if (params.aligner == "alevin") {
                 mode: params.publish_dir_mode
             ]
         }
+        withName: CONCAT_H5AD {
+            publishDir = [
+                path: { "${params.outdir}/salmon/concat_h5ad" },
+                mode: params.publish_dir_mode
+            ]
+        }
     }
 }
 
diff --git a/modules/local/concat_h5ad.nf b/modules/local/concat_h5ad.nf
new file mode 100644
index 00000000..616e16d8
--- /dev/null
+++ b/modules/local/concat_h5ad.nf
@@ -0,0 +1,25 @@
+process CONCAT_H5AD {
+    label 'process_medium'
+
+    conda (params.enable_conda ? "conda-forge::scanpy conda-forge::python-igraph conda-forge::leidenalg" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/scanpy:1.7.2--pyhdfd78af_0' :
+        'quay.io/biocontainers/scanpy:1.7.2--pyhdfd78af_0' }"
+
+    input:
+    path h5ad
+    path samplesheet
+
+    output:
+    path "*.h5ad", emit: h5ad
+
+    script:
+    """
+    concat_h5ad.py -i $samplesheet -o combined_matrix.h5ad
+    """
+
+    stub:
+    """
+    touch combined_matrix.h5ad
+    """
+}
diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf
index 82b51ec9..88b0e1c7 100644
--- a/modules/local/mtx_to_h5ad.nf
+++ b/modules/local/mtx_to_h5ad.nf
@@ -21,7 +21,7 @@ process MTX_TO_H5AD {
     # convert file types
     cellranger_mtx_to_h5ad.py \\
         -m filtered_feature_bc_matrix \\
-        -o matrix.h5ad
+        -o ${meta.id}_matrix.h5ad
     """
 
     else if (params.aligner == 'kallisto')
@@ -31,7 +31,7 @@ process MTX_TO_H5AD {
         -m *_kallistobustools_count/counts_unfiltered/*.mtx \\
         -b *_kallistobustools_count/counts_unfiltered/*.barcodes.txt \\
         -f *_kallistobustools_count/counts_unfiltered/*.genes.txt \\
-        -o cells_x_genes.h5ad
+        -o ${meta.id}_matrix.h5ad
     """
 
     else if (params.aligner == 'alevin')
@@ -41,11 +41,11 @@ process MTX_TO_H5AD {
         -m *_alevin_results/alevin/quants_mat.mtx.gz \\
         -b *_alevin_results/alevin/quants_mat_rows.txt \\
         -f *_alevin_results/alevin/quants_mat_cols.txt \\
-        -o quants_mat.h5ad
+        -o ${meta.id}_matrix.h5ad
     """
 
     stub:
     """
-    touch matrix.h5ad
+    touch ${meta.id}_matrix.h5ad
     """
 }
diff --git a/subworkflows/local/alevin.nf b/subworkflows/local/alevin.nf
index abaa6efc..c1b122e1 100644
--- a/subworkflows/local/alevin.nf
+++ b/subworkflows/local/alevin.nf
@@ -1,8 +1,7 @@
 /* --    IMPORT LOCAL MODULES/SUBWORKFLOWS     -- */
 include { GFFREAD_TRANSCRIPTOME }             from '../../modules/local/gffread_transcriptome'
-include { SALMON_ALEVIN }                     from '../../modules/local/salmon_alevin'
-include { ALEVINQC }                          from '../../modules/local/alevinqc'
-include {MTX_TO_H5AD     }                    from '../../modules/local/mtx_to_h5ad.nf'
+include { SALMON_ALEVIN         }             from '../../modules/local/salmon_alevin'
+include { ALEVINQC              }             from '../../modules/local/alevinqc'
 
 /* --    IMPORT NF-CORE MODULES/SUBWORKFLOWS   -- */
 include { GUNZIP }                      from '../../modules/nf-core/modules/gunzip/main'
@@ -73,13 +72,6 @@ workflow SCRNASEQ_ALEVIN {
     )
     ch_versions = ch_versions.mix(SALMON_ALEVIN.out.versions)
 
-    /*
-    * Convert matrix do h5ad
-    */
-    MTX_TO_H5AD (
-        SALMON_ALEVIN.out.alevin_results
-    )
-
     /*
     * Run alevinQC
     */
diff --git a/subworkflows/local/conversion_to_h5ad.nf b/subworkflows/local/conversion_to_h5ad.nf
new file mode 100644
index 00000000..67b100d5
--- /dev/null
+++ b/subworkflows/local/conversion_to_h5ad.nf
@@ -0,0 +1,27 @@
+/* --    IMPORT LOCAL MODULES/SUBWORKFLOWS     -- */
+include { MTX_TO_H5AD }             from '../../modules/local/mtx_to_h5ad.nf'
+include { CONCAT_H5AD }             from '../../modules/local/concat_h5ad.nf'
+
+workflow H5AD_CONVERSION {
+
+    take:
+    mtx_matrices
+    samplesheet
+
+    main:
+    //
+    // Convert matrix do h5ad
+    //
+    MTX_TO_H5AD (
+        mtx_matrices
+    )
+
+    //
+    // Concat sample-specific h5ad in one
+    //
+    CONCAT_H5AD (
+        MTX_TO_H5AD.out.h5ad.collect(), // gather all sample-specific files
+        samplesheet
+    )
+
+}
diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf
index 3b54ced9..f0c9bb00 100644
--- a/workflows/scrnaseq.nf
+++ b/workflows/scrnaseq.nf
@@ -40,6 +40,7 @@ include { KALLISTO_BUSTOOLS } from '../subworkflows/local/kallisto_bustools'
 include { SCRNASEQ_ALEVIN   } from '../subworkflows/local/alevin'
 include { STARSOLO          } from '../subworkflows/local/starsolo'
 include { CELLRANGER_ALIGN  } from "../subworkflows/local/align_cellranger"
+include { H5AD_CONVERSION   } from "../subworkflows/local/conversion_to_h5ad"
 
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -98,7 +99,8 @@ ch_cellranger_index = params.cellranger_index ? file(params.cellranger_index) :
 
 workflow SCRNASEQ {
 
-    ch_versions = Channel.empty()
+    ch_versions     = Channel.empty()
+    ch_mtx_matrices = Channel.empty()
 
     // Check input files and stage input data
     ch_fastq = INPUT_CHECK( ch_input ).reads
@@ -135,6 +137,7 @@ workflow SCRNASEQ {
         )
         ch_versions = ch_versions.mix(SCRNASEQ_ALEVIN.out.ch_versions)
         ch_multiqc_alevin = SCRNASEQ_ALEVIN.out.for_multiqc
+        ch_mtx_matrices = ch_mtx_matrices.mix(SCRNASEQ_ALEVIN.out.alevin_results)
     }
 
     // Run STARSolo pipeline
@@ -163,6 +166,12 @@ workflow SCRNASEQ {
         ch_versions = ch_versions.mix(CELLRANGER_ALIGN.out.ch_versions)
     }
 
+    // Run mtx to h5ad conversion subworkflow
+    H5AD_CONVERSION (
+        ch_mtx_matrices,
+        ch_input
+    )
+
     // collect software versions
     CUSTOM_DUMPSOFTWAREVERSIONS (
         ch_versions.unique().collectFile(name: 'collated_versions.yml')

From 68c0dac751b85f9f74a9498a32d23e7d34620e75 Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <almeidafmarques@gmail.com>
Date: Wed, 22 Jun 2022 10:02:32 +0200
Subject: [PATCH 038/165] added concatenation module for kallisto

---
 conf/modules.config                     | 6 ++++++
 subworkflows/local/kallisto_bustools.nf | 7 -------
 workflows/scrnaseq.nf                   | 1 +
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/conf/modules.config b/conf/modules.config
index 0dd6fefb..6ccb5af8 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -101,5 +101,11 @@ if (params.aligner == "kallisto") {
                 mode: params.publish_dir_mode
             ]
         }
+        withName: CONCAT_H5AD {
+            publishDir = [
+                path: { "${params.outdir}/kallistobustools/concat_h5ad" },
+                mode: params.publish_dir_mode
+            ]
+        }
     }
 }
diff --git a/subworkflows/local/kallisto_bustools.nf b/subworkflows/local/kallisto_bustools.nf
index 3b8cd968..204852da 100644
--- a/subworkflows/local/kallisto_bustools.nf
+++ b/subworkflows/local/kallisto_bustools.nf
@@ -1,7 +1,6 @@
 /* --    IMPORT LOCAL MODULES/SUBWORKFLOWS     -- */
 include { GENE_MAP }                          from '../../modules/local/gene_map'
 include { KALLISTOBUSTOOLS_COUNT }            from '../../modules/local/kallistobustools_count'
-include {MTX_TO_H5AD     }                    from '../../modules/local/mtx_to_h5ad.nf'
 
 /* --    IMPORT NF-CORE MODULES/SUBWORKFLOWS   -- */
 include { GUNZIP }                      from '../../modules/nf-core/modules/gunzip/main'
@@ -65,12 +64,6 @@ workflow KALLISTO_BUSTOOLS {
     )
     ch_versions = ch_versions.mix(KALLISTOBUSTOOLS_COUNT.out.versions)
 
-    // Convert matrix do h5ad
-    MTX_TO_H5AD (
-        KALLISTOBUSTOOLS_COUNT.out.counts
-    )
-
-
     emit:
     ch_versions
     counts = KALLISTOBUSTOOLS_COUNT.out.counts
diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf
index f0c9bb00..f7c2180f 100644
--- a/workflows/scrnaseq.nf
+++ b/workflows/scrnaseq.nf
@@ -120,6 +120,7 @@ workflow SCRNASEQ {
             ch_fastq
         )
         ch_versions = ch_versions.mix(KALLISTO_BUSTOOLS.out.ch_versions)
+        ch_mtx_matrices = ch_mtx_matrices.mix(KALLISTO_BUSTOOLS.out.counts)
     }
 
     // Run salmon alevin pipeline

From 24d52178a076ef4547a06dd9786f207f5faa1e28 Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <almeidafmarques@gmail.com>
Date: Wed, 22 Jun 2022 10:08:06 +0200
Subject: [PATCH 039/165] added concatenation module for cellranger

---
 conf/modules.config                    | 10 ++++++++--
 subworkflows/local/align_cellranger.nf | 13 +++++++------
 workflows/scrnaseq.nf                  |  1 +
 3 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/conf/modules.config b/conf/modules.config
index 6ccb5af8..eb61cd75 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -61,6 +61,12 @@ if(params.aligner == "cellranger") {
                 mode: params.publish_dir_mode
             ]
         }
+        withName: CONCAT_H5AD {
+            publishDir = [
+                path: { "${params.outdir}/cellranger/count/concatenated_h5ad" },
+                mode: params.publish_dir_mode
+            ]
+        }
     }
 }
 
@@ -78,7 +84,7 @@ if (params.aligner == "alevin") {
         }
         withName: CONCAT_H5AD {
             publishDir = [
-                path: { "${params.outdir}/salmon/concat_h5ad" },
+                path: { "${params.outdir}/salmon/concatenated_h5ad" },
                 mode: params.publish_dir_mode
             ]
         }
@@ -103,7 +109,7 @@ if (params.aligner == "kallisto") {
         }
         withName: CONCAT_H5AD {
             publishDir = [
-                path: { "${params.outdir}/kallistobustools/concat_h5ad" },
+                path: { "${params.outdir}/kallistobustools/concatenated_h5ad" },
                 mode: params.publish_dir_mode
             ]
         }
diff --git a/subworkflows/local/align_cellranger.nf b/subworkflows/local/align_cellranger.nf
index cd22c42b..cda067ff 100644
--- a/subworkflows/local/align_cellranger.nf
+++ b/subworkflows/local/align_cellranger.nf
@@ -41,20 +41,21 @@ workflow CELLRANGER_ALIGN {
         )
         ch_versions = ch_versions.mix(CELLRANGER_COUNT.out.versions)
 
-        // Convert matrix do h5ad
-        MTX_TO_H5AD (
-            CELLRANGER_COUNT.out.outs.map{ inputs ->
+        // rebuild out channel to be in compliance with what is required for h5ad conversion modules
+        // out channel comes without meta map from nf-core module
+        ch_count_outputs_rebuilt = CELLRANGER_COUNT.out.outs.map{ inputs ->
             meta = [:]
+            
             // in stub-run variable is string and not an array
             if (workflow.stubRun) { meta.cellranger_prefix = [inputs][0].toString().tokenize('/')[-3] } 
             else { meta.cellranger_prefix = inputs[0].toString().tokenize('/')[-3] }
             meta.id = meta.cellranger_prefix.tokenize('-')[1]
             
-                [ meta, inputs ]
-            }
-        )
+            [ meta, inputs ]
+        }
 
     emit:
         ch_versions
         cellranger_out  = CELLRANGER_COUNT.out.outs
+        cellranger_out_rebuilt = ch_count_outputs_rebuilt
 }
diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf
index f7c2180f..ef55aa45 100644
--- a/workflows/scrnaseq.nf
+++ b/workflows/scrnaseq.nf
@@ -165,6 +165,7 @@ workflow SCRNASEQ {
             ch_fastq
         )
         ch_versions = ch_versions.mix(CELLRANGER_ALIGN.out.ch_versions)
+        ch_mtx_matrices = ch_mtx_matrices.mix(CELLRANGER_ALIGN.out.cellranger_out_rebuilt)
     }
 
     // Run mtx to h5ad conversion subworkflow

From 16d19f1e778f7a69ebd2c534928bb73117421112 Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <almeidafmarques@gmail.com>
Date: Wed, 22 Jun 2022 14:14:40 +0200
Subject: [PATCH 040/165] Simplify function to find and store h5ad files

---
 bin/concat_h5ad.py | 11 +----------
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/bin/concat_h5ad.py b/bin/concat_h5ad.py
index 066c956d..318672cc 100755
--- a/bin/concat_h5ad.py
+++ b/bin/concat_h5ad.py
@@ -3,19 +3,10 @@
 from pathlib import Path
 import argparse
 
-# empty list to hold sample datasets
-list_of_h5ad = []
-
 def read_samplesheet(samplesheet):
     df = pd.read_csv(samplesheet)
     return(df)
 
-# find available h5ad files and append to list
-def append_h5ad_files():
-    for path in Path(".").rglob('*.h5ad'):
-        adata = sc.read_h5ad(path.name)
-        list_of_h5ad.append(adata)
-
 # combine and write
 # combination without inner or out join, just a simple concatenation.
 def concat_h5ad(outfile):
@@ -35,7 +26,7 @@ def concat_h5ad(outfile):
     df_samplesheet = read_samplesheet(args["input"])
 
     # find all and append to list
-    append_h5ad_files()
+    list_of_h5ad = [sc.read_h5ad(path) for path in Path(".").rglob('*.h5ad')]
 
     # concat and write
     adata = concat_h5ad(args["out"])

From 010154c3862bae709cf9b31d9eeb27b918e6178b Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <almeidafmarques@gmail.com>
Date: Wed, 22 Jun 2022 19:10:37 +0200
Subject: [PATCH 041/165] added sample information and metadata

---
 bin/cellranger_mtx_to_h5ad.py |  6 ++++--
 bin/concat_h5ad.py            | 35 +++++++++++++++++++++++++----------
 bin/mtx_to_h5ad.py            |  6 ++++--
 modules/local/mtx_to_h5ad.nf  |  3 +++
 4 files changed, 36 insertions(+), 14 deletions(-)

diff --git a/bin/cellranger_mtx_to_h5ad.py b/bin/cellranger_mtx_to_h5ad.py
index d58be151..40e365b6 100755
--- a/bin/cellranger_mtx_to_h5ad.py
+++ b/bin/cellranger_mtx_to_h5ad.py
@@ -2,12 +2,13 @@
 import scanpy as sc
 import argparse
 
-def mtx_to_adata( mtx_dir: str, verbose: bool = False ):
+def mtx_to_adata( mtx_dir: str, sample: str, verbose: bool = False ):
 
     if verbose:
         print("Reading in {}".format(mtx_dir))
 
     adata = sc.read_10x_mtx(mtx_dir)
+    adata.obs["sample"] = sample
 
     return adata
 
@@ -18,11 +19,12 @@ def mtx_to_adata( mtx_dir: str, verbose: bool = False ):
 
     parser.add_argument("-m", "--mtx",     dest="mtx",     help="Path to mtx directory."                 )
     parser.add_argument("-v", "--verbose", dest="verbose", help="Toggle verbose messages", default=False )
+    parser.add_argument("-s", "--sample",  dest="sample",  help="Sample name"                            )
     parser.add_argument("-o", "--out",     dest="out",     help="Output path."                           )
 
     args = vars(parser.parse_args())
 
-    adata = mtx_to_adata(args["mtx"], verbose=args["verbose"])
+    adata = mtx_to_adata(args["mtx"], args["sample"], verbose=args["verbose"])
 
     adata.write_h5ad(args["out"], compression="gzip")
 
diff --git a/bin/concat_h5ad.py b/bin/concat_h5ad.py
index 318672cc..57a1969e 100755
--- a/bin/concat_h5ad.py
+++ b/bin/concat_h5ad.py
@@ -5,12 +5,21 @@
 
 def read_samplesheet(samplesheet):
     df = pd.read_csv(samplesheet)
+    df.set_index("sample")
+
+    # samplesheet may contain replicates, when it has,
+    # group information from replicates and collapse with commas
+    # only keep unique values using set()
+    df = df.groupby(["sample"]).agg(lambda column: ",".join(set(column)))
+
+    # return
     return(df)
 
-# combine and write
-# combination without inner or out join, just a simple concatenation.
-def concat_h5ad(outfile):
-    combined = ad.concat(list_of_h5ad)
+# combine and write, just a simple concatenation.
+def concat_h5ad(adatas):
+    combined = ad.concat(adatas, label="sample", merge="unique")
+    
+    # return
     return(combined)
 
 if __name__ == "__main__":
@@ -18,18 +27,24 @@ def concat_h5ad(outfile):
     parser = argparse.ArgumentParser(description="Concatenates h5ad files and merge metadata from samplesheet")
 
     parser.add_argument("-i", "--input", dest="input", help="Path to samplesheet.csv")
-    parser.add_argument("-o", "--out", dest="out", help="Output path.")
+    parser.add_argument("-o", "--out",   dest="out",   help="Output path.")
 
     args = vars(parser.parse_args())
 
-    # how to merge this on adata.obs?
+    # Open samplesheet as dataframe
     df_samplesheet = read_samplesheet(args["input"])
 
-    # find all and append to list
-    list_of_h5ad = [sc.read_h5ad(path) for path in Path(".").rglob('*.h5ad')]
+    # find all h5ad and append to dict
+    dict_of_h5ad = {
+            str(path).replace("_matrix.h5ad", ""): sc.read_h5ad(path)
+            for path in Path(".").rglob('*.h5ad')
+    }
+
+    # concat h5ad files
+    adata = concat_h5ad(dict_of_h5ad)
 
-    # concat and write
-    adata = concat_h5ad(args["out"])
+    # merge with data.frame, on sample information
+    adata.obs = adata.obs.join(df_samplesheet, on="sample")
     adata.write_h5ad(args["out"], compression="gzip")
 
     print("Wrote h5ad file to {}".format(args["out"]))
\ No newline at end of file
diff --git a/bin/mtx_to_h5ad.py b/bin/mtx_to_h5ad.py
index 09013994..78116f58 100755
--- a/bin/mtx_to_h5ad.py
+++ b/bin/mtx_to_h5ad.py
@@ -5,7 +5,7 @@
 
 
 def mtx_to_adata(
-    mtx_file: str, barcode_file: str, feature_file: str, verbose: bool = False
+    mtx_file: str, barcode_file: str, feature_file: str, sample: str, verbose: bool = False
 ):
 
     if verbose:
@@ -14,6 +14,7 @@ def mtx_to_adata(
     adata = sc.read_mtx(mtx_file)
     adata.obs_names = pd.read_csv(barcode_file, header=None)[0].values
     adata.var_names = pd.read_csv(feature_file, header=None)[0].values
+    adata.obs["sample"] = sample
 
     return adata
 
@@ -28,12 +29,13 @@ def mtx_to_adata(
     )
     parser.add_argument("-f", "--feature", dest="feature", help="Path to feature file.")
     parser.add_argument("-b", "--barcode", dest="barcode", help="Path to barcode file.")
+    parser.add_argument("-s", "--sample", dest="sample", help="Sample name")
     parser.add_argument("-o", "--out", dest="out", help="Output path.")
 
     args = vars(parser.parse_args())
 
     adata = mtx_to_adata(
-        args["mtx"], args["barcode"], args["feature"], verbose=args["verbose"]
+        args["mtx"], args["barcode"], args["feature"], args["sample"], verbose=args["verbose"]
     )
 
     adata.write_h5ad(args["out"], compression="gzip")
diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf
index 88b0e1c7..37321bf4 100644
--- a/modules/local/mtx_to_h5ad.nf
+++ b/modules/local/mtx_to_h5ad.nf
@@ -21,6 +21,7 @@ process MTX_TO_H5AD {
     # convert file types
     cellranger_mtx_to_h5ad.py \\
         -m filtered_feature_bc_matrix \\
+        -s ${meta.id} \\
         -o ${meta.id}_matrix.h5ad
     """
 
@@ -28,6 +29,7 @@ process MTX_TO_H5AD {
     """
     # convert file types
     mtx_to_h5ad.py \\
+        -s ${meta.id} \\
         -m *_kallistobustools_count/counts_unfiltered/*.mtx \\
         -b *_kallistobustools_count/counts_unfiltered/*.barcodes.txt \\
         -f *_kallistobustools_count/counts_unfiltered/*.genes.txt \\
@@ -38,6 +40,7 @@ process MTX_TO_H5AD {
     """
     # convert file types
     mtx_to_h5ad.py \\
+        -s ${meta.id} \\
         -m *_alevin_results/alevin/quants_mat.mtx.gz \\
         -b *_alevin_results/alevin/quants_mat_rows.txt \\
         -f *_alevin_results/alevin/quants_mat_cols.txt \\

From 8d2cfa1febcc96fcc2393988ae1b82effa93dbf1 Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <almeidafmarques@gmail.com>
Date: Thu, 23 Jun 2022 08:12:53 +0200
Subject: [PATCH 042/165] suffix not hard-coded in py script

---
 bin/concat_h5ad.py           | 7 ++++---
 modules/local/concat_h5ad.nf | 5 ++++-
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/bin/concat_h5ad.py b/bin/concat_h5ad.py
index 57a1969e..d6c197ca 100755
--- a/bin/concat_h5ad.py
+++ b/bin/concat_h5ad.py
@@ -26,8 +26,9 @@ def concat_h5ad(adatas):
 
     parser = argparse.ArgumentParser(description="Concatenates h5ad files and merge metadata from samplesheet")
 
-    parser.add_argument("-i", "--input", dest="input", help="Path to samplesheet.csv")
-    parser.add_argument("-o", "--out",   dest="out",   help="Output path.")
+    parser.add_argument("-i", "--input",  dest="input",  help="Path to samplesheet.csv")
+    parser.add_argument("-o", "--out",    dest="out",    help="Output path.")
+    parser.add_argument("-s", "--suffix", dest="suffix", help="Suffix of matrices to remove and get sample name")
 
     args = vars(parser.parse_args())
 
@@ -36,7 +37,7 @@ def concat_h5ad(adatas):
 
     # find all h5ad and append to dict
     dict_of_h5ad = {
-            str(path).replace("_matrix.h5ad", ""): sc.read_h5ad(path)
+            str(path).replace(args["suffix"], ""): sc.read_h5ad(path)
             for path in Path(".").rglob('*.h5ad')
     }
 
diff --git a/modules/local/concat_h5ad.nf b/modules/local/concat_h5ad.nf
index 616e16d8..a201d9dd 100644
--- a/modules/local/concat_h5ad.nf
+++ b/modules/local/concat_h5ad.nf
@@ -15,7 +15,10 @@ process CONCAT_H5AD {
 
     script:
     """
-    concat_h5ad.py -i $samplesheet -o combined_matrix.h5ad
+    concat_h5ad.py \\
+        -i $samplesheet \\
+        -o combined_matrix.h5ad \\
+        -s "_matrix.h5ad"
     """
 
     stub:

From b1a5f1de518ce9d729e71af61f8515a656ff572d Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <felipemarques89@gmail.com>
Date: Thu, 23 Jun 2022 11:11:14 +0200
Subject: [PATCH 043/165] Update bin/concat_h5ad.py

Inline the one-line ad.concat call instead of keeping it as a separate function

Co-authored-by: Isaac Virshup <ivirshup@gmail.com>
---
 bin/concat_h5ad.py | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/bin/concat_h5ad.py b/bin/concat_h5ad.py
index d6c197ca..bb5ac2bf 100755
--- a/bin/concat_h5ad.py
+++ b/bin/concat_h5ad.py
@@ -15,13 +15,6 @@ def read_samplesheet(samplesheet):
     # return
     return(df)
 
-# combine and write, just a simple concatenation.
-def concat_h5ad(adatas):
-    combined = ad.concat(adatas, label="sample", merge="unique")
-    
-    # return
-    return(combined)
-
 if __name__ == "__main__":
 
     parser = argparse.ArgumentParser(description="Concatenates h5ad files and merge metadata from samplesheet")
@@ -42,7 +35,7 @@ def concat_h5ad(adatas):
     }
 
     # concat h5ad files
-    adata = concat_h5ad(dict_of_h5ad)
+    adata = ad.concat(dict_of_h5ad, label="sample", merge="unique")
 
     # merge with data.frame, on sample information
     adata.obs = adata.obs.join(df_samplesheet, on="sample")

From 7c20c5bebf60fc2688e1b652345f3a9afd0b7feb Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <felipemarques89@gmail.com>
Date: Thu, 23 Jun 2022 12:33:12 +0200
Subject: [PATCH 044/165] Update bin/concat_h5ad.py

Co-authored-by: Gregor Sturm <mail@gregor-sturm.de>
---
 bin/concat_h5ad.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/bin/concat_h5ad.py b/bin/concat_h5ad.py
index bb5ac2bf..1c6290e3 100755
--- a/bin/concat_h5ad.py
+++ b/bin/concat_h5ad.py
@@ -12,7 +12,6 @@ def read_samplesheet(samplesheet):
     # only keep unique values using set()
     df = df.groupby(["sample"]).agg(lambda column: ",".join(set(column)))
 
-    # return
     return(df)
 
 if __name__ == "__main__":

From acbfba455cfab01f343cee1ea1ab65c5cd96eee2 Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <almeidafmarques@gmail.com>
Date: Thu, 23 Jun 2022 12:44:08 +0200
Subject: [PATCH 045/165] updated nf-core/cellranger module

---
 modules.json                                     | 2 +-
 modules/nf-core/modules/cellranger/count/main.nf | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/modules.json b/modules.json
index b0f8bb72..96dba9a3 100644
--- a/modules.json
+++ b/modules.json
@@ -4,7 +4,7 @@
     "repos": {
         "nf-core/modules": {
             "cellranger/count": {
-                "git_sha": "8032833e6e0fd3edeb20d4121885112249f273c4"
+                "git_sha": "e1a3ae6bf5afce6b23f580e3225e7bdbe1388cd0"
             },
             "cellranger/mkgtf": {
                 "git_sha": "e499abe572bc7c1322dfa66b767b0e65fe8c62ca"
diff --git a/modules/nf-core/modules/cellranger/count/main.nf b/modules/nf-core/modules/cellranger/count/main.nf
index 7413c990..84e2d921 100644
--- a/modules/nf-core/modules/cellranger/count/main.nf
+++ b/modules/nf-core/modules/cellranger/count/main.nf
@@ -12,8 +12,8 @@ process CELLRANGER_COUNT {
     path  reference
 
     output:
-    path("sample-${meta.gem}/outs/*"), emit: outs
-    path "versions.yml"              , emit: versions
+    tuple val(meta), path("sample-${meta.gem}/outs/*"), emit: outs
+    path "versions.yml"                               , emit: versions
 
     when:
     task.ext.when == null || task.ext.when

From 2edec15de7c6cb552728e27e974df9a31b8283bf Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <almeidafmarques@gmail.com>
Date: Thu, 23 Jun 2022 12:59:02 +0200
Subject: [PATCH 046/165] cellranger directly using meta map from channel

---
 conf/modules.config                    |  2 +-
 subworkflows/local/align_cellranger.nf | 14 --------------
 workflows/scrnaseq.nf                  |  2 +-
 3 files changed, 2 insertions(+), 16 deletions(-)

diff --git a/conf/modules.config b/conf/modules.config
index eb61cd75..c0fcb25b 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -57,7 +57,7 @@ if(params.aligner == "cellranger") {
         }
         withName: MTX_TO_H5AD {
             publishDir = [
-                path: { "${params.outdir}/cellranger/count/${meta.cellranger_prefix}/outs/filtered_feature_bc_matrix" },
+                path: { "${params.outdir}/cellranger/count/sample-${meta.id}/outs/filtered_feature_bc_matrix" },
                 mode: params.publish_dir_mode
             ]
         }
diff --git a/subworkflows/local/align_cellranger.nf b/subworkflows/local/align_cellranger.nf
index cda067ff..744215e0 100644
--- a/subworkflows/local/align_cellranger.nf
+++ b/subworkflows/local/align_cellranger.nf
@@ -41,21 +41,7 @@ workflow CELLRANGER_ALIGN {
         )
         ch_versions = ch_versions.mix(CELLRANGER_COUNT.out.versions)
 
-        // rebuild out channel to be in compliance with what is required for h5ad conversion modules
-        // out channel comes without meta map from nf-core module
-        ch_count_outputs_rebuilt = CELLRANGER_COUNT.out.outs.map{ inputs ->
-            meta = [:]
-            
-            // in stub-run variable is string and not an array
-            if (workflow.stubRun) { meta.cellranger_prefix = [inputs][0].toString().tokenize('/')[-3] } 
-            else { meta.cellranger_prefix = inputs[0].toString().tokenize('/')[-3] }
-            meta.id = meta.cellranger_prefix.tokenize('-')[1]
-            
-            [ meta, inputs ]
-        }
-
     emit:
         ch_versions
         cellranger_out  = CELLRANGER_COUNT.out.outs
-        cellranger_out_rebuilt = ch_count_outputs_rebuilt
 }
diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf
index ef55aa45..3c014ff5 100644
--- a/workflows/scrnaseq.nf
+++ b/workflows/scrnaseq.nf
@@ -165,7 +165,7 @@ workflow SCRNASEQ {
             ch_fastq
         )
         ch_versions = ch_versions.mix(CELLRANGER_ALIGN.out.ch_versions)
-        ch_mtx_matrices = ch_mtx_matrices.mix(CELLRANGER_ALIGN.out.cellranger_out_rebuilt)
+        ch_mtx_matrices = ch_mtx_matrices.mix(CELLRANGER_ALIGN.out.cellranger_out)
     }
 
     // Run mtx to h5ad conversion subworkflow

From 9ca1ee7a4557562a4e5f3e7c75a991781a149fd7 Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <almeidafmarques@gmail.com>
Date: Thu, 23 Jun 2022 13:03:02 +0200
Subject: [PATCH 047/165] updating to long version parameters

---
 modules/local/concat_h5ad.nf |  6 +++---
 modules/local/mtx_to_h5ad.nf | 26 +++++++++++++-------------
 2 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/modules/local/concat_h5ad.nf b/modules/local/concat_h5ad.nf
index a201d9dd..3bcf1755 100644
--- a/modules/local/concat_h5ad.nf
+++ b/modules/local/concat_h5ad.nf
@@ -16,9 +16,9 @@ process CONCAT_H5AD {
     script:
     """
     concat_h5ad.py \\
-        -i $samplesheet \\
-        -o combined_matrix.h5ad \\
-        -s "_matrix.h5ad"
+        --input $samplesheet \\
+        --out combined_matrix.h5ad \\
+        --suffix "_matrix.h5ad"
     """
 
     stub:
diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf
index 37321bf4..57f9c8e3 100644
--- a/modules/local/mtx_to_h5ad.nf
+++ b/modules/local/mtx_to_h5ad.nf
@@ -20,31 +20,31 @@ process MTX_TO_H5AD {
     """
     # convert file types
     cellranger_mtx_to_h5ad.py \\
-        -m filtered_feature_bc_matrix \\
-        -s ${meta.id} \\
-        -o ${meta.id}_matrix.h5ad
+        --mtx filtered_feature_bc_matrix \\
+        --sample ${meta.id} \\
+        --out ${meta.id}_matrix.h5ad
     """
 
     else if (params.aligner == 'kallisto')
     """
     # convert file types
     mtx_to_h5ad.py \\
-        -s ${meta.id} \\
-        -m *_kallistobustools_count/counts_unfiltered/*.mtx \\
-        -b *_kallistobustools_count/counts_unfiltered/*.barcodes.txt \\
-        -f *_kallistobustools_count/counts_unfiltered/*.genes.txt \\
-        -o ${meta.id}_matrix.h5ad
+        --sample ${meta.id} \\
+        --mtx *_kallistobustools_count/counts_unfiltered/*.mtx \\
+        --barcode *_kallistobustools_count/counts_unfiltered/*.barcodes.txt \\
+        --feature *_kallistobustools_count/counts_unfiltered/*.genes.txt \\
+        --out ${meta.id}_matrix.h5ad
     """
 
     else if (params.aligner == 'alevin')
     """
     # convert file types
     mtx_to_h5ad.py \\
-        -s ${meta.id} \\
-        -m *_alevin_results/alevin/quants_mat.mtx.gz \\
-        -b *_alevin_results/alevin/quants_mat_rows.txt \\
-        -f *_alevin_results/alevin/quants_mat_cols.txt \\
-        -o ${meta.id}_matrix.h5ad
+        --sample ${meta.id} \\
+        --mtx *_alevin_results/alevin/quants_mat.mtx.gz \\
+        --barcode *_alevin_results/alevin/quants_mat_rows.txt \\
+        --feature *_alevin_results/alevin/quants_mat_cols.txt \\
+        --out ${meta.id}_matrix.h5ad
     """
 
     stub:

From b1a2f2c07feec58b73cf565bed59c70a906bc37f Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <felipemarques89@gmail.com>
Date: Fri, 24 Jun 2022 08:19:06 +0200
Subject: [PATCH 048/165] Updating MTX conversion code (#4)

* conversion to H5AD is now a subworkflow

* Added concatenation of h5ad to alevin pipeline

* added concatenation module for kallisto

* added concatenation module for cellranger

* Simplify function to find and store h5ad files

* added sample information and metadata

* suffix not hard-coded in py script

* Update bin/concat_h5ad.py

Not having one line command as separate function

Co-authored-by: Isaac Virshup <ivirshup@gmail.com>

* Update bin/concat_h5ad.py

Co-authored-by: Gregor Sturm <mail@gregor-sturm.de>

* updated nf-core/cellranger module

* cellranger directly using meta map from channel

* updating to long version parameters

Co-authored-by: Isaac Virshup <ivirshup@gmail.com>
Co-authored-by: Gregor Sturm <mail@gregor-sturm.de>
---
 bin/cellranger_mtx_to_h5ad.py                 |  8 ++--
 bin/concat_h5ad.py                            | 43 +++++++++++++++++++
 bin/mtx_to_h5ad.py                            |  9 ++--
 conf/modules.config                           | 20 ++++++++-
 modules.json                                  |  2 +-
 modules/local/concat_h5ad.nf                  | 28 ++++++++++++
 modules/local/mtx_to_h5ad.nf                  | 25 ++++++-----
 .../nf-core/modules/cellranger/count/main.nf  |  4 +-
 subworkflows/local/alevin.nf                  | 12 +-----
 subworkflows/local/align_cellranger.nf        | 13 ------
 subworkflows/local/conversion_to_h5ad.nf      | 27 ++++++++++++
 subworkflows/local/kallisto_bustools.nf       |  7 ---
 workflows/scrnaseq.nf                         | 13 +++++-
 13 files changed, 157 insertions(+), 54 deletions(-)
 create mode 100755 bin/concat_h5ad.py
 create mode 100644 modules/local/concat_h5ad.nf
 create mode 100644 subworkflows/local/conversion_to_h5ad.nf

diff --git a/bin/cellranger_mtx_to_h5ad.py b/bin/cellranger_mtx_to_h5ad.py
index 88c189d5..40e365b6 100755
--- a/bin/cellranger_mtx_to_h5ad.py
+++ b/bin/cellranger_mtx_to_h5ad.py
@@ -1,13 +1,14 @@
-#!/usr/bin/env python3
+#!/usr/bin/env python
 import scanpy as sc
 import argparse
 
-def mtx_to_adata( mtx_dir: str, verbose: bool = False ):
+def mtx_to_adata( mtx_dir: str, sample: str, verbose: bool = False ):
 
     if verbose:
         print("Reading in {}".format(mtx_dir))
 
     adata = sc.read_10x_mtx(mtx_dir)
+    adata.obs["sample"] = sample
 
     return adata
 
@@ -18,11 +19,12 @@ def mtx_to_adata( mtx_dir: str, verbose: bool = False ):
 
     parser.add_argument("-m", "--mtx",     dest="mtx",     help="Path to mtx directory."                 )
     parser.add_argument("-v", "--verbose", dest="verbose", help="Toggle verbose messages", default=False )
+    parser.add_argument("-s", "--sample",  dest="sample",  help="Sample name"                            )
     parser.add_argument("-o", "--out",     dest="out",     help="Output path."                           )
 
     args = vars(parser.parse_args())
 
-    adata = mtx_to_adata(args["mtx"], verbose=args["verbose"])
+    adata = mtx_to_adata(args["mtx"], args["sample"], verbose=args["verbose"])
 
     adata.write_h5ad(args["out"], compression="gzip")
 
diff --git a/bin/concat_h5ad.py b/bin/concat_h5ad.py
new file mode 100755
index 00000000..1c6290e3
--- /dev/null
+++ b/bin/concat_h5ad.py
@@ -0,0 +1,43 @@
+#!/usr/bin/env python
+import scanpy as sc, anndata as ad, pandas as pd
+from pathlib import Path
+import argparse
+
+def read_samplesheet(samplesheet):
+    """Load the samplesheet CSV and collapse replicate rows into one row per sample."""
+    df = pd.read_csv(samplesheet)
+
+    # Replicates share a sample name: per column, keep the unique non-null values
+    # in order of first appearance (cast to str so numeric columns can be joined
+    # too) as one comma-separated string. groupby makes "sample" the index.
+    df = df.groupby(["sample"]).agg(lambda column: ",".join(column.dropna().astype(str).unique()))
+
+    return df
+
+if __name__ == "__main__":
+    # CLI entry point: concatenate every h5ad below the working directory and attach samplesheet metadata
+    parser = argparse.ArgumentParser(description="Concatenates h5ad files and merge metadata from samplesheet")
+
+    parser.add_argument("-i", "--input",  dest="input",  required=True, help="Path to samplesheet.csv")
+    parser.add_argument("-o", "--out",    dest="out",    required=True, help="Output path.")
+    parser.add_argument("-s", "--suffix", dest="suffix", required=True, help="Suffix of matrices to remove and get sample name")
+
+    args = vars(parser.parse_args())
+
+    # Open samplesheet as dataframe
+    df_samplesheet = read_samplesheet(args["input"])
+
+    # sample name -> AnnData; the name is the bare file name without the suffix, sorted for a stable order
+    dict_of_h5ad = {
+        path.name.replace(args["suffix"], ""): sc.read_h5ad(path)
+        for path in sorted(Path(".").rglob("*.h5ad"))
+    }
+
+    # concat h5ad files; the dict keys are written to the "sample" column of obs
+    adata = ad.concat(dict_of_h5ad, label="sample", merge="unique")
+
+    # merge with data.frame, on sample information
+    adata.obs = adata.obs.join(df_samplesheet, on="sample")
+    adata.write_h5ad(args["out"], compression="gzip")
+
+    print("Wrote h5ad file to {}".format(args["out"]))
\ No newline at end of file
diff --git a/bin/mtx_to_h5ad.py b/bin/mtx_to_h5ad.py
index eaf91cd0..78116f58 100755
--- a/bin/mtx_to_h5ad.py
+++ b/bin/mtx_to_h5ad.py
@@ -1,14 +1,11 @@
 #!/usr/bin/env python
-import sys
-import os
 import scanpy as sc
 import pandas as pd
-import typing
 import argparse
 
 
 def mtx_to_adata(
-    mtx_file: str, barcode_file: str, feature_file: str, verbose: bool = False
+    mtx_file: str, barcode_file: str, feature_file: str, sample: str, verbose: bool = False
 ):
 
     if verbose:
@@ -17,6 +14,7 @@ def mtx_to_adata(
     adata = sc.read_mtx(mtx_file)
     adata.obs_names = pd.read_csv(barcode_file, header=None)[0].values
     adata.var_names = pd.read_csv(feature_file, header=None)[0].values
+    adata.obs["sample"] = sample
 
     return adata
 
@@ -31,12 +29,13 @@ def mtx_to_adata(
     )
     parser.add_argument("-f", "--feature", dest="feature", help="Path to feature file.")
     parser.add_argument("-b", "--barcode", dest="barcode", help="Path to barcode file.")
+    parser.add_argument("-s", "--sample", dest="sample", help="Sample name")
     parser.add_argument("-o", "--out", dest="out", help="Output path.")
 
     args = vars(parser.parse_args())
 
     adata = mtx_to_adata(
-        args["mtx"], args["barcode"], args["feature"], verbose=args["verbose"]
+        args["mtx"], args["barcode"], args["feature"], args["sample"], verbose=args["verbose"]
     )
 
     adata.write_h5ad(args["out"], compression="gzip")
diff --git a/conf/modules.config b/conf/modules.config
index 1f891688..c0fcb25b 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -57,7 +57,13 @@ if(params.aligner == "cellranger") {
         }
         withName: MTX_TO_H5AD {
             publishDir = [
-                path: { "${params.outdir}/cellranger/count/${meta.cellranger_prefix}/outs/filtered_feature_bc_matrix" },
+                path: { "${params.outdir}/cellranger/count/sample-${meta.id}/outs/filtered_feature_bc_matrix" },
+                mode: params.publish_dir_mode
+            ]
+        }
+        withName: CONCAT_H5AD {
+            publishDir = [
+                path: { "${params.outdir}/cellranger/count/concatenated_h5ad" },
                 mode: params.publish_dir_mode
             ]
         }
@@ -76,6 +82,12 @@ if (params.aligner == "alevin") {
                 mode: params.publish_dir_mode
             ]
         }
+        withName: CONCAT_H5AD {
+            publishDir = [
+                path: { "${params.outdir}/salmon/concatenated_h5ad" },
+                mode: params.publish_dir_mode
+            ]
+        }
     }
 }
 
@@ -95,5 +107,11 @@ if (params.aligner == "kallisto") {
                 mode: params.publish_dir_mode
             ]
         }
+        withName: CONCAT_H5AD {
+            publishDir = [
+                path: { "${params.outdir}/kallistobustools/concatenated_h5ad" },
+                mode: params.publish_dir_mode
+            ]
+        }
     }
 }
diff --git a/modules.json b/modules.json
index b0f8bb72..96dba9a3 100644
--- a/modules.json
+++ b/modules.json
@@ -4,7 +4,7 @@
     "repos": {
         "nf-core/modules": {
             "cellranger/count": {
-                "git_sha": "8032833e6e0fd3edeb20d4121885112249f273c4"
+                "git_sha": "e1a3ae6bf5afce6b23f580e3225e7bdbe1388cd0"
             },
             "cellranger/mkgtf": {
                 "git_sha": "e499abe572bc7c1322dfa66b767b0e65fe8c62ca"
diff --git a/modules/local/concat_h5ad.nf b/modules/local/concat_h5ad.nf
new file mode 100644
index 00000000..3bcf1755
--- /dev/null
+++ b/modules/local/concat_h5ad.nf
@@ -0,0 +1,28 @@
+process CONCAT_H5AD {
+    label 'process_medium'
+    // Runs concat_h5ad.py on the staged h5ad files and writes combined_matrix.h5ad
+    conda (params.enable_conda ? "conda-forge::scanpy conda-forge::python-igraph conda-forge::leidenalg" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/scanpy:1.7.2--pyhdfd78af_0' :
+        'quay.io/biocontainers/scanpy:1.7.2--pyhdfd78af_0' }"
+
+    input:
+    path h5ad        // not referenced in the script below: concat_h5ad.py globs the task directory for *.h5ad
+    path samplesheet // samplesheet CSV, passed to concat_h5ad.py as --input
+
+    output:
+    path "*.h5ad", emit: h5ad // NOTE(review): presumably relies on Nextflow not capturing staged inputs here - confirm
+
+    script:
+    """
+    concat_h5ad.py \\
+        --input $samplesheet \\
+        --out combined_matrix.h5ad \\
+        --suffix "_matrix.h5ad"
+    """
+
+    stub:
+    """
+    touch combined_matrix.h5ad
+    """
+}
diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf
index 45d86e9f..3feca494 100644
--- a/modules/local/mtx_to_h5ad.nf
+++ b/modules/local/mtx_to_h5ad.nf
@@ -20,28 +20,31 @@ process MTX_TO_H5AD {
     """
     # convert file types
     cellranger_mtx_to_h5ad.py \\
-        -m filtered_feature_bc_matrix \\
-        -o matrix.h5ad
+        --mtx filtered_feature_bc_matrix \\
+        --sample ${meta.id} \\
+        --out ${meta.id}_matrix.h5ad
     """
 
     else if (params.aligner == 'kallisto')
     """
     # convert file types
     mtx_to_h5ad.py \\
-        -m *_kallistobustools_count/counts_unfiltered/*.mtx \\
-        -b *_kallistobustools_count/counts_unfiltered/*.barcodes.txt \\
-        -f *_kallistobustools_count/counts_unfiltered/*.genes.txt \\
-        -o cells_x_genes.h5ad
+        --sample ${meta.id} \\
+        --mtx *_kallistobustools_count/counts_unfiltered/*.mtx \\
+        --barcode *_kallistobustools_count/counts_unfiltered/*.barcodes.txt \\
+        --feature *_kallistobustools_count/counts_unfiltered/*.genes.txt \\
+        --out ${meta.id}_matrix.h5ad
     """
 
     else if (params.aligner == 'alevin')
     """
     # convert file types
     mtx_to_h5ad.py \\
-        -m *_alevin_results/alevin/quants_mat.mtx.gz \\
-        -b *_alevin_results/alevin/quants_mat_rows.txt \\
-        -f *_alevin_results/alevin/quants_mat_cols.txt \\
-        -o quants_mat.h5ad
+        --sample ${meta.id} \\
+        --mtx *_alevin_results/alevin/quants_mat.mtx.gz \\
+        --barcode *_alevin_results/alevin/quants_mat_rows.txt \\
+        --feature *_alevin_results/alevin/quants_mat_cols.txt \\
+        --out ${meta.id}_matrix.h5ad
     """
 
     else if (params.aligner == 'kallisto')
@@ -70,6 +73,6 @@ process MTX_TO_H5AD {
 
     stub:
     """
-    touch matrix.h5ad
+    touch ${meta.id}_matrix.h5ad
     """
 }
diff --git a/modules/nf-core/modules/cellranger/count/main.nf b/modules/nf-core/modules/cellranger/count/main.nf
index 7413c990..84e2d921 100644
--- a/modules/nf-core/modules/cellranger/count/main.nf
+++ b/modules/nf-core/modules/cellranger/count/main.nf
@@ -12,8 +12,8 @@ process CELLRANGER_COUNT {
     path  reference
 
     output:
-    path("sample-${meta.gem}/outs/*"), emit: outs
-    path "versions.yml"              , emit: versions
+    tuple val(meta), path("sample-${meta.gem}/outs/*"), emit: outs
+    path "versions.yml"                               , emit: versions
 
     when:
     task.ext.when == null || task.ext.when
diff --git a/subworkflows/local/alevin.nf b/subworkflows/local/alevin.nf
index abaa6efc..c1b122e1 100644
--- a/subworkflows/local/alevin.nf
+++ b/subworkflows/local/alevin.nf
@@ -1,8 +1,7 @@
 /* --    IMPORT LOCAL MODULES/SUBWORKFLOWS     -- */
 include { GFFREAD_TRANSCRIPTOME }             from '../../modules/local/gffread_transcriptome'
-include { SALMON_ALEVIN }                     from '../../modules/local/salmon_alevin'
-include { ALEVINQC }                          from '../../modules/local/alevinqc'
-include {MTX_TO_H5AD     }                    from '../../modules/local/mtx_to_h5ad.nf'
+include { SALMON_ALEVIN         }             from '../../modules/local/salmon_alevin'
+include { ALEVINQC              }             from '../../modules/local/alevinqc'
 
 /* --    IMPORT NF-CORE MODULES/SUBWORKFLOWS   -- */
 include { GUNZIP }                      from '../../modules/nf-core/modules/gunzip/main'
@@ -73,13 +72,6 @@ workflow SCRNASEQ_ALEVIN {
     )
     ch_versions = ch_versions.mix(SALMON_ALEVIN.out.versions)
 
-    /*
-    * Convert matrix do h5ad
-    */
-    MTX_TO_H5AD (
-        SALMON_ALEVIN.out.alevin_results
-    )
-
     /*
     * Run alevinQC
     */
diff --git a/subworkflows/local/align_cellranger.nf b/subworkflows/local/align_cellranger.nf
index cd22c42b..744215e0 100644
--- a/subworkflows/local/align_cellranger.nf
+++ b/subworkflows/local/align_cellranger.nf
@@ -41,19 +41,6 @@ workflow CELLRANGER_ALIGN {
         )
         ch_versions = ch_versions.mix(CELLRANGER_COUNT.out.versions)
 
-        // Convert matrix do h5ad
-        MTX_TO_H5AD (
-            CELLRANGER_COUNT.out.outs.map{ inputs ->
-            meta = [:]
-            // in stub-run variable is string and not an array
-            if (workflow.stubRun) { meta.cellranger_prefix = [inputs][0].toString().tokenize('/')[-3] } 
-            else { meta.cellranger_prefix = inputs[0].toString().tokenize('/')[-3] }
-            meta.id = meta.cellranger_prefix.tokenize('-')[1]
-            
-                [ meta, inputs ]
-            }
-        )
-
     emit:
         ch_versions
         cellranger_out  = CELLRANGER_COUNT.out.outs
diff --git a/subworkflows/local/conversion_to_h5ad.nf b/subworkflows/local/conversion_to_h5ad.nf
new file mode 100644
index 00000000..67b100d5
--- /dev/null
+++ b/subworkflows/local/conversion_to_h5ad.nf
@@ -0,0 +1,27 @@
+/* --    IMPORT LOCAL MODULES/SUBWORKFLOWS     -- */
+include { MTX_TO_H5AD }             from '../../modules/local/mtx_to_h5ad.nf'
+include { CONCAT_H5AD }             from '../../modules/local/concat_h5ad.nf'
+
+workflow H5AD_CONVERSION {
+
+    take:
+    mtx_matrices
+    samplesheet
+
+    main:
+    //
+    // Convert matrix do h5ad
+    //
+    MTX_TO_H5AD (
+        mtx_matrices
+    )
+
+    //
+    // Concat sample-specific h5ad in one
+    //
+    CONCAT_H5AD (
+        MTX_TO_H5AD.out.h5ad.collect(), // gather all sample-specific files
+        samplesheet
+    )
+
+}
diff --git a/subworkflows/local/kallisto_bustools.nf b/subworkflows/local/kallisto_bustools.nf
index 3b8cd968..204852da 100644
--- a/subworkflows/local/kallisto_bustools.nf
+++ b/subworkflows/local/kallisto_bustools.nf
@@ -1,7 +1,6 @@
 /* --    IMPORT LOCAL MODULES/SUBWORKFLOWS     -- */
 include { GENE_MAP }                          from '../../modules/local/gene_map'
 include { KALLISTOBUSTOOLS_COUNT }            from '../../modules/local/kallistobustools_count'
-include {MTX_TO_H5AD     }                    from '../../modules/local/mtx_to_h5ad.nf'
 
 /* --    IMPORT NF-CORE MODULES/SUBWORKFLOWS   -- */
 include { GUNZIP }                      from '../../modules/nf-core/modules/gunzip/main'
@@ -65,12 +64,6 @@ workflow KALLISTO_BUSTOOLS {
     )
     ch_versions = ch_versions.mix(KALLISTOBUSTOOLS_COUNT.out.versions)
 
-    // Convert matrix do h5ad
-    MTX_TO_H5AD (
-        KALLISTOBUSTOOLS_COUNT.out.counts
-    )
-
-
     emit:
     ch_versions
     counts = KALLISTOBUSTOOLS_COUNT.out.counts
diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf
index 3b54ced9..3c014ff5 100644
--- a/workflows/scrnaseq.nf
+++ b/workflows/scrnaseq.nf
@@ -40,6 +40,7 @@ include { KALLISTO_BUSTOOLS } from '../subworkflows/local/kallisto_bustools'
 include { SCRNASEQ_ALEVIN   } from '../subworkflows/local/alevin'
 include { STARSOLO          } from '../subworkflows/local/starsolo'
 include { CELLRANGER_ALIGN  } from "../subworkflows/local/align_cellranger"
+include { H5AD_CONVERSION   } from "../subworkflows/local/conversion_to_h5ad"
 
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -98,7 +99,8 @@ ch_cellranger_index = params.cellranger_index ? file(params.cellranger_index) :
 
 workflow SCRNASEQ {
 
-    ch_versions = Channel.empty()
+    ch_versions     = Channel.empty()
+    ch_mtx_matrices = Channel.empty()
 
     // Check input files and stage input data
     ch_fastq = INPUT_CHECK( ch_input ).reads
@@ -118,6 +120,7 @@ workflow SCRNASEQ {
             ch_fastq
         )
         ch_versions = ch_versions.mix(KALLISTO_BUSTOOLS.out.ch_versions)
+        ch_mtx_matrices = ch_mtx_matrices.mix(KALLISTO_BUSTOOLS.out.counts)
     }
 
     // Run salmon alevin pipeline
@@ -135,6 +138,7 @@ workflow SCRNASEQ {
         )
         ch_versions = ch_versions.mix(SCRNASEQ_ALEVIN.out.ch_versions)
         ch_multiqc_alevin = SCRNASEQ_ALEVIN.out.for_multiqc
+        ch_mtx_matrices = ch_mtx_matrices.mix(SCRNASEQ_ALEVIN.out.alevin_results)
     }
 
     // Run STARSolo pipeline
@@ -161,8 +165,15 @@ workflow SCRNASEQ {
             ch_fastq
         )
         ch_versions = ch_versions.mix(CELLRANGER_ALIGN.out.ch_versions)
+        ch_mtx_matrices = ch_mtx_matrices.mix(CELLRANGER_ALIGN.out.cellranger_out)
     }
 
+    // Run mtx to h5ad conversion subworkflow
+    H5AD_CONVERSION (
+        ch_mtx_matrices,
+        ch_input
+    )
+
     // collect software versions
     CUSTOM_DUMPSOFTWAREVERSIONS (
         ch_versions.unique().collectFile(name: 'collated_versions.yml')

From 81c766b3abff5c927f2aad6c2a2369c8cca6b1c4 Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <almeidafmarques@gmail.com>
Date: Fri, 24 Jun 2022 09:57:33 +0200
Subject: [PATCH 049/165] fixed singularity image

---
 modules/local/kallistobustools_count.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/local/kallistobustools_count.nf b/modules/local/kallistobustools_count.nf
index 382de329..7f3dabff 100644
--- a/modules/local/kallistobustools_count.nf
+++ b/modules/local/kallistobustools_count.nf
@@ -4,7 +4,7 @@ process KALLISTOBUSTOOLS_COUNT {
 
     conda (params.enable_conda ? "bioconda::kb-python=0.25.1" : null)
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/scanpy-scripts:1.1.6--pypyhdfd78af_0' :
+        'https://depot.galaxyproject.org/singularity/kb-python:0.25.1--py_0' :
         'quay.io/biocontainers/kb-python:0.25.1--py_0' }"
 
     input:

From acde6e32d14673dcc11aa854d569c6e3e8a27eca Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <almeidafmarques@gmail.com>
Date: Fri, 24 Jun 2022 11:05:56 +0200
Subject: [PATCH 050/165] solved existing conflicts

---
 modules/local/mtx_to_h5ad.nf | 24 ------------------------
 1 file changed, 24 deletions(-)

diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf
index 3feca494..57f9c8e3 100644
--- a/modules/local/mtx_to_h5ad.nf
+++ b/modules/local/mtx_to_h5ad.nf
@@ -47,30 +47,6 @@ process MTX_TO_H5AD {
         --out ${meta.id}_matrix.h5ad
     """
 
-    else if (params.aligner == 'kallisto')
-    """
-    # convert file types
-    mtx_to_h5ad.py \\
-        -m *_kallistobustools_count/counts_unfiltered/*.mtx \\
-        -b *_kallistobustools_count/counts_unfiltered/*.barcodes.txt \\
-        -f *_kallistobustools_count/counts_unfiltered/*.genes.txt \\
-        -o cells_x_genes.h5ad
-
-    gzip -c cells_x_genes.h5ad > cells_x_genes.h5ad.gz
-    """
-
-    else if (params.aligner == 'alevin')
-    """
-    # convert file types
-    mtx_to_h5ad.py \\
-        -m *_alevin_results/alevin/quants_mat.mtx.gz \\
-        -b *_alevin_results/alevin/quants_mat_rows.txt \\
-        -f *_alevin_results/alevin/quants_mat_cols.txt \\
-        -o quants_mat.h5ad
-
-    gzip -c quants_mat.h5ad > quants_mat.h5ad.gz
-    """
-
     stub:
     """
     touch ${meta.id}_matrix.h5ad

From e03d606686089f412344dc4e9170db0b9858f6c9 Mon Sep 17 00:00:00 2001
From: Harshil Patel <drpatelh@users.noreply.github.com>
Date: Fri, 24 Jun 2022 10:52:30 +0100
Subject: [PATCH 051/165] Update nextflow_schema.json

---
 nextflow_schema.json | 1 +
 1 file changed, 1 insertion(+)

diff --git a/nextflow_schema.json b/nextflow_schema.json
index db3b679f..1d9b1ffb 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -14,6 +14,7 @@
             "properties": {
                 "input": {
                     "type": "string",
+                    "mimetype": "text/csv",
                     "fa_icon": "fas fa-dna",
                     "description": "Input FastQ files",
                     "help_text": "Use this to specify the location of your input FastQ files. For example:\n\n```bash\n--input 'path/to/data/sample_*_{1,2}.fastq'\n```\n\nPlease note the following requirements:\n\n1. The path must be enclosed in quotes\n2. The path must have at least one `*` wildcard character\n3. When using the pipeline with paired end data, the path must use `{1,2}` notation to specify read pairs.\n\nIf left unspecified, a default pattern is used: `data/*{1,2}.fastq.gz`"

From ef06fd882b22dc419374868707fcca6c94a02943 Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <almeidafmarques@gmail.com>
Date: Mon, 27 Jun 2022 13:05:46 +0200
Subject: [PATCH 052/165] added function to convert mtx to Seurat objects

---
 bin/mtx_to_seurat.R                           | 25 ++++++++++
 conf/modules.config                           |  6 +--
 modules/local/mtx_to_seurat.nf                | 46 +++++++++++++++++++
 ...onversion_to_h5ad.nf => mtx_conversion.nf} | 14 ++++--
 workflows/scrnaseq.nf                         |  4 +-
 5 files changed, 87 insertions(+), 8 deletions(-)
 create mode 100755 bin/mtx_to_seurat.R
 create mode 100644 modules/local/mtx_to_seurat.nf
 rename subworkflows/local/{conversion_to_h5ad.nf => mtx_conversion.nf} (51%)

diff --git a/bin/mtx_to_seurat.R b/bin/mtx_to_seurat.R
new file mode 100755
index 00000000..b221835d
--- /dev/null
+++ b/bin/mtx_to_seurat.R
@@ -0,0 +1,25 @@
+#!/usr/bin/env Rscript
+# Convert a count matrix (mtx + barcodes + features files) into a Seurat object saved with saveRDS.
+library(Seurat)
+
+args <- commandArgs(trailingOnly=TRUE)
+if (length(args) < 5) {
+  stop("Usage: mtx_to_seurat.R <mtx> <barcodes> <features> <out_file> <aligner>")
+}
+
+mtx_file     <- args[1]
+barcode_file <- args[2]
+feature_file <- args[3]
+out.file     <- args[4]
+aligner      <- args[5]
+
+# for kallisto and alevin, the features file contains only one column and the matrix needs
+# to be transposed; for any other aligner the ReadMtx defaults (column 2, no transpose) are used
+single_column <- aligner %in% c("kallisto", "alevin")
+expression.matrix <- ReadMtx(
+  mtx = mtx_file, features = feature_file, cells = barcode_file,
+  feature.column = if (single_column) 1 else 2, mtx.transpose = single_column
+)
+
+seurat.object <- CreateSeuratObject(counts = expression.matrix)
+saveRDS(seurat.object, file = out.file)
diff --git a/conf/modules.config b/conf/modules.config
index c0fcb25b..1ddec147 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -55,7 +55,7 @@ if(params.aligner == "cellranger") {
                 mode: params.publish_dir_mode
             ]
         }
-        withName: MTX_TO_H5AD {
+        withName: 'MTX_TO_H5AD|MTX_TO_SEURAT' {
             publishDir = [
                 path: { "${params.outdir}/cellranger/count/sample-${meta.id}/outs/filtered_feature_bc_matrix" },
                 mode: params.publish_dir_mode
@@ -76,7 +76,7 @@ if (params.aligner == "alevin") {
             ext.args = "--table transcript_id,gene_id"
             ext.prefix = { "${gff.baseName}_gffread" }
         }
-        withName: MTX_TO_H5AD {
+        withName: 'MTX_TO_H5AD|MTX_TO_SEURAT' {
             publishDir = [
                 path: { "${params.outdir}/salmon/${meta.id}_alevin_results/alevin" },
                 mode: params.publish_dir_mode
@@ -101,7 +101,7 @@ if (params.aligner == "star") {
 
 if (params.aligner == "kallisto") {
     process {
-        withName: MTX_TO_H5AD {
+        withName: 'MTX_TO_H5AD|MTX_TO_SEURAT' {
             publishDir = [
                 path: { "${params.outdir}/kallistobustools/${meta.id}_kallistobustools_count/counts_unfiltered" },
                 mode: params.publish_dir_mode
diff --git a/modules/local/mtx_to_seurat.nf b/modules/local/mtx_to_seurat.nf
new file mode 100644
index 00000000..8269ce2b
--- /dev/null
+++ b/modules/local/mtx_to_seurat.nf
@@ -0,0 +1,46 @@
+process MTX_TO_SEURAT {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda (params.enable_conda ? "seurat-scripts" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'docker://satijalab/seurat:4.1.0' :
+        'satijalab/seurat:4.1.0' }"
+
+    input:
+    // inputs from the cellranger nf-core module do not come in a single sample dir;
+    // for each sample, the sub-folders and files come directly in an array.
+    tuple val(meta), path(inputs)
+
+    output:
+    path "*.seurat", emit: h5ad
+
+    script:
+    def aligner = params.aligner
+    if (params.aligner == "cellranger") {
+        matrix   = "filtered_feature_bc_matrix/matrix.mtx.gz"
+        barcodes = "filtered_feature_bc_matrix/barcodes.tsv.gz"
+        features = "filtered_feature_bc_matrix/features.tsv.gz"
+    } else if (params.aligner == "kallisto") {
+        matrix   = "*_kallistobustools_count/counts_unfiltered/*.mtx"
+        barcodes = "*_kallistobustools_count/counts_unfiltered/*.barcodes.txt"
+        features = "*_kallistobustools_count/counts_unfiltered/*.genes.txt"
+    } else if (params.aligner == "alevin") {
+        matrix   = "*_alevin_results/alevin/quants_mat.mtx.gz"
+        barcodes = "*_alevin_results/alevin/quants_mat_rows.txt"
+        features = "*_alevin_results/alevin/quants_mat_cols.txt"
+    }
+    """
+    mtx_to_seurat.R \\
+        $matrix \\
+        $barcodes \\
+        $features \\
+        ${meta.id}_matrix.seurat \\
+        ${aligner}
+    """
+
+    stub:
+    """
+    touch ${meta.id}_matrix.h5ad
+    """
+}
diff --git a/subworkflows/local/conversion_to_h5ad.nf b/subworkflows/local/mtx_conversion.nf
similarity index 51%
rename from subworkflows/local/conversion_to_h5ad.nf
rename to subworkflows/local/mtx_conversion.nf
index 67b100d5..731842c8 100644
--- a/subworkflows/local/conversion_to_h5ad.nf
+++ b/subworkflows/local/mtx_conversion.nf
@@ -1,8 +1,9 @@
 /* --    IMPORT LOCAL MODULES/SUBWORKFLOWS     -- */
-include { MTX_TO_H5AD }             from '../../modules/local/mtx_to_h5ad.nf'
-include { CONCAT_H5AD }             from '../../modules/local/concat_h5ad.nf'
+include { MTX_TO_H5AD   }             from '../../modules/local/mtx_to_h5ad.nf'
+include { CONCAT_H5AD   }             from '../../modules/local/concat_h5ad.nf'
+include { MTX_TO_SEURAT }             from '../../modules/local/mtx_to_seurat.nf'
 
-workflow H5AD_CONVERSION {
+workflow MTX_CONVERSION {
 
     take:
     mtx_matrices
@@ -24,4 +25,11 @@ workflow H5AD_CONVERSION {
         samplesheet
     )
 
+    //
+    // Convert matrix to seurat
+    //
+    MTX_TO_SEURAT (
+        mtx_matrices
+    )
+
 }
diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf
index 3c014ff5..bac21eb7 100644
--- a/workflows/scrnaseq.nf
+++ b/workflows/scrnaseq.nf
@@ -40,7 +40,7 @@ include { KALLISTO_BUSTOOLS } from '../subworkflows/local/kallisto_bustools'
 include { SCRNASEQ_ALEVIN   } from '../subworkflows/local/alevin'
 include { STARSOLO          } from '../subworkflows/local/starsolo'
 include { CELLRANGER_ALIGN  } from "../subworkflows/local/align_cellranger"
-include { H5AD_CONVERSION   } from "../subworkflows/local/conversion_to_h5ad"
+include { MTX_CONVERSION   } from "../subworkflows/local/mtx_conversion"
 
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -169,7 +169,7 @@ workflow SCRNASEQ {
     }
 
     // Run mtx to h5ad conversion subworkflow
-    H5AD_CONVERSION (
+    MTX_CONVERSION (
         ch_mtx_matrices,
         ch_input
     )

From 5a6ec1a7415075e7ea455f3db82123f482078ff1 Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <almeidafmarques@gmail.com>
Date: Mon, 27 Jun 2022 13:10:46 +0200
Subject: [PATCH 053/165] conversions are outputted together

---
 conf/modules.config | 48 ++++++++++++++++++++++-----------------------
 1 file changed, 24 insertions(+), 24 deletions(-)

diff --git a/conf/modules.config b/conf/modules.config
index 1ddec147..f0a48713 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -55,15 +55,15 @@ if(params.aligner == "cellranger") {
                 mode: params.publish_dir_mode
             ]
         }
-        withName: 'MTX_TO_H5AD|MTX_TO_SEURAT' {
+        // withName: 'MTX_TO_H5AD|MTX_TO_SEURAT' {
+        //     publishDir = [
+        //         path: { "${params.outdir}/cellranger/count/sample-${meta.id}/outs/filtered_feature_bc_matrix" },
+        //         mode: params.publish_dir_mode
+        //     ]
+        // }
+        withName: 'CONCAT_H5AD|MTX_TO_H5AD|MTX_TO_SEURAT' {
             publishDir = [
-                path: { "${params.outdir}/cellranger/count/sample-${meta.id}/outs/filtered_feature_bc_matrix" },
-                mode: params.publish_dir_mode
-            ]
-        }
-        withName: CONCAT_H5AD {
-            publishDir = [
-                path: { "${params.outdir}/cellranger/count/concatenated_h5ad" },
+                path: { "${params.outdir}/cellranger/count/h5ad_conversions" },
                 mode: params.publish_dir_mode
             ]
         }
@@ -76,15 +76,15 @@ if (params.aligner == "alevin") {
             ext.args = "--table transcript_id,gene_id"
             ext.prefix = { "${gff.baseName}_gffread" }
         }
-        withName: 'MTX_TO_H5AD|MTX_TO_SEURAT' {
-            publishDir = [
-                path: { "${params.outdir}/salmon/${meta.id}_alevin_results/alevin" },
-                mode: params.publish_dir_mode
-            ]
-        }
-        withName: CONCAT_H5AD {
+        // withName: 'MTX_TO_H5AD|MTX_TO_SEURAT' {
+        //     publishDir = [
+        //         path: { "${params.outdir}/salmon/${meta.id}_alevin_results/alevin" },
+        //         mode: params.publish_dir_mode
+        //     ]
+        // }
+        withName: 'CONCAT_H5AD|MTX_TO_H5AD|MTX_TO_SEURAT' {
             publishDir = [
-                path: { "${params.outdir}/salmon/concatenated_h5ad" },
+                path: { "${params.outdir}/salmon/h5ad_conversions" },
                 mode: params.publish_dir_mode
             ]
         }
@@ -101,15 +101,15 @@ if (params.aligner == "star") {
 
 if (params.aligner == "kallisto") {
     process {
-        withName: 'MTX_TO_H5AD|MTX_TO_SEURAT' {
-            publishDir = [
-                path: { "${params.outdir}/kallistobustools/${meta.id}_kallistobustools_count/counts_unfiltered" },
-                mode: params.publish_dir_mode
-            ]
-        }
-        withName: CONCAT_H5AD {
+        // withName: 'MTX_TO_H5AD|MTX_TO_SEURAT' {
+        //     publishDir = [
+        //         path: { "${params.outdir}/kallistobustools/${meta.id}_kallistobustools_count/counts_unfiltered" },
+        //         mode: params.publish_dir_mode
+        //     ]
+        // }
+        withName: 'CONCAT_H5AD|MTX_TO_H5AD|MTX_TO_SEURAT' {
             publishDir = [
-                path: { "${params.outdir}/kallistobustools/concatenated_h5ad" },
+                path: { "${params.outdir}/kallistobustools/h5ad_conversions" },
                 mode: params.publish_dir_mode
             ]
         }

From 3d079f880cdcc1c3d26b5f1aabf7e4c718265ed7 Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <almeidafmarques@gmail.com>
Date: Mon, 27 Jun 2022 13:14:40 +0200
Subject: [PATCH 054/165] Update mtx_to_seurat.R

fixing linting
---
 bin/mtx_to_seurat.R | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/bin/mtx_to_seurat.R b/bin/mtx_to_seurat.R
index b221835d..f4ef6b8e 100755
--- a/bin/mtx_to_seurat.R
+++ b/bin/mtx_to_seurat.R
@@ -10,14 +10,14 @@ out.file     <- args[4]
 aligner      <- args[5]
 
 if(aligner %in% c("kallisto", "alevin")) {
-  # for kallisto and alevin, the features file contains only one column and matrix needs to be transposed
-  expression.matrix <- ReadMtx(
-    mtx = mtx_file, features = feature_file, cells = barcode_file, feature.column = 1, mtx.transpose = TRUE
-  )
+    # for kallisto and alevin, the features file contains only one column and matrix needs to be transposed
+    expression.matrix <- ReadMtx(
+        mtx = mtx_file, features = feature_file, cells = barcode_file, feature.column = 1, mtx.transpose = TRUE
+    )
 } else {
-  expression.matrix <- ReadMtx(
-    mtx = mtx_file, features = feature_file, cells = barcode_file
-  )
+    expression.matrix <- ReadMtx(
+        mtx = mtx_file, features = feature_file, cells = barcode_file
+    )
 }
 
 seurat.object <- CreateSeuratObject(counts = expression.matrix)

From 017141708ae2ff306a60957acc605f9d1bda8626 Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <almeidafmarques@gmail.com>
Date: Mon, 27 Jun 2022 18:05:00 +0200
Subject: [PATCH 055/165] fixing stub definition

---
 modules/local/mtx_to_seurat.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/local/mtx_to_seurat.nf b/modules/local/mtx_to_seurat.nf
index 8269ce2b..9ffeb883 100644
--- a/modules/local/mtx_to_seurat.nf
+++ b/modules/local/mtx_to_seurat.nf
@@ -41,6 +41,6 @@ process MTX_TO_SEURAT {
 
     stub:
     """
-    touch ${meta.id}_matrix.h5ad
+    touch ${meta.id}_matrix.seurat
     """
 }

From 1ee8d3e03a9f4d5edb20509481a27430f8b259b7 Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <almeidafmarques@gmail.com>
Date: Wed, 29 Jun 2022 11:19:43 +0200
Subject: [PATCH 056/165] cellranger conversion reads .h5 file

---
 bin/cellranger_mtx_to_h5ad.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/bin/cellranger_mtx_to_h5ad.py b/bin/cellranger_mtx_to_h5ad.py
index 40e365b6..e8eb5b23 100755
--- a/bin/cellranger_mtx_to_h5ad.py
+++ b/bin/cellranger_mtx_to_h5ad.py
@@ -2,12 +2,14 @@
 import scanpy as sc
 import argparse
 
-def mtx_to_adata( mtx_dir: str, sample: str, verbose: bool = False ):
+def mtx_to_adata( mtx_h5: str, sample: str, verbose: bool = False ):
 
     if verbose:
-        print("Reading in {}".format(mtx_dir))
+        print("Reading in {}".format(mtx_h5))
 
-    adata = sc.read_10x_mtx(mtx_dir)
+    adata = sc.read_10x_h5(mtx_h5)
+    adata.var["gene_symbols"] = adata.var_names
+    adata.var.set_index("gene_ids", inplace=True)
     adata.obs["sample"] = sample
 
     return adata
@@ -17,7 +19,7 @@ def mtx_to_adata( mtx_dir: str, sample: str, verbose: bool = False ):
 
     parser = argparse.ArgumentParser(description="Converts mtx output to h5ad.")
 
-    parser.add_argument("-m", "--mtx",     dest="mtx",     help="Path to mtx directory."                 )
+    parser.add_argument("-m", "--mtx",     dest="mtx",     help="Path to mtx h5 file."                   )
     parser.add_argument("-v", "--verbose", dest="verbose", help="Toggle verbose messages", default=False )
     parser.add_argument("-s", "--sample",  dest="sample",  help="Sample name"                            )
     parser.add_argument("-o", "--out",     dest="out",     help="Output path."                           )

From 73bb8f8368e893c42e3bfd74b06acbe21f8719a8 Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <almeidafmarques@gmail.com>
Date: Wed, 29 Jun 2022 12:29:35 +0200
Subject: [PATCH 057/165] added "index_unique" argument

---
 bin/concat_h5ad.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bin/concat_h5ad.py b/bin/concat_h5ad.py
index 1c6290e3..29d0037a 100755
--- a/bin/concat_h5ad.py
+++ b/bin/concat_h5ad.py
@@ -34,7 +34,7 @@ def read_samplesheet(samplesheet):
     }
 
     # concat h5ad files
-    adata = ad.concat(dict_of_h5ad, label="sample", merge="unique")
+    adata = ad.concat(dict_of_h5ad, label="sample", merge="unique", index_unique="_")
 
     # merge with data.frame, on sample information
     adata.obs = adata.obs.join(df_samplesheet, on="sample")

From 88289cca2fea04aed10fe00786ade69c83202613 Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <almeidafmarques@gmail.com>
Date: Wed, 29 Jun 2022 12:34:07 +0200
Subject: [PATCH 058/165] added an option to convert star count matrix

---
 bin/mtx_to_h5ad.py             |  7 ++++--
 modules/local/mtx_to_h5ad.nf   | 40 ++++++++++++++++++++--------------
 modules/local/star_align.nf    |  1 +
 subworkflows/local/starsolo.nf |  1 +
 workflows/scrnaseq.nf          |  1 +
 5 files changed, 32 insertions(+), 18 deletions(-)

diff --git a/bin/mtx_to_h5ad.py b/bin/mtx_to_h5ad.py
index 78116f58..96384e37 100755
--- a/bin/mtx_to_h5ad.py
+++ b/bin/mtx_to_h5ad.py
@@ -5,13 +5,15 @@
 
 
 def mtx_to_adata(
-    mtx_file: str, barcode_file: str, feature_file: str, sample: str, verbose: bool = False
+    mtx_file: str, barcode_file: str, feature_file: str, sample: str, aligner: str, verbose: bool = False
 ):
 
     if verbose:
         print("Reading in {}".format(mtx_file))
 
     adata = sc.read_mtx(mtx_file)
+    if aligner == 'star': # for some reason star matrix comes transposed and doesn't fit when values are appended directly
+        adata = adata.transpose()
     adata.obs_names = pd.read_csv(barcode_file, header=None)[0].values
     adata.var_names = pd.read_csv(feature_file, header=None)[0].values
     adata.obs["sample"] = sample
@@ -31,11 +33,12 @@ def mtx_to_adata(
     parser.add_argument("-b", "--barcode", dest="barcode", help="Path to barcode file.")
     parser.add_argument("-s", "--sample", dest="sample", help="Sample name")
     parser.add_argument("-o", "--out", dest="out", help="Output path.")
+    parser.add_argument("-a", "--aligner", dest="aligner", help="Which aligner has been used?")
 
     args = vars(parser.parse_args())
 
     adata = mtx_to_adata(
-        args["mtx"], args["barcode"], args["feature"], args["sample"], verbose=args["verbose"]
+        args["mtx"], args["barcode"], args["feature"], args["sample"], args["aligner"],verbose=args["verbose"]
     )
 
     adata.write_h5ad(args["out"], compression="gzip")
diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf
index 57f9c8e3..c0c24890 100644
--- a/modules/local/mtx_to_h5ad.nf
+++ b/modules/local/mtx_to_h5ad.nf
@@ -16,34 +16,42 @@ process MTX_TO_H5AD {
     path "*.h5ad", emit: h5ad
 
     script:
+    // def file paths for aligners (except cellranger)
+    if (params.aligner == 'kallisto') {
+        mtx_matrix   = "*_kallistobustools_count/counts_unfiltered/*.mtx"
+        barcodes_tsv = "*_kallistobustools_count/counts_unfiltered/*.barcodes.txt"
+        features_tsv = "*_kallistobustools_count/counts_unfiltered/*.genes.txt"
+    } else if (params.aligner == 'alevin') {
+        mtx_matrix   = "*_alevin_results/alevin/quants_mat.mtx.gz"
+        barcodes_tsv = "*_alevin_results/alevin/quants_mat_rows.txt"
+        features_tsv = "*_alevin_results/alevin/quants_mat_cols.txt"
+    } else if (params.aligner == 'star') {
+        mtx_matrix   = "*.Solo.out/Gene/filtered/matrix.mtx"
+        barcodes_tsv = "*.Solo.out/Gene/filtered/barcodes.tsv"
+        features_tsv = "*.Solo.out/Gene/filtered/features.tsv"
+    }
+
+    //
+    // run script
+    //
     if (params.aligner == 'cellranger')
     """
     # convert file types
     cellranger_mtx_to_h5ad.py \\
-        --mtx filtered_feature_bc_matrix \\
-        --sample ${meta.id} \\
-        --out ${meta.id}_matrix.h5ad
-    """
-
-    else if (params.aligner == 'kallisto')
-    """
-    # convert file types
-    mtx_to_h5ad.py \\
+        --mtx filtered_feature_bc_matrix.h5 \\
         --sample ${meta.id} \\
-        --mtx *_kallistobustools_count/counts_unfiltered/*.mtx \\
-        --barcode *_kallistobustools_count/counts_unfiltered/*.barcodes.txt \\
-        --feature *_kallistobustools_count/counts_unfiltered/*.genes.txt \\
         --out ${meta.id}_matrix.h5ad
     """
 
-    else if (params.aligner == 'alevin')
+    else
     """
     # convert file types
     mtx_to_h5ad.py \\
+        --aligner ${params.aligner} \\
         --sample ${meta.id} \\
-        --mtx *_alevin_results/alevin/quants_mat.mtx.gz \\
-        --barcode *_alevin_results/alevin/quants_mat_rows.txt \\
-        --feature *_alevin_results/alevin/quants_mat_cols.txt \\
+        --mtx $mtx_matrix \\
+        --barcode $barcodes_tsv \\
+        --feature $features_tsv \\
         --out ${meta.id}_matrix.h5ad
     """
 
diff --git a/modules/local/star_align.nf b/modules/local/star_align.nf
index 75dac643..d544224f 100644
--- a/modules/local/star_align.nf
+++ b/modules/local/star_align.nf
@@ -21,6 +21,7 @@ process STAR_ALIGN {
 
     output:
     tuple val(meta), path('*d.out.bam')       , emit: bam
+    tuple val(meta), path('*.Solo.out')       , emit: counts
     tuple val(meta), path('*Log.final.out')   , emit: log_final
     tuple val(meta), path('*Log.out')         , emit: log_out
     tuple val(meta), path('*Log.progress.out'), emit: log_progress
diff --git a/subworkflows/local/starsolo.nf b/subworkflows/local/starsolo.nf
index 6a7e9c38..2c2f57dd 100644
--- a/subworkflows/local/starsolo.nf
+++ b/subworkflows/local/starsolo.nf
@@ -52,6 +52,7 @@ workflow STARSOLO {
     emit:
     ch_versions
     star_result = STAR_ALIGN.out.tab
+    star_counts = STAR_ALIGN.out.counts
     for_multiqc = STAR_ALIGN.out.log_final.collect{it[1]}.ifEmpty([])
 
 
diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf
index 3c014ff5..5d2ce825 100644
--- a/workflows/scrnaseq.nf
+++ b/workflows/scrnaseq.nf
@@ -153,6 +153,7 @@ workflow SCRNASEQ {
             other_parameters
         )
         ch_versions = ch_versions.mix(STARSOLO.out.ch_versions)
+        ch_mtx_matrices = ch_mtx_matrices.mix(STARSOLO.out.star_counts)
         ch_multiqc_star = STARSOLO.out.for_multiqc
     }
 

From cc7d84079b6d6a91370f1ebfe5914211aa10833f Mon Sep 17 00:00:00 2001
From: Florian <flo.minion.info@gmail.com>
Date: Wed, 29 Jun 2022 15:06:17 +0200
Subject: [PATCH 059/165] Adding subworkflow for FastQC

---
 subworkflows/local/fastqc.nf | 45 ++++++++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)
 create mode 100644 subworkflows/local/fastqc.nf

diff --git a/subworkflows/local/fastqc.nf b/subworkflows/local/fastqc.nf
new file mode 100644
index 00000000..bd65cf1f
--- /dev/null
+++ b/subworkflows/local/fastqc.nf
@@ -0,0 +1,45 @@
+//
+// Check input samplesheet and get read channels
+//
+
+//TODO --> add skip_fastqc to params
+
+include { FASTQC } from '../../modules/nf-core/modules/fastqc/main'
+
+workflow FASTQC_CHECK {
+  take:
+  ch_fastq
+
+  main:
+  ch_fastq
+      .map { ch -> [ ch[0], ch[1] ] }
+      .set { ch_fastq }
+
+  /*
+   * FastQ QC using FASTQC
+   */
+  fastqc_zip     = Channel.empty()
+  fastqc_html    = Channel.empty()
+  fastqc_multiqc = Channel.empty()
+  fastqc_version = Channel.empty()
+  
+  FASTQC ( ch_fastq )
+  fastqc_zip     = FASTQC.out.zip
+  fastqc_html    = FASTQC.out.html
+
+  fastqc_zip
+      .map { it -> [ it[1] ] }
+      .set { fastqc_zip_only }
+  fastqc_html
+      .map { it -> [ it[1] ] }
+      .set { fastqc_html_only }
+
+  fastqc_multiqc = fastqc_multiqc.mix( fastqc_zip_only, fastqc_html_only )
+  fastqc_version = FASTQC.out.versions
+
+  emit:
+  fastqc_zip
+  fastqc_html
+  fastqc_version
+  fastqc_multiqc
+}

From f6800bde699494ec230a0ffc54307d3f4a2b7125 Mon Sep 17 00:00:00 2001
From: Florian <flo.minion.info@gmail.com>
Date: Wed, 29 Jun 2022 15:07:00 +0200
Subject: [PATCH 060/165] Integrating fastqc into the scrnaseq pipeline and
 into multiqc

---
 modules/local/multiqc.nf | 35 ++++++++++++++++++++++-------------
 workflows/scrnaseq.nf    | 13 ++++++++++++-
 2 files changed, 34 insertions(+), 14 deletions(-)

diff --git a/modules/local/multiqc.nf b/modules/local/multiqc.nf
index 5939624f..f7e745a0 100644
--- a/modules/local/multiqc.nf
+++ b/modules/local/multiqc.nf
@@ -1,30 +1,39 @@
 process MULTIQC {
     label 'process_medium'
 
-    conda (params.enable_conda ? "bioconda::multiqc=1.10.1" : null)
+    conda (params.enable_conda ? 'bioconda::multiqc=1.11' : null)
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/multiqc:1.10.1--py_0' :
-        'quay.io/biocontainers/multiqc:1.10.1--py_0' }"
+        'https://depot.galaxyproject.org/singularity/multiqc:1.11--pyhdfd78af_0' :
+        'quay.io/biocontainers/multiqc:1.11--pyhdfd78af_0' }"
 
     input:
-    path 'multiqc_config.yaml'
-    path multiqc_custom_config
-    path software_versions
+    path ch_multiqc_config
+    path ch_multiqc_custom_config
+    path software_versions_yaml
     path workflow_summary
+    path ('fastqc/*')
     path ("STAR/*")
     path ("salmon_alevin/*")
 
     output:
-    path "*multiqc_report.html"     , emit: report
-    path "*_data"                   , emit: data
-    path "*variants_metrics_mqc.csv", optional:true, emit: csv_variants
-    path "*assembly_metrics_mqc.csv", optional:true, emit: csv_assembly
-    path "*_plots"                  , optional:true, emit: plots
+    path "*multiqc_report.html", emit: report
+    path "*_data"              , emit: data
+    path "*_plots"             , optional:true, emit: plots
+    path "versions.yml"        , emit: versions
 
     script:
-    def custom_config = params.multiqc_config ? "--config $multiqc_custom_config" : ''
     def args = task.ext.args ?: ''
+    def custom_config = params.multiqc_config ? "--config $ch_multiqc_custom_config" : '' // must match the declared input name
     """
-    multiqc -f $args $custom_config .
+    multiqc \\
+        -f \\
+        $args \\
+        $custom_config \\
+        .
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" )
+    END_VERSIONS
     """
 }
diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf
index 3b54ced9..fa37596a 100644
--- a/workflows/scrnaseq.nf
+++ b/workflows/scrnaseq.nf
@@ -36,6 +36,7 @@ ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multi
 // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules
 //
 include { INPUT_CHECK       } from '../subworkflows/local/input_check'
+include { FASTQC_CHECK } from '../subworkflows/local/fastqc'
 include { KALLISTO_BUSTOOLS } from '../subworkflows/local/kallisto_bustools'
 include { SCRNASEQ_ALEVIN   } from '../subworkflows/local/alevin'
 include { STARSOLO          } from '../subworkflows/local/starsolo'
@@ -105,6 +106,14 @@ workflow SCRNASEQ {
 
     ch_versions = ch_versions.mix(INPUT_CHECK.out.versions)
 
+    // Run FastQC
+    ch_multiqc_fastqc = Channel.empty()
+    if (!params.skip_fastqc){
+      FASTQC_CHECK ( ch_fastq )
+      ch_versions = ch_versions.mix(FASTQC_CHECK.out.fastqc_version.first().ifEmpty(null))
+      ch_multiqc_fastqc    = FASTQC_CHECK.out.fastqc_multiqc.ifEmpty([])
+    }
+
     // Run kallisto bustools pipeline
     if (params.aligner == "kallisto") {
         KALLISTO_BUSTOOLS(
@@ -169,15 +178,17 @@ workflow SCRNASEQ {
     )
 
     if (!params.skip_multiqc) {
+
         ch_workflow_summary = Channel.value(
             WorkflowScrnaseq.paramsSummaryMultiqc(workflow, summary_params)
         ).collectFile(name: 'workflow_summary_mqc.yaml')
 
         MULTIQC(
             ch_multiqc_config,
-            ch_multiqc_custom_config,
+            ch_multiqc_custom_config.collect().ifEmpty([]),
             CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect(),
             ch_workflow_summary,
+            ch_multiqc_fastqc.collect{it[0]}.ifEmpty([]),
             ch_multiqc_alevin,
             ch_multiqc_star
         )

From 092136e7e52624aea1604ce5664b4ec4c9db7fa5 Mon Sep 17 00:00:00 2001
From: Florian <flo.minion.info@gmail.com>
Date: Wed, 29 Jun 2022 15:07:27 +0200
Subject: [PATCH 061/165] adding skip_fastqc parameter to pipeline

---
 nextflow.config      | 3 ++-
 nextflow_schema.json | 4 ++++
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/nextflow.config b/nextflow.config
index c986adb8..09b80901 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -48,7 +48,8 @@ params {
     igenomes_base              = 's3://ngi-igenomes/igenomes'
     igenomes_ignore            = false
 
-    // MultiQC options
+    // QC and MultiQC options
+    skip_fastqc                = false
     multiqc_config             = null
     multiqc_title              = null
     max_multiqc_email_size     = '25.MB'
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 1d9b1ffb..7f1091d3 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -373,6 +373,10 @@
                     "hidden": true,
                     "fa_icon": "fas fa-bacon"
                 },
+                "skip_fastqc": {
+                    "type": "boolean",
+                    "description": "Skip the FastQC reporting feature in the pipeline."
+                },                
                 "skip_multiqc": {
                     "type": "boolean",
                     "description": "Skip the MultiQC reporting feature in the pipeline."

From 8f4b34fde04d4dca4b69907e0a278f9f682f0d88 Mon Sep 17 00:00:00 2001
From: Florian <flo.minion.info@gmail.com>
Date: Wed, 29 Jun 2022 15:07:44 +0200
Subject: [PATCH 062/165] adding fastqc log to output.md

---
 docs/output.md | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/docs/output.md b/docs/output.md
index 8d6bcf5a..3ced038e 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -12,6 +12,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d
   - [:warning: Please read this documentation on the nf-core website: https://nf-co.re/scrnaseq/output](#warning-please-read-this-documentation-on-the-nf-core-website-httpsnf-corescrnaseqoutput)
   - [Introduction](#introduction)
   - [Pipeline overview](#pipeline-overview)
+  - [FastQC](#fastqc)
   - [Kallisto & Bustools Results](#kallisto--bustools-results)
   - [STARsolo](#starsolo)
   - [Salmon Alevin & AlevinQC](#salmon-alevin--alevinqc)
@@ -20,6 +21,19 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d
   - [MultiQC](#multiqc)
   - [Pipeline information](#pipeline-information)
 
+## FastQC
+
+See [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc) for details about FastQC.
+
+The pipeline analyzes the raw data and generates a FastQC report for each file. All reports are collected in MultiQC.
+
+**Output directory: `results/fastqc`**
+
+- `.html`
+  - Contains the FastQC report.
+- `.zip`
+  - Contains additional information, such as individual plots, and FastQC raw data.
+
 ## Kallisto & Bustools Results
 
 See [Kallisto](https://pachterlab.github.io/kallisto/about) for details about Kallisto and [Bustools](https://bustools.github.io/) for more information on BusTools.

From 3259830153e339384c0f5bc1f9bc97330ff91955 Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <almeidafmarques@gmail.com>
Date: Wed, 29 Jun 2022 19:37:32 +0200
Subject: [PATCH 063/165] adding sep="\t" argument

input is a tsv and not a csv
---
 bin/mtx_to_h5ad.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/bin/mtx_to_h5ad.py b/bin/mtx_to_h5ad.py
index 96384e37..2885886e 100755
--- a/bin/mtx_to_h5ad.py
+++ b/bin/mtx_to_h5ad.py
@@ -14,8 +14,8 @@ def mtx_to_adata(
     adata = sc.read_mtx(mtx_file)
     if aligner == 'star': # for some reason star matrix comes transposed and doesn't fit when values are appended directly
         adata = adata.transpose()
-    adata.obs_names = pd.read_csv(barcode_file, header=None)[0].values
-    adata.var_names = pd.read_csv(feature_file, header=None)[0].values
+    adata.obs_names = pd.read_csv(barcode_file, header=None, sep="\t")[0].values
+    adata.var_names = pd.read_csv(feature_file, header=None, sep="\t")[0].values
     adata.obs["sample"] = sample
 
     return adata

From 893ae789a36aa83136742cc6674f0ac7c376117d Mon Sep 17 00:00:00 2001
From: nf-core-bot <core@nf-co.re>
Date: Wed, 29 Jun 2022 19:22:43 +0000
Subject: [PATCH 064/165] [automated] Fix linting with Prettier

---
 nextflow_schema.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nextflow_schema.json b/nextflow_schema.json
index 7f1091d3..91757ae4 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -376,7 +376,7 @@
                 "skip_fastqc": {
                     "type": "boolean",
                     "description": "Skip the FastQC reporting feature in the pipeline."
-                },                
+                },
                 "skip_multiqc": {
                     "type": "boolean",
                     "description": "Skip the MultiQC reporting feature in the pipeline."

From 1c94c908706d8d317edbe85d760e88e30c051a76 Mon Sep 17 00:00:00 2001
From: Florian <flo.minion.info@gmail.com>
Date: Thu, 30 Jun 2022 10:22:01 +0200
Subject: [PATCH 065/165] Changes to fastqc.nf, cleaning up code

---
 subworkflows/local/fastqc.nf | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/subworkflows/local/fastqc.nf b/subworkflows/local/fastqc.nf
index bd65cf1f..50f55d5b 100644
--- a/subworkflows/local/fastqc.nf
+++ b/subworkflows/local/fastqc.nf
@@ -1,9 +1,6 @@
 //
 // Check input samplesheet and get read channels
 //
-
-//TODO --> add skip_fastqc to params
-
 include { FASTQC } from '../../modules/nf-core/modules/fastqc/main'
 
 workflow FASTQC_CHECK {
@@ -18,11 +15,6 @@ workflow FASTQC_CHECK {
   /*
    * FastQ QC using FASTQC
    */
-  fastqc_zip     = Channel.empty()
-  fastqc_html    = Channel.empty()
-  fastqc_multiqc = Channel.empty()
-  fastqc_version = Channel.empty()
-  
   FASTQC ( ch_fastq )
   fastqc_zip     = FASTQC.out.zip
   fastqc_html    = FASTQC.out.html
@@ -34,6 +26,7 @@ workflow FASTQC_CHECK {
       .map { it -> [ it[1] ] }
       .set { fastqc_html_only }
 
+  fastqc_multiqc = Channel.empty()
   fastqc_multiqc = fastqc_multiqc.mix( fastqc_zip_only, fastqc_html_only )
   fastqc_version = FASTQC.out.versions
 

From ab4b073dea308f450d23aec999871d7dca3b4967 Mon Sep 17 00:00:00 2001
From: Florian <flo.minion.info@gmail.com>
Date: Thu, 30 Jun 2022 10:22:39 +0200
Subject: [PATCH 066/165] Removing unnecessary code in scrnaseq.nf

---
 workflows/scrnaseq.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf
index fa37596a..8e5b9fbe 100644
--- a/workflows/scrnaseq.nf
+++ b/workflows/scrnaseq.nf
@@ -110,7 +110,7 @@ workflow SCRNASEQ {
     ch_multiqc_fastqc = Channel.empty()
     if (!params.skip_fastqc){
       FASTQC_CHECK ( ch_fastq )
-      ch_versions = ch_versions.mix(FASTQC_CHECK.out.fastqc_version.first().ifEmpty(null))
+      ch_versions = ch_versions.mix(FASTQC_CHECK.out.fastqc_version)
       ch_multiqc_fastqc    = FASTQC_CHECK.out.fastqc_multiqc.ifEmpty([])
     }
 

From 67cc34cde96b950f78ce8305ce717e66dbc96b9a Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <almeidafmarques@gmail.com>
Date: Thu, 30 Jun 2022 16:15:02 +0200
Subject: [PATCH 067/165] Update modules.config

---
 conf/modules.config | 58 ++++++++++++++++++++-------------------------
 1 file changed, 26 insertions(+), 32 deletions(-)

diff --git a/conf/modules.config b/conf/modules.config
index c0fcb25b..c1351b49 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -31,13 +31,19 @@ process {
             pattern: '*_versions.yml'
         ]
     }
+    withName: 'MTX_TO_H5AD|CONCAT_H5AD' {
+        publishDir = [
+            path: { "${params.outdir}/${params.aligner}/mtx_conversions" },
+            mode: params.publish_dir_mode
+        ]
+    }
 }
 
 if(params.aligner == "cellranger") {
     process {
         withName: CELLRANGER_MKGTF {
             publishDir = [
-                path: "${params.outdir}/cellranger/mkgtf",
+                path: "${params.outdir}/${params.aligner}/mkgtf",
                 mode: params.publish_dir_mode,
                 saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
             ]
@@ -45,25 +51,13 @@ if(params.aligner == "cellranger") {
         }
         withName: CELLRANGER_MKREF {
             publishDir = [
-                path: "${params.outdir}/cellranger/mkref",
+                path: "${params.outdir}/${params.aligner}/mkref",
                 mode: params.publish_dir_mode
             ]
         }
         withName: CELLRANGER_COUNT {
             publishDir = [
-                path: "${params.outdir}/cellranger/count",
-                mode: params.publish_dir_mode
-            ]
-        }
-        withName: MTX_TO_H5AD {
-            publishDir = [
-                path: { "${params.outdir}/cellranger/count/sample-${meta.id}/outs/filtered_feature_bc_matrix" },
-                mode: params.publish_dir_mode
-            ]
-        }
-        withName: CONCAT_H5AD {
-            publishDir = [
-                path: { "${params.outdir}/cellranger/count/concatenated_h5ad" },
+                path: "${params.outdir}/${params.aligner}/count",
                 mode: params.publish_dir_mode
             ]
         }
@@ -76,15 +70,9 @@ if (params.aligner == "alevin") {
             ext.args = "--table transcript_id,gene_id"
             ext.prefix = { "${gff.baseName}_gffread" }
         }
-        withName: MTX_TO_H5AD {
+        withName: 'SALMON_INDEX|SALMON_ALEVIN' {
             publishDir = [
-                path: { "${params.outdir}/salmon/${meta.id}_alevin_results/alevin" },
-                mode: params.publish_dir_mode
-            ]
-        }
-        withName: CONCAT_H5AD {
-            publishDir = [
-                path: { "${params.outdir}/salmon/concatenated_h5ad" },
+                path: { "${params.outdir}/${params.aligner}" },
                 mode: params.publish_dir_mode
             ]
         }
@@ -96,22 +84,28 @@ if (params.aligner == "star") {
         withName: STAR_ALIGN {
             ext.args = "--readFilesCommand zcat --runDirPerm All_RWX --outWigType bedGraph --twopassMode Basic --outSAMtype BAM SortedByCoordinate"
         }
-    }
-}
-
-if (params.aligner == "kallisto") {
-    process {
-        withName: MTX_TO_H5AD {
+        withName: STAR_GENOMEGENERATE {
             publishDir = [
-                path: { "${params.outdir}/kallistobustools/${meta.id}_kallistobustools_count/counts_unfiltered" },
+                path: { "${params.outdir}/${params.aligner}/genome_generate" },
                 mode: params.publish_dir_mode
             ]
         }
-        withName: CONCAT_H5AD {
+        withName: STAR_ALIGN {
             publishDir = [
-                path: { "${params.outdir}/kallistobustools/concatenated_h5ad" },
+                path: { "${params.outdir}/${params.aligner}/${meta.id}" },
                 mode: params.publish_dir_mode
             ]
         }
     }
 }
+
+if (params.aligner == 'kallisto') {
+    process {
+        withName: 'KALLISTOBUSTOOLS_REF|KALLISTOBUSTOOLS_COUNT' {
+            publishDir = [
+                path: { "${params.outdir}/${params.aligner}" },
+                mode: params.publish_dir_mode
+            ]
+        }
+    }
+}
\ No newline at end of file

From 4d008da24009e78dd52adca57ce4bd85708f6acf Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <almeidafmarques@gmail.com>
Date: Fri, 1 Jul 2022 11:30:24 +0200
Subject: [PATCH 068/165] add conversion info to docs

---
 conf/modules.config | 2 +-
 docs/output.md      | 9 ++++++++-
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/conf/modules.config b/conf/modules.config
index c1351b49..19b9dabf 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -108,4 +108,4 @@ if (params.aligner == 'kallisto') {
             ]
         }
     }
-}
\ No newline at end of file
+}
diff --git a/docs/output.md b/docs/output.md
index 8d6bcf5a..71a9be32 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -49,8 +49,9 @@ For details on how to load these into R and perform further downstream analysis,
 
 ## STARsolo
 
-**Output directory: `results/STAR`**
+**Output directory: `results/star`**
 
+- Files will be organized in one directory per sample
 - Contains the mapped BAM files and output metrics created by STARsolo
 
 **Output directory: `results/reference_genome`**
@@ -92,6 +93,12 @@ Cell Ranger is a set of analysis scripts that processes 10X Chromium single cell
   - When supplied with a `--fasta` genome fasta, this contains the extracted transcriptome
   - The GTF file supplied with `--gtf` is used to extract the transcriptome positions appropriately
 
+**Output directory: `results/${params.aligner}/mtx_conversions`
+
+- `*_matrix.h5ad`
+  - `.mtx` files converted to [AnnData](https://anndata.readthedocs.io/en/latest/) in `.h5ad` format, using [scanpy package](https://scanpy.readthedocs.io/en/stable/).
+  - One per sample and a single one with all samples concatenated together `combined_matrix.h5ad`
+
 ## MultiQC
 
 <details markdown="1">

From 77bffa3458ac603e73990974e40d7aa35fd5272a Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <almeidafmarques@gmail.com>
Date: Fri, 1 Jul 2022 11:32:30 +0200
Subject: [PATCH 069/165] Update output.md

---
 docs/output.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/output.md b/docs/output.md
index 71a9be32..d3c57caf 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -93,7 +93,7 @@ Cell Ranger is a set of analysis scripts that processes 10X Chromium single cell
   - When supplied with a `--fasta` genome fasta, this contains the extracted transcriptome
   - The GTF file supplied with `--gtf` is used to extract the transcriptome positions appropriately
 
-**Output directory: `results/${params.aligner}/mtx_conversions`
+**Output directory: `results/${params.aligner}/mtx_conversions`**
 
 - `*_matrix.h5ad`
   - `.mtx` files converted to [AnnData](https://anndata.readthedocs.io/en/latest/) in `.h5ad` format, using [scanpy package](https://scanpy.readthedocs.io/en/stable/).

From 4ccbf1adfb08f42087282565748cdb1588d338d1 Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <almeidafmarques@gmail.com>
Date: Mon, 4 Jul 2022 09:27:37 +0200
Subject: [PATCH 070/165] added variables for star files

---
 modules/local/mtx_to_seurat.nf | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/modules/local/mtx_to_seurat.nf b/modules/local/mtx_to_seurat.nf
index 9ffeb883..7a122834 100644
--- a/modules/local/mtx_to_seurat.nf
+++ b/modules/local/mtx_to_seurat.nf
@@ -29,7 +29,12 @@ process MTX_TO_SEURAT {
         matrix   = "*_alevin_results/alevin/quants_mat.mtx.gz"
         barcodes = "*_alevin_results/alevin/quants_mat_rows.txt"
         features = "*_alevin_results/alevin/quants_mat_cols.txt"
+    } else if (params.aligner == 'star') {
+        matrix   = "*.Solo.out/Gene/filtered/matrix.mtx"
+        barcodes = "*.Solo.out/Gene/filtered/barcodes.tsv"
+        features = "*.Solo.out/Gene/filtered/features.tsv"
     }
+
     """
     mtx_to_seurat.R \\
         $matrix \\

From 1456f452dd7744b44b23f3d9424e65665e79c0bc Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <felipemarques89@gmail.com>
Date: Mon, 4 Jul 2022 10:48:33 +0200
Subject: [PATCH 071/165] Add seurat conversion (#5)

* added function to convert mtx to Seurat objects

* conversions are outputted together

* Update mtx_to_seurat.R

fixing linting

* fixing stub definition

* added variables for star files
---
 bin/mtx_to_seurat.R                  | 25 ++++++++++++++
 conf/modules.config                  |  2 +-
 modules/local/mtx_to_seurat.nf       | 51 ++++++++++++++++++++++++++++
 subworkflows/local/mtx_conversion.nf | 35 +++++++++++++++++++
 workflows/scrnaseq.nf                |  4 +--
 5 files changed, 114 insertions(+), 3 deletions(-)
 create mode 100755 bin/mtx_to_seurat.R
 create mode 100644 modules/local/mtx_to_seurat.nf
 create mode 100644 subworkflows/local/mtx_conversion.nf

diff --git a/bin/mtx_to_seurat.R b/bin/mtx_to_seurat.R
new file mode 100755
index 00000000..f4ef6b8e
--- /dev/null
+++ b/bin/mtx_to_seurat.R
@@ -0,0 +1,25 @@
+#!/usr/bin/env Rscript
+library(Seurat)
+# Convert a count matrix in mtx format into a Seurat object and save it as an RDS file.
+args <- commandArgs(trailingOnly=TRUE)
+# Positional arguments: <mtx> <barcodes> <features> <output file> <aligner>
+mtx_file     <- args[1]
+barcode_file <- args[2]
+feature_file <- args[3]
+out.file     <- args[4]
+aligner      <- args[5]
+
+if(aligner %in% c("kallisto", "alevin")) {
+    # For kallisto and alevin, the features file contains only one column and the matrix needs to be transposed.
+    expression.matrix <- ReadMtx(
+        mtx = mtx_file, features = feature_file, cells = barcode_file, feature.column = 1, mtx.transpose = TRUE
+    )
+} else {
+    expression.matrix <- ReadMtx(
+        mtx = mtx_file, features = feature_file, cells = barcode_file
+    )
+}
+# Build the Seurat object from the counts, then write it to the requested output path.
+seurat.object <- CreateSeuratObject(counts = expression.matrix)
+
+saveRDS(seurat.object, file = out.file)
diff --git a/conf/modules.config b/conf/modules.config
index 19b9dabf..e75c76e8 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -31,7 +31,7 @@ process {
             pattern: '*_versions.yml'
         ]
     }
-    withName: 'MTX_TO_H5AD|CONCAT_H5AD' {
+    withName: 'MTX_TO_H5AD|CONCAT_H5AD|MTX_TO_SEURAT' {
         publishDir = [
             path: { "${params.outdir}/${params.aligner}/mtx_conversions" },
             mode: params.publish_dir_mode
diff --git a/modules/local/mtx_to_seurat.nf b/modules/local/mtx_to_seurat.nf
new file mode 100644
index 00000000..7a122834
--- /dev/null
+++ b/modules/local/mtx_to_seurat.nf
@@ -0,0 +1,51 @@
+process MTX_TO_SEURAT {
+    tag "$meta.id"
+    label 'process_medium'
+    // Runs mtx_to_seurat.R on the matrix, barcode and feature files selected for params.aligner.
+    conda (params.enable_conda ? "seurat-scripts" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'docker://satijalab/seurat:4.1.0' :
+        'satijalab/seurat:4.1.0' }"
+
+    input:
+    // Inputs from the cellranger nf-core module do not come in a single sample dir:
+    // for each sample, the sub-folders and files come directly in an array.
+    tuple val(meta), path(inputs)
+
+    output:
+    path "*.seurat", emit: h5ad
+
+    script:
+    def aligner = params.aligner
+    if (params.aligner == "cellranger") {
+        matrix   = "filtered_feature_bc_matrix/matrix.mtx.gz"
+        barcodes = "filtered_feature_bc_matrix/barcodes.tsv.gz"
+        features = "filtered_feature_bc_matrix/features.tsv.gz"
+    } else if (params.aligner == "kallisto") {
+        matrix   = "*_kallistobustools_count/counts_unfiltered/*.mtx"
+        barcodes = "*_kallistobustools_count/counts_unfiltered/*.barcodes.txt"
+        features = "*_kallistobustools_count/counts_unfiltered/*.genes.txt"
+    } else if (params.aligner == "alevin") {
+        matrix   = "*_alevin_results/alevin/quants_mat.mtx.gz"
+        barcodes = "*_alevin_results/alevin/quants_mat_rows.txt"
+        features = "*_alevin_results/alevin/quants_mat_cols.txt"
+    } else if (params.aligner == 'star') {
+        matrix   = "*.Solo.out/Gene/filtered/matrix.mtx"
+        barcodes = "*.Solo.out/Gene/filtered/barcodes.tsv"
+        features = "*.Solo.out/Gene/filtered/features.tsv"
+    }
+
+    """
+    mtx_to_seurat.R \\
+        $matrix \\
+        $barcodes \\
+        $features \\
+        ${meta.id}_matrix.seurat \\
+        ${aligner}
+    """
+
+    stub:
+    """
+    touch ${meta.id}_matrix.seurat
+    """
+}
diff --git a/subworkflows/local/mtx_conversion.nf b/subworkflows/local/mtx_conversion.nf
new file mode 100644
index 00000000..731842c8
--- /dev/null
+++ b/subworkflows/local/mtx_conversion.nf
@@ -0,0 +1,35 @@
+/* --    IMPORT LOCAL MODULES/SUBWORKFLOWS     -- */
+include { MTX_TO_H5AD   }             from '../../modules/local/mtx_to_h5ad.nf'
+include { CONCAT_H5AD   }             from '../../modules/local/concat_h5ad.nf'
+include { MTX_TO_SEURAT }             from '../../modules/local/mtx_to_seurat.nf'
+
+workflow MTX_CONVERSION {
+    // Converts the input mtx matrices into h5ad files (per sample plus one concatenated) and Seurat objects.
+    take:
+    mtx_matrices // channel fed to both MTX_TO_H5AD and MTX_TO_SEURAT
+    samplesheet  // passed as the second input of CONCAT_H5AD
+
+    main:
+    //
+    // Convert matrix to h5ad
+    //
+    MTX_TO_H5AD (
+        mtx_matrices
+    )
+
+    //
+    // Concatenate the sample-specific h5ad files into one
+    //
+    CONCAT_H5AD (
+        MTX_TO_H5AD.out.h5ad.collect(), // gather all sample-specific files
+        samplesheet
+    )
+
+    //
+    // Convert matrix to seurat
+    //
+    MTX_TO_SEURAT (
+        mtx_matrices
+    )
+
+}
diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf
index 5d2ce825..018f8b49 100644
--- a/workflows/scrnaseq.nf
+++ b/workflows/scrnaseq.nf
@@ -40,7 +40,7 @@ include { KALLISTO_BUSTOOLS } from '../subworkflows/local/kallisto_bustools'
 include { SCRNASEQ_ALEVIN   } from '../subworkflows/local/alevin'
 include { STARSOLO          } from '../subworkflows/local/starsolo'
 include { CELLRANGER_ALIGN  } from "../subworkflows/local/align_cellranger"
-include { H5AD_CONVERSION   } from "../subworkflows/local/conversion_to_h5ad"
+include { MTX_CONVERSION    } from "../subworkflows/local/mtx_conversion"
 
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -170,7 +170,7 @@ workflow SCRNASEQ {
     }
 
     // Run mtx to h5ad conversion subworkflow
-    H5AD_CONVERSION (
+    MTX_CONVERSION (
         ch_mtx_matrices,
         ch_input
     )

From 2ccdc1159600510ef34329d4afed363424ea4ea0 Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <almeidafmarques@gmail.com>
Date: Mon, 4 Jul 2022 15:40:14 +0200
Subject: [PATCH 072/165] fixed suffixes for seurat objects

---
 modules/local/mtx_to_seurat.nf | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/modules/local/mtx_to_seurat.nf b/modules/local/mtx_to_seurat.nf
index 7a122834..47be3ce9 100644
--- a/modules/local/mtx_to_seurat.nf
+++ b/modules/local/mtx_to_seurat.nf
@@ -13,7 +13,7 @@ process MTX_TO_SEURAT {
     tuple val(meta), path(inputs)
 
     output:
-    path "*.seurat", emit: h5ad
+    path "*.rds", emit: seuratObjects
 
     script:
     def aligner = params.aligner
@@ -40,12 +40,12 @@ process MTX_TO_SEURAT {
         $matrix \\
         $barcodes \\
         $features \\
-        ${meta.id}_matrix.seurat \\
+        ${meta.id}_matrix.rds \\
         ${aligner}
     """
 
     stub:
     """
-    touch ${meta.id}_matrix.seurat
+    touch ${meta.id}_matrix.rds
     """
 }

From a561f4b48d95ecab00869ab058d1db32d4ad5583 Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alex.peltzer@gmail.com>
Date: Tue, 5 Jul 2022 11:37:55 +0000
Subject: [PATCH 073/165] Drop local Kbtools/count, update to latest module
 version

---
 modules.json                                  |  9 ++-
 modules/local/kallistobustools_count.nf       | 52 --------------
 modules/nf-core/modules/gunzip/main.nf        | 10 +++
 .../modules/kallistobustools/count/main.nf    | 49 ++++++++++++++
 .../modules/kallistobustools/count/meta.yml   | 67 +++++++++++++++++++
 .../modules/kallistobustools/ref/main.nf      |  6 +-
 modules/nf-core/modules/multiqc/main.nf       |  4 +-
 subworkflows/local/kallisto_bustools.nf       | 13 ++--
 8 files changed, 144 insertions(+), 66 deletions(-)
 delete mode 100644 modules/local/kallistobustools_count.nf
 create mode 100644 modules/nf-core/modules/kallistobustools/count/main.nf
 create mode 100644 modules/nf-core/modules/kallistobustools/count/meta.yml

diff --git a/modules.json b/modules.json
index 96dba9a3..2c9b7639 100644
--- a/modules.json
+++ b/modules.json
@@ -22,13 +22,16 @@
                 "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
             },
             "gunzip": {
-                "git_sha": "9aadd9a6d3f5964476582319b3a1c54a3e3fe7c9"
+                "git_sha": "fa37e0662690c4ec4260dae282fbce08777503e6"
+            },
+            "kallistobustools/count": {
+                "git_sha": "ed5594bee3eb38874cb282d288bc22ab6262a73e"
             },
             "kallistobustools/ref": {
-                "git_sha": "e20e57f90b6787ac9a010a980cf6ea98bd990046"
+                "git_sha": "0f1e736212e5ae37036ac7e32b225087a8a9b154"
             },
             "multiqc": {
-                "git_sha": "e5f8924fabf4c8380f55fb7aee89fd2c268161b1"
+                "git_sha": "08376da6843b14c82d84d444784c0b3635bb7fd5"
             },
             "salmon/index": {
                 "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
diff --git a/modules/local/kallistobustools_count.nf b/modules/local/kallistobustools_count.nf
deleted file mode 100644
index 7f3dabff..00000000
--- a/modules/local/kallistobustools_count.nf
+++ /dev/null
@@ -1,52 +0,0 @@
-process KALLISTOBUSTOOLS_COUNT {
-    tag "$meta.id"
-    label 'process_medium'
-
-    conda (params.enable_conda ? "bioconda::kb-python=0.25.1" : null)
-    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/kb-python:0.25.1--py_0' :
-        'quay.io/biocontainers/kb-python:0.25.1--py_0' }"
-
-    input:
-    //
-    // Input reads are expected to come as: [ meta, [ pair1_read1, pair1_read2, pair2_read1, pair2_read2 ] ]
-    // Input array for a sample is created in the same order reads appear in samplesheet as pairs from replicates are appended to array.
-    //
-    tuple   val(meta),  path(reads)
-    path    index
-    path    t2g
-    path    t1c
-    path    t2c
-    val     use_t1c
-    val     use_t2c
-    val     sc_workflow
-    val     technology
-
-    output:
-    tuple val(meta), path ("*_kallistobustools_count*") , emit: counts
-    path  "versions.yml"                                , emit: versions
-
-    script:
-    def args = task.ext.args ?: ''
-    def prefix = task.ext.prefix ?: "${meta.id}"
-    def cdna        = use_t1c ? "-c1 $t1c" : ''
-    def introns     = use_t2c ? "-c2 $t2c" : ''
-    """
-    kb count \\
-        -t $task.cpus \\
-        -i $index \\
-        -g $t2g \\
-        $cdna \\
-        $introns \\
-        --workflow $sc_workflow \\
-        -x $technology \\
-        $args \\
-        -o ${prefix}_kallistobustools_count \\
-        ${reads.join( " " )}
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        kallistobustools: \$(echo \$(kb --version 2>&1) | sed 's/^.*kb_python //;s/positional arguments.*\$//')
-    END_VERSIONS
-    """
-}
diff --git a/modules/nf-core/modules/gunzip/main.nf b/modules/nf-core/modules/gunzip/main.nf
index 61bf1afa..70367049 100644
--- a/modules/nf-core/modules/gunzip/main.nf
+++ b/modules/nf-core/modules/gunzip/main.nf
@@ -31,4 +31,14 @@ process GUNZIP {
         gunzip: \$(echo \$(gunzip --version 2>&1) | sed 's/^.*(gzip) //; s/ Copyright.*\$//')
     END_VERSIONS
     """
+
+    stub:
+    gunzip = archive.toString() - '.gz'
+    """
+    touch $gunzip
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        gunzip: \$(echo \$(gunzip --version 2>&1) | sed 's/^.*(gzip) //; s/ Copyright.*\$//')
+    END_VERSIONS
+    """
 }
diff --git a/modules/nf-core/modules/kallistobustools/count/main.nf b/modules/nf-core/modules/kallistobustools/count/main.nf
new file mode 100644
index 00000000..10840da6
--- /dev/null
+++ b/modules/nf-core/modules/kallistobustools/count/main.nf
@@ -0,0 +1,49 @@
+process KALLISTOBUSTOOLS_COUNT {
+    tag "$meta.id"
+    label 'process_medium'
+    // Runs `kb count` on the given reads and emits the `<prefix>.count` output directory.
+    conda (params.enable_conda ? 'bioconda::kb-python=0.27.2' : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/kb-python:0.27.2--pyhdfd78af_0' :
+        'quay.io/biocontainers/kb-python:0.27.2--pyhdfd78af_0' }"
+
+    input:
+    tuple val(meta), path(reads)
+    path  index       // passed to kb count as -i
+    path  t2g         // passed to kb count as -g
+    path  t1c         // added to the command as -c1 only when non-empty
+    path  t2c         // added to the command as -c2 only when non-empty
+    val   technology  // passed to kb count as -x
+
+    output:
+    tuple val(meta), path ("*.count"), emit: count
+    path "versions.yml"              , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args    = task.ext.args ?: ''
+    def prefix  = task.ext.prefix ?: "${meta.id}"
+    def cdna    = t1c ? "-c1 $t1c" : ''
+    def introns = t2c ? "-c2 $t2c" : ''
+    """
+    kb \\
+        count \\
+        -t $task.cpus \\
+        -i $index \\
+        -g $t2g \\
+        $cdna \\
+        $introns \\
+        -x $technology \\
+        $args \\
+        -o ${prefix}.count \\
+        ${reads.join( " " )} \\
+        -m ${task.memory.toGiga()}G
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        kallistobustools: \$(echo \$(kb --version 2>&1) | sed 's/^.*kb_python //;s/positional arguments.*\$//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/modules/kallistobustools/count/meta.yml b/modules/nf-core/modules/kallistobustools/count/meta.yml
new file mode 100644
index 00000000..911697d2
--- /dev/null
+++ b/modules/nf-core/modules/kallistobustools/count/meta.yml
@@ -0,0 +1,67 @@
+name: kallistobustools_count
+description: quantifies scRNA-seq data from fastq files using kb-python.
+keywords:
+  - scRNA-seq
+  - count
+tools:
+  - kb:
+      description: kallisto and bustools are wrapped in an easy-to-use program called kb
+      homepage: https://www.kallistobus.tools/
+      documentation: https://kb-python.readthedocs.io/en/latest/index.html
+      tool_dev_url: https://github.com/pachterlab/kb_python
+      doi: ""
+      licence: MIT License
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - reads:
+      type: file
+      description: |
+        List of input FastQ files of size 1 and 2 for single-end and paired-end data,
+        respectively.
+  - index:
+      type: file
+      description: kb-ref index file (.idx)
+      pattern: "*.{idx}"
+  - t2g:
+      type: file
+      description: t2g file from kallisto
+      pattern: "*t2g.txt"
+  - t1c:
+      type: file
+      description: kb ref's c1 spliced_t2c file
+      pattern: "*.{cdna_t2c.txt}"
+  - t2c:
+      type: file
+      description: kb ref's c2 unspliced_t2c file
+      pattern: "*.{introns_t2c.txt}"
+  - workflow_mode:
+      type: value
+      description: String value defining workflow to use, can be one of "standard", "lamanno", "nucleus"
+      pattern: "{standard,lamanno,nucleus,kite}"
+  - technology:
+      type: value
+      description: String value defining the sequencing technology used.
+      pattern: "{10XV1,10XV2,10XV3,CELSEQ,CELSEQ2,DROPSEQ,INDROPSV1,INDROPSV2,INDROPSV3,SCRUBSEQ,SURECELL,SMARTSEQ}"
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test']
+  - count:
+      type: file
+      description: kb count output folder
+      pattern: "*.{count}"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+
+authors:
+  - "@flowuenne"
diff --git a/modules/nf-core/modules/kallistobustools/ref/main.nf b/modules/nf-core/modules/kallistobustools/ref/main.nf
index 1e789615..89943ec9 100644
--- a/modules/nf-core/modules/kallistobustools/ref/main.nf
+++ b/modules/nf-core/modules/kallistobustools/ref/main.nf
@@ -2,10 +2,10 @@ process KALLISTOBUSTOOLS_REF {
     tag "$fasta"
     label 'process_medium'
 
-    conda (params.enable_conda ? 'bioconda::kb-python=0.26.3' : null)
+    conda (params.enable_conda ? 'bioconda::kb-python=0.27.2' : null)
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/kb-python:0.26.3--pyhdfd78af_0' :
-        'quay.io/biocontainers/kb-python:0.26.3--pyhdfd78af_0' }"
+        'https://depot.galaxyproject.org/singularity/kb-python:0.27.2--pyhdfd78af_0' :
+        'quay.io/biocontainers/kb-python:0.27.2--pyhdfd78af_0' }"
 
     input:
     path fasta
diff --git a/modules/nf-core/modules/multiqc/main.nf b/modules/nf-core/modules/multiqc/main.nf
index b0831b5d..3c3517bf 100644
--- a/modules/nf-core/modules/multiqc/main.nf
+++ b/modules/nf-core/modules/multiqc/main.nf
@@ -3,8 +3,8 @@ process MULTIQC {
 
     conda (params.enable_conda ? 'bioconda::multiqc=1.12' : null)
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/multiqc:1.12--pyhdfd78af_0' :
-        'quay.io/biocontainers/multiqc:1.12--pyhdfd78af_0' }"
+        'https://depot.galaxyproject.org/singularity/multiqc:1.13a--pyhdfd78af_1' :
+        'quay.io/biocontainers/multiqc:1.13a--pyhdfd78af_1' }"
 
     input:
     path  multiqc_files, stageAs: "?/*"
diff --git a/subworkflows/local/kallisto_bustools.nf b/subworkflows/local/kallisto_bustools.nf
index 204852da..2b0b7f52 100644
--- a/subworkflows/local/kallisto_bustools.nf
+++ b/subworkflows/local/kallisto_bustools.nf
@@ -1,6 +1,6 @@
 /* --    IMPORT LOCAL MODULES/SUBWORKFLOWS     -- */
 include { GENE_MAP }                          from '../../modules/local/gene_map'
-include { KALLISTOBUSTOOLS_COUNT }            from '../../modules/local/kallistobustools_count'
+include {KALLISTOBUSTOOLS_COUNT }             from '../../modules/nf-core/modules/kallistobustools/count/main'
 
 /* --    IMPORT NF-CORE MODULES/SUBWORKFLOWS   -- */
 include { GUNZIP }                      from '../../modules/nf-core/modules/gunzip/main'
@@ -46,6 +46,8 @@ workflow KALLISTO_BUSTOOLS {
         txp2gene = KALLISTOBUSTOOLS_REF.out.t2g.collect()
         kallisto_index = KALLISTOBUSTOOLS_REF.out.index.collect()
         ch_versions = ch_versions.mix(KALLISTOBUSTOOLS_REF.out.versions)
+        t1c = KALLISTOBUSTOOLS_REF.out.cdna_t2c.ifEmpty{ [] }
+        t2c = KALLISTOBUSTOOLS_REF.out.intron_t2c.ifEmpty{ [] }
     }
 
     /*
@@ -55,18 +57,17 @@ workflow KALLISTO_BUSTOOLS {
         ch_fastq,
         kallisto_index,
         txp2gene,
-        [],
-        [],
-        false,
-        false,
+        t1c,
+        t2c,
         kb_workflow,
         protocol
     )
+
     ch_versions = ch_versions.mix(KALLISTOBUSTOOLS_COUNT.out.versions)
 
     emit:
     ch_versions
-    counts = KALLISTOBUSTOOLS_COUNT.out.counts
+    counts = KALLISTOBUSTOOLS_COUNT.out.count
 
 
 }

From 9290e4c5439c3f73ce025a47e8d6c1e0b74b21b5 Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alex.peltzer@gmail.com>
Date: Tue, 5 Jul 2022 11:39:58 +0000
Subject: [PATCH 074/165] Changelog

---
 CHANGELOG.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 68229e01..78ef8355 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Fixes
 
+- Fixed Kallistobustools workflow [#123](https://github.com/nf-core/scrnaseq/issues/123) by upgrading to nf-core/modules module
+
 ## v2.0.0 - 2022-06-17 "Gray Nickel Beagle"
 
 - Pipeline ported to dsl2

From aa7e256b7d545a3666386e6ec5b50eaef7059b35 Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alex.peltzer@gmail.com>
Date: Tue, 5 Jul 2022 11:42:10 +0000
Subject: [PATCH 075/165] Wrong module call

---
 subworkflows/local/kallisto_bustools.nf | 1 -
 1 file changed, 1 deletion(-)

diff --git a/subworkflows/local/kallisto_bustools.nf b/subworkflows/local/kallisto_bustools.nf
index 2b0b7f52..bc958b47 100644
--- a/subworkflows/local/kallisto_bustools.nf
+++ b/subworkflows/local/kallisto_bustools.nf
@@ -59,7 +59,6 @@ workflow KALLISTO_BUSTOOLS {
         txp2gene,
         t1c,
         t2c,
-        kb_workflow,
         protocol
     )
 

From f3ce79ce91843470f1e638e17aae3c043846e53c Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alex.peltzer@gmail.com>
Date: Tue, 5 Jul 2022 12:02:12 +0000
Subject: [PATCH 076/165] Should fix things

---
 modules/local/mtx_to_h5ad.nf | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf
index c0c24890..d5a7941b 100644
--- a/modules/local/mtx_to_h5ad.nf
+++ b/modules/local/mtx_to_h5ad.nf
@@ -18,9 +18,9 @@ process MTX_TO_H5AD {
     script:
     // def file paths for aligners (except cellranger)
     if (params.aligner == 'kallisto') {
-        mtx_matrix   = "*_kallistobustools_count/counts_unfiltered/*.mtx"
-        barcodes_tsv = "*_kallistobustools_count/counts_unfiltered/*.barcodes.txt"
-        features_tsv = "*_kallistobustools_count/counts_unfiltered/*.genes.txt"
+        mtx_matrix   = "*count/counts_unfiltered/*.mtx"
+        barcodes_tsv = "*count/counts_unfiltered/*.barcodes.txt"
+        features_tsv = "*count/counts_unfiltered/*.genes.txt"
     } else if (params.aligner == 'alevin') {
         mtx_matrix   = "*_alevin_results/alevin/quants_mat.mtx.gz"
         barcodes_tsv = "*_alevin_results/alevin/quants_mat_rows.txt"

From 70efe3f7b1bd4992adb5870e4f0f3f5138438f09 Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <felipemarques89@gmail.com>
Date: Tue, 5 Jul 2022 21:42:52 +0200
Subject: [PATCH 077/165] fixing paths after kallisto update

---
 modules/local/mtx_to_seurat.nf | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/modules/local/mtx_to_seurat.nf b/modules/local/mtx_to_seurat.nf
index 47be3ce9..9a71ac8a 100644
--- a/modules/local/mtx_to_seurat.nf
+++ b/modules/local/mtx_to_seurat.nf
@@ -22,9 +22,9 @@ process MTX_TO_SEURAT {
         barcodes = "filtered_feature_bc_matrix/barcodes.tsv.gz"
         features = "filtered_feature_bc_matrix/features.tsv.gz"
     } else if (params.aligner == "kallisto") {
-        matrix   = "*_kallistobustools_count/counts_unfiltered/*.mtx"
-        barcodes = "*_kallistobustools_count/counts_unfiltered/*.barcodes.txt"
-        features = "*_kallistobustools_count/counts_unfiltered/*.genes.txt"
+        matrix   = "*count/counts_unfiltered/*.mtx"
+        barcodes = "*count/counts_unfiltered/*.barcodes.txt"
+        features = "*count/counts_unfiltered/*.genes.txt"
     } else if (params.aligner == "alevin") {
         matrix   = "*_alevin_results/alevin/quants_mat.mtx.gz"
         barcodes = "*_alevin_results/alevin/quants_mat_rows.txt"

From 5c79f7781ce8685b9293a829060cc3ad26acfd83 Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <almeidafmarques@gmail.com>
Date: Tue, 12 Jul 2022 13:44:49 +0200
Subject: [PATCH 078/165] Update modules.config

adding kallisto workflow definition for kallistobustools/count module
---
 conf/modules.config | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/conf/modules.config b/conf/modules.config
index e75c76e8..e9992e7e 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -101,11 +101,18 @@ if (params.aligner == "star") {
 
 if (params.aligner == 'kallisto') {
     process {
-        withName: 'KALLISTOBUSTOOLS_REF|KALLISTOBUSTOOLS_COUNT' {
+        withName: KALLISTOBUSTOOLS_REF {
             publishDir = [
                 path: { "${params.outdir}/${params.aligner}" },
                 mode: params.publish_dir_mode
             ]
         }
+        withName: KALLISTOBUSTOOLS_COUNT {
+            publishDir = [
+                path: { "${params.outdir}/${params.aligner}" },
+                mode: params.publish_dir_mode
+            ]
+            ext.args = "--workflow ${params.kb_workflow}"
+        }
     }
 }

From fd3b49055eac6615f6c1d2c9cb5fa227bd7ab598 Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <almeidafmarques@gmail.com>
Date: Tue, 12 Jul 2022 15:55:15 +0200
Subject: [PATCH 079/165] Updating nf-core/modules

---
 modules.json                                           | 4 ++--
 modules/nf-core/modules/kallistobustools/count/main.nf | 3 ++-
 modules/nf-core/modules/multiqc/main.nf                | 2 +-
 3 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/modules.json b/modules.json
index 2c9b7639..b21e1449 100644
--- a/modules.json
+++ b/modules.json
@@ -25,13 +25,13 @@
                 "git_sha": "fa37e0662690c4ec4260dae282fbce08777503e6"
             },
             "kallistobustools/count": {
-                "git_sha": "ed5594bee3eb38874cb282d288bc22ab6262a73e"
+                "git_sha": "ec806cebf121767b95ad492b0d0f93dbdc2f33da"
             },
             "kallistobustools/ref": {
                 "git_sha": "0f1e736212e5ae37036ac7e32b225087a8a9b154"
             },
             "multiqc": {
-                "git_sha": "08376da6843b14c82d84d444784c0b3635bb7fd5"
+                "git_sha": "5138acca0985ca01c38a1c4fba917d83772b1106"
             },
             "salmon/index": {
                 "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
diff --git a/modules/nf-core/modules/kallistobustools/count/main.nf b/modules/nf-core/modules/kallistobustools/count/main.nf
index 10840da6..ab61597c 100644
--- a/modules/nf-core/modules/kallistobustools/count/main.nf
+++ b/modules/nf-core/modules/kallistobustools/count/main.nf
@@ -27,6 +27,7 @@ process KALLISTOBUSTOOLS_COUNT {
     def prefix  = task.ext.prefix ?: "${meta.id}"
     def cdna    = t1c ? "-c1 $t1c" : ''
     def introns = t2c ? "-c2 $t2c" : ''
+    def memory  = task.memory.toGiga() - 1
     """
     kb \\
         count \\
@@ -39,7 +40,7 @@ process KALLISTOBUSTOOLS_COUNT {
         $args \\
         -o ${prefix}.count \\
         ${reads.join( " " )} \\
-        -m ${task.memory.toGiga()}G
+        -m ${memory}G
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
diff --git a/modules/nf-core/modules/multiqc/main.nf b/modules/nf-core/modules/multiqc/main.nf
index 3c3517bf..1e7d6afe 100644
--- a/modules/nf-core/modules/multiqc/main.nf
+++ b/modules/nf-core/modules/multiqc/main.nf
@@ -1,7 +1,7 @@
 process MULTIQC {
     label 'process_medium'
 
-    conda (params.enable_conda ? 'bioconda::multiqc=1.12' : null)
+    conda (params.enable_conda ? 'bioconda::multiqc=1.13a' : null)
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
         'https://depot.galaxyproject.org/singularity/multiqc:1.13a--pyhdfd78af_1' :
         'quay.io/biocontainers/multiqc:1.13a--pyhdfd78af_1' }"

From bef2d22c451d37d461f1cf84e16ff70dcfd5efd2 Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <almeidafmarques@gmail.com>
Date: Wed, 13 Jul 2022 10:14:19 +0200
Subject: [PATCH 080/165] add output directive to ensure successful run

---
 modules/nf-core/modules/kallistobustools/count/main.nf | 1 +
 1 file changed, 1 insertion(+)

diff --git a/modules/nf-core/modules/kallistobustools/count/main.nf b/modules/nf-core/modules/kallistobustools/count/main.nf
index ab61597c..1dde0f9d 100644
--- a/modules/nf-core/modules/kallistobustools/count/main.nf
+++ b/modules/nf-core/modules/kallistobustools/count/main.nf
@@ -18,6 +18,7 @@ process KALLISTOBUSTOOLS_COUNT {
     output:
     tuple val(meta), path ("*.count"), emit: count
     path "versions.yml"              , emit: versions
+    path "*.count/*/*.mtx" // ensure that kallisto finished and produced outputs
 
     when:
     task.ext.when == null || task.ext.when

From 8416bb1c46c5bc15e0c2c6b89a9244e68cd1b7d5 Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alex.peltzer@gmail.com>
Date: Thu, 14 Jul 2022 11:27:34 +0000
Subject: [PATCH 081/165] Small update of custom dumpsoftware versions

---
 modules.json                                                | 2 +-
 modules/nf-core/modules/custom/dumpsoftwareversions/main.nf | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/modules.json b/modules.json
index b21e1449..ed503883 100644
--- a/modules.json
+++ b/modules.json
@@ -13,7 +13,7 @@
                 "git_sha": "61d87bb2bcbcd058e1af6e5f14bb821b8ae4ed3e"
             },
             "custom/dumpsoftwareversions": {
-                "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
+                "git_sha": "682f789f93070bd047868300dd018faf3d434e7c"
             },
             "fastqc": {
                 "git_sha": "49b18b1639f4f7104187058866a8fab33332bdfe"
diff --git a/modules/nf-core/modules/custom/dumpsoftwareversions/main.nf b/modules/nf-core/modules/custom/dumpsoftwareversions/main.nf
index 327d5100..12293efc 100644
--- a/modules/nf-core/modules/custom/dumpsoftwareversions/main.nf
+++ b/modules/nf-core/modules/custom/dumpsoftwareversions/main.nf
@@ -2,10 +2,10 @@ process CUSTOM_DUMPSOFTWAREVERSIONS {
     label 'process_low'
 
     // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container
-    conda (params.enable_conda ? "bioconda::multiqc=1.11" : null)
+    conda (params.enable_conda ? "bioconda::multiqc=1.12" : null)
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/multiqc:1.11--pyhdfd78af_0' :
-        'quay.io/biocontainers/multiqc:1.11--pyhdfd78af_0' }"
+        'https://depot.galaxyproject.org/singularity/multiqc:1.12--pyhdfd78af_0' :
+        'quay.io/biocontainers/multiqc:1.12--pyhdfd78af_0' }"
 
     input:
     path versions

From 773b615451623903032a544251913451b4f1236c Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alex.peltzer@gmail.com>
Date: Thu, 14 Jul 2022 11:37:32 +0000
Subject: [PATCH 082/165] Yeah revert module

---
 modules/nf-core/modules/kallistobustools/count/main.nf | 1 -
 1 file changed, 1 deletion(-)

diff --git a/modules/nf-core/modules/kallistobustools/count/main.nf b/modules/nf-core/modules/kallistobustools/count/main.nf
index 1dde0f9d..ab61597c 100644
--- a/modules/nf-core/modules/kallistobustools/count/main.nf
+++ b/modules/nf-core/modules/kallistobustools/count/main.nf
@@ -18,7 +18,6 @@ process KALLISTOBUSTOOLS_COUNT {
     output:
     tuple val(meta), path ("*.count"), emit: count
     path "versions.yml"              , emit: versions
-    path "*.count/*/*.mtx" // ensure that kallisto finished and produced outputs
 
     when:
     task.ext.when == null || task.ext.when

From d758d8cb25d5dfe2aca3306b9a4a39fed09b75c8 Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <almeidafmarques@gmail.com>
Date: Thu, 14 Jul 2022 14:29:19 +0200
Subject: [PATCH 083/165] update nf-core/kallistobustools/count module

---
 modules.json                                           | 2 +-
 modules/nf-core/modules/kallistobustools/count/main.nf | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/modules.json b/modules.json
index ed503883..5468da87 100644
--- a/modules.json
+++ b/modules.json
@@ -25,7 +25,7 @@
                 "git_sha": "fa37e0662690c4ec4260dae282fbce08777503e6"
             },
             "kallistobustools/count": {
-                "git_sha": "ec806cebf121767b95ad492b0d0f93dbdc2f33da"
+                "git_sha": "013035eb5c80c9e3f37f2c89c92a1ae7925df8ea"
             },
             "kallistobustools/ref": {
                 "git_sha": "0f1e736212e5ae37036ac7e32b225087a8a9b154"
diff --git a/modules/nf-core/modules/kallistobustools/count/main.nf b/modules/nf-core/modules/kallistobustools/count/main.nf
index ab61597c..52accd31 100644
--- a/modules/nf-core/modules/kallistobustools/count/main.nf
+++ b/modules/nf-core/modules/kallistobustools/count/main.nf
@@ -18,6 +18,7 @@ process KALLISTOBUSTOOLS_COUNT {
     output:
     tuple val(meta), path ("*.count"), emit: count
     path "versions.yml"              , emit: versions
+    path "*.count/*/*.mtx"           , emit: matrix //Ensure that kallisto finished and produced outputs
 
     when:
     task.ext.when == null || task.ext.when

From 1cdd81bb85a960c3451ddeb8d0506a86921b93c6 Mon Sep 17 00:00:00 2001
From: Marcel Ribeiro-Dantas <marcel.ribeiro-dantas@curie.fr>
Date: Fri, 15 Jul 2022 11:17:12 +0200
Subject: [PATCH 084/165] Fix zenodo shield and link

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index aec34ff7..69d94fd9 100644
--- a/README.md
+++ b/README.md
@@ -3,7 +3,7 @@
 [![GitHub Actions CI Status](https://github.com/nf-core/scrnaseq/workflows/nf-core%20CI/badge.svg)](https://github.com/nf-core/scrnaseq/actions?query=workflow%3A%22nf-core+CI%22)
 [![GitHub Actions Linting Status](https://github.com/nf-core/scrnaseq/workflows/nf-core%20linting/badge.svg)](https://github.com/nf-core/scrnaseq/actions?query=workflow%3A%22nf-core+linting%22)
 [![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?logo=Amazon%20AWS)](https://nf-co.re/scrnaseq/results)
-[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8)](https://doi.org/10.5281/zenodo.XXXXXXX)
+[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.3568187-1073c8)](https://doi.org/10.5281/zenodo.3568187)
 
 [![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A521.10.3-23aa62.svg)](https://www.nextflow.io/)
 [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?logo=anaconda)](https://docs.conda.io/en/latest/)
@@ -82,7 +82,7 @@ For further information or help, don't hesitate to get in touch on the [Slack `#
 
 ## Citations
 
-If you use nf-core/scrnaseq for your analysis, please cite it using the following doi: [10.5281/zenodo.3568187](https://doi.org/10.5281/10.5281/zenodo.3568187)
+If you use nf-core/scrnaseq for your analysis, please cite it using the following doi: [10.5281/zenodo.3568187](https://doi.org/10.5281/zenodo.3568187)
 
 The basic benchmarks that were used as motivation for incorporating the three available modular workflows can be found in [this publication](https://www.biorxiv.org/content/10.1101/673285v2).
 

From 69f2730afd32a6d0336b6dd4eb54f871f8851352 Mon Sep 17 00:00:00 2001
From: Marcel Ribeiro-Dantas <marcel.ribeiro-dantas@curie.fr>
Date: Fri, 15 Jul 2022 11:21:24 +0200
Subject: [PATCH 085/165] Remove extra slack shield

I removed the extra slack shield, and chose to keep the one that
is used in other nf-core GitHub repositories such as rnaseq and
eager.
---
 README.md | 2 --
 1 file changed, 2 deletions(-)

diff --git a/README.md b/README.md
index 69d94fd9..20f51240 100644
--- a/README.md
+++ b/README.md
@@ -15,8 +15,6 @@
 [![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?logo=twitter)](https://twitter.com/nf_core)
 [![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?logo=youtube)](https://www.youtube.com/c/nf-core)
 
-[![Join us on Slack](https://img.shields.io/badge/slack-nfcore/scrnaseq-blue.svg)](https://nfcore.slack.com/channels/scrnaseq)
-
 ## Introduction
 
 **nf-core/scrnaseq** is a bioinformatics best-practice analysis pipeline for processing 10x Genomics single-cell RNA-seq data.

From 6a276d2a4a90953d18b46571a15f2447836dccc0 Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <almeidafmarques@gmail.com>
Date: Thu, 21 Jul 2022 14:37:48 +0200
Subject: [PATCH 086/165] adding option to parse non-standard kallisto outputs

---
 modules/local/mtx_to_h5ad.nf             | 14 ++++++++++++
 modules/local/mtx_to_seurat.nf           | 14 ++++++++++++
 subworkflows/local/conversion_to_h5ad.nf | 27 ------------------------
 3 files changed, 28 insertions(+), 27 deletions(-)
 delete mode 100644 subworkflows/local/conversion_to_h5ad.nf

diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf
index d5a7941b..a4916c6f 100644
--- a/modules/local/mtx_to_h5ad.nf
+++ b/modules/local/mtx_to_h5ad.nf
@@ -43,6 +43,20 @@ process MTX_TO_H5AD {
         --out ${meta.id}_matrix.h5ad
     """
 
+    if (params.aligner == 'kallisto' && params.kb_workflow != 'standard')
+    """
+    # convert file types
+    for input_type in spliced unspliced ; do
+        mtx_to_h5ad.py \\
+            --aligner ${params.aligner} \\
+            --sample ${meta.id} \\
+            --mtx *count/counts_unfiltered/\${input_type}.mtx \\
+            --barcode *count/counts_unfiltered/\${input_type}.barcodes.txt \\
+            --feature *count/counts_unfiltered/\${input_type}.genes.txt \\
+            --out ${meta.id}_\${input_type}_matrix.h5ad ;
+    done
+    """
+
     else
     """
     # convert file types
diff --git a/modules/local/mtx_to_seurat.nf b/modules/local/mtx_to_seurat.nf
index 9a71ac8a..04b3f72c 100644
--- a/modules/local/mtx_to_seurat.nf
+++ b/modules/local/mtx_to_seurat.nf
@@ -35,6 +35,20 @@ process MTX_TO_SEURAT {
         features = "*.Solo.out/Gene/filtered/features.tsv"
     }
 
+    if (params.aligner == 'kallisto' && params.kb_workflow != 'standard')
+    """
+    # convert file types
+    for input_type in spliced unspliced ; do
+        mtx_to_seurat.R \\
+            *count/counts_unfiltered/\${input_type}.mtx \\
+            *count/counts_unfiltered/\${input_type}.barcodes.txt \\
+            *count/counts_unfiltered/\${input_type}.genes.txt \\
+            ${meta.id}_\${input_type}_matrix.rds \\
+            ${aligner}
+    done
+    """
+
+    else
     """
     mtx_to_seurat.R \\
         $matrix \\
diff --git a/subworkflows/local/conversion_to_h5ad.nf b/subworkflows/local/conversion_to_h5ad.nf
deleted file mode 100644
index 67b100d5..00000000
--- a/subworkflows/local/conversion_to_h5ad.nf
+++ /dev/null
@@ -1,27 +0,0 @@
-/* --    IMPORT LOCAL MODULES/SUBWORKFLOWS     -- */
-include { MTX_TO_H5AD }             from '../../modules/local/mtx_to_h5ad.nf'
-include { CONCAT_H5AD }             from '../../modules/local/concat_h5ad.nf'
-
-workflow H5AD_CONVERSION {
-
-    take:
-    mtx_matrices
-    samplesheet
-
-    main:
-    //
-    // Convert matrix do h5ad
-    //
-    MTX_TO_H5AD (
-        mtx_matrices
-    )
-
-    //
-    // Concat sample-specific h5ad in one
-    //
-    CONCAT_H5AD (
-        MTX_TO_H5AD.out.h5ad.collect(), // gather all sample-specific files
-        samplesheet
-    )
-
-}

From 370e26881e1ed475579718da706ddb9ce028f85d Mon Sep 17 00:00:00 2001
From: RHReynolds <regina.reynolds.16@ucl.ac.uk>
Date: Fri, 29 Jul 2022 14:25:11 +0100
Subject: [PATCH 087/165] fix: STAR mtx conversion when using GeneFull

When STAR is run with the flag `--soloFeatures GeneFull` (permits
counting of exonic and intronic reads), the output is stored in
`*.Solo.out/GeneFull/` and not `*.Solo.out/Gene`. As a result, matrix
conversion results in an error, as matrix, barcodes and features cannot
be found. This error can be fixed by adding an asterisk in the file
path provided to the mtx conversion modules.
---
 modules/local/mtx_to_h5ad.nf   | 6 +++---
 modules/local/mtx_to_seurat.nf | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf
index d5a7941b..41be94a6 100644
--- a/modules/local/mtx_to_h5ad.nf
+++ b/modules/local/mtx_to_h5ad.nf
@@ -26,9 +26,9 @@ process MTX_TO_H5AD {
         barcodes_tsv = "*_alevin_results/alevin/quants_mat_rows.txt"
         features_tsv = "*_alevin_results/alevin/quants_mat_cols.txt"
     } else if (params.aligner == 'star') {
-        mtx_matrix   = "*.Solo.out/Gene/filtered/matrix.mtx"
-        barcodes_tsv = "*.Solo.out/Gene/filtered/barcodes.tsv"
-        features_tsv = "*.Solo.out/Gene/filtered/features.tsv"
+        mtx_matrix   = "*.Solo.out/Gene*/filtered/matrix.mtx"
+        barcodes_tsv = "*.Solo.out/Gene*/filtered/barcodes.tsv"
+        features_tsv = "*.Solo.out/Gene*/filtered/features.tsv"
     }
 
     //
diff --git a/modules/local/mtx_to_seurat.nf b/modules/local/mtx_to_seurat.nf
index 9a71ac8a..c4a44db9 100644
--- a/modules/local/mtx_to_seurat.nf
+++ b/modules/local/mtx_to_seurat.nf
@@ -30,9 +30,9 @@ process MTX_TO_SEURAT {
         barcodes = "*_alevin_results/alevin/quants_mat_rows.txt"
         features = "*_alevin_results/alevin/quants_mat_cols.txt"
     } else if (params.aligner == 'star') {
-        matrix   = "*.Solo.out/Gene/filtered/matrix.mtx"
-        barcodes = "*.Solo.out/Gene/filtered/barcodes.tsv"
-        features = "*.Solo.out/Gene/filtered/features.tsv"
+        matrix   = "*.Solo.out/Gene*/filtered/matrix.mtx"
+        barcodes = "*.Solo.out/Gene*/filtered/barcodes.tsv"
+        features = "*.Solo.out/Gene*/filtered/features.tsv"
     }
 
     """

From 09b9f636610511a37df12bed7b55a3cc49b9ce7c Mon Sep 17 00:00:00 2001
From: RHReynolds <regina.reynolds.16@ucl.ac.uk>
Date: Fri, 29 Jul 2022 14:39:19 +0100
Subject: [PATCH 088/165] docs: update changelog

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 78ef8355..54741744 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Fixes
 
 - Fixed Kallistobustools workflow [#123](https://github.com/nf-core/scrnaseq/issues/123) by upgrading to nf-core/modules module
+- Fixed matrix conversion error when running STAR with --soloFeatures GeneFull [#135](https://github.com/nf-core/scrnaseq/pull/135)
 
 ## v2.0.0 - 2022-06-17 "Gray Nickel Beagle"
 

From f15d9583c4569218b96ae0874177b5b5828c55b3 Mon Sep 17 00:00:00 2001
From: RHReynolds <regina.reynolds.16@ucl.ac.uk>
Date: Fri, 29 Jul 2022 18:04:12 +0100
Subject: [PATCH 089/165] feat: gzip starsolo outputs

Gzipping outputs for file compression and downstream compatibility
with scflow (which requires zipped format, as outputted by cellranger).
---
 modules/local/mtx_to_h5ad.nf   | 6 +++---
 modules/local/mtx_to_seurat.nf | 6 +++---
 modules/local/star_align.nf    | 5 +++++
 3 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf
index 41be94a6..22a04191 100644
--- a/modules/local/mtx_to_h5ad.nf
+++ b/modules/local/mtx_to_h5ad.nf
@@ -26,9 +26,9 @@ process MTX_TO_H5AD {
         barcodes_tsv = "*_alevin_results/alevin/quants_mat_rows.txt"
         features_tsv = "*_alevin_results/alevin/quants_mat_cols.txt"
     } else if (params.aligner == 'star') {
-        mtx_matrix   = "*.Solo.out/Gene*/filtered/matrix.mtx"
-        barcodes_tsv = "*.Solo.out/Gene*/filtered/barcodes.tsv"
-        features_tsv = "*.Solo.out/Gene*/filtered/features.tsv"
+        mtx_matrix   = "*.Solo.out/Gene*/filtered/matrix.mtx.gz"
+        barcodes_tsv = "*.Solo.out/Gene*/filtered/barcodes.tsv.gz"
+        features_tsv = "*.Solo.out/Gene*/filtered/features.tsv.gz"
     }
 
     //
diff --git a/modules/local/mtx_to_seurat.nf b/modules/local/mtx_to_seurat.nf
index c4a44db9..33fe0830 100644
--- a/modules/local/mtx_to_seurat.nf
+++ b/modules/local/mtx_to_seurat.nf
@@ -30,9 +30,9 @@ process MTX_TO_SEURAT {
         barcodes = "*_alevin_results/alevin/quants_mat_rows.txt"
         features = "*_alevin_results/alevin/quants_mat_cols.txt"
     } else if (params.aligner == 'star') {
-        matrix   = "*.Solo.out/Gene*/filtered/matrix.mtx"
-        barcodes = "*.Solo.out/Gene*/filtered/barcodes.tsv"
-        features = "*.Solo.out/Gene*/filtered/features.tsv"
+        matrix   = "*.Solo.out/Gene*/filtered/matrix.mtx.gz"
+        barcodes = "*.Solo.out/Gene*/filtered/barcodes.tsv.gz"
+        features = "*.Solo.out/Gene*/filtered/features.tsv.gz"
     }
 
     """
diff --git a/modules/local/star_align.nf b/modules/local/star_align.nf
index d544224f..c489d0b8 100644
--- a/modules/local/star_align.nf
+++ b/modules/local/star_align.nf
@@ -69,6 +69,11 @@ process STAR_ALIGN {
         gzip ${prefix}.unmapped_2.fastq
     fi
 
+    if [ -d ${prefix}.Solo.out ]; then
+        # Backslashes still need to be escaped (https://github.com/nextflow-io/nextflow/issues/67)
+        find ${prefix}.Solo.out \\( -name "*.tsv" -o -name "*.mtx" \\) -exec gzip {} \\;
+    fi
+
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
         star: \$(STAR --version | sed -e "s/STAR_//g")

From bf5aaede10d29b03afc924fbca5b1848612b4775 Mon Sep 17 00:00:00 2001
From: Regina H Reynolds <regina.reynolds.16@ucl.ac.uk>
Date: Mon, 8 Aug 2022 18:26:40 +0100
Subject: [PATCH 090/165] fix(mtx_to_seurat): could not find ReadMtx using
 conda profile

The conda package, `seurat-scripts`, does not contain the function,
`ReadMtx()`. Thus, running with the test and conda profiles results in
the error, `"Error in ReadMtx(mtx = mtx_file, features = feature_file,
 cells = barcode_file,  : could not find function "ReadMtx"`. This
can be resolved by using the conda package, `r-seurat`.
---
 modules/local/mtx_to_seurat.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/local/mtx_to_seurat.nf b/modules/local/mtx_to_seurat.nf
index 33fe0830..e2aa8217 100644
--- a/modules/local/mtx_to_seurat.nf
+++ b/modules/local/mtx_to_seurat.nf
@@ -2,7 +2,7 @@ process MTX_TO_SEURAT {
     tag "$meta.id"
     label 'process_medium'
 
-    conda (params.enable_conda ? "seurat-scripts" : null)
+    conda (params.enable_conda ? "r-seurat" : null)
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
         'docker://satijalab/seurat:4.1.0' :
         'satijalab/seurat:4.1.0' }"

From 6f0457e5b2958e1a04c61d608bad1db03bfc246a Mon Sep 17 00:00:00 2001
From: Regina H Reynolds <regina.reynolds.16@ucl.ac.uk>
Date: Mon, 8 Aug 2022 18:37:24 +0100
Subject: [PATCH 091/165] docs: update CHANGELOG

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 54741744..953bcdf0 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 - Fixed Kallistobustools workflow [#123](https://github.com/nf-core/scrnaseq/issues/123) by upgrading to nf-core/modules module
 - Fixed matrix conversion error when running STAR with --soloFeatures GeneFull [#135](https://github.com/nf-core/scrnaseq/pull/135)
+- Fixed seurat matrix conversion error when running with conda profile [#136](https://github.com/nf-core/scrnaseq/pull/136)
 
 ## v2.0.0 - 2022-06-17 "Gray Nickel Beagle"
 

From d6a5d52a38b5e1e485015ef1024e5cdba95894be Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <felipemarques89@gmail.com>
Date: Mon, 15 Aug 2022 10:10:16 +0000
Subject: [PATCH 092/165] added line to changelog

---
 CHANGELOG.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 78ef8355..af431a2d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Fixes
 
 - Fixed Kallistobustools workflow [#123](https://github.com/nf-core/scrnaseq/issues/123) by upgrading to nf-core/modules module
+- Fixed Kallistobustools module [#116](https://github.com/nf-core/scrnaseq/issues/116). Module was asking the same amount of memory that was being set as limit and not taking into account the different outputs produced by kallisto standard and non-standard workflows.
+
+
 
 ## v2.0.0 - 2022-06-17 "Gray Nickel Beagle"
 

From 9fd36495dc1f28ee76f3072eeeac0948af73b4cb Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <almeidafmarques@gmail.com>
Date: Thu, 18 Aug 2022 11:28:59 +0200
Subject: [PATCH 093/165] added simpleaf index module

---
 conf/modules.config                           |  7 +++
 modules/local/simpleaf_index.nf               | 46 +++++++++++++++++++
 modules/nf-core/modules/salmon/index/main.nf  | 46 -------------------
 modules/nf-core/modules/salmon/index/meta.yml | 36 ---------------
 nextflow.config                               |  3 +-
 subworkflows/local/alevin.nf                  | 14 ++----
 6 files changed, 59 insertions(+), 93 deletions(-)
 create mode 100644 modules/local/simpleaf_index.nf
 delete mode 100644 modules/nf-core/modules/salmon/index/main.nf
 delete mode 100644 modules/nf-core/modules/salmon/index/meta.yml

diff --git a/conf/modules.config b/conf/modules.config
index e9992e7e..a60a4645 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -70,6 +70,13 @@ if (params.aligner == "alevin") {
             ext.args = "--table transcript_id,gene_id"
             ext.prefix = { "${gff.baseName}_gffread" }
         }
+        withName: 'SIMPLEAF_INDEX' {
+            publishDir = [
+                path: { "${params.outdir}/${params.aligner}" },
+                mode: params.publish_dir_mode
+            ]
+            ext.args = "--rlen ${params.simpleaf_rlen}"
+        }
         withName: 'SALMON_INDEX|SALMON_ALEVIN' {
             publishDir = [
                 path: { "${params.outdir}/${params.aligner}" },
diff --git a/modules/local/simpleaf_index.nf b/modules/local/simpleaf_index.nf
new file mode 100644
index 00000000..8f1edc06
--- /dev/null
+++ b/modules/local/simpleaf_index.nf
@@ -0,0 +1,46 @@
+process SIMPLEAF_INDEX {
+    tag "$transcript_gtf"
+    label "process_medium"
+
+    conda (params.enable_conda ? 'bioconda::simpleaf=0.4.0' : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/simpleaf:0.4.0--h9f5acd7_0' :
+        'quay.io/biocontainers/simpleaf:0.4.0--h9f5acd7_0' }"
+
+    input:
+    path genome_fasta
+    path transcript_fasta
+    path transcript_gtf
+
+    output:
+    path "salmon"       , emit: index
+    path "versions.yml" , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def seq_inputs = (params.transcript_fasta) ? "--refseq $transcript_fasta" : "--gtf $transcript_gtf"
+    """
+    # export required var
+    export ALEVIN_FRY_HOME=.
+
+    # prep simpleaf
+    simpleaf set-paths
+
+    # run simpleaf index
+    simpleaf \\
+        index \\
+        --threads $task.cpus \\
+        --fasta $genome_fasta \\
+        $seq_inputs \\
+        $args \\
+        -o salmon
+    
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        simpleaf: 0.4.0
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/modules/salmon/index/main.nf b/modules/nf-core/modules/salmon/index/main.nf
deleted file mode 100644
index 737087f9..00000000
--- a/modules/nf-core/modules/salmon/index/main.nf
+++ /dev/null
@@ -1,46 +0,0 @@
-process SALMON_INDEX {
-    tag "$transcript_fasta"
-    label "process_medium"
-
-    conda (params.enable_conda ? 'bioconda::salmon=1.5.2' : null)
-    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/salmon:1.5.2--h84f40af_0' :
-        'quay.io/biocontainers/salmon:1.5.2--h84f40af_0' }"
-
-    input:
-    path genome_fasta
-    path transcript_fasta
-
-    output:
-    path "salmon"       , emit: index
-    path "versions.yml" , emit: versions
-
-    when:
-    task.ext.when == null || task.ext.when
-
-    script:
-    def args = task.ext.args ?: ''
-    def get_decoy_ids = "grep '^>' $genome_fasta | cut -d ' ' -f 1 > decoys.txt"
-    def gentrome      = "gentrome.fa"
-    if (genome_fasta.endsWith('.gz')) {
-        get_decoy_ids = "grep '^>' <(gunzip -c $genome_fasta) | cut -d ' ' -f 1 > decoys.txt"
-        gentrome      = "gentrome.fa.gz"
-    }
-    """
-    $get_decoy_ids
-    sed -i.bak -e 's/>//g' decoys.txt
-    cat $transcript_fasta $genome_fasta > $gentrome
-
-    salmon \\
-        index \\
-        --threads $task.cpus \\
-        -t $gentrome \\
-        -d decoys.txt \\
-        $args \\
-        -i salmon
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        salmon: \$(echo \$(salmon --version) | sed -e "s/salmon //g")
-    END_VERSIONS
-    """
-}
diff --git a/modules/nf-core/modules/salmon/index/meta.yml b/modules/nf-core/modules/salmon/index/meta.yml
deleted file mode 100644
index 53c64152..00000000
--- a/modules/nf-core/modules/salmon/index/meta.yml
+++ /dev/null
@@ -1,36 +0,0 @@
-name: salmon_index
-description: Create index for salmon
-keywords:
-  - index
-  - fasta
-  - genome
-  - reference
-tools:
-  - salmon:
-      description: |
-        Salmon is a tool for wicked-fast transcript quantification from RNA-seq data
-      homepage: https://salmon.readthedocs.io/en/latest/salmon.html
-      manual: https://salmon.readthedocs.io/en/latest/salmon.html
-      doi: 10.1038/nmeth.4197
-      licence: ["GPL-3.0-or-later"]
-input:
-  - genome_fasta:
-      type: file
-      description: Fasta file of the reference genome
-  - transcriptome_fasta:
-      type: file
-      description: Fasta file of the reference transcriptome
-
-output:
-  - index:
-      type: directory
-      description: Folder containing the star index files
-      pattern: "salmon"
-  - versions:
-      type: file
-      description: File containing software versions
-      pattern: "versions.yml"
-
-authors:
-  - "@kevinmenden"
-  - "@drpatelh"
diff --git a/nextflow.config b/nextflow.config
index 09b80901..9e1b43ac 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -22,7 +22,8 @@ params {
     gtf               = null
     transcript_fasta  = null
 
-    // salmon alevin parameters
+    // salmon alevin parameters (simpleaf)
+    simpleaf_rlen     = 91
     barcode_whitelist = null
     txp2gene          = null
     salmon_index      = null
diff --git a/subworkflows/local/alevin.nf b/subworkflows/local/alevin.nf
index c1b122e1..92198123 100644
--- a/subworkflows/local/alevin.nf
+++ b/subworkflows/local/alevin.nf
@@ -2,11 +2,11 @@
 include { GFFREAD_TRANSCRIPTOME }             from '../../modules/local/gffread_transcriptome'
 include { SALMON_ALEVIN         }             from '../../modules/local/salmon_alevin'
 include { ALEVINQC              }             from '../../modules/local/alevinqc'
+include { SIMPLEAF_INDEX        }             from '../../modules/local/simpleaf_index'
 
 /* --    IMPORT NF-CORE MODULES/SUBWORKFLOWS   -- */
 include { GUNZIP }                      from '../../modules/nf-core/modules/gunzip/main'
 include { GFFREAD as GFFREAD_TXP2GENE } from '../../modules/nf-core/modules/gffread/main'
-include { SALMON_INDEX }                from '../../modules/nf-core/modules/salmon/index/main'
 
 def multiqc_report    = []
 
@@ -39,15 +39,9 @@ workflow SCRNASEQ_ALEVIN {
     * Build salmon index
     */
     if (!salmon_index) {
-        // Preprocessing - Extract transcriptome fasta from genome fasta
-        if (!transcript_fasta) {
-            GFFREAD_TRANSCRIPTOME( genome_fasta, gtf )
-            transcript_fasta = GFFREAD_TRANSCRIPTOME.out.transcriptome_extracted
-            ch_versions = ch_versions.mix(GFFREAD_TRANSCRIPTOME.out.versions)
-        }
-        SALMON_INDEX( genome_fasta, transcript_fasta )
-        salmon_index = SALMON_INDEX.out.index.collect()
-        ch_versions = ch_versions.mix(SALMON_INDEX.out.versions)
+        SIMPLEAF_INDEX( genome_fasta, transcript_fasta, gtf )
+        salmon_index = SIMPLEAF_INDEX.out.index.collect()
+        ch_versions = ch_versions.mix(SIMPLEAF_INDEX.out.versions)
     }
 
     /*

From 36f14ed995bd69107a63054e1c211def1519baa4 Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <almeidafmarques@gmail.com>
Date: Tue, 23 Aug 2022 08:22:38 +0200
Subject: [PATCH 094/165] FIX: add "else if" instead of if-if

---
 modules/local/mtx_to_h5ad.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf
index a79e6ca4..96e0b4a3 100644
--- a/modules/local/mtx_to_h5ad.nf
+++ b/modules/local/mtx_to_h5ad.nf
@@ -43,7 +43,7 @@ process MTX_TO_H5AD {
         --out ${meta.id}_matrix.h5ad
     """
 
-    if (params.aligner == 'kallisto' && params.kb_workflow != 'standard')
+    else if (params.aligner == 'kallisto' && params.kb_workflow != 'standard')
     """
     # convert file types
     for input_type in spliced unspliced ; do

From 07136b05d5b5e9a60ef9568d495ed18dec8ef31d Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <almeidafmarques@gmail.com>
Date: Tue, 23 Aug 2022 15:54:31 +0200
Subject: [PATCH 095/165] add alevin quantification module with simpleaf

---
 conf/modules.config                           |  3 +-
 lib/WorkflowScrnaseq.groovy                   | 10 +--
 modules/local/simpleaf_index.nf               |  6 +-
 .../{salmon_alevin.nf => simpleaf_quant.nf}   | 44 +++++++-----
 modules/nf-core/modules/salmon/quant/main.nf  | 72 -------------------
 modules/nf-core/modules/salmon/quant/meta.yml | 56 ---------------
 subworkflows/local/alevin.nf                  | 14 ++--
 7 files changed, 45 insertions(+), 160 deletions(-)
 rename modules/local/{salmon_alevin.nf => simpleaf_quant.nf} (56%)
 delete mode 100644 modules/nf-core/modules/salmon/quant/main.nf
 delete mode 100644 modules/nf-core/modules/salmon/quant/meta.yml

diff --git a/conf/modules.config b/conf/modules.config
index a60a4645..2b05f1e7 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -77,11 +77,12 @@ if (params.aligner == "alevin") {
             ]
             ext.args = "--rlen ${params.simpleaf_rlen}"
         }
-        withName: 'SALMON_INDEX|SALMON_ALEVIN' {
+        withName: 'SIMPLEAF_QUANT' {
             publishDir = [
                 path: { "${params.outdir}/${params.aligner}" },
                 mode: params.publish_dir_mode
             ]
+            ext.args = "-r cr-like"
         }
     }
 }
diff --git a/lib/WorkflowScrnaseq.groovy b/lib/WorkflowScrnaseq.groovy
index 364b6b19..4f84555d 100755
--- a/lib/WorkflowScrnaseq.groovy
+++ b/lib/WorkflowScrnaseq.groovy
@@ -83,19 +83,19 @@ class WorkflowScrnaseq {
         if (aligner == 'alevin') {
             switch (protocol) {
                 case '10XV1':
-                    new_protocol = 'chromium'
+                    new_protocol = '10xv1'
                     chemistry = 'V1'
                     break
                 case '10XV2':
-                    new_protocol = 'chromium'
+                    new_protocol = '10xv2'
                     chemistry = 'V2'
                     break
                 case '10XV3':
-                    new_protocol = 'chromiumV3'
+                    new_protocol = '10xv3'
                     chemistry = 'V3'
                     break
-                case 'dropseq':
-                    new_protocol = 'dropseq'
+                // case 'dropseq':
+                //     new_protocol = 'dropseq'
             }
         }
 
diff --git a/modules/local/simpleaf_index.nf b/modules/local/simpleaf_index.nf
index 8f1edc06..110f3464 100644
--- a/modules/local/simpleaf_index.nf
+++ b/modules/local/simpleaf_index.nf
@@ -13,8 +13,10 @@ process SIMPLEAF_INDEX {
     path transcript_gtf
 
     output:
-    path "salmon"       , emit: index
-    path "versions.yml" , emit: versions
+    path "salmon/index"              , emit: index
+    path "salmon/ref/*_t2g_3col.tsv" , emit: transcript_tsv
+    path "versions.yml"              , emit: versions
+    path "salmon"
 
     when:
     task.ext.when == null || task.ext.when
diff --git a/modules/local/salmon_alevin.nf b/modules/local/simpleaf_quant.nf
similarity index 56%
rename from modules/local/salmon_alevin.nf
rename to modules/local/simpleaf_quant.nf
index 5ca6dd1e..ad97518f 100644
--- a/modules/local/salmon_alevin.nf
+++ b/modules/local/simpleaf_quant.nf
@@ -1,11 +1,11 @@
-process SALMON_ALEVIN {
+process SIMPLEAF_QUANT {
     tag "$meta.id"
-    label 'process_medium'
+    label 'process_high'
 
-    conda (params.enable_conda ? "bioconda::salmon=1.4.0" : null)
+    conda (params.enable_conda ? 'bioconda::simpleaf=0.4.0' : null)
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/salmon:1.4.0--h84f40af_1' :
-        'quay.io/biocontainers/salmon:1.4.0--h84f40af_1' }"
+        'https://depot.galaxyproject.org/singularity/simpleaf:0.4.0--h9f5acd7_0' :
+        'quay.io/biocontainers/simpleaf:0.4.0--h9f5acd7_0' }"
 
     input:
     //
@@ -14,6 +14,7 @@ process SALMON_ALEVIN {
     //
     tuple val(meta), path(reads)
     path index
+    path transcript_tsv
     path txp2gene
     val protocol
     path whitelist
@@ -29,19 +30,26 @@ process SALMON_ALEVIN {
     // separate forward from reverse pairs
     def (forward, reverse) = reads.collate(2).transpose()
     """
-    salmon alevin \\
-        -l ISR \\
-        -p $task.cpus \\
-        -1 ${forward.join( " " )} \\
-        -2 ${reverse.join( " " )} \\
-        --${protocol} \\
-        -i $index \\
-        --tgMap $txp2gene \\
-        --dumpFeatures --dumpMtx \\
-        $args \\
-        -o ${prefix}_alevin_results
-
-    gzip -cdf ${whitelist} > ${prefix}_alevin_results/alevin/whitelist.txt
+    # export required var
+    export ALEVIN_FRY_HOME=.
+
+    # prep simpleaf
+    simpleaf set-paths
+
+    # run simpleaf quant
+    gzip -dcf $whitelist > whitelist.txt
+    simpleaf quant \\
+        -1 ${forward.join( "," )} \\
+        -2 ${reverse.join( "," )} \\
+        -i ${index} \\
+        -o ${prefix}_alevin_results \\
+        -m $transcript_tsv \\
+        -t $task.cpus \\
+        -c $protocol \\
+        -u whitelist.txt \\
+        $args
+    
+    mv whitelist.txt ${prefix}_alevin_results/
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
diff --git a/modules/nf-core/modules/salmon/quant/main.nf b/modules/nf-core/modules/salmon/quant/main.nf
deleted file mode 100644
index bd4792c5..00000000
--- a/modules/nf-core/modules/salmon/quant/main.nf
+++ /dev/null
@@ -1,72 +0,0 @@
-process SALMON_QUANT {
-    tag "$meta.id"
-    label "process_medium"
-
-    conda (params.enable_conda ? 'bioconda::salmon=1.5.2' : null)
-    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/salmon:1.5.2--h84f40af_0' :
-        'quay.io/biocontainers/salmon:1.5.2--h84f40af_0' }"
-
-    input:
-    tuple val(meta), path(reads)
-    path  index
-    path  gtf
-    path  transcript_fasta
-    val   alignment_mode
-    val   lib_type
-
-    output:
-    tuple val(meta), path("${prefix}"), emit: results
-    path  "versions.yml"              , emit: versions
-
-    when:
-    task.ext.when == null || task.ext.when
-
-    script:
-    def args = task.ext.args   ?: ''
-    prefix   = task.ext.prefix ?: "${meta.id}"
-
-    def reference   = "--index $index"
-    def input_reads = meta.single_end ? "-r $reads" : "-1 ${reads[0]} -2 ${reads[1]}"
-    if (alignment_mode) {
-        reference   = "-t $transcript_fasta"
-        input_reads = "-a $reads"
-    }
-
-    def strandedness_opts = [
-        'A', 'U', 'SF', 'SR',
-        'IS', 'IU' , 'ISF', 'ISR',
-        'OS', 'OU' , 'OSF', 'OSR',
-        'MS', 'MU' , 'MSF', 'MSR'
-    ]
-    def strandedness =  'A'
-    if (lib_type) {
-        if (strandedness_opts.contains(lib_type)) {
-            strandedness = lib_type
-        } else {
-            log.info "[Salmon Quant] Invalid library type specified '--libType=${lib_type}', defaulting to auto-detection with '--libType=A'."
-        }
-    } else {
-        strandedness = meta.single_end ? 'U' : 'IU'
-        if (meta.strandedness == 'forward') {
-            strandedness = meta.single_end ? 'SF' : 'ISF'
-        } else if (meta.strandedness == 'reverse') {
-            strandedness = meta.single_end ? 'SR' : 'ISR'
-        }
-    }
-    """
-    salmon quant \\
-        --geneMap $gtf \\
-        --threads $task.cpus \\
-        --libType=$strandedness \\
-        $reference \\
-        $input_reads \\
-        $args \\
-        -o $prefix
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        salmon: \$(echo \$(salmon --version) | sed -e "s/salmon //g")
-    END_VERSIONS
-    """
-}
diff --git a/modules/nf-core/modules/salmon/quant/meta.yml b/modules/nf-core/modules/salmon/quant/meta.yml
deleted file mode 100644
index 109109d8..00000000
--- a/modules/nf-core/modules/salmon/quant/meta.yml
+++ /dev/null
@@ -1,56 +0,0 @@
-name: salmon_quant
-description: gene/transcript quantification with Salmon
-keywords:
-  - index
-  - fasta
-  - genome
-  - reference
-tools:
-  - salmon:
-      description: |
-        Salmon is a tool for wicked-fast transcript quantification from RNA-seq data
-      homepage: https://salmon.readthedocs.io/en/latest/salmon.html
-      manual: https://salmon.readthedocs.io/en/latest/salmon.html
-      doi: 10.1038/nmeth.4197
-      licence: ["GPL-3.0-or-later"]
-input:
-  - meta:
-      type: map
-      description: |
-        Groovy Map containing sample information
-        e.g. [ id:'test', single_end:false ]
-  - reads:
-      type: file
-      description: |
-        List of input FastQ files of size 1 and 2 for single-end and paired-end data,
-        respectively.
-  - index:
-      type: directory
-      description: Folder containing the star index files
-  - gtf:
-      type: file
-      description: GTF of the reference transcriptome
-  - transcriptome_fasta:
-      type: file
-      description: Fasta file of the reference transcriptome
-  - alignment_mode:
-    type: boolean
-    description: whether to run salmon in alignment mode
-  - lib_type:
-    type: string
-    description: |
-      Override library type inferred based on strandedness defined in meta object
-
-output:
-  - sample_output:
-      type: directory
-      description: Folder containing the quantification results for a specific sample
-      pattern: "${prefix}"
-  - versions:
-      type: file
-      description: File containing software versions
-      pattern: "versions.yml"
-
-authors:
-  - "@kevinmenden"
-  - "@drpatelh"
diff --git a/subworkflows/local/alevin.nf b/subworkflows/local/alevin.nf
index 92198123..49874e2f 100644
--- a/subworkflows/local/alevin.nf
+++ b/subworkflows/local/alevin.nf
@@ -1,8 +1,8 @@
 /* --    IMPORT LOCAL MODULES/SUBWORKFLOWS     -- */
 include { GFFREAD_TRANSCRIPTOME }             from '../../modules/local/gffread_transcriptome'
-include { SALMON_ALEVIN         }             from '../../modules/local/salmon_alevin'
 include { ALEVINQC              }             from '../../modules/local/alevinqc'
 include { SIMPLEAF_INDEX        }             from '../../modules/local/simpleaf_index'
+include { SIMPLEAF_QUANT        }             from '../../modules/local/simpleaf_quant'
 
 /* --    IMPORT NF-CORE MODULES/SUBWORKFLOWS   -- */
 include { GUNZIP }                      from '../../modules/nf-core/modules/gunzip/main'
@@ -41,6 +41,7 @@ workflow SCRNASEQ_ALEVIN {
     if (!salmon_index) {
         SIMPLEAF_INDEX( genome_fasta, transcript_fasta, gtf )
         salmon_index = SIMPLEAF_INDEX.out.index.collect()
+        transcript_tsv = SIMPLEAF_INDEX.out.transcript_tsv.collect()
         ch_versions = ch_versions.mix(SIMPLEAF_INDEX.out.versions)
     }
 
@@ -57,25 +58,26 @@ workflow SCRNASEQ_ALEVIN {
     /*
     * Perform quantification with salmon alevin
     */
-    SALMON_ALEVIN (
+    SIMPLEAF_QUANT (
         ch_fastq,
         salmon_index,
+        transcript_tsv,
         txp2gene,
         protocol,
         barcode_whitelist
     )
-    ch_versions = ch_versions.mix(SALMON_ALEVIN.out.versions)
+    ch_versions = ch_versions.mix(SIMPLEAF_QUANT.out.versions)
 
     /*
     * Run alevinQC
     */
-    ALEVINQC( SALMON_ALEVIN.out.alevin_results )
+    ALEVINQC( SIMPLEAF_QUANT.out.alevin_results )
     ch_versions = ch_versions.mix(ALEVINQC.out.versions)
 
     emit:
     ch_versions
-    alevin_results = SALMON_ALEVIN.out.alevin_results
+    alevin_results = SIMPLEAF_QUANT.out.alevin_results
     alevinqc = ALEVINQC.out.report
-    for_multiqc = SALMON_ALEVIN.out.alevin_results.collect{it[1]}.ifEmpty([])
+    for_multiqc = SIMPLEAF_QUANT.out.alevin_results.collect{it[1]}.ifEmpty([])
 
 }

From b5234c01af98e650ffa4f6a96e080b18f4987f4d Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <almeidafmarques@gmail.com>
Date: Wed, 24 Aug 2022 07:20:33 +0200
Subject: [PATCH 096/165] update simpleaf versions

---
 modules/local/simpleaf_index.nf | 4 ++--
 modules/local/simpleaf_quant.nf | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/modules/local/simpleaf_index.nf b/modules/local/simpleaf_index.nf
index 110f3464..853f1f95 100644
--- a/modules/local/simpleaf_index.nf
+++ b/modules/local/simpleaf_index.nf
@@ -4,8 +4,8 @@ process SIMPLEAF_INDEX {
 
     conda (params.enable_conda ? 'bioconda::simpleaf=0.4.0' : null)
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/simpleaf:0.4.0--h9f5acd7_0' :
-        'quay.io/biocontainers/simpleaf:0.4.0--h9f5acd7_0' }"
+        'https://depot.galaxyproject.org/singularity/simpleaf:0.5.1--h9f5acd7_0' :
+        'quay.io/biocontainers/simpleaf:0.5.1--h9f5acd7_0' }"
 
     input:
     path genome_fasta
diff --git a/modules/local/simpleaf_quant.nf b/modules/local/simpleaf_quant.nf
index ad97518f..6d408fcc 100644
--- a/modules/local/simpleaf_quant.nf
+++ b/modules/local/simpleaf_quant.nf
@@ -4,8 +4,8 @@ process SIMPLEAF_QUANT {
 
     conda (params.enable_conda ? 'bioconda::simpleaf=0.4.0' : null)
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/simpleaf:0.4.0--h9f5acd7_0' :
-        'quay.io/biocontainers/simpleaf:0.4.0--h9f5acd7_0' }"
+        'https://depot.galaxyproject.org/singularity/simpleaf:0.5.1--h9f5acd7_0' :
+        'quay.io/biocontainers/simpleaf:0.5.1--h9f5acd7_0' }"
 
     input:
     //

From 4d3c5c520321a44775d8e30ac82e2db483284f4a Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <almeidafmarques@gmail.com>
Date: Mon, 29 Aug 2022 11:02:40 +0200
Subject: [PATCH 097/165] update how txp2gene file is loaded

---
 modules/local/simpleaf_quant.nf |  3 +--
 subworkflows/local/alevin.nf    | 16 ++++++----------
 2 files changed, 7 insertions(+), 12 deletions(-)

diff --git a/modules/local/simpleaf_quant.nf b/modules/local/simpleaf_quant.nf
index 6d408fcc..7f14048e 100644
--- a/modules/local/simpleaf_quant.nf
+++ b/modules/local/simpleaf_quant.nf
@@ -14,7 +14,6 @@ process SIMPLEAF_QUANT {
     //
     tuple val(meta), path(reads)
     path index
-    path transcript_tsv
     path txp2gene
     val protocol
     path whitelist
@@ -43,7 +42,7 @@ process SIMPLEAF_QUANT {
         -2 ${reverse.join( "," )} \\
         -i ${index} \\
         -o ${prefix}_alevin_results \\
-        -m $transcript_tsv \\
+        -m $txp2gene \\
         -t $task.cpus \\
         -c $protocol \\
         -u whitelist.txt \\
diff --git a/subworkflows/local/alevin.nf b/subworkflows/local/alevin.nf
index 49874e2f..7db784ff 100644
--- a/subworkflows/local/alevin.nf
+++ b/subworkflows/local/alevin.nf
@@ -31,8 +31,10 @@ workflow SCRNASEQ_ALEVIN {
         """Must provide a genome fasta file ('--genome_fasta') and a gtf file ('--gtf'), or a genome fasta file
         and a transcriptome fasta file ('--transcript_fasta`) if no index is given!""".stripIndent()
 
-    assert txp2gene || gtf:
-        "Must provide either a GTF file ('--gtf') or kallisto gene map ('--kallisto_gene_map') to align with kallisto bustools!"
+    if (transcript_fasta) {
+        assert txp2gene:
+            "Since a built transcript was provided ('--transcript_fasta'), must also provide a simpleaf gene map ('--txp2gene') to use with simpleaf quant!"
+    }
 
 
     /*
@@ -46,14 +48,9 @@ workflow SCRNASEQ_ALEVIN {
     }
 
     /*
-    * Build txp2gene map
+    * Select txp2gene map
     */
-    if (!txp2gene){
-        GFFREAD_TXP2GENE( gtf )
-        txp2gene = GFFREAD_TXP2GENE.out.gtf
-        // Only collect version if not already done for gffread
-        ch_versions = ch_versions.mix(GFFREAD_TXP2GENE.out.versions)
-    }
+    if (!txp2gene) { txp2gene = SIMPLEAF_INDEX.out.transcript_tsv }
 
     /*
     * Perform quantification with salmon alevin
@@ -61,7 +58,6 @@ workflow SCRNASEQ_ALEVIN {
     SIMPLEAF_QUANT (
         ch_fastq,
         salmon_index,
-        transcript_tsv,
         txp2gene,
         protocol,
         barcode_whitelist

From 351c56cfbe8ca1237505f818251d45db1c3c9a56 Mon Sep 17 00:00:00 2001
From: nf-core-bot <core@nf-co.re>
Date: Tue, 30 Aug 2022 13:40:26 +0000
Subject: [PATCH 098/165] Template update for nf-core/tools version 2.5

---
 .editorconfig                                 |  2 +-
 .github/PULL_REQUEST_TEMPLATE.md              |  3 +-
 .github/workflows/ci.yml                      | 23 ++------
 .github/workflows/linting.yml                 | 38 +++++++++++--
 CHANGELOG.md                                  |  2 +-
 CITATION.cff                                  | 56 +++++++++++++++++++
 README.md                                     | 21 +++----
 assets/email_template.txt                     |  1 -
 bin/check_samplesheet.py                      | 41 +++++++-------
 conf/base.config                              |  5 ++
 docs/usage.md                                 | 12 ++--
 lib/WorkflowMain.groovy                       |  9 ++-
 lib/WorkflowScrnaseq.groovy                   |  5 +-
 main.nf                                       |  2 +-
 modules.json                                  | 22 +++++---
 .../templates/dumpsoftwareversions.py         | 14 +++--
 nextflow.config                               | 23 +++++++-
 17 files changed, 186 insertions(+), 93 deletions(-)
 create mode 100644 CITATION.cff

diff --git a/.editorconfig b/.editorconfig
index b6b31907..b78de6e6 100644
--- a/.editorconfig
+++ b/.editorconfig
@@ -8,7 +8,7 @@ trim_trailing_whitespace = true
 indent_size = 4
 indent_style = space
 
-[*.{md,yml,yaml,html,css,scss,js}]
+[*.{md,yml,yaml,html,css,scss,js,cff}]
 indent_size = 2
 
 # These files are edited and tested upstream in nf-core/modules
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
index 4b79f9bf..9d907223 100644
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -15,8 +15,7 @@ Learn more about contributing: [CONTRIBUTING.md](https://github.com/nf-core/scrn
 
 - [ ] This comment contains a description of changes (with reason).
 - [ ] If you've fixed a bug or added code that should be tested, add tests!
-  - [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/scrnaseq/tree/master/.github/CONTRIBUTING.md)
-  - [ ] If necessary, also make a PR on the nf-core/scrnaseq _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository.
+- [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/scrnaseq/tree/master/.github/CONTRIBUTING.md)- [ ] If necessary, also make a PR on the nf-core/scrnaseq _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository.
 - [ ] Make sure your code lints (`nf-core lint`).
 - [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir <OUTDIR>`).
 - [ ] Usage Documentation in `docs/usage.md` is updated.
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 03c26dea..3afad6a3 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -10,7 +10,6 @@ on:
 
 env:
   NXF_ANSI_LOG: false
-  CAPSULE_LOG: none
 
 jobs:
   test:
@@ -20,27 +19,17 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        # Nextflow versions
-        include:
-          # Test pipeline minimum Nextflow version
-          - NXF_VER: "21.10.3"
-            NXF_EDGE: ""
-          # Test latest edge release of Nextflow
-          - NXF_VER: ""
-            NXF_EDGE: "1"
+        NXF_VER:
+          - "21.10.3"
+          - "latest-everything"
     steps:
       - name: Check out pipeline code
         uses: actions/checkout@v2
 
       - name: Install Nextflow
-        env:
-          NXF_VER: ${{ matrix.NXF_VER }}
-          # Uncomment only if the edge release is more recent than the latest stable release
-          # See https://github.com/nextflow-io/nextflow/issues/2467
-          # NXF_EDGE: ${{ matrix.NXF_EDGE }}
-        run: |
-          wget -qO- get.nextflow.io | bash
-          sudo mv nextflow /usr/local/bin/
+        uses: nf-core/setup-nextflow@v1
+        with:
+          version: "${{ matrix.NXF_VER }}"
 
       - name: Run pipeline with test data
         # TODO nf-core: You can customise CI pipeline run tests as required
diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml
index 77358dee..8a5ce69b 100644
--- a/.github/workflows/linting.yml
+++ b/.github/workflows/linting.yml
@@ -35,6 +35,36 @@ jobs:
       - name: Run Prettier --check
         run: prettier --check ${GITHUB_WORKSPACE}
 
+  PythonBlack:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2
+
+      - name: Check code lints with Black
+        uses: psf/black@stable
+
+      # If the above check failed, post a comment on the PR explaining the failure
+      - name: Post PR comment
+        if: failure()
+        uses: mshick/add-pr-comment@v1
+        with:
+          message: |
+            ## Python linting (`black`) is failing
+
+            To keep the code consistent with lots of contributors, we run automated code consistency checks.
+            To fix this CI test, please run:
+
+            * Install [`black`](https://black.readthedocs.io/en/stable/): `pip install black`
+            * Fix formatting errors in your pipeline: `black .`
+
+            Once you push these changes the test should pass, and you can hide this comment :+1:
+
+            We highly recommend setting up Black in your code editor so that this formatting is done automatically on save. Ask about it on Slack for help!
+
+            Thanks again for your contribution!
+          repo-token: ${{ secrets.GITHUB_TOKEN }}
+          allow-repeats: false
+
   nf-core:
     runs-on: ubuntu-latest
     steps:
@@ -42,15 +72,11 @@ jobs:
         uses: actions/checkout@v2
 
       - name: Install Nextflow
-        env:
-          CAPSULE_LOG: none
-        run: |
-          wget -qO- get.nextflow.io | bash
-          sudo mv nextflow /usr/local/bin/
+        uses: nf-core/setup-nextflow@v1
 
       - uses: actions/setup-python@v3
         with:
-          python-version: "3.6"
+          python-version: "3.7"
           architecture: "x64"
 
       - name: Install dependencies
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7fff64fc..af3ca32a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,7 +3,7 @@
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
-## v2.0dev - [date]
+## v2.0.1dev - [date]
 
 Initial release of nf-core/scrnaseq, created with the [nf-core](https://nf-co.re/) template.
 
diff --git a/CITATION.cff b/CITATION.cff
new file mode 100644
index 00000000..4533e2f2
--- /dev/null
+++ b/CITATION.cff
@@ -0,0 +1,56 @@
+cff-version: 1.2.0
+message: "If you use `nf-core tools` in your work, please cite the `nf-core` publication"
+authors:
+  - family-names: Ewels
+    given-names: Philip
+  - family-names: Peltzer
+    given-names: Alexander
+  - family-names: Fillinger
+    given-names: Sven
+  - family-names: Patel
+    given-names: Harshil
+  - family-names: Alneberg
+    given-names: Johannes
+  - family-names: Wilm
+    given-names: Andreas
+  - family-names: Ulysse Garcia
+    given-names: Maxime
+  - family-names: Di Tommaso
+    given-names: Paolo
+  - family-names: Nahnsen
+    given-names: Sven
+title: "The nf-core framework for community-curated bioinformatics pipelines."
+version: 2.4.1
+doi: 10.1038/s41587-020-0439-x
+date-released: 2022-05-16
+url: https://github.com/nf-core/tools
+preferred-citation:
+  type: article
+  authors:
+    - family-names: Ewels
+      given-names: Philip
+    - family-names: Peltzer
+      given-names: Alexander
+    - family-names: Fillinger
+      given-names: Sven
+    - family-names: Patel
+      given-names: Harshil
+    - family-names: Alneberg
+      given-names: Johannes
+    - family-names: Wilm
+      given-names: Andreas
+    - family-names: Ulysse Garcia
+      given-names: Maxime
+    - family-names: Di Tommaso
+      given-names: Paolo
+    - family-names: Nahnsen
+      given-names: Sven
+  doi: 10.1038/s41587-020-0439-x
+  journal: nature biotechnology
+  start: 276
+  end: 278
+  title: "The nf-core framework for community-curated bioinformatics pipelines."
+  issue: 3
+  volume: 38
+  year: 2020
+  url: https://dx.doi.org/10.1038/s41587-020-0439-x
diff --git a/README.md b/README.md
index e0861259..804539ce 100644
--- a/README.md
+++ b/README.md
@@ -1,19 +1,14 @@
 # ![nf-core/scrnaseq](docs/images/nf-core-scrnaseq_logo_light.png#gh-light-mode-only) ![nf-core/scrnaseq](docs/images/nf-core-scrnaseq_logo_dark.png#gh-dark-mode-only)
 
-[![GitHub Actions CI Status](https://github.com/nf-core/scrnaseq/workflows/nf-core%20CI/badge.svg)](https://github.com/nf-core/scrnaseq/actions?query=workflow%3A%22nf-core+CI%22)
-[![GitHub Actions Linting Status](https://github.com/nf-core/scrnaseq/workflows/nf-core%20linting/badge.svg)](https://github.com/nf-core/scrnaseq/actions?query=workflow%3A%22nf-core+linting%22)
-[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?logo=Amazon%20AWS)](https://nf-co.re/scrnaseq/results)
-[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8)](https://doi.org/10.5281/zenodo.XXXXXXX)
+[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/scrnaseq/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)
 
 [![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A521.10.3-23aa62.svg)](https://www.nextflow.io/)
-[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?logo=anaconda)](https://docs.conda.io/en/latest/)
-[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?logo=docker)](https://www.docker.com/)
-[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg)](https://sylabs.io/docs/)
+[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)
+[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)
+[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)
 [![Launch on Nextflow Tower](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Nextflow%20Tower-%234256e7)](https://tower.nf/launch?pipeline=https://github.com/nf-core/scrnaseq)
 
-[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23scrnaseq-4A154B?logo=slack)](https://nfcore.slack.com/channels/scrnaseq)
-[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?logo=twitter)](https://twitter.com/nf_core)
-[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?logo=youtube)](https://www.youtube.com/c/nf-core)
+[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23scrnaseq-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/scrnaseq)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)
 
 ## Introduction
 
@@ -25,7 +20,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool
 
 <!-- TODO nf-core: Add full-sized test dataset and amend the paragraph below if applicable -->
 
-On release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources. The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/scrnaseq/results).
+On release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources. The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/scrnaseq/results).
 
 ## Pipeline summary
 
@@ -42,7 +37,7 @@ On release, automated continuous integration tests run the pipeline on a full-si
 
 3. Download the pipeline and test it on a minimal dataset with a single command:
 
-   ```console
+   ```bash
    nextflow run nf-core/scrnaseq -profile test,YOURPROFILE --outdir <OUTDIR>
    ```
 
@@ -57,7 +52,7 @@ On release, automated continuous integration tests run the pipeline on a full-si
 
    <!-- TODO nf-core: Update the example "typical command" below used to run the pipeline -->
 
-   ```console
+   ```bash
    nextflow run nf-core/scrnaseq --input samplesheet.csv --outdir <OUTDIR> --genome GRCh37 -profile <docker/singularity/podman/shifter/charliecloud/conda/institute>
    ```
 
diff --git a/assets/email_template.txt b/assets/email_template.txt
index 8513ca81..9065d5c2 100644
--- a/assets/email_template.txt
+++ b/assets/email_template.txt
@@ -6,7 +6,6 @@
                                         `._,._,'
   nf-core/scrnaseq v${version}
 ----------------------------------------------------
-
 Run Name: $runName
 
 <% if (success){
diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py
index 3652c63c..9a8b8962 100755
--- a/bin/check_samplesheet.py
+++ b/bin/check_samplesheet.py
@@ -11,7 +11,6 @@
 from collections import Counter
 from pathlib import Path
 
-
 logger = logging.getLogger()
 
 
@@ -79,13 +78,15 @@ def validate_and_transform(self, row):
 
     def _validate_sample(self, row):
         """Assert that the sample name exists and convert spaces to underscores."""
-        assert len(row[self._sample_col]) > 0, "Sample input is required."
+        if len(row[self._sample_col]) <= 0:
+            raise AssertionError("Sample input is required.")
         # Sanitize samples slightly.
         row[self._sample_col] = row[self._sample_col].replace(" ", "_")
 
     def _validate_first(self, row):
         """Assert that the first FASTQ entry is non-empty and has the right format."""
-        assert len(row[self._first_col]) > 0, "At least the first FASTQ file is required."
+        if len(row[self._first_col]) <= 0:
+            raise AssertionError("At least the first FASTQ file is required.")
         self._validate_fastq_format(row[self._first_col])
 
     def _validate_second(self, row):
@@ -97,36 +98,34 @@ def _validate_pair(self, row):
         """Assert that read pairs have the same file extension. Report pair status."""
         if row[self._first_col] and row[self._second_col]:
             row[self._single_col] = False
-            assert (
-                Path(row[self._first_col]).suffixes[-2:] == Path(row[self._second_col]).suffixes[-2:]
-            ), "FASTQ pairs must have the same file extensions."
+            if Path(row[self._first_col]).suffixes[-2:] != Path(row[self._second_col]).suffixes[-2:]:
+                raise AssertionError("FASTQ pairs must have the same file extensions.")
         else:
             row[self._single_col] = True
 
     def _validate_fastq_format(self, filename):
         """Assert that a given filename has one of the expected FASTQ extensions."""
-        assert any(filename.endswith(extension) for extension in self.VALID_FORMATS), (
-            f"The FASTQ file has an unrecognized extension: {filename}\n"
-            f"It should be one of: {', '.join(self.VALID_FORMATS)}"
-        )
+        if not any(filename.endswith(extension) for extension in self.VALID_FORMATS):
+            raise AssertionError(
+                f"The FASTQ file has an unrecognized extension: {filename}\n"
+                f"It should be one of: {', '.join(self.VALID_FORMATS)}"
+            )
 
     def validate_unique_samples(self):
         """
         Assert that the combination of sample name and FASTQ filename is unique.
 
-        In addition to the validation, also rename the sample if more than one sample,
-        FASTQ file combination exists.
+        In addition to the validation, also rename all samples to have a suffix of _T{n}, where n is the
+        number of times the same sample exist, but with different FASTQ files, e.g., multiple runs per experiment.
 
         """
-        assert len(self._seen) == len(self.modified), "The pair of sample name and FASTQ must be unique."
-        if len({pair[0] for pair in self._seen}) < len(self._seen):
-            counts = Counter(pair[0] for pair in self._seen)
-            seen = Counter()
-            for row in self.modified:
-                sample = row[self._sample_col]
-                seen[sample] += 1
-                if counts[sample] > 1:
-                    row[self._sample_col] = f"{sample}_T{seen[sample]}"
+        if len(self._seen) != len(self.modified):
+            raise AssertionError("The pair of sample name and FASTQ must be unique.")
+        seen = Counter()
+        for row in self.modified:
+            sample = row[self._sample_col]
+            seen[sample] += 1
+            row[self._sample_col] = f"{sample}_T{seen[sample]}"
 
 
 def read_head(handle, num_lines=10):
diff --git a/conf/base.config b/conf/base.config
index e18cde2e..938227e2 100644
--- a/conf/base.config
+++ b/conf/base.config
@@ -26,6 +26,11 @@ process {
     //        adding in your local modules too.
     // TODO nf-core: Customise requirements for specific processes.
     // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors
+    withLabel:process_single {
+        cpus   = { check_max( 1                  , 'cpus'    ) }
+        memory = { check_max( 6.GB * task.attempt, 'memory'  ) }
+        time   = { check_max( 4.h  * task.attempt, 'time'    ) }
+    }
     withLabel:process_low {
         cpus   = { check_max( 2     * task.attempt, 'cpus'    ) }
         memory = { check_max( 12.GB * task.attempt, 'memory'  ) }
diff --git a/docs/usage.md b/docs/usage.md
index 60c6a74c..c1b85e7a 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -12,7 +12,7 @@
 
 You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row as shown in the examples below.
 
-```console
+```bash
 --input '[path to samplesheet file]'
 ```
 
@@ -56,7 +56,7 @@ An [example samplesheet](../assets/samplesheet.csv) has been provided with the p
 
 The typical command for running the pipeline is as follows:
 
-```console
+```bash
 nextflow run nf-core/scrnaseq --input samplesheet.csv --outdir <OUTDIR> --genome GRCh37 -profile docker
 ```
 
@@ -64,9 +64,9 @@ This will launch the pipeline with the `docker` configuration profile. See below
 
 Note that the pipeline will create the following files in your working directory:
 
-```console
+```bash
 work                # Directory containing the nextflow working files
-<OUTIDR>            # Finished results in specified location (defined with --outdir)
+<OUTDIR>            # Finished results in specified location (defined with --outdir)
 .nextflow_log       # Log file from Nextflow
 # Other nextflow hidden files, eg. history of pipeline runs and old logs.
 ```
@@ -75,7 +75,7 @@ work                # Directory containing the nextflow working files
 
 When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline:
 
-```console
+```bash
 nextflow pull nf-core/scrnaseq
 ```
 
@@ -251,6 +251,6 @@ Some HPC setups also allow you to run nextflow within a cluster job submitted yo
 In some cases, the Nextflow Java virtual machines can start to request a large amount of memory.
 We recommend adding the following line to your environment to limit this (typically in `~/.bashrc` or `~./bash_profile`):
 
-```console
+```bash
 NXF_OPTS='-Xms1g -Xmx4g'
 ```
diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy
index 0569eece..b8c1f19d 100755
--- a/lib/WorkflowMain.groovy
+++ b/lib/WorkflowMain.groovy
@@ -59,6 +59,7 @@ class WorkflowMain {
         }
 
         // Print parameter summary log to screen
+
         log.info paramsSummaryLog(workflow, params, log)
 
         // Check that a -profile or Nextflow config has been provided to run the pipeline
@@ -78,17 +79,15 @@ class WorkflowMain {
             System.exit(1)
         }
     }
-
     //
     // Get attribute from genome config file e.g. fasta
     //
-    public static String getGenomeAttribute(params, attribute) {
-        def val = ''
+    public static Object getGenomeAttribute(params, attribute) {
         if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) {
             if (params.genomes[ params.genome ].containsKey(attribute)) {
-                val = params.genomes[ params.genome ][ attribute ]
+                return params.genomes[ params.genome ][ attribute ]
             }
         }
-        return val
+        return null
     }
 }
diff --git a/lib/WorkflowScrnaseq.groovy b/lib/WorkflowScrnaseq.groovy
index 290990ae..db31a702 100755
--- a/lib/WorkflowScrnaseq.groovy
+++ b/lib/WorkflowScrnaseq.groovy
@@ -10,6 +10,7 @@ class WorkflowScrnaseq {
     public static void initialise(params, log) {
         genomeExistsError(params, log)
 
+
         if (!params.fasta) {
             log.error "Genome fasta file not specified with e.g. '--fasta genome.fa' or via a detectable config file."
             System.exit(1)
@@ -41,9 +42,7 @@ class WorkflowScrnaseq {
         yaml_file_text        += "data: |\n"
         yaml_file_text        += "${summary_section}"
         return yaml_file_text
-    }
-
-    //
+    }//
     // Exit pipeline if incorrect --genome key provided
     //
     private static void genomeExistsError(params, log) {
diff --git a/main.nf b/main.nf
index 4d1eb0ea..c2fe7ccf 100644
--- a/main.nf
+++ b/main.nf
@@ -4,7 +4,7 @@
     nf-core/scrnaseq
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     Github : https://github.com/nf-core/scrnaseq
-    Website: https://nf-co.re/scrnaseq
+Website: https://nf-co.re/scrnaseq
     Slack  : https://nfcore.slack.com/channels/scrnaseq
 ----------------------------------------------------------------------------------------
 */
diff --git a/modules.json b/modules.json
index 5bd4d31a..a865b2cd 100644
--- a/modules.json
+++ b/modules.json
@@ -3,14 +3,20 @@
     "homePage": "https://github.com/nf-core/scrnaseq",
     "repos": {
         "nf-core/modules": {
-            "custom/dumpsoftwareversions": {
-                "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
-            },
-            "fastqc": {
-                "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
-            },
-            "multiqc": {
-                "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
+            "git_url": "https://github.com/nf-core/modules.git",
+            "modules": {
+                "custom/dumpsoftwareversions": {
+                    "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d",
+                    "branch": "master"
+                },
+                "fastqc": {
+                    "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d",
+                    "branch": "master"
+                },
+                "multiqc": {
+                    "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d",
+                    "branch": "master"
+                }
             }
         }
     }
diff --git a/modules/nf-core/modules/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/modules/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py
index d1390392..787bdb7b 100644
--- a/modules/nf-core/modules/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py
+++ b/modules/nf-core/modules/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py
@@ -1,9 +1,10 @@
 #!/usr/bin/env python
 
-import yaml
 import platform
 from textwrap import dedent
 
+import yaml
+
 
 def _make_versions_html(versions):
     html = [
@@ -58,11 +59,12 @@ def _make_versions_html(versions):
 for process, process_versions in versions_by_process.items():
     module = process.split(":")[-1]
     try:
-        assert versions_by_module[module] == process_versions, (
-            "We assume that software versions are the same between all modules. "
-            "If you see this error-message it means you discovered an edge-case "
-            "and should open an issue in nf-core/tools. "
-        )
+        if versions_by_module[module] != process_versions:
+            raise AssertionError(
+                "We assume that software versions are the same between all modules. "
+                "If you see this error-message it means you discovered an edge-case "
+                "and should open an issue in nf-core/tools. "
+            )
     except KeyError:
         versions_by_module[module] = process_versions
 
diff --git a/nextflow.config b/nextflow.config
index 74187da1..a6e37e2a 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -13,11 +13,11 @@ params {
     // Input options
     input                      = null
 
+
     // References
     genome                     = null
     igenomes_base              = 's3://ngi-igenomes/igenomes'
     igenomes_ignore            = false
-
     // MultiQC options
     multiqc_config             = null
     multiqc_title              = null
@@ -37,6 +37,7 @@ params {
     schema_ignore_params       = 'genomes'
     enable_conda               = false
 
+
     // Config options
     custom_config_version      = 'master'
     custom_config_base         = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}"
@@ -45,6 +46,7 @@ params {
     config_profile_url         = null
     config_profile_name        = null
 
+
     // Max resource options
     // Defaults only, expecting to be overwritten
     max_memory                 = '128.GB'
@@ -72,6 +74,7 @@ try {
 // }
 
 
+
 profiles {
     debug { process.beforeScript = 'echo $HOSTNAME' }
     conda {
@@ -82,6 +85,15 @@ profiles {
         shifter.enabled        = false
         charliecloud.enabled   = false
     }
+    mamba {
+        params.enable_conda    = true
+        conda.useMamba         = true
+        docker.enabled         = false
+        singularity.enabled    = false
+        podman.enabled         = false
+        shifter.enabled        = false
+        charliecloud.enabled   = false
+    }
     docker {
         docker.enabled         = true
         docker.userEmulation   = true
@@ -119,10 +131,16 @@ profiles {
         podman.enabled         = false
         shifter.enabled        = false
     }
+    gitpod {
+        executor.name          = 'local'
+        executor.cpus          = 16
+        executor.memory        = 60.GB
+    }
     test      { includeConfig 'conf/test.config'      }
     test_full { includeConfig 'conf/test_full.config' }
 }
 
+
 // Load igenomes.config if required
 if (!params.igenomes_ignore) {
     includeConfig 'conf/igenomes.config'
@@ -130,6 +148,7 @@ if (!params.igenomes_ignore) {
     params.genomes = [:]
 }
 
+
 // Export these variables to prevent local Python/R libraries from conflicting with those in the container
 // The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container.
 // See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable.
@@ -169,7 +188,7 @@ manifest {
     description     = 'Pipeline for processing of 10xGenomics single cell rnaseq data'
     mainScript      = 'main.nf'
     nextflowVersion = '!>=21.10.3'
-    version         = '2.0dev'
+    version         = '2.0.1dev'
 }
 
 // Load modules.config for DSL2 module specific options

From 08b26fb87a91879e019b0e13c96f21806f55bdc7 Mon Sep 17 00:00:00 2001
From: nf-core-bot <core@nf-co.re>
Date: Thu, 1 Sep 2022 13:32:32 +0000
Subject: [PATCH 099/165] Template update for nf-core/tools version 2.5.1

---
 bin/check_samplesheet.py |  9 ++++++---
 pyproject.toml           | 10 ++++++++++
 2 files changed, 16 insertions(+), 3 deletions(-)
 create mode 100644 pyproject.toml

diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py
index 9a8b8962..11b15572 100755
--- a/bin/check_samplesheet.py
+++ b/bin/check_samplesheet.py
@@ -98,7 +98,9 @@ def _validate_pair(self, row):
         """Assert that read pairs have the same file extension. Report pair status."""
         if row[self._first_col] and row[self._second_col]:
             row[self._single_col] = False
-            if Path(row[self._first_col]).suffixes[-2:] != Path(row[self._second_col]).suffixes[-2:]:
+            first_col_suffix = Path(row[self._first_col]).suffixes[-2:]
+            second_col_suffix = Path(row[self._second_col]).suffixes[-2:]
+            if first_col_suffix != second_col_suffix:
                 raise AssertionError("FASTQ pairs must have the same file extensions.")
         else:
             row[self._single_col] = True
@@ -157,7 +159,7 @@ def sniff_format(handle):
     handle.seek(0)
     sniffer = csv.Sniffer()
     if not sniffer.has_header(peek):
-        logger.critical(f"The given sample sheet does not appear to contain a header.")
+        logger.critical("The given sample sheet does not appear to contain a header.")
         sys.exit(1)
     dialect = sniffer.sniff(peek)
     return dialect
@@ -195,7 +197,8 @@ def check_samplesheet(file_in, file_out):
         reader = csv.DictReader(in_handle, dialect=sniff_format(in_handle))
         # Validate the existence of the expected header columns.
         if not required_columns.issubset(reader.fieldnames):
-            logger.critical(f"The sample sheet **must** contain the column headers: {', '.join(required_columns)}.")
+            req_cols = ", ".join(required_columns)
+            logger.critical(f"The sample sheet **must** contain these column headers: {req_cols}.")
             sys.exit(1)
         # Validate each row.
         checker = RowChecker()
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 00000000..0d62beb6
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,10 @@
+# Config file for Python. Mostly used to configure linting of bin/check_samplesheet.py with Black.
+# Should be kept the same as nf-core/tools to avoid fighting with template synchronisation.
+[tool.black]
+line-length = 120
+target_version = ["py37", "py38", "py39", "py310"]
+
+[tool.isort]
+profile = "black"
+known_first_party = ["nf_core"]
+multi_line_output = 3

From 4d7cbef8c59dbcb2f7b2ab6db776c6f13550369f Mon Sep 17 00:00:00 2001
From: Harshil Patel <drpatelhh@gmail.com>
Date: Mon, 12 Sep 2022 13:58:31 +0100
Subject: [PATCH 100/165] Recreate modules.json

---
 modules.json | 92 ++++++++++++++++++++++++++++++----------------------
 1 file changed, 54 insertions(+), 38 deletions(-)

diff --git a/modules.json b/modules.json
index 5468da87..0b04f799 100644
--- a/modules.json
+++ b/modules.json
@@ -3,44 +3,60 @@
     "homePage": "https://github.com/nf-core/scrnaseq",
     "repos": {
         "nf-core/modules": {
-            "cellranger/count": {
-                "git_sha": "e1a3ae6bf5afce6b23f580e3225e7bdbe1388cd0"
-            },
-            "cellranger/mkgtf": {
-                "git_sha": "e499abe572bc7c1322dfa66b767b0e65fe8c62ca"
-            },
-            "cellranger/mkref": {
-                "git_sha": "61d87bb2bcbcd058e1af6e5f14bb821b8ae4ed3e"
-            },
-            "custom/dumpsoftwareversions": {
-                "git_sha": "682f789f93070bd047868300dd018faf3d434e7c"
-            },
-            "fastqc": {
-                "git_sha": "49b18b1639f4f7104187058866a8fab33332bdfe"
-            },
-            "gffread": {
-                "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
-            },
-            "gunzip": {
-                "git_sha": "fa37e0662690c4ec4260dae282fbce08777503e6"
-            },
-            "kallistobustools/count": {
-                "git_sha": "013035eb5c80c9e3f37f2c89c92a1ae7925df8ea"
-            },
-            "kallistobustools/ref": {
-                "git_sha": "0f1e736212e5ae37036ac7e32b225087a8a9b154"
-            },
-            "multiqc": {
-                "git_sha": "5138acca0985ca01c38a1c4fba917d83772b1106"
-            },
-            "salmon/index": {
-                "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
-            },
-            "salmon/quant": {
-                "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
-            },
-            "star/genomegenerate": {
-                "git_sha": "fb6c7bca3d55c19a793372513395e3a567bdd7ba"
+            "git_url": "https://github.com/nf-core/modules.git",
+            "modules": {
+                "cellranger/count": {
+                    "branch": "master",
+                    "git_sha": "e1a3ae6bf5afce6b23f580e3225e7bdbe1388cd0"
+                },
+                "cellranger/mkgtf": {
+                    "branch": "master",
+                    "git_sha": "e499abe572bc7c1322dfa66b767b0e65fe8c62ca"
+                },
+                "cellranger/mkref": {
+                    "branch": "master",
+                    "git_sha": "61d87bb2bcbcd058e1af6e5f14bb821b8ae4ed3e"
+                },
+                "custom/dumpsoftwareversions": {
+                    "branch": "master",
+                    "git_sha": "682f789f93070bd047868300dd018faf3d434e7c"
+                },
+                "fastqc": {
+                    "branch": "master",
+                    "git_sha": "49b18b1639f4f7104187058866a8fab33332bdfe"
+                },
+                "gffread": {
+                    "branch": "master",
+                    "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
+                },
+                "gunzip": {
+                    "branch": "master",
+                    "git_sha": "fa37e0662690c4ec4260dae282fbce08777503e6"
+                },
+                "kallistobustools/count": {
+                    "branch": "master",
+                    "git_sha": "013035eb5c80c9e3f37f2c89c92a1ae7925df8ea"
+                },
+                "kallistobustools/ref": {
+                    "branch": "master",
+                    "git_sha": "0f1e736212e5ae37036ac7e32b225087a8a9b154"
+                },
+                "multiqc": {
+                    "branch": "master",
+                    "git_sha": "5138acca0985ca01c38a1c4fba917d83772b1106"
+                },
+                "salmon/index": {
+                    "branch": "master",
+                    "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
+                },
+                "salmon/quant": {
+                    "branch": "master",
+                    "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
+                },
+                "star/genomegenerate": {
+                    "branch": "master",
+                    "git_sha": "fb6c7bca3d55c19a793372513395e3a567bdd7ba"
+                }
             }
         }
     }

From 65082e572224bd1d88ae6990bc0819f491b74b26 Mon Sep 17 00:00:00 2001
From: Harshil Patel <drpatelhh@gmail.com>
Date: Mon, 12 Sep 2022 13:59:29 +0100
Subject: [PATCH 101/165] Fix linting

---
 assets/email_template.html    |  2 +-
 bin/cellranger_mtx_to_h5ad.py | 11 +++---
 bin/concat_h5ad.py            | 15 ++++-----
 bin/mtx_to_h5ad.py            | 14 ++++----
 bin/t2g.py                    | 63 ++++++++++++++++++-----------------
 5 files changed, 52 insertions(+), 53 deletions(-)

diff --git a/assets/email_template.html b/assets/email_template.html
index b5c9a7b9..2ff8db51 100644
--- a/assets/email_template.html
+++ b/assets/email_template.html
@@ -4,7 +4,7 @@
   <meta http-equiv="X-UA-Compatible" content="IE=edge">
   <meta name="viewport" content="width=device-width, initial-scale=1">
 
-  <meta name="description" content="nf-core/scrnaseq: Pipeline for processing of 10xGenomics single cell rnaseq data">
+  <meta name="description" content="nf-core/scrnaseq: Pipeline for processing 10x Genomics single cell rnaseq data">
   <title>nf-core/scrnaseq Pipeline Report</title>
 </head>
 <body>
diff --git a/bin/cellranger_mtx_to_h5ad.py b/bin/cellranger_mtx_to_h5ad.py
index e8eb5b23..84305fa3 100755
--- a/bin/cellranger_mtx_to_h5ad.py
+++ b/bin/cellranger_mtx_to_h5ad.py
@@ -2,7 +2,8 @@
 import scanpy as sc
 import argparse
 
-def mtx_to_adata( mtx_h5: str, sample: str, verbose: bool = False ):
+
+def mtx_to_adata(mtx_h5: str, sample: str, verbose: bool = False):
 
     if verbose:
         print("Reading in {}".format(mtx_h5))
@@ -19,10 +20,10 @@ def mtx_to_adata( mtx_h5: str, sample: str, verbose: bool = False ):
 
     parser = argparse.ArgumentParser(description="Converts mtx output to h5ad.")
 
-    parser.add_argument("-m", "--mtx",     dest="mtx",     help="Path to mtx h5 file."                   )
-    parser.add_argument("-v", "--verbose", dest="verbose", help="Toggle verbose messages", default=False )
-    parser.add_argument("-s", "--sample",  dest="sample",  help="Sample name"                            )
-    parser.add_argument("-o", "--out",     dest="out",     help="Output path."                           )
+    parser.add_argument("-m", "--mtx", dest="mtx", help="Path to mtx h5 file.")
+    parser.add_argument("-v", "--verbose", dest="verbose", help="Toggle verbose messages", default=False)
+    parser.add_argument("-s", "--sample", dest="sample", help="Sample name")
+    parser.add_argument("-o", "--out", dest="out", help="Output path.")
 
     args = vars(parser.parse_args())
 
diff --git a/bin/concat_h5ad.py b/bin/concat_h5ad.py
index 29d0037a..9c40ec6f 100755
--- a/bin/concat_h5ad.py
+++ b/bin/concat_h5ad.py
@@ -3,6 +3,7 @@
 from pathlib import Path
 import argparse
 
+
 def read_samplesheet(samplesheet):
     df = pd.read_csv(samplesheet)
     df.set_index("sample")
@@ -12,14 +13,15 @@ def read_samplesheet(samplesheet):
     # only keep unique values using set()
     df = df.groupby(["sample"]).agg(lambda column: ",".join(set(column)))
 
-    return(df)
+    return df
+
 
 if __name__ == "__main__":
 
     parser = argparse.ArgumentParser(description="Concatenates h5ad files and merge metadata from samplesheet")
 
-    parser.add_argument("-i", "--input",  dest="input",  help="Path to samplesheet.csv")
-    parser.add_argument("-o", "--out",    dest="out",    help="Output path.")
+    parser.add_argument("-i", "--input", dest="input", help="Path to samplesheet.csv")
+    parser.add_argument("-o", "--out", dest="out", help="Output path.")
     parser.add_argument("-s", "--suffix", dest="suffix", help="Suffix of matrices to remove and get sample name")
 
     args = vars(parser.parse_args())
@@ -28,10 +30,7 @@ def read_samplesheet(samplesheet):
     df_samplesheet = read_samplesheet(args["input"])
 
     # find all h5ad and append to dict
-    dict_of_h5ad = {
-            str(path).replace(args["suffix"], ""): sc.read_h5ad(path)
-            for path in Path(".").rglob('*.h5ad')
-    }
+    dict_of_h5ad = {str(path).replace(args["suffix"], ""): sc.read_h5ad(path) for path in Path(".").rglob("*.h5ad")}
 
     # concat h5ad files
     adata = ad.concat(dict_of_h5ad, label="sample", merge="unique", index_unique="_")
@@ -40,4 +39,4 @@ def read_samplesheet(samplesheet):
     adata.obs = adata.obs.join(df_samplesheet, on="sample")
     adata.write_h5ad(args["out"], compression="gzip")
 
-    print("Wrote h5ad file to {}".format(args["out"]))
\ No newline at end of file
+    print("Wrote h5ad file to {}".format(args["out"]))
diff --git a/bin/mtx_to_h5ad.py b/bin/mtx_to_h5ad.py
index 2885886e..37d7c1ec 100755
--- a/bin/mtx_to_h5ad.py
+++ b/bin/mtx_to_h5ad.py
@@ -4,15 +4,15 @@
 import argparse
 
 
-def mtx_to_adata(
-    mtx_file: str, barcode_file: str, feature_file: str, sample: str, aligner: str, verbose: bool = False
-):
+def mtx_to_adata(mtx_file: str, barcode_file: str, feature_file: str, sample: str, aligner: str, verbose: bool = False):
 
     if verbose:
         print("Reading in {}".format(mtx_file))
 
     adata = sc.read_mtx(mtx_file)
-    if aligner == 'star': # for some reason star matrix comes transposed and doesn't fit when values are appended directly
+    if (
+        aligner == "star"
+    ):  # for some reason star matrix comes transposed and doesn't fit when values are appended directly
         adata = adata.transpose()
     adata.obs_names = pd.read_csv(barcode_file, header=None, sep="\t")[0].values
     adata.var_names = pd.read_csv(feature_file, header=None, sep="\t")[0].values
@@ -26,9 +26,7 @@ def mtx_to_adata(
     parser = argparse.ArgumentParser(description="Converts mtx output to h5ad.")
 
     parser.add_argument("-m", "--mtx", dest="mtx", help="Path to mtx file.")
-    parser.add_argument(
-        "-v", "--verbose", dest="verbose", help="Toggle verbose messages", default=False
-    )
+    parser.add_argument("-v", "--verbose", dest="verbose", help="Toggle verbose messages", default=False)
     parser.add_argument("-f", "--feature", dest="feature", help="Path to feature file.")
     parser.add_argument("-b", "--barcode", dest="barcode", help="Path to barcode file.")
     parser.add_argument("-s", "--sample", dest="sample", help="Sample name")
@@ -38,7 +36,7 @@ def mtx_to_adata(
     args = vars(parser.parse_args())
 
     adata = mtx_to_adata(
-        args["mtx"], args["barcode"], args["feature"], args["sample"], args["aligner"],verbose=args["verbose"]
+        args["mtx"], args["barcode"], args["feature"], args["sample"], args["aligner"], verbose=args["verbose"]
     )
 
     adata.write_h5ad(args["out"], compression="gzip")
diff --git a/bin/t2g.py b/bin/t2g.py
index 6419dd1d..38930cea 100755
--- a/bin/t2g.py
+++ b/bin/t2g.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
-#This was downloaded on 2019-06-23 from https://github.com/bustools/getting_started/releases/
-#All credit goes to the original authors from the Kallisto/BUStools team!
+# This was downloaded on 2019-06-23 from https://github.com/bustools/getting_started/releases/
+# All credit goes to the original authors from the Kallisto/BUStools team!
 # BSD 2-Clause License
 #
 # Copyright (c) 2017, Nicolas Bray, Harold Pimentel, Páll Melsted and Lior Pachter
@@ -29,43 +29,43 @@
 
 import sys, argparse
 
-def create_transcript_list(input, use_name = True, use_version = False):
+
+def create_transcript_list(input, use_name=True, use_version=False):
     r = {}
     for line in input:
-        if len(line) == 0 or line[0] == '#':
+        if len(line) == 0 or line[0] == "#":
             continue
-        l = line.strip().split('\t')
-        if l[2] == 'transcript':
+        l = line.strip().split("\t")
+        if l[2] == "transcript":
             info = l[8]
             d = {}
-            for x in info.split('; '):
+            for x in info.split("; "):
                 x = x.strip()
-                p = x.find(' ')
+                p = x.find(" ")
                 if p == -1:
                     continue
                 k = x[:p]
-                p = x.find('"',p)
-                p2 = x.find('"',p+1)
-                v = x[p+1:p2]
+                p = x.find('"', p)
+                p2 = x.find('"', p + 1)
+                v = x[p + 1 : p2]
                 d[k] = v
 
-
-            if 'transcript_id' not in d or 'gene_id' not in d:
+            if "transcript_id" not in d or "gene_id" not in d:
                 continue
 
-            tid = d['transcript_id'].split(".")[0]
-            gid = d['gene_id'].split(".")[0]
+            tid = d["transcript_id"].split(".")[0]
+            gid = d["gene_id"].split(".")[0]
             if use_version:
-                if 'transcript_version' not in d or 'gene_version' not in d:
+                if "transcript_version" not in d or "gene_version" not in d:
                     continue
 
-                tid += '.' + d['transcript_version']
-                gid += '.' + d['gene_version']
+                tid += "." + d["transcript_version"]
+                gid += "." + d["gene_version"]
             gname = None
             if use_name:
-                if 'gene_name' not in d:
+                if "gene_name" not in d:
                     continue
-                gname = d['gene_name']
+                gname = d["gene_name"]
 
             if tid in r:
                 continue
@@ -74,26 +74,27 @@ def create_transcript_list(input, use_name = True, use_version = False):
     return r
 
 
-
-def print_output(output, r, use_name = True):
+def print_output(output, r, use_name=True):
     for tid in r:
         if use_name:
-            output.write("%s\t%s\t%s\n"%(tid, r[tid][0], r[tid][1]))
+            output.write("%s\t%s\t%s\n" % (tid, r[tid][0], r[tid][1]))
         else:
-            output.write("%s\t%s\n"%(tid, r[tid][0]))
+            output.write("%s\t%s\n" % (tid, r[tid][0]))
 
 
 if __name__ == "__main__":
 
-
-    parser = argparse.ArgumentParser(add_help=True, description='Creates transcript to gene info from GTF files\nreads from standard input and writes to standard output')
-    parser.add_argument('--use_version', '-v', action='store_true', help='Use version numbers in transcript and gene ids')
-    parser.add_argument('--skip_gene_names', '-s', action='store_true', help='Do not output gene names')
+    parser = argparse.ArgumentParser(
+        add_help=True,
+        description="Creates transcript to gene info from GTF files\nreads from standard input and writes to standard output",
+    )
+    parser.add_argument(
+        "--use_version", "-v", action="store_true", help="Use version numbers in transcript and gene ids"
+    )
+    parser.add_argument("--skip_gene_names", "-s", action="store_true", help="Do not output gene names")
     args = parser.parse_args()
 
-
-
     input = sys.stdin
-    r = create_transcript_list(input, use_name = not args.skip_gene_names, use_version = args.use_version)
+    r = create_transcript_list(input, use_name=not args.skip_gene_names, use_version=args.use_version)
     output = sys.stdout
     print_output(output, r)

From e000e29482ce4b1eab924b1136ae6edc7c25089e Mon Sep 17 00:00:00 2001
From: Harshil Patel <drpatelhh@gmail.com>
Date: Mon, 12 Sep 2022 14:01:30 +0100
Subject: [PATCH 102/165] Update CHANGELOG

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 745a987b..a3fd4dab 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Fixed matrix conversion error when running STAR with --soloFeatures GeneFull [#135](https://github.com/nf-core/scrnaseq/pull/135)
 - Fixed seurat matrix conversion error when running with conda profile [#136](https://github.com/nf-core/scrnaseq/pull/136)
 - Fixed Kallistobustools module [#116](https://github.com/nf-core/scrnaseq/issues/116). By updating nf-core module and making sure conversion modules take into account the different outputs produced by kallisto standard and non-standard workflows.
+- Updated pipeline template to [nf-core/tools 2.5.1](https://github.com/nf-core/tools/releases/tag/2.5.1)
 
 ## v2.0.0 - 2022-06-17 "Gray Nickel Beagle"
 

From ad4250d1819b459319d45e94203f67224d36ecd0 Mon Sep 17 00:00:00 2001
From: Rob Syme <rob.syme@gmail.com>
Date: Mon, 12 Sep 2022 17:14:09 -0400
Subject: [PATCH 103/165] Add missing python function in samplesheet check

---
 bin/check_samplesheet.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py
index 58a13e4a..cf567698 100755
--- a/bin/check_samplesheet.py
+++ b/bin/check_samplesheet.py
@@ -14,6 +14,15 @@
 
 logger = logging.getLogger()
 
+def print_error(error, context="Line", context_str=""):
+    error_str = "ERROR: Please check samplesheet -> {}".format(error)
+    if context != "" and context_str != "":
+        error_str = "ERROR: Please check samplesheet -> {}\n{}: '{}'".format(
+            error, context.strip(), context_str.strip()
+        )
+    print(error_str)
+    sys.exit(1)
+
 
 class RowChecker:
     """

From 88711d9334430e893d8171dd9de63469558df18f Mon Sep 17 00:00:00 2001
From: Rob Syme <rob.syme@gmail.com>
Date: Mon, 12 Sep 2022 17:14:48 -0400
Subject: [PATCH 104/165] Correct input parameter help text. Input is
 samplesheet not fastq

---
 nextflow_schema.json | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/nextflow_schema.json b/nextflow_schema.json
index 91757ae4..98f5421f 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -15,9 +15,10 @@
                 "input": {
                     "type": "string",
                     "mimetype": "text/csv",
+                    "pattern": "^\\S+\\.csv$",
                     "fa_icon": "fas fa-dna",
-                    "description": "Input FastQ files",
-                    "help_text": "Use this to specify the location of your input FastQ files. For example:\n\n```bash\n--input 'path/to/data/sample_*_{1,2}.fastq'\n```\n\nPlease note the following requirements:\n\n1. The path must be enclosed in quotes\n2. The path must have at least one `*` wildcard character\n3. When using the pipeline with paired end data, the path must use `{1,2}` notation to specify read pairs.\n\nIf left unspecified, a default pattern is used: `data/*{1,2}.fastq.gz`"
+                    "description": "Path to comma-separated file containing information about the samples in the experiment.",
+                    "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 4 columns, and a header row. See [usage docs](https://nf-co.re/rnaseq/usage#samplesheet-input)."
                 },
                 "outdir": {
                     "type": "string",

From 54730cbc8de62759104368ccc6730ffedaa1009b Mon Sep 17 00:00:00 2001
From: Rob Syme <rob.syme@gmail.com>
Date: Mon, 12 Sep 2022 22:29:50 -0400
Subject: [PATCH 105/165] Add GTF_GENE_FILTER to remove extraneous features

It is possible to use a GTF file that has annotations on non-chromosomal
sequences. If these non-chromosomal sequences are absent from the
genome_fasta provided to the pipeline, some downstream tools will break.

Here we remove any features that do not have a matching source
sequence in the genome_fasta.
---
 bin/filter_gtf_for_genes_in_genome.py | 86 +++++++++++++++++++++++++++
 modules/local/gtf_gene_filter.nf      | 31 ++++++++++
 workflows/scrnaseq.nf                 | 12 ++--
 3 files changed, 124 insertions(+), 5 deletions(-)
 create mode 100755 bin/filter_gtf_for_genes_in_genome.py
 create mode 100644 modules/local/gtf_gene_filter.nf

diff --git a/bin/filter_gtf_for_genes_in_genome.py b/bin/filter_gtf_for_genes_in_genome.py
new file mode 100755
index 00000000..c7111c90
--- /dev/null
+++ b/bin/filter_gtf_for_genes_in_genome.py
@@ -0,0 +1,86 @@
+#!/usr/bin/env python
+from __future__ import print_function
+import logging
+from itertools import groupby
+import argparse
+
+# Create a logger
+logging.basicConfig(format="%(name)s - %(asctime)s %(levelname)s: %(message)s")
+logger = logging.getLogger(__file__)
+logger.setLevel(logging.INFO)
+
+
+def is_header(line):
+    return line[0] == ">"
+
+
+def extract_fasta_seq_names(fasta_name):
+    """
+    modified from Brent Pedersen
+    Correct Way To Parse A Fasta File In Python
+    given a fasta file. yield tuples of header, sequence
+    from https://www.biostars.org/p/710/
+    """
+    # first open the file outside
+    fh = open(fasta_name)
+
+    # ditch the boolean (x[0]) and just keep the header or sequence since
+    # we know they alternate.
+    faiter = (x[1] for x in groupby(fh, is_header))
+
+    for i, header in enumerate(faiter):
+        line = next(header)
+        if is_header(line):
+            # drop the ">"
+            headerStr = line[1:].strip().split()[0]
+        yield headerStr
+
+
+def extract_genes_in_genome(fasta, gtf_in, gtf_out):
+    seq_names_in_genome = set(extract_fasta_seq_names(fasta))
+    logger.info("Extracted chromosome sequence names from : %s" % fasta)
+    logger.info(
+        "All chromosome names: " + ", ".join(sorted(x for x in seq_names_in_genome))
+    )
+    seq_names_in_gtf = set([])
+
+    n_total_lines = 0
+    n_lines_in_genome = 0
+    with open(gtf_out, "w") as f:
+        with open(gtf_in) as g:
+
+            for line in g.readlines():
+                n_total_lines += 1
+                seq_name_gtf = line.split("\t")[0]
+                seq_names_in_gtf.add(seq_name_gtf)
+                if seq_name_gtf in seq_names_in_genome:
+                    n_lines_in_genome += 1
+                    f.write(line)
+    logger.info(
+        "Extracted %d / %d lines from %s matching sequences in %s"
+        % (n_lines_in_genome, n_total_lines, gtf_in, fasta)
+    )
+    logger.info(
+        "All sequence IDs from GTF: " + ", ".join(sorted(x for x in seq_name_gtf))
+    )
+
+    logger.info("Wrote matching lines to %s" % gtf_out)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description="""Filter GTF only for features in the genome"""
+    )
+    parser.add_argument("--gtf", type=str, help="GTF file")
+    parser.add_argument("--fasta", type=str, help="Genome fasta file")
+    parser.add_argument(
+        "-o",
+        "--output",
+        dest="output",
+        default="genes_in_genome.gtf",
+        type=str,
+        help="GTF features on fasta genome sequences",
+    )
+
+    args = parser.parse_args()
+    extract_genes_in_genome(args.fasta, args.gtf, args.output)
\ No newline at end of file
diff --git a/modules/local/gtf_gene_filter.nf b/modules/local/gtf_gene_filter.nf
new file mode 100644
index 00000000..7f1a6aa1
--- /dev/null
+++ b/modules/local/gtf_gene_filter.nf
@@ -0,0 +1,31 @@
+process GTF_GENE_FILTER {
+    tag "$fasta"
+
+    conda (params.enable_conda ? "conda-forge::python=3.9.5" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/python:3.9--1' :
+        'quay.io/biocontainers/python:3.9--1' }"
+
+    input:
+    path fasta
+    path gtf
+
+    output:
+    path "*.gtf"       , emit: gtf
+    path "versions.yml", emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script: // filter_gtf_for_genes_in_genome.py is bundled with the pipeline, in nf-core/rnaseq/bin/
+    """
+    filter_gtf_for_genes_in_genome.py \\
+        --gtf $gtf \\
+        --fasta $fasta \\
+        -o ${fasta.baseName}_genes.gtf
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        python: \$(python --version | sed 's/Python //g')
+    END_VERSIONS
+    """
+}
diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf
index e377465f..3e557eb1 100644
--- a/workflows/scrnaseq.nf
+++ b/workflows/scrnaseq.nf
@@ -42,7 +42,7 @@ include { SCRNASEQ_ALEVIN   } from '../subworkflows/local/alevin'
 include { STARSOLO          } from '../subworkflows/local/starsolo'
 include { CELLRANGER_ALIGN  } from "../subworkflows/local/align_cellranger"
 include { MTX_CONVERSION    } from "../subworkflows/local/mtx_conversion"
-
+include { GTF_GENE_FILTER   } from '../modules/local/gtf_gene_filter'
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     IMPORT NF-CORE MODULES/SUBWORKFLOWS
@@ -116,11 +116,13 @@ workflow SCRNASEQ {
       ch_multiqc_fastqc    = FASTQC_CHECK.out.fastqc_multiqc.ifEmpty([])
     }
 
+    ch_filter_gtf = GTF_GENE_FILTER ( ch_genome_fasta, ch_gtf ).gtf
+
     // Run kallisto bustools pipeline
     if (params.aligner == "kallisto") {
         KALLISTO_BUSTOOLS(
             ch_genome_fasta,
-            ch_gtf,
+            ch_filter_gtf,
             ch_kallisto_index,
             ch_txp2gene,
             protocol,
@@ -136,7 +138,7 @@ workflow SCRNASEQ {
     if (params.aligner == "alevin") {
         SCRNASEQ_ALEVIN(
             ch_genome_fasta,
-            ch_gtf,
+            ch_filter_gtf,
             ch_transcript_fasta,
             ch_salmon_index,
             ch_txp2gene,
@@ -154,7 +156,7 @@ workflow SCRNASEQ {
     if (params.aligner == "star") {
         STARSOLO(
             ch_genome_fasta,
-            ch_gtf,
+            ch_filter_gtf,
             ch_star_index,
             protocol,
             ch_barcode_whitelist,
@@ -170,7 +172,7 @@ workflow SCRNASEQ {
     if (params.aligner == "cellranger") {
         CELLRANGER_ALIGN(
             ch_genome_fasta,
-            ch_gtf,
+            ch_filter_gtf,
             ch_cellranger_index,
             ch_fastq
         )

From 2235b9bb38a9adfd98a8db65fde1741f9c7c36c2 Mon Sep 17 00:00:00 2001
From: Rob Syme <rob.syme@gmail.com>
Date: Mon, 12 Sep 2022 22:32:43 -0400
Subject: [PATCH 106/165] Patch to remove small transcript sequences

Salmon index will fail if any of the provided transcripts are shorter
than the k-mer size (k).
---
 modules.json                                  |  3 +-
 modules/nf-core/modules/salmon/index/main.nf  | 11 ++++--
 .../modules/salmon/index/salmon-index.diff    | 36 +++++++++++++++++++
 3 files changed, 47 insertions(+), 3 deletions(-)
 create mode 100644 modules/nf-core/modules/salmon/index/salmon-index.diff

diff --git a/modules.json b/modules.json
index 0b04f799..e72473dd 100644
--- a/modules.json
+++ b/modules.json
@@ -47,7 +47,8 @@
                 },
                 "salmon/index": {
                     "branch": "master",
-                    "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
+                    "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d",
+                    "patch": "modules/nf-core/modules/salmon/index/salmon-index.diff"
                 },
                 "salmon/quant": {
                     "branch": "master",
diff --git a/modules/nf-core/modules/salmon/index/main.nf b/modules/nf-core/modules/salmon/index/main.nf
index 737087f9..d875a345 100644
--- a/modules/nf-core/modules/salmon/index/main.nf
+++ b/modules/nf-core/modules/salmon/index/main.nf
@@ -20,21 +20,28 @@ process SALMON_INDEX {
 
     script:
     def args = task.ext.args ?: ''
+    def kmer_argmatch = args =~ /\-k *(\d+)/
+    def k = kmer_argmatch ? kmer_argmatch[0][1] : 31
     def get_decoy_ids = "grep '^>' $genome_fasta | cut -d ' ' -f 1 > decoys.txt"
     def gentrome      = "gentrome.fa"
+    def maybe_unzip   = "cat"
     if (genome_fasta.endsWith('.gz')) {
         get_decoy_ids = "grep '^>' <(gunzip -c $genome_fasta) | cut -d ' ' -f 1 > decoys.txt"
         gentrome      = "gentrome.fa.gz"
+        maybe_unzip   = "gunzip -c" 
     }
     """
     $get_decoy_ids
     sed -i.bak -e 's/>//g' decoys.txt
-    cat $transcript_fasta $genome_fasta > $gentrome
+    cat $transcript_fasta $genome_fasta \\
+    | $maybe_unzip \\
+    | awk '!/^>/ { next } { getline seq } length(seq) >= $k { print \$0 "\\n" seq }' \\
+    | gzip -c > gentrome.filtered.fasta.gz
 
     salmon \\
         index \\
         --threads $task.cpus \\
-        -t $gentrome \\
+        -t gentrome.filtered.fasta.gz \\
         -d decoys.txt \\
         $args \\
         -i salmon
diff --git a/modules/nf-core/modules/salmon/index/salmon-index.diff b/modules/nf-core/modules/salmon/index/salmon-index.diff
new file mode 100644
index 00000000..87f976cd
--- /dev/null
+++ b/modules/nf-core/modules/salmon/index/salmon-index.diff
@@ -0,0 +1,36 @@
+Changes in module 'nf-core/modules/salmon/index'
+--- modules/nf-core/modules/salmon/index/main.nf
++++ modules/nf-core/modules/salmon/index/main.nf
+@@ -20,21 +20,28 @@
+ 
+     script:
+     def args = task.ext.args ?: ''
++    def kmer_argmatch = args =~ /\-k *(\d+)/
++    def k = kmer_argmatch ? kmer_argmatch[0][1] : 31
+     def get_decoy_ids = "grep '^>' $genome_fasta | cut -d ' ' -f 1 > decoys.txt"
+     def gentrome      = "gentrome.fa"
++    def maybe_unzip   = "cat"
+     if (genome_fasta.endsWith('.gz')) {
+         get_decoy_ids = "grep '^>' <(gunzip -c $genome_fasta) | cut -d ' ' -f 1 > decoys.txt"
+         gentrome      = "gentrome.fa.gz"
++        maybe_unzip   = "gunzip -c" 
+     }
+     """
+     $get_decoy_ids
+     sed -i.bak -e 's/>//g' decoys.txt
+-    cat $transcript_fasta $genome_fasta > $gentrome
++    cat $transcript_fasta $genome_fasta \\
++    | $maybe_unzip \\
++    | awk '!/^>/ { next } { getline seq } length(seq) >= $k { print \$0 "\\n" seq }' \\
++    | gzip -c > gentrome.filtered.fasta.gz
+ 
+     salmon \\
+         index \\
+         --threads $task.cpus \\
+-        -t $gentrome \\
++        -t gentrome.filtered.fasta.gz \\
+         -d decoys.txt \\
+         $args \\
+         -i salmon
+
+************************************************************

From 66d2e2f5bd1818693d5023f4799651956467442b Mon Sep 17 00:00:00 2001
From: vjmarteau <valentinmarteau7@gmail.com>
Date: Thu, 22 Sep 2022 10:24:13 +0200
Subject: [PATCH 107/165] Add print_error function

---
 bin/check_samplesheet.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py
index d98bdaa3..4a6496bb 100755
--- a/bin/check_samplesheet.py
+++ b/bin/check_samplesheet.py
@@ -139,6 +139,12 @@ def read_head(handle, num_lines=10):
         lines.append(line)
     return "".join(lines)
 
+def print_error(error, context="Line", context_str=""):
+    error_str = f"ERROR: Please check samplesheet -> {error}"
+    if context != "" and context_str != "":
+        error_str = f"ERROR: Please check samplesheet -> {error}\n{context.strip()}: '{context_str.strip()}'"
+    print(error_str)
+    sys.exit(1)
 
 def sniff_format(handle):
     """

From 4835d9de4dd04464d462daea8f6a28aa236c6e62 Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alex.peltzer@gmail.com>
Date: Thu, 22 Sep 2022 10:45:47 +0000
Subject: [PATCH 108/165] Fix linting

---
 bin/check_samplesheet.py | 31 +++++++++++----
 bin/t2g.py               | 70 ++++++++++++++++++---------------
 modules.json             | 85 +++++++++++++++++++++++-----------------
 3 files changed, 113 insertions(+), 73 deletions(-)

diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py
index d98bdaa3..4e72568f 100755
--- a/bin/check_samplesheet.py
+++ b/bin/check_samplesheet.py
@@ -86,7 +86,9 @@ def _validate_sample(self, row):
 
     def _validate_first(self, row):
         """Assert that the first FASTQ entry is non-empty and has the right format."""
-        assert len(row[self._first_col]) > 0, "At least the first FASTQ file is required."
+        assert (
+            len(row[self._first_col]) > 0
+        ), "At least the first FASTQ file is required."
         self._validate_fastq_format(row[self._first_col])
 
     def _validate_second(self, row):
@@ -99,7 +101,8 @@ def _validate_pair(self, row):
         if row[self._first_col] and row[self._second_col]:
             row[self._single_col] = False
             assert (
-                Path(row[self._first_col]).suffixes[-2:] == Path(row[self._second_col]).suffixes[-2:]
+                Path(row[self._first_col]).suffixes[-2:]
+                == Path(row[self._second_col]).suffixes[-2:]
             ), "FASTQ pairs must have the same file extensions."
         else:
             row[self._single_col] = True
@@ -119,7 +122,9 @@ def validate_unique_samples(self):
         FASTQ file combination exists.
 
         """
-        assert len(self._seen) == len(self.modified), "The pair of sample name and FASTQ must be unique."
+        assert len(self._seen) == len(
+            self.modified
+        ), "The pair of sample name and FASTQ must be unique."
         if len({pair[0] for pair in self._seen}) < len(self._seen):
             counts = Counter(pair[0] for pair in self._seen)
             seen = Counter()
@@ -200,7 +205,11 @@ def check_samplesheet(file_in, file_out):
         HEADER = ["sample", "fastq_1", "fastq_2"]
         header = [x.strip('"') for x in fin.readline().strip().split(",")]
         if header[: len(HEADER)] != HEADER:
-            print("ERROR: Please check samplesheet header -> {} != {}".format(",".join(header), ",".join(HEADER)))
+            print(
+                "ERROR: Please check samplesheet header -> {} != {}".format(
+                    ",".join(header), ",".join(HEADER)
+                )
+            )
             sys.exit(1)
 
         ## Check sample entries
@@ -217,7 +226,9 @@ def check_samplesheet(file_in, file_out):
             num_cols = len([x for x in lspl if x])
             if num_cols < MIN_COLS:
                 print_error(
-                    "Invalid number of populated columns (minimum = {})!".format(MIN_COLS),
+                    "Invalid number of populated columns (minimum = {})!".format(
+                        MIN_COLS
+                    ),
                     "Line",
                     line,
                 )
@@ -266,8 +277,14 @@ def check_samplesheet(file_in, file_out):
             for sample in sorted(sample_mapping_dict.keys()):
 
                 ## Check that multiple runs of the same sample are of the same datatype
-                if not all(x[0] == sample_mapping_dict[sample][0][0] for x in sample_mapping_dict[sample]):
-                    print_error("Multiple runs of a sample must be of the same datatype!", "Sample: {}".format(sample))
+                if not all(
+                    x[0] == sample_mapping_dict[sample][0][0]
+                    for x in sample_mapping_dict[sample]
+                ):
+                    print_error(
+                        "Multiple runs of a sample must be of the same datatype!",
+                        "Sample: {}".format(sample),
+                    )
 
                 for idx, val in enumerate(sample_mapping_dict[sample]):
                     fout.write(",".join(["{}".format(sample)] + val) + "\n")
diff --git a/bin/t2g.py b/bin/t2g.py
index 6419dd1d..5daf3df5 100755
--- a/bin/t2g.py
+++ b/bin/t2g.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
-#This was downloaded on 2019-06-23 from https://github.com/bustools/getting_started/releases/
-#All credit goes to the original authors from the Kallisto/BUStools team!
+# This was downloaded on 2019-06-23 from https://github.com/bustools/getting_started/releases/
+# All credit goes to the original authors from the Kallisto/BUStools team!
 # BSD 2-Clause License
 #
 # Copyright (c) 2017, Nicolas Bray, Harold Pimentel, Páll Melsted and Lior Pachter
@@ -29,43 +29,43 @@
 
 import sys, argparse
 
-def create_transcript_list(input, use_name = True, use_version = False):
+
+def create_transcript_list(input, use_name=True, use_version=False):
     r = {}
     for line in input:
-        if len(line) == 0 or line[0] == '#':
+        if len(line) == 0 or line[0] == "#":
             continue
-        l = line.strip().split('\t')
-        if l[2] == 'transcript':
+        l = line.strip().split("\t")
+        if l[2] == "transcript":
             info = l[8]
             d = {}
-            for x in info.split('; '):
+            for x in info.split("; "):
                 x = x.strip()
-                p = x.find(' ')
+                p = x.find(" ")
                 if p == -1:
                     continue
                 k = x[:p]
-                p = x.find('"',p)
-                p2 = x.find('"',p+1)
-                v = x[p+1:p2]
+                p = x.find('"', p)
+                p2 = x.find('"', p + 1)
+                v = x[p + 1 : p2]
                 d[k] = v
 
-
-            if 'transcript_id' not in d or 'gene_id' not in d:
+            if "transcript_id" not in d or "gene_id" not in d:
                 continue
 
-            tid = d['transcript_id'].split(".")[0]
-            gid = d['gene_id'].split(".")[0]
+            tid = d["transcript_id"].split(".")[0]
+            gid = d["gene_id"].split(".")[0]
             if use_version:
-                if 'transcript_version' not in d or 'gene_version' not in d:
+                if "transcript_version" not in d or "gene_version" not in d:
                     continue
 
-                tid += '.' + d['transcript_version']
-                gid += '.' + d['gene_version']
+                tid += "." + d["transcript_version"]
+                gid += "." + d["gene_version"]
             gname = None
             if use_name:
-                if 'gene_name' not in d:
+                if "gene_name" not in d:
                     continue
-                gname = d['gene_name']
+                gname = d["gene_name"]
 
             if tid in r:
                 continue
@@ -74,26 +74,34 @@ def create_transcript_list(input, use_name = True, use_version = False):
     return r
 
 
-
-def print_output(output, r, use_name = True):
+def print_output(output, r, use_name=True):
     for tid in r:
         if use_name:
-            output.write("%s\t%s\t%s\n"%(tid, r[tid][0], r[tid][1]))
+            output.write("%s\t%s\t%s\n" % (tid, r[tid][0], r[tid][1]))
         else:
-            output.write("%s\t%s\n"%(tid, r[tid][0]))
+            output.write("%s\t%s\n" % (tid, r[tid][0]))
 
 
 if __name__ == "__main__":
 
-
-    parser = argparse.ArgumentParser(add_help=True, description='Creates transcript to gene info from GTF files\nreads from standard input and writes to standard output')
-    parser.add_argument('--use_version', '-v', action='store_true', help='Use version numbers in transcript and gene ids')
-    parser.add_argument('--skip_gene_names', '-s', action='store_true', help='Do not output gene names')
+    parser = argparse.ArgumentParser(
+        add_help=True,
+        description="Creates transcript to gene info from GTF files\nreads from standard input and writes to standard output",
+    )
+    parser.add_argument(
+        "--use_version",
+        "-v",
+        action="store_true",
+        help="Use version numbers in transcript and gene ids",
+    )
+    parser.add_argument(
+        "--skip_gene_names", "-s", action="store_true", help="Do not output gene names"
+    )
     args = parser.parse_args()
 
-
-
     input = sys.stdin
-    r = create_transcript_list(input, use_name = not args.skip_gene_names, use_version = args.use_version)
+    r = create_transcript_list(
+        input, use_name=not args.skip_gene_names, use_version=args.use_version
+    )
     output = sys.stdout
     print_output(output, r)
diff --git a/modules.json b/modules.json
index b0f8bb72..e18acd52 100644
--- a/modules.json
+++ b/modules.json
@@ -3,41 +3,56 @@
     "homePage": "https://github.com/nf-core/scrnaseq",
     "repos": {
         "nf-core/modules": {
-            "cellranger/count": {
-                "git_sha": "8032833e6e0fd3edeb20d4121885112249f273c4"
-            },
-            "cellranger/mkgtf": {
-                "git_sha": "e499abe572bc7c1322dfa66b767b0e65fe8c62ca"
-            },
-            "cellranger/mkref": {
-                "git_sha": "61d87bb2bcbcd058e1af6e5f14bb821b8ae4ed3e"
-            },
-            "custom/dumpsoftwareversions": {
-                "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
-            },
-            "fastqc": {
-                "git_sha": "49b18b1639f4f7104187058866a8fab33332bdfe"
-            },
-            "gffread": {
-                "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
-            },
-            "gunzip": {
-                "git_sha": "9aadd9a6d3f5964476582319b3a1c54a3e3fe7c9"
-            },
-            "kallistobustools/ref": {
-                "git_sha": "e20e57f90b6787ac9a010a980cf6ea98bd990046"
-            },
-            "multiqc": {
-                "git_sha": "e5f8924fabf4c8380f55fb7aee89fd2c268161b1"
-            },
-            "salmon/index": {
-                "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
-            },
-            "salmon/quant": {
-                "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
-            },
-            "star/genomegenerate": {
-                "git_sha": "fb6c7bca3d55c19a793372513395e3a567bdd7ba"
+            "git_url": "https://github.com/nf-core/modules.git",
+            "modules": {
+                "cellranger/count": {
+                    "branch": "master",
+                    "git_sha": "8032833e6e0fd3edeb20d4121885112249f273c4"
+                },
+                "cellranger/mkgtf": {
+                    "branch": "master",
+                    "git_sha": "e499abe572bc7c1322dfa66b767b0e65fe8c62ca"
+                },
+                "cellranger/mkref": {
+                    "branch": "master",
+                    "git_sha": "61d87bb2bcbcd058e1af6e5f14bb821b8ae4ed3e"
+                },
+                "custom/dumpsoftwareversions": {
+                    "branch": "master",
+                    "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
+                },
+                "fastqc": {
+                    "branch": "master",
+                    "git_sha": "49b18b1639f4f7104187058866a8fab33332bdfe"
+                },
+                "gffread": {
+                    "branch": "master",
+                    "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
+                },
+                "gunzip": {
+                    "branch": "master",
+                    "git_sha": "9aadd9a6d3f5964476582319b3a1c54a3e3fe7c9"
+                },
+                "kallistobustools/ref": {
+                    "branch": "master",
+                    "git_sha": "e20e57f90b6787ac9a010a980cf6ea98bd990046"
+                },
+                "multiqc": {
+                    "branch": "master",
+                    "git_sha": "e5f8924fabf4c8380f55fb7aee89fd2c268161b1"
+                },
+                "salmon/index": {
+                    "branch": "master",
+                    "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
+                },
+                "salmon/quant": {
+                    "branch": "master",
+                    "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
+                },
+                "star/genomegenerate": {
+                    "branch": "master",
+                    "git_sha": "fb6c7bca3d55c19a793372513395e3a567bdd7ba"
+                }
             }
         }
     }

From ba45b13bfe405f192735fed3499074665e37cb91 Mon Sep 17 00:00:00 2001
From: EC2 Default User <ec2-user@ip-172-31-11-186.ec2.internal>
Date: Thu, 22 Sep 2022 14:47:00 +0000
Subject: [PATCH 109/165] Linting fix

---
 bin/check_samplesheet.py              | 23 +++++++++++++----------
 bin/filter_gtf_for_genes_in_genome.py | 17 +++++------------
 2 files changed, 18 insertions(+), 22 deletions(-)

diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py
index cf567698..77a22f17 100755
--- a/bin/check_samplesheet.py
+++ b/bin/check_samplesheet.py
@@ -14,15 +14,6 @@
 
 logger = logging.getLogger()
 
-def print_error(error, context="Line", context_str=""):
-    error_str = "ERROR: Please check samplesheet -> {}".format(error)
-    if context != "" and context_str != "":
-        error_str = "ERROR: Please check samplesheet -> {}\n{}: '{}'".format(
-            error, context.strip(), context_str.strip()
-        )
-    print(error_str)
-    sys.exit(1)
-
 
 class RowChecker:
     """
@@ -150,6 +141,16 @@ def read_head(handle, num_lines=10):
     return "".join(lines)
 
 
+def print_error(error, context="Line", context_str=""):
+    error_str = "ERROR: Please check samplesheet -> {}".format(error)
+    if context != "" and context_str != "":
+        error_str = "ERROR: Please check samplesheet -> {}\n{}: '{}'".format(
+            error, context.strip(), context_str.strip()
+        )
+    print(error_str)
+    sys.exit(1)
+
+
 def sniff_format(handle):
     """
     Detect the tabular format.
@@ -210,7 +211,9 @@ def check_samplesheet(file_in, file_out):
         HEADER = ["sample", "fastq_1", "fastq_2"]
         header = [x.strip('"') for x in fin.readline().strip().split(",")]
         if header[: len(HEADER)] != HEADER:
-            print("ERROR: Please check samplesheet header -> {} != {}".format(",".join(header), ",".join(HEADER)))
+            given = ",".join(header)
+            wanted = ",".join(HEADER)
+            print(f"ERROR: Please check samplesheet header -> {given} != {wanted}")
             sys.exit(1)
 
         ## Check sample entries
diff --git a/bin/filter_gtf_for_genes_in_genome.py b/bin/filter_gtf_for_genes_in_genome.py
index c7111c90..ef4c87cd 100755
--- a/bin/filter_gtf_for_genes_in_genome.py
+++ b/bin/filter_gtf_for_genes_in_genome.py
@@ -39,9 +39,7 @@ def extract_fasta_seq_names(fasta_name):
 def extract_genes_in_genome(fasta, gtf_in, gtf_out):
     seq_names_in_genome = set(extract_fasta_seq_names(fasta))
     logger.info("Extracted chromosome sequence names from : %s" % fasta)
-    logger.info(
-        "All chromosome names: " + ", ".join(sorted(x for x in seq_names_in_genome))
-    )
+    logger.info("All chromosome names: " + ", ".join(sorted(x for x in seq_names_in_genome)))
     seq_names_in_gtf = set([])
 
     n_total_lines = 0
@@ -57,20 +55,15 @@ def extract_genes_in_genome(fasta, gtf_in, gtf_out):
                     n_lines_in_genome += 1
                     f.write(line)
     logger.info(
-        "Extracted %d / %d lines from %s matching sequences in %s"
-        % (n_lines_in_genome, n_total_lines, gtf_in, fasta)
-    )
-    logger.info(
-        "All sequence IDs from GTF: " + ", ".join(sorted(x for x in seq_name_gtf))
+        "Extracted %d / %d lines from %s matching sequences in %s" % (n_lines_in_genome, n_total_lines, gtf_in, fasta)
     )
+    logger.info("All sequence IDs from GTF: " + ", ".join(sorted(x for x in seq_name_gtf)))
 
     logger.info("Wrote matching lines to %s" % gtf_out)
 
 
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(
-        description="""Filter GTF only for features in the genome"""
-    )
+    parser = argparse.ArgumentParser(description="""Filter GTF only for features in the genome""")
     parser.add_argument("--gtf", type=str, help="GTF file")
     parser.add_argument("--fasta", type=str, help="Genome fasta file")
     parser.add_argument(
@@ -83,4 +76,4 @@ def extract_genes_in_genome(fasta, gtf_in, gtf_out):
     )
 
     args = parser.parse_args()
-    extract_genes_in_genome(args.fasta, args.gtf, args.output)
\ No newline at end of file
+    extract_genes_in_genome(args.fasta, args.gtf, args.output)

From 1ec928a28d0f9b33db1bd1d05a82806470607ad2 Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <felipemarques89@gmail.com>
Date: Mon, 26 Sep 2022 09:01:33 +0000
Subject: [PATCH 110/165] update alevinQC packages and module

---
 modules/local/alevinqc.nf                      | 18 ++++++++++++------
 modules/local/mtx_to_h5ad.nf                   |  6 +++---
 modules/local/mtx_to_seurat.nf                 |  6 +++---
 .../execution_trace_2022-09-26_07-01-32.txt    |  1 +
 4 files changed, 19 insertions(+), 12 deletions(-)
 create mode 100644 test/null/pipeline_info/execution_trace_2022-09-26_07-01-32.txt

diff --git a/modules/local/alevinqc.nf b/modules/local/alevinqc.nf
index 1f37d209..c56eb6a6 100644
--- a/modules/local/alevinqc.nf
+++ b/modules/local/alevinqc.nf
@@ -4,8 +4,8 @@ process ALEVINQC {
 
     conda (params.enable_conda ? "bioconda::bioconductor-alevinqc=1.6.1" : null)
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/bioconductor-alevinqc:1.10.0--r41hdfd78af_0' :
-        'quay.io/biocontainers/bioconductor-alevinqc:1.10.0--r41hdfd78af_0' }"
+        'https://depot.galaxyproject.org/singularity/bioconductor-alevinqc:1.12.1--r41h9f5acd7_0' :
+        'quay.io/biocontainers/bioconductor-alevinqc:1.12.1--r41h9f5acd7_0' }"
 
     input:
     tuple val(meta), path(alevin_results)
@@ -19,10 +19,16 @@ process ALEVINQC {
     """
     #!/usr/bin/env Rscript
     require(alevinQC)
-    alevinQCReport(baseDir = "${alevin_results}", sampleId = "${prefix}",
-                outputFile = "alevin_report_${meta.id}.html",
-                outputFormat = "html_document",
-                outputDir = "./", forceOverwrite = TRUE)
+    alevinFryQCReport(
+        mapDir = "${alevin_results}/af_map",
+        quantDir = "${alevin_results}/af_quant",
+        permitDir= "${alevin_results}",
+        sampleId = "${prefix}",
+        outputFile = "alevin_report_${meta.id}.html",
+        outputFormat = "html_document",
+        outputDir = "./",
+        forceOverwrite = TRUE
+    )
 
     yaml::write_yaml(
         list(
diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf
index 22a04191..cf753f30 100644
--- a/modules/local/mtx_to_h5ad.nf
+++ b/modules/local/mtx_to_h5ad.nf
@@ -22,9 +22,9 @@ process MTX_TO_H5AD {
         barcodes_tsv = "*count/counts_unfiltered/*.barcodes.txt"
         features_tsv = "*count/counts_unfiltered/*.genes.txt"
     } else if (params.aligner == 'alevin') {
-        mtx_matrix   = "*_alevin_results/alevin/quants_mat.mtx.gz"
-        barcodes_tsv = "*_alevin_results/alevin/quants_mat_rows.txt"
-        features_tsv = "*_alevin_results/alevin/quants_mat_cols.txt"
+        mtx_matrix   = "*_alevin_results/af_quant/alevin/quants_mat.mtx"
+        barcodes_tsv = "*_alevin_results/af_quant/alevin/quants_mat_rows.txt"
+        features_tsv = "*_alevin_results/af_quant/alevin/quants_mat_cols.txt"
     } else if (params.aligner == 'star') {
         mtx_matrix   = "*.Solo.out/Gene*/filtered/matrix.mtx.gz"
         barcodes_tsv = "*.Solo.out/Gene*/filtered/barcodes.tsv.gz"
diff --git a/modules/local/mtx_to_seurat.nf b/modules/local/mtx_to_seurat.nf
index e2aa8217..3d834a2f 100644
--- a/modules/local/mtx_to_seurat.nf
+++ b/modules/local/mtx_to_seurat.nf
@@ -26,9 +26,9 @@ process MTX_TO_SEURAT {
         barcodes = "*count/counts_unfiltered/*.barcodes.txt"
         features = "*count/counts_unfiltered/*.genes.txt"
     } else if (params.aligner == "alevin") {
-        matrix   = "*_alevin_results/alevin/quants_mat.mtx.gz"
-        barcodes = "*_alevin_results/alevin/quants_mat_rows.txt"
-        features = "*_alevin_results/alevin/quants_mat_cols.txt"
+        matrix   = "*_alevin_results/af_quant/alevin/quants_mat.mtx"
+        barcodes = "*_alevin_results/af_quant/alevin/quants_mat_rows.txt"
+        features = "*_alevin_results/af_quant/alevin/quants_mat_cols.txt"
     } else if (params.aligner == 'star') {
         matrix   = "*.Solo.out/Gene*/filtered/matrix.mtx.gz"
         barcodes = "*.Solo.out/Gene*/filtered/barcodes.tsv.gz"
diff --git a/test/null/pipeline_info/execution_trace_2022-09-26_07-01-32.txt b/test/null/pipeline_info/execution_trace_2022-09-26_07-01-32.txt
new file mode 100644
index 00000000..6b739acd
--- /dev/null
+++ b/test/null/pipeline_info/execution_trace_2022-09-26_07-01-32.txt
@@ -0,0 +1 @@
+task_id	hash	native_id	name	status	exit	submit	duration	realtime	%cpu	peak_rss	peak_vmem	rchar	wchar

From 73bfc063e85b16e1256d8fab678c2af6c4ee5adf Mon Sep 17 00:00:00 2001
From: Khajidu <Khajidu@users.noreply.github.com>
Date: Thu, 29 Sep 2022 12:56:53 +0200
Subject: [PATCH 111/165] Repair inconsistency: bump simpleaf_index conda pin to match container (0.5.1)

---
 modules/local/simpleaf_index.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/local/simpleaf_index.nf b/modules/local/simpleaf_index.nf
index 853f1f95..ef05c426 100644
--- a/modules/local/simpleaf_index.nf
+++ b/modules/local/simpleaf_index.nf
@@ -2,7 +2,7 @@ process SIMPLEAF_INDEX {
     tag "$transcript_gtf"
     label "process_medium"
 
-    conda (params.enable_conda ? 'bioconda::simpleaf=0.4.0' : null)
+    conda (params.enable_conda ? 'bioconda::simpleaf=0.5.1--h9f5acd7_0' : null)
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
         'https://depot.galaxyproject.org/singularity/simpleaf:0.5.1--h9f5acd7_0' :
         'quay.io/biocontainers/simpleaf:0.5.1--h9f5acd7_0' }"

From b753ab170b024f9635aab5feed21b33c4ce00c60 Mon Sep 17 00:00:00 2001
From: Khajidu <Khajidu@users.noreply.github.com>
Date: Thu, 29 Sep 2022 12:59:06 +0200
Subject: [PATCH 112/165] Repair inconsistency: bump simpleaf_quant conda pin to match container (0.5.1)

---
 modules/local/simpleaf_quant.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/local/simpleaf_quant.nf b/modules/local/simpleaf_quant.nf
index 7f14048e..3540a877 100644
--- a/modules/local/simpleaf_quant.nf
+++ b/modules/local/simpleaf_quant.nf
@@ -2,7 +2,7 @@ process SIMPLEAF_QUANT {
     tag "$meta.id"
     label 'process_high'
 
-    conda (params.enable_conda ? 'bioconda::simpleaf=0.4.0' : null)
+    conda (params.enable_conda ? 'bioconda::simpleaf=0.5.1--h9f5acd7_0' : null)
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
         'https://depot.galaxyproject.org/singularity/simpleaf:0.5.1--h9f5acd7_0' :
         'quay.io/biocontainers/simpleaf:0.5.1--h9f5acd7_0' }"

From c31554f856fd9a5cc793b676ffcbdda5ab1668b9 Mon Sep 17 00:00:00 2001
From: Khajidu <Khajidu@users.noreply.github.com>
Date: Thu, 29 Sep 2022 13:20:02 +0200
Subject: [PATCH 113/165] Update simpleaf_quant.nf: drop build string from conda version pin

---
 modules/local/simpleaf_quant.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/local/simpleaf_quant.nf b/modules/local/simpleaf_quant.nf
index 3540a877..e1178598 100644
--- a/modules/local/simpleaf_quant.nf
+++ b/modules/local/simpleaf_quant.nf
@@ -2,7 +2,7 @@ process SIMPLEAF_QUANT {
     tag "$meta.id"
     label 'process_high'
 
-    conda (params.enable_conda ? 'bioconda::simpleaf=0.5.1--h9f5acd7_0' : null)
+    conda (params.enable_conda ? 'bioconda::simpleaf=0.5.1' : null)
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
         'https://depot.galaxyproject.org/singularity/simpleaf:0.5.1--h9f5acd7_0' :
         'quay.io/biocontainers/simpleaf:0.5.1--h9f5acd7_0' }"

From 13472570da5a657733e9dd86ebd4c7de0fd65494 Mon Sep 17 00:00:00 2001
From: Khajidu <Khajidu@users.noreply.github.com>
Date: Thu, 29 Sep 2022 13:20:30 +0200
Subject: [PATCH 114/165] Update simpleaf_index.nf: drop build string from conda version pin

---
 modules/local/simpleaf_index.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/local/simpleaf_index.nf b/modules/local/simpleaf_index.nf
index ef05c426..2d3d7cb2 100644
--- a/modules/local/simpleaf_index.nf
+++ b/modules/local/simpleaf_index.nf
@@ -2,7 +2,7 @@ process SIMPLEAF_INDEX {
     tag "$transcript_gtf"
     label "process_medium"
 
-    conda (params.enable_conda ? 'bioconda::simpleaf=0.5.1--h9f5acd7_0' : null)
+    conda (params.enable_conda ? 'bioconda::simpleaf=0.5.1' : null)
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
         'https://depot.galaxyproject.org/singularity/simpleaf:0.5.1--h9f5acd7_0' :
         'quay.io/biocontainers/simpleaf:0.5.1--h9f5acd7_0' }"

From e172ecc16688f4422b07b906787aaa2a241c15f0 Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alex.peltzer@gmail.com>
Date: Thu, 29 Sep 2022 18:16:43 +0000
Subject: [PATCH 115/165] Also update conda version for alevinqc (1.6.1 -> 1.12.1)

---
 modules/local/alevinqc.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/local/alevinqc.nf b/modules/local/alevinqc.nf
index c56eb6a6..be8ae8cc 100644
--- a/modules/local/alevinqc.nf
+++ b/modules/local/alevinqc.nf
@@ -2,7 +2,7 @@ process ALEVINQC {
     tag "$meta.id"
     label 'process_low'
 
-    conda (params.enable_conda ? "bioconda::bioconductor-alevinqc=1.6.1" : null)
+    conda (params.enable_conda ? "bioconda::bioconductor-alevinqc=1.12.1" : null)
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
         'https://depot.galaxyproject.org/singularity/bioconductor-alevinqc:1.12.1--r41h9f5acd7_0' :
         'quay.io/biocontainers/bioconductor-alevinqc:1.12.1--r41h9f5acd7_0' }"

From 9f62dbafa65775ec69f84337429c35ba36878065 Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <felipemarques89@gmail.com>
Date: Thu, 29 Sep 2022 19:40:32 +0000
Subject: [PATCH 116/165] add orientation to module

---
 conf/modules.config             | 2 +-
 modules/local/alevinqc.nf       | 2 +-
 modules/local/simpleaf_quant.nf | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/conf/modules.config b/conf/modules.config
index 2b05f1e7..66584811 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -82,7 +82,7 @@ if (params.aligner == "alevin") {
                 path: { "${params.outdir}/${params.aligner}" },
                 mode: params.publish_dir_mode
             ]
-            ext.args = "-r cr-like"
+            ext.args = "-r cr-like -d fw"
         }
     }
 }
diff --git a/modules/local/alevinqc.nf b/modules/local/alevinqc.nf
index be8ae8cc..4832a2d6 100644
--- a/modules/local/alevinqc.nf
+++ b/modules/local/alevinqc.nf
@@ -22,7 +22,7 @@ process ALEVINQC {
     alevinFryQCReport(
         mapDir = "${alevin_results}/af_map",
         quantDir = "${alevin_results}/af_quant",
-        permitDir= "${alevin_results}",
+        permitDir= "${alevin_results}/af_quant",
         sampleId = "${prefix}",
         outputFile = "alevin_report_${meta.id}.html",
         outputFormat = "html_document",
diff --git a/modules/local/simpleaf_quant.nf b/modules/local/simpleaf_quant.nf
index e1178598..827e8991 100644
--- a/modules/local/simpleaf_quant.nf
+++ b/modules/local/simpleaf_quant.nf
@@ -23,7 +23,7 @@ process SIMPLEAF_QUANT {
     path  "versions.yml"                     , emit: versions
 
     script:
-    def args = task.ext.args ?: ''
+    def args   = task.ext.args ?: ''
     def prefix = task.ext.prefix ?: "${meta.id}"
 
     // separate forward from reverse pairs

From 520bc15da9f0025094936456d905afea7389b66e Mon Sep 17 00:00:00 2001
From: Rob Syme <rob.syme@gmail.com>
Date: Fri, 30 Sep 2022 03:02:29 +0000
Subject: [PATCH 117/165] Use iGenome for fasta and gtf when available

---
 README.md                               | 2 +-
 conf/test.config                        | 6 +++++-
 conf/test_full.config                   | 6 ++++--
 docs/usage.md                           | 2 +-
 lib/WorkflowMain.groovy                 | 6 +++++-
 lib/WorkflowScrnaseq.groovy             | 5 +++++
 main.nf                                 | 3 +++
 nextflow.config                         | 3 +--
 nextflow_schema.json                    | 2 +-
 subworkflows/local/alevin.nf            | 2 +-
 subworkflows/local/align_cellranger.nf  | 2 +-
 subworkflows/local/kallisto_bustools.nf | 2 +-
 subworkflows/local/starsolo.nf          | 2 +-
 workflows/scrnaseq.nf                   | 4 ++--
 14 files changed, 32 insertions(+), 15 deletions(-)

diff --git a/README.md b/README.md
index 5bd4ca6f..64e7419a 100644
--- a/README.md
+++ b/README.md
@@ -53,7 +53,7 @@ The nf-core/scrnaseq pipeline comes with documentation about the pipeline [usage
 4. Start running your own analysis!
 
    ```bash
-   nextflow run nf-core/scrnaseq --input samplesheet.csv --outdir <OUTDIR> --genome_fasta GRCm38.p6.genome.chr19.fa --gtf gencode.vM19.annotation.chr19.gtf --protocol 10XV2 --aligner <alevin/kallisto/star/cellranger> -profile <docker/singularity/podman/shifter/charliecloud/conda/institute>
+   nextflow run nf-core/scrnaseq --input samplesheet.csv --outdir <OUTDIR> --fasta GRCm38.p6.genome.chr19.fa --gtf gencode.vM19.annotation.chr19.gtf --protocol 10XV2 --aligner <alevin/kallisto/star/cellranger> -profile <docker/singularity/podman/shifter/charliecloud/conda/institute>
    ```
 
 ## Credits
diff --git a/conf/test.config b/conf/test.config
index 95d1ed1c..34111c6c 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -21,9 +21,13 @@ params {
 
     // Input data
     input        = 'https://github.com/nf-core/test-datasets/raw/scrnaseq/samplesheet-2-0.csv'
-    genome_fasta = 'https://github.com/nf-core/test-datasets/raw/scrnaseq/reference/GRCm38.p6.genome.chr19.fa'
+
+    // Genome references
+    fasta        = 'https://github.com/nf-core/test-datasets/raw/scrnaseq/reference/GRCm38.p6.genome.chr19.fa'
     gtf          = 'https://github.com/nf-core/test-datasets/raw/scrnaseq/reference/gencode.vM19.annotation.chr19.gtf'
+    aligner      = 'star'
     protocol     = '10XV2'
+
     // Ignore `--input` as otherwise the parameter validation will throw an error
     schema_ignore_params = 'genomes,input_paths,input'
 }
diff --git a/conf/test_full.config b/conf/test_full.config
index 033450cc..13c716d8 100644
--- a/conf/test_full.config
+++ b/conf/test_full.config
@@ -16,8 +16,10 @@ params {
 
     // Input data
     input        = 'https://raw.githubusercontent.com/nf-core/test-datasets/scrnaseq/samplesheet_2.0_full.csv'
-    genome_fasta = 's3://nf-core-awsmegatests/scrnaseq/input_data/Homo_sapiens.GRCh38.dna.primary_assembly.fa'
-    gtf          = 's3://nf-core-awsmegatests/scrnaseq/input_data/Homo_sapiens.GRCh38.106.gtf'
+
+    // Genome references
+    genome       = 'GRCh38'
+    aligner      = 'star'
     protocol     = '10XV2'
     schema_ignore_params = 'genomes'
 }
diff --git a/docs/usage.md b/docs/usage.md
index f0215070..a566804f 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -77,7 +77,7 @@ TEST1,TEST1_S1_L001_R1_001.fastq.gz,TEST1_S1_L001_R2_001.fastq.gz
 The minimum typical command for running the pipeline is as follows:
 
 ```bash
-nextflow run nf-core/scrnaseq --input 'samplesheet.csv' --genome_fasta human.fasta --gtf human.gtf -profile docker
+nextflow run nf-core/scrnaseq --input 'samplesheet.csv' --genome GRCh38 -profile docker
 ```
 
 This will launch the pipeline with the `docker` configuration profile and default `--type` and `--barcode_whitelist`. See below for more information about profiles and these options.
diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy
index f81790bf..627cb63c 100755
--- a/lib/WorkflowMain.groovy
+++ b/lib/WorkflowMain.groovy
@@ -21,7 +21,7 @@ class WorkflowMain {
     // Print help to screen if required
     //
     public static String help(workflow, params, log) {
-        def command = "nextflow run ${workflow.manifest.name} --input samplesheet.csv --genome_fasta human.fasta --gtf human.gtf -profile docker"
+        def command = "nextflow run ${workflow.manifest.name} --input samplesheet.csv --fasta human.fasta --gtf human.gtf -profile docker"
         def help_string = ''
         help_string += NfcoreTemplate.logo(workflow, params.monochrome_logs)
         help_string += NfcoreSchema.paramsHelp(workflow, params, command)
@@ -85,7 +85,11 @@ class WorkflowMain {
         if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) {
             if (params.genomes[ params.genome ].containsKey(attribute)) {
                 return params.genomes[ params.genome ][ attribute ]
+            } else {
+                println "Could not find attribute '$attribute'"
             }
+        } else {
+            println "Could not find genome"
         }
         return null
     }
diff --git a/lib/WorkflowScrnaseq.groovy b/lib/WorkflowScrnaseq.groovy
index af239a67..25c2b120 100755
--- a/lib/WorkflowScrnaseq.groovy
+++ b/lib/WorkflowScrnaseq.groovy
@@ -14,6 +14,11 @@ class WorkflowScrnaseq {
             log.error "Please provide an input samplesheet with --input"
             System.exit(1)
         }
+        
+        if (!params.fasta) {
+            log.error "Genome fasta file not specified with e.g. '--fasta genome.fa' or via a detectable config file."
+            System.exit(1)
+        }
     }
 
     //
diff --git a/main.nf b/main.nf
index 20a8d1fc..fe560c6d 100644
--- a/main.nf
+++ b/main.nf
@@ -17,6 +17,9 @@ nextflow.enable.dsl = 2
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */
 
+params.fasta = WorkflowMain.getGenomeAttribute(params, 'fasta')
+params.gtf   = WorkflowMain.getGenomeAttribute(params, 'gtf')
+
 WorkflowMain.initialise(workflow, params, log)
 
 /*
diff --git a/nextflow.config b/nextflow.config
index f4bfc50a..e6d6bef8 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -18,8 +18,7 @@ params {
     protocol          = '10XV3'
 
     // reference files
-    genome_fasta      = null
-    gtf               = null
+    genome            = null
     transcript_fasta  = null
 
     // salmon alevin parameters
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 98f5421f..783ecf98 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -81,7 +81,7 @@
                     "fa_icon": "fas fa-book",
                     "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details."
                 },
-                "genome_fasta": {
+                "fasta": {
                     "type": "string",
                     "format": "file-path",
                     "mimetype": "text/plain",
diff --git a/subworkflows/local/alevin.nf b/subworkflows/local/alevin.nf
index c1b122e1..7ec0c57e 100644
--- a/subworkflows/local/alevin.nf
+++ b/subworkflows/local/alevin.nf
@@ -28,7 +28,7 @@ workflow SCRNASEQ_ALEVIN {
     ch_versions = Channel.empty()
 
     assert salmon_index || (genome_fasta && gtf) || (genome_fasta && transcript_fasta):
-        """Must provide a genome fasta file ('--genome_fasta') and a gtf file ('--gtf'), or a genome fasta file
+        """Must provide a genome fasta file ('--fasta') and a gtf file ('--gtf'), or a genome fasta file
         and a transcriptome fasta file ('--transcript_fasta`) if no index is given!""".stripIndent()
 
     assert txp2gene || gtf:
diff --git a/subworkflows/local/align_cellranger.nf b/subworkflows/local/align_cellranger.nf
index 744215e0..99e89656 100644
--- a/subworkflows/local/align_cellranger.nf
+++ b/subworkflows/local/align_cellranger.nf
@@ -19,7 +19,7 @@ workflow CELLRANGER_ALIGN {
         ch_versions = Channel.empty()
 
         assert cellranger_index || (fasta && gtf):
-            "Must provide either a cellranger index or both a fasta file ('--genome_fasta') and a gtf file ('--gtf')."
+            "Must provide either a cellranger index or both a fasta file ('--fasta') and a gtf file ('--gtf')."
 
         if (!cellranger_index) {
             // Filter GTF based on gene biotypes passed in params.modules
diff --git a/subworkflows/local/kallisto_bustools.nf b/subworkflows/local/kallisto_bustools.nf
index bc958b47..356378c3 100644
--- a/subworkflows/local/kallisto_bustools.nf
+++ b/subworkflows/local/kallisto_bustools.nf
@@ -23,7 +23,7 @@ workflow KALLISTO_BUSTOOLS {
     ch_versions = Channel.empty()
 
     assert kallisto_index || (genome_fasta && gtf):
-        "Must provide a genome fasta file ('--genome_fasta') and a gtf file ('--gtf') if no index is given!"
+        "Must provide a genome fasta file ('--fasta') and a gtf file ('--gtf') if no index is given!"
 
     assert txp2gene || gtf:
         "Must provide either a GTF file ('--gtf') or kallisto gene map ('--kallisto_gene_map') to align with kallisto bustools!"
diff --git a/subworkflows/local/starsolo.nf b/subworkflows/local/starsolo.nf
index 2c2f57dd..94b4c976 100644
--- a/subworkflows/local/starsolo.nf
+++ b/subworkflows/local/starsolo.nf
@@ -22,7 +22,7 @@ workflow STARSOLO {
     ch_versions = Channel.empty()
 
     assert star_index || (genome_fasta && gtf):
-        "Must provide a genome fasta file ('--genome_fasta') and a gtf file ('--gtf') if no index is given!"
+        "Must provide a genome fasta file ('--fasta') and a gtf file ('--gtf') if no index is given!"
 
     assert gtf: "Must provide a gtf file ('--gtf') for STARSOLO"
 
diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf
index 3e557eb1..3e2e5fe8 100644
--- a/workflows/scrnaseq.nf
+++ b/workflows/scrnaseq.nf
@@ -10,7 +10,7 @@ def summary_params = NfcoreSchema.paramsSummaryMap(workflow, params)
 WorkflowScrnaseq.initialise(params, log)
 
 def checkPathParamList = [
-    params.input, params.multiqc_config, params.genome_fasta, params.gtf,
+    params.input, params.multiqc_config, params.fasta, params.gtf,
     params.transcript_fasta, params.salmon_index, params.kallisto_index,
     params.star_index, params.txp2gene, params.barcode_whitelist, params.cellranger_index
 ]
@@ -69,7 +69,7 @@ ch_output_docs_images = file("$projectDir/docs/images/", checkIfExists: true)
 
 // general input and params
 ch_input = file(params.input)
-ch_genome_fasta = params.genome_fasta ? file(params.genome_fasta) : []
+ch_genome_fasta = params.fasta ? file(params.fasta) : []
 ch_gtf = params.gtf ? file(params.gtf) : []
 ch_transcript_fasta = params.transcript_fasta ? file(params.transcript_fasta): []
 ch_txp2gene = params.txp2gene ? file(params.txp2gene) : []

From 4077c970222f6ab8e484b65d6ace4a12c671bc50 Mon Sep 17 00:00:00 2001
From: Rob Syme <rob.syme@gmail.com>
Date: Fri, 30 Sep 2022 03:06:16 +0000
Subject: [PATCH 118/165] Typo and linting fix for schema.

---
 nextflow_schema.json | 38 ++++++++++++++++++++++++++++++++------
 1 file changed, 32 insertions(+), 6 deletions(-)

diff --git a/nextflow_schema.json b/nextflow_schema.json
index 783ecf98..88f1efa1 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -10,7 +10,10 @@
             "type": "object",
             "fa_icon": "fas fa-terminal",
             "description": "Define where the pipeline should find input data and save output data.",
-            "required": ["input", "outdir"],
+            "required": [
+                "input",
+                "outdir"
+            ],
             "properties": {
                 "input": {
                     "type": "string",
@@ -57,14 +60,24 @@
                     "default": "alevin",
                     "help_text": "The workflow can handle three types of methods:\n\n- Kallisto/Bustools\n- Salmon Alevin + AlevinQC\n- STARsolo\n\nTo choose which one to use, please specify either `alevin`, `star` or `kallisto` as a parameter option for `--aligner`. By default, the pipeline runs the `alevin` option. Note that specifying another aligner option also requires choosing appropriate parameters (see below) for the selected option.",
                     "fa_icon": "fas fa-align-center",
-                    "enum": ["kallisto", "star", "alevin", "cellranger"]
+                    "enum": [
+                        "kallisto",
+                        "star",
+                        "alevin",
+                        "cellranger"
+                    ]
                 },
                 "protocol": {
                     "type": "string",
                     "description": "The protocol that was used to generate the single cell data, e.g. 10XV2 (default).",
                     "default": "10XV2",
                     "fa_icon": "fas fa-cogs",
-                    "enum": ["10XV3", "10XV2", "10XV1", "dropseq"]
+                    "enum": [
+                        "10XV3",
+                        "10XV2",
+                        "10XV1",
+                        "dropseq"
+                    ]
                 }
             },
             "fa_icon": "fas fa-terminal"
@@ -107,7 +120,7 @@
                 },
                 "transcript_fasta": {
                     "type": "string",
-                    "description": "A cDNA FASTQ file",
+                    "description": "A cDNA FASTA file",
                     "fa_icon": "fas fa-dna"
                 },
                 "gtf": {
@@ -192,7 +205,13 @@
                     "default": "standard",
                     "description": "Type of workflow. Use `lamanno` for RNA velocity based on La Manno et al. 2018 logic. Use `nucleus` for RNA velocity on single-nucleus RNA-seq reads. Use `kite` for feature barcoding. Use `kite: 10xFB` for 10x Genomics Feature Barcoding technology. (default: standard)",
                     "fa_icon": "fas fa-fish",
-                    "enum": ["standard", "lamanno", "nucleus", "kite", "kite: 10xFB"]
+                    "enum": [
+                        "standard",
+                        "lamanno",
+                        "nucleus",
+                        "kite",
+                        "kite: 10xFB"
+                    ]
                 }
             }
         },
@@ -310,7 +329,14 @@
                     "description": "Method used to save pipeline results to output directory.",
                     "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.",
                     "fa_icon": "fas fa-copy",
-                    "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"],
+                    "enum": [
+                        "symlink",
+                        "rellink",
+                        "link",
+                        "copy",
+                        "copyNoFollow",
+                        "move"
+                    ],
                     "hidden": true
                 },
                 "email_on_fail": {

From 711a8051207eda0298e97862e5430e60aac71de4 Mon Sep 17 00:00:00 2001
From: Rob Syme <rob.syme@gmail.com>
Date: Fri, 30 Sep 2022 03:11:12 +0000
Subject: [PATCH 119/165] Set seq_center param default for STARsolo aligner.

---
 nextflow.config | 1 +
 1 file changed, 1 insertion(+)

diff --git a/nextflow.config b/nextflow.config
index e6d6bef8..2761e80f 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -35,6 +35,7 @@ params {
     // STARsolo parameters
     star_index          = null
     star_ignore_sjdbgtf = null
+    seq_center          = null
 
     // Cellranger parameters
     cellranger_index    = null

From adb9cf0e03b748df09378e3437e1d5164f5c899f Mon Sep 17 00:00:00 2001
From: Rob Syme <rob.syme@gmail.com>
Date: Fri, 30 Sep 2022 03:17:50 +0000
Subject: [PATCH 120/165] Whitespace fix

---
 lib/WorkflowScrnaseq.groovy | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/WorkflowScrnaseq.groovy b/lib/WorkflowScrnaseq.groovy
index 25c2b120..73998b0c 100755
--- a/lib/WorkflowScrnaseq.groovy
+++ b/lib/WorkflowScrnaseq.groovy
@@ -14,7 +14,7 @@ class WorkflowScrnaseq {
             log.error "Please provide an input samplesheet with --input"
             System.exit(1)
         }
-        
+
         if (!params.fasta) {
             log.error "Genome fasta file not specified with e.g. '--fasta genome.fa' or via a detectable config file."
             System.exit(1)

From 4a200466b858bd2655beed0d6f80920cc0c895e0 Mon Sep 17 00:00:00 2001
From: Rob Syme <rob.syme@gmail.com>
Date: Fri, 30 Sep 2022 03:35:13 +0000
Subject: [PATCH 121/165] Include seq_center in schema

---
 nextflow_schema.json | 43 +++++++++++--------------------------------
 1 file changed, 11 insertions(+), 32 deletions(-)

diff --git a/nextflow_schema.json b/nextflow_schema.json
index 88f1efa1..963abc19 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -2,7 +2,7 @@
     "$schema": "http://json-schema.org/draft-07/schema",
     "$id": "https://raw.githubusercontent.com/nf-core/scrnaseq/master/nextflow_schema.json",
     "title": "nf-core/scrnaseq pipeline parameters",
-    "description": "Pipeline for processing of 10xGenomics single cell rnaseq data",
+    "description": "Pipeline for processing 10x Genomics single cell rnaseq data",
     "type": "object",
     "definitions": {
         "input_output_options": {
@@ -10,10 +10,7 @@
             "type": "object",
             "fa_icon": "fas fa-terminal",
             "description": "Define where the pipeline should find input data and save output data.",
-            "required": [
-                "input",
-                "outdir"
-            ],
+            "required": ["input", "outdir"],
             "properties": {
                 "input": {
                     "type": "string",
@@ -60,24 +57,14 @@
                     "default": "alevin",
                     "help_text": "The workflow can handle three types of methods:\n\n- Kallisto/Bustools\n- Salmon Alevin + AlevinQC\n- STARsolo\n\nTo choose which one to use, please specify either `alevin`, `star` or `kallisto` as a parameter option for `--aligner`. By default, the pipeline runs the `alevin` option. Note that specifying another aligner option also requires choosing appropriate parameters (see below) for the selected option.",
                     "fa_icon": "fas fa-align-center",
-                    "enum": [
-                        "kallisto",
-                        "star",
-                        "alevin",
-                        "cellranger"
-                    ]
+                    "enum": ["kallisto", "star", "alevin", "cellranger"]
                 },
                 "protocol": {
                     "type": "string",
                     "description": "The protocol that was used to generate the single cell data, e.g. 10XV2 (default).",
                     "default": "10XV2",
                     "fa_icon": "fas fa-cogs",
-                    "enum": [
-                        "10XV3",
-                        "10XV2",
-                        "10XV1",
-                        "dropseq"
-                    ]
+                    "enum": ["10XV3", "10XV2", "10XV1", "dropseq"]
                 }
             },
             "fa_icon": "fas fa-terminal"
@@ -169,6 +156,11 @@
                 "star_ignore_sjdbgtf": {
                     "type": "string",
                     "description": "Ignore the SJDB GTF file."
+                },
+                "seq_center": {
+                    "type": "string",
+                    "description": "Name of sequencing center for BAM read group tag.",
+                    "default": null
                 }
             },
             "fa_icon": "fas fa-star"
@@ -205,13 +197,7 @@
                     "default": "standard",
                     "description": "Type of workflow. Use `lamanno` for RNA velocity based on La Manno et al. 2018 logic. Use `nucleus` for RNA velocity on single-nucleus RNA-seq reads. Use `kite` for feature barcoding. Use `kite: 10xFB` for 10x Genomics Feature Barcoding technology. (default: standard)",
                     "fa_icon": "fas fa-fish",
-                    "enum": [
-                        "standard",
-                        "lamanno",
-                        "nucleus",
-                        "kite",
-                        "kite: 10xFB"
-                    ]
+                    "enum": ["standard", "lamanno", "nucleus", "kite", "kite: 10xFB"]
                 }
             }
         },
@@ -329,14 +315,7 @@
                     "description": "Method used to save pipeline results to output directory.",
                     "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.",
                     "fa_icon": "fas fa-copy",
-                    "enum": [
-                        "symlink",
-                        "rellink",
-                        "link",
-                        "copy",
-                        "copyNoFollow",
-                        "move"
-                    ],
+                    "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"],
                     "hidden": true
                 },
                 "email_on_fail": {

From f2d4c9383a25775f62aff8c33f8cd284867ecc57 Mon Sep 17 00:00:00 2001
From: Rob Syme <rob.syme@gmail.com>
Date: Fri, 30 Sep 2022 03:37:40 +0000
Subject: [PATCH 122/165] Python black linting suggestions

---
 bin/check_samplesheet.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py
index 77a22f17..38db2319 100755
--- a/bin/check_samplesheet.py
+++ b/bin/check_samplesheet.py
@@ -142,11 +142,9 @@ def read_head(handle, num_lines=10):
 
 
 def print_error(error, context="Line", context_str=""):
-    error_str = "ERROR: Please check samplesheet -> {}".format(error)
+    error_str = f"ERROR: Please check samplesheet -> {error}"
     if context != "" and context_str != "":
-        error_str = "ERROR: Please check samplesheet -> {}\n{}: '{}'".format(
-            error, context.strip(), context_str.strip()
-        )
+        error_str = f"ERROR: Please check samplesheet -> {error}\n{context.strip()}: '{context_str.strip()}'"
     print(error_str)
     sys.exit(1)
 

From b1640220cc4aadd1c894e611555a725dd0ce7734 Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <felipemarques89@gmail.com>
Date: Fri, 30 Sep 2022 14:53:34 +0000
Subject: [PATCH 123/165] fixing simple_af results

---
 conf/modules.config             | 2 +-
 modules/local/alevinqc.nf       | 2 +-
 modules/local/simpleaf_quant.nf | 3 ++-
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/conf/modules.config b/conf/modules.config
index 66584811..2b05f1e7 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -82,7 +82,7 @@ if (params.aligner == "alevin") {
                 path: { "${params.outdir}/${params.aligner}" },
                 mode: params.publish_dir_mode
             ]
-            ext.args = "-r cr-like -d fw"
+            ext.args = "-r cr-like"
         }
     }
 }
diff --git a/modules/local/alevinqc.nf b/modules/local/alevinqc.nf
index 4832a2d6..4e1a7d7e 100644
--- a/modules/local/alevinqc.nf
+++ b/modules/local/alevinqc.nf
@@ -12,7 +12,7 @@ process ALEVINQC {
 
     output:
     tuple val(meta), path("alevin_report_${meta.id}.html"), emit: report
-    path  "versions.yml"                      , emit: versions
+    path  "versions.yml", emit: versions
 
     script:
     def prefix = task.ext.prefix ?: "${meta.id}"
diff --git a/modules/local/simpleaf_quant.nf b/modules/local/simpleaf_quant.nf
index 827e8991..fb485df8 100644
--- a/modules/local/simpleaf_quant.nf
+++ b/modules/local/simpleaf_quant.nf
@@ -47,8 +47,9 @@ process SIMPLEAF_QUANT {
         -c $protocol \\
         -u whitelist.txt \\
         $args
-    
+
     mv whitelist.txt ${prefix}_alevin_results/
+    cp ${prefix}_alevin_results/af_quant/permit_freq.bin ${prefix}_alevin_results/af_quant/all_freq.bin
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":

From 001f456c24b1588f651e14823600c6efc323b0df Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <felipemarques89@gmail.com>
Date: Fri, 30 Sep 2022 15:00:24 +0000
Subject: [PATCH 124/165] remove wrong file

---
 test/null/pipeline_info/execution_trace_2022-09-26_07-01-32.txt | 1 -
 1 file changed, 1 deletion(-)
 delete mode 100644 test/null/pipeline_info/execution_trace_2022-09-26_07-01-32.txt

diff --git a/test/null/pipeline_info/execution_trace_2022-09-26_07-01-32.txt b/test/null/pipeline_info/execution_trace_2022-09-26_07-01-32.txt
deleted file mode 100644
index 6b739acd..00000000
--- a/test/null/pipeline_info/execution_trace_2022-09-26_07-01-32.txt
+++ /dev/null
@@ -1 +0,0 @@
-task_id	hash	native_id	name	status	exit	submit	duration	realtime	%cpu	peak_rss	peak_vmem	rchar	wchar

From 69074dc6c58fa4b7d86e5da08a10ba628ea0fffe Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <felipemarques89@gmail.com>
Date: Fri, 30 Sep 2022 15:58:31 +0000
Subject: [PATCH 125/165] detect the use of incompatible parameters

---
 conf/modules.config             |  2 +-
 modules/local/simpleaf_quant.nf | 23 ++++++++++++++++++-----
 2 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/conf/modules.config b/conf/modules.config
index 2b05f1e7..9b3a7887 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -82,7 +82,7 @@ if (params.aligner == "alevin") {
                 path: { "${params.outdir}/${params.aligner}" },
                 mode: params.publish_dir_mode
             ]
-            ext.args = "-r cr-like"
+            ext.args = "-r cr-like -k"
         }
     }
 }
diff --git a/modules/local/simpleaf_quant.nf b/modules/local/simpleaf_quant.nf
index fb485df8..5cd3a8a4 100644
--- a/modules/local/simpleaf_quant.nf
+++ b/modules/local/simpleaf_quant.nf
@@ -23,8 +23,21 @@ process SIMPLEAF_QUANT {
     path  "versions.yml"                     , emit: versions
 
     script:
-    def args   = task.ext.args ?: ''
-    def prefix = task.ext.prefix ?: "${meta.id}"
+    def args      = task.ext.args ?: ''
+    def args_list = args.tokenize()
+    def prefix    = task.ext.prefix ?: "${meta.id}"
+
+    //
+    // check if users are using one of the mutually excludable parameters:
+    //    e.g -k,--knee | -e,--expect-cells | -f, --forced-cells
+    //
+    if (args_list.any { it in ['-k', '--knee', '-e', '--expect-cells', '-f', '--forced-cells']}) {
+        unfiltered_command = ""
+        save_whitelist     = ""
+    } else {
+        unfiltered_command = "-u whitelist.txt"
+        save_whitelist     = "mv whitelist.txt ${prefix}_alevin_results/"
+    }
 
     // separate forward from reverse pairs
     def (forward, reverse) = reads.collate(2).transpose()
@@ -45,11 +58,11 @@ process SIMPLEAF_QUANT {
         -m $txp2gene \\
         -t $task.cpus \\
         -c $protocol \\
-        -u whitelist.txt \\
+        $unfiltered_command \\
         $args
 
-    mv whitelist.txt ${prefix}_alevin_results/
-    cp ${prefix}_alevin_results/af_quant/permit_freq.bin ${prefix}_alevin_results/af_quant/all_freq.bin
+    $save_whitelist
+    [[ ! -f ${prefix}_alevin_results/af_quant/all_freq.bin ]] && cp ${prefix}_alevin_results/af_quant/permit_freq.bin ${prefix}_alevin_results/af_quant/all_freq.bin
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":

From f3b072f5f8fb8b7dffe81b13218b67d47aaa5ca9 Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <felipemarques89@gmail.com>
Date: Fri, 30 Sep 2022 15:58:42 +0000
Subject: [PATCH 126/165] remove hard-coded version

---
 modules/local/simpleaf_index.nf | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/modules/local/simpleaf_index.nf b/modules/local/simpleaf_index.nf
index 2d3d7cb2..c81c5a51 100644
--- a/modules/local/simpleaf_index.nf
+++ b/modules/local/simpleaf_index.nf
@@ -39,10 +39,10 @@ process SIMPLEAF_INDEX {
         $seq_inputs \\
         $args \\
         -o salmon
-    
+
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
-        simpleaf: 0.4.0
+        simpleaf: TODO: find a way to grab version
     END_VERSIONS
     """
 }

From 3766144ca37c3628d3f780fd9d26d7cacc535c51 Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <felipemarques89@gmail.com>
Date: Fri, 30 Sep 2022 16:05:18 +0000
Subject: [PATCH 127/165] turning back to default option

---
 conf/modules.config | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/conf/modules.config b/conf/modules.config
index 9b3a7887..2b05f1e7 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -82,7 +82,7 @@ if (params.aligner == "alevin") {
                 path: { "${params.outdir}/${params.aligner}" },
                 mode: params.publish_dir_mode
             ]
-            ext.args = "-r cr-like -k"
+            ext.args = "-r cr-like"
         }
     }
 }

From b5de9008314216265cea479a4114416d76d66d35 Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <felipemarques89@gmail.com>
Date: Fri, 30 Sep 2022 16:11:44 +0000
Subject: [PATCH 128/165] add suffix to uncompressed whitelist

---
 modules/local/simpleaf_quant.nf | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/modules/local/simpleaf_quant.nf b/modules/local/simpleaf_quant.nf
index 5cd3a8a4..a95b6e68 100644
--- a/modules/local/simpleaf_quant.nf
+++ b/modules/local/simpleaf_quant.nf
@@ -35,8 +35,8 @@ process SIMPLEAF_QUANT {
         unfiltered_command = ""
         save_whitelist     = ""
     } else {
-        unfiltered_command = "-u whitelist.txt"
-        save_whitelist     = "mv whitelist.txt ${prefix}_alevin_results/"
+        unfiltered_command = "-u whitelist.uncompressed.txt"
+        save_whitelist     = "mv whitelist.uncompressed.txt ${prefix}_alevin_results/"
     }
 
     // separate forward from reverse pairs
@@ -49,7 +49,7 @@ process SIMPLEAF_QUANT {
     simpleaf set-paths
 
     # run simpleaf quant
-    gzip -dcf $whitelist > whitelist.txt
+    gzip -dcf $whitelist > whitelist.uncompressed.txt
     simpleaf quant \\
         -1 ${forward.join( "," )} \\
         -2 ${reverse.join( "," )} \\

From 16ce654b2b6e4fbf25ec4b248a86c92d3300466c Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <felipemarques89@gmail.com>
Date: Fri, 30 Sep 2022 16:24:45 +0000
Subject: [PATCH 129/165] adding simpleaf version as variable and getting
 salmon version

---
 modules/local/simpleaf_index.nf | 5 ++++-
 modules/local/simpleaf_quant.nf | 3 +++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/modules/local/simpleaf_index.nf b/modules/local/simpleaf_index.nf
index c81c5a51..f1837a1d 100644
--- a/modules/local/simpleaf_index.nf
+++ b/modules/local/simpleaf_index.nf
@@ -24,6 +24,8 @@ process SIMPLEAF_INDEX {
     script:
     def args = task.ext.args ?: ''
     def seq_inputs = (params.transcript_fasta) ? "--refseq $transcript_fasta" : "--gtf $transcript_gtf"
+    // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions.
+    def VERSION = '0.5.1'
     """
     # export required var
     export ALEVIN_FRY_HOME=.
@@ -42,7 +44,8 @@ process SIMPLEAF_INDEX {
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
-        simpleaf: TODO: find a way to grab version
+        simpleaf: $VERSION
+        salmon: \$(salmon --version | sed -e "s/salmon //g")
     END_VERSIONS
     """
 }
diff --git a/modules/local/simpleaf_quant.nf b/modules/local/simpleaf_quant.nf
index a95b6e68..8385a803 100644
--- a/modules/local/simpleaf_quant.nf
+++ b/modules/local/simpleaf_quant.nf
@@ -26,6 +26,8 @@ process SIMPLEAF_QUANT {
     def args      = task.ext.args ?: ''
     def args_list = args.tokenize()
     def prefix    = task.ext.prefix ?: "${meta.id}"
+    // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions.
+    def VERSION   = '0.5.1'
 
     //
     // check if users are using one of the mutually excludable parameters:
@@ -66,6 +68,7 @@ process SIMPLEAF_QUANT {
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
+        simpleaf: $VERSION
         salmon: \$(salmon --version | sed -e "s/salmon //g")
     END_VERSIONS
     """

From 518ee5d854f430336e2c7e6a56f848413a5cbcd9 Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <felipemarques89@gmail.com>
Date: Sat, 1 Oct 2022 20:44:49 +0000
Subject: [PATCH 130/165] bump simpleaf version

---
 modules/local/simpleaf_index.nf | 8 +++-----
 modules/local/simpleaf_quant.nf | 8 +++-----
 2 files changed, 6 insertions(+), 10 deletions(-)

diff --git a/modules/local/simpleaf_index.nf b/modules/local/simpleaf_index.nf
index f1837a1d..bf31285f 100644
--- a/modules/local/simpleaf_index.nf
+++ b/modules/local/simpleaf_index.nf
@@ -4,8 +4,8 @@ process SIMPLEAF_INDEX {
 
     conda (params.enable_conda ? 'bioconda::simpleaf=0.5.1' : null)
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/simpleaf:0.5.1--h9f5acd7_0' :
-        'quay.io/biocontainers/simpleaf:0.5.1--h9f5acd7_0' }"
+        'https://depot.galaxyproject.org/singularity/simpleaf:0.5.2--h9f5acd7_0' :
+        'quay.io/biocontainers/simpleaf:0.5.2--h9f5acd7_0' }"
 
     input:
     path genome_fasta
@@ -24,8 +24,6 @@ process SIMPLEAF_INDEX {
     script:
     def args = task.ext.args ?: ''
     def seq_inputs = (params.transcript_fasta) ? "--refseq $transcript_fasta" : "--gtf $transcript_gtf"
-    // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions.
-    def VERSION = '0.5.1'
     """
     # export required var
     export ALEVIN_FRY_HOME=.
@@ -44,7 +42,7 @@ process SIMPLEAF_INDEX {
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
-        simpleaf: $VERSION
+        simpleaf: \$(simpleaf -V | tr -d '\n')
         salmon: \$(salmon --version | sed -e "s/salmon //g")
     END_VERSIONS
     """
diff --git a/modules/local/simpleaf_quant.nf b/modules/local/simpleaf_quant.nf
index 8385a803..e2e47ccd 100644
--- a/modules/local/simpleaf_quant.nf
+++ b/modules/local/simpleaf_quant.nf
@@ -4,8 +4,8 @@ process SIMPLEAF_QUANT {
 
     conda (params.enable_conda ? 'bioconda::simpleaf=0.5.1' : null)
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/simpleaf:0.5.1--h9f5acd7_0' :
-        'quay.io/biocontainers/simpleaf:0.5.1--h9f5acd7_0' }"
+        'https://depot.galaxyproject.org/singularity/simpleaf:0.5.2--h9f5acd7_0' :
+        'quay.io/biocontainers/simpleaf:0.5.2--h9f5acd7_0' }"
 
     input:
     //
@@ -26,8 +26,6 @@ process SIMPLEAF_QUANT {
     def args      = task.ext.args ?: ''
     def args_list = args.tokenize()
     def prefix    = task.ext.prefix ?: "${meta.id}"
-    // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions.
-    def VERSION   = '0.5.1'
 
     //
     // check if users are using one of the mutually excludable parameters:
@@ -68,7 +66,7 @@ process SIMPLEAF_QUANT {
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
-        simpleaf: $VERSION
+        simpleaf: \$(simpleaf -V | tr -d '\n')
         salmon: \$(salmon --version | sed -e "s/salmon //g")
     END_VERSIONS
     """

From 908370682fe13d142e57c4465980bfe04f340207 Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <felipemarques89@gmail.com>
Date: Sat, 1 Oct 2022 20:57:11 +0000
Subject: [PATCH 131/165] fix simpleaf get version command

---
 modules/local/simpleaf_index.nf | 2 +-
 modules/local/simpleaf_quant.nf | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/modules/local/simpleaf_index.nf b/modules/local/simpleaf_index.nf
index bf31285f..5bf54e7d 100644
--- a/modules/local/simpleaf_index.nf
+++ b/modules/local/simpleaf_index.nf
@@ -42,7 +42,7 @@ process SIMPLEAF_INDEX {
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
-        simpleaf: \$(simpleaf -V | tr -d '\n')
+        simpleaf: \$(simpleaf -V | tr -d '\\n' | cut -d ' ' -f 2)
         salmon: \$(salmon --version | sed -e "s/salmon //g")
     END_VERSIONS
     """
diff --git a/modules/local/simpleaf_quant.nf b/modules/local/simpleaf_quant.nf
index e2e47ccd..8f7b91c6 100644
--- a/modules/local/simpleaf_quant.nf
+++ b/modules/local/simpleaf_quant.nf
@@ -66,7 +66,7 @@ process SIMPLEAF_QUANT {
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
-        simpleaf: \$(simpleaf -V | tr -d '\n')
+        simpleaf: \$(simpleaf -V | tr -d '\\n' | cut -d ' ' -f 2)
         salmon: \$(salmon --version | sed -e "s/salmon //g")
     END_VERSIONS
     """

From db77e65e4b572e733a97f599cbbbe9b08344b575 Mon Sep 17 00:00:00 2001
From: Rob Syme <rob.syme@gmail.com>
Date: Sun, 2 Oct 2022 01:07:18 +0000
Subject: [PATCH 132/165] Cleanup

---
 lib/WorkflowMain.groovy                | 4 ----
 subworkflows/local/align_cellranger.nf | 1 -
 2 files changed, 5 deletions(-)

diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy
index 627cb63c..9c3250c6 100755
--- a/lib/WorkflowMain.groovy
+++ b/lib/WorkflowMain.groovy
@@ -85,11 +85,7 @@ class WorkflowMain {
         if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) {
             if (params.genomes[ params.genome ].containsKey(attribute)) {
                 return params.genomes[ params.genome ][ attribute ]
-            } else {
-                println "Could not find attribute '$attribute'"
             }
-        } else {
-            println "Could not find genome"
         }
         return null
     }
diff --git a/subworkflows/local/align_cellranger.nf b/subworkflows/local/align_cellranger.nf
index 99e89656..ff994b6c 100644
--- a/subworkflows/local/align_cellranger.nf
+++ b/subworkflows/local/align_cellranger.nf
@@ -26,7 +26,6 @@ workflow CELLRANGER_ALIGN {
             CELLRANGER_MKGTF( gtf )
             ch_versions = ch_versions.mix(CELLRANGER_MKGTF.out.versions)
 
-            CELLRANGER_MKGTF.out.gtf.view()
             // Make reference genome
             CELLRANGER_MKREF( fasta, CELLRANGER_MKGTF.out.gtf, "cellranger_reference" )
             ch_versions = ch_versions.mix(CELLRANGER_MKREF.out.versions)

From 2a113742c9958595aa17cb969f90410d4090f3b0 Mon Sep 17 00:00:00 2001
From: Rob Syme <rob.syme@gmail.com>
Date: Sun, 2 Oct 2022 01:09:19 +0000
Subject: [PATCH 133/165] No longer allow gzipped fasta from SALMON_INDEX

Previously, I introduced some unnecessary complexity to the
SALMON_INDEX process to automatically decompress the genome fasta file.
This is unnecessary because the pipeline has a strict requirement for
uncompressed fasta elsewhere in the pipeline.
---
 modules/nf-core/modules/salmon/index/main.nf | 17 +++++------------
 1 file changed, 5 insertions(+), 12 deletions(-)

diff --git a/modules/nf-core/modules/salmon/index/main.nf b/modules/nf-core/modules/salmon/index/main.nf
index d875a345..87005fdd 100644
--- a/modules/nf-core/modules/salmon/index/main.nf
+++ b/modules/nf-core/modules/salmon/index/main.nf
@@ -22,19 +22,12 @@ process SALMON_INDEX {
     def args = task.ext.args ?: ''
     def kmer_argmatch = args =~ /\-k *(\d+)/
     def k = kmer_argmatch ? kmer_argmatch[0][1] : 31
-    def get_decoy_ids = "grep '^>' $genome_fasta | cut -d ' ' -f 1 > decoys.txt"
-    def gentrome      = "gentrome.fa"
-    def maybe_unzip   = "cat"
-    if (genome_fasta.endsWith('.gz')) {
-        get_decoy_ids = "grep '^>' <(gunzip -c $genome_fasta) | cut -d ' ' -f 1 > decoys.txt"
-        gentrome      = "gentrome.fa.gz"
-        maybe_unzip   = "gunzip -c" 
-    }
     """
-    $get_decoy_ids
-    sed -i.bak -e 's/>//g' decoys.txt
-    cat $transcript_fasta $genome_fasta \\
-    | $maybe_unzip \\
+    grep '^>' $genome_fasta \\
+    | cut -d ' ' -f 1 \\
+    | sed 's/>//g' > decoys.txt
+
+    cat $genome_fasta \\
     | awk '!/^>/ { next } { getline seq } length(seq) >= $k { print \$0 "\\n" seq }' \\
     | gzip -c > gentrome.filtered.fasta.gz
 

From d55ffd5e5f24ef062b15a1c5503dbc0b1cefa27c Mon Sep 17 00:00:00 2001
From: Rob Syme <rob.syme@gmail.com>
Date: Sun, 2 Oct 2022 01:19:57 +0000
Subject: [PATCH 134/165] Include attribution for gtf_for_genes script

---
 bin/filter_gtf_for_genes_in_genome.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/bin/filter_gtf_for_genes_in_genome.py b/bin/filter_gtf_for_genes_in_genome.py
index ef4c87cd..2ec44b9f 100755
--- a/bin/filter_gtf_for_genes_in_genome.py
+++ b/bin/filter_gtf_for_genes_in_genome.py
@@ -1,4 +1,7 @@
 #!/usr/bin/env python
+# Script originally written by Pranathi Vemuri (github.com/pranathivemuri)
+#   modified by Harshil Patel (github.com/drpatelh)
+
 from __future__ import print_function
 import logging
 from itertools import groupby

From 10ab4d21eb9bad4e1c1a2712fff39c83463cb7c8 Mon Sep 17 00:00:00 2001
From: Rob Syme <rob.syme@gmail.com>
Date: Mon, 3 Oct 2022 00:46:53 +0000
Subject: [PATCH 135/165] Turn off filtered gtf publishing and add tower.yml

---
 conf/modules.config |  8 ++++++++
 tower.yml           | 11 +++++++++++
 2 files changed, 19 insertions(+)
 create mode 100644 tower.yml

diff --git a/conf/modules.config b/conf/modules.config
index e9992e7e..0ff786c2 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -37,6 +37,14 @@ process {
             mode: params.publish_dir_mode
         ]
     }
+    withName: 'GTF_GENE_FILTER' {
+        publishDir = [
+            path: { "${params.outdir}/gtf_filter" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
+            enabled: false
+        ]
+    }
 }
 
 if(params.aligner == "cellranger") {
diff --git a/tower.yml b/tower.yml
new file mode 100644
index 00000000..999e82d3
--- /dev/null
+++ b/tower.yml
@@ -0,0 +1,11 @@
+reports:
+  multiqc_report.html:
+    display: "MultiQC HTML report"
+  "**/fastqc/*_fastqc.html":
+    display: "FastQC HTML report"
+  "**/pipeline_info/execution_timeline_*.html":
+    display: "Execution timeline report"
+  "**/pipeline_info/execution_report_*.html":
+    display: "Execution overview report"
+  "**/star/**/*.Log.final.out":
+    display: "Star per-sample report"

From 5035d9e27d70cd8524f4eec73eed9507bdddb014 Mon Sep 17 00:00:00 2001
From: Harshil Patel <drpatelh@users.noreply.github.com>
Date: Mon, 3 Oct 2022 09:38:15 +0100
Subject: [PATCH 136/165] Update check_samplesheet.py

---
 bin/check_samplesheet.py | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py
index 38db2319..51f9be10 100755
--- a/bin/check_samplesheet.py
+++ b/bin/check_samplesheet.py
@@ -228,7 +228,9 @@ def check_samplesheet(file_in, file_out):
             num_cols = len([x for x in lspl if x])
             if num_cols < MIN_COLS:
                 print_error(
-                    "Invalid number of populated columns (minimum = {})!".format(MIN_COLS),
+                    "Invalid number of populated columns (minimum = {})!".format(
+                        MIN_COLS
+                    ),
                     "Line",
                     line,
                 )
@@ -277,8 +279,14 @@ def check_samplesheet(file_in, file_out):
             for sample in sorted(sample_mapping_dict.keys()):
 
                 ## Check that multiple runs of the same sample are of the same datatype
-                if not all(x[0] == sample_mapping_dict[sample][0][0] for x in sample_mapping_dict[sample]):
-                    print_error("Multiple runs of a sample must be of the same datatype!", "Sample: {}".format(sample))
+                if not all(
+                    x[0] == sample_mapping_dict[sample][0][0]
+                    for x in sample_mapping_dict[sample]
+                ):
+                    print_error(
+                        "Multiple runs of a sample must be of the same datatype!",
+                        "Sample: {}".format(sample),
+                    )
 
                 for idx, val in enumerate(sample_mapping_dict[sample]):
                     fout.write(",".join(["{}".format(sample)] + val) + "\n")

From 8bc49f5cac68e617e7ecc791f16f6b377f63931d Mon Sep 17 00:00:00 2001
From: Harshil Patel <drpatelh@users.noreply.github.com>
Date: Mon, 3 Oct 2022 09:40:39 +0100
Subject: [PATCH 137/165] How the hell do you run BLACK??!!

---
 bin/check_samplesheet.py | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py
index 51f9be10..7e2d7e91 100755
--- a/bin/check_samplesheet.py
+++ b/bin/check_samplesheet.py
@@ -228,9 +228,7 @@ def check_samplesheet(file_in, file_out):
             num_cols = len([x for x in lspl if x])
             if num_cols < MIN_COLS:
                 print_error(
-                    "Invalid number of populated columns (minimum = {})!".format(
-                        MIN_COLS
-                    ),
+                    "Invalid number of populated columns (minimum = {})!".format(MIN_COLS),
                     "Line",
                     line,
                 )
@@ -279,10 +277,7 @@ def check_samplesheet(file_in, file_out):
             for sample in sorted(sample_mapping_dict.keys()):
 
                 ## Check that multiple runs of the same sample are of the same datatype
-                if not all(
-                    x[0] == sample_mapping_dict[sample][0][0]
-                    for x in sample_mapping_dict[sample]
-                ):
+                if not all(x[0] == sample_mapping_dict[sample][0][0] for x in sample_mapping_dict[sample]):
                     print_error(
                         "Multiple runs of a sample must be of the same datatype!",
                         "Sample: {}".format(sample),

From b0f7979f7769a6778aaf6f998a6998fc0b8e8977 Mon Sep 17 00:00:00 2001
From: nf-core-bot <core@nf-co.re>
Date: Tue, 4 Oct 2022 22:05:05 +0000
Subject: [PATCH 138/165] Template update for nf-core/tools version 2.6

---
 .github/workflows/awsfulltest.yml             |  4 ++
 .github/workflows/awstest.yml                 |  4 ++
 .prettierignore                               |  1 +
 CITATION.cff                                  |  8 +--
 LICENSE                                       |  2 +-
 README.md                                     |  4 +-
 assets/adaptivecard.json                      | 67 +++++++++++++++++++
 assets/email_template.html                    |  2 +-
 assets/methods_description_template.yml       | 25 +++++++
 assets/multiqc_config.yml                     |  6 +-
 docs/usage.md                                 |  8 +++
 lib/NfcoreTemplate.groovy                     | 55 +++++++++++++++
 lib/Utils.groovy                              | 21 ++++--
 lib/WorkflowScrnaseq.groovy                   | 19 ++++++
 main.nf                                       |  3 +-
 modules.json                                  | 27 ++++----
 .../custom/dumpsoftwareversions/main.nf       |  8 +--
 .../custom/dumpsoftwareversions/meta.yml      |  0
 .../templates/dumpsoftwareversions.py         |  0
 modules/nf-core/{modules => }/fastqc/main.nf  | 12 ++++
 modules/nf-core/{modules => }/fastqc/meta.yml |  0
 modules/nf-core/modules/multiqc/main.nf       | 31 ---------
 modules/nf-core/multiqc/main.nf               | 53 +++++++++++++++
 .../nf-core/{modules => }/multiqc/meta.yml    | 15 +++++
 nextflow.config                               |  9 ++-
 nextflow_schema.json                          | 20 +++++-
 workflows/scrnaseq.nf                         | 26 ++++---
 27 files changed, 352 insertions(+), 78 deletions(-)
 create mode 100644 assets/adaptivecard.json
 create mode 100644 assets/methods_description_template.yml
 mode change 100755 => 100644 lib/Utils.groovy
 rename modules/nf-core/{modules => }/custom/dumpsoftwareversions/main.nf (79%)
 rename modules/nf-core/{modules => }/custom/dumpsoftwareversions/meta.yml (100%)
 rename modules/nf-core/{modules => }/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py (100%)
 rename modules/nf-core/{modules => }/fastqc/main.nf (85%)
 rename modules/nf-core/{modules => }/fastqc/meta.yml (100%)
 delete mode 100644 modules/nf-core/modules/multiqc/main.nf
 create mode 100644 modules/nf-core/multiqc/main.nf
 rename modules/nf-core/{modules => }/multiqc/meta.yml (73%)

diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml
index 16970f5e..3aa2c0c7 100644
--- a/.github/workflows/awsfulltest.yml
+++ b/.github/workflows/awsfulltest.yml
@@ -28,3 +28,7 @@ jobs:
               "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/scrnaseq/results-${{ github.sha }}"
             }
           profiles: test_full,aws_tower
+      - uses: actions/upload-artifact@v3
+        with:
+          name: Tower debug log file
+          path: tower_action_*.log
diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml
index 5fa5d0d0..562e0983 100644
--- a/.github/workflows/awstest.yml
+++ b/.github/workflows/awstest.yml
@@ -23,3 +23,7 @@ jobs:
               "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/scrnaseq/results-test-${{ github.sha }}"
             }
           profiles: test,aws_tower
+      - uses: actions/upload-artifact@v3
+        with:
+          name: Tower debug log file
+          path: tower_action_*.log
diff --git a/.prettierignore b/.prettierignore
index d0e7ae58..eb74a574 100644
--- a/.prettierignore
+++ b/.prettierignore
@@ -1,4 +1,5 @@
 email_template.html
+adaptivecard.json
 .nextflow*
 work/
 data/
diff --git a/CITATION.cff b/CITATION.cff
index 4533e2f2..017666c0 100644
--- a/CITATION.cff
+++ b/CITATION.cff
@@ -13,8 +13,8 @@ authors:
     given-names: Johannes
   - family-names: Wilm
     given-names: Andreas
-  - family-names: Ulysse Garcia
-    given-names: Maxime
+  - family-names: Garcia
+    given-names: Maxime Ulysse
   - family-names: Di Tommaso
     given-names: Paolo
   - family-names: Nahnsen
@@ -39,8 +39,8 @@ prefered-citation:
       given-names: Johannes
     - family-names: Wilm
       given-names: Andreas
-    - family-names: Ulysse Garcia
-      given-names: Maxime
+    - family-names: Garcia
+      given-names: Maxime Ulysse
     - family-names: Di Tommaso
       given-names: Paolo
     - family-names: Nahnsen
diff --git a/LICENSE b/LICENSE
index 989b1dbc..b7fcc0cd 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,6 +1,6 @@
 MIT License
 
-Copyright (c) Peter J Bailey, Alexander Peltzer, Olga Botvinnik
+Copyright (c) Bailey PJ, Botvinnik O, Marques de Almeida F, Peltzer A, Sturm G
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
diff --git a/README.md b/README.md
index 804539ce..3025748a 100644
--- a/README.md
+++ b/README.md
@@ -14,7 +14,7 @@
 
 <!-- TODO nf-core: Write a 1-2 sentence summary of what data the pipeline is for and what it does -->
 
-**nf-core/scrnaseq** is a bioinformatics best-practice analysis pipeline for Pipeline for processing of 10xGenomics single cell rnaseq data.
+**nf-core/scrnaseq** is a bioinformatics best-practice analysis pipeline for Pipeline for processing 10x Genomics single cell rnaseq data.
 
 The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers making installation trivial and results highly reproducible. The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. Where possible, these processes have been submitted to and installed from [nf-core/modules](https://github.com/nf-core/modules) in order to make them available to all nf-core pipelines, and to everyone within the Nextflow community!
 
@@ -62,7 +62,7 @@ The nf-core/scrnaseq pipeline comes with documentation about the pipeline [usage
 
 ## Credits
 
-nf-core/scrnaseq was originally written by Peter J Bailey, Alexander Peltzer, Olga Botvinnik.
+nf-core/scrnaseq was originally written by Bailey PJ, Botvinnik O, Marques de Almeida F, Peltzer A, Sturm G.
 
 We thank the following people for their extensive assistance in the development of this pipeline:
 
diff --git a/assets/adaptivecard.json b/assets/adaptivecard.json
new file mode 100644
index 00000000..03078a14
--- /dev/null
+++ b/assets/adaptivecard.json
@@ -0,0 +1,67 @@
+{
+    "type": "message",
+    "attachments": [
+        {
+            "contentType": "application/vnd.microsoft.card.adaptive",
+            "contentUrl": null,
+            "content": {
+                "\$schema": "http://adaptivecards.io/schemas/adaptive-card.json",
+                "msteams": {
+                    "width": "Full"
+                },
+                "type": "AdaptiveCard",
+                "version": "1.2",
+                "body": [
+                    {
+                        "type": "TextBlock",
+                        "size": "Large",
+                        "weight": "Bolder",
+                        "color": "<% if (success) { %>Good<% } else { %>Attention<%} %>",
+                        "text": "nf-core/scrnaseq v${version} - ${runName}",
+                        "wrap": true
+                    },
+                    {
+                        "type": "TextBlock",
+                        "spacing": "None",
+                        "text": "Completed at ${dateComplete} (duration: ${duration})",
+                        "isSubtle": true,
+                        "wrap": true
+                    },
+                    {
+                        "type": "TextBlock",
+                        "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors. The full error message was: ${errorReport}.<% } %>",
+                        "wrap": true
+                    },
+                    {
+                        "type": "TextBlock",
+                        "text": "The command used to launch the workflow was as follows:",
+                        "wrap": true
+                    },
+                    {
+                        "type": "TextBlock",
+                        "text": "${commandLine}",
+                        "isSubtle": true,
+                        "wrap": true
+                    }
+                ],
+                "actions": [
+                    {
+                        "type": "Action.ShowCard",
+                        "title": "Pipeline Configuration",
+                        "card": {
+                            "type": "AdaptiveCard",
+                            "\$schema": "http://adaptivecards.io/schemas/adaptive-card.json",
+                            "body": [
+                                {
+                                    "type": "FactSet",
+                                    "facts": [<% out << summary.collect{ k,v -> "{\"title\": \"$k\", \"value\" : \"$v\"}"}.join(",\n") %>
+                                    ]
+                                }
+                            ]
+                        }
+                    }
+                ]
+            }
+        }
+    ]
+}
diff --git a/assets/email_template.html b/assets/email_template.html
index b5c9a7b9..2ff8db51 100644
--- a/assets/email_template.html
+++ b/assets/email_template.html
@@ -4,7 +4,7 @@
   <meta http-equiv="X-UA-Compatible" content="IE=edge">
   <meta name="viewport" content="width=device-width, initial-scale=1">
 
-  <meta name="description" content="nf-core/scrnaseq: Pipeline for processing of 10xGenomics single cell rnaseq data">
+  <meta name="description" content="nf-core/scrnaseq: Pipeline for processing 10x Genomics single cell rnaseq data">
   <title>nf-core/scrnaseq Pipeline Report</title>
 </head>
 <body>
diff --git a/assets/methods_description_template.yml b/assets/methods_description_template.yml
new file mode 100644
index 00000000..1e84fd63
--- /dev/null
+++ b/assets/methods_description_template.yml
@@ -0,0 +1,25 @@
+id: "nf-core-scrnaseq-methods-description"
+description: "Suggested text and references to use when describing pipeline usage within the methods section of a publication."
+section_name: "nf-core/scrnaseq Methods Description"
+section_href: "https://github.com/nf-core/scrnaseq"
+plot_type: "html"
+## TODO nf-core: Update the HTML below to your preferred methods description, e.g. add publication citation for this pipeline
+## You can inject any metadata from the Nextflow '${workflow}' object
+data: |
+  <h4>Methods</h4>
+  <p>Data was processed using nf-core/scrnaseq v${workflow.manifest.version} ${doi_text} of the nf-core collection of workflows (<a href="https://doi.org/10.1038/s41587-020-0439-x">Ewels <em>et al.</em>, 2020</a>).</p>
+  <p>The pipeline was executed with Nextflow v${workflow.nextflow.version} (<a href="https://doi.org/10.1038/nbt.3820">Di Tommaso <em>et al.</em>, 2017</a>) with the following command:</p>
+  <pre><code>${workflow.commandLine}</code></pre>
+  <h4>References</h4>
+  <ul>
+    <li>Di Tommaso, P., Chatzou, M., Floden, E. W., Barja, P. P., Palumbo, E., & Notredame, C. (2017). Nextflow enables reproducible computational workflows. Nature Biotechnology, 35(4), 316-319. <a href="https://doi.org/10.1038/nbt.3820">https://doi.org/10.1038/nbt.3820</a></li>
+    <li>Ewels, P. A., Peltzer, A., Fillinger, S., Patel, H., Alneberg, J., Wilm, A., Garcia, M. U., Di Tommaso, P., & Nahnsen, S. (2020). The nf-core framework for community-curated bioinformatics pipelines. Nature Biotechnology, 38(3), 276-278. <a href="https://doi.org/10.1038/s41587-020-0439-x">https://doi.org/10.1038/s41587-020-0439-x</a></li>
+  </ul>
+  <div class="alert alert-info">
+    <h5>Notes:</h5>
+    <ul>
+      ${nodoi_text}
+      <li>The command above does not include parameters contained in any configs or profiles that may have been used. Ensure the config file is also uploaded with your publication!</li>
+      <li>You should also cite all software used within this run. Check the "Software Versions" of this report to get version information.</li>
+    </ul>
+  </div>
diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml
index a646be0b..3679a380 100644
--- a/assets/multiqc_config.yml
+++ b/assets/multiqc_config.yml
@@ -3,9 +3,11 @@ report_comment: >
   analysis pipeline. For information about how to interpret these results, please see the
   <a href="https://nf-co.re/scrnaseq" target="_blank">documentation</a>.
 report_section_order:
-  software_versions:
+  "nf-core-scrnaseq-methods-description":
     order: -1000
-  "nf-core-scrnaseq-summary":
+  software_versions:
     order: -1001
+  "nf-core-scrnaseq-summary":
+    order: -1002
 
 export_plots: true
diff --git a/docs/usage.md b/docs/usage.md
index c1b85e7a..8040b626 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -237,6 +237,14 @@ See the main [Nextflow documentation](https://www.nextflow.io/docs/latest/config
 
 If you have any questions or issues please send us a message on [Slack](https://nf-co.re/join/slack) on the [`#configs` channel](https://nfcore.slack.com/channels/configs).
 
+## Azure Resource Requests
+
+To be used with the `azurebatch` profile by specifying the `-profile azurebatch`.
+We recommend providing a compute `params.vm_type` of `Standard_D16_v3` VMs by default but these options can be changed if required.
+
+Note that the choice of VM size depends on your quota and the overall workload during the analysis.
+For a thorough list, please refer to the [Azure Sizes for virtual machines in Azure](https://docs.microsoft.com/en-us/azure/virtual-machines/sizes).
+
 ## Running in the background
 
 Nextflow handles job submissions and supervises the running jobs. The Nextflow process must run until the pipeline is finished.
diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy
index 2fc0a9b9..27feb009 100755
--- a/lib/NfcoreTemplate.groovy
+++ b/lib/NfcoreTemplate.groovy
@@ -145,6 +145,61 @@ class NfcoreTemplate {
         output_tf.withWriter { w -> w << email_txt }
     }
 
+    //
+    // Construct and send adaptive card
+    // https://adaptivecards.io
+    //
+    public static void adaptivecard(workflow, params, summary_params, projectDir, log) {
+        def hook_url = params.hook_url
+
+        def summary = [:]
+        for (group in summary_params.keySet()) {
+            summary << summary_params[group]
+        }
+
+        def misc_fields = [:]
+        misc_fields['start']                                = workflow.start
+        misc_fields['complete']                             = workflow.complete
+        misc_fields['scriptfile']                           = workflow.scriptFile
+        misc_fields['scriptid']                             = workflow.scriptId
+        if (workflow.repository) misc_fields['repository']  = workflow.repository
+        if (workflow.commitId)   misc_fields['commitid']    = workflow.commitId
+        if (workflow.revision)   misc_fields['revision']    = workflow.revision
+        misc_fields['nxf_version']                          = workflow.nextflow.version
+        misc_fields['nxf_build']                            = workflow.nextflow.build
+        misc_fields['nxf_timestamp']                        = workflow.nextflow.timestamp
+
+        def msg_fields = [:]
+        msg_fields['version']      = workflow.manifest.version
+        msg_fields['runName']      = workflow.runName
+        msg_fields['success']      = workflow.success
+        msg_fields['dateComplete'] = workflow.complete
+        msg_fields['duration']     = workflow.duration
+        msg_fields['exitStatus']   = workflow.exitStatus
+        msg_fields['errorMessage'] = (workflow.errorMessage ?: 'None')
+        msg_fields['errorReport']  = (workflow.errorReport ?: 'None')
+        msg_fields['commandLine']  = workflow.commandLine
+        msg_fields['projectDir']   = workflow.projectDir
+        msg_fields['summary']      = summary << misc_fields
+
+        // Render the JSON template
+        def engine       = new groovy.text.GStringTemplateEngine()
+        def hf = new File("$projectDir/assets/adaptivecard.json")
+        def json_template = engine.createTemplate(hf).make(msg_fields)
+        def json_message  = json_template.toString()
+
+        // POST
+        def post = new URL(hook_url).openConnection();
+        post.setRequestMethod("POST")
+        post.setDoOutput(true)
+        post.setRequestProperty("Content-Type", "application/json")
+        post.getOutputStream().write(json_message.getBytes("UTF-8"));
+        def postRC = post.getResponseCode();
+        if (! postRC.equals(200)) {
+            log.warn(post.getErrorStream().getText());
+        }
+    }
+
     //
     // Print pipeline summary on completion
     //
diff --git a/lib/Utils.groovy b/lib/Utils.groovy
old mode 100755
new mode 100644
index 28567bd7..8d030f4e
--- a/lib/Utils.groovy
+++ b/lib/Utils.groovy
@@ -21,19 +21,26 @@ class Utils {
         }
 
         // Check that all channels are present
-        def required_channels = ['conda-forge', 'bioconda', 'defaults']
-        def conda_check_failed = !required_channels.every { ch -> ch in channels }
+        // This channel list is ordered by required channel priority.
+        def required_channels_in_order = ['conda-forge', 'bioconda', 'defaults']
+        def channels_missing = ((required_channels_in_order as Set) - (channels as Set)) as Boolean
 
         // Check that they are in the right order
-        conda_check_failed |= !(channels.indexOf('conda-forge') < channels.indexOf('bioconda'))
-        conda_check_failed |= !(channels.indexOf('bioconda') < channels.indexOf('defaults'))
+        def channel_priority_violation = false
+        def n = required_channels_in_order.size()
+        for (int i = 0; i < n - 1; i++) {
+            channel_priority_violation |= !(channels.indexOf(required_channels_in_order[i]) < channels.indexOf(required_channels_in_order[i+1]))
+        }
 
-        if (conda_check_failed) {
+        if (channels_missing | channel_priority_violation) {
             log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
                 "  There is a problem with your Conda configuration!\n\n" +
                 "  You will need to set-up the conda-forge and bioconda channels correctly.\n" +
-                "  Please refer to https://bioconda.github.io/user/install.html#set-up-channels\n" +
-                "  NB: The order of the channels matters!\n" +
+                "  Please refer to https://bioconda.github.io/\n" +
+                "  The observed channel order is \n" +
+                "  ${channels}\n" +
+                "  but the following channel order is required:\n" +
+                "  ${required_channels_in_order}\n" +
                 "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
         }
     }
diff --git a/lib/WorkflowScrnaseq.groovy b/lib/WorkflowScrnaseq.groovy
index db31a702..3efe1b86 100755
--- a/lib/WorkflowScrnaseq.groovy
+++ b/lib/WorkflowScrnaseq.groovy
@@ -2,6 +2,8 @@
 // This file holds several functions specific to the workflow/scrnaseq.nf in the nf-core/scrnaseq pipeline
 //
 
+import groovy.text.SimpleTemplateEngine
+
 class WorkflowScrnaseq {
 
     //
@@ -42,6 +44,23 @@ class WorkflowScrnaseq {
         yaml_file_text        += "data: |\n"
         yaml_file_text        += "${summary_section}"
         return yaml_file_text
+    }
+
+    public static String methodsDescriptionText(run_workflow, mqc_methods_yaml) {
+        // Convert to a named map so it can be used with the familiar NXF ${workflow} variable syntax in the MultiQC YML file
+        def meta = [:]
+        meta.workflow = run_workflow.toMap()
+        meta["manifest_map"] = run_workflow.manifest.toMap()
+
+        meta["doi_text"] = meta.manifest_map.doi ? "(doi: <a href=\'https://doi.org/${meta.manifest_map.doi}\'>${meta.manifest_map.doi}</a>)" : ""
+        meta["nodoi_text"] = meta.manifest_map.doi ? "": "<li>If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used. </li>"
+
+        def methods_text = mqc_methods_yaml.text
+
+        def engine =  new SimpleTemplateEngine()
+        def description_html = engine.createTemplate(methods_text).make(meta)
+
+        return description_html
     }//
     // Exit pipeline if incorrect --genome key provided
     //
diff --git a/main.nf b/main.nf
index c2fe7ccf..e2ce158b 100644
--- a/main.nf
+++ b/main.nf
@@ -4,7 +4,8 @@
     nf-core/scrnaseq
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     Github : https://github.com/nf-core/scrnaseq
-Website: https://nf-co.re/scrnaseq
+
+    Website: https://nf-co.re/scrnaseq
     Slack  : https://nfcore.slack.com/channels/scrnaseq
 ----------------------------------------------------------------------------------------
 */
diff --git a/modules.json b/modules.json
index a865b2cd..4465eece 100644
--- a/modules.json
+++ b/modules.json
@@ -2,20 +2,21 @@
     "name": "nf-core/scrnaseq",
     "homePage": "https://github.com/nf-core/scrnaseq",
     "repos": {
-        "nf-core/modules": {
-            "git_url": "https://github.com/nf-core/modules.git",
+        "https://github.com/nf-core/modules.git": {
             "modules": {
-                "custom/dumpsoftwareversions": {
-                    "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d",
-                    "branch": "master"
-                },
-                "fastqc": {
-                    "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d",
-                    "branch": "master"
-                },
-                "multiqc": {
-                    "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d",
-                    "branch": "master"
+                "nf-core": {
+                    "custom/dumpsoftwareversions": {
+                        "branch": "master",
+                        "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905"
+                    },
+                    "fastqc": {
+                        "branch": "master",
+                        "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905"
+                    },
+                    "multiqc": {
+                        "branch": "master",
+                        "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905"
+                    }
                 }
             }
         }
diff --git a/modules/nf-core/modules/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf
similarity index 79%
rename from modules/nf-core/modules/custom/dumpsoftwareversions/main.nf
rename to modules/nf-core/custom/dumpsoftwareversions/main.nf
index 327d5100..cebb6e05 100644
--- a/modules/nf-core/modules/custom/dumpsoftwareversions/main.nf
+++ b/modules/nf-core/custom/dumpsoftwareversions/main.nf
@@ -1,11 +1,11 @@
 process CUSTOM_DUMPSOFTWAREVERSIONS {
-    label 'process_low'
+    label 'process_single'
 
     // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container
-    conda (params.enable_conda ? "bioconda::multiqc=1.11" : null)
+    conda (params.enable_conda ? 'bioconda::multiqc=1.13' : null)
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/multiqc:1.11--pyhdfd78af_0' :
-        'quay.io/biocontainers/multiqc:1.11--pyhdfd78af_0' }"
+        'https://depot.galaxyproject.org/singularity/multiqc:1.13--pyhdfd78af_0' :
+        'quay.io/biocontainers/multiqc:1.13--pyhdfd78af_0' }"
 
     input:
     path versions
diff --git a/modules/nf-core/modules/custom/dumpsoftwareversions/meta.yml b/modules/nf-core/custom/dumpsoftwareversions/meta.yml
similarity index 100%
rename from modules/nf-core/modules/custom/dumpsoftwareversions/meta.yml
rename to modules/nf-core/custom/dumpsoftwareversions/meta.yml
diff --git a/modules/nf-core/modules/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py
similarity index 100%
rename from modules/nf-core/modules/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py
rename to modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py
diff --git a/modules/nf-core/modules/fastqc/main.nf b/modules/nf-core/fastqc/main.nf
similarity index 85%
rename from modules/nf-core/modules/fastqc/main.nf
rename to modules/nf-core/fastqc/main.nf
index ed6b8c50..05730368 100644
--- a/modules/nf-core/modules/fastqc/main.nf
+++ b/modules/nf-core/fastqc/main.nf
@@ -44,4 +44,16 @@ process FASTQC {
         END_VERSIONS
         """
     }
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch ${prefix}.html
+    touch ${prefix}.zip
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" )
+    END_VERSIONS
+    """
 }
diff --git a/modules/nf-core/modules/fastqc/meta.yml b/modules/nf-core/fastqc/meta.yml
similarity index 100%
rename from modules/nf-core/modules/fastqc/meta.yml
rename to modules/nf-core/fastqc/meta.yml
diff --git a/modules/nf-core/modules/multiqc/main.nf b/modules/nf-core/modules/multiqc/main.nf
deleted file mode 100644
index 1264aac1..00000000
--- a/modules/nf-core/modules/multiqc/main.nf
+++ /dev/null
@@ -1,31 +0,0 @@
-process MULTIQC {
-    label 'process_medium'
-
-    conda (params.enable_conda ? 'bioconda::multiqc=1.12' : null)
-    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/multiqc:1.12--pyhdfd78af_0' :
-        'quay.io/biocontainers/multiqc:1.12--pyhdfd78af_0' }"
-
-    input:
-    path multiqc_files
-
-    output:
-    path "*multiqc_report.html", emit: report
-    path "*_data"              , emit: data
-    path "*_plots"             , optional:true, emit: plots
-    path "versions.yml"        , emit: versions
-
-    when:
-    task.ext.when == null || task.ext.when
-
-    script:
-    def args = task.ext.args ?: ''
-    """
-    multiqc -f $args .
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" )
-    END_VERSIONS
-    """
-}
diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf
new file mode 100644
index 00000000..a8159a57
--- /dev/null
+++ b/modules/nf-core/multiqc/main.nf
@@ -0,0 +1,53 @@
+process MULTIQC {
+    label 'process_single'
+
+    conda (params.enable_conda ? 'bioconda::multiqc=1.13' : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/multiqc:1.13--pyhdfd78af_0' :
+        'quay.io/biocontainers/multiqc:1.13--pyhdfd78af_0' }"
+
+    input:
+    path  multiqc_files, stageAs: "?/*"
+    path(multiqc_config)
+    path(extra_multiqc_config)
+    path(multiqc_logo)
+
+    output:
+    path "*multiqc_report.html", emit: report
+    path "*_data"              , emit: data
+    path "*_plots"             , optional:true, emit: plots
+    path "versions.yml"        , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def config = multiqc_config ? "--config $multiqc_config" : ''
+    def extra_config = extra_multiqc_config ? "--config $extra_multiqc_config" : ''
+    """
+    multiqc \\
+        --force \\
+        $args \\
+        $config \\
+        $extra_config \\
+        .
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" )
+    END_VERSIONS
+    """
+
+    stub:
+    """
+    touch multiqc_data
+    touch multiqc_plots
+    touch multiqc_report.html
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" )
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/modules/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml
similarity index 73%
rename from modules/nf-core/modules/multiqc/meta.yml
rename to modules/nf-core/multiqc/meta.yml
index 6fa891ef..ebc29b27 100644
--- a/modules/nf-core/modules/multiqc/meta.yml
+++ b/modules/nf-core/multiqc/meta.yml
@@ -12,11 +12,25 @@ tools:
       homepage: https://multiqc.info/
       documentation: https://multiqc.info/docs/
       licence: ["GPL-3.0-or-later"]
+
 input:
   - multiqc_files:
       type: file
       description: |
         List of reports / files recognised by MultiQC, for example the html and zip output of FastQC
+  - multiqc_config:
+      type: file
+      description: Optional config yml for MultiQC
+      pattern: "*.{yml,yaml}"
+  - extra_multiqc_config:
+      type: file
+      description: Second optional config yml for MultiQC. Will override common sections in multiqc_config.
+      pattern: "*.{yml,yaml}"
+  - multiqc_logo:
+      type: file
+      description: Optional logo file for MultiQC
+      pattern: "*.{png}"
+
 output:
   - report:
       type: file
@@ -38,3 +52,4 @@ authors:
   - "@abhi18av"
   - "@bunop"
   - "@drpatelh"
+  - "@jfy133"
diff --git a/nextflow.config b/nextflow.config
index a6e37e2a..3c013c11 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -21,7 +21,9 @@ params {
     // MultiQC options
     multiqc_config             = null
     multiqc_title              = null
+    multiqc_logo               = null
     max_multiqc_email_size     = '25.MB'
+    multiqc_methods_description = null
 
     // Boilerplate options
     outdir                     = null
@@ -31,6 +33,7 @@ params {
     email_on_fail              = null
     plaintext_email            = false
     monochrome_logs            = false
+    hook_url                   = null
     help                       = false
     validate_params            = true
     show_hidden_params         = false
@@ -74,7 +77,6 @@ try {
 // }
 
 
-
 profiles {
     debug { process.beforeScript = 'echo $HOSTNAME' }
     conda {
@@ -183,12 +185,13 @@ dag {
 
 manifest {
     name            = 'nf-core/scrnaseq'
-    author          = 'Peter J Bailey, Alexander Peltzer, Olga Botvinnik'
+    author          = 'Bailey PJ, Botvinnik O, Marques de Almeida F, Peltzer A, Sturm G'
     homePage        = 'https://github.com/nf-core/scrnaseq'
-    description     = 'Pipeline for processing of 10xGenomics single cell rnaseq data'
+    description     = 'Pipeline for processing 10x Genomics single cell rnaseq data'
     mainScript      = 'main.nf'
     nextflowVersion = '!>=21.10.3'
     version         = '2.0.1dev'
+    doi             = ''
 }
 
 // Load modules.config for DSL2 module specific options
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 1cc1997c..22c7c3ef 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -2,7 +2,7 @@
     "$schema": "http://json-schema.org/draft-07/schema",
     "$id": "https://raw.githubusercontent.com/nf-core/scrnaseq/master/nextflow_schema.json",
     "title": "nf-core/scrnaseq pipeline parameters",
-    "description": "Pipeline for processing of 10xGenomics single cell rnaseq data",
+    "description": "Pipeline for processing 10x Genomics single cell rnaseq data",
     "type": "object",
     "definitions": {
         "input_output_options": {
@@ -213,12 +213,30 @@
                     "fa_icon": "fas fa-palette",
                     "hidden": true
                 },
+                "hook_url": {
+                    "type": "string",
+                    "description": "Incoming hook URL for messaging service",
+                    "fa_icon": "fas fa-people-group",
+                    "help_text": "Incoming hook URL for messaging service. Currently, only MS Teams is supported.",
+                    "hidden": true
+                },
                 "multiqc_config": {
                     "type": "string",
                     "description": "Custom config file to supply to MultiQC.",
                     "fa_icon": "fas fa-cog",
                     "hidden": true
                 },
+                "multiqc_logo": {
+                    "type": "string",
+                    "description": "Custom logo file to supply to MultiQC. File name must also be set in the MultiQC config file",
+                    "fa_icon": "fas fa-image",
+                    "hidden": true
+                },
+                "multiqc_methods_description": {
+                    "type": "string",
+                    "description": "Custom MultiQC yaml file containing HTML including a methods description.",
+                    "fa_icon": "fas fa-cog"
+                },
                 "tracedir": {
                     "type": "string",
                     "description": "Directory to keep pipeline Nextflow logs and reports.",
diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf
index 69888824..f37c6f89 100644
--- a/workflows/scrnaseq.nf
+++ b/workflows/scrnaseq.nf
@@ -23,8 +23,10 @@ if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input sample
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */
 
-ch_multiqc_config        = file("$projectDir/assets/multiqc_config.yml", checkIfExists: true)
-ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multiqc_config) : Channel.empty()
+ch_multiqc_config          = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true)
+ch_multiqc_custom_config   = params.multiqc_config ? Channel.fromPath( params.multiqc_config, checkIfExists: true ) : Channel.empty()
+ch_multiqc_logo            = params.multiqc_logo   ? Channel.fromPath( params.multiqc_logo, checkIfExists: true ) : Channel.empty()
+ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true)
 
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -46,9 +48,9 @@ include { INPUT_CHECK } from '../subworkflows/local/input_check'
 //
 // MODULE: Installed directly from nf-core/modules
 //
-include { FASTQC                      } from '../modules/nf-core/modules/fastqc/main'
-include { MULTIQC                     } from '../modules/nf-core/modules/multiqc/main'
-include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/modules/custom/dumpsoftwareversions/main'
+include { FASTQC                      } from '../modules/nf-core/fastqc/main'
+include { MULTIQC                     } from '../modules/nf-core/multiqc/main'
+include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main'
 
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -89,15 +91,20 @@ workflow SCRNASEQ {
     workflow_summary    = WorkflowScrnaseq.paramsSummaryMultiqc(workflow, summary_params)
     ch_workflow_summary = Channel.value(workflow_summary)
 
+    methods_description    = WorkflowScrnaseq.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description)
+    ch_methods_description = Channel.value(methods_description)
+
     ch_multiqc_files = Channel.empty()
-    ch_multiqc_files = ch_multiqc_files.mix(Channel.from(ch_multiqc_config))
-    ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_custom_config.collect().ifEmpty([]))
     ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml'))
+    ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml'))
     ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect())
     ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([]))
 
     MULTIQC (
-        ch_multiqc_files.collect()
+        ch_multiqc_files.collect(),
+        ch_multiqc_config.collect().ifEmpty([]),
+        ch_multiqc_custom_config.collect().ifEmpty([]),
+        ch_multiqc_logo.collect().ifEmpty([])
     )
     multiqc_report = MULTIQC.out.report.toList()
     ch_versions    = ch_versions.mix(MULTIQC.out.versions)
@@ -114,6 +121,9 @@ workflow.onComplete {
         NfcoreTemplate.email(workflow, params, summary_params, projectDir, log, multiqc_report)
     }
     NfcoreTemplate.summary(workflow, params, log)
+    if (params.hook_url) {
+        NfcoreTemplate.adaptivecard(workflow, params, summary_params, projectDir, log)
+    }
 }
 
 /*

From 1600e3bd8985b43b4ca474c6077ef3cbcaf417eb Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <felipemarques89@gmail.com>
Date: Wed, 5 Oct 2022 07:16:47 +0000
Subject: [PATCH 139/165] add new command to schema

---
 nextflow_schema.json | 42 +++++++++++++++++++++++++++++++++++++-----
 1 file changed, 37 insertions(+), 5 deletions(-)

diff --git a/nextflow_schema.json b/nextflow_schema.json
index 91757ae4..c8f0608f 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -10,7 +10,10 @@
             "type": "object",
             "fa_icon": "fas fa-terminal",
             "description": "Define where the pipeline should find input data and save output data.",
-            "required": ["input", "outdir"],
+            "required": [
+                "input",
+                "outdir"
+            ],
             "properties": {
                 "input": {
                     "type": "string",
@@ -56,14 +59,24 @@
                     "default": "alevin",
                     "help_text": "The workflow can handle three types of methods:\n\n- Kallisto/Bustools\n- Salmon Alevin + AlevinQC\n- STARsolo\n\nTo choose which one to use, please specify either `alevin`, `star` or `kallisto` as a parameter option for `--aligner`. By default, the pipeline runs the `alevin` option. Note that specifying another aligner option also requires choosing appropriate parameters (see below) for the selected option.",
                     "fa_icon": "fas fa-align-center",
-                    "enum": ["kallisto", "star", "alevin", "cellranger"]
+                    "enum": [
+                        "kallisto",
+                        "star",
+                        "alevin",
+                        "cellranger"
+                    ]
                 },
                 "protocol": {
                     "type": "string",
                     "description": "The protocol that was used to generate the single cell data, e.g. 10XV2 (default).",
                     "default": "10XV2",
                     "fa_icon": "fas fa-cogs",
-                    "enum": ["10XV3", "10XV2", "10XV1", "dropseq"]
+                    "enum": [
+                        "10XV3",
+                        "10XV2",
+                        "10XV1",
+                        "dropseq"
+                    ]
                 }
             },
             "fa_icon": "fas fa-terminal"
@@ -137,6 +150,12 @@
                     "description": "Path to transcript to gene mapping file. This allows the specification of a transcript to gene mapping file for Salmon Alevin and AlevinQC.",
                     "help_text": "> This is not the same as the `kallisto_gene_map` parameter down below and is only used by the Salmon Alevin workflow.",
                     "fa_icon": "fas fa-map-marked-alt"
+                },
+                "simpleaf_rlen": {
+                    "type": "integer",
+                    "default": 91,
+                    "description": "It is the target read length the index will be built for, using simpleaf.",
+                    "fa_icon": "fas fa-map-marked-alt"
                 }
             }
         },
@@ -191,7 +210,13 @@
                     "default": "standard",
                     "description": "Type of workflow. Use `lamanno` for RNA velocity based on La Manno et al. 2018 logic. Use `nucleus` for RNA velocity on single-nucleus RNA-seq reads. Use `kite` for feature barcoding. Use `kite: 10xFB` for 10x Genomics Feature Barcoding technology. (default: standard)",
                     "fa_icon": "fas fa-fish",
-                    "enum": ["standard", "lamanno", "nucleus", "kite", "kite: 10xFB"]
+                    "enum": [
+                        "standard",
+                        "lamanno",
+                        "nucleus",
+                        "kite",
+                        "kite: 10xFB"
+                    ]
                 }
             }
         },
@@ -309,7 +334,14 @@
                     "description": "Method used to save pipeline results to output directory.",
                     "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.",
                     "fa_icon": "fas fa-copy",
-                    "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"],
+                    "enum": [
+                        "symlink",
+                        "rellink",
+                        "link",
+                        "copy",
+                        "copyNoFollow",
+                        "move"
+                    ],
                     "hidden": true
                 },
                 "email_on_fail": {

From ba5228fe7c283bc17d362593f5b3390ecf01ac10 Mon Sep 17 00:00:00 2001
From: nf-core-bot <core@nf-co.re>
Date: Wed, 5 Oct 2022 08:11:06 +0000
Subject: [PATCH 140/165] [automated] Fix linting with Prettier

---
 nextflow_schema.json | 36 +++++-------------------------------
 1 file changed, 5 insertions(+), 31 deletions(-)

diff --git a/nextflow_schema.json b/nextflow_schema.json
index c8f0608f..c6621d70 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -10,10 +10,7 @@
             "type": "object",
             "fa_icon": "fas fa-terminal",
             "description": "Define where the pipeline should find input data and save output data.",
-            "required": [
-                "input",
-                "outdir"
-            ],
+            "required": ["input", "outdir"],
             "properties": {
                 "input": {
                     "type": "string",
@@ -59,24 +56,14 @@
                     "default": "alevin",
                     "help_text": "The workflow can handle three types of methods:\n\n- Kallisto/Bustools\n- Salmon Alevin + AlevinQC\n- STARsolo\n\nTo choose which one to use, please specify either `alevin`, `star` or `kallisto` as a parameter option for `--aligner`. By default, the pipeline runs the `alevin` option. Note that specifying another aligner option also requires choosing appropriate parameters (see below) for the selected option.",
                     "fa_icon": "fas fa-align-center",
-                    "enum": [
-                        "kallisto",
-                        "star",
-                        "alevin",
-                        "cellranger"
-                    ]
+                    "enum": ["kallisto", "star", "alevin", "cellranger"]
                 },
                 "protocol": {
                     "type": "string",
                     "description": "The protocol that was used to generate the single cell data, e.g. 10XV2 (default).",
                     "default": "10XV2",
                     "fa_icon": "fas fa-cogs",
-                    "enum": [
-                        "10XV3",
-                        "10XV2",
-                        "10XV1",
-                        "dropseq"
-                    ]
+                    "enum": ["10XV3", "10XV2", "10XV1", "dropseq"]
                 }
             },
             "fa_icon": "fas fa-terminal"
@@ -210,13 +197,7 @@
                     "default": "standard",
                     "description": "Type of workflow. Use `lamanno` for RNA velocity based on La Manno et al. 2018 logic. Use `nucleus` for RNA velocity on single-nucleus RNA-seq reads. Use `kite` for feature barcoding. Use `kite: 10xFB` for 10x Genomics Feature Barcoding technology. (default: standard)",
                     "fa_icon": "fas fa-fish",
-                    "enum": [
-                        "standard",
-                        "lamanno",
-                        "nucleus",
-                        "kite",
-                        "kite: 10xFB"
-                    ]
+                    "enum": ["standard", "lamanno", "nucleus", "kite", "kite: 10xFB"]
                 }
             }
         },
@@ -334,14 +315,7 @@
                     "description": "Method used to save pipeline results to output directory.",
                     "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.",
                     "fa_icon": "fas fa-copy",
-                    "enum": [
-                        "symlink",
-                        "rellink",
-                        "link",
-                        "copy",
-                        "copyNoFollow",
-                        "move"
-                    ],
+                    "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"],
                     "hidden": true
                 },
                 "email_on_fail": {

From 7315db5dcb069fbd54313bf756f32641cfa27046 Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alex.peltzer@gmail.com>
Date: Wed, 5 Oct 2022 08:18:24 +0000
Subject: [PATCH 141/165] Fix black linting

---
 bin/cellranger_mtx_to_h5ad.py | 13 ++++---
 bin/check_samplesheet.py      | 31 ++++++++++++----
 bin/concat_h5ad.py            | 25 +++++++++----
 bin/mtx_to_h5ad.py            | 22 +++++++++--
 bin/t2g.py                    | 70 +++++++++++++++++++----------------
 5 files changed, 106 insertions(+), 55 deletions(-)

diff --git a/bin/cellranger_mtx_to_h5ad.py b/bin/cellranger_mtx_to_h5ad.py
index e8eb5b23..ca21f50d 100755
--- a/bin/cellranger_mtx_to_h5ad.py
+++ b/bin/cellranger_mtx_to_h5ad.py
@@ -2,7 +2,8 @@
 import scanpy as sc
 import argparse
 
-def mtx_to_adata( mtx_h5: str, sample: str, verbose: bool = False ):
+
+def mtx_to_adata(mtx_h5: str, sample: str, verbose: bool = False):
 
     if verbose:
         print("Reading in {}".format(mtx_h5))
@@ -19,10 +20,12 @@ def mtx_to_adata( mtx_h5: str, sample: str, verbose: bool = False ):
 
     parser = argparse.ArgumentParser(description="Converts mtx output to h5ad.")
 
-    parser.add_argument("-m", "--mtx",     dest="mtx",     help="Path to mtx h5 file."                   )
-    parser.add_argument("-v", "--verbose", dest="verbose", help="Toggle verbose messages", default=False )
-    parser.add_argument("-s", "--sample",  dest="sample",  help="Sample name"                            )
-    parser.add_argument("-o", "--out",     dest="out",     help="Output path."                           )
+    parser.add_argument("-m", "--mtx", dest="mtx", help="Path to mtx h5 file.")
+    parser.add_argument(
+        "-v", "--verbose", dest="verbose", help="Toggle verbose messages", default=False
+    )
+    parser.add_argument("-s", "--sample", dest="sample", help="Sample name")
+    parser.add_argument("-o", "--out", dest="out", help="Output path.")
 
     args = vars(parser.parse_args())
 
diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py
index d98bdaa3..4e72568f 100755
--- a/bin/check_samplesheet.py
+++ b/bin/check_samplesheet.py
@@ -86,7 +86,9 @@ def _validate_sample(self, row):
 
     def _validate_first(self, row):
         """Assert that the first FASTQ entry is non-empty and has the right format."""
-        assert len(row[self._first_col]) > 0, "At least the first FASTQ file is required."
+        assert (
+            len(row[self._first_col]) > 0
+        ), "At least the first FASTQ file is required."
         self._validate_fastq_format(row[self._first_col])
 
     def _validate_second(self, row):
@@ -99,7 +101,8 @@ def _validate_pair(self, row):
         if row[self._first_col] and row[self._second_col]:
             row[self._single_col] = False
             assert (
-                Path(row[self._first_col]).suffixes[-2:] == Path(row[self._second_col]).suffixes[-2:]
+                Path(row[self._first_col]).suffixes[-2:]
+                == Path(row[self._second_col]).suffixes[-2:]
             ), "FASTQ pairs must have the same file extensions."
         else:
             row[self._single_col] = True
@@ -119,7 +122,9 @@ def validate_unique_samples(self):
         FASTQ file combination exists.
 
         """
-        assert len(self._seen) == len(self.modified), "The pair of sample name and FASTQ must be unique."
+        assert len(self._seen) == len(
+            self.modified
+        ), "The pair of sample name and FASTQ must be unique."
         if len({pair[0] for pair in self._seen}) < len(self._seen):
             counts = Counter(pair[0] for pair in self._seen)
             seen = Counter()
@@ -200,7 +205,11 @@ def check_samplesheet(file_in, file_out):
         HEADER = ["sample", "fastq_1", "fastq_2"]
         header = [x.strip('"') for x in fin.readline().strip().split(",")]
         if header[: len(HEADER)] != HEADER:
-            print("ERROR: Please check samplesheet header -> {} != {}".format(",".join(header), ",".join(HEADER)))
+            print(
+                "ERROR: Please check samplesheet header -> {} != {}".format(
+                    ",".join(header), ",".join(HEADER)
+                )
+            )
             sys.exit(1)
 
         ## Check sample entries
@@ -217,7 +226,9 @@ def check_samplesheet(file_in, file_out):
             num_cols = len([x for x in lspl if x])
             if num_cols < MIN_COLS:
                 print_error(
-                    "Invalid number of populated columns (minimum = {})!".format(MIN_COLS),
+                    "Invalid number of populated columns (minimum = {})!".format(
+                        MIN_COLS
+                    ),
                     "Line",
                     line,
                 )
@@ -266,8 +277,14 @@ def check_samplesheet(file_in, file_out):
             for sample in sorted(sample_mapping_dict.keys()):
 
                 ## Check that multiple runs of the same sample are of the same datatype
-                if not all(x[0] == sample_mapping_dict[sample][0][0] for x in sample_mapping_dict[sample]):
-                    print_error("Multiple runs of a sample must be of the same datatype!", "Sample: {}".format(sample))
+                if not all(
+                    x[0] == sample_mapping_dict[sample][0][0]
+                    for x in sample_mapping_dict[sample]
+                ):
+                    print_error(
+                        "Multiple runs of a sample must be of the same datatype!",
+                        "Sample: {}".format(sample),
+                    )
 
                 for idx, val in enumerate(sample_mapping_dict[sample]):
                     fout.write(",".join(["{}".format(sample)] + val) + "\n")
diff --git a/bin/concat_h5ad.py b/bin/concat_h5ad.py
index 29d0037a..5d235eac 100755
--- a/bin/concat_h5ad.py
+++ b/bin/concat_h5ad.py
@@ -3,6 +3,7 @@
 from pathlib import Path
 import argparse
 
+
 def read_samplesheet(samplesheet):
     df = pd.read_csv(samplesheet)
     df.set_index("sample")
@@ -12,15 +13,23 @@ def read_samplesheet(samplesheet):
     # only keep unique values using set()
     df = df.groupby(["sample"]).agg(lambda column: ",".join(set(column)))
 
-    return(df)
+    return df
+
 
 if __name__ == "__main__":
 
-    parser = argparse.ArgumentParser(description="Concatenates h5ad files and merge metadata from samplesheet")
+    parser = argparse.ArgumentParser(
+        description="Concatenates h5ad files and merge metadata from samplesheet"
+    )
 
-    parser.add_argument("-i", "--input",  dest="input",  help="Path to samplesheet.csv")
-    parser.add_argument("-o", "--out",    dest="out",    help="Output path.")
-    parser.add_argument("-s", "--suffix", dest="suffix", help="Suffix of matrices to remove and get sample name")
+    parser.add_argument("-i", "--input", dest="input", help="Path to samplesheet.csv")
+    parser.add_argument("-o", "--out", dest="out", help="Output path.")
+    parser.add_argument(
+        "-s",
+        "--suffix",
+        dest="suffix",
+        help="Suffix of matrices to remove and get sample name",
+    )
 
     args = vars(parser.parse_args())
 
@@ -29,8 +38,8 @@ def read_samplesheet(samplesheet):
 
     # find all h5ad and append to dict
     dict_of_h5ad = {
-            str(path).replace(args["suffix"], ""): sc.read_h5ad(path)
-            for path in Path(".").rglob('*.h5ad')
+        str(path).replace(args["suffix"], ""): sc.read_h5ad(path)
+        for path in Path(".").rglob("*.h5ad")
     }
 
     # concat h5ad files
@@ -40,4 +49,4 @@ def read_samplesheet(samplesheet):
     adata.obs = adata.obs.join(df_samplesheet, on="sample")
     adata.write_h5ad(args["out"], compression="gzip")
 
-    print("Wrote h5ad file to {}".format(args["out"]))
\ No newline at end of file
+    print("Wrote h5ad file to {}".format(args["out"]))
diff --git a/bin/mtx_to_h5ad.py b/bin/mtx_to_h5ad.py
index 2885886e..9d3346a9 100755
--- a/bin/mtx_to_h5ad.py
+++ b/bin/mtx_to_h5ad.py
@@ -5,14 +5,21 @@
 
 
 def mtx_to_adata(
-    mtx_file: str, barcode_file: str, feature_file: str, sample: str, aligner: str, verbose: bool = False
+    mtx_file: str,
+    barcode_file: str,
+    feature_file: str,
+    sample: str,
+    aligner: str,
+    verbose: bool = False,
 ):
 
     if verbose:
         print("Reading in {}".format(mtx_file))
 
     adata = sc.read_mtx(mtx_file)
-    if aligner == 'star': # for some reason star matrix comes transposed and doesn't fit when values are appended directly
+    if (
+        aligner == "star"
+    ):  # for some reason star matrix comes transposed and doesn't fit when values are appended directly
         adata = adata.transpose()
     adata.obs_names = pd.read_csv(barcode_file, header=None, sep="\t")[0].values
     adata.var_names = pd.read_csv(feature_file, header=None, sep="\t")[0].values
@@ -33,12 +40,19 @@ def mtx_to_adata(
     parser.add_argument("-b", "--barcode", dest="barcode", help="Path to barcode file.")
     parser.add_argument("-s", "--sample", dest="sample", help="Sample name")
     parser.add_argument("-o", "--out", dest="out", help="Output path.")
-    parser.add_argument("-a", "--aligner", dest="aligner", help="Which aligner has been used?")
+    parser.add_argument(
+        "-a", "--aligner", dest="aligner", help="Which aligner has been used?"
+    )
 
     args = vars(parser.parse_args())
 
     adata = mtx_to_adata(
-        args["mtx"], args["barcode"], args["feature"], args["sample"], args["aligner"],verbose=args["verbose"]
+        args["mtx"],
+        args["barcode"],
+        args["feature"],
+        args["sample"],
+        args["aligner"],
+        verbose=args["verbose"],
     )
 
     adata.write_h5ad(args["out"], compression="gzip")
diff --git a/bin/t2g.py b/bin/t2g.py
index 6419dd1d..5daf3df5 100755
--- a/bin/t2g.py
+++ b/bin/t2g.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
-#This was downloaded on 2019-06-23 from https://github.com/bustools/getting_started/releases/
-#All credit goes to the original authors from the Kallisto/BUStools team!
+# This was downloaded on 2019-06-23 from https://github.com/bustools/getting_started/releases/
+# All credit goes to the original authors from the Kallisto/BUStools team!
 # BSD 2-Clause License
 #
 # Copyright (c) 2017, Nicolas Bray, Harold Pimentel, Páll Melsted and Lior Pachter
@@ -29,43 +29,43 @@
 
 import sys, argparse
 
-def create_transcript_list(input, use_name = True, use_version = False):
+
+def create_transcript_list(input, use_name=True, use_version=False):
     r = {}
     for line in input:
-        if len(line) == 0 or line[0] == '#':
+        if len(line) == 0 or line[0] == "#":
             continue
-        l = line.strip().split('\t')
-        if l[2] == 'transcript':
+        l = line.strip().split("\t")
+        if l[2] == "transcript":
             info = l[8]
             d = {}
-            for x in info.split('; '):
+            for x in info.split("; "):
                 x = x.strip()
-                p = x.find(' ')
+                p = x.find(" ")
                 if p == -1:
                     continue
                 k = x[:p]
-                p = x.find('"',p)
-                p2 = x.find('"',p+1)
-                v = x[p+1:p2]
+                p = x.find('"', p)
+                p2 = x.find('"', p + 1)
+                v = x[p + 1 : p2]
                 d[k] = v
 
-
-            if 'transcript_id' not in d or 'gene_id' not in d:
+            if "transcript_id" not in d or "gene_id" not in d:
                 continue
 
-            tid = d['transcript_id'].split(".")[0]
-            gid = d['gene_id'].split(".")[0]
+            tid = d["transcript_id"].split(".")[0]
+            gid = d["gene_id"].split(".")[0]
             if use_version:
-                if 'transcript_version' not in d or 'gene_version' not in d:
+                if "transcript_version" not in d or "gene_version" not in d:
                     continue
 
-                tid += '.' + d['transcript_version']
-                gid += '.' + d['gene_version']
+                tid += "." + d["transcript_version"]
+                gid += "." + d["gene_version"]
             gname = None
             if use_name:
-                if 'gene_name' not in d:
+                if "gene_name" not in d:
                     continue
-                gname = d['gene_name']
+                gname = d["gene_name"]
 
             if tid in r:
                 continue
@@ -74,26 +74,34 @@ def create_transcript_list(input, use_name = True, use_version = False):
     return r
 
 
-
-def print_output(output, r, use_name = True):
+def print_output(output, r, use_name=True):
     for tid in r:
         if use_name:
-            output.write("%s\t%s\t%s\n"%(tid, r[tid][0], r[tid][1]))
+            output.write("%s\t%s\t%s\n" % (tid, r[tid][0], r[tid][1]))
         else:
-            output.write("%s\t%s\n"%(tid, r[tid][0]))
+            output.write("%s\t%s\n" % (tid, r[tid][0]))
 
 
 if __name__ == "__main__":
 
-
-    parser = argparse.ArgumentParser(add_help=True, description='Creates transcript to gene info from GTF files\nreads from standard input and writes to standard output')
-    parser.add_argument('--use_version', '-v', action='store_true', help='Use version numbers in transcript and gene ids')
-    parser.add_argument('--skip_gene_names', '-s', action='store_true', help='Do not output gene names')
+    parser = argparse.ArgumentParser(
+        add_help=True,
+        description="Creates transcript to gene info from GTF files\nreads from standard input and writes to standard output",
+    )
+    parser.add_argument(
+        "--use_version",
+        "-v",
+        action="store_true",
+        help="Use version numbers in transcript and gene ids",
+    )
+    parser.add_argument(
+        "--skip_gene_names", "-s", action="store_true", help="Do not output gene names"
+    )
     args = parser.parse_args()
 
-
-
     input = sys.stdin
-    r = create_transcript_list(input, use_name = not args.skip_gene_names, use_version = args.use_version)
+    r = create_transcript_list(
+        input, use_name=not args.skip_gene_names, use_version=args.use_version
+    )
     output = sys.stdout
     print_output(output, r)

From 9aec18c5532cbb774ffdc44abdaec40fdef85343 Mon Sep 17 00:00:00 2001
From: Felipe Marques de Almeida <felipemarques89@gmail.com>
Date: Wed, 5 Oct 2022 11:01:08 +0200
Subject: [PATCH 142/165] Update conf/modules.config

Co-authored-by: Gregor Sturm <mail@gregor-sturm.de>
---
 conf/modules.config | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/conf/modules.config b/conf/modules.config
index 2b05f1e7..197e8266 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -75,7 +75,7 @@ if (params.aligner == "alevin") {
                 path: { "${params.outdir}/${params.aligner}" },
                 mode: params.publish_dir_mode
             ]
-            ext.args = "--rlen ${params.simpleaf_rlen}"
+            ext.args = { "--rlen ${params.simpleaf_rlen}" }
         }
         withName: 'SIMPLEAF_QUANT' {
             publishDir = [

From 11a8de57b3fa6240088c4d08a24c510b622eae34 Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alex.peltzer@gmail.com>
Date: Wed, 5 Oct 2022 09:44:18 +0000
Subject: [PATCH 143/165] Use Python 3.7 in linting workflow

---
 .github/workflows/linting.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml
index 77358dee..f8fa2595 100644
--- a/.github/workflows/linting.yml
+++ b/.github/workflows/linting.yml
@@ -50,7 +50,7 @@ jobs:
 
       - uses: actions/setup-python@v3
         with:
-          python-version: "3.6"
+          python-version: "3.7"
           architecture: "x64"
 
       - name: Install dependencies

From e7b79e2355e57f5fe598f815da2ecf55696728d6 Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alex.peltzer@gmail.com>
Date: Wed, 5 Oct 2022 11:19:02 +0000
Subject: [PATCH 144/165] Fixed black linting

---
 bin/cellranger_mtx_to_h5ad.py |  4 +---
 bin/check_samplesheet.py      | 26 ++++++--------------------
 bin/concat_h5ad.py            |  9 ++-------
 bin/mtx_to_h5ad.py            |  8 ++------
 bin/t2g.py                    |  8 ++------
 5 files changed, 13 insertions(+), 42 deletions(-)

diff --git a/bin/cellranger_mtx_to_h5ad.py b/bin/cellranger_mtx_to_h5ad.py
index ca21f50d..84305fa3 100755
--- a/bin/cellranger_mtx_to_h5ad.py
+++ b/bin/cellranger_mtx_to_h5ad.py
@@ -21,9 +21,7 @@ def mtx_to_adata(mtx_h5: str, sample: str, verbose: bool = False):
     parser = argparse.ArgumentParser(description="Converts mtx output to h5ad.")
 
     parser.add_argument("-m", "--mtx", dest="mtx", help="Path to mtx h5 file.")
-    parser.add_argument(
-        "-v", "--verbose", dest="verbose", help="Toggle verbose messages", default=False
-    )
+    parser.add_argument("-v", "--verbose", dest="verbose", help="Toggle verbose messages", default=False)
     parser.add_argument("-s", "--sample", dest="sample", help="Sample name")
     parser.add_argument("-o", "--out", dest="out", help="Output path.")
 
diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py
index 9f6a37c3..1e35db6f 100755
--- a/bin/check_samplesheet.py
+++ b/bin/check_samplesheet.py
@@ -86,9 +86,7 @@ def _validate_sample(self, row):
 
     def _validate_first(self, row):
         """Assert that the first FASTQ entry is non-empty and has the right format."""
-        assert (
-            len(row[self._first_col]) > 0
-        ), "At least the first FASTQ file is required."
+        assert len(row[self._first_col]) > 0, "At least the first FASTQ file is required."
         self._validate_fastq_format(row[self._first_col])
 
     def _validate_second(self, row):
@@ -101,8 +99,7 @@ def _validate_pair(self, row):
         if row[self._first_col] and row[self._second_col]:
             row[self._single_col] = False
             assert (
-                Path(row[self._first_col]).suffixes[-2:]
-                == Path(row[self._second_col]).suffixes[-2:]
+                Path(row[self._first_col]).suffixes[-2:] == Path(row[self._second_col]).suffixes[-2:]
             ), "FASTQ pairs must have the same file extensions."
         else:
             row[self._single_col] = True
@@ -123,9 +120,7 @@ def validate_unique_samples(self):
         number of times the same sample exist, but with different FASTQ files, e.g., multiple runs per experiment.
 
         """
-        assert len(self._seen) == len(
-            self.modified
-        ), "The pair of sample name and FASTQ must be unique."
+        assert len(self._seen) == len(self.modified), "The pair of sample name and FASTQ must be unique."
         if len({pair[0] for pair in self._seen}) < len(self._seen):
             counts = Counter(pair[0] for pair in self._seen)
             seen = Counter()
@@ -206,11 +201,7 @@ def check_samplesheet(file_in, file_out):
         HEADER = ["sample", "fastq_1", "fastq_2"]
         header = [x.strip('"') for x in fin.readline().strip().split(",")]
         if header[: len(HEADER)] != HEADER:
-            print(
-                "ERROR: Please check samplesheet header -> {} != {}".format(
-                    ",".join(header), ",".join(HEADER)
-                )
-            )
+            print("ERROR: Please check samplesheet header -> {} != {}".format(",".join(header), ",".join(HEADER)))
             sys.exit(1)
 
         ## Check sample entries
@@ -227,9 +218,7 @@ def check_samplesheet(file_in, file_out):
             num_cols = len([x for x in lspl if x])
             if num_cols < MIN_COLS:
                 print_error(
-                    "Invalid number of populated columns (minimum = {})!".format(
-                        MIN_COLS
-                    ),
+                    "Invalid number of populated columns (minimum = {})!".format(MIN_COLS),
                     "Line",
                     line,
                 )
@@ -278,10 +267,7 @@ def check_samplesheet(file_in, file_out):
             for sample in sorted(sample_mapping_dict.keys()):
 
                 ## Check that multiple runs of the same sample are of the same datatype
-                if not all(
-                    x[0] == sample_mapping_dict[sample][0][0]
-                    for x in sample_mapping_dict[sample]
-                ):
+                if not all(x[0] == sample_mapping_dict[sample][0][0] for x in sample_mapping_dict[sample]):
                     print_error(
                         "Multiple runs of a sample must be of the same datatype!",
                         "Sample: {}".format(sample),
diff --git a/bin/concat_h5ad.py b/bin/concat_h5ad.py
index 5d235eac..9be60b13 100755
--- a/bin/concat_h5ad.py
+++ b/bin/concat_h5ad.py
@@ -18,9 +18,7 @@ def read_samplesheet(samplesheet):
 
 if __name__ == "__main__":
 
-    parser = argparse.ArgumentParser(
-        description="Concatenates h5ad files and merge metadata from samplesheet"
-    )
+    parser = argparse.ArgumentParser(description="Concatenates h5ad files and merge metadata from samplesheet")
 
     parser.add_argument("-i", "--input", dest="input", help="Path to samplesheet.csv")
     parser.add_argument("-o", "--out", dest="out", help="Output path.")
@@ -37,10 +35,7 @@ def read_samplesheet(samplesheet):
     df_samplesheet = read_samplesheet(args["input"])
 
     # find all h5ad and append to dict
-    dict_of_h5ad = {
-        str(path).replace(args["suffix"], ""): sc.read_h5ad(path)
-        for path in Path(".").rglob("*.h5ad")
-    }
+    dict_of_h5ad = {str(path).replace(args["suffix"], ""): sc.read_h5ad(path) for path in Path(".").rglob("*.h5ad")}
 
     # concat h5ad files
     adata = ad.concat(dict_of_h5ad, label="sample", merge="unique", index_unique="_")
diff --git a/bin/mtx_to_h5ad.py b/bin/mtx_to_h5ad.py
index 9d3346a9..1e79aa22 100755
--- a/bin/mtx_to_h5ad.py
+++ b/bin/mtx_to_h5ad.py
@@ -33,16 +33,12 @@ def mtx_to_adata(
     parser = argparse.ArgumentParser(description="Converts mtx output to h5ad.")
 
     parser.add_argument("-m", "--mtx", dest="mtx", help="Path to mtx file.")
-    parser.add_argument(
-        "-v", "--verbose", dest="verbose", help="Toggle verbose messages", default=False
-    )
+    parser.add_argument("-v", "--verbose", dest="verbose", help="Toggle verbose messages", default=False)
     parser.add_argument("-f", "--feature", dest="feature", help="Path to feature file.")
     parser.add_argument("-b", "--barcode", dest="barcode", help="Path to barcode file.")
     parser.add_argument("-s", "--sample", dest="sample", help="Sample name")
     parser.add_argument("-o", "--out", dest="out", help="Output path.")
-    parser.add_argument(
-        "-a", "--aligner", dest="aligner", help="Which aligner has been used?"
-    )
+    parser.add_argument("-a", "--aligner", dest="aligner", help="Which aligner has been used?")
 
     args = vars(parser.parse_args())
 
diff --git a/bin/t2g.py b/bin/t2g.py
index 5daf3df5..efa9f0f7 100755
--- a/bin/t2g.py
+++ b/bin/t2g.py
@@ -94,14 +94,10 @@ def print_output(output, r, use_name=True):
         action="store_true",
         help="Use version numbers in transcript and gene ids",
     )
-    parser.add_argument(
-        "--skip_gene_names", "-s", action="store_true", help="Do not output gene names"
-    )
+    parser.add_argument("--skip_gene_names", "-s", action="store_true", help="Do not output gene names")
     args = parser.parse_args()
 
     input = sys.stdin
-    r = create_transcript_list(
-        input, use_name=not args.skip_gene_names, use_version=args.use_version
-    )
+    r = create_transcript_list(input, use_name=not args.skip_gene_names, use_version=args.use_version)
     output = sys.stdout
     print_output(output, r)

From 5c9c9ade5346917f62f1acb7bec37a57a67d8f26 Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alex.peltzer@gmail.com>
Date: Wed, 5 Oct 2022 11:31:54 +0000
Subject: [PATCH 145/165] Fix linting, new module structure

---
 modules.json                                  |  95 ++++++++--------
 modules/local/simpleaf_index.nf               |   2 +-
 modules/local/simpleaf_quant.nf               |   2 +-
 .../{modules => }/cellranger/count/main.nf    |   0
 .../{modules => }/cellranger/count/meta.yml   |   0
 .../{modules => }/cellranger/mkgtf/main.nf    |   0
 .../{modules => }/cellranger/mkgtf/meta.yml   |   0
 .../{modules => }/cellranger/mkref/main.nf    |   2 +-
 .../{modules => }/cellranger/mkref/meta.yml   |   0
 .../templates/dumpsoftwareversions.py         | 102 ++++++++++--------
 modules/nf-core/{modules => }/gffread/main.nf |   0
 .../nf-core/{modules => }/gffread/meta.yml    |   0
 modules/nf-core/{modules => }/gunzip/main.nf  |   2 +-
 modules/nf-core/{modules => }/gunzip/meta.yml |   0
 .../kallistobustools/count/main.nf            |   0
 .../kallistobustools/count/meta.yml           |   0
 .../kallistobustools/ref/main.nf              |   0
 .../kallistobustools/ref/meta.yml             |   0
 .../{modules => }/star/genomegenerate/main.nf |  28 +++++
 .../star/genomegenerate/meta.yml              |   0
 subworkflows/local/alevin.nf                  |   4 +-
 subworkflows/local/align_cellranger.nf        |   6 +-
 subworkflows/local/fastqc.nf                  |   2 +-
 subworkflows/local/kallisto_bustools.nf       |   6 +-
 subworkflows/local/starsolo.nf                |   4 +-
 25 files changed, 145 insertions(+), 110 deletions(-)
 rename modules/nf-core/{modules => }/cellranger/count/main.nf (100%)
 rename modules/nf-core/{modules => }/cellranger/count/meta.yml (100%)
 rename modules/nf-core/{modules => }/cellranger/mkgtf/main.nf (100%)
 rename modules/nf-core/{modules => }/cellranger/mkgtf/meta.yml (100%)
 rename modules/nf-core/{modules => }/cellranger/mkref/main.nf (98%)
 rename modules/nf-core/{modules => }/cellranger/mkref/meta.yml (100%)
 mode change 100644 => 100755 modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py
 rename modules/nf-core/{modules => }/gffread/main.nf (100%)
 rename modules/nf-core/{modules => }/gffread/meta.yml (100%)
 rename modules/nf-core/{modules => }/gunzip/main.nf (97%)
 rename modules/nf-core/{modules => }/gunzip/meta.yml (100%)
 rename modules/nf-core/{modules => }/kallistobustools/count/main.nf (100%)
 rename modules/nf-core/{modules => }/kallistobustools/count/meta.yml (100%)
 rename modules/nf-core/{modules => }/kallistobustools/ref/main.nf (100%)
 rename modules/nf-core/{modules => }/kallistobustools/ref/meta.yml (100%)
 rename modules/nf-core/{modules => }/star/genomegenerate/main.nf (75%)
 rename modules/nf-core/{modules => }/star/genomegenerate/meta.yml (100%)

diff --git a/modules.json b/modules.json
index e18acd52..4b49090a 100644
--- a/modules.json
+++ b/modules.json
@@ -2,56 +2,53 @@
     "name": "nf-core/scrnaseq",
     "homePage": "https://github.com/nf-core/scrnaseq",
     "repos": {
-        "nf-core/modules": {
-            "git_url": "https://github.com/nf-core/modules.git",
+        "https://github.com/nf-core/modules.git": {
             "modules": {
-                "cellranger/count": {
-                    "branch": "master",
-                    "git_sha": "8032833e6e0fd3edeb20d4121885112249f273c4"
-                },
-                "cellranger/mkgtf": {
-                    "branch": "master",
-                    "git_sha": "e499abe572bc7c1322dfa66b767b0e65fe8c62ca"
-                },
-                "cellranger/mkref": {
-                    "branch": "master",
-                    "git_sha": "61d87bb2bcbcd058e1af6e5f14bb821b8ae4ed3e"
-                },
-                "custom/dumpsoftwareversions": {
-                    "branch": "master",
-                    "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
-                },
-                "fastqc": {
-                    "branch": "master",
-                    "git_sha": "49b18b1639f4f7104187058866a8fab33332bdfe"
-                },
-                "gffread": {
-                    "branch": "master",
-                    "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
-                },
-                "gunzip": {
-                    "branch": "master",
-                    "git_sha": "9aadd9a6d3f5964476582319b3a1c54a3e3fe7c9"
-                },
-                "kallistobustools/ref": {
-                    "branch": "master",
-                    "git_sha": "e20e57f90b6787ac9a010a980cf6ea98bd990046"
-                },
-                "multiqc": {
-                    "branch": "master",
-                    "git_sha": "e5f8924fabf4c8380f55fb7aee89fd2c268161b1"
-                },
-                "salmon/index": {
-                    "branch": "master",
-                    "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
-                },
-                "salmon/quant": {
-                    "branch": "master",
-                    "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
-                },
-                "star/genomegenerate": {
-                    "branch": "master",
-                    "git_sha": "fb6c7bca3d55c19a793372513395e3a567bdd7ba"
+                "nf-core": {
+                    "cellranger/count": {
+                        "branch": "master",
+                        "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905"
+                    },
+                    "cellranger/mkgtf": {
+                        "branch": "master",
+                        "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905"
+                    },
+                    "cellranger/mkref": {
+                        "branch": "master",
+                        "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905"
+                    },
+                    "custom/dumpsoftwareversions": {
+                        "branch": "master",
+                        "git_sha": "8022c68e7403eecbd8ba9c49496f69f8c49d50f0"
+                    },
+                    "fastqc": {
+                        "branch": "master",
+                        "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905"
+                    },
+                    "gffread": {
+                        "branch": "master",
+                        "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905"
+                    },
+                    "gunzip": {
+                        "branch": "master",
+                        "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905"
+                    },
+                    "kallistobustools/count": {
+                        "branch": "master",
+                        "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905"
+                    },
+                    "kallistobustools/ref": {
+                        "branch": "master",
+                        "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905"
+                    },
+                    "multiqc": {
+                        "branch": "master",
+                        "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905"
+                    },
+                    "star/genomegenerate": {
+                        "branch": "master",
+                        "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905"
+                    }
                 }
             }
         }
diff --git a/modules/local/simpleaf_index.nf b/modules/local/simpleaf_index.nf
index 5bf54e7d..939e294d 100644
--- a/modules/local/simpleaf_index.nf
+++ b/modules/local/simpleaf_index.nf
@@ -2,7 +2,7 @@ process SIMPLEAF_INDEX {
     tag "$transcript_gtf"
     label "process_medium"
 
-    conda (params.enable_conda ? 'bioconda::simpleaf=0.5.1' : null)
+    conda (params.enable_conda ? 'bioconda::simpleaf=0.5.2' : null)
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
         'https://depot.galaxyproject.org/singularity/simpleaf:0.5.2--h9f5acd7_0' :
         'quay.io/biocontainers/simpleaf:0.5.2--h9f5acd7_0' }"
diff --git a/modules/local/simpleaf_quant.nf b/modules/local/simpleaf_quant.nf
index 8f7b91c6..31dc26c4 100644
--- a/modules/local/simpleaf_quant.nf
+++ b/modules/local/simpleaf_quant.nf
@@ -2,7 +2,7 @@ process SIMPLEAF_QUANT {
     tag "$meta.id"
     label 'process_high'
 
-    conda (params.enable_conda ? 'bioconda::simpleaf=0.5.1' : null)
+    conda (params.enable_conda ? 'bioconda::simpleaf=0.5.2' : null)
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
         'https://depot.galaxyproject.org/singularity/simpleaf:0.5.2--h9f5acd7_0' :
         'quay.io/biocontainers/simpleaf:0.5.2--h9f5acd7_0' }"
diff --git a/modules/nf-core/modules/cellranger/count/main.nf b/modules/nf-core/cellranger/count/main.nf
similarity index 100%
rename from modules/nf-core/modules/cellranger/count/main.nf
rename to modules/nf-core/cellranger/count/main.nf
diff --git a/modules/nf-core/modules/cellranger/count/meta.yml b/modules/nf-core/cellranger/count/meta.yml
similarity index 100%
rename from modules/nf-core/modules/cellranger/count/meta.yml
rename to modules/nf-core/cellranger/count/meta.yml
diff --git a/modules/nf-core/modules/cellranger/mkgtf/main.nf b/modules/nf-core/cellranger/mkgtf/main.nf
similarity index 100%
rename from modules/nf-core/modules/cellranger/mkgtf/main.nf
rename to modules/nf-core/cellranger/mkgtf/main.nf
diff --git a/modules/nf-core/modules/cellranger/mkgtf/meta.yml b/modules/nf-core/cellranger/mkgtf/meta.yml
similarity index 100%
rename from modules/nf-core/modules/cellranger/mkgtf/meta.yml
rename to modules/nf-core/cellranger/mkgtf/meta.yml
diff --git a/modules/nf-core/modules/cellranger/mkref/main.nf b/modules/nf-core/cellranger/mkref/main.nf
similarity index 98%
rename from modules/nf-core/modules/cellranger/mkref/main.nf
rename to modules/nf-core/cellranger/mkref/main.nf
index e1bfebde..df27d2bd 100644
--- a/modules/nf-core/modules/cellranger/mkref/main.nf
+++ b/modules/nf-core/cellranger/mkref/main.nf
@@ -1,5 +1,5 @@
 process CELLRANGER_MKREF {
-    tag 'mkref'
+    tag "$fasta"
     label 'process_high'
 
     if (params.enable_conda) {
diff --git a/modules/nf-core/modules/cellranger/mkref/meta.yml b/modules/nf-core/cellranger/mkref/meta.yml
similarity index 100%
rename from modules/nf-core/modules/cellranger/mkref/meta.yml
rename to modules/nf-core/cellranger/mkref/meta.yml
diff --git a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py
old mode 100644
new mode 100755
index 787bdb7b..da033408
--- a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py
+++ b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py
@@ -1,12 +1,16 @@
 #!/usr/bin/env python
 
-import platform
-from textwrap import dedent
+
+"""Provide functions to merge multiple versions.yml files."""
+
 
 import yaml
+import platform
+from textwrap import dedent
 
 
 def _make_versions_html(versions):
+    """Generate a tabular HTML output of all versions for MultiQC."""
     html = [
         dedent(
             """\\
@@ -45,47 +49,53 @@ def _make_versions_html(versions):
     return "\\n".join(html)
 
 
-versions_this_module = {}
-versions_this_module["${task.process}"] = {
-    "python": platform.python_version(),
-    "yaml": yaml.__version__,
-}
-
-with open("$versions") as f:
-    versions_by_process = yaml.load(f, Loader=yaml.BaseLoader) | versions_this_module
-
-# aggregate versions by the module name (derived from fully-qualified process name)
-versions_by_module = {}
-for process, process_versions in versions_by_process.items():
-    module = process.split(":")[-1]
-    try:
-        if versions_by_module[module] != process_versions:
-            raise AssertionError(
-                "We assume that software versions are the same between all modules. "
-                "If you see this error-message it means you discovered an edge-case "
-                "and should open an issue in nf-core/tools. "
-            )
-    except KeyError:
-        versions_by_module[module] = process_versions
-
-versions_by_module["Workflow"] = {
-    "Nextflow": "$workflow.nextflow.version",
-    "$workflow.manifest.name": "$workflow.manifest.version",
-}
-
-versions_mqc = {
-    "id": "software_versions",
-    "section_name": "${workflow.manifest.name} Software Versions",
-    "section_href": "https://github.com/${workflow.manifest.name}",
-    "plot_type": "html",
-    "description": "are collected at run time from the software output.",
-    "data": _make_versions_html(versions_by_module),
-}
-
-with open("software_versions.yml", "w") as f:
-    yaml.dump(versions_by_module, f, default_flow_style=False)
-with open("software_versions_mqc.yml", "w") as f:
-    yaml.dump(versions_mqc, f, default_flow_style=False)
-
-with open("versions.yml", "w") as f:
-    yaml.dump(versions_this_module, f, default_flow_style=False)
+def main():
+    """Load all version files and generate merged output."""
+    versions_this_module = {}
+    versions_this_module["${task.process}"] = {
+        "python": platform.python_version(),
+        "yaml": yaml.__version__,
+    }
+
+    with open("$versions") as f:
+        versions_by_process = yaml.load(f, Loader=yaml.BaseLoader) | versions_this_module
+
+    # aggregate versions by the module name (derived from fully-qualified process name)
+    versions_by_module = {}
+    for process, process_versions in versions_by_process.items():
+        module = process.split(":")[-1]
+        try:
+            if versions_by_module[module] != process_versions:
+                raise AssertionError(
+                    "We assume that software versions are the same between all modules. "
+                    "If you see this error-message it means you discovered an edge-case "
+                    "and should open an issue in nf-core/tools. "
+                )
+        except KeyError:
+            versions_by_module[module] = process_versions
+
+    versions_by_module["Workflow"] = {
+        "Nextflow": "$workflow.nextflow.version",
+        "$workflow.manifest.name": "$workflow.manifest.version",
+    }
+
+    versions_mqc = {
+        "id": "software_versions",
+        "section_name": "${workflow.manifest.name} Software Versions",
+        "section_href": "https://github.com/${workflow.manifest.name}",
+        "plot_type": "html",
+        "description": "are collected at run time from the software output.",
+        "data": _make_versions_html(versions_by_module),
+    }
+
+    with open("software_versions.yml", "w") as f:
+        yaml.dump(versions_by_module, f, default_flow_style=False)
+    with open("software_versions_mqc.yml", "w") as f:
+        yaml.dump(versions_mqc, f, default_flow_style=False)
+
+    with open("versions.yml", "w") as f:
+        yaml.dump(versions_this_module, f, default_flow_style=False)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/modules/nf-core/modules/gffread/main.nf b/modules/nf-core/gffread/main.nf
similarity index 100%
rename from modules/nf-core/modules/gffread/main.nf
rename to modules/nf-core/gffread/main.nf
diff --git a/modules/nf-core/modules/gffread/meta.yml b/modules/nf-core/gffread/meta.yml
similarity index 100%
rename from modules/nf-core/modules/gffread/meta.yml
rename to modules/nf-core/gffread/meta.yml
diff --git a/modules/nf-core/modules/gunzip/main.nf b/modules/nf-core/gunzip/main.nf
similarity index 97%
rename from modules/nf-core/modules/gunzip/main.nf
rename to modules/nf-core/gunzip/main.nf
index 70367049..fa6ba26a 100644
--- a/modules/nf-core/modules/gunzip/main.nf
+++ b/modules/nf-core/gunzip/main.nf
@@ -1,6 +1,6 @@
 process GUNZIP {
     tag "$archive"
-    label 'process_low'
+    label 'process_single'
 
     conda (params.enable_conda ? "conda-forge::sed=4.7" : null)
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
diff --git a/modules/nf-core/modules/gunzip/meta.yml b/modules/nf-core/gunzip/meta.yml
similarity index 100%
rename from modules/nf-core/modules/gunzip/meta.yml
rename to modules/nf-core/gunzip/meta.yml
diff --git a/modules/nf-core/modules/kallistobustools/count/main.nf b/modules/nf-core/kallistobustools/count/main.nf
similarity index 100%
rename from modules/nf-core/modules/kallistobustools/count/main.nf
rename to modules/nf-core/kallistobustools/count/main.nf
diff --git a/modules/nf-core/modules/kallistobustools/count/meta.yml b/modules/nf-core/kallistobustools/count/meta.yml
similarity index 100%
rename from modules/nf-core/modules/kallistobustools/count/meta.yml
rename to modules/nf-core/kallistobustools/count/meta.yml
diff --git a/modules/nf-core/modules/kallistobustools/ref/main.nf b/modules/nf-core/kallistobustools/ref/main.nf
similarity index 100%
rename from modules/nf-core/modules/kallistobustools/ref/main.nf
rename to modules/nf-core/kallistobustools/ref/main.nf
diff --git a/modules/nf-core/modules/kallistobustools/ref/meta.yml b/modules/nf-core/kallistobustools/ref/meta.yml
similarity index 100%
rename from modules/nf-core/modules/kallistobustools/ref/meta.yml
rename to modules/nf-core/kallistobustools/ref/meta.yml
diff --git a/modules/nf-core/modules/star/genomegenerate/main.nf b/modules/nf-core/star/genomegenerate/main.nf
similarity index 75%
rename from modules/nf-core/modules/star/genomegenerate/main.nf
rename to modules/nf-core/star/genomegenerate/main.nf
index 3c298016..6ec634a1 100644
--- a/modules/nf-core/modules/star/genomegenerate/main.nf
+++ b/modules/nf-core/star/genomegenerate/main.nf
@@ -65,4 +65,32 @@ process STAR_GENOMEGENERATE {
         END_VERSIONS
         """
     }
+
+    stub:
+    """
+    mkdir star
+    touch star/Genome
+    touch star/Log.out
+    touch star/SA
+    touch star/SAindex
+    touch star/chrLength.txt
+    touch star/chrName.txt
+    touch star/chrNameLength.txt
+    touch star/chrStart.txt
+    touch star/exonGeTrInfo.tab
+    touch star/exonInfo.tab
+    touch star/geneInfo.tab
+    touch star/genomeParameters.txt
+    touch star/sjdbInfo.txt
+    touch star/sjdbList.fromGTF.out.tab
+    touch star/sjdbList.out.tab
+    touch star/transcriptInfo.tab
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        star: \$(STAR --version | sed -e "s/STAR_//g")
+        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+        gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//')
+    END_VERSIONS
+    """
 }
diff --git a/modules/nf-core/modules/star/genomegenerate/meta.yml b/modules/nf-core/star/genomegenerate/meta.yml
similarity index 100%
rename from modules/nf-core/modules/star/genomegenerate/meta.yml
rename to modules/nf-core/star/genomegenerate/meta.yml
diff --git a/subworkflows/local/alevin.nf b/subworkflows/local/alevin.nf
index 7db784ff..0eabc519 100644
--- a/subworkflows/local/alevin.nf
+++ b/subworkflows/local/alevin.nf
@@ -5,8 +5,8 @@ include { SIMPLEAF_INDEX        }             from '../../modules/local/simpleaf
 include { SIMPLEAF_QUANT        }             from '../../modules/local/simpleaf_quant'
 
 /* --    IMPORT NF-CORE MODULES/SUBWORKFLOWS   -- */
-include { GUNZIP }                      from '../../modules/nf-core/modules/gunzip/main'
-include { GFFREAD as GFFREAD_TXP2GENE } from '../../modules/nf-core/modules/gffread/main'
+include { GUNZIP }                      from '../../modules/nf-core/gunzip/main'
+include { GFFREAD as GFFREAD_TXP2GENE } from '../../modules/nf-core/gffread/main'
 
 def multiqc_report    = []
 
diff --git a/subworkflows/local/align_cellranger.nf b/subworkflows/local/align_cellranger.nf
index 744215e0..5452dc1e 100644
--- a/subworkflows/local/align_cellranger.nf
+++ b/subworkflows/local/align_cellranger.nf
@@ -2,9 +2,9 @@
  * Alignment with Cellranger
  */
 
-include {CELLRANGER_MKGTF} from "../../modules/nf-core/modules/cellranger/mkgtf/main.nf"
-include {CELLRANGER_MKREF} from "../../modules/nf-core/modules/cellranger/mkref/main.nf"
-include {CELLRANGER_COUNT} from "../../modules/nf-core/modules/cellranger/count/main.nf"
+include {CELLRANGER_MKGTF} from "../../modules/nf-core/cellranger/mkgtf/main.nf"
+include {CELLRANGER_MKREF} from "../../modules/nf-core/cellranger/mkref/main.nf"
+include {CELLRANGER_COUNT} from "../../modules/nf-core/cellranger/count/main.nf"
 include {MTX_TO_H5AD     } from "../../modules/local/mtx_to_h5ad.nf"
 
 // Define workflow to subset and index a genome region fasta file
diff --git a/subworkflows/local/fastqc.nf b/subworkflows/local/fastqc.nf
index 50f55d5b..f18214a1 100644
--- a/subworkflows/local/fastqc.nf
+++ b/subworkflows/local/fastqc.nf
@@ -1,7 +1,7 @@
 //
 // Check input samplesheet and get read channels
 //
-include { FASTQC } from '../../modules/nf-core/modules/fastqc/main'
+include { FASTQC } from '../../modules/nf-core/fastqc/main'
 
 workflow FASTQC_CHECK {
   take:
diff --git a/subworkflows/local/kallisto_bustools.nf b/subworkflows/local/kallisto_bustools.nf
index bc958b47..1e5318d1 100644
--- a/subworkflows/local/kallisto_bustools.nf
+++ b/subworkflows/local/kallisto_bustools.nf
@@ -1,10 +1,10 @@
 /* --    IMPORT LOCAL MODULES/SUBWORKFLOWS     -- */
 include { GENE_MAP }                          from '../../modules/local/gene_map'
-include {KALLISTOBUSTOOLS_COUNT }             from '../../modules/nf-core/modules/kallistobustools/count/main'
+include {KALLISTOBUSTOOLS_COUNT }             from '../../modules/nf-core/kallistobustools/count/main'
 
 /* --    IMPORT NF-CORE MODULES/SUBWORKFLOWS   -- */
-include { GUNZIP }                      from '../../modules/nf-core/modules/gunzip/main'
-include { KALLISTOBUSTOOLS_REF }        from '../../modules/nf-core/modules/kallistobustools/ref/main'
+include { GUNZIP }                      from '../../modules/nf-core/gunzip/main'
+include { KALLISTOBUSTOOLS_REF }        from '../../modules/nf-core/kallistobustools/ref/main'
 
 def multiqc_report    = []
 
diff --git a/subworkflows/local/starsolo.nf b/subworkflows/local/starsolo.nf
index 2c2f57dd..73a7d55a 100644
--- a/subworkflows/local/starsolo.nf
+++ b/subworkflows/local/starsolo.nf
@@ -2,8 +2,8 @@
 include { STAR_ALIGN }                  from '../../modules/local/star_align'
 
 /* --    IMPORT NF-CORE MODULES/SUBWORKFLOWS   -- */
-include { GUNZIP }                      from '../../modules/nf-core/modules/gunzip/main'
-include { STAR_GENOMEGENERATE }         from '../../modules/nf-core/modules/star/genomegenerate/main'
+include { GUNZIP }                      from '../../modules/nf-core/gunzip/main'
+include { STAR_GENOMEGENERATE }         from '../../modules/nf-core/star/genomegenerate/main'
 
 
 def multiqc_report    = []

From 84e41062f30a82c68db4b05cf4ce116f448754a2 Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alex.peltzer@gmail.com>
Date: Wed, 5 Oct 2022 11:34:24 +0000
Subject: [PATCH 146/165] Use public multiqc + add module for nf-core
 dumpsoftware versions

---
 modules/local/multiqc.nf | 39 ---------------------------------------
 workflows/scrnaseq.nf    |  4 ++--
 2 files changed, 2 insertions(+), 41 deletions(-)
 delete mode 100644 modules/local/multiqc.nf

diff --git a/modules/local/multiqc.nf b/modules/local/multiqc.nf
deleted file mode 100644
index f7e745a0..00000000
--- a/modules/local/multiqc.nf
+++ /dev/null
@@ -1,39 +0,0 @@
-process MULTIQC {
-    label 'process_medium'
-
-    conda (params.enable_conda ? 'bioconda::multiqc=1.11' : null)
-    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/multiqc:1.11--pyhdfd78af_0' :
-        'quay.io/biocontainers/multiqc:1.11--pyhdfd78af_0' }"
-
-    input:
-    path ch_multiqc_config
-    path ch_multiqc_custom_config
-    path software_versions_yaml
-    path workflow_summary
-    path ('fastqc/*')
-    path ("STAR/*")
-    path ("salmon_alevin/*")
-
-    output:
-    path "*multiqc_report.html", emit: report
-    path "*_data"              , emit: data
-    path "*_plots"             , optional:true, emit: plots
-    path "versions.yml"        , emit: versions
-
-    script:
-    def args = task.ext.args ?: ''
-    def custom_config = params.multiqc_config ? "--config $multiqc_custom_config" : ''
-    """
-    multiqc \\
-        -f \\
-        $args \\
-        $custom_config \\
-        .
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" )
-    END_VERSIONS
-    """
-}
diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf
index 9f1df868..8407c6f9 100644
--- a/workflows/scrnaseq.nf
+++ b/workflows/scrnaseq.nf
@@ -54,8 +54,8 @@ include { MTX_CONVERSION    } from "../subworkflows/local/mtx_conversion"
 //
 // MODULE: Installed directly from nf-core/modules
 //
-include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/modules/custom/dumpsoftwareversions/main'
-include { MULTIQC } from "../modules/local/multiqc"
+include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main'
+include { MULTIQC } from "../modules/nf-core/multiqc"
 
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

From 2dc2c0dbd18eb4c3e916d99757f439de147983c5 Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alex.peltzer@gmail.com>
Date: Wed, 5 Oct 2022 11:42:04 +0000
Subject: [PATCH 147/165] Some more fixes for new module structure

---
 workflows/scrnaseq.nf | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf
index 8407c6f9..6df59c3b 100644
--- a/workflows/scrnaseq.nf
+++ b/workflows/scrnaseq.nf
@@ -55,7 +55,7 @@ include { MTX_CONVERSION    } from "../subworkflows/local/mtx_conversion"
 // MODULE: Installed directly from nf-core/modules
 //
 include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main'
-include { MULTIQC } from "../modules/nf-core/multiqc"
+include { MULTIQC } from "../modules/nf-core/multiqc/main"
 
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -204,7 +204,7 @@ workflow SCRNASEQ {
     ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml'))
     ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml'))
     ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect())
-    ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([]))
+    ch_multiqc_files = ch_multiqc_files.mix(FASTQC_CHECK.out.fastqc_zip.collect{it[1]}.ifEmpty([]))
     ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_alevin)
     ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_star)
 

From eb3a09f30fae82110b555d97bdd43c34c5969bc4 Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alex.peltzer@gmail.com>
Date: Wed, 5 Oct 2022 11:48:25 +0000
Subject: [PATCH 148/165] Add more stuff to get lint warnings down

---
 modules/local/alevinqc.nf              | 3 +++
 modules/local/concat_h5ad.nf           | 3 +++
 modules/local/gene_map.nf              | 5 +++++
 modules/local/gffread_transcriptome.nf | 3 +++
 modules/local/mtx_to_h5ad.nf           | 3 +++
 modules/local/mtx_to_seurat.nf         | 3 +++
 modules/local/samplesheet_check.nf     | 4 ++++
 modules/local/simpleaf_index.nf        | 2 +-
 modules/local/simpleaf_quant.nf        | 3 +++
 modules/local/star_align.nf            | 3 +++
 10 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/modules/local/alevinqc.nf b/modules/local/alevinqc.nf
index 4e1a7d7e..0d9ef0b0 100644
--- a/modules/local/alevinqc.nf
+++ b/modules/local/alevinqc.nf
@@ -14,6 +14,9 @@ process ALEVINQC {
     tuple val(meta), path("alevin_report_${meta.id}.html"), emit: report
     path  "versions.yml", emit: versions
 
+    when:
+    task.ext.when == null || task.ext.when
+
     script:
     def prefix = task.ext.prefix ?: "${meta.id}"
     """
diff --git a/modules/local/concat_h5ad.nf b/modules/local/concat_h5ad.nf
index 3bcf1755..c753bce8 100644
--- a/modules/local/concat_h5ad.nf
+++ b/modules/local/concat_h5ad.nf
@@ -13,6 +13,9 @@ process CONCAT_H5AD {
     output:
     path "*.h5ad", emit: h5ad
 
+    when:
+    task.ext.when == null || task.ext.when
+
     script:
     """
     concat_h5ad.py \\
diff --git a/modules/local/gene_map.nf b/modules/local/gene_map.nf
index af1acee4..9a2a1ad3 100644
--- a/modules/local/gene_map.nf
+++ b/modules/local/gene_map.nf
@@ -3,6 +3,8 @@
  */
 process GENE_MAP {
     tag "$gtf"
+    label 'process_low'
+
 
     conda (params.enable_conda ? "conda-forge::python=3.8.3" : null)
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
@@ -15,6 +17,9 @@ process GENE_MAP {
     output:
     path "transcripts_to_genes.txt" , emit: gene_map
 
+    when:
+    task.ext.when == null || task.ext.when
+
     script:
     if("${gtf}".endsWith('.gz')){
         name = "${gtf.baseName}"
diff --git a/modules/local/gffread_transcriptome.nf b/modules/local/gffread_transcriptome.nf
index 77a17440..6e2a9ba4 100644
--- a/modules/local/gffread_transcriptome.nf
+++ b/modules/local/gffread_transcriptome.nf
@@ -15,6 +15,9 @@ process GFFREAD_TRANSCRIPTOME {
     path "${genome_fasta}.transcriptome.fa", emit: transcriptome_extracted
     path "versions.yml"                    , emit: versions
 
+    when:
+    task.ext.when == null || task.ext.when
+
     script:
     """
     gffread -F $gtf -w "${genome_fasta}.transcriptome.fa" -g $genome_fasta
diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf
index cf753f30..10bf0c88 100644
--- a/modules/local/mtx_to_h5ad.nf
+++ b/modules/local/mtx_to_h5ad.nf
@@ -15,6 +15,9 @@ process MTX_TO_H5AD {
     output:
     path "*.h5ad", emit: h5ad
 
+    when:
+    task.ext.when == null || task.ext.when
+
     script:
     // def file paths for aligners (except cellranger)
     if (params.aligner == 'kallisto') {
diff --git a/modules/local/mtx_to_seurat.nf b/modules/local/mtx_to_seurat.nf
index 3d834a2f..54208fdd 100644
--- a/modules/local/mtx_to_seurat.nf
+++ b/modules/local/mtx_to_seurat.nf
@@ -15,6 +15,9 @@ process MTX_TO_SEURAT {
     output:
     path "*.rds", emit: seuratObjects
 
+    when:
+    task.ext.when == null || task.ext.when
+
     script:
     def aligner = params.aligner
     if (params.aligner == "cellranger") {
diff --git a/modules/local/samplesheet_check.nf b/modules/local/samplesheet_check.nf
index f0a0b1f9..06595615 100644
--- a/modules/local/samplesheet_check.nf
+++ b/modules/local/samplesheet_check.nf
@@ -1,5 +1,6 @@
 process SAMPLESHEET_CHECK {
     tag "$samplesheet"
+    label 'process_low'
 
     conda (params.enable_conda ? "conda-forge::python=3.8.3" : null)
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
@@ -13,6 +14,9 @@ process SAMPLESHEET_CHECK {
     path '*.csv'       , emit: csv
     path "versions.yml", emit: versions
 
+    when:
+    task.ext.when == null || task.ext.when
+
     script: // This script is bundled with the pipeline, in nf-core/scrnaseq/bin/
     """
     check_samplesheet.py \\
diff --git a/modules/local/simpleaf_index.nf b/modules/local/simpleaf_index.nf
index 939e294d..f8f35c43 100644
--- a/modules/local/simpleaf_index.nf
+++ b/modules/local/simpleaf_index.nf
@@ -16,7 +16,7 @@ process SIMPLEAF_INDEX {
     path "salmon/index"              , emit: index
     path "salmon/ref/*_t2g_3col.tsv" , emit: transcript_tsv
     path "versions.yml"              , emit: versions
-    path "salmon"
+    path "salmon"                    , emit: salmon
 
     when:
     task.ext.when == null || task.ext.when
diff --git a/modules/local/simpleaf_quant.nf b/modules/local/simpleaf_quant.nf
index 31dc26c4..8fb6139b 100644
--- a/modules/local/simpleaf_quant.nf
+++ b/modules/local/simpleaf_quant.nf
@@ -22,6 +22,9 @@ process SIMPLEAF_QUANT {
     tuple val(meta), path("*_alevin_results"), emit: alevin_results
     path  "versions.yml"                     , emit: versions
 
+    when:
+    task.ext.when == null || task.ext.when
+
     script:
     def args      = task.ext.args ?: ''
     def args_list = args.tokenize()
diff --git a/modules/local/star_align.nf b/modules/local/star_align.nf
index c489d0b8..36f26bfe 100644
--- a/modules/local/star_align.nf
+++ b/modules/local/star_align.nf
@@ -33,6 +33,9 @@ process STAR_ALIGN {
     tuple val(meta), path('*fastq.gz')               , optional:true, emit: fastq
     tuple val(meta), path('*.tab')                   , optional:true, emit: tab
 
+    when:
+    task.ext.when == null || task.ext.when
+
     script:
     def args = task.ext.args ?: ''
     def prefix = task.ext.prefix ?: "${meta.id}"

From 71423454e4e6db3ee8bcfcdf9a8efe0546cafdff Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alex.peltzer@gmail.com>
Date: Wed, 5 Oct 2022 12:03:45 +0000
Subject: [PATCH 149/165] Added collect and ifEmpty construct

---
 workflows/scrnaseq.nf | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf
index 6df59c3b..93d2a3ca 100644
--- a/workflows/scrnaseq.nf
+++ b/workflows/scrnaseq.nf
@@ -205,8 +205,8 @@ workflow SCRNASEQ {
     ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml'))
     ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect())
     ch_multiqc_files = ch_multiqc_files.mix(FASTQC_CHECK.out.fastqc_zip.collect{it[1]}.ifEmpty([]))
-    ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_alevin)
-    ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_star)
+    ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_alevin.collect().ifEmpty([]))
+    ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_star.collect().ifEmpty([]))
 
     MULTIQC (
         ch_multiqc_files.collect(),

From e20e37993b0e293202cbb69306ec7a13cd178ef8 Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alex.peltzer@gmail.com>
Date: Wed, 5 Oct 2022 12:08:58 +0000
Subject: [PATCH 150/165] Fix mixing stuff

---
 workflows/scrnaseq.nf | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf
index 93d2a3ca..a2421246 100644
--- a/workflows/scrnaseq.nf
+++ b/workflows/scrnaseq.nf
@@ -75,8 +75,8 @@ ch_genome_fasta = params.genome_fasta ? file(params.genome_fasta) : []
 ch_gtf = params.gtf ? file(params.gtf) : []
 ch_transcript_fasta = params.transcript_fasta ? file(params.transcript_fasta): []
 ch_txp2gene = params.txp2gene ? file(params.txp2gene) : []
-ch_multiqc_alevin = []
-ch_multiqc_star = []
+ch_multiqc_alevin = Channel.empty()
+ch_multiqc_star = Channel.empty()
 if (params.barcode_whitelist) {
     ch_barcode_whitelist = file(params.barcode_whitelist)
 } else if (params.protocol.contains("10X")) {
@@ -205,8 +205,8 @@ workflow SCRNASEQ {
     ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml'))
     ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect())
     ch_multiqc_files = ch_multiqc_files.mix(FASTQC_CHECK.out.fastqc_zip.collect{it[1]}.ifEmpty([]))
-    ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_alevin.collect().ifEmpty([]))
-    ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_star.collect().ifEmpty([]))
+    ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_alevin.collect{it[1]}.ifEmpty([]))
+    ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_star.collect{it[1]}.ifEmpty([]))
 
     MULTIQC (
         ch_multiqc_files.collect(),

From 298233d17d428be262ff4497f0730cb06e1a52e4 Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alex.peltzer@gmail.com>
Date: Wed, 5 Oct 2022 12:43:17 +0000
Subject: [PATCH 151/165] Fix black

---
 bin/check_samplesheet.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py
index 03541ad4..8551a45e 100755
--- a/bin/check_samplesheet.py
+++ b/bin/check_samplesheet.py
@@ -140,6 +140,7 @@ def read_head(handle, num_lines=10):
         lines.append(line)
     return "".join(lines)
 
+
 def print_error(error, context="Line", context_str=""):
     error_str = f"ERROR: Please check samplesheet -> {error}"
     if context != "" and context_str != "":
@@ -147,6 +148,7 @@ def print_error(error, context="Line", context_str=""):
     print(error_str)
     sys.exit(1)
 
+
 def sniff_format(handle):
     """
     Detect the tabular format.

From ccd798c3605c244e0e87f7a69c1dfdc580585d6b Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alex.peltzer@gmail.com>
Date: Wed, 5 Oct 2022 18:33:38 +0000
Subject: [PATCH 152/165] Remove diff file

---
 .../modules/salmon/index/salmon-index.diff    | 36 -------------------
 1 file changed, 36 deletions(-)
 delete mode 100644 modules/nf-core/modules/salmon/index/salmon-index.diff

diff --git a/modules/nf-core/modules/salmon/index/salmon-index.diff b/modules/nf-core/modules/salmon/index/salmon-index.diff
deleted file mode 100644
index 87f976cd..00000000
--- a/modules/nf-core/modules/salmon/index/salmon-index.diff
+++ /dev/null
@@ -1,36 +0,0 @@
-Changes in module 'nf-core/modules/salmon/index'
---- modules/nf-core/modules/salmon/index/main.nf
-+++ modules/nf-core/modules/salmon/index/main.nf
-@@ -20,21 +20,28 @@
- 
-     script:
-     def args = task.ext.args ?: ''
-+    def kmer_argmatch = args =~ /\-k *(\d+)/
-+    def k = kmer_argmatch ? kmer_argmatch[0][1] : 31
-     def get_decoy_ids = "grep '^>' $genome_fasta | cut -d ' ' -f 1 > decoys.txt"
-     def gentrome      = "gentrome.fa"
-+    def maybe_unzip   = "cat"
-     if (genome_fasta.endsWith('.gz')) {
-         get_decoy_ids = "grep '^>' <(gunzip -c $genome_fasta) | cut -d ' ' -f 1 > decoys.txt"
-         gentrome      = "gentrome.fa.gz"
-+        maybe_unzip   = "gunzip -c" 
-     }
-     """
-     $get_decoy_ids
-     sed -i.bak -e 's/>//g' decoys.txt
--    cat $transcript_fasta $genome_fasta > $gentrome
-+    cat $transcript_fasta $genome_fasta \\
-+    | $maybe_unzip \\
-+    | awk '!/^>/ { next } { getline seq } length(seq) >= $k { print \$0 "\\n" seq }' \\
-+    | gzip -c > gentrome.filtered.fasta.gz
- 
-     salmon \\
-         index \\
-         --threads $task.cpus \\
--        -t $gentrome \\
-+        -t gentrome.filtered.fasta.gz \\
-         -d decoys.txt \\
-         $args \\
-         -i salmon
-
-************************************************************

From 8dd62e8b9f6c213f33d7441729ac67b5a28082d9 Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alex.peltzer@gmail.com>
Date: Wed, 5 Oct 2022 18:40:35 +0000
Subject: [PATCH 153/165] Chore

---
 modules/local/gene_map.nf              | 1 -
 modules/local/gffread_transcriptome.nf | 6 +++---
 modules/local/star_align.nf            | 6 +++---
 3 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/modules/local/gene_map.nf b/modules/local/gene_map.nf
index 9a2a1ad3..beca82a1 100644
--- a/modules/local/gene_map.nf
+++ b/modules/local/gene_map.nf
@@ -5,7 +5,6 @@ process GENE_MAP {
     tag "$gtf"
     label 'process_low'
 
-
     conda (params.enable_conda ? "conda-forge::python=3.8.3" : null)
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
         'https://depot.galaxyproject.org/singularity/python:3.8.3' :
diff --git a/modules/local/gffread_transcriptome.nf b/modules/local/gffread_transcriptome.nf
index 6e2a9ba4..04d6b412 100644
--- a/modules/local/gffread_transcriptome.nf
+++ b/modules/local/gffread_transcriptome.nf
@@ -2,10 +2,10 @@ process GFFREAD_TRANSCRIPTOME {
     tag "${genome_fasta}"
     label 'process_low'
 
-    conda (params.enable_conda ? "bioconda::gffread=0.12.1" : null)
+    conda (params.enable_conda ? "bioconda::gffread=0.12.7" : null)
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/gffread:0.12.1--h2e03b76_1' :
-        'quay.io/biocontainers/gffread:0.12.1--h2e03b76_1' }"
+        'https://depot.galaxyproject.org/singularity/gffread:0.12.7--hd03093a_1' :
+        'quay.io/biocontainers/gffread:0.12.7--hd03093a_1' }"
 
     input:
     path genome_fasta
diff --git a/modules/local/star_align.nf b/modules/local/star_align.nf
index 36f26bfe..da8246a4 100644
--- a/modules/local/star_align.nf
+++ b/modules/local/star_align.nf
@@ -2,10 +2,10 @@ process STAR_ALIGN {
     tag "$meta.id"
     label 'process_high'
 
-    conda (params.enable_conda ? 'bioconda::star=2.7.8a' : null)
+    conda (params.enable_conda ? 'bioconda::star=2.7.10a' : null)
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/star:2.7.8a--h9ee0642_1' :
-        'quay.io/biocontainers/star:2.7.8a--h9ee0642_1' }"
+        'https://depot.galaxyproject.org/singularity/star:2.7.10a--h9ee0642_0' :
+        'quay.io/biocontainers/star:2.7.10a--h9ee0642_0' }"
 
     input:
     //

From 55fab735603f6cd88bdb9886ec6a2712b9db274e Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alex.peltzer@gmail.com>
Date: Wed, 5 Oct 2022 18:41:11 +0000
Subject: [PATCH 154/165] Gene Filter label added

---
 modules/local/gtf_gene_filter.nf | 1 +
 1 file changed, 1 insertion(+)

diff --git a/modules/local/gtf_gene_filter.nf b/modules/local/gtf_gene_filter.nf
index 7f1a6aa1..82a0e092 100644
--- a/modules/local/gtf_gene_filter.nf
+++ b/modules/local/gtf_gene_filter.nf
@@ -1,5 +1,6 @@
 process GTF_GENE_FILTER {
     tag "$fasta"
+    label 'process_low'
 
     conda (params.enable_conda ? "conda-forge::python=3.9.5" : null)
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?

From 3d64158e310488cc91b52a55d2bc2f2305d811aa Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alex.peltzer@gmail.com>
Date: Thu, 6 Oct 2022 08:40:29 +0000
Subject: [PATCH 155/165] Bump to 2.1.0

---
 CHANGELOG.md    | 5 +++--
 nextflow.config | 2 +-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index a3fd4dab..7faf2e7a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,15 +3,16 @@
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
-## v2.0.1dev
+## v2.1.0 - 2022-10-06 "Green Mercury Siberian Husky"
 
+- Alevin workflow updated to use Alevin-Fry via simpleaf - thanks to @rob-p for supporting this and @fmalmeida implementing the support
 ### Fixes
 
 - Fixed Kallistobustools workflow [#123](https://github.com/nf-core/scrnaseq/issues/123) by upgrading to nf-core/modules module
 - Fixed matrix conversion error when running STAR with --soloFeatures GeneFull [#135](https://github.com/nf-core/scrnaseq/pull/135)
 - Fixed seurat matrix conversion error when running with conda profile [#136](https://github.com/nf-core/scrnaseq/pull/136)
 - Fixed Kallistobustools module [#116](https://github.com/nf-core/scrnaseq/issues/116). By updating nf-core module and making sure conversion modules take into account the different outputs produced by kallisto standard and non-standard workflows.
-- Updated pipeline template to [nf-core/tools 2.5.1](https://github.com/nf-core/tools/releases/tag/2.5.1)
+- Updated pipeline template to [nf-core/tools 2.6](https://github.com/nf-core/tools/releases/tag/2.6)
 
 ## v2.0.0 - 2022-06-17 "Gray Nickel Beagle"
 
diff --git a/nextflow.config b/nextflow.config
index 06b271a6..3c51af8c 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -217,7 +217,7 @@ manifest {
     description     = 'Pipeline for processing 10x Genomics single cell rnaseq data'
     mainScript      = 'main.nf'
     nextflowVersion = '!>=21.10.3'
-    version         = '2.0.1dev'
+    version = '2.1.0'
     doi             = ''
 }
 

From 8adc51c1433cd9baa405690e40b65b67c508699f Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alex.peltzer@gmail.com>
Date: Thu, 6 Oct 2022 08:40:37 +0000
Subject: [PATCH 156/165] Add Versions to Seurat

---
 modules/local/mtx_to_seurat.nf | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/modules/local/mtx_to_seurat.nf b/modules/local/mtx_to_seurat.nf
index 1c316ab8..0c3260e3 100644
--- a/modules/local/mtx_to_seurat.nf
+++ b/modules/local/mtx_to_seurat.nf
@@ -49,6 +49,15 @@ process MTX_TO_SEURAT {
             ${meta.id}_\${input_type}_matrix.rds \\
             ${aligner}
     done
+
+    yaml::write_yaml(
+    list(
+        '${task.process}'=list(
+            'Seurat' = paste(packageVersion('Seurat'), collapse='.')
+        )
+    ),
+    "versions.yml"
+    )
     """
 
     else
@@ -59,6 +68,15 @@ process MTX_TO_SEURAT {
         $features \\
         ${meta.id}_matrix.rds \\
         ${aligner}
+
+    yaml::write_yaml(
+    list(
+        '${task.process}'=list(
+            'Seurat' = paste(packageVersion('Seurat'), collapse='.')
+        )
+    ),
+    "versions.yml"
+    )
     """
 
     stub:

From d4eba286e1137038cb6c54f19e16fdd4389aac79 Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alex.peltzer@gmail.com>
Date: Thu, 6 Oct 2022 08:48:22 +0000
Subject: [PATCH 157/165] Add versions  to seurat

---
 modules/local/mtx_to_seurat.nf       | 1 +
 subworkflows/local/mtx_conversion.nf | 3 +++
 workflows/scrnaseq.nf                | 3 +++
 3 files changed, 7 insertions(+)

diff --git a/modules/local/mtx_to_seurat.nf b/modules/local/mtx_to_seurat.nf
index 0c3260e3..82e45594 100644
--- a/modules/local/mtx_to_seurat.nf
+++ b/modules/local/mtx_to_seurat.nf
@@ -14,6 +14,7 @@ process MTX_TO_SEURAT {
 
     output:
     path "*.rds", emit: seuratObjects
+    path  "versions.yml", emit: versions
 
     when:
     task.ext.when == null || task.ext.when
diff --git a/subworkflows/local/mtx_conversion.nf b/subworkflows/local/mtx_conversion.nf
index 731842c8..2f0b3887 100644
--- a/subworkflows/local/mtx_conversion.nf
+++ b/subworkflows/local/mtx_conversion.nf
@@ -32,4 +32,7 @@ workflow MTX_CONVERSION {
         mtx_matrices
     )
 
+    emit:
+    MTX_TO_SEURAT.out.versions
+
 }
diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf
index 64aba674..6dae8eb9 100644
--- a/workflows/scrnaseq.nf
+++ b/workflows/scrnaseq.nf
@@ -188,6 +188,9 @@ workflow SCRNASEQ {
         ch_input
     )
 
+    //Add Versions from MTX Conversion workflow too
+    ch_versions.mix(MTX_CONVERSION.out.versions)
+
     // collect software versions
     CUSTOM_DUMPSOFTWAREVERSIONS (
         ch_versions.unique().collectFile(name: 'collated_versions.yml')

From d8279f842504b0c92dfcb74699d904d67a7c6ee9 Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alex.peltzer@gmail.com>
Date: Thu, 6 Oct 2022 08:50:32 +0000
Subject: [PATCH 158/165] Add DOI to manifest

---
 nextflow.config | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nextflow.config b/nextflow.config
index 3c51af8c..1ca66cae 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -218,7 +218,7 @@ manifest {
     mainScript      = 'main.nf'
     nextflowVersion = '!>=21.10.3'
     version = '2.1.0'
-    doi             = ''
+    doi             = '10.5281/zenodo.3568187'
 }
 
 // Load modules.config for DSL2 module specific options

From cb9e21c364746646402c4671e9b2ce542d760916 Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alex.peltzer@gmail.com>
Date: Thu, 6 Oct 2022 08:53:39 +0000
Subject: [PATCH 159/165] Remove TODO

---
 assets/methods_description_template.yml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/assets/methods_description_template.yml b/assets/methods_description_template.yml
index 1e84fd63..1be6ed21 100644
--- a/assets/methods_description_template.yml
+++ b/assets/methods_description_template.yml
@@ -3,7 +3,6 @@ description: "Suggested text and references to use when describing pipeline usag
 section_name: "nf-core/scrnaseq Methods Description"
 section_href: "https://github.com/nf-core/scrnaseq"
 plot_type: "html"
-## TODO nf-core: Update the HTML below to your prefered methods description, e.g. add publication citation for this pipeline
 ## You inject any metadata in the Nextflow '${workflow}' object
 data: |
   <h4>Methods</h4>

From 4c9f99b9dca6c215b1372dac2c8c1fc053355d10 Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alex.peltzer@gmail.com>
Date: Thu, 6 Oct 2022 08:56:00 +0000
Subject: [PATCH 160/165] Better todo + small fix for versions

---
 subworkflows/local/mtx_conversion.nf | 5 ++++-
 workflows/scrnaseq.nf                | 2 +-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/subworkflows/local/mtx_conversion.nf b/subworkflows/local/mtx_conversion.nf
index 2f0b3887..924dc836 100644
--- a/subworkflows/local/mtx_conversion.nf
+++ b/subworkflows/local/mtx_conversion.nf
@@ -32,7 +32,10 @@ workflow MTX_CONVERSION {
         mtx_matrices
     )
 
+    //TODO CONCAT h5ad and MTX to h5ad should also have versions.yaml output
+    ch_version = MTX_TO_SEURAT.out.versions
+
     emit:
-    MTX_TO_SEURAT.out.versions
+    ch_versions
 
 }
diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf
index 6dae8eb9..2f469526 100644
--- a/workflows/scrnaseq.nf
+++ b/workflows/scrnaseq.nf
@@ -189,7 +189,7 @@ workflow SCRNASEQ {
     )
 
     //Add Versions from MTX Conversion workflow too
-    ch_versions.mix(MTX_CONVERSION.out.versions)
+    ch_versions = ch_versions.mix(MTX_CONVERSION.out.ch_versions)
 
     // collect software versions
     CUSTOM_DUMPSOFTWAREVERSIONS (

From b6d0d7596f50139af14b284cc98670f1ab22c87c Mon Sep 17 00:00:00 2001
From: nf-core-bot <core@nf-co.re>
Date: Thu, 6 Oct 2022 08:59:15 +0000
Subject: [PATCH 161/165] [automated] Fix linting with Prettier

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7faf2e7a..485ee9f5 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## v2.1.0 - 2022-10-06 "Green Mercury Siberian Husky"
 
 - Alevin workflow updated to use Alevin-Fry via simpleaf - thanks to @rob-p for supporting this and @fmalmeida implementing the support
+
 ### Fixes
 
 - Fixed Kallistobustools workflow [#123](https://github.com/nf-core/scrnaseq/issues/123) by upgrading to nf-core/modules module

From a182c389bfb1a7c36fbf2ab35bc7f26fc041df1e Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alex.peltzer@gmail.com>
Date: Thu, 6 Oct 2022 09:02:03 +0000
Subject: [PATCH 162/165] Small fix for versions

---
 modules/local/mtx_to_seurat.nf       |  2 +-
 subworkflows/local/mtx_conversion.nf | 48 ++++++++++++++--------------
 2 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/modules/local/mtx_to_seurat.nf b/modules/local/mtx_to_seurat.nf
index 82e45594..d3053b83 100644
--- a/modules/local/mtx_to_seurat.nf
+++ b/modules/local/mtx_to_seurat.nf
@@ -14,7 +14,7 @@ process MTX_TO_SEURAT {
 
     output:
     path "*.rds", emit: seuratObjects
-    path  "versions.yml", emit: versions
+    path "versions.yml", emit: versions
 
     when:
     task.ext.when == null || task.ext.when
diff --git a/subworkflows/local/mtx_conversion.nf b/subworkflows/local/mtx_conversion.nf
index 924dc836..c1be4c70 100644
--- a/subworkflows/local/mtx_conversion.nf
+++ b/subworkflows/local/mtx_conversion.nf
@@ -10,30 +10,30 @@ workflow MTX_CONVERSION {
     samplesheet
 
     main:
-    //
-    // Convert matrix do h5ad
-    //
-    MTX_TO_H5AD (
-        mtx_matrices
-    )
-
-    //
-    // Concat sample-specific h5ad in one
-    //
-    CONCAT_H5AD (
-        MTX_TO_H5AD.out.h5ad.collect(), // gather all sample-specific files
-        samplesheet
-    )
-
-    //
-    // Convert matrix do seurat
-    //
-    MTX_TO_SEURAT (
-        mtx_matrices
-    )
-
-    //TODO CONCAT h5ad and MTX to h5ad should also have versions.yaml output
-    ch_version = MTX_TO_SEURAT.out.versions
+        ch_versions = Channel.empty()
+        // Convert matrix to h5ad
+        //
+        MTX_TO_H5AD (
+            mtx_matrices
+        )
+
+        //
+        // Concat sample-specific h5ad in one
+        //
+        CONCAT_H5AD (
+            MTX_TO_H5AD.out.h5ad.collect(), // gather all sample-specific files
+            samplesheet
+        )
+
+        //
+        // Convert matrix to seurat
+        //
+        MTX_TO_SEURAT (
+            mtx_matrices
+        )
+
+        //TODO CONCAT h5ad and MTX to h5ad should also have versions.yaml output
+        ch_versions = ch_versions.mix(MTX_TO_SEURAT.out.versions)
 
     emit:
     ch_versions

From e4f1460c7587862e38fec9d7574ebff64ab34794 Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alex.peltzer@gmail.com>
Date: Thu, 6 Oct 2022 09:07:41 +0000
Subject: [PATCH 163/165] [skip ci] Add Rob to Readme as contributor

---
 README.md | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index 85bb84e6..5449c3ba 100644
--- a/README.md
+++ b/README.md
@@ -22,14 +22,10 @@ The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool
 
 On release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources. The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/scrnaseq/results).
 
-- Alevin + AlevinQC
-- STARSolo
-- Kallisto + BUStools
-- Cellranger
 
 This is a community effort in building a pipeline capable to support:
 
-- Alevin + AlevinQC
+- Alevin-Fry + AlevinQC
 - STARSolo
 - Kallisto + BUStools
 - Cellranger
@@ -71,6 +67,7 @@ We thank the following people for their extensive assistance in the development
 
 - @KevinMenden
 - @FloWuenne
+- @rob-p
 
 ## Contributions and Support
 

From 7ad294a9c8fb1e062a72dfb44e51348485d661e2 Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alex.peltzer@gmail.com>
Date: Thu, 6 Oct 2022 09:08:43 +0000
Subject: [PATCH 164/165] [skip ci] prettier

---
 README.md | 1 -
 1 file changed, 1 deletion(-)

diff --git a/README.md b/README.md
index 5449c3ba..562e8acd 100644
--- a/README.md
+++ b/README.md
@@ -22,7 +22,6 @@ The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool
 
 On release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources. The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/scrnaseq/results).
 
-
 This is a community effort in building a pipeline capable to support:
 
 - Alevin-Fry + AlevinQC

From c2bd430d1f6ee4153b8629bf600d9c81ed0385cb Mon Sep 17 00:00:00 2001
From: Alexander Peltzer <alex.peltzer@gmail.com>
Date: Thu, 6 Oct 2022 09:22:02 +0000
Subject: [PATCH 165/165] Move seurat to R script

---
 bin/mtx_to_seurat.R            | 10 ++++++++++
 modules/local/mtx_to_seurat.nf | 18 ------------------
 2 files changed, 10 insertions(+), 18 deletions(-)

diff --git a/bin/mtx_to_seurat.R b/bin/mtx_to_seurat.R
index f4ef6b8e..c5bada16 100755
--- a/bin/mtx_to_seurat.R
+++ b/bin/mtx_to_seurat.R
@@ -23,3 +23,13 @@ if(aligner %in% c("kallisto", "alevin")) {
 seurat.object <- CreateSeuratObject(counts = expression.matrix)
 
 saveRDS(seurat.object, file = out.file)
+
+
+yaml::write_yaml(
+list(
+    'MTX_TO_SEURAT'=list(
+        'Seurat' = paste(packageVersion('Seurat'), collapse='.')
+    )
+),
+"versions.yml"
+)
diff --git a/modules/local/mtx_to_seurat.nf b/modules/local/mtx_to_seurat.nf
index d3053b83..fc452bcf 100644
--- a/modules/local/mtx_to_seurat.nf
+++ b/modules/local/mtx_to_seurat.nf
@@ -50,15 +50,6 @@ process MTX_TO_SEURAT {
             ${meta.id}_\${input_type}_matrix.rds \\
             ${aligner}
     done
-
-    yaml::write_yaml(
-    list(
-        '${task.process}'=list(
-            'Seurat' = paste(packageVersion('Seurat'), collapse='.')
-        )
-    ),
-    "versions.yml"
-    )
     """
 
     else
@@ -69,15 +60,6 @@ process MTX_TO_SEURAT {
         $features \\
         ${meta.id}_matrix.rds \\
         ${aligner}
-
-    yaml::write_yaml(
-    list(
-        '${task.process}'=list(
-            'Seurat' = paste(packageVersion('Seurat'), collapse='.')
-        )
-    ),
-    "versions.yml"
-    )
     """
 
     stub: