diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ddf7f1da..11eab3db 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -48,7 +48,8 @@ jobs: NXF_VER: - "22.10.1" - "latest-everything" - profile: ["test_tcr", "test_no_umi", "test_nocluster", "test_fetchimgt", "test_assembled"] + profile: + ["test_tcr", "test_no_umi", "test_nocluster", "test_fetchimgt", "test_assembled_hs", "test_assembled_mm"] fail-fast: false steps: - name: Check out pipeline code diff --git a/.github/workflows/ci_immcantation.yml b/.github/workflows/ci_immcantation.yml index 934bd81c..8669923d 100644 --- a/.github/workflows/ci_immcantation.yml +++ b/.github/workflows/ci_immcantation.yml @@ -25,7 +25,12 @@ jobs: NXF_VER: - "22.10.1" - "latest-everything" - profile: ["test_assembled_immcantation_devel", "test_raw_immcantation_devel"] + profile: + [ + "test_assembled_immcantation_devel_hs", + "test_assembled_immcantation_devel_mm", + "test_raw_immcantation_devel", + ] fail-fast: false steps: - name: Check out pipeline code diff --git a/CHANGELOG.md b/CHANGELOG.md index e7e1fd26..13adb86f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,27 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). -## [3.1] - 2023-06-05 "Protego" +## [3.2.0dev] - + +### `Added` + +- [#268](https://github.com/nf-core/airrflow/pull/268) Added parameters for FindThreshold in `modules.config`. +- [#268](https://github.com/nf-core/airrflow/pull/268) Validate samplesheet also for `assembled` samplesheet. +- [#259](https://github.com/nf-core/airrflow/pull/259) Update to `EnchantR v0.1.3`. + +### `Fixed` + +- [#268](https://github.com/nf-core/airrflow/pull/268) Allows for uppercase and lowercase locus in samplesheet `pcr_target_locus`. +- [#259](https://github.com/nf-core/airrflow/pull/259) Samplesheet only allows data from one species. 
+- [#259](https://github.com/nf-core/airrflow/pull/259) Introduced fix for a too long command with hundreds of datasets. + +### `Dependencies` + +| Dependency | Old version | New version | +| ---------- | ----------- | ----------- | +| r-enchantr | 0.1.2 | 0.1.3 | + +## [3.1.0] - 2023-06-05 "Protego" ### `Added` diff --git a/assets/repertoire_comparison.Rmd b/assets/repertoire_comparison.Rmd index 16de9bd6..64d51a89 100644 --- a/assets/repertoire_comparison.Rmd +++ b/assets/repertoire_comparison.Rmd @@ -31,6 +31,7 @@ library(alakazam) library(shazam) library(stringr) library(plotly) +library(airr) theme_set(theme_bw(base_family = "ArialMT") + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), text = element_text(family="ArialMT"))) @@ -54,21 +55,10 @@ datadir <- "." Number of reads for each of the samples and number of sequences left after performing sequence assembly and alignment to reference data. The full table can be found under [Table_sequences_assembly](repertoire_comparison/Sequence_numbers_summary/Table_sequences_assembly.tsv). 
-```{r seq_numbers, echo=FALSE, warning=FALSE, results='asis'} -read_table <- function(tab_file){ - tab_seqs <- read.table(tab_file, header=TRUE, sep="\t", check.names = FALSE) - write.table(tab_seqs, file=paste0(seq_dir,"/Table_sequences_assembly.tsv"), sep="\t", quote=F, row.names=F) - } -tryCatch( {read_table("./Table_sequences.tsv")} , - error=function(e){message("No sequence numbers are available if starting with assembled reads.")} -) - -``` - - ```{r seq_numbers_plot, echo=FALSE, warning=FALSE, results='asis'} tryCatch( { tab_seqs <- read.table("./Table_sequences.tsv", header=TRUE, sep="\t", check.names = FALSE) + write.table(tab_seqs, file=paste0(seq_dir,"/Table_sequences_assembly.tsv"), sep="\t", quote=F, row.names=F) plot_table <- tidyr::pivot_longer(tab_seqs, cols=Sequences_R1:Igblast, @@ -88,6 +78,8 @@ tryCatch( { theme(axis.text.x= element_text(angle = 45)) ggplotly(seqs_plot) + + }, error=function(e){message("No sequence numbers are available if starting with assembled reads.")} ) @@ -144,33 +136,37 @@ ggplotly(seqs_plot_assembled) # in the current folder all_files <- system(paste0("find '", datadir, "' -name '*clone-pass.tsv'"), intern=T) -diversity_dir <- paste(outdir, "Diversity", sep="/") -abundance_dir <- paste(outdir, "Abundance", sep="/") vfamily_dir <- paste(outdir, "V_family", sep="/") -dir.create(diversity_dir) -dir.create(abundance_dir) dir.create(vfamily_dir) # Generate one big dataframe from all patient dataframes +col_select <- c( + "sample_id", "subject_id", "sequence_id", "clone_id", + "v_call", "d_call", "j_call", + "locus", + "junction", + "pcr_target_locus" +) +df_all <- dplyr::bind_rows(lapply(all_files, read_rearrangement, col_select=col_select)) -df_list = lapply(all_files, read.csv, sep="\t") - -df_all <- dplyr::bind_rows(df_list) # Remove underscores in these columns -df_all$subject_id <- sapply(df_all$subject_id, function(x) str_replace(as.character(x), "_", "")) -df_all$sample_id <- sapply(df_all$sample_id, function(x) 
str_replace(as.character(x), "_", "")) +df_all$subject_id <- stringr::str_replace_all(df_all$subject_id, "_", "") +df_all$sample_id <- stringr::str_replace_all(df_all$sample_id , "_", "") # Annotate sample and samplepop (sample + population) by add ing all the conditions df_all$subj_locus <- as.factor(paste(df_all$sample_id, df_all$subject_id, df_all$pcr_target_locus, sep="_")) -# Write table to file -write.table(df_all, paste0(outdir,"/all_data.tsv"), sep = "\t", quote=F, row.names = F, col.names = T) +# Uncomment to save a table with all the sequences across samples together +# write.table(df_all, paste0(outdir,"/all_data.tsv"), sep = "\t", quote=F, row.names = F, col.names = T) # Set number of bootrstraps -nboot = 200 +nboot <- 200 ``` + + + +```{r clonal_abundance, echo=FALSE, eval=FALSE} +# Set line above to eval=TRUE to include clonal abundance +diversity_dir <- paste(outdir, "Diversity", sep="/") +abundance_dir <- paste(outdir, "Abundance", sep="/") +dir.create(diversity_dir) +dir.create(abundance_dir) + abund <- estimateAbundance(df_all, group = "subj_locus", ci=0.95, nboot=nboot) abund@abundance$sample_id <- sapply(abund@abundance$subj_locus, function(x) unlist(strsplit(as.character(x), "_"))[1]) abund@abundance$subject_id <- sapply(abund@abundance$subj_locus, function(x) unlist(strsplit(as.character(x), "_"))[2]) @@ -208,12 +212,14 @@ p_ca ``` -```{r plot_abundance, include = FALSE} +```{r plot_abundance, include = FALSE, eval=FALSE} +# Set to eval=TRUE to include clonal abundance ggsave(plot=p_ca, filename = paste0(abundance_dir,"/Clonal_abundance_subject.pdf"), device="pdf", width = 25, height = 10, units="cm") ggsave(plot=p_ca, filename = paste0(abundance_dir,"/Clonal_abundance_subject.png"), device="png", width = 25, height = 10, units="cm") write.table(abund@abundance, file = paste0(abundance_dir, "/Clonal_abundance_data_subject.tsv"), sep="\t", quote = F, row.names = F) ``` + - -```{r clonal_diversity, echo = 
FALSE, eval=FALSE} +# Set line above to eval=TRUE to include clonal diversity sample_div <- alphaDiversity(abund, group="subj_locus", min_q=0, max_q=4, step_q=0.05, ci=0.95, nboot=nboot) sample_main <- paste0("Sample diversity (N=", sample_div@n[1], ")") @@ -273,12 +280,14 @@ div_p <- ggplot(sample_div@diversity, aes(x = q, y = d, group=sample_id)) + div_p ``` -```{r plot_diversity, include = FALSE} +```{r plot_diversity, include = FALSE, eval=FALSE} +# Set to eval=TRUE to include clonal diversity ggsave(plot=div_p, filename=paste0(diversity_dir,"/Diversity_patient_grid.png"), device="png", width = 25, height = 10, units="cm") ggsave(plot=div_p, filename=paste0(diversity_dir,"/Diversity_patient_grid.pdf"), device="pdf", width = 25, height = 10, units="cm") write.table(sample_div@diversity, file = paste0(diversity_dir, "/Clonal_diversity_data_subject.tsv"), sep="\t", quote = F, row.names = F) ``` + # V gene usage ## V gene family usage diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py index a75fb3c5..bc686deb 100755 --- a/bin/check_samplesheet.py +++ b/bin/check_samplesheet.py @@ -15,7 +15,8 @@ def parse_args(args=None): Epilog = "Example usage: python check_samplesheet.py " parser = argparse.ArgumentParser(description=Description, epilog=Epilog) - parser.add_argument("FILE_IN", help="Input samplesheet file.") + parser.add_argument("file_in", help="Input samplesheet file.") + parser.add_argument("-a", "--assembled", help="Input samplesheet type", action="store_true", default=False) return parser.parse_args(args) @@ -38,22 +39,22 @@ def print_error(error, context="Line", context_str=""): sys.exit(1) -def check_samplesheet(file_in): +def check_samplesheet(file_in, assembled): """ This function checks that the samplesheet: - contains the compulsory fields: sample_id, filename_R1, filename_R2, subject_id, pcr_target_locus, species, single_cell - sample ids are unique - samples from the same subject come from the same species - - pcr_target_locus is "IG" 
or "TR" + - pcr_target_locus is "IG"/"ig" or "TR"/"tr" - species is "human" or "mouse" """ sample_run_dict = {} with open(file_in, "r") as fin: - ## Check that required columns are present + # Defining minimum columns and required columns min_cols = 7 - required_columns = [ + required_columns_raw = [ "sample_id", "filename_R1", "filename_R2", @@ -66,7 +67,19 @@ def check_samplesheet(file_in): "biomaterial_provider", "age", ] - no_whitespaces = [ + required_columns_assembled = [ + "sample_id", + "filename", + "subject_id", + "species", + "pcr_target_locus", + "single_cell", + "sex", + "tissue", + "biomaterial_provider", + "age", + ] + no_whitespaces_raw = [ "sample_id", "filename_R1", "filename_R2", @@ -75,13 +88,52 @@ def check_samplesheet(file_in): "pcr_target_locus", "tissue", ] + no_whitespaces_assembled = [ + "sample_id", + "filename", + "subject_id", + "species", + "pcr_target_locus", + "tissue", + ] + + ## Read header header = [x.strip('"') for x in fin.readline().strip().split("\t")] - for col in required_columns: - if col not in header: - print("ERROR: Please check samplesheet header: {} ".format(",".join(header))) - print("Header is missing column {}".format(col)) - print("Header must contain columns {}".format("\t".join(required_columns))) - raise IndexError("Header must contain columns {}".format("\t".join(required_columns))) + ## Read tab + tab = pd.read_csv(file_in, sep="\t", header=0) + + # Check that all required columns for assembled and raw samplesheets are there, and do not contain whitespaces + if assembled: + for col in required_columns_assembled: + if col not in header: + print("ERROR: Please check samplesheet header: {} ".format(",".join(header))) + print("Header is missing column {}".format(col)) + print("Header must contain columns {}".format("\t".join(required_columns_assembled))) + raise IndexError("Header must contain columns {}".format("\t".join(required_columns_assembled))) + for col in no_whitespaces_assembled: + values = tab[col].tolist() + if 
any([re.search(r"\s+", s) for s in values]): + print_error( + "The column {} contains values with whitespaces. Please ensure that there are no tabs, spaces or any other whitespaces in these columns as well: {}".format( + col, no_whitespaces_assembled + ) + ) + + else: + for col in required_columns_raw: + if col not in header: + print("ERROR: Please check samplesheet header: {} ".format(",".join(header))) + print("Header is missing column {}".format(col)) + print("Header must contain columns {}".format("\t".join(required_columns_raw))) + raise IndexError("Header must contain columns {}".format("\t".join(required_columns_raw))) + for col in no_whitespaces_raw: + values = tab[col].tolist() + if any([re.search(r"\s+", s) for s in values]): + print_error( + "The column {} contains values with whitespaces. Please ensure that there are no tabs, spaces or any other whitespaces in these columns as well: {}".format( + col, no_whitespaces_raw + ) + ) ## Check that rows have the same fields as header, and at least the compulsory ones are provided for line_num, line in enumerate(fin): @@ -103,7 +155,6 @@ def check_samplesheet(file_in): ) ## Check that sample ids are unique - tab = pd.read_csv(file_in, sep="\t", header=0) if len(tab["sample_id"]) != len(set(tab["sample_id"])): print_error( "Sample IDs are not unique! The sample IDs in the input samplesheet should be unique for each sample." @@ -111,7 +162,7 @@ def check_samplesheet(file_in): ## Check that pcr_target_locus is IG or TR for val in tab["pcr_target_locus"]: - if val not in ["IG", "TR"]: + if str(val).upper() not in ["IG", "TR"]: print_error("pcr_target_locus must be one of: IG, TR.") ## Check that species is human or mouse @@ -129,20 +180,10 @@ def check_samplesheet(file_in): "The same subject_id cannot belong to different species! Check input file columns 'subject_id' and 'species'." 
) - ## Check that values do not contain spaces in the no whitespaces columns - for col in no_whitespaces: - values = tab[col].tolist() - if any([re.search(r"\s+", s) for s in values]): - print_error( - "The column {} contains values with whitespaces. Please ensure that there are no tabs, spaces or any other whitespaces in these columns as well: {}".format( - col, no_whitespaces - ) - ) - def main(args=None): args = parse_args(args) - check_samplesheet(args.FILE_IN) + check_samplesheet(args.file_in, args.assembled) if __name__ == "__main__": diff --git a/bin/reveal_filter_quality.R b/bin/reveal_filter_quality.R index fb97afb8..5ed0258c 100755 --- a/bin/reveal_filter_quality.R +++ b/bin/reveal_filter_quality.R @@ -89,12 +89,16 @@ if (!is.null(opt$OUTPUT)) { } else { output_fn <- sub(".tsv$", "_quality-pass.tsv", basename(opt$REPERTOIRE)) } -write_rearrangement(db[filter_pass, ], file = output_fn) +# don't write if empty +if (sum(filter_pass)>0) { + write_rearrangement(db[filter_pass, ], file = output_fn) +} # cat(" TOTAL_GROUPS> ", n_groups, "\n", sep=" ", file = file.path(out_dir, log_verbose_name), append=TRUE) write("START> FilterQuality", stdout()) write(paste0("FILE> ", basename(opt$REPERTOIRE)), stdout()) +# even if output file not written, because empty, keep track in log write(paste0("OUTPUT> ", basename(output_fn)), stdout()) write(paste0("PASS> ", sum(filter_pass)), stdout()) write(paste0("FAIL> ", sum(!filter_pass) + sum(filter_na)), stdout()) diff --git a/conf/modules.config b/conf/modules.config index d16975a4..cdaa8c78 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -35,6 +35,16 @@ process { ] } + // Validate input assembled + withName: SAMPLESHEET_CHECK_ASSEMBLED { + publishDir = [ + path: { "${params.outdir}/pipeline_info" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + ext.args = '--assembled' + } + withName: 'FASTP' { publishDir = [ [ @@ -286,7 +296,7 @@ process { mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] - ext.args = '--if sequence_id --sf sequence --mf cell_id consensus_count duplicate_count c_call c_cigar c_sequence_start c_sequence_end' + ext.args = '--if sequence_id --sf sequence --mf cell_id consensus_count duplicate_count' } @@ -398,6 +408,11 @@ process { mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] + ext.args = ['findthreshold_method':'gmm', + 'findthreshold_model':'gamma-norm', + 'findthreshold_edge':0.9, + 'findthreshold_cutoff':'user', + 'findthreshold_spc':0.995] } withName: REPORT_THRESHOLD { @@ -428,7 +443,7 @@ process { ] ext.args = ['outname':'', 'model':'hierarchical', 'method':'nt', 'linkage':'single', - 'skip_convergence':true, + 'skip_convergence':false, 'outputby':'sample_id', 'min_n':30] } @@ -438,7 +453,10 @@ process { mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] - ext.args = ['build':'igphyml'] + ext.args = ['build':'igphyml', + 'minseq':5, + 'traits':'c_call', + 'tips':'c_call'] } // ------------------------------- diff --git a/conf/test_assembled.config b/conf/test_assembled_hs.config similarity index 94% rename from conf/test_assembled.config rename to conf/test_assembled_hs.config index 8d3e5e10..602f5462 100644 --- a/conf/test_assembled.config +++ b/conf/test_assembled_hs.config @@ -18,7 +18,7 @@ params { // Input data mode = 'assembled' - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-reveal/test_reveal_metadata.tsv' + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-reveal/test_assembled_metadata_hs.tsv' imgtdb_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/imgtdb_base.zip' igblast_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/igblast_base.zip' diff --git a/conf/test_assembled_immcantation_devel.config b/conf/test_assembled_immcantation_devel_hs.config similarity index 84% rename from conf/test_assembled_immcantation_devel.config rename to conf/test_assembled_immcantation_devel_hs.config index 61ddeeca..dad18d47 100644 --- a/conf/test_assembled_immcantation_devel.config +++ b/conf/test_assembled_immcantation_devel_hs.config @@ -4,12 +4,12 @@ * ------------------------------------------------- * Defines bundled input files and everything required * to run a fast and simple test. 
Use as follows: - * nextflow run nf-core/airrflow -profile test_assembled_immcantation_devel, + * nextflow run nf-core/airrflow -profile test_assembled_immcantation_devel_hs, */ params { config_profile_name = 'Test assembled mode with Immcantation custom_container' - config_profile_description = 'Minimal test dataset to check pipeline function on assembled mode with Immcantation custom_container' + config_profile_description = 'Minimal human test dataset to check pipeline function on assembled mode with Immcantation custom_container' // Limit resources so that this can run on GitHub Actions max_cpus = 2 @@ -18,7 +18,7 @@ params { // Input data mode = 'assembled' - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-reveal/test_reveal_metadata.tsv' + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-reveal/test_assembled_metadata_hs.tsv' imgtdb_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/imgtdb_base.zip' igblast_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/igblast_base.zip' igphyml = '/usr/local/share/igphyml/src/igphyml' diff --git a/conf/test_assembled_immcantation_devel_mm.config b/conf/test_assembled_immcantation_devel_mm.config new file mode 100644 index 00000000..2aea10a3 --- /dev/null +++ b/conf/test_assembled_immcantation_devel_mm.config @@ -0,0 +1,44 @@ +/* + * ------------------------------------------------- + * Nextflow config file for running tests + * ------------------------------------------------- + * Defines bundled input files and everything required + * to run a fast and simple test. 
Use as follows: + * nextflow run nf-core/airrflow -profile test_assembled_immcantation_devel_mm, + */ + +params { + config_profile_name = 'Test assembled mode with Immcantation custom_container' + config_profile_description = 'Minimal mouse test dataset to check pipeline function on assembled mode with Immcantation custom_container' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = 6.GB + max_time = 6.h + + // Input data + mode = 'assembled' + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-reveal/test_assembled_metadata_mm.tsv' + imgtdb_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/imgtdb_base.zip' + igblast_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/igblast_base.zip' + igphyml = '/usr/local/share/igphyml/src/igphyml' + + reassign = true + productive_only = true + collapseby = 'filename' + cloneby = 'subject_id' + crossby = 'subject_id' + remove_chimeric = true +} + +process{ + + // all process with label 'immcantation' will be tested with this container instead. + withLabel:immcantation{ + container = 'docker.io/immcantation/suite:devel' + } +} + +env { + PYTHONNOUSERSITE = 0 +} diff --git a/conf/test_assembled_mm.config b/conf/test_assembled_mm.config new file mode 100644 index 00000000..a80d2099 --- /dev/null +++ b/conf/test_assembled_mm.config @@ -0,0 +1,31 @@ +/* + * ------------------------------------------------- + * Nextflow config file for running tests + * ------------------------------------------------- + * Defines bundled input files and everything required + * to run a fast and simple test. 
Use as follows: + * nextflow run nf-core/airrflow -profile test_assembled_mm, + */ + +params { + config_profile_name = 'Test assembled mode' + config_profile_description = 'Minimal mouse test dataset to test assembled mode' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = 6.GB + max_time = 6.h + + // Input data + mode = 'assembled' + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-reveal/test_assembled_metadata_mm.tsv' + imgtdb_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/imgtdb_base.zip' + igblast_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/igblast_base.zip' + + reassign = true + productive_only = true + collapseby = 'filename' + cloneby = 'subject_id' + remove_chimeric = true +} + diff --git a/docs/usage.md b/docs/usage.md index f7f4f931..e54b10f8 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -111,15 +111,14 @@ The metadata specified in the input file will then be automatically annotated in ## Assembled input samplesheet (bulk or single-cell) -The required input file for processing raw BCR or TCR bulk targeted sequencing data is a sample sheet in TSV format (tab separated). The columns `sample_id`, `filename`, `subject_id`, `species`, `tissue`, `single_cell`, `sex`, `age` and `biomaterial_provider` are required. +The required input file for processing assembled BCR or TCR sequencing data (bulk or single-cell) is a sample sheet in TSV format (tab separated). The columns `sample_id`, `filename`, `subject_id`, `species`, `tissue`, `single_cell`, `pcr_target_locus`, `sex`, `age` and `biomaterial_provider` are required. 
An example samplesheet is -| filename | species | subject_id | sample_id | tissue | sex | age | biomaterial_provider | pcr_target_locus | single_cell | -| -------------------------------------------------------- | ------- | ---------- | --------------------------------- | ---------- | ---- | --- | -------------------- | ---------------- | ----------- | -| sc5p_v2_hs_PBMC_1k_b_airr_rearrangement.tsv | human | subject_x | sc5p_v2_hs_PBMC_1k_5fb | PBMC | NA | NA | 10x Genomics | ig | TRUE | -| sc5p_v2_mm_c57bl6_splenocyte_1k_b_airr_rearrangement.tsv | mouse | mouse_x | sc5p_v2_mm_c57bl6_splenocyte_1k_b | splenocyte | NA | NA | 10x Genomics | ig | TRUE | -| bulk-Laserson-2014.fasta | human | PGP1 | PGP1 | PBMC | male | NA | Laserson-2014 | ig | FALSE | +| filename | species | subject_id | sample_id | tissue | sex | age | biomaterial_provider | pcr_target_locus | single_cell | +| ------------------------------------------- | ------- | ---------- | ---------------------- | ------ | ---- | --- | -------------------- | ---------------- | ----------- | +| sc5p_v2_hs_PBMC_1k_b_airr_rearrangement.tsv | human | subject_x | sc5p_v2_hs_PBMC_1k_5fb | PBMC | NA | NA | 10x Genomics | IG | TRUE | +| bulk-Laserson-2014.fasta | human | PGP1 | PGP1 | PBMC | male | NA | Laserson-2014 | IG | FALSE | ## Supported AIRR metadata fields diff --git a/modules/local/airrflow_report/airrflow_report.nf b/modules/local/airrflow_report/airrflow_report.nf index 5b6113de..10996b6e 100644 --- a/modules/local/airrflow_report/airrflow_report.nf +++ b/modules/local/airrflow_report/airrflow_report.nf @@ -2,10 +2,10 @@ process AIRRFLOW_REPORT { tag "${meta.id}" label 'process_high' - conda "bioconda::r-enchantr=0.1.2" + conda "bioconda::r-enchantr=0.1.3" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/r-enchantr:0.1.2--r42hdfd78af_0': - 'biocontainers/r-enchantr:0.1.2--r42hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/r-enchantr:0.1.3--r42hdfd78af_0': + 'biocontainers/r-enchantr:0.1.3--r42hdfd78af_0' }" input: tuple val(meta), path(tab) // sequence tsv table in AIRR format diff --git a/modules/local/changeo/changeo_parsedb_select.nf b/modules/local/changeo/changeo_parsedb_select.nf index 0bab80ab..9f592e7f 100644 --- a/modules/local/changeo/changeo_parsedb_select.nf +++ b/modules/local/changeo/changeo_parsedb_select.nf @@ -20,7 +20,7 @@ process CHANGEO_PARSEDB_SELECT { script: def args = task.ext.args ?: '' def args2 = task.ext.args2 ?: '' - if (meta.locus == 'IG'){ + if (meta.locus.toUpperCase() == 'IG'){ """ ParseDb.py select -d $tab $args --outname ${meta.id} > ${meta.id}_select_command_log.txt @@ -30,7 +30,7 @@ process CHANGEO_PARSEDB_SELECT { changeo: \$( ParseDb.py --version | awk -F' ' '{print \$2}' ) END_VERSIONS """ - } else if (meta.locus == 'TR'){ + } else if (meta.locus.toUpperCase() == 'TR'){ """ ParseDb.py select -d $tab $args2 --outname ${meta.id} > "${meta.id}_command_log.txt" diff --git a/modules/local/enchantr/collapse_duplicates.nf b/modules/local/enchantr/collapse_duplicates.nf index e1c117a9..2257913f 100644 --- a/modules/local/enchantr/collapse_duplicates.nf +++ b/modules/local/enchantr/collapse_duplicates.nf @@ -4,10 +4,10 @@ process COLLAPSE_DUPLICATES { label 'process_long_parallelized' label 'immcantation' - conda "bioconda::r-enchantr=0.1.2" + conda "bioconda::r-enchantr=0.1.3" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/r-enchantr:0.1.2--r42hdfd78af_0': - 'biocontainers/r-enchantr:0.1.2--r42hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/r-enchantr:0.1.3--r42hdfd78af_0': + 'biocontainers/r-enchantr:0.1.3--r42hdfd78af_0' }" input: tuple val(meta), path(tabs) // tuple [val(meta), sequence tsv in AIRR format ] diff --git a/modules/local/enchantr/define_clones.nf b/modules/local/enchantr/define_clones.nf index c271f874..7321603a 100644 --- a/modules/local/enchantr/define_clones.nf +++ b/modules/local/enchantr/define_clones.nf @@ -21,15 +21,16 @@ process DEFINE_CLONES { label 'process_long_parallelized' label 'immcantation' - conda "bioconda::r-enchantr=0.1.2" + conda "bioconda::r-enchantr=0.1.3" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/r-enchantr:0.1.2--r42hdfd78af_0': - 'biocontainers/r-enchantr:0.1.2--r42hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/r-enchantr:0.1.3--r42hdfd78af_0': + 'biocontainers/r-enchantr:0.1.3--r42hdfd78af_0' }" input: tuple val(meta), path(tabs) // meta, sequence tsv in AIRR format val threshold path imgt_base + path repertoires_samplesheet output: path("*/*/*clone-pass.tsv"), emit: tab // sequence tsv in AIRR format @@ -41,10 +42,17 @@ process DEFINE_CLONES { script: def args = task.ext.args ? 
asString(task.ext.args) : '' def thr = threshold.join("") + def input = "" + if (repertoires_samplesheet) { + input = repertoires_samplesheet + } else { + input = tabs.join(',') + } """ Rscript -e "enchantr::enchantr_report('define_clones', \\ - report_params=list('input'='${tabs.join(',')}', \\ + report_params=list('input'='${input}', \\ 'imgt_db'='${imgt_base}', \\ + 'species'='auto', \\ 'cloneby'='${params.cloneby}', \\ 'force'=FALSE, \\ 'threshold'=${thr}, \\ diff --git a/modules/local/enchantr/detect_contamination.nf b/modules/local/enchantr/detect_contamination.nf index 38308003..003c2126 100644 --- a/modules/local/enchantr/detect_contamination.nf +++ b/modules/local/enchantr/detect_contamination.nf @@ -5,10 +5,10 @@ process DETECT_CONTAMINATION { label 'immcantation' - conda "bioconda::r-enchantr=0.1.2" + conda "bioconda::r-enchantr=0.1.3" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/r-enchantr:0.1.2--r42hdfd78af_0': - 'biocontainers/r-enchantr:0.1.2--r42hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/r-enchantr:0.1.3--r42hdfd78af_0': + 'biocontainers/r-enchantr:0.1.3--r42hdfd78af_0' }" input: path(tabs) diff --git a/modules/local/enchantr/dowser_lineages.nf b/modules/local/enchantr/dowser_lineages.nf index e74c5e64..2e398f58 100644 --- a/modules/local/enchantr/dowser_lineages.nf +++ b/modules/local/enchantr/dowser_lineages.nf @@ -21,10 +21,10 @@ process DOWSER_LINEAGES { label 'process_long_parallelized' label 'immcantation' - conda "bioconda::r-enchantr=0.1.2" + conda "bioconda::r-enchantr=0.1.3" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/r-enchantr:0.1.2--r42hdfd78af_0': - 'biocontainers/r-enchantr:0.1.2--r42hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/r-enchantr:0.1.3--r42hdfd78af_0': + 'biocontainers/r-enchantr:0.1.3--r42hdfd78af_0' }" input: tuple val(meta), path(tabs) diff --git a/modules/local/enchantr/find_threshold.nf b/modules/local/enchantr/find_threshold.nf index c4fc1697..32ff652e 100644 --- a/modules/local/enchantr/find_threshold.nf +++ b/modules/local/enchantr/find_threshold.nf @@ -21,15 +21,16 @@ process FIND_THRESHOLD { label 'process_long_parallelized' label 'immcantation' - conda "bioconda::r-enchantr=0.1.2" + conda "bioconda::r-enchantr=0.1.3" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/r-enchantr:0.1.2--r42hdfd78af_0': - 'biocontainers/r-enchantr:0.1.2--r42hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/r-enchantr:0.1.3--r42hdfd78af_0': + 'biocontainers/r-enchantr:0.1.3--r42hdfd78af_0' }" input: path tab // sequence tsv in AIRR format path logo + path tabs_samplesheet output: // tuple val(meta), path("*threshold-pass.tsv"), emit: tab // sequence tsv in AIRR format @@ -43,7 +44,7 @@ process FIND_THRESHOLD { def args = task.ext.args ? 
asString(task.ext.args) : '' """ Rscript -e "enchantr::enchantr_report('find_threshold', \\ - report_params=list('input'='${tab.join(',')}',\\ + report_params=list('input'='${tabs_samplesheet}',\\ 'cloneby'='${params.cloneby}',\\ 'crossby'='${params.crossby}',\\ 'singlecell'='${params.singlecell}',\\ diff --git a/modules/local/enchantr/remove_chimeric.nf b/modules/local/enchantr/remove_chimeric.nf index 59a4e3a5..41467ecb 100644 --- a/modules/local/enchantr/remove_chimeric.nf +++ b/modules/local/enchantr/remove_chimeric.nf @@ -5,10 +5,10 @@ process REMOVE_CHIMERIC { label 'immcantation' - conda "bioconda::r-enchantr=0.1.2" + conda "bioconda::r-enchantr=0.1.3" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/r-enchantr:0.1.2--r42hdfd78af_0': - 'biocontainers/r-enchantr:0.1.2--r42hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/r-enchantr:0.1.3--r42hdfd78af_0': + 'biocontainers/r-enchantr:0.1.3--r42hdfd78af_0' }" input: diff --git a/modules/local/enchantr/report_file_size.nf b/modules/local/enchantr/report_file_size.nf index b4f9130d..c9a2052c 100644 --- a/modules/local/enchantr/report_file_size.nf +++ b/modules/local/enchantr/report_file_size.nf @@ -6,14 +6,15 @@ process REPORT_FILE_SIZE { label 'immcantation' label 'process_single' - conda "bioconda::r-enchantr=0.1.2" + conda "bioconda::r-enchantr=0.1.3" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/r-enchantr:0.1.2--r42hdfd78af_0': - 'biocontainers/r-enchantr:0.1.2--r42hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/r-enchantr:0.1.3--r42hdfd78af_0': + 'biocontainers/r-enchantr:0.1.3--r42hdfd78af_0' }" input: path logs path metadata + path logs_tabs output: path "*_report", emit: file_size @@ -22,9 +23,8 @@ process REPORT_FILE_SIZE { script: """ - echo "${logs.join('\n')}" > logs.txt Rscript -e "enchantr::enchantr_report('file_size', \\ - report_params=list('input'='logs.txt', 'metadata'='${metadata}',\\ + report_params=list('input'='${logs_tabs}', 'metadata'='${metadata}',\\ 'outdir'=getwd()))" echo "\"${task.process}\":" > versions.yml diff --git a/modules/local/enchantr/single_cell_qc.nf b/modules/local/enchantr/single_cell_qc.nf index 87422b86..3275c523 100644 --- a/modules/local/enchantr/single_cell_qc.nf +++ b/modules/local/enchantr/single_cell_qc.nf @@ -20,10 +20,10 @@ process SINGLE_CELL_QC { label 'immcantation' label 'process_medium' - conda "bioconda::r-enchantr=0.1.2" + conda "bioconda::r-enchantr=0.1.3" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/r-enchantr:0.1.2--r42hdfd78af_0': - 'biocontainers/r-enchantr:0.1.2--r42hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/r-enchantr:0.1.3--r42hdfd78af_0': + 'biocontainers/r-enchantr:0.1.3--r42hdfd78af_0' }" input: path(tabs) diff --git a/modules/local/enchantr/validate_input.nf b/modules/local/enchantr/validate_input.nf index 5be240c6..fd6bd345 100644 --- a/modules/local/enchantr/validate_input.nf +++ b/modules/local/enchantr/validate_input.nf @@ -6,10 +6,10 @@ process VALIDATE_INPUT { label 'immcantation' label 'process_single' - conda "bioconda::r-enchantr=0.1.2" + conda "bioconda::r-enchantr=0.1.3" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/r-enchantr:0.1.2--r42hdfd78af_0': - 'biocontainers/r-enchantr:0.1.2--r42hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/r-enchantr:0.1.3--r42hdfd78af_0': + 'biocontainers/r-enchantr:0.1.3--r42hdfd78af_0' }" input: file samplesheet diff --git a/modules/local/reveal/add_meta_to_tab.nf b/modules/local/reveal/add_meta_to_tab.nf index 3e9da456..f24433e2 100644 --- a/modules/local/reveal/add_meta_to_tab.nf +++ b/modules/local/reveal/add_meta_to_tab.nf @@ -3,10 +3,10 @@ process ADD_META_TO_TAB { label 'immcantation' label 'process_single' - conda "bioconda::r-enchantr=0.1.2" + conda "bioconda::r-enchantr=0.1.3" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/r-enchantr:0.1.2--r42hdfd78af_0': - 'biocontainers/r-enchantr:0.1.2--r42hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/r-enchantr:0.1.3--r42hdfd78af_0': + 'biocontainers/r-enchantr:0.1.3--r42hdfd78af_0' }" cache 'deep' // Without 'deep' this process would run when using -resume diff --git a/modules/local/reveal/filter_junction_mod3.nf b/modules/local/reveal/filter_junction_mod3.nf index ff6021ff..841fbba9 100644 --- a/modules/local/reveal/filter_junction_mod3.nf +++ b/modules/local/reveal/filter_junction_mod3.nf @@ -3,10 +3,10 @@ process FILTER_JUNCTION_MOD3 { label 'immcantation' label 'process_single' - conda "bioconda::r-enchantr=0.1.2" + conda "bioconda::r-enchantr=0.1.3" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/r-enchantr:0.1.2--r42hdfd78af_0': - 'biocontainers/r-enchantr:0.1.2--r42hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/r-enchantr:0.1.3--r42hdfd78af_0': + 'biocontainers/r-enchantr:0.1.3--r42hdfd78af_0' }" input: tuple val(meta), path(tab) // sequence tsv in AIRR format diff --git a/modules/local/reveal/filter_quality.nf b/modules/local/reveal/filter_quality.nf index 2c75fcfd..a649caef 100644 --- a/modules/local/reveal/filter_quality.nf +++ b/modules/local/reveal/filter_quality.nf @@ -3,16 +3,16 @@ process FILTER_QUALITY { label 'immcantation' label 'process_single' - conda "bioconda::r-enchantr=0.1.2" + conda "bioconda::r-enchantr=0.1.3" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/r-enchantr:0.1.2--r42hdfd78af_0': - 'biocontainers/r-enchantr:0.1.2--r42hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/r-enchantr:0.1.3--r42hdfd78af_0': + 'biocontainers/r-enchantr:0.1.3--r42hdfd78af_0' }" input: tuple val(meta), path(tab) // sequence tsv in AIRR format output: - tuple val(meta), path("*quality-pass.tsv"), emit: tab // sequence tsv in AIRR format + tuple val(meta), path("*quality-pass.tsv"), optional:true, emit: tab // sequence tsv in AIRR format path("*_command_log.txt"), emit: logs //process logs path "versions.yml", emit: versions diff --git a/modules/local/samplesheet_check.nf b/modules/local/samplesheet_check.nf index b9593c98..757851a7 100644 --- a/modules/local/samplesheet_check.nf +++ b/modules/local/samplesheet_check.nf @@ -18,8 +18,9 @@ process SAMPLESHEET_CHECK { task.ext.when == null || task.ext.when script: // This script is bundled with the pipeline, in nf-core/airrflow/bin/ + def args = task.ext.args ?: '' """ - check_samplesheet.py $samplesheet + check_samplesheet.py $samplesheet $args cp $samplesheet samplesheet.valid.tsv cat <<-END_VERSIONS > versions.yml diff --git 
a/nextflow.config b/nextflow.config index b25143ba..77d8ef57 100644 --- a/nextflow.config +++ b/nextflow.config @@ -255,9 +255,11 @@ profiles { test_full { includeConfig 'conf/test_full.config' } test_tcr { includeConfig 'conf/test_tcr.config' } test_no_umi { includeConfig 'conf/test_no_umi.config' } - test_assembled { includeConfig 'conf/test_assembled.config' } + test_assembled_hs { includeConfig 'conf/test_assembled_hs.config' } + test_assembled_mm { includeConfig 'conf/test_assembled_mm.config' } test_raw_immcantation_devel { includeConfig 'conf/test_raw_immcantation_devel.config' } - test_assembled_immcantation_devel { includeConfig 'conf/test_assembled_immcantation_devel.config' } + test_assembled_immcantation_devel_hs { includeConfig 'conf/test_assembled_immcantation_devel_hs.config' } + test_assembled_immcantation_devel_mm { includeConfig 'conf/test_assembled_immcantation_devel_mm.config' } test_nocluster { includeConfig 'conf/test_nocluster.config' } test_fetchimgt { includeConfig 'conf/test_fetchimgt.config' } test_igblast { includeConfig 'conf/test_igblast.config' } @@ -310,7 +312,7 @@ manifest { description = """B and T cell repertoire analysis pipeline with the Immcantation framework.""" mainScript = 'main.nf' nextflowVersion = '!>=22.10.1' - version = '3.1.0' + version = '3.2.0dev' doi = '10.5281/zenodo.2642009' } diff --git a/nextflow_schema.json b/nextflow_schema.json index 8b6194e9..e6a3365e 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -22,7 +22,7 @@ "mode": { "type": "string", "default": "fastq", - "description": "Specify the processing mode for the pipeline. Available options are \"fastq\" and \"assembled\".ptions are: 'raw'", + "description": "Specify the processing mode for the pipeline. 
Available options are \"fastq\" and \"assembled\".", "enum": ["fastq", "assembled"], "fa_icon": "fas fa-terminal" }, diff --git a/subworkflows/local/assembled_input_check.nf b/subworkflows/local/assembled_input_check.nf index b37b359f..3b519c6f 100644 --- a/subworkflows/local/assembled_input_check.nf +++ b/subworkflows/local/assembled_input_check.nf @@ -3,6 +3,7 @@ */ include { VALIDATE_INPUT } from '../../modules/local/enchantr/validate_input' +include { SAMPLESHEET_CHECK as SAMPLESHEET_CHECK_ASSEMBLED } from '../../modules/local/samplesheet_check' workflow ASSEMBLED_INPUT_CHECK { take: @@ -12,8 +13,7 @@ workflow ASSEMBLED_INPUT_CHECK { cloneby main: - // TODO: validate input should check that sample_ids are unique - + SAMPLESHEET_CHECK_ASSEMBLED ( samplesheet ) VALIDATE_INPUT ( samplesheet, miairr, collapseby, cloneby ) //removed reassign ch_validated_input = VALIDATE_INPUT.out.validated_input ch_validated_input diff --git a/subworkflows/local/clonal_analysis.nf b/subworkflows/local/clonal_analysis.nf index 91921756..ff4afacf 100644 --- a/subworkflows/local/clonal_analysis.nf +++ b/subworkflows/local/clonal_analysis.nf @@ -19,10 +19,16 @@ workflow CLONAL_ANALYSIS { ch_find_threshold = ch_repertoire.map{ it -> it[1] } .collect() + ch_find_threshold_samplesheet = ch_find_threshold + .flatten() + .map{ it -> it.getName().toString() } + .dump(tag: 'ch_find_threshold_samplesheet') + .collectFile(name: 'find_threshold_samplesheet.txt', newLine: true) FIND_CLONAL_THRESHOLD ( ch_find_threshold, - ch_logo + ch_logo, + ch_find_threshold_samplesheet ) ch_threshold = FIND_CLONAL_THRESHOLD.out.mean_threshold ch_versions = ch_versions.mix(FIND_CLONAL_THRESHOLD.out.versions) @@ -32,17 +38,23 @@ workflow CLONAL_ANALYSIS { .dump(tag: 'clone_threshold') .filter { it != 'NA'} .filter { it != 'NaN' } - .ifEmpty { error "Automatic clone_threshold is 'NA'. Consider setting params.threshold manually."} + .ifEmpty { error "Automatic clone_threshold is 'NA'. 
Consider setting --clonal_threshold manually."} } else { clone_threshold = params.clonal_threshold ch_find_threshold = ch_repertoire.map{ it -> it[1] } .collect() + ch_find_threshold_samplesheet = ch_find_threshold + .flatten() + .map{ it -> it.getName().toString() } + .dump(tag: 'ch_find_threshold_samplesheet') + .collectFile(name: 'find_threshold_samplesheet.txt', newLine: true) REPORT_THRESHOLD ( ch_find_threshold, - ch_logo + ch_logo, + ch_find_threshold_samplesheet ) ch_versions = ch_versions.mix(REPORT_THRESHOLD.out.versions) @@ -63,8 +75,10 @@ workflow CLONAL_ANALYSIS { DEFINE_CLONES_COMPUTE( ch_define_clones, clone_threshold.collect(), - ch_imgt.collect() + ch_imgt.collect(), + [] ) + ch_versions = ch_versions.mix(DEFINE_CLONES_COMPUTE.out.versions) ch_logs = ch_logs.mix(DEFINE_CLONES_COMPUTE.out.logs) @@ -76,10 +90,18 @@ workflow CLONAL_ANALYSIS { if (!params.skip_all_clones_report){ + ch_all_repertoires_cloned_samplesheet = ch_all_repertoires_cloned.map{ it -> it[1] } + .collect() + .flatten() + .map{ it -> it.getName().toString() } + .dump(tag: 'ch_all_repertoires_cloned_samplesheet') + .collectFile(name: 'all_repertoires_cloned_samplesheet.txt', newLine: true) + DEFINE_CLONES_REPORT( ch_all_repertoires_cloned, clone_threshold.collect(), - ch_imgt.collect() + ch_imgt.collect(), + ch_all_repertoires_cloned_samplesheet ) } diff --git a/subworkflows/local/repertoire_analysis_reporting.nf b/subworkflows/local/repertoire_analysis_reporting.nf index bab6b21d..1bd61a08 100644 --- a/subworkflows/local/repertoire_analysis_reporting.nf +++ b/subworkflows/local/repertoire_analysis_reporting.nf @@ -55,10 +55,16 @@ workflow REPERTOIRE_ANALYSIS_REPORTING { ch_reassign_logs, ch_sc_qc_and_filter_logs, ch_clonal_analysis_logs) + ch_logs_tabs = ch_logs.collect() + .flatten() + .map{ it -> it.getName().toString() } + .dump(tag: 'ch_logs_tabs') + .collectFile(name: 'all_logs_tabs.txt', newLine: true) REPORT_FILE_SIZE( ch_logs.collect().ifEmpty([]), - ch_metadata + 
ch_metadata, + ch_logs_tabs ) ch_versions = ch_versions.mix(REPORT_FILE_SIZE.out.versions)