diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index ea27a584..4ecfbfe3 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -2,6 +2,7 @@ "name": "nfcore", "image": "nfcore/gitpod:latest", "remoteUser": "gitpod", + "runArgs": ["--privileged"], // Configure tool-specific properties. "customizations": { diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index efebe9e0..902a3782 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -9,7 +9,9 @@ Please use the pre-filled template to save time. However, don't be put off by this template - other more general issues and suggestions are welcome! Contributions to the code are even more welcome ;) -> If you need help using or modifying nf-core/airrflow then the best place to ask is on the nf-core Slack [#airrflow](https://nfcore.slack.com/channels/airrflow) channel ([join our Slack here](https://nf-co.re/join/slack)). +:::info +If you need help using or modifying nf-core/airrflow then the best place to ask is on the nf-core Slack [#airrflow](https://nfcore.slack.com/channels/airrflow) channel ([join our Slack here](https://nf-co.re/join/slack)). +::: ## Contribution workflow @@ -116,4 +118,3 @@ To get started: Devcontainer specs: - [DevContainer config](.devcontainer/devcontainer.json) -- [Dockerfile](.devcontainer/Dockerfile) diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml index f2e98962..b0260675 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -42,7 +42,7 @@ body: attributes: label: System information description: | - * Nextflow version _(eg. 22.10.1)_ + * Nextflow version _(eg. 23.04.0)_ * Hardware _(eg. HPC, Desktop, Cloud)_ * Executor _(eg. slurm, local, awsbatch)_ * Container engine: _(e.g. 
Docker, Singularity, Conda, Podman, Shifter, Charliecloud, or Apptainer)_ diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml index 34a98d01..6b25b2c6 100644 --- a/.github/workflows/awsfulltest.yml +++ b/.github/workflows/awsfulltest.yml @@ -14,18 +14,23 @@ jobs: runs-on: ubuntu-latest steps: - name: Launch workflow via tower - uses: seqeralabs/action-tower-launch@v1 + uses: seqeralabs/action-tower-launch@v2 with: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + revision: ${{ github.sha }} workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/airrflow/work-${{ github.sha }} parameters: | { + "hook_url": "${{ secrets.MEGATESTS_ALERTS_SLACK_HOOK_URL }}", "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/airrflow/results-${{ github.sha }}" } - profiles: test_full,aws_tower + profiles: test_full + - uses: actions/upload-artifact@v3 with: name: Tower debug log file - path: tower_action_*.log + path: | + tower_action_*.log + tower_action_*.json diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml index 64fb9141..269c6875 100644 --- a/.github/workflows/awstest.yml +++ b/.github/workflows/awstest.yml @@ -12,18 +12,22 @@ jobs: steps: # Launch workflow using Tower CLI tool action - name: Launch workflow via tower - uses: seqeralabs/action-tower-launch@v1 + uses: seqeralabs/action-tower-launch@v2 with: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + revision: ${{ github.sha }} workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/airrflow/work-${{ github.sha }} parameters: | { "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/airrflow/results-test-${{ github.sha }}" } - profiles: test,aws_tower + profiles: test + - uses: actions/upload-artifact@v3 with: name: Tower debug log file - path: tower_action_*.log + path: | + tower_action_*.log + tower_action_*.json diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ddf7f1da..5cd19a48 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -24,7 +24,7 @@ jobs: strategy: matrix: NXF_VER: - - "22.10.1" + - "23.04.0" - "latest-everything" steps: - name: Check out pipeline code @@ -46,9 +46,10 @@ jobs: strategy: matrix: NXF_VER: - - "22.10.1" + - "23.04.0" - "latest-everything" - profile: ["test_tcr", "test_no_umi", "test_nocluster", "test_fetchimgt", "test_assembled"] + profile: + ["test_tcr", "test_no_umi", "test_nocluster", "test_fetchimgt", "test_assembled_hs", "test_assembled_mm"] fail-fast: false steps: - name: Check out pipeline code diff --git a/.github/workflows/ci_immcantation.yml b/.github/workflows/ci_immcantation.yml index 934bd81c..d74bb5f9 100644 --- a/.github/workflows/ci_immcantation.yml +++ b/.github/workflows/ci_immcantation.yml @@ -1,9 +1,6 @@ name: nf-core CI immcantation # This workflow runs the pipeline with the minimal test dataset to check that it completes without any syntax errors on: - push: - branches: - - dev pull_request: branches-ignore: - "master" @@ -23,9 +20,14 @@ jobs: strategy: matrix: NXF_VER: - - "22.10.1" + - "23.04.0" - "latest-everything" - profile: ["test_assembled_immcantation_devel", "test_raw_immcantation_devel"] + profile: + [ + "test_assembled_immcantation_devel_hs", + "test_assembled_immcantation_devel_mm", + "test_raw_immcantation_devel", + ] fail-fast: false steps: - name: Check out pipeline code diff --git a/.github/workflows/linting.yml 
b/.github/workflows/linting.yml index 888cb4bc..b8bdd214 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -78,7 +78,7 @@ jobs: - uses: actions/setup-python@v4 with: - python-version: "3.8" + python-version: "3.11" architecture: "x64" - name: Install dependencies diff --git a/.github/workflows/release-announcments.yml b/.github/workflows/release-announcments.yml new file mode 100644 index 00000000..6ad33927 --- /dev/null +++ b/.github/workflows/release-announcments.yml @@ -0,0 +1,68 @@ +name: release-announcements +# Automatic release toot and tweet anouncements +on: + release: + types: [published] + workflow_dispatch: + +jobs: + toot: + runs-on: ubuntu-latest + steps: + - uses: rzr/fediverse-action@master + with: + access-token: ${{ secrets.MASTODON_ACCESS_TOKEN }} + host: "mstdn.science" # custom host if not "mastodon.social" (default) + # GitHub event payload + # https://docs.github.com/en/developers/webhooks-and-events/webhooks/webhook-events-and-payloads#release + message: | + Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! + + Please see the changelog: ${{ github.event.release.html_url }} + + send-tweet: + runs-on: ubuntu-latest + + steps: + - uses: actions/setup-python@v4 + with: + python-version: "3.10" + - name: Install dependencies + run: pip install tweepy==4.14.0 + - name: Send tweet + shell: python + run: | + import os + import tweepy + + client = tweepy.Client( + access_token=os.getenv("TWITTER_ACCESS_TOKEN"), + access_token_secret=os.getenv("TWITTER_ACCESS_TOKEN_SECRET"), + consumer_key=os.getenv("TWITTER_CONSUMER_KEY"), + consumer_secret=os.getenv("TWITTER_CONSUMER_SECRET"), + ) + tweet = os.getenv("TWEET") + client.create_tweet(text=tweet) + env: + TWEET: | + Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! + + Please see the changelog: ${{ github.event.release.html_url }} + TWITTER_CONSUMER_KEY: ${{ secrets.TWITTER_CONSUMER_KEY }} + TWITTER_CONSUMER_SECRET: ${{ secrets.TWITTER_CONSUMER_SECRET }} + TWITTER_ACCESS_TOKEN: ${{ secrets.TWITTER_ACCESS_TOKEN }} + TWITTER_ACCESS_TOKEN_SECRET: ${{ secrets.TWITTER_ACCESS_TOKEN_SECRET }} + + bsky-post: + runs-on: ubuntu-latest + steps: + - uses: zentered/bluesky-post-action@v0.0.2 + with: + post: | + Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! + + Please see the changelog: ${{ github.event.release.html_url }} + env: + BSKY_IDENTIFIER: ${{ secrets.BSKY_IDENTIFIER }} + BSKY_PASSWORD: ${{ secrets.BSKY_PASSWORD }} + # diff --git a/.gitpod.yml b/.gitpod.yml index 85d95ecc..25488dcc 100644 --- a/.gitpod.yml +++ b/.gitpod.yml @@ -1,4 +1,9 @@ image: nfcore/gitpod:latest +tasks: + - name: Update Nextflow and setup pre-commit + command: | + pre-commit install --install-hooks + nextflow self-update vscode: extensions: # based on nf-core.nf-core-extensionpack diff --git a/.nf-core.yml b/.nf-core.yml index 2c0141ff..4ae6f7f0 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -1,4 +1,6 @@ lint: files_exist: - conf/igenomes.config + multiqc_config: + - report_comment repository_type: pipeline diff --git a/CHANGELOG.md b/CHANGELOG.md index e7e1fd26..3484b265 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,37 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). 
-## [3.1] - 2023-06-05 "Protego" +## [3.2.0] - 2023-10-27 Expecto patronum + +### `Added` + +- [#268](https://github.com/nf-core/airrflow/pull/268) Added parameters for FindThreshold in `modules.config`. +- [#268](https://github.com/nf-core/airrflow/pull/268) Validate samplesheet also for `assembled` samplesheet. +- [#259](https://github.com/nf-core/airrflow/pull/259) Update to `EnchantR v0.1.3`. +- [#266](https://github.com/nf-core/airrflow/pull/266) Added clonal reports tables to final report folder. +- [#266](https://github.com/nf-core/airrflow/pull/266) Added processes to include sampleID to filename in assembled workflow to keep it unique. +- [#276](https://github.com/nf-core/airrflow/pull/276) Parametrize FindThreshold Report and Presto Buildconsensus UMI. +- [#281](https://github.com/nf-core/airrflow/pull/281) Update to nf-core tools v2.10. + +### `Fixed` + +- [#268](https://github.com/nf-core/airrflow/pull/268) Allows for uppercase and lowercase locus in samplesheet `pcr_target_locus`. +- [#259](https://github.com/nf-core/airrflow/pull/259) Samplesheet only allows data from one species. +- [#259](https://github.com/nf-core/airrflow/pull/259) Introduced fix for a too long command with hundreds of datasets. +- [#266](https://github.com/nf-core/airrflow/pull/266) Convert samplesheet required columns to strings when needed. +- [#284](https://github.com/nf-core/bcellmagic/pull/284): Use cached IMGT and IgBlast reference data by default. + +### `Dependencies` + +| Dependency | Old version | New version | +| ---------- | ----------- | ----------- | +| r-enchantr | 0.1.2 | 0.1.9 | +| r-alakazam | 1.2.1 | 1.3.0 | +| r-shazam | 1.1.0 | 1.2.0 | +| r-dowser | 1.2.0 | 2.0.0 | +| fastqc | 0.11.9 | 0.12.1 | + +## [3.1.0] - 2023-06-05 "Protego" ### `Added` @@ -195,7 +225,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. - [#114](https://github.com/nf-core/bcellmagic/pull/114): Added Bcellmagic html report. - [#114](https://github.com/nf-core/bcellmagic/pull/114): Improved documentation on amplicon protocol support. - [#115](https://github.com/nf-core/bcellmagic/pull/115): Improved output file structure and documentation. -- [#124](https://github.com/nf-core/bcellmagic/pull/124): Template update to nf-core tools v2.0.1 +- [#124](https://github.com/nf-core/bcellmagic/pull/124): Template update to nf-core tools v2.0.1. ### `Fixed` diff --git a/CITATIONS.md b/CITATIONS.md index 9f71f033..9faa369d 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -12,6 +12,44 @@ - [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) + > Andrews, S. (2010). FastQC: A Quality Control Tool for High Throughput Sequence Data [Online]. + +- [Fastp](https://doi.org/10.1093/bioinformatics/bty560) + + > Shifu Chen, Yanqing Zhou, Yaru Chen, Jia Gu, fastp: an ultra-fast all-in-one FASTQ preprocessor, Bioinformatics. 2018 Sept 1; 34(17):i884–i890. doi: 10.1093/bioinformatics/bty560. + +- [pRESTO](https://doi.org/10.1093/bioinformatics/btu138) + + > Vander Heiden, J. A., Yaari, G., Uduman, M., Stern, J. N. H., O’Connor, K. C., Hafler, D. A., … Kleinstein, S. H. (2014). pRESTO: a toolkit for processing high-throughput sequencing raw reads of lymphocyte receptor repertoires. Bioinformatics, 30(13), 1930–1932. + +- [SHazaM, Change-O](https://doi.org/10.1093/bioinformatics/btv359) + + > Gupta, N. T., Vander Heiden, J. A., Uduman, M., Gadala-Maria, D., Yaari, G., & Kleinstein, S. H. (2015). 
Change-O: a toolkit for analyzing large-scale B cell immunoglobulin repertoire sequencing data: Table 1. Bioinformatics, 31(20), 3356–3358. + +- [Alakazam](https://doi.org/10.1126/scitranslmed.3008879) + + > Stern, J. N. H., Yaari, G., Vander Heiden, J. A., Church, G., Donahue, W. F., Hintzen, R. Q., … O’Connor, K. C. (2014). B cells populating the multiple sclerosis brain mature in the draining cervical lymph nodes. Science Translational Medicine, 6(248). + +- [SCOPer](https://doi.org/10.1093/bioinformatics/bty235) + + > Nouri N, Kleinstein S (2018). “A spectral clustering-based method for identifying clones from high-throughput B cell repertoire sequencing data.” Bioinformatics, i341-i349. + + > Nouri N, Kleinstein S (2020). “Somatic hypermutation analysis for improved identification of B cell clonal families from next-generation sequencing data.” PLOS Computational Biology, 16(6), e1007977. + + > Gupta N, Adams K, Briggs A, Timberlake S, Vigneault F, Kleinstein S (2017). “Hierarchical clustering can identify B cell clones with high confidence in Ig repertoire sequencing data.” The Journal of Immunology, 2489-2499. + +- [Dowser](https://doi.org/10.1371/journal.pcbi.1009885) + + > Hoehn K, Pybus O, Kleinstein S (2022). “Phylogenetic analysis of migration, differentiation, and class switching in B cells.” PLoS Computational Biology. + +- [IgPhyML](https://www.pnas.org/doi/10.1073/pnas.1906020116) + + > Hoehn K, Van der Heiden J, Zhou J, Lunter G, Pybus O, Kleinstein S (2019). “Repertoire-wide phylogenetic models of B cell molecular evolution reveal evolutionary signatures of aging and vaccination.” PNAS. + +- [TIgGER](https://doi.org/10.1073/pnas.1417683112) + + > Gadala-maria, D., Yaari, G., Uduman, M., & Kleinstein, S. H. (2015). Automated analysis of high-throughput B-cell sequencing data reveals a high frequency of novel immunoglobulin V gene segment alleles. Proceedings of the National Academy of Sciences, 112(8), 1–9. + - [Fastp](https://doi.org/10.1093/bioinformatics/bty560) > Shifu Chen, Yanqing Zhou, Yaru Chen, Jia Gu, fastp: an ultra-fast all-in-one FASTQ preprocessor, Bioinformatics. 2018 Sept 1; 34(17):i884–i890. doi: 10.1093/bioinformatics/bty560. @@ -68,5 +106,8 @@ - [Docker](https://dl.acm.org/doi/10.5555/2600239.2600241) + > Merkel, D. (2014). Docker: lightweight linux containers for consistent development and deployment. Linux Journal, 2014(239), 2. doi: 10.5555/2600239.2600241. + - [Singularity](https://pubmed.ncbi.nlm.nih.gov/28494014/) + > Kurtzer GM, Sochat V, Bauer MW. Singularity: Scientific containers for mobility of compute. PLoS One. 2017 May 11;12(5):e0177459. doi: 10.1371/journal.pone.0177459. eCollection 2017. PubMed PMID: 28494014; PubMed Central PMCID: PMC5426675. 
diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index f4fd052f..c089ec78 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -1,18 +1,20 @@ -# Code of Conduct at nf-core (v1.0) +# Code of Conduct at nf-core (v1.4) ## Our Pledge -In the interest of fostering an open, collaborative, and welcoming environment, we as contributors and maintainers of nf-core, pledge to making participation in our projects and community a harassment-free experience for everyone, regardless of: +In the interest of fostering an open, collaborative, and welcoming environment, we as contributors and maintainers of nf-core pledge to making participation in our projects and community a harassment-free experience for everyone, regardless of: - Age +- Ability - Body size +- Caste - Familial status - Gender identity and expression - Geographical location - Level of experience - Nationality and national origins - Native language -- Physical and neurological ability +- Neurodiversity - Race or ethnicity - Religion - Sexual identity and orientation @@ -22,80 +24,133 @@ Please note that the list above is alphabetised and is therefore not ranked in a ## Preamble -> Note: This Code of Conduct (CoC) has been drafted by the nf-core Safety Officer and been edited after input from members of the nf-core team and others. "We", in this document, refers to the Safety Officer and members of the nf-core core team, both of whom are deemed to be members of the nf-core community and are therefore required to abide by this Code of Conduct. This document will amended periodically to keep it up-to-date, and in case of any dispute, the most current version will apply. +:::note +This Code of Conduct (CoC) has been drafted by Renuka Kudva, Cris Tuñí, and Michael Heuer, with input from the nf-core Core Team and Susanna Marquez from the nf-core community. "We", in this document, refers to the Safety Officers and members of the nf-core Core Team, both of whom are deemed to be members of the nf-core community and are therefore required to abide by this Code of Conduct. This document will be amended periodically to keep it up-to-date. In case of any dispute, the most current version will apply. +::: -An up-to-date list of members of the nf-core core team can be found [here](https://nf-co.re/about). Our current safety officer is Renuka Kudva. +An up-to-date list of members of the nf-core core team can be found [here](https://nf-co.re/about). + +Our Safety Officers are Saba Nafees, Cris Tuñí, and Michael Heuer. nf-core is a young and growing community that welcomes contributions from anyone with a shared vision for [Open Science Policies](https://www.fosteropenscience.eu/taxonomy/term/8). Open science policies encompass inclusive behaviours and we strive to build and maintain a safe and inclusive environment for all individuals. -We have therefore adopted this code of conduct (CoC), which we require all members of our community and attendees in nf-core events to adhere to in all our workspaces at all times. Workspaces include but are not limited to Slack, meetings on Zoom, Jitsi, YouTube live etc. +We have therefore adopted this CoC, which we require all members of our community and attendees of nf-core events to adhere to in all our workspaces at all times. Workspaces include, but are not limited to, Slack, meetings on Zoom, gather.town, YouTube live etc. -Our CoC will be strictly enforced and the nf-core team reserve the right to exclude participants who do not comply with our guidelines from our workspaces and future nf-core activities. 
+Our CoC will be strictly enforced and the nf-core team reserves the right to exclude participants who do not comply with our guidelines from our workspaces and future nf-core activities. -We ask all members of our community to help maintain a supportive and productive workspace and to avoid behaviours that can make individuals feel unsafe or unwelcome. Please help us maintain and uphold this CoC. +We ask all members of our community to help maintain supportive and productive workspaces and to avoid behaviours that can make individuals feel unsafe or unwelcome. Please help us maintain and uphold this CoC. -Questions, concerns or ideas on what we can include? Contact safety [at] nf-co [dot] re +Questions, concerns, or ideas on what we can include? Contact members of the Safety Team on Slack or email safety [at] nf-co [dot] re. ## Our Responsibilities -The safety officer is responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behaviour. +Members of the Safety Team (the Safety Officers) are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behaviour. -The safety officer in consultation with the nf-core core team have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. +The Safety Team, in consultation with the nf-core core team, have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this CoC, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. -Members of the core team or the safety officer who violate the CoC will be required to recuse themselves pending investigation. They will not have access to any reports of the violations and be subject to the same actions as others in violation of the CoC. +Members of the core team or the Safety Team who violate the CoC will be required to recuse themselves pending investigation. They will not have access to any reports of the violations and will be subject to the same actions as others in violation of the CoC. -## When are where does this Code of Conduct apply? +## When and where does this Code of Conduct apply? -Participation in the nf-core community is contingent on following these guidelines in all our workspaces and events. This includes but is not limited to the following listed alphabetically and therefore in no order of preference: +Participation in the nf-core community is contingent on following these guidelines in all our workspaces and events, such as hackathons, workshops, bytesize, and collaborative workspaces on gather.town. These guidelines include, but are not limited to, the following (listed alphabetically and therefore in no order of preference): - Communicating with an official project email address. - Communicating with community members within the nf-core Slack channel. - Participating in hackathons organised by nf-core (both online and in-person events). 
-- Participating in collaborative work on GitHub, Google Suite, community calls, mentorship meetings, email correspondence. -- Participating in workshops, training, and seminar series organised by nf-core (both online and in-person events). This applies to events hosted on web-based platforms such as Zoom, Jitsi, YouTube live etc. +- Participating in collaborative work on GitHub, Google Suite, community calls, mentorship meetings, email correspondence, and on the nf-core gather.town workspace. +- Participating in workshops, training, and seminar series organised by nf-core (both online and in-person events). This applies to events hosted on web-based platforms such as Zoom, gather.town, Jitsi, YouTube live etc. - Representing nf-core on social media. This includes both official and personal accounts. ## nf-core cares 😊 -nf-core's CoC and expectations of respectful behaviours for all participants (including organisers and the nf-core team) include but are not limited to the following (listed in alphabetical order): +nf-core's CoC and expectations of respectful behaviours for all participants (including organisers and the nf-core team) include, but are not limited to, the following (listed in alphabetical order): - Ask for consent before sharing another community member’s personal information (including photographs) on social media. - Be respectful of differing viewpoints and experiences. We are all here to learn from one another and a difference in opinion can present a good learning opportunity. -- Celebrate your accomplishments at events! (Get creative with your use of emojis 🎉 🥳 💯 🙌 !) +- Celebrate your accomplishments! (Get creative with your use of emojis 🎉 🥳 💯 🙌 !) - Demonstrate empathy towards other community members. (We don’t all have the same amount of time to dedicate to nf-core. If tasks are pending, don’t hesitate to gently remind members of your team. If you are leading a task, ask for help if you feel overwhelmed.) - Engage with and enquire after others. (This is especially important given the geographically remote nature of the nf-core community, so let’s do this the best we can) - Focus on what is best for the team and the community. (When in doubt, ask) -- Graciously accept constructive criticism, yet be unafraid to question, deliberate, and learn. +- Accept feedback, yet be unafraid to question, deliberate, and learn. - Introduce yourself to members of the community. (We’ve all been outsiders and we know that talking to strangers can be hard for some, but remember we’re interested in getting to know you and your visions for open science!) -- Show appreciation and **provide clear feedback**. (This is especially important because we don’t see each other in person and it can be harder to interpret subtleties. Also remember that not everyone understands a certain language to the same extent as you do, so **be clear in your communications to be kind.**) +- Show appreciation and **provide clear feedback**. (This is especially important because we don’t see each other in person and it can be harder to interpret subtleties. Also remember that not everyone understands a certain language to the same extent as you do, so **be clear in your communication to be kind.**) - Take breaks when you feel like you need them. -- Using welcoming and inclusive language. (Participants are encouraged to display their chosen pronouns on Zoom or in communication on Slack.) +- Use welcoming and inclusive language. 
(Participants are encouraged to display their chosen pronouns on Zoom or in communication on Slack) ## nf-core frowns on 😕 -The following behaviours from any participants within the nf-core community (including the organisers) will be considered unacceptable under this code of conduct. Engaging or advocating for any of the following could result in expulsion from nf-core workspaces. +The following behaviours from any participants within the nf-core community (including the organisers) will be considered unacceptable under this CoC. Engaging or advocating for any of the following could result in expulsion from nf-core workspaces: - Deliberate intimidation, stalking or following and sustained disruption of communication among participants of the community. This includes hijacking shared screens through actions such as using the annotate tool in conferencing software such as Zoom. - “Doxing” i.e. posting (or threatening to post) another person’s personal identifying information online. - Spamming or trolling of individuals on social media. -- Use of sexual or discriminatory imagery, comments, or jokes and unwelcome sexual attention. -- Verbal and text comments that reinforce social structures of domination related to gender, gender identity and expression, sexual orientation, ability, physical appearance, body size, race, age, religion or work experience. +- Use of sexual or discriminatory imagery, comments, jokes, or unwelcome sexual attention. +- Verbal and text comments that reinforce social structures of domination related to gender, gender identity and expression, sexual orientation, ability, physical appearance, body size, race, age, religion, or work experience. ### Online Trolling -The majority of nf-core interactions and events are held online. Unfortunately, holding events online comes with the added issue of online trolling. This is unacceptable, reports of such behaviour will be taken very seriously, and perpetrators will be excluded from activities immediately. +The majority of nf-core interactions and events are held online. Unfortunately, holding events online comes with the risk of online trolling. This is unacceptable — reports of such behaviour will be taken very seriously and perpetrators will be excluded from activities immediately. -All community members are required to ask members of the group they are working within for explicit consent prior to taking screenshots of individuals during video calls. +All community members are **required** to ask members of the group they are working with for explicit consent prior to taking screenshots of individuals during video calls. -## Procedures for Reporting CoC violations +## Procedures for reporting CoC violations If someone makes you feel uncomfortable through their behaviours or actions, report it as soon as possible. -You can reach out to members of the [nf-core core team](https://nf-co.re/about) and they will forward your concerns to the safety officer(s). +You can reach out to members of the Safety Team (Saba Nafees, Cris Tuñí, and Michael Heuer) on Slack. Alternatively, contact a member of the nf-core core team [nf-core core team](https://nf-co.re/about), and they will forward your concerns to the Safety Team. + +Issues directly concerning members of the Core Team or the Safety Team will be dealt with by other members of the core team and the safety manager — possible conflicts of interest will be taken into account. nf-core is also in discussions about having an ombudsperson and details will be shared in due course. 
+ +All reports will be handled with the utmost discretion and confidentiality. + +You can also report any CoC violations to safety [at] nf-co [dot] re. In your email report, please do your best to include: + +- Your contact information. +- Identifying information (e.g. names, nicknames, pseudonyms) of the participant who has violated the Code of Conduct. +- The behaviour that was in violation and the circumstances surrounding the incident. +- The approximate time of the behaviour (if different than the time the report was made). +- Other people involved in the incident, if applicable. +- If you believe the incident is ongoing. +- If there is a publicly available record (e.g. mailing list record, a screenshot). +- Any additional information. + +After you file a report, one or more members of our Safety Team will contact you to follow up on your report. + +## Who will read and handle reports + +All reports will be read and handled by the members of the Safety Team at nf-core. + +If members of the Safety Team are deemed to have a conflict of interest with a report, they will be required to recuse themselves as per our Code of Conduct and will not have access to any follow-ups. + +To keep this first report confidential from any of the Safety Team members, please submit your first report by direct messaging on Slack/direct email to any of the nf-core members you are comfortable disclosing the information to, and be explicit about which member(s) you do not consent to sharing the information with. + +## Reviewing reports + +After receiving the report, members of the Safety Team will review the incident report to determine whether immediate action is required, for example, whether there is immediate threat to participants’ safety. + +The Safety Team, in consultation with members of the nf-core core team, will assess the information to determine whether the report constitutes a Code of Conduct violation, for them to decide on a course of action. + +In the case of insufficient information, one or more members of the Safety Team may contact the reporter, the reportee, or any other attendees to obtain more information. -Issues directly concerning members of the core team will be dealt with by other members of the core team and the safety manager, and possible conflicts of interest will be taken into account. nf-core is also in discussions about having an ombudsperson, and details will be shared in due course. +Once additional information is gathered, the Safety Team will collectively review and decide on the best course of action to take, if any. The Safety Team reserves the right to not act on a report. -All reports will be handled with utmost discretion and confidentially. +## Confidentiality + +All reports, and any additional information included, are only shared with the team of safety officers (and possibly members of the core team, in case the safety officer is in violation of the CoC). We will respect confidentiality requests for the purpose of protecting victims of abuse. + +We will not name harassment victims, beyond discussions between the safety officer and members of the nf-core team, without the explicit consent of the individuals involved. + +## Enforcement + +Actions taken by the nf-core’s Safety Team may include, but are not limited to: + +- Asking anyone to stop a behaviour. +- Asking anyone to leave the event and online spaces either temporarily, for the remainder of the event, or permanently. +- Removing access to the gather.town and Slack, either temporarily or permanently. 
+- Communicating to all participants to reinforce our expectations for conduct and remind what is unacceptable behaviour; this may be public for practical reasons. +- Communicating to all participants that an incident has taken place and how we will act or have acted — this may be for the purpose of letting event participants know we are aware of and dealing with the incident. +- Banning anyone from participating in nf-core-managed spaces, future events, and activities, either temporarily or permanently. +- No action. ## Attribution and Acknowledgements @@ -106,6 +161,22 @@ All reports will be handled with utmost discretion and confidentially. ## Changelog -### v1.0 - March 12th, 2021 +### v1.4 - February 8th, 2022 + +- Included a new member of the Safety Team. Corrected a typographical error in the text. + +### v1.3 - December 10th, 2021 + +- Added a statement that the CoC applies to nf-core gather.town workspaces. Corrected typographical errors in the text. + +### v1.2 - November 12th, 2021 + +- Removed information specific to reporting CoC violations at the Hackathon in October 2021. + +### v1.1 - October 14th, 2021 + +- Updated with names of new Safety Officers and specific information for the hackathon in October 2021. + +### v1.0 - March 15th, 2021 - Complete rewrite from original [Contributor Covenant](http://contributor-covenant.org/) CoC. diff --git a/README.md b/README.md index 443b9933..7b11703f 100644 --- a/README.md +++ b/README.md @@ -4,18 +4,19 @@ [![GitHub Actions Linting Status](https://github.com/nf-core/airrflow/workflows/nf-core%20linting/badge.svg)](https://github.com/nf-core/airrflow/actions?query=workflow%3A%22nf-core+linting%22) [![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/airrflow/results) [![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.2642009-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.2642009) -[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A522.10.1-23aa62.svg)](https://www.nextflow.io/) +[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A523.04.0-23aa62.svg)](https://www.nextflow.io/) [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) [![Launch on Nextflow Tower](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Nextflow%20Tower-%234256e7)](https://tower.nf/launch?pipeline=https://github.com/nf-core/airrflow) [![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23airrflow-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/airrflow) [![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core) +[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core) [![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core) ## Introduction -** nf-core/airrflow ** is a bioinformatics best-practice pipeline to analyze B-cell or T-cell repertoire sequencing data. 
It makes use of the [Immcantation](https://immcantation.readthedocs.io) toolset. The input data can be targeted amplicon bulk sequencing data of the V, D, J and C regions of the B/T-cell receptor with multiplex PCR or 5' RACE protocol, or assembled reads (bulk or single cell). +**nf-core/airrflow** is a bioinformatics best-practice pipeline to analyze B-cell or T-cell repertoire sequencing data. It makes use of the [Immcantation](https://immcantation.readthedocs.io) toolset. The input data can be targeted amplicon bulk sequencing data of the V, D, J and C regions of the B/T-cell receptor with multiplex PCR or 5' RACE protocol, or assembled reads (bulk or single cell). ![nf-core/airrflow overview](docs/images/airrflow_workflow_overview.png) @@ -25,7 +26,7 @@ On release, automated continuous integration tests run the pipeline on a full-si ## Pipeline summary -nf-core/airrflow allows the end-to-end processing of BCR and TCR bulk and single cell targeted sequencing data. Several protocols are supported, please see the [usage documenation](https://nf-co.re/airrflow/usage) for more details on the supported protocols. +nf-core/airrflow allows the end-to-end processing of BCR and TCR bulk and single cell targeted sequencing data. Several protocols are supported, please see the [usage documentation](https://nf-co.re/airrflow/usage) for more details on the supported protocols. ![nf-core/airrflow overview](docs/images/metro-map-airrflow.png) @@ -76,10 +77,11 @@ nf-core/airrflow allows the end-to-end processing of BCR and TCR bulk and single ## Usage -> **Note** -> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how -> to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) -> with `-profile test` before running the workflow on actual data. +:::note +If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how +to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) +with `-profile test` before running the workflow on actual data. +::: First, ensure that the pipeline tests run on your infrastructure: @@ -87,7 +89,7 @@ First, ensure that the pipeline tests run on your infrastructure: nextflow run nf-core/airrflow -profile test, --outdir ``` -To run on your data, prepare a tab-separated samplesheet with your input data. Depending on the input data type (bulk or single-cell, raw reads or assembled reads) the input samplesheet will vary. Please follow the [documentation on samplesheets](https://nf-co.re/airrflow/usage#input-samplesheet) for more details. An example samplesheet for running the pipeline on raw BCR / TCR sequencing data looks as follows: +To run nf-core/airrflow with your data, prepare a tab-separated samplesheet with your input data. Depending on the input data type (bulk or single-cell, raw reads or assembled reads) the input samplesheet will vary. Please follow the [documentation on samplesheets](https://nf-co.re/airrflow/usage#input-samplesheet) for more details. 
An example samplesheet for running the pipeline on bulk BCR / TCR sequencing data in fastq format looks as follows: | sample_id | filename_R1 | filename_R2 | filename_I1 | subject_id | species | pcr_target_locus | tissue | sex | age | biomaterial_provider | single_cell | intervention | collection_time_point_relative | cell_subset | | --------- | ------------------------------- | ------------------------------- | ------------------------------- | ---------- | ------- | ---------------- | ------ | ------ | --- | -------------------- | ----------- | -------------- | ------------------------------ | ------------ | @@ -96,27 +98,42 @@ To run on your data, prepare a tab-separated samplesheet with your input data. D Each row represents a sample with fastq files (paired-end). -A typical command to run the pipeline is: +A typical command to run the pipeline from **bulk raw fastq files** is: ```bash nextflow run nf-core/airrflow \ +-r 3.2.0 \ -profile \ ---input samplesheet.tsv \ +--mode fastq \ +--input input_samplesheet.tsv \ --library_generation_method specific_pcr_umi \ --cprimers CPrimers.fasta \ --vprimers VPrimers.fasta \ --umi_length 12 \ ---max_memory 8.GB \ ---max_cpus 8 \ +--umi_position R1 \ --outdir ./results ``` -> **Warning:** -> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those -> provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; -> see [docs](https://nf-co.re/usage/configuration#custom-configuration-files). +A typical command to run the pipeline from **single-cell AIRR rearrangement tables or assembled bulk sequencing fasta** data is: -For more details, please refer to the [usage documentation](https://nf-co.re/airrflow/usage) and the [parameter documentation](https://nf-co.re/airrflow/parameters). +```bash +nextflow run nf-core/airrflow \ +-r 3.2.0 \ +-profile \ +--input input_samplesheet.tsv \ +--mode assembled \ +--outdir results +``` + +See the [usage documentation](https://nf-co.re/airrflow/usage) and the [parameter documentation](https://nf-co.re/airrflow/parameters) for more details on how to use the pipeline and all the available parameters. + +:::warning +Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those +provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; +see [docs](https://nf-co.re/usage/configuration#custom-configuration-files). +::: + +For more details and further functionality, please refer to the [usage documentation](https://nf-co.re/airrflow/usage) and the [parameter documentation](https://nf-co.re/airrflow/parameters). ## Pipeline output diff --git a/assets/methods_description_template.yml b/assets/methods_description_template.yml index 4b9bedf5..e4acf864 100644 --- a/assets/methods_description_template.yml +++ b/assets/methods_description_template.yml @@ -3,17 +3,21 @@ description: "Suggested text and references to use when describing pipeline usag section_name: "nf-core/airrflow Methods Description" section_href: "https://github.com/nf-core/airrflow" plot_type: "html" -## TODO nf-core: Update the HTML below to your prefered methods description, e.g. add publication citation for this pipeline +## TODO nf-core: Update the HTML below to your preferred methods description, e.g. add publication citation for this pipeline ## You inject any metadata in the Nextflow '${workflow}' object data: |

   <h4>Methods</h4>
-  <p>Data was processed using nf-core/airrflow v${workflow.manifest.version} ${doi_text} of the nf-core collection of workflows (Ewels et al., 2020).</p>
+  <p>Data was processed using nf-core/airrflow v${workflow.manifest.version} ${doi_text} of the nf-core collection of workflows (Ewels et al., 2020), utilising reproducible software environments from the Bioconda (Grüning et al., 2018) and Biocontainers (da Veiga Leprevost et al., 2017) projects.</p>
   <p>The pipeline was executed with Nextflow v${workflow.nextflow.version} (Di Tommaso et al., 2017) with the following command:</p>
   <pre><code>${workflow.commandLine}</code></pre>
+  <p>${tool_citations}</p>
   <h4>References</h4>
   <ul>
-    <li>Di Tommaso, P., Chatzou, M., Floden, E. W., Barja, P. P., Palumbo, E., & Notredame, C. (2017). Nextflow enables reproducible computational workflows. Nature Biotechnology, 35(4), 316-319. https://doi.org/10.1038/nbt.3820</li>
-    <li>Ewels, P. A., Peltzer, A., Fillinger, S., Patel, H., Alneberg, J., Wilm, A., Garcia, M. U., Di Tommaso, P., & Nahnsen, S. (2020). The nf-core framework for community-curated bioinformatics pipelines. Nature Biotechnology, 38(3), 276-278. https://doi.org/10.1038/s41587-020-0439-x</li>
+    <li>Di Tommaso, P., Chatzou, M., Floden, E. W., Barja, P. P., Palumbo, E., & Notredame, C. (2017). Nextflow enables reproducible computational workflows. Nature Biotechnology, 35(4), 316-319. doi: 10.1038/nbt.3820</li>
+    <li>Ewels, P. A., Peltzer, A., Fillinger, S., Patel, H., Alneberg, J., Wilm, A., Garcia, M. U., Di Tommaso, P., & Nahnsen, S. (2020). The nf-core framework for community-curated bioinformatics pipelines. Nature Biotechnology, 38(3), 276-278. doi: 10.1038/s41587-020-0439-x</li>
+    <li>Grüning, B., Dale, R., Sjödin, A., Chapman, B. A., Rowe, J., Tomkins-Tinch, C. H., Valieris, R., Köster, J., & Bioconda Team. (2018). Bioconda: sustainable and comprehensive software distribution for the life sciences. Nature Methods, 15(7), 475–476. doi: 10.1038/s41592-018-0046-7</li>
+    <li>da Veiga Leprevost, F., Grüning, B. A., Alves Aflitos, S., Röst, H. L., Uszkoreit, J., Barsnes, H., Vaudel, M., Moreno, P., Gatto, L., Weber, J., Bai, M., Jimenez, R. C., Sachsenberg, T., Pfeuffer, J., Vera Alvarez, R., Griss, J., Nesvizhskii, A. I., & Perez-Riverol, Y. (2017). BioContainers: an open-source and community-driven framework for software standardization. Bioinformatics (Oxford, England), 33(16), 2580–2582. doi: 10.1093/bioinformatics/btx192</li>
+    ${tool_bibliography}
   </ul>
   <div class="alert alert-info">
     <h5>Notes:</h5>
diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index e8e62182..b54b1946 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,5 +1,5 @@ -report_comment: > - This report has been generated by the nf-core/airrflow +report_comment: + This report has been generated by the nf-core/airrflow analysis pipeline. For information about how to interpret these results, please see the documentation. diff --git a/assets/nf-core-airrflow_logo_light.png b/assets/nf-core-airrflow_logo_light.png index bf5a8518..2c7265f7 100644 Binary files a/assets/nf-core-airrflow_logo_light.png and b/assets/nf-core-airrflow_logo_light.png differ diff --git a/assets/repertoire_comparison.Rmd b/assets/repertoire_comparison.Rmd index 16de9bd6..64d51a89 100644 --- a/assets/repertoire_comparison.Rmd +++ b/assets/repertoire_comparison.Rmd @@ -31,6 +31,7 @@ library(alakazam) library(shazam) library(stringr) library(plotly) +library(airr) theme_set(theme_bw(base_family = "ArialMT") + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), text = element_text(family="ArialMT"))) @@ -54,21 +55,10 @@ datadir <- "." Number of reads for each of the samples and number of sequences left after performing sequence assembly and alignment to reference data. The full table can be found under [Table_sequences_assembly](repertoire_comparison/Sequence_numbers_summary/Table_sequences_assembly.tsv). -```{r seq_numbers, echo=FALSE, warning=FALSE, results='asis'} -read_table <- function(tab_file){ - tab_seqs <- read.table(tab_file, header=TRUE, sep="\t", check.names = FALSE) - write.table(tab_seqs, file=paste0(seq_dir,"/Table_sequences_assembly.tsv"), sep="\t", quote=F, row.names=F) - } -tryCatch( {read_table("./Table_sequences.tsv")} , - error=function(e){message("No sequence numbers are available if starting with assembled reads.")} -) - -``` - - ```{r seq_numbers_plot, echo=FALSE, warning=FALSE, results='asis'} tryCatch( { tab_seqs <- read.table("./Table_sequences.tsv", header=TRUE, sep="\t", check.names = FALSE) + write.table(tab_seqs, file=paste0(seq_dir,"/Table_sequences_assembly.tsv"), sep="\t", quote=F, row.names=F) plot_table <- tidyr::pivot_longer(tab_seqs, cols=Sequences_R1:Igblast, @@ -88,6 +78,8 @@ tryCatch( { theme(axis.text.x= element_text(angle = 45)) ggplotly(seqs_plot) + + }, error=function(e){message("No sequence numbers are available if starting with assembled reads.")} ) @@ -144,33 +136,37 @@ ggplotly(seqs_plot_assembled) # in the current folder all_files <- system(paste0("find '", datadir, "' -name '*clone-pass.tsv'"), intern=T) -diversity_dir <- paste(outdir, "Diversity", sep="/") -abundance_dir <- paste(outdir, "Abundance", sep="/") vfamily_dir <- paste(outdir, "V_family", sep="/") -dir.create(diversity_dir) -dir.create(abundance_dir) dir.create(vfamily_dir) # Generate one big dataframe from all patient dataframes +col_select <- c( + "sample_id", "subject_id", "sequence_id", "clone_id", + "v_call", "d_call", "j_call", + "locus", + "junction", + "pcr_target_locus" +) +df_all <- dplyr::bind_rows(lapply(all_files, read_rearrangement, col_select=col_select)) -df_list = lapply(all_files, read.csv, sep="\t") - -df_all <- dplyr::bind_rows(df_list) # Remove underscores in these columns -df_all$subject_id <- sapply(df_all$subject_id, function(x) str_replace(as.character(x), "_", "")) -df_all$sample_id <- sapply(df_all$sample_id, function(x) str_replace(as.character(x), "_", "")) +df_all$subject_id <- stringr::str_replace_all(df_all$subject_id, "_", "") +df_all$sample_id 
<- stringr::str_replace_all(df_all$sample_id , "_", "") # Annotate sample and samplepop (sample + population) by add ing all the conditions df_all$subj_locus <- as.factor(paste(df_all$sample_id, df_all$subject_id, df_all$pcr_target_locus, sep="_")) -# Write table to file -write.table(df_all, paste0(outdir,"/all_data.tsv"), sep = "\t", quote=F, row.names = F, col.names = T) +# Uncomment to save a table with all the sequencess across samples together +# write.table(df_all, paste0(outdir,"/all_data.tsv"), sep = "\t", quote=F, row.names = F, col.names = T) # Set number of bootrstraps -nboot = 200 +nboot <- 200 ``` + + + +```{r clonal_abundance, echo=FALSE, eval=FALSE} +# Set line above to eval=TRUE to include clonal abundance +diversity_dir <- paste(outdir, "Diversity", sep="/") +abundance_dir <- paste(outdir, "Abundance", sep="/") +dir.create(diversity_dir) +dir.create(abundance_dir) + abund <- estimateAbundance(df_all, group = "subj_locus", ci=0.95, nboot=nboot) abund@abundance$sample_id <- sapply(abund@abundance$subj_locus, function(x) unlist(strsplit(as.character(x), "_"))[1]) abund@abundance$subject_id <- sapply(abund@abundance$subj_locus, function(x) unlist(strsplit(as.character(x), "_"))[2]) @@ -208,12 +212,14 @@ p_ca ``` -```{r plot_abundance, include = FALSE} +```{r plot_abundance, include = FALSE, eval=FALSE} +# Set to eval=TRUE to include clonal abundance ggsave(plot=p_ca, filename = paste0(abundance_dir,"/Clonal_abundance_subject.pdf"), device="pdf", width = 25, height = 10, units="cm") ggsave(plot=p_ca, filename = paste0(abundance_dir,"/Clonal_abundance_subject.png"), device="png", width = 25, height = 10, units="cm") write.table(abund@abundance, file = paste0(abundance_dir, "/Clonal_abundance_data_subject.tsv"), sep="\t", quote = F, row.names = F) ``` + - -```{r clonal_diversity, echo = FALSE} +```{r clonal_diversity, echo = FALSE, eval=FALSE} +# Set line above to eval=TRUE to include clonal diversity sample_div <- alphaDiversity(abund, group="subj_locus", min_q=0, max_q=4, step_q=0.05, ci=0.95, nboot=nboot) sample_main <- paste0("Sample diversity (N=", sample_div@n[1], ")") @@ -273,12 +280,14 @@ div_p <- ggplot(sample_div@diversity, aes(x = q, y = d, group=sample_id)) + div_p ``` -```{r plot_diversity, include = FALSE} +```{r plot_diversity, include = FALSE, eval=FALSE} +# Set to eval=TRUE to include clonal diversity ggsave(plot=div_p, filename=paste0(diversity_dir,"/Diversity_patient_grid.png"), device="png", width = 25, height = 10, units="cm") ggsave(plot=div_p, filename=paste0(diversity_dir,"/Diversity_patient_grid.pdf"), device="pdf", width = 25, height = 10, units="cm") write.table(sample_div@diversity, file = paste0(diversity_dir, "/Clonal_diversity_data_subject.tsv"), sep="\t", quote = F, row.names = F) ``` + # V gene usage ## V gene family usage diff --git a/assets/schema_input.json b/assets/schema_input.json new file mode 100644 index 00000000..635dcd18 --- /dev/null +++ b/assets/schema_input.json @@ -0,0 +1,64 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "$id": "https://raw.githubusercontent.com/nf-core/airrflow/master/assets/schema_input.json", + "title": "nf-core/airrflow pipeline - params.input schema", + "description": "Schema for the file provided with params.input", + "type": "array", + "items": { + "type": "object", + "properties": { + "sample_id": { + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "Sample name must be provided and cannot contain spaces." 
+ }, + "subject_id": { + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "Subject name must be provided and cannot contain spaces." + }, + "species": { + "type": "string", + "enum": ["mouse", "human"], + "errorMessage": "Species name must be provided and must be one of: mouse, human." + }, + "pcr_target_locus": { + "type": "string", + "enum": ["TR", "IG", "ig", "tr", "Ig", "Tr"], + "errorMessage": "PCR target locus must be provided and must be one of: TR, IG." + }, + "tissue": { + "type": "string", + "errorMessage": "Tissue name must be provided." + }, + "sex": { + "type": "string", + "errorMessage": "Sex must be provided, specify NA if unknown." + }, + "age": { + "type": "string", + "errorMessage": "Age must be provided, specify NA if unknown." + }, + "biomaterial_provider": { + "type": "string", + "errorMessage": "Biomaterial provider must be provided." + }, + "single_cell": { + "type": "boolean", + "pattern": "^\\S+$", + "errorMessage": "Single cell must be provided as a TRUE/FALSE value." + } + }, + "required": [ + "sample_id", + "subject_id", + "species", + "pcr_target_locus", + "tissue", + "sex", + "age", + "biomaterial_provider", + "single_cell" + ] + } +} diff --git a/assets/slackreport.json b/assets/slackreport.json index 043d02f2..bd9523d9 100644 --- a/assets/slackreport.json +++ b/assets/slackreport.json @@ -3,7 +3,7 @@ { "fallback": "Plain-text summary of the attachment.", "color": "<% if (success) { %>good<% } else { %>danger<%} %>", - "author_name": "sanger-tol/readmapping v${version} - ${runName}", + "author_name": "nf-core/airrflow v${version} - ${runName}", "author_icon": "https://www.nextflow.io/docs/latest/_static/favicon.ico", "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors<% } %>", "fields": [ diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py index a75fb3c5..0a7dca71 100755 --- a/bin/check_samplesheet.py +++ b/bin/check_samplesheet.py @@ -15,7 +15,8 @@ def parse_args(args=None): Epilog = "Example usage: python check_samplesheet.py " parser = argparse.ArgumentParser(description=Description, epilog=Epilog) - parser.add_argument("FILE_IN", help="Input samplesheet file.") + parser.add_argument("file_in", help="Input samplesheet file.") + parser.add_argument("-a", "--assembled", help="Input samplesheet type", action="store_true", default=False) return parser.parse_args(args) @@ -38,22 +39,22 @@ def print_error(error, context="Line", context_str=""): sys.exit(1) -def check_samplesheet(file_in): +def check_samplesheet(file_in, assembled): """ This function checks that the samplesheet: - contains the compulsory fields: sample_id, filename_R1, filename_R2, subject_id, pcr_target_locus, species, single_cell - sample ids are unique - samples from the same subject come from the same species - - pcr_target_locus is "IG" or "TR" + - pcr_target_locus is "IG"/"ig" or "TR"/"tr" - species is "human" or "mouse" """ sample_run_dict = {} with open(file_in, "r") as fin: - ## Check that required columns are present + # Defining minimum columns and required columns min_cols = 7 - required_columns = [ + required_columns_raw = [ "sample_id", "filename_R1", "filename_R2", @@ -66,22 +67,82 @@ def check_samplesheet(file_in): "biomaterial_provider", "age", ] - no_whitespaces = [ + required_columns_assembled = [ + "filename", + "sample_id", + "subject_id", + "species", + "pcr_target_locus", + "sex", + "tissue", + "biomaterial_provider", + "age", + "single_cell", + ] + no_whitespaces_raw = [ "sample_id", 
"filename_R1", "filename_R2", "subject_id", "species", "pcr_target_locus", - "tissue", ] + no_whitespaces_assembled = [ + "sample_id", + "filename", + "subject_id", + "species", + "pcr_target_locus", + ] + + ## Read header header = [x.strip('"') for x in fin.readline().strip().split("\t")] - for col in required_columns: - if col not in header: - print("ERROR: Please check samplesheet header: {} ".format(",".join(header))) - print("Header is missing column {}".format(col)) - print("Header must contain columns {}".format("\t".join(required_columns))) - raise IndexError("Header must contain columns {}".format("\t".join(required_columns))) + + ## Read tab + tab = pd.read_csv(file_in, sep="\t", header=0) + + ## Set required columns as strings + types_dict = dict() + types_dict.update({col: str for col in required_columns_assembled[1:7]}) + for col, col_type in types_dict.items(): + tab[col] = tab[col].astype(col_type) + + # Check that all required columns for assembled and raw samplesheets are there, and do not contain whitespaces + if assembled: + for col in required_columns_assembled: + if col not in header: + print("ERROR: Please check samplesheet header: {} ".format(",".join(header))) + print("Header is missing column {}".format(col)) + print("Header must contain columns {}".format("\t".join(required_columns))) + raise IndexError("Header must contain columns {}".format("\t".join(required_columns))) + for col in no_whitespaces_assembled: + values = tab[col].tolist() + if any([re.search(r"\s+", s) for s in values]): + print_error( + "The column {} contains values with whitespaces. Please ensure that there are no tabs, spaces or any other whitespaces in these columns as well: {}".format( + col, no_whitespaces_assembled + ) + ) + else: + if any(tab["single_cell"].tolist()): + print_error( + "Some single cell column values are TRUE. The raw mode only accepts bulk samples. If processing single cell samples, please set the `--mode assembled` flag, and provide an AIRR rearrangement as input." + ) + + for col in required_columns_raw: + if col not in header: + print("ERROR: Please check samplesheet header: {} ".format(",".join(header))) + print("Header is missing column {}".format(col)) + print("Header must contain columns {}".format("\t".join(required_columns))) + raise IndexError("Header must contain columns {}".format("\t".join(required_columns))) + for col in no_whitespaces_raw: + values = tab[col].tolist() + if any([re.search(r"\s+", s) for s in values]): + print_error( + "The column {} contains values with whitespaces. Please ensure that there are no tabs, spaces or any other whitespaces in these columns as well: {}".format( + col, no_whitespaces_raw + ) + ) ## Check that rows have the same fields as header, and at least the compulsory ones are provided for line_num, line in enumerate(fin): @@ -103,7 +164,6 @@ def check_samplesheet(file_in): ) ## Check that sample ids are unique - tab = pd.read_csv(file_in, sep="\t", header=0) if len(tab["sample_id"]) != len(set(tab["sample_id"])): print_error( "Sample IDs are not unique! The sample IDs in the input samplesheet should be unique for each sample." @@ -111,7 +171,7 @@ def check_samplesheet(file_in): ## Check that pcr_target_locus is IG or TR for val in tab["pcr_target_locus"]: - if val not in ["IG", "TR"]: + if val.upper() not in ["IG", "TR"]: print_error("pcr_target_locus must be one of: IG, TR.") ## Check that species is human or mouse @@ -129,20 +189,10 @@ def check_samplesheet(file_in): "The same subject_id cannot belong to different species! 
Check input file columns 'subject_id' and 'species'." ) - ## Check that values do not contain spaces in the no whitespaces columns - for col in no_whitespaces: - values = tab[col].tolist() - if any([re.search(r"\s+", s) for s in values]): - print_error( - "The column {} contains values with whitespaces. Please ensure that there are no tabs, spaces or any other whitespaces in these columns as well: {}".format( - col, no_whitespaces - ) - ) - def main(args=None): args = parse_args(args) - check_samplesheet(args.FILE_IN) + check_samplesheet(args.file_in, args.assembled) if __name__ == "__main__": diff --git a/bin/fetch_imgt.sh b/bin/fetch_imgt.sh index a7780d49..216ccd84 100755 --- a/bin/fetch_imgt.sh +++ b/bin/fetch_imgt.sh @@ -68,7 +68,7 @@ do echo "|---- Ig" for CHAIN in IGHV IGHD IGHJ IGKV IGKJ IGLV IGLJ do - URL="http://www.imgt.org/IMGT_GENE-DB/GENElect?query=7.14+${CHAIN}&species=${VALUE}" + URL="https://www.imgt.org/genedb/GENElect?query=7.14+${CHAIN}&species=${VALUE}" FILE_NAME="${FILE_PATH}/${REPERTOIRE}_${KEY}_${CHAIN}.fasta" TMP_FILE="${FILE_NAME}.tmp" #echo $URL @@ -76,7 +76,6 @@ do awk '/
/{i++}/<\/pre>/{j++}{if(j==2){exit}}{if(i==2 && j==1 && $0!~"^
"){print}}' $TMP_FILE > $FILE_NAME
 
         # Checking once that file exists and is not empty (checks IMGT server is online)
-        read file
         if [ -s "$FILE_NAME" ]
         then
             echo "IMGT Fasta file exists and is not empty"
@@ -93,7 +92,7 @@ do
     # V amino acid for Ig
     for CHAIN in IGHV IGKV IGLV
     do
-        URL="http://www.imgt.org/IMGT_GENE-DB/GENElect?query=7.3+${CHAIN}&species=${VALUE}"
+        URL="https://www.imgt.org/genedb/GENElect?query=7.3+${CHAIN}&species=${VALUE}"
         FILE_NAME="${FILE_PATH_AA}/${REPERTOIRE}_aa_${KEY}_${CHAIN}.fasta"
         TMP_FILE="${FILE_NAME}.tmp"
         #echo $URL
@@ -108,7 +107,7 @@ do
     echo "|---- TCR"
     for CHAIN in TRAV TRAJ TRBV TRBD TRBJ TRDV TRDD TRDJ TRGV TRGJ
     do
-        URL="http://www.imgt.org/IMGT_GENE-DB/GENElect?query=7.14+${CHAIN}&species=${VALUE}"
+        URL="https://www.imgt.org/genedb/GENElect?query=7.14+${CHAIN}&species=${VALUE}"
         FILE_NAME="${FILE_PATH}/${REPERTOIRE}_${KEY}_${CHAIN}.fasta"
         TMP_FILE="${FILE_NAME}.tmp"
         #echo $URL
@@ -121,7 +120,7 @@ do
     # V amino acid for TCR
     for CHAIN in TRAV TRBV TRDV TRGV
     do
-        URL="http://www.imgt.org/IMGT_GENE-DB/GENElect?query=7.3+${CHAIN}&species=${VALUE}"
+        URL="https://www.imgt.org/genedb/GENElect?query=7.3+${CHAIN}&species=${VALUE}"
         FILE_NAME="${FILE_PATH_AA}/${REPERTOIRE}_aa_${KEY}_${CHAIN}.fasta"
         TMP_FILE="${FILE_NAME}.tmp"
         #echo $URL
@@ -140,7 +139,7 @@ do
     echo "|---- Ig"
     for CHAIN in IGH IGK IGL
     do
-        URL="http://www.imgt.org/IMGT_GENE-DB/GENElect?query=8.1+${CHAIN}V&species=${VALUE}&IMGTlabel=L-PART1+L-PART2"
+        URL="https://www.imgt.org/genedb/GENElect?query=8.1+${CHAIN}V&species=${VALUE}&IMGTlabel=L-PART1+L-PART2"
         FILE_NAME="${FILE_PATH}/${REPERTOIRE}_${KEY}_${CHAIN}L.fasta"
         TMP_FILE="${FILE_NAME}.tmp"
         #echo $URL
@@ -154,7 +153,7 @@ do
     echo "|---- TCR"
     for CHAIN in TRA TRB TRG TRD
     do
-        URL="http://www.imgt.org/IMGT_GENE-DB/GENElect?query=8.1+${CHAIN}V&species=${VALUE}&IMGTlabel=L-PART1+L-PART2"
+        URL="https://www.imgt.org/genedb/GENElect?query=8.1+${CHAIN}V&species=${VALUE}&IMGTlabel=L-PART1+L-PART2"
         FILE_NAME="${FILE_PATH}/${REPERTOIRE}_${KEY}_${CHAIN}L.fasta"
         TMP_FILE="${FILE_NAME}.tmp"
         #echo $URL
@@ -179,7 +178,7 @@ do
             QUERY=7.5
         fi
 
-        URL="http://www.imgt.org/IMGT_GENE-DB/GENElect?query=${QUERY}+${CHAIN}&species=${VALUE}"
+        URL="https://www.imgt.org/genedb/GENElect?query=${QUERY}+${CHAIN}&species=${VALUE}"
         FILE_NAME="${FILE_PATH}/${REPERTOIRE}_${KEY}_${CHAIN}.fasta"
         TMP_FILE="${FILE_NAME}.tmp"
         #echo $URL
@@ -193,7 +192,7 @@ do
     echo "|---- TCR"
     for CHAIN in TRAC TRBC TRGC TRDC
     do
-        URL="http://www.imgt.org/IMGT_GENE-DB/GENElect?query=14.1+${CHAIN}&species=${VALUE}"
+        URL="https://www.imgt.org/genedb/GENElect?query=14.1+${CHAIN}&species=${VALUE}"
         FILE_NAME="${FILE_PATH}/${REPERTOIRE}_${KEY}_${CHAIN}.fasta"
         TMP_FILE="${FILE_NAME}.tmp"
         #echo $URL
@@ -209,7 +208,7 @@ done
 
 # Write download info
 INFO_FILE=${OUTDIR}/IMGT.yaml
-echo -e "source:  http://www.imgt.org/IMGT_GENE-DB" > $INFO_FILE
+echo -e "source:  https://www.imgt.org/genedb" > $INFO_FILE
 echo -e "date:    ${DATE}" >> $INFO_FILE
 echo -e "species:" >> $INFO_FILE
 for Q in ${SPECIES_QUERY[@]}
diff --git a/bin/reveal_filter_quality.R b/bin/reveal_filter_quality.R
index fb97afb8..5ed0258c 100755
--- a/bin/reveal_filter_quality.R
+++ b/bin/reveal_filter_quality.R
@@ -89,12 +89,16 @@ if (!is.null(opt$OUTPUT)) {
 } else {
     output_fn <- sub(".tsv$", "_quality-pass.tsv", basename(opt$REPERTOIRE))
 }
-write_rearrangement(db[filter_pass, ], file = output_fn)
+# don't write if empty
+if (sum(filter_pass)>0) {
+    write_rearrangement(db[filter_pass, ], file = output_fn)
+}
 
 # cat("     TOTAL_GROUPS> ", n_groups,  "\n", sep=" ", file = file.path(out_dir, log_verbose_name), append=TRUE)
 
 write("START> FilterQuality", stdout())
 write(paste0("FILE> ", basename(opt$REPERTOIRE)), stdout())
+# even if output file not written, because empty, keep track in log
 write(paste0("OUTPUT> ", basename(output_fn)), stdout())
 write(paste0("PASS> ", sum(filter_pass)), stdout())
 write(paste0("FAIL> ", sum(!filter_pass) + sum(filter_na)), stdout())
diff --git a/conf/modules.config b/conf/modules.config
index d16975a4..df8fad6f 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -35,6 +35,16 @@ process {
         ]
     }
 
+    // Validate input assembled
+    withName: SAMPLESHEET_CHECK_ASSEMBLED {
+        publishDir = [
+            path: { "${params.outdir}/pipeline_info" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+        ]
+        ext.args = '--assembled'
+    }
+
     withName: 'FASTP' {
             publishDir = [
                 [
@@ -89,7 +99,19 @@ process {
         ext.args = '--quiet'
     }
 
-    withName: 'MERGE_UMI' {
+    withName: RENAME_FASTQ {
+        publishDir = [
+            enabled: false
+        ]
+    }
+
+    withName: 'RENAME_FILE_*' {
+        publishDir = [
+            enabled: false
+        ]
+    }
+
+    withName: MERGE_UMI {
         publishDir = [
             [
                 enabled: false
@@ -156,12 +178,15 @@ process {
         ]
     }
 
-    withName: PRESTO_BUILDCONSENSUS {
+    withName: PRESTO_BUILDCONSENSUS_UMI {
         publishDir = [
             path: { "${params.outdir}/presto/06-build-consensus/${meta.id}" },
             mode: params.publish_dir_mode,
             saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
         ]
+        ext.args = ''
+        ext.args2 = ''
+        ext.args3 = 'ID BARCODE SEQCOUNT PRIMER PRCOUNT PRCONS PRFREQ CONSCOUNT'
     }
 
     withName: PRESTO_POSTCONSENSUS_PAIRSEQ {
@@ -286,7 +311,7 @@ process {
             mode: params.publish_dir_mode,
             saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
         ]
-        ext.args = '--if sequence_id --sf sequence --mf cell_id consensus_count duplicate_count c_call c_cigar c_sequence_start c_sequence_end'
+        ext.args = '--if sequence_id --sf sequence --mf cell_id consensus_count duplicate_count'
     }
 
 
@@ -374,6 +399,7 @@ process {
             mode: params.publish_dir_mode,
             saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
         ]
+        errorStrategy = 'retry'
     }
 
     // ------------------------------
@@ -398,6 +424,11 @@ process {
             mode: params.publish_dir_mode,
             saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
         ]
+        ext.args = ['findthreshold_method':'gmm',
+            'findthreshold_model':'gamma-norm',
+            'findthreshold_edge':0.9,
+            'findthreshold_cutoff':'user',
+            'findthreshold_spc':0.995]
     }
 
     withName: REPORT_THRESHOLD {
@@ -406,6 +437,12 @@ process {
             mode: params.publish_dir_mode,
             saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
         ]
+        ext.args = ['findthreshold_method':'gmm',
+            'findthreshold_model':'gamma-norm',
+            'findthreshold_edge':0.9,
+            'findthreshold_cutoff':'user',
+            'findthreshold_spc':0.995,
+            'subsample':10000]
     }
 
     withName: DEFINE_CLONES_COMPUTE {
@@ -417,7 +454,7 @@ process {
         ext.args = ['outname':'', 'model':'hierarchical',
                     'method':'nt', 'linkage':'single',
                     'skip_convergence':true,
-                    'outputby':'sample_id', 'min_n':30]
+                    'min_n':30]
     }
 
     withName: DEFINE_CLONES_REPORT {
@@ -428,8 +465,8 @@ process {
         ]
         ext.args = ['outname':'', 'model':'hierarchical',
                     'method':'nt', 'linkage':'single',
-                    'skip_convergence':true,
-                    'outputby':'sample_id', 'min_n':30]
+                    'skip_convergence':false,
+                    'min_n':30]
     }
 
     withName: DOWSER_LINEAGES {
@@ -438,7 +475,10 @@ process {
             mode: params.publish_dir_mode,
             saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
         ]
-        ext.args = ['build':'igphyml']
+        ext.args = ['build':'igphyml',
+                    'minseq':5,
+                    'traits':'c_call',
+                    'tips':'c_call']
     }
 
     // -------------------------------
@@ -470,4 +510,13 @@ process {
         ]
     }
 
+    withName: 'MULTIQC' {
+        ext.args   = params.multiqc_title ? "--title \"$params.multiqc_title\"" : ''
+        publishDir = [
+            path: { "${params.outdir}/multiqc" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+        ]
+    }
+
 }
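The `ext.args` maps set above are the pipeline defaults for the clonal-threshold and clonal-analysis steps. Following the nf-core convention (module arguments are one of the few legitimate uses of `-c`, as the `docs/usage.md` changes below also note), they can be overridden from a small custom config. A minimal sketch, using only keys that appear in `conf/modules.config` above and deliberately illustrative values:

```groovy
// custom_args.config -- pass to the run with `-c custom_args.config`
// Keys mirror the defaults in conf/modules.config; values are illustrative, not recommendations.
process {
    withName: 'REPORT_THRESHOLD' {
        ext.args = ['findthreshold_method':'gmm',
                    'findthreshold_model':'gamma-norm',
                    'findthreshold_edge':0.9,
                    'findthreshold_cutoff':'user',
                    'findthreshold_spc':0.99,
                    'subsample':20000]
    }
}
```

Note that assigning `ext.args` this way replaces the whole map rather than merging it, so any default key that is still needed has to be repeated.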
diff --git a/conf/test_assembled.config b/conf/test_assembled_hs.config
similarity index 94%
rename from conf/test_assembled.config
rename to conf/test_assembled_hs.config
index 8d3e5e10..602f5462 100644
--- a/conf/test_assembled.config
+++ b/conf/test_assembled_hs.config
@@ -18,7 +18,7 @@ params {
 
     // Input data
     mode = 'assembled'
-    input = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-reveal/test_reveal_metadata.tsv'
+    input = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-reveal/test_assembled_metadata_hs.tsv'
     imgtdb_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/imgtdb_base.zip'
     igblast_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/igblast_base.zip'
 
diff --git a/conf/test_assembled_immcantation_devel.config b/conf/test_assembled_immcantation_devel_hs.config
similarity index 84%
rename from conf/test_assembled_immcantation_devel.config
rename to conf/test_assembled_immcantation_devel_hs.config
index 61ddeeca..dad18d47 100644
--- a/conf/test_assembled_immcantation_devel.config
+++ b/conf/test_assembled_immcantation_devel_hs.config
@@ -4,12 +4,12 @@
  * -------------------------------------------------
  * Defines bundled input files and everything required
  * to run a fast and simple test. Use as follows:
- *   nextflow run nf-core/airrflow -profile test_assembled_immcantation_devel,
+ *   nextflow run nf-core/airrflow -profile test_assembled_immcantation_devel_hs,
  */
 
 params {
     config_profile_name        = 'Test assembled mode with Immcantation custom_container'
-    config_profile_description = 'Minimal test dataset to check pipeline function on assembled mode with Immcantation custom_container'
+    config_profile_description = 'Minimal human test dataset to check pipeline function on assembled mode with Immcantation custom_container'
 
     // Limit resources so that this can run on GitHub Actions
     max_cpus   = 2
@@ -18,7 +18,7 @@ params {
 
     // Input data
     mode = 'assembled'
-    input = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-reveal/test_reveal_metadata.tsv'
+    input = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-reveal/test_assembled_metadata_hs.tsv'
     imgtdb_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/imgtdb_base.zip'
     igblast_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/igblast_base.zip'
     igphyml = '/usr/local/share/igphyml/src/igphyml'
diff --git a/conf/test_assembled_immcantation_devel_mm.config b/conf/test_assembled_immcantation_devel_mm.config
new file mode 100644
index 00000000..2aea10a3
--- /dev/null
+++ b/conf/test_assembled_immcantation_devel_mm.config
@@ -0,0 +1,44 @@
+/*
+ * -------------------------------------------------
+ *  Nextflow config file for running tests
+ * -------------------------------------------------
+ * Defines bundled input files and everything required
+ * to run a fast and simple test. Use as follows:
+ *   nextflow run nf-core/airrflow -profile test_assembled_immcantation_devel_mm,
+ */
+
+params {
+    config_profile_name        = 'Test assembled mode with Immcantation custom_container'
+    config_profile_description = 'Minimal mouse test dataset to check pipeline function on assembled mode with Immcantation custom_container'
+
+    // Limit resources so that this can run on GitHub Actions
+    max_cpus   = 2
+    max_memory = 6.GB
+    max_time   = 6.h
+
+    // Input data
+    mode = 'assembled'
+    input = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-reveal/test_assembled_metadata_mm.tsv'
+    imgtdb_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/imgtdb_base.zip'
+    igblast_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/igblast_base.zip'
+    igphyml = '/usr/local/share/igphyml/src/igphyml'
+
+    reassign = true
+    productive_only = true
+    collapseby = 'filename'
+    cloneby = 'subject_id'
+    crossby = 'subject_id'
+    remove_chimeric = true
+}
+
+process{
+
+    // all process with label 'immcantation' will be tested with this container instead.
+    withLabel:immcantation{
+        container = 'docker.io/immcantation/suite:devel'
+    }
+}
+
+env {
+    PYTHONNOUSERSITE = 0
+}
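The `withLabel:immcantation` block above is how this devel-test profile points every Immcantation-labelled process at the `docker.io/immcantation/suite:devel` image. The same selector works in a user config to pin a fixed suite release instead of `devel`; a sketch, where the tag is only a placeholder:

```groovy
// immcantation_pin.config -- supply with `-c immcantation_pin.config`
process {
    // override the container for every process carrying the 'immcantation' label
    withLabel: 'immcantation' {
        container = 'docker.io/immcantation/suite:4.4.0'  // placeholder tag, substitute a real release
    }
}
```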
diff --git a/conf/test_assembled_mm.config b/conf/test_assembled_mm.config
new file mode 100644
index 00000000..a80d2099
--- /dev/null
+++ b/conf/test_assembled_mm.config
@@ -0,0 +1,31 @@
+/*
+ * -------------------------------------------------
+ *  Nextflow config file for running tests
+ * -------------------------------------------------
+ * Defines bundled input files and everything required
+ * to run a fast and simple test. Use as follows:
+ *   nextflow run nf-core/airrflow -profile test_assembled_mm,
+ */
+
+params {
+    config_profile_name        = 'Test assembled mode'
+    config_profile_description = 'Minimal mouse test dataset to test assembled mode'
+
+    // Limit resources so that this can run on GitHub Actions
+    max_cpus   = 2
+    max_memory = 6.GB
+    max_time   = 6.h
+
+    // Input data
+    mode = 'assembled'
+    input = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-reveal/test_assembled_metadata_mm.tsv'
+    imgtdb_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/imgtdb_base.zip'
+    igblast_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/igblast_base.zip'
+
+    reassign = true
+    productive_only = true
+    collapseby = 'filename'
+    cloneby = 'subject_id'
+    remove_chimeric = true
+}
+
diff --git a/conf/test_fetchimgt.config b/conf/test_fetchimgt.config
index 81500d7f..e223d687 100644
--- a/conf/test_fetchimgt.config
+++ b/conf/test_fetchimgt.config
@@ -23,6 +23,7 @@ params {
     input = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-bcr/Metadata_test_airr.tsv'
     cprimers = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-bcr/C_primers.fasta'
     vprimers = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-bcr/V_primers.fasta'
+    fetch_imgt = true
 
     mode = 'fastq'
 
diff --git a/conf/test_full.config b/conf/test_full.config
index 52781925..8196a9d3 100644
--- a/conf/test_full.config
+++ b/conf/test_full.config
@@ -28,3 +28,12 @@ params {
     umi_start = 0
     umi_position = 'R1'
 }
+
+process {
+    withName:DOWSER_LINEAGES{
+        ext.args = ['build':'igphyml',
+                    'minseq':5,
+                    'traits':'c_primer',
+                    'tips':'c_primer']
+    }
+}
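`test_full.config` reuses the same process-scope override pattern as the small test profiles above. Those test profiles also cap `max_cpus`, `max_memory` and `max_time` to fit GitHub Actions; for runs on real data these limits can be raised from a custom config or a `-params-file`. A sketch with illustrative numbers only:

```groovy
// resources.config -- supply with `-c resources.config`
params {
    // illustrative caps; set them to whatever your cluster or cloud quota allows
    max_cpus   = 24
    max_memory = '128.GB'
    max_time   = '240.h'
}
```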
diff --git a/docs/output.md b/docs/output.md
index e2477c8d..1dedb05c 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -212,7 +212,9 @@ Remove sequences which do not have 2 representative using [SplitSeq](https://pre
 
 ![MultiQC - FastQC adapter content plot](images/mqc_fastqc_adapter.png)
 
-> **NB:** Two sets of FastQC plots are displayed in the MultiQC report: first for the raw _untrimmed_ and unmated reads and secondly for the assembled and QC filtered reads (but before collapsing duplicates). They may contain adapter sequence and potentially regions with low quality.
+:::note
+Two sets of FastQC plots are displayed in the MultiQC report: first for the raw _untrimmed_ and unmated reads and secondly for the assembled and QC filtered reads (but before collapsing duplicates). They may contain adapter sequence and potentially regions with low quality.
+:::
 
 ## VDJ annotation
 
@@ -388,10 +390,10 @@ This folder is genereated when `detect_contamination` is set to `true`.
 
 - `clonal_analysis/find_threshold/`
   - `*log`: Log of the process that will be parsed to generate a report.
-  - `all_reps_threshold-mean.tsv`: Mean of all hamming distance thresholds of the
-    Junction regions as determined by Shazam.
-  - `all_reps_threshold-summary.tsv`: Thresholds for each group of `--cloneby` samples.
   - `all_reps_dist_report`: Report
+    - `tables/all_reps_threshold-mean.tsv`: Mean of all hamming distance thresholds of the
+      Junction regions as determined by Shazam.
+    - `tables/all_reps_threshold-summary.tsv`: Thresholds for each group of `--cloneby` samples.
 
 
 
@@ -507,6 +509,7 @@ Results generated by MultiQC collate pipeline QC from supported tools e.g. FastQ
   - Reports generated by Nextflow: `execution_report.html`, `execution_timeline.html`, `execution_trace.txt` and `pipeline_dag.dot`/`pipeline_dag.svg`.
   - Reports generated by the pipeline: `pipeline_report.html`, `pipeline_report.txt` and `software_versions.yml`. The `pipeline_report*` files will only be present if the `--email` / `--email_on_fail` parameter's are used when running the pipeline.
   - Reformatted samplesheet files used as input to the pipeline: `samplesheet.valid.csv`.
+  - Parameters used by the pipeline run: `params.json`.
 
 
 
diff --git a/docs/usage.md b/docs/usage.md
index f7f4f931..aafc8349 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -4,43 +4,42 @@
 
 > _Documentation of pipeline parameters is generated automatically from the pipeline schema and can no longer be found in markdown files._
 
-# Introduction
+## Introduction
 
 The nf-core/airrflow pipeline allows processing BCR and TCR targeted sequencing data from bulk and single-cell sequencing protocols. It performs sequence assembly, V(D)J assignment, clonotyping, lineage reconstruction and repertoire analysis using the [Immcantation](https://immcantation.readthedocs.io/en/stable/) framework.
 
 ![nf-core/airrflow overview](images/airrflow_workflow_overview.png)
 
-# Running the pipeline
+## Running the pipeline
 
-The typical command for running the pipeline departing from bulk raw fastq files is as follows:
+### Quickstart
+
+A typical command for running the pipeline for **bulk raw fastq files** is:
 
 ```bash
 nextflow run nf-core/airrflow \
--profile docker \
+-profile <docker/singularity/podman/shifter/charliecloud/conda/institute> \
 --mode fastq \
---input samplesheet.tsv \
+--input input_samplesheet.tsv \
 --library_generation_method specific_pcr_umi \
 --cprimers CPrimers.fasta \
 --vprimers VPrimers.fasta \
 --umi_length 12 \
---max_memory 8.GB \
---max_cpus 8 \
---outdir ./results
+--umi_position R1 \
+--outdir results
 ```
 
-The typical command for running the pipeline departing from assembled reads (fasta) or single-cell data (AIRR) is as follows:
+A typical command for running the pipeline starting from **single-cell AIRR rearrangement tables or assembled bulk sequencing fasta** data is:
 
-```
+```bash
 nextflow run nf-core/airrflow \
--profile docker \
+-profile <docker/singularity/podman/shifter/charliecloud/conda/institute> \
 --input input_samplesheet.tsv \
 --mode assembled \
---outdir results \
---reassign --productive_only --remove_chimeric \
---collapseby filename \
---cloneby subject_id
+--outdir results
 ```
 
+Check the section [Input samplesheet](#input-samplesheet) below for instructions on how to create the samplesheet, and the [Supported library generation protocols](#supported-bulk-library-generation-methods-protocols) section below for examples on how to run the pipeline for different bulk sequencing protocols.
 For more information about the parameters, please refer to the [parameters documentation](https://nf-co.re/airrflow/parameters).
 The command above will launch the pipeline with the `docker` configuration profile. See below for more information about profiles.
 
@@ -57,8 +56,11 @@ If you wish to repeatedly use the same parameters for multiple runs, rather than
 
 Pipeline settings can be provided in a `yaml` or `json` file via `-params-file <file>`.
 
-> ⚠️ Do not use `-c ` to specify parameters as this will result in errors. Custom config files specified with `-c` must only be used for [tuning process resource specifications](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources), other infrastructural tweaks (such as output directories), or module arguments (args).
-> The above pipeline run specified with a params file in yaml format:
+:::warning
+Do not use `-c <file>` to specify parameters as this will result in errors. Custom config files specified with `-c` must only be used for [tuning process resource specifications](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources), other infrastructural tweaks (such as output directories), or module arguments (args).
+:::
+
+The above pipeline run specified with a params file in yaml format:
 
 ```bash
 nextflow run nf-core/airrflow -profile docker -params-file params.yaml
@@ -70,15 +72,36 @@ with `params.yaml` containing:
 input: './samplesheet.csv'
 outdir: './results/'
 genome: 'GRCh37'
-input: 'data'
 <...>
 ```
 
 You can also generate such `YAML`/`JSON` files via [nf-core/launch](https://nf-co.re/launch).
 
-# Input samplesheet
+## Updating the pipeline
+
+When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline:
+
+```bash
+nextflow pull nf-core/airrflow
+```
+
+## Reproducibility
+
+It is a good idea to specify a pipeline version when running the pipeline on your data. This ensures that a specific version of the pipeline code and software are used when you run your pipeline. If you keep using the same tag, you'll be running the same version of the pipeline, even if there have been changes to the code since.
+
+First, go to the [nf-core/airrflow releases page](https://github.com/nf-core/airrflow/releases) and find the latest pipeline version - numeric only (eg. `1.3.1`). Then specify this when running the pipeline with `-r` (one hyphen) - eg. `-r 1.3.1`. Of course, you can switch to another version by changing the number after the `-r` flag.
+
+This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future. For example, at the bottom of the MultiQC reports.
+
+To further assist in reproducibility, you can share and re-use [parameter files](#running-the-pipeline) to repeat pipeline runs with the same settings without having to write out a command with every single parameter.
+
+:::tip
+If you wish to share such a profile (e.g. to upload as supplementary material for an academic publication), make sure NOT to include cluster-specific paths to files, nor institution-specific profiles.
+:::
+
+## Input samplesheet
 
-## Fastq input samplesheet (bulk)
+### Fastq input samplesheet (bulk sequencing only)
 
 The required input file for processing raw BCR or TCR bulk targeted sequencing data is a sample sheet in TSV format (tab separated). The columns `sample_id`, `filename_R1`, `filename_R2`, `subject_id`, `species`, `tissue`, `pcr_target_locus`, `single_cell`, `sex`, `age` and `biomaterial_provider` are required. An example samplesheet is:
 
@@ -87,41 +110,46 @@ The required input file for processing raw BCR or TCR bulk targeted sequencing d
 | sample01  | sample1_S8_L001_R1_001.fastq.gz | sample1_S8_L001_R2_001.fastq.gz | sample1_S8_L001_I1_001.fastq.gz | Subject02  | human   | IG               | blood  | NA     | 53  | sequencing_facility  | FALSE       | Drug_treatment | Baseline                       | plasmablasts |
 | sample02  | sample2_S8_L001_R1_001.fastq.gz | sample2_S8_L001_R2_001.fastq.gz | sample2_S8_L001_I1_001.fastq.gz | Subject02  | human   | TR               | blood  | female | 78  | sequencing_facility  | FALSE       | Drug_treatment | Baseline                       | plasmablasts |
 
-- sample_id: Sample ID assigned by submitter, unique within study.
-- filename_R1: path to fastq file with first mates of paired-end sequencing.
-- filename_R2: path to fastq file with second mates of paired-end sequencing.
-- filename_I1 (optional): path to fastq with illumina index and UMI (unique molecular identifier) barcode.
-- subject_id: Subject ID assigned by submitter, unique within study.
-- species: species from which the sample was taken. Supported species are `human` and `mouse`.
-- tissue: tissue from which the sample was taken. E.g. `blood`, `PBMC`, `brain`.
-- pcr_target_locus: Designation of the target locus (`IG` or `TR`).
-- sex: Subject biological sex (`female`, `male`, etc.).
-- age: Subject biological age.
-- single_cell: TRUE or FALSE. Fastq input samplesheet only supports a FALSE value.
+- `sample_id`: Sample ID assigned by submitter, unique within study.
+- `filename_R1`: path to fastq file with first mates of paired-end sequencing.
+- `filename_R2`: path to fastq file with second mates of paired-end sequencing.
+- `filename_I1` (optional): path to fastq with illumina index and UMI (unique molecular identifier) barcode.
+- `subject_id`: Subject ID assigned by submitter, unique within study.
+- `species`: species from which the sample was taken. Supported species are `human` and `mouse`.
+- `tissue`: tissue from which the sample was taken. E.g. `blood`, `PBMC`, `brain`.
+- `pcr_target_locus`: Designation of the target locus (`IG` or `TR`).
+- `biomaterial_provider`: Institution / research group that provided the samples.
+- `sex`: Subject biological sex (`female`, `male`, etc.).
+- `age`: Subject biological age.
+- `single_cell`: TRUE or FALSE. Fastq input samplesheet only supports a FALSE value.
 
 Other optional columns can be added. These columns will be available when building the contrasts for the repertoire comparison report. It is recommended that these columns also follow the AIRR nomenclature. Examples are:
 
-- intervention: Description of intervention.
-- disease_diagnosis: Diagnosis of subject.
-- collection_time_point_relative: Time point at which sample was taken, relative to `collection_time_point_reference` (e.g. 14d, 6 months, baseline).
-- collection_time_point_reference: Event in the study schedule to which `Sample collection time` relates to (e.g. primary vaccination, intervention start).
-- cell_subset: Commonly-used designation of isolated cell population.
+- `intervention`: Description of intervention.
+- `disease_diagnosis`: Diagnosis of subject.
+- `collection_time_point_relative`: Time point at which sample was taken, relative to `collection_time_point_reference` (e.g. 14d, 6 months, baseline).
+- `collection_time_point_reference`: Event in the study schedule to which `Sample collection time` relates (e.g. primary vaccination, intervention start).
+- `cell_subset`: Commonly-used designation of isolated cell population.
 
 The metadata specified in the input file will then be automatically annotated in a column with the same header in the tables generated by the pipeline.
 
-## Assembled input samplesheet (bulk or single-cell)
+### Assembled input samplesheet (bulk or single-cell sequencing)
 
-The required input file for processing raw BCR or TCR bulk targeted sequencing data is a sample sheet in TSV format (tab separated). The columns `sample_id`, `filename`, `subject_id`, `species`, `tissue`, `single_cell`, `sex`, `age` and `biomaterial_provider` are required.
+The required input file for processing assembled BCR or TCR sequencing data (bulk or single-cell) is a sample sheet in TSV format (tab separated). The columns `sample_id`, `filename`, `subject_id`, `species`, `tissue`, `pcr_target_locus`, `single_cell`, `sex`, `age` and `biomaterial_provider` are required. All fields are explained in the previous section, the only difference being that there is a single `filename` column for the assembled input samplesheet. The provided file differs between assembled single-cell and bulk data:
 
-An example samplesheet is
+- `filename` for single-cell assembled data: path to the `airr_rearrangement.tsv` file, for example the one generated by 10x Genomics `cellranger vdj` or `cellranger multi` when processing scBCRseq / scTCRseq data. The field accepts any TSV table following the [AIRR rearrangement Schema specification](https://docs.airr-community.org/en/stable/datarep/rearrangements.html). See [here](https://support.10xgenomics.com/single-cell-vdj/software/pipelines/latest/output/annotation#airr) for more details on the cellranger output.
+- `filename` for bulk assembled data: path to `sequences.fasta` file, containing the assembled and error-corrected reads.
 
-| filename                                                 | species | subject_id | sample_id                         | tissue     | sex  | age | biomaterial_provider | pcr_target_locus | single_cell |
-| -------------------------------------------------------- | ------- | ---------- | --------------------------------- | ---------- | ---- | --- | -------------------- | ---------------- | ----------- |
-| sc5p_v2_hs_PBMC_1k_b_airr_rearrangement.tsv              | human   | subject_x  | sc5p_v2_hs_PBMC_1k_5fb            | PBMC       | NA   | NA  | 10x Genomics         | ig               | TRUE        |
-| sc5p_v2_mm_c57bl6_splenocyte_1k_b_airr_rearrangement.tsv | mouse   | mouse_x    | sc5p_v2_mm_c57bl6_splenocyte_1k_b | splenocyte | NA   | NA  | 10x Genomics         | ig               | TRUE        |
-| bulk-Laserson-2014.fasta                                 | human   | PGP1       | PGP1                              | PBMC       | male | NA  | Laserson-2014        | ig               | FALSE       |
+Both assembled single-cell and bulk samples can be provided in the same samplesheet, as in the example below.
 
-## Supported AIRR metadata fields
+An example samplesheet is:
+
+| filename                                    | species | subject_id | sample_id              | tissue | sex  | age | biomaterial_provider | pcr_target_locus | single_cell |
+| ------------------------------------------- | ------- | ---------- | ---------------------- | ------ | ---- | --- | -------------------- | ---------------- | ----------- |
+| sc5p_v2_hs_PBMC_1k_b_airr_rearrangement.tsv | human   | subject_x  | sc5p_v2_hs_PBMC_1k_5fb | PBMC   | NA   | NA  | 10x Genomics         | IG               | TRUE        |
+| bulk-Laserson-2014.fasta                    | human   | PGP1       | PGP1                   | PBMC   | male | NA  | Laserson-2014        | IG               | FALSE       |
+
+### Supported AIRR metadata fields
 
 nf-core/airrflow offers full support for the [AIRR standards 1.4](https://docs.airr-community.org/en/stable/datarep/metadata.html) metadata annotation. The minimum metadata fields that are needed by the pipeline are listed in the table below. Other non-mandatory AIRR fields can be provided in the input samplesheet, which will be available for reporting and introducing comparisons among repertoires.
 
@@ -137,7 +165,7 @@ nf-core/airrflow offers full support for the [AIRR standards 1.4](https://docs.a
 | biomaterial_provider      | Samplesheet column |                               | Name of sample biomaterial provider                   |
 | library_generation_method | Parameter          | `--library_generation_method` | Generic type of library generation                    |
 
-# Supported bulk library generation methods (protocols)
+## Supported bulk library generation methods (protocols)
 
 When processing bulk sequencing data departing from raw `fastq` reads, several sequencing protocols are supported which can be provided with the parameter `--library_generation_method`.
 The following table matches the library generation methods as described in the [AIRR metadata annotation guidelines](https://docs.airr-community.org/en/stable/miairr/metadata_guidelines.html#library-generation-method) to the value that can be provided to the `--library_generation_method` parameter.
@@ -155,13 +183,13 @@ The following table matches the library generation methods as described in the [
 | RT(specific+UMI)+TS+PCR           | 5’-RACE PCR using transcript- specific primers containing UMIs                             | Not supported    |                                           |
 | RT(specific)+TS                   | RT-based generation of dsDNA without subsequent PCR. This is used by RNA-seq kits.         | Not supported    |                                           |
 
-## Multiplex specific PCR (with or without UMI)
+### Multiplex specific PCR (with or without UMI)
 
 This sequencing type requires setting `--library_generation_method specific_pcr_umi` if UMI barcodes were used, or `--library_generation_method specific_pcr` if no UMI barcodes were used (sans-umi). If the option without UMI barcodes is selected, the UMI length will be set automatically to 0.
 
 It is required to provide the sequences for the V-region primers as well as the C-region primers used in the specific PCR amplification. Some examples of UMI and barcode configurations are provided. Depending on the position of the C-region primer, V-region primers and UMI barcodes, there are several possibilities detailed in the following subsections.
 
-### R1 read contains C primer (and UMI barcode)
+#### R1 read contains C primer (and UMI barcode)
 
 The `--cprimer_position` and `--umi_position` (if UMIs are used) parameters need to be set to R1 (this is the default).
 If there are extra bases between the UMI barcode and C primer, specify the number of bases with the `--cprimer_start` parameter (default zero). Set `--cprimer_position R1` (this is the default).
@@ -194,7 +222,7 @@ nextflow run nf-core/airrflow -profile docker \
 --outdir ./results
 ```
 
-### R1 read contains V primer (and UMI barcode)
+#### R1 read contains V primer (and UMI barcode)
 
 The `--umi_position` parameter needs to be set to R1 (if UMIs are used), and `--cprimer_position` to `R2`.
 If there are extra bases between the UMI barcode and V primer, specify the number of bases with the `--vprimer_start` parameter (default zero).
@@ -227,7 +255,7 @@ nextflow run nf-core/airrflow -profile docker \
 --outdir results
 ```
 
-### R2 read contains C primer (and UMI barcode)
+#### R2 read contains C primer (and UMI barcode)
 
 The `--umi_position` and `--cprimer_position` parameters need to be set to R2.
 If there are extra bases between the UMI barcode and C primer, specify the number of bases with the `--cprimer_start` parameter (default zero).
@@ -247,7 +275,7 @@ nextflow run nf-core/airrflow -profile docker \
 --outdir ./results
 ```
 
-### UMI barcode is provided in the index file
+#### UMI barcode is provided in the index file
 
 If the UMI barcodes are provided in an additional index file, please provide it in the column `filename_I1` in the input samplesheet and additionally set the `--index_file` parameter. Specify the UMI barcode length with the `--umi_length` parameter. You can optionally specify the UMI start position in the index sequence with the `--umi_start` parameter (the default is 0).
 
@@ -266,11 +294,11 @@ nextflow run nf-core/airrflow -profile docker \
 --outdir ./results
 ```
 
-## dT-Oligo RT and 5'RACE PCR
+### dT-Oligo RT and 5'RACE PCR
 
 This sequencing type requires setting `--library_generation_method race_5p_umi` or `--library_generation_method race_5p_umi` if UMIs are not being employed, and providing sequences for the C-region primers as well as the linker or template switch oligo sequences with the parameter `--race_linker`. Examples are provided below to run airrflow to process amplicons generated with the TAKARA 5'RACE SMARTer Human BCR and TCR protocols (library structure schema shown below).
 
-### Takara Bio SMARTer Human BCR
+#### Takara Bio SMARTer Human BCR
 
 The read configuration when sequencing with the TAKARA Bio SMARTer Human BCR protocol is the following:
 
@@ -289,7 +317,7 @@ nextflow run nf-core/airrflow -profile docker \
 --outdir ./results
 ```
 
-### Takara Bio SMARTer Human TCR v2
+#### Takara Bio SMARTer Human TCR v2
 
 The read configuration when sequencing with the Takara Bio SMARTer Human TCR v2 protocol is the following:
 
@@ -328,7 +356,7 @@ GTTTGGTATGAGGCTGACTTCN
 CATCTGCATCAAGTTGTTTATC
 ```
 
-# UMI barcode handling
+## UMI barcode handling
 
 Unique Molecular Identifiers (UMIs) enable the quantification of BCR or TCR abundance in the original sample by allowing to distinguish PCR duplicates from original sample duplicates.
 The UMI indices are random nucleotide sequences of a pre-determined length that are added to the sequencing libraries before any PCR amplification steps, for example as part of the primer sequences.
@@ -341,29 +369,11 @@ The UMI barcodes are typically read from an index file but sometimes can be prov
 
 - No UMIs in R1 or R2 reads: if no UMIs are present in the samples, specify `--umi_length 0` to use the sans-UMI subworkflow.
 
-## Updating the pipeline
-
-When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline:
-
-```bash
-nextflow pull nf-core/airrflow
-```
-
-## Reproducibility
-
-It is a good idea to specify a pipeline version when running the pipeline on your data. This ensures that a specific version of the pipeline code and software are used when you run your pipeline. If you keep using the same tag, you'll be running the same version of the pipeline, even if there have been changes to the code since.
-
-First, go to the [nf-core/airrflow releases page](https://github.com/nf-core/airrflow/releases) and find the latest pipeline version - numeric only (eg. `1.3.1`). Then specify this when running the pipeline with `-r` (one hyphen) - eg. `-r 1.3.1`. Of course, you can switch to another version by changing the number after the `-r` flag.
-
-This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future. For example, at the bottom of the MultiQC reports.
-
-To further assist in reproducibility, you can use share and re-use [parameter files](#running-the-pipeline) to repeat pipeline runs with the same settings without having to write out a command with every single parameter.
-
-> 💡 If you wish to share such profile (such as upload as supplementary material for academic publications), make sure to NOT include cluster specific paths to files, nor institutional specific profiles.
-
 ## Core Nextflow arguments
 
-> **NB:** These options are part of Nextflow and use a _single_ hyphen (pipeline parameters use a double-hyphen).
+:::note
+These options are part of Nextflow and use a _single_ hyphen (pipeline parameters use a double-hyphen).
+:::
 
 ### `-profile`
 
@@ -371,7 +381,9 @@ Use this parameter to choose a configuration profile. Profiles can give configur
 
 Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Podman, Shifter, Charliecloud, Apptainer, Conda) - see below.
 
-> We highly recommend the use of Docker or Singularity containers for full pipeline reproducibility, however when this is not possible, Conda is also supported.
+:::info
+We highly recommend the use of Docker or Singularity containers for full pipeline reproducibility, however when this is not possible, Conda is also supported.
+:::
 
 The pipeline also dynamically loads configurations from [https://github.com/nf-core/configs](https://github.com/nf-core/configs) when it runs, making multiple config profiles for various institutional clusters available at run time. For more information and to see if your system is available in these configs please see the [nf-core/configs documentation](https://github.com/nf-core/configs#documentation).
 
@@ -418,7 +430,7 @@ To change the resource requests, please see the [max resources](https://nf-co.re
 
 ### Custom Containers
 
-In some cases you may wish to change which container or conda environment a step of the pipeline uses for a particular tool. By default nf-core pipelines use containers and software from the [biocontainers](https://biocontainers.pro/) or [bioconda](https://bioconda.github.io/) projects. However in some cases the pipeline specified version maybe out of date.
+In some cases you may wish to change which container or conda environment a step of the pipeline uses for a particular tool. By default nf-core pipelines use containers and software from the [biocontainers](https://biocontainers.pro/) or [bioconda](https://bioconda.github.io/) projects. However in some cases the pipeline specified version may be out of date.
 
 To use a different container from the default container or conda environment specified in a pipeline, please see the [updating tool versions](https://nf-co.re/docs/usage/configuration#updating-tool-versions) section of the nf-core website.
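For this pipeline, that generic advice comes down to a process-scope override in a custom config, analogous to the `withLabel:immcantation` overrides in the test profiles above. A sketch, assuming a hypothetical replacement MultiQC image purely for illustration:

```groovy
// containers.config -- supply with `-c containers.config`
process {
    withName: 'MULTIQC' {
        // hypothetical image/tag; look up the exact biocontainers tag you need
        container = 'quay.io/biocontainers/multiqc:1.15--pyhdfd78af_0'
    }
}
```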
 
diff --git a/lib/NfcoreSchema.groovy b/lib/NfcoreSchema.groovy
deleted file mode 100755
index 9b34804d..00000000
--- a/lib/NfcoreSchema.groovy
+++ /dev/null
@@ -1,530 +0,0 @@
-//
-// This file holds several functions used to perform JSON parameter validation, help and summary rendering for the nf-core pipeline template.
-//
-
-import nextflow.Nextflow
-import org.everit.json.schema.Schema
-import org.everit.json.schema.loader.SchemaLoader
-import org.everit.json.schema.ValidationException
-import org.json.JSONObject
-import org.json.JSONTokener
-import org.json.JSONArray
-import groovy.json.JsonSlurper
-import groovy.json.JsonBuilder
-
-class NfcoreSchema {
-
-    //
-    // Resolve Schema path relative to main workflow directory
-    //
-    public static String getSchemaPath(workflow, schema_filename='nextflow_schema.json') {
-        return "${workflow.projectDir}/${schema_filename}"
-    }
-
-    //
-    // Function to loop over all parameters defined in schema and check
-    // whether the given parameters adhere to the specifications
-    //
-    /* groovylint-disable-next-line UnusedPrivateMethodParameter */
-    public static void validateParameters(workflow, params, log, schema_filename='nextflow_schema.json') {
-        def has_error = false
-        //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
-        // Check for nextflow core params and unexpected params
-        def json = new File(getSchemaPath(workflow, schema_filename=schema_filename)).text
-        def Map schemaParams = (Map) new JsonSlurper().parseText(json).get('definitions')
-        def nf_params = [
-            // Options for base `nextflow` command
-            'bg',
-            'c',
-            'C',
-            'config',
-            'd',
-            'D',
-            'dockerize',
-            'h',
-            'log',
-            'q',
-            'quiet',
-            'syslog',
-            'v',
-
-            // Options for `nextflow run` command
-            'ansi',
-            'ansi-log',
-            'bg',
-            'bucket-dir',
-            'c',
-            'cache',
-            'config',
-            'dsl2',
-            'dump-channels',
-            'dump-hashes',
-            'E',
-            'entry',
-            'latest',
-            'lib',
-            'main-script',
-            'N',
-            'name',
-            'offline',
-            'params-file',
-            'pi',
-            'plugins',
-            'poll-interval',
-            'pool-size',
-            'profile',
-            'ps',
-            'qs',
-            'queue-size',
-            'r',
-            'resume',
-            'revision',
-            'stdin',
-            'stub',
-            'stub-run',
-            'test',
-            'w',
-            'with-apptainer',
-            'with-charliecloud',
-            'with-conda',
-            'with-dag',
-            'with-docker',
-            'with-mpi',
-            'with-notification',
-            'with-podman',
-            'with-report',
-            'with-singularity',
-            'with-timeline',
-            'with-tower',
-            'with-trace',
-            'with-weblog',
-            'without-docker',
-            'without-podman',
-            'work-dir'
-        ]
-        def unexpectedParams = []
-
-        // Collect expected parameters from the schema
-        def expectedParams = []
-        def enums = [:]
-        for (group in schemaParams) {
-            for (p in group.value['properties']) {
-                expectedParams.push(p.key)
-                if (group.value['properties'][p.key].containsKey('enum')) {
-                    enums[p.key] = group.value['properties'][p.key]['enum']
-                }
-            }
-        }
-
-        for (specifiedParam in params.keySet()) {
-            // nextflow params
-            if (nf_params.contains(specifiedParam)) {
-                log.error "ERROR: You used a core Nextflow option with two hyphens: '--${specifiedParam}'. Please resubmit with '-${specifiedParam}'"
-                has_error = true
-            }
-            // unexpected params
-            def params_ignore = params.schema_ignore_params.split(',') + 'schema_ignore_params'
-            def expectedParamsLowerCase = expectedParams.collect{ it.replace("-", "").toLowerCase() }
-            def specifiedParamLowerCase = specifiedParam.replace("-", "").toLowerCase()
-            def isCamelCaseBug = (specifiedParam.contains("-") && !expectedParams.contains(specifiedParam) && expectedParamsLowerCase.contains(specifiedParamLowerCase))
-            if (!expectedParams.contains(specifiedParam) && !params_ignore.contains(specifiedParam) && !isCamelCaseBug) {
-                // Temporarily remove camelCase/camel-case params #1035
-                def unexpectedParamsLowerCase = unexpectedParams.collect{ it.replace("-", "").toLowerCase()}
-                if (!unexpectedParamsLowerCase.contains(specifiedParamLowerCase)){
-                    unexpectedParams.push(specifiedParam)
-                }
-            }
-        }
-
-        //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
-        // Validate parameters against the schema
-        InputStream input_stream = new File(getSchemaPath(workflow, schema_filename=schema_filename)).newInputStream()
-        JSONObject raw_schema = new JSONObject(new JSONTokener(input_stream))
-
-        // Remove anything that's in params.schema_ignore_params
-        raw_schema = removeIgnoredParams(raw_schema, params)
-
-        Schema schema = SchemaLoader.load(raw_schema)
-
-        // Clean the parameters
-        def cleanedParams = cleanParameters(params)
-
-        // Convert to JSONObject
-        def jsonParams = new JsonBuilder(cleanedParams)
-        JSONObject params_json = new JSONObject(jsonParams.toString())
-
-        // Validate
-        try {
-            schema.validate(params_json)
-        } catch (ValidationException e) {
-            println ''
-            log.error 'ERROR: Validation of pipeline parameters failed!'
-            JSONObject exceptionJSON = e.toJSON()
-            printExceptions(exceptionJSON, params_json, log, enums)
-            println ''
-            has_error = true
-        }
-
-        // Check for unexpected parameters
-        if (unexpectedParams.size() > 0) {
-            Map colors = NfcoreTemplate.logColours(params.monochrome_logs)
-            println ''
-            def warn_msg = 'Found unexpected parameters:'
-            for (unexpectedParam in unexpectedParams) {
-                warn_msg = warn_msg + "\n* --${unexpectedParam}: ${params[unexpectedParam].toString()}"
-            }
-            log.warn warn_msg
-            log.info "- ${colors.dim}Ignore this warning: params.schema_ignore_params = \"${unexpectedParams.join(',')}\" ${colors.reset}"
-            println ''
-        }
-
-        if (has_error) {
-            Nextflow.error('Exiting!')
-        }
-    }
-
-    //
-    // Beautify parameters for --help
-    //
-    public static String paramsHelp(workflow, params, command, schema_filename='nextflow_schema.json') {
-        Map colors = NfcoreTemplate.logColours(params.monochrome_logs)
-        Integer num_hidden = 0
-        String output  = ''
-        output        += 'Typical pipeline command:\n\n'
-        output        += "  ${colors.cyan}${command}${colors.reset}\n\n"
-        Map params_map = paramsLoad(getSchemaPath(workflow, schema_filename=schema_filename))
-        Integer max_chars  = paramsMaxChars(params_map) + 1
-        Integer desc_indent = max_chars + 14
-        Integer dec_linewidth = 160 - desc_indent
-        for (group in params_map.keySet()) {
-            Integer num_params = 0
-            String group_output = colors.underlined + colors.bold + group + colors.reset + '\n'
-            def group_params = params_map.get(group)  // This gets the parameters of that particular group
-            for (param in group_params.keySet()) {
-                if (group_params.get(param).hidden && !params.show_hidden_params) {
-                    num_hidden += 1
-                    continue;
-                }
-                def type = '[' + group_params.get(param).type + ']'
-                def description = group_params.get(param).description
-                def defaultValue = group_params.get(param).default != null ? " [default: " + group_params.get(param).default.toString() + "]" : ''
-                def description_default = description + colors.dim + defaultValue + colors.reset
-                // Wrap long description texts
-                // Loosely based on https://dzone.com/articles/groovy-plain-text-word-wrap
-                if (description_default.length() > dec_linewidth){
-                    List olines = []
-                    String oline = "" // " " * indent
-                    description_default.split(" ").each() { wrd ->
-                        if ((oline.size() + wrd.size()) <= dec_linewidth) {
-                            oline += wrd + " "
-                        } else {
-                            olines += oline
-                            oline = wrd + " "
-                        }
-                    }
-                    olines += oline
-                    description_default = olines.join("\n" + " " * desc_indent)
-                }
-                group_output += "  --" +  param.padRight(max_chars) + colors.dim + type.padRight(10) + colors.reset + description_default + '\n'
-                num_params += 1
-            }
-            group_output += '\n'
-            if (num_params > 0){
-                output += group_output
-            }
-        }
-        if (num_hidden > 0){
-            output += colors.dim + "!! Hiding $num_hidden params, use --show_hidden_params to show them !!\n" + colors.reset
-        }
-        output += NfcoreTemplate.dashedLine(params.monochrome_logs)
-        return output
-    }
-
-    //
-    // Groovy Map summarising parameters/workflow options used by the pipeline
-    //
-    public static LinkedHashMap paramsSummaryMap(workflow, params, schema_filename='nextflow_schema.json') {
-        // Get a selection of core Nextflow workflow options
-        def Map workflow_summary = [:]
-        if (workflow.revision) {
-            workflow_summary['revision'] = workflow.revision
-        }
-        workflow_summary['runName']      = workflow.runName
-        if (workflow.containerEngine) {
-            workflow_summary['containerEngine'] = workflow.containerEngine
-        }
-        if (workflow.container) {
-            workflow_summary['container'] = workflow.container
-        }
-        workflow_summary['launchDir']    = workflow.launchDir
-        workflow_summary['workDir']      = workflow.workDir
-        workflow_summary['projectDir']   = workflow.projectDir
-        workflow_summary['userName']     = workflow.userName
-        workflow_summary['profile']      = workflow.profile
-        workflow_summary['configFiles']  = workflow.configFiles.join(', ')
-
-        // Get pipeline parameters defined in JSON Schema
-        def Map params_summary = [:]
-        def params_map = paramsLoad(getSchemaPath(workflow, schema_filename=schema_filename))
-        for (group in params_map.keySet()) {
-            def sub_params = new LinkedHashMap()
-            def group_params = params_map.get(group)  // This gets the parameters of that particular group
-            for (param in group_params.keySet()) {
-                if (params.containsKey(param)) {
-                    def params_value = params.get(param)
-                    def schema_value = group_params.get(param).default
-                    def param_type   = group_params.get(param).type
-                    if (schema_value != null) {
-                        if (param_type == 'string') {
-                            if (schema_value.contains('$projectDir') || schema_value.contains('${projectDir}')) {
-                                def sub_string = schema_value.replace('\$projectDir', '')
-                                sub_string     = sub_string.replace('\${projectDir}', '')
-                                if (params_value.contains(sub_string)) {
-                                    schema_value = params_value
-                                }
-                            }
-                            if (schema_value.contains('$params.outdir') || schema_value.contains('${params.outdir}')) {
-                                def sub_string = schema_value.replace('\$params.outdir', '')
-                                sub_string     = sub_string.replace('\${params.outdir}', '')
-                                if ("${params.outdir}${sub_string}" == params_value) {
-                                    schema_value = params_value
-                                }
-                            }
-                        }
-                    }
-
-                    // We have a default in the schema, and this isn't it
-                    if (schema_value != null && params_value != schema_value) {
-                        sub_params.put(param, params_value)
-                    }
-                    // No default in the schema, and this isn't empty
-                    else if (schema_value == null && params_value != "" && params_value != null && params_value != false) {
-                        sub_params.put(param, params_value)
-                    }
-                }
-            }
-            params_summary.put(group, sub_params)
-        }
-        return [ 'Core Nextflow options' : workflow_summary ] << params_summary
-    }
-
-    //
-    // Beautify parameters for summary and return as string
-    //
-    public static String paramsSummaryLog(workflow, params) {
-        Map colors = NfcoreTemplate.logColours(params.monochrome_logs)
-        String output  = ''
-        def params_map = paramsSummaryMap(workflow, params)
-        def max_chars  = paramsMaxChars(params_map)
-        for (group in params_map.keySet()) {
-            def group_params = params_map.get(group)  // This gets the parameters of that particular group
-            if (group_params) {
-                output += colors.bold + group + colors.reset + '\n'
-                for (param in group_params.keySet()) {
-                    output += "  " + colors.blue + param.padRight(max_chars) + ": " + colors.green +  group_params.get(param) + colors.reset + '\n'
-                }
-                output += '\n'
-            }
-        }
-        output += "!! Only displaying parameters that differ from the pipeline defaults !!\n"
-        output += NfcoreTemplate.dashedLine(params.monochrome_logs)
-        return output
-    }
-
-    //
-    // Loop over nested exceptions and print the causingException
-    //
-    private static void printExceptions(ex_json, params_json, log, enums, limit=5) {
-        def causingExceptions = ex_json['causingExceptions']
-        if (causingExceptions.length() == 0) {
-            def m = ex_json['message'] =~ /required key \[([^\]]+)\] not found/
-            // Missing required param
-            if (m.matches()) {
-                log.error "* Missing required parameter: --${m[0][1]}"
-            }
-            // Other base-level error
-            else if (ex_json['pointerToViolation'] == '#') {
-                log.error "* ${ex_json['message']}"
-            }
-            // Error with specific param
-            else {
-                def param = ex_json['pointerToViolation'] - ~/^#\//
-                def param_val = params_json[param].toString()
-                if (enums.containsKey(param)) {
-                    def error_msg = "* --${param}: '${param_val}' is not a valid choice (Available choices"
-                    if (enums[param].size() > limit) {
-                        log.error "${error_msg} (${limit} of ${enums[param].size()}): ${enums[param][0..limit-1].join(', ')}, ... )"
-                    } else {
-                        log.error "${error_msg}: ${enums[param].join(', ')})"
-                    }
-                } else {
-                    log.error "* --${param}: ${ex_json['message']} (${param_val})"
-                }
-            }
-        }
-        for (ex in causingExceptions) {
-            printExceptions(ex, params_json, log, enums)
-        }
-    }
-
-    //
-    // Remove an element from a JSONArray
-    //
-    private static JSONArray removeElement(json_array, element) {
-        def list = []
-        int len = json_array.length()
-        for (int i=0;i<len;i++){
-            list.add(json_array.get(i).toString())
-        }
-        list.remove(element)
-        JSONArray jsArray = new JSONArray(list)
-        return jsArray
-    }
-
-    //
-    // Remove ignored parameters
-    //
-    private static JSONObject removeIgnoredParams(raw_schema, params) {
-        // Remove anything that's in params.schema_ignore_params
-        params.schema_ignore_params.split(',').each{ ignore_param ->
-            if(raw_schema.keySet().contains('definitions')){
-                raw_schema.definitions.each { definition ->
-                    for (key in definition.keySet()){
-                        if (definition[key].get("properties").keySet().contains(ignore_param)){
-                            // Remove the param to ignore
-                            definition[key].get("properties").remove(ignore_param)
-                            // If the param was required, change this
-                            if (definition[key].has("required")) {
-                                def cleaned_required = removeElement(definition[key].required, ignore_param)
-                                definition[key].put("required", cleaned_required)
-                            }
-                        }
-                    }
-                }
-            }
-            if(raw_schema.keySet().contains('properties') && raw_schema.get('properties').keySet().contains(ignore_param)) {
-                raw_schema.get("properties").remove(ignore_param)
-            }
-            if(raw_schema.keySet().contains('required') && raw_schema.required.contains(ignore_param)) {
-                def cleaned_required = removeElement(raw_schema.required, ignore_param)
-                raw_schema.put("required", cleaned_required)
-            }
-        }
-        return raw_schema
-    }
-
-    //
-    // Clean and check parameters relative to Nextflow native classes
-    //
-    private static Map cleanParameters(params) {
-        def new_params = params.getClass().newInstance(params)
-        for (p in params) {
-            // remove anything evaluating to false
-            if (!p['value']) {
-                new_params.remove(p.key)
-            }
-            // Cast MemoryUnit to String
-            if (p['value'].getClass() == nextflow.util.MemoryUnit) {
-                new_params.replace(p.key, p['value'].toString())
-            }
-            // Cast Duration to String
-            if (p['value'].getClass() == nextflow.util.Duration) {
-                new_params.replace(p.key, p['value'].toString().replaceFirst(/d(?!\S)/, "day"))
-            }
-            // Cast LinkedHashMap to String
-            if (p['value'].getClass() == LinkedHashMap) {
-                new_params.replace(p.key, p['value'].toString())
-            }
-        }
-        return new_params
-    }
-
-    //
-    // This function tries to read a JSON params file
-    //
-    private static LinkedHashMap paramsLoad(String json_schema) {
-        def params_map = new LinkedHashMap()
-        try {
-            params_map = paramsRead(json_schema)
-        } catch (Exception e) {
-            println "Could not read parameters settings from JSON. $e"
-            params_map = new LinkedHashMap()
-        }
-        return params_map
-    }
-
-    //
-    // Method to actually read in JSON file using Groovy.
-    // Group (as Key), values are all parameters
-    //    - Parameter1 as Key, Description as Value
-    //    - Parameter2 as Key, Description as Value
-    //    ....
-    // Group
-    //    -
-    private static LinkedHashMap paramsRead(String json_schema) throws Exception {
-        def json = new File(json_schema).text
-        def Map schema_definitions = (Map) new JsonSlurper().parseText(json).get('definitions')
-        def Map schema_properties = (Map) new JsonSlurper().parseText(json).get('properties')
-        /* Tree looks like this in nf-core schema
-        * definitions <- this is what the first get('definitions') gets us
-                group 1
-                    title
-                    description
-                        properties
-                        parameter 1
-                            type
-                            description
-                        parameter 2
-                            type
-                            description
-                group 2
-                    title
-                    description
-                        properties
-                        parameter 1
-                            type
-                            description
-        * properties <- parameters can also be ungrouped, outside of definitions
-                parameter 1
-                    type
-                    description
-        */
-
-        // Grouped params
-        def params_map = new LinkedHashMap()
-        schema_definitions.each { key, val ->
-            def Map group = schema_definitions."$key".properties // Gets the property object of the group
-            def title = schema_definitions."$key".title
-            def sub_params = new LinkedHashMap()
-            group.each { innerkey, value ->
-                sub_params.put(innerkey, value)
-            }
-            params_map.put(title, sub_params)
-        }
-
-        // Ungrouped params
-        def ungrouped_params = new LinkedHashMap()
-        schema_properties.each { innerkey, value ->
-            ungrouped_params.put(innerkey, value)
-        }
-        params_map.put("Other parameters", ungrouped_params)
-
-        return params_map
-    }
-
-    //
-    // Get maximum number of characters across all parameter names
-    //
-    private static Integer paramsMaxChars(params_map) {
-        Integer max_chars = 0
-        for (group in params_map.keySet()) {
-            def group_params = params_map.get(group)  // This gets the parameters of that particular group
-            for (param in group_params.keySet()) {
-                if (param.size() > max_chars) {
-                    max_chars = param.size()
-                }
-            }
-        }
-        return max_chars
-    }
-}
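
Note: the parameter help, validation and summary helpers removed above are superseded by the nf-validation Nextflow plugin, which this patch enables in nextflow.config and wires into main.nf. For the removed summary functions specifically, a minimal plugin-based sketch (illustrative only; it assumes the plugin's documented paramsSummaryLog/paramsSummaryMap API and is not itself part of this patch) looks like:

    // Sketch only: replacement for the removed NfcoreSchema summary helpers.
    // Assumes the 'nf-validation' plugin declared in nextflow.config is available at run time.
    include { paramsSummaryLog; paramsSummaryMap } from 'plugin/nf-validation'

    // Coloured log of parameters that differ from their schema defaults
    log.info paramsSummaryLog(workflow)

    // Grouped map of non-default parameters, e.g. for the MultiQC workflow summary
    def summary_params = paramsSummaryMap(workflow)
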
diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy
index 25a0a74a..01b8653d 100755
--- a/lib/NfcoreTemplate.groovy
+++ b/lib/NfcoreTemplate.groovy
@@ -3,6 +3,7 @@
 //
 
 import org.yaml.snakeyaml.Yaml
+import groovy.json.JsonOutput
 
 class NfcoreTemplate {
 
@@ -128,7 +129,7 @@ class NfcoreTemplate {
         def email_html    = html_template.toString()
 
         // Render the sendmail template
-        def max_multiqc_email_size = params.max_multiqc_email_size as nextflow.util.MemoryUnit
+        def max_multiqc_email_size = (params.containsKey('max_multiqc_email_size') ? params.max_multiqc_email_size : 0) as nextflow.util.MemoryUnit
         def smail_fields           = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "$projectDir", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes() ]
         def sf                     = new File("$projectDir/assets/sendmail_template.txt")
         def sendmail_template      = engine.createTemplate(sf).make(smail_fields)
@@ -222,6 +223,21 @@ class NfcoreTemplate {
         }
     }
 
+    //
+    // Dump pipeline parameters in a json file
+    //
+    public static void dump_parameters(workflow, params) {
+        def output_d = new File("${params.outdir}/pipeline_info/")
+        if (!output_d.exists()) {
+            output_d.mkdirs()
+        }
+
+        def timestamp  = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss')
+        def output_pf  = new File(output_d, "params_${timestamp}.json")
+        def jsonStr    = JsonOutput.toJson(params)
+        output_pf.text = JsonOutput.prettyPrint(jsonStr)
+    }
+
     //
     // Print pipeline summary on completion
     //
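
Note: dump_parameters is intended to be called once the run finishes, so the resolved parameters land next to the other pipeline_info outputs. A minimal usage sketch follows (the onComplete wiring is an assumption about how the pipeline invokes the new helper; it is not shown in this patch):

    // Sketch only: write params_<timestamp>.json into <outdir>/pipeline_info on completion.
    workflow.onComplete {
        NfcoreTemplate.dump_parameters(workflow, params)
        NfcoreTemplate.summary(workflow, params, log)
    }
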
diff --git a/lib/WorkflowAirrflow.groovy b/lib/WorkflowAirrflow.groovy
index 800936b4..a87bf915 100755
--- a/lib/WorkflowAirrflow.groovy
+++ b/lib/WorkflowAirrflow.groovy
@@ -40,14 +40,54 @@ class WorkflowAirrflow {
         return yaml_file_text
     }
 
-    public static String methodsDescriptionText(run_workflow, mqc_methods_yaml) {
+    //
+    // Generate methods description for MultiQC
+    //
+
+    public static String toolCitationText(params) {
+
+        // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "",
+        // Uncomment function in methodsDescriptionText to render in MultiQC report
+        def citation_text = [
+                "Tools used in the workflow included:",
+                "FastQC (Andrews 2010),",
+                "MultiQC (Ewels et al. 2016)",
+                "."
+            ].join(' ').trim()
+
+        return citation_text
+    }
+
+    public static String toolBibliographyText(params) {
+
+        // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "<li>Author (2023) Pub name, Journal, DOI</li>" : "",
+        // Uncomment function in methodsDescriptionText to render in MultiQC report
+        def reference_text = [
+                "<li>Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).</li>",
+                "<li>Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics , 32(19), 3047–3048. doi: /10.1093/bioinformatics/btw354</li>"
+            ].join(' ').trim()
+
+        return reference_text
+    }
+
+    public static String methodsDescriptionText(run_workflow, mqc_methods_yaml, params) {
         // Convert to a named map so can be used as with familar NXF ${workflow} variable syntax in the MultiQC YML file
         def meta = [:]
         meta.workflow = run_workflow.toMap()
         meta['manifest_map'] = run_workflow.manifest.toMap()
 
-        meta['doi_text'] = meta.manifest_map.doi ? "(doi: ${meta.manifest_map.doi})" : ''
-        meta['nodoi_text'] = meta.manifest_map.doi ? '' : '<li>If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used.</li>'
+        // Pipeline DOI
+        meta["doi_text"] = meta.manifest_map.doi ? "(doi: ${meta.manifest_map.doi})" : ""
+        meta["nodoi_text"] = meta.manifest_map.doi ? "": "<li>If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used.</li>"
+
+        // Tool references
+        meta["tool_citations"] = ""
+        meta["tool_bibliography"] = ""
+
+        // Only uncomment below if logic in toolCitationText/toolBibliographyText has been filled!
+        //meta["tool_citations"] = toolCitationText(params).replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".")
+        //meta["tool_bibliography"] = toolBibliographyText(params)
+
         def methods_text = mqc_methods_yaml.text
diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy
index 5342cbb3..538ea8a4 100755
--- a/lib/WorkflowMain.groovy
+++ b/lib/WorkflowMain.groovy
@@ -19,40 +19,11 @@ class WorkflowMain {
             " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md"
     }
 
-    //
-    // Generate help string
-    //
-    public static String help(workflow, params) {
-        def command = "nextflow run ${workflow.manifest.name} -profile --input samplesheet.tsv --cprimers CPrimers.fasta --vprimers VPrimers.fasta --umi_length 12 --loci ig"
-        def help_string = ''
-        help_string += NfcoreTemplate.logo(workflow, params.monochrome_logs)
-        help_string += NfcoreSchema.paramsHelp(workflow, params, command)
-        help_string += '\n' + citation(workflow) + '\n'
-        help_string += NfcoreTemplate.dashedLine(params.monochrome_logs)
-        return help_string
-    }
-
-    //
-    // Generate parameter summary log string
-    //
-    public static String paramsSummaryLog(workflow, params) {
-        def summary_log = ''
-        summary_log += NfcoreTemplate.logo(workflow, params.monochrome_logs)
-        summary_log += NfcoreSchema.paramsSummaryLog(workflow, params)
-        summary_log += '\n' + citation(workflow) + '\n'
-        summary_log += NfcoreTemplate.dashedLine(params.monochrome_logs)
-        return summary_log
-    }
 
     //
     // Validate parameters and print summary to screen
     //
     public static void initialise(workflow, params, log) {
-        // Print help to screen if required
-        if (params.help) {
-            log.info help(workflow, params)
-            System.exit(0)
-        }
 
         // Print workflow version and exit on --version
         if (params.version) {
@@ -61,14 +32,6 @@ class WorkflowMain {
             System.exit(0)
         }
 
-        // Print parameter summary log to screen
-        log.info paramsSummaryLog(workflow, params)
-
-        // Validate workflow parameters via the JSON schema
-        if (params.validate_params) {
-            NfcoreSchema.validateParameters(workflow, params, log)
-        }
-
         // Check that a -profile or Nextflow config has been provided to run the pipeline
         NfcoreTemplate.checkConfigProvided(workflow, log)
 
diff --git a/main.nf b/main.nf
index 2c213e4b..24de2277 100644
--- a/main.nf
+++ b/main.nf
@@ -17,6 +17,22 @@ nextflow.enable.dsl = 2
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */
 
+include { validateParameters; paramsHelp } from 'plugin/nf-validation'
+
+// Print help message if needed
+if (params.help) {
+    def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs)
+    def citation = '\n' + WorkflowMain.citation(workflow) + '\n'
+    def String command = "nextflow run ${workflow.manifest.name} --input samplesheet.csv --genome GRCh37 -profile docker"
+    log.info logo + paramsHelp(command) + citation + NfcoreTemplate.dashedLine(params.monochrome_logs)
+    System.exit(0)
+}
+
+// Validate input parameters
+if (params.validate_params) {
+    validateParameters()
+}
+
 WorkflowMain.initialise(workflow, params, log)
 
 /*
diff --git a/modules.json b/modules.json
index c15ed8ab..5eb6872f 100644
--- a/modules.json
+++ b/modules.json
@@ -17,7 +17,7 @@
             },
             "fastqc": {
                 "branch": "master",
-                "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905",
+                "git_sha": "cfd937a668919d948f6fcbf4218e79de50c2f36f",
                "installed_by":
["modules"] }, "multiqc": { diff --git a/modules/local/airrflow_report/airrflow_report.nf b/modules/local/airrflow_report/airrflow_report.nf index 5b6113de..ecac2e49 100644 --- a/modules/local/airrflow_report/airrflow_report.nf +++ b/modules/local/airrflow_report/airrflow_report.nf @@ -2,10 +2,12 @@ process AIRRFLOW_REPORT { tag "${meta.id}" label 'process_high' - conda "bioconda::r-enchantr=0.1.2" + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "nf-core/airrflow currently does not support Conda. Please use a container profile instead." + } container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/r-enchantr:0.1.2--r42hdfd78af_0': - 'biocontainers/r-enchantr:0.1.2--r42hdfd78af_0' }" + 'docker.io/immcantation/airrflow:3.2.0': + 'docker.io/immcantation/airrflow:3.2.0' }" input: tuple val(meta), path(tab) // sequence tsv table in AIRR format @@ -27,6 +29,9 @@ process AIRRFLOW_REPORT { """ execute_report.R --report_file ${repertoire_report} + mkdir repertoire_comparison/repertoires + cp *clone-pass.tsv repertoire_comparison/repertoires/ + cat <<-END_VERSIONS > versions.yml "${task.process}": alakazam: \$(Rscript -e "library(alakazam); cat(paste(packageVersion('alakazam'), collapse='.'))") diff --git a/modules/local/changeo/changeo_parsedb_select.nf b/modules/local/changeo/changeo_parsedb_select.nf index 0bab80ab..9f592e7f 100644 --- a/modules/local/changeo/changeo_parsedb_select.nf +++ b/modules/local/changeo/changeo_parsedb_select.nf @@ -20,7 +20,7 @@ process CHANGEO_PARSEDB_SELECT { script: def args = task.ext.args ?: '' def args2 = task.ext.args2 ?: '' - if (meta.locus == 'IG'){ + if (meta.locus.toUpperCase() == 'IG'){ """ ParseDb.py select -d $tab $args --outname ${meta.id} > ${meta.id}_select_command_log.txt @@ -30,7 +30,7 @@ process CHANGEO_PARSEDB_SELECT { changeo: \$( ParseDb.py --version | awk -F' ' '{print \$2}' ) END_VERSIONS """ - } else if (meta.locus == 'TR'){ + } else if (meta.locus.toUpperCase() == 'TR'){ """ ParseDb.py select -d $tab $args2 --outname ${meta.id} > "${meta.id}_command_log.txt" diff --git a/modules/local/enchantr/collapse_duplicates.nf b/modules/local/enchantr/collapse_duplicates.nf index e1c117a9..af640cf5 100644 --- a/modules/local/enchantr/collapse_duplicates.nf +++ b/modules/local/enchantr/collapse_duplicates.nf @@ -4,10 +4,12 @@ process COLLAPSE_DUPLICATES { label 'process_long_parallelized' label 'immcantation' - conda "bioconda::r-enchantr=0.1.2" + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "nf-core/airrflow currently does not support Conda. Please use a container profile instead." + } container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/r-enchantr:0.1.2--r42hdfd78af_0': - 'biocontainers/r-enchantr:0.1.2--r42hdfd78af_0' }" + 'docker.io/immcantation/airrflow:3.2.0': + 'docker.io/immcantation/airrflow:3.2.0' }" input: tuple val(meta), path(tabs) // tuple [val(meta), sequence tsv in AIRR format ] @@ -29,9 +31,9 @@ process COLLAPSE_DUPLICATES { 'outname'='${meta.id}',\\ 'log'='${meta.id}_collapse_command_log'))" + cp -r enchantr ${meta.id}_collapse_report && rm -r enchantr + echo "${task.process}": > versions.yml Rscript -e "cat(paste0(' enchantr: ',packageVersion('enchantr'),'\n'))" >> versions.yml - - mv enchantr ${meta.id}_collapse_report """ } diff --git a/modules/local/enchantr/define_clones.nf b/modules/local/enchantr/define_clones.nf index c271f874..e24c75d9 100644 --- a/modules/local/enchantr/define_clones.nf +++ b/modules/local/enchantr/define_clones.nf @@ -21,15 +21,18 @@ process DEFINE_CLONES { label 'process_long_parallelized' label 'immcantation' - conda "bioconda::r-enchantr=0.1.2" + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "nf-core/airrflow currently does not support Conda. Please use a container profile instead." + } container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/r-enchantr:0.1.2--r42hdfd78af_0': - 'biocontainers/r-enchantr:0.1.2--r42hdfd78af_0' }" + 'docker.io/immcantation/airrflow:3.2.0': + 'docker.io/immcantation/airrflow:3.2.0' }" input: tuple val(meta), path(tabs) // meta, sequence tsv in AIRR format val threshold path imgt_base + path repertoires_samplesheet output: path("*/*/*clone-pass.tsv"), emit: tab // sequence tsv in AIRR format @@ -41,20 +44,28 @@ process DEFINE_CLONES { script: def args = task.ext.args ? asString(task.ext.args) : '' def thr = threshold.join("") + def input = "" + if (repertoires_samplesheet) { + input = repertoires_samplesheet + } else { + input = tabs.join(',') + } """ Rscript -e "enchantr::enchantr_report('define_clones', \\ - report_params=list('input'='${tabs.join(',')}', \\ + report_params=list('input'='${input}', \\ 'imgt_db'='${imgt_base}', \\ + 'species'='auto', \\ 'cloneby'='${params.cloneby}', \\ + 'outputby'='${params.cloneby}', 'force'=FALSE, \\ 'threshold'=${thr}, \\ 'singlecell'='${params.singlecell}','outdir'=getwd(), \\ 'nproc'=${task.cpus},\\ 'log'='${meta.id}_clone_command_log' ${args}))" + cp -r enchantr ${meta.id}_clone_report && rm -rf enchantr + echo "${task.process}": > versions.yml Rscript -e "cat(paste0(' enchantr: ',packageVersion('enchantr'),'\n'))" >> versions.yml - - mv enchantr '${meta.id}_clone_report' """ } diff --git a/modules/local/enchantr/detect_contamination.nf b/modules/local/enchantr/detect_contamination.nf index 38308003..0267b81a 100644 --- a/modules/local/enchantr/detect_contamination.nf +++ b/modules/local/enchantr/detect_contamination.nf @@ -5,10 +5,12 @@ process DETECT_CONTAMINATION { label 'immcantation' - conda "bioconda::r-enchantr=0.1.2" + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "nf-core/airrflow currently does not support Conda. Please use a container profile instead." + } container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/r-enchantr:0.1.2--r42hdfd78af_0': - 'biocontainers/r-enchantr:0.1.2--r42hdfd78af_0' }" + 'docker.io/immcantation/airrflow:3.2.0': + 'docker.io/immcantation/airrflow:3.2.0' }" input: path(tabs) @@ -28,8 +30,9 @@ process DETECT_CONTAMINATION { 'outname'='cont-flag', \\ 'log'='all_reps_contamination_command_log'))" + cp -r enchantr all_reps_cont_report && rm -rf enchantr + echo "${task.process}": > versions.yml Rscript -e "cat(paste0(' enchantr: ',packageVersion('enchantr'),'\n'))" >> versions.yml - mv enchantr all_reps_cont_report """ } diff --git a/modules/local/enchantr/dowser_lineages.nf b/modules/local/enchantr/dowser_lineages.nf index e74c5e64..b9a8de8e 100644 --- a/modules/local/enchantr/dowser_lineages.nf +++ b/modules/local/enchantr/dowser_lineages.nf @@ -21,10 +21,12 @@ process DOWSER_LINEAGES { label 'process_long_parallelized' label 'immcantation' - conda "bioconda::r-enchantr=0.1.2" + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "nf-core/airrflow currently does not support Conda. Please use a container profile instead." + } container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/r-enchantr:0.1.2--r42hdfd78af_0': - 'biocontainers/r-enchantr:0.1.2--r42hdfd78af_0' }" + 'docker.io/immcantation/airrflow:3.2.0': + 'docker.io/immcantation/airrflow:3.2.0' }" input: tuple val(meta), path(tabs) @@ -46,9 +48,9 @@ process DOWSER_LINEAGES { 'nproc'=${task.cpus},\\ 'log'='${id_name}_dowser_command_log' ${args}))" + cp -r enchantr ${id_name}_dowser_report && rm -rf enchantr + echo "${task.process}": > versions.yml Rscript -e "cat(paste0(' enchantr: ',packageVersion('enchantr'),'\n'))" >> versions.yml - - mv enchantr '${id_name}_dowser_report' """ } diff --git a/modules/local/enchantr/find_threshold.nf b/modules/local/enchantr/find_threshold.nf index c4fc1697..08178111 100644 --- a/modules/local/enchantr/find_threshold.nf +++ b/modules/local/enchantr/find_threshold.nf @@ -21,29 +21,32 @@ process FIND_THRESHOLD { label 'process_long_parallelized' label 'immcantation' - conda "bioconda::r-enchantr=0.1.2" + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "nf-core/airrflow currently does not support Conda. Please use a container profile instead." + } container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/r-enchantr:0.1.2--r42hdfd78af_0': - 'biocontainers/r-enchantr:0.1.2--r42hdfd78af_0' }" + 'docker.io/immcantation/airrflow:3.2.0': + 'docker.io/immcantation/airrflow:3.2.0' }" input: path tab // sequence tsv in AIRR format path logo + path tabs_samplesheet output: // tuple val(meta), path("*threshold-pass.tsv"), emit: tab // sequence tsv in AIRR format path("*_command_log.txt"), emit: logs //process logs path "*_report" - path "*_threshold-summary.tsv", emit: threshold_summary - path "*_threshold-mean.tsv", emit: mean_threshold + path "all_reps_dist_report/tables/*_threshold-summary.tsv", emit: threshold_summary, optional:true + path "all_reps_dist_report/tables/*_threshold-mean.tsv", emit: mean_threshold path "versions.yml", emit: versions script: def args = task.ext.args ? 
asString(task.ext.args) : '' """ Rscript -e "enchantr::enchantr_report('find_threshold', \\ - report_params=list('input'='${tab.join(',')}',\\ + report_params=list('input'='${tabs_samplesheet}',\\ 'cloneby'='${params.cloneby}',\\ 'crossby'='${params.crossby}',\\ 'singlecell'='${params.singlecell}',\\ @@ -53,8 +56,9 @@ process FIND_THRESHOLD { 'log'='all_reps_threshold_command_log',\\ 'logo'='${logo}' ${args}))" + cp -r enchantr all_reps_dist_report && rm -rf enchantr + echo "${task.process}": > versions.yml Rscript -e "cat(paste0(' enchantr: ',packageVersion('enchantr'),'\n'))" >> versions.yml - mv enchantr all_reps_dist_report """ } diff --git a/modules/local/enchantr/remove_chimeric.nf b/modules/local/enchantr/remove_chimeric.nf index 59a4e3a5..32522aa2 100644 --- a/modules/local/enchantr/remove_chimeric.nf +++ b/modules/local/enchantr/remove_chimeric.nf @@ -5,10 +5,12 @@ process REMOVE_CHIMERIC { label 'immcantation' - conda "bioconda::r-enchantr=0.1.2" + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "nf-core/airrflow currently does not support Conda. Please use a container profile instead." + } container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/r-enchantr:0.1.2--r42hdfd78af_0': - 'biocontainers/r-enchantr:0.1.2--r42hdfd78af_0' }" + 'docker.io/immcantation/airrflow:3.2.0': + 'docker.io/immcantation/airrflow:3.2.0' }" input: @@ -30,9 +32,9 @@ process REMOVE_CHIMERIC { 'outname'='${meta.id}', \\ 'log'='${meta.id}_chimeric_command_log'))" + cp -r enchantr ${meta.id}_chimera_report && rm -rf enchantr + echo "\"${task.process}\":" > versions.yml Rscript -e "cat(paste0(' enchantr: ',packageVersion('enchantr'),'\n'))" >> versions.yml - - mv enchantr ${meta.id}_chimera_report """ } diff --git a/modules/local/enchantr/report_file_size.nf b/modules/local/enchantr/report_file_size.nf index b4f9130d..804ebd61 100644 --- a/modules/local/enchantr/report_file_size.nf +++ b/modules/local/enchantr/report_file_size.nf @@ -6,14 +6,17 @@ process REPORT_FILE_SIZE { label 'immcantation' label 'process_single' - conda "bioconda::r-enchantr=0.1.2" + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "nf-core/airrflow currently does not support Conda. Please use a container profile instead." + } container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/r-enchantr:0.1.2--r42hdfd78af_0': - 'biocontainers/r-enchantr:0.1.2--r42hdfd78af_0' }" + 'docker.io/immcantation/airrflow:3.2.0': + 'docker.io/immcantation/airrflow:3.2.0' }" input: path logs path metadata + path logs_tabs output: path "*_report", emit: file_size @@ -22,14 +25,14 @@ process REPORT_FILE_SIZE { script: """ - echo "${logs.join('\n')}" > logs.txt Rscript -e "enchantr::enchantr_report('file_size', \\ - report_params=list('input'='logs.txt', 'metadata'='${metadata}',\\ + report_params=list('input'='${logs_tabs}', 'metadata'='${metadata}',\\ 'outdir'=getwd()))" + cp -r enchantr file_size_report && rm -rf enchantr + echo "\"${task.process}\":" > versions.yml Rscript -e "cat(paste0(' enchantr: ',packageVersion('enchantr'),'\n'))" >> versions.yml - mv enchantr file_size_report """ } diff --git a/modules/local/enchantr/single_cell_qc.nf b/modules/local/enchantr/single_cell_qc.nf index 87422b86..36733e4d 100644 --- a/modules/local/enchantr/single_cell_qc.nf +++ b/modules/local/enchantr/single_cell_qc.nf @@ -20,10 +20,12 @@ process SINGLE_CELL_QC { label 'immcantation' label 'process_medium' - conda "bioconda::r-enchantr=0.1.2" + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "nf-core/airrflow currently does not support Conda. Please use a container profile instead." + } container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/r-enchantr:0.1.2--r42hdfd78af_0': - 'biocontainers/r-enchantr:0.1.2--r42hdfd78af_0' }" + 'docker.io/immcantation/airrflow:3.2.0': + 'docker.io/immcantation/airrflow:3.2.0' }" input: path(tabs) @@ -43,9 +45,9 @@ process SINGLE_CELL_QC { 'outdir'=getwd(), \\ 'log'='all_reps_scqc_command_log' ${args} ))" + cp -r enchantr all_reps_scqc_report && rm -rf enchantr + echo "${task.process}": > versions.yml Rscript -e "cat(paste0(' enchantr: ',packageVersion('enchantr'),'\n'))" >> versions.yml - - mv enchantr all_reps_scqc_report """ } diff --git a/modules/local/enchantr/validate_input.nf b/modules/local/enchantr/validate_input.nf index 5be240c6..224b391a 100644 --- a/modules/local/enchantr/validate_input.nf +++ b/modules/local/enchantr/validate_input.nf @@ -6,10 +6,12 @@ process VALIDATE_INPUT { label 'immcantation' label 'process_single' - conda "bioconda::r-enchantr=0.1.2" + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "nf-core/airrflow currently does not support Conda. Please use a container profile instead." + } container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/r-enchantr:0.1.2--r42hdfd78af_0': - 'biocontainers/r-enchantr:0.1.2--r42hdfd78af_0' }" + 'docker.io/immcantation/airrflow:3.2.0': + 'docker.io/immcantation/airrflow:3.2.0' }" input: file samplesheet @@ -26,6 +28,8 @@ process VALIDATE_INPUT { """ Rscript -e "enchantr:::enchantr_report('validate_input', report_params=list('input'='${samplesheet}','collapseby'='${collapseby}','cloneby'='${cloneby}','reassign'='${params.reassign}','miairr'='${miairr}','outdir'=getwd()))" + cp -r enchantr validate_input_report && rm -rf enchantr + echo "\"${task.process}\":" > versions.yml Rscript -e "cat(paste0(' enchantr: ',packageVersion('enchantr'),'\n'))" >> versions.yml """ diff --git a/modules/local/fastqc_postassembly.nf b/modules/local/fastqc_postassembly.nf index 2eb9d7f0..02568191 100644 --- a/modules/local/fastqc_postassembly.nf +++ b/modules/local/fastqc_postassembly.nf @@ -3,10 +3,10 @@ process FASTQC_POSTASSEMBLY { tag "$meta.id" label 'process_medium' - conda "bioconda::fastqc=0.11.9" + conda "bioconda::fastqc=0.12.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/fastqc:0.11.9--0' : - 'biocontainers/fastqc:0.11.9--0' }" + 'https://depot.galaxyproject.org/singularity/fastqc:0.12.1--hdfd78af_0' : + 'biocontainers/fastqc:0.12.1--hdfd78af_0' }" input: tuple val(meta), path(reads) diff --git a/modules/local/igblast/igblast_assigngenes.nf b/modules/local/igblast/igblast_assigngenes.nf deleted file mode 100644 index b4ea2c35..00000000 --- a/modules/local/igblast/igblast_assigngenes.nf +++ /dev/null @@ -1,65 +0,0 @@ -process IGBLAST_ASSIGNGENES { - tag "$meta.id" - label 'process_low' - label 'immcantation' - - conda "bioconda::changeo=1.3.0 bioconda::igblast=1.19.0 conda-forge::wget=1.20.1" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-7d8e418eb73acc6a80daea8e111c94cf19a4ecfd:00534555924705cdf2f7ac48b4b8b4083527ca58-1' : - 'biocontainers/mulled-v2-7d8e418eb73acc6a80daea8e111c94cf19a4ecfd:00534555924705cdf2f7ac48b4b8b4083527ca58-1' }" - - input: - tuple val(meta), path(reads) // reads in fasta format - path(igblast) // igblast fasta - - output: - tuple val(meta), path("*db-pass.tsv"), emit: tab - path "versions.yml" , emit: versions - path("*_command_log.txt"), emit: logs //process logs - path("*_makedb_command_log.txt"), emit: makedb_log - - script: - def args = task.ext.args ?: '' - """ - igblastn \ - -germline_db_V igblast_base/database/imgt_${meta.species}_${meta.locus.toLowerCase()}_v \ - -germline_db_D igblast_base/database/imgt_${meta.species}_${meta.locus.toLowerCase()}_d \ - -germline_db_J igblast_base/database/imgt_${meta.species}_${meta.locus.toLowerCase()}_j \ - -auxiliary_data igblast_base/optional_file/${meta.species}_gl.aux \ - -organism ${meta.species} \ - $args \ - -query $reads \ - -out ${meta.id}_db-pass.tsv - - echo "START> AssignGenes" > ${meta.id}_changeo_assigngenes_command_log.txt - echo "COMMAND> igblast" >> ${meta.id}_changeo_assigngenes_command_log.txt - echo "VERSION> \$( igblastn -version | grep -o "igblast[0-9\\. ]\\+" | grep -o "[0-9\\. 
]\\+" )" >> ${meta.id}_changeo_assigngenes_command_log.txt - echo "FILE> ${reads}" >> ${meta.id}_changeo_assigngenes_command_log.txt - echo "ORGANISM> ${meta.species}" >> ${meta.id}_changeo_assigngenes_command_log.txt - echo "LOCI> ${meta.locus.toLowerCase()}" >> ${meta.id}_changeo_assigngenes_command_log.txt - echo "NPROC> ${task.cpus}\n" >> ${meta.id}_changeo_assigngenes_command_log.txt - echo "PROGRESS> ...Done \n" >> ${meta.id}_changeo_assigngenes_command_log.txt - echo "PASS> \$(tail -n +2 ${meta.id}_db-pass.tsv | wc -l )" >> ${meta.id}_changeo_assigngenes_command_log.txt - echo "OUTPUT> ${meta.id}_igblast.fmt7" >> ${meta.id}_changeo_assigngenes_command_log.txt - echo "END> AssignGenes" >> ${meta.id}_changeo_assigngenes_command_log.txt - - echo "START> MakeDB" > ${meta.id}_makedb_command_log.txt - echo "COMMAND> igblast" >> ${meta.id}_makedb_command_log.txt - echo "ALIGNER_FILE> ${meta.id}_igblast.fmt7" >> ${meta.id}_makedb_command_log.txt - echo "SEQ_FILE> ${reads}" >> ${meta.id}_makedb_command_log.txt - echo "ASIS_ID> False" >> ${meta.id}_makedb_command_log.txt - echo "ASIS_CALLS> False" >> ${meta.id}_makedb_command_log.txt - echo "VALIDATE> strict" >> ${meta.id}_makedb_command_log.txt - echo "EXTENDED> True" >> ${meta.id}_makedb_command_log.txt - echo "INFER_JUNCTION> False\n" >> ${meta.id}_makedb_command_log.txt - echo "PROGRESS> ...\n" >> ${meta.id}_makedb_command_log.txt - echo "PROGRESS> ... Done\n" >> ${meta.id}_makedb_command_log.txt - echo "OUTPUT> ${meta.id}_db-pass.tsv" >> ${meta.id}_makedb_command_log.txt - echo "PASS> \$(tail -n +2 ${meta.id}_db-pass.tsv | wc -l )" >> ${meta.id}_makedb_command_log.txt - echo "FAIL> 0" >> ${meta.id}_makedb_command_log.txt - echo "END> MakeDB" >> ${meta.id}_makedb_command_log.txt - - echo "\"${task.process}\":" > versions.yml - echo " igblastn: \$( igblastn -version | grep -o "igblast[0-9\\. ]\\+" | grep -o "[0-9\\. ]\\+" )" >> versions.yml - """ -} diff --git a/modules/local/presto/presto_buildconsensus.nf b/modules/local/presto/presto_buildconsensus.nf index d5e92621..6f5d9b20 100644 --- a/modules/local/presto/presto_buildconsensus.nf +++ b/modules/local/presto/presto_buildconsensus.nf @@ -20,11 +20,14 @@ process PRESTO_BUILDCONSENSUS { path "versions.yml" , emit: versions script: - def barcode_field = params.cluster_sets ? "CLUSTER" : "BARCODE" + def barcode_field = params.cluster_sets ? 
'CLUSTER' : 'BARCODE' + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def args3 = task.ext.args3 ?: '' """ - BuildConsensus.py -s $R1 --bf ${barcode_field} --nproc ${task.cpus} --pf PRIMER --prcons $params.primer_consensus --maxerror $params.buildconsensus_maxerror --maxgap $params.buildconsensus_maxgap --outname ${meta.id}_R1 --log ${meta.id}_R1.log > ${meta.id}_command_log.txt - BuildConsensus.py -s $R2 --bf ${barcode_field} --nproc ${task.cpus} --pf PRIMER --prcons $params.primer_consensus --maxerror $params.buildconsensus_maxerror --maxgap $params.buildconsensus_maxgap --outname ${meta.id}_R2 --log ${meta.id}_R2.log >> ${meta.id}_command_log.txt - ParseLog.py -l ${meta.id}_R1.log ${meta.id}_R2.log -f ID BARCODE SEQCOUNT PRIMER PRCOUNT PRCONS PRFREQ CONSCOUNT + BuildConsensus.py -s $R1 --bf ${barcode_field} --nproc ${task.cpus} --pf PRIMER --prcons ${params.primer_consensus} --maxerror ${params.buildconsensus_maxerror} --maxgap ${params.buildconsensus_maxgap} ${args} --outname ${meta.id}_R1 --log ${meta.id}_R1.log > ${meta.id}_command_log.txt + BuildConsensus.py -s $R2 --bf ${barcode_field} --nproc ${task.cpus} --pf PRIMER --prcons ${params.primer_consensus} --maxerror ${params.buildconsensus_maxerror} --maxgap ${params.buildconsensus_maxgap} ${args2} --outname ${meta.id}_R2 --log ${meta.id}_R2.log >> ${meta.id}_command_log.txt + ParseLog.py -l ${meta.id}_R1.log ${meta.id}_R2.log -f ${args3} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/rename_file.nf b/modules/local/rename_file.nf new file mode 100644 index 00000000..6f99ef89 --- /dev/null +++ b/modules/local/rename_file.nf @@ -0,0 +1,21 @@ +// Import generic module functions +process RENAME_FILE { + tag "$meta.id" + label 'process_low' + + conda "conda-forge::python=3.8.0 conda-forge::biopython=1.74" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-adc9bb9edc31eb38b3c24786a83b7dfa530e2bea:47d6d7765d7537847ced7dac873190d164146022-0' : + 'biocontainers/mulled-v2-adc9bb9edc31eb38b3c24786a83b7dfa530e2bea:47d6d7765d7537847ced7dac873190d164146022-0' }" + + input: + tuple val(meta), path(file) + + output: + tuple val(meta), path("${meta.id}_${file.name}") , emit: file + + script: + """ + mv ${file} ${meta.id}_${file.name} + """ +} diff --git a/modules/local/reveal/add_meta_to_tab.nf b/modules/local/reveal/add_meta_to_tab.nf index 3e9da456..0423695b 100644 --- a/modules/local/reveal/add_meta_to_tab.nf +++ b/modules/local/reveal/add_meta_to_tab.nf @@ -3,10 +3,12 @@ process ADD_META_TO_TAB { label 'immcantation' label 'process_single' - conda "bioconda::r-enchantr=0.1.2" + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "nf-core/airrflow currently does not support Conda. Please use a container profile instead." + } container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/r-enchantr:0.1.2--r42hdfd78af_0': - 'biocontainers/r-enchantr:0.1.2--r42hdfd78af_0' }" + 'docker.io/immcantation/airrflow:3.2.0': + 'docker.io/immcantation/airrflow:3.2.0' }" cache 'deep' // Without 'deep' this process would run when using -resume diff --git a/modules/local/reveal/filter_junction_mod3.nf b/modules/local/reveal/filter_junction_mod3.nf index ff6021ff..75a06eac 100644 --- a/modules/local/reveal/filter_junction_mod3.nf +++ b/modules/local/reveal/filter_junction_mod3.nf @@ -3,10 +3,12 @@ process FILTER_JUNCTION_MOD3 { label 'immcantation' label 'process_single' - conda "bioconda::r-enchantr=0.1.2" + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "nf-core/airrflow currently does not support Conda. Please use a container profile instead." + } container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/r-enchantr:0.1.2--r42hdfd78af_0': - 'biocontainers/r-enchantr:0.1.2--r42hdfd78af_0' }" + 'docker.io/immcantation/airrflow:3.2.0': + 'docker.io/immcantation/airrflow:3.2.0' }" input: tuple val(meta), path(tab) // sequence tsv in AIRR format diff --git a/modules/local/reveal/filter_quality.nf b/modules/local/reveal/filter_quality.nf index 2c75fcfd..a20d029e 100644 --- a/modules/local/reveal/filter_quality.nf +++ b/modules/local/reveal/filter_quality.nf @@ -3,16 +3,18 @@ process FILTER_QUALITY { label 'immcantation' label 'process_single' - conda "bioconda::r-enchantr=0.1.2" + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "nf-core/airrflow currently does not support Conda. Please use a container profile instead." + } container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/r-enchantr:0.1.2--r42hdfd78af_0': - 'biocontainers/r-enchantr:0.1.2--r42hdfd78af_0' }" + 'docker.io/immcantation/airrflow:3.2.0': + 'docker.io/immcantation/airrflow:3.2.0' }" input: tuple val(meta), path(tab) // sequence tsv in AIRR format output: - tuple val(meta), path("*quality-pass.tsv"), emit: tab // sequence tsv in AIRR format + tuple val(meta), path("*quality-pass.tsv"), optional:true, emit: tab // sequence tsv in AIRR format path("*_command_log.txt"), emit: logs //process logs path "versions.yml", emit: versions diff --git a/modules/local/samplesheet_check.nf b/modules/local/samplesheet_check.nf index b9593c98..757851a7 100644 --- a/modules/local/samplesheet_check.nf +++ b/modules/local/samplesheet_check.nf @@ -18,8 +18,9 @@ process SAMPLESHEET_CHECK { task.ext.when == null || task.ext.when script: // This script is bundled with the pipeline, in nf-core/airrflow/bin/ + def args = task.ext.args ?: '' """ - check_samplesheet.py $samplesheet + check_samplesheet.py $samplesheet $args cp $samplesheet samplesheet.valid.tsv cat <<-END_VERSIONS > versions.yml diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf index 05730368..67209f79 100644 --- a/modules/nf-core/fastqc/main.nf +++ b/modules/nf-core/fastqc/main.nf @@ -2,10 +2,10 @@ process FASTQC { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::fastqc=0.11.9" : null) + conda "bioconda::fastqc=0.12.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/fastqc:0.11.9--0' : - 'quay.io/biocontainers/fastqc:0.11.9--0' }" + 'https://depot.galaxyproject.org/singularity/fastqc:0.12.1--hdfd78af_0' : + 'biocontainers/fastqc:0.12.1--hdfd78af_0' }" input: tuple val(meta), path(reads) @@ -20,30 +20,26 @@ process FASTQC { script: def args = task.ext.args ?: '' - // Add soft-links to original FastQs for consistent naming in pipeline def prefix = task.ext.prefix ?: "${meta.id}" - if (meta.single_end) { - """ - [ ! -f ${prefix}.fastq.gz ] && ln -s $reads ${prefix}.fastq.gz - fastqc $args --threads $task.cpus ${prefix}.fastq.gz - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) - END_VERSIONS - """ - } else { - """ - [ ! -f ${prefix}_1.fastq.gz ] && ln -s ${reads[0]} ${prefix}_1.fastq.gz - [ ! -f ${prefix}_2.fastq.gz ] && ln -s ${reads[1]} ${prefix}_2.fastq.gz - fastqc $args --threads $task.cpus ${prefix}_1.fastq.gz ${prefix}_2.fastq.gz - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) - END_VERSIONS - """ - } + // Make list of old name and new name pairs to use for renaming in the bash while loop + def old_new_pairs = reads instanceof Path || reads.size() == 1 ? [[ reads, "${prefix}.${reads.extension}" ]] : reads.withIndex().collect { entry, index -> [ entry, "${prefix}_${index + 1}.${entry.extension}" ] } + def rename_to = old_new_pairs*.join(' ').join(' ') + def renamed_files = old_new_pairs.collect{ old_name, new_name -> new_name }.join(' ') + """ + printf "%s %s\\n" $rename_to | while read old_name new_name; do + [ -f "\${new_name}" ] || ln -s \$old_name \$new_name + done + + fastqc \\ + $args \\ + --threads $task.cpus \\ + $renamed_files + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) + END_VERSIONS + """ stub: def prefix = task.ext.prefix ?: "${meta.id}" diff --git a/modules/nf-core/fastqc/meta.yml b/modules/nf-core/fastqc/meta.yml index 4da5bb5a..ee5507e0 100644 --- a/modules/nf-core/fastqc/meta.yml +++ b/modules/nf-core/fastqc/meta.yml @@ -50,3 +50,8 @@ authors: - "@grst" - "@ewels" - "@FelixKrueger" +maintainers: + - "@drpatelh" + - "@grst" + - "@ewels" + - "@FelixKrueger" diff --git a/modules/nf-core/fastqc/tests/main.nf.test b/modules/nf-core/fastqc/tests/main.nf.test new file mode 100644 index 00000000..6437a144 --- /dev/null +++ b/modules/nf-core/fastqc/tests/main.nf.test @@ -0,0 +1,41 @@ +nextflow_process { + + name "Test Process FASTQC" + script "../main.nf" + process "FASTQC" + tag "modules" + tag "modules_nfcore" + tag "fastqc" + + test("Single-Read") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id: 'test', single_end:true ], + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + // NOTE The report contains the date inside it, which means that the md5sum is stable per day, but not longer than that. So you can't md5sum it. + // looks like this:
    <div id="header_filename">Mon 2 Oct 2023 <br/> test.gz</div>
    + // https://github.com/nf-core/modules/pull/3903#issuecomment-1743620039 + { assert process.out.html.get(0).get(1) ==~ ".*/test_fastqc.html" }, + { assert path(process.out.html.get(0).get(1)).getText().contains("File typeConventional base calls") }, + { assert snapshot(process.out.versions).match("versions") }, + { assert process.out.zip.get(0).get(1) ==~ ".*/test_fastqc.zip" } + ) + } + } +} diff --git a/modules/nf-core/fastqc/tests/main.nf.test.snap b/modules/nf-core/fastqc/tests/main.nf.test.snap new file mode 100644 index 00000000..636a32ce --- /dev/null +++ b/modules/nf-core/fastqc/tests/main.nf.test.snap @@ -0,0 +1,10 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "timestamp": "2023-10-09T23:40:54+0000" + } +} \ No newline at end of file diff --git a/modules/nf-core/fastqc/tests/tags.yml b/modules/nf-core/fastqc/tests/tags.yml new file mode 100644 index 00000000..7834294b --- /dev/null +++ b/modules/nf-core/fastqc/tests/tags.yml @@ -0,0 +1,2 @@ +fastqc: + - modules/nf-core/fastqc/** diff --git a/nextflow.config b/nextflow.config index b25143ba..1b772655 100644 --- a/nextflow.config +++ b/nextflow.config @@ -60,8 +60,9 @@ params { // ----------------------- productive_only = true reassign = true - igblast_base = null - imgtdb_base = null + igblast_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/igblast_base.zip' + imgtdb_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/imgtdb_base.zip' + fetch_imgt = false save_databases = true // ----------------------- @@ -79,9 +80,10 @@ params { singlecell = 'single_cell' clonal_threshold = 'auto' skip_all_clones_report = false + skip_report_threshold = false // tree lineage options - igphyml="/usr/local/bin/igphyml" + igphyml="/usr/local/share/igphyml/src/igphyml" skip_lineage = false // ----------------------- @@ -111,7 +113,6 @@ params { // Boilerplate options outdir = null - tracedir = "${params.outdir}/pipeline_info" publish_dir_mode = 'copy' email = null email_on_fail = null @@ -120,17 +121,15 @@ params { hook_url = null help = false version = false - validate_params = true - show_hidden_params = false - schema_ignore_params = 'genomes' // Config options + config_profile_name = null + config_profile_description = null custom_config_version = 'master' custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" - config_profile_description = null config_profile_contact = null config_profile_url = null - config_profile_name = null + // Max resource options // Defaults only, expecting to be overwritten @@ -138,6 +137,13 @@ params { max_cpus = 16 max_time = '240.h' + // Schema validation default options + validationFailUnrecognisedParams = false + validationLenientMode = false + validationSchemaIgnoreParams = 'genomes,igenomes_base' + validationShowHiddenParams = false + validate_params = true + } // Load base.config by default for all pipelines @@ -157,14 +163,11 @@ try { // } catch (Exception e) { // System.err.println("WARNING: Could not load nf-core/config/airrflow profiles: ${params.custom_config_base}/pipeline/airrflow.config") // } -docker.registry = 'quay.io' -podman.registry = 'quay.io' - profiles { debug { dumpHashes = true process.beforeScript = 'echo $HOSTNAME' - cleanup = false + cleanup = false } conda { conda.enabled = true @@ -239,6 +242,7 @@ profiles { } apptainer { apptainer.enabled = true + apptainer.autoMounts = true conda.enabled = false 
docker.enabled = false singularity.enabled = false @@ -248,21 +252,35 @@ profiles { } gitpod { executor.name = 'local' - executor.cpus = 16 - executor.memory = 60.GB + executor.cpus = 4 + executor.memory = 8.GB } test { includeConfig 'conf/test.config' } test_full { includeConfig 'conf/test_full.config' } test_tcr { includeConfig 'conf/test_tcr.config' } test_no_umi { includeConfig 'conf/test_no_umi.config' } - test_assembled { includeConfig 'conf/test_assembled.config' } + test_assembled_hs { includeConfig 'conf/test_assembled_hs.config' } + test_assembled_mm { includeConfig 'conf/test_assembled_mm.config' } test_raw_immcantation_devel { includeConfig 'conf/test_raw_immcantation_devel.config' } - test_assembled_immcantation_devel { includeConfig 'conf/test_assembled_immcantation_devel.config' } + test_assembled_immcantation_devel_hs { includeConfig 'conf/test_assembled_immcantation_devel_hs.config' } + test_assembled_immcantation_devel_mm { includeConfig 'conf/test_assembled_immcantation_devel_mm.config' } test_nocluster { includeConfig 'conf/test_nocluster.config' } test_fetchimgt { includeConfig 'conf/test_fetchimgt.config' } test_igblast { includeConfig 'conf/test_igblast.config' } } +// Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile +// Will not be used unless Apptainer / Docker / Podman / Singularity are enabled +// Set to your registry if you have a mirror of containers +apptainer.registry = 'quay.io' +docker.registry = 'quay.io' +podman.registry = 'quay.io' +singularity.registry = 'quay.io' + +// Nextflow plugins +plugins { + id 'nf-validation' // Validation of pipeline parameters and creation of an input channel from a sample sheet +} // Load igenomes.config if required if (!params.igenomes_ignore) { @@ -270,7 +288,6 @@ if (!params.igenomes_ignore) { } else { params.genomes = [:] } - // Export these variables to prevent local Python/R libraries from conflicting with those in the container // The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container. // See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable. 
@@ -279,6 +296,8 @@ env { PYTHONNOUSERSITE = 1 R_PROFILE_USER = "/.Rprofile" R_ENVIRON_USER = "/.Renviron" + R_LIBS_SITE = "NULL" + R_LIBS_USER = "NULL" JULIA_DEPOT_PATH = "/usr/local/share/julia" } @@ -288,19 +307,19 @@ process.shell = ['/bin/bash', '-euo', 'pipefail'] def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') timeline { enabled = true - file = "${params.tracedir}/execution_timeline_${trace_timestamp}.html" + file = "${params.outdir}/pipeline_info/execution_timeline_${trace_timestamp}.html" } report { enabled = true - file = "${params.tracedir}/execution_report_${trace_timestamp}.html" + file = "${params.outdir}/pipeline_info/execution_report_${trace_timestamp}.html" } trace { enabled = true - file = "${params.tracedir}/execution_trace_${trace_timestamp}.txt" + file = "${params.outdir}/pipeline_info/execution_trace_${trace_timestamp}.txt" } dag { enabled = true - file = "${params.tracedir}/pipeline_dag_${trace_timestamp}.html" + file = "${params.outdir}/pipeline_info/pipeline_dag_${trace_timestamp}.html" } manifest { @@ -309,8 +328,8 @@ manifest { homePage = 'https://github.com/nf-core/airrflow' description = """B and T cell repertoire analysis pipeline with the Immcantation framework.""" mainScript = 'main.nf' - nextflowVersion = '!>=22.10.1' - version = '3.1.0' + nextflowVersion = '!>=23.04.0' + version = '3.2.0' doi = '10.5281/zenodo.2642009' } diff --git a/nextflow_schema.json b/nextflow_schema.json index 8b6194e9..7117deb4 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -14,15 +14,19 @@ "properties": { "input": { "type": "string", - "mimetype": "text/tsv", - "fa_icon": "fas fa-table", - "description": "Path to a tsv file providing paths to the fastq files for each sample and the necessary metadata for the analysis.", - "help_text": "The input file includes important sample metadata and the path to the R1 and R2 fastq files, and index read file (I), if available. Please check the usage docs on information on how to create the input samplesheet." + "format": "file-path", + "exists": true, + "mimetype": "text/csv", + "pattern": "^\\S+\\.tsv$", + "schema": "assets/schema_input.json", + "description": "Path to comma-separated file containing information about the samples in the experiment.", + "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/airrflow/usage#samplesheet-input).", + "fa_icon": "fas fa-file-csv" }, "mode": { "type": "string", "default": "fastq", - "description": "Specify the processing mode for the pipeline. Available options are \"fastq\" and \"assembled\".ptions are: 'raw'", + "description": "Specify the processing mode for the pipeline. 
Available options are \"fastq\" and \"assembled\".", "enum": ["fastq", "assembled"], "fa_icon": "fas fa-terminal" }, @@ -41,7 +45,7 @@ }, "miairr": { "type": "string", - "default": "bcellmagic/assets/reveal/mapping_MiAIRR_BioSample_v1.3.1.tsv", + "default": "airrflow/assets/reveal/mapping_MiAIRR_BioSample_v1.3.1.tsv", "description": "Path to MiAIRR-BioSample mapping", "fa_icon": "fas fa-table" } @@ -282,14 +286,19 @@ "imgtdb_base": { "type": "string", "description": "Path to the cached IMGT database.", - "help_text": "If it is not provided, the database will be newly downloaded.", + "help_text": "By default, we provide a pre-downloaded version of the IMGT database. It is also possible to provide a custom IMGT reference database. To fetch a fresh version of IMGT, set the `--fetch_imgt` parameter instead.", "fa_icon": "fas fa-database" }, "igblast_base": { "type": "string", "description": "Path to the cached igblast database.", - "help_text": "If it is not provided, the database will be newly downloaded.", + "help_text": "By default, we provide a pre-downloaded version of the IMGT database. It is also possible to provide a custom IMGT reference database. To fetch a fresh version of IMGT, set the `--fetch_imgt` parameter instead.", "fa_icon": "fas fa-database" + }, + "fetch_imgt": { + "type": "boolean", + "description": "Set this flag to fetch the IMGT reference data at runtime.", + "fa_icon": "fas fa-cloud-download-alt" } }, "fa_icon": "fas fa-edit" @@ -364,6 +373,11 @@ "type": "boolean", "description": "Skip report of EnchantR DefineClones for all samples together.", "fa_icon": "fas fa-angle-double-right" + }, + "skip_report_threshold": { + "type": "boolean", + "description": "Skip report of EnchantR FindThreshold for all samples together.", + "fa_icon": "fas fa-angle-double-right" } }, "help_text": "By default, the pipeline will define clones for each of the samples, as two sequences having the same V-gene assignment, C-gene assignment, J-gene assignment, and junction length. Additionally, the similarity of the CDR3 sequences will be assessed by Hamming distances. \n\nA distance threshold for determining if two sequences come from the same clone or not is automatically determined by the process find threshold. Alternatively, a hamming distance threshold can be manually set by setting the `--clonal_threshold` parameter.", @@ -514,7 +528,7 @@ "description": "Maximum amount of time that can be requested for any single job.", "default": "240.h", "fa_icon": "far fa-clock", - "pattern": "^(\\d+\\.?\\s*(s|m|h|day)\\s*)+$", + "pattern": "^(\\d+\\.?\\s*(s|m|h|d|day)\\s*)+$", "hidden": true, "help_text": "Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. 
diff --git a/subworkflows/local/assembled_input_check.nf b/subworkflows/local/assembled_input_check.nf
index b37b359f..3fa4fc97 100644
--- a/subworkflows/local/assembled_input_check.nf
+++ b/subworkflows/local/assembled_input_check.nf
@@ -3,6 +3,9 @@
 */

 include { VALIDATE_INPUT } from '../../modules/local/enchantr/validate_input'
+include { SAMPLESHEET_CHECK as SAMPLESHEET_CHECK_ASSEMBLED } from '../../modules/local/samplesheet_check'
+include { RENAME_FILE as RENAME_FILE_FASTA } from '../../modules/local/rename_file'
+include { RENAME_FILE as RENAME_FILE_TSV } from '../../modules/local/rename_file'

 workflow ASSEMBLED_INPUT_CHECK {
     take:
@@ -12,8 +15,7 @@ workflow ASSEMBLED_INPUT_CHECK {
     cloneby

     main:
-    // TODO: validate input should check that sample_ids are unique
-
+    SAMPLESHEET_CHECK_ASSEMBLED ( samplesheet )
     VALIDATE_INPUT ( samplesheet, miairr, collapseby, cloneby ) //removed reassign
     ch_validated_input = VALIDATE_INPUT.out.validated_input
     ch_validated_input
@@ -25,9 +27,12 @@ workflow ASSEMBLED_INPUT_CHECK {
         }
         .set{ ch_metadata }

+    ch_unique_fasta = RENAME_FILE_FASTA( ch_metadata.fasta )
+    ch_unique_tsv = RENAME_FILE_TSV( ch_metadata.tsv )
+
     emit:
-    ch_fasta = ch_metadata.fasta
-    ch_tsv = ch_metadata.tsv
+    ch_fasta = ch_unique_fasta
+    ch_tsv = ch_unique_tsv
     validated_input = ch_validated_input
     versions = VALIDATE_INPUT.out.versions
 }
diff --git a/subworkflows/local/clonal_analysis.nf b/subworkflows/local/clonal_analysis.nf
index 91921756..437aa5a9 100644
--- a/subworkflows/local/clonal_analysis.nf
+++ b/subworkflows/local/clonal_analysis.nf
@@ -19,10 +19,16 @@ workflow CLONAL_ANALYSIS {
         ch_find_threshold = ch_repertoire.map{ it -> it[1] }
                                          .collect()

+        ch_find_threshold_samplesheet = ch_find_threshold
+                                        .flatten()
+                                        .map{ it -> it.getName().toString() }
+                                        .dump(tag: 'ch_find_threshold_samplesheet')
+                                        .collectFile(name: 'find_threshold_samplesheet.txt', newLine: true)
         FIND_CLONAL_THRESHOLD (
             ch_find_threshold,
-            ch_logo
+            ch_logo,
+            ch_find_threshold_samplesheet
         )
         ch_threshold = FIND_CLONAL_THRESHOLD.out.mean_threshold
         ch_versions = ch_versions.mix(FIND_CLONAL_THRESHOLD.out.versions)
@@ -32,20 +38,27 @@ workflow CLONAL_ANALYSIS {
             .dump(tag: 'clone_threshold')
             .filter { it != 'NA'}
             .filter { it != 'NaN' }
-            .ifEmpty { error "Automatic clone_threshold is 'NA'. Consider setting params.threshold manually."}
+            .ifEmpty { error "Automatic clone_threshold is 'NA'. Consider setting --clonal_threshold manually."}

     } else {
         clone_threshold = params.clonal_threshold
         ch_find_threshold = ch_repertoire.map{ it -> it[1] }
                                          .collect()
-
-        REPORT_THRESHOLD (
-            ch_find_threshold,
-            ch_logo
-        )
-        ch_versions = ch_versions.mix(REPORT_THRESHOLD.out.versions)
-
+        ch_find_threshold_samplesheet = ch_find_threshold
+                                        .flatten()
+                                        .map{ it -> it.getName().toString() }
+                                        .dump(tag: 'ch_find_threshold_samplesheet')
+                                        .collectFile(name: 'find_threshold_samplesheet.txt', newLine: true)
+
+        if (!params.skip_report_threshold){
+            REPORT_THRESHOLD (
+                ch_find_threshold,
+                ch_logo,
+                ch_find_threshold_samplesheet
+            )
+            ch_versions = ch_versions.mix(REPORT_THRESHOLD.out.versions)
+        }
     }

     // prepare ch for define clones
@@ -63,10 +76,13 @@ workflow CLONAL_ANALYSIS {
     DEFINE_CLONES_COMPUTE(
         ch_define_clones,
         clone_threshold.collect(),
-        ch_imgt.collect()
+        ch_imgt.collect(),
+        []
     )
+
     ch_versions = ch_versions.mix(DEFINE_CLONES_COMPUTE.out.versions)
-    ch_logs = ch_logs.mix(DEFINE_CLONES_COMPUTE.out.logs)
+    // TODO: add clonal analysis logs to report file size
+    //ch_logs = ch_logs.mix(DEFINE_CLONES_COMPUTE.out.logs)

     // prepare ch for define clones all samples report
     DEFINE_CLONES_COMPUTE.out.tab
@@ -76,10 +92,18 @@ workflow CLONAL_ANALYSIS {

     if (!params.skip_all_clones_report){
+        ch_all_repertoires_cloned_samplesheet = ch_all_repertoires_cloned.map{ it -> it[1] }
+                                                .collect()
+                                                .flatten()
+                                                .map{ it -> it.getName().toString() }
+                                                .dump(tag: 'ch_all_repertoires_cloned_samplesheet')
+                                                .collectFile(name: 'all_repertoires_cloned_samplesheet.txt', newLine: true)
+
         DEFINE_CLONES_REPORT(
             ch_all_repertoires_cloned,
             clone_threshold.collect(),
-            ch_imgt.collect()
+            ch_imgt.collect(),
+            ch_all_repertoires_cloned_samplesheet
         )
     }
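Note: the `collectFile()` idiom added above (and again in the reporting subworkflow below) simply writes one staged file name per line into a plain-text samplesheet that the EnchantR processes receive alongside the files themselves. A standalone sketch of the same idiom with a hypothetical glob; illustration only, not part of the patch:

    // sketch.nf -- shows what ends up in the generated samplesheet
    workflow {
        Channel.fromPath('repertoires/*.tsv')              // hypothetical input files
            .map { it.getName().toString() }               // keep only the file name
            .collectFile(name: 'find_threshold_samplesheet.txt', newLine: true)
            .view { it.text }                              // one file name per line
    }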
diff --git a/subworkflows/local/repertoire_analysis_reporting.nf b/subworkflows/local/repertoire_analysis_reporting.nf
index bab6b21d..1bd61a08 100644
--- a/subworkflows/local/repertoire_analysis_reporting.nf
+++ b/subworkflows/local/repertoire_analysis_reporting.nf
@@ -55,10 +55,16 @@ workflow REPERTOIRE_ANALYSIS_REPORTING {
                             ch_reassign_logs,
                             ch_sc_qc_and_filter_logs,
                             ch_clonal_analysis_logs)
+    ch_logs_tabs = ch_logs.collect()
+                          .flatten()
+                          .map{ it -> it.getName().toString() }
+                          .dump(tag: 'ch_logs_tabs')
+                          .collectFile(name: 'all_logs_tabs.txt', newLine: true)

     REPORT_FILE_SIZE(
         ch_logs.collect().ifEmpty([]),
-        ch_metadata
+        ch_metadata,
+        ch_logs_tabs
     )
     ch_versions = ch_versions.mix(REPORT_FILE_SIZE.out.versions)
diff --git a/subworkflows/local/vdj_annotation.nf b/subworkflows/local/vdj_annotation.nf
index 31ed0fd6..d30375c6 100644
--- a/subworkflows/local/vdj_annotation.nf
+++ b/subworkflows/local/vdj_annotation.nf
@@ -4,7 +4,6 @@ include { UNZIP_DB as UNZIP_IMGT } from '../../modules/local/unzip_db'
 include { CHANGEO_ASSIGNGENES } from '../../modules/local/changeo/changeo_assigngenes'
 include { CHANGEO_MAKEDB } from '../../modules/local/changeo/changeo_makedb'
 include { CHANGEO_PARSEDB_SPLIT } from '../../modules/local/changeo/changeo_parsedb_split'
-include { IGBLAST_ASSIGNGENES } from '../../modules/local/igblast/igblast_assigngenes'
 // reveal
 include { FILTER_QUALITY } from '../../modules/local/reveal/filter_quality'
 include { FILTER_JUNCTION_MOD3 } from '../../modules/local/reveal/filter_junction_mod3'
@@ -25,7 +24,7 @@ workflow VDJ_ANNOTATION {
     // TODO: this can take a long time, and the progress shows 0%. Would be
     // nice to have some better progress reporting.
     // And maybe run this as 2 separate steps, one for IMGT and one for IgBLAST?
-    if( params.igblast_base ){
+    if( !params.fetch_imgt ){
         if (params.igblast_base.endsWith(".zip")) {
             Channel.fromPath("${params.igblast_base}")
                 .ifEmpty{ error "IGBLAST DB not found: ${params.igblast_base}" }
@@ -40,7 +39,7 @@ workflow VDJ_ANNOTATION {
         }
     }

-    if( params.imgtdb_base ){
+    if( !params.fetch_imgt ){
         if (params.imgtdb_base.endsWith(".zip")) {
             Channel.fromPath("${params.imgtdb_base}")
                 .ifEmpty{ error "IMGTDB not found: ${params.imgtdb_base}" }
@@ -50,12 +49,12 @@ workflow VDJ_ANNOTATION {
             ch_versions = ch_versions.mix(UNZIP_IMGT.out.versions.ifEmpty(null))
         } else {
             Channel.fromPath("${params.imgtdb_base}")
-                .ifEmpty { error "IMGTDB not found: ${params.imgtdb_base}" }
+                .ifEmpty { error "IMGT DB not found: ${params.imgtdb_base}" }
                 .set { ch_imgt }
         }
     }

-    if (!params.igblast_base | !params.imgtdb_base) {
+    if (params.fetch_imgt) {
         FETCH_DATABASES()
         ch_igblast = FETCH_DATABASES.out.igblast
         ch_imgt = FETCH_DATABASES.out.imgt
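Note: together with the schema entries above, reference handling is now driven by a single flag: the cached `--imgtdb_base`/`--igblast_base` paths are used unless `--fetch_imgt` is set, in which case FETCH_DATABASES downloads fresh copies. A minimal params sketch; the paths are hypothetical and not part of the patch:

    // custom_refs.config -- illustration only
    params {
        imgtdb_base  = '/refs/imgtdb_base.zip'      // assumed user-provided cache
        igblast_base = '/refs/igblast_base.zip'
        // fetch_imgt = true                        // alternatively, fetch at runtime
    }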
diff --git a/workflows/airrflow.nf b/workflows/airrflow.nf
index 74aab699..abfbe497 100644
--- a/workflows/airrflow.nf
+++ b/workflows/airrflow.nf
@@ -1,12 +1,18 @@
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-    VALIDATE INPUTS
+    PRINT PARAMS SUMMARY
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */

-def summary_params = NfcoreSchema.paramsSummaryMap(workflow, params)
+include { paramsSummaryLog; paramsSummaryMap } from 'plugin/nf-validation'
+
+def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs)
+def citation = '\n' + WorkflowMain.citation(workflow) + '\n'
+def summary_params = paramsSummaryMap(workflow)
+
+// Print parameter summary log to screen
+log.info logo + paramsSummaryLog(workflow) + citation

-// Validate input parameters
 WorkflowAirrflow.initialise(params, log)

 // Check input path parameters to see if they exist
@@ -28,6 +34,8 @@ if (params.input) {

 ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true)
 ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config, checkIfExists: true ) : Channel.empty()
+ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo, checkIfExists: true ) : Channel.empty()
+ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true)

 // Report files
 ch_report_rmd = Channel.fromPath(params.report_rmd, checkIfExists: true)
@@ -229,9 +237,12 @@ workflow AIRRFLOW {
     workflow_summary = WorkflowAirrflow.paramsSummaryMultiqc(workflow, summary_params)
     ch_workflow_summary = Channel.value(workflow_summary)

+    methods_description = WorkflowAirrflow.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description, params)
+    ch_methods_description = Channel.value(methods_description)
+
     ch_multiqc_files = Channel.empty()
-    ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')
     ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml'))
+    ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml'))
     ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect())
     ch_multiqc_files = ch_multiqc_files.mix(ch_fastp_html.ifEmpty([]))
     ch_multiqc_files = ch_multiqc_files.mix(ch_fastp_json.ifEmpty([]))
@@ -258,6 +269,7 @@ workflow.onComplete {
     if (params.email || params.email_on_fail) {
         NfcoreTemplate.email(workflow, params, summary_params, projectDir, log, multiqc_report)
     }
+    NfcoreTemplate.dump_parameters(workflow, params)
     NfcoreTemplate.summary(workflow, params, log)
     if (params.hook_url) {