diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
index 4a9bc5c7..b290e090 100644
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -10,19 +10,11 @@
         "vscode": {
             // Set *default* container specific settings.json values on container create.
             "settings": {
-                "python.defaultInterpreterPath": "/opt/conda/bin/python",
-                "python.linting.enabled": true,
-                "python.linting.pylintEnabled": true,
-                "python.formatting.autopep8Path": "/opt/conda/bin/autopep8",
-                "python.formatting.yapfPath": "/opt/conda/bin/yapf",
-                "python.linting.flake8Path": "/opt/conda/bin/flake8",
-                "python.linting.pycodestylePath": "/opt/conda/bin/pycodestyle",
-                "python.linting.pydocstylePath": "/opt/conda/bin/pydocstyle",
-                "python.linting.pylintPath": "/opt/conda/bin/pylint",
+                "python.defaultInterpreterPath": "/opt/conda/bin/python"
             },
             // Add the IDs of extensions you want installed when the container is created.
-            "extensions": ["ms-python.python", "ms-python.vscode-pylance", "nf-core.nf-core-extensionpack"],
-        },
-    },
+            "extensions": ["ms-python.python", "ms-python.vscode-pylance", "nf-core.nf-core-extensionpack"]
+        }
+    }
 }
diff --git a/.editorconfig b/.editorconfig
index b6b31907..dd9ffa53 100644
--- a/.editorconfig
+++ b/.editorconfig
@@ -18,7 +18,20 @@ end_of_line = unset
 insert_final_newline = unset
 trim_trailing_whitespace = unset
 indent_style = unset
-indent_size = unset
+[/subworkflows/nf-core/**]
+charset = unset
+end_of_line = unset
+insert_final_newline = unset
+trim_trailing_whitespace = unset
+indent_style = unset

 [/assets/email*]
 indent_size = unset
+
+# ignore Readme
+[README.md]
+indent_style = unset
+
+# ignore python
+[*.{py,md}]
+indent_style = unset
diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md
index 597c854d..d2dc65c3 100644
--- a/.github/CONTRIBUTING.md
+++ b/.github/CONTRIBUTING.md
@@ -9,9 +9,8 @@ Please use the pre-filled template to save time.
 However, don't be put off by this template - other more general issues and suggestions are welcome!
 Contributions to the code are even more welcome ;)

-:::info
-If you need help using or modifying nf-core/scrnaseq then the best place to ask is on the nf-core Slack [#scrnaseq](https://nfcore.slack.com/channels/scrnaseq) channel ([join our Slack here](https://nf-co.re/join/slack)).
-:::
+> [!NOTE]
+> If you need help using or modifying nf-core/scrnaseq then the best place to ask is on the nf-core Slack [#scrnaseq](https://nfcore.slack.com/channels/scrnaseq) channel ([join our Slack here](https://nf-co.re/join/slack)).

 ## Contribution workflow
@@ -27,8 +26,11 @@ If you're not used to this workflow with git, you can start with some [docs from

 ## Tests

-You can optionally test your changes by running the pipeline locally. Then it is recommended to use the `debug` profile to
-receive warnings about process selectors and other debug info. Example: `nextflow run . -profile debug,test,docker --outdir <OUTDIR>`.
+You can optionally test your changes locally by running the pipeline. It is recommended to use the `debug` profile to receive warnings about process selectors and other debug information. Execute all the tests with the following command:
+
+```bash
+nf-test test --profile debug,test,docker --verbose
+```

 When you create a pull request with changes, [GitHub Actions](https://github.com/features/actions) will run automatic tests.
 Typically, pull-requests are only fully reviewed when these tests are passing, though of course we can help out before then.
@@ -90,7 +92,7 @@ Once there, use `nf-core schema build` to add to `nextflow_schema.json`.

 Sensible defaults for process resource requirements (CPUs / memory / time) for a process should be defined in `conf/base.config`. These should generally be specified generic with `withLabel:` selectors so they can be shared across multiple processes/steps of the pipeline. A nf-core standard set of labels that should be followed where possible can be seen in the [nf-core pipeline template](https://github.com/nf-core/tools/blob/master/nf_core/pipeline-template/conf/base.config), which has the default process as a single core-process, and then different levels of multi-core configurations for increasingly large memory requirements defined with standardised labels.

-The process resources can be passed on to the tool dynamically within the process with the `${task.cpu}` and `${task.memory}` variables in the `script:` block.
+The process resources can be passed on to the tool dynamically within the process with the `${task.cpus}` and `${task.memory}` variables in the `script:` block.

 ### Naming schemes
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
index 073b2953..0767493c 100644
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -18,7 +18,7 @@ Learn more about contributing: [CONTRIBUTING.md](https://github.com/nf-core/scrn
 - [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/scrnaseq/tree/master/.github/CONTRIBUTING.md)
 - [ ] If necessary, also make a PR on the nf-core/scrnaseq _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository.
 - [ ] Make sure your code lints (`nf-core lint`).
-- [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir <OUTDIR>`).
+- [ ] Ensure the test suite passes (`nf-test test main.nf.test -profile test,docker`).
 - [ ] Check for unexpected warnings in debug mode (`nextflow run . -profile debug,test,docker --outdir <OUTDIR>`).
 - [ ] Usage Documentation in `docs/usage.md` is updated.
 - [ ] Output Documentation in `docs/output.md` is updated.
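As context for the `${task.cpus}` fix above: a minimal sketch of a process that forwards its resource allocation to a tool on the command line. The process name, tool, and flags (`SOMETOOL_RUN`, `sometool`, `--threads`, `--max-mem`) are placeholders for illustration, not part of this pipeline.

```nextflow
process SOMETOOL_RUN {
    // standard nf-core label; the actual CPU/memory values are resolved from conf/base.config
    label 'process_medium'

    input:
    path reads

    script:
    // task.cpus and task.memory are filled in from the process configuration at runtime
    """
    sometool --threads ${task.cpus} --max-mem ${task.memory.toGiga()}G ${reads}
    """
}
```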
diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml
index ff6537e3..4f6e999f 100644
--- a/.github/workflows/awsfulltest.yml
+++ b/.github/workflows/awsfulltest.yml
@@ -31,7 +31,7 @@ jobs:
             }
           profiles: test_full

-      - uses: actions/upload-artifact@v3
+      - uses: actions/upload-artifact@v4
         with:
           name: Tower debug log file
           path: |
diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml
index 3807805b..ab0d7d3c 100644
--- a/.github/workflows/awstest.yml
+++ b/.github/workflows/awstest.yml
@@ -28,7 +28,8 @@ jobs:
             "aligner": "${{ matrix.aligner }}"
           }
           profiles: test
-      - uses: actions/upload-artifact@v3
+
+      - uses: actions/upload-artifact@v4
         with:
           name: Tower debug log file
           path: |
diff --git a/.github/workflows/branch.yml b/.github/workflows/branch.yml
index ae5b5d4b..b7f206b8 100644
--- a/.github/workflows/branch.yml
+++ b/.github/workflows/branch.yml
@@ -19,7 +19,7 @@ jobs:
       # NOTE - this doesn't currently work if the PR is coming from a fork, due to limitations in GitHub actions secrets
       - name: Post PR comment
         if: failure()
-        uses: mshick/add-pr-comment@v1
+        uses: mshick/add-pr-comment@b8f338c590a895d50bcbfa6c5859251edc8952fc # v2
         with:
           message: |
             ## This PR is against the `master` branch :x:
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index e8907caf..b5524e7d 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -1,5 +1,5 @@
-name: nf-core CI
 # This workflow runs the pipeline with the minimal test dataset to check that it completes without any syntax errors
+name: nf-core CI
 on:
   push:
     branches:
@@ -7,50 +7,73 @@ on:
   pull_request:
   release:
     types: [published]
+  merge_group:
+    types:
+      - checks_requested
+    branches:
+      - master
+      - dev

 env:
   NXF_ANSI_LOG: false
+  NFTEST_VER: "0.8.1"

 concurrency:
-  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}"
   cancel-in-progress: true

 jobs:
   test:
-    name: Run pipeline with test data
-    # Only run on push if this is the nf-core dev branch (merged PRs)
+    name: "aligner: ${{ matrix.profile }} ; NF: ${{ matrix.NXF_VER }}"
     if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/scrnaseq') }}"
     runs-on: ubuntu-latest
     strategy:
+      fail-fast: false
       matrix:
         NXF_VER:
          - "23.04.0"
          - "latest-everything"
-        profile: [
-            "test,docker --aligner alevin",
-            "test,docker --aligner kallisto",
-            "test,docker --aligner star",
-            "test,docker --aligner cellranger",
-            # "test,docker --aligner cellrangerarc", // this currently lacks a suitable test profile, see issue https://github.com/nf-core/scrnaseq/issues/290
-            # "test,docker --aligner universc", // this is broken, see issue https://github.com/nf-core/scrnaseq/issues/289
-          ]
+        profile: ["alevin", "cellranger", "kallisto", "star"]
+
     steps:
-      - name: Free some space
-        run: |
-          sudo rm -rf /usr/share/dotnet
-          sudo rm -rf /opt/ghc
-          sudo rm -rf "/usr/local/share/boost"
-          sudo rm -rf "$AGENT_TOOLSDIRECTORY"
+      - name: Disk space cleanup
+        uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
+
       - name: Check out pipeline code
-        uses: actions/checkout@v4
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4

       - name: Install Nextflow
         uses: nf-core/setup-nextflow@v1
         with:
           version: "${{ matrix.NXF_VER }}"

-      - name: Run pipeline with test data
-        # For example: adding multiple test runs with different parameters
-        # Remember that you can parallelise this by using strategy.matrix
+      - name: Cache nf-test installation
+        id: cache-software
+        uses: actions/cache@v3
+        with:
+          path: |
+            /usr/local/bin/nf-test
+            /home/runner/.nf-test/nf-test.jar
+          key: ${{ runner.os }}-${{ env.NFTEST_VER }}-nftest
+
+      - name: Install nf-test
+        if: steps.cache-software.outputs.cache-hit != 'true'
+        run: |
+          wget -qO- https://code.askimed.com/install/nf-test | bash
+          sudo mv nf-test /usr/local/bin/
+
+      - name: Run nf-test
         run: |
-          nextflow run ${GITHUB_WORKSPACE} -profile ${{ matrix.profile }} --outdir ./results
+          nf-test test tests/main_pipeline_${{ matrix.profile }}.test --junitxml=test.xml
+
+      - name: Output log on failure
+        if: failure()
+        run: |
+          sudo apt install bat > /dev/null
+          batcat --decorations=always --color=always ${{ github.workspace }}/.nf-test/tests/*/meta/nextflow.log
+
+      - name: Publish Test Report
+        uses: mikepenz/action-junit-report@v3
+        if: always() # always run even if the previous step fails
+        with:
+          report_paths: test.xml
diff --git a/.github/workflows/clean-up.yml b/.github/workflows/clean-up.yml
index 694e90ec..0b6b1f27 100644
--- a/.github/workflows/clean-up.yml
+++ b/.github/workflows/clean-up.yml
@@ -10,7 +10,7 @@ jobs:
       issues: write
       pull-requests: write
     steps:
      - uses: actions/stale@v7
       uses: actions/stale@28ca1036281a5e5922ead5184a1bbf96e5fc984e # v9
        with:
          stale-issue-message: "This issue has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment otherwise this issue will be closed in 20 days."
          stale-pr-message: "This PR has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment if it is still useful."
diff --git a/.github/workflows/download_pipeline.yml b/.github/workflows/download_pipeline.yml
new file mode 100644
index 00000000..e810c84b
--- /dev/null
+++ b/.github/workflows/download_pipeline.yml
@@ -0,0 +1,75 @@
+name: Test successful pipeline download with 'nf-core download'
+
+# Run the workflow when:
+#  - dispatched manually
+#  - when a PR is opened or reopened to master branch
+#  - the head branch of the pull request is updated, i.e. if fixes for a release are pushed last minute to dev.
+on:
+  workflow_dispatch:
+    inputs:
+      testbranch:
+        description: "The specific branch you wish to utilize for the test execution of nf-core download."
+        required: true
+        default: "dev"
+  pull_request:
+    types:
+      - opened
+    branches:
+      - master
+  pull_request_target:
+    branches:
+      - master
+
+env:
+  NXF_ANSI_LOG: false
+
+jobs:
+  download:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Disk space cleanup
+        uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
+
+      - name: Install Nextflow
+        uses: nf-core/setup-nextflow@v1
+
+      - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5
+        with:
+          python-version: "3.11"
+          architecture: "x64"
+      - uses: eWaterCycle/setup-singularity@931d4e31109e875b13309ae1d07c70ca8fbc8537 # v7
+        with:
+          singularity-version: 3.8.3
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install git+https://github.com/nf-core/tools.git@dev
+
+      - name: Get the repository name and current branch set as environment variable
+        run: |
+          echo "REPO_LOWERCASE=${GITHUB_REPOSITORY,,}" >> ${GITHUB_ENV}
+          echo "REPOTITLE_LOWERCASE=$(basename ${GITHUB_REPOSITORY,,})" >> ${GITHUB_ENV}
+          echo "REPO_BRANCH=${{ github.event.inputs.testbranch || 'dev' }}" >> ${GITHUB_ENV}
+
+      - name: Download the pipeline
+        env:
+          NXF_SINGULARITY_CACHEDIR: ./
+        run: |
+          nf-core download ${{ env.REPO_LOWERCASE }} \
+            --revision ${{ env.REPO_BRANCH }} \
+            --outdir ./${{ env.REPOTITLE_LOWERCASE }} \
+            --compress "none" \
+            --container-system 'singularity' \
+            --container-library "quay.io" -l "docker.io" -l "ghcr.io" \
+            --container-cache-utilisation 'amend' \
+            --download-configuration
+
+      - name: Inspect download
+        run: tree ./${{ env.REPOTITLE_LOWERCASE }}
+
+      - name: Run the downloaded pipeline
+        env:
+          NXF_SINGULARITY_CACHEDIR: ./
+          NXF_SINGULARITY_HOME_MOUNT: true
+        run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -stub -profile test,singularity --outdir ./results
diff --git a/.github/workflows/fix-linting.yml b/.github/workflows/fix-linting.yml
index 05b700e4..76f5e5bd 100644
--- a/.github/workflows/fix-linting.yml
+++ b/.github/workflows/fix-linting.yml
@@ -4,7 +4,7 @@ on:
     types: [created]

 jobs:
-  deploy:
+  fix-linting:
     # Only run if comment is on a PR with the main repo, and if it contains the magic keywords
     if: >
       contains(github.event.comment.html_url, '/pull/') &&
@@ -13,10 +13,17 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       # Use the @nf-core-bot token to check out so we can push later
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4
         with:
           token: ${{ secrets.nf_core_bot_auth_token }}

+      # indication that the linting is being fixed
+      - name: React on comment
+        uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4
+        with:
+          comment-id: ${{ github.event.comment.id }}
+          reactions: eyes
+
       # Action runs on the issue comment, so we don't get the PR by default
       # Use the gh cli to check out the PR
       - name: Checkout Pull Request
@@ -24,32 +31,59 @@
         env:
           GITHUB_TOKEN: ${{ secrets.nf_core_bot_auth_token }}

-      - uses: actions/setup-node@v4
+      # Install and run pre-commit
+      - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5
+        with:
+          python-version: 3.11

-      - name: Install Prettier
-        run: npm install -g prettier @prettier/plugin-php
+      - name: Install pre-commit
+        run: pip install pre-commit

-      # Check that we actually need to fix something
-      - name: Run 'prettier --check'
-        id: prettier_status
-        run: |
-          if prettier --check ${GITHUB_WORKSPACE}; then
-            echo "result=pass" >> $GITHUB_OUTPUT
-          else
-            echo "result=fail" >> $GITHUB_OUTPUT
-          fi
+      - name: Run pre-commit
+        id: pre-commit
+        run: pre-commit run --all-files
+        continue-on-error: true

-      - name: Run 'prettier --write'
-        if: steps.prettier_status.outputs.result == 'fail'
-        run: prettier --write ${GITHUB_WORKSPACE}
+      # indication that the linting has finished
+      - name: react if linting finished successfully
+        if: steps.pre-commit.outcome == 'success'
+        uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4
+        with:
+          comment-id: ${{ github.event.comment.id }}
+          reactions: "+1"

       - name: Commit & push changes
-        if: steps.prettier_status.outputs.result == 'fail'
+        id: commit-and-push
+        if: steps.pre-commit.outcome == 'failure'
         run: |
           git config user.email "core@nf-co.re"
           git config user.name "nf-core-bot"
           git config push.default upstream
           git add .
           git status
-          git commit -m "[automated] Fix linting with Prettier"
+          git commit -m "[automated] Fix code linting"
           git push
+
+      - name: react if linting errors were fixed
+        id: react-if-fixed
+        if: steps.commit-and-push.outcome == 'success'
+        uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4
+        with:
+          comment-id: ${{ github.event.comment.id }}
+          reactions: hooray
+
+      - name: react if linting errors were not fixed
+        if: steps.commit-and-push.outcome == 'failure'
+        uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4
+        with:
+          comment-id: ${{ github.event.comment.id }}
+          reactions: confused
+
+      - name: comment if linting errors were not fixed
+        if: steps.commit-and-push.outcome == 'failure'
+        uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4
+        with:
+          issue-number: ${{ github.event.issue.number }}
+          body: |
+            @${{ github.actor }} I tried to fix the linting errors, but it didn't work. Please fix them manually.
+            See [CI log](https://github.com/nf-core/scrnaseq/actions/runs/${{ github.run_id }}) for more details.
diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml
index 905c58e4..073e1876 100644
--- a/.github/workflows/linting.yml
+++ b/.github/workflows/linting.yml
@@ -11,72 +11,33 @@ on:
     types: [published]

 jobs:
-  EditorConfig:
+  pre-commit:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4

-      - uses: actions/setup-node@v4
-
-      - name: Install editorconfig-checker
-        run: npm install -g editorconfig-checker
-
-      - name: Run ECLint check
-        run: editorconfig-checker -exclude README.md $(find .* -type f | grep -v '.git\|.py\|.md\|json\|yml\|yaml\|html\|css\|work\|.nextflow\|build\|nf_core.egg-info\|log.txt\|Makefile')
-
-  Prettier:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v4
-
-      - uses: actions/setup-node@v4
-
-      - name: Install Prettier
-        run: npm install -g prettier
-
-      - name: Run Prettier --check
-        run: prettier --check ${GITHUB_WORKSPACE}
-
-  PythonBlack:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v4
-
-      - name: Check code lints with Black
-        uses: psf/black@stable
-
-      # If the above check failed, post a comment on the PR explaining the failure
-      - name: Post PR comment
-        if: failure()
-        uses: mshick/add-pr-comment@v1
+      - name: Set up Python 3.11
+        uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5
         with:
-          message: |
-            ## Python linting (`black`) is failing
-
-            To keep the code consistent with lots of contributors, we run automated code consistency checks.
-            To fix this CI test, please run:
-
-            * Install [`black`](https://black.readthedocs.io/en/stable/): `pip install black`
-            * Fix formatting errors in your pipeline: `black .`
-
-            Once you push these changes the test should pass, and you can hide this comment :+1:
+          python-version: 3.11
+          cache: "pip"

-            We highly recommend setting up Black in your code editor so that this formatting is done automatically on save. Ask about it on Slack for help!
+      - name: Install pre-commit
+        run: pip install pre-commit

-            Thanks again for your contribution!
-          repo-token: ${{ secrets.GITHUB_TOKEN }}
-          allow-repeats: false
+      - name: Run pre-commit
+        run: pre-commit run --all-files

   nf-core:
     runs-on: ubuntu-latest
     steps:
       - name: Check out pipeline code
-        uses: actions/checkout@v4
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4

       - name: Install Nextflow
         uses: nf-core/setup-nextflow@v1

-      - uses: actions/setup-python@v4
+      - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5
         with:
           python-version: "3.11"
           architecture: "x64"
@@ -99,7 +60,7 @@ jobs:

       - name: Upload linting log file artifact
         if: ${{ always() }}
-        uses: actions/upload-artifact@v3
+        uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # v4
         with:
           name: linting-logs
           path: |
diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml
index 0bbcd30f..b706875f 100644
--- a/.github/workflows/linting_comment.yml
+++ b/.github/workflows/linting_comment.yml
@@ -11,7 +11,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Download lint results
-        uses: dawidd6/action-download-artifact@v2
+        uses: dawidd6/action-download-artifact@f6b0bace624032e30a85a8fd9c1a7f8f611f5737 # v3
         with:
           workflow: linting.yml
           workflow_conclusion: completed
@@ -21,7 +21,7 @@ jobs:
         run: echo "pr_number=$(cat linting-logs/PR_number.txt)" >> $GITHUB_OUTPUT

       - name: Post PR comment
-        uses: marocchino/sticky-pull-request-comment@v2
+        uses: marocchino/sticky-pull-request-comment@331f8f5b4215f0445d3c07b4967662a32a2d3e31 # v2
         with:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
           number: ${{ steps.pr_number.outputs.pr_number }}
diff --git a/.github/workflows/release-announcements.yml b/.github/workflows/release-announcements.yml
index 6ad33927..d468aeaa 100644
--- a/.github/workflows/release-announcements.yml
+++ b/.github/workflows/release-announcements.yml
@@ -9,6 +9,11 @@ jobs:
   toot:
     runs-on: ubuntu-latest
     steps:
+      - name: get topics and convert to hashtags
+        id: get_topics
+        run: |
+          curl -s https://nf-co.re/pipelines.json | jq -r '.remote_workflows[] | select(.full_name == "${{ github.repository }}") | .topics[]' | awk '{print "#"$0}' | tr '\n' ' ' >> $GITHUB_OUTPUT
+
       - uses: rzr/fediverse-action@master
         with:
           access-token: ${{ secrets.MASTODON_ACCESS_TOKEN }}
@@ -20,11 +25,13 @@ jobs:
             Please see the changelog: ${{ github.event.release.html_url }}
+
+            ${{ steps.get_topics.outputs.GITHUB_OUTPUT }} #nfcore #openscience #nextflow #bioinformatics
+
   send-tweet:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/setup-python@v4
+      - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5
         with:
           python-version: "3.10"
       - name: Install dependencies
@@ -56,7 +63,7 @@ jobs:
   bsky-post:
     runs-on: ubuntu-latest
     steps:
-      - uses: zentered/bluesky-post-action@v0.0.2
+      - uses: zentered/bluesky-post-action@80dbe0a7697de18c15ad22f4619919ceb5ccf597 # v0.1.0
         with:
           post: |
             Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}!
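Since linting now runs through pre-commit in CI (see `linting.yml` above), contributors can reproduce the same checks locally. A minimal sketch using the standard pre-commit CLI; nothing here is specific to this repository beyond the presence of `.pre-commit-config.yaml`:

```bash
# install the pre-commit CLI (any recent Python environment)
pip install pre-commit

# run every configured hook against the whole repository, exactly as CI does
pre-commit run --all-files
```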
diff --git a/.gitignore b/.gitignore
index 5d402163..bc675aba 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,3 +9,5 @@ testing*
 log/
 reports/
 testme.sh
+.nf-test*
+.vscode
diff --git a/.gitpod.yml b/.gitpod.yml
index acf72695..105a1821 100644
--- a/.gitpod.yml
+++ b/.gitpod.yml
@@ -7,15 +7,14 @@ tasks:
     - name: unset JAVA_TOOL_OPTIONS
       command: |
         unset JAVA_TOOL_OPTIONS
+
 vscode:
   extensions: # based on nf-core.nf-core-extensionpack
-    - codezombiech.gitignore # Language support for .gitignore files
-    # - cssho.vscode-svgviewer # SVG viewer
     - esbenp.prettier-vscode # Markdown/CommonMark linting and style checking for Visual Studio Code
-    - eamodio.gitlens # Quickly glimpse into whom, why, and when a line or code block was changed
     - EditorConfig.EditorConfig # override user/workspace settings with settings found in .editorconfig files
     - Gruntfuggly.todo-tree # Display TODO and FIXME in a tree view in the activity bar
     - mechatroner.rainbow-csv # Highlight columns in csv files in different colors
-    # - nextflow.nextflow # Nextflow syntax highlighting
+    # - nextflow.nextflow # Nextflow syntax highlighting
     - oderwat.indent-rainbow # Highlight indentation level
     - streetsidesoftware.code-spell-checker # Spelling checker for source code
+    - charliermarsh.ruff # Code linter Ruff
diff --git a/.nf-core.yml b/.nf-core.yml
index 738ad918..90cfd21c 100644
--- a/.nf-core.yml
+++ b/.nf-core.yml
@@ -3,3 +3,8 @@ lint:
   template_strings: False
   files_unchanged:
     - .github/ISSUE_TEMPLATE/bug_report.yml
+  files_exist:
+    - lib/Utils.groovy
+  # TODO This is because of an issue with the monochromeLogs parameter
+  # See nextflow.config for details
+  schema_params: False
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 0c31cdb9..af57081f 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,5 +1,10 @@
 repos:
   - repo: https://github.com/pre-commit/mirrors-prettier
-    rev: "v2.7.1"
+    rev: "v3.1.0"
     hooks:
       - id: prettier
+  - repo: https://github.com/editorconfig-checker/editorconfig-checker.python
+    rev: "2.7.3"
+    hooks:
+      - id: editorconfig-checker
+        alias: ec
diff --git a/CHANGELOG.md b/CHANGELOG.md
index a7c8ed27..de6ff058 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,8 +3,19 @@
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

-## v2.5.1
+## v2.6.0 - 2024-04-16

+- Update cellranger to v8.0.0 ([#317](https://github.com/nf-core/scrnaseq/pull/317))
+- Change from pytests to nf-test ([#291](https://github.com/nf-core/scrnaseq/pull/291))
+- Update template to v2.13.1 ([#309](https://github.com/nf-core/scrnaseq/pull/309))
+- Update to kallisto|bustools v0.28.2 ([#294](https://github.com/nf-core/scrnaseq/pull/294))
+- Fix cellrangerarc matrix conversions and protocol selection ([#300](https://github.com/nf-core/scrnaseq/pull/300))
+- Add new emptydrops calling module ([#301](https://github.com/nf-core/scrnaseq/pull/301))
+- Update cellranger modules to latest version ([#316](https://github.com/nf-core/scrnaseq/issues/316))
+
+## v2.5.1 - 2024-01-23
+
+- Template update to v2.12 ([#298](https://github.com/nf-core/scrnaseq/pull/298)).
 - Fix that cellranger workflow couldn't be run and enable CI for this workflow ([#288](https://github.com/nf-core/scrnaseq/pull/288)).
 - Update modules ([#288](https://github.com/nf-core/scrnaseq/pull/288)).
diff --git a/README.md b/README.md
index 55044582..3399f168 100644
--- a/README.md
+++ b/README.md
@@ -1,13 +1,21 @@
-# ![nf-core/scrnaseq](docs/images/nf-core-scrnaseq_logo_light.png#gh-light-mode-only) ![nf-core/scrnaseq](docs/images/nf-core-scrnaseq_logo_dark.png#gh-dark-mode-only)
+<h1>
+  <picture>
+    <source media="(prefers-color-scheme: dark)" srcset="docs/images/nf-core-scrnaseq_logo_dark.png">
+    <img alt="nf-core/scrnaseq" src="docs/images/nf-core-scrnaseq_logo_light.png">
+  </picture>
+</h1>
 [![GitHub Actions CI Status](https://github.com/nf-core/scrnaseq/workflows/nf-core%20CI/badge.svg)](https://github.com/nf-core/scrnaseq/actions?query=workflow%3A%22nf-core+CI%22)
-[![GitHub Actions Linting Status](https://github.com/nf-core/scrnaseq/workflows/nf-core%20linting/badge.svg)](https://github.com/nf-core/scrnaseq/actions?query=workflow%3A%22nf-core+linting%22)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/scrnaseq/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.3568187-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.3568187)
+[![GitHub Actions Linting Status](https://github.com/nf-core/scrnaseq/workflows/nf-core%20linting/badge.svg)](https://github.com/nf-core/scrnaseq/actions?query=workflow%3A%22nf-core+linting%22)
+[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/scrnaseq/results)
+[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.3568187-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.3568187)
+[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)

 [![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A523.04.0-23aa62.svg)](https://www.nextflow.io/)
 [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)
 [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)
 [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)
-[![Launch on Nextflow Tower](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Nextflow%20Tower-%234256e7)](https://tower.nf/launch?pipeline=https://github.com/nf-core/scrnaseq)
+[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://tower.nf/launch?pipeline=https://github.com/nf-core/scrnaseq)

 [![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23scrnaseq-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/scrnaseq)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)

@@ -40,8 +48,8 @@ First, prepare a samplesheet with your input data that looks as follows:

 ```csv
 sample,fastq_1,fastq_2,expected_cells
-pbmc8k,pbmc8k_S1_L007_R1_001.fastq.gz,pbmc8k_S1_L007_R2_001.fastq.gz,"10000"
-pbmc8k,pbmc8k_S1_L008_R1_001.fastq.gz,pbmc8k_S1_L008_R2_001.fastq.gz,"10000"
+pbmc8k,pbmc8k_S1_L007_R1_001.fastq.gz,pbmc8k_S1_L007_R2_001.fastq.gz,10000
+pbmc8k,pbmc8k_S1_L008_R1_001.fastq.gz,pbmc8k_S1_L008_R2_001.fastq.gz,10000
 ```

 Each row represents a fastq file (single-end) or a pair of fastq files (paired end).
diff --git a/assets/email_template.html b/assets/email_template.html
index 2ff8db51..d5ed3696 100644
--- a/assets/email_template.html
+++ b/assets/email_template.html
@@ -12,7 +12,7 @@

     <img src="cid:nfcorepipelinelogo">

-    <h1>nf-core/scrnaseq v${version}</h1>
+    <h1>nf-core/scrnaseq ${version}</h1>
     <h2>Run Name: $runName</h2>

 <% if (!success){
diff --git a/assets/email_template.txt b/assets/email_template.txt
index 9065d5c2..a0fc3b38 100644
--- a/assets/email_template.txt
+++ b/assets/email_template.txt
@@ -4,7 +4,7 @@
   |\\ | |__  __ /  ` /  \\ |__) |__         }  {
   | \\| |       \\__, \\__/ |  \\ |___     \\`-._,-`-,
                                         `._,._,'
-  nf-core/scrnaseq v${version}
+  nf-core/scrnaseq ${version}
 ----------------------------------------------------
 Run Name: $runName
diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml
index 10ec1af4..adfa48ba 100644
--- a/assets/multiqc_config.yml
+++ b/assets/multiqc_config.yml
@@ -1,7 +1,7 @@
 report_comment: >
-  This report has been generated by the nf-core/scrnaseq
+  This report has been generated by the nf-core/scrnaseq
   analysis pipeline. For information about how to interpret these results, please see the
-  documentation.
+  documentation.
 report_section_order:
   "nf-core-scrnaseq-methods-description":
     order: -1000
@@ -11,3 +11,5 @@
     order: -1002

 export_plots: true
+
+disable_version_detection: true
diff --git a/assets/nf-core-scrnaseq_logo_light.png b/assets/nf-core-scrnaseq_logo_light.png
index 5e5ec430..bccb9c39 100644
Binary files a/assets/nf-core-scrnaseq_logo_light.png and b/assets/nf-core-scrnaseq_logo_light.png differ
diff --git a/assets/protocols.json b/assets/protocols.json
index 23ff1328..d8da2f8a 100644
--- a/assets/protocols.json
+++ b/assets/protocols.json
@@ -30,6 +30,11 @@
             "protocol": "SC3Pv3"
         }
     },
+    "cellrangerarc": {
+        "auto": {
+            "protocol": "auto"
+        }
+    },
     "star": {
         "10XV1": {
             "protocol": "CB_UMI_Simple",
diff --git a/assets/schema_input.json b/assets/schema_input.json
index 0a93254d..a531769f 100644
--- a/assets/schema_input.json
+++ b/assets/schema_input.json
@@ -10,27 +10,38 @@
             "sample": {
                 "type": "string",
                 "pattern": "^\\S+$",
-                "errorMessage": "Sample name must be provided and cannot contain spaces"
+                "errorMessage": "Sample name must be provided and cannot contain spaces",
+                "meta": ["id"]
             },
             "fastq_1": {
                 "type": "string",
+                "format": "file-path",
+                "exists": true,
                 "pattern": "^\\S+\\.f(ast)?q\\.gz$",
                 "errorMessage": "FastQ file for reads 1 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'"
             },
             "fastq_2": {
-                "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'",
-                "anyOf": [
-                    {
-                        "type": "string",
-                        "pattern": "^\\S+\\.f(ast)?q\\.gz$"
-                    },
-                    {
-                        "type": "string",
-                        "maxLength": 0
-                    }
-                ]
+                "type": "string",
+                "format": "file-path",
+                "exists": true,
+                "pattern": "^\\S+\\.f(ast)?q\\.gz$",
+                "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'"
+            },
+            "expected_cells": {
+                "type": "integer",
+                "errorMessage": "Expected cells must be an Integer",
+                "meta": ["expected_cells"]
+            },
+            "seq_center": {
+                "type": "string",
+                "meta": ["seq_center"]
+            },
+            "sample_type": {
+                "type": "string",
+                "enum": ["atac", "gex"],
+                "meta": ["sample_type"]
+            }
         },
-        "required": ["sample", "fastq_1"]
+        "required": ["sample", "fastq_1", "fastq_2"]
     }
 }
diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py
deleted file mode 100755
index bd713438..00000000
--- a/bin/check_samplesheet.py
+++ /dev/null
@@ -1,280 +0,0 @@
-#!/usr/bin/env python
-
-# This script is based on the example at: https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv
-
-"""Provide a command line tool to validate and transform tabular samplesheets."""
-
-
-import argparse
-import csv
-import logging
-import sys
-from collections import Counter
-from pathlib import Path
-
-logger = logging.getLogger()
-
-
-def read_head(handle, num_lines=10):
-    """Read the specified number of lines from the current position in the file."""
-    lines = []
-    for idx, line in enumerate(handle):
-        if idx == num_lines:
-            break
-        lines.append(line)
-    return "".join(lines)
-
-
-def print_error(error, context="Line", context_str=""):
-    error_str = f"ERROR: Please check samplesheet -> {error}"
-    if context != "" and context_str != "":
-        error_str = f"ERROR: Please check samplesheet -> {error}\n{context.strip()}: '{context_str.strip()}'"
-    print(error_str)
-    sys.exit(1)
-
-
-def sniff_format(handle):
-    """
-    Detect the tabular format.
-
-    Args:
-        handle (text file): A handle to a `text file`_ object. The read position is
-            expected to be at the beginning (index 0).
-
-    Returns:
-        csv.Dialect: The detected tabular format.
-
-    .. _text file:
-        https://docs.python.org/3/glossary.html#term-text-file
-
-    """
-    peek = read_head(handle)
-    handle.seek(0)
-    sniffer = csv.Sniffer()
-    dialect = sniffer.sniff(peek)
-    return dialect
-
-
-def check_samplesheet(file_in, file_out):
-    """
-    Check that the tabular samplesheet has the structure expected by nf-core pipelines.
-
-    Validate the general shape of the table, expected columns, and each row. Also add
-    an additional column which records whether one or two FASTQ reads were found.
-
-    Args:
-        file_in (pathlib.Path): The given tabular samplesheet. The format can be either
-            CSV, TSV, or any other format automatically recognized by ``csv.Sniffer``.
-        file_out (pathlib.Path): Where the validated and transformed samplesheet should
-            be created; always in CSV format.
-
-    Example:
-        This function checks that the samplesheet follows the following structure,
-        see also the `viral recon samplesheet`_::
-
-            sample,fastq_1,fastq_2
-            SAMPLE_PE,SAMPLE_PE_RUN1_1.fastq.gz,SAMPLE_PE_RUN1_2.fastq.gz
-            SAMPLE_PE,SAMPLE_PE_RUN2_1.fastq.gz,SAMPLE_PE_RUN2_2.fastq.gz
-            SAMPLE_SE,SAMPLE_SE_RUN1_1.fastq.gz,
-
-    .. _viral recon samplesheet:
-        https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv
-
-    """
-
-    sample_mapping_dict = {}
-    with open(file_in, "r") as fin:
-        ## Check header
-        MIN_COLS = 2
-        MIN_HEADER = ["sample", "fastq_1", "fastq_2"]
-        OPT_HEADER = ["expected_cells", "seq_center", "fastq_barcode", "sample_type"]
-        SAMPLE_TYPES = ["gex", "atac"]
-        header = [x.strip('"') for x in fin.readline().strip().split(",")]
-
-        unknown_header = 0
-        min_header_count = 0
-        colmap = {"sample": 0, "fastq_1": 1, "fastq2": 2}
-        i = 0
-        for h in header:
-            if h not in MIN_HEADER and h not in OPT_HEADER:
-                unknown_header = 1
-            if h in MIN_HEADER:
-                min_header_count = min_header_count + 1
-            colmap[h] = i
-            i = i + 1
-        if unknown_header or min_header_count < len(MIN_HEADER):
-            given = ",".join(header)
-            wanted = ",".join(MIN_HEADER)
-            print(f"ERROR: Please check samplesheet header -> {given} != {wanted}")
-            sys.exit(1)
-
-        ## Check sample entries
-        for line in fin:
-            lspl = [x.strip().strip('"') for x in line.strip().split(",")]
-
-            # Check valid number of columns per row
-            if len(lspl) < len(header):
-                print_error(
-                    "Invalid number of columns (minimum = {})!".format(len(header)),
-                    "Line",
-                    line,
-                )
-            num_cols = len([x for x in lspl if x])
-            if num_cols < MIN_COLS:
-                print_error(
-                    "Invalid number of populated columns (minimum = {})!".format(MIN_COLS),
-                    "Line",
-                    line,
-                )
-
-            ## Check sample name entries
-            sample, fastq_1, fastq_2 = lspl[: len(MIN_HEADER)]
-            sample = sample.replace(" ", "_")
-            if not sample:
-                print_error("Sample entry has not been specified!", "Line", line)
-
-            ## Check expected cells is an integer if present
-            expected_cells = ""
-            if "expected_cells" in header:
-                expected_cells = lspl[colmap["expected_cells"]]
-                if not is_integer(expected_cells):
-                    print_error("Expected cells must be an integer", "Line", line)
-
-            ## If present, replace spaces with _ in sequencing center name
-            seq_center = ""
-            if "seq_center" in header:
-                seq_center = lspl[colmap["seq_center"]]
-                seq_center = seq_center.replace(" ", "_")
-
-            ## Check FastQ file extension
-            fastq_list = [fastq_1, fastq_2]
-
-            fastq_barcode = ""
-            if "fastq_barcode" in header:
-                fastq_barcode = lspl[colmap["fastq_barcode"]]
-                fastq_list.append(fastq_barcode)
-
-            sample_type = ""
-            if "sample_type" in header:
-                sample_type = lspl[colmap["sample_type"]]
-                if sample_type not in SAMPLE_TYPES:
-                    print_error(
-                        "Sample type {} is not supported! Please specify either {}".format(
-                            sample_type, " or ".join(SAMPLE_TYPES)
-                        ),
-                        "Line",
-                        line,
-                    )
-
-            for fastq in fastq_list:
-                if fastq:
-                    if fastq.find(" ") != -1:
-                        print_error("FastQ file contains spaces!", "Line", line)
-                    if not fastq.endswith(".fastq.gz") and not fastq.endswith(".fq.gz"):
-                        print_error(
-                            "FastQ file does not have extension '.fastq.gz' or '.fq.gz'!",
-                            "Line",
-                            line,
-                        )
-
-            ## Auto-detect paired-end/single-end
-            sample_info = []  ## [single_end, fastq_1, fastq_2]
-            if sample and fastq_1 and fastq_2:  ## Paired-end short reads
-                sample_info = ["0", fastq_1, fastq_2, expected_cells, seq_center, fastq_barcode, sample_type]
-            elif sample and fastq_1 and not fastq_2:  ## Single-end short reads
-                sample_info = ["1", fastq_1, fastq_2, expected_cells, seq_center, fastq_barcode, sample_type]
-            else:
-                print_error("Invalid combination of columns provided!", "Line", line)
-
-            ## Create sample mapping dictionary = { sample: [ single_end, fastq_1, fastq_2 ] }
-            if sample not in sample_mapping_dict:
-                sample_mapping_dict[sample] = [sample_info]
-            else:
-                if sample_info in sample_mapping_dict[sample]:
-                    # print_error("Samplesheet contains duplicate rows!", "Line", line)
-                    sample_mapping_dict[sample].append(sample_info)
-                else:
-                    sample_mapping_dict[sample].append(sample_info)
-
-    ## Write validated samplesheet with appropriate columns
-    if len(sample_mapping_dict) > 0:
-        with open(file_out, "w") as fout:
-            fout.write(
-                ",".join(
-                    [
-                        "sample",
-                        "single_end",
-                        "fastq_1",
-                        "fastq_2",
-                        "expected_cells",
-                        "seq_center",
-                        "fastq_barcode",
-                        "sample_type",
-                    ]
-                )
-                + "\n"
-            )
-            for sample in sorted(sample_mapping_dict.keys()):
-                ## Check that multiple runs of the same sample are of the same datatype
-                if not all(x[0] == sample_mapping_dict[sample][0][0] for x in sample_mapping_dict[sample]):
-                    print_error(
-                        "Multiple runs of a sample must be of the same datatype!",
-                        "Sample: {}".format(sample),
-                    )
-
-                for idx, val in enumerate(sample_mapping_dict[sample]):
-                    fout.write(",".join(["{}".format(sample)] + val) + "\n")
-    else:
-        print_error("No entries to process!", "Samplesheet: {}".format(file_in))
-
-
-def parse_args(argv=None):
-    """Define and immediately parse command line arguments."""
-    parser = argparse.ArgumentParser(
-        description="Validate and transform a tabular samplesheet.",
-        epilog="Example: python check_samplesheet.py samplesheet.csv samplesheet.valid.csv",
-    )
-    parser.add_argument(
-        "file_in",
-        metavar="FILE_IN",
-        type=Path,
-        help="Tabular input samplesheet in CSV or TSV format.",
-    )
-    parser.add_argument(
-        "file_out",
-        metavar="FILE_OUT",
-        type=Path,
-        help="Transformed output samplesheet in CSV format.",
-    )
-    parser.add_argument(
-        "-l",
-        "--log-level",
-        help="The desired log level (default WARNING).",
-        choices=("CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"),
-        default="WARNING",
-    )
-    return parser.parse_args(argv)
-
-
-def is_integer(n):
-    try:
-        float(n)
-    except ValueError:
-        return False
-    else:
-        return float(n).is_integer()
-
-
-def main(argv=None):
-    """Coordinate argument parsing and program execution."""
-    args = parse_args(argv)
-    logging.basicConfig(level=args.log_level, format="[%(levelname)s] %(message)s")
-    if not args.file_in.is_file():
-        logger.error(f"The given input file {args.file_in} was not found!")
-        sys.exit(2)
-    args.file_out.parent.mkdir(parents=True, exist_ok=True)
-    check_samplesheet(args.file_in, args.file_out)
-
-
-if __name__ == "__main__":
-    sys.exit(main())
diff --git a/bin/emptydrops_cell_calling.R b/bin/emptydrops_cell_calling.R
new file mode 100755
index 00000000..23a45267
--- /dev/null
+++ b/bin/emptydrops_cell_calling.R
@@ -0,0 +1,52 @@
+#!/usr/bin/env Rscript
+library("DropletUtils")
+library("Matrix")
+
+args <- commandArgs(trailingOnly=TRUE)
+
+fn_mtx      <- args[1]
+fn_barcodes <- args[2]
+fn_genes    <- args[3]
+outdir      <- args[4]
+aligner     <- args[5]
+
+# Read matrix/barcodes/genes
+genes    <- read.table(fn_genes, sep='\t')
+barcodes <- read.table(fn_barcodes, sep='\t')
+mtx      <- readMM(fn_mtx)
+
+get_name <- function(file) {
+    name <- as.character(basename(file))
+    name <- gsub('\\.gz$', '', name)
+    return(name)
+}
+
+# transpose matrices when required
+# based on code of 'mtx_to_seurat.R', only the data from kallisto and alevin would require transposition
+print("Only kallisto and alevin have transposed matrices.")
+if (aligner %in% c( "kallisto", "alevin" )) {
+    is_transposed <- TRUE
+    mtx <- t(mtx)
+} else {
+    is_transposed <- FALSE
+}
+
+# Call empty drops
+e.out   <- emptyDrops(mtx)
+is.cell <- e.out$FDR <= 0.01
+
+# Slice matrix and barcodes
+mtx_filtered      <- mtx[, which(is.cell), drop=FALSE]
+barcodes_filtered <- barcodes[which(is.cell), ]
+
+# If matrix was transposed earlier, transpose it back
+if (is_transposed) {
+    mtx_filtered <- t(mtx_filtered)
+    print('Transposing back matrix.')
+}
+
+# Write output
+writeMM(mtx_filtered, file.path(outdir, get_name(fn_mtx)))
+write.table(barcodes_filtered, file=file.path(outdir, get_name(fn_barcodes)), col.names=FALSE, row.names=FALSE, sep='\t', quote=FALSE)
+write.table(genes, file=file.path(outdir, get_name(fn_genes)), col.names=FALSE, row.names=FALSE, sep='\t', quote=FALSE)
diff --git a/bin/mtx_to_h5ad.py b/bin/mtx_to_h5ad.py
index 3282122d..2f5dc9ba 100755
--- a/bin/mtx_to_h5ad.py
+++ b/bin/mtx_to_h5ad.py
@@ -32,9 +32,13 @@ def _mtx_to_adata(
     aligner: str,
 ):
     adata = sc.read_mtx(mtx_file)
-    if (
-        aligner == "star"
-    ):  # for some reason star matrix comes transposed and doesn't fit when values are appended directly
+    # for some reason star matrix comes transposed and doesn't fit when values are appended directly
+    # also true for cellranger files (this path is only used when running with the custom emptydrops_filtered files;
+    # otherwise, the cellranger .h5 files are used)
+    if aligner in [
+        "cellranger",
+        "star",
+    ]:
         adata = adata.transpose()

     adata.obs_names = pd.read_csv(barcode_file, header=None, sep="\t")[0].values
@@ -57,22 +61,36 @@ def input_to_adata(
     if verbose and (txp2gene or star_index):
         print("Reading in {}".format(input_data))

-    if aligner == "cellranger":
+    #
+    # open main data
+    #
+    if aligner == "cellranger" and input_data.lower().endswith('.h5'):
         adata = _10x_h5_to_adata(input_data, sample)
     else:
         adata = _mtx_to_adata(input_data, barcode_file, feature_file, sample, aligner)

+    #
+    # open gene information
+    #
     if verbose and (txp2gene or star_index):
         print("Reading in {}".format(txp2gene))

-    if txp2gene:
-        t2g = pd.read_table(txp2gene, header=None, names=["gene_id", "gene_symbol"], usecols=[1, 2])
-    elif star_index:
-        t2g = pd.read_table(
-            f"{star_index}/geneInfo.tab", header=None, skiprows=1, names=["gene_id", "gene_symbol"], usecols=[0, 1]
-        )
-
-    if txp2gene or star_index:
+    if aligner == "cellranger" and not input_data.lower().endswith('.h5'):
+        #
+        # for the cellranger workflow we do not have a txp2gene file, so, when using this normal/manual function
+        # for empty drops, we need to provide this information coming directly from the features.tsv file;
+        # by not using the .h5 file for conversion, we lose the two extra columns: feature_types and genome
+        #
+        t2g = pd.read_table(feature_file, header=None, names=["gene_id", "gene_symbol", "feature_types"], usecols=[0, 1, 2])
+    else:
+        if txp2gene:
+            t2g = pd.read_table(txp2gene, header=None, names=["gene_id", "gene_symbol"], usecols=[1, 2])
+        elif star_index:
+            t2g = pd.read_table(
+                f"{star_index}/geneInfo.tab", header=None, skiprows=1, names=["gene_id", "gene_symbol"], usecols=[0, 1]
+            )
+
+    if txp2gene or star_index or (aligner == "cellranger" and not input_data.lower().endswith('.h5')):
         t2g = t2g.drop_duplicates(subset="gene_id").set_index("gene_id")
         adata.var["gene_symbol"] = t2g["gene_symbol"]
diff --git a/bin/mtx_to_seurat.R b/bin/mtx_to_seurat.R
index f2680838..99ce2f73 100755
--- a/bin/mtx_to_seurat.R
+++ b/bin/mtx_to_seurat.R
@@ -3,23 +3,40 @@ library(Seurat)

 args <- commandArgs(trailingOnly=TRUE)

-mtx_file <- args[1]
-barcode_file <- args[2]
-feature_file <- args[3]
-out.file <- args[4]
-aligner <- args[5]
+mtx_file      <- args[1]
+barcode_file  <- args[2]
+feature_file  <- args[3]
+out.file      <- args[4]
+aligner       <- args[5]
+is_emptydrops <- args[6]
+
+if (is_emptydrops == "--is_emptydrops") {
+    is_emptydrops <- TRUE
+} else {
+    is_emptydrops <- FALSE
+}

-if(aligner %in% c("kallisto", "alevin")) {
+if (aligner %in% c( "kallisto", "alevin" )) {
+    print("1")
     # for kallisto and alevin, the features file contains only one column and matrix needs to be transposed
     expression.matrix <- ReadMtx(
         mtx = mtx_file, features = feature_file, cells = barcode_file, feature.column = 1, mtx.transpose = TRUE
     )
 } else {
-    expression.matrix <- ReadMtx(
-        mtx = mtx_file, features = feature_file, cells = barcode_file
-    )
+    if (aligner %in% c( "cellranger", "star" ) && is_emptydrops) {
+        print("2")
+        expression.matrix <- ReadMtx(
+            mtx = mtx_file, features = feature_file, cells = barcode_file, feature.column = 1
+        )
+    } else {
+        print("3")
+        expression.matrix <- ReadMtx(
+            mtx = mtx_file, features = feature_file, cells = barcode_file
+        )
+    }
 }

+
 seurat.object <- CreateSeuratObject(counts = expression.matrix)

 dir.create(basename(dirname(out.file)), showWarnings = FALSE)
diff --git a/conf/modules.config b/conf/modules.config
index 5813926a..ffe2a0b6 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -18,6 +18,7 @@ process {
     ]

     withName: FASTQC {
+        ext.args = '--quiet'
         time = { check_max( 120.h * task.attempt, 'time' ) }
     }

@@ -28,6 +29,7 @@ process {
             saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
         ]
     }
+
     withName: CUSTOM_DUMPSOFTWAREVERSIONS {
         publishDir = [
             path: { "${params.outdir}/pipeline_info" },
@@ -45,6 +47,20 @@ process {
         ]
     }

+    if (!params.skip_emptydrops) {
+        withName: EMPTYDROPS_CELL_CALLING {
+            publishDir = [
+                path: { "${params.outdir}/${params.aligner}" },
+                mode: params.publish_dir_mode,
+                saveAs: { filename ->
+                    if ( params.aligner == 'cellranger' ) "count/${meta.id}/${filename}"
+                    else if ( params.aligner == 'kallisto' ) "${meta.id}.count/${filename}"
+                    else "${meta.id}/${filename}"
+                }
+            ]
+        }
+    }
+
     withName: 'MTX_TO_H5AD|CONCAT_H5AD|MTX_TO_SEURAT' {
         publishDir = [
             path: { "${params.outdir}/${params.aligner}/mtx_conversions" },
@@ -82,7 +98,7 @@ if(params.aligner == "cellranger") {
                 path: "${params.outdir}/${params.aligner}/count",
                 mode: params.publish_dir_mode
             ]
-            ext.args = {"--chemistry ${meta.chemistry} " + (meta.expected_cells ? "--expect-cells ${meta.expected_cells}" : '')}
+            ext.args = {"--chemistry ${meta.chemistry} --create-bam true " + (meta.expected_cells ? "--expect-cells ${meta.expected_cells}" : '')}
"--expect-cells ${meta.expected_cells}" : '')} time = { check_max( 240.h * task.attempt, 'time' ) } } } @@ -204,11 +220,12 @@ if (params.aligner == 'kallisto') { ] } withName: KALLISTOBUSTOOLS_COUNT { + def kb_filter = (params.kb_filter) ? '--filter' : '' publishDir = [ path: { "${params.outdir}/${params.aligner}" }, mode: params.publish_dir_mode ] - ext.args = "--workflow ${params.kb_workflow}" + ext.args = "--workflow ${params.kb_workflow} ${kb_filter}" } } } diff --git a/conf/test.config b/conf/test.config index 45ee54c8..08ab1b69 100644 --- a/conf/test.config +++ b/conf/test.config @@ -20,7 +20,8 @@ params { max_time = '6.h' // Input data - input = 'https://github.com/nf-core/test-datasets/raw/scrnaseq/samplesheet-2-0.csv' + input = 'https://github.com/nf-core/test-datasets/raw/scrnaseq/samplesheet-2-0.csv' + skip_emptydrops = true // module does not work on small dataset // Genome references fasta = 'https://github.com/nf-core/test-datasets/raw/scrnaseq/reference/GRCm38.p6.genome.chr19.fa' diff --git a/docs/images/nf-core-scrnaseq_logo_dark.png b/docs/images/nf-core-scrnaseq_logo_dark.png index 4dbfb4b4..dd2754ee 100644 Binary files a/docs/images/nf-core-scrnaseq_logo_dark.png and b/docs/images/nf-core-scrnaseq_logo_dark.png differ diff --git a/docs/images/nf-core-scrnaseq_logo_light.png b/docs/images/nf-core-scrnaseq_logo_light.png index 5e5ec430..dee21bdd 100644 Binary files a/docs/images/nf-core-scrnaseq_logo_light.png and b/docs/images/nf-core-scrnaseq_logo_light.png differ diff --git a/docs/output.md b/docs/output.md index 7e9f0cd8..cff2d442 100644 --- a/docs/output.md +++ b/docs/output.md @@ -19,6 +19,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d - [Cellranger](#cellranger) - [Cellranger ARC](#cellranger-arc) - [UniverSC](#universc) + - [Custom emptydrops filter](#custom-emptydrops-filter) - [Other output data](#other-output-data) - [MultiQC](#multiqc) - [Pipeline information](#pipeline-information) @@ -128,6 +129,16 @@ Battenberg, K., Kelly, S.T., Ras, R.A., Hetherington, N.A., Hayashi, K., and Min - Contains the mapped BAM files, filtered and unfiltered HDF5 matrices and output metrics created by the open-source implementation of Cell Ranger run via UniverSC +## Custom emptydrops filter + +The pipeline also possess a module to perform empty-drops calling and filtering with a custom-made script that uses a library called `bioconductor-dropletutils` that is available in `bioconda`. The process is simple, it takes a raw/unfiltered matrix file, and performs the empty-drops calling and filtering on it, generating another matrix file. + +> Users can turn it of with `--skip_emptydrops`. + +**Output directory: `results/${params.aligner}/emptydrops_filtered`** + +- Contains the empty-drops filtered matrices results generated by the `bioconductor-dropletutils` custom script + ## Other output data **Output directory: `results/reference_genome`** @@ -143,6 +154,21 @@ Battenberg, K., Kelly, S.T., Ras, R.A., Hetherington, N.A., Hayashi, K., and Min - `*_matrix.h5ad` - `.mtx` files converted to [AnnData](https://anndata.readthedocs.io/en/latest/) in `.h5ad` format, using [scanpy package](https://scanpy.readthedocs.io/en/stable/). 
- One per sample and a single one with all samples concatenated together `combined_matrix.h5ad` +- `*_matrix.rds` + - `.mtx` files converted to R native data format, rds, using the [Seurat package](https://github.com/satijalab/seurat) + - One per sample + +Because the pipeline has both the data directly from the aligners, and from the custom empty-drops filtering module the conversion modules were modified to understand the difference between raw/filtered from the aligners itself and filtered from the custom empty-drops module. So, to try to avoid confusion by the user, we added "suffixes" to the generated converted files so that we have provenance from what input it came from. + +So, the conversion modules generate data with the following syntax: **`*_{raw,filtered,custom_emptydrops_filter}_matrix.{h5ad,rds}`**. With the following meanings: + +| suffix | meaning | +| :----------------------- | :--------------------------------------------------------------------------------------------------------------------------------------- | +| raw | Conversion of the raw/unprocessed matrix generated by the tool. It is also used for tools that generate only one matrix, such as alevin. | +| filtered | Conversion of the filtered/processed matrix generated by the tool | +| custom_emptydrops_filter | Conversion of the matrix that was generated by the new custom empty drops filter module | + +> Some aligners, like `alevin` do not produce both raw&filtered matrices. When aligners give only one output, they are treated with the `raw` suffix. Some aligners may have an option to give both raw&filtered and only one, like `kallisto`. Be aware when using the tools. ## MultiQC diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy deleted file mode 100755 index e248e4c3..00000000 --- a/lib/NfcoreTemplate.groovy +++ /dev/null @@ -1,356 +0,0 @@ -// -// This file holds several functions used within the nf-core pipeline template. -// - -import org.yaml.snakeyaml.Yaml -import groovy.json.JsonOutput -import nextflow.extension.FilesEx - -class NfcoreTemplate { - - // - // Check AWS Batch related parameters have been specified correctly - // - public static void awsBatch(workflow, params) { - if (workflow.profile.contains('awsbatch')) { - // Check params.awsqueue and params.awsregion have been set if running on AWSBatch - assert (params.awsqueue && params.awsregion) : "Specify correct --awsqueue and --awsregion parameters on AWSBatch!" - // Check outdir paths to be S3 buckets if running on AWSBatch - assert params.outdir.startsWith('s3:') : "Outdir not on S3 - specify S3 Bucket to run on AWSBatch!" - } - } - - // - // Warn if a -profile or Nextflow config has not been provided to run the pipeline - // - public static void checkConfigProvided(workflow, log) { - if (workflow.profile == 'standard' && workflow.configFiles.size() <= 1) { - log.warn "[$workflow.manifest.name] You are attempting to run the pipeline without any custom configuration!\n\n" + - "This will be dependent on your local compute environment but can be achieved via one or more of the following:\n" + - " (1) Using an existing pipeline profile e.g. `-profile docker` or `-profile singularity`\n" + - " (2) Using an existing nf-core/configs for your Institution e.g. `-profile crick` or `-profile uppmax`\n" + - " (3) Using your own local custom config e.g. 
-                "Please refer to the quick start section and usage docs for the pipeline.\n "
-        }
-    }
-
-    //
-    // Generate version string
-    //
-    public static String version(workflow) {
-        String version_string = ""
-
-        if (workflow.manifest.version) {
-            def prefix_v = workflow.manifest.version[0] != 'v' ? 'v' : ''
-            version_string += "${prefix_v}${workflow.manifest.version}"
-        }
-
-        if (workflow.commitId) {
-            def git_shortsha = workflow.commitId.substring(0, 7)
-            version_string += "-g${git_shortsha}"
-        }
-
-        return version_string
-    }
-
-    //
-    // Construct and send completion email
-    //
-    public static void email(workflow, params, summary_params, projectDir, log, multiqc_report=[]) {
-
-        // Set up the e-mail variables
-        def subject = "[$workflow.manifest.name] Successful: $workflow.runName"
-        if (!workflow.success) {
-            subject = "[$workflow.manifest.name] FAILED: $workflow.runName"
-        }
-
-        def summary = [:]
-        for (group in summary_params.keySet()) {
-            summary << summary_params[group]
-        }
-
-        def misc_fields = [:]
-        misc_fields['Date Started'] = workflow.start
-        misc_fields['Date Completed'] = workflow.complete
-        misc_fields['Pipeline script file path'] = workflow.scriptFile
-        misc_fields['Pipeline script hash ID'] = workflow.scriptId
-        if (workflow.repository) misc_fields['Pipeline repository Git URL'] = workflow.repository
-        if (workflow.commitId) misc_fields['Pipeline repository Git Commit'] = workflow.commitId
-        if (workflow.revision) misc_fields['Pipeline Git branch/tag'] = workflow.revision
-        misc_fields['Nextflow Version'] = workflow.nextflow.version
-        misc_fields['Nextflow Build'] = workflow.nextflow.build
-        misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp
-
-        def email_fields = [:]
-        email_fields['version'] = NfcoreTemplate.version(workflow)
-        email_fields['runName'] = workflow.runName
-        email_fields['success'] = workflow.success
-        email_fields['dateComplete'] = workflow.complete
-        email_fields['duration'] = workflow.duration
-        email_fields['exitStatus'] = workflow.exitStatus
-        email_fields['errorMessage'] = (workflow.errorMessage ?: 'None')
-        email_fields['errorReport'] = (workflow.errorReport ?: 'None')
-        email_fields['commandLine'] = workflow.commandLine
-        email_fields['projectDir'] = workflow.projectDir
-        email_fields['summary'] = summary << misc_fields
-
-        // On success try attach the multiqc report
-        def mqc_report = null
-        try {
-            if (workflow.success) {
-                mqc_report = multiqc_report.getVal()
-                if (mqc_report.getClass() == ArrayList && mqc_report.size() >= 1) {
-                    if (mqc_report.size() > 1) {
-                        log.warn "[$workflow.manifest.name] Found multiple reports from process 'MULTIQC', will use only one"
-                    }
-                    mqc_report = mqc_report[0]
-                }
-            }
-        } catch (all) {
-            if (multiqc_report) {
-                log.warn "[$workflow.manifest.name] Could not attach MultiQC report to summary email"
-            }
-        }
-
-        // Check if we are only sending emails on failure
-        def email_address = params.email
-        if (!params.email && params.email_on_fail && !workflow.success) {
-            email_address = params.email_on_fail
-        }
-
-        // Render the TXT template
-        def engine = new groovy.text.GStringTemplateEngine()
-        def tf = new File("$projectDir/assets/email_template.txt")
-        def txt_template = engine.createTemplate(tf).make(email_fields)
-        def email_txt = txt_template.toString()
-
-        // Render the HTML template
-        def hf = new File("$projectDir/assets/email_template.html")
-        def html_template = engine.createTemplate(hf).make(email_fields)
-        def email_html = html_template.toString()
-
-        // Render the sendmail template
-        def max_multiqc_email_size = (params.containsKey('max_multiqc_email_size') ? params.max_multiqc_email_size : 0) as nextflow.util.MemoryUnit
-        def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "$projectDir", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes() ]
-        def sf = new File("$projectDir/assets/sendmail_template.txt")
-        def sendmail_template = engine.createTemplate(sf).make(smail_fields)
-        def sendmail_html = sendmail_template.toString()
-
-        // Send the HTML e-mail
-        Map colors = logColours(params.monochrome_logs)
-        if (email_address) {
-            try {
-                if (params.plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') }
-                // Try to send HTML e-mail using sendmail
-                def sendmail_tf = new File(workflow.launchDir.toString(), ".sendmail_tmp.html")
-                sendmail_tf.withWriter { w -> w << sendmail_html }
-                [ 'sendmail', '-t' ].execute() << sendmail_html
-                log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-"
-            } catch (all) {
-                // Catch failures and try with plaintext
-                def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ]
-                if ( mqc_report != null && mqc_report.size() <= max_multiqc_email_size.toBytes() ) {
-                    mail_cmd += [ '-A', mqc_report ]
-                }
-                mail_cmd.execute() << email_html
-                log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (mail)-"
-            }
-        }
-
-        // Write summary e-mail HTML to a file
-        def output_hf = new File(workflow.launchDir.toString(), ".pipeline_report.html")
-        output_hf.withWriter { w -> w << email_html }
-        FilesEx.copyTo(output_hf.toPath(), "${params.outdir}/pipeline_info/pipeline_report.html");
-        output_hf.delete()
-
-        // Write summary e-mail TXT to a file
-        def output_tf = new File(workflow.launchDir.toString(), ".pipeline_report.txt")
-        output_tf.withWriter { w -> w << email_txt }
-        FilesEx.copyTo(output_tf.toPath(), "${params.outdir}/pipeline_info/pipeline_report.txt");
-        output_tf.delete()
-    }
-
-    //
-    // Construct and send a notification to a web server as JSON
-    // e.g. Microsoft Teams and Slack
-    //
-    public static void IM_notification(workflow, params, summary_params, projectDir, log) {
-        def hook_url = params.hook_url
-
-        def summary = [:]
-        for (group in summary_params.keySet()) {
-            summary << summary_params[group]
-        }
-
-        def misc_fields = [:]
-        misc_fields['start'] = workflow.start
-        misc_fields['complete'] = workflow.complete
-        misc_fields['scriptfile'] = workflow.scriptFile
-        misc_fields['scriptid'] = workflow.scriptId
-        if (workflow.repository) misc_fields['repository'] = workflow.repository
-        if (workflow.commitId) misc_fields['commitid'] = workflow.commitId
-        if (workflow.revision) misc_fields['revision'] = workflow.revision
-        misc_fields['nxf_version'] = workflow.nextflow.version
-        misc_fields['nxf_build'] = workflow.nextflow.build
-        misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp
-
-        def msg_fields = [:]
-        msg_fields['version'] = NfcoreTemplate.version(workflow)
-        msg_fields['runName'] = workflow.runName
-        msg_fields['success'] = workflow.success
-        msg_fields['dateComplete'] = workflow.complete
-        msg_fields['duration'] = workflow.duration
-        msg_fields['exitStatus'] = workflow.exitStatus
-        msg_fields['errorMessage'] = (workflow.errorMessage ?: 'None')
-        msg_fields['errorReport'] = (workflow.errorReport ?: 'None')
-        msg_fields['commandLine'] = workflow.commandLine.replaceFirst(/ +--hook_url +[^ ]+/, "")
-        msg_fields['projectDir'] = workflow.projectDir
-        msg_fields['summary'] = summary << misc_fields
-
-        // Render the JSON template
-        def engine = new groovy.text.GStringTemplateEngine()
-        // Different JSON depending on the service provider
-        // Defaults to "Adaptive Cards" (https://adaptivecards.io), except Slack which has its own format
-        def json_path = hook_url.contains("hooks.slack.com") ? "slackreport.json" : "adaptivecard.json"
-        def hf = new File("$projectDir/assets/${json_path}")
-        def json_template = engine.createTemplate(hf).make(msg_fields)
-        def json_message = json_template.toString()
-
-        // POST
-        def post = new URL(hook_url).openConnection();
-        post.setRequestMethod("POST")
-        post.setDoOutput(true)
-        post.setRequestProperty("Content-Type", "application/json")
-        post.getOutputStream().write(json_message.getBytes("UTF-8"));
-        def postRC = post.getResponseCode();
-        if (!
postRC.equals(200)) { - log.warn(post.getErrorStream().getText()); - } - } - - // - // Dump pipeline parameters in a json file - // - public static void dump_parameters(workflow, params) { - def timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') - def filename = "params_${timestamp}.json" - def temp_pf = new File(workflow.launchDir.toString(), ".${filename}") - def jsonStr = JsonOutput.toJson(params) - temp_pf.text = JsonOutput.prettyPrint(jsonStr) - - FilesEx.copyTo(temp_pf.toPath(), "${params.outdir}/pipeline_info/params_${timestamp}.json") - temp_pf.delete() - } - - // - // Print pipeline summary on completion - // - public static void summary(workflow, params, log) { - Map colors = logColours(params.monochrome_logs) - if (workflow.success) { - if (workflow.stats.ignoredCount == 0) { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-" - } else { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.yellow} Pipeline completed successfully, but with errored process(es) ${colors.reset}-" - } - } else { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed with errors${colors.reset}-" - } - } - - // - // ANSII Colours used for terminal logging - // - public static Map logColours(Boolean monochrome_logs) { - Map colorcodes = [:] - - // Reset / Meta - colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" - colorcodes['bold'] = monochrome_logs ? '' : "\033[1m" - colorcodes['dim'] = monochrome_logs ? '' : "\033[2m" - colorcodes['underlined'] = monochrome_logs ? '' : "\033[4m" - colorcodes['blink'] = monochrome_logs ? '' : "\033[5m" - colorcodes['reverse'] = monochrome_logs ? '' : "\033[7m" - colorcodes['hidden'] = monochrome_logs ? '' : "\033[8m" - - // Regular Colors - colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" - colorcodes['red'] = monochrome_logs ? '' : "\033[0;31m" - colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m" - colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" - colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" - colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" - colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" - colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" - - // Bold - colorcodes['bblack'] = monochrome_logs ? '' : "\033[1;30m" - colorcodes['bred'] = monochrome_logs ? '' : "\033[1;31m" - colorcodes['bgreen'] = monochrome_logs ? '' : "\033[1;32m" - colorcodes['byellow'] = monochrome_logs ? '' : "\033[1;33m" - colorcodes['bblue'] = monochrome_logs ? '' : "\033[1;34m" - colorcodes['bpurple'] = monochrome_logs ? '' : "\033[1;35m" - colorcodes['bcyan'] = monochrome_logs ? '' : "\033[1;36m" - colorcodes['bwhite'] = monochrome_logs ? '' : "\033[1;37m" - - // Underline - colorcodes['ublack'] = monochrome_logs ? '' : "\033[4;30m" - colorcodes['ured'] = monochrome_logs ? '' : "\033[4;31m" - colorcodes['ugreen'] = monochrome_logs ? '' : "\033[4;32m" - colorcodes['uyellow'] = monochrome_logs ? '' : "\033[4;33m" - colorcodes['ublue'] = monochrome_logs ? '' : "\033[4;34m" - colorcodes['upurple'] = monochrome_logs ? '' : "\033[4;35m" - colorcodes['ucyan'] = monochrome_logs ? '' : "\033[4;36m" - colorcodes['uwhite'] = monochrome_logs ? '' : "\033[4;37m" - - // High Intensity - colorcodes['iblack'] = monochrome_logs ? '' : "\033[0;90m" - colorcodes['ired'] = monochrome_logs ? '' : "\033[0;91m" - colorcodes['igreen'] = monochrome_logs ? '' : "\033[0;92m" - colorcodes['iyellow'] = monochrome_logs ? 
'' : "\033[0;93m" - colorcodes['iblue'] = monochrome_logs ? '' : "\033[0;94m" - colorcodes['ipurple'] = monochrome_logs ? '' : "\033[0;95m" - colorcodes['icyan'] = monochrome_logs ? '' : "\033[0;96m" - colorcodes['iwhite'] = monochrome_logs ? '' : "\033[0;97m" - - // Bold High Intensity - colorcodes['biblack'] = monochrome_logs ? '' : "\033[1;90m" - colorcodes['bired'] = monochrome_logs ? '' : "\033[1;91m" - colorcodes['bigreen'] = monochrome_logs ? '' : "\033[1;92m" - colorcodes['biyellow'] = monochrome_logs ? '' : "\033[1;93m" - colorcodes['biblue'] = monochrome_logs ? '' : "\033[1;94m" - colorcodes['bipurple'] = monochrome_logs ? '' : "\033[1;95m" - colorcodes['bicyan'] = monochrome_logs ? '' : "\033[1;96m" - colorcodes['biwhite'] = monochrome_logs ? '' : "\033[1;97m" - - return colorcodes - } - - // - // Does what is says on the tin - // - public static String dashedLine(monochrome_logs) { - Map colors = logColours(monochrome_logs) - return "-${colors.dim}----------------------------------------------------${colors.reset}-" - } - - // - // nf-core logo - // - public static String logo(workflow, monochrome_logs) { - Map colors = logColours(monochrome_logs) - String workflow_version = NfcoreTemplate.version(workflow) - String.format( - """\n - ${dashedLine(monochrome_logs)} - ${colors.green},--.${colors.black}/${colors.green},-.${colors.reset} - ${colors.blue} ___ __ __ __ ___ ${colors.green}/,-._.--~\'${colors.reset} - ${colors.blue} |\\ | |__ __ / ` / \\ |__) |__ ${colors.yellow}} {${colors.reset} - ${colors.blue} | \\| | \\__, \\__/ | \\ |___ ${colors.green}\\`-._,-`-,${colors.reset} - ${colors.green}`._,._,\'${colors.reset} - ${colors.purple} ${workflow.manifest.name} ${workflow_version}${colors.reset} - ${dashedLine(monochrome_logs)} - """.stripIndent() - ) - } -} diff --git a/lib/Utils.groovy b/lib/Utils.groovy old mode 100644 new mode 100755 index 2848f116..07475fe1 --- a/lib/Utils.groovy +++ b/lib/Utils.groovy @@ -1,47 +1,21 @@ -// -// This file holds several Groovy functions that could be useful for any Nextflow pipeline -// +import groovy.json.JsonSlurper -import org.yaml.snakeyaml.Yaml -class Utils { - - // - // When running with -profile conda, warn if channels have not been set-up appropriately - // - public static void checkCondaChannels(log) { - Yaml parser = new Yaml() - def channels = [] - try { - def config = parser.load('conda config --show channels'.execute().text) - channels = config.channels - } catch (NullPointerException | IOException e) { - log.warn 'Could not verify conda channel configuration.' - return - } - - // Check that all channels are present - // This channel list is ordered by required channel priority. 
-        def required_channels_in_order = ['conda-forge', 'bioconda', 'defaults']
-        def channels_missing = ((required_channels_in_order as Set) - (channels as Set)) as Boolean
-
-        // Check that they are in the right order
-        def channel_priority_violation = false
-        def n = required_channels_in_order.size()
-        for (int i = 0; i < n - 1; i++) {
-            channel_priority_violation |= !(channels.indexOf(required_channels_in_order[i]) < channels.indexOf(required_channels_in_order[i+1]))
-        }
-
-        if (channels_missing | channel_priority_violation) {
-            log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
-                "  There is a problem with your Conda configuration!\n\n" +
-                "  You will need to set-up the conda-forge and bioconda channels correctly.\n" +
-                "  Please refer to https://bioconda.github.io/\n" +
-                "  The observed channel order is \n" +
-                "  ${channels}\n" +
-                "  but the following channel order is required:\n" +
-                "  ${required_channels_in_order}\n" +
-                "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
+class WorkflowScrnaseq {
+    // Retrieve the aligner-specific protocol based on the specified protocol.
+    // Returns a map ["protocol": protocol, "extra_args": <extra args>, "whitelist": <path to whitelist>]
+    // extra_args and whitelist are optional.
+    public static Map getProtocol(workflow, log, aligner, protocol) {
+        def jsonSlurper = new JsonSlurper()
+        def json = new File("${workflow.projectDir}/assets/protocols.json").text
+        def protocols = jsonSlurper.parseText(json)
+        def aligner_map = protocols[aligner]
+        if(aligner_map.containsKey(protocol)) {
+            return aligner_map[protocol]
+        } else {
+            log.warn("Protocol '${protocol}' not recognized by the pipeline. Passing on the protocol to the aligner unmodified.")
+            return ["protocol": protocol]
         }
     }
+
 }
diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy
deleted file mode 100755
index 6ec444fb..00000000
--- a/lib/WorkflowMain.groovy
+++ /dev/null
@@ -1,62 +0,0 @@
-//
-// This file holds several functions specific to the main.nf workflow in the nf-core/scrnaseq pipeline
-//
-
-import nextflow.Nextflow
-
-class WorkflowMain {
-
-    //
-    // Citation string for pipeline
-    //
-    public static String citation(workflow) {
-        return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" +
-            "* The pipeline\n" +
-            "  https://doi.org/10.5281/zenodo.3568187\n\n" +
-            "* The nf-core framework\n" +
-            "  https://doi.org/10.1038/s41587-020-0439-x\n\n" +
-            "* Software dependencies\n" +
-            "  https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md"
-    }
-
-
-    //
-    // Validate parameters and print summary to screen
-    //
-    public static void initialise(workflow, params, log) {
-
-        // Print workflow version and exit on --version
-        if (params.version) {
-            String workflow_version = NfcoreTemplate.version(workflow)
-            log.info "${workflow.manifest.name} ${workflow_version}"
-            System.exit(0)
-        }
-
-        // Check that a -profile or Nextflow config has been provided to run the pipeline
-        NfcoreTemplate.checkConfigProvided(workflow, log)
-
-        // Check that conda channels are set-up correctly
-        if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
-            Utils.checkCondaChannels(log)
-        }
-
-        // Check AWS batch settings
-        NfcoreTemplate.awsBatch(workflow, params)
-
-        // Check input has been provided
-        if (!params.input) {
-            Nextflow.error("Please provide an input samplesheet to the pipeline e.g. '--input samplesheet.csv'")
-        }
-    }
-    //
-    // Get attribute from genome config file e.g. 
fasta - // - public static Object getGenomeAttribute(params, attribute) { - if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { - if (params.genomes[ params.genome ].containsKey(attribute)) { - return params.genomes[ params.genome ][ attribute ] - } - } - return null - } -} diff --git a/lib/WorkflowScrnaseq.groovy b/lib/WorkflowScrnaseq.groovy deleted file mode 100755 index e4273887..00000000 --- a/lib/WorkflowScrnaseq.groovy +++ /dev/null @@ -1,143 +0,0 @@ -// -// This file holds several functions specific to the workflow/scrnaseq.nf in the nf-core/scrnaseq pipeline -// - -import nextflow.Nextflow -import groovy.text.SimpleTemplateEngine -import groovy.json.JsonSlurper - - -class WorkflowScrnaseq { - - // - // Check and validate parameters - // - public static void initialise(params, log) { - genomeExists(params, log) - - if (!params.input) { - Nextflow.error "Please provide an input samplesheet with --input" - } - - if (!params.fasta) { - Nextflow.error "Genome fasta file not specified with e.g. '--fasta genome.fa' or via a detectable config file." - } - } - - // - // Get workflow summary for MultiQC - // - public static String paramsSummaryMultiqc(workflow, summary) { - String summary_section = '' - for (group in summary.keySet()) { - def group_params = summary.get(group) // This gets the parameters of that particular group - if (group_params) { - summary_section += "

    <p style=\"font-size:110%\"><b>$group</b></p>\n"
-                summary_section += "    <dl class=\"dl-horizontal\">\n"
-                for (param in group_params.keySet()) {
-                    summary_section += "        <dt>$param</dt><dd><samp>${group_params.get(param) ?: '<span style=\"color:#999999;\">N/A</a>'}</samp></dd>\n"
-                }
-                summary_section += '    </dl>\n'
-            }
-        }
-
-        String yaml_file_text = "id: '${workflow.manifest.name.replace('/', '-')}-summary'\n"
-        yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n"
-        yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n"
-        yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n"
-        yaml_file_text += "plot_type: 'html'\n"
-        yaml_file_text += 'data: |\n'
-        yaml_file_text += "${summary_section}"
-        return yaml_file_text
-    }
-
-    //
-    // Generate methods description for MultiQC
-    //
-
-    public static String toolCitationText(params) {
-
-        // TODO: Optionally add in-text citation tools to this list.
-        // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "",
-        // Uncomment function in methodsDescriptionText to render in MultiQC report
-        def citation_text = [
-                "Tools used in the workflow included:",
-                "FastQC (Andrews 2010),",
-                "MultiQC (Ewels et al. 2016)",
-                "."
-            ].join(' ').trim()
-
-        return citation_text
-    }
-
-    public static String toolBibliographyText(params) {
-
-        // TODO Optionally add bibliographic entries to this list.
-        // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "<li>Author (2023) Pub name, Journal, DOI</li>" : "",
-        // Uncomment function in methodsDescriptionText to render in MultiQC report
-        def reference_text = [
-                "<li>Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).</li>",
-                "<li>Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics , 32(19), 3047–3048. doi: /10.1093/bioinformatics/btw354</li>"
-            ].join(' ').trim()
-
-        return reference_text
-    }
-
-    public static String methodsDescriptionText(run_workflow, mqc_methods_yaml, params) {
-        // Convert to a named map so can be used as with familar NXF ${workflow} variable syntax in the MultiQC YML file
-        def meta = [:]
-        meta.workflow = run_workflow.toMap()
-        meta["manifest_map"] = run_workflow.manifest.toMap()
-
-        // Pipeline DOI
-        meta["doi_text"] = meta.manifest_map.doi ? "(doi: ${meta.manifest_map.doi})" : ""
-        meta["nodoi_text"] = meta.manifest_map.doi ? "": "<li>If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used. </li>"
-
-        // Tool references
-        meta["tool_citations"] = ""
-        meta["tool_bibliography"] = ""
-
-        // TODO Only uncomment below if logic in toolCitationText/toolBibliographyText has been filled!
-        //meta["tool_citations"] = toolCitationText(params).replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".")
-        //meta["tool_bibliography"] = toolBibliographyText(params)
-
-
-        def methods_text = mqc_methods_yaml.text
-
-        def engine = new SimpleTemplateEngine()
-        def description_html = engine.createTemplate(methods_text).make(meta)
-
-        return description_html
-    }
-
-    //
-    // Exit pipeline if incorrect --genome key provided
-    static void genomeExists(params, log) {
-        if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) {
-            def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
-                "  Genome '${params.genome}' not found in any config files provided to the pipeline.\n" +
-                "  Currently, the available genome keys are:\n" +
-                "  ${params.genomes.keySet().join(", ")}\n" +
-                "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
-            Nextflow.error(error_string)
-        }
-    }
-
-    //
-    // Retrieve the aligner-specific protocol based on the specified protocol.
-    // Returns a map ["protocol": protocol, "extra_args": <extra args>, "whitelist": <path to whitelist>]
-    // extra_args and whitelist are optional.
-    public static Map getProtocol(workflow, log, aligner, protocol) {
-        def jsonSlurper = new JsonSlurper()
-        def json = new File("${workflow.projectDir}/assets/protocols.json").text
-        def protocols = jsonSlurper.parseText(json)
-        def aligner_map = protocols[aligner]
-        if(aligner_map.containsKey(protocol)) {
-            return aligner_map[protocol]
-        } else {
-            log.warn("Protocol '${protocol}' not recognized by the pipeline. 
Passing on the protocol to the aligner unmodified.") - return ["protocol": protocol] - } - } - -} diff --git a/lib/nfcore_external_java_deps.jar b/lib/nfcore_external_java_deps.jar deleted file mode 100644 index 805c8bb5..00000000 Binary files a/lib/nfcore_external_java_deps.jar and /dev/null differ diff --git a/main.nf b/main.nf index 670b64ce..7d8ba356 100644 --- a/main.nf +++ b/main.nf @@ -13,59 +13,99 @@ nextflow.enable.dsl = 2 /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - VALIDATE & PRINT PARAMETER SUMMARY + IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS / WORKFLOWS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -params.fasta = WorkflowMain.getGenomeAttribute(params, 'fasta') -params.gtf = WorkflowMain.getGenomeAttribute(params, 'gtf') - -include { validateParameters; paramsHelp } from 'plugin/nf-validation' - -// Print help message if needed -if (params.help) { - def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs) - def citation = '\n' + WorkflowMain.citation(workflow) + '\n' - def String command = "nextflow run ${workflow.manifest.name} --input samplesheet.csv --genome GRCh37 -profile docker" - log.info logo + paramsHelp(command) + citation + NfcoreTemplate.dashedLine(params.monochrome_logs) - System.exit(0) -} - -// Validate input parameters -if (params.validate_params) { - validateParameters() -} - -WorkflowMain.initialise(workflow, params, log) +include { SCRNASEQ } from './workflows/scrnaseq' +include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_scrnaseq_pipeline' +include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_scrnaseq_pipeline' +include { getGenomeAttribute } from './subworkflows/local/utils_nfcore_scrnaseq_pipeline' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - NAMED WORKFLOW FOR PIPELINE + GENOME PARAMETER VALUES ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ +// we cannot modify params. here, we must load the files +ch_genome_fasta = params.genome ? file( getGenomeAttribute('fasta'), checkIfExists: true ) : [] +ch_gtf = params.genome ? 
file( getGenomeAttribute('gtf'), checkIfExists: true ) : [] -include { SCRNASEQ } from './workflows/scrnaseq' +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + NAMED WORKFLOWS FOR PIPELINE +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ // -// WORKFLOW: Run main scrnaseq analysis pipeline +// WORKFLOW: Run main analysis pipeline depending on type of input // +workflow NFCORE_SCRNASEQ { -workflow NFCORE_SCRNASEQ{ - SCRNASEQ() -} + take: + samplesheet // channel: samplesheet read in from --input + ch_genome_fasta + ch_gtf + main: + + // + // WORKFLOW: Run pipeline + // + SCRNASEQ ( + samplesheet, + ch_genome_fasta, + ch_gtf + ) + + emit: + multiqc_report = SCRNASEQ.out.multiqc_report // channel: /path/to/multiqc_report.html + +} /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - RUN ALL WORKFLOWS + RUN MAIN WORKFLOW ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// -// WORKFLOW: Execute a single named workflow for the pipeline -// See: https://github.com/nf-core/rnaseq/issues/619 -// workflow { - NFCORE_SCRNASEQ () + + main: + + // + // SUBWORKFLOW: Run initialisation tasks + // + PIPELINE_INITIALISATION ( + params.version, + params.help, + params.validate_params, + params.monochrome_logs, + args, + params.outdir, + params.input + ) + + // + // WORKFLOW: Run main workflow + // + NFCORE_SCRNASEQ ( + PIPELINE_INITIALISATION.out.samplesheet, + ch_genome_fasta, + ch_gtf + ) + + // + // SUBWORKFLOW: Run completion tasks + // + PIPELINE_COMPLETION ( + params.email, + params.email_on_fail, + params.plaintext_email, + params.outdir, + params.monochrome_logs, + params.hook_url, + NFCORE_SCRNASEQ.out.multiqc_report + ) } /* diff --git a/modules.json b/modules.json index 94fd5c8b..50c3f5b7 100644 --- a/modules.json +++ b/modules.json @@ -7,27 +7,27 @@ "nf-core": { "cellranger/count": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "e66183d2ab6a5c2f3fd66b2bee942287cf65536c", "installed_by": ["modules"] }, "cellranger/mkgtf": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "e66183d2ab6a5c2f3fd66b2bee942287cf65536c", "installed_by": ["modules"] }, "cellranger/mkref": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "e66183d2ab6a5c2f3fd66b2bee942287cf65536c", "installed_by": ["modules"] }, "cellrangerarc/count": { "branch": "master", - "git_sha": "4196b1b2e7ce265892f3979eabf7a9ddc030702f", + "git_sha": "18e53e27cfeca5dbbfbeee675c05438dec68245f", "installed_by": ["modules"] }, "cellrangerarc/mkgtf": { "branch": "master", - "git_sha": "4196b1b2e7ce265892f3979eabf7a9ddc030702f", + "git_sha": "575e1bc54b083fb15e7dd8b5fcc40bea60e8ce83", "installed_by": ["modules"] }, "cellrangerarc/mkref": { @@ -35,19 +35,14 @@ "git_sha": "4196b1b2e7ce265892f3979eabf7a9ddc030702f", "installed_by": ["modules"] }, - "custom/dumpsoftwareversions": { - "branch": "master", - "git_sha": "8ec825f465b9c17f9d83000022995b4f7de6fe93", - "installed_by": ["modules"] - }, "fastqc": { "branch": "master", - "git_sha": "617777a807a1770f73deb38c80004bac06807eef", + "git_sha": "f4ae1d942bd50c5c0b9bd2de1393ce38315ba57c", "installed_by": ["modules"] }, "gffread": { "branch": "master", - "git_sha": "b8858b10356b87db4325341872816f9672541b7b", + "git_sha": "b1b959609bda44341120aed1766329909f54b8d0", "installed_by": ["modules"] }, "gunzip": { @@ 
-57,22 +52,22 @@
     },
     "kallistobustools/count": {
       "branch": "master",
-      "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
+      "git_sha": "9d3e489286eead7dfe1010fd324904d8b698eca7",
       "installed_by": ["modules"]
     },
     "kallistobustools/ref": {
       "branch": "master",
-      "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
+      "git_sha": "de8215983defba48cd81961d620a9e844f11c7e7",
       "installed_by": ["modules"]
     },
     "multiqc": {
       "branch": "master",
-      "git_sha": "8ec825f465b9c17f9d83000022995b4f7de6fe93",
+      "git_sha": "b7ebe95761cd389603f9cc0e0dc384c0f663815a",
       "installed_by": ["modules"]
     },
     "star/genomegenerate": {
       "branch": "master",
-      "git_sha": "d87a6e2156c2099c09280fa70776eaf0a824817a",
+      "git_sha": "a21faa6a3481af92a343a10926f59c189a2c16c9",
       "installed_by": ["modules"]
     },
     "universc": {
@@ -86,6 +81,25 @@
       "installed_by": ["modules"]
     }
   }
+  },
+  "subworkflows": {
+    "nf-core": {
+      "utils_nextflow_pipeline": {
+        "branch": "master",
+        "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa",
+        "installed_by": ["subworkflows"]
+      },
+      "utils_nfcore_pipeline": {
+        "branch": "master",
+        "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa",
+        "installed_by": ["subworkflows"]
+      },
+      "utils_nfvalidation_plugin": {
+        "branch": "master",
+        "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa",
+        "installed_by": ["subworkflows"]
+      }
+    }
  }
 }
}
diff --git a/modules/local/concat_h5ad.nf b/modules/local/concat_h5ad.nf
index 96920f9e..cd08cbbe 100644
--- a/modules/local/concat_h5ad.nf
+++ b/modules/local/concat_h5ad.nf
@@ -7,7 +7,7 @@ process CONCAT_H5AD {
         'biocontainers/scanpy:1.7.2--pyhdfd78af_0' }"

     input:
-    path h5ad
+    tuple val(input_type), path(h5ad)
     path samplesheet

     output:
@@ -20,7 +20,7 @@ process CONCAT_H5AD {
     """
     concat_h5ad.py \\
         --input $samplesheet \\
-        --out combined_matrix.h5ad \\
+        --out combined_${input_type}_matrix.h5ad \\
         --suffix "_matrix.h5ad"
     """

diff --git a/modules/local/emptydrops.nf b/modules/local/emptydrops.nf
new file mode 100644
index 00000000..e0b77435
--- /dev/null
+++ b/modules/local/emptydrops.nf
@@ -0,0 +1,101 @@
+process EMPTYDROPS_CELL_CALLING {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda "bioconda::bioconductor-dropletutils"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/bioconductor-dropletutils:1.18.0--r42hf17093f_1' :
+        'quay.io/biocontainers/bioconductor-dropletutils:1.18.0--r42hf17093f_1' }"
+
+    input:
+    // inputs from the cellranger nf-core module do not come in a single per-sample dir;
+    // for each sample, the sub-folders and files are staged directly as an array.
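+    // illustrative example (an assumption for clarity, consistent with the script block below):
+    // for cellranger this means `matrix.mtx.gz`, `barcodes.tsv.gz` and `features.tsv.gz` are
+    // addressed directly from the task work dir rather than under a per-sample folder.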
+ tuple val(meta), path(inputs) + + output: + tuple val(meta), path("emptydrops_filtered"), emit: filtered_matrices + + when: + task.ext.when == null || task.ext.when + + script: + if (params.aligner == "cellranger") { + + matrix = "matrix.mtx.gz" + barcodes = "barcodes.tsv.gz" + features = "features.tsv.gz" + + } else if (params.aligner == "kallisto") { + + matrix = "counts_unfiltered/*.mtx" + barcodes = "counts_unfiltered/*.barcodes.txt" + features = "counts_unfiltered/*.genes.names.txt" + + // kallisto allows the following workflows: ["standard", "lamanno", "nac"] + // lamanno creates "spliced" and "unspliced" + // nac creates "nascent", "ambiguous" "mature" + // also, lamanno produces a barcodes and genes file for both spliced and unspliced + // while nac keep only one for all the different .mtx files produced + kb_non_standard_files = "" + if (params.kb_workflow == "lamanno") { + kb_non_standard_files = "spliced unspliced" + matrix = "counts_unfiltered/\${input_type}.mtx" + barcodes = "counts_unfiltered/\${input_type}.barcodes.txt" + features = "counts_unfiltered/\${input_type}.genes.txt" + } + if (params.kb_workflow == "nac") { + kb_non_standard_files = "nascent ambiguous mature" + matrix = "counts_unfiltered/*\${input_type}.mtx" + features = "counts_unfiltered/*.genes.txt" + } // barcodes tsv has same pattern as standard workflow + + } else if (params.aligner == "alevin") { + + matrix = "*_alevin_results/af_quant/alevin/quants_mat.mtx" + barcodes = "*_alevin_results/af_quant/alevin/quants_mat_rows.txt" + features = "*_alevin_results/af_quant/alevin/quants_mat_cols.txt" + + } else if (params.aligner == 'star') { + + matrix = "raw/matrix.mtx.gz" + barcodes = "raw/barcodes.tsv.gz" + features = "raw/features.tsv.gz" + + } + + // + // run script + // + if (params.aligner == 'kallisto' && params.kb_workflow != 'standard') + """ + # convert file types + for input_type in ${kb_non_standard_files} ; do + mkdir -p emptydrops_filtered/\${input_type} + emptydrops_cell_calling.R \\ + ${matrix} \\ + ${barcodes} \\ + ${features} \\ + emptydrops_filtered/\${input_type} \\ + ${params.aligner} \\ + 0 + done + """ + + else + """ + mkdir emptydrops_filtered/ + emptydrops_cell_calling.R \\ + $matrix \\ + $barcodes \\ + $features \\ + emptydrops_filtered \\ + ${params.aligner} \\ + 0 + """ + + stub: + """ + mkdir emptydrops_filtered + touch emptydrops_filtered/empty_file + """ +} diff --git a/modules/local/gene_map.nf b/modules/local/gene_map.nf deleted file mode 100644 index 9fd29e0a..00000000 --- a/modules/local/gene_map.nf +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Reformat design file and check validity - */ -process GENE_MAP { - tag "$gtf" - label 'process_low' - - conda "conda-forge::python=3.8.3" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/python:3.8.3' : - 'biocontainers/python:3.8.3' }" - - input: - path gtf - - output: - path "transcripts_to_genes.txt" , emit: gene_map - - when: - task.ext.when == null || task.ext.when - - script: - if("${gtf}".endsWith('.gz')){ - name = "${gtf.baseName}" - unzip = "gunzip -f ${gtf}" - } else { - unzip = "" - name = "${gtf}" - } - """ - $unzip - cat $name | t2g.py --use_version > transcripts_to_genes.txt - """ -} diff --git a/modules/local/mtx_to_h5ad.nf b/modules/local/mtx_to_h5ad.nf index 84d98608..ba8a807e 100644 --- a/modules/local/mtx_to_h5ad.nf +++ b/modules/local/mtx_to_h5ad.nf @@ -15,52 +15,100 @@ process MTX_TO_H5AD { path star_index output: - path "${meta.id}/*h5ad", emit: h5ad - path "${meta.id}/*", emit: counts - path "versions.yml", emit: versions + tuple val(input_type), path("${meta.id}/*h5ad") , emit: h5ad + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when script: - // def file paths for aligners (except cellranger) - if (params.aligner == 'kallisto') { - mtx_matrix = "*count/counts_unfiltered/*.mtx" - barcodes_tsv = "*count/counts_unfiltered/*.barcodes.txt" - features_tsv = "*count/counts_unfiltered/*.genes.txt" + // Get a file to check input type. Some aligners bring arrays instead of a single file. + def input_to_check = (inputs instanceof String) ? inputs : inputs[0] + + // check input type of inputs + input_type = (input_to_check.toUriString().contains('unfiltered') || input_to_check.toUriString().contains('raw')) ? 'raw' : 'filtered' + if ( params.aligner == 'alevin' ) { input_type = 'raw' } // alevin has its own filtering methods and mostly output a single mtx, raw here means, the base tool output + if (input_to_check.toUriString().contains('emptydrops')) { input_type = 'custom_emptydrops_filter' } + + // def file paths for aligners. Cellranger is normally converted with the .h5 files + // However, the emptydrops call, always generate .mtx files, thus, cellranger 'emptydrops' required a parsing + if (params.aligner in [ 'cellranger', 'cellrangerarc' ] && input_type == 'custom_emptydrops_filter') { + + aligner = 'cellranger' + txp2gene = '' + star_index = '' + mtx_matrix = "emptydrops_filtered/matrix.mtx" + barcodes_tsv = "emptydrops_filtered/barcodes.tsv" + features_tsv = "emptydrops_filtered/features.tsv" + + } else if (params.aligner == 'kallisto') { + + kb_pattern = (input_type == 'raw') ? 'un' : '' + mtx_dir = (input_type == 'custom_emptydrops_filter') ? 
'emptydrops_filtered' : "counts_${kb_pattern}filtered" + if ((input_type == 'custom_emptydrops_filter') && (params.kb_workflow != 'standard')) { mtx_dir = 'emptydrops_filtered/\${input_type}' } // dir has subdirs for non-standard workflows + mtx_matrix = "${mtx_dir}/*.mtx" + barcodes_tsv = "${mtx_dir}/*.barcodes.txt" + features_tsv = "${mtx_dir}/*.genes.names.txt" + + // kallisto allows the following workflows: ["standard", "lamanno", "nac"] + // lamanno creates "spliced" and "unspliced" + // nac creates "nascent", "ambiguous" "mature" + // also, lamanno produces a barcodes and genes file for both spliced and unspliced + // while nac keep only one for all the different .mtx files produced + kb_non_standard_files = "" + if (params.kb_workflow == "lamanno") { + kb_non_standard_files = "spliced unspliced" + matrix = "${mtx_dir}/\${input_type}.mtx" + barcodes_tsv = "${mtx_dir}/\${input_type}.barcodes.txt" + features_tsv = "${mtx_dir}/\${input_type}.genes.txt" + } + if (params.kb_workflow == "nac") { + kb_non_standard_files = "nascent ambiguous mature" + matrix = "${mtx_dir}/*\${input_type}.mtx" + features_tsv = "${mtx_dir}/*.genes.txt" + } // barcodes tsv has same pattern as standard workflow + } else if (params.aligner == 'alevin') { - mtx_matrix = "*_alevin_results/af_quant/alevin/quants_mat.mtx" - barcodes_tsv = "*_alevin_results/af_quant/alevin/quants_mat_rows.txt" - features_tsv = "*_alevin_results/af_quant/alevin/quants_mat_cols.txt" + + // alevin does not have filtered/unfiltered results + mtx_dir = (input_type == 'custom_emptydrops_filter') ? 'emptydrops_filtered' : '*_alevin_results/af_quant/alevin' + mtx_matrix = "${mtx_dir}/quants_mat.mtx" + barcodes_tsv = "${mtx_dir}/quants_mat_rows.txt" + features_tsv = "${mtx_dir}/quants_mat_cols.txt" + } else if (params.aligner == 'star') { - mtx_matrix = "*.Solo.out/Gene*/filtered/matrix.mtx.gz" - barcodes_tsv = "*.Solo.out/Gene*/filtered/barcodes.tsv.gz" - features_tsv = "*.Solo.out/Gene*/filtered/features.tsv.gz" + + mtx_dir = (input_type == 'custom_emptydrops_filter') ? 'emptydrops_filtered' : "${input_type}" + suffix = (input_type == 'custom_emptydrops_filter') ? 
'' : '.gz' + mtx_matrix = "${mtx_dir}/matrix.mtx${suffix}" + barcodes_tsv = "${mtx_dir}/barcodes.tsv${suffix}" + features_tsv = "${mtx_dir}/features.tsv${suffix}" + } // // run script // - if (params.aligner in [ 'cellranger', 'cellrangerarc' ]) + if (params.aligner in [ 'cellranger', 'cellrangerarc' ] && input_type != 'custom_emptydrops_filter') """ # convert file types mtx_to_h5ad.py \\ --aligner cellranger \\ - --input filtered_feature_bc_matrix.h5 \\ + --input ${input_type}_feature_bc_matrix.h5 \\ --sample ${meta.id} \\ - --out ${meta.id}/${meta.id}_matrix.h5ad + --out ${meta.id}/${meta.id}_${input_type}_matrix.h5ad """ else if (params.aligner == 'kallisto' && params.kb_workflow != 'standard') """ # convert file types - for input_type in spliced unspliced ; do + for input_type in ${kb_non_standard_files} ; do mtx_to_h5ad.py \\ --aligner ${params.aligner} \\ --sample ${meta.id} \\ - --input *count/counts_unfiltered/\${input_type}.mtx \\ - --barcode *count/counts_unfiltered/\${input_type}.barcodes.txt \\ - --feature *count/counts_unfiltered/\${input_type}.genes.txt \\ + --input ${matrix} \\ + --barcode ${barcodes_tsv} \\ + --feature ${features_tsv} \\ --txp2gene ${txp2gene} \\ --star_index ${star_index} \\ --out ${meta.id}/${meta.id}_\${input_type}_matrix.h5ad ; @@ -79,7 +127,7 @@ process MTX_TO_H5AD { --feature $features_tsv \\ --txp2gene ${txp2gene} \\ --star_index ${star_index} \\ - --out ${meta.id}/${meta.id}_matrix.h5ad + --out ${meta.id}/${meta.id}_${input_type}_matrix.h5ad """ stub: diff --git a/modules/local/mtx_to_seurat.nf b/modules/local/mtx_to_seurat.nf index d83575a4..3ba636ff 100644 --- a/modules/local/mtx_to_seurat.nf +++ b/modules/local/mtx_to_seurat.nf @@ -19,23 +19,76 @@ process MTX_TO_SEURAT { script: def aligner = params.aligner + + + // Get a file to check input type. Some aligners bring arrays instead of a single file. + def input_to_check = (inputs instanceof String) ? inputs : inputs[0] + + // check input type of inputs + def is_emptydrops = '0' + input_type = (input_to_check.toUriString().contains('unfiltered') || input_to_check.toUriString().contains('raw')) ? 'raw' : 'filtered' + if ( params.aligner == 'alevin' ) { input_type = 'raw' } // alevin has its own filtering methods and mostly output a single mtx, raw here means, the base tool output + if (input_to_check.toUriString().contains('emptydrops')) { + input_type = 'custom_emptydrops_filter' + is_emptydrops = '--is_emptydrops' + } + + // def file paths for aligners. Cellranger is normally converted with the .h5 files + // However, the emptydrops call, always generate .mtx files, thus, cellranger 'emptydrops' required a parsing if (params.aligner in [ 'cellranger', 'cellrangerarc' ]) { - matrix = "matrix.mtx.gz" - barcodes = "barcodes.tsv.gz" - features = "features.tsv.gz" - } else if (params.aligner == "kallisto") { - matrix = "*count/counts_unfiltered/*.mtx" - barcodes = "*count/counts_unfiltered/*.barcodes.txt" - features = "*count/counts_unfiltered/*.genes.txt" + + mtx_dir = (input_type == 'custom_emptydrops_filter') ? 'emptydrops_filtered/' : '' + matrix = "${mtx_dir}matrix.mtx*" + barcodes = "${mtx_dir}barcodes.tsv*" + features = "${mtx_dir}features.tsv*" + + } else if (params.aligner == 'kallisto') { + + kb_pattern = (input_type == 'raw') ? 'un' : '' + mtx_dir = (input_type == 'custom_emptydrops_filter') ? 
'emptydrops_filtered' : "counts_${kb_pattern}filtered" + if ((input_type == 'custom_emptydrops_filter') && (params.kb_workflow != 'standard')) { mtx_dir = 'emptydrops_filtered/\${input_type}' } // dir has subdirs for non-standard workflows + matrix = "${mtx_dir}/*.mtx" + barcodes = "${mtx_dir}/*.barcodes.txt" + features = "${mtx_dir}/*.genes.names.txt" + + // kallisto allows the following workflows: ["standard", "lamanno", "nac"] + // lamanno creates "spliced" and "unspliced" + // nac creates "nascent", "ambiguous" "mature" + // also, lamanno produces a barcodes and genes file for both spliced and unspliced + // while nac keep only one for all the different .mtx files produced + kb_non_standard_files = "" + if (params.kb_workflow == "lamanno") { + kb_non_standard_files = "spliced unspliced" + matrix = "${mtx_dir}/\${input_type}.mtx" + barcodes = "${mtx_dir}/\${input_type}.barcodes.txt" + features = "${mtx_dir}/\${input_type}.genes.txt" + } + if (params.kb_workflow == "nac") { + kb_non_standard_files = "nascent ambiguous mature" + matrix = "${mtx_dir}/*\${input_type}.mtx" + features = "${mtx_dir}/*.genes.txt" + } // barcodes tsv has same pattern as standard workflow + } else if (params.aligner == "alevin") { - matrix = "*_alevin_results/af_quant/alevin/quants_mat.mtx" - barcodes = "*_alevin_results/af_quant/alevin/quants_mat_rows.txt" - features = "*_alevin_results/af_quant/alevin/quants_mat_cols.txt" + + mtx_dir = (input_type == 'custom_emptydrops_filter') ? 'emptydrops_filtered' : '*_alevin_results/af_quant/alevin' + matrix = "${mtx_dir}/quants_mat.mtx" + barcodes = "${mtx_dir}/quants_mat_rows.txt" + features = "${mtx_dir}/quants_mat_cols.txt" + } else if (params.aligner == 'star') { - matrix = "*.Solo.out/Gene*/filtered/matrix.mtx.gz" - barcodes = "*.Solo.out/Gene*/filtered/barcodes.tsv.gz" - features = "*.Solo.out/Gene*/filtered/features.tsv.gz" + + mtx_dir = (input_type == 'custom_emptydrops_filter') ? 'emptydrops_filtered' : "${input_type}" + suffix = (input_type == 'custom_emptydrops_filter') ? '' : '.gz' + matrix = "${mtx_dir}/matrix.mtx${suffix}" + barcodes = "${mtx_dir}/barcodes.tsv${suffix}" + features = "${mtx_dir}/features.tsv${suffix}" + } + + // + // run script + // """ mkdir ${meta.id} """ @@ -43,13 +96,14 @@ process MTX_TO_SEURAT { if (params.aligner == 'kallisto' && params.kb_workflow != 'standard') """ # convert file types - for input_type in spliced unspliced ; do + for input_type in ${kb_non_standard_files} ; do mtx_to_seurat.R \\ - *count/counts_unfiltered/\${input_type}.mtx \\ - *count/counts_unfiltered/\${input_type}.barcodes.txt \\ - *count/counts_unfiltered/\${input_type}.genes.txt \\ + ${matrix} \\ + ${barcodes} \\ + ${features} \\ ${meta.id}/${meta.id}_\${input_type}_matrix.rds \\ - ${aligner} + ${aligner} \\ + ${is_emptydrops} done """ @@ -59,8 +113,9 @@ process MTX_TO_SEURAT { $matrix \\ $barcodes \\ $features \\ - ${meta.id}/${meta.id}_matrix.rds \\ - ${aligner} + ${meta.id}/${meta.id}_${input_type}_matrix.rds \\ + ${aligner} \\ + ${is_emptydrops} """ stub: diff --git a/modules/local/samplesheet_check.nf b/modules/local/samplesheet_check.nf deleted file mode 100644 index feaf3dfc..00000000 --- a/modules/local/samplesheet_check.nf +++ /dev/null @@ -1,31 +0,0 @@ -process SAMPLESHEET_CHECK { - tag "$samplesheet" - label 'process_low' - - conda "conda-forge::python=3.8.3" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/python:3.8.3' : - 'biocontainers/python:3.8.3' }" - - input: - path samplesheet - - output: - path '*.csv' , emit: csv - path "versions.yml", emit: versions - - when: - task.ext.when == null || task.ext.when - - script: // This script is bundled with the pipeline, in nf-core/scrnaseq/bin/ - """ - check_samplesheet.py \\ - $samplesheet \\ - samplesheet.valid.csv - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - python: \$(python --version | sed 's/Python //g') - END_VERSIONS - """ -} diff --git a/modules/local/star_align.nf b/modules/local/star_align.nf index a7dfab3d..4b3df1e1 100644 --- a/modules/local/star_align.nf +++ b/modules/local/star_align.nf @@ -21,12 +21,14 @@ process STAR_ALIGN { val other_10x_parameters output: - tuple val(meta), path('*d.out.bam') , emit: bam - tuple val(meta), path('*.Solo.out') , emit: counts - tuple val(meta), path('*Log.final.out') , emit: log_final - tuple val(meta), path('*Log.out') , emit: log_out - tuple val(meta), path('*Log.progress.out'), emit: log_progress - path "versions.yml" , emit: versions + tuple val(meta), path('*d.out.bam') , emit: bam + tuple val(meta), path('*.Solo.out') , emit: counts + tuple val(meta), path ("*.Solo.out/Gene*/raw") , emit: raw_counts + tuple val(meta), path ("*.Solo.out/Gene*/filtered"), emit: filtered_counts + tuple val(meta), path('*Log.final.out') , emit: log_final + tuple val(meta), path('*Log.out') , emit: log_out + tuple val(meta), path('*Log.progress.out') , emit: log_progress + path "versions.yml" , emit: versions tuple val(meta), path('*sortedByCoord.out.bam') , optional:true, emit: bam_sorted tuple val(meta), path('*toTranscriptome.out.bam'), optional:true, emit: bam_transcript diff --git a/modules/nf-core/cellranger/count/environment.yml b/modules/nf-core/cellranger/count/environment.yml deleted file mode 100644 index 662f747d..00000000 --- a/modules/nf-core/cellranger/count/environment.yml +++ /dev/null @@ -1,5 +0,0 @@ -name: cellranger_count -channels: - - conda-forge - - bioconda - - defaults diff --git a/modules/nf-core/cellranger/count/main.nf b/modules/nf-core/cellranger/count/main.nf index d7a191fc..cf94615b 100644 --- a/modules/nf-core/cellranger/count/main.nf +++ b/modules/nf-core/cellranger/count/main.nf @@ -2,7 +2,7 @@ process CELLRANGER_COUNT { tag "$meta.id" label 'process_high' - container "nf-core/cellranger:7.1.0" + container "nf-core/cellranger:8.0.0" input: tuple val(meta), path(reads, stageAs: "fastq_???/*") @@ -32,7 +32,7 @@ process CELLRANGER_COUNT { def prefix = task.ext.prefix ?: "${meta.id}" """ mkdir -p "${prefix}/outs/" - touch ${prefix}/outs/fake_file.txt + echo "$prefix" > ${prefix}/outs/fake_file.txt cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/cellranger/count/meta.yml b/modules/nf-core/cellranger/count/meta.yml index a672180e..1f1768a8 100644 --- a/modules/nf-core/cellranger/count/meta.yml +++ b/modules/nf-core/cellranger/count/meta.yml @@ -10,7 +10,8 @@ tools: homepage: https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/what-is-cell-ranger documentation: https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/tutorial_ov tool_dev_url: https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/tutorial_ov - licence: 10x Genomics EULA + licence: + - 10x Genomics EULA input: - meta: type: map @@ -45,7 +46,7 @@ output: pattern: "versions.yml" authors: - "@ggabernet" - - "@Emiller88" + 
- "@edmundmiller" maintainers: - "@ggabernet" - - "@Emiller88" + - "@edmundmiller" diff --git a/modules/nf-core/cellranger/count/templates/cellranger_count.py b/modules/nf-core/cellranger/count/templates/cellranger_count.py index 4bfb9f4f..53360f23 100644 --- a/modules/nf-core/cellranger/count/templates/cellranger_count.py +++ b/modules/nf-core/cellranger/count/templates/cellranger_count.py @@ -34,11 +34,11 @@ def chunk_iter(seq, size): # Match R1 in the filename, but only if it is followed by a non-digit or non-character # match "file_R1.fastq.gz", "file.R1_000.fastq.gz", etc. but # do not match "SRR12345", "file_INFIXR12", etc -filename_pattern = r"([^a-zA-Z0-9])R1([^a-zA-Z0-9])" +filename_pattern = r'([^a-zA-Z0-9])R1([^a-zA-Z0-9])' -for i, (r1, r2) in enumerate(chunk_iter(fastqs, 2)): +for i, (r1, r2) in enumerate(chunk_iter(fastqs, 2), start=1): # double escapes are required because nextflow processes this python 'template' - if re.sub(filename_pattern, r"\\1R2\\2", r1.name) != r2.name: + if re.sub(filename_pattern, r'\\1R2\\2', r1.name) != r2.name: raise AssertionError( dedent( f"""\ diff --git a/modules/nf-core/cellranger/count/tests/main.nf.test b/modules/nf-core/cellranger/count/tests/main.nf.test new file mode 100644 index 00000000..dc8a58a5 --- /dev/null +++ b/modules/nf-core/cellranger/count/tests/main.nf.test @@ -0,0 +1,102 @@ +nextflow_process { + + name "Test Process CELLRANGER_COUNT" + script "../main.nf" + config "./nextflow.config" + process "CELLRANGER_COUNT" + + tag "modules" + tag "modules_nfcore" + tag "cellranger" + tag "cellranger/count" + tag "cellranger/mkref" + tag "cellranger/mkgtf" + + setup { + config "./nextflow.config" + + run("CELLRANGER_MKGTF") { + script "../../mkgtf/main.nf" + process { + """ + input[0] = file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true) + """ + } + } + run("CELLRANGER_MKREF") { + script "../../mkref/main.nf" + process { + """ + input[0] = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + input[1] = CELLRANGER_MKGTF.out.gtf + input[2] = "homo_sapiens_reference" + """ + } + } + } + + test("10x example file") { + config "./nextflow.config" + when { + process { + """ + input[0] = [ + [ id:'test_10x', single_end:false, strandedness:'auto' ], // meta map + [ + file(params.test_data['homo_sapiens']['10xgenomics']['cellranger']['test_10x_5k_cmvpos_tcells_gex1_fastq_1_gz'], checkIfExists: true), + file(params.test_data['homo_sapiens']['10xgenomics']['cellranger']['test_10x_5k_cmvpos_tcells_gex1_fastq_2_gz'], checkIfExists: true) + ] + ] + input[1] = CELLRANGER_MKREF.out.reference + """ + } + } + + then { + assertAll( + { assert process.success }, + { + assert snapshot( + process.out.versions, + process.out.outs[0][1].findAll { file(it).name !in [ + 'web_summary.html', // unstable checksum + 'barcodes.tsv.gz' // empty file in test data -> would raise linting error + ]} + ).match() + }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'web_summary.html' }).exists() }, + { assert file(process.out.outs.get(0).get(1).find { file(it).name == 'barcodes.tsv.gz' }).exists() } + ) + } + + } + + test("10x example file - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test_10x', single_end:false, strandedness:'auto' ], // meta map + [ + file(params.test_data['homo_sapiens']['10xgenomics']['cellranger']['test_10x_5k_cmvpos_tcells_gex1_fastq_1_gz'], checkIfExists: true), + 
file(params.test_data['homo_sapiens']['10xgenomics']['cellranger']['test_10x_5k_cmvpos_tcells_gex1_fastq_2_gz'], checkIfExists: true) + ] + ] + input[1] = CELLRANGER_MKREF.out.reference + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/cellranger/count/tests/main.nf.test.snap b/modules/nf-core/cellranger/count/tests/main.nf.test.snap new file mode 100644 index 00000000..77d7d486 --- /dev/null +++ b/modules/nf-core/cellranger/count/tests/main.nf.test.snap @@ -0,0 +1,63 @@ +{ + "10x example file": { + "content": [ + [ + "versions.yml:md5,3c769adb8bc0f753eb9283b89f6541dd" + ], + [ + "filtered_feature_bc_matrix.h5:md5,a875919b9d468efef9f846c95a7f53d6", + "features.tsv.gz:md5,9f93621be0bede2b75596ad255607633", + "matrix.mtx.gz:md5,149c4055b85f5235d8b93d9e20dcaf4e", + "metrics_summary.csv:md5,c295fe3ccbb6ce118482964ca9c748d4", + "molecule_info.h5:md5,ea14830a187799eac6312181d05530f4", + "possorted_genome_bam.bam:md5,d249eff0cb6eebcfdb46342071ea8388", + "possorted_genome_bam.bam.bai:md5,223db5969de6cc2603eaa8fd0d58f766", + "raw_feature_bc_matrix.h5:md5,c34335a1aadd0d8cb6c770034c658889", + "features.tsv.gz:md5,9f93621be0bede2b75596ad255607633", + "matrix.mtx.gz:md5,6395f3c1ec7700b0c6a72e432830ffc1" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-22T08:17:11.168345294" + }, + "10x example file - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test_10x", + "single_end": false, + "strandedness": "auto" + }, + "fake_file.txt:md5,0d98223c768861fd6af96f00148dbb8d" + ] + ], + "1": [ + "versions.yml:md5,443f4d697e6c549da3d3f3cd0e55b55c" + ], + "outs": [ + [ + { + "id": "test_10x", + "single_end": false, + "strandedness": "auto" + }, + "fake_file.txt:md5,0d98223c768861fd6af96f00148dbb8d" + ] + ], + "versions": [ + "versions.yml:md5,443f4d697e6c549da3d3f3cd0e55b55c" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-22T08:15:18.556883641" + } +} \ No newline at end of file diff --git a/modules/nf-core/cellranger/count/tests/nextflow.config b/modules/nf-core/cellranger/count/tests/nextflow.config new file mode 100644 index 00000000..2d06e957 --- /dev/null +++ b/modules/nf-core/cellranger/count/tests/nextflow.config @@ -0,0 +1,24 @@ +process { + withName: CELLRANGER_MKGTF { + ext.args = '--attribute=gene_biotype:protein_coding \ + --attribute=gene_biotype:lincRNA \ + --attribute=gene_biotype:antisense \ + --attribute=gene_biotype:IG_LV_gene \ + --attribute=gene_biotype:IG_V_gene \ + --attribute=gene_biotype:IG_V_pseudogene \ + --attribute=gene_biotype:IG_D_gene \ + --attribute=gene_biotype:IG_J_gene \ + --attribute=gene_biotype:IG_J_pseudogene \ + --attribute=gene_biotype:IG_C_gene \ + --attribute=gene_biotype:IG_C_pseudogene \ + --attribute=gene_biotype:TR_V_gene \ + --attribute=gene_biotype:TR_V_pseudogene \ + --attribute=gene_biotype:TR_D_gene \ + --attribute=gene_biotype:TR_J_gene \ + --attribute=gene_biotype:TR_J_pseudogene \ + --attribute=gene_biotype:TR_C_gene' + } + withName: CELLRANGER_COUNT { + ext.args = '--chemistry SC3Pv3 --create-bam true' + } +} diff --git a/modules/nf-core/cellranger/count/tests/tags.yml b/modules/nf-core/cellranger/count/tests/tags.yml new file mode 100644 index 00000000..a29a7044 --- /dev/null +++ b/modules/nf-core/cellranger/count/tests/tags.yml @@ -0,0 +1,2 @@ +cellranger/count: + - "modules/nf-core/cellranger/count/**" diff --git 
a/modules/nf-core/cellranger/mkgtf/environment.yml b/modules/nf-core/cellranger/mkgtf/environment.yml deleted file mode 100644 index c81ef3e4..00000000 --- a/modules/nf-core/cellranger/mkgtf/environment.yml +++ /dev/null @@ -1,5 +0,0 @@ -name: cellranger_mkgtf -channels: - - conda-forge - - bioconda - - defaults diff --git a/modules/nf-core/cellranger/mkgtf/main.nf b/modules/nf-core/cellranger/mkgtf/main.nf index e0b0dd67..6817a4dd 100644 --- a/modules/nf-core/cellranger/mkgtf/main.nf +++ b/modules/nf-core/cellranger/mkgtf/main.nf @@ -2,7 +2,7 @@ process CELLRANGER_MKGTF { tag "$gtf" label 'process_low' - container "nf-core/cellranger:7.1.0" + container "nf-core/cellranger:8.0.0" input: path gtf @@ -33,4 +33,20 @@ process CELLRANGER_MKGTF { cellranger: \$(echo \$( cellranger --version 2>&1) | sed 's/^.*[^0-9]\\([0-9]*\\.[0-9]*\\.[0-9]*\\).*\$/\\1/' ) END_VERSIONS """ + + stub: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "CELLRANGER_MKGTF module does not support Conda. Please use Docker / Singularity / Podman instead." + } + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${gtf.baseName}.filtered" + """ + touch ${prefix}.gtf + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cellranger: \$(echo \$( cellranger --version 2>&1) | sed 's/^.*[^0-9]\\([0-9]*\\.[0-9]*\\.[0-9]*\\).*\$/\\1/' ) + END_VERSIONS + """ } diff --git a/modules/nf-core/cellranger/mkgtf/meta.yml b/modules/nf-core/cellranger/mkgtf/meta.yml index 7ec0e0a3..282fcff0 100644 --- a/modules/nf-core/cellranger/mkgtf/meta.yml +++ b/modules/nf-core/cellranger/mkgtf/meta.yml @@ -10,7 +10,7 @@ tools: homepage: https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/what-is-cell-ranger documentation: https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/tutorial_ov tool_dev_url: https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/tutorial_ov - licence: 10x Genomics EULA + licence: [10X Genomics EULA] input: - gtf: type: file @@ -27,7 +27,7 @@ output: pattern: "versions.yml" authors: - "@ggabernet" - - "@Emiller88" + - "@edmundmiller" maintainers: - "@ggabernet" - - "@Emiller88" + - "@edmundmiller" diff --git a/modules/nf-core/cellranger/mkgtf/tests/main.nf.test b/modules/nf-core/cellranger/mkgtf/tests/main.nf.test new file mode 100644 index 00000000..bba46b45 --- /dev/null +++ b/modules/nf-core/cellranger/mkgtf/tests/main.nf.test @@ -0,0 +1,53 @@ +// nf-core modules test cellranger/mkgtf +nextflow_process { + + name "Test Process CELLRANGER_MKGTF" + script "../main.nf" + process "CELLRANGER_MKGTF" + + tag "modules" + tag "modules_nfcore" + tag "cellranger" + tag "cellranger/mkgtf" + + test("homo_sapiens - reference") { + + when { + process { + """ + input[0] = file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("homo_sapiens - reference - stub") { + + options "-stub" + + when { + process { + """ + input[0] = file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/cellranger/mkgtf/tests/main.nf.test.snap 
b/modules/nf-core/cellranger/mkgtf/tests/main.nf.test.snap new file mode 100644 index 00000000..05bea037 --- /dev/null +++ b/modules/nf-core/cellranger/mkgtf/tests/main.nf.test.snap @@ -0,0 +1,48 @@ +{ + "homo_sapiens - reference": { + "content": [ + { + "0": [ + "genome.filtered.gtf:md5,50fc877b1c53b36b3b413aff88bda48c" + ], + "1": [ + "versions.yml:md5,440fb677a34013b092e10c5daa9916e9" + ], + "gtf": [ + "genome.filtered.gtf:md5,50fc877b1c53b36b3b413aff88bda48c" + ], + "versions": [ + "versions.yml:md5,440fb677a34013b092e10c5daa9916e9" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-22T08:51:17.576906345" + }, + "homo_sapiens - reference - stub": { + "content": [ + { + "0": [ + "genome.filtered.gtf:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "1": [ + "versions.yml:md5,440fb677a34013b092e10c5daa9916e9" + ], + "gtf": [ + "genome.filtered.gtf:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "versions": [ + "versions.yml:md5,440fb677a34013b092e10c5daa9916e9" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-22T08:51:23.23741543" + } +} \ No newline at end of file diff --git a/modules/nf-core/cellranger/mkgtf/tests/nextflow.config b/modules/nf-core/cellranger/mkgtf/tests/nextflow.config new file mode 100644 index 00000000..4d971a1e --- /dev/null +++ b/modules/nf-core/cellranger/mkgtf/tests/nextflow.config @@ -0,0 +1,21 @@ +process { + withName: CELLRANGER_MKGTF { + ext.args = '--attribute=gene_biotype:protein_coding \ + --attribute=gene_biotype:lincRNA \ + --attribute=gene_biotype:antisense \ + --attribute=gene_biotype:IG_LV_gene \ + --attribute=gene_biotype:IG_V_gene \ + --attribute=gene_biotype:IG_V_pseudogene \ + --attribute=gene_biotype:IG_D_gene \ + --attribute=gene_biotype:IG_J_gene \ + --attribute=gene_biotype:IG_J_pseudogene \ + --attribute=gene_biotype:IG_C_gene \ + --attribute=gene_biotype:IG_C_pseudogene \ + --attribute=gene_biotype:TR_V_gene \ + --attribute=gene_biotype:TR_V_pseudogene \ + --attribute=gene_biotype:TR_D_gene \ + --attribute=gene_biotype:TR_J_gene \ + --attribute=gene_biotype:TR_J_pseudogene \ + --attribute=gene_biotype:TR_C_gene' + } +} diff --git a/modules/nf-core/cellranger/mkgtf/tests/tags.yml b/modules/nf-core/cellranger/mkgtf/tests/tags.yml new file mode 100644 index 00000000..7c95d529 --- /dev/null +++ b/modules/nf-core/cellranger/mkgtf/tests/tags.yml @@ -0,0 +1,2 @@ +cellranger/mkgtf: + - "modules/nf-core/cellranger/mkgtf/**" diff --git a/modules/nf-core/cellranger/mkref/environment.yml b/modules/nf-core/cellranger/mkref/environment.yml deleted file mode 100644 index 9ca3e88c..00000000 --- a/modules/nf-core/cellranger/mkref/environment.yml +++ /dev/null @@ -1,5 +0,0 @@ -name: cellranger_mkref -channels: - - conda-forge - - bioconda - - defaults diff --git a/modules/nf-core/cellranger/mkref/main.nf b/modules/nf-core/cellranger/mkref/main.nf index 986891b8..4325c308 100644 --- a/modules/nf-core/cellranger/mkref/main.nf +++ b/modules/nf-core/cellranger/mkref/main.nf @@ -2,7 +2,7 @@ process CELLRANGER_MKREF { tag "$fasta" label 'process_high' - container "nf-core/cellranger:7.1.0" + container "nf-core/cellranger:8.0.0" input: path fasta @@ -28,6 +28,8 @@ process CELLRANGER_MKREF { --genome=$reference_name \\ --fasta=$fasta \\ --genes=$gtf \\ + --localcores=${task.cpus} \\ + --localmem=${task.memory.toGiga()} \\ $args cat <<-END_VERSIONS > versions.yml @@ -35,4 +37,21 @@ process CELLRANGER_MKREF { cellranger: \$(echo \$( cellranger --version 2>&1) | sed 
's/^.*[^0-9]\\([0-9]*\\.[0-9]*\\.[0-9]*\\).*\$/\\1/' )
    END_VERSIONS
    """
+
+    stub:
+    // Exit if running this module with -profile conda / -profile mamba
+    if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
+        error "CELLRANGER_MKREF module does not support Conda. Please use Docker / Singularity / Podman instead."
+    }
+    def args = task.ext.args ?: ''
+    """
+    mkdir $reference_name
+    touch ${reference_name}/empty_file
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        cellranger: \$(echo \$( cellranger --version 2>&1) | sed 's/^.*[^0-9]\\([0-9]*\\.[0-9]*\\.[0-9]*\\).*\$/\\1/' )
+    END_VERSIONS
+    """
+
 }
diff --git a/modules/nf-core/cellranger/mkref/meta.yml b/modules/nf-core/cellranger/mkref/meta.yml
index 4cd9091c..bd8ad5a6 100644
--- a/modules/nf-core/cellranger/mkref/meta.yml
+++ b/modules/nf-core/cellranger/mkref/meta.yml
@@ -10,7 +10,7 @@ tools:
      homepage: https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/what-is-cell-ranger
      documentation: https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/tutorial_ov
      tool_dev_url: https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/tutorial_ov
-     licence: 10x Genomics EULA
+     licence: [10X Genomics EULA]
 input:
   - fasta:
       type: file
diff --git a/modules/nf-core/cellranger/mkref/tests/main.nf.test b/modules/nf-core/cellranger/mkref/tests/main.nf.test
new file mode 100644
index 00000000..5282f3f6
--- /dev/null
+++ b/modules/nf-core/cellranger/mkref/tests/main.nf.test
@@ -0,0 +1,71 @@
+// nf-core modules test cellranger/mkref
+nextflow_process {
+
+    name "Test Process CELLRANGER_MKREF"
+    script "../main.nf"
+    config "./nextflow.config"
+    process "CELLRANGER_MKREF"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "cellranger"
+    tag "cellranger/mkref"
+
+    test("homo_sapiens - reference") {
+
+        when {
+            process {
+                """
+                input[0] = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
+                input[1] = file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)
+                input[2] = "homo_sapiens_reference"
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    path( "${outputDir}/homo_sapiens_reference/fasta" ),
+                    path( "${outputDir}/homo_sapiens_reference/genes" ),
+                    path( "${outputDir}/homo_sapiens_reference/reference.json" ),
+                    path( "${outputDir}/homo_sapiens_reference/star/SA" ),
+                    path( "${outputDir}/homo_sapiens_reference/star/SAindex" ),
+                    path( "${outputDir}/homo_sapiens_reference/star/Genome" ),
+                    path( "${outputDir}/homo_sapiens_reference/star/geneInfo.tab" ),
+                    path( "${outputDir}/homo_sapiens_reference/star/exonGeTrInfo.tab" ),
+                    path( "${outputDir}/homo_sapiens_reference/star/exonInfo.tab" ),
+                    path( "${outputDir}/homo_sapiens_reference/star/sjdbList.out.tab" )
+                ).match() },
+                { assert snapshot(process.out.versions).match("versions") }
+            )
+        }
+
+    }
+
+    test("homo_sapiens - reference - stub") {
+
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = []
+                input[1] = []
+                input[2] = "homo_sapiens_reference"
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out.reference).match() },
+                { assert snapshot(process.out.versions).match("versions") }
+            )
+        }
+
+    }
+
+}
diff --git a/modules/nf-core/cellranger/mkref/tests/main.nf.test.snap b/modules/nf-core/cellranger/mkref/tests/main.nf.test.snap
new file mode 100644
index 00000000..046fefbd
--- /dev/null
+++
b/modules/nf-core/cellranger/mkref/tests/main.nf.test.snap @@ -0,0 +1,52 @@ +{ + "homo_sapiens - reference": { + "content": [ + [ + "genome.fa:md5,f315020d899597c1b57e5fe9f60f4c3e", + "genome.fa.fai:md5,3520cd30e1b100e55f578db9c855f685" + ], + [ + "genes.gtf.gz:md5,50fc877b1c53b36b3b413aff88bda48c" + ], + "reference.json:md5,106d79d334779a0d675aa0a2623bea97", + "SA:md5,bcf3e1a855783105150b46c905465333", + "SAindex:md5,b93fb07d342e6c32a00ebc4311c0ad38", + "Genome:md5,22102926fadf5890e905ca71b2da3f35", + "geneInfo.tab:md5,8b608537307443ffaee4927d2b428805", + "exonGeTrInfo.tab:md5,72d4dd88d25e3c5b0bb72b12b4ac99a8", + "exonInfo.tab:md5,0d560290fab688b7268d88d5494bf9fe", + "sjdbList.out.tab:md5,9e4f991abbbfeb3935a2bb21b9e258f1" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-25T09:41:24.407740353" + }, + "homo_sapiens - reference - stub": { + "content": [ + [ + [ + "empty_file:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-25T09:41:46.135604587" + }, + "versions": { + "content": [ + [ + "versions.yml:md5,e9d88519c551d8ccbbdc6bd2673fc41f" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-25T09:41:46.149227444" + } +} \ No newline at end of file diff --git a/modules/nf-core/cellranger/mkref/tests/nextflow.config b/modules/nf-core/cellranger/mkref/tests/nextflow.config new file mode 100644 index 00000000..f10f7974 --- /dev/null +++ b/modules/nf-core/cellranger/mkref/tests/nextflow.config @@ -0,0 +1,6 @@ +process { + withName: CELLRANGER_MKREF { + publishDir = [ path: { "output" } ] + } +} + diff --git a/modules/nf-core/cellranger/mkref/tests/tags.yml b/modules/nf-core/cellranger/mkref/tests/tags.yml new file mode 100644 index 00000000..62b5d963 --- /dev/null +++ b/modules/nf-core/cellranger/mkref/tests/tags.yml @@ -0,0 +1,2 @@ +cellranger/mkref: + - "modules/nf-core/cellranger/mkref/**" diff --git a/modules/nf-core/cellrangerarc/count/main.nf b/modules/nf-core/cellrangerarc/count/main.nf index 2bf0193a..e5ce9225 100644 --- a/modules/nf-core/cellrangerarc/count/main.nf +++ b/modules/nf-core/cellrangerarc/count/main.nf @@ -14,9 +14,9 @@ process CELLRANGERARC_COUNT { path reference output: - tuple val(meta), path("${meta.id}/outs/*"), emit: outs - path("${meta.id}_lib.csv") , emit: lib - path "versions.yml" , emit: versions + tuple val(meta), path("${meta.id}/outs/**"), emit: outs + path("${meta.id}_lib.csv") , emit: lib + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when diff --git a/modules/nf-core/cellrangerarc/count/meta.yml b/modules/nf-core/cellrangerarc/count/meta.yml index 919de4dc..8bc73c6f 100644 --- a/modules/nf-core/cellrangerarc/count/meta.yml +++ b/modules/nf-core/cellrangerarc/count/meta.yml @@ -36,5 +36,5 @@ output: pattern: "versions.yml" authors: - "@ggabernet" - - "@Emiller88" + - "@edmundmiller" - "@heylf" diff --git a/modules/nf-core/cellrangerarc/mkgtf/meta.yml b/modules/nf-core/cellrangerarc/mkgtf/meta.yml index 923c3e18..5fadc3ef 100644 --- a/modules/nf-core/cellrangerarc/mkgtf/meta.yml +++ b/modules/nf-core/cellrangerarc/mkgtf/meta.yml @@ -28,5 +28,5 @@ output: pattern: "versions.yml" authors: - "@ggabernet" - - "@Emiller88" + - "@edmundmiller" - "@heylf" diff --git a/modules/nf-core/custom/dumpsoftwareversions/environment.yml b/modules/nf-core/custom/dumpsoftwareversions/environment.yml deleted file mode 100644 index 9b3272bc..00000000 --- 
a/modules/nf-core/custom/dumpsoftwareversions/environment.yml +++ /dev/null
@@ -1,7 +0,0 @@
-name: custom_dumpsoftwareversions
-channels:
-  - conda-forge
-  - bioconda
-  - defaults
-dependencies:
-  - bioconda::multiqc=1.19
diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf
deleted file mode 100644
index f2187611..00000000
--- a/modules/nf-core/custom/dumpsoftwareversions/main.nf
+++ /dev/null
@@ -1,24 +0,0 @@
-process CUSTOM_DUMPSOFTWAREVERSIONS {
-    label 'process_single'
-
-    // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container
-    conda "${moduleDir}/environment.yml"
-    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/multiqc:1.19--pyhdfd78af_0' :
-        'biocontainers/multiqc:1.19--pyhdfd78af_0' }"
-
-    input:
-    path versions
-
-    output:
-    path "software_versions.yml"    , emit: yml
-    path "software_versions_mqc.yml", emit: mqc_yml
-    path "versions.yml"             , emit: versions
-
-    when:
-    task.ext.when == null || task.ext.when
-
-    script:
-    def args = task.ext.args ?: ''
-    template 'dumpsoftwareversions.py'
-}
diff --git a/modules/nf-core/custom/dumpsoftwareversions/meta.yml b/modules/nf-core/custom/dumpsoftwareversions/meta.yml
deleted file mode 100644
index 5f15a5fd..00000000
--- a/modules/nf-core/custom/dumpsoftwareversions/meta.yml
+++ /dev/null
@@ -1,37 +0,0 @@
-# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
-name: custom_dumpsoftwareversions
-description: Custom module used to dump software versions within the nf-core pipeline template
-keywords:
-  - custom
-  - dump
-  - version
-tools:
-  - custom:
-      description: Custom module used to dump software versions within the nf-core pipeline template
-      homepage: https://github.com/nf-core/tools
-      documentation: https://github.com/nf-core/tools
-      licence: ["MIT"]
-input:
-  - versions:
-      type: file
-      description: YML file containing software versions
-      pattern: "*.yml"
-output:
-  - yml:
-      type: file
-      description: Standard YML file containing software versions
-      pattern: "software_versions.yml"
-  - mqc_yml:
-      type: file
-      description: MultiQC custom content YML file containing software versions
-      pattern: "software_versions_mqc.yml"
-  - versions:
-      type: file
-      description: File containing software versions
-      pattern: "versions.yml"
-authors:
-  - "@drpatelh"
-  - "@grst"
-maintainers:
-  - "@drpatelh"
-  - "@grst"
diff --git a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py
deleted file mode 100755
index da033408..00000000
--- a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py
+++ /dev/null
@@ -1,101 +0,0 @@
-#!/usr/bin/env python
-
-
-"""Provide functions to merge multiple versions.yml files."""
-
-
-import yaml
-import platform
-from textwrap import dedent
-
-
-def _make_versions_html(versions):
-    """Generate a tabular HTML output of all versions for MultiQC."""
-    html = [
-        dedent(
-            """\\
-            <style>
-            #nf-core-versions tbody:nth-child(even) {
-                background-color: #f2f2f2;
-            }
-            </style>
-            <table class="table" style="width:100%" id="nf-core-versions">
-                <thead>
-                    <tr>
-                        <th> Process Name </th>
-                        <th> Software </th>
-                        <th> Version  </th>
-                    </tr>
-                </thead>
-            """
-        )
-    ]
-    for process, tmp_versions in sorted(versions.items()):
-        html.append("<tbody>")
-        for i, (tool, version) in enumerate(sorted(tmp_versions.items())):
-            html.append(
-                dedent(
-                    f"""\\
-                    <tr>
-                        <td><samp>{process if (i == 0) else ''}</samp></td>
-                        <td><samp>{tool}</samp></td>
-                        <td><samp>{version}</samp></td>
-                    </tr>
-                    """
-                )
-            )
-        html.append("</tbody>")
-    html.append("</table>")
-    return "\\n".join(html)
-
-
-def main():
-    """Load all version files and generate merged output."""
-    versions_this_module = {}
-    versions_this_module["${task.process}"] = {
-        "python": platform.python_version(),
-        "yaml": yaml.__version__,
-    }
-
-    with open("$versions") as f:
-        versions_by_process = yaml.load(f, Loader=yaml.BaseLoader) | versions_this_module
-
-    # aggregate versions by the module name (derived from fully-qualified process name)
-    versions_by_module = {}
-    for process, process_versions in versions_by_process.items():
-        module = process.split(":")[-1]
-        try:
-            if versions_by_module[module] != process_versions:
-                raise AssertionError(
-                    "We assume that software versions are the same between all modules. "
-                    "If you see this error-message it means you discovered an edge-case "
-                    "and should open an issue in nf-core/tools. "
-                )
-        except KeyError:
-            versions_by_module[module] = process_versions
-
-    versions_by_module["Workflow"] = {
-        "Nextflow": "$workflow.nextflow.version",
-        "$workflow.manifest.name": "$workflow.manifest.version",
-    }
-
-    versions_mqc = {
-        "id": "software_versions",
-        "section_name": "${workflow.manifest.name} Software Versions",
-        "section_href": "https://github.com/${workflow.manifest.name}",
-        "plot_type": "html",
-        "description": "are collected at run time from the software output.",
-        "data": _make_versions_html(versions_by_module),
-    }
-
-    with open("software_versions.yml", "w") as f:
-        yaml.dump(versions_by_module, f, default_flow_style=False)
-    with open("software_versions_mqc.yml", "w") as f:
-        yaml.dump(versions_mqc, f, default_flow_style=False)
-
-    with open("versions.yml", "w") as f:
-        yaml.dump(versions_this_module, f, default_flow_style=False)
-
-
-if __name__ == "__main__":
-    main()
diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test
deleted file mode 100644
index b1e1630b..00000000
--- a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test
+++ /dev/null
@@ -1,43 +0,0 @@
-nextflow_process {
-
-    name "Test Process CUSTOM_DUMPSOFTWAREVERSIONS"
-    script "../main.nf"
-    process "CUSTOM_DUMPSOFTWAREVERSIONS"
-    tag "modules"
-    tag "modules_nfcore"
-    tag "custom"
-    tag "dumpsoftwareversions"
-    tag "custom/dumpsoftwareversions"
-
-    test("Should run without failures") {
-        when {
-            process {
-                """
-                def tool1_version = '''
-                TOOL1:
-                    tool1: 0.11.9
-                '''.stripIndent()
-
-                def tool2_version = '''
-                TOOL2:
-                    tool2: 1.9
-                '''.stripIndent()
-
-                input[0] = Channel.of(tool1_version, tool2_version).collectFile()
-                """
-            }
-        }
-
-        then {
-            assertAll(
-                { assert process.success },
-                { assert snapshot(
-                    process.out.versions,
-                    file(process.out.mqc_yml[0]).readLines()[0..10],
-                    file(process.out.yml[0]).readLines()[0..7]
-                    ).match()
-                }
-            )
-        }
-    }
-}
diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap
deleted file mode 100644
index 5f59a936..00000000
--- a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap
+++ /dev/null
@@ -1,33 +0,0 @@
-{
-    "Should run without failures": {
-        "content": [
-            [
-                "versions.yml:md5,76d454d92244589d32455833f7c1ba6d"
-            ],
-            [
-                "data: \"\\n\\n \\n \\n \\n \\n \\n \\n \\n\\",
-                " \\n\\n\\n \\n \\n\\",
-                " \\ \\n\\n\\n\\n \\n \\",
-                " \\ \\n \\n\\n\\n\\n\\",
-                " \\n\\n \\n \\n\\",
-                " \\ \\n\\n\\n\\n\\n\\n \\n\\",
-                " \\ \\n \\n\\n\\n\\n\\",
-                " \\n\\n \\n \\n\\"
-            ],
-            [
-                "CUSTOM_DUMPSOFTWAREVERSIONS:",
-                "  python: 3.11.7",
-                "  yaml: 5.4.1",
-                "TOOL1:",
-                "  tool1: 0.11.9",
-                "TOOL2:",
-                "  tool2: '1.9'",
-                "Workflow:"
-            ]
-        ],
-        "timestamp": "2024-01-09T23:01:18.710682"
-    }
-}
\ No newline at end of file
diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml b/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml
deleted file mode 100644
index 405aa24a..00000000
--- a/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml
+++ /dev/null
@@ -1,2 +0,0 @@
-custom/dumpsoftwareversions:
-  - modules/nf-core/custom/dumpsoftwareversions/**
diff --git a/modules/nf-core/fastqc/tests/main.nf.test b/modules/nf-core/fastqc/tests/main.nf.test
index ad9bc54f..70edae4d 100644
--- a/modules/nf-core/fastqc/tests/main.nf.test
+++ b/modules/nf-core/fastqc/tests/main.nf.test
@@ -13,12 +13,10 @@ nextflow_process {
        when {
            process {
                """
-                input[0] = [
-                    [ id: 'test', single_end:true ],
-                    [
-                        file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)
-                    ]
-                ]
+                input[0] = Channel.of([
+                    [ id: 'test', single_end:true ],
+                    [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ]
+                ])
                """
            }
        }
@@ -35,7 +33,7 @@
                { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" },
                { assert path(process.out.html[0][1]).text.contains("<html>") },

-                { assert snapshot(process.out.versions).match("versions") }
+                { assert snapshot(process.out.versions).match("fastqc_versions_single") }
            )
        }
    }
@@ -44,15 +42,13 @@

        when {
            process {
-                """
-                input[0] = [
-                    [id: 'test', single_end: false], // meta map
-                    [
-                        file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
-                        file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)
-                    ]
-                ]
-                """
+                """
+                input[0] = Channel.of([
+                    [id: 'test', single_end: false], // meta map
+                    [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+                      file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ]
+                ])
+                """
            }
        }
@@ -67,7 +63,7 @@
                { assert path(process.out.html[0][1][0]).text.contains("<html>") },
                { assert path(process.out.html[0][1][1]).text.contains("<html>") },

-                { assert snapshot(process.out.versions).match("versions") }
+                { assert snapshot(process.out.versions).match("fastqc_versions_paired") }
            )
        }
    }
@@ -76,11 +72,11 @@

        when {
            process {
-                """
-                input[0] = [
-                    [id: 'test', single_end: false], // meta map
-                    file(params.test_data['sarscov2']['illumina']['test_interleaved_fastq_gz'], checkIfExists: true)
-                ]
+                """
+                input[0] = Channel.of([
+                    [id: 'test', single_end: false], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true)
+                ])
                """
            }
        }
@@ -93,7 +89,7 @@
                { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" },
                { assert path(process.out.html[0][1]).text.contains("<html>") },

-                { assert snapshot(process.out.versions).match("versions") }
+                { assert snapshot(process.out.versions).match("fastqc_versions_interleaved") }
            )
        }
    }
@@ -102,12 +98,12 @@

        when {
            process {
-                """
-                input[0] = [
-                    [id: 'test', single_end: false], // meta map
-                    file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true)
-                ]
-                """
+                """
+                input[0] = Channel.of([
+                    [id: 'test', single_end: false], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true)
+                ])
+                """
            }
        }
@@ -119,7 +115,7 @@
                { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" },
                { assert path(process.out.html[0][1]).text.contains("<html>") },

-                { assert snapshot(process.out.versions).match("versions") }
+                { assert snapshot(process.out.versions).match("fastqc_versions_bam") }
            )
        }
    }
@@ -128,17 +124,15 @@

        when {
            process {
-                """
-                input[0] = [
-                    [id: 'test', single_end: false], // meta map
-                    [
-                        file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
-                        file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true),
-                        file(params.test_data['sarscov2']['illumina']['test2_1_fastq_gz'], checkIfExists: true),
-                        file(params.test_data['sarscov2']['illumina']['test2_2_fastq_gz'], checkIfExists: true)
-                    ]
-                ]
-                """
+                """
+                input[0] = Channel.of([
+                    [id: 'test', single_end: false], // meta map
+                    [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+                      file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true),
+                      file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_1.fastq.gz', checkIfExists: true),
+                      file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_2.fastq.gz', checkIfExists: true) ]
+                ])
+                """
            }
        }
@@ -159,7 +153,7 @@
                { assert path(process.out.html[0][1][2]).text.contains("<html>") },
                { assert path(process.out.html[0][1][3]).text.contains("<html>") },

-                { assert snapshot(process.out.versions).match("versions") }
+                { assert snapshot(process.out.versions).match("fastqc_versions_multiple") }
            )
        }
    }
@@ -168,12 +162,12 @@

        when {
            process {
-                """
-                input[0] = [
-                    [ id:'mysample', single_end:true ], // meta map
-                    file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)
-                ]
-                """
+                """
+                input[0] = Channel.of([
+                    [ id:'mysample', single_end:true ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)
+                ])
+                """
            }
        }
@@ -185,7 +179,7 @@
                { assert process.out.zip[0][1] ==~ ".*/mysample_fastqc.zip" },
                { assert path(process.out.html[0][1]).text.contains("<html>") },

-                { assert snapshot(process.out.versions).match("versions") }
+                { assert snapshot(process.out.versions).match("fastqc_versions_custom_prefix") }
            )
        }
    }
@@ -197,12 +191,10 @@

        when {
            process {
                """
-                input[0] = [
-                    [ id: 'test', single_end:true ],
-                    [
-                        file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)
-                    ]
-                ]
+                input[0] = Channel.of([
+                    [ id: 'test', single_end:true ],
+                    [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ]
+                ])
                """
            }
        }
@@ -212,7 +204,7 @@
                { assert process.success },
                { assert snapshot(process.out.html.collect { file(it[1]).getName() } +
                                  process.out.zip.collect { file(it[1]).getName() } +
-                                  process.out.versions ).match() }
+                                  process.out.versions ).match("fastqc_stub") }
            )
        }
    }
diff --git a/modules/nf-core/fastqc/tests/main.nf.test.snap b/modules/nf-core/fastqc/tests/main.nf.test.snap
index 5ef5afbd..86f7c311 100644
--- a/modules/nf-core/fastqc/tests/main.nf.test.snap
+++ b/modules/nf-core/fastqc/tests/main.nf.test.snap
@@ -1,5 +1,17 @@
 {
-    "sarscov2 single-end [fastq] - stub": {
+
"fastqc_versions_interleaved": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:40:07.293713" + }, + "fastqc_stub": { "content": [ [ "test.html", @@ -7,14 +19,70 @@ "versions.yml:md5,e1cc25ca8af856014824abd842e93978" ] ], - "timestamp": "2023-12-29T02:48:05.126117287" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:31:01.425198" + }, + "fastqc_versions_multiple": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:40:55.797907" + }, + "fastqc_versions_bam": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:40:26.795862" + }, + "fastqc_versions_single": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:39:27.043675" + }, + "fastqc_versions_paired": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:39:47.584191" }, - "versions": { + "fastqc_versions_custom_prefix": { "content": [ [ "versions.yml:md5,e1cc25ca8af856014824abd842e93978" ] ], - "timestamp": "2023-12-29T02:46:49.507942667" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:41:14.576531" } } \ No newline at end of file diff --git a/modules/nf-core/gffread/meta.yml b/modules/nf-core/gffread/meta.yml index 27ac3105..d38cbcda 100644 --- a/modules/nf-core/gffread/meta.yml +++ b/modules/nf-core/gffread/meta.yml @@ -31,6 +31,6 @@ output: description: File containing software versions pattern: "versions.yml" authors: - - "@emiller88" + - "@edmundmiller" maintainers: - - "@emiller88" + - "@edmundmiller" diff --git a/modules/nf-core/gffread/tests/main.nf.test b/modules/nf-core/gffread/tests/main.nf.test index c4dfbdf4..452aba1b 100644 --- a/modules/nf-core/gffread/tests/main.nf.test +++ b/modules/nf-core/gffread/tests/main.nf.test @@ -25,10 +25,12 @@ nextflow_process { then { assertAll ( - { assert process.success }, - { assert snapshot(process.out).match() }, - { assert process.out.gtf != null }, - { assert process.out.gffread_gff == [] } + { assert process.success }, + { assert snapshot( + process.out.gtf, + process.out.versions + ).match() }, + { assert process.out.gffread_gff == [] } ) } @@ -36,6 +38,8 @@ nextflow_process { test("sarscov2-gff3-gff3") { + config "./nextflow-gff3.config" + when { params { outdir = "$outputDir" @@ -49,10 +53,12 @@ nextflow_process { then { assertAll ( - { assert process.success }, - { assert snapshot(process.out).match() }, - { assert process.out.gtf == [] }, - { assert process.out.gffread_gff != null }, + { assert process.success }, + { assert snapshot( + process.out.gffread_gff, + process.out.versions + ).match() }, + { assert process.out.gtf == [] }, ) } diff --git a/modules/nf-core/gffread/tests/main.nf.test.snap b/modules/nf-core/gffread/tests/main.nf.test.snap index 1f1342e1..00a11a40 100644 --- a/modules/nf-core/gffread/tests/main.nf.test.snap +++ b/modules/nf-core/gffread/tests/main.nf.test.snap @@ -1,52 +1,24 @@ { "sarscov2-gff3-gtf": { "content": [ - { - "0": [ - 
"genome.gtf:md5,2394072d7d31530dfd590c4a117bf6e3" - ], - "1": [ - - ], - "2": [ - "versions.yml:md5,a71b6cdfa528dd206a238ec64bae13d6" - ], - "gffread_gff": [ - - ], - "gtf": [ - "genome.gtf:md5,2394072d7d31530dfd590c4a117bf6e3" - ], - "versions": [ - "versions.yml:md5,a71b6cdfa528dd206a238ec64bae13d6" - ] - } + [ + "genome.gtf:md5,2394072d7d31530dfd590c4a117bf6e3" + ], + [ + "versions.yml:md5,a71b6cdfa528dd206a238ec64bae13d6" + ] ], - "timestamp": "2023-11-29T15:39:30.006985" + "timestamp": "2024-01-23T20:00:32.688779117" }, "sarscov2-gff3-gff3": { "content": [ - { - "0": [ - - ], - "1": [ - "genome.gffread.gff3:md5,a7d40d99dcddac23ac673c473279ea2d" - ], - "2": [ - "versions.yml:md5,a71b6cdfa528dd206a238ec64bae13d6" - ], - "gffread_gff": [ - "genome.gffread.gff3:md5,a7d40d99dcddac23ac673c473279ea2d" - ], - "gtf": [ - - ], - "versions": [ - "versions.yml:md5,a71b6cdfa528dd206a238ec64bae13d6" - ] - } + [ + "genome.gffread.gff3:md5,a7d40d99dcddac23ac673c473279ea2d" + ], + [ + "versions.yml:md5,a71b6cdfa528dd206a238ec64bae13d6" + ] ], - "timestamp": "2023-11-29T15:39:34.636061" + "timestamp": "2024-01-23T20:07:11.457356625" } } \ No newline at end of file diff --git a/modules/nf-core/gffread/tests/nextflow-gff3.config b/modules/nf-core/gffread/tests/nextflow-gff3.config new file mode 100644 index 00000000..afe0830e --- /dev/null +++ b/modules/nf-core/gffread/tests/nextflow-gff3.config @@ -0,0 +1,5 @@ +process { + withName: GFFREAD { + ext.args = '' + } +} diff --git a/modules/nf-core/kallistobustools/count/environment.yml b/modules/nf-core/kallistobustools/count/environment.yml index 7ff8a2da..024f0afc 100644 --- a/modules/nf-core/kallistobustools/count/environment.yml +++ b/modules/nf-core/kallistobustools/count/environment.yml @@ -4,4 +4,4 @@ channels: - bioconda - defaults dependencies: - - bioconda::kb-python=0.27.2 + - bioconda::kb-python=0.28.2 diff --git a/modules/nf-core/kallistobustools/count/main.nf b/modules/nf-core/kallistobustools/count/main.nf index 036bb35d..1efda00a 100644 --- a/modules/nf-core/kallistobustools/count/main.nf +++ b/modules/nf-core/kallistobustools/count/main.nf @@ -4,8 +4,8 @@ process KALLISTOBUSTOOLS_COUNT { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/kb-python:0.27.2--pyhdfd78af_0' : - 'biocontainers/kb-python:0.27.2--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/kb-python:0.28.2--pyhdfd78af_2' : + 'biocontainers/kb-python:0.28.2--pyhdfd78af_2' }" input: tuple val(meta), path(reads) @@ -14,11 +14,14 @@ process KALLISTOBUSTOOLS_COUNT { path t1c path t2c val technology + val workflow_mode output: - tuple val(meta), path ("*.count"), emit: count - path "versions.yml" , emit: versions - path "*.count/*/*.mtx" , emit: matrix //Ensure that kallisto finished and produced outputs + tuple val(meta), path ("*.count") , emit: count + tuple val(meta), path ("*.count/counts_unfiltered"), emit: raw_counts + tuple val(meta), path ("*.count/counts_filtered") , emit: filtered_counts, optional: true + path "versions.yml" , emit: versions + path "*.count/*/*.mtx" , emit: matrix //Ensure that kallisto finished and produced outputs when: task.ext.when == null || task.ext.when @@ -27,7 +30,7 @@ process KALLISTOBUSTOOLS_COUNT { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def cdna = t1c ? "-c1 $t1c" : '' - def introns = t2c ? "-c2 $t2c" : '' + def intron = t2c ? 
"-c2 $t2c" : '' def memory = task.memory.toGiga() - 1 """ kb \\ @@ -36,12 +39,25 @@ process KALLISTOBUSTOOLS_COUNT { -i $index \\ -g $t2g \\ $cdna \\ - $introns \\ + $intron \\ -x $technology \\ + --workflow $workflow_mode \\ $args \\ -o ${prefix}.count \\ - ${reads.join( " " )} \\ - -m ${memory}G + -m ${memory}G \\ + ${reads.join( " " )} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + kallistobustools: \$(echo \$(kb --version 2>&1) | sed 's/^.*kb_python //;s/positional arguments.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + mkdir -p ${prefix}.count/counts_unfiltered/ + touch ${prefix}.count/counts_unfiltered/cells_x_genes.mtx cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/kallistobustools/count/meta.yml b/modules/nf-core/kallistobustools/count/meta.yml index 7491248c..d491dffa 100644 --- a/modules/nf-core/kallistobustools/count/meta.yml +++ b/modules/nf-core/kallistobustools/count/meta.yml @@ -12,7 +12,7 @@ tools: homepage: https://www.kallistobus.tools/ documentation: https://kb-python.readthedocs.io/en/latest/index.html tool_dev_url: https://github.com/pachterlab/kb_python - licence: MIT License + licence: ["MIT"] input: - meta: type: map @@ -34,16 +34,16 @@ input: pattern: "*t2g.txt" - t1c: type: file - description: kb ref's c1 spliced_t2c file + description: kb ref's c1 cdna_t2c file pattern: "*.{cdna_t2c.txt}" - t2c: type: file - description: kb ref's c2 unspliced_t2c file - pattern: "*.{introns_t2c.txt}" + description: kb ref's c2 intron_t2c file + pattern: "*.{intron_t2c.txt}" - workflow_mode: type: string - description: String value defining workflow to use, can be one of "standard", "lamanno", "nucleus" - pattern: "{standard,lamanno,nucleus,kite}" + description: String value defining workflow to use, can be one of "standard", "nac", "lamanno" (obsolete) + pattern: "{standard,lamanno,nac}" - technology: type: string description: String value defining the sequencing technology used. 
@@ -58,6 +58,14 @@ output: type: file description: kb count output folder pattern: "*.{count}" + - raw_counts: + type: file + description: kb raw counts output folder + pattern: "*.{count}/counts_unfiltered" + - filtered_counts: + type: file + description: kb filtered counts output folder + pattern: "*.{count}/counts_filtered" - versions: type: file description: File containing software versions diff --git a/modules/nf-core/kallistobustools/count/tests/main.nf.test b/modules/nf-core/kallistobustools/count/tests/main.nf.test new file mode 100644 index 00000000..550001f9 --- /dev/null +++ b/modules/nf-core/kallistobustools/count/tests/main.nf.test @@ -0,0 +1,98 @@ +nextflow_process { + + name "Test Process KALLISTOBUSTOOLS_COUNT" + script "../main.nf" + process "KALLISTOBUSTOOLS_COUNT" + + tag "modules" + tag "modules_nfcore" + tag "kallistobustools" + tag "kallistobustools/count" + tag "kallistobustools/ref" + + setup { + run("KALLISTOBUSTOOLS_REF") { + script "../../ref/main.nf" + process { + """ + input[0] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + input[1] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) + input[2] = "standard" + """ + } + } + } + + test("genome.fasta + genome.gtf + '10X3' + 'standard'") { + + when { + process { + """ + input[0] = Channel.of( + [ + [id:'test'], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/10xgenomics/cellranger/10k_pbmc_cmo/fastqs/gex_1/subsampled_SC3_v3_NextGem_DI_CellPlex_Human_PBMC_10K_1_gex_S2_L001_R1_001.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/10xgenomics/cellranger/10k_pbmc_cmo/fastqs/gex_1/subsampled_SC3_v3_NextGem_DI_CellPlex_Human_PBMC_10K_1_gex_S2_L001_R2_001.fastq.gz', checkIfExists: true) + ] + ] + ) + input[1] = KALLISTOBUSTOOLS_REF.out.index + input[2] = KALLISTOBUSTOOLS_REF.out.t2g + input[3] = KALLISTOBUSTOOLS_REF.out.cdna_t2c.ifEmpty{ [] } // when empty the module doesn't run unless something is passed. + input[4] = KALLISTOBUSTOOLS_REF.out.intron_t2c.ifEmpty{ [] } // when empty the module doesn't run unless something is passed. 
+ input[5] = "10XV3" + input[6] = "standard" + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.versions, + process.out.matrix, + path(process.out.count.get(0).get(1)).list().findAll { file(it.toString()).name != "run_info.json" && file(it.toString()).name != "kb_info.json" }, + file(path(process.out.count.get(0).get(1)).list().find { file(it.toString()).name == "kb_info.json" }.toString()).readLines()[15..22], + file(path(process.out.count.get(0).get(1)).list().find { file(it.toString()).name == "run_info.json" }.toString()).readLines()[0..9] + ).match() + } + ) + } + } + + test("genome.fasta + genome.gtf + '10X3' + 'standard' - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of( + [ + [id:'test'], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/10xgenomics/cellranger/10k_pbmc_cmo/fastqs/gex_1/subsampled_SC3_v3_NextGem_DI_CellPlex_Human_PBMC_10K_1_gex_S2_L001_R1_001.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/10xgenomics/cellranger/10k_pbmc_cmo/fastqs/gex_1/subsampled_SC3_v3_NextGem_DI_CellPlex_Human_PBMC_10K_1_gex_S2_L001_R2_001.fastq.gz', checkIfExists: true) + ] + ] + ) + input[1] = KALLISTOBUSTOOLS_REF.out.index + input[2] = KALLISTOBUSTOOLS_REF.out.t2g + input[3] = KALLISTOBUSTOOLS_REF.out.cdna_t2c.ifEmpty{ [] } // when empty the module doesn't run unless something is passed. + input[4] = KALLISTOBUSTOOLS_REF.out.intron_t2c.ifEmpty{ [] } // when empty the module doesn't run unless something is passed. + input[5] = "10XV3" + input[6] = "standard" + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/kallistobustools/count/tests/main.nf.test.snap b/modules/nf-core/kallistobustools/count/tests/main.nf.test.snap new file mode 100644 index 00000000..6f6b3183 --- /dev/null +++ b/modules/nf-core/kallistobustools/count/tests/main.nf.test.snap @@ -0,0 +1,126 @@ +{ + "genome.fasta + genome.gtf + '10X3' + 'standard' - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + [ + [ + "cells_x_genes.mtx:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ] + ], + "1": [ + [ + { + "id": "test" + }, + [ + "cells_x_genes.mtx:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,6ec06270afe0a7572c41567160d927d9" + ], + "4": [ + "cells_x_genes.mtx:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "count": [ + [ + { + "id": "test" + }, + [ + [ + "cells_x_genes.mtx:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ] + ], + "filtered_counts": [ + + ], + "matrix": [ + "cells_x_genes.mtx:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "raw_counts": [ + [ + { + "id": "test" + }, + [ + "cells_x_genes.mtx:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + "versions.yml:md5,6ec06270afe0a7572c41567160d927d9" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-18T11:38:48.980939376" + }, + "genome.fasta + genome.gtf + '10X3' + 'standard'": { + "content": [ + [ + "versions.yml:md5,6ec06270afe0a7572c41567160d927d9" + ], + [ + "cells_x_genes.mtx:md5,e12a45e7f7d6527f698dd9cb2e99ecd1" + ], + [ + "10x_version3_whitelist.txt:md5,3d36d0a4021fd292b265e2b5e72aaaf3", + [ + "cells_x_genes.barcodes.txt:md5,41f7adaf43b60f2f4f62d6a7073688de", + "cells_x_genes.genes.names.txt:md5,b29afa75be300c7f24fbd0740a66689b", + 
"cells_x_genes.genes.txt:md5,fe6d5501923867b514a0447aa4b4995f", + "cells_x_genes.mtx:md5,e12a45e7f7d6527f698dd9cb2e99ecd1" + ], + "inspect.json:md5,bafb47a58ac1bbf9be953f21c361d266", + "matrix.ec:md5,31a4c1a3e8e0c562b12f6569ffbf5459", + "output.bus:md5,d6fa0612a4a16eaf8a3e08bdc13ff49c", + "output.unfiltered.bus:md5,bf899b967657f612ba864188868d58cc", + "transcripts.txt:md5,23861cf43033e7c596e6989a88a3a373" + ], + [ + " \"commands\": [", + " \"kallisto bus -i kb_ref_out.idx -o test.count -x 10XV3 -t 2 subsampled_SC3_v3_NextGem_DI_CellPlex_Human_PBMC_10K_1_gex_S2_L001_R1_001.fastq.gz subsampled_SC3_v3_NextGem_DI_CellPlex_Human_PBMC_10K_1_gex_S2_L001_R2_001.fastq.gz\",", + " \"bustools sort -o test.count/tmp/output.s.bus -T test.count/tmp -t 2 -m 2G test.count/output.bus\",", + " \"bustools inspect -o test.count/inspect.json -w test.count/10x_version3_whitelist.txt test.count/tmp/output.s.bus\",", + " \"bustools correct -o test.count/tmp/output.s.c.bus -w test.count/10x_version3_whitelist.txt test.count/tmp/output.s.bus\",", + " \"bustools sort -o test.count/output.unfiltered.bus -T test.count/tmp -t 2 -m 2G test.count/tmp/output.s.c.bus\",", + " \"bustools count -o test.count/counts_unfiltered/cells_x_genes -g t2g.txt -e test.count/matrix.ec -t test.count/transcripts.txt --genecounts --umi-gene test.count/output.unfiltered.bus\"", + " ]," + ], + [ + "{", + "\t\"n_targets\": 12,", + "\t\"n_bootstraps\": 0,", + "\t\"n_processed\": 10000,", + "\t\"n_pseudoaligned\": 26,", + "\t\"n_unique\": 26,", + "\t\"p_pseudoaligned\": 0.3,", + "\t\"p_unique\": 0.3,", + "\t\"kallisto_version\": \"0.50.1\",", + "\t\"index_version\": 13," + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-01T15:48:24.476953631" + } +} \ No newline at end of file diff --git a/modules/nf-core/kallistobustools/count/tests/nextflow.config b/modules/nf-core/kallistobustools/count/tests/nextflow.config new file mode 100644 index 00000000..7a5cbfb4 --- /dev/null +++ b/modules/nf-core/kallistobustools/count/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: KALLISTOBUSTOOLS_COUNT { + ext.args = '--cellranger -m 1' + } +} diff --git a/modules/nf-core/kallistobustools/count/tests/tags.yml b/modules/nf-core/kallistobustools/count/tests/tags.yml new file mode 100644 index 00000000..9c432071 --- /dev/null +++ b/modules/nf-core/kallistobustools/count/tests/tags.yml @@ -0,0 +1,2 @@ +kallistobustools/count: + - "modules/nf-core/kallistobustools/count/**" diff --git a/modules/nf-core/kallistobustools/ref/environment.yml b/modules/nf-core/kallistobustools/ref/environment.yml index acbd0e0a..6ae07a8c 100644 --- a/modules/nf-core/kallistobustools/ref/environment.yml +++ b/modules/nf-core/kallistobustools/ref/environment.yml @@ -4,5 +4,5 @@ channels: - bioconda - defaults dependencies: - - bioconda::kb-python=0.27.2 + - bioconda::kb-python=0.28.2 - conda-forge::requests>=2.23.0 diff --git a/modules/nf-core/kallistobustools/ref/main.nf b/modules/nf-core/kallistobustools/ref/main.nf index 68d72ca9..0b45203d 100644 --- a/modules/nf-core/kallistobustools/ref/main.nf +++ b/modules/nf-core/kallistobustools/ref/main.nf @@ -4,8 +4,8 @@ process KALLISTOBUSTOOLS_REF { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/kb-python:0.27.2--pyhdfd78af_0' :
-        'biocontainers/kb-python:0.27.2--pyhdfd78af_0' }"
+        'https://depot.galaxyproject.org/singularity/kb-python:0.28.2--pyhdfd78af_2' :
+        'biocontainers/kb-python:0.28.2--pyhdfd78af_2' }"

     input:
     path fasta
@@ -62,4 +62,32 @@
         END_VERSIONS
         """
     }
+
+    stub:
+    if (workflow_mode == "standard") {
+        """
+        touch kb_ref_out.idx
+        touch t2g.txt
+        touch cdna.fa
+
+        cat <<-END_VERSIONS > versions.yml
+        "${task.process}":
+            kallistobustools: \$(echo \$(kb --version 2>&1) | sed 's/^.*kb_python //;s/positional arguments.*\$//')
+        END_VERSIONS
+        """
+    } else {
+        """
+        touch kb_ref_out.idx
+        touch t2g.txt
+        touch cdna.fa
+        touch intron.fa
+        touch cdna_t2c.txt
+        touch intron_t2c.txt
+
+        cat <<-END_VERSIONS > versions.yml
+        "${task.process}":
+            kallistobustools: \$(echo \$(kb --version 2>&1) | sed 's/^.*kb_python //;s/positional arguments.*\$//')
+        END_VERSIONS
+        """
+    }
 }
diff --git a/modules/nf-core/kallistobustools/ref/meta.yml b/modules/nf-core/kallistobustools/ref/meta.yml
index 00be5143..64deab9b 100644
--- a/modules/nf-core/kallistobustools/ref/meta.yml
+++ b/modules/nf-core/kallistobustools/ref/meta.yml
@@ -14,7 +14,7 @@ tools:
       documentation: https://kb-python.readthedocs.io/en/latest/index.html
       tool_dev_url: https://github.com/pachterlab/kb_python
       doi: "10.22002/D1.1876"
-      licence: MIT License
+      licence: ["MIT"]
 input:
   - fasta:
       type: file
@@ -26,8 +26,8 @@ input:
       pattern: "*.{gtf,gtf.gz}"
   - workflow_mode:
       type: string
-      description: String value defining workflow to use, can be one of "standard", "lamanno", "nucleus"
-      pattern: "{standard,lamanno,nucleus}"
+      description: String value defining workflow to use, can be one of "standard", "nac", "lamanno" (obsolete)
+      pattern: "{standard,lamanno,nac}"
 output:
   - versions:
       type: file
@@ -43,19 +43,19 @@
       pattern: "*t2g.{txt}"
   - cdna:
       type: file
-      description: Cdna fasta file
+      description: cDNA fasta file
       pattern: "*cdna.{fa}"
   - intron:
       type: file
-      description: intron fasta file
+      description: Intron fasta file
       pattern: "*intron.{fa}"
   - cdna_t2c:
       type: file
-      description: cdna transcript to capture file
+      description: cDNA transcript to capture file
       pattern: "*cdna_t2c.{txt}"
   - intron_t2c:
       type: file
-      description: intron transcript to capture file
+      description: Intron transcript to capture file
       pattern: "*intron_t2c.{txt}"
 authors:
   - "@flowuenne"
diff --git a/modules/nf-core/kallistobustools/ref/tests/main.nf.test b/modules/nf-core/kallistobustools/ref/tests/main.nf.test
new file mode 100644
index 00000000..dc49d9ac
--- /dev/null
+++ b/modules/nf-core/kallistobustools/ref/tests/main.nf.test
@@ -0,0 +1,120 @@
+nextflow_process {
+
+    name "Test Process KALLISTOBUSTOOLS_REF"
+    script "../main.nf"
+    process "KALLISTOBUSTOOLS_REF"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "kallistobustools"
+    tag "kallistobustools/ref"
+
+    test("genome.fasta + genome.gtf + 'standard'") {
+
+        when {
+            process {
+                """
+                input[0] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)
+                input[1] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true)
+                input[2] = "standard"
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    process.out.versions,
+                    process.out.t2g,
+                    process.out.cdna,
+                    process.out.intron,
+                    process.out.cdna_t2c,
+                    process.out.intron_t2c,
+                    ).match()
+                },
+                { assert
file(process.out.index.get(0)).exists() } + ) + } + } + + test("genome.fasta + genome.gtf + 'nac'") { + + when { + process { + """ + input[0] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + input[1] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) + input[2] = "nac" + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.versions, + process.out.t2g, + process.out.cdna, + process.out.intron, + process.out.cdna_t2c, + process.out.intron_t2c, + ).match() + }, + { assert file(process.out.index.get(0)).exists() } + ) + } + } + + test("genome.fasta + genome.gtf + 'standard' - stub") { + + options "-stub" + + when { + process { + """ + input[0] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + input[1] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) + input[2] = "standard" + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("genome.fasta + genome.gtf + 'nac' - stub") { + + when { + process { + """ + input[0] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + input[1] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) + input[2] = "nac" + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.versions, + process.out.t2g, + process.out.cdna, + process.out.intron, + process.out.cdna_t2c, + process.out.intron_t2c, + ).match() + }, + { assert file(process.out.index.get(0)).exists() } + ) + } + } +} diff --git a/modules/nf-core/kallistobustools/ref/tests/main.nf.test.snap b/modules/nf-core/kallistobustools/ref/tests/main.nf.test.snap new file mode 100644 index 00000000..9c2be8a7 --- /dev/null +++ b/modules/nf-core/kallistobustools/ref/tests/main.nf.test.snap @@ -0,0 +1,136 @@ +{ + "genome.fasta + genome.gtf + 'standard'": { + "content": [ + [ + "versions.yml:md5,d3d08c3c5638ae540965f77b8178b3c1" + ], + [ + "t2g.txt:md5,d1a8a22c59b9cb0bda39c0c9bb3f6afe" + ], + [ + "cdna.fa:md5,7bca59288fab822451de250d2eee48dc" + ], + [ + + ], + [ + + ], + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-29T23:37:43.739374859" + }, + "genome.fasta + genome.gtf + 'nac'": { + "content": [ + [ + "versions.yml:md5,d3d08c3c5638ae540965f77b8178b3c1" + ], + [ + "t2g.txt:md5,58591306b33bb948bac7b40f346d0cd7" + ], + [ + "cdna.fa:md5,7bca59288fab822451de250d2eee48dc" + ], + [ + "intron.fa:md5,1aad4e3f5d006f495cc6647fa0bbf6ff" + ], + [ + "cdna_t2c.txt:md5,23861cf43033e7c596e6989a88a3a373" + ], + [ + "intron_t2c.txt:md5,fe6d5501923867b514a0447aa4b4995f" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-29T23:37:59.436989671" + }, + "genome.fasta + genome.gtf + 'standard' - stub": { + "content": [ + { + "0": [ + "versions.yml:md5,d3d08c3c5638ae540965f77b8178b3c1" + ], + "1": [ + "kb_ref_out.idx:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "2": [ + "t2g.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "3": [ + "cdna.fa:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "4": [ + + ], + "5": [ + + ], + "6": [ + + ], + "cdna": [ + "cdna.fa:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "cdna_t2c": [ + + ], + "index": [ + 
"kb_ref_out.idx:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "intron": [ + + ], + "intron_t2c": [ + + ], + "t2g": [ + "t2g.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "versions": [ + "versions.yml:md5,d3d08c3c5638ae540965f77b8178b3c1" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-29T23:38:09.578411047" + }, + "genome.fasta + genome.gtf + 'nac' - stub": { + "content": [ + [ + "versions.yml:md5,d3d08c3c5638ae540965f77b8178b3c1" + ], + [ + "t2g.txt:md5,58591306b33bb948bac7b40f346d0cd7" + ], + [ + "cdna.fa:md5,7bca59288fab822451de250d2eee48dc" + ], + [ + "intron.fa:md5,1aad4e3f5d006f495cc6647fa0bbf6ff" + ], + [ + "cdna_t2c.txt:md5,23861cf43033e7c596e6989a88a3a373" + ], + [ + "intron_t2c.txt:md5,fe6d5501923867b514a0447aa4b4995f" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-29T23:38:25.355912473" + } +} \ No newline at end of file diff --git a/modules/nf-core/kallistobustools/ref/tests/tags.yml b/modules/nf-core/kallistobustools/ref/tests/tags.yml new file mode 100644 index 00000000..208c8d27 --- /dev/null +++ b/modules/nf-core/kallistobustools/ref/tests/tags.yml @@ -0,0 +1,2 @@ +kallistobustools/ref: + - "modules/nf-core/kallistobustools/ref/**" diff --git a/modules/nf-core/multiqc/environment.yml b/modules/nf-core/multiqc/environment.yml index 7625b752..ca39fb67 100644 --- a/modules/nf-core/multiqc/environment.yml +++ b/modules/nf-core/multiqc/environment.yml @@ -4,4 +4,4 @@ channels: - bioconda - defaults dependencies: - - bioconda::multiqc=1.19 + - bioconda::multiqc=1.21 diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index 1b9f7c43..47ac352f 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -3,8 +3,8 @@ process MULTIQC { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/multiqc:1.19--pyhdfd78af_0' : - 'biocontainers/multiqc:1.19--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.21--pyhdfd78af_0' : + 'biocontainers/multiqc:1.21--pyhdfd78af_0' }" input: path multiqc_files, stageAs: "?/*" diff --git a/modules/nf-core/multiqc/tests/main.nf.test b/modules/nf-core/multiqc/tests/main.nf.test index d0438eda..f1c4242e 100644 --- a/modules/nf-core/multiqc/tests/main.nf.test +++ b/modules/nf-core/multiqc/tests/main.nf.test @@ -3,6 +3,7 @@ nextflow_process { name "Test Process MULTIQC" script "../main.nf" process "MULTIQC" + tag "modules" tag "modules_nfcore" tag "multiqc" @@ -12,7 +13,7 @@ nextflow_process { when { process { """ - input[0] = Channel.of([file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz_fastqc_zip'], checkIfExists: true)]) + input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) input[1] = [] input[2] = [] input[3] = [] @@ -25,7 +26,7 @@ nextflow_process { { assert process.success }, { assert process.out.report[0] ==~ ".*/multiqc_report.html" }, { assert process.out.data[0] ==~ ".*/multiqc_data" }, - { assert snapshot(process.out.versions).match("versions") } + { assert snapshot(process.out.versions).match("multiqc_versions_single") } ) } @@ -36,7 +37,7 @@ nextflow_process { when { process { """ - input[0] = Channel.of([file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz_fastqc_zip'], checkIfExists: true)]) + input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) input[1] = Channel.of(file("https://github.com/nf-core/tools/raw/dev/nf_core/pipeline-template/assets/multiqc_config.yml", checkIfExists: true)) input[2] = [] input[3] = [] @@ -49,7 +50,7 @@ nextflow_process { { assert process.success }, { assert process.out.report[0] ==~ ".*/multiqc_report.html" }, { assert process.out.data[0] ==~ ".*/multiqc_data" }, - { assert snapshot(process.out.versions).match("versions") } + { assert snapshot(process.out.versions).match("multiqc_versions_config") } ) } } @@ -61,7 +62,7 @@ nextflow_process { when { process { """ - input[0] = Channel.of([file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz_fastqc_zip'], checkIfExists: true)]) + input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) input[1] = [] input[2] = [] input[3] = [] @@ -75,7 +76,7 @@ nextflow_process { { assert snapshot(process.out.report.collect { file(it).getName() } + process.out.data.collect { file(it).getName() } + process.out.plots.collect { file(it).getName() } + - process.out.versions ).match() } + process.out.versions ).match("multiqc_stub") } ) } diff --git a/modules/nf-core/multiqc/tests/main.nf.test.snap b/modules/nf-core/multiqc/tests/main.nf.test.snap index d37e7304..bfebd802 100644 --- a/modules/nf-core/multiqc/tests/main.nf.test.snap +++ b/modules/nf-core/multiqc/tests/main.nf.test.snap @@ -1,21 +1,41 @@ { - "versions": { + "multiqc_versions_single": { "content": [ [ - "versions.yml:md5,14e9a2661241abd828f4f06a7b5c222d" + "versions.yml:md5,21f35ee29416b9b3073c28733efe4b7d" ] ], - "timestamp": "2024-01-09T23:02:49.911994" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-29T08:48:55.657331" }, - "sarscov2 single-end [fastqc] - stub": { + "multiqc_stub": { "content": [ [ "multiqc_report.html", 
"multiqc_data", "multiqc_plots", - "versions.yml:md5,14e9a2661241abd828f4f06a7b5c222d" + "versions.yml:md5,21f35ee29416b9b3073c28733efe4b7d" ] ], - "timestamp": "2024-01-09T23:03:14.524346" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-29T08:49:49.071937" + }, + "multiqc_versions_config": { + "content": [ + [ + "versions.yml:md5,21f35ee29416b9b3073c28733efe4b7d" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-29T08:49:25.457567" } } \ No newline at end of file diff --git a/modules/nf-core/star/genomegenerate/environment.yml b/modules/nf-core/star/genomegenerate/environment.yml index 93e4476a..791f255e 100644 --- a/modules/nf-core/star/genomegenerate/environment.yml +++ b/modules/nf-core/star/genomegenerate/environment.yml @@ -1,11 +1,10 @@ name: star_genomegenerate - channels: - conda-forge - bioconda - defaults - dependencies: - bioconda::samtools=1.18 + - bioconda::htslib=1.18 - bioconda::star=2.7.10a - conda-forge::gawk=5.1.0 diff --git a/modules/nf-core/star/genomegenerate/tests/main.nf.test b/modules/nf-core/star/genomegenerate/tests/main.nf.test index af0c9421..c17c8ba4 100644 --- a/modules/nf-core/star/genomegenerate/tests/main.nf.test +++ b/modules/nf-core/star/genomegenerate/tests/main.nf.test @@ -8,18 +8,18 @@ nextflow_process { tag "star" tag "star/genomegenerate" - test("homo_sapiens") { + test("fasta_gtf") { when { process { """ input[0] = Channel.of([ [ id:'test_fasta' ], - [file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)] + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] ]) input[1] = Channel.of([ [ id:'test_gtf' ], - [file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)] + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] ]) """ } @@ -28,14 +28,13 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(file(process.out.index[0][1]).listFiles().collect { it.getName() }.sort().toString()).match("index_with_gtf") }, - { assert snapshot(process.out.versions).match("versions") } + { assert snapshot(file(process.out.index[0][1]).listFiles().collect { it.getName() }.sort().toString()).match("fasta_gtf_index") }, + { assert snapshot(process.out.versions).match("fasta_gtf_versions") } ) } - } - test("homo_sapiens-stub") { + test("fasta_gtf_stub") { options '-stub' @@ -44,11 +43,11 @@ nextflow_process { """ input[0] = Channel.of([ [ id:'test_fasta' ], - [file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)] + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] ]) input[1] = Channel.of([ [ id:'test_gtf' ], - [file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)] + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] ]) """ } @@ -57,21 +56,20 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(file(process.out.index[0][1]).listFiles().collect { it.getName() }.sort().toString()).match("index_with_gtf") }, - { assert snapshot(process.out.versions).match("versions") } + { assert snapshot(file(process.out.index[0][1]).listFiles().collect { it.getName() }.sort().toString()).match("fasta_gtf_stub_index") }, + { assert 
snapshot(process.out.versions).match("fasta_gtf_stub_versions") } ) } - } - test("homo_sapiens-without_gtf") { + test("fasta") { when { process { """ input[0] = Channel.of([ [ id:'test_fasta' ], - [file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)] + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] ]) input[1] = Channel.of([ [], [] ]) """ @@ -81,14 +79,14 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(file(process.out.index[0][1]).listFiles().collect { it.getName() }.sort().toString()).match("index_without_gtf") }, - { assert snapshot(process.out.versions).match("versions") } + { assert snapshot(file(process.out.index[0][1]).listFiles().collect { it.getName() }.sort().toString()).match("fasta_index") }, + { assert snapshot(process.out.versions).match("fasta_versions") } ) } } - test("homo_sapiens-without_gtf-stub") { + test("fasta_stub") { options '-stub' @@ -97,7 +95,7 @@ nextflow_process { """ input[0] = Channel.of([ [ id:'test_fasta' ], - [file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)] + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] ]) input[1] = Channel.of([ [], [] ]) """ @@ -107,11 +105,11 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(file(process.out.index[0][1]).listFiles().collect { it.getName() }.sort().toString()).match("index_without_gtf") }, - { assert snapshot(process.out.versions).match("versions") } + { assert snapshot(file(process.out.index[0][1]).listFiles().collect { it.getName() }.sort().toString()).match("fasta_stub_index") }, + { assert snapshot(process.out.versions).match("fasta_stub_versions") } ) } } -} \ No newline at end of file +} diff --git a/modules/nf-core/star/genomegenerate/tests/main.nf.test.snap b/modules/nf-core/star/genomegenerate/tests/main.nf.test.snap index 9de08c74..5653d6e6 100644 --- a/modules/nf-core/star/genomegenerate/tests/main.nf.test.snap +++ b/modules/nf-core/star/genomegenerate/tests/main.nf.test.snap @@ -1,22 +1,90 @@ { - "versions": { + "fasta_gtf_versions": { "content": [ [ "versions.yml:md5,46b8f1f34bb7f23892cd1eb249ed4d7f" ] ], - "timestamp": "2023-12-19T11:05:51.741109" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-01T15:54:31.798555" }, - "index_with_gtf": { + "fasta_stub_versions": { + "content": [ + [ + "versions.yml:md5,46b8f1f34bb7f23892cd1eb249ed4d7f" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-01T15:55:07.521209" + }, + "fasta_gtf_stub_index": { + "content": [ + "[Genome, Log.out, SA, SAindex, chrLength.txt, chrName.txt, chrNameLength.txt, chrStart.txt, exonGeTrInfo.tab, exonInfo.tab, geneInfo.tab, genomeParameters.txt, sjdbInfo.txt, sjdbList.fromGTF.out.tab, sjdbList.out.tab, transcriptInfo.tab]" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-01T15:54:46.478098" + }, + "fasta_gtf_stub_versions": { + "content": [ + [ + "versions.yml:md5,46b8f1f34bb7f23892cd1eb249ed4d7f" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-01T15:54:46.491657" + }, + "fasta_index": { + "content": [ + "[Genome, Log.out, SA, SAindex, chrLength.txt, chrName.txt, chrNameLength.txt, chrStart.txt, genomeParameters.txt]" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + 
"timestamp": "2024-02-01T15:54:57.552329" + }, + "fasta_versions": { + "content": [ + [ + "versions.yml:md5,46b8f1f34bb7f23892cd1eb249ed4d7f" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-01T15:54:57.560541" + }, + "fasta_gtf_index": { "content": [ "[Genome, Log.out, SA, SAindex, chrLength.txt, chrName.txt, chrNameLength.txt, chrStart.txt, exonGeTrInfo.tab, exonInfo.tab, geneInfo.tab, genomeParameters.txt, sjdbInfo.txt, sjdbList.fromGTF.out.tab, sjdbList.out.tab, transcriptInfo.tab]" ], - "timestamp": "2023-12-19T11:38:14.551548" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-01T15:54:31.786814" }, - "index_without_gtf": { + "fasta_stub_index": { "content": [ "[Genome, Log.out, SA, SAindex, chrLength.txt, chrName.txt, chrNameLength.txt, chrStart.txt, genomeParameters.txt]" ], - "timestamp": "2023-12-19T11:38:22.382905" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-01T15:55:07.517472" } } \ No newline at end of file diff --git a/modules/nf-core/universc/CITATION.cff b/modules/nf-core/universc/CITATION.cff index b00957d1..35e281e6 100644 --- a/modules/nf-core/universc/CITATION.cff +++ b/modules/nf-core/universc/CITATION.cff @@ -1,23 +1,6 @@ cff-version: 1.2.0 message: "If you use this software, please cite it as below." authors: - - given-names: "S. Thomas" - family-names: "Kelly" - email: "tom.kelly@riken.jp" - affiliation: "Center for Integrative Medical Sciences, RIKEN, Suehiro-cho-1-7-22, Tsurumi Ward, Yokohama, Japan" - orcid: "https://orcid.org/0000-0003-3904-6690" - - family-names: "Battenberg" - given-names: "Kai" - email: "kai.battenberg@riken.jp" - affiliation: "Center for Sustainable Resource Science, RIKEN, Suehiro-cho-1-7-22, Tsurumi Ward, Yokohama, Japan" - orcid: "http://orcid.org/0000-0001-7517-2657" -version: 1.2.5.1 -doi: 10.1101/2021.01.19.427209 -date-released: 2021-02-14 -url: "https://github.com/minoda-lab/universc" -preferred-citation: - type: article - authors: - given-names: "S. Thomas" family-names: "Kelly" email: "tom.kelly@riken.jp" @@ -27,25 +10,42 @@ preferred-citation: given-names: "Kai" email: "kai.battenberg@riken.jp" affiliation: "Center for Sustainable Resource Science, RIKEN, Suehiro-cho-1-7-22, Tsurumi Ward, Yokohama, Japan" - orcid: "https://orcid.org/http://orcid.org/0000-0001-7517-2657" - - family-names: "Hetherington" - given-names: "Nicola A." - affiliation: "Center for Integrative Medical Sciences, RIKEN, Suehiro-cho-1-7-22, Tsurumi Ward, Yokohama, Japan" - orcid: "http://orcid.org/0000-0001-8802-2906" - - family-names: "Hayashi" - given-names: "Makoto" - affiliation: "Center for Sustainable Resource Science, RIKEN, Suehiro-cho-1-7-22, Tsurumi Ward, Yokohama, Japan" - orcid: "http://orcid.org/0000-0001-6389-4265" - - given-names: "Aki" - family-names: "Minoda" - email: "akiko.minoda@riken.jp" - affiliation: Center for Integrative Medical Sciences, RIKEN, Suehiro-cho-1-7-22, Tsurumi Ward, Yokohama, Japan" - orcid: "http://orcid.org/0000-0002-2927-5791" - doi: "10.1101/2021.01.19.427209" - title: "UniverSC: a flexible cross-platform single-cell data processing pipeline" - year: "2021" - journal: "bioRxiv" - start: 2021.01.19.427209 - volume: - issue: - month: 1 + orcid: "http://orcid.org/0000-0001-7517-2657" +version: 1.2.5.1 +doi: 10.1101/2021.01.19.427209 +date-released: 2021-02-14 +url: "https://github.com/minoda-lab/universc" +preferred-citation: + type: article + authors: + - given-names: "S. 
Thomas" + family-names: "Kelly" + email: "tom.kelly@riken.jp" + affiliation: "Center for Integrative Medical Sciences, RIKEN, Suehiro-cho-1-7-22, Tsurumi Ward, Yokohama, Japan" + orcid: "https://orcid.org/0000-0003-3904-6690" + - family-names: "Battenberg" + given-names: "Kai" + email: "kai.battenberg@riken.jp" + affiliation: "Center for Sustainable Resource Science, RIKEN, Suehiro-cho-1-7-22, Tsurumi Ward, Yokohama, Japan" + orcid: "https://orcid.org/http://orcid.org/0000-0001-7517-2657" + - family-names: "Hetherington" + given-names: "Nicola A." + affiliation: "Center for Integrative Medical Sciences, RIKEN, Suehiro-cho-1-7-22, Tsurumi Ward, Yokohama, Japan" + orcid: "http://orcid.org/0000-0001-8802-2906" + - family-names: "Hayashi" + given-names: "Makoto" + affiliation: "Center for Sustainable Resource Science, RIKEN, Suehiro-cho-1-7-22, Tsurumi Ward, Yokohama, Japan" + orcid: "http://orcid.org/0000-0001-6389-4265" + - given-names: "Aki" + family-names: "Minoda" + email: "akiko.minoda@riken.jp" + affiliation: Center for Integrative Medical Sciences, RIKEN, Suehiro-cho-1-7-22, Tsurumi Ward, Yokohama, Japan" + orcid: "http://orcid.org/0000-0002-2927-5791" + doi: "10.1101/2021.01.19.427209" + title: "UniverSC: a flexible cross-platform single-cell data processing pipeline" + year: "2021" + journal: "bioRxiv" + start: 2021.01.19.427209 + volume: + issue: + month: 1 diff --git a/nextflow.config b/nextflow.config index 9e83c42a..91c9c617 100644 --- a/nextflow.config +++ b/nextflow.config @@ -19,17 +19,19 @@ params { // reference files genome = null transcript_fasta = null + txp2gene = null // salmon alevin parameters (simpleaf) simpleaf_rlen = 91 barcode_whitelist = null - txp2gene = null salmon_index = null - // kallist bustools parameters - kallisto_gene_map = null + // kallisto bustools parameters kallisto_index = null kb_workflow = "standard" + kb_t1c = null + kb_t2c = null + kb_filter = false // STARsolo parameters star_index = null @@ -38,15 +40,18 @@ params { star_feature = "Gene" // Cellranger parameters - cellranger_index = null + cellranger_index = null // Cellranger ARC parameters - motifs = null - cellrangerarc_config = null + motifs = null + cellrangerarc_config = null cellrangerarc_reference = null - // UniverSC paramaters - universc_index = null + // UniverSC parameters + universc_index = null + + // Emptydrops parameters + skip_emptydrops = false // Template Boilerplate options skip_multiqc = false @@ -82,7 +87,6 @@ params { config_profile_contact = null config_profile_url = null - // Max resource options // Defaults only, expecting to be overwritten max_memory = '128.GB' @@ -96,6 +100,11 @@ params { validationShowHiddenParams = false validate_params = true + // TODO temporary workaround a warning + // not used anywhere and should not be necessary anymore after a nf-validation plugin update + // TODO when removing this, also remove the ignored lint check from .nf-core.yml + monochromeLogs = null + } // Load base.config by default for all pipelines @@ -109,7 +118,7 @@ try { } // Load nf-core/scrnaseq custom profiles from different institutions. -// Warning: Uncomment only if a pipeline-specific instititutional config already exists on nf-core/configs! +// Warning: Uncomment only if a pipeline-specific institutional config already exists on nf-core/configs! 
// try { // includeConfig "${params.custom_config_base}/pipeline/scrnaseq.config" // } catch (Exception e) { @@ -129,6 +138,7 @@ profiles { podman.enabled = false shifter.enabled = false charliecloud.enabled = false + channels = ['conda-forge', 'bioconda', 'defaults'] apptainer.enabled = false } mamba { @@ -272,7 +282,7 @@ manifest { description = """Pipeline for processing 10x Genomics single cell rnaseq data""" mainScript = 'main.nf' nextflowVersion = '!>=23.04.0' - version = '2.5.1' + version = '2.6.0' doi = '10.5281/zenodo.3568187' } diff --git a/nextflow_schema.json b/nextflow_schema.json index 34af4c64..b799d78c 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -16,6 +16,7 @@ "type": "string", "format": "file-path", "exists": true, + "schema": "assets/schema_input.json", "mimetype": "text/csv", "pattern": "^\\S+\\.csv$", "description": "Path to comma-separated file containing information about the samples in the experiment.", @@ -50,7 +51,9 @@ "barcode_whitelist": { "type": "string", "description": "If not using the 10X Genomics platform, a custom barcode whitelist can be used with `--barcode_whitelist`.", - "fa_icon": "fas fa-barcode" + "fa_icon": "fas fa-barcode", + "format": "file-path", + "exists": true }, "aligner": { "type": "string", @@ -84,6 +87,10 @@ "skip_fastqc": { "type": "boolean", "description": "Skip FastQC" + }, + "skip_emptydrops": { + "type": "boolean", + "description": "Skip custom empty drops filter module" } } }, @@ -119,12 +126,16 @@ "transcript_fasta": { "type": "string", "description": "A cDNA FASTA file", - "fa_icon": "fas fa-dna" + "fa_icon": "fas fa-dna", + "format": "file-path", + "exists": true }, "gtf": { "type": "string", "description": "Reference GTF annotation file", - "fa_icon": "fas fa-code-branch" + "fa_icon": "fas fa-code-branch", + "format": "file-path", + "exists": true }, "save_reference": { "type": "boolean", @@ -135,7 +146,7 @@ "type": "string", "format": "directory-path", "description": "Directory / URL base for iGenomes references.", - "default": "s3://ngi-igenomes/igenomes", + "default": "s3://ngi-igenomes/igenomes/", "fa_icon": "fas fa-cloud-download-alt", "hidden": true } @@ -150,13 +161,17 @@ "salmon_index": { "type": "string", "description": "This can be used to specify a precomputed Salmon index in the pipeline, in order to skip the generation of required indices by Salmon itself.", - "fa_icon": "fas fa-fish" + "fa_icon": "fas fa-fish", + "format": "file-path", + "exists": true }, "txp2gene": { "type": "string", "description": "Path to transcript to gene mapping file. 
This allows the specification of a transcript to gene mapping file for Salmon Alevin and AlevinQC.", - "help_text": "> This is not the same as the `kallisto_gene_map` parameter down below and is only used by the Salmon Alevin workflow.", - "fa_icon": "fas fa-map-marked-alt" + "help_text": "> This is only used by the Salmon Alevin workflow.", + "fa_icon": "fas fa-map-marked-alt", + "format": "file-path", + "exists": true }, "simpleaf_rlen": { "type": "integer", @@ -176,7 +191,9 @@ "type": "string", "description": "Specify a path to the precomputed STAR index.", "help_text": "> NB: This has to be computed with STAR Version 2.7 or later, as STARsolo was only first supported by STAR Version 2.7.", - "fa_icon": "fas fa-asterisk" + "fa_icon": "fas fa-asterisk", + "format": "file-path", + "exists": true }, "star_ignore_sjdbgtf": { "type": "string", @@ -201,24 +218,40 @@ "type": "object", "description": "Params related to Kallisto/BUS tool", "default": "", - "fa_icon": "fas fa-fish", + "fa_icon": "fas fa-rainbow", "properties": { - "kallisto_gene_map": { - "type": "string", - "description": "Specify a Kallisto gene mapping file here. If you don't, this will be automatically created in the Kallisto workflow when specifying a valid `--gtf` file.", - "fa_icon": "fas fa-fish" - }, "kallisto_index": { "type": "string", "description": "Specify a path to the precomputed Kallisto index.", - "fa_icon": "fas fa-fish" + "fa_icon": "fas fa-rainbow", + "format": "file-path", + "exists": true + }, + "kb_t1c": { + "type": "string", + "description": "Specify a path to the cDNA transcripts-to-capture.", + "fa_icon": "fas fa-rainbow", + "format": "file-path", + "exists": true + }, + "kb_t2c": { + "type": "string", + "description": "Specify a path to the intron transcripts-to-capture.", + "fa_icon": "fas fa-rainbow", + "format": "file-path", + "exists": true }, "kb_workflow": { "type": "string", "default": "standard", - "description": "Type of workflow. Use `lamanno` for RNA velocity based on La Manno et al. 2018 logic. Use `nucleus` for RNA velocity on single-nucleus RNA-seq reads. Use `kite` for feature barcoding. Use `kite: 10xFB` for 10x Genomics Feature Barcoding technology. (default: standard)", + "description": "Type of workflow. Use `nac` for an index type that can quantify nascent and mature RNA. Use `lamanno` for RNA velocity based on La Manno et al. 2018 logic. (default: standard)", + "fa_icon": "fas fa-rainbow", + "enum": ["standard", "lamanno", "nac"] + }, + "kb_filter": { + "type": "boolean", "fa_icon": "fas fa-fish", - "enum": ["standard", "lamanno", "nucleus", "kite", "kite: 10xFB"] + "description": "Activate Kallisto/BUStools filtering algorithm" } } }, @@ -230,7 +263,9 @@ "properties": { "cellranger_index": { "type": "string", - "description": "Specify a pre-calculated cellranger index. Readily prepared indexes can be obtained from the 10x Genomics website. " + "description": "Specify a pre-calculated cellranger index. Readily prepared indexes can be obtained from the 10x Genomics website. ", + "format": "file-path", + "exists": true } } }, @@ -262,7 +297,9 @@ "properties": { "universc_index": { "type": "string", - "description": "Specify a pre-calculated cellranger index. Readily prepared indexes can be obtained from the 10x Genomics website." + "description": "Specify a pre-calculated cellranger index. 
Readily prepared indexes can be obtained from the 10x Genomics website.", + "format": "file-path", + "exists": true } } }, @@ -416,6 +453,7 @@ "type": "string", "format": "file-path", "description": "Custom config file to supply to MultiQC.", + "exists": true, "fa_icon": "fas fa-cog", "hidden": true }, diff --git a/nf-test.config b/nf-test.config new file mode 100644 index 00000000..2fa82adf --- /dev/null +++ b/nf-test.config @@ -0,0 +1,8 @@ +config { + + testsDir "tests" + workDir ".nf-test" + configFile "tests/nextflow.config" + profile "docker" + +} diff --git a/pyproject.toml b/pyproject.toml index 0d62beb6..56110621 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,10 +1,15 @@ -# Config file for Python. Mostly used to configure linting of bin/check_samplesheet.py with Black. +# Config file for Python. Mostly used to configure linting of bin/*.py with Ruff. # Should be kept the same as nf-core/tools to avoid fighting with template synchronisation. -[tool.black] +[tool.ruff] line-length = 120 -target_version = ["py37", "py38", "py39", "py310"] +target-version = "py38" +cache-dir = "~/.cache/ruff" -[tool.isort] -profile = "black" -known_first_party = ["nf_core"] -multi_line_output = 3 +[tool.ruff.lint] +select = ["I", "E1", "E4", "E7", "E9", "F", "UP", "N"] + +[tool.ruff.lint.isort] +known-first-party = ["nf_core"] + +[tool.ruff.lint.per-file-ignores] +"__init__.py" = ["E402", "F401"] diff --git a/subworkflows/local/align_cellranger.nf b/subworkflows/local/align_cellranger.nf index bfdd533e..2461373b 100644 --- a/subworkflows/local/align_cellranger.nf +++ b/subworkflows/local/align_cellranger.nf @@ -40,8 +40,30 @@ workflow CELLRANGER_ALIGN { ) ch_versions = ch_versions.mix(CELLRANGER_COUNT.out.versions) + // + // Split channels of raw and filtered matrices to avoid file collision problems when loading the inputs in conversion modules. + // + ch_matrices_raw = + CELLRANGER_COUNT.out.outs.map { meta, mtx_files -> + def desired_files = [] + mtx_files.each{ + if ( it.toString().contains("raw_feature_bc_matrix") ) { desired_files.add( it ) } + } + [ meta, desired_files ] + } + + ch_matrices_filtered = + CELLRANGER_COUNT.out.outs.map { meta, mtx_files -> + def desired_files = [] + mtx_files.each{ + if ( it.toString().contains("filtered_feature_bc_matrix") ) { desired_files.add( it ) } + } + [ meta, desired_files ] + } + emit: ch_versions - cellranger_out = CELLRANGER_COUNT.out.outs - star_index = cellranger_index + cellranger_out = CELLRANGER_COUNT.out.outs + cellranger_matrices = ch_matrices_raw.mix( ch_matrices_filtered ) + star_index = cellranger_index } diff --git a/subworkflows/local/align_cellrangerarc.nf b/subworkflows/local/align_cellrangerarc.nf index 3232a020..6de84e7b 100644 --- a/subworkflows/local/align_cellrangerarc.nf +++ b/subworkflows/local/align_cellrangerarc.nf @@ -29,8 +29,8 @@ workflow CELLRANGERARC_ALIGN { ch_versions = ch_versions.mix(CELLRANGERARC_MKGTF.out.versions) // Make reference genome - assert ( ( !params.cellrangerarc_reference && !cellrangerarc_config ) || - ( params.cellrangerarc_reference && cellrangerarc_config ) ) : + assert (( !params.cellrangerarc_reference && !cellrangerarc_config ) || + ( params.cellrangerarc_reference && cellrangerarc_config ) ) : "If you provide a config file you also have to specify the reference name and vice versa."
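To make the guard above concrete: a cellranger-arc run must either leave both parameters unset or set both together. A minimal sketch of a user config that satisfies the assertion (the reference name and path below are hypothetical, not part of this diff):

```groovy
// custom.config — hypothetical overrides for a cellranger-arc run;
// cellrangerarc_reference and cellrangerarc_config must be set together,
// otherwise the assert above fails.
params {
    aligner                 = 'cellrangerarc'
    cellrangerarc_reference = 'my_arc_reference'           // hypothetical reference name
    cellrangerarc_config    = '/data/multiome_config.csv'  // hypothetical config file
}
```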
cellrangerarc_reference = 'cellrangerarc_reference' @@ -53,4 +53,4 @@ workflow CELLRANGERARC_ALIGN { emit: ch_versions cellranger_arc_out = CELLRANGERARC_COUNT.out.outs -} \ No newline at end of file +} diff --git a/subworkflows/local/align_universc.nf b/subworkflows/local/align_universc.nf index acb2c560..cf16985e 100644 --- a/subworkflows/local/align_universc.nf +++ b/subworkflows/local/align_universc.nf @@ -34,11 +34,13 @@ workflow UNIVERSC_ALIGN { // Obtain read counts UNIVERSC ( - ch_fastq.map{ meta, reads -> [ - // defaults - ["samples": [meta.id], "technology": universc_technology, "chemistry": "auto", "single_end": false, "strandedness": "forward"] + meta, // + meta overrides defaults with information already in meta - reads - ] }, + ch_fastq.map{ + meta, reads -> [ + // defaults + ["samples": [meta.id], "technology": universc_technology, "chemistry": "auto", "single_end": false, "strandedness": "forward"] + meta, // + meta overrides defaults with information already in meta + reads + ] + }, universc_index ) ch_versions = ch_versions.mix(UNIVERSC.out.versions) diff --git a/subworkflows/local/fastqc.nf b/subworkflows/local/fastqc.nf index 6825a9e0..05bb128b 100644 --- a/subworkflows/local/fastqc.nf +++ b/subworkflows/local/fastqc.nf @@ -4,35 +4,35 @@ include { FASTQC } from '../../modules/nf-core/fastqc/main' workflow FASTQC_CHECK { - take: - ch_fastq + take: + ch_fastq - main: + main: - def n = (params.aligner == 'cellrangerarc') ? 3 : 1 - ch_fastq.map { ch -> [ ch[0], ch[n] ] }.set { ch_fastq } + def n = (params.aligner == 'cellrangerarc') ? 3 : 1 + ch_fastq.map { ch -> [ ch[0], ch[n] ] }.set { ch_fastq } - /* - * FastQ QC using FASTQC - */ - FASTQC ( ch_fastq ) - fastqc_zip = FASTQC.out.zip - fastqc_html = FASTQC.out.html + /* + * FastQ QC using FASTQC + */ + FASTQC ( ch_fastq ) + fastqc_zip = FASTQC.out.zip + fastqc_html = FASTQC.out.html - fastqc_zip - .map { it -> [ it[1] ] } - .set { fastqc_zip_only } - fastqc_html - .map { it -> [ it[1] ] } - .set { fastqc_html_only } + fastqc_zip + .map { it -> [ it[1] ] } + .set { fastqc_zip_only } + fastqc_html + .map { it -> [ it[1] ] } + .set { fastqc_html_only } - fastqc_multiqc = Channel.empty() - fastqc_multiqc = fastqc_multiqc.mix( fastqc_zip_only, fastqc_html_only ) - fastqc_version = FASTQC.out.versions + fastqc_multiqc = Channel.empty() + fastqc_multiqc = fastqc_multiqc.mix( fastqc_zip_only, fastqc_html_only ) + fastqc_version = FASTQC.out.versions - emit: - fastqc_zip - fastqc_html - fastqc_version - fastqc_multiqc + emit: + fastqc_zip + fastqc_html + fastqc_version + fastqc_multiqc } diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf deleted file mode 100644 index 2e06e889..00000000 --- a/subworkflows/local/input_check.nf +++ /dev/null @@ -1,96 +0,0 @@ - -// -// Check input samplesheet and get read channels -// - -include { SAMPLESHEET_CHECK } from '../../modules/local/samplesheet_check' - -workflow INPUT_CHECK { - take: - samplesheet // file: /path/to/samplesheet.csv - - main: - - reads = null - versions = null - - grouped_ch = - SAMPLESHEET_CHECK ( samplesheet ) - .csv - .splitCsv ( header:true, sep:',' ) - .map { create_fastq_channel(it) } - // group replicate files together, modifies channel to [ val(meta), [ [reads_rep1], [reads_repN] ] ] - .groupTuple(by: [0]) - - if (params.aligner == 'cellrangerarc' ) { - grouped_ch - .map { meta, sample_type, sub_sample, reads -> [ meta, sample_type.flatten(), sub_sample.flatten(), reads.flatten() ] } - .set { reads } - } else { - grouped_ch - .map { 
meta, reads -> [ meta, reads.flatten() ] } - .set { reads } - } - - emit: - reads // channel: [ val(meta), [*], [ reads ] ] - versions = SAMPLESHEET_CHECK.out.versions // channel: [ versions.yml ] -} - - -// Function to get list of [ meta, [ multimeta ] , [ fastq_1, fastq_2 ] ] -def create_fastq_channel(LinkedHashMap row) { - // create meta map - def meta = [:] - meta.id = row.sample - meta.single_end = row.single_end.toBoolean() - meta.expected_cells = row.expected_cells != null ? row.expected_cells : null - meta.seq_center = row.seq_center ? row.seq_center : params.seq_center - - // add path(s) of the fastq file(s) to the meta map - def fastq_meta = [] - def fastqs = [] - if (!file(row.fastq_1).exists()) { - exit 1, "ERROR: Please check input samplesheet -> Read 1 FastQ file does not exist!\n${row.fastq_1}" - } - if (meta.single_end) { - fastqs = [ file(row.fastq_1) ] - } else { - if (!file(row.fastq_2).exists()) { - exit 1, "ERROR: Please check input samplesheet -> Read 2 FastQ file does not exist!\n${row.fastq_2}" - } - fastqs = [ file(row.fastq_1), file(row.fastq_2) ] - if (row.sample_type == "atac") { - if (row.fastq_barcode == "") { - exit 1, "ERROR: Please check input samplesheet -> Barcode FastQ (Dual index i5 read) file is missing!\n" - } - if (!file(row.fastq_barcode).exists()) { - exit 1, "ERROR: Please check input samplesheet -> Barcode FastQ (Dual index i5 read) file does not exist!" + - "\n${row.fastq_barcode}" - } - fastqs.add(file(row.fastq_barcode)) - } - } - - // define meta_data for multiome - def sample_type = row.sample_type ? [row.sample_type] : ['gex'] - - def sub_sample = "" - if (params.aligner == "cellrangerarc"){ - sub_sample = row.fastq_1.split("/")[-1].replaceAll("_S[0-9]+_L[0-9]+_R1_[0-9]+.fastq.gz","") - fastqs.each{ - if(!it.name.contains(sub_sample)){ - exit 1, "ERROR: Please check input samplesheet -> Some files do not have the same sample name " + - "${sub_sample} in common!\n${it}" - } - } - } - - fastq_meta = [ meta, fastqs ] - - if (params.aligner == "cellrangerarc"){ - fastq_meta = [ meta, sample_type, sub_sample, fastqs ] - } - - return fastq_meta -} \ No newline at end of file diff --git a/subworkflows/local/kallisto_bustools.nf b/subworkflows/local/kallisto_bustools.nf index 3210e47a..d420ab01 100644 --- a/subworkflows/local/kallisto_bustools.nf +++ b/subworkflows/local/kallisto_bustools.nf @@ -1,5 +1,4 @@ /* -- IMPORT LOCAL MODULES/SUBWORKFLOWS -- */ -include { GENE_MAP } from '../../modules/local/gene_map' include {KALLISTOBUSTOOLS_COUNT } from '../../modules/nf-core/kallistobustools/count/main' /* -- IMPORT NF-CORE MODULES/SUBWORKFLOWS -- */ @@ -14,6 +13,8 @@ workflow KALLISTO_BUSTOOLS { gtf kallisto_index txp2gene + t1c + t2c protocol kb_workflow ch_fastq @@ -21,26 +22,13 @@ workflow KALLISTO_BUSTOOLS { main: ch_versions = Channel.empty() - assert kallisto_index || (genome_fasta && gtf): + assert (txp2gene && kallisto_index) || (genome_fasta && gtf): "Must provide a genome fasta file ('--fasta') and a gtf file ('--gtf') if no index is given!" - assert txp2gene || gtf: - "Must provide either a GTF file ('--gtf') or kallisto gene map ('--kallisto_gene_map') to align with kallisto bustools!" 
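Under the reworked assertion in this hunk, a kallisto/BUStools run needs either a precomputed index together with a t2g mapping, or a genome FASTA plus GTF from which both can be built. A hedged sketch of the two valid parameter combinations (all paths hypothetical):

```groovy
// Option 1: reuse precomputed resources (must be supplied together)
params.kallisto_index = '/refs/kb_ref.idx'  // hypothetical path
params.txp2gene       = '/refs/t2g.txt'     // hypothetical path

// Option 2: provide FASTA + GTF and let KALLISTOBUSTOOLS_REF build index and t2g
params.fasta = '/refs/genome.fa'            // hypothetical path
params.gtf   = '/refs/genes.gtf'            // hypothetical path
```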
- - /* - * Generate Kallisto Gene Map if not supplied and index is given - * If no index is given, the gene map will be generated in the 'kb ref' step - */ - if (!txp2gene && kallisto_index) { - GENE_MAP( gtf ) - txp2gene = GENE_MAP.out.gene_map - ch_versions = ch_versions.mix(GENE_MAP.out.versions) - } - /* - * Generate kallisto index + * Generate kallisto index and t2g if not already present */ - if (!kallisto_index) { + if (!(txp2gene && kallisto_index)) { KALLISTOBUSTOOLS_REF( genome_fasta, gtf, kb_workflow ) txp2gene = KALLISTOBUSTOOLS_REF.out.t2g.collect() kallisto_index = KALLISTOBUSTOOLS_REF.out.index.collect() @@ -58,7 +46,8 @@ workflow KALLISTO_BUSTOOLS { txp2gene, t1c, t2c, - protocol + protocol, + kb_workflow ) ch_versions = ch_versions.mix(KALLISTOBUSTOOLS_COUNT.out.versions) @@ -66,7 +55,8 @@ workflow KALLISTO_BUSTOOLS { emit: ch_versions counts = KALLISTOBUSTOOLS_COUNT.out.count + raw_counts = KALLISTOBUSTOOLS_COUNT.out.raw_counts + filtered_counts = KALLISTOBUSTOOLS_COUNT.out.filtered_counts txp2gene = txp2gene.collect() - } diff --git a/subworkflows/local/mtx_conversion.nf b/subworkflows/local/mtx_conversion.nf index 958da400..98e49a2e 100644 --- a/subworkflows/local/mtx_conversion.nf +++ b/subworkflows/local/mtx_conversion.nf @@ -15,7 +15,9 @@ workflow MTX_CONVERSION { ch_versions = Channel.empty() // Cellranger module output contains too many files, which cause path collisions, so we filter to the ones we need. - if (params.aligner in [ 'cellranger', 'cellrangerarc' ]) { + // Keeping backwards compatibility with cellranger-arc. + // TODO: Adapt cellranger-arc subworkflow like cellranger to remove this snippet here. + if (params.aligner in [ 'cellrangerarc' ]) { mtx_matrices = mtx_matrices.map { meta, mtx_files -> [ meta, mtx_files.findAll { it.toString().contains("filtered_feature_bc_matrix") } ] } @@ -34,8 +36,15 @@ workflow MTX_CONVERSION { // // Concatenate sample-specific h5ad files into one // + ch_concat_h5ad_input = MTX_TO_H5AD.out.h5ad.groupTuple() // gather all sample-specific files / per type + if (params.aligner == 'kallisto' && params.kb_workflow != 'standard') { + // when there are spliced / unspliced matrices, the collected tuple has two levels ( [[mtx_1, mtx_2]] ), + // which breaks nextflow because it is not a valid 'path'; thus, we have to remove one level, + // making it [ mtx_1, mtx_2 ] + ch_concat_h5ad_input = ch_concat_h5ad_input.map{ type, matrices -> [ type, matrices.flatten().toList() ] } + } CONCAT_H5AD ( - MTX_TO_H5AD.out.h5ad.collect(), // gather all sample-specific files + ch_concat_h5ad_input, samplesheet ) @@ -51,6 +60,6 @@ workflow MTX_CONVERSION { emit: ch_versions - counts = MTX_TO_H5AD.out.counts + // counts = MTX_TO_H5AD.out.counts was this ever used?
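To illustrate the flattening comment in the hunk above, here is a minimal sketch of what the extra map step does for a kallisto run with a non-standard `kb_workflow` (sample and file names are hypothetical):

```groovy
// Shape of ch_concat_h5ad_input after groupTuple() for two samples (hypothetical names):
//   [ 'raw', [ [s1_spliced.h5ad, s1_unspliced.h5ad], [s2_spliced.h5ad, s2_unspliced.h5ad] ] ]
// flatten().toList() removes the inner nesting so every element is a plain path:
//   [ 'raw', [ s1_spliced.h5ad, s1_unspliced.h5ad, s2_spliced.h5ad, s2_unspliced.h5ad ] ]
ch_concat_h5ad_input = ch_concat_h5ad_input.map { type, matrices -> [ type, matrices.flatten().toList() ] }
```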
} diff --git a/subworkflows/local/starsolo.nf b/subworkflows/local/starsolo.nf index 47b2e757..0c11acd1 100644 --- a/subworkflows/local/starsolo.nf +++ b/subworkflows/local/starsolo.nf @@ -57,8 +57,10 @@ workflow STARSOLO { emit: ch_versions // get rid of meta for star index - star_index = star_index.map{ meta, index -> index} + star_index = star_index.map{ meta, index -> index } star_result = STAR_ALIGN.out.tab star_counts = STAR_ALIGN.out.counts - for_multiqc = STAR_ALIGN.out.log_final + raw_counts = STAR_ALIGN.out.raw_counts + filtered_counts = STAR_ALIGN.out.filtered_counts + for_multiqc = STAR_ALIGN.out.log_final.map{ meta, it -> it } } diff --git a/subworkflows/local/utils_nfcore_scrnaseq_pipeline/main.nf b/subworkflows/local/utils_nfcore_scrnaseq_pipeline/main.nf new file mode 100644 index 00000000..841a0a43 --- /dev/null +++ b/subworkflows/local/utils_nfcore_scrnaseq_pipeline/main.nf @@ -0,0 +1,251 @@ +// +// Subworkflow with functionality specific to the nf-core/scrnaseq pipeline +// + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +include { UTILS_NFVALIDATION_PLUGIN } from '../../nf-core/utils_nfvalidation_plugin' +include { paramsSummaryMap } from 'plugin/nf-validation' +include { fromSamplesheet } from 'plugin/nf-validation' +include { UTILS_NEXTFLOW_PIPELINE } from '../../nf-core/utils_nextflow_pipeline' +include { completionEmail } from '../../nf-core/utils_nfcore_pipeline' +include { completionSummary } from '../../nf-core/utils_nfcore_pipeline' +include { dashedLine } from '../../nf-core/utils_nfcore_pipeline' +include { nfCoreLogo } from '../../nf-core/utils_nfcore_pipeline' +include { imNotification } from '../../nf-core/utils_nfcore_pipeline' +include { UTILS_NFCORE_PIPELINE } from '../../nf-core/utils_nfcore_pipeline' +include { workflowCitation } from '../../nf-core/utils_nfcore_pipeline' + +/* +======================================================================================== + SUBWORKFLOW TO INITIALISE PIPELINE +======================================================================================== +*/ + +workflow PIPELINE_INITIALISATION { + + take: + version // boolean: Display version and exit + help // boolean: Display help text + validate_params // boolean: Boolean whether to validate parameters against the schema at runtime + monochrome_logs // boolean: Do not use coloured log outputs + nextflow_cli_args // array: List of positional nextflow CLI args + outdir // string: The output directory where the results will be saved + input // string: Path to input samplesheet + + main: + + ch_versions = Channel.empty() + + // + // Print version and exit if required and dump pipeline parameters to JSON file + // + UTILS_NEXTFLOW_PIPELINE ( + version, + true, + outdir, + workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1 + ) + + // + // Validate parameters and generate parameter summary to stdout + // + pre_help_text = nfCoreLogo(monochrome_logs) + post_help_text = '\n' + workflowCitation() + '\n' + dashedLine(monochrome_logs) + def String workflow_command = "nextflow run ${workflow.manifest.name} -profile <docker/singularity/.../institute> --input samplesheet.csv --outdir <OUTDIR>" + UTILS_NFVALIDATION_PLUGIN ( + help, + workflow_command, + pre_help_text, + post_help_text, + validate_params, + "nextflow_schema.json" + ) + + // + // Check config provided to the pipeline + // + UTILS_NFCORE_PIPELINE (
nextflow_cli_args + ) + // + // Custom validation for pipeline parameters + // + validateInputParameters() + + // + // Create channel from input file provided through params.input + // + Channel + .fromSamplesheet("input") + .map { + meta, fastq_1, fastq_2 -> + if (!fastq_2) { + return [ meta.id, meta + [ single_end:true ], [ fastq_1 ] ] + } else { + return [ meta.id, meta + [ single_end:false ], [ fastq_1, fastq_2 ] ] + } + } + .groupTuple() + .map { + validateInputSamplesheet(it) + } + .map { + meta, fastqs -> + return [ meta, fastqs.flatten() ] + } + .set { ch_samplesheet } + + emit: + samplesheet = ch_samplesheet + versions = ch_versions +} + +/* +======================================================================================== + SUBWORKFLOW FOR PIPELINE COMPLETION +======================================================================================== +*/ + +workflow PIPELINE_COMPLETION { + + take: + email // string: email address + email_on_fail // string: email address sent on pipeline failure + plaintext_email // boolean: Send plain-text email instead of HTML + outdir // path: Path to output directory where results will be published + monochrome_logs // boolean: Disable ANSI colour codes in log output + hook_url // string: hook URL for notifications + multiqc_report // string: Path to MultiQC report + + main: + + summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") + + // + // Completion email and summary + // + workflow.onComplete { + if (email || email_on_fail) { + completionEmail(summary_params, email, email_on_fail, plaintext_email, outdir, monochrome_logs, multiqc_report.toList()) + } + + completionSummary(monochrome_logs) + + if (hook_url) { + imNotification(summary_params, hook_url) + } + } +} + +/* +======================================================================================== + FUNCTIONS +======================================================================================== +*/ +// +// Check and validate pipeline parameters +// +def validateInputParameters() { + genomeExistsError() +} + +// +// Validate channels from input samplesheet +// +def validateInputSamplesheet(input) { + def (metas, fastqs) = input[1..2] + + // Check that multiple runs of the same sample are of the same datatype i.e. single-end / paired-end + def endedness_ok = metas.collect{ it.single_end }.unique().size == 1 + if (!endedness_ok) { + error("Please check input samplesheet -> Multiple runs of a sample must be of the same datatype i.e. single-end or paired-end: ${metas[0].id}") + } + + return [ metas[0], fastqs ] +} +// +// Get attribute from genome config file e.g. 
fasta +// +def getGenomeAttribute(attribute) { + if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { + if (params.genomes[ params.genome ].containsKey(attribute)) { + return params.genomes[ params.genome ][ attribute ] + } + } + return null +} + +// +// Exit pipeline if incorrect --genome key provided +// +def genomeExistsError() { + if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { + def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" + + " Currently, the available genome keys are:\n" + + " ${params.genomes.keySet().join(", ")}\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + error(error_string) + } +} + +// +// Generate methods description for MultiQC +// +def toolCitationText() { + // TODO nf-core: Optionally add in-text citation tools to this list. + // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "", + // Uncomment function in methodsDescriptionText to render in MultiQC report + def citation_text = [ + "Tools used in the workflow included:", + "FastQC (Andrews 2010),", + "MultiQC (Ewels et al. 2016)", + "." + ].join(' ').trim() + + return citation_text +} + +def toolBibliographyText() { + // TODO nf-core: Optionally add bibliographic entries to this list. + // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "
<li>Author (2023) Pub name, Journal, DOI</li>" : "", + // Uncomment function in methodsDescriptionText to render in MultiQC report + def reference_text = [ + "
<li>Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).</li>", + "
<li>Ewels, P., Magnusson, M., Lundin, S., &amp; Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics, 32(19), 3047–3048. doi: 10.1093/bioinformatics/btw354</li>" + ].join(' ').trim() + + return reference_text +} + +def methodsDescriptionText(mqc_methods_yaml) { + // Convert to a named map so it can be used with the familiar NXF ${workflow} variable syntax in the MultiQC YML file + def meta = [:] + meta.workflow = workflow.toMap() + meta["manifest_map"] = workflow.manifest.toMap() + + // Pipeline DOI + meta["doi_text"] = meta.manifest_map.doi ? "(doi: ${meta.manifest_map.doi})" : "" + meta["nodoi_text"] = meta.manifest_map.doi ? "": "
<li>If available, make sure to update the text to include the Zenodo DOI of the version of the pipeline used.</li>" + + // Tool references + meta["tool_citations"] = "" + meta["tool_bibliography"] = "" + + // TODO nf-core: Only uncomment below if logic in toolCitationText/toolBibliographyText has been filled! + // meta["tool_citations"] = toolCitationText().replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".") + // meta["tool_bibliography"] = toolBibliographyText() + + + def methods_text = mqc_methods_yaml.text + + def engine = new groovy.text.SimpleTemplateEngine() + def description_html = engine.createTemplate(methods_text).make(meta) + + return description_html.toString() +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/main.nf b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf new file mode 100644 index 00000000..ac31f28f --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf @@ -0,0 +1,126 @@ +// +// Subworkflow with functionality that may be useful for any Nextflow pipeline +// + +import org.yaml.snakeyaml.Yaml +import groovy.json.JsonOutput +import nextflow.extension.FilesEx + +/* +======================================================================================== + SUBWORKFLOW DEFINITION +======================================================================================== +*/ + +workflow UTILS_NEXTFLOW_PIPELINE { + + take: + print_version // boolean: print version + dump_parameters // boolean: dump parameters + outdir // path: base directory used to publish pipeline results + check_conda_channels // boolean: check conda channels + + main: + + // + // Print workflow version and exit on --version + // + if (print_version) { + log.info "${workflow.manifest.name} ${getWorkflowVersion()}" + System.exit(0) + } + + // + // Dump pipeline parameters to a JSON file + // + if (dump_parameters && outdir) { + dumpParametersToJSON(outdir) + } + + // + // When running with Conda, warn if channels have not been set-up appropriately + // + if (check_conda_channels) { + checkCondaChannels() + } + + emit: + dummy_emit = true +} + +/* +======================================================================================== + FUNCTIONS +======================================================================================== +*/ + +// +// Generate version string +// +def getWorkflowVersion() { + String version_string = "" + if (workflow.manifest.version) { + def prefix_v = workflow.manifest.version[0] != 'v' ? 'v' : '' + version_string += "${prefix_v}${workflow.manifest.version}" + } + + if (workflow.commitId) { + def git_shortsha = workflow.commitId.substring(0, 7) + version_string += "-g${git_shortsha}" + } + + return version_string +} + +// +// Dump pipeline parameters to a JSON file +// +def dumpParametersToJSON(outdir) { + def timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') + def filename = "params_${timestamp}.json" + def temp_pf = new File(workflow.launchDir.toString(), ".${filename}") + def jsonStr = JsonOutput.toJson(params) + temp_pf.text = JsonOutput.prettyPrint(jsonStr) + + FilesEx.copyTo(temp_pf.toPath(), "${outdir}/pipeline_info/params_${timestamp}.json") + temp_pf.delete() +} + +// +// When running with -profile conda, warn if channels have not been set-up appropriately +// +def checkCondaChannels() { + Yaml parser = new Yaml() + def channels = [] + try { + def config = parser.load("conda config --show channels".execute().text) + channels = config.channels + } catch(NullPointerException | IOException e) { + log.warn "Could not verify conda channel configuration."
+ return + } + + // Check that all channels are present + // This channel list is ordered by required channel priority. + def required_channels_in_order = ['conda-forge', 'bioconda', 'defaults'] + def channels_missing = ((required_channels_in_order as Set) - (channels as Set)) as Boolean + + // Check that they are in the right order + def channel_priority_violation = false + def n = required_channels_in_order.size() + for (int i = 0; i < n - 1; i++) { + channel_priority_violation |= !(channels.indexOf(required_channels_in_order[i]) < channels.indexOf(required_channels_in_order[i+1])) + } + + if (channels_missing | channel_priority_violation) { + log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " There is a problem with your Conda configuration!\n\n" + + " You will need to set-up the conda-forge and bioconda channels correctly.\n" + + " Please refer to https://bioconda.github.io/\n" + + " The observed channel order is \n" + + " ${channels}\n" + + " but the following channel order is required:\n" + + " ${required_channels_in_order}\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml b/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml new file mode 100644 index 00000000..e5c3a0a8 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml @@ -0,0 +1,38 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "UTILS_NEXTFLOW_PIPELINE" +description: Subworkflow with functionality that may be useful for any Nextflow pipeline +keywords: + - utility + - pipeline + - initialise + - version +components: [] +input: + - print_version: + type: boolean + description: | + Print the version of the pipeline and exit + - dump_parameters: + type: boolean + description: | + Dump the parameters of the pipeline to a JSON file + - output_directory: + type: directory + description: Path to output dir to write JSON file to. + pattern: "results/" + - check_conda_channel: + type: boolean + description: | + Check if the conda channel priority is correct. +output: + - dummy_emit: + type: boolean + description: | + Dummy emit to make nf-core subworkflows lint happy +authors: + - "@adamrtalbot" + - "@drpatelh" +maintainers: + - "@adamrtalbot" + - "@drpatelh" + - "@maxulysse" diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test new file mode 100644 index 00000000..68718e4f --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test @@ -0,0 +1,54 @@ + +nextflow_function { + + name "Test Functions" + script "subworkflows/nf-core/utils_nextflow_pipeline/main.nf" + config "subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config" + tag 'subworkflows' + tag 'utils_nextflow_pipeline' + tag 'subworkflows/utils_nextflow_pipeline' + + test("Test Function getWorkflowVersion") { + + function "getWorkflowVersion" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function dumpParametersToJSON") { + + function "dumpParametersToJSON" + + when { + function { + """ + // define inputs of the function here. 
Example: + input[0] = "$outputDir" + """.stripIndent() + } + } + + then { + assertAll( + { assert function.success } + ) + } + } + + test("Test Function checkCondaChannels") { + + function "checkCondaChannels" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap new file mode 100644 index 00000000..e3f0baf4 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap @@ -0,0 +1,20 @@ +{ + "Test Function getWorkflowVersion": { + "content": [ + "v9.9.9" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:05.308243" + }, + "Test Function checkCondaChannels": { + "content": null, + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:12.425833" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test new file mode 100644 index 00000000..ca964ce8 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test @@ -0,0 +1,111 @@ +nextflow_workflow { + + name "Test Workflow UTILS_NEXTFLOW_PIPELINE" + script "../main.nf" + config "subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config" + workflow "UTILS_NEXTFLOW_PIPELINE" + tag 'subworkflows' + tag 'utils_nextflow_pipeline' + tag 'subworkflows/utils_nextflow_pipeline' + + test("Should run no inputs") { + + when { + workflow { + """ + print_version = false + dump_parameters = false + outdir = null + check_conda_channels = false + + input[0] = print_version + input[1] = dump_parameters + input[2] = outdir + input[3] = check_conda_channels + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should print version") { + + when { + workflow { + """ + print_version = true + dump_parameters = false + outdir = null + check_conda_channels = false + + input[0] = print_version + input[1] = dump_parameters + input[2] = outdir + input[3] = check_conda_channels + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.stdout.contains("nextflow_workflow v9.9.9") } + ) + } + } + + test("Should dump params") { + + when { + workflow { + """ + print_version = false + dump_parameters = true + outdir = 'results' + check_conda_channels = false + + input[0] = false + input[1] = true + input[2] = outdir + input[3] = false + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should not create params JSON if no output directory") { + + when { + workflow { + """ + print_version = false + dump_parameters = true + outdir = null + check_conda_channels = false + + input[0] = false + input[1] = true + input[2] = outdir + input[3] = false + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config b/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config new file mode 100644 index 00000000..d0a926bf --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config @@ -0,0 +1,9 @@ +manifest { + name = 'nextflow_workflow' + author = """nf-core""" + homePage = 'https://127.0.0.1' + description = """Dummy pipeline""" + 
nextflowVersion = '!>=23.04.0' + version = '9.9.9' + doi = 'https://doi.org/10.5281/zenodo.5070524' +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml b/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml new file mode 100644 index 00000000..f8476112 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/utils_nextflow_pipeline: + - subworkflows/nf-core/utils_nextflow_pipeline/** diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf new file mode 100644 index 00000000..a8b55d6f --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf @@ -0,0 +1,440 @@ +// +// Subworkflow with utility functions specific to the nf-core pipeline template +// + +import org.yaml.snakeyaml.Yaml +import nextflow.extension.FilesEx + +/* +======================================================================================== + SUBWORKFLOW DEFINITION +======================================================================================== +*/ + +workflow UTILS_NFCORE_PIPELINE { + + take: + nextflow_cli_args + + main: + valid_config = checkConfigProvided() + checkProfileProvided(nextflow_cli_args) + + emit: + valid_config +} + +/* +======================================================================================== + FUNCTIONS +======================================================================================== +*/ + +// +// Warn if a -profile or Nextflow config has not been provided to run the pipeline +// +def checkConfigProvided() { + valid_config = true + if (workflow.profile == 'standard' && workflow.configFiles.size() <= 1) { + log.warn "[$workflow.manifest.name] You are attempting to run the pipeline without any custom configuration!\n\n" + + "This will be dependent on your local compute environment but can be achieved via one or more of the following:\n" + + " (1) Using an existing pipeline profile e.g. `-profile docker` or `-profile singularity`\n" + + " (2) Using an existing nf-core/configs for your Institution e.g. `-profile crick` or `-profile uppmax`\n" + + " (3) Using your own local custom config e.g. `-c /path/to/your/custom.config`\n\n" + + "Please refer to the quick start section and usage docs for the pipeline.\n " + valid_config = false + } + return valid_config +} + +// +// Exit pipeline if --profile contains spaces +// +def checkProfileProvided(nextflow_cli_args) { + if (workflow.profile.endsWith(',')) { + error "The `-profile` option cannot end with a trailing comma, please remove it and re-run the pipeline!\n" + + "HINT: A common mistake is to provide multiple values separated by spaces e.g. `-profile test, docker`.\n" + } + if (nextflow_cli_args[0]) { + log.warn "nf-core pipelines do not accept positional arguments. The positional argument `${nextflow_cli_args[0]}` has been detected.\n" + + "HINT: A common mistake is to provide multiple values separated by spaces e.g. 
`-profile test, docker`.\n" + } +} + +// +// Citation string for pipeline +// +def workflowCitation() { + return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + + "* The pipeline\n" + + " ${workflow.manifest.doi}\n\n" + + "* The nf-core framework\n" + + " https://doi.org/10.1038/s41587-020-0439-x\n\n" + + "* Software dependencies\n" + + " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" +} + +// +// Generate workflow version string +// +def getWorkflowVersion() { + String version_string = "" + if (workflow.manifest.version) { + def prefix_v = workflow.manifest.version[0] != 'v' ? 'v' : '' + version_string += "${prefix_v}${workflow.manifest.version}" + } + + if (workflow.commitId) { + def git_shortsha = workflow.commitId.substring(0, 7) + version_string += "-g${git_shortsha}" + } + + return version_string +} + +// +// Get software versions for pipeline +// +def processVersionsFromYAML(yaml_file) { + Yaml yaml = new Yaml() + versions = yaml.load(yaml_file).collectEntries { k, v -> [ k.tokenize(':')[-1], v ] } + return yaml.dumpAsMap(versions).trim() +} + +// +// Get workflow version for pipeline +// +def workflowVersionToYAML() { + return """ + Workflow: + $workflow.manifest.name: ${getWorkflowVersion()} + Nextflow: $workflow.nextflow.version + """.stripIndent().trim() +} + +// +// Get channel of software versions used in pipeline in YAML format +// +def softwareVersionsToYAML(ch_versions) { + return ch_versions + .unique() + .map { processVersionsFromYAML(it) } + .unique() + .mix(Channel.of(workflowVersionToYAML())) +} + +// +// Get workflow summary for MultiQC +// +def paramsSummaryMultiqc(summary_params) { + def summary_section = '' + for (group in summary_params.keySet()) { + def group_params = summary_params.get(group) // This gets the parameters of that particular group + if (group_params) { + summary_section += "
    <p style=\"font-size:110%\"><b>$group</b></p>\n" + summary_section += "    <dl class=\"dl-horizontal\">\n" + for (param in group_params.keySet()) { + summary_section += "        <dt>$param</dt><dd><samp>${group_params.get(param) ?: '<span style=\"color:#999999;\">N/A</a>'}</samp></dd>\n" + } + summary_section += "    </dl>
    \n" + } + } + + String yaml_file_text = "id: '${workflow.manifest.name.replace('/','-')}-summary'\n" + yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n" + yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n" + yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n" + yaml_file_text += "plot_type: 'html'\n" + yaml_file_text += "data: |\n" + yaml_file_text += "${summary_section}" + + return yaml_file_text +} + +// +// nf-core logo +// +def nfCoreLogo(monochrome_logs=true) { + Map colors = logColours(monochrome_logs) + String.format( + """\n + ${dashedLine(monochrome_logs)} + ${colors.green},--.${colors.black}/${colors.green},-.${colors.reset} + ${colors.blue} ___ __ __ __ ___ ${colors.green}/,-._.--~\'${colors.reset} + ${colors.blue} |\\ | |__ __ / ` / \\ |__) |__ ${colors.yellow}} {${colors.reset} + ${colors.blue} | \\| | \\__, \\__/ | \\ |___ ${colors.green}\\`-._,-`-,${colors.reset} + ${colors.green}`._,._,\'${colors.reset} + ${colors.purple} ${workflow.manifest.name} ${getWorkflowVersion()}${colors.reset} + ${dashedLine(monochrome_logs)} + """.stripIndent() + ) +} + +// +// Return dashed line +// +def dashedLine(monochrome_logs=true) { + Map colors = logColours(monochrome_logs) + return "-${colors.dim}----------------------------------------------------${colors.reset}-" +} + +// +// ANSII colours used for terminal logging +// +def logColours(monochrome_logs=true) { + Map colorcodes = [:] + + // Reset / Meta + colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" + colorcodes['bold'] = monochrome_logs ? '' : "\033[1m" + colorcodes['dim'] = monochrome_logs ? '' : "\033[2m" + colorcodes['underlined'] = monochrome_logs ? '' : "\033[4m" + colorcodes['blink'] = monochrome_logs ? '' : "\033[5m" + colorcodes['reverse'] = monochrome_logs ? '' : "\033[7m" + colorcodes['hidden'] = monochrome_logs ? '' : "\033[8m" + + // Regular Colors + colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" + colorcodes['red'] = monochrome_logs ? '' : "\033[0;31m" + colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m" + colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" + colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" + colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" + colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" + colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" + + // Bold + colorcodes['bblack'] = monochrome_logs ? '' : "\033[1;30m" + colorcodes['bred'] = monochrome_logs ? '' : "\033[1;31m" + colorcodes['bgreen'] = monochrome_logs ? '' : "\033[1;32m" + colorcodes['byellow'] = monochrome_logs ? '' : "\033[1;33m" + colorcodes['bblue'] = monochrome_logs ? '' : "\033[1;34m" + colorcodes['bpurple'] = monochrome_logs ? '' : "\033[1;35m" + colorcodes['bcyan'] = monochrome_logs ? '' : "\033[1;36m" + colorcodes['bwhite'] = monochrome_logs ? '' : "\033[1;37m" + + // Underline + colorcodes['ublack'] = monochrome_logs ? '' : "\033[4;30m" + colorcodes['ured'] = monochrome_logs ? '' : "\033[4;31m" + colorcodes['ugreen'] = monochrome_logs ? '' : "\033[4;32m" + colorcodes['uyellow'] = monochrome_logs ? '' : "\033[4;33m" + colorcodes['ublue'] = monochrome_logs ? '' : "\033[4;34m" + colorcodes['upurple'] = monochrome_logs ? '' : "\033[4;35m" + colorcodes['ucyan'] = monochrome_logs ? '' : "\033[4;36m" + colorcodes['uwhite'] = monochrome_logs ? '' : "\033[4;37m" + + // High Intensity + colorcodes['iblack'] = monochrome_logs ? 
'' : "\033[0;90m" + colorcodes['ired'] = monochrome_logs ? '' : "\033[0;91m" + colorcodes['igreen'] = monochrome_logs ? '' : "\033[0;92m" + colorcodes['iyellow'] = monochrome_logs ? '' : "\033[0;93m" + colorcodes['iblue'] = monochrome_logs ? '' : "\033[0;94m" + colorcodes['ipurple'] = monochrome_logs ? '' : "\033[0;95m" + colorcodes['icyan'] = monochrome_logs ? '' : "\033[0;96m" + colorcodes['iwhite'] = monochrome_logs ? '' : "\033[0;97m" + + // Bold High Intensity + colorcodes['biblack'] = monochrome_logs ? '' : "\033[1;90m" + colorcodes['bired'] = monochrome_logs ? '' : "\033[1;91m" + colorcodes['bigreen'] = monochrome_logs ? '' : "\033[1;92m" + colorcodes['biyellow'] = monochrome_logs ? '' : "\033[1;93m" + colorcodes['biblue'] = monochrome_logs ? '' : "\033[1;94m" + colorcodes['bipurple'] = monochrome_logs ? '' : "\033[1;95m" + colorcodes['bicyan'] = monochrome_logs ? '' : "\033[1;96m" + colorcodes['biwhite'] = monochrome_logs ? '' : "\033[1;97m" + + return colorcodes +} + +// +// Attach the multiqc report to email +// +def attachMultiqcReport(multiqc_report) { + def mqc_report = null + try { + if (workflow.success) { + mqc_report = multiqc_report.getVal() + if (mqc_report.getClass() == ArrayList && mqc_report.size() >= 1) { + if (mqc_report.size() > 1) { + log.warn "[$workflow.manifest.name] Found multiple reports from process 'MULTIQC', will use only one" + } + mqc_report = mqc_report[0] + } + } + } catch (all) { + if (multiqc_report) { + log.warn "[$workflow.manifest.name] Could not attach MultiQC report to summary email" + } + } + return mqc_report +} + +// +// Construct and send completion email +// +def completionEmail(summary_params, email, email_on_fail, plaintext_email, outdir, monochrome_logs=true, multiqc_report=null) { + + // Set up the e-mail variables + def subject = "[$workflow.manifest.name] Successful: $workflow.runName" + if (!workflow.success) { + subject = "[$workflow.manifest.name] FAILED: $workflow.runName" + } + + def summary = [:] + for (group in summary_params.keySet()) { + summary << summary_params[group] + } + + def misc_fields = [:] + misc_fields['Date Started'] = workflow.start + misc_fields['Date Completed'] = workflow.complete + misc_fields['Pipeline script file path'] = workflow.scriptFile + misc_fields['Pipeline script hash ID'] = workflow.scriptId + if (workflow.repository) misc_fields['Pipeline repository Git URL'] = workflow.repository + if (workflow.commitId) misc_fields['Pipeline repository Git Commit'] = workflow.commitId + if (workflow.revision) misc_fields['Pipeline Git branch/tag'] = workflow.revision + misc_fields['Nextflow Version'] = workflow.nextflow.version + misc_fields['Nextflow Build'] = workflow.nextflow.build + misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp + + def email_fields = [:] + email_fields['version'] = getWorkflowVersion() + email_fields['runName'] = workflow.runName + email_fields['success'] = workflow.success + email_fields['dateComplete'] = workflow.complete + email_fields['duration'] = workflow.duration + email_fields['exitStatus'] = workflow.exitStatus + email_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + email_fields['errorReport'] = (workflow.errorReport ?: 'None') + email_fields['commandLine'] = workflow.commandLine + email_fields['projectDir'] = workflow.projectDir + email_fields['summary'] = summary << misc_fields + + // On success try attach the multiqc report + def mqc_report = attachMultiqcReport(multiqc_report) + + // Check if we are only sending emails on failure + def 
+
+//
+// Construct and send completion email
+//
+def completionEmail(summary_params, email, email_on_fail, plaintext_email, outdir, monochrome_logs=true, multiqc_report=null) {
+
+    // Set up the e-mail variables
+    def subject = "[$workflow.manifest.name] Successful: $workflow.runName"
+    if (!workflow.success) {
+        subject = "[$workflow.manifest.name] FAILED: $workflow.runName"
+    }
+
+    def summary = [:]
+    for (group in summary_params.keySet()) {
+        summary << summary_params[group]
+    }
+
+    def misc_fields = [:]
+    misc_fields['Date Started']              = workflow.start
+    misc_fields['Date Completed']            = workflow.complete
+    misc_fields['Pipeline script file path'] = workflow.scriptFile
+    misc_fields['Pipeline script hash ID']   = workflow.scriptId
+    if (workflow.repository) misc_fields['Pipeline repository Git URL']    = workflow.repository
+    if (workflow.commitId)   misc_fields['Pipeline repository Git Commit'] = workflow.commitId
+    if (workflow.revision)   misc_fields['Pipeline Git branch/tag']        = workflow.revision
+    misc_fields['Nextflow Version']           = workflow.nextflow.version
+    misc_fields['Nextflow Build']             = workflow.nextflow.build
+    misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp
+
+    def email_fields = [:]
+    email_fields['version']      = getWorkflowVersion()
+    email_fields['runName']      = workflow.runName
+    email_fields['success']      = workflow.success
+    email_fields['dateComplete'] = workflow.complete
+    email_fields['duration']     = workflow.duration
+    email_fields['exitStatus']   = workflow.exitStatus
+    email_fields['errorMessage'] = (workflow.errorMessage ?: 'None')
+    email_fields['errorReport']  = (workflow.errorReport ?: 'None')
+    email_fields['commandLine']  = workflow.commandLine
+    email_fields['projectDir']   = workflow.projectDir
+    email_fields['summary']      = summary << misc_fields
+
+    // On success, try to attach the multiqc report
+    def mqc_report = attachMultiqcReport(multiqc_report)
+
+    // Check if we are only sending emails on failure
+    def email_address = email
+    if (!email && email_on_fail && !workflow.success) {
+        email_address = email_on_fail
+    }
+
+    // Render the TXT template
+    def engine       = new groovy.text.GStringTemplateEngine()
+    def tf           = new File("${workflow.projectDir}/assets/email_template.txt")
+    def txt_template = engine.createTemplate(tf).make(email_fields)
+    def email_txt    = txt_template.toString()
+
+    // Render the HTML template
+    def hf            = new File("${workflow.projectDir}/assets/email_template.html")
+    def html_template = engine.createTemplate(hf).make(email_fields)
+    def email_html    = html_template.toString()
+
+    // Render the sendmail template
+    def max_multiqc_email_size = (params.containsKey('max_multiqc_email_size') ? params.max_multiqc_email_size : 0) as nextflow.util.MemoryUnit
+    def smail_fields           = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "${workflow.projectDir}", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes() ]
+    def sf                     = new File("${workflow.projectDir}/assets/sendmail_template.txt")
+    def sendmail_template      = engine.createTemplate(sf).make(smail_fields)
+    def sendmail_html          = sendmail_template.toString()
+
+    // Send the HTML e-mail
+    Map colors = logColours(monochrome_logs)
+    if (email_address) {
+        try {
+            if (plaintext_email) { throw new Exception('Send plaintext e-mail, not HTML') }
+            // Try to send HTML e-mail using sendmail
+            def sendmail_tf = new File(workflow.launchDir.toString(), ".sendmail_tmp.html")
+            sendmail_tf.withWriter { w -> w << sendmail_html }
+            [ 'sendmail', '-t' ].execute() << sendmail_html
+            log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-"
+        } catch (all) {
+            // Catch failures and try with plaintext
+            def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ]
+            mail_cmd.execute() << email_html
+            log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (mail)-"
+        }
+    }
+
+    // Write summary e-mail HTML to a file
+    def output_hf = new File(workflow.launchDir.toString(), ".pipeline_report.html")
+    output_hf.withWriter { w -> w << email_html }
+    FilesEx.copyTo(output_hf.toPath(), "${outdir}/pipeline_info/pipeline_report.html");
+    output_hf.delete()
+
+    // Write summary e-mail TXT to a file
+    def output_tf = new File(workflow.launchDir.toString(), ".pipeline_report.txt")
+    output_tf.withWriter { w -> w << email_txt }
+    FilesEx.copyTo(output_tf.toPath(), "${outdir}/pipeline_info/pipeline_report.txt");
+    output_tf.delete()
+}
+
+//
+// Print pipeline summary on completion
+//
+def completionSummary(monochrome_logs=true) {
+    Map colors = logColours(monochrome_logs)
+    if (workflow.success) {
+        if (workflow.stats.ignoredCount == 0) {
+            log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-"
+        } else {
+            log.info "-${colors.purple}[$workflow.manifest.name]${colors.yellow} Pipeline completed successfully, but with errored process(es) ${colors.reset}-"
+        }
+    } else {
+        log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed with errors${colors.reset}-"
+    }
+}
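For context, a minimal sketch (not part of this diff) of how a pipeline might wire these helpers into a completion handler. Here `summary_params` is assumed to come from `paramsSummaryMap(workflow)` and `multiqc_report` from the `toList()` hand-off shown earlier; the parameter names mirror the standard template options `--email`, `--email_on_fail`, `--plaintext_email`, `--hook_url` and `--monochrome_logs`.

```groovy
// Sketch only: invoke the completion helpers once the run finishes.
workflow.onComplete {
    if (params.email || params.email_on_fail) {
        completionEmail(summary_params, params.email, params.email_on_fail,
                        params.plaintext_email, params.outdir,
                        params.monochrome_logs, multiqc_report)
    }
    completionSummary(params.monochrome_logs)
    if (params.hook_url) {
        imNotification(summary_params, params.hook_url)
    }
}
```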
+
+//
+// Construct and send a notification to a web server as JSON e.g. Microsoft Teams and Slack
+//
+def imNotification(summary_params, hook_url) {
+    def summary = [:]
+    for (group in summary_params.keySet()) {
+        summary << summary_params[group]
+    }
+
+    def misc_fields = [:]
+    misc_fields['start']         = workflow.start
+    misc_fields['complete']      = workflow.complete
+    misc_fields['scriptfile']    = workflow.scriptFile
+    misc_fields['scriptid']      = workflow.scriptId
+    if (workflow.repository) misc_fields['repository'] = workflow.repository
+    if (workflow.commitId)   misc_fields['commitid']   = workflow.commitId
+    if (workflow.revision)   misc_fields['revision']   = workflow.revision
+    misc_fields['nxf_version']   = workflow.nextflow.version
+    misc_fields['nxf_build']     = workflow.nextflow.build
+    misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp
+
+    def msg_fields = [:]
+    msg_fields['version']      = getWorkflowVersion()
+    msg_fields['runName']      = workflow.runName
+    msg_fields['success']      = workflow.success
+    msg_fields['dateComplete'] = workflow.complete
+    msg_fields['duration']     = workflow.duration
+    msg_fields['exitStatus']   = workflow.exitStatus
+    msg_fields['errorMessage'] = (workflow.errorMessage ?: 'None')
+    msg_fields['errorReport']  = (workflow.errorReport ?: 'None')
+    msg_fields['commandLine']  = workflow.commandLine.replaceFirst(/ +--hook_url +[^ ]+/, "")
+    msg_fields['projectDir']   = workflow.projectDir
+    msg_fields['summary']      = summary << misc_fields
+
+    // Render the JSON template
+    def engine = new groovy.text.GStringTemplateEngine()
+    // Different JSON depending on the service provider
+    // Defaults to "Adaptive Cards" (https://adaptivecards.io), except Slack which has its own format
+    def json_path     = hook_url.contains("hooks.slack.com") ? "slackreport.json" : "adaptivecard.json"
+    def hf            = new File("${workflow.projectDir}/assets/${json_path}")
+    def json_template = engine.createTemplate(hf).make(msg_fields)
+    def json_message  = json_template.toString()
+
+    // POST
+    def post = new URL(hook_url).openConnection();
+    post.setRequestMethod("POST")
+    post.setDoOutput(true)
+    post.setRequestProperty("Content-Type", "application/json")
+    post.getOutputStream().write(json_message.getBytes("UTF-8"));
+    def postRC = post.getResponseCode();
+    if (!
postRC.equals(200)) { + log.warn(post.getErrorStream().getText()); + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml b/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml new file mode 100644 index 00000000..d08d2434 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml @@ -0,0 +1,24 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "UTILS_NFCORE_PIPELINE" +description: Subworkflow with utility functions specific to the nf-core pipeline template +keywords: + - utility + - pipeline + - initialise + - version +components: [] +input: + - nextflow_cli_args: + type: list + description: | + Nextflow CLI positional arguments +output: + - success: + type: boolean + description: | + Dummy output to indicate success +authors: + - "@adamrtalbot" +maintainers: + - "@adamrtalbot" + - "@maxulysse" diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test new file mode 100644 index 00000000..1dc317f8 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test @@ -0,0 +1,134 @@ + +nextflow_function { + + name "Test Functions" + script "../main.nf" + config "subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "utils_nfcore_pipeline" + tag "subworkflows/utils_nfcore_pipeline" + + test("Test Function checkConfigProvided") { + + function "checkConfigProvided" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function checkProfileProvided") { + + function "checkProfileProvided" + + when { + function { + """ + input[0] = [] + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function workflowCitation") { + + function "workflowCitation" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function nfCoreLogo") { + + function "nfCoreLogo" + + when { + function { + """ + input[0] = false + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function dashedLine") { + + function "dashedLine" + + when { + function { + """ + input[0] = false + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function without logColours") { + + function "logColours" + + when { + function { + """ + input[0] = true + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function with logColours") { + function "logColours" + + when { + function { + """ + input[0] = false + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap new file mode 100644 index 00000000..1037232c --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap @@ -0,0 +1,166 @@ +{ + "Test Function checkProfileProvided": { + "content": null, + "meta": { + "nf-test": "0.8.4", + 
"nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:03.360873" + }, + "Test Function checkConfigProvided": { + "content": [ + true + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:59.729647" + }, + "Test Function nfCoreLogo": { + "content": [ + "\n\n-\u001b[2m----------------------------------------------------\u001b[0m-\n \u001b[0;32m,--.\u001b[0;30m/\u001b[0;32m,-.\u001b[0m\n\u001b[0;34m ___ __ __ __ ___ \u001b[0;32m/,-._.--~'\u001b[0m\n\u001b[0;34m |\\ | |__ __ / ` / \\ |__) |__ \u001b[0;33m} {\u001b[0m\n\u001b[0;34m | \\| | \\__, \\__/ | \\ |___ \u001b[0;32m\\`-._,-`-,\u001b[0m\n \u001b[0;32m`._,._,'\u001b[0m\n\u001b[0;35m nextflow_workflow v9.9.9\u001b[0m\n-\u001b[2m----------------------------------------------------\u001b[0m-\n" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:10.562934" + }, + "Test Function workflowCitation": { + "content": [ + "If you use nextflow_workflow for your analysis please cite:\n\n* The pipeline\n https://doi.org/10.5281/zenodo.5070524\n\n* The nf-core framework\n https://doi.org/10.1038/s41587-020-0439-x\n\n* Software dependencies\n https://github.com/nextflow_workflow/blob/master/CITATIONS.md" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:07.019761" + }, + "Test Function without logColours": { + "content": [ + { + "reset": "", + "bold": "", + "dim": "", + "underlined": "", + "blink": "", + "reverse": "", + "hidden": "", + "black": "", + "red": "", + "green": "", + "yellow": "", + "blue": "", + "purple": "", + "cyan": "", + "white": "", + "bblack": "", + "bred": "", + "bgreen": "", + "byellow": "", + "bblue": "", + "bpurple": "", + "bcyan": "", + "bwhite": "", + "ublack": "", + "ured": "", + "ugreen": "", + "uyellow": "", + "ublue": "", + "upurple": "", + "ucyan": "", + "uwhite": "", + "iblack": "", + "ired": "", + "igreen": "", + "iyellow": "", + "iblue": "", + "ipurple": "", + "icyan": "", + "iwhite": "", + "biblack": "", + "bired": "", + "bigreen": "", + "biyellow": "", + "biblue": "", + "bipurple": "", + "bicyan": "", + "biwhite": "" + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:17.969323" + }, + "Test Function dashedLine": { + "content": [ + "-\u001b[2m----------------------------------------------------\u001b[0m-" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:14.366181" + }, + "Test Function with logColours": { + "content": [ + { + "reset": "\u001b[0m", + "bold": "\u001b[1m", + "dim": "\u001b[2m", + "underlined": "\u001b[4m", + "blink": "\u001b[5m", + "reverse": "\u001b[7m", + "hidden": "\u001b[8m", + "black": "\u001b[0;30m", + "red": "\u001b[0;31m", + "green": "\u001b[0;32m", + "yellow": "\u001b[0;33m", + "blue": "\u001b[0;34m", + "purple": "\u001b[0;35m", + "cyan": "\u001b[0;36m", + "white": "\u001b[0;37m", + "bblack": "\u001b[1;30m", + "bred": "\u001b[1;31m", + "bgreen": "\u001b[1;32m", + "byellow": "\u001b[1;33m", + "bblue": "\u001b[1;34m", + "bpurple": "\u001b[1;35m", + "bcyan": "\u001b[1;36m", + "bwhite": "\u001b[1;37m", + "ublack": "\u001b[4;30m", + "ured": "\u001b[4;31m", + "ugreen": "\u001b[4;32m", + "uyellow": "\u001b[4;33m", + "ublue": "\u001b[4;34m", + "upurple": "\u001b[4;35m", + "ucyan": "\u001b[4;36m", + "uwhite": "\u001b[4;37m", + "iblack": "\u001b[0;90m", + "ired": "\u001b[0;91m", + "igreen": "\u001b[0;92m", + "iyellow": "\u001b[0;93m", + "iblue": 
"\u001b[0;94m", + "ipurple": "\u001b[0;95m", + "icyan": "\u001b[0;96m", + "iwhite": "\u001b[0;97m", + "biblack": "\u001b[1;90m", + "bired": "\u001b[1;91m", + "bigreen": "\u001b[1;92m", + "biyellow": "\u001b[1;93m", + "biblue": "\u001b[1;94m", + "bipurple": "\u001b[1;95m", + "bicyan": "\u001b[1;96m", + "biwhite": "\u001b[1;97m" + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:21.714424" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test new file mode 100644 index 00000000..8940d32d --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test @@ -0,0 +1,29 @@ +nextflow_workflow { + + name "Test Workflow UTILS_NFCORE_PIPELINE" + script "../main.nf" + config "subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config" + workflow "UTILS_NFCORE_PIPELINE" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "utils_nfcore_pipeline" + tag "subworkflows/utils_nfcore_pipeline" + + test("Should run without failures") { + + when { + workflow { + """ + input[0] = [] + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap new file mode 100644 index 00000000..859d1030 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap @@ -0,0 +1,19 @@ +{ + "Should run without failures": { + "content": [ + { + "0": [ + true + ], + "valid_config": [ + true + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:25.726491" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config b/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config new file mode 100644 index 00000000..d0a926bf --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config @@ -0,0 +1,9 @@ +manifest { + name = 'nextflow_workflow' + author = """nf-core""" + homePage = 'https://127.0.0.1' + description = """Dummy pipeline""" + nextflowVersion = '!>=23.04.0' + version = '9.9.9' + doi = 'https://doi.org/10.5281/zenodo.5070524' +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml b/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml new file mode 100644 index 00000000..ac8523c9 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/utils_nfcore_pipeline: + - subworkflows/nf-core/utils_nfcore_pipeline/** diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf b/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf new file mode 100644 index 00000000..2585b65d --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf @@ -0,0 +1,62 @@ +// +// Subworkflow that uses the nf-validation plugin to render help text and parameter summary +// + +/* +======================================================================================== + IMPORT NF-VALIDATION PLUGIN +======================================================================================== +*/ + +include { paramsHelp } from 'plugin/nf-validation' +include { paramsSummaryLog } from 'plugin/nf-validation' +include { validateParameters } from 
'plugin/nf-validation'
+
+/*
+========================================================================================
+    SUBWORKFLOW DEFINITION
+========================================================================================
+*/
+
+workflow UTILS_NFVALIDATION_PLUGIN {
+
+    take:
+    print_help        // boolean: print help
+    workflow_command  // string: default command used to run the pipeline
+    pre_help_text     // string: string to be printed before help text and summary log
+    post_help_text    // string: string to be printed after help text and summary log
+    validate_params   // boolean: validate parameters
+    schema_filename   // path: JSON schema file, null to use default value
+
+    main:
+
+    log.debug "Using schema file: ${schema_filename}"
+
+    // Default values for strings
+    pre_help_text    = pre_help_text    ?: ''
+    post_help_text   = post_help_text   ?: ''
+    workflow_command = workflow_command ?: ''
+
+    //
+    // Print help message if needed
+    //
+    if (print_help) {
+        log.info pre_help_text + paramsHelp(workflow_command, parameters_schema: schema_filename) + post_help_text
+        System.exit(0)
+    }
+
+    //
+    // Print parameter summary to stdout
+    //
+    log.info pre_help_text + paramsSummaryLog(workflow, parameters_schema: schema_filename) + post_help_text
+
+    //
+    // Validate parameters relative to the parameter JSON schema
+    //
+    if (validate_params){
+        validateParameters(parameters_schema: schema_filename)
+    }
+
+    emit:
+    dummy_emit = true
+}
diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml b/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml
new file mode 100644
index 00000000..3d4a6b04
--- /dev/null
+++ b/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml
@@ -0,0 +1,44 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json
+name: "UTILS_NFVALIDATION_PLUGIN"
+description: Use nf-validation to initiate and validate a pipeline
+keywords:
+  - utility
+  - pipeline
+  - initialise
+  - validation
+components: []
+input:
+  - print_help:
+      type: boolean
+      description: |
+        Print help message and exit
+  - workflow_command:
+      type: string
+      description: |
+        The command to run the workflow e.g. "nextflow run main.nf"
+  - pre_help_text:
+      type: string
+      description: |
+        Text to print before the help message
+  - post_help_text:
+      type: string
+      description: |
+        Text to print after the help message
+  - validate_params:
+      type: boolean
+      description: |
+        Validate the parameters and error if invalid.
+  - schema_filename:
+      type: string
+      description: |
+        The filename of the schema to validate against.
+output: + - dummy_emit: + type: boolean + description: | + Dummy emit to make nf-core subworkflows lint happy +authors: + - "@adamrtalbot" +maintainers: + - "@adamrtalbot" + - "@maxulysse" diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test new file mode 100644 index 00000000..5784a33f --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test @@ -0,0 +1,200 @@ +nextflow_workflow { + + name "Test Workflow UTILS_NFVALIDATION_PLUGIN" + script "../main.nf" + workflow "UTILS_NFVALIDATION_PLUGIN" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "plugin/nf-validation" + tag "'plugin/nf-validation'" + tag "utils_nfvalidation_plugin" + tag "subworkflows/utils_nfvalidation_plugin" + + test("Should run nothing") { + + when { + + params { + monochrome_logs = true + test_data = '' + } + + workflow { + """ + help = false + workflow_command = null + pre_help_text = null + post_help_text = null + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should run help") { + + + when { + + params { + monochrome_logs = true + test_data = '' + } + workflow { + """ + help = true + workflow_command = null + pre_help_text = null + post_help_text = null + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.exitStatus == 0 }, + { assert workflow.stdout.any { it.contains('Input/output options') } }, + { assert workflow.stdout.any { it.contains('--outdir') } } + ) + } + } + + test("Should run help with command") { + + when { + + params { + monochrome_logs = true + test_data = '' + } + workflow { + """ + help = true + workflow_command = "nextflow run noorg/doesntexist" + pre_help_text = null + post_help_text = null + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.exitStatus == 0 }, + { assert workflow.stdout.any { it.contains('nextflow run noorg/doesntexist') } }, + { assert workflow.stdout.any { it.contains('Input/output options') } }, + { assert workflow.stdout.any { it.contains('--outdir') } } + ) + } + } + + test("Should run help with extra text") { + + + when { + + params { + monochrome_logs = true + test_data = '' + } + workflow { + """ + help = true + workflow_command = "nextflow run noorg/doesntexist" + pre_help_text = "pre-help-text" + post_help_text = "post-help-text" + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.exitStatus == 0 }, + { assert workflow.stdout.any { 
it.contains('pre-help-text') } }, + { assert workflow.stdout.any { it.contains('nextflow run noorg/doesntexist') } }, + { assert workflow.stdout.any { it.contains('Input/output options') } }, + { assert workflow.stdout.any { it.contains('--outdir') } }, + { assert workflow.stdout.any { it.contains('post-help-text') } } + ) + } + } + + test("Should validate params") { + + when { + + params { + monochrome_logs = true + test_data = '' + outdir = 1 + } + workflow { + """ + help = false + workflow_command = null + pre_help_text = null + post_help_text = null + validate_params = true + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.failed }, + { assert workflow.stdout.any { it.contains('ERROR ~ ERROR: Validation of pipeline parameters failed!') } } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json new file mode 100644 index 00000000..7626c1c9 --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json @@ -0,0 +1,96 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "$id": "https://raw.githubusercontent.com/./master/nextflow_schema.json", + "title": ". pipeline parameters", + "description": "", + "type": "object", + "definitions": { + "input_output_options": { + "title": "Input/output options", + "type": "object", + "fa_icon": "fas fa-terminal", + "description": "Define where the pipeline should find input data and save output data.", + "required": ["outdir"], + "properties": { + "validate_params": { + "type": "boolean", + "description": "Validate parameters?", + "default": true, + "hidden": true + }, + "outdir": { + "type": "string", + "format": "directory-path", + "description": "The output directory where the results will be saved. 
You have to use absolute paths to storage on Cloud infrastructure.", + "fa_icon": "fas fa-folder-open" + }, + "test_data_base": { + "type": "string", + "default": "https://raw.githubusercontent.com/nf-core/test-datasets/modules", + "description": "Base for test data directory", + "hidden": true + }, + "test_data": { + "type": "string", + "description": "Fake test data param", + "hidden": true + } + } + }, + "generic_options": { + "title": "Generic options", + "type": "object", + "fa_icon": "fas fa-file-import", + "description": "Less common options for the pipeline, typically set in a config file.", + "help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.", + "properties": { + "help": { + "type": "boolean", + "description": "Display help text.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, + "version": { + "type": "boolean", + "description": "Display version and exit.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, + "logo": { + "type": "boolean", + "default": true, + "description": "Display nf-core logo in console output.", + "fa_icon": "fas fa-image", + "hidden": true + }, + "singularity_pull_docker_container": { + "type": "boolean", + "description": "Pull Singularity container from Docker?", + "hidden": true + }, + "publish_dir_mode": { + "type": "string", + "default": "copy", + "description": "Method used to save pipeline results to output directory.", + "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", + "fa_icon": "fas fa-copy", + "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], + "hidden": true + }, + "monochrome_logs": { + "type": "boolean", + "description": "Use monochrome_logs", + "hidden": true + } + } + } + }, + "allOf": [ + { + "$ref": "#/definitions/input_output_options" + }, + { + "$ref": "#/definitions/generic_options" + } + ] +} diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml new file mode 100644 index 00000000..60b1cfff --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/utils_nfvalidation_plugin: + - subworkflows/nf-core/utils_nfvalidation_plugin/** diff --git a/tests/.nf-test.log b/tests/.nf-test.log new file mode 100644 index 00000000..8251fc75 --- /dev/null +++ b/tests/.nf-test.log @@ -0,0 +1,21 @@ +Feb-27 21:54:09.971 [main] INFO com.askimed.nf.test.App - nf-test 0.8.4 +Feb-27 21:54:09.988 [main] INFO com.askimed.nf.test.App - Arguments: [test, tests/main_pipeline_kallisto.test, --update-snapshot] +Feb-27 21:54:10.670 [main] INFO com.askimed.nf.test.App - Nextflow Version: 23.10.1 +Feb-27 21:54:10.674 [main] WARN com.askimed.nf.test.commands.RunTestsCommand - No nf-test config file found. +Feb-27 21:54:10.674 [main] INFO com.askimed.nf.test.commands.RunTestsCommand - Detected 1 test files. +Feb-27 21:54:10.676 [main] ERROR com.askimed.nf.test.commands.RunTestsCommand - Running tests failed. +java.lang.Exception: Test file '/home/ec2-user/scrnaseq/tests/tests/main_pipeline_kallisto.test' not found. 
+ at com.askimed.nf.test.core.TestExecutionEngine.parse(TestExecutionEngine.java:116) + at com.askimed.nf.test.core.TestExecutionEngine.execute(TestExecutionEngine.java:159) + at com.askimed.nf.test.commands.RunTestsCommand.execute(RunTestsCommand.java:184) + at com.askimed.nf.test.commands.AbstractCommand.call(AbstractCommand.java:43) + at com.askimed.nf.test.commands.AbstractCommand.call(AbstractCommand.java:18) + at picocli.CommandLine.executeUserObject(CommandLine.java:1953) + at picocli.CommandLine.access$1300(CommandLine.java:145) + at picocli.CommandLine$RunLast.executeUserObjectOfLastSubcommandWithSameParent(CommandLine.java:2352) + at picocli.CommandLine$RunLast.handle(CommandLine.java:2346) + at picocli.CommandLine$RunLast.handle(CommandLine.java:2311) + at picocli.CommandLine$AbstractParseResultHandler.execute(CommandLine.java:2179) + at picocli.CommandLine.execute(CommandLine.java:2078) + at com.askimed.nf.test.App.run(App.java:44) + at com.askimed.nf.test.App.main(App.java:51) diff --git a/tests/main_pipeline_alevin.test b/tests/main_pipeline_alevin.test new file mode 100644 index 00000000..dc6c081a --- /dev/null +++ b/tests/main_pipeline_alevin.test @@ -0,0 +1,69 @@ +nextflow_pipeline { + + name "Test Workflow main.nf" + script "main.nf" + + test("test-dataset_alevin_aligner") { + + when { + // the rest is taken from shared config + params { + aligner = 'alevin' + outdir = "${outputDir}/results_alevin" + + // Limit resources so that this can run on GitHub Actions -- for some reason it had not been taken from shared config + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' + } + } + + then { + + assertAll( + + // + // General assertions + // + + // Did it finish successfully? + {assert workflow.success}, + + // How many tasks were executed? + {assert workflow.trace.tasks().size() == 14}, + + // How many results were produced? 
+ {assert path("${outputDir}/results_alevin").list().size() == 5}, + {assert path("${outputDir}/results_alevin/alevin").list().size() == 4}, + {assert path("${outputDir}/results_alevin/alevin/mtx_conversions").list().size() == 4}, + {assert path("${outputDir}/results_alevin/alevinqc").list().size() == 2}, + {assert path("${outputDir}/results_alevin/fastqc").list().size() == 12}, + {assert path("${outputDir}/results_alevin/multiqc").list().size() == 3}, + + // + // Check if files were produced + // + {assert new File( "${outputDir}/results_alevin/alevin/mtx_conversions/Sample_X/Sample_X_raw_matrix.h5ad" ).exists()}, + {assert new File( "${outputDir}/results_alevin/alevin/mtx_conversions/Sample_Y/Sample_Y_raw_matrix.h5ad" ).exists()}, + + // + // Check if files are the same + // + {assert snapshot( + workflow, + path( "${outputDir}/results_alevin/alevin/Sample_X_alevin_results/af_quant/alevin/quants_mat_cols.txt" ), + path( "${outputDir}/results_alevin/alevin/Sample_X_alevin_results/af_quant/alevin/quants_mat.mtx" ), + path( "${outputDir}/results_alevin/alevin/Sample_X_alevin_results/af_quant/alevin/quants_mat_rows.txt" ), + path( "${outputDir}/results_alevin/alevin/Sample_Y_alevin_results/af_quant/alevin/quants_mat_cols.txt" ), + path( "${outputDir}/results_alevin/alevin/Sample_Y_alevin_results/af_quant/alevin/quants_mat.mtx" ), + path( "${outputDir}/results_alevin/alevin/Sample_Y_alevin_results/af_quant/alevin/quants_mat_rows.txt" ), + path( "${outputDir}/results_alevin/alevin/mtx_conversions/Sample_X/Sample_X_raw_matrix.rds" ), + path( "${outputDir}/results_alevin/alevin/mtx_conversions/Sample_Y/Sample_Y_raw_matrix.rds" ) + ).match()} + + ) // end of assertAll() + + } + } + +} diff --git a/tests/main_pipeline_alevin.test.snap b/tests/main_pipeline_alevin.test.snap new file mode 100644 index 00000000..e3d19f72 --- /dev/null +++ b/tests/main_pipeline_alevin.test.snap @@ -0,0 +1,38 @@ +{ + "test-dataset_alevin_aligner": { + "content": [ + { + "stderr": [ + + ], + "errorReport": "", + "exitStatus": 0, + "failed": false, + "stdout": [ + + ], + "errorMessage": "", + "trace": { + "tasksFailed": 0, + "tasksCount": 14, + "tasksSucceeded": 14 + }, + "name": "workflow", + "success": true + }, + "quants_mat_cols.txt:md5,e9868982c17a330392e38c2a5933cf97", + "quants_mat.mtx:md5,b8aa7b3c488fd8923de50a3621d4991f", + "quants_mat_rows.txt:md5,6227df5a13127b71c71fb18cd8574857", + "quants_mat_cols.txt:md5,e9868982c17a330392e38c2a5933cf97", + "quants_mat.mtx:md5,54cd12666016adce94c025b2e07f4b02", + "quants_mat_rows.txt:md5,6b458a7777260ba90eccbe7919df934b", + "Sample_X_raw_matrix.rds:md5,ad35ee66bf2fc3d5d4656c19a7e64e2b", + "Sample_Y_raw_matrix.rds:md5,baf584142205b1d42bb6fdab1f22a06a" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-14T14:49:46.831540515" + } +} diff --git a/tests/main_pipeline_cellranger.test b/tests/main_pipeline_cellranger.test new file mode 100644 index 00000000..ea68eca6 --- /dev/null +++ b/tests/main_pipeline_cellranger.test @@ -0,0 +1,79 @@ +nextflow_pipeline { + + name "Test Workflow main.nf" + script "main.nf" + + test("test-dataset_cellranger_aligner") { + + when { + // the rest is taken from shared config + params { + aligner = 'cellranger' + outdir = "${outputDir}/results_cellranger" + + // Limit resources so that this can run on GitHub Actions -- for some reason it had not been taken from shared config + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' + } + } + + then { + + assertAll( + + // + // General assertions + // + + // Did 
it finish successfully? + {assert workflow.success}, + + // How many tasks were executed? + {assert workflow.trace.tasks().size() == 18}, + + // How many results were produced? + {assert path("${outputDir}/results_cellranger").list().size() == 4}, + {assert path("${outputDir}/results_cellranger/cellranger").list().size() == 4}, + {assert path("${outputDir}/results_cellranger/cellranger/mtx_conversions").list().size() == 5}, + {assert path("${outputDir}/results_cellranger/cellranger/count").list().size() == 3}, + {assert path("${outputDir}/results_cellranger/fastqc").list().size() == 12}, + {assert path("${outputDir}/results_cellranger/multiqc").list().size() == 3}, + + // + // Check if files were produced + // + {assert new File( "${outputDir}/results_cellranger/cellranger/mtx_conversions/Sample_X/Sample_X_raw_matrix.h5ad" ).exists()}, + {assert new File( "${outputDir}/results_cellranger/cellranger/mtx_conversions/Sample_Y/Sample_Y_raw_matrix.h5ad" ).exists()}, + {assert new File( "${outputDir}/results_cellranger/cellranger/mtx_conversions/Sample_X/Sample_X_filtered_matrix.h5ad" ).exists()}, + {assert new File( "${outputDir}/results_cellranger/cellranger/mtx_conversions/Sample_Y/Sample_Y_filtered_matrix.h5ad" ).exists()}, + + // + // Check if files are the same + // + {assert snapshot( + workflow, + path( "${outputDir}/results_cellranger/cellranger/count/Sample_X/outs/filtered_feature_bc_matrix/barcodes.tsv.gz" ), + path( "${outputDir}/results_cellranger/cellranger/count/Sample_X/outs/filtered_feature_bc_matrix/features.tsv.gz" ), + path( "${outputDir}/results_cellranger/cellranger/count/Sample_X/outs/filtered_feature_bc_matrix/matrix.mtx.gz" ), + path( "${outputDir}/results_cellranger/cellranger/count/Sample_Y/outs/filtered_feature_bc_matrix/barcodes.tsv.gz" ), + path( "${outputDir}/results_cellranger/cellranger/count/Sample_Y/outs/filtered_feature_bc_matrix/features.tsv.gz" ), + path( "${outputDir}/results_cellranger/cellranger/count/Sample_Y/outs/filtered_feature_bc_matrix/matrix.mtx.gz" ), + path( "${outputDir}/results_cellranger/cellranger/count/Sample_X/outs/raw_feature_bc_matrix/barcodes.tsv.gz" ), + path( "${outputDir}/results_cellranger/cellranger/count/Sample_X/outs/raw_feature_bc_matrix/features.tsv.gz" ), + path( "${outputDir}/results_cellranger/cellranger/count/Sample_X/outs/raw_feature_bc_matrix/matrix.mtx.gz" ), + path( "${outputDir}/results_cellranger/cellranger/count/Sample_Y/outs/raw_feature_bc_matrix/barcodes.tsv.gz" ), + path( "${outputDir}/results_cellranger/cellranger/count/Sample_Y/outs/raw_feature_bc_matrix/features.tsv.gz" ), + path( "${outputDir}/results_cellranger/cellranger/count/Sample_Y/outs/raw_feature_bc_matrix/matrix.mtx.gz" ), + path( "${outputDir}/results_cellranger/cellranger/mtx_conversions/Sample_X/Sample_X_raw_matrix.rds" ), + path( "${outputDir}/results_cellranger/cellranger/mtx_conversions/Sample_Y/Sample_Y_raw_matrix.rds" ), + path( "${outputDir}/results_cellranger/cellranger/mtx_conversions/Sample_X/Sample_X_filtered_matrix.rds" ), + path( "${outputDir}/results_cellranger/cellranger/mtx_conversions/Sample_Y/Sample_Y_filtered_matrix.rds" ) + ).match()} + + ) // end of assertAll() + + } + } + +} diff --git a/tests/main_pipeline_cellranger.test.snap b/tests/main_pipeline_cellranger.test.snap new file mode 100644 index 00000000..ef8874f8 --- /dev/null +++ b/tests/main_pipeline_cellranger.test.snap @@ -0,0 +1,46 @@ +{ + "test-dataset_cellranger_aligner": { + "content": [ + { + "stderr": [ + + ], + "errorReport": "", + "exitStatus": 0, + "failed": 
false, + "stdout": [ + + ], + "errorMessage": "", + "trace": { + "tasksFailed": 0, + "tasksCount": 18, + "tasksSucceeded": 18 + }, + "name": "workflow", + "success": true + }, + "barcodes.tsv.gz:md5,fe6e51564b4405b37ca8604a844b1f2e", + "features.tsv.gz:md5,99e453cb1443a3e43e99405184e51a5e", + "matrix.mtx.gz:md5,1528b9b0fccc78dec95695928e42e710", + "barcodes.tsv.gz:md5,77afe9a76631fc7b44236d3962a55aa5", + "features.tsv.gz:md5,99e453cb1443a3e43e99405184e51a5e", + "matrix.mtx.gz:md5,49db721ca5d5749cf11597e82a010eb6", + "barcodes.tsv.gz:md5,85da6b6e0c78dfe81af8c07c2017ab5e", + "features.tsv.gz:md5,99e453cb1443a3e43e99405184e51a5e", + "matrix.mtx.gz:md5,96943587acf3356c5fa5038056c54c96", + "barcodes.tsv.gz:md5,081f72b5252ccaf5ffd535ffbd235c4c", + "features.tsv.gz:md5,99e453cb1443a3e43e99405184e51a5e", + "matrix.mtx.gz:md5,58182db2706d532ec970526de3d3b70f", + "Sample_X_raw_matrix.rds:md5,306a5477ace4d43d851b8389fdfeaf1f", + "Sample_Y_raw_matrix.rds:md5,74b31532da4cae5a8197d690021d77fc", + "Sample_X_filtered_matrix.rds:md5,f9191ba575a3ab79ada4807715f18573", + "Sample_Y_filtered_matrix.rds:md5,7be3f7b29d668dcf7e951b9f4d371a5e" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-16T09:43:45.32298954" + } +} \ No newline at end of file diff --git a/tests/main_pipeline_kallisto.test b/tests/main_pipeline_kallisto.test new file mode 100644 index 00000000..12e78144 --- /dev/null +++ b/tests/main_pipeline_kallisto.test @@ -0,0 +1,70 @@ +nextflow_pipeline { + + name "Test Workflow main.nf" + script "main.nf" + + test("test-dataset_kallisto_aligner") { + + when { + // the rest is taken from shared config + params { + aligner = 'kallisto' + outdir = "${outputDir}/results_kallisto" + + // Limit resources so that this can run on GitHub Actions -- for some reason it had not been taken from shared config + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' + } + } + + then { + + assertAll( + + // + // General assertions + // + + // Did it finish successfully? + {assert workflow.success}, + + // How many tasks were executed? + {assert workflow.trace.tasks().size() == 12}, + + // How many results were produced? 
+ {assert path("${outputDir}/results_kallisto").list().size() == 4}, + {assert path("${outputDir}/results_kallisto/kallisto").list().size() == 4}, + {assert path("${outputDir}/results_kallisto/kallisto/mtx_conversions").list().size() == 4}, + {assert path("${outputDir}/results_kallisto/kallisto/Sample_X.count").list().size() == 9}, + {assert path("${outputDir}/results_kallisto/kallisto/Sample_Y.count").list().size() == 9}, + {assert path("${outputDir}/results_kallisto/fastqc").list().size() == 12}, + {assert path("${outputDir}/results_kallisto/multiqc").list().size() == 3}, + + // + // Check if files were produced + // + {assert new File( "${outputDir}/results_kallisto/kallisto/mtx_conversions/Sample_X/Sample_X_raw_matrix.h5ad" ).exists()}, + {assert new File( "${outputDir}/results_kallisto/kallisto/mtx_conversions/Sample_Y/Sample_Y_raw_matrix.h5ad" ).exists()}, + + // + // Check if files are the same + // + {assert snapshot( + workflow, + path( "${outputDir}/results_kallisto/kallisto/Sample_X.count/counts_unfiltered/cells_x_genes.barcodes.txt" ), + path( "${outputDir}/results_kallisto/kallisto/Sample_X.count/counts_unfiltered/cells_x_genes.genes.txt" ), + path( "${outputDir}/results_kallisto/kallisto/Sample_X.count/counts_unfiltered/cells_x_genes.mtx" ), + path( "${outputDir}/results_kallisto/kallisto/Sample_Y.count/counts_unfiltered/cells_x_genes.barcodes.txt" ), + path( "${outputDir}/results_kallisto/kallisto/Sample_Y.count/counts_unfiltered/cells_x_genes.genes.txt" ), + path( "${outputDir}/results_kallisto/kallisto/Sample_Y.count/counts_unfiltered/cells_x_genes.mtx" ), + path( "${outputDir}/results_kallisto/kallisto/mtx_conversions/Sample_X/Sample_X_raw_matrix.rds" ), + path( "${outputDir}/results_kallisto/kallisto/mtx_conversions/Sample_Y/Sample_Y_raw_matrix.rds" ) + ).match()} + + ) // end of assertAll() + + } + } + +} diff --git a/tests/main_pipeline_kallisto.test.snap b/tests/main_pipeline_kallisto.test.snap new file mode 100644 index 00000000..f9b9c96a --- /dev/null +++ b/tests/main_pipeline_kallisto.test.snap @@ -0,0 +1,38 @@ +{ + "test-dataset_kallisto_aligner": { + "content": [ + { + "stderr": [ + + ], + "errorReport": "", + "exitStatus": 0, + "failed": false, + "stdout": [ + + ], + "errorMessage": "", + "trace": { + "tasksFailed": 0, + "tasksCount": 12, + "tasksSucceeded": 12 + }, + "name": "workflow", + "success": true + }, + "cells_x_genes.barcodes.txt:md5,72d78bb1c1ee7cb174520b30f695aa48", + "cells_x_genes.genes.txt:md5,acd9d00120f52031974b2add3e7521b6", + "cells_x_genes.mtx:md5,894d60da192e3788de11fa8fc1fa711d", + "cells_x_genes.barcodes.txt:md5,a8cf7ea4b2d075296a94bf066a64b7a4", + "cells_x_genes.genes.txt:md5,acd9d00120f52031974b2add3e7521b6", + "cells_x_genes.mtx:md5,abd83de117204d0a77df3c92d00cc025", + "Sample_X_raw_matrix.rds:md5,0938f4189b7a7fd1030abfcee798741c", + "Sample_Y_raw_matrix.rds:md5,93c12abe283ab37c5f37e5cd3cb25302" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-18T14:51:42.040931572" + } +} \ No newline at end of file diff --git a/tests/main_pipeline_star.test b/tests/main_pipeline_star.test new file mode 100644 index 00000000..37c54d57 --- /dev/null +++ b/tests/main_pipeline_star.test @@ -0,0 +1,76 @@ +nextflow_pipeline { + + name "Test Workflow main.nf" + script "main.nf" + + test("test-dataset_star_aligner") { + + when { + // the rest is taken from shared config + params { + aligner = 'star' + outdir = "${outputDir}/results_star" + + // Limit resources so that this can run on GitHub Actions -- for some reason 
it had not been taken from shared config + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' + } + } + + then { + + assertAll( + + // + // General assertions + // + + // Did it finish successfully? + {assert workflow.success}, + + // How many tasks were executed? + {assert workflow.trace.tasks().size() == 17}, + + // How many results were produced? + {assert path("${outputDir}/results_star").list().size() == 4}, + {assert path("${outputDir}/results_star/star").list().size() == 3}, + {assert path("${outputDir}/results_star/star/mtx_conversions").list().size() == 5}, + {assert path("${outputDir}/results_star/fastqc").list().size() == 12}, + {assert path("${outputDir}/results_star/multiqc").list().size() == 3}, + + // + // Check if files were produced + // + {assert new File( "${outputDir}/results_star/star/mtx_conversions/Sample_X/Sample_X_raw_matrix.h5ad" ).exists()}, + {assert new File( "${outputDir}/results_star/star/mtx_conversions/Sample_Y/Sample_Y_raw_matrix.h5ad" ).exists()}, + {assert new File( "${outputDir}/results_star/star/mtx_conversions/Sample_X/Sample_X_filtered_matrix.h5ad" ).exists()}, + {assert new File( "${outputDir}/results_star/star/mtx_conversions/Sample_Y/Sample_Y_filtered_matrix.h5ad" ).exists()}, + + // + // Check if files are the same + // + {assert snapshot( + workflow, + path( "${outputDir}/results_star/star/Sample_X/Sample_X.SJ.out.tab" ), + path( "${outputDir}/results_star/star/Sample_X/Sample_X.Solo.out/Barcodes.stats" ), + path( "${outputDir}/results_star/star/Sample_X/Sample_X.Solo.out/Gene/filtered/matrix.mtx.gz" ), + path( "${outputDir}/results_star/star/Sample_X/Sample_X.Solo.out/Gene/filtered/features.tsv.gz" ), + path( "${outputDir}/results_star/star/Sample_X/Sample_X.Solo.out/Gene/filtered/barcodes.tsv.gz" ), + path( "${outputDir}/results_star/star/Sample_Y/Sample_Y.SJ.out.tab" ), + path( "${outputDir}/results_star/star/Sample_Y/Sample_Y.Solo.out/Barcodes.stats" ), + path( "${outputDir}/results_star/star/Sample_Y/Sample_Y.Solo.out/Gene/filtered/matrix.mtx.gz" ), + path( "${outputDir}/results_star/star/Sample_Y/Sample_Y.Solo.out/Gene/filtered/features.tsv.gz" ), + path( "${outputDir}/results_star/star/Sample_Y/Sample_Y.Solo.out/Gene/filtered/barcodes.tsv.gz" ), + path( "${outputDir}/results_star/star/mtx_conversions/Sample_X/Sample_X_raw_matrix.rds" ), + path( "${outputDir}/results_star/star/mtx_conversions/Sample_Y/Sample_Y_raw_matrix.rds" ), + path( "${outputDir}/results_star/star/mtx_conversions/Sample_X/Sample_X_filtered_matrix.rds" ), + path( "${outputDir}/results_star/star/mtx_conversions/Sample_Y/Sample_Y_filtered_matrix.rds" ), + ).match()} + + ) // end of assertAll() + + } + } + +} diff --git a/tests/main_pipeline_star.test.snap b/tests/main_pipeline_star.test.snap new file mode 100644 index 00000000..0aae74cf --- /dev/null +++ b/tests/main_pipeline_star.test.snap @@ -0,0 +1,44 @@ +{ + "test-dataset_star_aligner": { + "content": [ + { + "stderr": [ + + ], + "errorReport": "", + "exitStatus": 0, + "failed": false, + "stdout": [ + + ], + "errorMessage": "", + "trace": { + "tasksFailed": 0, + "tasksCount": 17, + "tasksSucceeded": 17 + }, + "name": "workflow", + "success": true + }, + "Sample_X.SJ.out.tab:md5,d2d7f0abe38029012571bdf6622fc6eb", + "Barcodes.stats:md5,7f99dc8aa5e074fbe5779ea7712c0886", + "matrix.mtx.gz:md5,6a923393343aa1a69b0cf1bd998c9285", + "features.tsv.gz:md5,99e453cb1443a3e43e99405184e51a5e", + "barcodes.tsv.gz:md5,9a7dacaa1779ea43c1507a947fe6992a", + "Sample_Y.SJ.out.tab:md5,98bd31104a860cf80119dc30d938d163", + 
"Barcodes.stats:md5,2dbf1ae426c1afd97903ee001f0db5ce", + "matrix.mtx.gz:md5,0ae080bd0002e350531a5816e159345e", + "features.tsv.gz:md5,99e453cb1443a3e43e99405184e51a5e", + "barcodes.tsv.gz:md5,9b695b0b91bcb146ec9c4688ca10a690", + "Sample_X_raw_matrix.rds:md5,31604db3e7846acc8d9a60b1a171ce78", + "Sample_Y_raw_matrix.rds:md5,1a52c823e91acce2b29621c8c99c8c72", + "Sample_X_filtered_matrix.rds:md5,aa2d36dd8507aba864347c88e4ce0d27", + "Sample_Y_filtered_matrix.rds:md5,d459af8f99258bcc88b80b2f7c58e911" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-14T16:30:25.7971791" + } +} diff --git a/tests/nextflow.config b/tests/nextflow.config new file mode 100644 index 00000000..7efd4642 --- /dev/null +++ b/tests/nextflow.config @@ -0,0 +1,35 @@ +/* +======================================================================================== + Nextflow config file for running tests +======================================================================================== +*/ + +// Copy from test.config for standardization +params { + config_profile_name = 'Test profile' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' + + // Input data + input = 'https://github.com/nf-core/test-datasets/raw/scrnaseq/samplesheet-2-0.csv' + + // Genome references + fasta = 'https://github.com/nf-core/test-datasets/raw/scrnaseq/reference/GRCm38.p6.genome.chr19.fa' + gtf = 'https://github.com/nf-core/test-datasets/raw/scrnaseq/reference/gencode.vM19.annotation.chr19.gtf' + protocol = '10XV2' + + // small dataset does not have sufficient data for emptydrops module + skip_emptydrops = true + + validationSchemaIgnoreParams = 'genomes' +} + +process { + withName: '.*:CELLRANGER_COUNT' { + maxForks = 1 + } +} diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf index 25740a8e..7171c970 100644 --- a/workflows/scrnaseq.nf +++ b/workflows/scrnaseq.nf @@ -1,138 +1,87 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - PRINT PARAMS SUMMARY -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - +include { MULTIQC } from '../modules/nf-core/multiqc/main' +include { FASTQC_CHECK } from '../subworkflows/local/fastqc' +include { KALLISTO_BUSTOOLS } from '../subworkflows/local/kallisto_bustools' +include { SCRNASEQ_ALEVIN } from '../subworkflows/local/alevin' +include { STARSOLO } from '../subworkflows/local/starsolo' +include { CELLRANGER_ALIGN } from "../subworkflows/local/align_cellranger" +include { CELLRANGERARC_ALIGN } from "../subworkflows/local/align_cellrangerarc" +include { UNIVERSC_ALIGN } from "../subworkflows/local/align_universc" +include { MTX_CONVERSION } from "../subworkflows/local/mtx_conversion" +include { GTF_GENE_FILTER } from '../modules/local/gtf_gene_filter' +include { EMPTYDROPS_CELL_CALLING } from '../modules/local/emptydrops' +include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_scrnaseq_pipeline' include { paramsSummaryLog; paramsSummaryMap } from 'plugin/nf-validation' -def summary_params = paramsSummaryMap(workflow) - -def checkPathParamList = [ - params.input, params.multiqc_config, params.fasta, params.gtf, - params.transcript_fasta, 
params.salmon_index, params.kallisto_index, - params.star_index, params.txp2gene, params.barcode_whitelist, params.cellranger_index, - params.universc_index -] -for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } - - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - CONFIG FILES -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ -ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) -ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config, checkIfExists: true ) : Channel.empty() -ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo, checkIfExists: true ) : Channel.empty() -ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT LOCAL MODULES/SUBWORKFLOWS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -// -// SUBWORKFLOW: Consisting of a mix of local and nf-core/modules -// -include { INPUT_CHECK } from '../subworkflows/local/input_check' -include { FASTQC_CHECK } from '../subworkflows/local/fastqc' -include { KALLISTO_BUSTOOLS } from '../subworkflows/local/kallisto_bustools' -include { SCRNASEQ_ALEVIN } from '../subworkflows/local/alevin' -include { STARSOLO } from '../subworkflows/local/starsolo' -include { CELLRANGER_ALIGN } from "../subworkflows/local/align_cellranger" -include { CELLRANGERARC_ALIGN } from "../subworkflows/local/align_cellrangerarc" -include { UNIVERSC_ALIGN } from "../subworkflows/local/align_universc" -include { MTX_CONVERSION } from "../subworkflows/local/mtx_conversion" -include { GTF_GENE_FILTER } from '../modules/local/gtf_gene_filter' +workflow SCRNASEQ { -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT NF-CORE MODULES/SUBWORKFLOWS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ + take: + ch_fastq + ch_genome_fasta + ch_gtf -// -// MODULE: Installed directly from nf-core/modules -// -include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' -include { MULTIQC } from '../modules/nf-core/multiqc/main' + main: -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - RUN MAIN WORKFLOW -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ + protocol_config = Utils.getProtocol(workflow, log, params.aligner, params.protocol) + if (protocol_config['protocol'] == 'auto' && params.aligner != "cellranger") { + error "Only cellranger supports `protocol = 'auto'`. Please specify the protocol manually!" + } -// Info required for completion email and summary -// TODO: Are this channels still necessary? -ch_output_docs = file("$projectDir/docs/output.md", checkIfExists: true) -ch_output_docs_images = file("$projectDir/docs/images/", checkIfExists: true) -protocol_config = WorkflowScrnaseq.getProtocol(workflow, log, params.aligner, params.protocol) -if (protocol_config['protocol'] == 'auto' && params.aligner != "cellranger") { - error "Only cellranger supports `protocol = 'auto'`. Please specify the protocol manually!" 
-} + // overwrite fasta and gtf if user provide a custom one + ch_genome_fasta = Channel.value(params.fasta ? file(params.fasta) : ch_genome_fasta) + ch_gtf = Channel.value(params.gtf ? file(params.gtf) : ch_gtf) -// general input and params -ch_input = file(params.input) -ch_genome_fasta = Channel.value(params.fasta ? file(params.fasta) : []) -ch_gtf = params.gtf ? file(params.gtf) : [] -ch_transcript_fasta = params.transcript_fasta ? file(params.transcript_fasta): [] -ch_motifs = params.motifs ? file(params.motifs) : [] -ch_cellrangerarc_config = params.cellrangerarc_config ? file(params.cellrangerarc_config) : [] -ch_txp2gene = params.txp2gene ? file(params.txp2gene) : [] -ch_multiqc_alevin = Channel.empty() -ch_multiqc_star = Channel.empty() -ch_multiqc_cellranger = Channel.empty() -if (params.barcode_whitelist) { - ch_barcode_whitelist = file(params.barcode_whitelist) -} else if (protocol_config.containsKey("whitelist")) { - ch_barcode_whitelist = file("$projectDir/${protocol_config['whitelist']}") -} else { - ch_barcode_whitelist = [] -} + // general input and params + ch_transcript_fasta = params.transcript_fasta ? file(params.transcript_fasta): [] + ch_motifs = params.motifs ? file(params.motifs) : [] + ch_cellrangerarc_config = params.cellrangerarc_config ? file(params.cellrangerarc_config) : [] + ch_txp2gene = params.txp2gene ? file(params.txp2gene) : [] + ch_multiqc_files = Channel.empty() + if (params.barcode_whitelist) { + ch_barcode_whitelist = file(params.barcode_whitelist) + } else if (protocol_config.containsKey("whitelist")) { + ch_barcode_whitelist = file("$projectDir/${protocol_config['whitelist']}") + } else { + ch_barcode_whitelist = [] + } + //kallisto params + ch_kallisto_index = params.kallisto_index ? file(params.kallisto_index) : [] + kb_workflow = params.kb_workflow + kb_t1c = params.kb_t1c ? file(params.kb_t1c) : [] + kb_t2c = params.kb_t2c ? file(params.kb_t2c) : [] -//kallisto params -ch_kallisto_index = params.kallisto_index ? file(params.kallisto_index) : [] -kb_workflow = params.kb_workflow + // samplesheet - this is passed to the MTX conversion functions to add metadata to the + // AnnData objects. + ch_input = file(params.input) -//salmon params -ch_salmon_index = params.salmon_index ? file(params.salmon_index) : [] + //kallisto params + ch_kallisto_index = params.kallisto_index ? file(params.kallisto_index) : [] + kb_workflow = params.kb_workflow -//star params -ch_star_index = params.star_index ? file(params.star_index) : [] -star_feature = params.star_feature + //salmon params + ch_salmon_index = params.salmon_index ? file(params.salmon_index) : [] -//cellranger params -ch_cellranger_index = params.cellranger_index ? file(params.cellranger_index) : [] + //star params + ch_star_index = params.star_index ? file(params.star_index) : [] + star_feature = params.star_feature -//universc params -ch_universc_index = params.universc_index ? file(params.universc_index) : [] + //cellranger params + ch_cellranger_index = params.cellranger_index ? file(params.cellranger_index) : [] -workflow SCRNASEQ { + //universc params + ch_universc_index = params.universc_index ? 
-
-//kallisto params
-ch_kallisto_index = params.kallisto_index ? file(params.kallisto_index) : []
-kb_workflow = params.kb_workflow
+    // samplesheet - this is passed to the MTX conversion functions to add metadata to the
+    // AnnData objects.
+    ch_input = file(params.input)
-
-//salmon params
-ch_salmon_index = params.salmon_index ? file(params.salmon_index) : []
+    //kallisto params
+    ch_kallisto_index = params.kallisto_index ? file(params.kallisto_index) : []
+    kb_workflow = params.kb_workflow
-
-//star params
-ch_star_index = params.star_index ? file(params.star_index) : []
-star_feature = params.star_feature
+    //salmon params
+    ch_salmon_index = params.salmon_index ? file(params.salmon_index) : []
-
-//cellranger params
-ch_cellranger_index = params.cellranger_index ? file(params.cellranger_index) : []
+    //star params
+    ch_star_index = params.star_index ? file(params.star_index) : []
+    star_feature = params.star_feature
-
-//universc params
-ch_universc_index = params.universc_index ? file(params.universc_index) : []
+    //cellranger params
+    ch_cellranger_index = params.cellranger_index ? file(params.cellranger_index) : []
-
-workflow SCRNASEQ {
+    //universc params
+    ch_universc_index = params.universc_index ? file(params.universc_index) : []

    ch_versions = Channel.empty()
    ch_mtx_matrices = Channel.empty()

-    // Check input files and stage input data
-    ch_fastq = INPUT_CHECK( ch_input ).reads
-
-    ch_versions = ch_versions.mix(INPUT_CHECK.out.versions)
-    // TODO: OPTIONAL, you can use nf-validation plugin to create an input channel from the samplesheet with Channel.fromSamplesheet("input")
-    // See the documentation https://nextflow-io.github.io/nf-validation/samplesheets/fromSamplesheet/
-    // ! There is currently no tooling to help you write a sample sheet schema
-
    // Run FastQC
-    ch_multiqc_fastqc = Channel.empty()
    if (!params.skip_fastqc) {
        FASTQC_CHECK ( ch_fastq )
        ch_versions = ch_versions.mix(FASTQC_CHECK.out.fastqc_version)
-        ch_multiqc_fastqc = FASTQC_CHECK.out.fastqc_zip
-    } else {
-        ch_multiqc_fastqc = Channel.empty()
+        ch_multiqc_files = ch_multiqc_files.mix(FASTQC_CHECK.out.fastqc_zip.map{ meta, it -> it })
    }

    ch_filter_gtf = GTF_GENE_FILTER ( ch_genome_fasta, ch_gtf ).gtf
@@ -144,12 +93,14 @@ workflow SCRNASEQ {
            ch_filter_gtf,
            ch_kallisto_index,
            ch_txp2gene,
+            kb_t1c,
+            kb_t2c,
            protocol_config['protocol'],
            kb_workflow,
            ch_fastq
        )
        ch_versions = ch_versions.mix(KALLISTO_BUSTOOLS.out.ch_versions)
-        ch_mtx_matrices = ch_mtx_matrices.mix(KALLISTO_BUSTOOLS.out.counts)
+        ch_mtx_matrices = ch_mtx_matrices.mix(KALLISTO_BUSTOOLS.out.raw_counts, KALLISTO_BUSTOOLS.out.filtered_counts)
        ch_txp2gene = KALLISTO_BUSTOOLS.out.txp2gene
    }
@@ -166,7 +117,7 @@ workflow SCRNASEQ {
            ch_fastq
        )
        ch_versions = ch_versions.mix(SCRNASEQ_ALEVIN.out.ch_versions)
-        ch_multiqc_alevin = SCRNASEQ_ALEVIN.out.alevin_results
+        ch_multiqc_files = ch_multiqc_files.mix(SCRNASEQ_ALEVIN.out.alevin_results.map{ meta, it -> it })
        ch_mtx_matrices = ch_mtx_matrices.mix(SCRNASEQ_ALEVIN.out.alevin_results)
    }
@@ -183,9 +134,9 @@ workflow SCRNASEQ {
            protocol_config.get('extra_args', ""),
        )
        ch_versions = ch_versions.mix(STARSOLO.out.ch_versions)
-        ch_mtx_matrices = ch_mtx_matrices.mix(STARSOLO.out.star_counts)
+        ch_mtx_matrices = ch_mtx_matrices.mix(STARSOLO.out.raw_counts, STARSOLO.out.filtered_counts)
        ch_star_index = STARSOLO.out.star_index
-        ch_multiqc_star = STARSOLO.out.for_multiqc
+        ch_multiqc_files = ch_multiqc_files.mix(STARSOLO.out.for_multiqc)
    }
@@ -198,11 +149,11 @@ workflow SCRNASEQ {
            protocol_config['protocol']
        )
        ch_versions = ch_versions.mix(CELLRANGER_ALIGN.out.ch_versions)
-        ch_mtx_matrices = ch_mtx_matrices.mix(CELLRANGER_ALIGN.out.cellranger_out)
+        ch_mtx_matrices = ch_mtx_matrices.mix(CELLRANGER_ALIGN.out.cellranger_matrices)
        ch_star_index = CELLRANGER_ALIGN.out.star_index
-        ch_multiqc_cellranger = CELLRANGER_ALIGN.out.cellranger_out.map{
+        ch_multiqc_files = ch_multiqc_files.mix(CELLRANGER_ALIGN.out.cellranger_out.map{
            meta, outs -> outs.findAll{ it -> it.name == "web_summary.html"}
-        }
+        })
    }

    // Run universc pipeline
@@ -232,6 +183,26 @@ workflow SCRNASEQ {
        ch_mtx_matrices = ch_mtx_matrices.mix(CELLRANGERARC_ALIGN.out.cellranger_arc_out)
    }

+    // Run emptydrops calling module
+    if ( !params.skip_emptydrops ) {
+
+        //
+        // emptydrops should only run on the raw matrices thus, filter-out the filtered result of the aligners that can produce it
+        //
+        if ( params.aligner in [ 'cellranger', 'cellrangerarc', 'kallisto', 'star' ] ) {
+            ch_mtx_matrices_for_emptydrops =
+                ch_mtx_matrices.filter { meta, mtx_files ->
+                    mtx_files.toString().contains("raw_feature_bc_matrix") || // cellranger
+                    mtx_files.toString().contains("counts_unfiltered")     || // kallisto
+                    mtx_files.toString().contains("raw")                      // star
+                }
+        } else {
+            ch_mtx_matrices_for_emptydrops = ch_mtx_matrices
+        }
+        EMPTYDROPS_CELL_CALLING( ch_mtx_matrices_for_emptydrops )
+        ch_mtx_matrices = ch_mtx_matrices.mix( EMPTYDROPS_CELL_CALLING.out.filtered_matrices )
+    }
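+    // The emptydrops-filtered matrices are mixed back into ch_mtx_matrices, so the
+    // MTX_CONVERSION step below converts them alongside the aligner's own outputs.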
+
    // Run mtx to h5ad conversion subworkflow
    MTX_CONVERSION (
        ch_mtx_matrices,
@@ -243,32 +214,19 @@ workflow SCRNASEQ {
    //Add Versions from MTX Conversion workflow too
    ch_versions.mix(MTX_CONVERSION.out.ch_versions)

-    // collect software versions
-    CUSTOM_DUMPSOFTWAREVERSIONS (
-        ch_versions.unique().collectFile(name: 'collated_versions.yml')
-    )
-
    //
    // MODULE: MultiQC
    //
-    workflow_summary = WorkflowScrnaseq.paramsSummaryMultiqc(workflow, summary_params)
-    ch_workflow_summary = Channel.value(workflow_summary)
-
-    methods_description = WorkflowScrnaseq.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description, params)
-    ch_methods_description = Channel.value(methods_description)
-
-    ch_multiqc_fastqc.dump(tag: 'fastqc', pretty: true)
-    ch_multiqc_alevin.dump(tag: 'alevin', pretty: true)
-    ch_multiqc_star.dump(tag: 'star', pretty: true)
-
-    ch_multiqc_files = Channel.empty()
-    ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml'))
-    ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml'))
-    ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect())
-    ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_fastqc.collect{ meta, qcfile -> qcfile }.ifEmpty([]))
-    ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_alevin.collect{ meta, qcfile -> qcfile }.ifEmpty([]))
-    ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_star.collect{ meta, qcfile -> qcfile }.ifEmpty([]))
-    ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_cellranger.collect().ifEmpty([]))
+    ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true)
+    ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multiqc_config, checkIfExists: true) : Channel.empty()
+    ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath(params.multiqc_logo, checkIfExists: true) : Channel.empty()
+    summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json")
+    ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params))
+    ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true)
+    ch_methods_description = Channel.value(methodsDescriptionText(ch_multiqc_custom_methods_description))
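+    // paramsSummaryMap is provided by the nf-validation plugin; paramsSummaryMultiqc,
+    // softwareVersionsToYAML and methodsDescriptionText are assumed to be imported from
+    // the nf-core template's utils subworkflows at the top of this file.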
+    ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml'))
+    ch_multiqc_files = ch_multiqc_files.mix(softwareVersionsToYAML(ch_versions).collectFile(name: 'versions.yml'))
+    ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml', sort: false))

    MULTIQC (
        ch_multiqc_files.collect(),
@@ -276,28 +234,8 @@ workflow SCRNASEQ {
        ch_multiqc_custom_config.toList(),
        ch_multiqc_logo.toList()
    )
-    multiqc_report = MULTIQC.out.report.toList()
-}
-
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-    COMPLETION EMAIL AND SUMMARY
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-*/
-
-workflow.onComplete {
-    if (params.email || params.email_on_fail) {
-        NfcoreTemplate.email(workflow, params, summary_params, projectDir, log, multiqc_report)
-    }
-    NfcoreTemplate.dump_parameters(workflow, params)
-    NfcoreTemplate.summary(workflow, params, log)
-    if (params.hook_url) {
-        NfcoreTemplate.IM_notification(workflow, params, summary_params, projectDir, log)
-    }
+    emit:
+    multiqc_report = MULTIQC.out.report.toList() // channel: /path/to/multiqc_report.html
+    versions       = ch_versions                 // channel: [ path(versions.yml) ]
}
-
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-    THE END
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-*/