Merge branch 'master' into tool/bamtools-split

2024-12-22 19:18:17 +00:00 · 2022-04-29 16:38:59 +01:00 · 2022-04-29 16:38:59 +01:00 · 4df985a065
commit 4df985a065
parent bdbf629d93 0d8acda00c
20 changed files with 339 additions and 137 deletions
--- a/.github/ISSUE_TEMPLATE/bug_report.md
+++ b/.github/ISSUE_TEMPLATE/bug_report.md
@ -1,64 +0,0 @@
---
-name: Bug report
-about: Report something that is broken or incorrect
-title: "[BUG]"
---
-
-<!--
-# nf-core/module bug report
-
-Hi there!
-
-Thanks for telling us about a problem with the modules.
-Please delete this text and anything that's not relevant from the template below:
-->
-
-## Check Documentation
-
-I have checked the following places for your error:
-
- [ ] [nf-core website: troubleshooting](https://nf-co.re/usage/troubleshooting)
- [ ] [nf-core/module documentation](https://github.com/nf-core/modules/blob/master/README.md)
-
-## Description of the bug
-
-<!-- A clear and concise description of what the bug is. -->
-
-## Steps to reproduce
-
-Steps to reproduce the behaviour:
-
-1. Command line: <!-- [e.g. `nextflow run ...`] -->
-2. See error: <!-- [Please provide your error message] -->
-
-## Expected behaviour
-
-<!-- A clear and concise description of what you expected to happen. -->
-
-## Log files
-
-Have you provided the following extra information/files:
-
- [ ] The command used to run the module
- [ ] The `.nextflow.log` file <!-- this is a hidden file in the directory where you launched the module -->
-
-## System
-
- Hardware: <!-- [e.g. HPC, Desktop, Cloud...] -->
- Executor: <!-- [e.g. slurm, local, awsbatch...] -->
- OS: <!-- [e.g. CentOS Linux, macOS, Linux Mint...] -->
- Version <!-- [e.g. 7, 10.13.6, 18.3...] -->
-
-## Nextflow Installation
-
- Version: <!-- [e.g. 19.10.0] -->
-
-## Container engine
-
- Engine: <!-- [e.g. Conda, Docker, Singularity or Podman] -->
- version: <!-- [e.g. 1.0.0] -->
- Image tag: <!-- [e.g. nfcore/module:2.6] -->
-
-## Additional context
-
-<!-- Add any other context about the problem here. -->
--- a/.github/ISSUE_TEMPLATE/bug_report.yml
+++ b/.github/ISSUE_TEMPLATE/bug_report.yml
@ -0,0 +1,52 @@
+name: Bug report
+description: Report something that is broken or incorrect
+labels: bug
+body:
+  - type: checkboxes
+    attributes:
+      label: Have you checked the docs?
+      description: I have checked the following places for my error
+      options:
+        - label: "[nf-core website: troubleshooting](https://nf-co.re/usage/troubleshooting)"
+          required: true
+        - label: "[nf-core modules documentation](https://nf-co.re/docs/contributing/modules)"
+          required: true
+
+  - type: textarea
+    id: description
+    attributes:
+      label: Description of the bug
+      description: A clear and concise description of what the bug is.
+    validations:
+      required: true
+
+  - type: textarea
+    id: command_used
+    attributes:
+      label: Command used and terminal output
+      description: Steps to reproduce the behaviour. Please paste the command you used to launch the pipeline and the output from your terminal.
+      render: console
+      placeholder: |
+        $ nextflow run ...
+
+        Some output where something broke
+
+  - type: textarea
+    id: files
+    attributes:
+      label: Relevant files
+      description: |
+        Please drag and drop the relevant files here. Create a `.zip` archive if the extension is not allowed.
+        Your verbose log file `.nextflow.log` is often useful _(this is a hidden file in the directory where you launched the pipeline)_ as well as custom Nextflow configuration files.
+
+  - type: textarea
+    id: system
+    attributes:
+      label: System information
+      description: |
+        * Nextflow version _(eg. 21.10.3)_
+        * Hardware _(eg. HPC, Desktop, Cloud)_
+        * Executor _(eg. slurm, local, awsbatch)_
+        * Container engine and version: _(e.g. Docker 1.0.0, Singularity, Conda, Podman, Shifter or Charliecloud)_
+        * OS and version: _(eg. CentOS Linux, macOS, Ubuntu 22.04)_
+        * Image tag: _(eg. nfcore/cellranger:2.6)_
--- a/.github/ISSUE_TEMPLATE/feature_request.md
+++ b/.github/ISSUE_TEMPLATE/feature_request.md
@ -1,32 +0,0 @@
---
-name: Feature request
-about: Suggest an idea for nf-core/modules
-title: "[FEATURE]"
---
-
-<!--
-# nf-core/modules feature request
-
-Hi there!
-
-Thanks for suggesting a new feature for the modules!
-Please delete this text and anything that's not relevant from the template below:
-->
-
-## Is your feature request related to a problem? Please describe
-
-<!-- A clear and concise description of what the problem is. -->
-
-<!-- e.g. [I'm always frustrated when ...] -->
-
-## Describe the solution you'd like
-
-<!-- A clear and concise description of what you want to happen. -->
-
-## Describe alternatives you've considered
-
-<!-- A clear and concise description of any alternative solutions or features you've considered. -->
-
-## Additional context
-
-<!-- Add any other context about the feature request here. -->
--- a/.github/ISSUE_TEMPLATE/feature_request.yml
+++ b/.github/ISSUE_TEMPLATE/feature_request.yml
@ -0,0 +1,32 @@
+name: Feature request
+description: Suggest an idea for nf-core/modules
+labels: feature
+title: "[FEATURE]"
+body:
+  - type: textarea
+    id: description
+    attributes:
+      label: Is your feature request related to a problem? Please describe
+      description: A clear and concise description of what the problem is.
+      placeholder: |
+        <!-- e.g. [I'm always frustrated when ...] -->
+    validations:
+      required: true
+
+  - type: textarea
+    id: solution
+    attributes:
+      label: Describe the solution you'd like
+      description: A clear and concise description of the solution you want to happen.
+
+  - type: textarea
+    id: alternatives
+    attributes:
+      label: Describe alternatives you've considered
+      description: A clear and concise description of any alternative solutions or features you've considered.
+
+  - type: textarea
+    id: additional_context
+    attributes:
+      label: Additional context
+      description: Add any other context about the feature request here.
--- a/.github/ISSUE_TEMPLATE/new_module.md
+++ b/.github/ISSUE_TEMPLATE/new_module.md
@ -1,26 +0,0 @@
---
-name: New module
-about: Suggest a new module for nf-core/modules
-title: "new module: TOOL/SUBTOOL"
-label: new module
---
-
-<!--
-# nf-core/modules new module suggestion
-
-Hi there!
-
-Thanks for suggesting a new module for the modules!
-Please delete this text and anything that's not relevant from the template below:
-
-Replace TOOL with the bioconda name for the tool in the following text, so that the link is functional.
-
-Replace TOOL/SUBTOOL in the issue title so that it's understandable.
-->
-
-I think it would be good to have a module for [TOOL](https://bioconda.github.io/recipes/TOOL/README.html)
-
- [ ] This module does not exist yet with the [`nf-core modules list`](https://github.com/nf-core/tools#list-modules) command
- [ ] There is no [open pull request](https://github.com/nf-core/modules/pulls) for this module
- [ ] There is no [open issue](https://github.com/nf-core/modules/issues) for this module
- [ ] If I'm planning to work on this module, I added myself to the `Assignees` to facilitate tracking who is working on the module
--- a/.github/ISSUE_TEMPLATE/new_module.yml
+++ b/.github/ISSUE_TEMPLATE/new_module.yml
@ -0,0 +1,36 @@
+name: New module
+description: Suggest a new module for nf-core/modules
+title: "new module: TOOL/SUBTOOL"
+labels: new module
+body:
+  - type: checkboxes
+    attributes:
+      label: Is there an existing module for this?
+      description: This module does not exist yet with the [`nf-core modules list`](https://github.com/nf-core/tools#list-modules) command
+      options:
+        - label: I have searched for the existing module
+          required: true
+
+  - type: checkboxes
+    attributes:
+      label: Is there an open PR for this?
+      description: There is no [open pull request](https://github.com/nf-core/modules/pulls) for this module
+      options:
+        - label: I have searched for existing PRs
+          required: true
+
+  - type: checkboxes
+    attributes:
+      label: Is there an open issue for this?
+      description: There is no [open issue](https://github.com/nf-core/modules/issues) for this module
+      options:
+        - label: I have searched for existing issues
+          required: true
+
+  - type: checkboxes
+    attributes:
+      label: Are you going to work on this?
+      description: If I'm planning to work on this module, I added myself to the `Assignees` to facilitate tracking who is working on the module
+      options:
+        - label: If I'm planning to work on this module, I added myself to the `Assignees` to facilitate tracking who is working on the module
+          required: false
--- a/modules/diamond/blastp/main.nf
+++ b/modules/diamond/blastp/main.nf
@ -11,10 +11,11 @@ process DIAMOND_BLASTP {

    input:
    tuple val(meta), path(fasta)
-    path  db
+    path db
+    val outext

    output:
-    tuple val(meta), path('*.txt'), emit: txt
+    tuple val(meta), path('*.{blast,xml,txt,daa,sam,tsv,paf}'), emit: output
    path "versions.yml"           , emit: versions

    when:
@ -23,6 +24,15 @@ process DIAMOND_BLASTP {
    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
+    switch ( outext ) {
+        case "blast": outfmt = 0; break
+        case "xml": outfmt = 5; break
+        case "txt": outfmt = 6; break
+        case "daa": outfmt = 100; break
+        case "sam": outfmt = 101; break
+        case "tsv": outfmt = 102; break
+        case "paf": outfmt = 103; break
+    }
    """
    DB=`find -L ./ -name "*.dmnd" | sed 's/.dmnd//'`

@ -31,8 +41,9 @@ process DIAMOND_BLASTP {
        --threads $task.cpus \\
        --db \$DB \\
        --query $fasta \\
+        --outfmt ${outfmt} \\
        $args \\
-        --out ${prefix}.txt
+        --out ${prefix}.${outext}

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
--- a/modules/diamond/blastp/meta.yml
+++ b/modules/diamond/blastp/meta.yml
@ -28,6 +28,14 @@ input:
      type: directory
      description: Directory containing the protein blast database
      pattern: "*"
+  - outext:
+      type: string
+      description: |
+        Specify the type of output file to be generated. `blast` corresponds to
+        BLAST pairwise format. `xml` corresponds to BLAST xml format.
+        `txt` corresponds to BLAST tabular format. `tsv` corresponds to
+        taxonomic classification format.
+      pattern: "blast|xml|txt|daa|sam|tsv|paf"

 output:
  - txt:
@ -41,3 +49,4 @@ output:

 authors:
  - "@spficklin"
+  - "@jfy133"
--- a/modules/diamond/blastx/main.nf
+++ b/modules/diamond/blastx/main.nf
@ -11,10 +11,11 @@ process DIAMOND_BLASTX {

    input:
    tuple val(meta), path(fasta)
-    path  db
+    path db
+    val outext

    output:
-    tuple val(meta), path('*.txt'), emit: txt
+    tuple val(meta), path('*.{blast,xml,txt,daa,sam,tsv,paf}'), emit: output
    path "versions.yml"           , emit: versions

    when:
@ -23,6 +24,15 @@ process DIAMOND_BLASTX {
    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
+    switch ( outext ) {
+        case "blast": outfmt = 0; break
+        case "xml": outfmt = 5; break
+        case "txt": outfmt = 6; break
+        case "daa": outfmt = 100; break
+        case "sam": outfmt = 101; break
+        case "tsv": outfmt = 102; break
+        case "paf": outfmt = 103; break
+    }
    """
    DB=`find -L ./ -name "*.dmnd" | sed 's/.dmnd//'`

@ -31,8 +41,9 @@ process DIAMOND_BLASTX {
        --threads $task.cpus \\
        --db \$DB \\
        --query $fasta \\
+        --outfmt ${outfmt} \\
        $args \\
-        --out ${prefix}.txt
+        --out ${prefix}.${outext}

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
--- a/modules/diamond/blastx/meta.yml
+++ b/modules/diamond/blastx/meta.yml
@ -28,6 +28,14 @@ input:
      type: directory
      description: Directory containing the nucleotide blast database
      pattern: "*"
+  - outext:
+      type: string
+      description: |
+        Specify the type of output file to be generated. `blast` corresponds to
+        BLAST pairwise format. `xml` corresponds to BLAST xml format.
+        `txt` corresponds to BLAST tabular format. `tsv` corresponds to
+        taxonomic classification format.
+      pattern: "blast|xml|txt|daa|sam|tsv|paf"

 output:
  - txt:
@ -41,3 +49,4 @@ output:

 authors:
  - "@spficklin"
+  - "@jfy133"
--- a/modules/elprep/merge/main.nf
+++ b/modules/elprep/merge/main.nf
@ -0,0 +1,43 @@
+process ELPREP_MERGE {
+    tag "$meta.id"
+    label 'process_low'
+
+    conda (params.enable_conda ? "bioconda::elprep=5.1.2" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/elprep:5.1.2--he881be0_0':
+        'quay.io/biocontainers/elprep:5.1.2--he881be0_0' }"
+
+    input:
+    tuple val(meta), path(bam)
+
+    output:
+    tuple val(meta), path("output/**.{bam,sam}")    , emit: bam
+    path "versions.yml"                             , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args        = task.ext.args ?: ''
+    def prefix      = task.ext.prefix ?: "${meta.id}"
+    def suffix      = args.contains("--output-type sam") ? "sam" : "bam"
+    def single_end  = meta.single_end ? " --single-end" : ""
+
+    """
+    # create a directory and move all input chunks into it so elprep can find and merge them
+    mkdir input
+    mv ${bam} input/
+
+    elprep merge \\
+        input/ \\
+        output/${prefix}.${suffix} \\
+        $args \\
+        ${single_end} \\
+        --nr-of-threads $task.cpus
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        elprep: \$(elprep 2>&1 | head -n2 | tail -n1 |sed 's/^.*version //;s/ compiled.*\$//')
+    END_VERSIONS
+    """
+}
--- a/modules/elprep/merge/meta.yml
+++ b/modules/elprep/merge/meta.yml
@ -0,0 +1,44 @@
+name: "elprep_merge"
+description: Merge split bam/sam chunks into one file
+keywords:
+  - bam
+  - sam
+  - merge
+tools:
+  - "elprep":
+      description: "elPrep is a high-performance tool for preparing .sam/.bam files for variant calling in sequencing pipelines. It can be used as a drop-in replacement for SAMtools/Picard/GATK4."
+      homepage: "https://github.com/ExaScience/elprep"
+      documentation: "https://github.com/ExaScience/elprep"
+      tool_dev_url: "https://github.com/ExaScience/elprep"
+      doi: "10.1371/journal.pone.0244471"
+      licence: "['AGPL v3']"
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - bam:
+      type: file
+      description: List of BAM/SAM chunks to merge
+      pattern: "*.{bam,sam}"
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  #
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - bam:
+      type: file
+      description: Merged BAM/SAM file
+      pattern: "*.{bam,sam}"
+
+authors:
+  - "@matthdsm"
--- a/tests/config/pytest_modules.yml
+++ b/tests/config/pytest_modules.yml
@ -603,6 +603,10 @@ elprep/filter:
  - modules/elprep/filter/**
  - tests/modules/elprep/filter/**

+elprep/merge:
+  - modules/elprep/merge/**
+  - tests/modules/elprep/merge/**
+
 elprep/split:
  - modules/elprep/split/**
  - tests/modules/elprep/split/**
--- a/tests/modules/diamond/blastp/main.nf
+++ b/tests/modules/diamond/blastp/main.nf
@ -9,7 +9,18 @@ workflow test_diamond_blastp {

    db = [ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ]
    fasta = [ file(params.test_data['sarscov2']['genome']['transcriptome_fasta'], checkIfExists: true) ]
+    outext = 'txt'

    DIAMOND_MAKEDB ( db )
-    DIAMOND_BLASTP ( [ [id:'test'], fasta ], DIAMOND_MAKEDB.out.db )
+    DIAMOND_BLASTP ( [ [id:'test'], fasta ], DIAMOND_MAKEDB.out.db, outext )
+}
+
+workflow test_diamond_blastp_daa {
+
+    db = [ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ]
+    fasta = [ file(params.test_data['sarscov2']['genome']['transcriptome_fasta'], checkIfExists: true) ]
+    outext = 'daa'
+
+    DIAMOND_MAKEDB ( db )
+    DIAMOND_BLASTP ( [ [id:'test'], fasta ], DIAMOND_MAKEDB.out.db, outext )
 }
--- a/tests/modules/diamond/blastp/test.yml
+++ b/tests/modules/diamond/blastp/test.yml
@ -1,8 +1,19 @@
- name: diamond blastp
-  command: nextflow run ./tests/modules/diamond/blastp -entry test_diamond_blastp -c ./tests/config/nextflow.config -c ./tests/modules/diamond/blastp/nextflow.config
+- name: diamond blastp test_diamond_blastp
+  command: nextflow run tests/modules/diamond/blastp -entry test_diamond_blastp -c tests/config/nextflow.config
  tags:
    - diamond
    - diamond/blastp
  files:
-    - path: ./output/diamond/test.diamond_blastp.txt
+    - path: output/diamond/test.diamond_blastp.txt
      md5sum: 3ca7f6290c1d8741c573370e6f8b4db0
+    - path: output/diamond/versions.yml
+
+- name: diamond blastp test_diamond_blastp_daa
+  command: nextflow run tests/modules/diamond/blastp -entry test_diamond_blastp_daa -c tests/config/nextflow.config
+  tags:
+    - diamond
+    - diamond/blastp
+  files:
+    - path: output/diamond/test.diamond_blastp.daa
+      md5sum: d4a79ad1fcb2ec69460e5a09a9468db7
+    - path: output/diamond/versions.yml
--- a/tests/modules/diamond/blastx/main.nf
+++ b/tests/modules/diamond/blastx/main.nf
@ -9,7 +9,18 @@ workflow test_diamond_blastx {

    db = [ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ]
    fasta = [ file(params.test_data['sarscov2']['genome']['transcriptome_fasta'], checkIfExists: true) ]
+    outext = 'txt'

    DIAMOND_MAKEDB ( db )
-    DIAMOND_BLASTX ( [ [id:'test'], fasta ], DIAMOND_MAKEDB.out.db )
+    DIAMOND_BLASTX ( [ [id:'test'], fasta ], DIAMOND_MAKEDB.out.db, outext )
+}
+
+workflow test_diamond_blastx_daa {
+
+    db = [ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ]
+    fasta = [ file(params.test_data['sarscov2']['genome']['transcriptome_fasta'], checkIfExists: true) ]
+    outext = 'daa'
+
+    DIAMOND_MAKEDB ( db )
+    DIAMOND_BLASTX ( [ [id:'test'], fasta ], DIAMOND_MAKEDB.out.db, outext )
 }
--- a/tests/modules/diamond/blastx/test.yml
+++ b/tests/modules/diamond/blastx/test.yml
@ -1,8 +1,18 @@
- name: diamond blastx
-  command: nextflow run ./tests/modules/diamond/blastx -entry test_diamond_blastx -c ./tests/config/nextflow.config -c ./tests/modules/diamond/blastx/nextflow.config
+- name: diamond blastx test_diamond_blastx
+  command: nextflow run tests/modules/diamond/blastx -entry test_diamond_blastx -c tests/config/nextflow.config
  tags:
    - diamond
    - diamond/blastx
  files:
-    - path: ./output/diamond/test.diamond_blastx.txt
-      md5sum: d41d8cd98f00b204e9800998ecf8427e
+    - path: output/diamond/test.diamond_blastx.txt
+    - path: output/diamond/versions.yml
+
+- name: diamond blastx test_diamond_blastx_daa
+  command: nextflow run tests/modules/diamond/blastx -entry test_diamond_blastx_daa -c tests/config/nextflow.config
+  tags:
+    - diamond
+    - diamond/blastx
+  files:
+    - path: output/diamond/test.diamond_blastx.daa
+      md5sum: 2a0ce0f7e01dcead828b87d5cbaccf7a
+    - path: output/diamond/versions.yml
--- a/tests/modules/elprep/merge/main.nf
+++ b/tests/modules/elprep/merge/main.nf
@ -0,0 +1,17 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { ELPREP_SPLIT } from '../../../../modules/elprep/split/main.nf'
+include { ELPREP_MERGE } from '../../../../modules/elprep/merge/main.nf'
+
+workflow test_elprep_merge {
+
+    input = [
+        [ id:'test', single_end:false ], // meta map
+        file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true)
+    ]
+
+    ELPREP_SPLIT ( input )
+    ELPREP_MERGE ( ELPREP_SPLIT.out.bam )
+}
--- a/tests/modules/elprep/merge/nextflow.config
+++ b/tests/modules/elprep/merge/nextflow.config
@ -0,0 +1,5 @@
+process {
+    withName : ELPREP_MERGE {
+        publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
+    }
+}
--- a/tests/modules/elprep/merge/test.yml
+++ b/tests/modules/elprep/merge/test.yml
@ -0,0 +1,8 @@
+- name: elprep merge test_elprep_merge
+  command: nextflow run tests/modules/elprep/merge -entry test_elprep_merge -c tests/config/nextflow.config
+  tags:
+    - elprep
+    - elprep/merge
+  files:
+    - path: output/elprep/output/test.bam
+    - path: output/elprep/versions.yml