Merge branch 'master' into tool/elprep-merge

2024-12-22 11:08:17 +00:00 · 2022-04-19 11:28:04 +02:00 · 2022-04-19 11:28:04 +02:00 · 553c1bcf9d
commit 553c1bcf9d
parent ad15d1b792 a6cb75174b
19 changed files with 206 additions and 73 deletions
--- a/modules/allelecounter/meta.yml
+++ b/modules/allelecounter/meta.yml
@@ -32,8 +32,8 @@ input:
      description: loci file <CHR><tab><POS1>
      pattern: "*.{tsv}"
  - fasta:
-    type: file
-    description: Input genome fasta file. Required when passing CRAM files.
+      type: file
+      description: Input genome fasta file. Required when passing CRAM files.

 output:
  - meta:
--- a/modules/bwa/mem/main.nf
+++ b/modules/bwa/mem/main.nf
@@ -23,14 +23,12 @@ process BWA_MEM {
    def args = task.ext.args ?: ''
    def args2 = task.ext.args2 ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
-    def read_group = meta.read_group ? "-R ${meta.read_group}" : ""
    def samtools_command = sort_bam ? 'sort' : 'view'
    """
    INDEX=`find -L ./ -name "*.amb" | sed 's/.amb//'`

    bwa mem \\
        $args \\
-        $read_group \\
        -t $task.cpus \\
        \$INDEX \\
        $reads \\
--- a/modules/bwamem2/mem/main.nf
+++ b/modules/bwamem2/mem/main.nf
@@ -23,7 +23,6 @@ process BWAMEM2_MEM {
    def args = task.ext.args ?: ''
    def args2 = task.ext.args2 ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
-    def read_group = meta.read_group ? "-R ${meta.read_group}" : ""
    def samtools_command = sort_bam ? 'sort' : 'view'
    """
    INDEX=`find -L ./ -name "*.amb" | sed 's/.amb//'`
@@ -31,7 +30,6 @@ process BWAMEM2_MEM {
    bwa-mem2 \\
        mem \\
        $args \\
-        $read_group \\
        -t $task.cpus \\
        \$INDEX \\
        $reads \\
--- a/modules/centrifuge/kreport/main.nf
+++ b/modules/centrifuge/kreport/main.nf
@@ -0,0 +1,33 @@
+process CENTRIFUGE_KREPORT {
+    tag "$meta.id"
+    label 'process_low'
+
+    conda (params.enable_conda ? "bioconda::centrifuge=1.0.4_beta" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/centrifuge:1.0.4_beta--h9a82719_6':
+        'quay.io/biocontainers/centrifuge:1.0.4_beta--h9a82719_6' }"
+
+    input:
+    tuple val(meta), path(results)
+    path db
+
+    output:
+    tuple val(meta), path('*.txt')                , emit: kreport
+    path "versions.yml"                                  , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    db_name=`find -L ${db} -name "*.1.cf" -not -name "._*"  | sed 's/.1.cf//'`
+    centrifuge-kreport -x \$db_name ${results} > ${prefix}.txt
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        centrifuge: \$( centrifuge --version  | sed -n 1p | sed 's/^.*centrifuge-class version //')
+    END_VERSIONS
+    """
+}
--- a/modules/centrifuge/kreport/meta.yml
+++ b/modules/centrifuge/kreport/meta.yml
@@ -0,0 +1,41 @@
+name: "centrifuge_kreport"
+description: Creates Kraken-style reports from centrifuge out files
+keywords:
+  - metagenomics
+tools:
+  - centrifuge:
+      description: Centrifuge is a classifier for metagenomic sequences.
+      homepage: https://ccb.jhu.edu/software/centrifuge/
+      documentation: https://ccb.jhu.edu/software/centrifuge/manual.shtml
+      doi: 10.1101/gr.210641.116
+      licence: ["GPL v3"]
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - results:
+      type: file
+      description: File containing the centrifuge classification results
+      pattern: "*.{txt}"
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - kreport:
+      type: file
+      description: |
+        File containing kraken-style report from centrifuge
+        out files.
+      pattern: "*.{txt}"
+authors:
+  - "@sofstam"
+  - "@jfy133"
--- a/modules/dragmap/align/main.nf
+++ b/modules/dragmap/align/main.nf
@@ -24,44 +24,23 @@ process DRAGMAP_ALIGN {
    def args = task.ext.args ?: ''
    def args2 = task.ext.args2 ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
-    def read_group = meta.read_group ? "--RGSM ${meta.read_group}" : ""
+    def reads_command = meta.single_end ? "-1 $reads" : "-1 ${reads[0]} -2 ${reads[1]}"
    def samtools_command = sort_bam ? 'sort' : 'view'
-    if (meta.single_end) {
-        """
-        dragen-os \\
-            -r $hashmap \\
-            $args \\
-            $read_group \\
-            --num-threads $task.cpus \\
-            -1 $reads \\
-            2> ${prefix}.dragmap.log \\
-            | samtools $samtools_command $args2 --threads $task.cpus -o ${prefix}.bam -

-        cat <<-END_VERSIONS > versions.yml
-        "${task.process}":
-            dragmap: \$(echo \$(dragen-os --version 2>&1))
-            samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
-            pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' )
-        END_VERSIONS
-        """
-    } else {
-        """
-        dragen-os \\
-            -r $hashmap \\
-            $args \\
-            $read_group \\
-            --num-threads $task.cpus \\
-            -1 ${reads[0]} \\
-            -2 ${reads[1]} \\
-            2> ${prefix}.dragmap.log \\
-            | samtools $samtools_command $args2 --threads $task.cpus -o ${prefix}.bam -
+    """
+    dragen-os \\
+        -r $hashmap \\
+        $args \\
+        --num-threads $task.cpus \\
+        $reads_command \\
+        2> ${prefix}.dragmap.log \\
+        | samtools $samtools_command $args2 --threads $task.cpus -o ${prefix}.bam -

-        cat <<-END_VERSIONS > versions.yml
-        "${task.process}":
-            dragmap: \$(echo \$(dragen-os --version 2>&1))
-            samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
-            pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' )
-        END_VERSIONS
-        """
-    }
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        dragmap: \$(echo \$(dragen-os --version 2>&1))
+        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+        pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' )
+    END_VERSIONS
+    """
 }
--- a/modules/gatk4/markduplicatesspark/main.nf
+++ b/modules/gatk4/markduplicatesspark/main.nf
@@ -23,7 +23,7 @@ process GATK4_MARKDUPLICATES_SPARK {
    script:
    def args = task.ext.args ?: ''
    prefix = task.ext.prefix ?: "${meta.id}"
-    def input_list = bam.collect{"--INPUT $it"}.join(' ')
+    def input_list = bam.collect{"--input $it"}.join(' ')

    def avail_mem = 3
    if (!task.memory) {
--- a/modules/kaiju/kaiju/main.nf
+++ b/modules/kaiju/kaiju/main.nf
@@ -9,11 +9,11 @@ process KAIJU_KAIJU {

    input:
    tuple val(meta), path(reads)
-    tuple path(db), path(dbnodes)
+    path(db)

    output:
    tuple val(meta), path('*.tsv'), emit: results
-    path "versions.yml"                   , emit: versions
+    path "versions.yml"           , emit: versions

    when:
    task.ext.when == null || task.ext.when
@@ -23,11 +23,13 @@ process KAIJU_KAIJU {
    def prefix = task.ext.prefix ?: "${meta.id}"
    def input = meta.single_end ? "-i ${reads}" : "-i ${reads[0]} -j ${reads[1]}"
    """
+    dbnodes=`find -L ${db} -name "*nodes.dmp"`
+    dbname=`find -L ${db} -name "*.fmi" -not -name "._*"`
    kaiju \\
        $args \\
        -z $task.cpus \\
-        -t ${dbnodes} \\
-        -f ${db} \\
+        -t \$dbnodes \\
+        -f \$dbname \\
        -o ${prefix}.tsv \\
        $input

--- a/modules/kaiju/kaiju/meta.yml
+++ b/modules/kaiju/kaiju/meta.yml
@@ -50,3 +50,4 @@ output:
 authors:
  - "@talnor"
  - "@sofstam"
+  - "@jfy133"
--- a/modules/stranger/main.nf
+++ b/modules/stranger/main.nf
@@ -9,6 +9,7 @@ process STRANGER {

    input:
    tuple val(meta), path(vcf)
+    path variant_catalog

    output:
    tuple val(meta), path("*.gz"), emit: vcf
@@ -20,10 +21,12 @@ process STRANGER {
    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
+    def options_variant_catalog = variant_catalog ? "--repeats-file $variant_catalog" : ""
    """
    stranger \\
        $args \\
-        $vcf | gzip --no-name > ${prefix}.vcf.gz
+        $vcf \\
+        $options_variant_catalog | gzip --no-name > ${prefix}.vcf.gz

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
--- a/modules/stranger/meta.yml
+++ b/modules/stranger/meta.yml
@@ -24,6 +24,10 @@ input:
      type: file
      description: VCF with repeat expansions
      pattern: "*.{vcf.gz,vcf}"
+  - variant_catalog:
+      type: file
+      description: json file with repeat expansion sites to genotype
+      pattern: "*.{json}"

 output:
  - meta:
--- a/tests/config/test_data.config
+++ b/tests/config/test_data.config
@@ -28,9 +28,8 @@ params {
                kraken2_bracken                                = "${test_data_dir}/genomics/sarscov2/genome/db/kraken2_bracken"
                kraken2_bracken_tar_gz                         = "${test_data_dir}/genomics/sarscov2/genome/db/kraken2_bracken.tar.gz"

-                kaiju_fmi                                      = "${test_data_dir}/genomics/sarscov2/genome/db/kaiju/proteins.fmi"
-                kaiju_nodes                                    = "${test_data_dir}/genomics/sarscov2/genome/db/kaiju/nodes.dmp"
-                kaiju_names                                    = "${test_data_dir}/genomics/sarscov2/genome/db/kaiju/names.dmp"
+                kaiju                                          = "${test_data_dir}/genomics/sarscov2/genome/db/kaiju"
+                kaiju_tar_gz                                   = "${test_data_dir}/genomics/sarscov2/genome/db/kaiju.tar.gz"

                ncbi_taxmap_zip                                = "${test_data_dir}/genomics/sarscov2/genome/db/maltextract/ncbi_taxmap.zip"
                taxon_list_txt                                 = "${test_data_dir}/genomics/sarscov2/genome/db/maltextract/taxon_list.txt"
--- a/tests/modules/centrifuge/centrifuge/main.nf
+++ b/tests/modules/centrifuge/centrifuge/main.nf
@@ -25,7 +25,6 @@ workflow test_centrifuge_centrifuge_paired_end {
                file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ]
            ]
     db    =  [ [], file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/minigut_cf.tar.gz', checkIfExists: true) ]
-     //db_name = "minigut_cf"
     save_unaligned = true
     save_aligned = false
     sam_format = false
--- a/tests/modules/centrifuge/kreport/main.nf
+++ b/tests/modules/centrifuge/kreport/main.nf
@@ -0,0 +1,32 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { UNTAR                } from '../../../../modules/untar/main.nf'
+include { CENTRIFUGE_CENTRIFUGE } from '../../../../modules/centrifuge/centrifuge/main.nf'
+include { CENTRIFUGE_KREPORT    } from '../../../../modules/centrifuge/kreport/main.nf'
+
+workflow test_centrifuge_kreport_single_end {
+
+    input = [ [ id:'test', single_end:true ], // meta map
+              [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ]
+            ]
+    db    =  [ [], file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/minigut_cf.tar.gz', checkIfExists: true) ]
+
+    ch_db = UNTAR ( db )
+    CENTRIFUGE_CENTRIFUGE ( input, ch_db.untar.map{ it[1] }, false, false, false )
+    CENTRIFUGE_KREPORT ( CENTRIFUGE_CENTRIFUGE.out.results, ch_db.untar.map{ it[1] } )
+}
+
+workflow test_centrifuge_kreport_paired_end {
+    input = [ [ id:'test', single_end:false ], // meta map
+              [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+                file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ]
+            ]
+     db    =  [ [], file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/minigut_cf.tar.gz', checkIfExists: true) ]
+
+    ch_db = UNTAR ( db )
+    CENTRIFUGE_CENTRIFUGE ( input, ch_db.untar.map{ it[1] }, false, false, false )
+    CENTRIFUGE_KREPORT ( CENTRIFUGE_CENTRIFUGE.out.results, ch_db.untar.map{ it[1] } )
+}
+
--- a/tests/modules/centrifuge/kreport/nextflow.config
+++ b/tests/modules/centrifuge/kreport/nextflow.config
@@ -0,0 +1,5 @@
+process {
+
+    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
+    
+}
--- a/tests/modules/centrifuge/kreport/test.yml
+++ b/tests/modules/centrifuge/kreport/test.yml
@@ -0,0 +1,21 @@
+- name: centrifuge kreport test_centrifuge_kreport_single_end
+  command: nextflow run tests/modules/centrifuge/kreport -entry test_centrifuge_kreport_single_end -c tests/config/nextflow.config
+  tags:
+    - centrifuge
+    - centrifuge/kreport
+  files:
+    - path: output/centrifuge/test.txt
+      md5sum: af1a51fe57eb6d428350ff4a4bf759d4
+      contains: ["unclassified"]
+    - path: output/centrifuge/versions.yml
+
+- name: centrifuge kreport test_centrifuge_kreport_paired_end
+  command: nextflow run tests/modules/centrifuge/kreport -entry test_centrifuge_kreport_paired_end -c tests/config/nextflow.config
+  tags:
+    - centrifuge
+    - centrifuge/kreport
+  files:
+    - path: output/centrifuge/test.txt
+      md5sum: af1a51fe57eb6d428350ff4a4bf759d4
+      contains: ["unclassified"]
+    - path: output/centrifuge/versions.yml
--- a/tests/modules/kaiju/kaiju/main.nf
+++ b/tests/modules/kaiju/kaiju/main.nf
@@ -2,6 +2,7 @@

 nextflow.enable.dsl = 2

+include { UNTAR       } from '../../../../modules/untar/main.nf'
 include { KAIJU_KAIJU } from '../../../../modules/kaiju/kaiju/main.nf'

 workflow test_kaiju_kaiju_single_end {
@@ -10,12 +11,10 @@ workflow test_kaiju_kaiju_single_end {
        [ id:'test', single_end:true ], // meta map
        file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)
    ]
-    db    = [
-        file(params.test_data['sarscov2']['genome']['kaiju_fmi'], checkIfExists: true), // database
-        file(params.test_data['sarscov2']['genome']['kaiju_nodes'], checkIfExists: true) // taxon nodes
-    ]
+    db    = [ [], file(params.test_data['sarscov2']['genome']['kaiju_tar_gz'], checkIfExists: true) ]

-    KAIJU_KAIJU ( input, db )
+    UNTAR ( db )
+    KAIJU_KAIJU ( input, UNTAR.out.untar.map{ it[1] } )
 }

 workflow test_kaiju_kaiju_paired_end {
@@ -25,10 +24,9 @@ workflow test_kaiju_kaiju_paired_end {
        [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
          file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ]
    ]
-    db    = [
-        file(params.test_data['sarscov2']['genome']['kaiju_fmi'], checkIfExists: true), // database
-        file(params.test_data['sarscov2']['genome']['kaiju_nodes'], checkIfExists: true) // taxon nodes
-    ]
+    db    = [ [], file(params.test_data['sarscov2']['genome']['kaiju_tar_gz'], checkIfExists: true) ]
+
+    UNTAR ( db )
+    KAIJU_KAIJU ( input, UNTAR.out.untar.map{ it[1] } )

-    KAIJU_KAIJU ( input, db )
 }
--- a/tests/modules/stranger/main.nf
+++ b/tests/modules/stranger/main.nf
@@ -5,15 +5,21 @@ nextflow.enable.dsl = 2
 include { EXPANSIONHUNTER } from '../../../modules/expansionhunter/main.nf'
 include { STRANGER } from '../../../modules/stranger/main.nf'

+
+input = [ [ id:'test', gender:'male' ], // meta map
+            file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
+            file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true),
+        ]
+fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
+variant_catalog = file(params.test_data['homo_sapiens']['genome']['repeat_expansions'], checkIfExists: true)
+
+
 workflow test_stranger {
-
-    input = [ [ id:'test', gender:'male' ], // meta map
-              file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
-              file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true),
-            ]
-    fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
-    variant_catalog = file(params.test_data['homo_sapiens']['genome']['repeat_expansions'], checkIfExists: true)
-
    EXPANSIONHUNTER ( input, fasta, variant_catalog )
-    STRANGER ( EXPANSIONHUNTER.out.vcf )
+    STRANGER ( EXPANSIONHUNTER.out.vcf, variant_catalog )
+}
+
+workflow test_stranger_without_optional_variant_catalog {
+    EXPANSIONHUNTER ( input, fasta, variant_catalog )
+    STRANGER ( EXPANSIONHUNTER.out.vcf, [] )
 }
--- a/tests/modules/stranger/test.yml
+++ b/tests/modules/stranger/test.yml
@@ -8,6 +8,20 @@
    - path: output/expansionhunter/versions.yml
      md5sum: f3962a6eecfddf9682414c0f605a885a
    - path: output/stranger/test.vcf.gz
-      md5sum: bbe15159195681d5c18596d3ad85c78f
+      md5sum: 68b0ca1319851134ffa8793a4704dc11
    - path: output/stranger/versions.yml
      md5sum: 5ec35fd835fb1be50bc3e7c004310fc0
+
+- name: stranger test_stranger_without_optional_variant_catalog
+  command: nextflow run tests/modules/stranger -entry test_stranger_without_optional_variant_catalog -c tests/config/nextflow.config
+  tags:
+    - stranger
+  files:
+    - path: output/expansionhunter/test.vcf
+      md5sum: cfd4a1d35c0e469b99eb6aaa6d22de76
+    - path: output/expansionhunter/versions.yml
+      md5sum: c95af9e6d8cd9bd2ce1090ca4e7a6020
+    - path: output/stranger/test.vcf.gz
+      md5sum: bbe15159195681d5c18596d3ad85c78f
+    - path: output/stranger/versions.yml
+      md5sum: 8558542a007e90ea5dcdceed3f12585d