From 977d96ed0bd7d813b6f5498e9423d70a02e190ac Mon Sep 17 00:00:00 2001
From: avantonder <ajv37@cam.ac.uk>
Date: Fri, 29 Oct 2021 13:33:38 +0100
Subject: [PATCH 01/15] Add Racon module to nf-core/modules (#949)

* add racon

* add racon

* add racon

* add racon module

* add racon module

* edit racon module

* edit racon module

* edit racon module

* edit racon module

Co-authored-by: Daniel Straub <42973691+d4straub@users.noreply.github.com>
Co-authored-by: Chris Cheshire <chris.j.cheshire@gmail.com>
---
 modules/racon/functions.nf            | 78 +++++++++++++++++++++++++++
 modules/racon/main.nf                 | 45 ++++++++++++++++
 modules/racon/meta.yml                | 52 ++++++++++++++++++
 tests/config/pytest_modules.yml       |  4 ++
 tests/config/test_data.config         |  5 +-
 tests/modules/minimap2/align/test.yml |  2 +-
 tests/modules/racon/main.nf           | 15 ++++++
 tests/modules/racon/test.yml          |  7 +++
 8 files changed, 206 insertions(+), 2 deletions(-)
 create mode 100644 modules/racon/functions.nf
 create mode 100644 modules/racon/main.nf
 create mode 100644 modules/racon/meta.yml
 create mode 100644 tests/modules/racon/main.nf
 create mode 100644 tests/modules/racon/test.yml

diff --git a/modules/racon/functions.nf b/modules/racon/functions.nf
new file mode 100644
index 00000000..85628ee0
--- /dev/null
+++ b/modules/racon/functions.nf
@@ -0,0 +1,78 @@
+//
+//  Utility functions used in nf-core DSL2 module files
+//
+
+//
+// Extract name of software tool from process name using $task.process
+//
+def getSoftwareName(task_process) {
+    return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()
+}
+
+//
+// Extract name of module from process name using $task.process
+//
+def getProcessName(task_process) {
+    return task_process.tokenize(':')[-1]
+}
+
+//
+// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules
+//
+def initOptions(Map args) {
+    def Map options = [:]
+    options.args            = args.args ?: ''
+    options.args2           = args.args2 ?: ''
+    options.args3           = args.args3 ?: ''
+    options.publish_by_meta = args.publish_by_meta ?: []
+    options.publish_dir     = args.publish_dir ?: ''
+    options.publish_files   = args.publish_files
+    options.suffix          = args.suffix ?: ''
+    return options
+}
+
+//
+// Tidy up and join elements of a list to return a path string
+//
+def getPathFromList(path_list) {
+    def paths = path_list.findAll { item -> !item?.trim().isEmpty() }      // Remove empty entries
+    paths     = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes
+    return paths.join('/')
+}
+
+//
+// Function to save/publish module results
+//
+def saveFiles(Map args) {
+    def ioptions  = initOptions(args.options)
+    def path_list = [ ioptions.publish_dir ?: args.publish_dir ]
+
+    // Do not publish versions.yml unless running from pytest workflow
+    if (args.filename.equals('versions.yml') && !System.getenv("NF_CORE_MODULES_TEST")) {
+        return null
+    }
+    if (ioptions.publish_by_meta) {
+        def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta
+        for (key in key_list) {
+            if (args.meta && key instanceof String) {
+                def path = key
+                if (args.meta.containsKey(key)) {
+                    path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key]
+                }
+                path = path instanceof String ? path : ''
+                path_list.add(path)
+            }
+        }
+    }
+    if (ioptions.publish_files instanceof Map) {
+        for (ext in ioptions.publish_files) {
+            if (args.filename.endsWith(ext.key)) {
+                def ext_list = path_list.collect()
+                ext_list.add(ext.value)
+                return "${getPathFromList(ext_list)}/$args.filename"
+            }
+        }
+    } else if (ioptions.publish_files == null) {
+        return "${getPathFromList(path_list)}/$args.filename"
+    }
+}
diff --git a/modules/racon/main.nf b/modules/racon/main.nf
new file mode 100644
index 00000000..60a5061e
--- /dev/null
+++ b/modules/racon/main.nf
@@ -0,0 +1,45 @@
+// Import generic module functions
+include { initOptions; saveFiles; getSoftwareName; getProcessName } from './functions'
+
+params.options = [:]
+options        = initOptions(params.options)
+
+process RACON { // Runs racon on reads, PAF overlaps and a target assembly, then gzips the resulting consensus FASTA
+    tag "$meta.id"
+    label 'process_high'
+    publishDir "${params.outdir}",
+        mode: params.publish_dir_mode,
+        saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) }
+
+    conda (params.enable_conda ? "bioconda::racon=1.4.20" : null)
+    if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { // Galaxy depot image for Singularity unless a Docker-image pull is requested; quay.io image otherwise
+        container "https://depot.galaxyproject.org/singularity/racon:1.4.20--h9a82719_1"
+    } else {
+        container "quay.io/biocontainers/racon:1.4.20--h9a82719_1"
+    }
+
+    input:
+    tuple val(meta), path(reads), path(assembly), path(paf) // handed to racon below in the order: reads, paf, assembly
+
+    output:
+    tuple val(meta), path('*_assembly_consensus.fasta.gz') , emit: improved_assembly // the consensus FASTA after the gzip step in the script
+    path "versions.yml"          , emit: versions
+
+    script:
+    def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" // meta.id, with options.suffix appended when one is set
+    """
+    racon -t "${task.cpus}" \\
+        "${reads}" \\
+        "${paf}" \\
+        $options.args \\
+        "${assembly}" > \\
+        ${prefix}_assembly_consensus.fasta
+
+    gzip -n ${prefix}_assembly_consensus.fasta
+
+    cat <<-END_VERSIONS > versions.yml
+    ${getProcessName(task.process)}:
+        ${getSoftwareName(task.process)}: \$( racon --version 2>&1 | sed 's/^.*v//' )
+    END_VERSIONS
+    """
+}
diff --git a/modules/racon/meta.yml b/modules/racon/meta.yml
new file mode 100644
index 00000000..2428f044
--- /dev/null
+++ b/modules/racon/meta.yml
@@ -0,0 +1,52 @@
+name: racon
+description: Consensus module for raw de novo DNA assembly of long uncorrected reads
+keywords:
+  - assembly
+  - pacbio
+  - nanopore
+  - polish
+tools:
+  - racon:
+      description: Ultrafast consensus module for raw de novo genome assembly of long uncorrected reads.
+      homepage: https://github.com/lbcb-sci/racon
+      documentation: https://github.com/lbcb-sci/racon
+      tool_dev_url: https://github.com/lbcb-sci/racon
+      doi: "10.1101/gr.214270.116"
+      licence: ['MIT']
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - reads:
+      type: file
+      description: List of input FastQ files. Racon expects single end reads
+      pattern: "*.{fastq,fastq.gz,fq,fq.gz}"
+  - assembly:
+      type: file
+      description: Genome assembly to be improved
+      pattern: "*.{fasta,fasta.gz,fa,fa.gz,fna,fna.gz}"
+  - paf:
+      type: file
+      description: Alignment in PAF format
+      pattern: "*.paf"
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - improved_assembly:
+      type: file
+      description: Improved genome assembly
+      pattern: "*_assembly_consensus.fasta.gz"
+
+authors:
+  - "@avantonder"
diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml
index 4fdd8303..155ed78d 100644
--- a/tests/config/pytest_modules.yml
+++ b/tests/config/pytest_modules.yml
@@ -923,6 +923,10 @@ quast:
   - modules/quast/**
   - tests/modules/quast/**
 
+racon:
+  - modules/racon/**
+  - tests/modules/racon/**
+
 rapidnj:
   - modules/rapidnj/**
   - tests/modules/rapidnj/**
diff --git a/tests/config/test_data.config b/tests/config/test_data.config
index e8729b9b..c05e1c8f 100644
--- a/tests/config/test_data.config
+++ b/tests/config/test_data.config
@@ -11,6 +11,7 @@ params {
                 genome_gff3                                    = "${test_data_dir}/genomics/sarscov2/genome/genome.gff3"
                 genome_gff3_gz                                 = "${test_data_dir}/genomics/sarscov2/genome/genome.gff3.gz"
                 genome_gtf                                     = "${test_data_dir}/genomics/sarscov2/genome/genome.gtf"
+                genome_paf                                     = "${test_data_dir}/genomics/sarscov2/genome/genome.paf"
                 genome_sizes                                   = "${test_data_dir}/genomics/sarscov2/genome/genome.sizes"
                 transcriptome_fasta                            = "${test_data_dir}/genomics/sarscov2/genome/transcriptome.fasta"
                 transcriptome_paf                              = "${test_data_dir}/genomics/sarscov2/genome/transcriptome.paf"
@@ -243,6 +244,7 @@ params {
         'bacteroides_fragilis'{
             'genome' {
                 genome_fna_gz                   = "${test_data_dir}/genomics/bacteroides_fragilis/genome/genome.fna.gz"
+                genome_paf                      = "${test_data_dir}/genomics/bacteroides_fragilis/genome/genome.paf"
             }
             'illumina' {
                 test1_contigs_fa_gz             = "${test_data_dir}/genomics/bacteroides_fragilis/illumina/fasta/test1.contigs.fa.gz"
@@ -253,7 +255,8 @@ params {
             }
             'nanopore' {
                 test_fastq_gz                   = "${test_data_dir}/genomics/bacteroides_fragilis/nanopore/fastq/test.fastq.gz"
+                overlap_paf                     = "${test_data_dir}/genomics/bacteroides_fragilis/nanopore/overlap.paf"
             }
-        }
+        }    
     }
 }
diff --git a/tests/modules/minimap2/align/test.yml b/tests/modules/minimap2/align/test.yml
index f9b762bb..3309bf4b 100644
--- a/tests/modules/minimap2/align/test.yml
+++ b/tests/modules/minimap2/align/test.yml
@@ -14,4 +14,4 @@
     - minimap2/align
   files:
     - path: ./output/minimap2/test.paf
-      md5sum: 5e7b55a26bf0ea3a2843423d3e0b9a28
+      md5sum: 5e7b55a26bf0ea3a2843423d3e0b9a28
\ No newline at end of file
diff --git a/tests/modules/racon/main.nf b/tests/modules/racon/main.nf
new file mode 100644
index 00000000..b6b864e1
--- /dev/null
+++ b/tests/modules/racon/main.nf
@@ -0,0 +1,15 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { RACON } from '../../../modules/racon/main.nf' addParams( options: [:] )
+
+workflow test_racon { // Runs RACON once on the bacteroides_fragilis test data
+    input = [ [ id:'test', single_end:true ], // meta map
+              file(params.test_data['bacteroides_fragilis']['nanopore']['test_fastq_gz'], checkIfExists: true), // reads
+              file(params.test_data['bacteroides_fragilis']['genome']['genome_fna_gz'], checkIfExists: true), // assembly
+              file(params.test_data['bacteroides_fragilis']['genome']['genome_paf'], checkIfExists: true) // paf; NOTE(review): the nanopore 'overlap_paf' key added to test_data.config by this patch is not used here — confirm which file is intended
+            ]
+
+    RACON ( input )
+}
\ No newline at end of file
diff --git a/tests/modules/racon/test.yml b/tests/modules/racon/test.yml
new file mode 100644
index 00000000..dc8e57dc
--- /dev/null
+++ b/tests/modules/racon/test.yml
@@ -0,0 +1,7 @@
+- name: racon test_racon
+  command: nextflow run tests/modules/racon -entry test_racon -c tests/config/nextflow.config
+  tags:
+    - racon
+  files:
+    - path: output/racon/test_assembly_consensus.fasta.gz
+      md5sum: 96a0ba94c6154f6f37b5a76a0207eb6f

From a4943a9e572ac2a5721f598b7bca059633408481 Mon Sep 17 00:00:00 2001
From: louperelo <44900284+louperelo@users.noreply.github.com>
Date: Fri, 29 Oct 2021 15:02:02 +0200
Subject: [PATCH 02/15] Mtnucratio (#986)

* add new module samtools_depth

* fixed main.nf for samtools/depth

* add new module mtnucratio

* fix main.nf

* Apply suggestions from code review

Co-authored-by: James A. Fellows Yates <jfy133@gmail.com>

* Apply suggestions from code review

Co-authored-by: James A. Fellows Yates <jfy133@gmail.com>

* layout in main.nf

Co-authored-by: James A. Fellows Yates <jfy133@gmail.com>
---
 modules/mtnucratio/functions.nf   | 78 +++++++++++++++++++++++++++++++
 modules/mtnucratio/main.nf        | 43 +++++++++++++++++
 modules/mtnucratio/meta.yml       | 54 +++++++++++++++++++++
 tests/config/pytest_modules.yml   |  4 ++
 tests/modules/mtnucratio/main.nf  | 14 ++++++
 tests/modules/mtnucratio/test.yml |  9 ++++
 6 files changed, 202 insertions(+)
 create mode 100644 modules/mtnucratio/functions.nf
 create mode 100644 modules/mtnucratio/main.nf
 create mode 100644 modules/mtnucratio/meta.yml
 create mode 100644 tests/modules/mtnucratio/main.nf
 create mode 100644 tests/modules/mtnucratio/test.yml

diff --git a/modules/mtnucratio/functions.nf b/modules/mtnucratio/functions.nf
new file mode 100644
index 00000000..85628ee0
--- /dev/null
+++ b/modules/mtnucratio/functions.nf
@@ -0,0 +1,78 @@
+//
+//  Utility functions used in nf-core DSL2 module files
+//
+
+//
+// Extract name of software tool from process name using $task.process
+//
+def getSoftwareName(task_process) {
+    return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()
+}
+
+//
+// Extract name of module from process name using $task.process
+//
+def getProcessName(task_process) {
+    return task_process.tokenize(':')[-1]
+}
+
+//
+// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules
+//
+def initOptions(Map args) {
+    def Map options = [:]
+    options.args            = args.args ?: ''
+    options.args2           = args.args2 ?: ''
+    options.args3           = args.args3 ?: ''
+    options.publish_by_meta = args.publish_by_meta ?: []
+    options.publish_dir     = args.publish_dir ?: ''
+    options.publish_files   = args.publish_files
+    options.suffix          = args.suffix ?: ''
+    return options
+}
+
+//
+// Tidy up and join elements of a list to return a path string
+//
+def getPathFromList(path_list) {
+    def paths = path_list.findAll { item -> !item?.trim().isEmpty() }      // Remove empty entries
+    paths     = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes
+    return paths.join('/')
+}
+
+//
+// Function to save/publish module results
+//
+def saveFiles(Map args) {
+    def ioptions  = initOptions(args.options)
+    def path_list = [ ioptions.publish_dir ?: args.publish_dir ]
+
+    // Do not publish versions.yml unless running from pytest workflow
+    if (args.filename.equals('versions.yml') && !System.getenv("NF_CORE_MODULES_TEST")) {
+        return null
+    }
+    if (ioptions.publish_by_meta) {
+        def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta
+        for (key in key_list) {
+            if (args.meta && key instanceof String) {
+                def path = key
+                if (args.meta.containsKey(key)) {
+                    path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key]
+                }
+                path = path instanceof String ? path : ''
+                path_list.add(path)
+            }
+        }
+    }
+    if (ioptions.publish_files instanceof Map) {
+        for (ext in ioptions.publish_files) {
+            if (args.filename.endsWith(ext.key)) {
+                def ext_list = path_list.collect()
+                ext_list.add(ext.value)
+                return "${getPathFromList(ext_list)}/$args.filename"
+            }
+        }
+    } else if (ioptions.publish_files == null) {
+        return "${getPathFromList(path_list)}/$args.filename"
+    }
+}
diff --git a/modules/mtnucratio/main.nf b/modules/mtnucratio/main.nf
new file mode 100644
index 00000000..28d08a13
--- /dev/null
+++ b/modules/mtnucratio/main.nf
@@ -0,0 +1,43 @@
+include { initOptions; saveFiles; getSoftwareName; getProcessName } from './functions'
+
+params.options = [:]
+options        = initOptions(params.options)
+
+process MTNUCRATIO { // Runs mtnucratio on a BAM file for the given contig identifier and records the tool version
+    tag "$meta.id"
+    label 'process_low'
+    publishDir "${params.outdir}",
+        mode: params.publish_dir_mode,
+        saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) }
+
+    conda (params.enable_conda ? "bioconda::mtnucratio=0.7" : null)
+    if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { // Galaxy depot image for Singularity unless a Docker-image pull is requested; quay.io image otherwise
+        container "https://depot.galaxyproject.org/singularity/mtnucratio:0.7--hdfd78af_2"
+    } else {
+        container "quay.io/biocontainers/mtnucratio:0.7--hdfd78af_2"
+    }
+
+    input:
+    tuple val(meta), path(bam)
+    val(mt_id) // passed as the last positional argument of the mtnucratio command below
+
+    output:
+    tuple val(meta), path("*.mtnucratio"), emit: mtnucratio // captured by glob; the script itself never names its output files
+    tuple val(meta), path("*.json")      , emit: json
+    path "versions.yml"                  , emit: versions
+
+    script:
+    def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" // NOTE(review): prefix is not referenced anywhere in the script below
+
+    """
+    mtnucratio \\
+        $options.args \\
+        $bam \\
+        $mt_id
+
+    cat <<-END_VERSIONS > versions.yml
+    ${getProcessName(task.process)}:
+        ${getSoftwareName(task.process)}: \$(echo \$(mtnucratio --version 2>&1) | head -n1 | sed 's/Version: //')
+    END_VERSIONS
+    """
+}
diff --git a/modules/mtnucratio/meta.yml b/modules/mtnucratio/meta.yml
new file mode 100644
index 00000000..824af397
--- /dev/null
+++ b/modules/mtnucratio/meta.yml
@@ -0,0 +1,54 @@
+name: mtnucratio
+description: A small Java tool to calculate ratios between MT and nuclear sequencing reads in a given BAM file.
+keywords:
+  - mtnucratio
+  - ratio
+  - reads
+  - bam
+  - mitochondrial to nuclear ratio
+  - mitochondria
+  - statistics
+tools:
+  - mtnucratio:
+      description: A small tool to determine MT to Nuclear ratios for NGS data.
+      homepage: https://github.com/apeltzer/MTNucRatioCalculator
+      documentation: https://github.com/apeltzer/MTNucRatioCalculator
+      tool_dev_url: https://github.com/apeltzer/MTNucRatioCalculator
+      doi: "10.1186/s13059-016-0918-z"
+      licence: ['GPL v3']
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - bam:
+      type: file
+      description: (coordinate) sorted BAM/SAM file
+      pattern: "*.{bam,sam}"
+  - mt_id:
+      type: string
+      description: Identifier of the contig/chromosome of interest (e.g. chromosome, contig) as in the aligned against reference FASTA file, e.g. mt or chrMT for mitochondria
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - mtnucratio:
+      type: file
+      description: Text file containing metrics (mtreads, mt_cov_avg, nucreads, nuc_cov_avg, mt_nuc_ratio)
+      pattern: "*.mtnucratio"
+  - json:
+      type: file
+      description: JSON file, containing metadata map with sample name, tool name and version, and metrics as in txt file
+      pattern: "*.json"
+
+authors:
+  - "@louperelo"
diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml
index 155ed78d..c0936a81 100644
--- a/tests/config/pytest_modules.yml
+++ b/tests/config/pytest_modules.yml
@@ -778,6 +778,10 @@ msisensor/scan:
   - modules/msisensor/scan/**
   - tests/modules/msisensor/scan/**
 
+mtnucratio:
+  - modules/mtnucratio/**
+  - tests/modules/mtnucratio/**
+
 multiqc:
   - modules/fastqc/**
   - modules/multiqc/**
diff --git a/tests/modules/mtnucratio/main.nf b/tests/modules/mtnucratio/main.nf
new file mode 100644
index 00000000..dd9fc9db
--- /dev/null
+++ b/tests/modules/mtnucratio/main.nf
@@ -0,0 +1,14 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { MTNUCRATIO } from '../../../modules/mtnucratio/main.nf' addParams( options: [:] )
+
+workflow test_mtnucratio { // Runs MTNUCRATIO once on the sarscov2 single-end sorted BAM
+    
+    input = [ [ id:'test', single_end:false ], // meta map
+              file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam'], checkIfExists: true)]
+    mt_id = 'mt_id' // NOTE(review): looks like a placeholder — confirm it names a contig present in the test BAM
+
+    MTNUCRATIO ( input, mt_id )
+}
diff --git a/tests/modules/mtnucratio/test.yml b/tests/modules/mtnucratio/test.yml
new file mode 100644
index 00000000..76cbaf32
--- /dev/null
+++ b/tests/modules/mtnucratio/test.yml
@@ -0,0 +1,9 @@
+- name: mtnucratio
+  command: nextflow run tests/modules/mtnucratio -entry test_mtnucratio -c tests/config/nextflow.config
+  tags:
+    - mtnucratio
+  files:
+    - path: output/mtnucratio/test.single_end.sorted.bam.mtnucratio
+      md5sum: 19e96849802c70aa0694785f716274b7
+    - path: output/mtnucratio/test.single_end.sorted.bam.mtnucratiomtnuc.json
+      md5sum: 14d24be6272854d6762f0dfad5918ef6

From 0a7368aa6f89a63229f5e04fa937851348e16ccf Mon Sep 17 00:00:00 2001
From: Jose Espinosa-Carrasco <kadomu@gmail.com>
Date: Fri, 29 Oct 2021 16:37:39 +0200
Subject: [PATCH 03/15] Refactor genrich to avoid params in main (#981)

* Refactor genrich to avoid params in main

* Missed to commit test.yml file
---
 modules/genrich/main.nf        | 19 +++++++-----
 modules/genrich/meta.yml       | 15 ++++++++--
 tests/modules/genrich/main.nf  | 53 ++++++++++++++++++++++++++++------
 tests/modules/genrich/test.yml | 20 +++++++++----
 4 files changed, 81 insertions(+), 26 deletions(-)

diff --git a/modules/genrich/main.nf b/modules/genrich/main.nf
index c947e9cf..f34f9cd2 100644
--- a/modules/genrich/main.nf
+++ b/modules/genrich/main.nf
@@ -22,6 +22,10 @@ process GENRICH {
     tuple val(meta), path(treatment_bam)
     path  control_bam
     path  blacklist_bed
+    val   save_pvalues
+    val   save_pileup
+    val   save_bed
+    val   save_duplicates
 
     output:
     tuple val(meta), path("*narrowPeak")                     , emit: peaks
@@ -32,14 +36,14 @@ process GENRICH {
     path "versions.yml"                                      , emit: versions
 
     script:
-    def prefix     = options.suffix              ? "${meta.id}${options.suffix}"   : "${meta.id}"
-    def control    = params.control_bam          ? "-c $control_bam"               : ''
-    def pvalues    = params.pvalues              ? "-f ${prefix}.pvalues.bedGraph" : ""
-    def pileup     = params.pileup               ? "-k ${prefix}.pileup.bedGraph"  : ""
-    def bed        = params.bed                  ? "-b ${prefix}.intervals.bed"    : ""
-    def blacklist  = params.blacklist_bed        ? "-E $blacklist_bed"             : ""
+    def prefix     = options.suffix ? "${meta.id}${options.suffix}"   : "${meta.id}"
+    def control    = control_bam    ? "-c $control_bam"               : ''
+    def blacklist  = blacklist_bed  ? "-E $blacklist_bed"             : ""
+    def pvalues    = save_pvalues   ? "-f ${prefix}.pvalues.bedGraph" : ""
+    def pileup     = save_pileup    ? "-k ${prefix}.pileup.bedGraph"  : ""
+    def bed        = save_bed       ? "-b ${prefix}.intervals.bed"    : ""
     def duplicates = ""
-    if (params.save_duplicates) {
+    if (save_duplicates) {
         if (options.args.contains('-r')) {
             duplicates = "-R ${prefix}.duplicates.txt"
         } else {
@@ -58,7 +62,6 @@ process GENRICH {
         $pileup \\
         $bed \\
         $duplicates \\
-        $blacklist \\
         $control
 
     cat <<-END_VERSIONS > versions.yml
diff --git a/modules/genrich/meta.yml b/modules/genrich/meta.yml
index 8f7b004b..37184190 100644
--- a/modules/genrich/meta.yml
+++ b/modules/genrich/meta.yml
@@ -15,7 +15,6 @@ tools:
       tool_dev_url: https://github.com/jsh58/Genrich
       doi: ""
       licence: ['MIT']
-
 input:
   - meta:
       type: map
@@ -34,7 +33,18 @@ input:
       type: file
       description: Bed file containing genomic intervals to exclude from the analysis
       pattern: "*.{bed}"
-
+  - save_pvalues:
+      type: boolean
+      description: Create bedgraph-ish file for p/q-values
+  - save_pileup:
+      type: boolean
+      description: Create bedgraph-ish file for pileups and p-values
+  - save_bed:
+      type: boolean
+      description: Create BED file for reads/fragments/intervals
+  - save_duplicates:
+      type: boolean
+      description: Create PCR duplicates file (only works if -r option is set)
 output:
   - meta:
       type: map
@@ -65,7 +75,6 @@ output:
       type: file
       description: File containing software version
       pattern: "*.{version.txt}"
-
 authors:
   - "@JoseEspinosa"
 
diff --git a/tests/modules/genrich/main.nf b/tests/modules/genrich/main.nf
index 654b38e5..aa1a2d49 100644
--- a/tests/modules/genrich/main.nf
+++ b/tests/modules/genrich/main.nf
@@ -2,10 +2,10 @@
 
 nextflow.enable.dsl = 2
 
-include { GENRICH } from '../../../modules/genrich/main.nf' addParams( control_bam: false, pvalues: false, pileup:false, bed:false, blacklist_bed:false, save_duplicates:false, options: ["args": "-p 0.1"] )
-include { GENRICH as GENRICH_BLACKLIST   } from '../../../modules/genrich/main.nf' addParams( control_bam: false, pvalues: false, pileup:false, bed:false, blacklist_bed:true, save_duplicates:false, options: ["args": "-p 0.1"] )
-include { GENRICH as GENRICH_ALL_OUTPUTS } from '../../../modules/genrich/main.nf' addParams( control_bam: false, pvalues: true, pileup:true, bed:true, blacklist_bed:false, save_duplicates:true, options: ["args": "-r -p 0.1"] )
-include { GENRICH as GENRICH_ATACSEQ     } from '../../../modules/genrich/main.nf' addParams( control_bam: false, pvalues: false, pileup:false, bed:false, blacklist_bed:false, save_duplicates:false, options: ["args": "-j -p 0.1"] )
+include { GENRICH } from '../../../modules/genrich/main.nf' addParams( options: ["args": "-p 0.1"] )
+include { GENRICH as GENRICH_CTRL    } from '../../../modules/genrich/main.nf' addParams( options: ["args": "-p 0.9"] )
+include { GENRICH as GENRICH_ALL     } from '../../../modules/genrich/main.nf' addParams( options: ["args": "-r -p 0.1"] )
+include { GENRICH as GENRICH_ATACSEQ } from '../../../modules/genrich/main.nf' addParams( options: ["args": "-j -p 0.1"] )
 
 workflow test_genrich {
     input     = [ [ id:'test', single_end:false ], // meta map
@@ -13,7 +13,12 @@ workflow test_genrich {
     control   = [ ]
     blacklist = [ ]
 
-    GENRICH ( input, control, blacklist )
+    save_pvalues    = false
+    save_pileup     = false
+    save_bed        = false
+    save_duplicates = false
+
+    GENRICH ( input, control, blacklist, save_pvalues, save_pileup, save_bed, save_duplicates )
 }
 
 workflow test_genrich_ctrl {
@@ -22,7 +27,12 @@ workflow test_genrich_ctrl {
     control   = [ file( params.test_data['homo_sapiens']['illumina']['test2_paired_end_name_sorted_bam'], checkIfExists: true) ]
     blacklist = [ ]
 
-    GENRICH ( input, control, blacklist )
+    save_pvalues    = false
+    save_pileup     = false
+    save_bed        = false
+    save_duplicates = false
+
+    GENRICH_CTRL ( input, control, blacklist, save_pvalues, save_pileup, save_bed, save_duplicates )
 }
 
 workflow test_genrich_all_outputs {
@@ -31,14 +41,39 @@ workflow test_genrich_all_outputs {
     control   = [ file( params.test_data['homo_sapiens']['illumina']['test2_paired_end_name_sorted_bam'], checkIfExists: true) ]
     blacklist = [ ]
 
-    GENRICH_ALL_OUTPUTS ( input, control, blacklist )
+    save_pvalues    = true
+    save_pileup     = true
+    save_bed        = true
+    save_duplicates = true
+
+    GENRICH_ALL ( input, control, blacklist, save_pvalues, save_pileup, save_bed, save_duplicates )
+}
+
+workflow test_genrich_blacklist {
+    input     = [ [ id:'test', single_end:false ], // meta map
+                  [ file( params.test_data['homo_sapiens']['illumina']['test_paired_end_name_sorted_bam'], checkIfExists: true) ]]
+    control   = [ ]
+    blacklist = [ file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true)]
+
+    save_pvalues    = false
+    save_pileup     = false
+    save_bed        = false
+    save_duplicates = false
+
+    GENRICH ( input, control, blacklist, save_pvalues, save_pileup, save_bed, save_duplicates )
 }
 
 workflow test_genrich_atacseq {
     input     = [ [ id:'test', single_end:false ], // meta map
                   [ file( params.test_data['homo_sapiens']['illumina']['test_paired_end_name_sorted_bam'], checkIfExists: true) ]]
-    control   = [ file( params.test_data['homo_sapiens']['illumina']['test2_paired_end_name_sorted_bam'], checkIfExists: true) ]
+    control   = [ ]
     blacklist = [ ]
 
-    GENRICH_ATACSEQ ( input, control, blacklist )
+    save_pvalues    = false
+    save_pileup     = false
+    save_bed        = false
+    save_duplicates = false
+
+    GENRICH_ATACSEQ ( input, control, blacklist, save_pvalues, save_pileup, save_bed, save_duplicates )
 }
+
diff --git a/tests/modules/genrich/test.yml b/tests/modules/genrich/test.yml
index bd762f7c..63bf2927 100644
--- a/tests/modules/genrich/test.yml
+++ b/tests/modules/genrich/test.yml
@@ -12,7 +12,7 @@
     - genrich
   files:
     - path: output/genrich/test.narrowPeak
-      md5sum: 6afabdd3f691c7c84c66ff8a23984681
+      md5sum: 2fcc392360b317f5ebee88cdbc149e05
 
 - name: genrich test_genrich_all_outputs
   command: nextflow run tests/modules/genrich -entry test_genrich_all_outputs -c tests/config/nextflow.config
@@ -20,15 +20,23 @@
     - genrich
   files:
     - path: output/genrich/test.duplicates.txt
-      md5sum: a92893f905fd8b3751bc6a960fbfe7ba
+      md5sum: 159d557af7c23bc3cfb802d87fa96c34
     - path: output/genrich/test.intervals.bed
-      md5sum: 52edf47e6641c0cc03f9cca7324f7eaa
+      md5sum: 4bea65caa3f4043d703af4b57161112e
     - path: output/genrich/test.narrowPeak
-      md5sum: e45eb7d000387975050c2e85c164e5be
+      md5sum: d41d8cd98f00b204e9800998ecf8427e
     - path: output/genrich/test.pileup.bedGraph
-      md5sum: e4f7fa664cd4ed2cf3a1a3a9eb415e71
+      md5sum: 03e53848de695b5794f32f15b2709203
     - path: output/genrich/test.pvalues.bedGraph
-      md5sum: 564859953704983393d4b7d6317060cd
+      md5sum: b14feef34b6d2379a173a734ca963cde
+
+- name: genrich test_genrich_blacklist
+  command: nextflow run tests/modules/genrich -entry test_genrich_blacklist -c tests/config/nextflow.config
+  tags:
+    - genrich
+  files:
+    - path: output/genrich/test.narrowPeak
+      md5sum: 6afabdd3f691c7c84c66ff8a23984681
 
 - name: genrich test_genrich_atacseq
   command: nextflow run tests/modules/genrich -entry test_genrich_atacseq -c tests/config/nextflow.config

From 3df4fe60851c19ec8d55ef3a978ae851eaccfd68 Mon Sep 17 00:00:00 2001
From: FriederikeHanssen <Friederike.hanssen@qbic.uni-tuebingen.de>
Date: Fri, 29 Oct 2021 17:05:06 +0200
Subject: [PATCH 04/15] add freebayes/somatic and update freebayes/germline
 (#990)

* add freebayes/somatic and update freebayes/germline to new syntax and cram

* add pytest

* update with new freebayes/germline path
---
 modules/freebayes/{ => germline}/functions.nf |  0
 modules/freebayes/{ => germline}/main.nf      | 31 ++++----
 modules/freebayes/{ => germline}/meta.yml     | 10 +--
 modules/freebayes/somatic/functions.nf        | 78 +++++++++++++++++++
 modules/freebayes/somatic/main.nf             | 74 ++++++++++++++++++
 modules/freebayes/somatic/meta.yml            | 66 ++++++++++++++++
 modules/gatk4/applybqsr/meta.yml              |  1 +
 tests/config/pytest_modules.yml               | 10 ++-
 tests/modules/freebayes/germline/main.nf      | 51 ++++++++++++
 tests/modules/freebayes/germline/test.yml     | 26 +++++++
 tests/modules/freebayes/main.nf               | 35 ---------
 tests/modules/freebayes/somatic/main.nf       | 37 +++++++++
 tests/modules/freebayes/somatic/test.yml      | 17 ++++
 tests/modules/freebayes/test.yml              | 14 ----
 14 files changed, 378 insertions(+), 72 deletions(-)
 rename modules/freebayes/{ => germline}/functions.nf (100%)
 rename modules/freebayes/{ => germline}/main.nf (75%)
 rename modules/freebayes/{ => germline}/meta.yml (95%)
 create mode 100644 modules/freebayes/somatic/functions.nf
 create mode 100644 modules/freebayes/somatic/main.nf
 create mode 100644 modules/freebayes/somatic/meta.yml
 create mode 100644 tests/modules/freebayes/germline/main.nf
 create mode 100644 tests/modules/freebayes/germline/test.yml
 delete mode 100644 tests/modules/freebayes/main.nf
 create mode 100644 tests/modules/freebayes/somatic/main.nf
 create mode 100644 tests/modules/freebayes/somatic/test.yml
 delete mode 100644 tests/modules/freebayes/test.yml

diff --git a/modules/freebayes/functions.nf b/modules/freebayes/germline/functions.nf
similarity index 100%
rename from modules/freebayes/functions.nf
rename to modules/freebayes/germline/functions.nf
diff --git a/modules/freebayes/main.nf b/modules/freebayes/germline/main.nf
similarity index 75%
rename from modules/freebayes/main.nf
rename to modules/freebayes/germline/main.nf
index 63235d8a..eae62036 100644
--- a/modules/freebayes/main.nf
+++ b/modules/freebayes/germline/main.nf
@@ -4,7 +4,7 @@ include { initOptions; saveFiles; getProcessName; getSoftwareName } from './func
 params.options = [:]
 options        = initOptions(params.options)
 
-process FREEBAYES {
+process FREEBAYES_GERMLINE {
     tag "$meta.id"
     label 'process_low'
     publishDir "${params.outdir}",
@@ -19,24 +19,25 @@ process FREEBAYES {
     }
 
     input:
-    tuple val(meta), path(bam), path(bai)
-    tuple path(fasta), path(fai)
-    path(targets)
-    path(samples)
-    path(populations)
-    path(cnv)
-
+    tuple val(meta), path(input), path(input_index)
+    path fasta
+    path fai
+    path targets
+    path samples
+    path populations
+    path cnv
 
     output:
     tuple val(meta), path("*.vcf.gz")   , emit: vcf
     path  "versions.yml"                , emit: versions
 
     script:
-    def prefix   = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
-    def targets_file = targets ? "--target ${targets}" : ""
-    def samples_file = samples ? "--samples ${samples}" : ""
-    def populations_file = populations ? "--populations ${populations}" : ""
-    def cnv_file = cnv ? "--cnv-map ${cnv}" : ""
+    def prefix           = options.suffix ? "${meta.id}${options.suffix}"  : "${meta.id}"
+    def targets_file     = targets        ? "--target ${targets}"          : ""
+    def samples_file     = samples        ? "--samples ${samples}"         : ""
+    def populations_file = populations    ? "--populations ${populations}" : ""
+    def cnv_file         = cnv            ? "--cnv-map ${cnv}"             : ""
+
     if (task.cpus > 1) {
         """
         freebayes-parallel \\
@@ -47,7 +48,7 @@ process FREEBAYES {
             $populations_file \\
             $cnv_file \\
             $options.args \\
-            $bam  > ${prefix}.vcf
+            $input  > ${prefix}.vcf
 
         gzip --no-name ${prefix}.vcf
 
@@ -66,7 +67,7 @@ process FREEBAYES {
             $populations_file \\
             $cnv_file \\
             $options.args \\
-            $bam > ${prefix}.vcf
+            $input > ${prefix}.vcf
 
         gzip --no-name ${prefix}.vcf
 
diff --git a/modules/freebayes/meta.yml b/modules/freebayes/germline/meta.yml
similarity index 95%
rename from modules/freebayes/meta.yml
rename to modules/freebayes/germline/meta.yml
index 46eb5309..86650715 100644
--- a/modules/freebayes/meta.yml
+++ b/modules/freebayes/germline/meta.yml
@@ -1,4 +1,4 @@
-name: freebayes
+name: freebayes_germline
 description: A haplotype-based variant detector
 keywords:
   - variant caller
@@ -12,7 +12,7 @@ tools:
       homepage: https://github.com/freebayes/freebayes
       documentation: https://github.com/freebayes/freebayes
       tool_dev_url: https://github.com/freebayes/freebayes
-      doi: ""
+      doi: "arXiv:1207.3907"
       licence: ['MIT']
 
 input:
@@ -21,11 +21,11 @@ input:
       description: |
         Groovy Map containing sample information
         e.g. [ id:'test', single_end:false ]
-  - bam:
+  - input:
       type: file
       description: BAM/CRAM/SAM file
       pattern: "*.{bam,cram,sam}"
-  - bai:
+  - input_index:
       type: file
       description: BAM/CRAM/SAM index file
       pattern: "*.bam.bai"
@@ -59,7 +59,6 @@ input:
           seq_name start end sample_name copy_number
       pattern: "*.bed"
 
-
 output:
   - meta:
       type: map
@@ -76,3 +75,4 @@ output:
       pattern: "*.vcf.gz"
 authors:
   - "@maxibor"
+  - "@FriederikeHanssen"
diff --git a/modules/freebayes/somatic/functions.nf b/modules/freebayes/somatic/functions.nf
new file mode 100644
index 00000000..85628ee0
--- /dev/null
+++ b/modules/freebayes/somatic/functions.nf
@@ -0,0 +1,78 @@
+//
+//  Utility functions used in nf-core DSL2 module files
+//
+
+//
+// Extract name of software tool from process name using $task.process
+//
+def getSoftwareName(task_process) {
+    return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()
+}
+
+//
+// Extract name of module from process name using $task.process
+//
+def getProcessName(task_process) {
+    return task_process.tokenize(':')[-1]
+}
+
+//
+// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules
+//
+def initOptions(Map args) {
+    def Map options = [:]
+    options.args            = args.args ?: ''
+    options.args2           = args.args2 ?: ''
+    options.args3           = args.args3 ?: ''
+    options.publish_by_meta = args.publish_by_meta ?: []
+    options.publish_dir     = args.publish_dir ?: ''
+    options.publish_files   = args.publish_files
+    options.suffix          = args.suffix ?: ''
+    return options
+}
+
+//
+// Tidy up and join elements of a list to return a path string
+//
+def getPathFromList(path_list) {
+    def paths = path_list.findAll { item -> !item?.trim().isEmpty() }      // Drop entries that are blank after trimming
+    paths     = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and leading/trailing slashes
+    return paths.join('/')
+}
+
+//
+// Function to save/publish module results
+//
+def saveFiles(Map args) {
+    def ioptions  = initOptions(args.options)
+    def path_list = [ ioptions.publish_dir ?: args.publish_dir ]
+
+    // Do not publish versions.yml unless running from pytest workflow
+    if (args.filename.equals('versions.yml') && !System.getenv("NF_CORE_MODULES_TEST")) {
+        return null
+    }
+    if (ioptions.publish_by_meta) {
+        def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta
+        for (key in key_list) {
+            if (args.meta && key instanceof String) {
+                def path = key
+                if (args.meta.containsKey(key)) {
+                    path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key]
+                }
+                path = path instanceof String ? path : ''
+                path_list.add(path)
+            }
+        }
+    }
+    if (ioptions.publish_files instanceof Map) {
+        for (ext in ioptions.publish_files) {
+            if (args.filename.endsWith(ext.key)) {
+                def ext_list = path_list.collect()
+                ext_list.add(ext.value)
+                return "${getPathFromList(ext_list)}/$args.filename"
+            }
+        }
+    } else if (ioptions.publish_files == null) {
+        return "${getPathFromList(path_list)}/$args.filename"
+    }
+}
diff --git a/modules/freebayes/somatic/main.nf b/modules/freebayes/somatic/main.nf
new file mode 100644
index 00000000..c1579661
--- /dev/null
+++ b/modules/freebayes/somatic/main.nf
@@ -0,0 +1,74 @@
+// Import generic module functions
+include { initOptions; saveFiles; getProcessName; getSoftwareName } from './functions'
+
+params.options = [:]
+options        = initOptions(params.options)
+
+process FREEBAYES_SOMATIC {
+    tag "$meta.id"
+    label 'process_low'
+    publishDir "${params.outdir}",
+        mode: params.publish_dir_mode,
+        saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) }
+
+    conda (params.enable_conda ? "bioconda::freebayes=1.3.5" : null)
+    if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
+        container "https://depot.galaxyproject.org/singularity/freebayes:1.3.5--py38ha193a2f_3"
+    } else {
+        container "quay.io/biocontainers/freebayes:1.3.5--py38ha193a2f_3"
+    }
+
+    input:
+    tuple val(meta), path(input_normal), path(input_index_normal), path(input_tumor), path(input_index_tumor)
+    path fasta   // reference FASTA, passed to freebayes via -f
+    path fai     // FASTA index, read by fasta_generate_regions.py in the multi-CPU branch
+    path targets // optional: adds --target when non-empty
+    path samples // optional: adds --samples when non-empty
+
+    output:
+    tuple val(meta), path("*.vcf.gz")   , emit: vcf
+    path  "versions.yml"                , emit: versions
+
+    script:
+    def prefix           = options.suffix ? "${meta.id}${options.suffix}"  : "${meta.id}"
+    def targets_file     = targets        ? "--target ${targets}"          : ""
+    def samples_file     = samples        ? "--samples ${samples}"         : ""
+
+    if (task.cpus > 1) { // multi-CPU: freebayes-parallel over regions derived from the staged index
+        """
+        freebayes-parallel \\
+            <(fasta_generate_regions.py ${fai} 10000) ${task.cpus} \\
+            -f $fasta \\
+            $targets_file \\
+            $samples_file \\
+            $options.args \\
+            $input_tumor \\
+            $input_normal  > ${prefix}.vcf
+
+        gzip --no-name ${prefix}.vcf
+
+        cat <<-END_VERSIONS > versions.yml
+        ${getProcessName(task.process)}:
+            ${getSoftwareName(task.process)}: \$(echo \$(freebayes --version 2>&1) | sed 's/version:\s*v//g' )
+        END_VERSIONS
+        """
+
+    } else { // single CPU: plain freebayes over the whole input
+        """
+        freebayes \\
+            -f $fasta \\
+            $targets_file \\
+            $samples_file \\
+            $options.args \\
+            $input_tumor \\
+            $input_normal  > ${prefix}.vcf
+
+        gzip --no-name ${prefix}.vcf
+
+        cat <<-END_VERSIONS > versions.yml
+        ${getProcessName(task.process)}:
+            ${getSoftwareName(task.process)}: \$(echo \$(freebayes --version 2>&1) | sed 's/version:\s*v//g' )
+        END_VERSIONS
+        """
+    }
+}
diff --git a/modules/freebayes/somatic/meta.yml b/modules/freebayes/somatic/meta.yml
new file mode 100644
index 00000000..391e5007
--- /dev/null
+++ b/modules/freebayes/somatic/meta.yml
@@ -0,0 +1,66 @@
+name: freebayes_somatic
+description: A haplotype-based variant detector
+keywords:
+  - variant caller
+  - SNP
+  - genotyping
+  - somatic variant calling
+  - bayesian
+
+tools:
+  - freebayes:
+      description: Bayesian haplotype-based polymorphism discovery and genotyping
+      homepage: https://github.com/freebayes/freebayes
+      documentation: https://github.com/freebayes/freebayes
+      tool_dev_url: https://github.com/freebayes/freebayes
+      doi: "arXiv:1207.3907"
+      licence: ['MIT']
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - input:
+      type: file
+      description: BAM/CRAM/SAM file
+      pattern: "*.{bam,cram,sam}"
+  - input_index:
+      type: file
+      description: BAM/CRAM/SAM index file
+      pattern: "*.{bai,crai}"
+  - fasta:
+      type: file
+      description: reference fasta file
+      pattern: "*.{fa,fa.gz,fasta,fasta.gz}"
+  - fai:
+      type: file
+      description: reference fasta file index
+      pattern: "*.fai"
+  - targets:
+      type: file
+      description: Optional - Limit analysis to targets listed in this BED-format FILE.
+      pattern: "*.bed"
+  - samples:
+      type: file
+      description: Optional - Limit analysis to samples listed (one per line) in the FILE.
+      pattern: "*.txt"
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - vcf:
+      type: file
+      description: Compressed VCF file
+      pattern: "*.vcf.gz"
+
+authors:
+  - "@FriederikeHanssen"
diff --git a/modules/gatk4/applybqsr/meta.yml b/modules/gatk4/applybqsr/meta.yml
index b002dca6..e7419860 100644
--- a/modules/gatk4/applybqsr/meta.yml
+++ b/modules/gatk4/applybqsr/meta.yml
@@ -61,3 +61,4 @@ output:
 
 authors:
   - "@yocra3"
+  - "@FriederikeHanssen"
diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml
index c0936a81..d7fed0c0 100644
--- a/tests/config/pytest_modules.yml
+++ b/tests/config/pytest_modules.yml
@@ -410,9 +410,13 @@ flash:
   - modules/flash/**
   - tests/modules/flash/**
 
-freebayes:
-  - modules/freebayes/**
-  - tests/modules/freebayes/**
+freebayes/germline:
+  - modules/freebayes/germline/**
+  - tests/modules/freebayes/germline/**
+
+freebayes/somatic:
+  - modules/freebayes/somatic/**
+  - tests/modules/freebayes/somatic/**
 
 gatk4/applybqsr:
   - modules/gatk4/applybqsr/**
diff --git a/tests/modules/freebayes/germline/main.nf b/tests/modules/freebayes/germline/main.nf
new file mode 100644
index 00000000..1b39eea0
--- /dev/null
+++ b/tests/modules/freebayes/germline/main.nf
@@ -0,0 +1,51 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { FREEBAYES_GERMLINE } from '../../../../modules/freebayes/germline/main.nf' addParams( options: [:] )
+
+workflow test_freebayes {
+
+    input = [ [ id:'test', single_end:false ], // meta map
+              file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
+              file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true)]
+    fasta   = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
+    fai         = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true)
+    targets = []
+    samples = []
+    populations = []
+    cnv = []
+
+    FREEBAYES_GERMLINE ( input, fasta, fai, targets, samples, populations, cnv)
+}
+
+workflow test_freebayes_bed {
+
+    input = [ [ id:'test', single_end:false ], // meta map
+              file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
+              file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true)]
+    fasta   = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
+    fai         = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true)
+    targets     = file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true)
+    samples     = []
+    populations = []
+    cnv         = []
+
+    FREEBAYES_GERMLINE ( input, fasta, fai, targets, samples, populations, cnv)
+}
+
+workflow test_freebayes_cram {
+
+    input = [ [ id:'test', single_end:false ], // meta map
+                file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true),
+                file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true)
+            ]
+    fasta   = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
+    fai     = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
+    targets = []
+    samples = []
+    populations = []
+    cnv = []
+
+    FREEBAYES_GERMLINE ( input, fasta, fai, targets, samples, populations, cnv)
+}
diff --git a/tests/modules/freebayes/germline/test.yml b/tests/modules/freebayes/germline/test.yml
new file mode 100644
index 00000000..55925b92
--- /dev/null
+++ b/tests/modules/freebayes/germline/test.yml
@@ -0,0 +1,26 @@
+- name: freebayes germline test_freebayes
+  command: nextflow run tests/modules/freebayes/germline -entry test_freebayes -c tests/config/nextflow.config
+  tags:
+    - freebayes
+    - freebayes/germline
+  files:
+    - path: output/freebayes/test.vcf.gz
+      md5sum: 1ec210ad27514c7a4140c924dc66d979
+
+- name: freebayes germline test_freebayes_bed
+  command: nextflow run tests/modules/freebayes/germline -entry test_freebayes_bed -c tests/config/nextflow.config
+  tags:
+    - freebayes
+    - freebayes/germline
+  files:
+    - path: output/freebayes/test.vcf.gz
+      md5sum: e8923cccd5dac196f72d3d3997a60706
+
+- name: freebayes germline test_freebayes_cram
+  command: nextflow run tests/modules/freebayes/germline -entry test_freebayes_cram -c tests/config/nextflow.config
+  tags:
+    - freebayes
+    - freebayes/germline
+  files:
+    - path: output/freebayes/test.vcf.gz
+      md5sum: cb57a3ed154618e3aa4a5272fcfb7521
diff --git a/tests/modules/freebayes/main.nf b/tests/modules/freebayes/main.nf
deleted file mode 100644
index 1c07b821..00000000
--- a/tests/modules/freebayes/main.nf
+++ /dev/null
@@ -1,35 +0,0 @@
-#!/usr/bin/env nextflow
-
-nextflow.enable.dsl = 2
-
-include { FREEBAYES } from '../../../modules/freebayes/main.nf' addParams( options: [:] )
-
-workflow test_freebayes {
-
-    input = [ [ id:'test', single_end:false ], // meta map
-              file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
-              file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true)]
-    reference = [file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true),
-                 file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true)]
-    targets = []
-    samples = []
-    populations = []
-    cnv = []
-
-    FREEBAYES ( input, reference, targets, samples, populations, cnv)
-}
-
-workflow test_freebayes_bed {
-
-    input = [ [ id:'test', single_end:false ], // meta map
-              file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
-              file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true)]
-    reference = [file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true),
-                 file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true)]
-    targets = file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true)
-    samples = []
-    populations = []
-    cnv = []
-
-    FREEBAYES ( input, reference, targets, samples, populations, cnv)
-}
diff --git a/tests/modules/freebayes/somatic/main.nf b/tests/modules/freebayes/somatic/main.nf
new file mode 100644
index 00000000..d26caf34
--- /dev/null
+++ b/tests/modules/freebayes/somatic/main.nf
@@ -0,0 +1,37 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { FREEBAYES_SOMATIC } from '../../../../modules/freebayes/somatic/main.nf' addParams( options: [:] )
+
+workflow test_freebayes {
+
+    input = [ [ id:'test', single_end:false ], // meta map
+                file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
+                file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true),
+                file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam'], checkIfExists: true),
+                file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam_bai'], checkIfExists: true)
+            ]
+    fasta   = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
+    fai     = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
+    targets = []
+    samples = []
+
+    FREEBAYES_SOMATIC ( input, fasta, fai, targets, samples)
+}
+
+workflow test_freebayes_intervals {
+
+    input = [ [ id:'test', single_end:false ], // meta map
+                file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true),
+                file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true),
+                file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_cram'], checkIfExists: true),
+                file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_cram_crai'], checkIfExists: true)
+            ]
+    fasta   = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
+    fai     = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
+    targets = file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true)
+    samples = []
+
+    FREEBAYES_SOMATIC ( input, fasta, fai, targets, samples)
+}
diff --git a/tests/modules/freebayes/somatic/test.yml b/tests/modules/freebayes/somatic/test.yml
new file mode 100644
index 00000000..93113e60
--- /dev/null
+++ b/tests/modules/freebayes/somatic/test.yml
@@ -0,0 +1,17 @@
+- name: freebayes somatic test_freebayes
+  command: nextflow run tests/modules/freebayes/somatic -entry test_freebayes -c tests/config/nextflow.config
+  tags:
+    - freebayes/somatic
+    - freebayes
+  files:
+    - path: output/freebayes/test.vcf.gz
+      md5sum: 1c47d02f27ec5918558c8688ce6e7780
+
+- name: freebayes somatic test_freebayes_intervals
+  command: nextflow run tests/modules/freebayes/somatic -entry test_freebayes_intervals -c tests/config/nextflow.config
+  tags:
+    - freebayes/somatic
+    - freebayes
+  files:
+    - path: output/freebayes/test.vcf.gz
+      md5sum: 5b8a12666bde63746dcec7afcd3ef789
diff --git a/tests/modules/freebayes/test.yml b/tests/modules/freebayes/test.yml
deleted file mode 100644
index 9ca54021..00000000
--- a/tests/modules/freebayes/test.yml
+++ /dev/null
@@ -1,14 +0,0 @@
-- name: freebayes test_freebayes
-  command: nextflow run tests/modules/freebayes -entry test_freebayes -c tests/config/nextflow.config
-  tags:
-    - freebayes
-  files:
-    - path: output/freebayes/test.vcf.gz
-
-- name: freebayes test_freebayes_bed
-  command: nextflow run tests/modules/freebayes -entry test_freebayes_bed -c tests/config/nextflow.config
-  tags:
-    - freebayes
-  files:
-    - path: output/freebayes/test.vcf.gz
-

From 7afb962f0bd5d9b504331082fa4bafd498074a4b Mon Sep 17 00:00:00 2001
From: GCJMackenzie <43276267+GCJMackenzie@users.noreply.github.com>
Date: Fri, 29 Oct 2021 17:21:34 +0100
Subject: [PATCH 05/15] New module genomicsdbimport (#857)

* saving changes to checkout

* saving to sort out other branch

* removed yml tracking of files that can't be tracked due to directory name changing between runs

* test data added, ready for pr

* fix eol linting error

* Update modules/gatk4/genomicsdbimport/main.nf

Co-authored-by: Francesco L <53608000+lescai@users.noreply.github.com>

* merging with master

* update push to show progress

* tests now working untar able to pass data to genomicsdbimport

* commit to checkout

* tests updated, module reworked to simplify and emit updated gendb

* Apply suggestions from code review

Co-authored-by: Harshil Patel <drpatelh@users.noreply.github.com>

* update meta.yml

Priority of input options changed, updated to reflect this

* Update test.yml

name prefix changed in main script, test.yml updated to reflect this

* fix tests due to review changes

Co-authored-by: GCJMackenzie <gavin.mackenzie@nibsc.org>
Co-authored-by: Francesco L <53608000+lescai@users.noreply.github.com>
Co-authored-by: Harshil Patel <drpatelh@users.noreply.github.com>
---
 modules/gatk4/genomicsdbimport/functions.nf   | 78 ++++++++++++++++++
 modules/gatk4/genomicsdbimport/main.nf        | 67 ++++++++++++++++
 modules/gatk4/genomicsdbimport/meta.yml       | 80 +++++++++++++++++++
 tests/config/pytest_modules.yml               |  4 +
 tests/config/test_data.config                 |  2 +
 tests/modules/gatk4/genomicsdbimport/main.nf  | 61 ++++++++++++++
 tests/modules/gatk4/genomicsdbimport/test.yml | 50 ++++++++++++
 7 files changed, 342 insertions(+)
 create mode 100644 modules/gatk4/genomicsdbimport/functions.nf
 create mode 100644 modules/gatk4/genomicsdbimport/main.nf
 create mode 100644 modules/gatk4/genomicsdbimport/meta.yml
 create mode 100644 tests/modules/gatk4/genomicsdbimport/main.nf
 create mode 100644 tests/modules/gatk4/genomicsdbimport/test.yml

diff --git a/modules/gatk4/genomicsdbimport/functions.nf b/modules/gatk4/genomicsdbimport/functions.nf
new file mode 100644
index 00000000..85628ee0
--- /dev/null
+++ b/modules/gatk4/genomicsdbimport/functions.nf
@@ -0,0 +1,78 @@
+//
+//  Utility functions used in nf-core DSL2 module files
+//
+
+//
+// Extract name of software tool from process name using $task.process
+//
+def getSoftwareName(task_process) {
+    return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()
+}
+
+//
+// Extract name of module from process name using $task.process
+//
+def getProcessName(task_process) {
+    return task_process.tokenize(':')[-1]
+}
+
+//
+// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules
+//
+def initOptions(Map args) {
+    def Map options = [:]
+    options.args            = args.args ?: ''
+    options.args2           = args.args2 ?: ''
+    options.args3           = args.args3 ?: ''
+    options.publish_by_meta = args.publish_by_meta ?: []
+    options.publish_dir     = args.publish_dir ?: ''
+    options.publish_files   = args.publish_files
+    options.suffix          = args.suffix ?: ''
+    return options
+}
+
+//
+// Tidy up and join elements of a list to return a path string
+//
+def getPathFromList(path_list) {
+    def paths = path_list.findAll { item -> !item?.trim().isEmpty() }      // Drop entries that are blank after trimming
+    paths     = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and leading/trailing slashes
+    return paths.join('/')
+}
+
+//
+// Function to save/publish module results
+//
+def saveFiles(Map args) {
+    def ioptions  = initOptions(args.options)
+    def path_list = [ ioptions.publish_dir ?: args.publish_dir ]
+
+    // Do not publish versions.yml unless running from pytest workflow
+    if (args.filename.equals('versions.yml') && !System.getenv("NF_CORE_MODULES_TEST")) {
+        return null
+    }
+    if (ioptions.publish_by_meta) {
+        def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta
+        for (key in key_list) {
+            if (args.meta && key instanceof String) {
+                def path = key
+                if (args.meta.containsKey(key)) {
+                    path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key]
+                }
+                path = path instanceof String ? path : ''
+                path_list.add(path)
+            }
+        }
+    }
+    if (ioptions.publish_files instanceof Map) {
+        for (ext in ioptions.publish_files) {
+            if (args.filename.endsWith(ext.key)) {
+                def ext_list = path_list.collect()
+                ext_list.add(ext.value)
+                return "${getPathFromList(ext_list)}/$args.filename"
+            }
+        }
+    } else if (ioptions.publish_files == null) {
+        return "${getPathFromList(path_list)}/$args.filename"
+    }
+}
diff --git a/modules/gatk4/genomicsdbimport/main.nf b/modules/gatk4/genomicsdbimport/main.nf
new file mode 100644
index 00000000..aa4fceb0
--- /dev/null
+++ b/modules/gatk4/genomicsdbimport/main.nf
@@ -0,0 +1,67 @@
+// Import generic module functions
+include { initOptions; saveFiles; getSoftwareName; getProcessName } from './functions'
+
+params.options = [:]
+options        = initOptions(params.options)
+
+process GATK4_GENOMICSDBIMPORT {
+    tag "$meta.id"
+    label 'process_low'
+    publishDir "${params.outdir}",
+        mode: params.publish_dir_mode,
+        saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) }
+
+    conda (params.enable_conda ? "bioconda::gatk4=4.2.0.0" : null)
+    if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
+        container "https://depot.galaxyproject.org/singularity/gatk4:4.2.0.0--0"
+    } else {
+        container "quay.io/biocontainers/gatk4:4.2.0.0--0"
+    }
+
+    input:
+    tuple val(meta), path(vcf), path(tbi), path(intervalfile), val(intervalval), path(wspace)
+    val run_intlist      // true: only write the interval list of the existing workspace (wspace)
+    val run_updatewspace // true: import into the existing workspace (wspace) and emit it as updatedb
+    val input_map        // true: vcf[0] is passed as --sample-name-map instead of individual -V inputs
+
+    output:
+    tuple val(meta), path("*_genomicsdb")   , optional:true, emit: genomicsdb
+    tuple val(meta), path("$updated_db")    , optional:true, emit: updatedb
+    tuple val(meta), path("*.interval_list"), optional:true, emit: intervallist
+    path "versions.yml"                                    , emit: versions
+
+    script:
+    def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
+
+    // settings for running default create gendb mode; each VCF gets its own space-separated "-V <file>" pair
+    def inputs_command = input_map ? "--sample-name-map ${vcf[0]}" : "${'-V ' + vcf.join(' -V ')}"
+    def dir_command = "--genomicsdb-workspace-path ${prefix}"
+    def intervals_command = intervalfile ? " -L ${intervalfile} " : " -L ${intervalval} "
+
+    // settings changed for running get intervals list mode if run_intlist is true
+    if (run_intlist) {
+        inputs_command = ''
+        dir_command = "--genomicsdb-update-workspace-path ${wspace}"
+        intervals_command = "--output-interval-list-to-file ${prefix}.interval_list"
+    }
+
+    // settings changed for running update gendb mode. inputs_command same as default, updated_db forces module to emit the updated gendb
+    if (run_updatewspace) {
+        dir_command = "--genomicsdb-update-workspace-path ${wspace}"
+        intervals_command = ''
+        updated_db = wspace.toString()
+    }
+
+    """
+    gatk GenomicsDBImport \\
+        $inputs_command \\
+        $dir_command \\
+        $intervals_command \\
+        $options.args
+
+    cat <<-END_VERSIONS > versions.yml
+    ${getProcessName(task.process)}:
+        ${getSoftwareName(task.process)}: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/gatk4/genomicsdbimport/meta.yml b/modules/gatk4/genomicsdbimport/meta.yml
new file mode 100644
index 00000000..f7a32e7e
--- /dev/null
+++ b/modules/gatk4/genomicsdbimport/meta.yml
@@ -0,0 +1,80 @@
+name: gatk4_genomicsdbimport
+description: merge GVCFs from multiple samples. For use in joint genotyping or somatic panel of normal creation.
+keywords:
+  - gatk4
+  - genomicsdbimport
+  - genomicsdb
+  - panelofnormalscreation
+  - jointgenotyping
+tools:
+  - gatk4:
+      description: |
+        Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools
+        with a primary focus on variant discovery and genotyping. Its powerful processing engine
+        and high-performance computing features make it capable of taking on projects of any size.
+      homepage: https://gatk.broadinstitute.org/hc/en-us
+      documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s
+      doi: 10.1158/1538-7445.AM2017-3590
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test']
+  - vcf:
+      type: list
+      description: either a list of vcf files to be used to create or update a genomicsdb, or a file that contains a map to vcf files to be used.
+      pattern: "*.vcf.gz"
+
+  - tbi:
+      type: list
+      description: list of tbi files that match with the input vcf files
+      pattern: "*.vcf.gz.tbi"
+
+  - wspace:
+      type: path
+      description: path to an existing genomicsdb to be used in update db mode or get intervals mode. This WILL NOT specify name of a new genomicsdb in create db mode.
+      pattern: "/path/to/existing/gendb"
+
+  - intervalfile:
+      type: file
+      description: file containing the intervals to be used when creating the genomicsdb
+      pattern: "*.interval_list"
+
+  - intervalval:
+      type: string
+      description: if an intervals file has not been specified, the value entered here will be used as an interval via the "-L" argument
+      pattern: "example: chr1:1000-10000"
+
+  - run_intlist:
+      type: boolean
+      description: Specify whether to run get interval list mode, this option cannot be specified at the same time as run_updatewspace.
+      pattern: "true/false"
+
+  - run_updatewspace:
+      type: boolean
+      description: Specify whether to run update genomicsdb mode, this option takes priority over run_intlist.
+      pattern: "true/false"
+
+  - input_map:
+      type: boolean
+      description: Specify whether the vcf input is providing a list of vcf file(s) or a single file containing a map of paths to vcf files to be used to create or update a genomicsdb.
+      pattern: "true/false"
+
+output:
+  - genomicsdb:
+      type: directory
+      description: Directory containing the files that compose the genomicsdb workspace, this is only output for create mode, as update changes an existing db
+      pattern: "*_genomicsdb"
+  - intervallist:
+      type: file
+      description: File containing the intervals used to generate the genomicsdb, only created by get intervals mode.
+      pattern: "*.interval_list"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+
+authors:
+  - "@GCJMackenzie"
diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml
index d7fed0c0..db10b55b 100644
--- a/tests/config/pytest_modules.yml
+++ b/tests/config/pytest_modules.yml
@@ -450,6 +450,10 @@ gatk4/fastqtosam:
   - modules/gatk4/fastqtosam/**
   - tests/modules/gatk4/fastqtosam/**
 
+gatk4/genomicsdbimport:
+  - modules/gatk4/genomicsdbimport/**
+  - tests/modules/gatk4/genomicsdbimport/**
+
 gatk4/filtermutectcalls:
   - modules/gatk4/filtermutectcalls/**
   - tests/modules/gatk4/filtermutectcalls/**
diff --git a/tests/config/test_data.config b/tests/config/test_data.config
index c05e1c8f..d0489e03 100644
--- a/tests/config/test_data.config
+++ b/tests/config/test_data.config
@@ -103,6 +103,7 @@ params {
                 genome_dict                                    = "${test_data_dir}/genomics/homo_sapiens/genome/genome.dict"
                 genome_gff3                                    = "${test_data_dir}/genomics/homo_sapiens/genome/genome.gff3"
                 genome_gtf                                     = "${test_data_dir}/genomics/homo_sapiens/genome/genome.gtf"
+                genome_interval_list                           = "${test_data_dir}/genomics/homo_sapiens/genome/genome.interval_list"
                 genome_sizes                                   = "${test_data_dir}/genomics/homo_sapiens/genome/genome.sizes"
                 genome_bed                                     = "${test_data_dir}/genomics/homo_sapiens/genome/genome.bed"
                 genome_header                                  = "${test_data_dir}/genomics/homo_sapiens/genome/genome.header"
@@ -181,6 +182,7 @@ params {
                 test2_baserecalibrator_table                   = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/test2.baserecalibrator.table"
                 test_pileups_table                             = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/test.pileups.table"
                 test2_pileups_table                            = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/test2.pileups.table"
+                test_genomicsdb_tar_gz                         = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/test_genomicsdb.tar.gz"
 
                 test_test2_paired_mutect2_calls_vcf_gz         = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/paired_mutect2_calls/test_test2_paired_mutect2_calls.vcf.gz"
                 test_test2_paired_mutect2_calls_vcf_gz_tbi     = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/paired_mutect2_calls/test_test2_paired_mutect2_calls.vcf.gz.tbi"
diff --git a/tests/modules/gatk4/genomicsdbimport/main.nf b/tests/modules/gatk4/genomicsdbimport/main.nf
new file mode 100644
index 00000000..ef67b04a
--- /dev/null
+++ b/tests/modules/gatk4/genomicsdbimport/main.nf
@@ -0,0 +1,61 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { UNTAR           } from '../../../../modules/untar/main.nf'           addParams( options: [:] )
+include { GATK4_GENOMICSDBIMPORT } from '../../../../modules/gatk4/genomicsdbimport/main.nf' addParams( options: [:] )
+
+workflow test_gatk4_genomicsdbimport_create_genomicsdb { // Runs the module with all three mode flags off, on one gVCF + index and an interval list file
+
+    input = [ [ id:'test_genomicsdb'], // meta map
+              file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz'], checkIfExists: true) ,
+              file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz_tbi'], checkIfExists: true) ,
+              file(params.test_data['homo_sapiens']['genome']['genome_interval_list'], checkIfExists: true) ,
+              [] , // left empty; presumably the interval-string slot, not needed when an interval file is given - TODO confirm against the module's input tuple
+              [] ] // left empty; the other two workflows in this file put UNTAR.out.untar in this last position
+
+    run_intlist = false
+    run_updatewspace = false
+    input_map = false
+
+    GATK4_GENOMICSDBIMPORT ( input, run_intlist, run_updatewspace, input_map )
+}
+
+workflow test_gatk4_genomicsdbimport_get_intervalslist { // Runs the module with only run_intlist set, on the output of UNTAR
+    db    = file(params.test_data['homo_sapiens']['illumina']['test_genomicsdb_tar_gz'], checkIfExists: true)
+
+    UNTAR ( db )
+
+    def input = Channel.of([ [ id:'test_genomicsdb'], // meta map
+              [] , // no vcf in this test
+              [] , // no vcf index in this test
+              [] , // no interval list file in this test
+              [] ])
+              .combine(UNTAR.out.untar) // appends the UNTAR output as the final element of the input tuple
+
+    run_intlist = true
+    run_updatewspace = false
+    input_map = false
+
+    GATK4_GENOMICSDBIMPORT ( input, run_intlist, run_updatewspace, input_map )
+}
+
+workflow test_gatk4_genomicsdbimport_update_genomicsdb { // Runs the module with only run_updatewspace set, adding the test2 gVCF to the output of UNTAR
+    db    = file(params.test_data['homo_sapiens']['illumina']['test_genomicsdb_tar_gz'], checkIfExists: true)
+
+    UNTAR ( db )
+
+    def input = Channel.of([ [ id:'test_genomicsdb'], // meta map
+              file( params.test_data['homo_sapiens']['illumina']['test2_genome_vcf_gz'] , checkIfExists: true) ,
+              file( params.test_data['homo_sapiens']['illumina']['test2_genome_vcf_gz_tbi'] , checkIfExists: true) ,
+              [] , // no interval list file in this test (the create test passes genome_interval_list in this position)
+              [] ])
+              .combine(UNTAR.out.untar) // appends the UNTAR output as the final element of the input tuple
+
+    run_intlist = false
+    run_updatewspace = true
+    input_map = false
+
+    GATK4_GENOMICSDBIMPORT ( input, run_intlist, run_updatewspace, input_map )
+
+}
diff --git a/tests/modules/gatk4/genomicsdbimport/test.yml b/tests/modules/gatk4/genomicsdbimport/test.yml
new file mode 100644
index 00000000..68f5ae7a
--- /dev/null
+++ b/tests/modules/gatk4/genomicsdbimport/test.yml
@@ -0,0 +1,50 @@
+- name: gatk4 genomicsdbimport test_gatk4_genomicsdbimport_create_genomicsdb
+  command: nextflow run tests/modules/gatk4/genomicsdbimport -entry test_gatk4_genomicsdbimport_create_genomicsdb -c tests/config/nextflow.config
+  tags:
+    - gatk4/genomicsdbimport
+    - gatk4
+  files:
+    - path: output/gatk4/test_genomicsdb/__tiledb_workspace.tdb
+      md5sum: d41d8cd98f00b204e9800998ecf8427e
+    - path: output/gatk4/test_genomicsdb/callset.json
+      md5sum: a7d07d1c86449bbb1091ff29368da07a
+    - path: output/gatk4/test_genomicsdb/chr22$1$40001/.__consolidation_lock
+      md5sum: d41d8cd98f00b204e9800998ecf8427e
+    - path: output/gatk4/test_genomicsdb/chr22$1$40001/__array_schema.tdb
+    - path: output/gatk4/test_genomicsdb/chr22$1$40001/genomicsdb_meta_dir/genomicsdb_column_bounds.json
+      md5sum: 2502f79658bc000578ebcfddfc1194c0
+    - path: output/gatk4/test_genomicsdb/vcfheader.vcf
+      contains:
+        - "FORMAT=<ID=AD,Number=R,Type=Integer,Description="
+    - path: output/gatk4/test_genomicsdb/vidmap.json
+      md5sum: 18d3f68bd2cb6f4474990507ff95017a
+
+- name: gatk4 genomicsdbimport test_gatk4_genomicsdbimport_get_intervalslist
+  command: nextflow run tests/modules/gatk4/genomicsdbimport -entry test_gatk4_genomicsdbimport_get_intervalslist -c tests/config/nextflow.config
+  tags:
+    - gatk4/genomicsdbimport
+    - gatk4
+  files:
+    - path: output/gatk4/test_genomicsdb.interval_list
+      md5sum: 4c85812ac15fc1cd29711a851d23c0bf
+
+- name: gatk4 genomicsdbimport test_gatk4_genomicsdbimport_update_genomicsdb
+  command: nextflow run tests/modules/gatk4/genomicsdbimport -entry test_gatk4_genomicsdbimport_update_genomicsdb -c tests/config/nextflow.config
+  tags:
+    - gatk4/genomicsdbimport
+    - gatk4
+  files:
+    - path: output/gatk4/test_genomicsdb/__tiledb_workspace.tdb
+      md5sum: d41d8cd98f00b204e9800998ecf8427e
+    - path: output/gatk4/test_genomicsdb/callset.json
+      md5sum: 1ea31b59b9a218dd5681164aff4a5e07
+    - path: output/gatk4/test_genomicsdb/chr22$1$40001/.__consolidation_lock
+      md5sum: d41d8cd98f00b204e9800998ecf8427e
+    - path: output/gatk4/test_genomicsdb/chr22$1$40001/__array_schema.tdb
+      md5sum: 35e7fdd18149be80c0c3f56fa1f23971
+    - path: output/gatk4/test_genomicsdb/chr22$1$40001/genomicsdb_meta_dir/genomicsdb_column_bounds.json
+      md5sum: 2502f79658bc000578ebcfddfc1194c0
+    - path: output/gatk4/test_genomicsdb/vcfheader.vcf
+      md5sum: 47a615385a49f9261e088104b903bb9b
+    - path: output/gatk4/test_genomicsdb/vidmap.json
+      md5sum: 18d3f68bd2cb6f4474990507ff95017a

From bc8697b7664eea2df22cb6a73e65d3c37ecbd221 Mon Sep 17 00:00:00 2001
From: GCJMackenzie <43276267+GCJMackenzie@users.noreply.github.com>
Date: Fri, 29 Oct 2021 17:45:32 +0100
Subject: [PATCH 06/15] FIX: Createsomaticpanelofnormals add requested test
 changes (#983)

* files created for createsompon, script written, meta written, still needs tests

* updated to 2.0.0 method input, however this requires a genomicsDB input now

* script finished, meta yaml updated. Tests working locally, test yaml made, needs genomicsdb example on nf-core to run repository tests

* versions updated, issue with test data not able to download directory

* updated tests to include repo-side data

* Apply suggestions from code review

* Update modules/gatk4/createsomaticpanelofnormals/main.nf

* temp commit to allow checkout

* updated createsompon tests to use tarred gendb

* resolve conflict

* Update tests/modules/gatk4/createsomaticpanelofnormals/main.nf

Co-authored-by: Harshil Patel <drpatelh@users.noreply.github.com>

Co-authored-by: GCJMackenzie <gavin.mackenzie@nibsc.org>
Co-authored-by: Harshil Patel <drpatelh@users.noreply.github.com>
---
 .../gatk4/createsomaticpanelofnormals/main.nf | 61 ++-----------------
 1 file changed, 5 insertions(+), 56 deletions(-)

diff --git a/tests/modules/gatk4/createsomaticpanelofnormals/main.nf b/tests/modules/gatk4/createsomaticpanelofnormals/main.nf
index 34fc9847..6e5366f5 100644
--- a/tests/modules/gatk4/createsomaticpanelofnormals/main.nf
+++ b/tests/modules/gatk4/createsomaticpanelofnormals/main.nf
@@ -2,67 +2,16 @@
 
 nextflow.enable.dsl = 2
 
+include { UNTAR           } from '../../../../modules/untar/main.nf'           addParams( options: [:] )
 include { GATK4_CREATESOMATICPANELOFNORMALS } from '../../../../modules/gatk4/createsomaticpanelofnormals/main.nf' addParams( options: [suffix:'.pon'] )
 
 workflow test_gatk4_createsomaticpanelofnormals {
-    maindir = file('test_genomicsdb')
-    subdir1 = file('test_genomicsdb/chr22$1$40001')
-    subdir2 = file('test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448')
-    subdir3 = file('test_genomicsdb/chr22$1$40001/genomicsdb_meta_dir')
-    subdir2.mkdirs()
-    subdir3.mkdirs()
+    db    = file(params.test_data['homo_sapiens']['illumina']['test_genomicsdb_tar_gz'], checkIfExists: true)
 
-    file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/__tiledb_workspace.tdb' , checkIfExists: true).copyTo(maindir)
-    file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/vcfheader.vcf' , checkIfExists: true).copyTo(maindir)
-    file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/vidmap.json' , checkIfExists: true).copyTo(maindir)
-    file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/callset.json' , checkIfExists: true).copyTo(maindir)
-    file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/.__consolidation_lock' , checkIfExists: true).copyTo(subdir1)
-    file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__array_schema.tdb' , checkIfExists: true).copyTo(subdir1)
-    file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/genomicsdb_meta_dir/genomicsdb_column_bounds.json' , checkIfExists: true).copyTo(subdir3)
-    file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/genomicsdb_meta_dir/genomicsdb_meta_2b25a6c2-cb94-4a4a-9005-acb7c595d322.json' , checkIfExists: true).copyTo(subdir3)
-    file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/AD.tdb' , checkIfExists: true).copyTo(subdir2)
-    file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/AD_var.tdb' , checkIfExists: true).copyTo(subdir2)
-    file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/ALT.tdb' , checkIfExists: true).copyTo(subdir2)
-    file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/ALT_var.tdb' , checkIfExists: true).copyTo(subdir2)
-    file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/BaseQRankSum.tdb' , checkIfExists: true).copyTo(subdir2)
-    file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/DB.tdb' , checkIfExists: true).copyTo(subdir2)
-    file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/DP.tdb' , checkIfExists: true).copyTo(subdir2)
-    file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/DP_FORMAT.tdb' , checkIfExists: true).copyTo(subdir2)
-    file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/END.tdb' , checkIfExists: true).copyTo(subdir2)
-    file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/ExcessHet.tdb' , checkIfExists: true).copyTo(subdir2)
-    file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/FILTER.tdb' , checkIfExists: true).copyTo(subdir2)
-    file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/FILTER_var.tdb' , checkIfExists: true).copyTo(subdir2)
-    file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/GQ.tdb' , checkIfExists: true).copyTo(subdir2)
-    file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/GT.tdb' , checkIfExists: true).copyTo(subdir2)
-    file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/GT_var.tdb' , checkIfExists: true).copyTo(subdir2)
-    file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/ID.tdb' , checkIfExists: true).copyTo(subdir2)
-    file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/ID_var.tdb' , checkIfExists: true).copyTo(subdir2)
-    file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/InbreedingCoeff.tdb' , checkIfExists: true).copyTo(subdir2)
-    file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/MIN_DP.tdb' , checkIfExists: true).copyTo(subdir2)
-    file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/MLEAC.tdb' , checkIfExists: true).copyTo(subdir2)
-    file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/MLEAC_var.tdb' , checkIfExists: true).copyTo(subdir2)
-    file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/MLEAF.tdb' , checkIfExists: true).copyTo(subdir2)
-    file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/MLEAF_var.tdb' , checkIfExists: true).copyTo(subdir2)
-    file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/MQRankSum.tdb' , checkIfExists: true).copyTo(subdir2)
-    file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/PGT.tdb' , checkIfExists: true).copyTo(subdir2)
-    file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/PGT_var.tdb' , checkIfExists: true).copyTo(subdir2)
-    file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/PID.tdb' , checkIfExists: true).copyTo(subdir2)
-    file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/PID_var.tdb' , checkIfExists: true).copyTo(subdir2)
-    file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/PL.tdb' , checkIfExists: true).copyTo(subdir2)
-    file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/PL_var.tdb' , checkIfExists: true).copyTo(subdir2)
-    file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/PS.tdb' , checkIfExists: true).copyTo(subdir2)
-    file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/QUAL.tdb' , checkIfExists: true).copyTo(subdir2)
-    file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/RAW_MQandDP.tdb' , checkIfExists: true).copyTo(subdir2)
-    file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/REF.tdb' , checkIfExists: true).copyTo(subdir2)
-    file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/REF_var.tdb' , checkIfExists: true).copyTo(subdir2)
-    file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/ReadPosRankSum.tdb' , checkIfExists: true).copyTo(subdir2)
-    file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/SB.tdb' , checkIfExists: true).copyTo(subdir2)
-    file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/__book_keeping.tdb.gz' , checkIfExists: true).copyTo(subdir2)
-    file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/__coords.tdb' , checkIfExists: true).copyTo(subdir2)
-    file( 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gatk/test_genomicsdb/chr22$1$40001/__3cf81648-433d-4464-be08-23d082445c9b139814474716928_1630588248448/__tiledb_fragment.tdb' , checkIfExists: true).copyTo(subdir2)
+    UNTAR ( db )
 
-    input = [ [ id:'test' ], // meta map
-              file( maindir , checkIfExists: true)]
+    input = Channel.of([ id:'test'])
+              .combine(UNTAR.out.untar)
 
     fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
     fastaidx = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)

From 9fb26ae46248c33ad858d79a36beb07912c85a89 Mon Sep 17 00:00:00 2001
From: Harshil Patel <drpatelh@users.noreply.github.com>
Date: Sat, 30 Oct 2021 09:52:13 +0100
Subject: [PATCH 07/15] Add IDR module (#908)

* Add IDR module

* Add meta and implement main todos

* Modifying idr tests

* Update tests/config/test_data.config

Co-authored-by: Harshil Patel <drpatelh@users.noreply.github.com>

* Update tests/config/test_data.config

Co-authored-by: Harshil Patel <drpatelh@users.noreply.github.com>

* Update main.nf

* Update tests/config/test_data.config

Co-authored-by: Harshil Patel <drpatelh@users.noreply.github.com>

* Update test with new file name

Co-authored-by: Jose Espinosa-Carrasco <kadomu@gmail.com>
---
 modules/idr/functions.nf        | 78 +++++++++++++++++++++++++++++++++
 modules/idr/main.nf             | 56 +++++++++++++++++++++++
 modules/idr/meta.yml            | 53 ++++++++++++++++++++++
 tests/config/pytest_modules.yml |  4 ++
 tests/config/test_data.config   |  6 +++
 tests/modules/idr/main.nf       | 35 +++++++++++++++
 tests/modules/idr/test.yml      | 35 +++++++++++++++
 7 files changed, 267 insertions(+)
 create mode 100644 modules/idr/functions.nf
 create mode 100644 modules/idr/main.nf
 create mode 100644 modules/idr/meta.yml
 create mode 100644 tests/modules/idr/main.nf
 create mode 100644 tests/modules/idr/test.yml

diff --git a/modules/idr/functions.nf b/modules/idr/functions.nf
new file mode 100644
index 00000000..85628ee0
--- /dev/null
+++ b/modules/idr/functions.nf
@@ -0,0 +1,78 @@
+//
+//  Utility functions used in nf-core DSL2 module files
+//
+
+//
+// Extract name of software tool from process name using $task.process
+//
+def getSoftwareName(task_process) {
+    return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()
+}
+
+//
+// Extract name of module from process name using $task.process
+//
+def getProcessName(task_process) {
+    return task_process.tokenize(':')[-1]
+}
+
+//
+// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules
+//
+def initOptions(Map args) {
+    def Map options = [:]
+    options.args            = args.args ?: ''
+    options.args2           = args.args2 ?: ''
+    options.args3           = args.args3 ?: ''
+    options.publish_by_meta = args.publish_by_meta ?: []
+    options.publish_dir     = args.publish_dir ?: ''
+    options.publish_files   = args.publish_files
+    options.suffix          = args.suffix ?: ''
+    return options
+}
+
+//
+// Tidy up and join elements of a list to return a path string
+//
+def getPathFromList(path_list) {
+    def paths = path_list.findAll { item -> !item?.trim().isEmpty() }      // Remove empty entries
+    paths     = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes
+    return paths.join('/')
+}
+
+//
+// Function to save/publish module results
+//
+def saveFiles(Map args) {
+    def ioptions  = initOptions(args.options)
+    def path_list = [ ioptions.publish_dir ?: args.publish_dir ]
+
+    // Do not publish versions.yml unless running from pytest workflow
+    if (args.filename.equals('versions.yml') && !System.getenv("NF_CORE_MODULES_TEST")) {
+        return null
+    }
+    if (ioptions.publish_by_meta) {
+        def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta
+        for (key in key_list) {
+            if (args.meta && key instanceof String) {
+                def path = key
+                if (args.meta.containsKey(key)) {
+                    path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key]
+                }
+                path = path instanceof String ? path : ''
+                path_list.add(path)
+            }
+        }
+    }
+    if (ioptions.publish_files instanceof Map) {
+        for (ext in ioptions.publish_files) {
+            if (args.filename.endsWith(ext.key)) {
+                def ext_list = path_list.collect()
+                ext_list.add(ext.value)
+                return "${getPathFromList(ext_list)}/$args.filename"
+            }
+        }
+    } else if (ioptions.publish_files == null) {
+        return "${getPathFromList(path_list)}/$args.filename"
+    }
+}
diff --git a/modules/idr/main.nf b/modules/idr/main.nf
new file mode 100644
index 00000000..006826ac
--- /dev/null
+++ b/modules/idr/main.nf
@@ -0,0 +1,56 @@
+// Import generic module functions
+include { initOptions; saveFiles; getSoftwareName; getProcessName } from './functions'
+
+params.options = [:]
+options        = initOptions(params.options)
+
+process IDR { // Runs `idr` on replicate peak files and emits the IDR values, the log, the plot(s) and versions.yml
+    tag "$prefix"
+    label 'process_low'
+    publishDir "${params.outdir}",
+        mode: params.publish_dir_mode,
+        saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:[:], publish_by_meta:[]) }
+
+    conda (params.enable_conda ? "bioconda::idr=2.0.4.2" : null)
+    if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
+        container "https://depot.galaxyproject.org/singularity/idr:2.0.4.2--py39hcbe4a3b_5"
+    } else {
+        container "quay.io/biocontainers/idr:2.0.4.2--py38h9af456f_5" // NOTE(review): build string differs from the Singularity image (py38 vs py39) - confirm this is intended
+    }
+
+    input:
+    path peaks     // replicate peak files, passed together to `idr --samples`
+    val peak_type  // must be one of 'narrowPeak', 'broadPeak' or 'bed' (checked in the script block)
+    val prefix     // optional prefix for the output file names; also used as the task tag
+
+    output:
+    path "*idrValues.txt", emit: idr
+    path "*log.txt"      , emit: log
+    path "*.png"         , emit: png
+    path "versions.yml"  , emit: versions
+
+    script:
+    if (peaks.toList().size() < 2) { // size() counts the staged files; property-style `.size` on a Groovy list is looked up on each element instead
+        log.error "[ERROR] idr needs at least two replicates only one provided."
+    } // the message is only logged - the idr command below is still rendered and run
+    def peak_types = ['narrowPeak', 'broadPeak', 'bed']
+    if (!peak_types.contains(peak_type)) {
+        log.error "[ERROR] Invalid option: '${peak_type}'. Valid options for 'peak_type': ${peak_types.join(', ')}."
+    } // likewise only logged
+    def idr_vals = prefix ? "${prefix}.idrValues.txt" : "idrValues.txt" // falls back to an un-prefixed name when prefix is empty
+    def log_file = prefix ? "${prefix}.log.txt" : "log.txt"
+    """
+    idr \\
+        --samples $peaks \\
+        --input-file-type $peak_type \\
+        --output-file $idr_vals \\
+        --log-output-file $log_file \\
+        --plot \\
+        $options.args
+
+    cat <<-END_VERSIONS > versions.yml
+    ${getProcessName(task.process)}:
+        ${getSoftwareName(task.process)}: \$(echo \$(idr --version 2>&1) | sed 's/^.*IDR //; s/ .*\$//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/idr/meta.yml b/modules/idr/meta.yml
new file mode 100644
index 00000000..c89e72a4
--- /dev/null
+++ b/modules/idr/meta.yml
@@ -0,0 +1,53 @@
+name: idr
+description: |
+  Measures reproducibility of ChIP-seq, ATAC-seq peaks using IDR (Irreproducible
+  Discovery Rate)
+keywords:
+  - IDR
+  - peaks
+  - ChIP-seq
+  - ATAC-seq
+tools:
+  - idr:
+      description: |
+        The IDR (Irreproducible Discovery Rate) framework is a unified approach
+        to measure the reproducibility of findings identified from replicate
+        experiments and provide highly stable thresholds based on reproducibility.
+      homepage: None
+      documentation: None
+      tool_dev_url: https://github.com/kundajelab/idr
+      doi: ""
+      licence: ['GPL v2']
+input:
+  - peaks:
+      type: tuple of two files
+      description: BED, narrowPeak or broadPeak files of replicates
+      pattern: "*"
+  - peak_type:
+      type: value
+      description: Type of peak file
+      pattern: "{narrowPeak,broadPeak,bed}"
+  - prefix:
+      type: value
+      description: Prefix for output files
+output:
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - idr:
+      type: file
+      description: Text file containing IDR values
+      pattern: "*.{txt}"
+  - log:
+      type: file
+      description: Log file
+      pattern: "*.{txt}"
+  - png:
+      type: file
+      description: Plot generated by idr
+      pattern: "*.{png}"
+
+authors:
+  - "@drpatelh"
+  - "@joseespinosa"
diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml
index db10b55b..ea999b6a 100644
--- a/tests/config/pytest_modules.yml
+++ b/tests/config/pytest_modules.yml
@@ -597,6 +597,10 @@ homer/makeucscfile:
   - modules/homer/makeucscfile/**
   - tests/modules/homer/makeucscfile/**
 
+idr:
+  - modules/idr/**
+  - tests/modules/idr/**
+
 iqtree:
   - modules/iqtree/**
   - tests/modules/iqtree/**
diff --git a/tests/config/test_data.config b/tests/config/test_data.config
index d0489e03..12252542 100644
--- a/tests/config/test_data.config
+++ b/tests/config/test_data.config
@@ -202,6 +202,12 @@ params {
                 test2_genome_vcf_gz_tbi                        = "${test_data_dir}/genomics/homo_sapiens/illumina/gvcf/test2.genome.vcf.gz.tbi"
                 test2_genome_vcf_idx                           = "${test_data_dir}/genomics/homo_sapiens/illumina/gvcf/test2.genome.vcf.idx"
 
+                test_broadpeak                                 = "${test_data_dir}/genomics/homo_sapiens/illumina/broadpeak/test.broadPeak"
+                test2_broadpeak                                = "${test_data_dir}/genomics/homo_sapiens/illumina/broadpeak/test2.broadPeak"
+
+                test_narrowpeak                                = "${test_data_dir}/genomics/homo_sapiens/illumina/narrowpeak/test.narrowPeak"
+                test2_narrowpeak                               = "${test_data_dir}/genomics/homo_sapiens/illumina/narrowpeak/test2.narrowPeak"
+
                 test_10x_1_fastq_gz                           = "${test_data_dir}/genomics/homo_sapiens/illumina/10xgenomics/test.10x_1.fastq.gz"
                 test_10x_2_fastq_gz                           = "${test_data_dir}/genomics/homo_sapiens/illumina/10xgenomics/test.10x_2.fastq.gz"
 
diff --git a/tests/modules/idr/main.nf b/tests/modules/idr/main.nf
new file mode 100644
index 00000000..aa141a57
--- /dev/null
+++ b/tests/modules/idr/main.nf
@@ -0,0 +1,35 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { IDR } from '../../../modules/idr/main.nf' addParams( options: [:] )
+
+workflow test_idr_narrowpeak {
+    // Two narrowPeak replicate files from the shared test data set (checkIfExists: true)
+    input = [
+        file(params.test_data['homo_sapiens']['illumina']['test_narrowpeak'], checkIfExists: true),
+        file(params.test_data['homo_sapiens']['illumina']['test2_narrowpeak'], checkIfExists: true)
+    ]
+    // Arguments: peak files, peak type, output prefix (names as listed in modules/idr/meta.yml)
+    IDR ( input, 'narrowPeak', 'test' )
+}
+
+workflow test_idr_broadpeak {
+    // Two broadPeak replicate files from the shared test data set (checkIfExists: true)
+    input = [
+        file(params.test_data['homo_sapiens']['illumina']['test_broadpeak'], checkIfExists: true),
+        file(params.test_data['homo_sapiens']['illumina']['test2_broadpeak'], checkIfExists: true)
+    ]
+    // Arguments: peak files, peak type, output prefix (names as listed in modules/idr/meta.yml)
+    IDR ( input, 'broadPeak', 'test' )
+}
+
+workflow test_idr_noprefix {
+    // Same narrowPeak replicates as test_idr_narrowpeak, but run with an empty output prefix
+    input = [
+        file(params.test_data['homo_sapiens']['illumina']['test_narrowpeak'], checkIfExists: true),
+        file(params.test_data['homo_sapiens']['illumina']['test2_narrowpeak'], checkIfExists: true)
+    ]
+    // With prefix '' tests/modules/idr/test.yml expects unprefixed names (idrValues.txt, log.txt)
+    IDR ( input, 'narrowPeak', '' )
+}
diff --git a/tests/modules/idr/test.yml b/tests/modules/idr/test.yml
new file mode 100644
index 00000000..35ee4bc9
--- /dev/null
+++ b/tests/modules/idr/test.yml
@@ -0,0 +1,35 @@
+- name: idr test_idr_narrowpeak
+  command: nextflow run tests/modules/idr -entry test_idr_narrowpeak -c tests/config/nextflow.config
+  tags:
+    - idr
+  files:
+    - path: output/idr/test.idrValues.txt
+      md5sum: 09be837cc6abbc3eb5958b74802eea55
+    - path: output/idr/test.idrValues.txt.png
+      md5sum: 4a7143ccc0ccadb37c2317bf626e6d96
+    - path: output/idr/test.log.txt
+      md5sum: 6443507ac66b9d3b64bc56b78328083e
+
+- name: idr test_idr_broadpeak
+  command: nextflow run tests/modules/idr -entry test_idr_broadpeak -c tests/config/nextflow.config
+  tags:
+    - idr
+  files:
+    - path: output/idr/test.idrValues.txt
+      md5sum: 387441c716815e4caec3e70a2cc11a4a
+    - path: output/idr/test.idrValues.txt.png
+      md5sum: 7204083ca5b920b4215a5991c12cb4e7
+    - path: output/idr/test.log.txt
+      md5sum: e6917133112b5cec135c182ffac19237
+
+- name: idr test_idr_noprefix
+  command: nextflow run tests/modules/idr -entry test_idr_noprefix -c tests/config/nextflow.config
+  tags:
+    - idr
+  files:
+    - path: output/idr/idrValues.txt
+      md5sum: 09be837cc6abbc3eb5958b74802eea55
+    - path: output/idr/idrValues.txt.png
+      md5sum: 4a7143ccc0ccadb37c2317bf626e6d96
+    - path: output/idr/log.txt
+      md5sum: 6443507ac66b9d3b64bc56b78328083e

From 81d65d4978a988b20ac21abf6a99c1baf936be65 Mon Sep 17 00:00:00 2001
From: Simon Pearce <24893913+SPPearce@users.noreply.github.com>
Date: Mon, 1 Nov 2021 12:12:14 +0000
Subject: [PATCH 08/15] Samtools fixmate module (#991)

* Samtools fixmate module

* Update modules/samtools/fixmate/meta.yml

Co-authored-by: James A. Fellows Yates <jfy133@gmail.com>

* Update modules/samtools/fixmate/main.nf

Co-authored-by: James A. Fellows Yates <jfy133@gmail.com>

* Update main.nf

* Update modules/samtools/fixmate/meta.yml

Co-authored-by: James A. Fellows Yates <jfy133@gmail.com>

* Update meta.yml

Co-authored-by: Simon Pearce <simon.pearce@cruk.manchester.ac.uk>
Co-authored-by: James A. Fellows Yates <jfy133@gmail.com>
---
 modules/samtools/fixmate/functions.nf   | 78 +++++++++++++++++++++++++
 modules/samtools/fixmate/main.nf        | 45 ++++++++++++++
 modules/samtools/fixmate/meta.yml       | 49 ++++++++++++++++
 tests/config/pytest_modules.yml         |  4 ++
 tests/modules/samtools/fixmate/main.nf  | 14 +++++
 tests/modules/samtools/fixmate/test.yml |  8 +++
 6 files changed, 198 insertions(+)
 create mode 100644 modules/samtools/fixmate/functions.nf
 create mode 100644 modules/samtools/fixmate/main.nf
 create mode 100644 modules/samtools/fixmate/meta.yml
 create mode 100644 tests/modules/samtools/fixmate/main.nf
 create mode 100644 tests/modules/samtools/fixmate/test.yml

diff --git a/modules/samtools/fixmate/functions.nf b/modules/samtools/fixmate/functions.nf
new file mode 100644
index 00000000..85628ee0
--- /dev/null
+++ b/modules/samtools/fixmate/functions.nf
@@ -0,0 +1,78 @@
+//
+//  Utility functions used in nf-core DSL2 module files
+//
+
+//
+// Extract name of software tool from process name using $task.process:
+// last ':'-separated token, then its first '_'-separated token, lower-cased (e.g. 'WF:SAMTOOLS_FIXMATE' -> 'samtools')
+def getSoftwareName(task_process) {
+    return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()
+}
+
+//
+// Extract name of module from process name using $task.process:
+// the last ':'-separated token, case preserved (e.g. 'WF:SAMTOOLS_FIXMATE' -> 'SAMTOOLS_FIXMATE')
+def getProcessName(task_process) {
+    return task_process.tokenize(':')[-1]
+}
+
+//
+// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules.
+// Falsy/missing entries fall back to '' ([] for publish_by_meta); publish_files is copied through unchanged.
+def initOptions(Map args) {
+    def Map options = [:]
+    options.args            = args.args ?: ''
+    options.args2           = args.args2 ?: ''
+    options.args3           = args.args3 ?: ''
+    options.publish_by_meta = args.publish_by_meta ?: []
+    options.publish_dir     = args.publish_dir ?: ''
+    options.publish_files   = args.publish_files // no default here: saveFiles distinguishes null from a Map
+    options.suffix          = args.suffix ?: ''
+    return options
+}
+
+//
+// Tidy up and join elements of a list to return a path string.
+// Null, empty and whitespace-only entries are dropped; leading/trailing slashes are stripped from the rest.
+def getPathFromList(path_list) {
+    def paths = path_list.findAll { item -> item?.trim() }                 // Remove null, empty and blank entries (Groovy truth)
+    paths     = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and leading/trailing slashes
+    return paths.join('/')
+}
+
+//
+// Function to save/publish module results; the module main.nf calls it from its publishDir saveAs closure.
+// Returns '<publish path>/<filename>', or null (explicitly for versions.yml, implicitly when no branch returns).
+def saveFiles(Map args) {
+    def ioptions  = initOptions(args.options)
+    def path_list = [ ioptions.publish_dir ?: args.publish_dir ] // options.publish_dir, when set, replaces args.publish_dir
+
+    // Do not publish versions.yml unless running from pytest workflow
+    if (args.filename.equals('versions.yml') && !System.getenv("NF_CORE_MODULES_TEST")) {
+        return null
+    }
+    if (ioptions.publish_by_meta) {
+        def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta // truthy non-List -> use args.publish_by_meta
+        for (key in key_list) {
+            if (args.meta && key instanceof String) {
+                def path = key // the key itself is used as the directory when meta has no such key
+                if (args.meta.containsKey(key)) {
+                    path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key]
+                }
+                path = path instanceof String ? path : '' // non-String meta values contribute no directory
+                path_list.add(path)
+            }
+        }
+    }
+    if (ioptions.publish_files instanceof Map) { // Map of filename suffix -> sub-directory; first matching entry wins
+        for (ext in ioptions.publish_files) {
+            if (args.filename.endsWith(ext.key)) {
+                def ext_list = path_list.collect()
+                ext_list.add(ext.value)
+                return "${getPathFromList(ext_list)}/$args.filename"
+            }
+        }
+    } else if (ioptions.publish_files == null) { // publish_files unset: every file gets a path
+        return "${getPathFromList(path_list)}/$args.filename"
+    }
+}
diff --git a/modules/samtools/fixmate/main.nf b/modules/samtools/fixmate/main.nf
new file mode 100644
index 00000000..e1a766a1
--- /dev/null
+++ b/modules/samtools/fixmate/main.nf
@@ -0,0 +1,45 @@
+// Import generic module functions
+include { initOptions; saveFiles; getSoftwareName; getProcessName } from './functions'
+
+params.options = [:]
+options        = initOptions(params.options)
+
+process SAMTOOLS_FIXMATE { // Runs `samtools fixmate` on one BAM per sample; emits <prefix>.bam and versions.yml
+    tag "$meta.id"
+    label 'process_low'
+    publishDir "${params.outdir}",
+        mode: params.publish_dir_mode,
+        saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) }
+
+    conda (params.enable_conda ? "bioconda::samtools=1.14" : null)
+    if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
+        container "https://depot.galaxyproject.org/singularity/samtools:1.14--hb421002_0"
+    } else {
+        container "quay.io/biocontainers/samtools:1.14--hb421002_0"
+    }
+
+    input:
+    tuple val(meta), path(bam)
+
+    output:
+    tuple val(meta), path("*.bam"), emit: bam
+    path "versions.yml"           , emit: versions
+
+    script:
+    def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
+    if ("$bam" == "${prefix}.bam") error "Input and output names are the same, use the suffix option to disambiguate!"
+    // The guard above refuses to run when the output name would equal the staged input name.
+    """
+    samtools \\
+        fixmate  \\
+        $options.args \\
+        -@ $task.cpus \\
+        $bam \\
+        ${prefix}.bam
+
+    cat <<-END_VERSIONS > versions.yml
+    ${getProcessName(task.process)}:
+        ${getSoftwareName(task.process)}: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/samtools/fixmate/meta.yml b/modules/samtools/fixmate/meta.yml
new file mode 100644
index 00000000..2cec6e7c
--- /dev/null
+++ b/modules/samtools/fixmate/meta.yml
@@ -0,0 +1,49 @@
+name: samtools_fixmate
+description: Samtools fixmate is a tool that can fill in information (insert size, cigar, mapq) about paired end reads onto the corresponding other read. Also has options to remove secondary/unmapped alignments and recalculate whether reads are proper pairs.
+keywords:
+    - fixmate
+    - samtools
+    - insert size
+    - repair
+    - bam
+    - paired
+    - read pairs
+tools:
+    - samtools:
+        description: |
+            SAMtools is a set of utilities for interacting with and post-processing
+            short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li.
+            These files are generated as output by short read aligners like BWA.
+        homepage: http://www.htslib.org/
+        documentation: http://www.htslib.org/doc/samtools.html
+        tool_dev_url: https://github.com/samtools/samtools
+        doi: 10.1093/bioinformatics/btp352
+        licence: ['MIT']
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - bam:
+      type: file
+      description: BAM/CRAM/SAM file, must be sorted by name, not coordinate
+      pattern: "*.{bam,cram,sam}"
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - bam:
+      type: file
+      description: A BAM/CRAM/SAM file with mate information added and/or proper pairs recalled
+      pattern: "*.{bam,cram,sam}"
+
+authors:
+  - "@sppearce"
diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml
index ea999b6a..6ec2d506 100644
--- a/tests/config/pytest_modules.yml
+++ b/tests/config/pytest_modules.yml
@@ -1027,6 +1027,10 @@ samtools/fastq:
   - modules/samtools/fastq/**
   - tests/modules/samtools/fastq/**
 
+samtools/fixmate:
+  - modules/samtools/fixmate/**
+  - tests/modules/samtools/fixmate/**
+
 samtools/flagstat:
   - modules/samtools/flagstat/**
   - tests/modules/samtools/flagstat/**
diff --git a/tests/modules/samtools/fixmate/main.nf b/tests/modules/samtools/fixmate/main.nf
new file mode 100644
index 00000000..5174beab
--- /dev/null
+++ b/tests/modules/samtools/fixmate/main.nf
@@ -0,0 +1,14 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { SAMTOOLS_FIXMATE } from '../../../../modules/samtools/fixmate/main.nf' addParams( options: [args:'-r -c -m'] )
+
+workflow test_samtools_fixmate {
+    // One paired-end BAM with its meta map; the module is included above with options.args '-r -c -m'
+    input = [ [ id:'test', single_end:false ], // meta map
+              file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true) ]
+
+    SAMTOOLS_FIXMATE ( input )
+
+}
diff --git a/tests/modules/samtools/fixmate/test.yml b/tests/modules/samtools/fixmate/test.yml
new file mode 100644
index 00000000..c7864c04
--- /dev/null
+++ b/tests/modules/samtools/fixmate/test.yml
@@ -0,0 +1,8 @@
+- name: samtools fixmate test_samtools_fixmate
+  command: nextflow run tests/modules/samtools/fixmate -entry test_samtools_fixmate -c tests/config/nextflow.config
+  tags:
+    - samtools/fixmate
+    - samtools
+  files:
+    - path: output/samtools/test.bam
+      md5sum: 92c8463710cdcaef2010aa02ed9e01fd

From cac6dc83bb09d7f33640692fa37d4cb9087abf00 Mon Sep 17 00:00:00 2001
From: Simon Pearce <24893913+SPPearce@users.noreply.github.com>
Date: Tue, 2 Nov 2021 09:59:25 +0000
Subject: [PATCH 09/15] gccounter module for hmmcopy (#1003)

* hmmcopy gccounter working

* Update modules/hmmcopy/gccounter/main.nf

Co-authored-by: Chris Cheshire <chris.j.cheshire@gmail.com>

* Update main.nf

Changed version to 0.1.1 as the container says

Co-authored-by: Simon Pearce <simon.pearce@cruk.manchester.ac.uk>
Co-authored-by: Chris Cheshire <chris.j.cheshire@gmail.com>
---
 modules/hmmcopy/gccounter/functions.nf   | 78 ++++++++++++++++++++++++
 modules/hmmcopy/gccounter/main.nf        | 40 ++++++++++++
 modules/hmmcopy/gccounter/meta.yml       | 33 ++++++++++
 tests/config/pytest_modules.yml          |  4 ++
 tests/modules/hmmcopy/gccounter/main.nf  | 11 ++++
 tests/modules/hmmcopy/gccounter/test.yml |  8 +++
 6 files changed, 174 insertions(+)
 create mode 100644 modules/hmmcopy/gccounter/functions.nf
 create mode 100644 modules/hmmcopy/gccounter/main.nf
 create mode 100644 modules/hmmcopy/gccounter/meta.yml
 create mode 100644 tests/modules/hmmcopy/gccounter/main.nf
 create mode 100644 tests/modules/hmmcopy/gccounter/test.yml

diff --git a/modules/hmmcopy/gccounter/functions.nf b/modules/hmmcopy/gccounter/functions.nf
new file mode 100644
index 00000000..85628ee0
--- /dev/null
+++ b/modules/hmmcopy/gccounter/functions.nf
@@ -0,0 +1,78 @@
+//
+//  Utility functions used in nf-core DSL2 module files
+//
+
+//
+// Extract name of software tool from process name using $task.process:
+// last ':'-separated token, then its first '_'-separated token, lower-cased (e.g. 'WF:HMMCOPY_GCCOUNTER' -> 'hmmcopy')
+def getSoftwareName(task_process) {
+    return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()
+}
+
+//
+// Extract name of module from process name using $task.process:
+// the last ':'-separated token, case preserved (e.g. 'WF:HMMCOPY_GCCOUNTER' -> 'HMMCOPY_GCCOUNTER')
+def getProcessName(task_process) {
+    return task_process.tokenize(':')[-1]
+}
+
+//
+// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules.
+// Falsy/missing entries fall back to '' ([] for publish_by_meta); publish_files is copied through unchanged.
+def initOptions(Map args) {
+    def Map options = [:]
+    options.args            = args.args ?: ''
+    options.args2           = args.args2 ?: ''
+    options.args3           = args.args3 ?: ''
+    options.publish_by_meta = args.publish_by_meta ?: []
+    options.publish_dir     = args.publish_dir ?: ''
+    options.publish_files   = args.publish_files // no default here: saveFiles distinguishes null from a Map
+    options.suffix          = args.suffix ?: ''
+    return options
+}
+
+//
+// Tidy up and join elements of a list to return a path string.
+// Null, empty and whitespace-only entries are dropped; leading/trailing slashes are stripped from the rest.
+def getPathFromList(path_list) {
+    def paths = path_list.findAll { item -> item?.trim() }                 // Remove null, empty and blank entries (Groovy truth)
+    paths     = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and leading/trailing slashes
+    return paths.join('/')
+}
+
+//
+// Function to save/publish module results; the module main.nf calls it from its publishDir saveAs closure.
+// Returns '<publish path>/<filename>', or null (explicitly for versions.yml, implicitly when no branch returns).
+def saveFiles(Map args) {
+    def ioptions  = initOptions(args.options)
+    def path_list = [ ioptions.publish_dir ?: args.publish_dir ] // options.publish_dir, when set, replaces args.publish_dir
+
+    // Do not publish versions.yml unless running from pytest workflow
+    if (args.filename.equals('versions.yml') && !System.getenv("NF_CORE_MODULES_TEST")) {
+        return null
+    }
+    if (ioptions.publish_by_meta) {
+        def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta // truthy non-List -> use args.publish_by_meta
+        for (key in key_list) {
+            if (args.meta && key instanceof String) {
+                def path = key // the key itself is used as the directory when meta has no such key
+                if (args.meta.containsKey(key)) {
+                    path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key]
+                }
+                path = path instanceof String ? path : '' // non-String meta values contribute no directory
+                path_list.add(path)
+            }
+        }
+    }
+    if (ioptions.publish_files instanceof Map) { // Map of filename suffix -> sub-directory; first matching entry wins
+        for (ext in ioptions.publish_files) {
+            if (args.filename.endsWith(ext.key)) {
+                def ext_list = path_list.collect()
+                ext_list.add(ext.value)
+                return "${getPathFromList(ext_list)}/$args.filename"
+            }
+        }
+    } else if (ioptions.publish_files == null) { // publish_files unset: every file gets a path
+        return "${getPathFromList(path_list)}/$args.filename"
+    }
+}
diff --git a/modules/hmmcopy/gccounter/main.nf b/modules/hmmcopy/gccounter/main.nf
new file mode 100644
index 00000000..6e7bc11f
--- /dev/null
+++ b/modules/hmmcopy/gccounter/main.nf
@@ -0,0 +1,40 @@
+// Import generic module functions
+include { initOptions; saveFiles; getSoftwareName; getProcessName } from './functions'
+
+params.options = [:]
+options        = initOptions(params.options)
+
+def VERSION = '0.1.1'
+
+process HMMCOPY_GCCOUNTER { // Runs gcCounter on a FASTA; its stdout is captured as <fasta basename>.gc.wig
+    label 'process_low'
+    publishDir "${params.outdir}",
+        mode: params.publish_dir_mode,
+        saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:[:], publish_by_meta:[]) }
+    // meta:[:] and publish_by_meta:[] above: this process takes no meta map input
+    conda (params.enable_conda ? "bioconda::hmmcopy=0.1.1" : null)
+    if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
+        container "https://depot.galaxyproject.org/singularity/hmmcopy:0.1.1--h2e03b76_5"
+    } else {
+        container "quay.io/biocontainers/hmmcopy:0.1.1--h2e03b76_5"
+    }
+
+    input:
+    path fasta
+
+    output:
+    path "*.gc.wig"    , emit: wig
+    path "versions.yml", emit: versions
+    // The version written to versions.yml is the Groovy constant VERSION defined above this process
+    script:
+    """
+    gcCounter \\
+        $options.args \\
+        ${fasta} > ${fasta.baseName}.gc.wig
+
+    cat <<-END_VERSIONS > versions.yml
+    ${getProcessName(task.process)}:
+        ${getSoftwareName(task.process)}: \$(echo $VERSION)
+    END_VERSIONS
+    """
+}
diff --git a/modules/hmmcopy/gccounter/meta.yml b/modules/hmmcopy/gccounter/meta.yml
new file mode 100644
index 00000000..71727af2
--- /dev/null
+++ b/modules/hmmcopy/gccounter/meta.yml
@@ -0,0 +1,33 @@
+name: hmmcopy_gccounter
+description: gcCounter function from HMMcopy utilities, used to generate GC content in non-overlapping windows from a fasta reference
+keywords:
+  - hmmcopy
+  - gccounter
+  - cnv
+tools:
+  - hmmcopy:
+      description: C++ based programs for analyzing BAM files and preparing read counts -- used with bioconductor-hmmcopy
+      homepage: https://github.com/shahcompbio/hmmcopy_utils
+      documentation: https://github.com/shahcompbio/hmmcopy_utils
+      tool_dev_url: https://github.com/shahcompbio/hmmcopy_utils
+      doi: ""
+      licence: ['GPL v3']
+
+input:
+  - fasta:
+      type: file
+      description: Input genome fasta file
+
+
+output:
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - wig:
+      type: file
+      description: wig file containing gc content of each window of the genome
+      pattern: "*.{gc.wig}"
+
+authors:
+  - "@sppearce"
diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml
index 6ec2d506..4e4526b1 100644
--- a/tests/config/pytest_modules.yml
+++ b/tests/config/pytest_modules.yml
@@ -577,6 +577,10 @@ hisat2/extractsplicesites:
   - modules/hisat2/extractsplicesites/**
   - tests/modules/hisat2/extractsplicesites/**
 
+hmmcopy/gccounter:
+  - modules/hmmcopy/gccounter/**
+  - tests/modules/hmmcopy/gccounter/**
+
 hmmer/hmmalign:
   - modules/hmmer/hmmalign/**
   - tests/modules/hmmer/hmmalign/**
diff --git a/tests/modules/hmmcopy/gccounter/main.nf b/tests/modules/hmmcopy/gccounter/main.nf
new file mode 100644
index 00000000..30846ca9
--- /dev/null
+++ b/tests/modules/hmmcopy/gccounter/main.nf
@@ -0,0 +1,11 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { HMMCOPY_GCCOUNTER } from '../../../../modules/hmmcopy/gccounter/main.nf' addParams( options: [:] )
+
+workflow test_hmmcopy_gccounter {
+    fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
+    // tests/modules/hmmcopy/gccounter/test.yml expects output/hmmcopy/genome.gc.wig from this run
+    HMMCOPY_GCCOUNTER (fasta)
+}
diff --git a/tests/modules/hmmcopy/gccounter/test.yml b/tests/modules/hmmcopy/gccounter/test.yml
new file mode 100644
index 00000000..edcd6b92
--- /dev/null
+++ b/tests/modules/hmmcopy/gccounter/test.yml
@@ -0,0 +1,8 @@
+- name: hmmcopy gccounter test_hmmcopy_gccounter
+  command: nextflow run tests/modules/hmmcopy/gccounter -entry test_hmmcopy_gccounter -c tests/config/nextflow.config
+  tags:
+    - hmmcopy
+    - hmmcopy/gccounter
+  files:
+    - path: output/hmmcopy/genome.gc.wig
+      md5sum: 59ad14bc5aaa903187d7b248c9490deb

From 374d81e0b39d24ec2ef84b6f931c74ded5e3e682 Mon Sep 17 00:00:00 2001
From: Simon Pearce <24893913+SPPearce@users.noreply.github.com>
Date: Tue, 2 Nov 2021 11:01:13 +0000
Subject: [PATCH 10/15] readcounter module for hmmcopy (#1001)

* readcounter module for hmmcopy

* Changed version number

* Fix indentation

* Update main.nf

* Update modules/hmmcopy/readcounter/main.nf

Co-authored-by: Chris Cheshire <chris.j.cheshire@gmail.com>

Co-authored-by: Simon Pearce <simon.pearce@cruk.manchester.ac.uk>
Co-authored-by: Chris Cheshire <chris.j.cheshire@gmail.com>
---
 modules/hmmcopy/readcounter/functions.nf   | 78 ++++++++++++++++++++++
 modules/hmmcopy/readcounter/main.nf        | 42 ++++++++++++
 modules/hmmcopy/readcounter/meta.yml       | 43 ++++++++++++
 tests/config/pytest_modules.yml            |  4 ++
 tests/modules/hmmcopy/readcounter/main.nf  | 14 ++++
 tests/modules/hmmcopy/readcounter/test.yml |  8 +++
 6 files changed, 189 insertions(+)
 create mode 100644 modules/hmmcopy/readcounter/functions.nf
 create mode 100644 modules/hmmcopy/readcounter/main.nf
 create mode 100644 modules/hmmcopy/readcounter/meta.yml
 create mode 100644 tests/modules/hmmcopy/readcounter/main.nf
 create mode 100644 tests/modules/hmmcopy/readcounter/test.yml

diff --git a/modules/hmmcopy/readcounter/functions.nf b/modules/hmmcopy/readcounter/functions.nf
new file mode 100644
index 00000000..85628ee0
--- /dev/null
+++ b/modules/hmmcopy/readcounter/functions.nf
@@ -0,0 +1,78 @@
+//
+//  Utility functions used in nf-core DSL2 module files
+//
+
+//
+// Extract name of software tool from process name using $task.process:
+// last ':'-separated token, then its first '_'-separated token, lower-cased (e.g. 'WF:HMMCOPY_READCOUNTER' -> 'hmmcopy')
+def getSoftwareName(task_process) {
+    return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()
+}
+
+//
+// Extract name of module from process name using $task.process:
+// the last ':'-separated token, case preserved (e.g. 'WF:HMMCOPY_READCOUNTER' -> 'HMMCOPY_READCOUNTER')
+def getProcessName(task_process) {
+    return task_process.tokenize(':')[-1]
+}
+
+//
+// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules.
+// Falsy/missing entries fall back to '' ([] for publish_by_meta); publish_files is copied through unchanged.
+def initOptions(Map args) {
+    def Map options = [:]
+    options.args            = args.args ?: ''
+    options.args2           = args.args2 ?: ''
+    options.args3           = args.args3 ?: ''
+    options.publish_by_meta = args.publish_by_meta ?: []
+    options.publish_dir     = args.publish_dir ?: ''
+    options.publish_files   = args.publish_files // no default here: saveFiles distinguishes null from a Map
+    options.suffix          = args.suffix ?: ''
+    return options
+}
+
+//
+// Tidy up and join elements of a list to return a path string.
+// Null, empty and whitespace-only entries are dropped; leading/trailing slashes are stripped from the rest.
+def getPathFromList(path_list) {
+    def paths = path_list.findAll { item -> item?.trim() }                 // Remove null, empty and blank entries (Groovy truth)
+    paths     = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and leading/trailing slashes
+    return paths.join('/')
+}
+
+//
+// Function to save/publish module results; the module main.nf calls it from its publishDir saveAs closure.
+// Returns '<publish path>/<filename>', or null (explicitly for versions.yml, implicitly when no branch returns).
+def saveFiles(Map args) {
+    def ioptions  = initOptions(args.options)
+    def path_list = [ ioptions.publish_dir ?: args.publish_dir ] // options.publish_dir, when set, replaces args.publish_dir
+
+    // Do not publish versions.yml unless running from pytest workflow
+    if (args.filename.equals('versions.yml') && !System.getenv("NF_CORE_MODULES_TEST")) {
+        return null
+    }
+    if (ioptions.publish_by_meta) {
+        def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta // truthy non-List -> use args.publish_by_meta
+        for (key in key_list) {
+            if (args.meta && key instanceof String) {
+                def path = key // the key itself is used as the directory when meta has no such key
+                if (args.meta.containsKey(key)) {
+                    path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key]
+                }
+                path = path instanceof String ? path : '' // non-String meta values contribute no directory
+                path_list.add(path)
+            }
+        }
+    }
+    if (ioptions.publish_files instanceof Map) { // Map of filename suffix -> sub-directory; first matching entry wins
+        for (ext in ioptions.publish_files) {
+            if (args.filename.endsWith(ext.key)) {
+                def ext_list = path_list.collect()
+                ext_list.add(ext.value)
+                return "${getPathFromList(ext_list)}/$args.filename"
+            }
+        }
+    } else if (ioptions.publish_files == null) { // publish_files unset: every file gets a path
+        return "${getPathFromList(path_list)}/$args.filename"
+    }
+}
diff --git a/modules/hmmcopy/readcounter/main.nf b/modules/hmmcopy/readcounter/main.nf
new file mode 100644
index 00000000..9e3e72a7
--- /dev/null
+++ b/modules/hmmcopy/readcounter/main.nf
@@ -0,0 +1,42 @@
+// Import generic module functions
+include { initOptions; saveFiles; getSoftwareName; getProcessName } from './functions'
+
+params.options = [:]
+options        = initOptions(params.options)
+
+def VERSION = '0.1.1'
+
+process HMMCOPY_READCOUNTER { // Runs readCounter on one BAM per sample; its stdout is captured as <prefix>.wig
+    tag "$meta.id"
+    label 'process_low'
+    publishDir "${params.outdir}",
+        mode: params.publish_dir_mode,
+        saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) }
+
+    conda (params.enable_conda ? "bioconda::hmmcopy=0.1.1" : null)
+    if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
+        container "https://depot.galaxyproject.org/singularity/hmmcopy:0.1.1--h2e03b76_5"
+    } else {
+        container "quay.io/biocontainers/hmmcopy:0.1.1--h2e03b76_5"
+    }
+
+    input:
+    tuple val(meta), path(bam), path(bai) // bai is staged but never named in the script; presumably readCounter finds the index beside the BAM (TODO confirm)
+
+    output:
+    tuple val(meta), path("*.wig"), emit: wig
+    path "versions.yml"           , emit: versions
+
+    script:
+    def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
+    """
+    readCounter \\
+        $options.args \\
+        ${bam} > ${prefix}.wig
+
+    cat <<-END_VERSIONS > versions.yml
+    ${getProcessName(task.process)}:
+        ${getSoftwareName(task.process)}: \$(echo $VERSION)
+    END_VERSIONS
+    """
+}
diff --git a/modules/hmmcopy/readcounter/meta.yml b/modules/hmmcopy/readcounter/meta.yml
new file mode 100644
index 00000000..9b09a55c
--- /dev/null
+++ b/modules/hmmcopy/readcounter/meta.yml
@@ -0,0 +1,43 @@
+name: hmmcopy_readcounter
+description: readCounter function from HMMcopy utilities, used to generate read counts in windows
+keywords:
+  - hmmcopy
+  - readcounter
+  - cnv
+tools:
+  - hmmcopy:
+      description: C++ based programs for analyzing BAM files and preparing read counts -- used with bioconductor-hmmcopy
+      homepage: https://github.com/shahcompbio/hmmcopy_utils
+      documentation: https://github.com/shahcompbio/hmmcopy_utils
+      tool_dev_url: https://github.com/shahcompbio/hmmcopy_utils
+      doi: ""
+      licence: ['GPL v3']
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - bam:
+      type: file
+      description: BAM/CRAM/SAM file
+      pattern: "*.{bam,cram,sam}"
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - wig:
+      type: file
+      description: A wig file with the number of reads lying within each window in each chromosome
+      pattern: "*.wig"
+
+authors:
+  - "@sppearce"
diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml
index 4e4526b1..9320245f 100644
--- a/tests/config/pytest_modules.yml
+++ b/tests/config/pytest_modules.yml
@@ -581,6 +581,10 @@ hmmcopy/gccounter:
   - modules/hmmcopy/gccounter/**
   - tests/modules/hmmcopy/gccounter/**
 
+hmmcopy/readcounter:
+  - modules/hmmcopy/readcounter/**
+  - tests/modules/hmmcopy/readcounter/**
+
 hmmer/hmmalign:
   - modules/hmmer/hmmalign/**
   - tests/modules/hmmer/hmmalign/**
diff --git a/tests/modules/hmmcopy/readcounter/main.nf b/tests/modules/hmmcopy/readcounter/main.nf
new file mode 100644
index 00000000..9025f98e
--- /dev/null
+++ b/tests/modules/hmmcopy/readcounter/main.nf
@@ -0,0 +1,14 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { HMMCOPY_READCOUNTER } from '../../../../modules/hmmcopy/readcounter/main.nf' addParams( options: [:] )
+
+workflow test_hmmcopy_readcounter {
+    // Tuple of meta map, BAM and its BAI index (each file wrapped in a single-element list)
+    input = [ [ id:'test'], // meta map
+              [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_bam'], checkIfExists: true)],
+              [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_bam_bai'], checkIfExists: true)]
+            ]
+    HMMCOPY_READCOUNTER ( input )
+}
diff --git a/tests/modules/hmmcopy/readcounter/test.yml b/tests/modules/hmmcopy/readcounter/test.yml
new file mode 100644
index 00000000..6c00ee08
--- /dev/null
+++ b/tests/modules/hmmcopy/readcounter/test.yml
@@ -0,0 +1,8 @@
+- name: hmmcopy readcounter test_hmmcopy_readcounter
+  command: nextflow run tests/modules/hmmcopy/readcounter -entry test_hmmcopy_readcounter -c tests/config/nextflow.config
+  tags:
+    - hmmcopy
+    - hmmcopy/readcounter
+  files:
+    - path: output/hmmcopy/test.wig
+      md5sum: 3655d8325baea81b3b690791262c6b57

From 4619d012e562a224b69d9c09d51e12e4394f3238 Mon Sep 17 00:00:00 2001
From: fbdtemme <69114541+fbdtemme@users.noreply.github.com>
Date: Tue, 2 Nov 2021 17:00:25 +0100
Subject: [PATCH 11/15] Add cram support to Allelecounter module (#1013)

* Add CRAM support to allelecounter

* Update meta.yml

* Rename bam,bai to input,input_index

* Apply suggestions from code review

* Fix reference to renamed variable

Co-authored-by: Maxime U. Garcia <maxime.garcia@scilifelab.se>
---
 modules/allelecounter/main.nf        |  8 ++++++--
 modules/allelecounter/meta.yml       |  9 ++++++---
 tests/modules/allelecounter/main.nf  | 16 ++++++++++++++--
 tests/modules/allelecounter/test.yml | 12 ++++++++++--
 4 files changed, 36 insertions(+), 9 deletions(-)

diff --git a/modules/allelecounter/main.nf b/modules/allelecounter/main.nf
index 5184df7d..8d986579 100644
--- a/modules/allelecounter/main.nf
+++ b/modules/allelecounter/main.nf
@@ -19,8 +19,9 @@ process ALLELECOUNTER {
     }
 
     input:
-    tuple val(meta), path(bam), path(bai)
+    tuple val(meta), path(input), path(input_index)
     path loci
+    path fasta
 
     output:
     tuple val(meta), path("*.alleleCount"), emit: allelecount
@@ -28,11 +29,14 @@ process ALLELECOUNTER {
 
     script:
     def prefix   = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
+    def reference_options = fasta ? "-r $fasta": ""
+
     """
     alleleCounter \\
         $options.args \\
         -l $loci \\
-        -b $bam \\
+        -b $input \\
+        $reference_options \\
         -o ${prefix}.alleleCount
 
     cat <<-END_VERSIONS > versions.yml
diff --git a/modules/allelecounter/meta.yml b/modules/allelecounter/meta.yml
index a15f3eac..7d921e12 100644
--- a/modules/allelecounter/meta.yml
+++ b/modules/allelecounter/meta.yml
@@ -19,11 +19,11 @@ input:
       description: |
         Groovy Map containing sample information
         e.g. [ id:'test', single_end:false ]
-  - bam:
+  - input:
       type: file
       description: BAM/CRAM/SAM file
       pattern: "*.{bam,cram,sam}"
-  - bai:
+  - input_index:
       type: file
       description: BAM/CRAM/SAM index file
       pattern: "*.{bai,crai,sai}"
@@ -31,7 +31,9 @@ input:
       type: file
       description: loci file <CHR><tab><POS1>
       pattern: "*.{tsv}"
-
+  - fasta:
+      type: file
+      description: Input genome fasta file. Required when passing CRAM files.
 
 output:
   - meta:
@@ -50,3 +52,4 @@ output:
 
 authors:
   - "@fullama"
+  - "@fbdtemme"
diff --git a/tests/modules/allelecounter/main.nf b/tests/modules/allelecounter/main.nf
index 542529c2..b938ab94 100644
--- a/tests/modules/allelecounter/main.nf
+++ b/tests/modules/allelecounter/main.nf
@@ -3,12 +3,24 @@ nextflow.enable.dsl = 2
 
 include { ALLELECOUNTER } from '../../../modules/allelecounter/main.nf' addParams( options: [:] )
 
-workflow test_allelecounter {
+workflow test_allelecounter_bam {
     input = [ [ id:'test', single_end:false ], // meta map
               file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
               file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true)
             ]
     positions = [ file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true) ]
 
-    ALLELECOUNTER ( input, positions )
+    ALLELECOUNTER ( input, positions, [] )
+}
+
+
+workflow test_allelecounter_cram {
+    input = [ [ id:'test', single_end:false ], // meta map
+              file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true),
+              file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true)
+            ]
+    positions = [ file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true) ]
+    fasta = [ file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) ]
+
+    ALLELECOUNTER ( input, positions, fasta )
 }
diff --git a/tests/modules/allelecounter/test.yml b/tests/modules/allelecounter/test.yml
index 7ed71559..bbef0ecc 100644
--- a/tests/modules/allelecounter/test.yml
+++ b/tests/modules/allelecounter/test.yml
@@ -1,7 +1,15 @@
-- name: allelecounter test_allelecounter
-  command: nextflow run tests/modules/allelecounter -entry test_allelecounter -c tests/config/nextflow.config
+- name: allelecounter test_allelecounter_bam
+  command: nextflow run tests/modules/allelecounter -entry test_allelecounter_bam -c tests/config/nextflow.config
   tags:
     - allelecounter
   files:
     - path: output/allelecounter/test.alleleCount
       md5sum: 2bbe9d7331b78bdac30fe30dbc5fdaf3
+
+- name: allelecounter test_allelecounter_cram
+  command: nextflow run tests/modules/allelecounter -entry test_allelecounter_cram -c tests/config/nextflow.config
+  tags:
+    - allelecounter
+  files:
+    - path: output/allelecounter/test.alleleCount
+      md5sum: 2f83352a185168c7c98e9e42550b2856

From 9767b081b9d808d7b6671d34d48c70afb436a5f1 Mon Sep 17 00:00:00 2001
From: "Maxime U. Garcia" <max.u.garcia@gmail.com>
Date: Wed, 3 Nov 2021 11:22:06 +0100
Subject: [PATCH 12/15] Merge freebayes subtools (#1015)

* feat: merge freebayes subtools

* fix: typo

* assess comments from review

* fix: path to module
---
 modules/freebayes/{germline => }/functions.nf |  0
 modules/freebayes/{germline => }/main.nf      |  7 +-
 modules/freebayes/{germline => }/meta.yml     |  9 +-
 modules/freebayes/somatic/functions.nf        | 78 ---------------
 modules/freebayes/somatic/main.nf             | 74 ---------------
 modules/freebayes/somatic/meta.yml            | 66 -------------
 tests/config/pytest_modules.yml               | 10 +-
 tests/modules/freebayes/germline/main.nf      | 51 ----------
 tests/modules/freebayes/germline/test.yml     | 26 -----
 tests/modules/freebayes/main.nf               | 95 +++++++++++++++++++
 tests/modules/freebayes/somatic/main.nf       | 37 --------
 tests/modules/freebayes/somatic/test.yml      | 17 ----
 tests/modules/freebayes/test.yml              | 39 ++++++++
 13 files changed, 148 insertions(+), 361 deletions(-)
 rename modules/freebayes/{germline => }/functions.nf (100%)
 rename modules/freebayes/{germline => }/main.nf (91%)
 rename modules/freebayes/{germline => }/meta.yml (94%)
 delete mode 100644 modules/freebayes/somatic/functions.nf
 delete mode 100644 modules/freebayes/somatic/main.nf
 delete mode 100644 modules/freebayes/somatic/meta.yml
 delete mode 100644 tests/modules/freebayes/germline/main.nf
 delete mode 100644 tests/modules/freebayes/germline/test.yml
 create mode 100644 tests/modules/freebayes/main.nf
 delete mode 100644 tests/modules/freebayes/somatic/main.nf
 delete mode 100644 tests/modules/freebayes/somatic/test.yml
 create mode 100644 tests/modules/freebayes/test.yml

diff --git a/modules/freebayes/germline/functions.nf b/modules/freebayes/functions.nf
similarity index 100%
rename from modules/freebayes/germline/functions.nf
rename to modules/freebayes/functions.nf
diff --git a/modules/freebayes/germline/main.nf b/modules/freebayes/main.nf
similarity index 91%
rename from modules/freebayes/germline/main.nf
rename to modules/freebayes/main.nf
index eae62036..0b23dc40 100644
--- a/modules/freebayes/germline/main.nf
+++ b/modules/freebayes/main.nf
@@ -4,7 +4,7 @@ include { initOptions; saveFiles; getProcessName; getSoftwareName } from './func
 params.options = [:]
 options        = initOptions(params.options)
 
-process FREEBAYES_GERMLINE {
+process FREEBAYES {
     tag "$meta.id"
     label 'process_low'
     publishDir "${params.outdir}",
@@ -19,7 +19,7 @@ process FREEBAYES_GERMLINE {
     }
 
     input:
-    tuple val(meta), path(input), path(input_index)
+    tuple val(meta), path(input_1), path(input_1_index), path(input_2), path(input_2_index)
     path fasta
     path fai
     path targets
@@ -33,6 +33,7 @@ process FREEBAYES_GERMLINE {
 
     script:
     def prefix           = options.suffix ? "${meta.id}${options.suffix}"  : "${meta.id}"
+    def input            = input_2        ? "${input_1} ${input_2}"        : "${input_1}"
     def targets_file     = targets        ? "--target ${targets}"          : ""
     def samples_file     = samples        ? "--samples ${samples}"         : ""
     def populations_file = populations    ? "--populations ${populations}" : ""
@@ -48,7 +49,7 @@ process FREEBAYES_GERMLINE {
             $populations_file \\
             $cnv_file \\
             $options.args \\
-            $input  > ${prefix}.vcf
+            $input > ${prefix}.vcf
 
         gzip --no-name ${prefix}.vcf
 
diff --git a/modules/freebayes/germline/meta.yml b/modules/freebayes/meta.yml
similarity index 94%
rename from modules/freebayes/germline/meta.yml
rename to modules/freebayes/meta.yml
index 86650715..75d44826 100644
--- a/modules/freebayes/germline/meta.yml
+++ b/modules/freebayes/meta.yml
@@ -1,11 +1,14 @@
-name: freebayes_germline
+name: freebayes
 description: A haplotype-based variant detector
 keywords:
   - variant caller
   - SNP
   - genotyping
-  - variant calling
+  - somatic variant calling
+  - germline variant calling
+  - bacterial variant calling
   - bayesian
+
 tools:
   - freebayes:
       description: Bayesian haplotype-based polymorphism discovery and genotyping
@@ -73,6 +76,8 @@ output:
       type: file
       description: Compressed VCF file
       pattern: "*.vcf.gz"
+
 authors:
   - "@maxibor"
   - "@FriederikeHanssen"
+  - "@maxulysse"
diff --git a/modules/freebayes/somatic/functions.nf b/modules/freebayes/somatic/functions.nf
deleted file mode 100644
index 85628ee0..00000000
--- a/modules/freebayes/somatic/functions.nf
+++ /dev/null
@@ -1,78 +0,0 @@
-//
-//  Utility functions used in nf-core DSL2 module files
-//
-
-//
-// Extract name of software tool from process name using $task.process
-//
-def getSoftwareName(task_process) {
-    return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()
-}
-
-//
-// Extract name of module from process name using $task.process
-//
-def getProcessName(task_process) {
-    return task_process.tokenize(':')[-1]
-}
-
-//
-// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules
-//
-def initOptions(Map args) {
-    def Map options = [:]
-    options.args            = args.args ?: ''
-    options.args2           = args.args2 ?: ''
-    options.args3           = args.args3 ?: ''
-    options.publish_by_meta = args.publish_by_meta ?: []
-    options.publish_dir     = args.publish_dir ?: ''
-    options.publish_files   = args.publish_files
-    options.suffix          = args.suffix ?: ''
-    return options
-}
-
-//
-// Tidy up and join elements of a list to return a path string
-//
-def getPathFromList(path_list) {
-    def paths = path_list.findAll { item -> !item?.trim().isEmpty() }      // Remove empty entries
-    paths     = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes
-    return paths.join('/')
-}
-
-//
-// Function to save/publish module results
-//
-def saveFiles(Map args) {
-    def ioptions  = initOptions(args.options)
-    def path_list = [ ioptions.publish_dir ?: args.publish_dir ]
-
-    // Do not publish versions.yml unless running from pytest workflow
-    if (args.filename.equals('versions.yml') && !System.getenv("NF_CORE_MODULES_TEST")) {
-        return null
-    }
-    if (ioptions.publish_by_meta) {
-        def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta
-        for (key in key_list) {
-            if (args.meta && key instanceof String) {
-                def path = key
-                if (args.meta.containsKey(key)) {
-                    path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key]
-                }
-                path = path instanceof String ? path : ''
-                path_list.add(path)
-            }
-        }
-    }
-    if (ioptions.publish_files instanceof Map) {
-        for (ext in ioptions.publish_files) {
-            if (args.filename.endsWith(ext.key)) {
-                def ext_list = path_list.collect()
-                ext_list.add(ext.value)
-                return "${getPathFromList(ext_list)}/$args.filename"
-            }
-        }
-    } else if (ioptions.publish_files == null) {
-        return "${getPathFromList(path_list)}/$args.filename"
-    }
-}
diff --git a/modules/freebayes/somatic/main.nf b/modules/freebayes/somatic/main.nf
deleted file mode 100644
index c1579661..00000000
--- a/modules/freebayes/somatic/main.nf
+++ /dev/null
@@ -1,74 +0,0 @@
-// Import generic module functions
-include { initOptions; saveFiles; getProcessName; getSoftwareName } from './functions'
-
-params.options = [:]
-options        = initOptions(params.options)
-
-process FREEBAYES_SOMATIC {
-    tag "$meta.id"
-    label 'process_low'
-    publishDir "${params.outdir}",
-        mode: params.publish_dir_mode,
-        saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) }
-
-    conda (params.enable_conda ? "bioconda::freebayes=1.3.5" : null)
-    if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
-        container "https://depot.galaxyproject.org/singularity/freebayes:1.3.5--py38ha193a2f_3"
-    } else {
-        container "quay.io/biocontainers/freebayes:1.3.5--py38ha193a2f_3"
-    }
-
-    input:
-    tuple val(meta), path(input_normal), path(input_index_normal), path(input_tumor), path(input_index_tumor)
-    path fasta
-    path fai
-    path targets
-    path samples
-
-    output:
-    tuple val(meta), path("*.vcf.gz")   , emit: vcf
-    path  "versions.yml"                , emit: versions
-
-    script:
-    def prefix           = options.suffix ? "${meta.id}${options.suffix}"  : "${meta.id}"
-    def targets_file     = targets        ? "--target ${targets}"          : ""
-    def samples_file     = samples        ? "--samples ${samples}"         : ""
-
-    if (task.cpus > 1) {
-        """
-        freebayes-parallel \\
-            <(fasta_generate_regions.py ${fasta}.fai 10000) ${task.cpus} \\
-            -f $fasta \\
-            $targets_file \\
-            $samples_file \\
-            $options.args \\
-            $input_tumor \\
-            $input_normal  > ${prefix}.vcf
-
-        gzip --no-name ${prefix}.vcf
-
-        cat <<-END_VERSIONS > versions.yml
-        ${getProcessName(task.process)}:
-            ${getSoftwareName(task.process)}: \$(echo \$(freebayes --version 2>&1) | sed 's/version:\s*v//g' )
-        END_VERSIONS
-        """
-
-    } else {
-        """
-        freebayes \\
-            -f $fasta \\
-            $targets_file \\
-            $samples_file \\
-            $options.args \\
-            $input_tumor \\
-            $input_normal  > ${prefix}.vcf
-
-        gzip --no-name ${prefix}.vcf
-
-        cat <<-END_VERSIONS > versions.yml
-        ${getProcessName(task.process)}:
-            ${getSoftwareName(task.process)}: \$(echo \$(freebayes --version 2>&1) | sed 's/version:\s*v//g' )
-        END_VERSIONS
-        """
-    }
-}
diff --git a/modules/freebayes/somatic/meta.yml b/modules/freebayes/somatic/meta.yml
deleted file mode 100644
index 391e5007..00000000
--- a/modules/freebayes/somatic/meta.yml
+++ /dev/null
@@ -1,66 +0,0 @@
-name: freebayes_somatic
-description: A haplotype-based variant detector
-keywords:
-  - variant caller
-  - SNP
-  - genotyping
-  - somatic variant calling
-  - bayesian
-
-tools:
-  - freebayes:
-      description: Bayesian haplotype-based polymorphism discovery and genotyping
-      homepage: https://github.com/freebayes/freebayes
-      documentation: https://github.com/freebayes/freebayes
-      tool_dev_url: https://github.com/freebayes/freebayes
-      doi: "arXiv:1207.3907"
-      licence: ['MIT']
-
-input:
-  - meta:
-      type: map
-      description: |
-        Groovy Map containing sample information
-        e.g. [ id:'test', single_end:false ]
-  - input:
-      type: file
-      description: BAM/CRAM/SAM file
-      pattern: "*.{bam,cram,sam}"
-  - input_index:
-      type: file
-      description: BAM/CRAM/SAM index file
-      pattern: "*.bam.bai"
-  - fasta:
-      type: file
-      description: reference fasta file
-      pattern: ".{fa,fa.gz,fasta,fasta.gz}"
-  - fai:
-      type: file
-      description: reference fasta file index
-      pattern: "*.fai"
-  - targets:
-      type: file
-      description: Optional - Limit analysis to targets listed in this BED-format FILE.
-      pattern: "*.bed"
-  - samples:
-      type: file
-      description: Optional - Limit analysis to samples listed (one per line) in the FILE.
-      pattern: "*.txt"
-
-output:
-  - meta:
-      type: map
-      description: |
-        Groovy Map containing sample information
-        e.g. [ id:'test', single_end:false ]
-  - version:
-      type: file
-      description: File containing software version
-      pattern: "*.{version.txt}"
-  - vcf:
-      type: file
-      description: Compressed VCF file
-      pattern: "*.vcf.gz"
-
-authors:
-  - "@FriederikeHanssen"
diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml
index 9320245f..047f83ff 100644
--- a/tests/config/pytest_modules.yml
+++ b/tests/config/pytest_modules.yml
@@ -410,13 +410,9 @@ flash:
   - modules/flash/**
   - tests/modules/flash/**
 
-freebayes/germline:
-  - modules/freebayes/germline/**
-  - tests/modules/freebayes/germline/**
-
-freebayes/somatic:
-  - modules/freebayes/somatic/**
-  - tests/modules/freebayes/somatic/**
+freebayes:
+  - modules/freebayes/**
+  - tests/modules/freebayes/**
 
 gatk4/applybqsr:
   - modules/gatk4/applybqsr/**
diff --git a/tests/modules/freebayes/germline/main.nf b/tests/modules/freebayes/germline/main.nf
deleted file mode 100644
index 1b39eea0..00000000
--- a/tests/modules/freebayes/germline/main.nf
+++ /dev/null
@@ -1,51 +0,0 @@
-#!/usr/bin/env nextflow
-
-nextflow.enable.dsl = 2
-
-include { FREEBAYES_GERMLINE } from '../../../../modules/freebayes/germline/main.nf' addParams( options: [:] )
-
-workflow test_freebayes {
-
-    input = [ [ id:'test', single_end:false ], // meta map
-              file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
-              file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true)]
-    fasta   = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
-    fai         = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true)
-    targets = []
-    samples = []
-    populations = []
-    cnv = []
-
-    FREEBAYES_GERMLINE ( input, fasta, fai, targets, samples, populations, cnv)
-}
-
-workflow test_freebayes_bed {
-
-    input = [ [ id:'test', single_end:false ], // meta map
-              file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
-              file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true)]
-    fasta   = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
-    fai         = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true)
-    targets     = file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true)
-    samples     = []
-    populations = []
-    cnv         = []
-
-    FREEBAYES_GERMLINE ( input, fasta, fai, targets, samples, populations, cnv)
-}
-
-workflow test_freebayes_cram {
-
-    input = [ [ id:'test', single_end:false ], // meta map
-                file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true),
-                file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true)
-            ]
-    fasta   = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
-    fai     = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
-    targets = []
-    samples = []
-    populations = []
-    cnv = []
-
-    FREEBAYES_GERMLINE ( input, fasta, fai, targets, samples, populations, cnv)
-}
diff --git a/tests/modules/freebayes/germline/test.yml b/tests/modules/freebayes/germline/test.yml
deleted file mode 100644
index 55925b92..00000000
--- a/tests/modules/freebayes/germline/test.yml
+++ /dev/null
@@ -1,26 +0,0 @@
-- name: freebayes germline test_freebayes
-  command: nextflow run tests/modules/freebayes/germline -entry test_freebayes -c tests/config/nextflow.config
-  tags:
-    - freebayes
-    - freebayes/germline
-  files:
-    - path: output/freebayes/test.vcf.gz
-      md5sum: 1ec210ad27514c7a4140c924dc66d979
-
-- name: freebayes germline test_freebayes_bed
-  command: nextflow run tests/modules/freebayes/germline -entry test_freebayes_bed -c tests/config/nextflow.config
-  tags:
-    - freebayes
-    - freebayes/germline
-  files:
-    - path: output/freebayes/test.vcf.gz
-      md5sum: e8923cccd5dac196f72d3d3997a60706
-
-- name: freebayes germline test_freebayes_cram
-  command: nextflow run tests/modules/freebayes/germline -entry test_freebayes_cram -c tests/config/nextflow.config
-  tags:
-    - freebayes
-    - freebayes/germline
-  files:
-    - path: output/freebayes/test.vcf.gz
-      md5sum: cb57a3ed154618e3aa4a5272fcfb7521
diff --git a/tests/modules/freebayes/main.nf b/tests/modules/freebayes/main.nf
new file mode 100644
index 00000000..c6f5641f
--- /dev/null
+++ b/tests/modules/freebayes/main.nf
@@ -0,0 +1,95 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { FREEBAYES } from '../../../modules/freebayes/main.nf' addParams( options: [:] )
+
+workflow test_freebayes {
+
+    input = [ [ id:'test', single_end:false ], // meta map
+              file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
+              file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true),
+              [],
+              []
+            ]
+    fasta       = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
+    fai         = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true)
+    targets     = []
+    samples     = []
+    populations = []
+    cnv         = []
+
+    FREEBAYES (input, fasta, fai, targets, samples, populations, cnv)
+}
+
+workflow test_freebayes_bed {
+
+    input = [ [ id:'test', single_end:false ], // meta map
+              file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
+              file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true),
+              [],
+              []
+            ]
+    fasta       = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
+    fai         = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true)
+    targets     = file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true)
+    samples     = []
+    populations = []
+    cnv         = []
+
+    FREEBAYES (input, fasta, fai, targets, samples, populations, cnv)
+}
+
+workflow test_freebayes_cram {
+
+    input = [ [ id:'test', single_end:false ], // meta map
+                file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true),
+                file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true),
+              [],
+              []
+            ]
+    fasta       = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
+    fai         = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
+    targets     = []
+    samples     = []
+    populations = []
+    cnv         = []
+
+    FREEBAYES (input, fasta, fai, targets, samples, populations, cnv)
+}
+
+workflow test_freebayes_somatic {
+
+    input = [ [ id:'test', single_end:false ], // meta map
+                file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
+                file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true),
+                file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam'], checkIfExists: true),
+                file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam_bai'], checkIfExists: true)
+            ]
+    fasta       = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
+    fai         = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
+    targets     = []
+    samples     = []
+    populations = []
+    cnv         = []
+
+    FREEBAYES (input, fasta, fai, targets, samples, populations, cnv)
+}
+
+workflow test_freebayes_somatic_cram_intervals {
+
+    input = [ [ id:'test', single_end:false ], // meta map
+                file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true),
+                file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true),
+                file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_cram'], checkIfExists: true),
+                file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_cram_crai'], checkIfExists: true)
+            ]
+    fasta       = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
+    fai         = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
+    targets     = file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true)
+    samples     = []
+    populations = []
+    cnv         = []
+
+    FREEBAYES (input, fasta, fai, targets, samples, populations, cnv)
+}
diff --git a/tests/modules/freebayes/somatic/main.nf b/tests/modules/freebayes/somatic/main.nf
deleted file mode 100644
index d26caf34..00000000
--- a/tests/modules/freebayes/somatic/main.nf
+++ /dev/null
@@ -1,37 +0,0 @@
-#!/usr/bin/env nextflow
-
-nextflow.enable.dsl = 2
-
-include { FREEBAYES_SOMATIC } from '../../../../modules/freebayes/somatic/main.nf' addParams( options: [:] )
-
-workflow test_freebayes {
-
-    input = [ [ id:'test', single_end:false ], // meta map
-                file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
-                file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true),
-                file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam'], checkIfExists: true),
-                file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam_bai'], checkIfExists: true)
-            ]
-    fasta   = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
-    fai     = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
-    targets = []
-    samples = []
-
-    FREEBAYES_SOMATIC ( input, fasta, fai, targets, samples)
-}
-
-workflow test_freebayes_intervals {
-
-    input = [ [ id:'test', single_end:false ], // meta map
-                file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true),
-                file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true),
-                file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_cram'], checkIfExists: true),
-                file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_cram_crai'], checkIfExists: true)
-            ]
-    fasta   = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
-    fai     = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
-    targets = file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true)
-    samples = []
-
-    FREEBAYES_SOMATIC ( input, fasta, fai, targets, samples)
-}
diff --git a/tests/modules/freebayes/somatic/test.yml b/tests/modules/freebayes/somatic/test.yml
deleted file mode 100644
index 93113e60..00000000
--- a/tests/modules/freebayes/somatic/test.yml
+++ /dev/null
@@ -1,17 +0,0 @@
-- name: freebayes somatic test_freebayes
-  command: nextflow run tests/modules/freebayes/somatic -entry test_freebayes -c tests/config/nextflow.config
-  tags:
-    - freebayes/somatic
-    - freebayes
-  files:
-    - path: output/freebayes/test.vcf.gz
-      md5sum: 1c47d02f27ec5918558c8688ce6e7780
-
-- name: freebayes somatic test_freebayes_intervals
-  command: nextflow run tests/modules/freebayes/somatic -entry test_freebayes_intervals -c tests/config/nextflow.config
-  tags:
-    - freebayes/somatic
-    - freebayes
-  files:
-    - path: output/freebayes/test.vcf.gz
-      md5sum: 5b8a12666bde63746dcec7afcd3ef789
diff --git a/tests/modules/freebayes/test.yml b/tests/modules/freebayes/test.yml
new file mode 100644
index 00000000..a5840609
--- /dev/null
+++ b/tests/modules/freebayes/test.yml
@@ -0,0 +1,39 @@
+- name: freebayes test_freebayes
+  command: nextflow run tests/modules/freebayes -entry test_freebayes -c tests/config/nextflow.config
+  tags:
+    - freebayes
+  files:
+    - path: output/freebayes/test.vcf.gz
+      md5sum: 04d60a7135768777e0c764daec6519db
+
+- name: freebayes test_freebayes_bed
+  command: nextflow run tests/modules/freebayes -entry test_freebayes_bed -c tests/config/nextflow.config
+  tags:
+    - freebayes
+  files:
+    - path: output/freebayes/test.vcf.gz
+      md5sum: fc6e228c8ac5508bd83da45eafc2e7b2
+
+- name: freebayes test_freebayes_cram
+  command: nextflow run tests/modules/freebayes -entry test_freebayes_cram -c tests/config/nextflow.config
+  tags:
+    - freebayes
+  files:
+    - path: output/freebayes/test.vcf.gz
+      md5sum: 91b8caaa0e396e1ba4f264a83bb67254
+
+- name: freebayes test_freebayes_somatic
+  command: nextflow run tests/modules/freebayes -entry test_freebayes_somatic -c tests/config/nextflow.config
+  tags:
+    - freebayes
+  files:
+    - path: output/freebayes/test.vcf.gz
+      md5sum: 40da977199f16d9888e0e0e07e8bebee
+
+- name: freebayes test_freebayes_somatic_cram_intervals
+  command: nextflow run tests/modules/freebayes -entry test_freebayes_somatic_cram_intervals -c tests/config/nextflow.config
+  tags:
+    - freebayes
+  files:
+    - path: output/freebayes/test.vcf.gz
+      md5sum: dd976880365287d9ad31a606eb4d091f

From 11226d9d98a59af9519a871f28df183391c0d300 Mon Sep 17 00:00:00 2001
From: "Maxime U. Garcia" <max.u.garcia@gmail.com>
Date: Wed, 3 Nov 2021 11:41:12 +0100
Subject: [PATCH 13/15] fix remove md5sum check (#1017)

---
 tests/modules/freebayes/test.yml | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/tests/modules/freebayes/test.yml b/tests/modules/freebayes/test.yml
index a5840609..22fd0e88 100644
--- a/tests/modules/freebayes/test.yml
+++ b/tests/modules/freebayes/test.yml
@@ -4,7 +4,6 @@
     - freebayes
   files:
     - path: output/freebayes/test.vcf.gz
-      md5sum: 04d60a7135768777e0c764daec6519db
 
 - name: freebayes test_freebayes_bed
   command: nextflow run tests/modules/freebayes -entry test_freebayes_bed -c tests/config/nextflow.config
@@ -12,7 +11,6 @@
     - freebayes
   files:
     - path: output/freebayes/test.vcf.gz
-      md5sum: fc6e228c8ac5508bd83da45eafc2e7b2
 
 - name: freebayes test_freebayes_cram
   command: nextflow run tests/modules/freebayes -entry test_freebayes_cram -c tests/config/nextflow.config
@@ -20,7 +18,6 @@
     - freebayes
   files:
     - path: output/freebayes/test.vcf.gz
-      md5sum: 91b8caaa0e396e1ba4f264a83bb67254
 
 - name: freebayes test_freebayes_somatic
   command: nextflow run tests/modules/freebayes -entry test_freebayes_somatic -c tests/config/nextflow.config
@@ -28,7 +25,6 @@
     - freebayes
   files:
     - path: output/freebayes/test.vcf.gz
-      md5sum: 40da977199f16d9888e0e0e07e8bebee
 
 - name: freebayes test_freebayes_somatic_cram_intervals
   command: nextflow run tests/modules/freebayes -entry test_freebayes_somatic_cram_intervals -c tests/config/nextflow.config
@@ -36,4 +32,3 @@
     - freebayes
   files:
     - path: output/freebayes/test.vcf.gz
-      md5sum: dd976880365287d9ad31a606eb4d091f

From 08b71fa85f69147f7c56552bfca08045ac43a137 Mon Sep 17 00:00:00 2001
From: "James A. Fellows Yates" <jfy133@gmail.com>
Date: Wed, 3 Nov 2021 17:01:23 +0100
Subject: [PATCH 14/15] New module: `gunc run` (+ `gunc downloaddb`) (#880)

* Specify more guidelines on input channels

* Linting

* Updates based on code review

* Update README.md

* Fix broken sentence

* feat: add megahit module, currently decompressed output

* Update main.nf

* Update tests/modules/megahit/test.yml

Co-authored-by: Maxime Borry <maxibor@users.noreply.github.com>

* Apply suggestions from code review

Co-authored-by: Harshil Patel <drpatelh@users.noreply.github.com>

* feat: compress all outputs, remove md5sums due to gz stochasicity

* fix: wrong conda channel for pigz

* fix: broken singleend tests and update meta.yml

* Missed one

* Apply suggestions from code review

Co-authored-by: Harshil Patel <drpatelh@users.noreply.github.com>

* fix: pigz formatting

* Apply suggestions from code review

Co-authored-by: Harshil Patel <drpatelh@users.noreply.github.com>

* Apply suggestions from code review

* Add GUNC download_db and run commands

* Bump with version without zgrep

* Apply suggestions from code review

Co-authored-by: Robert A. Petit III <robbie.petit@gmail.com>

* Harshil formatting

* Apply suggestions from code review

Co-authored-by: Robert A. Petit III <robbie.petit@gmail.com>

Co-authored-by: Harshil Patel <drpatelh@users.noreply.github.com>
Co-authored-by: Maxime Borry <maxibor@users.noreply.github.com>
Co-authored-by: Robert A. Petit III <robbie.petit@gmail.com>
---
 modules/gunc/downloaddb/functions.nf   | 78 ++++++++++++++++++++++++++
 modules/gunc/downloaddb/main.nf        | 37 ++++++++++++
 modules/gunc/downloaddb/meta.yml       | 36 ++++++++++++
 modules/gunc/run/functions.nf          | 78 ++++++++++++++++++++++++++
 modules/gunc/run/main.nf               | 45 +++++++++++++++
 modules/gunc/run/meta.yml              | 53 +++++++++++++++++
 tests/config/pytest_modules.yml        |  8 +++
 tests/modules/gunc/downloaddb/main.nf  | 12 ++++
 tests/modules/gunc/downloaddb/test.yml |  8 +++
 tests/modules/gunc/run/main.nf         | 17 ++++++
 tests/modules/gunc/run/test.yml        |  8 +++
 11 files changed, 380 insertions(+)
 create mode 100644 modules/gunc/downloaddb/functions.nf
 create mode 100644 modules/gunc/downloaddb/main.nf
 create mode 100644 modules/gunc/downloaddb/meta.yml
 create mode 100644 modules/gunc/run/functions.nf
 create mode 100644 modules/gunc/run/main.nf
 create mode 100644 modules/gunc/run/meta.yml
 create mode 100644 tests/modules/gunc/downloaddb/main.nf
 create mode 100644 tests/modules/gunc/downloaddb/test.yml
 create mode 100644 tests/modules/gunc/run/main.nf
 create mode 100644 tests/modules/gunc/run/test.yml

diff --git a/modules/gunc/downloaddb/functions.nf b/modules/gunc/downloaddb/functions.nf
new file mode 100644
index 00000000..85628ee0
--- /dev/null
+++ b/modules/gunc/downloaddb/functions.nf
@@ -0,0 +1,78 @@
+//
+//  Utility functions used in nf-core DSL2 module files
+//
+
+//
+// Extract name of software tool from process name using $task.process
+//
+def getSoftwareName(task_process) {
+    return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()
+}
+
+//
+// Extract name of module from process name using $task.process
+//
+def getProcessName(task_process) {
+    return task_process.tokenize(':')[-1]
+}
+
+//
+// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules
+//
+def initOptions(Map args) {
+    def Map options = [:]
+    options.args            = args.args ?: ''
+    options.args2           = args.args2 ?: ''
+    options.args3           = args.args3 ?: ''
+    options.publish_by_meta = args.publish_by_meta ?: []
+    options.publish_dir     = args.publish_dir ?: ''
+    options.publish_files   = args.publish_files
+    options.suffix          = args.suffix ?: ''
+    return options
+}
+
+//
+// Tidy up and join elements of a list to return a path string
+//
+def getPathFromList(path_list) {
+    def paths = path_list.findAll { item -> !item?.trim().isEmpty() }      // Remove empty entries
+    paths     = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes
+    return paths.join('/')
+}
+
+//
+// Function to save/publish module results
+//
+def saveFiles(Map args) {
+    def ioptions  = initOptions(args.options)
+    def path_list = [ ioptions.publish_dir ?: args.publish_dir ]
+
+    // Do not publish versions.yml unless running from pytest workflow
+    if (args.filename.equals('versions.yml') && !System.getenv("NF_CORE_MODULES_TEST")) {
+        return null
+    }
+    if (ioptions.publish_by_meta) {
+        def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta
+        for (key in key_list) {
+            if (args.meta && key instanceof String) {
+                def path = key
+                if (args.meta.containsKey(key)) {
+                    path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key]
+                }
+                path = path instanceof String ? path : ''
+                path_list.add(path)
+            }
+        }
+    }
+    if (ioptions.publish_files instanceof Map) {
+        for (ext in ioptions.publish_files) {
+            if (args.filename.endsWith(ext.key)) {
+                def ext_list = path_list.collect()
+                ext_list.add(ext.value)
+                return "${getPathFromList(ext_list)}/$args.filename"
+            }
+        }
+    } else if (ioptions.publish_files == null) {
+        return "${getPathFromList(path_list)}/$args.filename"
+    }
+}
diff --git a/modules/gunc/downloaddb/main.nf b/modules/gunc/downloaddb/main.nf
new file mode 100644
index 00000000..af421608
--- /dev/null
+++ b/modules/gunc/downloaddb/main.nf
@@ -0,0 +1,37 @@
+// Import generic module functions
+include { initOptions; saveFiles; getSoftwareName; getProcessName } from './functions'
+
+params.options = [:]
+options        = initOptions(params.options)
+
+process GUNC_DOWNLOADDB { // Downloads the GUNC database named by db_name into the task directory
+    tag "$db_name" // double-quoted so Groovy interpolates the input value (single quotes yield the literal text)
+    label 'process_low'
+    publishDir "${params.outdir}",
+        mode: params.publish_dir_mode,
+        saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:[:], publish_by_meta:[]) }
+
+    conda (params.enable_conda ? "bioconda::gunc=1.0.5" : null)
+    if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
+        container "https://depot.galaxyproject.org/singularity/gunc:1.0.5--pyhdfd78af_0"
+    } else {
+        container "quay.io/biocontainers/gunc:1.0.5--pyhdfd78af_0"
+    }
+
+    input:
+    val db_name
+
+    output:
+    path "*.dmnd"       , emit: db
+    path "versions.yml" , emit: versions
+
+    script:
+    """
+    gunc download_db . -db $db_name $options.args
+
+    cat <<-END_VERSIONS > versions.yml
+    ${getProcessName(task.process)}:
+        ${getSoftwareName(task.process)}: \$( gunc --version )
+    END_VERSIONS
+    """
+}
diff --git a/modules/gunc/downloaddb/meta.yml b/modules/gunc/downloaddb/meta.yml
new file mode 100644
index 00000000..cb486da0
--- /dev/null
+++ b/modules/gunc/downloaddb/meta.yml
@@ -0,0 +1,36 @@
+name: gunc_downloaddb
+description: Download database for GUNC detection of Chimerism and Contamination in Prokaryotic Genomes
+keywords:
+  - download
+  - prokaryote
+  - assembly
+  - genome
+  - quality control
+  - chimeras
+tools:
+  - gunc:
+      description: Python package for detection of chimerism and contamination in prokaryotic genomes.
+      homepage: https://grp-bork.embl-community.io/gunc/
+      documentation: https://grp-bork.embl-community.io/gunc/
+      tool_dev_url: https://github.com/grp-bork/gunc
+      doi: "10.1186/s13059-021-02393-0"
+      licence: ['GNU General Public v3 or later (GPL v3+)']
+
+input:
+  - db_name:
+      type: string
+      description: "Which database to download. Options: progenomes or gtdb"
+      pattern: "progenomes|gtdb"
+
+output:
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - db:
+      type: file
+      description: GUNC database file
+      pattern: "*.dmnd"
+
+authors:
+  - "@jfy133"
diff --git a/modules/gunc/run/functions.nf b/modules/gunc/run/functions.nf
new file mode 100644
index 00000000..85628ee0
--- /dev/null
+++ b/modules/gunc/run/functions.nf
@@ -0,0 +1,78 @@
+//
+//  Utility functions used in nf-core DSL2 module files
+//
+
+//
+// Extract name of software tool from process name using $task.process
+//
+def getSoftwareName(task_process) {
+    return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()
+}
+
+//
+// Extract name of module from process name using $task.process
+//
+def getProcessName(task_process) {
+    return task_process.tokenize(':')[-1]
+}
+
+//
+// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules
+//
+def initOptions(Map args) {
+    def Map options = [:]
+    options.args            = args.args ?: ''
+    options.args2           = args.args2 ?: ''
+    options.args3           = args.args3 ?: ''
+    options.publish_by_meta = args.publish_by_meta ?: []
+    options.publish_dir     = args.publish_dir ?: ''
+    options.publish_files   = args.publish_files
+    options.suffix          = args.suffix ?: ''
+    return options
+}
+
+//
+// Tidy up and join elements of a list to return a path string
+//
+def getPathFromList(path_list) {
+    def paths = path_list.findAll { item -> !item?.trim().isEmpty() }      // Remove empty entries
+    paths     = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes
+    return paths.join('/')
+}
+
+//
+// Function to save/publish module results
+//
+def saveFiles(Map args) {
+    def ioptions  = initOptions(args.options)
+    def path_list = [ ioptions.publish_dir ?: args.publish_dir ]
+
+    // Do not publish versions.yml unless running from pytest workflow
+    if (args.filename.equals('versions.yml') && !System.getenv("NF_CORE_MODULES_TEST")) {
+        return null
+    }
+    if (ioptions.publish_by_meta) {
+        def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta
+        for (key in key_list) {
+            if (args.meta && key instanceof String) {
+                def path = key
+                if (args.meta.containsKey(key)) {
+                    path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key]
+                }
+                path = path instanceof String ? path : ''
+                path_list.add(path)
+            }
+        }
+    }
+    if (ioptions.publish_files instanceof Map) {
+        for (ext in ioptions.publish_files) {
+            if (args.filename.endsWith(ext.key)) {
+                def ext_list = path_list.collect()
+                ext_list.add(ext.value)
+                return "${getPathFromList(ext_list)}/$args.filename"
+            }
+        }
+    } else if (ioptions.publish_files == null) {
+        return "${getPathFromList(path_list)}/$args.filename"
+    }
+}
diff --git a/modules/gunc/run/main.nf b/modules/gunc/run/main.nf
new file mode 100644
index 00000000..f873a7df
--- /dev/null
+++ b/modules/gunc/run/main.nf
@@ -0,0 +1,45 @@
+// Import generic module functions
+include { initOptions; saveFiles; getSoftwareName; getProcessName } from './functions'
+
+params.options = [:]
+options        = initOptions(params.options)
+
+process GUNC_RUN { // Runs `gunc run` on the input FASTA against the supplied database file
+    tag "$meta.id"
+    label 'process_medium'
+    publishDir "${params.outdir}",
+        mode: params.publish_dir_mode,
+        saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) }
+
+    conda (params.enable_conda ? "bioconda::gunc=1.0.5" : null)
+    if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
+        container "https://depot.galaxyproject.org/singularity/gunc:1.0.5--pyhdfd78af_0"
+    } else {
+        container "quay.io/biocontainers/gunc:1.0.5--pyhdfd78af_0"
+    }
+
+    input:
+    tuple val(meta), path(fasta)
+    path(db)
+
+    output:
+    tuple val(meta), path("*maxCSS_level.tsv")                  , emit: maxcss_level_tsv
+    tuple val(meta), path("*all_levels.tsv")    , optional: true, emit: all_levels_tsv
+    path "versions.yml"                                         , emit: versions
+
+    script:
+    def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" // NOTE(review): prefix is never referenced in the command below
+    """
+    gunc \\
+        run \\
+        --input_fasta $fasta \\
+        --db_file $db \\
+        --threads $task.cpus \\
+        $options.args
+
+    cat <<-END_VERSIONS > versions.yml
+    ${getProcessName(task.process)}:
+        ${getSoftwareName(task.process)}: \$( gunc --version )
+    END_VERSIONS
+    """
+}
diff --git a/modules/gunc/run/meta.yml b/modules/gunc/run/meta.yml
new file mode 100644
index 00000000..1dd4a8ae
--- /dev/null
+++ b/modules/gunc/run/meta.yml
@@ -0,0 +1,53 @@
+name: gunc_run
+description: Detection of Chimerism and Contamination in Prokaryotic Genomes
+keywords:
+  - prokaryote
+  - assembly
+  - genome
+  - quality control
+  - chimeras
+tools:
+  - gunc:
+      description: Python package for detection of chimerism and contamination in prokaryotic genomes.
+      homepage: https://grp-bork.embl-community.io/gunc/
+      documentation: https://grp-bork.embl-community.io/gunc/
+      tool_dev_url: https://github.com/grp-bork/gunc
+      doi: "10.1186/s13059-021-02393-0"
+      licence: ['GNU General Public v3 or later (GPL v3+)']
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - fasta:
+      type: file
+      description: FASTA file containing contigs (bins)
+      pattern: "*.fa"
+  - db:
+      type: file
+      description: GUNC database file
+      pattern: "*.dmnd"
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - maxcss_level_tsv:
+      type: file
+      description: Output file with scores for a taxonomic level with the highest CSS score
+      pattern: "*maxCSS_level.tsv"
+  - all_levels_tsv:
+      type: file
+      description: Optional output file with results for each taxonomic level
+      pattern: "*all_levels.tsv"
+
+authors:
+  - "@jfy133"
diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml
index 047f83ff..8d8f32f3 100644
--- a/tests/config/pytest_modules.yml
+++ b/tests/config/pytest_modules.yml
@@ -546,6 +546,14 @@ gubbins:
   - modules/gubbins/**
   - tests/modules/gubbins/**
 
+gunc/downloaddb:
+  - modules/gunc/downloaddb/**
+  - tests/modules/gunc/downloaddb/**
+
+gunc/run:
+  - modules/gunc/run/**
+  - tests/modules/gunc/run/**
+
 gunzip:
   - modules/gunzip/**
   - tests/modules/gunzip/**
diff --git a/tests/modules/gunc/downloaddb/main.nf b/tests/modules/gunc/downloaddb/main.nf
new file mode 100644
index 00000000..c0321279
--- /dev/null
+++ b/tests/modules/gunc/downloaddb/main.nf
@@ -0,0 +1,12 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { GUNC_DOWNLOADDB } from '../../../../modules/gunc/downloaddb/main.nf' addParams( options: [:] )
+
+workflow test_gunc_downloaddb {
+
+    input = 'progenomes'
+
+    GUNC_DOWNLOADDB ( input )
+}
diff --git a/tests/modules/gunc/downloaddb/test.yml b/tests/modules/gunc/downloaddb/test.yml
new file mode 100644
index 00000000..d1aafae7
--- /dev/null
+++ b/tests/modules/gunc/downloaddb/test.yml
@@ -0,0 +1,8 @@
+- name: gunc downloaddb
+  command: nextflow run ./tests/modules/gunc/downloaddb -entry test_gunc_downloaddb -c tests/config/nextflow.config
+  tags:
+    - gunc
+    - gunc/downloaddb
+  files:
+    - path: output/gunc/gunc_db_progenomes2.1.dmnd
+      md5sum: 447c9330056b02f29f30fe81fe4af4eb
diff --git a/tests/modules/gunc/run/main.nf b/tests/modules/gunc/run/main.nf
new file mode 100644
index 00000000..a1a191dc
--- /dev/null
+++ b/tests/modules/gunc/run/main.nf
@@ -0,0 +1,17 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { GUNC_RUN        } from '../../../../modules/gunc/run/main.nf' addParams( options: [:] )
+include { GUNC_DOWNLOADDB } from '../../../../modules/gunc/downloaddb/main.nf' addParams( options: [:] )
+
+
+workflow test_gunc_run {
+
+    input = [ [ id:'test', single_end:false ], // meta map
+              file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true) ]
+
+    GUNC_DOWNLOADDB('progenomes')
+
+    GUNC_RUN ( input, GUNC_DOWNLOADDB.out.db )
+}
diff --git a/tests/modules/gunc/run/test.yml b/tests/modules/gunc/run/test.yml
new file mode 100644
index 00000000..d527f37e
--- /dev/null
+++ b/tests/modules/gunc/run/test.yml
@@ -0,0 +1,8 @@
+- name: gunc run
+  command: nextflow run ./tests/modules/gunc/run -entry test_gunc_run -c tests/config/nextflow.config
+  tags:
+    - gunc
+    - gunc/run
+  files:
+    - path: output/gunc/GUNC.progenomes_2.1.maxCSS_level.tsv
+      md5sum: 0420c1a9f2c50fefaee9fab5d80a551a

From fc4f3e8822865d85904c1e96e93868dae7247a81 Mon Sep 17 00:00:00 2001
From: "Robert A. Petit III" <robbie.petit@gmail.com>
Date: Thu, 4 Nov 2021 01:49:30 -0600
Subject: [PATCH 15/15] add seqsero2 module (#1016)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* add seqsero2 module

* correct lint errors

* Update modules/seqsero2/main.nf

Co-authored-by: Sébastien Guizard <sguizard@ed.ac.uk>

* set output directory

Co-authored-by: Sébastien Guizard <sguizard@ed.ac.uk>
---
 modules/seqsero2/functions.nf   | 78 +++++++++++++++++++++++++++++++++
 modules/seqsero2/main.nf        | 45 +++++++++++++++++++
 modules/seqsero2/meta.yml       | 52 ++++++++++++++++++++++
 tests/config/pytest_modules.yml | 12 +++--
 tests/modules/seqsero2/main.nf  | 13 ++++++
 tests/modules/seqsero2/test.yml | 11 +++++
 6 files changed, 207 insertions(+), 4 deletions(-)
 create mode 100644 modules/seqsero2/functions.nf
 create mode 100644 modules/seqsero2/main.nf
 create mode 100644 modules/seqsero2/meta.yml
 create mode 100644 tests/modules/seqsero2/main.nf
 create mode 100644 tests/modules/seqsero2/test.yml

diff --git a/modules/seqsero2/functions.nf b/modules/seqsero2/functions.nf
new file mode 100644
index 00000000..85628ee0
--- /dev/null
+++ b/modules/seqsero2/functions.nf
@@ -0,0 +1,78 @@
+//
+//  Utility functions used in nf-core DSL2 module files
+//
+
+//
+// Extract name of software tool from process name using $task.process
+//
+def getSoftwareName(task_process) {
+    return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()
+}
+
+//
+// Extract name of module from process name using $task.process
+//
+def getProcessName(task_process) {
+    return task_process.tokenize(':')[-1]
+}
+
+//
+// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules
+//
+def initOptions(Map args) {
+    def Map options = [:]
+    options.args            = args.args ?: ''
+    options.args2           = args.args2 ?: ''
+    options.args3           = args.args3 ?: ''
+    options.publish_by_meta = args.publish_by_meta ?: []
+    options.publish_dir     = args.publish_dir ?: ''
+    options.publish_files   = args.publish_files
+    options.suffix          = args.suffix ?: ''
+    return options
+}
+
+//
+// Tidy up and join elements of a list to return a path string
+//
+def getPathFromList(path_list) {
+    def paths = path_list.findAll { item -> !item?.trim().isEmpty() }      // Remove empty entries
+    paths     = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes
+    return paths.join('/')
+}
+
+//
+// Function to save/publish module results
+//
+def saveFiles(Map args) {
+    def ioptions  = initOptions(args.options)
+    def path_list = [ ioptions.publish_dir ?: args.publish_dir ]
+
+    // Do not publish versions.yml unless running from pytest workflow
+    if (args.filename.equals('versions.yml') && !System.getenv("NF_CORE_MODULES_TEST")) {
+        return null
+    }
+    if (ioptions.publish_by_meta) {
+        def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta
+        for (key in key_list) {
+            if (args.meta && key instanceof String) {
+                def path = key
+                if (args.meta.containsKey(key)) {
+                    path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key]
+                }
+                path = path instanceof String ? path : ''
+                path_list.add(path)
+            }
+        }
+    }
+    if (ioptions.publish_files instanceof Map) {
+        for (ext in ioptions.publish_files) {
+            if (args.filename.endsWith(ext.key)) {
+                def ext_list = path_list.collect()
+                ext_list.add(ext.value)
+                return "${getPathFromList(ext_list)}/$args.filename"
+            }
+        }
+    } else if (ioptions.publish_files == null) {
+        return "${getPathFromList(path_list)}/$args.filename"
+    }
+}
diff --git a/modules/seqsero2/main.nf b/modules/seqsero2/main.nf
new file mode 100644
index 00000000..3748a6e4
--- /dev/null
+++ b/modules/seqsero2/main.nf
@@ -0,0 +1,45 @@
+// Import generic module functions
+include { initOptions; saveFiles; getSoftwareName; getProcessName } from './functions'
+
+params.options = [:]
+options        = initOptions(params.options)
+
+process SEQSERO2 { // Runs SeqSero2_package.py on the input sequences, writing its outputs under results/
+    tag "$meta.id"
+    label 'process_low'
+    publishDir "${params.outdir}",
+        mode: params.publish_dir_mode,
+        saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) }
+
+    conda (params.enable_conda ? "bioconda::seqsero2=1.2.1" : null)
+    if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
+        container "https://depot.galaxyproject.org/singularity/seqsero2:1.2.1--py_0"
+    } else {
+        container "quay.io/biocontainers/seqsero2:1.2.1--py_0"
+    }
+
+    input:
+    tuple val(meta), path(seqs)
+
+    output:
+    tuple val(meta), path("results/*_log.txt")   , emit: log
+    tuple val(meta), path("results/*_result.tsv"), emit: tsv
+    tuple val(meta), path("results/*_result.txt"), emit: txt
+    path "versions.yml"                          , emit: versions
+
+    script:
+    def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" // sample name passed to -n; meta.id plus the optional suffix
+    """
+    SeqSero2_package.py \\
+        $options.args \\
+        -d results/ \\
+        -n $prefix \\
+        -p $task.cpus \\
+        -i $seqs
+
+    cat <<-END_VERSIONS > versions.yml
+    ${getProcessName(task.process)}:
+        ${getSoftwareName(task.process)}: \$( echo \$( SeqSero2_package.py --version 2>&1) | sed 's/^.*SeqSero2_package.py //' )
+    END_VERSIONS
+    """
+}
diff --git a/modules/seqsero2/meta.yml b/modules/seqsero2/meta.yml
new file mode 100644
index 00000000..ceea80e3
--- /dev/null
+++ b/modules/seqsero2/meta.yml
@@ -0,0 +1,52 @@
+name: seqsero2
+description: Salmonella serotype prediction from reads and assemblies
+keywords:
+  - fasta
+  - fastq
+  - salmonella
+  - serotype
+tools:
+  - seqsero2:
+      description: Salmonella serotype prediction from genome sequencing data
+      homepage: https://github.com/denglab/SeqSero2
+      documentation: https://github.com/denglab/SeqSero2
+      tool_dev_url: https://github.com/denglab/SeqSero2
+      doi: "10.1128/AEM.01746-19"
+      licence: ['GPL v2']
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - seqs:
+      type: file
+      description: FASTQ or FASTA formatted sequences
+      pattern: "*.{fq.gz,fastq.gz,fna.gz,fna,fasta.gz,fasta,fa.gz,fa}"
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - log:
+      type: file
+      description: A log of serotype antigen results
+      pattern: "*_log.txt"
+  - tsv:
+      type: file
+      description: Tab-delimited summary of the SeqSero2 results
+      pattern: "*_result.tsv"
+  - txt:
+      type: file
+      description: Detailed summary of the SeqSero2 results
+      pattern: "*_result.txt"
+
+authors:
+  - "@rpetit3"
diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml
index 8d8f32f3..008c98dc 100644
--- a/tests/config/pytest_modules.yml
+++ b/tests/config/pytest_modules.yml
@@ -446,14 +446,14 @@ gatk4/fastqtosam:
   - modules/gatk4/fastqtosam/**
   - tests/modules/gatk4/fastqtosam/**
 
-gatk4/genomicsdbimport:
-  - modules/gatk4/genomicsdbimport/**
-  - tests/modules/gatk4/genomicsdbimport/**
-
 gatk4/filtermutectcalls:
   - modules/gatk4/filtermutectcalls/**
   - tests/modules/gatk4/filtermutectcalls/**
 
+gatk4/genomicsdbimport:
+  - modules/gatk4/genomicsdbimport/**
+  - tests/modules/gatk4/genomicsdbimport/**
+
 gatk4/getpileupsummaries:
   - modules/gatk4/getpileupsummaries/**
   - tests/modules/gatk4/getpileupsummaries/**
@@ -1083,6 +1083,10 @@ seqkit/split2:
   - modules/seqkit/split2/**
   - tests/modules/seqkit/split2/**
 
+seqsero2:
+  - modules/seqsero2/**
+  - tests/modules/seqsero2/**
+
 seqtk/mergepe:
   - modules/seqtk/mergepe/**
   - tests/modules/seqtk/mergepe/**
diff --git a/tests/modules/seqsero2/main.nf b/tests/modules/seqsero2/main.nf
new file mode 100644
index 00000000..04ee8e27
--- /dev/null
+++ b/tests/modules/seqsero2/main.nf
@@ -0,0 +1,13 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { SEQSERO2 } from '../../../modules/seqsero2/main.nf' addParams( options: [args: '-m k -t 4'] )
+
+workflow test_seqsero2 {
+    
+    input = [ [ id:'test', single_end:false ], // meta map
+              file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ]
+
+    SEQSERO2 ( input )
+}
diff --git a/tests/modules/seqsero2/test.yml b/tests/modules/seqsero2/test.yml
new file mode 100644
index 00000000..2aa49686
--- /dev/null
+++ b/tests/modules/seqsero2/test.yml
@@ -0,0 +1,11 @@
+- name: seqsero2 test_seqsero2
+  command: nextflow run tests/modules/seqsero2 -entry test_seqsero2 -c tests/config/nextflow.config
+  tags:
+    - seqsero2
+  files:
+    - path: output/seqsero2/results/SeqSero_log.txt
+      md5sum: d00242dfa734b5abb3622a6048f0b4fb
+    - path: output/seqsero2/results/SeqSero_result.tsv
+      contains: ['Sample', 'Predicted', 'Note']
+    - path: output/seqsero2/results/SeqSero_result.txt
+      contains: ['Sample', 'Predicted', 'Note']