From 71945a5b5f4126593aef76abdf1a2f82aa468566 Mon Sep 17 00:00:00 2001
From: GCJMackenzie <43276267+GCJMackenzie@users.noreply.github.com>
Date: Fri, 29 Oct 2021 11:27:56 +0100
Subject: [PATCH 1/5] Mutect2 add mitochondria mode and update tests (#967)

* new mitochondria mode added, tests updated to allow for temp fix for test data

* add cram test

* bam/bam_idx renamed to input and input_index

Co-authored-by: GCJMackenzie <gavin.mackenzie@nibsc.org>
---
 modules/gatk4/mutect2/main.nf        | 41 ++++++++++----------
 modules/gatk4/mutect2/meta.yml       | 23 ++++++++---
 tests/modules/gatk4/mutect2/main.nf  | 57 ++++++++++++++++++++++++++--
 tests/modules/gatk4/mutect2/test.yml | 26 ++++++++++++-
 4 files changed, 116 insertions(+), 31 deletions(-)

diff --git a/modules/gatk4/mutect2/main.nf b/modules/gatk4/mutect2/main.nf
index 9b3f8b3f..7999eec3 100644
--- a/modules/gatk4/mutect2/main.nf
+++ b/modules/gatk4/mutect2/main.nf
@@ -19,9 +19,11 @@ process GATK4_MUTECT2 {
     }
 
     input:
-    tuple val(meta) , path(bam) , path(bai) , val(which_norm)
-    val run_single
-    val run_pon
+    tuple val(meta) , path(input) , path(input_index) , val(which_norm)
+    val  run_single
+    val  run_pon
+    val  run_mito
+    val  interval_label
     path fasta
     path fastaidx
     path dict
@@ -39,35 +41,34 @@ process GATK4_MUTECT2 {
 
     script:
     def prefix   = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
-    def inputsList = []
-    def normalsList = []
-    def inputsCommand = ''
-    def panelsCommand = ''
-    def normalsCommand = ''
+    def panels_command = ''
+    def normals_command = ''
 
-    bam.each() {a -> inputsList.add(" -I " + a ) }
-    inputsCommand = inputsList.join( ' ')
+    def inputs_command = '-I ' + input.join( ' -I ')
 
     if(run_pon) {
-        panelsCommand = ''
-        normalsCommand = ''
+        panels_command = ''
+        normals_command = ''
 
     } else if(run_single) {
-        panelsCommand = " --germline-resource $germline_resource --panel-of-normals $panel_of_normals"
-        normalsCommand = ''
+        panels_command = " --germline-resource $germline_resource --panel-of-normals $panel_of_normals"
+        normals_command = ''
+
+    } else if(run_mito){
+        panels_command = "-L ${interval_label} --mitochondria-mode"
+        normals_command = ''
 
     } else {
-        panelsCommand = " --germline-resource $germline_resource --panel-of-normals $panel_of_normals --f1r2-tar-gz ${prefix}.f1r2.tar.gz"
-        which_norm.each() {a -> normalsList.add(" -normal " + a ) }
-        normalsCommand = normalsList.join( ' ')
+        panels_command = " --germline-resource $germline_resource --panel-of-normals $panel_of_normals --f1r2-tar-gz ${prefix}.f1r2.tar.gz"
+        normals_command = '-normal ' + which_norm.join( ' -normal ')
     }
 
     """
     gatk Mutect2 \\
         -R ${fasta} \\
-        ${inputsCommand} \\
-        ${normalsCommand} \\
-        ${panelsCommand} \\
+        ${inputs_command} \\
+        ${normals_command} \\
+        ${panels_command} \\
         -O ${prefix}.vcf.gz \\
         $options.args
 
diff --git a/modules/gatk4/mutect2/meta.yml b/modules/gatk4/mutect2/meta.yml
index 4c38a049..44601e41 100644
--- a/modules/gatk4/mutect2/meta.yml
+++ b/modules/gatk4/mutect2/meta.yml
@@ -22,23 +22,34 @@ input:
       description: |
         Groovy Map containing sample information
         e.g. [ id:'test']
-  - bam:
+  - input:
       type: list
-      description: list of BAM files
-      pattern: "*.bam"
-  - bai:
+      description: list of BAM files, also able to take CRAM as an input
+      pattern: "*.{bam,cram}"
+  - input_index:
       type: list
-      description: list of BAM file indexes
-      pattern: "*.bam.bai"
+      description: list of BAM file indexes, also able to take CRAM indexes as an input
+      pattern: "*.{bam.bai,cram.crai}"
   - which_norm:
       type: list
       description: optional list of sample headers contained in the normal sample bam files (these are required for tumor_normal_pair mode)
+      pattern: "testN"
   - run_single:
       type: boolean
       description: Specify whether or not to run in tumor_single mode instead of tumor_normal_pair mode (will be ignored if run_pon is also true)
+      pattern: "true/false"
   - run_pon:
       type: boolean
       description: Specify whether or not to run in panel_of_normal mode instead of tumor_normal_pair mode
+      pattern: "true/false"
+  - run_mito:
+      type: boolean
+      description: Specify whether or not to run in mitochondria-mode instead of tumor_normal_pair mode
+      pattern: "true/false"
+  - interval_label:
+      type: string
+      description: Specify the label used for mitochondrial chromosome when mutect2 is run in mitochondria mode.
+      pattern: "chrM"
   - fasta:
       type: file
       description: The reference fasta file
diff --git a/tests/modules/gatk4/mutect2/main.nf b/tests/modules/gatk4/mutect2/main.nf
index 072b3125..293739e4 100644
--- a/tests/modules/gatk4/mutect2/main.nf
+++ b/tests/modules/gatk4/mutect2/main.nf
@@ -3,6 +3,8 @@
 nextflow.enable.dsl = 2
 
 include { GATK4_MUTECT2 } from '../../../../modules/gatk4/mutect2/main.nf' addParams( options: [:] )
+// Temporary fix: run with --mitochondria-mode, which increases sensitivity so that some tumor_normal variants are still detected with the old test data. Remove once new test data for sarek is available.
+include { GATK4_MUTECT2 as GATK4_TEMPFIX_MUTECT2 } from '../../../../modules/gatk4/mutect2/main.nf' addParams( options: [args: '--mitochondria-mode'] )
 
 workflow test_gatk4_mutect2_tumor_normal_pair {
     input = [ [ id:'test'], // meta map
@@ -12,6 +14,8 @@ workflow test_gatk4_mutect2_tumor_normal_pair {
             ]
     run_single = false
     run_pon = false
+    run_mito = false
+    interval_label = []
     fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
     fastaidx = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
     dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
@@ -20,7 +24,7 @@ workflow test_gatk4_mutect2_tumor_normal_pair {
     panel_of_normals = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_vcf_gz'], checkIfExists: true)
     panel_of_normals_idx = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_vcf_gz_tbi'], checkIfExists: true)
 
-    GATK4_MUTECT2 ( input , run_single , run_pon , fasta , fastaidx , dict , germline_resource, germline_resource_idx , panel_of_normals , panel_of_normals_idx )
+    GATK4_TEMPFIX_MUTECT2 ( input , run_single , run_pon , run_mito , interval_label , fasta , fastaidx , dict , germline_resource, germline_resource_idx , panel_of_normals , panel_of_normals_idx )
 }
 
 workflow test_gatk4_mutect2_tumor_single {
@@ -31,6 +35,8 @@ workflow test_gatk4_mutect2_tumor_single {
             ]
     run_single = true
     run_pon = false
+    run_mito = false
+    interval_label = []
     fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
     fastaidx = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
     dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
@@ -39,7 +45,28 @@ workflow test_gatk4_mutect2_tumor_single {
     panel_of_normals = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_vcf_gz'], checkIfExists: true)
     panel_of_normals_idx = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_vcf_gz_tbi'], checkIfExists: true)
 
-    GATK4_MUTECT2 ( input , run_single , run_pon , fasta , fastaidx , dict , germline_resource, germline_resource_idx , panel_of_normals , panel_of_normals_idx )
+    GATK4_MUTECT2 ( input , run_single , run_pon , run_mito , interval_label , fasta , fastaidx , dict , germline_resource, germline_resource_idx , panel_of_normals , panel_of_normals_idx )
+}
+
+workflow test_gatk4_mutect2_cram_input {
+    input = [ [ id:'test'], // meta map
+              [ file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_cram'], checkIfExists: true)],
+              [ file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_cram_crai'], checkIfExists: true)],
+              []
+            ]
+    run_single = true
+    run_pon = false
+    run_mito = false
+    interval_label = []
+    fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
+    fastaidx = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
+    dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
+    germline_resource = file(params.test_data['homo_sapiens']['genome']['gnomad_r2_1_1_vcf_gz'], checkIfExists: true)
+    germline_resource_idx = file(params.test_data['homo_sapiens']['genome']['gnomad_r2_1_1_vcf_gz_tbi'], checkIfExists: true)
+    panel_of_normals = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_vcf_gz'], checkIfExists: true)
+    panel_of_normals_idx = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_vcf_gz_tbi'], checkIfExists: true)
+
+    GATK4_MUTECT2 ( input , run_single , run_pon , run_mito , interval_label , fasta , fastaidx , dict , germline_resource, germline_resource_idx , panel_of_normals , panel_of_normals_idx )
 }
 
 workflow test_gatk4_mutect2_generate_pon {
@@ -50,6 +77,8 @@ workflow test_gatk4_mutect2_generate_pon {
             ]
     run_single = false
     run_pon = true
+    run_mito = false
+    interval_label = []
     fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
     fastaidx = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
     dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
@@ -58,5 +87,27 @@ workflow test_gatk4_mutect2_generate_pon {
     panel_of_normals = []
     panel_of_normals_idx = []
 
-    GATK4_MUTECT2 ( input , run_single , run_pon , fasta , fastaidx , dict , germline_resource, germline_resource_idx , panel_of_normals , panel_of_normals_idx )
+    GATK4_MUTECT2 ( input , run_single , run_pon, run_mito , interval_label , fasta , fastaidx , dict , germline_resource, germline_resource_idx , panel_of_normals , panel_of_normals_idx )
+}
+
+// Mitochondria mode would ideally be tested with mitochondrial data. Since the mitochondria settings only increase detection sensitivity, the (already small) chr22 data is used as a stand-in; the extra variants detected compared to generate_pon show that the mode is working.
+workflow test_gatk4_mutect2_mitochondria {
+    input = [ [ id:'test'], // meta map
+              [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_bam'], checkIfExists: true)],
+              [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_bam_bai'], checkIfExists: true)],
+              []
+            ]
+    run_single = false
+    run_pon = false
+    run_mito = true
+    interval_label = 'chr22'
+    fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
+    fastaidx = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
+    dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
+    germline_resource = []
+    germline_resource_idx = []
+    panel_of_normals = []
+    panel_of_normals_idx = []
+
+    GATK4_MUTECT2 ( input , run_single , run_pon, run_mito , interval_label , fasta , fastaidx , dict , germline_resource, germline_resource_idx , panel_of_normals , panel_of_normals_idx )
 }
diff --git a/tests/modules/gatk4/mutect2/test.yml b/tests/modules/gatk4/mutect2/test.yml
index 16f39875..031ed072 100644
--- a/tests/modules/gatk4/mutect2/test.yml
+++ b/tests/modules/gatk4/mutect2/test.yml
@@ -7,7 +7,7 @@
     - path: output/gatk4/test.f1r2.tar.gz
     - path: output/gatk4/test.vcf.gz
     - path: output/gatk4/test.vcf.gz.stats
-      md5sum: 6ecb874e6a95aa48233587b876c2a7a9
+      md5sum: 887d54e393510f1d0aa2c33bc6155161
     - path: output/gatk4/test.vcf.gz.tbi
 
 - name: gatk4 mutect2 test_gatk4_mutect2_tumor_single
@@ -18,7 +18,18 @@
   files:
     - path: output/gatk4/test.vcf.gz
     - path: output/gatk4/test.vcf.gz.stats
-      md5sum: e7ef613f7d158b8a0adf44abe5db2029
+      md5sum: 106c5828b02b906c97922618b6072169
+    - path: output/gatk4/test.vcf.gz.tbi
+
+- name: gatk4 mutect2 test_gatk4_mutect2_cram_input
+  command: nextflow run tests/modules/gatk4/mutect2 -entry test_gatk4_mutect2_cram_input -c tests/config/nextflow.config
+  tags:
+    - gatk4
+    - gatk4/mutect2
+  files:
+    - path: output/gatk4/test.vcf.gz
+    - path: output/gatk4/test.vcf.gz.stats
+      md5sum: 106c5828b02b906c97922618b6072169
     - path: output/gatk4/test.vcf.gz.tbi
 
 - name: gatk4 mutect2 test_gatk4_mutect2_generate_pon
@@ -31,3 +42,14 @@
     - path: output/gatk4/test.vcf.gz.stats
       md5sum: 4f77301a125913170b8e9e7828b4ca3f
     - path: output/gatk4/test.vcf.gz.tbi
+
+- name: gatk4 mutect2 test_gatk4_mutect2_mitochondria
+  command: nextflow run tests/modules/gatk4/mutect2 -entry test_gatk4_mutect2_mitochondria -c tests/config/nextflow.config
+  tags:
+    - gatk4
+    - gatk4/mutect2
+  files:
+    - path: output/gatk4/test.vcf.gz
+    - path: output/gatk4/test.vcf.gz.stats
+      md5sum: fc6ea14ca2da346babe78161beea28c9
+    - path: output/gatk4/test.vcf.gz.tbi

From ac1e6df076195cec553a2079c9cebd94026a0d47 Mon Sep 17 00:00:00 2001
From: FriederikeHanssen <Friederike.hanssen@qbic.uni-tuebingen.de>
Date: Fri, 29 Oct 2021 13:01:05 +0200
Subject: [PATCH 2/5] Update to allow cram + update needed to use the gatk4
 modules in sarek (#976)

* Make samtools/merge cram compliant

* samtools/stats cram compliance

* update yml file

* samtools/view to deal with crams

* Update tests to make sure cram works

* also fix tmp dir and min mem in one go

* basequalityrecal test for cram + min mem + tmpdir

* update haplotypecaller for sarek

* update haplotype yml

* update markdup to allow multiple bams, take out params to be passed with options.args

* remove TODO statement

* Remove variable md5sum

* add empty input to stats module in subworkflows

* subworkflows seem to work now on my side

* Apply code review

Co-authored-by: Maxime U. Garcia <maxime.garcia@scilifelab.se>

* replace bam with input to be more inclusive

* rename everywhere

* rename input

* remove variable checksum

Co-authored-by: Maxime U. Garcia <maxime.garcia@scilifelab.se>
---
 modules/gatk4/applybqsr/main.nf               | 10 +++++--
 modules/gatk4/applybqsr/meta.yml              | 10 +++++--
 modules/gatk4/baserecalibrator/main.nf        | 11 +++++--
 modules/gatk4/baserecalibrator/meta.yml       | 11 +++++--
 modules/gatk4/haplotypecaller/main.nf         | 18 +++++++----
 modules/gatk4/haplotypecaller/meta.yml        | 23 ++++++++++----
 modules/gatk4/markduplicates/main.nf          | 13 ++++++--
 modules/gatk4/markduplicates/meta.yml         |  1 +
 modules/manta/germline/main.nf                |  4 +--
 modules/manta/germline/meta.yml               |  4 +--
 modules/manta/somatic/main.nf                 |  6 ++--
 modules/manta/somatic/meta.yml                |  8 ++---
 modules/manta/tumoronly/main.nf               |  4 +--
 modules/manta/tumoronly/meta.yml              |  5 ++--
 modules/samtools/merge/main.nf                | 12 +++++---
 modules/samtools/merge/meta.yml               | 17 ++++++++---
 modules/samtools/stats/main.nf                |  6 ++--
 modules/samtools/stats/meta.yml               | 21 ++++++++-----
 modules/samtools/view/main.nf                 | 12 +++++---
 modules/samtools/view/meta.yml                | 15 ++++++++--
 modules/strelka/germline/main.nf              |  4 +--
 modules/strelka/germline/meta.yml             | 12 ++++----
 modules/strelka/somatic/main.nf               |  6 ++--
 modules/strelka/somatic/meta.yml              |  8 ++---
 .../nf-core/bam_stats_samtools/main.nf        |  2 +-
 tests/modules/gatk4/applybqsr/main.nf         | 14 +++++++++
 tests/modules/gatk4/applybqsr/test.yml        | 17 ++++++++---
 tests/modules/gatk4/baserecalibrator/main.nf  | 15 ++++++++++
 tests/modules/gatk4/baserecalibrator/test.yml | 15 ++++++++--
 tests/modules/gatk4/haplotypecaller/main.nf   | 30 ++++++++++++++++++-
 tests/modules/gatk4/haplotypecaller/test.yml  | 25 ++++++++++++----
 tests/modules/gatk4/markduplicates/main.nf    |  9 ++++++
 tests/modules/gatk4/markduplicates/test.yml   | 19 ++++++++++--
 tests/modules/samtools/merge/main.nf          | 12 +++++++-
 tests/modules/samtools/merge/test.yml         | 14 +++++++--
 tests/modules/samtools/stats/main.nf          | 12 +++++++-
 tests/modules/samtools/stats/test.yml         | 15 ++++++++--
 tests/modules/samtools/view/main.nf           | 13 ++++++--
 tests/modules/samtools/view/test.yml          | 12 ++++++--
 39 files changed, 356 insertions(+), 109 deletions(-)

diff --git a/modules/gatk4/applybqsr/main.nf b/modules/gatk4/applybqsr/main.nf
index e804bcff..508a29ca 100644
--- a/modules/gatk4/applybqsr/main.nf
+++ b/modules/gatk4/applybqsr/main.nf
@@ -19,7 +19,7 @@ process GATK4_APPLYBQSR {
     }
 
     input:
-    tuple val(meta), path(bam), path(bai), path(bqsr_table)
+    tuple val(meta), path(input), path(input_index), path(bqsr_table)
     path  fasta
     path  fastaidx
     path  dict
@@ -32,12 +32,18 @@ process GATK4_APPLYBQSR {
     script:
     def prefix   = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
     def interval = intervals ? "-L ${intervals}" : ""
+    // Heap size (GB) handed to the JVM; stays at 3 when the task has no memory directive.
+    def avail_mem = task.memory ? task.memory.giga : 3
+    if (!task.memory) {
+        log.info '[GATK ApplyBQSR] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
+    }
     """
-    gatk ApplyBQSR \\
+    gatk --java-options "-Xmx${avail_mem}g" ApplyBQSR \\
         -R $fasta \\
-        -I $bam \\
+        -I $input \\
         --bqsr-recal-file $bqsr_table \\
         $interval \\
+        --tmp-dir . \\
         -O ${prefix}.bam \\
         $options.args
 
diff --git a/modules/gatk4/applybqsr/meta.yml b/modules/gatk4/applybqsr/meta.yml
index e09e8c52..b002dca6 100644
--- a/modules/gatk4/applybqsr/meta.yml
+++ b/modules/gatk4/applybqsr/meta.yml
@@ -20,10 +20,14 @@ input:
       description: |
         Groovy Map containing sample information
         e.g. [ id:'test', single_end:false ]
-  - bam:
+  - input:
       type: file
-      description: BAM file from alignment
-      pattern: "*.{bam}"
+      description: BAM/CRAM file from alignment
+      pattern: "*.{bam,cram}"
+  - input_index:
+      type: file
+      description: BAI/CRAI file from alignment
+      pattern: "*.{bai,crai}"
   - bqsr_table:
       type: file
       description: Recalibration table from gatk4_baserecalibrator
diff --git a/modules/gatk4/baserecalibrator/main.nf b/modules/gatk4/baserecalibrator/main.nf
index 6033fbf1..85c30daf 100644
--- a/modules/gatk4/baserecalibrator/main.nf
+++ b/modules/gatk4/baserecalibrator/main.nf
@@ -19,7 +19,7 @@ process GATK4_BASERECALIBRATOR {
     }
 
     input:
-    tuple val(meta), path(bam), path(bai)
+    tuple val(meta), path(input), path(input_index)
     path fasta
     path fastaidx
     path dict
@@ -35,12 +35,19 @@ process GATK4_BASERECALIBRATOR {
     def prefix   = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
     def intervalsCommand = intervalsBed ? "-L ${intervalsBed}" : ""
     def sitesCommand = knownSites.collect{"--known-sites ${it}"}.join(' ')
+
+    // Heap size (GB) handed to the JVM; stays at 3 when the task has no memory directive.
+    def avail_mem = task.memory ? task.memory.giga : 3
+    if (!task.memory) {
+        log.info '[GATK BaseRecalibrator] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
+    }
     """
-    gatk BaseRecalibrator  \
+    gatk --java-options "-Xmx${avail_mem}g" BaseRecalibrator \
         -R $fasta \
-        -I $bam \
+        -I $input \
         $sitesCommand \
         $intervalsCommand \
+        --tmp-dir . \
         $options.args \
         -O ${prefix}.table
 
diff --git a/modules/gatk4/baserecalibrator/meta.yml b/modules/gatk4/baserecalibrator/meta.yml
index d579d9e5..7fd273e1 100644
--- a/modules/gatk4/baserecalibrator/meta.yml
+++ b/modules/gatk4/baserecalibrator/meta.yml
@@ -20,10 +20,14 @@ input:
       description: |
         Groovy Map containing sample information
         e.g. [ id:'test', single_end:false ]
-  - bam:
+  - input:
       type: file
-      description: BAM file from alignment
-      pattern: "*.{bam}"
+      description: BAM/CRAM file from alignment
+      pattern: "*.{bam,cram}"
+  - input_index:
+      type: file
+      description: BAI/CRAI file from alignment
+      pattern: "*.{bai,crai}"
   - fasta:
       type: file
       description: The reference fasta file
@@ -57,3 +61,4 @@ output:
 
 authors:
   - "@yocra3"
+  - "@FriederikeHanssen"
diff --git a/modules/gatk4/haplotypecaller/main.nf b/modules/gatk4/haplotypecaller/main.nf
index 01b71ccb..4bddbb6d 100644
--- a/modules/gatk4/haplotypecaller/main.nf
+++ b/modules/gatk4/haplotypecaller/main.nf
@@ -19,10 +19,13 @@ process GATK4_HAPLOTYPECALLER {
     }
 
     input:
-    tuple val(meta), path(bam), path(bai)
+    tuple val(meta), path(input), path(input_index)
     path fasta
     path fai
     path dict
+    path dbsnp
+    path dbsnp_tbi
+    path interval
 
     output:
     tuple val(meta), path("*.vcf.gz"), emit: vcf
@@ -30,8 +33,10 @@ process GATK4_HAPLOTYPECALLER {
     path "versions.yml"              , emit: versions
 
     script:
-    def prefix    = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
-    def avail_mem = 3
+    def prefix          = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
+    def interval_option = interval ? "-L ${interval}" : ""
+    def dbsnp_option    = dbsnp ? "-D ${dbsnp}" : ""
+    def avail_mem       = 3
     if (!task.memory) {
         log.info '[GATK HaplotypeCaller] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
     } else {
@@ -42,9 +47,12 @@ process GATK4_HAPLOTYPECALLER {
         --java-options "-Xmx${avail_mem}g" \\
         HaplotypeCaller \\
         -R $fasta \\
-        -I $bam \\
+        -I $input \\
+        ${dbsnp_option} \\
+        ${interval_option} \\
         -O ${prefix}.vcf.gz \\
-        $options.args
+        $options.args \\
+        --tmp-dir .
 
     cat <<-END_VERSIONS > versions.yml
     ${getProcessName(task.process)}:
diff --git a/modules/gatk4/haplotypecaller/meta.yml b/modules/gatk4/haplotypecaller/meta.yml
index 6a1bd7ed..6c9d0891 100644
--- a/modules/gatk4/haplotypecaller/meta.yml
+++ b/modules/gatk4/haplotypecaller/meta.yml
@@ -21,14 +21,14 @@ input:
       description: |
         Groovy Map containing sample information
         e.g. [ id:'test', single_end:false ]
-  - bam:
+  - input:
       type: file
-      description: BAM file
-      pattern: "*.bam"
-  - bai:
+      description: BAM/CRAM file from alignment
+      pattern: "*.{bam,cram}"
+  - input_index:
       type: file
-      description: Index of BAM file
-      pattern: "*.bam.bai"
+      description: BAI/CRAI file from alignment
+      pattern: "*.{bai,crai}"
   - fasta:
       type: file
       description: The reference fasta file
@@ -41,6 +41,16 @@ input:
       type: file
       description: GATK sequence dictionary
       pattern: "*.dict"
+  - dbsnp:
+      type: file
+      description: VCF file containing known sites (optional)
+  - dbsnp_tbi:
+      type: file
+      description: VCF index of dbsnp (optional)
+  - interval:
+      type: file
+      description: Bed file with the genomic regions included in the library (optional)
+
 output:
   - meta:
       type: map
@@ -62,3 +72,4 @@ output:
 
 authors:
   - "@suzannejin"
+  - "@FriederikeHanssen"
diff --git a/modules/gatk4/markduplicates/main.nf b/modules/gatk4/markduplicates/main.nf
index 8f94f4dd..b1ff5222 100644
--- a/modules/gatk4/markduplicates/main.nf
+++ b/modules/gatk4/markduplicates/main.nf
@@ -19,21 +19,28 @@ process GATK4_MARKDUPLICATES {
     }
 
     input:
-    tuple val(meta), path(bam)
+    tuple val(meta), path(bams)
 
     output:
     tuple val(meta), path("*.bam")    , emit: bam
+    tuple val(meta), path("*.bai")    , emit: bai
     tuple val(meta), path("*.metrics"), emit: metrics
     path "versions.yml"               , emit: versions
 
     script:
     def prefix   = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
+    def bam_list = bams.collect(){ bam -> "--INPUT ".concat(bam.toString()) }.join(" ")
+    def avail_mem       = 3 // JVM heap size (GB) used when the task has no memory directive
+    if (!task.memory) {
+        log.info '[GATK MarkDuplicates] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
+    } else {
+        avail_mem = task.memory.giga
+    }
     """
-    gatk MarkDuplicates \\
+    gatk --java-options "-Xmx${avail_mem}g" MarkDuplicates \\
-        --INPUT $bam \\
+        $bam_list \\
         --METRICS_FILE ${prefix}.metrics \\
         --TMP_DIR . \\
-        --ASSUME_SORT_ORDER coordinate \\
         --CREATE_INDEX true \\
         --OUTPUT ${prefix}.bam \\
         $options.args
diff --git a/modules/gatk4/markduplicates/meta.yml b/modules/gatk4/markduplicates/meta.yml
index 59aaad4d..5777067a 100644
--- a/modules/gatk4/markduplicates/meta.yml
+++ b/modules/gatk4/markduplicates/meta.yml
@@ -47,3 +47,4 @@ output:
 
 authors:
   - "@ajodeh-juma"
+  - "@FriederikeHanssen"
diff --git a/modules/manta/germline/main.nf b/modules/manta/germline/main.nf
index ca2ac9dc..f957a7ec 100644
--- a/modules/manta/germline/main.nf
+++ b/modules/manta/germline/main.nf
@@ -19,7 +19,7 @@ process MANTA_GERMLINE {
     }
 
     input:
-    tuple val(meta), path(cram), path(crai)
+    tuple val(meta), path(input), path(input_index)
     path fasta
     path fai
     path target_bed
@@ -39,7 +39,7 @@ process MANTA_GERMLINE {
     def options_manta = target_bed ? "--exome --callRegions $target_bed" : ""
     """
     configManta.py \
-        --bam $cram \
+        --bam $input \
         --reference $fasta \
         $options_manta \
         --runDir manta
diff --git a/modules/manta/germline/meta.yml b/modules/manta/germline/meta.yml
index 7933fd6c..3bdb8264 100644
--- a/modules/manta/germline/meta.yml
+++ b/modules/manta/germline/meta.yml
@@ -23,11 +23,11 @@ input:
       description: |
         Groovy Map containing sample information
         e.g. [ id:'test', single_end:false ]
-  - cram:
+  - input:
       type: file
       description: BAM/CRAM/SAM file
       pattern: "*.{bam,cram,sam}"
-  - crai:
+  - input_index:
       type: file
       description: BAM/CRAM/SAM index file
       pattern: "*.{bai,crai,sai}"
diff --git a/modules/manta/somatic/main.nf b/modules/manta/somatic/main.nf
index 16a30f17..f912d478 100644
--- a/modules/manta/somatic/main.nf
+++ b/modules/manta/somatic/main.nf
@@ -19,7 +19,7 @@ process MANTA_SOMATIC {
     }
 
     input:
-    tuple val(meta), path(cram_normal), path(crai_normal), path(cram_tumor), path(crai_tumor)
+    tuple val(meta), path(input_normal), path(input_index_normal), path(input_tumor), path(input_index_tumor)
     path fasta
     path fai
     path target_bed
@@ -42,8 +42,8 @@ process MANTA_SOMATIC {
 
     """
     configManta.py \
-        --tumorBam $cram_tumor \
-        --normalBam $cram_normal \
+        --tumorBam $input_tumor \
+        --normalBam $input_normal \
         --reference $fasta \
         $options_manta \
         --runDir manta
diff --git a/modules/manta/somatic/meta.yml b/modules/manta/somatic/meta.yml
index 08103ba7..ddd0eafe 100644
--- a/modules/manta/somatic/meta.yml
+++ b/modules/manta/somatic/meta.yml
@@ -23,19 +23,19 @@ input:
       description: |
         Groovy Map containing sample information
         e.g. [ id:'test', single_end:false ]
-  - cram_normal:
+  - input_normal:
       type: file
       description: BAM/CRAM/SAM file
       pattern: "*.{bam,cram,sam}"
-  - crai_normal:
+  - input_index_normal:
       type: file
       description: BAM/CRAM/SAM index file
       pattern: "*.{bai,crai,sai}"
-  - cram_tumor:
+  - input_tumor:
       type: file
       description: BAM/CRAM/SAM file
       pattern: "*.{bam,cram,sam}"
-  - crai_tumor:
+  - input_index_tumor:
       type: file
       description: BAM/CRAM/SAM index file
       pattern: "*.{bai,crai,sai}"
diff --git a/modules/manta/tumoronly/main.nf b/modules/manta/tumoronly/main.nf
index a86279df..f20e8128 100644
--- a/modules/manta/tumoronly/main.nf
+++ b/modules/manta/tumoronly/main.nf
@@ -19,7 +19,7 @@ process MANTA_TUMORONLY {
     }
 
     input:
-    tuple val(meta), path(cram), path(crai)
+    tuple val(meta), path(input), path(input_index)
     path fasta
     path fai
     path target_bed
@@ -39,7 +39,7 @@ process MANTA_TUMORONLY {
     def options_manta = target_bed ? "--exome --callRegions $target_bed" : ""
     """
     configManta.py \
-        --tumorBam $cram \
+        --tumorBam $input \
         --reference $fasta \
         $options_manta \
         --runDir manta
diff --git a/modules/manta/tumoronly/meta.yml b/modules/manta/tumoronly/meta.yml
index d4af9402..86d1c6c0 100644
--- a/modules/manta/tumoronly/meta.yml
+++ b/modules/manta/tumoronly/meta.yml
@@ -23,11 +23,11 @@ input:
       description: |
         Groovy Map containing sample information
         e.g. [ id:'test', single_end:false ]
-  - cram:
+  - input:
       type: file
       description: BAM/CRAM/SAM file
       pattern: "*.{bam,cram,sam}"
-  - crai:
+  - input_index:
       type: file
       description: BAM/CRAM/SAM index file
       pattern: "*.{bai,crai,sai}"
@@ -54,7 +54,6 @@ output:
       description: |
         Groovy Map containing sample information
         e.g. [ id:'test', single_end:false ]
-
   - candidate_small_indels_vcf:
       type: file
       description: Gzipped VCF file containing variants
diff --git a/modules/samtools/merge/main.nf b/modules/samtools/merge/main.nf
index 34c40d57..fefb423b 100644
--- a/modules/samtools/merge/main.nf
+++ b/modules/samtools/merge/main.nf
@@ -19,16 +19,20 @@ process SAMTOOLS_MERGE {
     }
 
     input:
-    tuple val(meta), path(bams)
+    tuple val(meta), path(input_files)
+    path fasta
 
     output:
-    tuple val(meta), path("${prefix}.bam"), emit: bam
-    path  "versions.yml"                  , emit: versions
+    tuple val(meta), path("${prefix}.bam"),  optional:true, emit: bam
+    tuple val(meta), path("${prefix}.cram"), optional:true, emit: cram
+    path  "versions.yml"                                  , emit: versions
 
     script:
     prefix   = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
+    def file_type = input_files[0].getExtension()
+    def reference = fasta ? "--reference ${fasta}" : ""
     """
-    samtools merge ${prefix}.bam $bams
+    samtools merge ${reference} ${prefix}.${file_type} $input_files
     cat <<-END_VERSIONS > versions.yml
     ${getProcessName(task.process)}:
         ${getSoftwareName(task.process)}: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
diff --git a/modules/samtools/merge/meta.yml b/modules/samtools/merge/meta.yml
index 78b75b36..2576a3a3 100644
--- a/modules/samtools/merge/meta.yml
+++ b/modules/samtools/merge/meta.yml
@@ -1,5 +1,5 @@
 name: samtools_merge
-description: Merge BAM file
+description: Merge BAM or CRAM file
 keywords:
     - merge
     - bam
@@ -21,20 +21,28 @@ input:
         description: |
             Groovy Map containing sample information
             e.g. [ id:'test', single_end:false ]
-    - bam:
+    - input_files:
         type: file
-        description: BAM file
+        description: BAM/CRAM file
         pattern: "*.{bam,cram,sam}"
+    - fasta:
+        type: optional file
+        description: Reference file the CRAM was created with
+        pattern: "*.{fasta,fa}"
 output:
     - meta:
         type: map
         description: |
             Groovy Map containing sample information
             e.g. [ id:'test', single_end:false ]
-    - merged_bam:
+    - bam:
         type: file
         description: BAM file
         pattern: "*.{bam}"
+    - cram:
+        type: file
+        description: CRAM file
+        pattern: "*.{cram}"
     - versions:
         type: file
         description: File containing software versions
@@ -43,3 +51,4 @@ authors:
     - "@drpatelh"
     - "@yuukiiwa "
     - "@maxulysse"
+    - "@FriederikeHanssen"
diff --git a/modules/samtools/stats/main.nf b/modules/samtools/stats/main.nf
index 6218dd2d..aab43410 100644
--- a/modules/samtools/stats/main.nf
+++ b/modules/samtools/stats/main.nf
@@ -19,15 +19,17 @@ process SAMTOOLS_STATS {
     }
 
     input:
-    tuple val(meta), path(bam), path(bai)
+    tuple val(meta), path(input), path(input_index)
+    path fasta
 
     output:
     tuple val(meta), path("*.stats"), emit: stats
     path  "versions.yml"            , emit: versions
 
     script:
+    def reference = fasta ? "--reference ${fasta}" : ""
     """
-    samtools stats $bam > ${bam}.stats
+    samtools stats ${reference} ${input} > ${input}.stats
     cat <<-END_VERSIONS > versions.yml
     ${getProcessName(task.process)}:
         ${getSoftwareName(task.process)}: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
diff --git a/modules/samtools/stats/meta.yml b/modules/samtools/stats/meta.yml
index ae41498a..869e62e3 100644
--- a/modules/samtools/stats/meta.yml
+++ b/modules/samtools/stats/meta.yml
@@ -22,14 +22,18 @@ input:
         description: |
             Groovy Map containing sample information
             e.g. [ id:'test', single_end:false ]
-    - bam:
-        type: file
-        description: BAM/CRAM/SAM file
-        pattern: "*.{bam,cram,sam}"
-    - bai:
-        type: file
-        description: Index for BAM/CRAM/SAM file
-        pattern: "*.{bai,crai,sai}"
+    - input:
+        type: file
+        description: BAM/CRAM file from alignment
+        pattern: "*.{bam,cram}"
+    - input_index:
+        type: file
+        description: BAI/CRAI file from alignment
+        pattern: "*.{bai,crai}"
+    - fasta:
+        type: optional file
+        description: Reference file the CRAM was created with
+        pattern: "*.{fasta,fa}"
 output:
     - meta:
         type: map
@@ -46,3 +50,4 @@ output:
         pattern: "versions.yml"
 authors:
     - "@drpatelh"
+    - "@FriederikeHanssen"
diff --git a/modules/samtools/view/main.nf b/modules/samtools/view/main.nf
index ec1663e0..b7a047ee 100644
--- a/modules/samtools/view/main.nf
+++ b/modules/samtools/view/main.nf
@@ -19,16 +19,20 @@ process SAMTOOLS_VIEW {
     }
 
     input:
-    tuple val(meta), path(bam)
+    tuple val(meta), path(input)
+    path fasta
 
     output:
-    tuple val(meta), path("*.bam"), emit: bam
-    path  "versions.yml"          , emit: versions
+    tuple val(meta), path("*.bam") , optional: true, emit: bam
+    tuple val(meta), path("*.cram"), optional: true, emit: cram
+    path  "versions.yml"                           , emit: versions
 
     script:
     def prefix   = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
+    def reference = fasta ? "--reference ${fasta} -C" : ""
+    def file_type = input.getExtension()
     """
-    samtools view $options.args $bam > ${prefix}.bam
+    samtools view ${reference} $options.args $input > ${prefix}.${file_type}
     cat <<-END_VERSIONS > versions.yml
     ${getProcessName(task.process)}:
         ${getSoftwareName(task.process)}: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
diff --git a/modules/samtools/view/meta.yml b/modules/samtools/view/meta.yml
index 29d1ecc1..8abf34af 100644
--- a/modules/samtools/view/meta.yml
+++ b/modules/samtools/view/meta.yml
@@ -21,10 +21,14 @@ input:
         description: |
             Groovy Map containing sample information
             e.g. [ id:'test', single_end:false ]
-    - bam:
+    - input:
         type: file
         description: BAM/CRAM/SAM file
         pattern: "*.{bam,cram,sam}"
+    - fasta:
+        type: optional file
+        description: Reference file the CRAM was created with
+        pattern: "*.{fasta,fa}"
 output:
     - meta:
         type: map
@@ -33,8 +37,12 @@ output:
             e.g. [ id:'test', single_end:false ]
     - bam:
         type: file
-        description: filtered/converted BAM/CRAM/SAM file
-        pattern: "*.{bam,cram,sam}"
+        description: filtered/converted BAM/SAM file
+        pattern: "*.{bam,sam}"
+    - cram:
+        type: file
+        description: filtered/converted CRAM file
+        pattern: "*.cram"
     - versions:
         type: file
         description: File containing software versions
@@ -42,3 +50,4 @@ output:
 authors:
     - "@drpatelh"
     - "@joseespinosa"
+    - "@FriederikeHanssen"
diff --git a/modules/strelka/germline/main.nf b/modules/strelka/germline/main.nf
index 0d201940..5e913c40 100644
--- a/modules/strelka/germline/main.nf
+++ b/modules/strelka/germline/main.nf
@@ -19,7 +19,7 @@ process STRELKA_GERMLINE {
     }
 
     input:
-    tuple val(meta), path(bam), path(bai)
+    tuple val(meta), path(input), path(input_index)
     path  fasta
     path  fai
     path  target_bed
@@ -38,7 +38,7 @@ process STRELKA_GERMLINE {
     def regions  = target_bed ? "--exome --callRegions ${target_bed}" : ""
     """
     configureStrelkaGermlineWorkflow.py \\
-        --bam $bam \\
+        --bam $input \\
         --referenceFasta $fasta \\
         $regions \\
         $options.args \\
diff --git a/modules/strelka/germline/meta.yml b/modules/strelka/germline/meta.yml
index 3f86b045..2eeb0f8f 100644
--- a/modules/strelka/germline/meta.yml
+++ b/modules/strelka/germline/meta.yml
@@ -21,14 +21,14 @@ input:
       description: |
         Groovy Map containing sample information
         e.g. [ id:'test']
-  - bam:
+  - input:
       type: file
-      description: BAM file
-      pattern: "*.{bam}"
-  - bai:
+      description: BAM/CRAM file
+      pattern: "*.{bam,cram}"
+  - input_index:
       type: file
-      description: BAM index file
-      pattern: "*.{bai}"
+      description: BAI/CRAI index file
+      pattern: "*.{bai,crai}"
   - target_bed:
       type: file
       description: An optional bed file
diff --git a/modules/strelka/somatic/main.nf b/modules/strelka/somatic/main.nf
index 02bd5822..633b0a2c 100644
--- a/modules/strelka/somatic/main.nf
+++ b/modules/strelka/somatic/main.nf
@@ -19,7 +19,7 @@ process STRELKA_SOMATIC {
     }
 
     input:
-    tuple val(meta), path(cram_normal), path(crai_normal), path(cram_tumor), path(crai_tumor),  path(manta_candidate_small_indels), path(manta_candidate_small_indels_tbi)
+    tuple val(meta), path(input_normal), path(input_index_normal), path(input_tumor), path(input_index_tumor),  path(manta_candidate_small_indels), path(manta_candidate_small_indels_tbi)
     path  fasta
     path  fai
     path  target_bed
@@ -38,8 +38,8 @@ process STRELKA_SOMATIC {
     def options_manta = manta_candidate_small_indels ? "--indelCandidates ${manta_candidate_small_indels}" : ""
     """
     configureStrelkaSomaticWorkflow.py \\
-        --tumor $cram_tumor \\
-        --normal $cram_normal \\
+        --tumor $input_tumor \\
+        --normal $input_normal \\
         --referenceFasta $fasta \\
         $options_target_bed \\
         $options_manta \\
diff --git a/modules/strelka/somatic/meta.yml b/modules/strelka/somatic/meta.yml
index ce5acb33..076c1036 100644
--- a/modules/strelka/somatic/meta.yml
+++ b/modules/strelka/somatic/meta.yml
@@ -21,19 +21,19 @@ input:
       description: |
         Groovy Map containing sample information
         e.g. [ id:'test', single_end:false ]
-  - cram_normal:
+  - input_normal:
       type: file
       description: BAM/CRAM/SAM file
       pattern: "*.{bam,cram,sam}"
-  - crai_normal:
+  - input_index_normal:
       type: file
       description: BAM/CRAM/SAM index file
       pattern: "*.{bai,crai,sai}"
-  - cram_tumor:
+  - input_tumor:
       type: file
       description: BAM/CRAM/SAM file
       pattern: "*.{bam,cram,sam}"
-  - crai_tumor:
+  - input_index_tumor:
       type: file
       description: BAM/CRAM/SAM index file
       pattern: "*.{bai,crai,sai}"
diff --git a/subworkflows/nf-core/bam_stats_samtools/main.nf b/subworkflows/nf-core/bam_stats_samtools/main.nf
index 9276232c..463ec99d 100644
--- a/subworkflows/nf-core/bam_stats_samtools/main.nf
+++ b/subworkflows/nf-core/bam_stats_samtools/main.nf
@@ -15,7 +15,7 @@ workflow BAM_STATS_SAMTOOLS {
     main:
     ch_versions = Channel.empty()
 
-    SAMTOOLS_STATS ( ch_bam_bai )
+    SAMTOOLS_STATS ( ch_bam_bai, [] )
     ch_versions = ch_versions.mix(SAMTOOLS_STATS.out.versions.first())
 
     SAMTOOLS_FLAGSTAT ( ch_bam_bai )
diff --git a/tests/modules/gatk4/applybqsr/main.nf b/tests/modules/gatk4/applybqsr/main.nf
index 5fb590b0..80b51015 100644
--- a/tests/modules/gatk4/applybqsr/main.nf
+++ b/tests/modules/gatk4/applybqsr/main.nf
@@ -30,3 +30,17 @@ workflow test_gatk4_applybqsr_intervals {
 
   GATK4_APPLYBQSR ( input, fasta, fai, dict, intervals )
 }
+
+workflow test_gatk4_applybqsr_cram {
+    input = [ [ id:'test' ], // meta map
+                file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true),
+                file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true),
+                file(params.test_data['homo_sapiens']['illumina']['test_baserecalibrator_table'], checkIfExists: true)
+              ]
+    fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
+    fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
+    dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
+    intervals = file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true)
+
+  GATK4_APPLYBQSR ( input, fasta, fai, dict, intervals )
+}
diff --git a/tests/modules/gatk4/applybqsr/test.yml b/tests/modules/gatk4/applybqsr/test.yml
index 983cc09a..ed89c6ff 100644
--- a/tests/modules/gatk4/applybqsr/test.yml
+++ b/tests/modules/gatk4/applybqsr/test.yml
@@ -1,17 +1,26 @@
 - name: gatk4 applybqsr test_gatk4_applybqsr
   command: nextflow run tests/modules/gatk4/applybqsr -entry test_gatk4_applybqsr -c tests/config/nextflow.config
   tags:
-    - gatk4
     - gatk4/applybqsr
+    - gatk4
   files:
     - path: output/gatk4/test.bam
-      md5sum: dac716c394db5e83c12b44355c098ca7
+      md5sum: 87a2eabae2b7b41574f966612b5addae
 
 - name: gatk4 applybqsr test_gatk4_applybqsr_intervals
   command: nextflow run tests/modules/gatk4/applybqsr -entry test_gatk4_applybqsr_intervals -c tests/config/nextflow.config
   tags:
-    - gatk4
     - gatk4/applybqsr
+    - gatk4
   files:
     - path: output/gatk4/test.bam
-      md5sum: 400441dbe5344658580ba0a24ba57069
+      md5sum: 9c015d3c1dbd9eee793b7386f432b6aa
+
+- name: gatk4 applybqsr test_gatk4_applybqsr_cram
+  command: nextflow run tests/modules/gatk4/applybqsr -entry test_gatk4_applybqsr_cram -c tests/config/nextflow.config
+  tags:
+    - gatk4/applybqsr
+    - gatk4
+  files:
+    - path: output/gatk4/test.bam
+      md5sum: 02f84815fdbc99c21c8d42ebdcabbbf7
diff --git a/tests/modules/gatk4/baserecalibrator/main.nf b/tests/modules/gatk4/baserecalibrator/main.nf
index 671a1d67..a50c09e3 100644
--- a/tests/modules/gatk4/baserecalibrator/main.nf
+++ b/tests/modules/gatk4/baserecalibrator/main.nf
@@ -18,6 +18,21 @@ workflow test_gatk4_baserecalibrator {
     GATK4_BASERECALIBRATOR ( input, fasta, fai, dict, [], sites, sites_tbi )
 }
 
+workflow test_gatk4_baserecalibrator_cram {
+   input = [ [ id:'test' ], // meta map
+                file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true),
+                file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true),
+                file(params.test_data['homo_sapiens']['illumina']['test_baserecalibrator_table'], checkIfExists: true)
+              ]
+    fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
+    fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
+    dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
+    sites = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz'], checkIfExists: true)
+    sites_tbi = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz_tbi'], checkIfExists: true)
+
+    GATK4_BASERECALIBRATOR ( input, fasta, fai, dict, [], sites, sites_tbi )
+}
+
 workflow test_gatk4_baserecalibrator_intervals {
     input     = [ [ id:'test' ], // meta map
                   file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
diff --git a/tests/modules/gatk4/baserecalibrator/test.yml b/tests/modules/gatk4/baserecalibrator/test.yml
index 3c30d78f..a15c9ee3 100644
--- a/tests/modules/gatk4/baserecalibrator/test.yml
+++ b/tests/modules/gatk4/baserecalibrator/test.yml
@@ -1,17 +1,26 @@
 - name: gatk4 baserecalibrator test_gatk4_baserecalibrator
   command: nextflow run tests/modules/gatk4/baserecalibrator -entry test_gatk4_baserecalibrator -c tests/config/nextflow.config
   tags:
-    - gatk4/baserecalibrator
     - gatk4
+    - gatk4/baserecalibrator
   files:
     - path: output/gatk4/test.table
       md5sum: e2e43abdc0c943c1a54dae816d0b9ea7
 
+- name: gatk4 baserecalibrator test_gatk4_baserecalibrator_cram
+  command: nextflow run tests/modules/gatk4/baserecalibrator -entry test_gatk4_baserecalibrator_cram -c tests/config/nextflow.config
+  tags:
+    - gatk4
+    - gatk4/baserecalibrator
+  files:
+    - path: output/gatk4/test.table
+      md5sum: 35d89a3811aa31711fc9815b6b80e6ec
+
 - name: gatk4 baserecalibrator test_gatk4_baserecalibrator_intervals
   command: nextflow run tests/modules/gatk4/baserecalibrator -entry test_gatk4_baserecalibrator_intervals -c tests/config/nextflow.config
   tags:
-    - gatk4/baserecalibrator
     - gatk4
+    - gatk4/baserecalibrator
   files:
     - path: output/gatk4/test.table
       md5sum: 9ecb5f00a2229291705addc09c0ec231
@@ -19,8 +28,8 @@
 - name: gatk4 baserecalibrator test_gatk4_baserecalibrator_multiple_sites
   command: nextflow run tests/modules/gatk4/baserecalibrator -entry test_gatk4_baserecalibrator_multiple_sites -c tests/config/nextflow.config
   tags:
-    - gatk4/baserecalibrator
     - gatk4
+    - gatk4/baserecalibrator
   files:
     - path: output/gatk4/test.table
       md5sum: e2e43abdc0c943c1a54dae816d0b9ea7
diff --git a/tests/modules/gatk4/haplotypecaller/main.nf b/tests/modules/gatk4/haplotypecaller/main.nf
index 76059074..fd5f30fa 100644
--- a/tests/modules/gatk4/haplotypecaller/main.nf
+++ b/tests/modules/gatk4/haplotypecaller/main.nf
@@ -13,5 +13,33 @@ workflow test_gatk4_haplotypecaller {
     fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true)
     dict = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true)
 
-    GATK4_HAPLOTYPECALLER ( input, fasta, fai, dict )
+    GATK4_HAPLOTYPECALLER ( input, fasta, fai, dict, [], [], [] )
+}
+
+workflow test_gatk4_haplotypecaller_cram {
+    input = [ [ id:'test' ], // meta map
+                file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true),
+                file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true)
+              ]
+    fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
+    fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
+    dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
+
+    GATK4_HAPLOTYPECALLER ( input, fasta, fai, dict, [], [], [] )
+}
+
+workflow test_gatk4_haplotypecaller_intervals_dbsnp {
+   input = [ [ id:'test' ], // meta map
+                file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true),
+                file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true)
+            ]
+
+    fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
+    fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
+    dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
+    sites = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz'], checkIfExists: true)
+    sites_tbi = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz_tbi'], checkIfExists: true)
+    intervals = file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true)
+
+    GATK4_HAPLOTYPECALLER ( input, fasta, fai, dict, sites, sites_tbi, intervals )
 }
diff --git a/tests/modules/gatk4/haplotypecaller/test.yml b/tests/modules/gatk4/haplotypecaller/test.yml
index dee2a2ab..480ff8f0 100644
--- a/tests/modules/gatk4/haplotypecaller/test.yml
+++ b/tests/modules/gatk4/haplotypecaller/test.yml
@@ -1,13 +1,26 @@
 - name: gatk4 haplotypecaller test_gatk4_haplotypecaller
   command: nextflow run tests/modules/gatk4/haplotypecaller -entry test_gatk4_haplotypecaller -c tests/config/nextflow.config
   tags:
-    - gatk4
     - gatk4/haplotypecaller
+    - gatk4
+  files:
+    - path: output/gatk4/test.vcf.gz
+    - path: output/gatk4/test.vcf.gz.tbi
+
+- name: gatk4 haplotypecaller test_gatk4_haplotypecaller_cram
+  command: nextflow run tests/modules/gatk4/haplotypecaller -entry test_gatk4_haplotypecaller_cram -c tests/config/nextflow.config
+  tags:
+    - gatk4/haplotypecaller
+    - gatk4
+  files:
+    - path: output/gatk4/test.vcf.gz
+    - path: output/gatk4/test.vcf.gz.tbi
+
+- name: gatk4 haplotypecaller test_gatk4_haplotypecaller_intervals_dbsnp
+  command: nextflow run tests/modules/gatk4/haplotypecaller -entry test_gatk4_haplotypecaller_intervals_dbsnp -c tests/config/nextflow.config
+  tags:
+    - gatk4/haplotypecaller
+    - gatk4
   files:
     - path: output/gatk4/test.vcf.gz
-      should_exist: true
-      contains:
-        - 'MT192765.1'
-        - '54.60'
-        - '37.32'
     - path: output/gatk4/test.vcf.gz.tbi
diff --git a/tests/modules/gatk4/markduplicates/main.nf b/tests/modules/gatk4/markduplicates/main.nf
index 06425088..b9709dc0 100644
--- a/tests/modules/gatk4/markduplicates/main.nf
+++ b/tests/modules/gatk4/markduplicates/main.nf
@@ -11,3 +11,12 @@ workflow test_gatk4_markduplicates {
 
     GATK4_MARKDUPLICATES ( input )
 }
+
+workflow test_gatk4_markduplicates_multiple_bams {
+    input = [ [ id:'test', single_end:false ], // meta map
+              file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
+              file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam'], checkIfExists: true)
+            ]
+
+    GATK4_MARKDUPLICATES ( input )
+}
diff --git a/tests/modules/gatk4/markduplicates/test.yml b/tests/modules/gatk4/markduplicates/test.yml
index 028147e6..99296ca4 100644
--- a/tests/modules/gatk4/markduplicates/test.yml
+++ b/tests/modules/gatk4/markduplicates/test.yml
@@ -1,8 +1,23 @@
 - name: gatk4 markduplicates test_gatk4_markduplicates
   command: nextflow run tests/modules/gatk4/markduplicates -entry test_gatk4_markduplicates -c tests/config/nextflow.config
   tags:
-    - gatk4
     - gatk4/markduplicates
+    - gatk4
   files:
+    - path: output/gatk4/test.bai
+      md5sum: e9c125e82553209933883b4fe2b8d7c2
     - path: output/gatk4/test.bam
-      md5sum: 3b6facab3afbacfa08a7a975efbd2c6b
+      md5sum: bda9a7bf5057f2288ed70be3eb8a753f
+    - path: output/gatk4/test.metrics
+
+- name: gatk4 markduplicates test_gatk4_markduplicates_multiple_bams
+  command: nextflow run tests/modules/gatk4/markduplicates -entry test_gatk4_markduplicates_multiple_bams -c tests/config/nextflow.config
+  tags:
+    - gatk4/markduplicates
+    - gatk4
+  files:
+    - path: output/gatk4/test.bai
+      md5sum: 93cebe29e7cca2064262b739235cca9b
+    - path: output/gatk4/test.bam
+      md5sum: dcd6f584006b04141fb787001a8ecacc
+    - path: output/gatk4/test.metrics
diff --git a/tests/modules/samtools/merge/main.nf b/tests/modules/samtools/merge/main.nf
index a4511a34..07485df1 100644
--- a/tests/modules/samtools/merge/main.nf
+++ b/tests/modules/samtools/merge/main.nf
@@ -11,5 +11,15 @@ workflow test_samtools_merge {
                  file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam'], checkIfExists: true)]
                ]
 
-    SAMTOOLS_MERGE ( input )
+    SAMTOOLS_MERGE ( input, [] )
+}
+
+workflow test_samtools_merge_cram {
+    input = [ [ id: 'test' ], // meta map
+               [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_cram'], checkIfExists: true),
+                 file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_cram'], checkIfExists: true),
+                 ]
+               ]
+    fasta   = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
+    SAMTOOLS_MERGE ( input, fasta )
 }
diff --git a/tests/modules/samtools/merge/test.yml b/tests/modules/samtools/merge/test.yml
index d0674ca4..b39ca2ec 100644
--- a/tests/modules/samtools/merge/test.yml
+++ b/tests/modules/samtools/merge/test.yml
@@ -1,7 +1,15 @@
-- name: samtools merge
-  command: nextflow run ./tests/modules/samtools/merge -entry test_samtools_merge -c tests/config/nextflow.config
+- name: samtools merge test_samtools_merge
+  command: nextflow run tests/modules/samtools/merge -entry test_samtools_merge -c tests/config/nextflow.config
   tags:
-    - samtools
     - samtools/merge
+    - samtools
   files:
     - path: output/samtools/test_merged.bam
+
+- name: samtools merge test_samtools_merge_cram
+  command: nextflow run tests/modules/samtools/merge -entry test_samtools_merge_cram -c tests/config/nextflow.config
+  tags:
+    - samtools/merge
+    - samtools
+  files:
+    - path: output/samtools/test_merged.cram
diff --git a/tests/modules/samtools/stats/main.nf b/tests/modules/samtools/stats/main.nf
index 04a689fe..8e8b0c88 100644
--- a/tests/modules/samtools/stats/main.nf
+++ b/tests/modules/samtools/stats/main.nf
@@ -10,5 +10,15 @@ workflow test_samtools_stats {
                 file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true)
             ]
 
-    SAMTOOLS_STATS ( input )
+    SAMTOOLS_STATS ( input, [])
+}
+
+workflow test_samtools_stats_cram {
+   input = [ [ id: 'test' ], // meta map
+               file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_cram'], checkIfExists: true),
+               file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_cram_crai'], checkIfExists: true)
+            ]
+    fasta   = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
+
+    SAMTOOLS_STATS ( input, fasta )
 }
diff --git a/tests/modules/samtools/stats/test.yml b/tests/modules/samtools/stats/test.yml
index cf44b846..a194c666 100644
--- a/tests/modules/samtools/stats/test.yml
+++ b/tests/modules/samtools/stats/test.yml
@@ -1,8 +1,17 @@
-- name: samtools stats
-  command: nextflow run ./tests/modules/samtools/stats -entry test_samtools_stats -c tests/config/nextflow.config
+- name: samtools stats test_samtools_stats
+  command: nextflow run tests/modules/samtools/stats -entry test_samtools_stats -c tests/config/nextflow.config
   tags:
     - samtools
     - samtools/stats
   files:
-    - path: ./output/samtools/test.paired_end.sorted.bam.stats
+    - path: output/samtools/test.paired_end.sorted.bam.stats
       md5sum: a7f36cf11fd3bf97e0a0ae29c0627296
+
+- name: samtools stats test_samtools_stats_cram
+  command: nextflow run tests/modules/samtools/stats -entry test_samtools_stats_cram -c tests/config/nextflow.config
+  tags:
+    - samtools
+    - samtools/stats
+  files:
+    - path: output/samtools/test.paired_end.recalibrated.sorted.cram.stats
+      md5sum: bd55a1da30028403f4b66dacf7a2a20e
diff --git a/tests/modules/samtools/view/main.nf b/tests/modules/samtools/view/main.nf
index c60acb73..bd270cd8 100644
--- a/tests/modules/samtools/view/main.nf
+++ b/tests/modules/samtools/view/main.nf
@@ -7,8 +7,17 @@ include { SAMTOOLS_VIEW } from '../../../../modules/samtools/view/main.nf' addPa
 workflow test_samtools_view {
     input = [ [ id:'test', single_end:false ], // meta map
                 file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true)
-
             ]
 
-    SAMTOOLS_VIEW ( input )
+    SAMTOOLS_VIEW ( input, [] )
+}
+
+workflow test_samtools_view_cram {
+   input = [ [ id: 'test' ], // meta map
+               file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_cram'], checkIfExists: true),
+               file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_cram_crai'], checkIfExists: true)
+            ]
+    fasta   = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
+
+    SAMTOOLS_VIEW ( input, fasta )
 }
diff --git a/tests/modules/samtools/view/test.yml b/tests/modules/samtools/view/test.yml
index 383dfa87..ceaa0e89 100644
--- a/tests/modules/samtools/view/test.yml
+++ b/tests/modules/samtools/view/test.yml
@@ -1,8 +1,16 @@
-- name: samtools view
+- name: samtools view test_samtools_view
   command: nextflow run tests/modules/samtools/view -entry test_samtools_view -c tests/config/nextflow.config
   tags:
-    - samtools
     - samtools/view
+    - samtools
   files:
     - path: output/samtools/test.bam
       md5sum: 8fb1e82f76416e9e30fc6b2357e2cf13
+
+- name: samtools view test_samtools_view_cram
+  command: nextflow run tests/modules/samtools/view -entry test_samtools_view_cram -c tests/config/nextflow.config
+  tags:
+    - samtools/view
+    - samtools
+  files:
+    - path: output/samtools/test.cram

From 84cb78cc98ba7fe5c9de7227cf9824b13624ce88 Mon Sep 17 00:00:00 2001
From: Daniel Lundin <daniel.lundin@lnu.se>
Date: Fri, 29 Oct 2021 13:23:34 +0200
Subject: [PATCH 3/5] Khmer normalizebymedian (#985)

* Templates for new module

* pe only test passing

* only_pe and only_se passing

* only_pe, only_se, mixed passes

* Multiple pe + se tc passes

* Passing args works

* Add 'interleaved' to description

* Fixed linting message

* Update modules/khmer/normalizebymedian/main.nf

Good point.

Co-authored-by: Daniel Straub <42973691+d4straub@users.noreply.github.com>

* Update meta.yml

Co-authored-by: Daniel Straub <42973691+d4straub@users.noreply.github.com>
---
 modules/khmer/normalizebymedian/functions.nf  | 78 +++++++++++++++++
 modules/khmer/normalizebymedian/main.nf       | 49 +++++++++++
 modules/khmer/normalizebymedian/meta.yml      | 39 +++++++++
 tests/config/pytest_modules.yml               |  4 +
 tests/modules/khmer/normalizebymedian/main.nf | 85 +++++++++++++++++++
 .../modules/khmer/normalizebymedian/test.yml  | 42 +++++++++
 6 files changed, 297 insertions(+)
 create mode 100644 modules/khmer/normalizebymedian/functions.nf
 create mode 100644 modules/khmer/normalizebymedian/main.nf
 create mode 100644 modules/khmer/normalizebymedian/meta.yml
 create mode 100644 tests/modules/khmer/normalizebymedian/main.nf
 create mode 100644 tests/modules/khmer/normalizebymedian/test.yml

diff --git a/modules/khmer/normalizebymedian/functions.nf b/modules/khmer/normalizebymedian/functions.nf
new file mode 100644
index 00000000..85628ee0
--- /dev/null
+++ b/modules/khmer/normalizebymedian/functions.nf
@@ -0,0 +1,78 @@
+//
+//  Utility functions used in nf-core DSL2 module files
+//
+
+//
+// Extract name of software tool from process name using $task.process
+//
+def getSoftwareName(task_process) {
+    return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()
+}
+
+//
+// Extract name of module from process name using $task.process
+//
+def getProcessName(task_process) {
+    return task_process.tokenize(':')[-1]
+}
+
+//
+// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules
+//
+def initOptions(Map args) {
+    def Map options = [:]
+    options.args            = args.args ?: ''
+    options.args2           = args.args2 ?: ''
+    options.args3           = args.args3 ?: ''
+    options.publish_by_meta = args.publish_by_meta ?: []
+    options.publish_dir     = args.publish_dir ?: ''
+    options.publish_files   = args.publish_files
+    options.suffix          = args.suffix ?: ''
+    return options
+}
+
+//
+// Tidy up and join elements of a list to return a path string
+//
+def getPathFromList(path_list) {
+    def paths = path_list.findAll { item -> !item?.trim().isEmpty() }      // Remove empty entries
+    paths     = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes
+    return paths.join('/')
+}
+
+//
+// Function to save/publish module results
+//
+def saveFiles(Map args) {
+    def ioptions  = initOptions(args.options)
+    def path_list = [ ioptions.publish_dir ?: args.publish_dir ]
+
+    // Do not publish versions.yml unless running from pytest workflow
+    if (args.filename.equals('versions.yml') && !System.getenv("NF_CORE_MODULES_TEST")) {
+        return null
+    }
+    if (ioptions.publish_by_meta) {
+        def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta
+        for (key in key_list) {
+            if (args.meta && key instanceof String) {
+                def path = key
+                if (args.meta.containsKey(key)) {
+                    path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key]
+                }
+                path = path instanceof String ? path : ''
+                path_list.add(path)
+            }
+        }
+    }
+    if (ioptions.publish_files instanceof Map) {
+        for (ext in ioptions.publish_files) {
+            if (args.filename.endsWith(ext.key)) {
+                def ext_list = path_list.collect()
+                ext_list.add(ext.value)
+                return "${getPathFromList(ext_list)}/$args.filename"
+            }
+        }
+    } else if (ioptions.publish_files == null) {
+        return "${getPathFromList(path_list)}/$args.filename"
+    }
+}
diff --git a/modules/khmer/normalizebymedian/main.nf b/modules/khmer/normalizebymedian/main.nf
new file mode 100644
index 00000000..234d172b
--- /dev/null
+++ b/modules/khmer/normalizebymedian/main.nf
@@ -0,0 +1,49 @@
+// Import generic module functions
+include { initOptions; saveFiles; getSoftwareName; getProcessName } from './functions'
+
+params.options = [:]
+options        = initOptions(params.options)
+
+process KHMER_NORMALIZEBYMEDIAN {   // Runs khmer's normalize-by-median.py and writes one gzipped FASTQ named after `name`
+    tag "${name}"
+    label 'process_long'
+    publishDir "${params.outdir}",
+        mode: params.publish_dir_mode,
+        saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:[:], publish_by_meta:[]) }
+
+    conda (params.enable_conda ? "bioconda::khmer=3.0.0a3" : null)
+    if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
+        container "https://depot.galaxyproject.org/singularity/khmer:3.0.0a3--py37haa7609a_2"
+    } else {
+        container "quay.io/biocontainers/khmer:3.0.0a3--py37haa7609a_2"
+    }
+
+    input:
+    path pe_reads   // paired-end input file(s); an empty value disables --paired
+    path se_reads   // single-end input file(s); an empty value omits --unpaired-reads
+    val  name       // basename of the output file; ".fastq.gz" is appended
+
+    output:
+    path "${name}.fastq.gz", emit: reads
+    path "versions.yml"    , emit: versions
+
+    script:   // helper values are declared with def so they stay local to each task instead of landing in the shared script binding
+    def pe_args = pe_reads ? "--paired" : ""                          // paired mode only when paired-end input was given
+    def se_args = se_reads ? "--unpaired-reads ${se_reads}" : ""      // single-end reads are passed through --unpaired-reads
+    def files   = pe_reads ? pe_reads : se_reads                      // positional input: paired-end files take precedence
+
+    """
+    normalize-by-median.py \\
+        -M ${task.memory.toGiga()}e9 \\
+        --gzip ${options.args} \\
+        -o ${name}.fastq.gz \\
+        ${pe_args} \\
+        ${se_args} \\
+        ${files}
+
+    cat <<-END_VERSIONS > versions.yml
+    ${getProcessName(task.process)}:
+        ${getSoftwareName(task.process)}: \$( normalize-by-median.py --version 2>&1 | grep ^khmer | sed 's/^khmer //' )
+    END_VERSIONS
+    """
+}
diff --git a/modules/khmer/normalizebymedian/meta.yml b/modules/khmer/normalizebymedian/meta.yml
new file mode 100644
index 00000000..2227750f
--- /dev/null
+++ b/modules/khmer/normalizebymedian/meta.yml
@@ -0,0 +1,39 @@
+name: khmer_normalizebymedian
+description: Module that calls normalize-by-median.py from khmer. The module can take a mix of paired end (interleaved) and single end reads. If both types are provided, only a single file with single ends is possible.
+keywords:
+  - digital normalization
+  - khmer
+tools:
+  - khmer:
+      description: khmer k-mer counting library
+      homepage: https://github.com/dib-lab/khmer
+      documentation: https://khmer.readthedocs.io/en/latest/
+      tool_dev_url: https://github.com/dib-lab/khmer
+      doi: "https://doi.org/10.12688/f1000research.6924.1"
+      licence: ['BSD License']
+
+input:
+  - pe_reads:
+      type: files
+      description: Paired-end interleaved fastq files
+      pattern: "*.{fq,fastq}.gz"
+  - se_reads:
+      type: files
+      description: Single-end fastq files
+      pattern: "*.{fq,fastq}.gz"
+  - name:
+      type: string
+      description: filename for output file(s); ".fastq.gz" will be appended
+
+output:
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - reads:
+      type: file
+      description: Interleaved fastq files
+      pattern: "*.{fq,fastq}.gz"
+
+authors:
+  - "@erikrikarddaniel"
diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml
index da9de7aa..0fd84d24 100644
--- a/tests/config/pytest_modules.yml
+++ b/tests/config/pytest_modules.yml
@@ -629,6 +629,10 @@ kallistobustools/ref:
   - modules/kallistobustools/ref/**
   - tests/modules/kallistobustools/ref/**
 
+khmer/normalizebymedian:
+  - modules/khmer/normalizebymedian/**
+  - tests/modules/khmer/normalizebymedian/**
+
 kleborate:
   - modules/kleborate/**
   - tests/modules/kleborate/**
diff --git a/tests/modules/khmer/normalizebymedian/main.nf b/tests/modules/khmer/normalizebymedian/main.nf
new file mode 100644
index 00000000..3a3b348c
--- /dev/null
+++ b/tests/modules/khmer/normalizebymedian/main.nf
@@ -0,0 +1,85 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { SEQTK_MERGEPE                                           } from '../../../../modules/seqtk/mergepe/main.nf'           addParams( options: [:] )
+include { KHMER_NORMALIZEBYMEDIAN                                 } from '../../../../modules/khmer/normalizebymedian/main.nf' addParams( options: [:] )
+include { KHMER_NORMALIZEBYMEDIAN as KHMER_NORMALIZEBYMEDIAN_ARGS } from '../../../../modules/khmer/normalizebymedian/main.nf' addParams( options: [args: '-C 20 -k 32'] )
+
+workflow test_khmer_normalizebymedian_only_pe {   // Paired-end only: SEQTK_MERGEPE output is normalised with an empty single-end input
+    
+    pe_reads = [
+        [ id:'khmer_test', single_end:false ], // meta map
+        [
+            file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+            file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) 
+        ]
+    ]
+
+    SEQTK_MERGEPE(pe_reads)
+
+    KHMER_NORMALIZEBYMEDIAN ( SEQTK_MERGEPE.out.reads.collect { it[1] }, [], 'only_pe' )   // it[1]: presumably the reads element of each (meta, reads) tuple - confirm; output is only_pe.fastq.gz
+}
+
+workflow test_khmer_normalizebymedian_only_se {   // Single-end only: two FASTQ files are passed as se_reads with an empty pe_reads
+    
+    se_reads = [
+        file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+        file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) 
+    ]
+
+    KHMER_NORMALIZEBYMEDIAN ( [], se_reads, 'only_se' )   // output is only_se.fastq.gz
+}
+
+workflow test_khmer_normalizebymedian_mixed {   // Mixed input: SEQTK_MERGEPE output plus one single-end FASTQ file
+    
+    pe_reads = [
+        [ id:'khmer_test', single_end:false ], // meta map
+        [
+            file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+            file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) 
+        ]
+    ]
+    se_reads = file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)   // a single file, matching the one-file limit stated in meta.yml for mixed input
+
+    SEQTK_MERGEPE(pe_reads)
+
+    KHMER_NORMALIZEBYMEDIAN ( SEQTK_MERGEPE.out.reads.map { it[1] }, se_reads, 'mixed' )   // uses map (not collect) on the reads channel; output is mixed.fastq.gz
+}
+
+workflow test_khmer_normalizebymedian_multiple_pe {   // Intended to exercise several paired-end inputs together with one single-end file
+    
+    pe_reads = [   // NOTE(review): one flat 4-element list (meta, reads, meta, reads), not two separate tuples - confirm SEQTK_MERGEPE really processes both samples
+        [ id:'khmer_test0', single_end:false ], // meta map
+        [
+            file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+            file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) 
+        ],
+        [ id:'khmer_test1', single_end:false ], // meta map
+        [
+            file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+            file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) 
+        ]
+    ]
+    se_reads = file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)
+
+    SEQTK_MERGEPE(pe_reads)
+
+    KHMER_NORMALIZEBYMEDIAN ( SEQTK_MERGEPE.out.reads.collect { it[1] }, se_reads, 'multiple_pe' )   // collect gathers every emitted reads entry into one list; output is multiple_pe.fastq.gz
+}
+
+workflow test_khmer_normalizebymedian_args {   // Same shape as the mixed test, but runs the alias included with options.args = '-C 20 -k 32'
+    
+    pe_reads = [
+        [ id:'khmer_test0', single_end:false ], // meta map
+        [
+            file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+            file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) 
+        ]
+    ]
+    se_reads = file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)
+
+    SEQTK_MERGEPE(pe_reads)
+
+    KHMER_NORMALIZEBYMEDIAN_ARGS ( SEQTK_MERGEPE.out.reads.collect { it[1] }, se_reads, 'args' )   // output is args.fastq.gz
+}
diff --git a/tests/modules/khmer/normalizebymedian/test.yml b/tests/modules/khmer/normalizebymedian/test.yml
new file mode 100644
index 00000000..a914a8ef
--- /dev/null
+++ b/tests/modules/khmer/normalizebymedian/test.yml
@@ -0,0 +1,42 @@
+#                nf-core modules create-test-yml khmer/normalizebymedian
+- name: khmer normalizebymedian only pe reads
+  command: nextflow run ./tests/modules/khmer/normalizebymedian -entry test_khmer_normalizebymedian_only_pe -c tests/config/nextflow.config
+  tags:
+    - khmer
+    - khmer/normalizebymedian
+  files:
+    - path: output/khmer/only_pe.fastq.gz
+      # md5sum not stable even locally with docker (gzip done by tool)
+      #md5sum: 75e05f2e80cf4bd0b534d4b73f7c059c
+      
+- name: khmer normalizebymedian only se reads
+  command: nextflow run ./tests/modules/khmer/normalizebymedian -entry test_khmer_normalizebymedian_only_se -c tests/config/nextflow.config
+  tags:
+    - khmer
+    - khmer/normalizebymedian
+  files:
+    - path: output/khmer/only_se.fastq.gz
+      
+- name: khmer normalizebymedian mixed reads
+  command: nextflow run ./tests/modules/khmer/normalizebymedian -entry test_khmer_normalizebymedian_mixed -c tests/config/nextflow.config
+  tags:
+    - khmer
+    - khmer/normalizebymedian
+  files:
+    - path: output/khmer/mixed.fastq.gz
+
+- name: khmer normalizebymedian multiple pe reads
+  command: nextflow run ./tests/modules/khmer/normalizebymedian -entry test_khmer_normalizebymedian_multiple_pe -c tests/config/nextflow.config
+  tags:
+    - khmer
+    - khmer/normalizebymedian
+  files:
+    - path: output/khmer/multiple_pe.fastq.gz
+
+- name: khmer normalizebymedian args
+  command: nextflow run ./tests/modules/khmer/normalizebymedian -entry test_khmer_normalizebymedian_args -c tests/config/nextflow.config
+  tags:
+    - khmer
+    - khmer/normalizebymedian
+  files:
+    - path: output/khmer/args.fastq.gz

From 460a3ed87bcd918aee869256cae298457752f921 Mon Sep 17 00:00:00 2001
From: Francesco L <53608000+lescai@users.noreply.github.com>
Date: Fri, 29 Oct 2021 14:00:54 +0200
Subject: [PATCH 4/5] Fgbio group reads by umi (#952)

* adding template for module groupreadsbyumi

* update modules with code

* strategy is required argument so moving it to input rather than options.args

* tests successful committing yml

* added meta to output

Co-authored-by: Gregor Sturm <mail@gregor-sturm.de>
---
 modules/fgbio/groupreadsbyumi/functions.nf   | 78 ++++++++++++++++++++
 modules/fgbio/groupreadsbyumi/main.nf        | 50 +++++++++++++
 modules/fgbio/groupreadsbyumi/meta.yml       | 59 +++++++++++++++
 tests/config/pytest_modules.yml              |  4 +
 tests/modules/fgbio/groupreadsbyumi/main.nf  | 15 ++++
 tests/modules/fgbio/groupreadsbyumi/test.yml | 10 +++
 6 files changed, 216 insertions(+)
 create mode 100644 modules/fgbio/groupreadsbyumi/functions.nf
 create mode 100644 modules/fgbio/groupreadsbyumi/main.nf
 create mode 100644 modules/fgbio/groupreadsbyumi/meta.yml
 create mode 100644 tests/modules/fgbio/groupreadsbyumi/main.nf
 create mode 100644 tests/modules/fgbio/groupreadsbyumi/test.yml

diff --git a/modules/fgbio/groupreadsbyumi/functions.nf b/modules/fgbio/groupreadsbyumi/functions.nf
new file mode 100644
index 00000000..85628ee0
--- /dev/null
+++ b/modules/fgbio/groupreadsbyumi/functions.nf
@@ -0,0 +1,78 @@
+//
+//  Utility functions used in nf-core DSL2 module files
+//
+
+//
+// Extract name of software tool from process name using $task.process
+//
+def getSoftwareName(task_process) {
+    return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()
+}
+
+//
+// Extract name of module from process name using $task.process
+//
+def getProcessName(task_process) {
+    return task_process.tokenize(':')[-1]
+}
+
+//
+// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules
+//
+def initOptions(Map args) {
+    def Map options = [:]
+    options.args            = args.args ?: ''
+    options.args2           = args.args2 ?: ''
+    options.args3           = args.args3 ?: ''
+    options.publish_by_meta = args.publish_by_meta ?: []
+    options.publish_dir     = args.publish_dir ?: ''
+    options.publish_files   = args.publish_files
+    options.suffix          = args.suffix ?: ''
+    return options
+}
+
+//
+// Tidy up and join elements of a list to return a path string
+//
+def getPathFromList(path_list) {
+    def paths = path_list.findAll { item -> !item?.trim().isEmpty() }      // Remove empty entries
+    paths     = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes
+    return paths.join('/')
+}
+
+//
+// Function to save/publish module results
+//
+def saveFiles(Map args) {
+    def ioptions  = initOptions(args.options)
+    def path_list = [ ioptions.publish_dir ?: args.publish_dir ]
+
+    // Do not publish versions.yml unless running from pytest workflow
+    if (args.filename.equals('versions.yml') && !System.getenv("NF_CORE_MODULES_TEST")) {
+        return null
+    }
+    if (ioptions.publish_by_meta) {
+        def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta
+        for (key in key_list) {
+            if (args.meta && key instanceof String) {
+                def path = key
+                if (args.meta.containsKey(key)) {
+                    path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key]
+                }
+                path = path instanceof String ? path : ''
+                path_list.add(path)
+            }
+        }
+    }
+    if (ioptions.publish_files instanceof Map) {
+        for (ext in ioptions.publish_files) {
+            if (args.filename.endsWith(ext.key)) {
+                def ext_list = path_list.collect()
+                ext_list.add(ext.value)
+                return "${getPathFromList(ext_list)}/$args.filename"
+            }
+        }
+    } else if (ioptions.publish_files == null) {
+        return "${getPathFromList(path_list)}/$args.filename"
+    }
+}
diff --git a/modules/fgbio/groupreadsbyumi/main.nf b/modules/fgbio/groupreadsbyumi/main.nf
new file mode 100644
index 00000000..8e16f0a5
--- /dev/null
+++ b/modules/fgbio/groupreadsbyumi/main.nf
@@ -0,0 +1,50 @@
+// Import generic module functions
+include { initOptions; saveFiles; getSoftwareName; getProcessName } from './functions'
+
+params.options = [:]
+options        = initOptions(params.options)
+
+process FGBIO_GROUPREADSBYUMI {   // Runs fgbio GroupReadsByUmi on a BAM and emits the grouped BAM plus a histogram text file
+    tag "$meta.id"
+    label 'process_low'
+    publishDir "${params.outdir}",
+        mode: params.publish_dir_mode,
+        saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) }
+
+    conda (params.enable_conda ? "bioconda::fgbio=1.4.0" : null)
+    if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
+        container "https://depot.galaxyproject.org/singularity/fgbio:1.4.0--hdfd78af_0"
+    } else {
+        container "quay.io/biocontainers/fgbio:1.4.0--hdfd78af_0"
+    }
+
+    input:
+    tuple val(meta), path(taggedbam)   // sample meta map and the input BAM passed to -i
+    val(strategy)                      // value passed to -s (UMI assignment strategy)
+
+    output:
+    tuple val(meta), path("*_umi-grouped.bam")  , emit: bam
+    tuple val(meta), path("*_umi_histogram.txt"), emit: histogram
+    path "versions.yml"                         , emit: versions
+
+    script:   // PWD is escaped in the command below so the task shell expands it at run time, i.e. to the directory where `mkdir tmp` just ran
+    def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"   // output basename: meta.id plus the optional suffix
+
+    """
+    mkdir tmp
+
+    fgbio \\
+        --tmp-dir=\${PWD}/tmp \\
+        GroupReadsByUmi \\
+        -s $strategy \\
+        ${options.args} \\
+        -i $taggedbam \\
+        -o ${prefix}_umi-grouped.bam \\
+        -f ${prefix}_umi_histogram.txt
+
+    cat <<-END_VERSIONS > versions.yml
+    ${getProcessName(task.process)}:
+        ${getSoftwareName(task.process)}: \$( echo \$(fgbio --version 2>&1 | tr -d '[:cntrl:]' ) | sed -e 's/^.*Version: //;s/\\[.*\$//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/fgbio/groupreadsbyumi/meta.yml b/modules/fgbio/groupreadsbyumi/meta.yml
new file mode 100644
index 00000000..18ce149e
--- /dev/null
+++ b/modules/fgbio/groupreadsbyumi/meta.yml
@@ -0,0 +1,59 @@
+name: fgbio_groupreadsbyumi
+description: |
+      Groups reads together that appear to have come from the same original molecule.
+      Reads are grouped by template, and then templates are sorted by the 5’ mapping positions
+      of the reads from the template, used from earliest mapping position to latest.
+      Reads that have the same end positions are then sub-grouped by UMI sequence.
+      (!) Note: the MQ tag is required on reads with mapped mates (!)
+      This can be added using samblaster with the optional argument --addMateTags.
+keywords:
+  - UMI
+  - groupreads
+  - fgbio
+tools:
+  - fgbio:
+      description: A set of tools for working with genomic and high throughput sequencing data, including UMIs
+      homepage: http://fulcrumgenomics.github.io/fgbio/
+      documentation: http://fulcrumgenomics.github.io/fgbio/tools/latest/
+      tool_dev_url: https://github.com/fulcrumgenomics/fgbio
+      doi: ""
+      licence: ['MIT']
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - bam:
+      type: file
+      description: |
+        BAM file. Note: the MQ tag is required on reads with mapped mates (!)
+      pattern: "*.bam"
+  - strategy:
+      type: value
+      description: |
+        Required argument: defines the UMI assignment strategy.
+        Must be chosen among: Identity, Edit, Adjacency, Paired.
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - bam:
+      type: file
+      description: UMI-grouped BAM
+      pattern: "*.bam"
+  - histogram:
+      type: file
+      description: A text file containing the tag family size counts
+      pattern: "*.txt"
+
+authors:
+  - "@lescai"
diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml
index 0fd84d24..4fdd8303 100644
--- a/tests/config/pytest_modules.yml
+++ b/tests/config/pytest_modules.yml
@@ -394,6 +394,10 @@ fgbio/fastqtobam:
   - modules/fgbio/fastqtobam/**
   - tests/modules/fgbio/fastqtobam/**
 
+fgbio/groupreadsbyumi:
+  - modules/fgbio/groupreadsbyumi/**
+  - tests/modules/fgbio/groupreadsbyumi/**
+
 fgbio/sortbam:
   - modules/fgbio/sortbam/**
   - tests/modules/fgbio/sortbam/**
diff --git a/tests/modules/fgbio/groupreadsbyumi/main.nf b/tests/modules/fgbio/groupreadsbyumi/main.nf
new file mode 100644
index 00000000..31f55724
--- /dev/null
+++ b/tests/modules/fgbio/groupreadsbyumi/main.nf
@@ -0,0 +1,15 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { FGBIO_GROUPREADSBYUMI } from '../../../../modules/fgbio/groupreadsbyumi/main.nf' addParams( options: [:] )
+
+workflow test_fgbio_groupreadsbyumi {   // Runs FGBIO_GROUPREADSBYUMI once on the UMI-tagged test BAM
+
+    input = [ [ id:'test', single_end:false ], // meta map
+              file(params.test_data['homo_sapiens']['illumina']['test_paired_end_umi_unsorted_tagged_bam'], checkIfExists: true) ]
+
+    strategy = "Adjacency"   // one of the strategies listed in meta.yml: Identity, Edit, Adjacency, Paired
+
+    FGBIO_GROUPREADSBYUMI ( input, strategy )
+}
diff --git a/tests/modules/fgbio/groupreadsbyumi/test.yml b/tests/modules/fgbio/groupreadsbyumi/test.yml
new file mode 100644
index 00000000..ce70f129
--- /dev/null
+++ b/tests/modules/fgbio/groupreadsbyumi/test.yml
@@ -0,0 +1,10 @@
+- name: fgbio groupreadsbyumi test_fgbio_groupreadsbyumi
+  command: nextflow run tests/modules/fgbio/groupreadsbyumi -entry test_fgbio_groupreadsbyumi -c tests/config/nextflow.config
+  tags:
+    - fgbio
+    - fgbio/groupreadsbyumi
+  files:
+    - path: output/fgbio/test_umi-grouped.bam
+      md5sum: f1e53fc845fd99a3da172eb8063dff0b
+    - path: output/fgbio/test_umi_histogram.txt
+      md5sum: d17fd167b2a765d46e4b01bf08ece01b

From 2959b4ba070d138d1577acc48d35cc669fbef972 Mon Sep 17 00:00:00 2001
From: Chris Cheshire <chris.j.cheshire@gmail.com>
Date: Fri, 29 Oct 2021 13:22:17 +0100
Subject: [PATCH 5/5] Bedtools sort add extension choice input (#984)

* hifiasm copied from fastqc

* hifiasm tests init from fastqc

* meta.yml init; test.yml and main.nf for printing version

* Add hifiasm version printing

* Removed spaced on an empty line

* Reverted hifiasm from main

* Added extension input for bedtools sort

* whitespace

* Updated docs

Co-authored-by: Sviatoslav Sidorov <sviatoslav.sidorov@crick.ac.uk>
Co-authored-by: Svyatoslav Sidorov <svet.sidorov@gmail.com>
---
 modules/bedtools/sort/main.nf        | 11 ++++++-----
 modules/bedtools/sort/meta.yml       | 19 +++++++++++++------
 tests/modules/bedtools/sort/main.nf  |  2 +-
 tests/modules/bedtools/sort/test.yml |  2 +-
 4 files changed, 21 insertions(+), 13 deletions(-)

diff --git a/modules/bedtools/sort/main.nf b/modules/bedtools/sort/main.nf
index bdba3376..4a51c4b2 100644
--- a/modules/bedtools/sort/main.nf
+++ b/modules/bedtools/sort/main.nf
@@ -19,20 +19,21 @@ process BEDTOOLS_SORT {
     }
 
     input:
-    tuple val(meta), path(bed)
+    tuple val(meta), path(intervals)
+    val   extension
 
     output:
-    tuple val(meta), path('*.bed'), emit: bed
-    path  "versions.yml"          , emit: versions
+    tuple val(meta), path("*.${extension}"), emit: sorted
+    path  "versions.yml"                   , emit: versions
 
     script:
     def prefix   = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
     """
     bedtools \\
         sort \\
-        -i $bed \\
+        -i $intervals \\
         $options.args \\
-        > ${prefix}.bed
+        > ${prefix}.${extension}
 
     cat <<-END_VERSIONS > versions.yml
     ${getProcessName(task.process)}:
diff --git a/modules/bedtools/sort/meta.yml b/modules/bedtools/sort/meta.yml
index 5b8b41d7..c7b1b098 100644
--- a/modules/bedtools/sort/meta.yml
+++ b/modules/bedtools/sort/meta.yml
@@ -15,20 +15,26 @@ input:
         description: |
             Groovy Map containing sample information
             e.g. [ id:'test', single_end:false ]
-    - bed:
+    - intervals:
         type: file
-        description: Input BED file
-        pattern: "*.{bed}"
+        description: BED/BEDGRAPH
+        pattern: "*.{bed,bedGraph}"
+
+    - extension:
+        type: string
+        description: Extension of the output file, given without the leading dot (e.g., "bg", "bedgraph", "txt", "tab", etc.). It is set arbitrarily by the user and should correspond to the file format, which depends on the arguments.
 output:
     - meta:
         type: map
         description: |
             Groovy Map containing sample information
             e.g. [ id:'test', single_end:false ]
-    - bed:
+
+    - sorted:
         type: file
-        description: Sorted BED file
-        pattern: "*.{bed}"
+        description: Sorted output file
+        pattern: "*.${extension}"
+
     - versions:
         type: file
         description: File containing software versions
@@ -37,3 +43,4 @@ authors:
     - "@Emiller88"
     - "@sruthipsuresh"
     - "@drpatelh"
+    - "@chris-cheshire"
diff --git a/tests/modules/bedtools/sort/main.nf b/tests/modules/bedtools/sort/main.nf
index ad1a3df4..b5d34e2f 100644
--- a/tests/modules/bedtools/sort/main.nf
+++ b/tests/modules/bedtools/sort/main.nf
@@ -9,5 +9,5 @@ workflow test_bedtools_sort {
               file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true)
             ]
 
-    BEDTOOLS_SORT ( input )
+    BEDTOOLS_SORT ( input, "testext" )
 }
diff --git a/tests/modules/bedtools/sort/test.yml b/tests/modules/bedtools/sort/test.yml
index ceb25f7d..1dd04507 100644
--- a/tests/modules/bedtools/sort/test.yml
+++ b/tests/modules/bedtools/sort/test.yml
@@ -4,5 +4,5 @@
     - bedtools
     - bedtools/sort
   files:
-    - path: ./output/bedtools/test_out.bed
+    - path: ./output/bedtools/test_out.testext
       md5sum: fe4053cf4de3aebbdfc3be2efb125a74