Merge remote-tracking branch 'nf-core/master'

2024-12-23 19:48:16 +00:00 · 2021-10-29 13:24:25 +01:00 · 2021-10-29 13:24:25 +01:00 · 2cfd15e0f9
commit 2cfd15e0f9
parent de862972ab 2959b4ba07
58 changed files with 1006 additions and 153 deletions
--- a/modules/bedtools/sort/main.nf
+++ b/modules/bedtools/sort/main.nf
@ -19,20 +19,21 @@ process BEDTOOLS_SORT {
    }

    input:
-    tuple val(meta), path(bed)
+    tuple val(meta), path(intervals)
+    val   extension

    output:
-    tuple val(meta), path('*.bed'), emit: bed
-    path  "versions.yml"          , emit: versions
+    tuple val(meta), path("*.${extension}"), emit: sorted
+    path  "versions.yml"                   , emit: versions

    script:
    def prefix   = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
    """
    bedtools \\
        sort \\
-        -i $bed \\
+        -i $intervals \\
        $options.args \\
-        > ${prefix}.bed
+        > ${prefix}.${extension}

    cat <<-END_VERSIONS > versions.yml
    ${getProcessName(task.process)}:
--- a/modules/bedtools/sort/meta.yml
+++ b/modules/bedtools/sort/meta.yml
@ -15,20 +15,26 @@ input:
        description: |
            Groovy Map containing sample information
            e.g. [ id:'test', single_end:false ]
-    - bed:
+    - intervals:
        type: file
-        description: Input BED file
-        pattern: "*.{bed}"
+        description: BED/BEDGRAPH
+        pattern: "*.{bed,bedGraph}"
+
+    - extension:
+        type: string
+        description: Extension of the output file, given without the leading dot because the module writes "<prefix>.<extension>" (e.g. "bg", "bedgraph", "txt", "tab"). It is chosen by the user and should match the output file format, which depends on the arguments passed to the tool.
 output:
    - meta:
        type: map
        description: |
            Groovy Map containing sample information
            e.g. [ id:'test', single_end:false ]
-    - bed:
+
+    - sorted:
        type: file
-        description: Sorted BED file
-        pattern: "*.{bed}"
+        description: Sorted output file
+        pattern: "*.${extension}"
+
    - versions:
        type: file
        description: File containing software versions
@ -37,3 +43,4 @@ authors:
    - "@Emiller88"
    - "@sruthipsuresh"
    - "@drpatelh"
+    - "@chris-cheshire"
--- a/modules/fgbio/groupreadsbyumi/functions.nf
+++ b/modules/fgbio/groupreadsbyumi/functions.nf
@ -0,0 +1,78 @@
+//
+//  Utility functions used in nf-core DSL2 module files
+//
+
+// Extract name of software tool from process name using $task.process:
+// takes the last ':'-separated token, then its first '_'-separated token,
+// and returns that lower-cased (e.g. 'WF:FGBIO_GROUPREADSBYUMI' -> 'fgbio').
+def getSoftwareName(task_process) {
+    return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()
+}
+
+// Extract name of module from process name using $task.process:
+// returns the last ':'-separated token unchanged
+// (e.g. 'WF:FGBIO_GROUPREADSBYUMI' -> 'FGBIO_GROUPREADSBYUMI').
+def getProcessName(task_process) {
+    return task_process.tokenize(':')[-1]
+}
+
+// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules.
+// Missing/falsy args, args2, args3, publish_dir and suffix become '', publish_by_meta becomes [];
+// publish_files is copied without a default, so it stays null when the caller does not supply it.
+def initOptions(Map args) {
+    def Map options = [:]
+    options.args            = args.args ?: ''
+    options.args2           = args.args2 ?: ''
+    options.args3           = args.args3 ?: ''
+    options.publish_by_meta = args.publish_by_meta ?: []
+    options.publish_dir     = args.publish_dir ?: ''
+    options.publish_files   = args.publish_files
+    options.suffix          = args.suffix ?: ''
+    return options
+}
+
+// Tidy up and join elements of a list to return a path string.
+// Null, empty and whitespace-only entries are dropped; the remaining entries are trimmed,
+// stripped of leading and trailing '/' characters, and joined with '/'.
+def getPathFromList(path_list) {
+    def paths = path_list.findAll { item -> item?.trim() }                 // Keep only non-null, non-blank entries (null-safe)
+    paths     = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and leading/trailing slashes
+    return paths.join('/')
+}
+
+// Function to save/publish module results: builds the relative output path for args.filename from the
+// publish directory, optional directories derived from args.meta keys, and an optional publish_files sub-directory.
+// Returns null for versions.yml (unless NF_CORE_MODULES_TEST is set) and when no publish_files rule yields a path.
+def saveFiles(Map args) {
+    def ioptions  = initOptions(args.options)
+    def path_list = [ ioptions.publish_dir ?: args.publish_dir ] // options.publish_dir takes precedence over the caller-supplied default
+
+    // Do not publish versions.yml unless running from pytest workflow
+    if (args.filename.equals('versions.yml') && !System.getenv("NF_CORE_MODULES_TEST")) {
+        return null
+    }
+    if (ioptions.publish_by_meta) {
+        def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta // truthy non-List value: use the caller's key list
+        for (key in key_list) {
+            if (args.meta && key instanceof String) {
+                def path = key
+                if (args.meta.containsKey(key)) {
+                    path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key] // Boolean values become "<key>_<value>"
+                }
+                path = path instanceof String ? path : '' // non-String values add an empty entry, which getPathFromList removes
+                path_list.add(path)
+            }
+        }
+    }
+    if (ioptions.publish_files instanceof Map) {
+        for (ext in ioptions.publish_files) {
+            if (args.filename.endsWith(ext.key)) { // first matching suffix wins
+                def ext_list = path_list.collect()
+                ext_list.add(ext.value)
+                return "${getPathFromList(ext_list)}/$args.filename"
+            }
+        }
+    } else if (ioptions.publish_files == null) { // no publish_files given: every file gets the default path
+        return "${getPathFromList(path_list)}/$args.filename"
+    }
+}
--- a/modules/fgbio/groupreadsbyumi/main.nf
+++ b/modules/fgbio/groupreadsbyumi/main.nf
@ -0,0 +1,50 @@
+// Import generic module functions
+include { initOptions; saveFiles; getSoftwareName; getProcessName } from './functions'
+
+params.options = [:]
+options        = initOptions(params.options)
+
+process FGBIO_GROUPREADSBYUMI { // Runs `fgbio GroupReadsByUmi` on one BAM; emits the grouped BAM, a histogram text file and versions.yml
+    tag "$meta.id"
+    label 'process_low'
+    publishDir "${params.outdir}",
+        mode: params.publish_dir_mode,
+        saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) }
+
+    conda (params.enable_conda ? "bioconda::fgbio=1.4.0" : null)
+    if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
+        container "https://depot.galaxyproject.org/singularity/fgbio:1.4.0--hdfd78af_0"
+    } else {
+        container "quay.io/biocontainers/fgbio:1.4.0--hdfd78af_0"
+    }
+
+    input:
+    tuple val(meta), path(taggedbam) // meta map (meta.id drives tag and output prefix) and the BAM passed to -i
+    val(strategy)                    // value passed to GroupReadsByUmi -s
+
+    output:
+    tuple val(meta), path("*_umi-grouped.bam")  , emit: bam
+    tuple val(meta), path("*_umi_histogram.txt"), emit: histogram
+    path "versions.yml"                         , emit: versions
+
+    script:
+    def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
+    // \${PWD} is escaped so the task shell expands it (in the directory where `mkdir tmp` runs) instead of Groovy interpolating it
+    """
+    mkdir tmp
+
+    fgbio \\
+        --tmp-dir=\${PWD}/tmp \\
+        GroupReadsByUmi \\
+        -s $strategy \\
+        ${options.args} \\
+        -i $taggedbam \\
+        -o ${prefix}_umi-grouped.bam \\
+        -f ${prefix}_umi_histogram.txt
+
+    cat <<-END_VERSIONS > versions.yml
+    ${getProcessName(task.process)}:
+        ${getSoftwareName(task.process)}: \$( echo \$(fgbio --version 2>&1 | tr -d '[:cntrl:]' ) | sed -e 's/^.*Version: //;s/\\[.*\$//')
+    END_VERSIONS
+    """
+}
--- a/modules/fgbio/groupreadsbyumi/meta.yml
+++ b/modules/fgbio/groupreadsbyumi/meta.yml
@ -0,0 +1,59 @@
+name: fgbio_groupreadsbyumi
+description: |
+      Groups reads together that appear to have come from the same original molecule.
+      Reads are grouped by template, and then templates are sorted by the 5’ mapping positions
+      of the reads from the template, used from earliest mapping position to latest.
+      Reads that have the same end positions are then sub-grouped by UMI sequence.
+      (!) Note: the MQ tag is required on reads with mapped mates (!)
+      This can be added using samblaster with the optional argument --addMateTags.
+keywords:
+  - UMI
+  - groupreads
+  - fgbio
+tools:
+  - fgbio:
+      description: A set of tools for working with genomic and high throughput sequencing data, including UMIs
+      homepage: http://fulcrumgenomics.github.io/fgbio/
+      documentation: http://fulcrumgenomics.github.io/fgbio/tools/latest/
+      tool_dev_url: https://github.com/fulcrumgenomics/fgbio
+      doi: ""
+      licence: ['MIT']
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - bam:
+      type: file
+      description: |
+        BAM file. Note: the MQ tag is required on reads with mapped mates (!)
+      pattern: "*.bam"
+  - strategy:
+      type: value
+      description: |
+        Required argument: defines the UMI assignment strategy.
+        Must be chosen among: Identity, Edit, Adjacency, Paired.
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - bam:
+      type: file
+      description: UMI-grouped BAM
+      pattern: "*.bam"
+  - histogram:
+      type: file
+      description: A text file containing the tag family size counts
+      pattern: "*.txt"
+
+authors:
+  - "@lescai"
--- a/modules/gatk4/applybqsr/main.nf
+++ b/modules/gatk4/applybqsr/main.nf
@ -19,7 +19,7 @@ process GATK4_APPLYBQSR {
    }

    input:
-    tuple val(meta), path(bam), path(bai), path(bqsr_table)
+    tuple val(meta), path(input), path(input_index), path(bqsr_table)
    path  fasta
    path  fastaidx
    path  dict
@ -32,12 +32,18 @@ process GATK4_APPLYBQSR {
    script:
    def prefix   = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
    def interval = intervals ? "-L ${intervals}" : ""
+    if (!task.memory) {
+        log.info '[GATK ApplyBQSR] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
+    } else {
+        avail_mem = task.memory.giga
+    }
    """
    gatk ApplyBQSR \\
        -R $fasta \\
-        -I $bam \\
+        -I $input \\
        --bqsr-recal-file $bqsr_table \\
        $interval \\
+        --tmp-dir . \\
        -O ${prefix}.bam \\
        $options.args

--- a/modules/gatk4/applybqsr/meta.yml
+++ b/modules/gatk4/applybqsr/meta.yml
@ -20,10 +20,14 @@ input:
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
-  - bam:
+  - input:
      type: file
-      description: BAM file from alignment
-      pattern: "*.{bam}"
+      description: BAM/CRAM file from alignment
+      pattern: "*.{bam,cram}"
+  - input_index:
+      type: file
+      description: BAI/CRAI file from alignment
+      pattern: "*.{bai,crai}"
  - bqsr_table:
      type: file
      description: Recalibration table from gatk4_baserecalibrator
--- a/modules/gatk4/baserecalibrator/main.nf
+++ b/modules/gatk4/baserecalibrator/main.nf
@ -19,7 +19,7 @@ process GATK4_BASERECALIBRATOR {
    }

    input:
-    tuple val(meta), path(bam), path(bai)
+    tuple val(meta), path(input), path(input_index)
    path fasta
    path fastaidx
    path dict
@ -35,12 +35,19 @@ process GATK4_BASERECALIBRATOR {
    def prefix   = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
    def intervalsCommand = intervalsBed ? "-L ${intervalsBed}" : ""
    def sitesCommand = knownSites.collect{"--known-sites ${it}"}.join(' ')
+
+    if (!task.memory) {
+        log.info '[GATK BaseRecalibrator] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
+    } else {
+        avail_mem = task.memory.giga
+    }
    """
    gatk BaseRecalibrator  \
        -R $fasta \
-        -I $bam \
+        -I $input \
        $sitesCommand \
        $intervalsCommand \
+        --tmp-dir . \
        $options.args \
        -O ${prefix}.table

--- a/modules/gatk4/baserecalibrator/meta.yml
+++ b/modules/gatk4/baserecalibrator/meta.yml
@ -20,10 +20,14 @@ input:
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
-  - bam:
+  - input:
      type: file
-      description: BAM file from alignment
-      pattern: "*.{bam}"
+      description: BAM/CRAM file from alignment
+      pattern: "*.{bam,cram}"
+  - input_index:
+      type: file
+      description: BAI/CRAI file from alignment
+      pattern: "*.{bai,crai}"
  - fasta:
      type: file
      description: The reference fasta file
@ -57,3 +61,4 @@ output:

 authors:
  - "@yocra3"
+  - "@FriederikeHanssen"
--- a/modules/gatk4/haplotypecaller/main.nf
+++ b/modules/gatk4/haplotypecaller/main.nf
@ -19,10 +19,13 @@ process GATK4_HAPLOTYPECALLER {
    }

    input:
-    tuple val(meta), path(bam), path(bai)
+    tuple val(meta), path(input), path(input_index)
    path fasta
    path fai
    path dict
+    path dbsnp
+    path dbsnp_tbi
+    path interval

    output:
    tuple val(meta), path("*.vcf.gz"), emit: vcf
@ -30,8 +33,10 @@ process GATK4_HAPLOTYPECALLER {
    path "versions.yml"              , emit: versions

    script:
-    def prefix    = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
-    def avail_mem = 3
+    def prefix          = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
+    def interval_option = interval ? "-L ${interval}" : ""
+    def dbsnp_option    = dbsnp ? "-D ${dbsnp}" : ""
+    def avail_mem       = 3
    if (!task.memory) {
        log.info '[GATK HaplotypeCaller] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
    } else {
@ -42,9 +47,12 @@ process GATK4_HAPLOTYPECALLER {
        --java-options "-Xmx${avail_mem}g" \\
        HaplotypeCaller \\
        -R $fasta \\
-        -I $bam \\
+        -I $input \\
+        ${dbsnp_option} \\
+        ${interval_option} \\
        -O ${prefix}.vcf.gz \\
-        $options.args
+        $options.args \\
+        --tmp-dir .

    cat <<-END_VERSIONS > versions.yml
    ${getProcessName(task.process)}:
--- a/modules/gatk4/haplotypecaller/meta.yml
+++ b/modules/gatk4/haplotypecaller/meta.yml
@ -21,14 +21,14 @@ input:
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
-  - bam:
+  - input:
      type: file
-      description: BAM file
-      pattern: "*.bam"
-  - bai:
+      description: BAM/CRAM file from alignment
+      pattern: "*.{bam,cram}"
+  - input_index:
      type: file
-      description: Index of BAM file
-      pattern: "*.bam.bai"
+      description: BAI/CRAI file from alignment
+      pattern: "*.{bai,crai}"
  - fasta:
      type: file
      description: The reference fasta file
@ -41,6 +41,16 @@ input:
      type: file
      description: GATK sequence dictionary
      pattern: "*.dict"
+  - dbsnp:
+      type: file
+      description: VCF file containing known sites (optional)
+  - dbsnp_tbi:
+      type: file
+      description: VCF index of dbsnp (optional)
+  - interval:
+      type: file
+      description: Bed file with the genomic regions included in the library (optional)
+
 output:
  - meta:
      type: map
@ -62,3 +72,4 @@ output:

 authors:
  - "@suzannejin"
+  - "@FriederikeHanssen"
--- a/modules/gatk4/markduplicates/main.nf
+++ b/modules/gatk4/markduplicates/main.nf
@ -19,21 +19,28 @@ process GATK4_MARKDUPLICATES {
    }

    input:
-    tuple val(meta), path(bam)
+    tuple val(meta), path(bams)

    output:
    tuple val(meta), path("*.bam")    , emit: bam
+    tuple val(meta), path("*.bai")    , emit: bai
    tuple val(meta), path("*.metrics"), emit: metrics
    path "versions.yml"               , emit: versions

    script:
    def prefix   = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
+    def bam_list = bams.collect(){ bam -> "--INPUT ".concat(bam.toString()) }.join(" ")
+    def avail_mem       = 3
+    if (!task.memory) {
+        log.info '[GATK HaplotypeCaller] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
+    } else {
+        avail_mem = task.memory.giga
+    }
    """
    gatk MarkDuplicates \\
-        --INPUT $bam \\
+        $bam_list \\
        --METRICS_FILE ${prefix}.metrics \\
        --TMP_DIR . \\
-        --ASSUME_SORT_ORDER coordinate \\
        --CREATE_INDEX true \\
        --OUTPUT ${prefix}.bam \\
        $options.args
--- a/modules/gatk4/markduplicates/meta.yml
+++ b/modules/gatk4/markduplicates/meta.yml
@ -47,3 +47,4 @@ output:

 authors:
  - "@ajodeh-juma"
+  - "@FriederikeHanssen"
--- a/modules/gatk4/mutect2/main.nf
+++ b/modules/gatk4/mutect2/main.nf
@ -19,9 +19,11 @@ process GATK4_MUTECT2 {
    }

    input:
-    tuple val(meta) , path(bam) , path(bai) , val(which_norm)
-    val run_single
-    val run_pon
+    tuple val(meta) , path(input) , path(input_index) , val(which_norm)
+    val  run_single
+    val  run_pon
+    val  run_mito
+    val  interval_label
    path fasta
    path fastaidx
    path dict
@ -39,35 +41,34 @@ process GATK4_MUTECT2 {

    script:
    def prefix   = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
-    def inputsList = []
-    def normalsList = []
-    def inputsCommand = ''
-    def panelsCommand = ''
-    def normalsCommand = ''
+    def panels_command = ''
+    def normals_command = ''

-    bam.each() {a -> inputsList.add(" -I " + a ) }
-    inputsCommand = inputsList.join( ' ')
+    def inputs_command = '-I ' + input.join( ' -I ')

    if(run_pon) {
-        panelsCommand = ''
-        normalsCommand = ''
+        panels_command = ''
+        normals_command = ''

    } else if(run_single) {
-        panelsCommand = " --germline-resource $germline_resource --panel-of-normals $panel_of_normals"
-        normalsCommand = ''
+        panels_command = " --germline-resource $germline_resource --panel-of-normals $panel_of_normals"
+        normals_command = ''
+
+    } else if(run_mito){
+        panels_command = "-L ${interval_label} --mitochondria-mode"
+        normals_command = ''

    } else {
-        panelsCommand = " --germline-resource $germline_resource --panel-of-normals $panel_of_normals --f1r2-tar-gz ${prefix}.f1r2.tar.gz"
-        which_norm.each() {a -> normalsList.add(" -normal " + a ) }
-        normalsCommand = normalsList.join( ' ')
+        panels_command = " --germline-resource $germline_resource --panel-of-normals $panel_of_normals --f1r2-tar-gz ${prefix}.f1r2.tar.gz"
+        normals_command = '-normal ' + which_norm.join( ' -normal ')
    }

    """
    gatk Mutect2 \\
        -R ${fasta} \\
-        ${inputsCommand} \\
-        ${normalsCommand} \\
-        ${panelsCommand} \\
+        ${inputs_command} \\
+        ${normals_command} \\
+        ${panels_command} \\
        -O ${prefix}.vcf.gz \\
        $options.args

--- a/modules/gatk4/mutect2/meta.yml
+++ b/modules/gatk4/mutect2/meta.yml
@ -22,23 +22,34 @@ input:
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test']
-  - bam:
+  - input:
      type: list
-      description: list of BAM files
-      pattern: "*.bam"
-  - bai:
+      description: list of BAM files, also able to take CRAM as an input
+      pattern: "*.{bam,cram}"
+  - input_index:
      type: list
-      description: list of BAM file indexes
-      pattern: "*.bam.bai"
+      description: list of BAM file indexes, also able to take CRAM indexes as an input
+      pattern: "*.{bam.bai,cram.crai}"
  - which_norm:
      type: list
      description: optional list of sample headers contained in the normal sample bam files (these are required for tumor_normal_pair mode)
+      pattern: "testN"
  - run_single:
      type: boolean
      description: Specify whether or not to run in tumor_single mode instead of tumor_normal_pair mode (will be ignored if run_pon is also true)
+      pattern: "true/false"
  - run_pon:
      type: boolean
      description: Specify whether or not to run in panel_of_normal mode instead of tumor_normal_pair mode
+      pattern: "true/false"
+  - run_mito:
+      type: boolean
+      description: Specify whether or not to run in mitochondria-mode instead of tumor_normal_pair mode
+      pattern: "true/false"
+  - interval_label:
+      type: string
+      description: Specify the label used for mitochondrial chromosome when mutect2 is run in mitochondria mode.
+      pattern: "chrM"
  - fasta:
      type: file
      description: The reference fasta file
--- a/modules/khmer/normalizebymedian/functions.nf
+++ b/modules/khmer/normalizebymedian/functions.nf
@ -0,0 +1,78 @@
+//
+//  Utility functions used in nf-core DSL2 module files
+//
+
+// Extract name of software tool from process name using $task.process:
+// takes the last ':'-separated token, then its first '_'-separated token,
+// and returns that lower-cased (e.g. 'WF:KHMER_NORMALIZEBYMEDIAN' -> 'khmer').
+def getSoftwareName(task_process) {
+    return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()
+}
+
+// Extract name of module from process name using $task.process:
+// returns the last ':'-separated token unchanged
+// (e.g. 'WF:KHMER_NORMALIZEBYMEDIAN' -> 'KHMER_NORMALIZEBYMEDIAN').
+def getProcessName(task_process) {
+    return task_process.tokenize(':')[-1]
+}
+
+// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules.
+// Missing/falsy args, args2, args3, publish_dir and suffix become '', publish_by_meta becomes [];
+// publish_files is copied without a default, so it stays null when the caller does not supply it.
+def initOptions(Map args) {
+    def Map options = [:]
+    options.args            = args.args ?: ''
+    options.args2           = args.args2 ?: ''
+    options.args3           = args.args3 ?: ''
+    options.publish_by_meta = args.publish_by_meta ?: []
+    options.publish_dir     = args.publish_dir ?: ''
+    options.publish_files   = args.publish_files
+    options.suffix          = args.suffix ?: ''
+    return options
+}
+
+// Tidy up and join elements of a list to return a path string.
+// Null, empty and whitespace-only entries are dropped; the remaining entries are trimmed,
+// stripped of leading and trailing '/' characters, and joined with '/'.
+def getPathFromList(path_list) {
+    def paths = path_list.findAll { item -> item?.trim() }                 // Keep only non-null, non-blank entries (null-safe)
+    paths     = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and leading/trailing slashes
+    return paths.join('/')
+}
+
+// Function to save/publish module results: builds the relative output path for args.filename from the
+// publish directory, optional directories derived from args.meta keys, and an optional publish_files sub-directory.
+// Returns null for versions.yml (unless NF_CORE_MODULES_TEST is set) and when no publish_files rule yields a path.
+def saveFiles(Map args) {
+    def ioptions  = initOptions(args.options)
+    def path_list = [ ioptions.publish_dir ?: args.publish_dir ] // options.publish_dir takes precedence over the caller-supplied default
+
+    // Do not publish versions.yml unless running from pytest workflow
+    if (args.filename.equals('versions.yml') && !System.getenv("NF_CORE_MODULES_TEST")) {
+        return null
+    }
+    if (ioptions.publish_by_meta) {
+        def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta // truthy non-List value: use the caller's key list
+        for (key in key_list) {
+            if (args.meta && key instanceof String) {
+                def path = key
+                if (args.meta.containsKey(key)) {
+                    path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key] // Boolean values become "<key>_<value>"
+                }
+                path = path instanceof String ? path : '' // non-String values add an empty entry, which getPathFromList removes
+                path_list.add(path)
+            }
+        }
+    }
+    if (ioptions.publish_files instanceof Map) {
+        for (ext in ioptions.publish_files) {
+            if (args.filename.endsWith(ext.key)) { // first matching suffix wins
+                def ext_list = path_list.collect()
+                ext_list.add(ext.value)
+                return "${getPathFromList(ext_list)}/$args.filename"
+            }
+        }
+    } else if (ioptions.publish_files == null) { // no publish_files given: every file gets the default path
+        return "${getPathFromList(path_list)}/$args.filename"
+    }
+}
--- a/modules/khmer/normalizebymedian/main.nf
+++ b/modules/khmer/normalizebymedian/main.nf
@ -0,0 +1,49 @@
+// Import generic module functions
+include { initOptions; saveFiles; getSoftwareName; getProcessName } from './functions'
+
+params.options = [:]
+options        = initOptions(params.options)
+
+process KHMER_NORMALIZEBYMEDIAN { // Runs khmer's normalize-by-median.py; emits <name>.fastq.gz and versions.yml
+    tag "${name}"
+    label 'process_long'
+    publishDir "${params.outdir}",
+        mode: params.publish_dir_mode,
+        saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:[:], publish_by_meta:[]) }
+
+    conda (params.enable_conda ? "bioconda::khmer=3.0.0a3" : null)
+    if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
+        container "https://depot.galaxyproject.org/singularity/khmer:3.0.0a3--py37haa7609a_2"
+    } else {
+        container "quay.io/biocontainers/khmer:3.0.0a3--py37haa7609a_2"
+    }
+
+    input:
+    path pe_reads // passed positionally together with --paired when non-empty
+    path se_reads // passed via --unpaired-reads, and positionally when pe_reads is empty
+    val  name     // basename of the output file; ".fastq.gz" is appended
+
+    output:
+    path "${name}.fastq.gz", emit: reads
+    path "versions.yml"    , emit: versions
+
+    script:
+    pe_args = pe_reads ? "--paired" : ""
+    se_args = se_reads ? "--unpaired-reads ${se_reads}" : ""
+    files   = pe_reads ? pe_reads : se_reads
+    mem_arg = task.memory ? "-M ${task.memory.toGiga()}e9" : "" // task.memory is null without a memory directive; then omit -M instead of failing
+    """
+    normalize-by-median.py \\
+        ${mem_arg} \\
+        --gzip ${options.args} \\
+        -o ${name}.fastq.gz \\
+        ${pe_args} \\
+        ${se_args} \\
+        ${files}
+
+    cat <<-END_VERSIONS > versions.yml
+    ${getProcessName(task.process)}:
+        ${getSoftwareName(task.process)}: \$( normalize-by-median.py --version 2>&1 | grep ^khmer | sed 's/^khmer //' )
+    END_VERSIONS
+    """
+}
--- a/modules/khmer/normalizebymedian/meta.yml
+++ b/modules/khmer/normalizebymedian/meta.yml
@ -0,0 +1,39 @@
+name: khmer_normalizebymedian
+description: Module that calls normalize-by-median.py from khmer. The module can take a mix of paired end (interleaved) and single end reads. If both types are provided, only a single file with single ends is possible.
+keywords:
+  - digital normalization
+  - khmer
+tools:
+  - khmer:
+      description: khmer k-mer counting library
+      homepage: https://github.com/dib-lab/khmer
+      documentation: https://khmer.readthedocs.io/en/latest/
+      tool_dev_url: https://github.com/dib-lab/khmer
+      doi: "https://doi.org/10.12688/f1000research.6924.1"
+      licence: ['BSD License']
+
+input:
+  - pe_reads:
+      type: files
+      description: Paired-end interleaved fastq files
+      pattern: "*.{fq,fastq}.gz"
+  - se_reads:
+      type: files
+      description: Single-end fastq files
+      pattern: "*.{fq,fastq}.gz"
+  - name:
+      type: string
+      description: filename for output file(s); ".fastq.gz" will be appended
+
+output:
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - reads:
+      type: file
+      description: Interleaved fastq files
+      pattern: "*.{fq,fastq}.gz"
+
+authors:
+  - "@erikrikarddaniel"
--- a/modules/manta/germline/main.nf
+++ b/modules/manta/germline/main.nf
@ -19,7 +19,7 @@ process MANTA_GERMLINE {
    }

    input:
-    tuple val(meta), path(cram), path(crai)
+    tuple val(meta), path(input), path(input_index)
    path fasta
    path fai
    path target_bed
@ -39,7 +39,7 @@ process MANTA_GERMLINE {
    def options_manta = target_bed ? "--exome --callRegions $target_bed" : ""
    """
    configManta.py \
-        --bam $cram \
+        --bam $input \
        --reference $fasta \
        $options_manta \
        --runDir manta
--- a/modules/manta/germline/meta.yml
+++ b/modules/manta/germline/meta.yml
@ -23,11 +23,11 @@ input:
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
-  - cram:
+  - input:
      type: file
      description: BAM/CRAM/SAM file
      pattern: "*.{bam,cram,sam}"
-  - crai:
+  - input_index:
      type: file
      description: BAM/CRAM/SAM index file
      pattern: "*.{bai,crai,sai}"
--- a/modules/manta/somatic/main.nf
+++ b/modules/manta/somatic/main.nf
@ -19,7 +19,7 @@ process MANTA_SOMATIC {
    }

    input:
-    tuple val(meta), path(cram_normal), path(crai_normal), path(cram_tumor), path(crai_tumor)
+    tuple val(meta), path(input_normal), path(input_index_normal), path(input_tumor), path(input_index_tumor)
    path fasta
    path fai
    path target_bed
@ -42,8 +42,8 @@ process MANTA_SOMATIC {

    """
    configManta.py \
-        --tumorBam $cram_tumor \
-        --normalBam $cram_normal \
+        --tumorBam $input_tumor \
+        --normalBam $input_normal \
        --reference $fasta \
        $options_manta \
        --runDir manta
--- a/modules/manta/somatic/meta.yml
+++ b/modules/manta/somatic/meta.yml
@ -23,19 +23,19 @@ input:
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
-  - cram_normal:
+  - input_normal:
      type: file
      description: BAM/CRAM/SAM file
      pattern: "*.{bam,cram,sam}"
-  - crai_normal:
+  - input_index_normal:
      type: file
      description: BAM/CRAM/SAM index file
      pattern: "*.{bai,crai,sai}"
-  - cram_tumor:
+  - input_tumor:
      type: file
      description: BAM/CRAM/SAM file
      pattern: "*.{bam,cram,sam}"
-  - crai_tumor:
+  - input_index_tumor:
      type: file
      description: BAM/CRAM/SAM index file
      pattern: "*.{bai,crai,sai}"
--- a/modules/manta/tumoronly/main.nf
+++ b/modules/manta/tumoronly/main.nf
@ -19,7 +19,7 @@ process MANTA_TUMORONLY {
    }

    input:
-    tuple val(meta), path(cram), path(crai)
+    tuple val(meta), path(input), path(input_index)
    path fasta
    path fai
    path target_bed
@ -39,7 +39,7 @@ process MANTA_TUMORONLY {
    def options_manta = target_bed ? "--exome --callRegions $target_bed" : ""
    """
    configManta.py \
-        --tumorBam $cram \
+        --tumorBam $input \
        --reference $fasta \
        $options_manta \
        --runDir manta
--- a/modules/manta/tumoronly/meta.yml
+++ b/modules/manta/tumoronly/meta.yml
@ -23,11 +23,11 @@ input:
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
-  - cram:
+  - input:
      type: file
      description: BAM/CRAM/SAM file
      pattern: "*.{bam,cram,sam}"
-  - crai:
+  - input_index:
      type: file
      description: BAM/CRAM/SAM index file
      pattern: "*.{bai,crai,sai}"
@ -54,7 +54,6 @@ output:
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
-
  - candidate_small_indels_vcf:
      type: file
      description: Gzipped VCF file containing variants
--- a/modules/samtools/merge/main.nf
+++ b/modules/samtools/merge/main.nf
@ -19,16 +19,20 @@ process SAMTOOLS_MERGE {
    }

    input:
-    tuple val(meta), path(bams)
+    tuple val(meta), path(input_files)
+    path fasta

    output:
-    tuple val(meta), path("${prefix}.bam"), emit: bam
-    path  "versions.yml"                  , emit: versions
+    tuple val(meta), path("${prefix}.bam"),  optional:true, emit: bam
+    tuple val(meta), path("${prefix}.cram"), optional:true, emit: cram
+    path  "versions.yml"                                  , emit: versions

    script:
    prefix   = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
+    def file_type = input_files[0].getExtension()
+    def reference = fasta ? "--reference ${fasta}" : ""
    """
-    samtools merge ${prefix}.bam $bams
+    samtools merge ${reference} ${prefix}.${file_type} $input_files
    cat <<-END_VERSIONS > versions.yml
    ${getProcessName(task.process)}:
        ${getSoftwareName(task.process)}: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
--- a/modules/samtools/merge/meta.yml
+++ b/modules/samtools/merge/meta.yml
@ -1,5 +1,5 @@
 name: samtools_merge
-description: Merge BAM file
+description: Merge BAM or CRAM file
 keywords:
    - merge
    - bam
@ -21,20 +21,28 @@ input:
        description: |
            Groovy Map containing sample information
            e.g. [ id:'test', single_end:false ]
-    - bam:
+    - input_files:
        type: file
-        description: BAM file
+        description: BAM/CRAM file
        pattern: "*.{bam,cram,sam}"
+    - fasta:
+        type: optional file
+        description: Reference file the CRAM was created with
+        pattern: "*.{fasta,fa}"
 output:
    - meta:
        type: map
        description: |
            Groovy Map containing sample information
            e.g. [ id:'test', single_end:false ]
-    - merged_bam:
+    - bam:
        type: file
        description: BAM file
        pattern: "*.{bam}"
+    - cram:
+        type: file
+        description: CRAM file
+        pattern: "*.{cram}"
    - versions:
        type: file
        description: File containing software versions
@ -43,3 +51,4 @@ authors:
    - "@drpatelh"
    - "@yuukiiwa "
    - "@maxulysse"
+    - "@FriederikeHanssen"
--- a/modules/samtools/stats/main.nf
+++ b/modules/samtools/stats/main.nf
@ -19,15 +19,17 @@ process SAMTOOLS_STATS {
    }

    input:
-    tuple val(meta), path(bam), path(bai)
+    tuple val(meta), path(input), path(input_index)
+    path fasta

    output:
    tuple val(meta), path("*.stats"), emit: stats
    path  "versions.yml"            , emit: versions

    script:
+    def reference = fasta ? "--reference ${fasta}" : ""
    """
-    samtools stats $bam > ${bam}.stats
+    samtools stats ${reference} ${input} > ${input}.stats
    cat <<-END_VERSIONS > versions.yml
    ${getProcessName(task.process)}:
        ${getSoftwareName(task.process)}: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
--- a/modules/samtools/stats/meta.yml
+++ b/modules/samtools/stats/meta.yml
@ -22,14 +22,18 @@ input:
        description: |
            Groovy Map containing sample information
            e.g. [ id:'test', single_end:false ]
-    - bam:
-        type: file
-        description: BAM/CRAM/SAM file
-        pattern: "*.{bam,cram,sam}"
-    - bai:
-        type: file
-        description: Index for BAM/CRAM/SAM file
-        pattern: "*.{bai,crai,sai}"
+    - input:
+      type: file
+      description: BAM/CRAM file from alignment
+      pattern: "*.{bam,cram}"
+    - input_index:
+      type: file
+      description: BAI/CRAI file from alignment
+      pattern: "*.{bai,crai}"
+    - fasta:
+        type: optional file
+        description: Reference file the CRAM was created with
+        pattern: "*.{fasta,fa}"
 output:
    - meta:
        type: map
@ -46,3 +50,4 @@ output:
        pattern: "versions.yml"
 authors:
    - "@drpatelh"
+    - "@FriederikeHanssen"
--- a/modules/samtools/view/main.nf
+++ b/modules/samtools/view/main.nf
@ -19,16 +19,20 @@ process SAMTOOLS_VIEW {
    }

    input:
-    tuple val(meta), path(bam)
+    tuple val(meta), path(input)
+    path fasta

    output:
-    tuple val(meta), path("*.bam"), emit: bam
-    path  "versions.yml"          , emit: versions
+    tuple val(meta), path("*.bam") , optional: true, emit: bam
+    tuple val(meta), path("*.cram"), optional: true, emit: cram
+    path  "versions.yml"                           , emit: versions

    script:
    def prefix   = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
+    def reference = fasta ? "--reference ${fasta} -C" : ""
+    def file_type = input.getExtension()
    """
-    samtools view $options.args $bam > ${prefix}.bam
+    samtools view ${reference} $options.args $input > ${prefix}.${file_type}
    cat <<-END_VERSIONS > versions.yml
    ${getProcessName(task.process)}:
        ${getSoftwareName(task.process)}: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
--- a/modules/samtools/view/meta.yml
+++ b/modules/samtools/view/meta.yml
@ -21,10 +21,14 @@ input:
        description: |
            Groovy Map containing sample information
            e.g. [ id:'test', single_end:false ]
-    - bam:
+    - input:
        type: file
        description: BAM/CRAM/SAM file
        pattern: "*.{bam,cram,sam}"
+    - fasta:
+        type: optional file
+        description: Reference file the CRAM was created with
+        pattern: "*.{fasta,fa}"
 output:
    - meta:
        type: map
@ -33,8 +37,12 @@ output:
            e.g. [ id:'test', single_end:false ]
    - bam:
        type: file
-        description: filtered/converted BAM/CRAM/SAM file
-        pattern: "*.{bam,cram,sam}"
+        description: filtered/converted BAM/SAM file
+        pattern: "*.{bam,sam}"
+    - cram:
+        type: file
+        description: filtered/converted CRAM file
+        pattern: "*.cram"
    - versions:
        type: file
        description: File containing software versions
@ -42,3 +50,4 @@ output:
 authors:
    - "@drpatelh"
    - "@joseespinosa"
+    - "@FriederikeHanssen"
--- a/modules/strelka/germline/main.nf
+++ b/modules/strelka/germline/main.nf
@ -19,7 +19,7 @@ process STRELKA_GERMLINE {
    }

    input:
-    tuple val(meta), path(bam), path(bai)
+    tuple val(meta), path(input), path(input_index)
    path  fasta
    path  fai
    path  target_bed
@ -38,7 +38,7 @@ process STRELKA_GERMLINE {
    def regions  = target_bed ? "--exome --callRegions ${target_bed}" : ""
    """
    configureStrelkaGermlineWorkflow.py \\
-        --bam $bam \\
+        --bam $input \\
        --referenceFasta $fasta \\
        $regions \\
        $options.args \\
--- a/modules/strelka/germline/meta.yml
+++ b/modules/strelka/germline/meta.yml
@ -21,14 +21,14 @@ input:
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test']
-  - bam:
+  - input:
      type: file
-      description: BAM file
-      pattern: "*.{bam}"
-  - bai:
+      description: BAM/CRAM file
+      pattern: "*.{bam,cram}"
+  - input_index:
      type: file
-      description: BAM index file
-      pattern: "*.{bai}"
+      description: BAM/CRAI index file
+      pattern: "*.{bai,crai}"
  - target_bed:
      type: file
      description: An optional bed file
--- a/modules/strelka/somatic/main.nf
+++ b/modules/strelka/somatic/main.nf
@ -19,7 +19,7 @@ process STRELKA_SOMATIC {
    }

    input:
-    tuple val(meta), path(cram_normal), path(crai_normal), path(cram_tumor), path(crai_tumor),  path(manta_candidate_small_indels), path(manta_candidate_small_indels_tbi)
+    tuple val(meta), path(input_normal), path(input_index_normal), path(input_tumor), path(input_index_tumor),  path(manta_candidate_small_indels), path(manta_candidate_small_indels_tbi)
    path  fasta
    path  fai
    path  target_bed
@ -38,8 +38,8 @@ process STRELKA_SOMATIC {
    def options_manta = manta_candidate_small_indels ? "--indelCandidates ${manta_candidate_small_indels}" : ""
    """
    configureStrelkaSomaticWorkflow.py \\
-        --tumor $cram_tumor \\
-        --normal $cram_normal \\
+        --tumor $input_tumor \\
+        --normal $input_normal \\
        --referenceFasta $fasta \\
        $options_target_bed \\
        $options_manta \\
--- a/modules/strelka/somatic/meta.yml
+++ b/modules/strelka/somatic/meta.yml
@ -21,19 +21,19 @@ input:
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
-  - cram_normal:
+  - input_normal:
      type: file
      description: BAM/CRAM/SAM file
      pattern: "*.{bam,cram,sam}"
-  - crai_normal:
+  - input_index_normal:
      type: file
      description: BAM/CRAM/SAM index file
      pattern: "*.{bai,crai,sai}"
-  - cram_tumor:
+  - input_tumor:
      type: file
      description: BAM/CRAM/SAM file
      pattern: "*.{bam,cram,sam}"
-  - crai_tumor:
+  - input_index_tumor:
      type: file
      description: BAM/CRAM/SAM index file
      pattern: "*.{bai,crai,sai}"
--- a/subworkflows/nf-core/bam_stats_samtools/main.nf
+++ b/subworkflows/nf-core/bam_stats_samtools/main.nf
@ -15,7 +15,7 @@ workflow BAM_STATS_SAMTOOLS {
    main:
    ch_versions = Channel.empty()

-    SAMTOOLS_STATS ( ch_bam_bai )
+    SAMTOOLS_STATS ( ch_bam_bai, [] )
    ch_versions = ch_versions.mix(SAMTOOLS_STATS.out.versions.first())

    SAMTOOLS_FLAGSTAT ( ch_bam_bai )
--- a/tests/config/pytest_modules.yml
+++ b/tests/config/pytest_modules.yml
@ -394,6 +394,10 @@ fgbio/fastqtobam:
  - modules/fgbio/fastqtobam/**
  - tests/modules/fgbio/fastqtobam/**

+fgbio/groupreadsbyumi:
+  - modules/fgbio/groupreadsbyumi/**
+  - tests/modules/fgbio/groupreadsbyumi/**
+
 fgbio/sortbam:
  - modules/fgbio/sortbam/**
  - tests/modules/fgbio/sortbam/**
@ -629,6 +633,10 @@ kallistobustools/ref:
  - modules/kallistobustools/ref/**
  - tests/modules/kallistobustools/ref/**

+khmer/normalizebymedian:
+  - modules/khmer/normalizebymedian/**
+  - tests/modules/khmer/normalizebymedian/**
+
 kleborate:
  - modules/kleborate/**
  - tests/modules/kleborate/**
--- a/tests/modules/bedtools/sort/main.nf
+++ b/tests/modules/bedtools/sort/main.nf
@ -9,5 +9,5 @@ workflow test_bedtools_sort {
              file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true)
            ]

-    BEDTOOLS_SORT ( input )
+    BEDTOOLS_SORT ( input, "testext" )
 }
--- a/tests/modules/bedtools/sort/test.yml
+++ b/tests/modules/bedtools/sort/test.yml
@ -4,5 +4,5 @@
    - bedtools
    - bedtools/sort
  files:
-    - path: ./output/bedtools/test_out.bed
+    - path: ./output/bedtools/test_out.testext
      md5sum: fe4053cf4de3aebbdfc3be2efb125a74
--- a/tests/modules/fgbio/groupreadsbyumi/main.nf
+++ b/tests/modules/fgbio/groupreadsbyumi/main.nf
@ -0,0 +1,15 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { FGBIO_GROUPREADSBYUMI } from '../../../../modules/fgbio/groupreadsbyumi/main.nf' addParams( options: [:] )
+
+workflow test_fgbio_groupreadsbyumi {
+
+    input = [ [ id:'test', single_end:false ], // meta map
+              file(params.test_data['homo_sapiens']['illumina']['test_paired_end_umi_unsorted_tagged_bam'], checkIfExists: true) ]
+
+    strategy = "Adjacency"
+
+    FGBIO_GROUPREADSBYUMI ( input, strategy )
+}
--- a/tests/modules/fgbio/groupreadsbyumi/test.yml
+++ b/tests/modules/fgbio/groupreadsbyumi/test.yml
@ -0,0 +1,10 @@
+- name: fgbio groupreadsbyumi test_fgbio_groupreadsbyumi
+  command: nextflow run tests/modules/fgbio/groupreadsbyumi -entry test_fgbio_groupreadsbyumi -c tests/config/nextflow.config
+  tags:
+    - fgbio
+    - fgbio/groupreadsbyumi
+  files:
+    - path: output/fgbio/test_umi-grouped.bam
+      md5sum: f1e53fc845fd99a3da172eb8063dff0b
+    - path: output/fgbio/test_umi_histogram.txt
+      md5sum: d17fd167b2a765d46e4b01bf08ece01b
--- a/tests/modules/gatk4/applybqsr/main.nf
+++ b/tests/modules/gatk4/applybqsr/main.nf
@ -30,3 +30,17 @@ workflow test_gatk4_applybqsr_intervals {

  GATK4_APPLYBQSR ( input, fasta, fai, dict, intervals )
 }
+
+workflow test_gatk4_applybqsr_cram {
+    input = [ [ id:'test' ], // meta map
+                file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true),
+                file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true),
+                file(params.test_data['homo_sapiens']['illumina']['test_baserecalibrator_table'], checkIfExists: true)
+              ]
+    fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
+    fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
+    dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
+    intervals = file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true)
+
+  GATK4_APPLYBQSR ( input, fasta, fai, dict, intervals )
+}
--- a/tests/modules/gatk4/applybqsr/test.yml
+++ b/tests/modules/gatk4/applybqsr/test.yml
@ -1,17 +1,26 @@
 - name: gatk4 applybqsr test_gatk4_applybqsr
  command: nextflow run tests/modules/gatk4/applybqsr -entry test_gatk4_applybqsr -c tests/config/nextflow.config
  tags:
-    - gatk4
    - gatk4/applybqsr
+    - gatk4
  files:
    - path: output/gatk4/test.bam
-      md5sum: dac716c394db5e83c12b44355c098ca7
+      md5sum: 87a2eabae2b7b41574f966612b5addae

 - name: gatk4 applybqsr test_gatk4_applybqsr_intervals
  command: nextflow run tests/modules/gatk4/applybqsr -entry test_gatk4_applybqsr_intervals -c tests/config/nextflow.config
  tags:
-    - gatk4
    - gatk4/applybqsr
+    - gatk4
  files:
    - path: output/gatk4/test.bam
-      md5sum: 400441dbe5344658580ba0a24ba57069
+      md5sum: 9c015d3c1dbd9eee793b7386f432b6aa
+
+- name: gatk4 applybqsr test_gatk4_applybqsr_cram
+  command: nextflow run tests/modules/gatk4/applybqsr -entry test_gatk4_applybqsr_cram -c tests/config/nextflow.config
+  tags:
+    - gatk4/applybqsr
+    - gatk4
+  files:
+    - path: output/gatk4/test.bam
+      md5sum: 02f84815fdbc99c21c8d42ebdcabbbf7
--- a/tests/modules/gatk4/baserecalibrator/main.nf
+++ b/tests/modules/gatk4/baserecalibrator/main.nf
@ -18,6 +18,21 @@ workflow test_gatk4_baserecalibrator {
    GATK4_BASERECALIBRATOR ( input, fasta, fai, dict, [], sites, sites_tbi )
 }

+workflow test_gatk4_baserecalibrator_cram {
+   input = [ [ id:'test' ], // meta map
+                file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true),
+                file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true),
+                file(params.test_data['homo_sapiens']['illumina']['test_baserecalibrator_table'], checkIfExists: true)
+              ]
+    fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
+    fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
+    dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
+    sites = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz'], checkIfExists: true)
+    sites_tbi = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz_tbi'], checkIfExists: true)
+
+    GATK4_BASERECALIBRATOR ( input, fasta, fai, dict, [], sites, sites_tbi )
+}
+
 workflow test_gatk4_baserecalibrator_intervals {
    input     = [ [ id:'test' ], // meta map
                  file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
--- a/tests/modules/gatk4/baserecalibrator/test.yml
+++ b/tests/modules/gatk4/baserecalibrator/test.yml
@ -1,17 +1,26 @@
 - name: gatk4 baserecalibrator test_gatk4_baserecalibrator
  command: nextflow run tests/modules/gatk4/baserecalibrator -entry test_gatk4_baserecalibrator -c tests/config/nextflow.config
  tags:
-    - gatk4/baserecalibrator
    - gatk4
+    - gatk4/baserecalibrator
  files:
    - path: output/gatk4/test.table
      md5sum: e2e43abdc0c943c1a54dae816d0b9ea7

+- name: gatk4 baserecalibrator test_gatk4_baserecalibrator_cram
+  command: nextflow run tests/modules/gatk4/baserecalibrator -entry test_gatk4_baserecalibrator_cram -c tests/config/nextflow.config
+  tags:
+    - gatk4
+    - gatk4/baserecalibrator
+  files:
+    - path: output/gatk4/test.table
+      md5sum: 35d89a3811aa31711fc9815b6b80e6ec
+
 - name: gatk4 baserecalibrator test_gatk4_baserecalibrator_intervals
  command: nextflow run tests/modules/gatk4/baserecalibrator -entry test_gatk4_baserecalibrator_intervals -c tests/config/nextflow.config
  tags:
-    - gatk4/baserecalibrator
    - gatk4
+    - gatk4/baserecalibrator
  files:
    - path: output/gatk4/test.table
      md5sum: 9ecb5f00a2229291705addc09c0ec231
@ -19,8 +28,8 @@
 - name: gatk4 baserecalibrator test_gatk4_baserecalibrator_multiple_sites
  command: nextflow run tests/modules/gatk4/baserecalibrator -entry test_gatk4_baserecalibrator_multiple_sites -c tests/config/nextflow.config
  tags:
-    - gatk4/baserecalibrator
    - gatk4
+    - gatk4/baserecalibrator
  files:
    - path: output/gatk4/test.table
      md5sum: e2e43abdc0c943c1a54dae816d0b9ea7
--- a/tests/modules/gatk4/haplotypecaller/main.nf
+++ b/tests/modules/gatk4/haplotypecaller/main.nf
@ -13,5 +13,33 @@ workflow test_gatk4_haplotypecaller {
    fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true)
    dict = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true)

-    GATK4_HAPLOTYPECALLER ( input, fasta, fai, dict )
+    GATK4_HAPLOTYPECALLER ( input, fasta, fai, dict, [], [], [] )
+}
+
+workflow test_gatk4_haplotypecaller_cram {
+    input = [ [ id:'test' ], // meta map
+                file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true),
+                file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true)
+              ]
+    fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
+    fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
+    dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
+
+    GATK4_HAPLOTYPECALLER ( input, fasta, fai, dict, [], [], [] )
+}
+
+workflow test_gatk4_haplotypecaller_intervals_dbsnp {
+   input = [ [ id:'test' ], // meta map
+                file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true),
+                file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true)
+            ]
+
+    fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
+    fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
+    dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
+    sites = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz'], checkIfExists: true)
+    sites_tbi = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz_tbi'], checkIfExists: true)
+    intervals = file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true)
+
+    GATK4_HAPLOTYPECALLER ( input, fasta, fai, dict, sites, sites_tbi, intervals )
 }
--- a/tests/modules/gatk4/haplotypecaller/test.yml
+++ b/tests/modules/gatk4/haplotypecaller/test.yml
@ -1,13 +1,26 @@
 - name: gatk4 haplotypecaller test_gatk4_haplotypecaller
  command: nextflow run tests/modules/gatk4/haplotypecaller -entry test_gatk4_haplotypecaller -c tests/config/nextflow.config
  tags:
-    - gatk4
    - gatk4/haplotypecaller
+    - gatk4
+  files:
+    - path: output/gatk4/test.vcf.gz
+    - path: output/gatk4/test.vcf.gz.tbi
+
+- name: gatk4 haplotypecaller test_gatk4_haplotypecaller_cram
+  command: nextflow run tests/modules/gatk4/haplotypecaller -entry test_gatk4_haplotypecaller_cram -c tests/config/nextflow.config
+  tags:
+    - gatk4/haplotypecaller
+    - gatk4
+  files:
+    - path: output/gatk4/test.vcf.gz
+    - path: output/gatk4/test.vcf.gz.tbi
+
+- name: gatk4 haplotypecaller test_gatk4_haplotypecaller_intervals_dbsnp
+  command: nextflow run tests/modules/gatk4/haplotypecaller -entry test_gatk4_haplotypecaller_intervals_dbsnp -c tests/config/nextflow.config
+  tags:
+    - gatk4/haplotypecaller
+    - gatk4
  files:
    - path: output/gatk4/test.vcf.gz
-      should_exist: true
-      contains:
-        - 'MT192765.1'
-        - '54.60'
-        - '37.32'
    - path: output/gatk4/test.vcf.gz.tbi
--- a/tests/modules/gatk4/markduplicates/main.nf
+++ b/tests/modules/gatk4/markduplicates/main.nf
@ -11,3 +11,12 @@ workflow test_gatk4_markduplicates {

    GATK4_MARKDUPLICATES ( input )
 }
+
+workflow test_gatk4_markduplicates_multiple_bams {
+    input = [ [ id:'test', single_end:false ], // meta map
+              file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
+              file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam'], checkIfExists: true)
+            ]
+
+    GATK4_MARKDUPLICATES ( input )
+}
--- a/tests/modules/gatk4/markduplicates/test.yml
+++ b/tests/modules/gatk4/markduplicates/test.yml
@ -1,8 +1,23 @@
 - name: gatk4 markduplicates test_gatk4_markduplicates
  command: nextflow run tests/modules/gatk4/markduplicates -entry test_gatk4_markduplicates -c tests/config/nextflow.config
  tags:
-    - gatk4
    - gatk4/markduplicates
+    - gatk4
  files:
+    - path: output/gatk4/test.bai
+      md5sum: e9c125e82553209933883b4fe2b8d7c2
    - path: output/gatk4/test.bam
-      md5sum: 3b6facab3afbacfa08a7a975efbd2c6b
+      md5sum: bda9a7bf5057f2288ed70be3eb8a753f
+    - path: output/gatk4/test.metrics
+
+- name: gatk4 markduplicates test_gatk4_markduplicates_multiple_bams
+  command: nextflow run tests/modules/gatk4/markduplicates -entry test_gatk4_markduplicates_multiple_bams -c tests/config/nextflow.config
+  tags:
+    - gatk4/markduplicates
+    - gatk4
+  files:
+    - path: output/gatk4/test.bai
+      md5sum: 93cebe29e7cca2064262b739235cca9b
+    - path: output/gatk4/test.bam
+      md5sum: dcd6f584006b04141fb787001a8ecacc
+    - path: output/gatk4/test.metrics
--- a/tests/modules/gatk4/mutect2/main.nf
+++ b/tests/modules/gatk4/mutect2/main.nf
@ -3,6 +3,8 @@
 nextflow.enable.dsl = 2

 include { GATK4_MUTECT2 } from '../../../../modules/gatk4/mutect2/main.nf' addParams( options: [:] )
+// Temporary fix: this alias runs with the mitochondria mode setting because it increases sensitivity, allowing some tumor_normal variants to be detected while the old test data is still in use. It will be removed when new test data for sarek is available.
+include { GATK4_MUTECT2 as GATK4_TEMPFIX_MUTECT2 } from '../../../../modules/gatk4/mutect2/main.nf' addParams( options: [args: '--mitochondria-mode'] )

 workflow test_gatk4_mutect2_tumor_normal_pair {
    input = [ [ id:'test'], // meta map
@ -12,6 +14,8 @@ workflow test_gatk4_mutect2_tumor_normal_pair {
            ]
    run_single = false
    run_pon = false
+    run_mito = false
+    interval_label = []
    fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
    fastaidx = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
    dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
@ -20,7 +24,7 @@ workflow test_gatk4_mutect2_tumor_normal_pair {
    panel_of_normals = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_vcf_gz'], checkIfExists: true)
    panel_of_normals_idx = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_vcf_gz_tbi'], checkIfExists: true)

-    GATK4_MUTECT2 ( input , run_single , run_pon , fasta , fastaidx , dict , germline_resource, germline_resource_idx , panel_of_normals , panel_of_normals_idx )
+    GATK4_TEMPFIX_MUTECT2 ( input , run_single , run_pon , run_mito , interval_label , fasta , fastaidx , dict , germline_resource, germline_resource_idx , panel_of_normals , panel_of_normals_idx )
 }

 workflow test_gatk4_mutect2_tumor_single {
@ -31,6 +35,8 @@ workflow test_gatk4_mutect2_tumor_single {
            ]
    run_single = true
    run_pon = false
+    run_mito = false
+    interval_label = []
    fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
    fastaidx = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
    dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
@ -39,7 +45,28 @@ workflow test_gatk4_mutect2_tumor_single {
    panel_of_normals = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_vcf_gz'], checkIfExists: true)
    panel_of_normals_idx = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_vcf_gz_tbi'], checkIfExists: true)

-    GATK4_MUTECT2 ( input , run_single , run_pon , fasta , fastaidx , dict , germline_resource, germline_resource_idx , panel_of_normals , panel_of_normals_idx )
+    GATK4_MUTECT2 ( input , run_single , run_pon , run_mito , interval_label , fasta , fastaidx , dict , germline_resource, germline_resource_idx , panel_of_normals , panel_of_normals_idx )
+}
+
+workflow test_gatk4_mutect2_cram_input {
+    input = [ [ id:'test'], // meta map
+              [ file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_cram'], checkIfExists: true)],
+              [ file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_cram_crai'], checkIfExists: true)],
+              []
+            ]
+    run_single = true
+    run_pon = false
+    run_mito = false
+    interval_label = []
+    fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
+    fastaidx = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
+    dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
+    germline_resource = file(params.test_data['homo_sapiens']['genome']['gnomad_r2_1_1_vcf_gz'], checkIfExists: true)
+    germline_resource_idx = file(params.test_data['homo_sapiens']['genome']['gnomad_r2_1_1_vcf_gz_tbi'], checkIfExists: true)
+    panel_of_normals = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_vcf_gz'], checkIfExists: true)
+    panel_of_normals_idx = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_vcf_gz_tbi'], checkIfExists: true)
+
+    GATK4_MUTECT2 ( input , run_single , run_pon , run_mito , interval_label , fasta , fastaidx , dict , germline_resource, germline_resource_idx , panel_of_normals , panel_of_normals_idx )
 }

 workflow test_gatk4_mutect2_generate_pon {
@ -50,6 +77,8 @@ workflow test_gatk4_mutect2_generate_pon {
            ]
    run_single = false
    run_pon = true
+    run_mito = false
+    interval_label = []
    fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
    fastaidx = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
    dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
@ -58,5 +87,27 @@ workflow test_gatk4_mutect2_generate_pon {
    panel_of_normals = []
    panel_of_normals_idx = []

-    GATK4_MUTECT2 ( input , run_single , run_pon , fasta , fastaidx , dict , germline_resource, germline_resource_idx , panel_of_normals , panel_of_normals_idx )
+    GATK4_MUTECT2 ( input , run_single , run_pon, run_mito , interval_label , fasta , fastaidx , dict , germline_resource, germline_resource_idx , panel_of_normals , panel_of_normals_idx )
+}
+
+// Mitochondria mode would ideally be tested with mitochondrial test data. However, since the mitochondria settings only increase detection sensitivity, the chr22 data can be used as a stand-in because it is already a small dataset; the extra variants detected compared to generate_pon show that the mode is working.
+workflow test_gatk4_mutect2_mitochondria {
+    input = [ [ id:'test'], // meta map
+              [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_bam'], checkIfExists: true)],
+              [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_bam_bai'], checkIfExists: true)],
+              []
+            ]
+    run_single = false
+    run_pon = false
+    run_mito = true
+    interval_label = 'chr22'
+    fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
+    fastaidx = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
+    dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
+    germline_resource = []
+    germline_resource_idx = []
+    panel_of_normals = []
+    panel_of_normals_idx = []
+
+    GATK4_MUTECT2 ( input , run_single , run_pon, run_mito , interval_label , fasta , fastaidx , dict , germline_resource, germline_resource_idx , panel_of_normals , panel_of_normals_idx )
 }
--- a/tests/modules/gatk4/mutect2/test.yml
+++ b/tests/modules/gatk4/mutect2/test.yml
@ -7,7 +7,7 @@
    - path: output/gatk4/test.f1r2.tar.gz
    - path: output/gatk4/test.vcf.gz
    - path: output/gatk4/test.vcf.gz.stats
-      md5sum: 6ecb874e6a95aa48233587b876c2a7a9
+      md5sum: 887d54e393510f1d0aa2c33bc6155161
    - path: output/gatk4/test.vcf.gz.tbi

 - name: gatk4 mutect2 test_gatk4_mutect2_tumor_single
@ -18,7 +18,18 @@
  files:
    - path: output/gatk4/test.vcf.gz
    - path: output/gatk4/test.vcf.gz.stats
-      md5sum: e7ef613f7d158b8a0adf44abe5db2029
+      md5sum: 106c5828b02b906c97922618b6072169
+    - path: output/gatk4/test.vcf.gz.tbi
+
+- name: gatk4 mutect2 test_gatk4_mutect2_cram_input
+  command: nextflow run tests/modules/gatk4/mutect2 -entry test_gatk4_mutect2_cram_input -c tests/config/nextflow.config
+  tags:
+    - gatk4
+    - gatk4/mutect2
+  files:
+    - path: output/gatk4/test.vcf.gz
+    - path: output/gatk4/test.vcf.gz.stats
+      md5sum: 106c5828b02b906c97922618b6072169
    - path: output/gatk4/test.vcf.gz.tbi

 - name: gatk4 mutect2 test_gatk4_mutect2_generate_pon
@ -31,3 +42,14 @@
    - path: output/gatk4/test.vcf.gz.stats
      md5sum: 4f77301a125913170b8e9e7828b4ca3f
    - path: output/gatk4/test.vcf.gz.tbi
+
+- name: gatk4 mutect2 test_gatk4_mutect2_mitochondria
+  command: nextflow run tests/modules/gatk4/mutect2 -entry test_gatk4_mutect2_mitochondria -c tests/config/nextflow.config
+  tags:
+    - gatk4
+    - gatk4/mutect2
+  files:
+    - path: output/gatk4/test.vcf.gz
+    - path: output/gatk4/test.vcf.gz.stats
+      md5sum: fc6ea14ca2da346babe78161beea28c9
+    - path: output/gatk4/test.vcf.gz.tbi
--- a/tests/modules/khmer/normalizebymedian/main.nf
+++ b/tests/modules/khmer/normalizebymedian/main.nf
@ -0,0 +1,85 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { SEQTK_MERGEPE                                           } from '../../../../modules/seqtk/mergepe/main.nf'           addParams( options: [:] )
+include { KHMER_NORMALIZEBYMEDIAN                                 } from '../../../../modules/khmer/normalizebymedian/main.nf' addParams( options: [:] )
+include { KHMER_NORMALIZEBYMEDIAN as KHMER_NORMALIZEBYMEDIAN_ARGS } from '../../../../modules/khmer/normalizebymedian/main.nf' addParams( options: [args: '-C 20 -k 32'] )
+
+workflow test_khmer_normalizebymedian_only_pe {
+    
+    pe_reads = [
+        [ id:'khmer_test', single_end:false ], // meta map
+        [
+            file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+            file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) 
+        ]
+    ]
+
+    SEQTK_MERGEPE(pe_reads)
+
+    KHMER_NORMALIZEBYMEDIAN ( SEQTK_MERGEPE.out.reads.collect { it[1] }, [], 'only_pe' )
+}
+
+workflow test_khmer_normalizebymedian_only_se {
+    
+    se_reads = [
+        file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+        file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) 
+    ]
+
+    KHMER_NORMALIZEBYMEDIAN ( [], se_reads, 'only_se' )
+}
+
+workflow test_khmer_normalizebymedian_mixed {
+    
+    pe_reads = [
+        [ id:'khmer_test', single_end:false ], // meta map
+        [
+            file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+            file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) 
+        ]
+    ]
+    se_reads = file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)
+
+    SEQTK_MERGEPE(pe_reads)
+
+    KHMER_NORMALIZEBYMEDIAN ( SEQTK_MERGEPE.out.reads.map { it[1] }, se_reads, 'mixed' )
+}
+
+workflow test_khmer_normalizebymedian_multiple_pe {
+    
+    pe_reads = [
+        [ id:'khmer_test0', single_end:false ], // meta map
+        [
+            file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+            file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) 
+        ],
+        [ id:'khmer_test1', single_end:false ], // meta map
+        [
+            file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+            file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) 
+        ]
+    ]
+    se_reads = file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)
+
+    SEQTK_MERGEPE(pe_reads)
+
+    KHMER_NORMALIZEBYMEDIAN ( SEQTK_MERGEPE.out.reads.collect { it[1] }, se_reads, 'multiple_pe' )
+}
+
+workflow test_khmer_normalizebymedian_args {
+    
+    pe_reads = [
+        [ id:'khmer_test0', single_end:false ], // meta map
+        [
+            file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+            file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) 
+        ]
+    ]
+    se_reads = file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)
+
+    SEQTK_MERGEPE(pe_reads)
+
+    KHMER_NORMALIZEBYMEDIAN_ARGS ( SEQTK_MERGEPE.out.reads.collect { it[1] }, se_reads, 'args' )
+}
--- a/tests/modules/khmer/normalizebymedian/test.yml
+++ b/tests/modules/khmer/normalizebymedian/test.yml
@ -0,0 +1,42 @@
+# Regenerate with: nf-core modules create-test-yml khmer/normalizebymedian
+- name: khmer normalizebymedian only pe reads
+  command: nextflow run ./tests/modules/khmer/normalizebymedian -entry test_khmer_normalizebymedian_only_pe -c tests/config/nextflow.config
+  tags:
+    - khmer
+    - khmer/normalizebymedian
+  files:
+    - path: output/khmer/only_pe.fastq.gz
+      # md5sum deliberately omitted: the tool gzips its own output, so the checksum is not stable (even locally with docker)
+      #md5sum: 75e05f2e80cf4bd0b534d4b73f7c059c
+      
+- name: khmer normalizebymedian only se reads
+  command: nextflow run ./tests/modules/khmer/normalizebymedian -entry test_khmer_normalizebymedian_only_se -c tests/config/nextflow.config
+  tags:
+    - khmer
+    - khmer/normalizebymedian
+  files:
+    - path: output/khmer/only_se.fastq.gz
+      
+- name: khmer normalizebymedian mixed reads
+  command: nextflow run ./tests/modules/khmer/normalizebymedian -entry test_khmer_normalizebymedian_mixed -c tests/config/nextflow.config
+  tags:
+    - khmer
+    - khmer/normalizebymedian
+  files:
+    - path: output/khmer/mixed.fastq.gz
+
+- name: khmer normalizebymedian multiple pe reads
+  command: nextflow run ./tests/modules/khmer/normalizebymedian -entry test_khmer_normalizebymedian_multiple_pe -c tests/config/nextflow.config
+  tags:
+    - khmer
+    - khmer/normalizebymedian
+  files:
+    - path: output/khmer/multiple_pe.fastq.gz
+
+- name: khmer normalizebymedian args
+  command: nextflow run ./tests/modules/khmer/normalizebymedian -entry test_khmer_normalizebymedian_args -c tests/config/nextflow.config
+  tags:
+    - khmer
+    - khmer/normalizebymedian
+  files:
+    - path: output/khmer/args.fastq.gz
--- a/tests/modules/samtools/merge/main.nf
+++ b/tests/modules/samtools/merge/main.nf
@ -11,5 +11,15 @@ workflow test_samtools_merge {
                 file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam'], checkIfExists: true)]
               ]

-    SAMTOOLS_MERGE ( input )
+    SAMTOOLS_MERGE ( input, [] )
+}
+
+workflow test_samtools_merge_cram {
+    // Two CRAM inputs under one meta map; the genome FASTA goes in as the second argument.
+    crams = [
+        file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_cram'], checkIfExists: true),
+        file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_cram'], checkIfExists: true)
+    ]
+    fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
+    SAMTOOLS_MERGE ( [ [ id: 'test' ], crams ], fasta )
 }
--- a/tests/modules/samtools/merge/test.yml
+++ b/tests/modules/samtools/merge/test.yml
@ -1,7 +1,15 @@
- name: samtools merge
-  command: nextflow run ./tests/modules/samtools/merge -entry test_samtools_merge -c tests/config/nextflow.config
+- name: samtools merge test_samtools_merge
+  command: nextflow run tests/modules/samtools/merge -entry test_samtools_merge -c tests/config/nextflow.config
  tags:
-    - samtools
    - samtools/merge
+    - samtools
  files:
    - path: output/samtools/test_merged.bam
+
+- name: samtools merge test_samtools_merge_cram
+  command: nextflow run tests/modules/samtools/merge -entry test_samtools_merge_cram -c tests/config/nextflow.config
+  tags:
+    - samtools/merge
+    - samtools
+  files:
+    - path: output/samtools/test_merged.cram
--- a/tests/modules/samtools/stats/main.nf
+++ b/tests/modules/samtools/stats/main.nf
@ -10,5 +10,15 @@ workflow test_samtools_stats {
                file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true)
            ]

-    SAMTOOLS_STATS ( input )
+    SAMTOOLS_STATS ( input, [])
+}
+
+workflow test_samtools_stats_cram {
+    // CRAM plus its CRAI for one sample; the genome FASTA goes in as the second argument.
+    cram  = file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_cram'], checkIfExists: true)
+    crai  = file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_cram_crai'], checkIfExists: true)
+    fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
+    input = [ [ id: 'test' ], cram, crai ] // [ meta map, cram, crai ]
+
+    SAMTOOLS_STATS ( input, fasta )
 }
--- a/tests/modules/samtools/stats/test.yml
+++ b/tests/modules/samtools/stats/test.yml
@ -1,8 +1,17 @@
- name: samtools stats
-  command: nextflow run ./tests/modules/samtools/stats -entry test_samtools_stats -c tests/config/nextflow.config
+- name: samtools stats test_samtools_stats
+  command: nextflow run tests/modules/samtools/stats -entry test_samtools_stats -c tests/config/nextflow.config
  tags:
    - samtools
    - samtools/stats
  files:
-    - path: ./output/samtools/test.paired_end.sorted.bam.stats
+    - path: output/samtools/test.paired_end.sorted.bam.stats
      md5sum: a7f36cf11fd3bf97e0a0ae29c0627296
+
+- name: samtools stats test_samtools_stats_cram
+  command: nextflow run tests/modules/samtools/stats -entry test_samtools_stats_cram -c tests/config/nextflow.config
+  tags:
+    - samtools
+    - samtools/stats
+  files:
+    - path: output/samtools/test.paired_end.recalibrated.sorted.cram.stats
+      md5sum: bd55a1da30028403f4b66dacf7a2a20e
--- a/tests/modules/samtools/view/main.nf
+++ b/tests/modules/samtools/view/main.nf
@ -7,8 +7,17 @@ include { SAMTOOLS_VIEW } from '../../../../modules/samtools/view/main.nf' addPa
 workflow test_samtools_view {
    input = [ [ id:'test', single_end:false ], // meta map
                file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true)
-
            ]

-    SAMTOOLS_VIEW ( input )
+    SAMTOOLS_VIEW ( input, [] )
+}
+
+workflow test_samtools_view_cram {
+    // CRAM input only: test_samtools_view feeds SAMTOOLS_VIEW a two-element [ meta, file ]
+    // tuple, so the .crai that was passed as a stray third element is left out here.
+    input = [ [ id: 'test' ], // meta map
+              file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_cram'], checkIfExists: true) ]
+    fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
+
+    SAMTOOLS_VIEW ( input, fasta )
 }
--- a/tests/modules/samtools/view/test.yml
+++ b/tests/modules/samtools/view/test.yml
@ -1,8 +1,16 @@
- name: samtools view
+- name: samtools view test_samtools_view
  command: nextflow run tests/modules/samtools/view -entry test_samtools_view -c tests/config/nextflow.config
  tags:
-    - samtools
    - samtools/view
+    - samtools
  files:
    - path: output/samtools/test.bam
      md5sum: 8fb1e82f76416e9e30fc6b2357e2cf13
+
+- name: samtools view test_samtools_view_cram
+  command: nextflow run tests/modules/samtools/view -entry test_samtools_view_cram -c tests/config/nextflow.config
+  tags:
+    - samtools/view
+    - samtools
+  files:
+    - path: output/samtools/test.cram