Merge branch 'master' into haplocheck

2024-12-22 11:08:17 +00:00 · 2022-06-10 20:39:33 -03:00 · 2022-06-10 20:39:33 -03:00 · 547f483b74
commit 547f483b74
parent 89bffe3a5e ab422e54b5
50 changed files with 1023 additions and 66 deletions
--- a/modules/ampir/main.nf
+++ b/modules/ampir/main.nf
@ -0,0 +1,48 @@
+process AMPIR { // Runs ampir's predict_amps on a protein FASTA through an inline Rscript; writes <prefix>.tsv and <prefix>.faa
+    tag "$meta.id"
+    label 'process_low'
+
+    conda (params.enable_conda ? "conda-forge::r-ampir=1.1.0" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/r-ampir:1.1.0':
+        'quay.io/biocontainers/r-ampir:1.1.0' }"
+
+    input:
+    tuple val(meta), path(faa)
+    val model // forwarded verbatim to predict_amps(model = ...)
+    val min_length // pass [] to leave min_len unset in the predict_amps call
+    val min_probability // predictions with prob_AMP below this value are dropped
+
+    output:
+    tuple val(meta), path("*.faa"), emit: amps_faa
+    tuple val(meta), path("*.tsv"), emit: amps_tsv
+    path "versions.yml"           , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: '' // NOTE(review): args is not referenced anywhere in the script below
+    def prefix = task.ext.prefix ?: "${meta.id}" // basename of both output files; defaults to meta.id
+    min_length = ("${min_length}" == "[]") ? "": " min_len = as.integer(${min_length})," // Fall back to AMPir default value if none specified
+    if ("$faa" == "${prefix}.faa") error "Input and output names are the same, set prefix in module configuration to disambiguate!" // the script writes ${prefix}.faa, which would overwrite the staged input
+    """
+    #!/usr/bin/env Rscript
+    library(ampir)
+
+    input_seqs <- read_faa('${faa}')
+    prediction <- predict_amps(input_seqs,${min_length} model = '${model}')
+    prediction <- prediction[which(prediction\$prob_AMP >= as.numeric(${min_probability})), ]
+    output_seqs <- input_seqs[row.names(prediction), ]
+    write.table(prediction, file = "${prefix}.tsv", row.names = FALSE, sep = "\t", quote = FALSE, dec = '.')
+    df_to_faa(output_seqs, "${prefix}.faa")
+
+    version_file_path <- "versions.yml"
+    version_ampir <- paste(unlist(packageVersion("ampir")), collapse = ".")
+    f <- file(version_file_path, "w")
+    writeLines('"${task.process}":', f)
+    writeLines("    ampir: ", f, sep = "")
+    writeLines(version_ampir, f)
+    close(f)
+    """
+}
--- a/modules/ampir/meta.yml
+++ b/modules/ampir/meta.yml
@ -0,0 +1,59 @@
+name: "ampir"
+description: A fast and user-friendly method to predict antimicrobial peptides (AMPs) from any given size protein dataset. ampir uses a supervised statistical machine learning approach to predict AMPs.
+keywords:
+  - ampir
+  - amp
+  - antimicrobial peptide prediction
+tools:
+  - "ampir":
+      description: "A toolkit to predict antimicrobial peptides from protein sequences on a genome-wide scale."
+      homepage: "https://github.com/Legana/ampir"
+      documentation: "https://cran.r-project.org/web/packages/ampir/index.html"
+      tool_dev_url: "https://github.com/Legana/ampir"
+      doi: "10.1093/bioinformatics/btaa653"
+      licence: ["GPL v2"]
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - faa:
+      type: file
+      description: FASTA file containing amino acid sequences
+      pattern: "*.{faa,fasta}"
+  - model:
+      type: value
+      description: Built-in model for AMP prediction
+      pattern: "{precursor,mature}"
+  - min_length:
+      type: value
+      description: Minimum protein length for which predictions will be generated
+      pattern: "[0-9]+"
+  - min_probability:
+      type: value
+      description: Minimum prob_AMP value a prediction must reach to be kept (e.g. 0.7)
+      pattern: "[0-9].[0-9]+"
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - amps_faa:
+      type: file
+      description: File containing AMP predictions in amino acid FASTA format
+      pattern: "*.{faa}"
+  - amps_tsv:
+      type: file
+      description: File containing AMP predictions in TSV format
+      pattern: "*.tsv"
+
+authors:
+  - "@jasmezz"
--- a/modules/cellranger/Dockerfile
+++ b/modules/cellranger/Dockerfile
@ -1,4 +1,4 @@
-# Dockerfile to create container with Cell Ranger v6.1.2
+# Dockerfile to create container with Cell Ranger v7.0.0
 # Push to nfcore/cellranger:<VER>

 FROM continuumio/miniconda3:4.8.2
--- a/modules/cellranger/count/main.nf
+++ b/modules/cellranger/count/main.nf
@ -5,7 +5,7 @@ process CELLRANGER_COUNT {
    if (params.enable_conda) {
        exit 1, "Conda environments cannot be used when using the Cell Ranger tool. Please use docker or singularity containers."
    }
-    container "nfcore/cellranger:6.1.2"
+    container "nfcore/cellranger:7.0.0"

    input:
    tuple val(meta), path(reads)
--- a/modules/cellranger/mkfastq/Dockerfile
+++ b/modules/cellranger/mkfastq/Dockerfile
@ -1,4 +1,4 @@
-# Dockerfile to create container with Cell Ranger v6.1.2 and bcl2fastq v2.20.0
+# Dockerfile to create container with Cell Ranger v7.0.0 and bcl2fastq v2.20.0
 # Push to nfcore/cellrangermkfastq:<VER>

 FROM continuumio/miniconda3:4.8.2
@ -17,7 +17,7 @@ RUN apt-get update --allow-releaseinfo-change \

 # Copy pre-downloaded bcl2fastq2 and cellranger file
 ENV BCL2FASTQ2_VER=v2-20-0-linux-x86-64 \
-    CELLRANGER_VER=6.1.2
+    CELLRANGER_VER=7.0.0
 COPY bcl2fastq2-$BCL2FASTQ2_VER.zip /tmp/bcl2fastq2-$BCL2FASTQ2_VER.zip
 COPY cellranger-$CELLRANGER_VER.tar.gz /opt/cellranger-$CELLRANGER_VER.tar.gz

--- a/modules/cellranger/mkfastq/main.nf
+++ b/modules/cellranger/mkfastq/main.nf
@ -5,7 +5,7 @@ process CELLRANGER_MKFASTQ {
    if (params.enable_conda) {
        exit 1, "Conda environments cannot be used when using the Cell Ranger tool. Please use docker or singularity containers."
    }
-    container "nfcore/cellrangermkfastq:6.1.2"
+    container "nfcore/cellrangermkfastq:7.0.0"

    input:
    path bcl
--- a/modules/cellranger/mkgtf/main.nf
+++ b/modules/cellranger/mkgtf/main.nf
@ -5,7 +5,7 @@ process CELLRANGER_MKGTF {
    if (params.enable_conda) {
        exit 1, "Conda environments cannot be used when using the Cell Ranger tool. Please use docker or singularity containers."
    }
-    container "nfcore/cellranger:6.1.2"
+    container "nfcore/cellranger:7.0.0"

    input:
    path gtf
--- a/modules/cellranger/mkref/main.nf
+++ b/modules/cellranger/mkref/main.nf
@ -5,7 +5,7 @@ process CELLRANGER_MKREF {
    if (params.enable_conda) {
        exit 1, "Conda environments cannot be used when using the Cell Ranger tool. Please use docker or singularity containers."
    }
-    container "nfcore/cellranger:6.1.2"
+    container "nfcore/cellranger:7.0.0"

    input:
    path fasta
--- a/modules/ensemblvep/main.nf
+++ b/modules/ensemblvep/main.nf
@ -13,6 +13,7 @@ process ENSEMBLVEP {
    val   species
    val   cache_version
    path  cache
+    path  fasta
    path  extra_files

    output:
@ -27,6 +28,8 @@ process ENSEMBLVEP {
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    def dir_cache = cache ? "\${PWD}/${cache}" : "/.vep"
+    def reference = fasta ? "--fasta $fasta" : ""
+
    """
    mkdir $prefix

@ -34,6 +37,7 @@ process ENSEMBLVEP {
        -i $vcf \\
        -o ${prefix}.ann.vcf \\
        $args \\
+        $reference \\
        --assembly $genome \\
        --species $species \\
        --cache \\
--- a/modules/ensemblvep/meta.yml
+++ b/modules/ensemblvep/meta.yml
@ -36,6 +36,11 @@ input:
      type: file
      description: |
        path to VEP cache (optional)
+  - fasta:
+      type: file
+      description: |
+        reference FASTA file (optional)
+      pattern: "*.{fasta,fa}"
  - extra_files:
      type: tuple
      description: |
--- a/modules/gatk/indelrealigner/main.nf
+++ b/modules/gatk/indelrealigner/main.nf
@ -0,0 +1,54 @@
+process GATK_INDELREALIGNER { // Runs gatk3 -T IndelRealigner on a BAM with a supplied target-intervals file; writes <prefix>.bam
+    tag "$meta.id"
+    label 'process_low'
+
+    conda (params.enable_conda ? "bioconda::gatk=3.5" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/gatk:3.5--hdfd78af_11':
+        'quay.io/biocontainers/gatk:3.5--hdfd78af_11' }"
+
+    input:
+    tuple val(meta), path(bam), path(bai), path(intervals)
+    path(fasta)
+    path(fai) // staged only; not referenced on the command line below
+    path(dict) // staged only; not referenced on the command line below
+    path(known_vcf) // optional; when empty no -known argument is emitted
+
+    output:
+    tuple val(meta), path("*.bam"), path("*.bai"), emit: bam
+    path "versions.yml"           , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: '' // extra command-line options appended after -o
+    def prefix = task.ext.prefix ?: "${meta.id}" // output basename; defaults to meta.id
+    def known = known_vcf ? "-known ${known_vcf}" : ""
+
+    if ("$bam" == "${prefix}.bam") error "Input and output names are the same, set prefix in module configuration to disambiguate!" // -o writes ${prefix}.bam, which would overwrite the staged input
+
+    def avail_mem = 3 // GB handed to -Xmx; replaced by task.memory when that is set
+    if (!task.memory) {
+        log.info '[GATK IndelRealigner] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
+    } else {
+        avail_mem = task.memory.giga
+    }
+
+    """
+    gatk3 \\
+        -Xmx${avail_mem}g \\
+        -T IndelRealigner \\
+        -R ${fasta} \\
+        -I ${bam} \\
+        --targetIntervals ${intervals} \\
+        ${known} \\
+        -o ${prefix}.bam \\
+        $args
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        gatk: \$(echo \$(gatk3 --version))
+    END_VERSIONS
+    """
+}
--- a/modules/gatk/indelrealigner/meta.yml
+++ b/modules/gatk/indelrealigner/meta.yml
@ -0,0 +1,71 @@
+name: "gatk_indelrealigner"
+description: Performs local realignment around indels to correct for mapping errors
+keywords:
+  - bam
+  - vcf
+  - variant calling
+  - indel
+  - realignment
+tools:
+  - "gatk":
+      description: "The full Genome Analysis Toolkit (GATK) framework, license restricted."
+      homepage: "https://gatk.broadinstitute.org/hc/en-us"
+      documentation: "https://github.com/broadinstitute/gatk-docs"
+      licence: "['https://software.broadinstitute.org/gatk/download/licensing', 'BSD', 'https://www.broadinstitute.org/gatk/about/#licensing']"
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - bam:
+      type: file
+      description: Sorted and indexed BAM file
+      pattern: "*.bam"
+  - bai:
+      type: file
+      description: BAM index file
+      pattern: "*.bai"
+  - intervals:
+      type: file
+      description: Intervals file created by gatk3 RealignerTargetCreator
+      pattern: "*.{intervals,list}"
+  - fasta:
+      type: file
+      description: Reference file used to generate BAM file
+      pattern: "*.{fasta,fa,fna}"
+  - fai:
+      type: file
+      description: Index of reference file used to generate BAM file
+      pattern: "*.fai"
+  - dict:
+      type: file
+      description: GATK dict file for reference
+      pattern: "*.dict"
+  - known_vcf:
+      type: file
+      description: Optional input VCF file(s) with known indels
+      pattern: "*.vcf"
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - bam:
+      type: file
+      description: Sorted and indexed BAM file with local realignment around variants
+      pattern: "*.bam"
+  - bai:
+      type: file
+      description: Output BAM Index file
+      pattern: "*.bai"
+
+authors:
+  - "@jfy133"
--- a/modules/gatk/realignertargetcreator/main.nf
+++ b/modules/gatk/realignertargetcreator/main.nf
@ -0,0 +1,53 @@
+process GATK_REALIGNERTARGETCREATOR { // Runs gatk3 -T RealignerTargetCreator on an alignment file; writes <prefix>.intervals
+    tag "$meta.id"
+    label 'process_low'
+
+    conda (params.enable_conda ? "bioconda::gatk=3.5" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/gatk:3.5--hdfd78af_11':
+        'quay.io/biocontainers/gatk:3.5--hdfd78af_11' }"
+
+    input:
+    tuple val(meta), path(input), path(index)
+    path fasta
+    path fai // staged only; not referenced on the command line below
+    path dict // staged only; not referenced on the command line below
+    path known_vcf // optional; when empty no -known argument is emitted
+
+    output:
+    tuple val(meta), path("*.intervals"), emit: intervals
+    path "versions.yml"                 , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: '' // extra command-line options appended last
+    def prefix = task.ext.prefix ?: "${meta.id}" // output basename; defaults to meta.id
+    def known = known_vcf ? "-known ${known_vcf}" : ""
+    if ("$input" == "${prefix}.intervals") error "Input and output names are the same, set prefix in module configuration to disambiguate!" // compare against the file -o actually writes, so an input named <prefix>.bam is not rejected
+
+    def avail_mem = 3 // GB handed to -Xmx; replaced by task.memory when that is set
+    if (!task.memory) {
+        log.info '[GATK RealignerTargetCreator] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
+    } else {
+        avail_mem = task.memory.giga
+    }
+
+    """
+    gatk3 \\
+        -Xmx${avail_mem}g \\
+        -T RealignerTargetCreator \\
+        -nt ${task.cpus} \\
+        -I ${input} \\
+        -R ${fasta} \\
+        -o ${prefix}.intervals \\
+        ${known} \\
+        $args
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        gatk: \$(echo \$(gatk3 --version))
+    END_VERSIONS
+    """
+}
--- a/modules/gatk/realignertargetcreator/meta.yml
+++ b/modules/gatk/realignertargetcreator/meta.yml
@ -0,0 +1,64 @@
+name: "gatk_realignertargetcreator"
+description: Generates a list of locations that should be considered for local realignment prior to genotyping.
+keywords:
+  - bam
+  - vcf
+  - variant calling
+  - indel
+  - realignment
+  - targets
+tools:
+  - "gatk":
+      description: "The full Genome Analysis Toolkit (GATK) framework, license restricted."
+      homepage: "https://gatk.broadinstitute.org/hc/en-us"
+      documentation: "https://github.com/broadinstitute/gatk-docs"
+      licence: "['https://software.broadinstitute.org/gatk/download/licensing', 'BSD', 'https://www.broadinstitute.org/gatk/about/#licensing']"
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - input:
+      type: file
+      description: Sorted and indexed BAM/CRAM/SAM file
+      pattern: "*.bam"
+  - index:
+      type: file
+      description: BAM index file
+      pattern: "*.bai"
+  - fasta:
+      type: file
+      description: Reference file used to generate BAM file
+      pattern: "*.{fasta,fa,fna}"
+  - fai:
+      type: file
+      description: Index of reference file used to generate BAM file
+      pattern: "*.fai"
+  - dict:
+      type: file
+      description: GATK dict file for reference
+      pattern: "*.dict"
+  - known_vcf:
+      type: file
+      description: Optional input VCF file(s) with known indels
+      pattern: "*.vcf"
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - intervals:
+      type: file
+      description: File containing intervals that represent sites of extant and potential indels.
+      pattern: "*.intervals"
+
+authors:
+  - "@jfy133"
--- a/modules/gatk4/applybqsrspark/main.nf
+++ b/modules/gatk4/applybqsrspark/main.nf
@ -2,10 +2,8 @@ process GATK4_APPLYBQSR_SPARK {
    tag "$meta.id"
    label 'process_low'

-    conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1" : null)
-    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/gatk4:4.2.6.1--hdfd78af_0':
-        'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }"
+    conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1 conda-forge::openjdk=8.0.312" : null)
+    container 'broadinstitute/gatk:4.2.6.1'

    input:
    tuple val(meta), path(input), path(input_index), path(bqsr_table), path(intervals)
--- a/modules/gatk4/baserecalibratorspark/main.nf
+++ b/modules/gatk4/baserecalibratorspark/main.nf
@ -2,10 +2,8 @@ process GATK4_BASERECALIBRATOR_SPARK {
    tag "$meta.id"
    label 'process_low'

-    conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1" : null)
-    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/gatk4:4.2.6.1--hdfd78af_0':
-        'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }"
+    conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1 conda-forge::openjdk=8.0.312" : null)
+    container 'broadinstitute/gatk:4.2.6.1'

    input:
    tuple val(meta), path(input), path(input_index), path(intervals)
--- a/modules/gatk4/markduplicatesspark/main.nf
+++ b/modules/gatk4/markduplicatesspark/main.nf
@ -2,10 +2,8 @@ process GATK4_MARKDUPLICATES_SPARK {
    tag "$meta.id"
    label 'process_high'

-    conda (params.enable_conda ? "bioconda::gatk4=4.2.3.0" : null)
-    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/gatk4:4.2.3.0--hdfd78af_0' :
-        'broadinstitute/gatk:4.2.3.0' }"
+    conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1 conda-forge::openjdk=8.0.312" : null)
+    container 'broadinstitute/gatk:4.2.6.1'

    input:
    tuple val(meta), path(bam)
@ -15,6 +13,7 @@ process GATK4_MARKDUPLICATES_SPARK {

    output:
    tuple val(meta), path("${prefix}"),     emit: output
+    tuple val(meta), path("*.metrics"),     emit: metrics, optional: true
    path "versions.yml"               ,     emit: versions

    when:
@ -25,6 +24,7 @@ process GATK4_MARKDUPLICATES_SPARK {
    prefix = task.ext.prefix ?: "${meta.id}"
    def input_list = bam.collect{"--input $it"}.join(' ')

+
    def avail_mem = 3
    if (!task.memory) {
        log.info '[GATK MarkDuplicatesSpark] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
@ -32,8 +32,6 @@ process GATK4_MARKDUPLICATES_SPARK {
        avail_mem = task.memory.giga
    }
    """
-    export SPARK_USER=spark3
-
    gatk --java-options "-Xmx${avail_mem}g" MarkDuplicatesSpark \\
        $input_list \\
        --output $prefix \\
@ -45,6 +43,7 @@ process GATK4_MARKDUPLICATES_SPARK {
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//')
+        openjdk: \$(echo \$(java -version 2>&1) | grep version | sed 's/\"//g' | cut -f3 -d ' ')
    END_VERSIONS
    """
 }
--- a/modules/gatk4/markduplicatesspark/meta.yml
+++ b/modules/gatk4/markduplicatesspark/meta.yml
@ -58,3 +58,4 @@ authors:
  - "@ajodeh-juma"
  - "@FriederikeHanssen"
  - "@maxulysse"
+  - "@SusiJo"
--- a/modules/gatk4/reblockgvcf/main.nf
+++ b/modules/gatk4/reblockgvcf/main.nf
@ -0,0 +1,52 @@
+process GATK4_REBLOCKGVCF { // Runs gatk ReblockGVCF on a GVCF; writes <prefix>.rb.g.vcf.gz
+    tag "$meta.id"
+    label 'process_low'
+
+    conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/gatk4:4.2.6.1--hdfd78af_0':
+        'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }"
+
+    input:
+    tuple val(meta), path(gvcf), path(tbi), path(intervals)
+    path fasta
+    path fai // staged only; not referenced on the command line below
+    path dict // staged only; not referenced on the command line below
+    path dbsnp // optional; when empty no --dbsnp argument is emitted
+    path dbsnp_tbi // staged only; not referenced on the command line below
+
+    output:
+    tuple val(meta), path("*.rb.g.vcf.gz"), path("*.tbi")  , emit: vcf
+    path "versions.yml"                                    , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: '' // extra command-line options appended last
+    def prefix = task.ext.prefix ?: "${meta.id}" // output basename; defaults to meta.id
+    def dbsnp_command = dbsnp ? "--dbsnp $dbsnp" : ""
+    def interval_command = intervals ? "--intervals $intervals" : "" // omitted when no intervals file is supplied
+
+    def avail_mem = 3 // GB handed to -Xmx; replaced by task.memory when that is set
+    if (!task.memory) {
+        log.info '[GATK ReblockGVCF] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
+    } else {
+        avail_mem = task.memory.giga
+    }
+    """
+    gatk --java-options "-Xmx${avail_mem}g" ReblockGVCF \\
+        --variant $gvcf \\
+        --output ${prefix}.rb.g.vcf.gz \\
+        --reference $fasta \\
+        $dbsnp_command \\
+        $interval_command \\
+        --tmp-dir . \\
+        $args
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//')
+    END_VERSIONS
+    """
+}
--- a/modules/gatk4/reblockgvcf/meta.yml
+++ b/modules/gatk4/reblockgvcf/meta.yml
@ -0,0 +1,74 @@
+name: "gatk4_reblockgvcf"
+description: Condenses homRef blocks in a single-sample GVCF
+keywords:
+  - gatk4
+  - reblockgvcf
+  - gvcf
+tools:
+  - gatk4:
+    description: |
+      Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools
+      with a primary focus on variant discovery and genotyping. Its powerful processing engine
+      and high-performance computing features make it capable of taking on projects of any size.
+    homepage: https://gatk.broadinstitute.org/hc/en-us
+    documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s
+    doi: 10.1158/1538-7445.AM2017-3590
+    licence: ["Apache-2.0"]
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - gvcf:
+      type: file
+      description: GVCF file created using HaplotypeCaller using the '-ERC GVCF' or '-ERC BP_RESOLUTION' mode
+      pattern: "*.{vcf,gvcf}.gz"
+  - tbi:
+      type: file
+      description: Index of the GVCF file
+      pattern: "*.tbi"
+  - intervals:
+      type: file
+      description: Bed file with the genomic regions included in the library (optional)
+  - fasta:
+      type: file
+      description: The reference fasta file
+      pattern: "*.fasta"
+  - fai:
+      type: file
+      description: Index of reference fasta file
+      pattern: "*.fai"
+  - dict:
+      type: file
+      description: GATK sequence dictionary
+      pattern: "*.dict"
+  - dbsnp:
+      type: file
+      description: VCF file containing known sites (optional)
+  - dbsnp_tbi:
+      type: file
+      description: VCF index of dbsnp (optional)
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - gvcf:
+      type: file
+      description: Filtered GVCF
+      pattern: "*rb.g.vcf.gz"
+  - tbi:
+      type: file
+      description: Index of the filtered GVCF
+      pattern: "*rb.g.vcf.gz.tbi"
+
+authors:
+  - "@nvnieuwk"
--- a/modules/sexdeterrmine/main.nf
+++ b/modules/sexdeterrmine/main.nf
@ -0,0 +1,40 @@
+process SEXDETERRMINE { // Runs sexdeterrmine on a samtools depth table; its stdout is captured as <prefix>.tsv
+    tag "$meta.id"
+    label 'process_low'
+
+    conda (params.enable_conda ? "bioconda::sexdeterrmine=1.1.2" : null)
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/sexdeterrmine:1.1.2--hdfd78af_1':
+        'quay.io/biocontainers/sexdeterrmine:1.1.2--hdfd78af_1' }"
+
+    input:
+    tuple val(meta), path(depth)
+    path sample_list_file // optional; when empty no -f argument is emitted
+
+    output:
+    tuple val(meta), path("*.json"), emit: json // NOTE(review): not named by this script; presumably written by the tool itself - confirm
+    tuple val(meta), path("*.tsv") , emit: tsv
+    path "versions.yml"            , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: '' // extra command-line options placed before the redirect
+    def prefix = task.ext.prefix ?: "${meta.id}" // output basename; defaults to meta.id
+    def sample_list = sample_list_file ? "-f ${sample_list_file}" : '' // double quotes so Groovy interpolates the staged file name; single quotes passed the literal text to bash
+    if ("$depth" == "${prefix}.tsv") error "Input depth and output TSV names are the same, set prefix in module configuration to disambiguate!"
+
+    """
+    sexdeterrmine \\
+        -I $depth \\
+        $sample_list \\
+        $args \\
+        > ${prefix}.tsv
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        sexdeterrmine: \$(echo \$(sexdeterrmine --version 2>&1))
+    END_VERSIONS
+    """
+}
--- a/modules/sexdeterrmine/meta.yml
+++ b/modules/sexdeterrmine/meta.yml
@ -0,0 +1,48 @@
+name: "sexdeterrmine"
+description: Calculate the relative coverage on the Gonosomes vs Autosomes from the output of samtools depth, with error bars.
+keywords:
+  - sex determination
+  - genetic sex
+  - relative coverage
+  - ancient dna
+tools:
+  - "sexdeterrmine":
+      description: "A Python script to calculate the relative coverage of X and Y chromosomes, and their associated error bars, from capture data."
+      homepage: "https://github.com/TCLamnidis/Sex.DetERRmine"
+      documentation: "https://github.com/TCLamnidis/Sex.DetERRmine/README.md"
+      tool_dev_url: "https://github.com/TCLamnidis/Sex.DetERRmine"
+      doi: "https://doi.org/10.1038/s41467-018-07483-5"
+      licence: "['GPL v3']"
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - depth:
+      type: file
+      description: Output from samtools depth (with header)
+      pattern: "*"
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - json:
+      type: file
+      description: JSON formatted table of relative coverages on the X and Y, with associated error bars.
+      pattern: "*.json"
+  - tsv:
+      type: file
+      description: TSV table of relative coverages on the X and Y, with associated error bars.
+      pattern: "*.tsv"
+
+authors:
+  - "@TCLamnidis"
--- a/tests/config/pytest_modules.yml
+++ b/tests/config/pytest_modules.yml
@ -26,6 +26,10 @@ allelecounter:
  - modules/allelecounter/**
  - tests/modules/allelecounter/**

+ampir:
+  - modules/ampir/**
+  - tests/modules/ampir/**
+
 amplify/predict:
  - modules/amplify/predict/**
  - tests/modules/amplify/predict/**
@ -715,6 +719,14 @@ gamma/gamma:
  - modules/gamma/gamma/**
  - tests/modules/gamma/gamma/**

+gatk/indelrealigner:
+  - modules/gatk/indelrealigner/**
+  - tests/modules/gatk/indelrealigner/**
+
+gatk/realignertargetcreator:
+  - modules/gatk/realignertargetcreator/**
+  - tests/modules/gatk/realignertargetcreator/**
+
 gatk/unifiedgenotyper:
  - modules/gatk/unifiedgenotyper/**
  - tests/modules/gatk/unifiedgenotyper/**
@ -851,6 +863,10 @@ gatk4/mutect2:
  - modules/gatk4/mutect2/**
  - tests/modules/gatk4/mutect2/**

+gatk4/reblockgvcf:
+  - modules/gatk4/reblockgvcf/**
+  - tests/modules/gatk4/reblockgvcf/**
+
 gatk4/revertsam:
  - modules/gatk4/revertsam/**
  - tests/modules/gatk4/revertsam/**
@ -1843,6 +1859,10 @@ seqwish/induce:
  - modules/seqwish/induce/**
  - tests/modules/seqwish/induce/**

+sexdeterrmine:
+  - modules/sexdeterrmine/**
+  - tests/modules/sexdeterrmine/**
+
 shasum:
  - modules/shasum/**
  - tests/modules/shasum/**
--- a/tests/config/test_data.config
+++ b/tests/config/test_data.config
@ -232,10 +232,11 @@ params {
                test2_paired_end_umi_unsorted_bam              = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/umi/test2.paired_end.umi_unsorted.bam"
                test2_paired_end_umi_unsorted_tagged_bam       = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/umi/test2.paired_end.unsorted_tagged.bam"

-
                mitochon_standin_recalibrated_sorted_bam       = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/mitochon_standin.recalibrated.sorted.bam"
                mitochon_standin_recalibrated_sorted_bam_bai   = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/mitochon_standin.recalibrated.sorted.bam.bai"

+                test3_single_end_markduplicates_sorted_bam       = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam"
+
                test_paired_end_sorted_cram                      = "${test_data_dir}/genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram"
                test_paired_end_sorted_cram_crai                 = "${test_data_dir}/genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai"
                test_paired_end_markduplicates_sorted_cram       = "${test_data_dir}/genomics/homo_sapiens/illumina/cram/test.paired_end.markduplicates.sorted.cram"
--- a/tests/modules/ampir/main.nf
+++ b/tests/modules/ampir/main.nf
@ -0,0 +1,20 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { AMPIR } from '../../../modules/ampir/main.nf'
+
+workflow test_ampir { // Runs AMPIR once on the test proteome FASTA with the "precursor" model
+
+    fasta = [ [ id:'test', single_end:false ], // meta map
+              file(params.test_data['candidatus_portiera_aleyrodidarum']['genome']['proteome_fasta'], checkIfExists: true),
+    ]
+
+    model = "precursor"
+
+    min_length = [] // empty list: the module then omits min_len from the predict_amps call
+
+    min_probability = "0.7" // threshold the module applies to prob_AMP
+
+    AMPIR ( fasta, model, min_length, min_probability )
+}
--- a/tests/modules/ampir/nextflow.config
+++ b/tests/modules/ampir/nextflow.config
@ -0,0 +1,5 @@
+process {
+
+    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
+    
+}
--- a/tests/modules/ampir/test.yml
+++ b/tests/modules/ampir/test.yml
@ -0,0 +1,11 @@
+- name: ampir test_ampir
+  command: nextflow run ./tests/modules/ampir -entry test_ampir -c ./tests/config/nextflow.config  -c ./tests/modules/ampir/nextflow.config
+  tags:
+    - ampir
+  files:
+    - path: output/ampir/test.tsv
+      contains: ["seq_name\tseq_aa\tprob_AMP", "WP_014895017.1"]
+    - path: output/ampir/test.faa
+      md5sum: 0435609144022c55ac196db053f0df89
+    - path: output/ampir/versions.yml
+      md5sum: 4a11d25b8a904a7ffb34ae88f6826888
--- a/tests/modules/cellranger/count/test.yml
+++ b/tests/modules/cellranger/count/test.yml
@ -1,19 +1,65 @@
 - name: cellranger count test_cellranger_count
-  command: nextflow run tests/modules/cellranger/count -entry test_cellranger_count -c tests/config/nextflow.config -c tests/modules/cellranger/count/nextflow.config
+  command: nextflow run ./tests/modules/cellranger/count -entry test_cellranger_count -c ./tests/config/nextflow.config  -c ./tests/modules/cellranger/count/nextflow.config
  tags:
-    - cellranger
    - cellranger/count
+    - cellranger
  files:
+    - path: output/cellranger/genome.filtered.gtf
+      md5sum: a8b8a7b5039e05d3a9cf9151ea138b5b
+    - path: output/cellranger/homo_sapiens_chr22_reference/fasta/genome.fa
+      md5sum: f315020d899597c1b57e5fe9f60f4c3e
+    - path: output/cellranger/homo_sapiens_chr22_reference/fasta/genome.fa.fai
+      md5sum: 3520cd30e1b100e55f578db9c855f685
+    - path: output/cellranger/homo_sapiens_chr22_reference/genes/genes.gtf.gz
+      md5sum: d1e05cd46684fa26d852b6bc9f05e31f
+    - path: output/cellranger/homo_sapiens_chr22_reference/reference.json
+      md5sum: 8405fd7f527a944eafb9c2909045840b
+    - path: output/cellranger/homo_sapiens_chr22_reference/star/Genome
+      md5sum: 897cec2d191945335f8b320438bd9135
+    - path: output/cellranger/homo_sapiens_chr22_reference/star/SA
+      md5sum: 7961129ac5d0e1706105be1d31c6b30c
+    - path: output/cellranger/homo_sapiens_chr22_reference/star/SAindex
+      md5sum: dcceb480b30cda93fb8c63ddc339093b
+    - path: output/cellranger/homo_sapiens_chr22_reference/star/chrLength.txt
+      md5sum: c81f40f27e72606d7d07097c1d56a5b5
+    - path: output/cellranger/homo_sapiens_chr22_reference/star/chrName.txt
+      md5sum: 5ae68a67b70976ee95342a7451cb5af1
+    - path: output/cellranger/homo_sapiens_chr22_reference/star/chrNameLength.txt
+      md5sum: b190587cae0531f3cf25552d8aa674db
+    - path: output/cellranger/homo_sapiens_chr22_reference/star/chrStart.txt
+      md5sum: bc73df776dd3d5bb9cfcbcba60880519
+    - path: output/cellranger/homo_sapiens_chr22_reference/star/exonGeTrInfo.tab
+      md5sum: 9129691eeb4ed0d02b17be879fa3edb0
+    - path: output/cellranger/homo_sapiens_chr22_reference/star/exonInfo.tab
+      md5sum: 209b82f0683efd03e17d2c729676554f
+    - path: output/cellranger/homo_sapiens_chr22_reference/star/geneInfo.tab
+      md5sum: 02a8f4575bdfcd4a42b4d8d07f2e9369
+    - path: output/cellranger/homo_sapiens_chr22_reference/star/genomeParameters.txt
+    - path: output/cellranger/homo_sapiens_chr22_reference/star/sjdbInfo.txt
+      md5sum: 1082ab459363b3f2f7aabcef0979c1ed
+    - path: output/cellranger/homo_sapiens_chr22_reference/star/sjdbList.fromGTF.out.tab
+    - path: output/cellranger/homo_sapiens_chr22_reference/star/sjdbList.out.tab
+    - path: output/cellranger/homo_sapiens_chr22_reference/star/transcriptInfo.tab
+      md5sum: cedcb5f4e7d97bc548cd5daa022e092c
    - path: output/cellranger/sample-123/outs/filtered_feature_bc_matrix.h5
+      md5sum: f8b6b7cc8248151a98c46d4ebec450c6
+    - path: output/cellranger/sample-123/outs/filtered_feature_bc_matrix/barcodes.tsv.gz
+    - path: output/cellranger/sample-123/outs/filtered_feature_bc_matrix/features.tsv.gz
+    - path: output/cellranger/sample-123/outs/filtered_feature_bc_matrix/matrix.mtx.gz
    - path: output/cellranger/sample-123/outs/metrics_summary.csv
      md5sum: 707df0f101d479d93f412ca74f9c4131
    - path: output/cellranger/sample-123/outs/molecule_info.h5
-      md5sum: 0e56836ef0725f2ab05f56ca5a71e55b
+      md5sum: a13bd7425f441c8d0eac8ffc50082996
    - path: output/cellranger/sample-123/outs/possorted_genome_bam.bam
      md5sum: 15441da9cfceea0bb48c8b66b1b860df
    - path: output/cellranger/sample-123/outs/possorted_genome_bam.bam.bai
      md5sum: 7c3d49c77016a09535aff61a027f750c
-    - path: output/cellranger/sample-123/outs/raw_feature_bc_matrix
    - path: output/cellranger/sample-123/outs/raw_feature_bc_matrix.h5
-      md5sum: cdad1cd7b215d7137cf92515e81a8525
+      md5sum: a5290f3e300a4070f3d68a0c2e215f54
+    - path: output/cellranger/sample-123/outs/raw_feature_bc_matrix/barcodes.tsv.gz
+      md5sum: 5cc39ef0c7ac85f2b758b164aabf9157
+    - path: output/cellranger/sample-123/outs/raw_feature_bc_matrix/features.tsv.gz
+      md5sum: 07d497c7ce3e22f374af7b2cf9b97d72
+    - path: output/cellranger/sample-123/outs/raw_feature_bc_matrix/matrix.mtx.gz
+      md5sum: bdce94a51f16e22d40301724080b76ee
    - path: output/cellranger/sample-123/outs/web_summary.html
--- a/tests/modules/cellranger/mkfastq/test.yml
+++ b/tests/modules/cellranger/mkfastq/test.yml
@ -5,7 +5,6 @@
    - cellranger/mkfastq
  files:
    - path: output/cellranger/cellranger-tiny-bcl-1/outs/fastq_path/fake_file.fastq.gz
-      md5sum: d41d8cd98f00b204e9800998ecf8427e
 - name: cellranger mkfastq test_cellranger_mkfastq_illumina
  command: nextflow run tests/modules/cellranger/mkfastq -entry test_cellranger_mkfastq_illumina -c tests/config/nextflow.config -c ./tests/modules/cellranger/mkfastq/nextflow.config -stub-run
  tags:
@ -13,4 +12,3 @@
    - cellranger/mkfastq
  files:
    - path: output/cellranger/cellranger-tiny-bcl-1/outs/fastq_path/fake_file.fastq.gz
-      md5sum: d41d8cd98f00b204e9800998ecf8427e
--- a/tests/modules/cellranger/mkgtf/test.yml
+++ b/tests/modules/cellranger/mkgtf/test.yml
@ -1,8 +1,8 @@
 - name: cellranger mkgtf test_cellranger_mkgtf
-  command: nextflow run tests/modules/cellranger/mkgtf -entry test_cellranger_mkgtf -c tests/config/nextflow.config -c tests/modules/cellranger/mkgtf/nextflow.config
+  command: nextflow run ./tests/modules/cellranger/mkgtf -entry test_cellranger_mkgtf -c ./tests/config/nextflow.config  -c ./tests/modules/cellranger/mkgtf/nextflow.config
  tags:
-    - cellranger
    - cellranger/mkgtf
+    - cellranger
  files:
    - path: output/cellranger/genome.filtered.gtf
      md5sum: a8b8a7b5039e05d3a9cf9151ea138b5b
--- a/tests/modules/cellranger/mkref/test.yml
+++ b/tests/modules/cellranger/mkref/test.yml
@ -1,8 +1,8 @@
 - name: cellranger mkref test_cellranger_mkref
  command: nextflow run ./tests/modules/cellranger/mkref -entry test_cellranger_mkref -c ./tests/config/nextflow.config  -c ./tests/modules/cellranger/mkref/nextflow.config
  tags:
-    - cellranger
    - cellranger/mkref
+    - cellranger
  files:
    - path: output/cellranger/homo_sapiens_chr22_reference/fasta/genome.fa
      md5sum: f315020d899597c1b57e5fe9f60f4c3e
@ -11,7 +11,7 @@
    - path: output/cellranger/homo_sapiens_chr22_reference/genes/genes.gtf.gz
      md5sum: 6d9b5f409bfea95022bc25b9590e194e
    - path: output/cellranger/homo_sapiens_chr22_reference/reference.json
-      md5sum: 5d8d1669cd251433505f183e1c9ed6bc
+      md5sum: 6cc817f0923062e780e6573806840cea
    - path: output/cellranger/homo_sapiens_chr22_reference/star/Genome
      md5sum: 22102926fadf5890e905ca71b2da3f35
    - path: output/cellranger/homo_sapiens_chr22_reference/star/SA
--- a/tests/modules/ensemblvep/main.nf
+++ b/tests/modules/ensemblvep/main.nf
@ -4,11 +4,22 @@ nextflow.enable.dsl = 2

 include { ENSEMBLVEP } from '../../../modules/ensemblvep/main.nf'

-workflow test_ensemblvep {
+workflow test_ensemblvep_fasta {
    input = [
        [ id:'test' ], // meta map
        file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true)
    ]

-    ENSEMBLVEP ( input, "WBcel235", "caenorhabditis_elegans", "104", [], [] )
+    fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
+
+    ENSEMBLVEP ( input, "WBcel235", "caenorhabditis_elegans", "104", [], fasta, [] )
+}
+
+workflow test_ensemblvep_no_fasta { // Same VCF input as test_ensemblvep_fasta, but without a reference FASTA
+    input = [
+        [ id:'test' ], // meta map
+        file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true)
+    ]
+
+    ENSEMBLVEP ( input, "WBcel235", "caenorhabditis_elegans", "104", [], [], [] ) // sixth argument is [] here; test_ensemblvep_fasta passes the FASTA file in that position
 }
--- a/tests/modules/ensemblvep/test.yml
+++ b/tests/modules/ensemblvep/test.yml
@ -1,5 +1,13 @@
- name: ensemblvep test_ensemblvep
-  command: nextflow run ./tests/modules/ensemblvep -entry test_ensemblvep -c ./tests/config/nextflow.config -c ./tests/modules/ensemblvep/nextflow.config
+- name: ensemblvep test_ensemblvep_fasta
+  command: nextflow run ./tests/modules/ensemblvep -entry test_ensemblvep_fasta -c ./tests/config/nextflow.config -c ./tests/modules/ensemblvep/nextflow.config
+  tags:
+    - ensemblvep
+  files:
+    - path: output/ensemblvep/test.ann.vcf
+    - path: output/ensemblvep/test.summary.html
+
+- name: ensemblvep test_ensemblvep_no_fasta
+  command: nextflow run ./tests/modules/ensemblvep -entry test_ensemblvep_no_fasta -c ./tests/config/nextflow.config -c ./tests/modules/ensemblvep/nextflow.config
  tags:
    - ensemblvep
  files:
--- a/tests/modules/gatk/indelrealigner/main.nf
+++ b/tests/modules/gatk/indelrealigner/main.nf
@ -0,0 +1,33 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { GATK_REALIGNERTARGETCREATOR } from '../../../../modules/gatk/realignertargetcreator/main.nf'
+include { GATK_INDELREALIGNER } from '../../../../modules/gatk/indelrealigner/main.nf'
+
+
+workflow test_gatk_indelrealigner {
+    // Chains GATK_REALIGNERTARGETCREATOR into GATK_INDELREALIGNER on the same sarscov2 BAM/BAI pair.
+    // The realignment intervals are attached to the BAM tuple by matching on the meta map.
+    fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
+    fai   = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true)
+    dict  = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true)
+
+    input_realignertargetcreator     = [ [ id:'test' ], // meta map
+                  file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
+                  file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true),
+                ]
+
+    GATK_REALIGNERTARGETCREATOR ( input_realignertargetcreator, fasta, fai, dict, [] )
+
+    ch_intervals = GATK_REALIGNERTARGETCREATOR.out.intervals
+
+    ch_bams_indelrealigner = Channel.of([ [ id:'test' ], // meta map
+                  file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
+                  file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true)
+                ])
+
+    ch_input_indelrealigner = ch_bams_indelrealigner.join(ch_intervals).dump(tag: "input") // join on meta -> [ meta, bam, bai, intervals ], independent of which channel emits first
+
+    GATK_INDELREALIGNER ( ch_input_indelrealigner, fasta, fai, dict, [] )
+}
--- a/tests/modules/gatk/indelrealigner/nextflow.config
+++ b/tests/modules/gatk/indelrealigner/nextflow.config
@ -0,0 +1,6 @@
+process {
+    // Publish into <params.outdir>/<tool>: last ':'-separated part of the process name, first '_' token, lower-cased (GATK_INDELREALIGNER -> gatk)
+    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
+    ext.prefix = { "${meta.id}.realigned" } // no withName selector, so not restricted to one process; test.yml expects test.realigned.intervals alongside the BAM/BAI
+
+}
--- a/tests/modules/gatk/indelrealigner/test.yml
+++ b/tests/modules/gatk/indelrealigner/test.yml
@ -0,0 +1,12 @@
+- name: gatk indelrealigner test_gatk_indelrealigner
+  command: nextflow run ./tests/modules/gatk/indelrealigner -entry test_gatk_indelrealigner -c ./tests/config/nextflow.config  -c ./tests/modules/gatk/indelrealigner/nextflow.config
+  tags:
+    - gatk/indelrealigner
+    - gatk
+  files: # all three names carry the ".realigned" prefix set in this test's nextflow.config
+    - path: output/gatk/test.realigned.bai
+      md5sum: 85a67df8827fe426e7f3a458134c0551
+    - path: output/gatk/test.realigned.bam
+      md5sum: ea1df6f7fcafc408fae4dc1574813d8a
+    - path: output/gatk/test.realigned.intervals
+      md5sum: 7aa7a1b235a510e6591e262382086bf8 # same checksum as test.intervals in the realignertargetcreator test
--- a/tests/modules/gatk/realignertargetcreator/main.nf
+++ b/tests/modules/gatk/realignertargetcreator/main.nf
@ -0,0 +1,18 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { GATK_REALIGNERTARGETCREATOR } from '../../../../modules/gatk/realignertargetcreator/main.nf'
+
+workflow test_gatk_realignertargetcreator {
+    // Runs GATK_REALIGNERTARGETCREATOR once on the sarscov2 paired-end sorted BAM and its index
+    input     = [ [ id:'test' ], // meta map
+                  file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
+                  file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true),
+                ]
+    fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
+    fai   = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true)
+    dict  = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true)
+
+    GATK_REALIGNERTARGETCREATOR ( input, fasta, fai, dict, [] ) // last positional input is passed as an empty list
+}
--- a/tests/modules/gatk/realignertargetcreator/nextflow.config
+++ b/tests/modules/gatk/realignertargetcreator/nextflow.config
@ -0,0 +1,5 @@
+process {
+    // Publish into <params.outdir>/<tool>: last ':'-separated part of the process name, first '_' token, lower-cased (GATK_REALIGNERTARGETCREATOR -> gatk)
+    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
+    
+}
--- a/tests/modules/gatk/realignertargetcreator/test.yml
+++ b/tests/modules/gatk/realignertargetcreator/test.yml
@ -0,0 +1,8 @@
+- name: gatk realignertargetcreator test_gatk_realignertargetcreator
+  command: nextflow run ./tests/modules/gatk/realignertargetcreator -entry test_gatk_realignertargetcreator -c ./tests/config/nextflow.config  -c ./tests/modules/gatk/realignertargetcreator/nextflow.config
+  tags:
+    - gatk
+    - gatk/realignertargetcreator
+  files: # single expected output, checked by checksum
+    - path: output/gatk/test.intervals
+      md5sum: 7aa7a1b235a510e6591e262382086bf8
--- a/tests/modules/gatk4/applybqsrspark/test.yml
+++ b/tests/modules/gatk4/applybqsrspark/test.yml
@ -15,7 +15,6 @@
    - gatk4/applybqsrspark
  files:
    - path: output/gatk4/test.bam
-      md5sum: 2ca2446f0125890280056fd7da822732
    - path: output/gatk4/versions.yml

 - name: gatk4 applybqsr test_gatk4_applybqsr_spark_cram
--- a/tests/modules/gatk4/markduplicatesspark/main.nf
+++ b/tests/modules/gatk4/markduplicatesspark/main.nf
@ -3,26 +3,55 @@
 nextflow.enable.dsl = 2

 include { GATK4_MARKDUPLICATES_SPARK } from '../../../../modules/gatk4/markduplicatesspark/main.nf'
+include { GATK4_MARKDUPLICATES_SPARK as GATK4_MARKDUPLICATES_SPARK_CRAM } from '../../../../modules/gatk4/markduplicatesspark/main.nf'
+include { GATK4_MARKDUPLICATES_SPARK as GATK4_MARKDUPLICATES_SPARK_METRICS } from '../../../../modules/gatk4/markduplicatesspark/main.nf'

 workflow test_gatk4_markduplicates_spark {
    input = [ [ id:'test', single_end:false ], // meta map
            file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true)
            ]
-    fasta = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true)
-    fai = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta_fai'], checkIfExists: true)
-    dict = file(params.test_data['homo_sapiens']['genome']['genome_21_dict'], checkIfExists: true)
+    fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
+    fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true)
+    dict = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true)

    GATK4_MARKDUPLICATES_SPARK ( input, fasta, fai, dict )
 }

+// chr 22
 workflow test_gatk4_markduplicates_spark_multiple_bams {
    input = [ [ id:'test', single_end:false ], // meta map
-              [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
-                file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam'], checkIfExists: true)
+            [   file(params.test_data['homo_sapiens']['illumina']['test_paired_end_name_sorted_bam'], checkIfExists: true),
+                file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_name_sorted_bam'], checkIfExists: true)
            ] ]
-    fasta = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true)
-    fai = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta_fai'], checkIfExists: true)
-    dict = file(params.test_data['homo_sapiens']['genome']['genome_21_dict'], checkIfExists: true)
+    fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
+    fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
+    dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)

    GATK4_MARKDUPLICATES_SPARK ( input, fasta, fai, dict )
 }
+
+// chr 22
+workflow test_gatk4_markduplicates_spark_multiple_bams_cram_out { // two name-sorted BAMs in, run through the _CRAM alias of the module
+    input = [ [ id:'test', single_end:false ], // meta map
+            [   file(params.test_data['homo_sapiens']['illumina']['test_paired_end_name_sorted_bam'], checkIfExists: true),
+                file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_name_sorted_bam'], checkIfExists: true)
+            ] ]
+    fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
+    fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
+    dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
+
+    GATK4_MARKDUPLICATES_SPARK_CRAM ( input, fasta, fai, dict ) // this alias is given ext.prefix "${meta.id}.cram" in the test's nextflow.config
+}
+
+// chr 22
+workflow test_gatk4_markduplicates_spark_multiple_bams_metrics { // two name-sorted BAMs in, run through the _METRICS alias of the module
+    input = [ [ id:'test', single_end:false ], // meta map
+            [   file(params.test_data['homo_sapiens']['illumina']['test_paired_end_name_sorted_bam'], checkIfExists: true),
+                file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_name_sorted_bam'], checkIfExists: true)
+            ] ]
+    fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
+    fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
+    dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
+
+    GATK4_MARKDUPLICATES_SPARK_METRICS ( input, fasta, fai, dict ) // this alias is given ext.args '--metrics-file test.metrics' in the test's nextflow.config
+}
--- a/tests/modules/gatk4/markduplicatesspark/nextflow.config
+++ b/tests/modules/gatk4/markduplicatesspark/nextflow.config
@ -2,4 +2,18 @@ process {

    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }

+    withName: GATK4_MARKDUPLICATES_SPARK {
+        ext.prefix = { "${meta.id}.bam" }
    }
+    withName: GATK4_MARKDUPLICATES_SPARK_CRAM {
+        ext.prefix = { "${meta.id}.cram" }
+    }
+    withName: GATK4_MARKDUPLICATES_SPARK_METRICS {
+        ext.args = '--metrics-file test.metrics'
+        ext.prefix = { "${meta.id}.bam" }
+    }
+
+}
+// override tests/config/nextflow.config
+docker.userEmulation = false
+
--- a/tests/modules/gatk4/markduplicatesspark/test.yml
+++ b/tests/modules/gatk4/markduplicatesspark/test.yml
@ -1,25 +1,41 @@
- name: gatk4 markduplicates test_gatk4_markduplicates_spark
-  command: nextflow run tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark -c tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config
+- name: gatk4 markduplicatesspark test_gatk4_markduplicates_spark
+  command: nextflow run ./tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark -c ./tests/config/nextflow.config  -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config
  tags:
    - gatk4
    - gatk4/markduplicatesspark
  files:
-    - path: output/gatk4/test.bai
-      md5sum: e9c125e82553209933883b4fe2b8d7c2
    - path: output/gatk4/test.bam
-      md5sum: 2efd50b2e6b7fd9bdf242cd9e266cfa9
-    - path: output/gatk4/test.metrics
+      md5sum: dc1a09ac6371aab7c50d1a554baa06d3
    - path: output/gatk4/versions.yml

- name: gatk4 markduplicates test_gatk4_markduplicates_spark_multiple_bams
-  command: nextflow run tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark_multiple_bams -c tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config
+- name: gatk4 markduplicatesspark test_gatk4_markduplicates_spark_multiple_bams
+  command: nextflow run ./tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark_multiple_bams -c ./tests/config/nextflow.config  -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config
  tags:
    - gatk4
    - gatk4/markduplicatesspark
  files:
-    - path: output/gatk4/test.bai
-      md5sum: bad71df9c876e72a5bc0a3e0fd755f92
    - path: output/gatk4/test.bam
-      md5sum: 8187febc6108ffef7f907e89b9c091a4
-    - path: output/gatk4/test.metrics
+      md5sum: 898cb0a6616897d8ada90bab53bf0837
+    - path: output/gatk4/versions.yml
+
+- name: gatk4 markduplicatesspark test_gatk4_markduplicates_spark_multiple_bams_cram_out
+  command: nextflow run ./tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark_multiple_bams_cram_out -c ./tests/config/nextflow.config  -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config
+  tags:
+    - gatk4
+    - gatk4/markduplicatesspark
+  files:
+    - path: output/gatk4/test.cram # name follows the "${meta.id}.cram" prefix configured for the _CRAM alias
+      md5sum: 2271016de5e4199736598f39d12d7587
+    - path: output/gatk4/versions.yml
+
+- name: gatk4 markduplicatesspark test_gatk4_markduplicates_spark_multiple_bams_metrics
+  command: nextflow run ./tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark_multiple_bams_metrics -c ./tests/config/nextflow.config  -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config
+  tags:
+    - gatk4
+    - gatk4/markduplicatesspark
+  files:
+    - path: output/gatk4/test.bam
+      md5sum: 898cb0a6616897d8ada90bab53bf0837 # same checksum as test.bam in the multiple_bams test
+    - path: output/gatk4/test.metrics
+      contains: ["## METRICS CLASS", "org.broadinstitute.hellbender.utils.read.markduplicates.GATKDuplicationMetrics"] # substring checks rather than a checksum
    - path: output/gatk4/versions.yml
--- a/tests/modules/gatk4/reblockgvcf/main.nf
+++ b/tests/modules/gatk4/reblockgvcf/main.nf
@ -0,0 +1,55 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { GATK4_REBLOCKGVCF } from '../../../../modules/gatk4/reblockgvcf/main.nf'
+
+workflow test_gatk4_reblockgvcf { // Baseline run: sarscov2 VCF + index, no intervals, no dbSNP
+    
+    input = [
+        [ id:'test', single_end:false ], // meta map
+        file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true),
+        file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true),
+        [] // fourth slot left empty; test_gatk4_reblockgvcf_intervals puts a BED file here
+    ]
+
+    fasta       = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
+    fasta_index = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true)
+    dict        = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true)
+
+    GATK4_REBLOCKGVCF ( input, fasta, fasta_index, dict, [], [] ) // trailing [], [] are the positions where test_gatk4_reblockgvcf_dbsnp passes dbsnp and dbsnp_tbi
+}
+
+workflow test_gatk4_reblockgvcf_intervals { // Same as the baseline run, but with a BED file in the input tuple
+    
+    input = [
+        [ id:'test', single_end:false ], // meta map
+        file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true),
+        file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true),
+        file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true) // fills the slot the other two workflows leave as []
+    ]
+
+    fasta       = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
+    fasta_index = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true)
+    dict        = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true)
+
+    GATK4_REBLOCKGVCF ( input, fasta, fasta_index, dict, [], [] ) // no dbSNP inputs in this variant
+}
+
+workflow test_gatk4_reblockgvcf_dbsnp { // homo_sapiens VCF + index with dbSNP VCF and index supplied
+    
+    input = [
+        [ id:'test', single_end:false ], // meta map
+        file(params.test_data['homo_sapiens']['illumina']['test_haplotc_cnn_vcf_gz'], checkIfExists: true),
+        file(params.test_data['homo_sapiens']['illumina']['test_haplotc_cnn_vcf_gz_tbi'], checkIfExists: true),
+        [] // no BED file in this variant
+    ]
+
+    fasta       = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
+    fasta_index = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
+    dict        = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
+    dbsnp       = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz'], checkIfExists: true)
+    dbsnp_tbi   = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz_tbi'], checkIfExists: true)
+
+    GATK4_REBLOCKGVCF ( input, fasta, fasta_index, dict, dbsnp, dbsnp_tbi ) // the only variant that fills the last two positions
+}
--- a/tests/modules/gatk4/reblockgvcf/nextflow.config
+++ b/tests/modules/gatk4/reblockgvcf/nextflow.config
@ -0,0 +1,5 @@
+process {
+    // Publish into <params.outdir>/<tool>: last ':'-separated part of the process name, first '_' token, lower-cased (GATK4_REBLOCKGVCF -> gatk4)
+    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
+    
+}
--- a/tests/modules/gatk4/reblockgvcf/test.yml
+++ b/tests/modules/gatk4/reblockgvcf/test.yml
@ -0,0 +1,26 @@
+- name: gatk4 reblockgvcf test_gatk4_reblockgvcf
+  command: nextflow run ./tests/modules/gatk4/reblockgvcf -entry test_gatk4_reblockgvcf -c ./tests/config/nextflow.config  -c ./tests/modules/gatk4/reblockgvcf/nextflow.config
+  tags:
+    - gatk4/reblockgvcf
+    - gatk4
+  files: # paths listed without md5sum/contains keys, so no content check is declared here
+    - path: output/gatk4/test.rb.g.vcf.gz
+    - path: output/gatk4/test.rb.g.vcf.gz.tbi
+
+- name: gatk4 reblockgvcf test_gatk4_reblockgvcf_intervals
+  command: nextflow run ./tests/modules/gatk4/reblockgvcf -entry test_gatk4_reblockgvcf_intervals -c ./tests/config/nextflow.config  -c ./tests/modules/gatk4/reblockgvcf/nextflow.config
+  tags:
+    - gatk4/reblockgvcf
+    - gatk4
+  files: # same two output names as the baseline entry; no content check declared
+    - path: output/gatk4/test.rb.g.vcf.gz
+    - path: output/gatk4/test.rb.g.vcf.gz.tbi
+
+- name: gatk4 reblockgvcf test_gatk4_reblockgvcf_dbsnp
+  command: nextflow run ./tests/modules/gatk4/reblockgvcf -entry test_gatk4_reblockgvcf_dbsnp -c ./tests/config/nextflow.config  -c ./tests/modules/gatk4/reblockgvcf/nextflow.config
+  tags:
+    - gatk4/reblockgvcf
+    - gatk4
+  files: # same two output names as the baseline entry; no content check declared
+    - path: output/gatk4/test.rb.g.vcf.gz
+    - path: output/gatk4/test.rb.g.vcf.gz.tbi
--- a/tests/modules/sexdeterrmine/main.nf
+++ b/tests/modules/sexdeterrmine/main.nf
@ -0,0 +1,16 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { SAMTOOLS_DEPTH } from '../../../modules/samtools/depth/main.nf'
+include { SEXDETERRMINE } from '../../../modules/sexdeterrmine/main.nf'
+
+workflow test_sexdeterrmine {
+    // Two-step test: SAMTOOLS_DEPTH runs on one BAM and its tsv output is fed into SEXDETERRMINE
+    input = [
+        [ id:'test', single_end:false ], // meta map
+        file(params.test_data['homo_sapiens']['illumina']['test3_single_end_markduplicates_sorted_bam'], checkIfExists: true) ]
+
+    SAMTOOLS_DEPTH ( input )
+    SEXDETERRMINE ( SAMTOOLS_DEPTH.out.tsv, [] ) // second input passed as an empty list
+}
--- a/tests/modules/sexdeterrmine/nextflow.config
+++ b/tests/modules/sexdeterrmine/nextflow.config
@ -0,0 +1,12 @@
+process {
+    // Publish into <params.outdir>/<tool>: last ':'-separated part of the process name, first '_' token, lower-cased (SAMTOOLS_DEPTH -> samtools)
+    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
+
+    withName:SAMTOOLS_DEPTH {
+        ext.args = "-H" // NOTE(review): presumably makes samtools depth emit a header line that the downstream step needs - confirm
+    }
+
+    withName:SEXDETERRMINE {
+        ext.prefix = { "${meta.id}_sexdet" } // gives test_sexdet for meta.id 'test'; test.yml checks output/sexdeterrmine/test_sexdet.tsv
+    }
+}
--- a/tests/modules/sexdeterrmine/test.yml
+++ b/tests/modules/sexdeterrmine/test.yml
@ -0,0 +1,15 @@
+- name: sexdeterrmine test_sexdeterrmine
+  command: nextflow run ./tests/modules/sexdeterrmine -entry test_sexdeterrmine -c ./tests/config/nextflow.config -c ./tests/modules/sexdeterrmine/nextflow.config
+  tags:
+    - sexdeterrmine
+  files: # expected names depend on the module test config (ext.prefix "${meta.id}_sexdet"), so the command passes that config explicitly
+    - path: output/samtools/test.tsv
+      md5sum: fa2992ca1ea93a6e1b3e838476191935
+    - path: output/samtools/versions.yml
+      md5sum: dbd04b700335c8ad236bd667254c8dd8
+    - path: output/sexdeterrmine/sexdeterrmine.json
+      md5sum: bafb2419bb8630eda29a251c20e97166
+    - path: output/sexdeterrmine/test_sexdet.tsv
+      md5sum: 1cf8a2b97b38353eb97a96ab872dcca9
+    - path: output/sexdeterrmine/versions.yml
+      md5sum: 077361101e8e7997aec3da8a01e59eee
--- a/tests/test_versions_yml.py
+++ b/tests/test_versions_yml.py
@ -56,5 +56,5 @@ def test_ensure_valid_version_yml(workflow_dir):
    assert len(software_versions), "There must be at least one version emitted."
    for tool, version in software_versions.items():
        assert re.match(
-            r"^\d+.*", str(version)
-        ), f"Version number for {tool} must start with a number. "
+            r"^\d.*|^[a-f0-9]{40}$", str(version)
+        ), f"Version number for {tool} must start with a number, or be a Git SHA commit id. "