Merge branch 'master' into haplocheck

This commit is contained in:
Taniguti 2022-06-10 20:39:33 -03:00 committed by GitHub
commit 547f483b74
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
50 changed files with 1023 additions and 66 deletions

48
modules/ampir/main.nf Normal file
View file

@ -0,0 +1,48 @@
// Predicts antimicrobial peptides (AMPs) from protein sequences with the R package ampir.
// Emits a TSV of predictions and a FASTA of sequences passing the probability cut-off.
process AMPIR {
tag "$meta.id"
label 'process_low'
conda (params.enable_conda ? "conda-forge::r-ampir=1.1.0" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/r-ampir:1.1.0':
'quay.io/biocontainers/r-ampir:1.1.0' }"
input:
tuple val(meta), path(faa)
val model
val min_length
val min_probability
output:
tuple val(meta), path("*.faa"), emit: amps_faa
tuple val(meta), path("*.tsv"), emit: amps_tsv
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
// Build the optional min_len argument in a local variable instead of reassigning the
// `min_length` process input (mutating a task input value is error-prone in Nextflow).
// An unset value channel renders as "[]"; in that case fall back to the ampir default.
def min_length_arg = ("${min_length}" == "[]") ? "": " min_len = as.integer(${min_length})," // Fall back to ampir default value if none specified
if ("$faa" == "${prefix}.faa") error "Input and output names are the same, set prefix in module configuration to disambiguate!"
"""
#!/usr/bin/env Rscript
library(ampir)
input_seqs <- read_faa('${faa}')
prediction <- predict_amps(input_seqs,${min_length_arg} model = '${model}')
prediction <- prediction[which(prediction\$prob_AMP >= as.numeric(${min_probability})), ]
output_seqs <- input_seqs[row.names(prediction), ]
write.table(prediction, file = "${prefix}.tsv", row.names = FALSE, sep = "\t", quote = FALSE, dec = '.')
df_to_faa(output_seqs, "${prefix}.faa")
version_file_path <- "versions.yml"
version_ampir <- paste(unlist(packageVersion("ampir")), collapse = ".")
f <- file(version_file_path, "w")
writeLines('"${task.process}":', f)
writeLines(" ampir: ", f, sep = "")
writeLines(version_ampir, f)
close(f)
"""
}

59
modules/ampir/meta.yml Normal file
View file

@ -0,0 +1,59 @@
name: "ampir"
description: A fast and user-friendly method to predict antimicrobial peptides (AMPs) from any given size protein dataset. ampir uses a supervised statistical machine learning approach to predict AMPs.
keywords:
- ampir
- amp
- antimicrobial peptide prediction
tools:
- "ampir":
description: "A toolkit to predict antimicrobial peptides from protein sequences on a genome-wide scale."
homepage: "https://github.com/Legana/ampir"
documentation: "https://cran.r-project.org/web/packages/ampir/index.html"
tool_dev_url: "https://github.com/Legana/ampir"
doi: "10.1093/bioinformatics/btaa653"
licence: ["GPL v2"]
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- faa:
type: file
description: FASTA file containing amino acid sequences
pattern: "*.{faa,fasta}"
- model:
type: value
description: Built-in model for AMP prediction
pattern: "{precursor,mature}"
- min_length:
type: value
description: Minimum protein length for which predictions will be generated
pattern: "[0-9]+"
- min_probability:
type: value
description: Cut-off for AMP prediction
pattern: "[0-9].[0-9]+"
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- amps_faa:
type: file
description: File containing AMP predictions in amino acid FASTA format
pattern: "*.{faa}"
- amps_tsv:
type: file
description: File containing AMP predictions in TSV format
pattern: "*.tsv"
authors:
- "@jasmezz"

View file

@ -1,4 +1,4 @@
# Dockerfile to create container with Cell Ranger v6.1.2 # Dockerfile to create container with Cell Ranger v7.0.0
# Push to nfcore/cellranger:<VER> # Push to nfcore/cellranger:<VER>
FROM continuumio/miniconda3:4.8.2 FROM continuumio/miniconda3:4.8.2

View file

@ -5,7 +5,7 @@ process CELLRANGER_COUNT {
if (params.enable_conda) { if (params.enable_conda) {
exit 1, "Conda environments cannot be used when using the Cell Ranger tool. Please use docker or singularity containers." exit 1, "Conda environments cannot be used when using the Cell Ranger tool. Please use docker or singularity containers."
} }
container "nfcore/cellranger:6.1.2" container "nfcore/cellranger:7.0.0"
input: input:
tuple val(meta), path(reads) tuple val(meta), path(reads)

View file

@ -1,4 +1,4 @@
# Dockerfile to create container with Cell Ranger v6.1.2 and bcl2fastq v2.20.0 # Dockerfile to create container with Cell Ranger v7.0.0 and bcl2fastq v2.20.0
# Push to nfcore/cellrangermkfastq:<VER> # Push to nfcore/cellrangermkfastq:<VER>
FROM continuumio/miniconda3:4.8.2 FROM continuumio/miniconda3:4.8.2
@ -17,7 +17,7 @@ RUN apt-get update --allow-releaseinfo-change \
# Copy pre-downloaded bcl2fastq2 and cellranger file # Copy pre-downloaded bcl2fastq2 and cellranger file
ENV BCL2FASTQ2_VER=v2-20-0-linux-x86-64 \ ENV BCL2FASTQ2_VER=v2-20-0-linux-x86-64 \
CELLRANGER_VER=6.1.2 CELLRANGER_VER=7.0.0
COPY bcl2fastq2-$BCL2FASTQ2_VER.zip /tmp/bcl2fastq2-$BCL2FASTQ2_VER.zip COPY bcl2fastq2-$BCL2FASTQ2_VER.zip /tmp/bcl2fastq2-$BCL2FASTQ2_VER.zip
COPY cellranger-$CELLRANGER_VER.tar.gz /opt/cellranger-$CELLRANGER_VER.tar.gz COPY cellranger-$CELLRANGER_VER.tar.gz /opt/cellranger-$CELLRANGER_VER.tar.gz

View file

@ -5,7 +5,7 @@ process CELLRANGER_MKFASTQ {
if (params.enable_conda) { if (params.enable_conda) {
exit 1, "Conda environments cannot be used when using the Cell Ranger tool. Please use docker or singularity containers." exit 1, "Conda environments cannot be used when using the Cell Ranger tool. Please use docker or singularity containers."
} }
container "nfcore/cellrangermkfastq:6.1.2" container "nfcore/cellrangermkfastq:7.0.0"
input: input:
path bcl path bcl

View file

@ -5,7 +5,7 @@ process CELLRANGER_MKGTF {
if (params.enable_conda) { if (params.enable_conda) {
exit 1, "Conda environments cannot be used when using the Cell Ranger tool. Please use docker or singularity containers." exit 1, "Conda environments cannot be used when using the Cell Ranger tool. Please use docker or singularity containers."
} }
container "nfcore/cellranger:6.1.2" container "nfcore/cellranger:7.0.0"
input: input:
path gtf path gtf

View file

@ -5,7 +5,7 @@ process CELLRANGER_MKREF {
if (params.enable_conda) { if (params.enable_conda) {
exit 1, "Conda environments cannot be used when using the Cell Ranger tool. Please use docker or singularity containers." exit 1, "Conda environments cannot be used when using the Cell Ranger tool. Please use docker or singularity containers."
} }
container "nfcore/cellranger:6.1.2" container "nfcore/cellranger:7.0.0"
input: input:
path fasta path fasta

View file

@ -13,6 +13,7 @@ process ENSEMBLVEP {
val species val species
val cache_version val cache_version
path cache path cache
path fasta
path extra_files path extra_files
output: output:
@ -27,6 +28,8 @@ process ENSEMBLVEP {
def args = task.ext.args ?: '' def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}" def prefix = task.ext.prefix ?: "${meta.id}"
def dir_cache = cache ? "\${PWD}/${cache}" : "/.vep" def dir_cache = cache ? "\${PWD}/${cache}" : "/.vep"
def reference = fasta ? "--fasta $fasta" : ""
""" """
mkdir $prefix mkdir $prefix
@ -34,6 +37,7 @@ process ENSEMBLVEP {
-i $vcf \\ -i $vcf \\
-o ${prefix}.ann.vcf \\ -o ${prefix}.ann.vcf \\
$args \\ $args \\
$reference \\
--assembly $genome \\ --assembly $genome \\
--species $species \\ --species $species \\
--cache \\ --cache \\

View file

@ -36,6 +36,11 @@ input:
type: file type: file
description: | description: |
path to VEP cache (optional) path to VEP cache (optional)
- fasta:
type: file
description: |
reference FASTA file (optional)
pattern: "*.{fasta,fa}"
- extra_files: - extra_files:
type: tuple type: tuple
description: | description: |

View file

@ -0,0 +1,54 @@
// Runs GATK3 IndelRealigner: locally realigns reads around indels using the
// target intervals produced by GATK3 RealignerTargetCreator.
process GATK_INDELREALIGNER {
tag "$meta.id"
label 'process_low'
conda (params.enable_conda ? "bioconda::gatk=3.5" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/gatk:3.5--hdfd78af_11':
'quay.io/biocontainers/gatk:3.5--hdfd78af_11' }"
input:
// bam/bai: sorted, indexed alignments; intervals: RealignerTargetCreator output
tuple val(meta), path(bam), path(bai), path(intervals)
path(fasta)
path(fai)
path(dict)
path(known_vcf)
output:
tuple val(meta), path("*.bam"), path("*.bai"), emit: bam
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
// known_vcf is optional; only pass -known when a file was supplied
def known = known_vcf ? "-known ${known_vcf}" : ""
// The "*.bam" output glob would capture an identically named input file
if ("$bam" == "${prefix}.bam") error "Input and output names are the same, set prefix in module configuration to disambiguate!"
// Default JVM heap to 3 GB when the task has no memory directive
def avail_mem = 3
if (!task.memory) {
log.info '[GATK IndelRealigner] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
} else {
avail_mem = task.memory.giga
}
"""
gatk3 \\
-Xmx${avail_mem}g \\
-T IndelRealigner \\
-R ${fasta} \\
-I ${bam} \\
--targetIntervals ${intervals} \\
${known} \\
-o ${prefix}.bam \\
$args
cat <<-END_VERSIONS > versions.yml
"${task.process}":
gatk: \$(echo \$(gatk3 --version))
END_VERSIONS
"""
}

View file

@ -0,0 +1,71 @@
name: "gatk_indelrealigner"
description: Performs local realignment around indels to correct for mapping errors
keywords:
- bam
- vcf
- variant calling
- indel
- realignment
tools:
- "gatk":
description: "The full Genome Analysis Toolkit (GATK) framework, license restricted."
homepage: "https://gatk.broadinstitute.org/hc/en-us"
documentation: "https://github.com/broadinstitute/gatk-docs"
licence: "['https://software.broadinstitute.org/gatk/download/licensing', 'BSD', 'https://www.broadinstitute.org/gatk/about/#licensing']"
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- bam:
type: file
description: Sorted and indexed BAM file
pattern: "*.bam"
- bai:
type: file
description: BAM index file
pattern: "*.bai"
- intervals:
type: file
description: Intervals file created by gatk3 RealignerTargetCreator
pattern: "*.{intervals,list}"
- fasta:
type: file
description: Reference file used to generate BAM file
pattern: ".{fasta,fa,fna}"
- fai:
type: file
description: Index of reference file used to generate BAM file
pattern: ".fai"
- dict:
type: file
description: GATK dict file for reference
pattern: ".dict"
- known_vcf:
type: file
description: Optional input VCF file(s) with known indels
pattern: ".vcf"
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- bam:
type: file
description: Sorted and indexed BAM file with local realignment around variants
pattern: "*.bam"
- bai:
type: file
description: Output BAM Index file
pattern: "*.bai"
authors:
- "@jfy133"

View file

@ -0,0 +1,53 @@
// Runs GATK3 RealignerTargetCreator: identifies genomic intervals that are
// candidates for local indel realignment (consumed by GATK3 IndelRealigner).
process GATK_REALIGNERTARGETCREATOR {
tag "$meta.id"
label 'process_low'
conda (params.enable_conda ? "bioconda::gatk=3.5" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/gatk:3.5--hdfd78af_11':
'quay.io/biocontainers/gatk:3.5--hdfd78af_11' }"
input:
// input/index: sorted, indexed alignment file and its index
tuple val(meta), path(input), path(index)
path fasta
path fai
path dict
path known_vcf
output:
tuple val(meta), path("*.intervals"), emit: intervals
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
// known_vcf is optional; only pass -known when a file was supplied
def known = known_vcf ? "-known ${known_vcf}" : ""
if ("$input" == "${prefix}.bam") error "Input and output names are the same, set prefix in module configuration to disambiguate!"
// Default JVM heap to 3 GB when the task has no memory directive
def avail_mem = 3
if (!task.memory) {
log.info '[GATK RealignerTargetCreator] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
} else {
avail_mem = task.memory.giga
}
"""
gatk3 \\
-Xmx${avail_mem}g \\
-T RealignerTargetCreator \\
-nt ${task.cpus} \\
-I ${input} \\
-R ${fasta} \\
-o ${prefix}.intervals \\
${known} \\
$args
cat <<-END_VERSIONS > versions.yml
"${task.process}":
gatk: \$(echo \$(gatk3 --version))
END_VERSIONS
"""
}

View file

@ -0,0 +1,64 @@
name: "gatk_realignertargetcreator"
description: Generates a list of locations that should be considered for local realignment prior genotyping.
keywords:
- bam
- vcf
- variant calling
- indel
- realignment
- targets
tools:
- "gatk":
description: "The full Genome Analysis Toolkit (GATK) framework, license restricted."
homepage: "https://gatk.broadinstitute.org/hc/en-us"
documentation: "https://github.com/broadinstitute/gatk-docs"
licence: "['https://software.broadinstitute.org/gatk/download/licensing', 'BSD', 'https://www.broadinstitute.org/gatk/about/#licensing']"
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- input:
type: file
description: Sorted and indexed BAM/CRAM/SAM file
pattern: "*.bam"
- index:
type: file
description: BAM index file
pattern: "*.bai"
- fasta:
type: file
description: Reference file used to generate BAM file
pattern: ".{fasta,fa,fna}"
- fai:
type: file
description: Index of reference file used to generate BAM file
pattern: ".fai"
- dict:
type: file
description: GATK dict file for reference
pattern: ".dict"
- known_vcf:
type: file
description: Optional input VCF file(s) with known indels
pattern: ".vcf"
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- intervals:
type: file
description: File containing intervals that represent sites of extant and potential indels.
pattern: "*.intervals"
authors:
- "@jfy133"

View file

@ -2,10 +2,8 @@ process GATK4_APPLYBQSR_SPARK {
tag "$meta.id" tag "$meta.id"
label 'process_low' label 'process_low'
conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1" : null) conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1 conda-forge::openjdk=8.0.312" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? container 'broadinstitute/gatk:4.2.6.1'
'https://depot.galaxyproject.org/singularity/gatk4:4.2.6.1--hdfd78af_0':
'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }"
input: input:
tuple val(meta), path(input), path(input_index), path(bqsr_table), path(intervals) tuple val(meta), path(input), path(input_index), path(bqsr_table), path(intervals)

View file

@ -2,10 +2,8 @@ process GATK4_BASERECALIBRATOR_SPARK {
tag "$meta.id" tag "$meta.id"
label 'process_low' label 'process_low'
conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1" : null) conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1 conda-forge::openjdk=8.0.312" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? container 'broadinstitute/gatk:4.2.6.1'
'https://depot.galaxyproject.org/singularity/gatk4:4.2.6.1--hdfd78af_0':
'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }"
input: input:
tuple val(meta), path(input), path(input_index), path(intervals) tuple val(meta), path(input), path(input_index), path(intervals)

View file

@ -2,10 +2,8 @@ process GATK4_MARKDUPLICATES_SPARK {
tag "$meta.id" tag "$meta.id"
label 'process_high' label 'process_high'
conda (params.enable_conda ? "bioconda::gatk4=4.2.3.0" : null) conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1 conda-forge::openjdk=8.0.312" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? container 'broadinstitute/gatk:4.2.6.1'
'https://depot.galaxyproject.org/singularity/gatk4:4.2.3.0--hdfd78af_0' :
'broadinstitute/gatk:4.2.3.0' }"
input: input:
tuple val(meta), path(bam) tuple val(meta), path(bam)
@ -14,8 +12,9 @@ process GATK4_MARKDUPLICATES_SPARK {
path dict path dict
output: output:
tuple val(meta), path("${prefix}"), emit: output tuple val(meta), path("${prefix}"), emit: output
path "versions.yml" , emit: versions tuple val(meta), path("*.metrics"), emit: metrics, optional: true
path "versions.yml" , emit: versions
when: when:
task.ext.when == null || task.ext.when task.ext.when == null || task.ext.when
@ -25,6 +24,7 @@ process GATK4_MARKDUPLICATES_SPARK {
prefix = task.ext.prefix ?: "${meta.id}" prefix = task.ext.prefix ?: "${meta.id}"
def input_list = bam.collect{"--input $it"}.join(' ') def input_list = bam.collect{"--input $it"}.join(' ')
def avail_mem = 3 def avail_mem = 3
if (!task.memory) { if (!task.memory) {
log.info '[GATK MarkDuplicatesSpark] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' log.info '[GATK MarkDuplicatesSpark] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
@ -32,8 +32,6 @@ process GATK4_MARKDUPLICATES_SPARK {
avail_mem = task.memory.giga avail_mem = task.memory.giga
} }
""" """
export SPARK_USER=spark3
gatk --java-options "-Xmx${avail_mem}g" MarkDuplicatesSpark \\ gatk --java-options "-Xmx${avail_mem}g" MarkDuplicatesSpark \\
$input_list \\ $input_list \\
--output $prefix \\ --output $prefix \\
@ -45,6 +43,7 @@ process GATK4_MARKDUPLICATES_SPARK {
cat <<-END_VERSIONS > versions.yml cat <<-END_VERSIONS > versions.yml
"${task.process}": "${task.process}":
gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//')
openjdk: \$(echo \$(java -version 2>&1) | grep version | sed 's/\"//g' | cut -f3 -d ' ')
END_VERSIONS END_VERSIONS
""" """
} }

View file

@ -58,3 +58,4 @@ authors:
- "@ajodeh-juma" - "@ajodeh-juma"
- "@FriederikeHanssen" - "@FriederikeHanssen"
- "@maxulysse" - "@maxulysse"
- "@SusiJo"

View file

@ -0,0 +1,52 @@
// Runs GATK4 ReblockGVCF: condenses homRef blocks in a single-sample GVCF,
// producing a smaller reblocked GVCF plus its tabix index.
process GATK4_REBLOCKGVCF {
tag "$meta.id"
label 'process_low'
conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/gatk4:4.2.6.1--hdfd78af_0':
'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }"
input:
// gvcf/tbi: bgzipped GVCF and its index; intervals is optional
tuple val(meta), path(gvcf), path(tbi), path(intervals)
path fasta
path fai
path dict
path dbsnp
path dbsnp_tbi
output:
tuple val(meta), path("*.rb.g.vcf.gz"), path("*.tbi") , emit: vcf
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
// dbsnp and intervals are optional; only add their flags when supplied
def dbsnp_command = dbsnp ? "--dbsnp $dbsnp" : ""
def interval_command = intervals ? "--intervals $intervals" : ""
// Default JVM heap to 3 GB when the task has no memory directive
def avail_mem = 3
if (!task.memory) {
log.info '[GATK ReblockGVCF] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
} else {
avail_mem = task.memory.giga
}
"""
gatk --java-options "-Xmx${avail_mem}g" ReblockGVCF \\
--variant $gvcf \\
--output ${prefix}.rb.g.vcf.gz \\
--reference $fasta \\
$dbsnp_command \\
$interval_command \\
--tmp-dir . \\
$args
cat <<-END_VERSIONS > versions.yml
"${task.process}":
gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//')
END_VERSIONS
"""
}

View file

@ -0,0 +1,74 @@
name: "gatk4_reblockgvcf"
description: Condenses homRef blocks in a single-sample GVCF
keywords:
- gatk4
- reblockgvcf
- gvcf
tools:
- gatk4:
description: |
Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools
with a primary focus on variant discovery and genotyping. Its powerful processing engine
and high-performance computing features make it capable of taking on projects of any size.
homepage: https://gatk.broadinstitute.org/hc/en-us
documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672
doi: 10.1158/1538-7445.AM2017-3590
licence: ["Apache-2.0"]
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- gvcf:
type: file
description: GVCF file created using HaplotypeCaller using the '-ERC GVCF' or '-ERC BP_RESOLUTION' mode
pattern: "*.{vcf,gvcf}.gz"
- tbi:
type: file
description: Index of the GVCF file
pattern: "*.tbi"
- intervals:
type: file
description: Bed file with the genomic regions included in the library (optional)
- fasta:
type: file
description: The reference fasta file
pattern: "*.fasta"
- fai:
type: file
description: Index of reference fasta file
pattern: "*.fasta.fai"
- dict:
type: file
description: GATK sequence dictionary
pattern: "*.dict"
- dbsnp:
type: file
description: VCF file containing known sites (optional)
- dbsnp_tbi:
type: file
description: VCF index of dbsnp (optional)
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- gvcf:
type: file
description: Filtered GVCF
pattern: "*rb.g.vcf.gz"
- tbi:
type: file
description: Index of the filtered GVCF
pattern: "*rb.g.vcf.gz.tbi"
authors:
- "@nvnieuwk"

View file

@ -0,0 +1,40 @@
// Runs Sex.DetERRmine: computes relative X/Y vs autosome coverage (with error
// bars) from samtools depth output, for genetic sex determination.
process SEXDETERRMINE {
tag "$meta.id"
label 'process_low'
conda (params.enable_conda ? "bioconda::sexdeterrmine=1.1.2" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/sexdeterrmine:1.1.2--hdfd78af_1':
'quay.io/biocontainers/sexdeterrmine:1.1.2--hdfd78af_1' }"
input:
tuple val(meta), path(depth)
path sample_list_file
output:
tuple val(meta), path("*.json"), emit: json
tuple val(meta), path("*.tsv") , emit: tsv
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
// BUG FIX: the original used a single-quoted Groovy string ('-f ${sample_list_file}'),
// which does NOT interpolate — the literal text "${sample_list_file}" was passed to the
// shell. Double quotes are required for Groovy GString interpolation.
def sample_list = sample_list_file ? "-f ${sample_list_file}" : ''
if ("$depth" == "${prefix}.tsv") error "Input depth and output TSV names are the same, set prefix in module configuration to disambiguate!"
"""
sexdeterrmine \\
-I $depth \\
$sample_list \\
$args \\
> ${prefix}.tsv
cat <<-END_VERSIONS > versions.yml
"${task.process}":
sexdeterrmine: \$(echo \$(sexdeterrmine --version 2>&1))
END_VERSIONS
"""
}

View file

@ -0,0 +1,48 @@
name: "sexdeterrmine"
description: Calculate the relative coverage on the Gonosomes vs Autosomes from the output of samtools depth, with error bars.
keywords:
- sex determination
- genetic sex
- relative coverage
- ancient dna
tools:
- "sexdeterrmine":
description: "A Python script that calculates the relative coverage of the X and Y chromosomes, and their associated error bars, from capture data."
homepage: "https://github.com/TCLamnidis/Sex.DetERRmine"
documentation: "https://github.com/TCLamnidis/Sex.DetERRmine/README.md"
tool_dev_url: "https://github.com/TCLamnidis/Sex.DetERRmine"
doi: "https://doi.org/10.1038/s41467-018-07483-5"
licence: "['GPL v3']"
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- depth:
type: file
description: Output from samtools depth (with header)
pattern: "*"
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- json:
type: file
description: JSON formatted table of relative coverages on the X and Y, with associated error bars.
pattern: "*.json"
- tsv:
type: file
description: TSV table of relative coverages on the X and Y, with associated error bars.
pattern: "*.tsv"
authors:
- "@TCLamnidis"

View file

@ -26,6 +26,10 @@ allelecounter:
- modules/allelecounter/** - modules/allelecounter/**
- tests/modules/allelecounter/** - tests/modules/allelecounter/**
ampir:
- modules/ampir/**
- tests/modules/ampir/**
amplify/predict: amplify/predict:
- modules/amplify/predict/** - modules/amplify/predict/**
- tests/modules/amplify/predict/** - tests/modules/amplify/predict/**
@ -715,6 +719,14 @@ gamma/gamma:
- modules/gamma/gamma/** - modules/gamma/gamma/**
- tests/modules/gamma/gamma/** - tests/modules/gamma/gamma/**
gatk/indelrealigner:
- modules/gatk/indelrealigner/**
- tests/modules/gatk/indelrealigner/**
gatk/realignertargetcreator:
- modules/gatk/realignertargetcreator/**
- tests/modules/gatk/realignertargetcreator/**
gatk/unifiedgenotyper: gatk/unifiedgenotyper:
- modules/gatk/unifiedgenotyper/** - modules/gatk/unifiedgenotyper/**
- tests/modules/gatk/unifiedgenotyper/** - tests/modules/gatk/unifiedgenotyper/**
@ -851,6 +863,10 @@ gatk4/mutect2:
- modules/gatk4/mutect2/** - modules/gatk4/mutect2/**
- tests/modules/gatk4/mutect2/** - tests/modules/gatk4/mutect2/**
gatk4/reblockgvcf:
- modules/gatk4/reblockgvcf/**
- tests/modules/gatk4/reblockgvcf/**
gatk4/revertsam: gatk4/revertsam:
- modules/gatk4/revertsam/** - modules/gatk4/revertsam/**
- tests/modules/gatk4/revertsam/** - tests/modules/gatk4/revertsam/**
@ -1843,6 +1859,10 @@ seqwish/induce:
- modules/seqwish/induce/** - modules/seqwish/induce/**
- tests/modules/seqwish/induce/** - tests/modules/seqwish/induce/**
sexdeterrmine:
- modules/sexdeterrmine/**
- tests/modules/sexdeterrmine/**
shasum: shasum:
- modules/shasum/** - modules/shasum/**
- tests/modules/shasum/** - tests/modules/shasum/**

View file

@ -232,10 +232,11 @@ params {
test2_paired_end_umi_unsorted_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/umi/test2.paired_end.umi_unsorted.bam" test2_paired_end_umi_unsorted_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/umi/test2.paired_end.umi_unsorted.bam"
test2_paired_end_umi_unsorted_tagged_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/umi/test2.paired_end.unsorted_tagged.bam" test2_paired_end_umi_unsorted_tagged_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/umi/test2.paired_end.unsorted_tagged.bam"
mitochon_standin_recalibrated_sorted_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/mitochon_standin.recalibrated.sorted.bam" mitochon_standin_recalibrated_sorted_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/mitochon_standin.recalibrated.sorted.bam"
mitochon_standin_recalibrated_sorted_bam_bai = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/mitochon_standin.recalibrated.sorted.bam.bai" mitochon_standin_recalibrated_sorted_bam_bai = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/mitochon_standin.recalibrated.sorted.bam.bai"
test3_single_end_markduplicates_sorted_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam"
test_paired_end_sorted_cram = "${test_data_dir}/genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram" test_paired_end_sorted_cram = "${test_data_dir}/genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram"
test_paired_end_sorted_cram_crai = "${test_data_dir}/genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai" test_paired_end_sorted_cram_crai = "${test_data_dir}/genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai"
test_paired_end_markduplicates_sorted_cram = "${test_data_dir}/genomics/homo_sapiens/illumina/cram/test.paired_end.markduplicates.sorted.cram" test_paired_end_markduplicates_sorted_cram = "${test_data_dir}/genomics/homo_sapiens/illumina/cram/test.paired_end.markduplicates.sorted.cram"

View file

@ -0,0 +1,20 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { AMPIR } from '../../../modules/ampir/main.nf'
// Smoke test for the AMPIR module using the bundled test proteome.
workflow test_ampir {
fasta = [ [ id:'test', single_end:false ], // meta map
file(params.test_data['candidatus_portiera_aleyrodidarum']['genome']['proteome_fasta'], checkIfExists: true),
]
model = "precursor"
min_length = [] // empty: exercise the module's fall-back to the ampir default
min_probability = "0.7"
AMPIR ( fasta, model, min_length, min_probability )
}

View file

@ -0,0 +1,5 @@
process {
// Publish each task's outputs under a directory named after the tool:
// last element of the process name, truncated at the first '_', lowercased.
publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
}

View file

@ -0,0 +1,11 @@
- name: ampir test_ampir
command: nextflow run ./tests/modules/ampir -entry test_ampir -c ./tests/config/nextflow.config -c ./tests/modules/ampir/nextflow.config
tags:
- ampir
files:
- path: output/ampir/test.tsv
contains: ["seq_name\tseq_aa\tprob_AMP", "WP_014895017.1"]
- path: output/ampir/test.faa
md5sum: 0435609144022c55ac196db053f0df89
- path: output/ampir/versions.yml
md5sum: 4a11d25b8a904a7ffb34ae88f6826888

View file

@ -1,19 +1,65 @@
- name: cellranger count test_cellranger_count - name: cellranger count test_cellranger_count
command: nextflow run tests/modules/cellranger/count -entry test_cellranger_count -c tests/config/nextflow.config -c tests/modules/cellranger/count/nextflow.config command: nextflow run ./tests/modules/cellranger/count -entry test_cellranger_count -c ./tests/config/nextflow.config -c ./tests/modules/cellranger/count/nextflow.config
tags: tags:
- cellranger
- cellranger/count - cellranger/count
- cellranger
files: files:
- path: output/cellranger/genome.filtered.gtf
md5sum: a8b8a7b5039e05d3a9cf9151ea138b5b
- path: output/cellranger/homo_sapiens_chr22_reference/fasta/genome.fa
md5sum: f315020d899597c1b57e5fe9f60f4c3e
- path: output/cellranger/homo_sapiens_chr22_reference/fasta/genome.fa.fai
md5sum: 3520cd30e1b100e55f578db9c855f685
- path: output/cellranger/homo_sapiens_chr22_reference/genes/genes.gtf.gz
md5sum: d1e05cd46684fa26d852b6bc9f05e31f
- path: output/cellranger/homo_sapiens_chr22_reference/reference.json
md5sum: 8405fd7f527a944eafb9c2909045840b
- path: output/cellranger/homo_sapiens_chr22_reference/star/Genome
md5sum: 897cec2d191945335f8b320438bd9135
- path: output/cellranger/homo_sapiens_chr22_reference/star/SA
md5sum: 7961129ac5d0e1706105be1d31c6b30c
- path: output/cellranger/homo_sapiens_chr22_reference/star/SAindex
md5sum: dcceb480b30cda93fb8c63ddc339093b
- path: output/cellranger/homo_sapiens_chr22_reference/star/chrLength.txt
md5sum: c81f40f27e72606d7d07097c1d56a5b5
- path: output/cellranger/homo_sapiens_chr22_reference/star/chrName.txt
md5sum: 5ae68a67b70976ee95342a7451cb5af1
- path: output/cellranger/homo_sapiens_chr22_reference/star/chrNameLength.txt
md5sum: b190587cae0531f3cf25552d8aa674db
- path: output/cellranger/homo_sapiens_chr22_reference/star/chrStart.txt
md5sum: bc73df776dd3d5bb9cfcbcba60880519
- path: output/cellranger/homo_sapiens_chr22_reference/star/exonGeTrInfo.tab
md5sum: 9129691eeb4ed0d02b17be879fa3edb0
- path: output/cellranger/homo_sapiens_chr22_reference/star/exonInfo.tab
md5sum: 209b82f0683efd03e17d2c729676554f
- path: output/cellranger/homo_sapiens_chr22_reference/star/geneInfo.tab
md5sum: 02a8f4575bdfcd4a42b4d8d07f2e9369
- path: output/cellranger/homo_sapiens_chr22_reference/star/genomeParameters.txt
- path: output/cellranger/homo_sapiens_chr22_reference/star/sjdbInfo.txt
md5sum: 1082ab459363b3f2f7aabcef0979c1ed
- path: output/cellranger/homo_sapiens_chr22_reference/star/sjdbList.fromGTF.out.tab
- path: output/cellranger/homo_sapiens_chr22_reference/star/sjdbList.out.tab
- path: output/cellranger/homo_sapiens_chr22_reference/star/transcriptInfo.tab
md5sum: cedcb5f4e7d97bc548cd5daa022e092c
- path: output/cellranger/sample-123/outs/filtered_feature_bc_matrix.h5 - path: output/cellranger/sample-123/outs/filtered_feature_bc_matrix.h5
md5sum: f8b6b7cc8248151a98c46d4ebec450c6
- path: output/cellranger/sample-123/outs/filtered_feature_bc_matrix/barcodes.tsv.gz
- path: output/cellranger/sample-123/outs/filtered_feature_bc_matrix/features.tsv.gz
- path: output/cellranger/sample-123/outs/filtered_feature_bc_matrix/matrix.mtx.gz
- path: output/cellranger/sample-123/outs/metrics_summary.csv - path: output/cellranger/sample-123/outs/metrics_summary.csv
md5sum: 707df0f101d479d93f412ca74f9c4131 md5sum: 707df0f101d479d93f412ca74f9c4131
- path: output/cellranger/sample-123/outs/molecule_info.h5 - path: output/cellranger/sample-123/outs/molecule_info.h5
md5sum: 0e56836ef0725f2ab05f56ca5a71e55b md5sum: a13bd7425f441c8d0eac8ffc50082996
- path: output/cellranger/sample-123/outs/possorted_genome_bam.bam - path: output/cellranger/sample-123/outs/possorted_genome_bam.bam
md5sum: 15441da9cfceea0bb48c8b66b1b860df md5sum: 15441da9cfceea0bb48c8b66b1b860df
- path: output/cellranger/sample-123/outs/possorted_genome_bam.bam.bai - path: output/cellranger/sample-123/outs/possorted_genome_bam.bam.bai
md5sum: 7c3d49c77016a09535aff61a027f750c md5sum: 7c3d49c77016a09535aff61a027f750c
- path: output/cellranger/sample-123/outs/raw_feature_bc_matrix
- path: output/cellranger/sample-123/outs/raw_feature_bc_matrix.h5 - path: output/cellranger/sample-123/outs/raw_feature_bc_matrix.h5
md5sum: cdad1cd7b215d7137cf92515e81a8525 md5sum: a5290f3e300a4070f3d68a0c2e215f54
- path: output/cellranger/sample-123/outs/raw_feature_bc_matrix/barcodes.tsv.gz
md5sum: 5cc39ef0c7ac85f2b758b164aabf9157
- path: output/cellranger/sample-123/outs/raw_feature_bc_matrix/features.tsv.gz
md5sum: 07d497c7ce3e22f374af7b2cf9b97d72
- path: output/cellranger/sample-123/outs/raw_feature_bc_matrix/matrix.mtx.gz
md5sum: bdce94a51f16e22d40301724080b76ee
- path: output/cellranger/sample-123/outs/web_summary.html - path: output/cellranger/sample-123/outs/web_summary.html

View file

@ -5,7 +5,6 @@
- cellranger/mkfastq - cellranger/mkfastq
files: files:
- path: output/cellranger/cellranger-tiny-bcl-1/outs/fastq_path/fake_file.fastq.gz - path: output/cellranger/cellranger-tiny-bcl-1/outs/fastq_path/fake_file.fastq.gz
md5sum: d41d8cd98f00b204e9800998ecf8427e
- name: cellranger mkfastq test_cellranger_mkfastq_illumina - name: cellranger mkfastq test_cellranger_mkfastq_illumina
command: nextflow run tests/modules/cellranger/mkfastq -entry test_cellranger_mkfastq_illumina -c tests/config/nextflow.config -c ./tests/modules/cellranger/mkfastq/nextflow.config -stub-run command: nextflow run tests/modules/cellranger/mkfastq -entry test_cellranger_mkfastq_illumina -c tests/config/nextflow.config -c ./tests/modules/cellranger/mkfastq/nextflow.config -stub-run
tags: tags:
@ -13,4 +12,3 @@
- cellranger/mkfastq - cellranger/mkfastq
files: files:
- path: output/cellranger/cellranger-tiny-bcl-1/outs/fastq_path/fake_file.fastq.gz - path: output/cellranger/cellranger-tiny-bcl-1/outs/fastq_path/fake_file.fastq.gz
md5sum: d41d8cd98f00b204e9800998ecf8427e

View file

@ -1,8 +1,8 @@
- name: cellranger mkgtf test_cellranger_mkgtf - name: cellranger mkgtf test_cellranger_mkgtf
command: nextflow run tests/modules/cellranger/mkgtf -entry test_cellranger_mkgtf -c tests/config/nextflow.config -c tests/modules/cellranger/mkgtf/nextflow.config command: nextflow run ./tests/modules/cellranger/mkgtf -entry test_cellranger_mkgtf -c ./tests/config/nextflow.config -c ./tests/modules/cellranger/mkgtf/nextflow.config
tags: tags:
- cellranger
- cellranger/mkgtf - cellranger/mkgtf
- cellranger
files: files:
- path: output/cellranger/genome.filtered.gtf - path: output/cellranger/genome.filtered.gtf
md5sum: a8b8a7b5039e05d3a9cf9151ea138b5b md5sum: a8b8a7b5039e05d3a9cf9151ea138b5b

View file

@ -1,8 +1,8 @@
- name: cellranger mkref test_cellranger_mkref - name: cellranger mkref test_cellranger_mkref
command: nextflow run ./tests/modules/cellranger/mkref -entry test_cellranger_mkref -c ./tests/config/nextflow.config -c ./tests/modules/cellranger/mkref/nextflow.config command: nextflow run ./tests/modules/cellranger/mkref -entry test_cellranger_mkref -c ./tests/config/nextflow.config -c ./tests/modules/cellranger/mkref/nextflow.config
tags: tags:
- cellranger
- cellranger/mkref - cellranger/mkref
- cellranger
files: files:
- path: output/cellranger/homo_sapiens_chr22_reference/fasta/genome.fa - path: output/cellranger/homo_sapiens_chr22_reference/fasta/genome.fa
md5sum: f315020d899597c1b57e5fe9f60f4c3e md5sum: f315020d899597c1b57e5fe9f60f4c3e
@ -11,7 +11,7 @@
- path: output/cellranger/homo_sapiens_chr22_reference/genes/genes.gtf.gz - path: output/cellranger/homo_sapiens_chr22_reference/genes/genes.gtf.gz
md5sum: 6d9b5f409bfea95022bc25b9590e194e md5sum: 6d9b5f409bfea95022bc25b9590e194e
- path: output/cellranger/homo_sapiens_chr22_reference/reference.json - path: output/cellranger/homo_sapiens_chr22_reference/reference.json
md5sum: 5d8d1669cd251433505f183e1c9ed6bc md5sum: 6cc817f0923062e780e6573806840cea
- path: output/cellranger/homo_sapiens_chr22_reference/star/Genome - path: output/cellranger/homo_sapiens_chr22_reference/star/Genome
md5sum: 22102926fadf5890e905ca71b2da3f35 md5sum: 22102926fadf5890e905ca71b2da3f35
- path: output/cellranger/homo_sapiens_chr22_reference/star/SA - path: output/cellranger/homo_sapiens_chr22_reference/star/SA

View file

@ -4,11 +4,22 @@ nextflow.enable.dsl = 2
include { ENSEMBLVEP } from '../../../modules/ensemblvep/main.nf' include { ENSEMBLVEP } from '../../../modules/ensemblvep/main.nf'
workflow test_ensemblvep { workflow test_ensemblvep_fasta {
input = [ input = [
[ id:'test' ], // meta map [ id:'test' ], // meta map
file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true) file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true)
] ]
ENSEMBLVEP ( input, "WBcel235", "caenorhabditis_elegans", "104", [], [] ) fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
ENSEMBLVEP ( input, "WBcel235", "caenorhabditis_elegans", "104", [], fasta, [] )
}
workflow test_ensemblvep_no_fasta {
input = [
[ id:'test' ], // meta map
file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true)
]
ENSEMBLVEP ( input, "WBcel235", "caenorhabditis_elegans", "104", [], [], [] )
} }

View file

@ -1,5 +1,13 @@
- name: ensemblvep test_ensemblvep - name: ensemblvep test_ensemblvep_fasta
command: nextflow run ./tests/modules/ensemblvep -entry test_ensemblvep -c ./tests/config/nextflow.config -c ./tests/modules/ensemblvep/nextflow.config command: nextflow run ./tests/modules/ensemblvep -entry test_ensemblvep_fasta -c ./tests/config/nextflow.config -c ./tests/modules/ensemblvep/nextflow.config
tags:
- ensemblvep
files:
- path: output/ensemblvep/test.ann.vcf
- path: output/ensemblvep/test.summary.html
- name: ensemblvep test_ensemblvep_no_fasta
command: nextflow run ./tests/modules/ensemblvep -entry test_ensemblvep_no_fasta -c ./tests/config/nextflow.config -c ./tests/modules/ensemblvep/nextflow.config
tags: tags:
- ensemblvep - ensemblvep
files: files:

View file

@ -0,0 +1,33 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { GATK_REALIGNERTARGETCREATOR } from '../../../../modules/gatk/realignertargetcreator/main.nf'
include { GATK_INDELREALIGNER } from '../../../../modules/gatk/indelrealigner/main.nf'
// Test harness for GATK_INDELREALIGNER: first derives realignment target
// intervals with GATK_REALIGNERTARGETCREATOR, then feeds the BAM/BAI pair
// plus those intervals into the indel realigner.
workflow test_gatk_indelrealigner {
// SARS-CoV-2 reference bundle: FASTA, .fai index, sequence dictionary.
fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true)
dict = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true)
input_realignertargetcreator = [ [ id:'test' ], // meta map
file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true),
]
// Final [] argument: no known-indels VCF is supplied in this test.
GATK_REALIGNERTARGETCREATOR ( input_realignertargetcreator, fasta, fai, dict, [] )
ch_intervals = GATK_REALIGNERTARGETCREATOR.out.intervals
// Same BAM/BAI pair again, wrapped in a channel so it can be merged with
// the intervals output below.
ch_bams_indelrealigner = Channel.of([ [ id:'test' ], // meta map
file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true)
])
// Merge BAM tuple and intervals on the meta map (element 0) and re-index
// into the shape GATK_INDELREALIGNER expects.
// NOTE(review): the it[1][0]/it[1][1] positions assume a fixed element
// order after mix() + groupTuple() — confirm this is deterministic.
ch_input_indelrealigner = ch_bams_indelrealigner.mix(ch_intervals).groupTuple(by: 0).map{ [it[0], it[1][0], it[2], it[1][1] ] }.dump(tag: "input")
GATK_INDELREALIGNER ( ch_input_indelrealigner, fasta, fai, dict, [] )
}

View file

@ -0,0 +1,6 @@
// Publish outputs under a directory named after the module (e.g. "gatk")
// and suffix output file names with ".realigned".
process {
publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
ext.prefix = { "${meta.id}.realigned" }
}

View file

@ -0,0 +1,12 @@
- name: gatk indelrealigner test_gatk_indelrealigner
command: nextflow run ./tests/modules/gatk/indelrealigner -entry test_gatk_indelrealigner -c ./tests/config/nextflow.config -c ./tests/modules/gatk/indelrealigner/nextflow.config
tags:
- gatk/indelrealigner
- gatk
files:
- path: output/gatk/test.realigned.bai
md5sum: 85a67df8827fe426e7f3a458134c0551
- path: output/gatk/test.realigned.bam
md5sum: ea1df6f7fcafc408fae4dc1574813d8a
- path: output/gatk/test.realigned.intervals
md5sum: 7aa7a1b235a510e6591e262382086bf8

View file

@ -0,0 +1,18 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { GATK_REALIGNERTARGETCREATOR } from '../../../../modules/gatk/realignertargetcreator/main.nf'
// Exercise GATK_REALIGNERTARGETCREATOR on the SARS-CoV-2 paired-end test BAM.
workflow test_gatk_realignertargetcreator {
// Reference bundle: FASTA, .fai index, sequence dictionary.
reference = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
reference_fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true)
reference_dict = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true)
bam_tuple = [ [ id:'test' ], // meta map
file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true),
]
// Final [] argument: no known-indels VCF is supplied in this test.
GATK_REALIGNERTARGETCREATOR ( bam_tuple, reference, reference_fai, reference_dict, [] )
}

View file

@ -0,0 +1,5 @@
// Publish outputs under a directory named after the module (e.g. "gatk").
process {
publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
}

View file

@ -0,0 +1,8 @@
- name: gatk realignertargetcreator test_gatk_realignertargetcreator
command: nextflow run ./tests/modules/gatk/realignertargetcreator -entry test_gatk_realignertargetcreator -c ./tests/config/nextflow.config -c ./tests/modules/gatk/realignertargetcreator/nextflow.config
tags:
- gatk
- gatk/realignertargetcreator
files:
- path: output/gatk/test.intervals
md5sum: 7aa7a1b235a510e6591e262382086bf8

View file

@ -15,7 +15,6 @@
- gatk4/applybqsrspark - gatk4/applybqsrspark
files: files:
- path: output/gatk4/test.bam - path: output/gatk4/test.bam
md5sum: 2ca2446f0125890280056fd7da822732
- path: output/gatk4/versions.yml - path: output/gatk4/versions.yml
- name: gatk4 applybqsr test_gatk4_applybqsr_spark_cram - name: gatk4 applybqsr test_gatk4_applybqsr_spark_cram

View file

@ -3,26 +3,55 @@
nextflow.enable.dsl = 2 nextflow.enable.dsl = 2
include { GATK4_MARKDUPLICATES_SPARK } from '../../../../modules/gatk4/markduplicatesspark/main.nf' include { GATK4_MARKDUPLICATES_SPARK } from '../../../../modules/gatk4/markduplicatesspark/main.nf'
include { GATK4_MARKDUPLICATES_SPARK as GATK4_MARKDUPLICATES_SPARK_CRAM } from '../../../../modules/gatk4/markduplicatesspark/main.nf'
include { GATK4_MARKDUPLICATES_SPARK as GATK4_MARKDUPLICATES_SPARK_METRICS } from '../../../../modules/gatk4/markduplicatesspark/main.nf'
workflow test_gatk4_markduplicates_spark { workflow test_gatk4_markduplicates_spark {
input = [ [ id:'test', single_end:false ], // meta map input = [ [ id:'test', single_end:false ], // meta map
file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true)
] ]
fasta = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true) fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
fai = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta_fai'], checkIfExists: true) fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true)
dict = file(params.test_data['homo_sapiens']['genome']['genome_21_dict'], checkIfExists: true) dict = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true)
GATK4_MARKDUPLICATES_SPARK ( input, fasta, fai, dict ) GATK4_MARKDUPLICATES_SPARK ( input, fasta, fai, dict )
} }
// chr 22
workflow test_gatk4_markduplicates_spark_multiple_bams { workflow test_gatk4_markduplicates_spark_multiple_bams {
input = [ [ id:'test', single_end:false ], // meta map input = [ [ id:'test', single_end:false ], // meta map
[ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_name_sorted_bam'], checkIfExists: true),
file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam'], checkIfExists: true) file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_name_sorted_bam'], checkIfExists: true)
] ] ] ]
fasta = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true) fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
fai = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta_fai'], checkIfExists: true) fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
dict = file(params.test_data['homo_sapiens']['genome']['genome_21_dict'], checkIfExists: true) dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
GATK4_MARKDUPLICATES_SPARK ( input, fasta, fai, dict ) GATK4_MARKDUPLICATES_SPARK ( input, fasta, fai, dict )
} }
// chr 22
// Multiple name-sorted human BAMs, run through the ..._CRAM alias so the
// module configuration can select CRAM output for this entry point.
workflow test_gatk4_markduplicates_spark_multiple_bams_cram_out {
input = [ [ id:'test', single_end:false ], // meta map
[ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_name_sorted_bam'], checkIfExists: true),
file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_name_sorted_bam'], checkIfExists: true)
] ]
// Human reference bundle: FASTA, .fai index, sequence dictionary.
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
GATK4_MARKDUPLICATES_SPARK_CRAM ( input, fasta, fai, dict )
}
// chr 22
// Same multi-BAM input, run through the ..._METRICS alias so the module
// configuration can request a duplication-metrics file for this entry point.
workflow test_gatk4_markduplicates_spark_multiple_bams_metrics {
input = [ [ id:'test', single_end:false ], // meta map
[ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_name_sorted_bam'], checkIfExists: true),
file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_name_sorted_bam'], checkIfExists: true)
] ]
// Human reference bundle: FASTA, .fai index, sequence dictionary.
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
GATK4_MARKDUPLICATES_SPARK_METRICS ( input, fasta, fai, dict )
}

View file

@ -2,4 +2,18 @@ process {
publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
withName: GATK4_MARKDUPLICATES_SPARK {
ext.prefix = { "${meta.id}.bam" }
}
withName: GATK4_MARKDUPLICATES_SPARK_CRAM {
ext.prefix = { "${meta.id}.cram" }
}
withName: GATK4_MARKDUPLICATES_SPARK_METRICS {
ext.args = '--metrics-file test.metrics'
ext.prefix = { "${meta.id}.bam" }
}
} }
// override tests/config/nextflow.config
docker.userEmulation = false

View file

@ -1,25 +1,41 @@
- name: gatk4 markduplicates test_gatk4_markduplicates_spark - name: gatk4 markduplicatesspark test_gatk4_markduplicates_spark
command: nextflow run tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark -c tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config command: nextflow run ./tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config
tags: tags:
- gatk4 - gatk4
- gatk4/markduplicatesspark - gatk4/markduplicatesspark
files: files:
- path: output/gatk4/test.bai
md5sum: e9c125e82553209933883b4fe2b8d7c2
- path: output/gatk4/test.bam - path: output/gatk4/test.bam
md5sum: 2efd50b2e6b7fd9bdf242cd9e266cfa9 md5sum: dc1a09ac6371aab7c50d1a554baa06d3
- path: output/gatk4/test.metrics
- path: output/gatk4/versions.yml - path: output/gatk4/versions.yml
- name: gatk4 markduplicates test_gatk4_markduplicates_spark_multiple_bams - name: gatk4 markduplicatesspark test_gatk4_markduplicates_spark_multiple_bams
command: nextflow run tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark_multiple_bams -c tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config command: nextflow run ./tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark_multiple_bams -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config
tags: tags:
- gatk4 - gatk4
- gatk4/markduplicatesspark - gatk4/markduplicatesspark
files: files:
- path: output/gatk4/test.bai
md5sum: bad71df9c876e72a5bc0a3e0fd755f92
- path: output/gatk4/test.bam - path: output/gatk4/test.bam
md5sum: 8187febc6108ffef7f907e89b9c091a4 md5sum: 898cb0a6616897d8ada90bab53bf0837
- path: output/gatk4/test.metrics - path: output/gatk4/versions.yml
- name: gatk4 markduplicatesspark test_gatk4_markduplicates_spark_multiple_bams_cram_out
command: nextflow run ./tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark_multiple_bams_cram_out -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config
tags:
- gatk4
- gatk4/markduplicatesspark
files:
- path: output/gatk4/test.cram
md5sum: 2271016de5e4199736598f39d12d7587
- path: output/gatk4/versions.yml
- name: gatk4 markduplicatesspark test_gatk4_markduplicates_spark_multiple_bams_metrics
command: nextflow run ./tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark_multiple_bams_metrics -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config
tags:
- gatk4
- gatk4/markduplicatesspark
files:
- path: output/gatk4/test.bam
md5sum: 898cb0a6616897d8ada90bab53bf0837
- path: output/gatk4/test.metrics
contains: ["## METRICS CLASS", "org.broadinstitute.hellbender.utils.read.markduplicates.GATKDuplicationMetrics"]
- path: output/gatk4/versions.yml - path: output/gatk4/versions.yml

View file

@ -0,0 +1,55 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { GATK4_REBLOCKGVCF } from '../../../../modules/gatk4/reblockgvcf/main.nf'
// Base case: reblock a GVCF with no intervals file (the trailing [] in the
// input tuple) and no dbSNP resource (the two trailing [] arguments).
workflow test_gatk4_reblockgvcf {
input = [
[ id:'test', single_end:false ], // meta map
file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true),
file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true),
[]
]
// SARS-CoV-2 reference bundle: FASTA, .fai index, sequence dictionary.
fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
fasta_index = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true)
dict = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true)
GATK4_REBLOCKGVCF ( input, fasta, fasta_index, dict, [], [] )
}
// Variant with a BED intervals file as the fourth element of the input tuple;
// still no dbSNP resource (trailing [] arguments).
workflow test_gatk4_reblockgvcf_intervals {
input = [
[ id:'test', single_end:false ], // meta map
file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true),
file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true),
file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true)
]
// SARS-CoV-2 reference bundle: FASTA, .fai index, sequence dictionary.
fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
fasta_index = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true)
dict = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true)
GATK4_REBLOCKGVCF ( input, fasta, fasta_index, dict, [], [] )
}
// Variant exercising the dbSNP resource arguments (VCF + index), using a
// human GVCF; no intervals file (trailing [] in the input tuple).
workflow test_gatk4_reblockgvcf_dbsnp {
input = [
[ id:'test', single_end:false ], // meta map
file(params.test_data['homo_sapiens']['illumina']['test_haplotc_cnn_vcf_gz'], checkIfExists: true),
file(params.test_data['homo_sapiens']['illumina']['test_haplotc_cnn_vcf_gz_tbi'], checkIfExists: true),
[]
]
// Human reference bundle: FASTA, .fai index, sequence dictionary.
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
fasta_index = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
// dbSNP 146 (hg38) VCF and its tabix index.
dbsnp = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz'], checkIfExists: true)
dbsnp_tbi = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz_tbi'], checkIfExists: true)
GATK4_REBLOCKGVCF ( input, fasta, fasta_index, dict, dbsnp, dbsnp_tbi )
}

View file

@ -0,0 +1,5 @@
process {
publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
}

View file

@ -0,0 +1,26 @@
- name: gatk4 reblockgvcf test_gatk4_reblockgvcf
command: nextflow run ./tests/modules/gatk4/reblockgvcf -entry test_gatk4_reblockgvcf -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/reblockgvcf/nextflow.config
tags:
- gatk4/reblockgvcf
- gatk4
files:
- path: output/gatk4/test.rb.g.vcf.gz
- path: output/gatk4/test.rb.g.vcf.gz.tbi
- name: gatk4 reblockgvcf test_gatk4_reblockgvcf_intervals
command: nextflow run ./tests/modules/gatk4/reblockgvcf -entry test_gatk4_reblockgvcf_intervals -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/reblockgvcf/nextflow.config
tags:
- gatk4/reblockgvcf
- gatk4
files:
- path: output/gatk4/test.rb.g.vcf.gz
- path: output/gatk4/test.rb.g.vcf.gz.tbi
- name: gatk4 reblockgvcf test_gatk4_reblockgvcf_dbsnp
command: nextflow run ./tests/modules/gatk4/reblockgvcf -entry test_gatk4_reblockgvcf_dbsnp -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/reblockgvcf/nextflow.config
tags:
- gatk4/reblockgvcf
- gatk4
files:
- path: output/gatk4/test.rb.g.vcf.gz
- path: output/gatk4/test.rb.g.vcf.gz.tbi

View file

@ -0,0 +1,16 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { SAMTOOLS_DEPTH } from '../../../modules/samtools/depth/main.nf'
include { SEXDETERRMINE } from '../../../modules/sexdeterrmine/main.nf'
// Pipe a human single-end BAM through SAMTOOLS_DEPTH, then hand the depth
// TSV to SEXDETERRMINE (second argument []: no extra input for this test).
workflow test_sexdeterrmine {
bam_tuple = [
[ id:'test', single_end:false ], // meta map
file(params.test_data['homo_sapiens']['illumina']['test3_single_end_markduplicates_sorted_bam'], checkIfExists: true)
]
SAMTOOLS_DEPTH ( bam_tuple )
depth_tsv = SAMTOOLS_DEPTH.out.tsv
SEXDETERRMINE ( depth_tsv, [] )
}

View file

@ -0,0 +1,12 @@
// Publish outputs under a directory named after each module; pass "-H" to
// samtools depth (presumably to emit a header line — TODO confirm) and
// suffix SexDetErrmine output names with "_sexdet".
process {
publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
withName:SAMTOOLS_DEPTH {
ext.args = "-H"
}
withName:SEXDETERRMINE {
ext.prefix = { "${meta.id}_sexdet" }
}
}

View file

@ -0,0 +1,15 @@
- name: sexdeterrmine test_sexdeterrmine
command: nextflow run ./tests/modules/sexdeterrmine -entry test_sexdeterrmine -c ./tests/config/nextflow.config -c ./tests/modules/sexdeterrmine/nextflow.config
tags:
- sexdeterrmine
files:
- path: output/samtools/test.tsv
md5sum: fa2992ca1ea93a6e1b3e838476191935
- path: output/samtools/versions.yml
md5sum: dbd04b700335c8ad236bd667254c8dd8
- path: output/sexdeterrmine/sexdeterrmine.json
md5sum: bafb2419bb8630eda29a251c20e97166
- path: output/sexdeterrmine/test_sexdet.tsv
md5sum: 1cf8a2b97b38353eb97a96ab872dcca9
- path: output/sexdeterrmine/versions.yml
md5sum: 077361101e8e7997aec3da8a01e59eee

View file

@ -16,9 +16,9 @@ def _get_workflow_names():
# test_config = yaml.safe_load(f.read_text()) # test_config = yaml.safe_load(f.read_text())
test_config = yaml.load(f.read_text(), Loader=yaml.BaseLoader) test_config = yaml.load(f.read_text(), Loader=yaml.BaseLoader)
for workflow in test_config: for workflow in test_config:
# https://github.com/nf-core/modules/pull/1242 - added to cover tests # https://github.com/nf-core/modules/pull/1242 - added to cover tests
# that expect an error and therefore will not generate a versions.yml # that expect an error and therefore will not generate a versions.yml
if 'exit_code' not in workflow: if 'exit_code' not in workflow:
yield workflow["name"] yield workflow["name"]
@ -56,5 +56,5 @@ def test_ensure_valid_version_yml(workflow_dir):
assert len(software_versions), "There must be at least one version emitted." assert len(software_versions), "There must be at least one version emitted."
for tool, version in software_versions.items(): for tool, version in software_versions.items():
assert re.match( assert re.match(
r"^\d+.*", str(version) r"^\d.*|^[a-f0-9]{40}$", str(version)
), f"Version number for {tool} must start with a number. " ), f"Version number for {tool} must start with a number, or be a Git SHA commit id. "