diff --git a/modules/ampir/main.nf b/modules/ampir/main.nf new file mode 100644 index 00000000..57e20902 --- /dev/null +++ b/modules/ampir/main.nf @@ -0,0 +1,48 @@ +process AMPIR { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? "conda-forge::r-ampir=1.1.0" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/r-ampir:1.1.0': + 'quay.io/biocontainers/r-ampir:1.1.0' }" + + input: + tuple val(meta), path(faa) + val model + val min_length + val min_probability + + output: + tuple val(meta), path("*.faa"), emit: amps_faa + tuple val(meta), path("*.tsv"), emit: amps_tsv + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + min_length = ("${min_length}" == "[]") ? "": " min_len = as.integer(${min_length})," // Fall back to AMPir default value if none specified + if ("$faa" == "${prefix}.faa") error "Input and output names are the same, set prefix in module configuration to disambiguate!" 
+ """ + #!/usr/bin/env Rscript + library(ampir) + + input_seqs <- read_faa('${faa}') + prediction <- predict_amps(input_seqs,${min_length} model = '${model}') + prediction <- prediction[which(prediction\$prob_AMP >= as.numeric(${min_probability})), ] + output_seqs <- input_seqs[row.names(prediction), ] + write.table(prediction, file = "${prefix}.tsv", row.names = FALSE, sep = "\t", quote = FALSE, dec = '.') + df_to_faa(output_seqs, "${prefix}.faa") + + version_file_path <- "versions.yml" + version_ampir <- paste(unlist(packageVersion("ampir")), collapse = ".") + f <- file(version_file_path, "w") + writeLines('"${task.process}":', f) + writeLines(" ampir: ", f, sep = "") + writeLines(version_ampir, f) + close(f) + """ +} diff --git a/modules/ampir/meta.yml b/modules/ampir/meta.yml new file mode 100644 index 00000000..7569ca69 --- /dev/null +++ b/modules/ampir/meta.yml @@ -0,0 +1,59 @@ +name: "ampir" +description: A fast and user-friendly method to predict antimicrobial peptides (AMPs) from any given size protein dataset. ampir uses a supervised statistical machine learning approach to predict AMPs. +keywords: + - ampir + - amp + - antimicrobial peptide prediction +tools: + - "ampir": + description: "A toolkit to predict antimicrobial peptides from protein sequences on a genome-wide scale." + homepage: "https://github.com/Legana/ampir" + documentation: "https://cran.r-project.org/web/packages/ampir/index.html" + tool_dev_url: "https://github.com/Legana/ampir" + doi: "10.1093/bioinformatics/btaa653" + licence: ["GPL v2"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - faa: + type: file + description: FASTA file containing amino acid sequences + pattern: "*.{faa,fasta}" + - model: + type: value + description: Built-in model for AMP prediction + pattern: "{precursor,mature}" + - min_length: + type: value + description: Minimum protein length for which predictions will be generated + pattern: "[0-9]+" + - min_probability: + type: value + description: Cut-off for AMP prediction + pattern: "[0-9].[0-9]+" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - amps_faa: + type: file + description: File containing AMP predictions in amino acid FASTA format + pattern: "*.faa" + - amps_tsv: + type: file + description: File containing AMP predictions in TSV format + pattern: "*.tsv" + +authors: + - "@jasmezz" diff --git a/modules/cellranger/Dockerfile b/modules/cellranger/Dockerfile index 3e52ca6a..266a24ef 100644 --- a/modules/cellranger/Dockerfile +++ b/modules/cellranger/Dockerfile @@ -1,4 +1,4 @@ -# Dockerfile to create container with Cell Ranger v6.1.2 +# Dockerfile to create container with Cell Ranger v7.0.0 # Push to nfcore/cellranger: FROM continuumio/miniconda3:4.8.2 diff --git a/modules/cellranger/count/main.nf b/modules/cellranger/count/main.nf index 6a206b6e..7413c990 100644 --- a/modules/cellranger/count/main.nf +++ b/modules/cellranger/count/main.nf @@ -5,7 +5,7 @@ process CELLRANGER_COUNT { if (params.enable_conda) { exit 1, "Conda environments cannot be used when using the Cell Ranger tool. Please use docker or singularity containers." 
} - container "nfcore/cellranger:6.1.2" + container "nfcore/cellranger:7.0.0" input: tuple val(meta), path(reads) diff --git a/modules/cellranger/mkfastq/Dockerfile b/modules/cellranger/mkfastq/Dockerfile index 8660293a..7b6b84ec 100644 --- a/modules/cellranger/mkfastq/Dockerfile +++ b/modules/cellranger/mkfastq/Dockerfile @@ -1,4 +1,4 @@ -# Dockerfile to create container with Cell Ranger v6.1.2 and bcl2fastq v2.20.0 +# Dockerfile to create container with Cell Ranger v7.0.0 and bcl2fastq v2.20.0 # Push to nfcore/cellrangermkfastq: FROM continuumio/miniconda3:4.8.2 @@ -17,7 +17,7 @@ RUN apt-get update --allow-releaseinfo-change \ # Copy pre-downloaded bcl2fastq2 and cellranger file ENV BCL2FASTQ2_VER=v2-20-0-linux-x86-64 \ - CELLRANGER_VER=6.1.2 + CELLRANGER_VER=7.0.0 COPY bcl2fastq2-$BCL2FASTQ2_VER.zip /tmp/bcl2fastq2-$BCL2FASTQ2_VER.zip COPY cellranger-$CELLRANGER_VER.tar.gz /opt/cellranger-$CELLRANGER_VER.tar.gz diff --git a/modules/cellranger/mkfastq/main.nf b/modules/cellranger/mkfastq/main.nf index 9c023bca..30c638ea 100644 --- a/modules/cellranger/mkfastq/main.nf +++ b/modules/cellranger/mkfastq/main.nf @@ -5,7 +5,7 @@ process CELLRANGER_MKFASTQ { if (params.enable_conda) { exit 1, "Conda environments cannot be used when using the Cell Ranger tool. Please use docker or singularity containers." } - container "nfcore/cellrangermkfastq:6.1.2" + container "nfcore/cellrangermkfastq:7.0.0" input: path bcl diff --git a/modules/cellranger/mkgtf/main.nf b/modules/cellranger/mkgtf/main.nf index 9dc2fe1a..a115706d 100644 --- a/modules/cellranger/mkgtf/main.nf +++ b/modules/cellranger/mkgtf/main.nf @@ -5,7 +5,7 @@ process CELLRANGER_MKGTF { if (params.enable_conda) { exit 1, "Conda environments cannot be used when using the Cell Ranger tool. Please use docker or singularity containers." 
} - container "nfcore/cellranger:6.1.2" + container "nfcore/cellranger:7.0.0" input: path gtf diff --git a/modules/cellranger/mkref/main.nf b/modules/cellranger/mkref/main.nf index 11cf80e3..e1bfebde 100644 --- a/modules/cellranger/mkref/main.nf +++ b/modules/cellranger/mkref/main.nf @@ -5,7 +5,7 @@ process CELLRANGER_MKREF { if (params.enable_conda) { exit 1, "Conda environments cannot be used when using the Cell Ranger tool. Please use docker or singularity containers." } - container "nfcore/cellranger:6.1.2" + container "nfcore/cellranger:7.0.0" input: path fasta diff --git a/modules/ensemblvep/main.nf b/modules/ensemblvep/main.nf index a5a9b1ab..d2efe35f 100644 --- a/modules/ensemblvep/main.nf +++ b/modules/ensemblvep/main.nf @@ -13,6 +13,7 @@ process ENSEMBLVEP { val species val cache_version path cache + path fasta path extra_files output: @@ -27,6 +28,8 @@ process ENSEMBLVEP { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def dir_cache = cache ? "\${PWD}/${cache}" : "/.vep" + def reference = fasta ? "--fasta $fasta" : "" + """ mkdir $prefix @@ -34,6 +37,7 @@ process ENSEMBLVEP { -i $vcf \\ -o ${prefix}.ann.vcf \\ $args \\ + $reference \\ --assembly $genome \\ --species $species \\ --cache \\ diff --git a/modules/ensemblvep/meta.yml b/modules/ensemblvep/meta.yml index 418bb970..9891815d 100644 --- a/modules/ensemblvep/meta.yml +++ b/modules/ensemblvep/meta.yml @@ -36,6 +36,11 @@ input: type: file description: | path to VEP cache (optional) + - fasta: + type: file + description: | + reference FASTA file (optional) + pattern: "*.{fasta,fa}" - extra_files: type: tuple description: | diff --git a/modules/gatk/indelrealigner/main.nf b/modules/gatk/indelrealigner/main.nf new file mode 100644 index 00000000..94cf78da --- /dev/null +++ b/modules/gatk/indelrealigner/main.nf @@ -0,0 +1,54 @@ +process GATK_INDELREALIGNER { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? 
"bioconda::gatk=3.5" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gatk:3.5--hdfd78af_11': + 'quay.io/biocontainers/gatk:3.5--hdfd78af_11' }" + + input: + tuple val(meta), path(bam), path(bai), path(intervals) + path(fasta) + path(fai) + path(dict) + path(known_vcf) + + output: + tuple val(meta), path("*.bam"), path("*.bai"), emit: bam + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def known = known_vcf ? "-known ${known_vcf}" : "" + + if ("$bam" == "${prefix}.bam") error "Input and output names are the same, set prefix in module configuration to disambiguate!" + + def avail_mem = 3 + if (!task.memory) { + log.info '[GATK IndelRealigner] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = task.memory.giga + } + + """ + gatk3 \\ + -Xmx${avail_mem}g \\ + -T IndelRealigner \\ + -R ${fasta} \\ + -I ${bam} \\ + --targetIntervals ${intervals} \\ + ${known} \\ + -o ${prefix}.bam \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk: \$(echo \$(gatk3 --version)) + END_VERSIONS + """ +} diff --git a/modules/gatk/indelrealigner/meta.yml b/modules/gatk/indelrealigner/meta.yml new file mode 100644 index 00000000..35ad28e8 --- /dev/null +++ b/modules/gatk/indelrealigner/meta.yml @@ -0,0 +1,71 @@ +name: "gatk_indelrealigner" +description: Performs local realignment around indels to correct for mapping errors +keywords: + - bam + - vcf + - variant calling + - indel + - realignment +tools: + - "gatk": + description: "The full Genome Analysis Toolkit (GATK) framework, license restricted." 
+ homepage: "https://gatk.broadinstitute.org/hc/en-us" + documentation: "https://github.com/broadinstitute/gatk-docs" + licence: ['https://software.broadinstitute.org/gatk/download/licensing', 'BSD', 'https://www.broadinstitute.org/gatk/about/#licensing'] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: Sorted and indexed BAM file + pattern: "*.bam" + - bai: + type: file + description: BAM index file + pattern: "*.bai" + - intervals: + type: file + description: Intervals file created by gatk3 RealignerTargetCreator + pattern: "*.{intervals,list}" + - fasta: + type: file + description: Reference file used to generate BAM file + pattern: "*.{fasta,fa,fna}" + - fai: + type: file + description: Index of reference file used to generate BAM file + pattern: "*.fai" + - dict: + type: file + description: GATK dict file for reference + pattern: "*.dict" + - known_vcf: + type: file + description: Optional input VCF file(s) with known indels + pattern: "*.vcf" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - bam: + type: file + description: Sorted and indexed BAM file with local realignment around variants + pattern: "*.bam" + - bai: + type: file + description: Output BAM Index file + pattern: "*.bai" + +authors: + - "@jfy133" diff --git a/modules/gatk/realignertargetcreator/main.nf b/modules/gatk/realignertargetcreator/main.nf new file mode 100644 index 00000000..ba6a2592 --- /dev/null +++ b/modules/gatk/realignertargetcreator/main.nf @@ -0,0 +1,53 @@ +process GATK_REALIGNERTARGETCREATOR { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? 
"bioconda::gatk=3.5" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gatk:3.5--hdfd78af_11': + 'quay.io/biocontainers/gatk:3.5--hdfd78af_11' }" + + input: + tuple val(meta), path(input), path(index) + path fasta + path fai + path dict + path known_vcf + + output: + tuple val(meta), path("*.intervals"), emit: intervals + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def known = known_vcf ? "-known ${known_vcf}" : "" + if ("$input" == "${prefix}.bam") error "Input and output names are the same, set prefix in module configuration to disambiguate!" + + def avail_mem = 3 + if (!task.memory) { + log.info '[GATK RealignerTargetCreator] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = task.memory.giga + } + + """ + gatk3 \\ + -Xmx${avail_mem}g \\ + -T RealignerTargetCreator \\ + -nt ${task.cpus} \\ + -I ${input} \\ + -R ${fasta} \\ + -o ${prefix}.intervals \\ + ${known} \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk: \$(echo \$(gatk3 --version)) + END_VERSIONS + """ +} diff --git a/modules/gatk/realignertargetcreator/meta.yml b/modules/gatk/realignertargetcreator/meta.yml new file mode 100644 index 00000000..c49d2a8d --- /dev/null +++ b/modules/gatk/realignertargetcreator/meta.yml @@ -0,0 +1,64 @@ +name: "gatk_realignertargetcreator" +description: Generates a list of locations that should be considered for local realignment prior genotyping. +keywords: + - bam + - vcf + - variant calling + - indel + - realignment + - targets +tools: + - "gatk": + description: "The full Genome Analysis Toolkit (GATK) framework, license restricted." 
+ homepage: "https://gatk.broadinstitute.org/hc/en-us" + documentation: "https://github.com/broadinstitute/gatk-docs" + licence: ['https://software.broadinstitute.org/gatk/download/licensing', 'BSD', 'https://www.broadinstitute.org/gatk/about/#licensing'] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: Sorted and indexed BAM/CRAM/SAM file + pattern: "*.bam" + - index: + type: file + description: BAM index file + pattern: "*.bai" + - fasta: + type: file + description: Reference file used to generate BAM file + pattern: "*.{fasta,fa,fna}" + - fai: + type: file + description: Index of reference file used to generate BAM file + pattern: "*.fai" + - dict: + type: file + description: GATK dict file for reference + pattern: "*.dict" + - known_vcf: + type: file + description: Optional input VCF file(s) with known indels + pattern: "*.vcf" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - intervals: + type: file + description: File containing intervals that represent sites of extant and potential indels. + pattern: "*.intervals" + +authors: + - "@jfy133" diff --git a/modules/gatk4/applybqsrspark/main.nf b/modules/gatk4/applybqsrspark/main.nf index 9d7891ba..5890278a 100644 --- a/modules/gatk4/applybqsrspark/main.nf +++ b/modules/gatk4/applybqsrspark/main.nf @@ -2,10 +2,8 @@ process GATK4_APPLYBQSR_SPARK { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.2.6.1--hdfd78af_0': - 'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }" + conda (params.enable_conda ? 
"bioconda::gatk4=4.2.6.1 conda-forge::openjdk=8.0.312" : null) + container 'broadinstitute/gatk:4.2.6.1' input: tuple val(meta), path(input), path(input_index), path(bqsr_table), path(intervals) diff --git a/modules/gatk4/baserecalibratorspark/main.nf b/modules/gatk4/baserecalibratorspark/main.nf index 79988b72..755122cb 100644 --- a/modules/gatk4/baserecalibratorspark/main.nf +++ b/modules/gatk4/baserecalibratorspark/main.nf @@ -2,10 +2,8 @@ process GATK4_BASERECALIBRATOR_SPARK { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.2.6.1--hdfd78af_0': - 'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }" + conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1 conda-forge::openjdk=8.0.312" : null) + container 'broadinstitute/gatk:4.2.6.1' input: tuple val(meta), path(input), path(input_index), path(intervals) diff --git a/modules/gatk4/markduplicatesspark/main.nf b/modules/gatk4/markduplicatesspark/main.nf index 77e135db..b8c315fc 100644 --- a/modules/gatk4/markduplicatesspark/main.nf +++ b/modules/gatk4/markduplicatesspark/main.nf @@ -2,10 +2,8 @@ process GATK4_MARKDUPLICATES_SPARK { tag "$meta.id" label 'process_high' - conda (params.enable_conda ? "bioconda::gatk4=4.2.3.0" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.2.3.0--hdfd78af_0' : - 'broadinstitute/gatk:4.2.3.0' }" + conda (params.enable_conda ? 
"bioconda::gatk4=4.2.6.1 conda-forge::openjdk=8.0.312" : null) + container 'broadinstitute/gatk:4.2.6.1' input: tuple val(meta), path(bam) @@ -14,8 +12,9 @@ process GATK4_MARKDUPLICATES_SPARK { path dict output: - tuple val(meta), path("${prefix}"), emit: output - path "versions.yml" , emit: versions + tuple val(meta), path("${prefix}"), emit: output + tuple val(meta), path("*.metrics"), emit: metrics, optional: true + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -25,6 +24,7 @@ process GATK4_MARKDUPLICATES_SPARK { prefix = task.ext.prefix ?: "${meta.id}" def input_list = bam.collect{"--input $it"}.join(' ') + def avail_mem = 3 if (!task.memory) { log.info '[GATK MarkDuplicatesSpark] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' @@ -32,8 +32,6 @@ process GATK4_MARKDUPLICATES_SPARK { avail_mem = task.memory.giga } """ - export SPARK_USER=spark3 - gatk --java-options "-Xmx${avail_mem}g" MarkDuplicatesSpark \\ $input_list \\ --output $prefix \\ @@ -45,6 +43,7 @@ process GATK4_MARKDUPLICATES_SPARK { cat <<-END_VERSIONS > versions.yml "${task.process}": gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + openjdk: \$(echo \$(java -version 2>&1) | grep version | sed 's/\"//g' | cut -f3 -d ' ') END_VERSIONS """ } diff --git a/modules/gatk4/markduplicatesspark/meta.yml b/modules/gatk4/markduplicatesspark/meta.yml index bf3e02ba..59be9b6d 100644 --- a/modules/gatk4/markduplicatesspark/meta.yml +++ b/modules/gatk4/markduplicatesspark/meta.yml @@ -58,3 +58,4 @@ authors: - "@ajodeh-juma" - "@FriederikeHanssen" - "@maxulysse" + - "@SusiJo" diff --git a/modules/gatk4/reblockgvcf/main.nf b/modules/gatk4/reblockgvcf/main.nf new file mode 100644 index 00000000..5640e8ae --- /dev/null +++ b/modules/gatk4/reblockgvcf/main.nf @@ -0,0 +1,52 @@ +process GATK4_REBLOCKGVCF { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? 
"bioconda::gatk4=4.2.6.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gatk4:4.2.6.1--hdfd78af_0': + 'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }" + + input: + tuple val(meta), path(gvcf), path(tbi), path(intervals) + path fasta + path fai + path dict + path dbsnp + path dbsnp_tbi + + output: + tuple val(meta), path("*.rb.g.vcf.gz"), path("*.tbi") , emit: vcf + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def dbsnp_command = dbsnp ? "--dbsnp $dbsnp" : "" + def interval_command = intervals ? "--intervals $intervals" : "" + + def avail_mem = 3 + if (!task.memory) { + log.info '[GATK ReblockGVCF] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = task.memory.giga + } + """ + gatk --java-options "-Xmx${avail_mem}g" ReblockGVCF \\ + --variant $gvcf \\ + --output ${prefix}.rb.g.vcf.gz \\ + --reference $fasta \\ + $dbsnp_command \\ + $interval_command \\ + --tmp-dir . \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/gatk4/reblockgvcf/meta.yml b/modules/gatk4/reblockgvcf/meta.yml new file mode 100644 index 00000000..23518416 --- /dev/null +++ b/modules/gatk4/reblockgvcf/meta.yml @@ -0,0 +1,74 @@ +name: "gatk4_reblockgvcf" +description: Condenses homRef blocks in a single-sample GVCF +keywords: + - gatk4 + - reblockgvcf + - gvcf +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. 
Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - gvcf: + type: file + description: GVCF file created using HaplotypeCaller using the '-ERC GVCF' or '-ERC BP_RESOLUTION' mode + pattern: "*.{vcf,gvcf}.gz" + - tbi: + type: file + description: Index of the GVCF file + pattern: "*.tbi" + - intervals: + type: file + description: Bed file with the genomic regions included in the library (optional) + - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + - fai: + type: file + description: Index of reference fasta file + pattern: "fasta.fai" + - dict: + type: file + description: GATK sequence dictionary + pattern: "*.dict" + - dbsnp: + type: file + description: VCF file containing known sites (optional) + - dbsnp_tbi: + type: file + description: VCF index of dbsnp (optional) + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - gvcf: + type: file + description: Filtered GVCF + pattern: "*rb.g.vcf.gz" + - tbi: + type: file + description: Index of the filtered GVCF + pattern: "*rb.g.vcf.gz.tbi" + +authors: + - "@nvnieuwk" diff --git a/modules/sexdeterrmine/main.nf b/modules/sexdeterrmine/main.nf new file mode 100644 index 00000000..98c72307 --- /dev/null +++ b/modules/sexdeterrmine/main.nf @@ -0,0 +1,40 @@ +process SEXDETERRMINE { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? 
"bioconda::sexdeterrmine=1.1.2" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/sexdeterrmine:1.1.2--hdfd78af_1': + 'quay.io/biocontainers/sexdeterrmine:1.1.2--hdfd78af_1' }" + + input: + tuple val(meta), path(depth) + path sample_list_file + + output: + tuple val(meta), path("*.json"), emit: json + tuple val(meta), path("*.tsv") , emit: tsv + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def sample_list = sample_list_file ? "-f ${sample_list_file}" : '' // double-quoted so Groovy interpolates the staged file name; single quotes would leave a literal ${...} for bash to expand to nothing + if ("$depth" == "${prefix}.tsv") error "Input depth and output TSV names are the same, set prefix in module configuration to disambiguate!" + + """ + sexdeterrmine \\ + -I $depth \\ + $sample_list \\ + $args \\ + > ${prefix}.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + sexdeterrmine: \$(echo \$(sexdeterrmine --version 2>&1)) + END_VERSIONS + """ +} diff --git a/modules/sexdeterrmine/meta.yml b/modules/sexdeterrmine/meta.yml new file mode 100644 index 00000000..3f56b0ab --- /dev/null +++ b/modules/sexdeterrmine/meta.yml @@ -0,0 +1,48 @@ +name: "sexdeterrmine" +description: Calculate the relative coverage on the Gonosomes vs Autosomes from the output of samtools depth, with error bars. +keywords: + - sex determination + - genetic sex + - relative coverage + - ancient dna +tools: + - "sexdeterrmine": + description: "A Python script to calculate the relative coverage of X and Y chromosomes, and their associated error bars, out of capture data." 
+ homepage: "https://github.com/TCLamnidis/Sex.DetERRmine" + documentation: "https://github.com/TCLamnidis/Sex.DetERRmine/README.md" + tool_dev_url: "https://github.com/TCLamnidis/Sex.DetERRmine" + doi: "https://doi.org/10.1038/s41467-018-07483-5" + licence: "['GPL v3']" + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - depth: + type: file + description: Output from samtools depth (with header) + pattern: "*" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - json: + type: file + description: JSON formatted table of relative coverages on the X and Y, with associated error bars. + pattern: "*.json" + - tsv: + type: file + description: TSV table of relative coverages on the X and Y, with associated error bars. + pattern: "*.tsv" + +authors: + - "@TCLamnidis" diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index 680943da..3915a4ac 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -26,6 +26,10 @@ allelecounter: - modules/allelecounter/** - tests/modules/allelecounter/** +ampir: + - modules/ampir/** + - tests/modules/ampir/** + amplify/predict: - modules/amplify/predict/** - tests/modules/amplify/predict/** @@ -715,6 +719,14 @@ gamma/gamma: - modules/gamma/gamma/** - tests/modules/gamma/gamma/** +gatk/indelrealigner: + - modules/gatk/indelrealigner/** + - tests/modules/gatk/indelrealigner/** + +gatk/realignertargetcreator: + - modules/gatk/realignertargetcreator/** + - tests/modules/gatk/realignertargetcreator/** + gatk/unifiedgenotyper: - modules/gatk/unifiedgenotyper/** - tests/modules/gatk/unifiedgenotyper/** @@ -851,6 +863,10 @@ gatk4/mutect2: - modules/gatk4/mutect2/** - tests/modules/gatk4/mutect2/** +gatk4/reblockgvcf: + - 
modules/gatk4/reblockgvcf/** + - tests/modules/gatk4/reblockgvcf/** + gatk4/revertsam: - modules/gatk4/revertsam/** - tests/modules/gatk4/revertsam/** @@ -1843,6 +1859,10 @@ seqwish/induce: - modules/seqwish/induce/** - tests/modules/seqwish/induce/** +sexdeterrmine: + - modules/sexdeterrmine/** + - tests/modules/sexdeterrmine/** + shasum: - modules/shasum/** - tests/modules/shasum/** diff --git a/tests/config/test_data.config b/tests/config/test_data.config index df199642..e3713bd1 100644 --- a/tests/config/test_data.config +++ b/tests/config/test_data.config @@ -232,10 +232,11 @@ params { test2_paired_end_umi_unsorted_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/umi/test2.paired_end.umi_unsorted.bam" test2_paired_end_umi_unsorted_tagged_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/umi/test2.paired_end.unsorted_tagged.bam" - mitochon_standin_recalibrated_sorted_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/mitochon_standin.recalibrated.sorted.bam" mitochon_standin_recalibrated_sorted_bam_bai = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/mitochon_standin.recalibrated.sorted.bam.bai" + test3_single_end_markduplicates_sorted_bam = "${test_data_dir}/genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam" + test_paired_end_sorted_cram = "${test_data_dir}/genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram" test_paired_end_sorted_cram_crai = "${test_data_dir}/genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai" test_paired_end_markduplicates_sorted_cram = "${test_data_dir}/genomics/homo_sapiens/illumina/cram/test.paired_end.markduplicates.sorted.cram" diff --git a/tests/modules/ampir/main.nf b/tests/modules/ampir/main.nf new file mode 100644 index 00000000..bee2ff90 --- /dev/null +++ b/tests/modules/ampir/main.nf @@ -0,0 +1,20 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { AMPIR } from '../../../modules/ampir/main.nf' + +workflow test_ampir { + + 
fasta = [ [ id:'test', single_end:false ], // meta map + file(params.test_data['candidatus_portiera_aleyrodidarum']['genome']['proteome_fasta'], checkIfExists: true), + ] + + model = "precursor" + + min_length = [] + + min_probability = "0.7" + + AMPIR ( fasta, model, min_length, min_probability ) +} diff --git a/tests/modules/ampir/nextflow.config b/tests/modules/ampir/nextflow.config new file mode 100644 index 00000000..50f50a7a --- /dev/null +++ b/tests/modules/ampir/nextflow.config @@ -0,0 +1,5 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + +} \ No newline at end of file diff --git a/tests/modules/ampir/test.yml b/tests/modules/ampir/test.yml new file mode 100644 index 00000000..54921e9a --- /dev/null +++ b/tests/modules/ampir/test.yml @@ -0,0 +1,11 @@ +- name: ampir test_ampir + command: nextflow run ./tests/modules/ampir -entry test_ampir -c ./tests/config/nextflow.config -c ./tests/modules/ampir/nextflow.config + tags: + - ampir + files: + - path: output/ampir/test.tsv + contains: ["seq_name\tseq_aa\tprob_AMP", "WP_014895017.1"] + - path: output/ampir/test.faa + md5sum: 0435609144022c55ac196db053f0df89 + - path: output/ampir/versions.yml + md5sum: 4a11d25b8a904a7ffb34ae88f6826888 diff --git a/tests/modules/cellranger/count/test.yml b/tests/modules/cellranger/count/test.yml index 121d9eea..93e52c4a 100644 --- a/tests/modules/cellranger/count/test.yml +++ b/tests/modules/cellranger/count/test.yml @@ -1,19 +1,65 @@ - name: cellranger count test_cellranger_count - command: nextflow run tests/modules/cellranger/count -entry test_cellranger_count -c tests/config/nextflow.config -c tests/modules/cellranger/count/nextflow.config + command: nextflow run ./tests/modules/cellranger/count -entry test_cellranger_count -c ./tests/config/nextflow.config -c ./tests/modules/cellranger/count/nextflow.config tags: - - cellranger - cellranger/count + - cellranger files: + - path: 
output/cellranger/genome.filtered.gtf + md5sum: a8b8a7b5039e05d3a9cf9151ea138b5b + - path: output/cellranger/homo_sapiens_chr22_reference/fasta/genome.fa + md5sum: f315020d899597c1b57e5fe9f60f4c3e + - path: output/cellranger/homo_sapiens_chr22_reference/fasta/genome.fa.fai + md5sum: 3520cd30e1b100e55f578db9c855f685 + - path: output/cellranger/homo_sapiens_chr22_reference/genes/genes.gtf.gz + md5sum: d1e05cd46684fa26d852b6bc9f05e31f + - path: output/cellranger/homo_sapiens_chr22_reference/reference.json + md5sum: 8405fd7f527a944eafb9c2909045840b + - path: output/cellranger/homo_sapiens_chr22_reference/star/Genome + md5sum: 897cec2d191945335f8b320438bd9135 + - path: output/cellranger/homo_sapiens_chr22_reference/star/SA + md5sum: 7961129ac5d0e1706105be1d31c6b30c + - path: output/cellranger/homo_sapiens_chr22_reference/star/SAindex + md5sum: dcceb480b30cda93fb8c63ddc339093b + - path: output/cellranger/homo_sapiens_chr22_reference/star/chrLength.txt + md5sum: c81f40f27e72606d7d07097c1d56a5b5 + - path: output/cellranger/homo_sapiens_chr22_reference/star/chrName.txt + md5sum: 5ae68a67b70976ee95342a7451cb5af1 + - path: output/cellranger/homo_sapiens_chr22_reference/star/chrNameLength.txt + md5sum: b190587cae0531f3cf25552d8aa674db + - path: output/cellranger/homo_sapiens_chr22_reference/star/chrStart.txt + md5sum: bc73df776dd3d5bb9cfcbcba60880519 + - path: output/cellranger/homo_sapiens_chr22_reference/star/exonGeTrInfo.tab + md5sum: 9129691eeb4ed0d02b17be879fa3edb0 + - path: output/cellranger/homo_sapiens_chr22_reference/star/exonInfo.tab + md5sum: 209b82f0683efd03e17d2c729676554f + - path: output/cellranger/homo_sapiens_chr22_reference/star/geneInfo.tab + md5sum: 02a8f4575bdfcd4a42b4d8d07f2e9369 + - path: output/cellranger/homo_sapiens_chr22_reference/star/genomeParameters.txt + - path: output/cellranger/homo_sapiens_chr22_reference/star/sjdbInfo.txt + md5sum: 1082ab459363b3f2f7aabcef0979c1ed + - path: 
output/cellranger/homo_sapiens_chr22_reference/star/sjdbList.fromGTF.out.tab + - path: output/cellranger/homo_sapiens_chr22_reference/star/sjdbList.out.tab + - path: output/cellranger/homo_sapiens_chr22_reference/star/transcriptInfo.tab + md5sum: cedcb5f4e7d97bc548cd5daa022e092c - path: output/cellranger/sample-123/outs/filtered_feature_bc_matrix.h5 + md5sum: f8b6b7cc8248151a98c46d4ebec450c6 + - path: output/cellranger/sample-123/outs/filtered_feature_bc_matrix/barcodes.tsv.gz + - path: output/cellranger/sample-123/outs/filtered_feature_bc_matrix/features.tsv.gz + - path: output/cellranger/sample-123/outs/filtered_feature_bc_matrix/matrix.mtx.gz - path: output/cellranger/sample-123/outs/metrics_summary.csv md5sum: 707df0f101d479d93f412ca74f9c4131 - path: output/cellranger/sample-123/outs/molecule_info.h5 - md5sum: 0e56836ef0725f2ab05f56ca5a71e55b + md5sum: a13bd7425f441c8d0eac8ffc50082996 - path: output/cellranger/sample-123/outs/possorted_genome_bam.bam md5sum: 15441da9cfceea0bb48c8b66b1b860df - path: output/cellranger/sample-123/outs/possorted_genome_bam.bam.bai md5sum: 7c3d49c77016a09535aff61a027f750c - - path: output/cellranger/sample-123/outs/raw_feature_bc_matrix - path: output/cellranger/sample-123/outs/raw_feature_bc_matrix.h5 - md5sum: cdad1cd7b215d7137cf92515e81a8525 + md5sum: a5290f3e300a4070f3d68a0c2e215f54 + - path: output/cellranger/sample-123/outs/raw_feature_bc_matrix/barcodes.tsv.gz + md5sum: 5cc39ef0c7ac85f2b758b164aabf9157 + - path: output/cellranger/sample-123/outs/raw_feature_bc_matrix/features.tsv.gz + md5sum: 07d497c7ce3e22f374af7b2cf9b97d72 + - path: output/cellranger/sample-123/outs/raw_feature_bc_matrix/matrix.mtx.gz + md5sum: bdce94a51f16e22d40301724080b76ee - path: output/cellranger/sample-123/outs/web_summary.html diff --git a/tests/modules/cellranger/mkfastq/test.yml b/tests/modules/cellranger/mkfastq/test.yml index 890f2557..273d8fca 100644 --- a/tests/modules/cellranger/mkfastq/test.yml +++ b/tests/modules/cellranger/mkfastq/test.yml 
@@ -5,7 +5,6 @@ - cellranger/mkfastq files: - path: output/cellranger/cellranger-tiny-bcl-1/outs/fastq_path/fake_file.fastq.gz - md5sum: d41d8cd98f00b204e9800998ecf8427e - name: cellranger mkfastq test_cellranger_mkfastq_illumina command: nextflow run tests/modules/cellranger/mkfastq -entry test_cellranger_mkfastq_illumina -c tests/config/nextflow.config -c ./tests/modules/cellranger/mkfastq/nextflow.config -stub-run tags: @@ -13,4 +12,3 @@ - cellranger/mkfastq files: - path: output/cellranger/cellranger-tiny-bcl-1/outs/fastq_path/fake_file.fastq.gz - md5sum: d41d8cd98f00b204e9800998ecf8427e diff --git a/tests/modules/cellranger/mkgtf/test.yml b/tests/modules/cellranger/mkgtf/test.yml index 2130afd2..59eb9072 100644 --- a/tests/modules/cellranger/mkgtf/test.yml +++ b/tests/modules/cellranger/mkgtf/test.yml @@ -1,8 +1,8 @@ - name: cellranger mkgtf test_cellranger_mkgtf - command: nextflow run tests/modules/cellranger/mkgtf -entry test_cellranger_mkgtf -c tests/config/nextflow.config -c tests/modules/cellranger/mkgtf/nextflow.config + command: nextflow run ./tests/modules/cellranger/mkgtf -entry test_cellranger_mkgtf -c ./tests/config/nextflow.config -c ./tests/modules/cellranger/mkgtf/nextflow.config tags: - - cellranger - cellranger/mkgtf + - cellranger files: - path: output/cellranger/genome.filtered.gtf md5sum: a8b8a7b5039e05d3a9cf9151ea138b5b diff --git a/tests/modules/cellranger/mkref/test.yml b/tests/modules/cellranger/mkref/test.yml index e40592bb..83ec5f95 100644 --- a/tests/modules/cellranger/mkref/test.yml +++ b/tests/modules/cellranger/mkref/test.yml @@ -1,8 +1,8 @@ - name: cellranger mkref test_cellranger_mkref - command: nextflow run ./tests/modules/cellranger/mkref -entry test_cellranger_mkref -c ./tests/config/nextflow.config -c ./tests/modules/cellranger/mkref/nextflow.config + command: nextflow run ./tests/modules/cellranger/mkref -entry test_cellranger_mkref -c ./tests/config/nextflow.config -c ./tests/modules/cellranger/mkref/nextflow.config tags: 
- - cellranger - cellranger/mkref + - cellranger files: - path: output/cellranger/homo_sapiens_chr22_reference/fasta/genome.fa md5sum: f315020d899597c1b57e5fe9f60f4c3e @@ -11,7 +11,7 @@ - path: output/cellranger/homo_sapiens_chr22_reference/genes/genes.gtf.gz md5sum: 6d9b5f409bfea95022bc25b9590e194e - path: output/cellranger/homo_sapiens_chr22_reference/reference.json - md5sum: 5d8d1669cd251433505f183e1c9ed6bc + md5sum: 6cc817f0923062e780e6573806840cea - path: output/cellranger/homo_sapiens_chr22_reference/star/Genome md5sum: 22102926fadf5890e905ca71b2da3f35 - path: output/cellranger/homo_sapiens_chr22_reference/star/SA diff --git a/tests/modules/ensemblvep/main.nf b/tests/modules/ensemblvep/main.nf index 30d19957..3c8afada 100644 --- a/tests/modules/ensemblvep/main.nf +++ b/tests/modules/ensemblvep/main.nf @@ -4,11 +4,22 @@ nextflow.enable.dsl = 2 include { ENSEMBLVEP } from '../../../modules/ensemblvep/main.nf' -workflow test_ensemblvep { +workflow test_ensemblvep_fasta { input = [ [ id:'test' ], // meta map file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true) ] - ENSEMBLVEP ( input, "WBcel235", "caenorhabditis_elegans", "104", [], [] ) + fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + + ENSEMBLVEP ( input, "WBcel235", "caenorhabditis_elegans", "104", [], fasta, [] ) +} + +workflow test_ensemblvep_no_fasta { + input = [ + [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true) + ] + + ENSEMBLVEP ( input, "WBcel235", "caenorhabditis_elegans", "104", [], [], [] ) } diff --git a/tests/modules/ensemblvep/test.yml b/tests/modules/ensemblvep/test.yml index 42384d6e..7e94d5fc 100644 --- a/tests/modules/ensemblvep/test.yml +++ b/tests/modules/ensemblvep/test.yml @@ -1,5 +1,13 @@ -- name: ensemblvep test_ensemblvep - command: nextflow run ./tests/modules/ensemblvep -entry test_ensemblvep -c ./tests/config/nextflow.config -c 
./tests/modules/ensemblvep/nextflow.config +- name: ensemblvep test_ensemblvep_fasta + command: nextflow run ./tests/modules/ensemblvep -entry test_ensemblvep_fasta -c ./tests/config/nextflow.config -c ./tests/modules/ensemblvep/nextflow.config + tags: + - ensemblvep + files: + - path: output/ensemblvep/test.ann.vcf + - path: output/ensemblvep/test.summary.html + +- name: ensemblvep test_ensemblvep_no_fasta + command: nextflow run ./tests/modules/ensemblvep -entry test_ensemblvep_no_fasta -c ./tests/config/nextflow.config -c ./tests/modules/ensemblvep/nextflow.config tags: - ensemblvep files: diff --git a/tests/modules/gatk/indelrealigner/main.nf b/tests/modules/gatk/indelrealigner/main.nf new file mode 100644 index 00000000..d4e01e12 --- /dev/null +++ b/tests/modules/gatk/indelrealigner/main.nf @@ -0,0 +1,33 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { GATK_REALIGNERTARGETCREATOR } from '../../../../modules/gatk/realignertargetcreator/main.nf' +include { GATK_INDELREALIGNER } from '../../../../modules/gatk/indelrealigner/main.nf' + + +workflow test_gatk_indelrealigner { + + + fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true) + + input_realignertargetcreator = [ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true), + ] + + GATK_REALIGNERTARGETCREATOR ( input_realignertargetcreator, fasta, fai, dict, [] ) + + ch_intervals = GATK_REALIGNERTARGETCREATOR.out.intervals + + ch_bams_indelrealigner = Channel.of([ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + 
file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true) + ]) + + ch_input_indelrealigner = ch_bams_indelrealigner.mix(ch_intervals).groupTuple(by: 0).map{ [it[0], it[1][0], it[2], it[1][1] ] }.dump(tag: "input") + + GATK_INDELREALIGNER ( ch_input_indelrealigner, fasta, fai, dict, [] ) +} diff --git a/tests/modules/gatk/indelrealigner/nextflow.config b/tests/modules/gatk/indelrealigner/nextflow.config new file mode 100644 index 00000000..489a478f --- /dev/null +++ b/tests/modules/gatk/indelrealigner/nextflow.config @@ -0,0 +1,6 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + ext.prefix = { "${meta.id}.realigned" } + +} diff --git a/tests/modules/gatk/indelrealigner/test.yml b/tests/modules/gatk/indelrealigner/test.yml new file mode 100644 index 00000000..b7f0c465 --- /dev/null +++ b/tests/modules/gatk/indelrealigner/test.yml @@ -0,0 +1,12 @@ +- name: gatk indelrealigner test_gatk_indelrealigner + command: nextflow run ./tests/modules/gatk/indelrealigner -entry test_gatk_indelrealigner -c ./tests/config/nextflow.config -c ./tests/modules/gatk/indelrealigner/nextflow.config + tags: + - gatk/indelrealigner + - gatk + files: + - path: output/gatk/test.realigned.bai + md5sum: 85a67df8827fe426e7f3a458134c0551 + - path: output/gatk/test.realigned.bam + md5sum: ea1df6f7fcafc408fae4dc1574813d8a + - path: output/gatk/test.realigned.intervals + md5sum: 7aa7a1b235a510e6591e262382086bf8 diff --git a/tests/modules/gatk/realignertargetcreator/main.nf b/tests/modules/gatk/realignertargetcreator/main.nf new file mode 100644 index 00000000..4b9f8eff --- /dev/null +++ b/tests/modules/gatk/realignertargetcreator/main.nf @@ -0,0 +1,18 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { GATK_REALIGNERTARGETCREATOR } from '../../../../modules/gatk/realignertargetcreator/main.nf' + +workflow test_gatk_realignertargetcreator { + + input = [ [ id:'test' ], // 
meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true), + ] + fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true) + + GATK_REALIGNERTARGETCREATOR ( input, fasta, fai, dict, [] ) +} diff --git a/tests/modules/gatk/realignertargetcreator/nextflow.config b/tests/modules/gatk/realignertargetcreator/nextflow.config new file mode 100644 index 00000000..50f50a7a --- /dev/null +++ b/tests/modules/gatk/realignertargetcreator/nextflow.config @@ -0,0 +1,5 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + +} \ No newline at end of file diff --git a/tests/modules/gatk/realignertargetcreator/test.yml b/tests/modules/gatk/realignertargetcreator/test.yml new file mode 100644 index 00000000..0e247013 --- /dev/null +++ b/tests/modules/gatk/realignertargetcreator/test.yml @@ -0,0 +1,8 @@ +- name: gatk realignertargetcreator test_gatk_realignertargetcreator + command: nextflow run ./tests/modules/gatk/realignertargetcreator -entry test_gatk_realignertargetcreator -c ./tests/config/nextflow.config -c ./tests/modules/gatk/realignertargetcreator/nextflow.config + tags: + - gatk + - gatk/realignertargetcreator + files: + - path: output/gatk/test.intervals + md5sum: 7aa7a1b235a510e6591e262382086bf8 diff --git a/tests/modules/gatk4/applybqsrspark/test.yml b/tests/modules/gatk4/applybqsrspark/test.yml index 1e4f8b47..b7b8c72d 100644 --- a/tests/modules/gatk4/applybqsrspark/test.yml +++ b/tests/modules/gatk4/applybqsrspark/test.yml @@ -15,7 +15,6 @@ - gatk4/applybqsrspark files: - path: output/gatk4/test.bam - md5sum: 
2ca2446f0125890280056fd7da822732 - path: output/gatk4/versions.yml - name: gatk4 applybqsr test_gatk4_applybqsr_spark_cram diff --git a/tests/modules/gatk4/markduplicatesspark/main.nf b/tests/modules/gatk4/markduplicatesspark/main.nf index 2f294f59..004fbb1e 100644 --- a/tests/modules/gatk4/markduplicatesspark/main.nf +++ b/tests/modules/gatk4/markduplicatesspark/main.nf @@ -3,26 +3,55 @@ nextflow.enable.dsl = 2 include { GATK4_MARKDUPLICATES_SPARK } from '../../../../modules/gatk4/markduplicatesspark/main.nf' +include { GATK4_MARKDUPLICATES_SPARK as GATK4_MARKDUPLICATES_SPARK_CRAM } from '../../../../modules/gatk4/markduplicatesspark/main.nf' +include { GATK4_MARKDUPLICATES_SPARK as GATK4_MARKDUPLICATES_SPARK_METRICS } from '../../../../modules/gatk4/markduplicatesspark/main.nf' workflow test_gatk4_markduplicates_spark { input = [ [ id:'test', single_end:false ], // meta map - file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) ] - fasta = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true) - fai = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta_fai'], checkIfExists: true) - dict = file(params.test_data['homo_sapiens']['genome']['genome_21_dict'], checkIfExists: true) + fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true) GATK4_MARKDUPLICATES_SPARK ( input, fasta, fai, dict ) } +// chr 22 workflow test_gatk4_markduplicates_spark_multiple_bams { input = [ [ id:'test', single_end:false ], // meta map - [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), - 
file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam'], checkIfExists: true) + [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_name_sorted_bam'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_name_sorted_bam'], checkIfExists: true) ] ] - fasta = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true) - fai = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta_fai'], checkIfExists: true) - dict = file(params.test_data['homo_sapiens']['genome']['genome_21_dict'], checkIfExists: true) + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) GATK4_MARKDUPLICATES_SPARK ( input, fasta, fai, dict ) } + +// chr 22 +workflow test_gatk4_markduplicates_spark_multiple_bams_cram_out { + input = [ [ id:'test', single_end:false ], // meta map + [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_name_sorted_bam'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_name_sorted_bam'], checkIfExists: true) + ] ] + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) + + GATK4_MARKDUPLICATES_SPARK_CRAM ( input, fasta, fai, dict ) +} + +// chr 22 +workflow test_gatk4_markduplicates_spark_multiple_bams_metrics { + input = [ [ id:'test', single_end:false ], // meta map + [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_name_sorted_bam'], checkIfExists: true), + 
file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_name_sorted_bam'], checkIfExists: true) + ] ] + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) + + GATK4_MARKDUPLICATES_SPARK_METRICS ( input, fasta, fai, dict ) +} diff --git a/tests/modules/gatk4/markduplicatesspark/nextflow.config b/tests/modules/gatk4/markduplicatesspark/nextflow.config index 8730f1c4..e0455b4b 100644 --- a/tests/modules/gatk4/markduplicatesspark/nextflow.config +++ b/tests/modules/gatk4/markduplicatesspark/nextflow.config @@ -2,4 +2,18 @@ process { publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + withName: GATK4_MARKDUPLICATES_SPARK { + ext.prefix = { "${meta.id}.bam" } + } + withName: GATK4_MARKDUPLICATES_SPARK_CRAM { + ext.prefix = { "${meta.id}.cram" } + } + withName: GATK4_MARKDUPLICATES_SPARK_METRICS { + ext.args = '--metrics-file test.metrics' + ext.prefix = { "${meta.id}.bam" } + } + } +// override tests/config/nextflow.config +docker.userEmulation = false + diff --git a/tests/modules/gatk4/markduplicatesspark/test.yml b/tests/modules/gatk4/markduplicatesspark/test.yml index b0c0b40d..4ef9b916 100644 --- a/tests/modules/gatk4/markduplicatesspark/test.yml +++ b/tests/modules/gatk4/markduplicatesspark/test.yml @@ -1,25 +1,41 @@ -- name: gatk4 markduplicates test_gatk4_markduplicates_spark - command: nextflow run tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark -c tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config +- name: gatk4 markduplicatesspark test_gatk4_markduplicates_spark + command: nextflow run ./tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark -c ./tests/config/nextflow.config 
-c ./tests/modules/gatk4/markduplicatesspark/nextflow.config tags: - gatk4 - gatk4/markduplicatesspark files: - - path: output/gatk4/test.bai - md5sum: e9c125e82553209933883b4fe2b8d7c2 - path: output/gatk4/test.bam - md5sum: 2efd50b2e6b7fd9bdf242cd9e266cfa9 - - path: output/gatk4/test.metrics + md5sum: dc1a09ac6371aab7c50d1a554baa06d3 - path: output/gatk4/versions.yml -- name: gatk4 markduplicates test_gatk4_markduplicates_spark_multiple_bams - command: nextflow run tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark_multiple_bams -c tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config +- name: gatk4 markduplicatesspark test_gatk4_markduplicates_spark_multiple_bams + command: nextflow run ./tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark_multiple_bams -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config tags: - gatk4 - gatk4/markduplicatesspark files: - - path: output/gatk4/test.bai - md5sum: bad71df9c876e72a5bc0a3e0fd755f92 - path: output/gatk4/test.bam - md5sum: 8187febc6108ffef7f907e89b9c091a4 - - path: output/gatk4/test.metrics + md5sum: 898cb0a6616897d8ada90bab53bf0837 + - path: output/gatk4/versions.yml + +- name: gatk4 markduplicatesspark test_gatk4_markduplicates_spark_multiple_bams_cram_out + command: nextflow run ./tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark_multiple_bams_cram_out -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config + tags: + - gatk4 + - gatk4/markduplicatesspark + files: + - path: output/gatk4/test.cram + md5sum: 2271016de5e4199736598f39d12d7587 + - path: output/gatk4/versions.yml + +- name: gatk4 markduplicatesspark test_gatk4_markduplicates_spark_multiple_bams_metrics + command: nextflow run ./tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark_multiple_bams_metrics -c ./tests/config/nextflow.config 
-c ./tests/modules/gatk4/markduplicatesspark/nextflow.config + tags: + - gatk4 + - gatk4/markduplicatesspark + files: + - path: output/gatk4/test.bam + md5sum: 898cb0a6616897d8ada90bab53bf0837 + - path: output/gatk4/test.metrics + contains: ["## METRICS CLASS", "org.broadinstitute.hellbender.utils.read.markduplicates.GATKDuplicationMetrics"] - path: output/gatk4/versions.yml diff --git a/tests/modules/gatk4/reblockgvcf/main.nf b/tests/modules/gatk4/reblockgvcf/main.nf new file mode 100644 index 00000000..2233a5c5 --- /dev/null +++ b/tests/modules/gatk4/reblockgvcf/main.nf @@ -0,0 +1,55 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { GATK4_REBLOCKGVCF } from '../../../../modules/gatk4/reblockgvcf/main.nf' + +workflow test_gatk4_reblockgvcf { + + input = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true), + [] + ] + + fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + fasta_index = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true) + + GATK4_REBLOCKGVCF ( input, fasta, fasta_index, dict, [], [] ) +} + +workflow test_gatk4_reblockgvcf_intervals { + + input = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true), + file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true) + ] + + fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + fasta_index = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = 
file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true) + + GATK4_REBLOCKGVCF ( input, fasta, fasta_index, dict, [], [] ) +} + +workflow test_gatk4_reblockgvcf_dbsnp { + + input = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_haplotc_cnn_vcf_gz'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_haplotc_cnn_vcf_gz_tbi'], checkIfExists: true), + [] + ] + + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + fasta_index = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) + dbsnp = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz'], checkIfExists: true) + dbsnp_tbi = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz_tbi'], checkIfExists: true) + + GATK4_REBLOCKGVCF ( input, fasta, fasta_index, dict, dbsnp, dbsnp_tbi ) +} \ No newline at end of file diff --git a/tests/modules/gatk4/reblockgvcf/nextflow.config b/tests/modules/gatk4/reblockgvcf/nextflow.config new file mode 100644 index 00000000..50f50a7a --- /dev/null +++ b/tests/modules/gatk4/reblockgvcf/nextflow.config @@ -0,0 +1,5 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + +} \ No newline at end of file diff --git a/tests/modules/gatk4/reblockgvcf/test.yml b/tests/modules/gatk4/reblockgvcf/test.yml new file mode 100644 index 00000000..623f58a6 --- /dev/null +++ b/tests/modules/gatk4/reblockgvcf/test.yml @@ -0,0 +1,26 @@ +- name: gatk4 reblockgvcf test_gatk4_reblockgvcf + command: nextflow run ./tests/modules/gatk4/reblockgvcf -entry test_gatk4_reblockgvcf -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/reblockgvcf/nextflow.config + tags: + - gatk4/reblockgvcf + - gatk4 + files: + - path: 
output/gatk4/test.rb.g.vcf.gz + - path: output/gatk4/test.rb.g.vcf.gz.tbi + +- name: gatk4 reblockgvcf test_gatk4_reblockgvcf_intervals + command: nextflow run ./tests/modules/gatk4/reblockgvcf -entry test_gatk4_reblockgvcf_intervals -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/reblockgvcf/nextflow.config + tags: + - gatk4/reblockgvcf + - gatk4 + files: + - path: output/gatk4/test.rb.g.vcf.gz + - path: output/gatk4/test.rb.g.vcf.gz.tbi + +- name: gatk4 reblockgvcf test_gatk4_reblockgvcf_dbsnp + command: nextflow run ./tests/modules/gatk4/reblockgvcf -entry test_gatk4_reblockgvcf_dbsnp -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/reblockgvcf/nextflow.config + tags: + - gatk4/reblockgvcf + - gatk4 + files: + - path: output/gatk4/test.rb.g.vcf.gz + - path: output/gatk4/test.rb.g.vcf.gz.tbi diff --git a/tests/modules/sexdeterrmine/main.nf b/tests/modules/sexdeterrmine/main.nf new file mode 100644 index 00000000..fc777905 --- /dev/null +++ b/tests/modules/sexdeterrmine/main.nf @@ -0,0 +1,16 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { SAMTOOLS_DEPTH } from '../../../modules/samtools/depth/main.nf' +include { SEXDETERRMINE } from '../../../modules/sexdeterrmine/main.nf' + +workflow test_sexdeterrmine { + + input = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test3_single_end_markduplicates_sorted_bam'], checkIfExists: true) ] + + SAMTOOLS_DEPTH ( input ) + SEXDETERRMINE ( SAMTOOLS_DEPTH.out.tsv, [] ) +} diff --git a/tests/modules/sexdeterrmine/nextflow.config b/tests/modules/sexdeterrmine/nextflow.config new file mode 100644 index 00000000..4ba3dfe3 --- /dev/null +++ b/tests/modules/sexdeterrmine/nextflow.config @@ -0,0 +1,12 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + + withName:SAMTOOLS_DEPTH { + ext.args = "-H" + } + + withName:SEXDETERRMINE { + ext.prefix = { "${meta.id}_sexdet" } + } 
+} diff --git a/tests/modules/sexdeterrmine/test.yml b/tests/modules/sexdeterrmine/test.yml new file mode 100644 index 00000000..bb3f701e --- /dev/null +++ b/tests/modules/sexdeterrmine/test.yml @@ -0,0 +1,15 @@ +- name: sexdeterrmine test_sexdeterrmine + command: nextflow run tests/modules/sexdeterrmine -entry test_sexdeterrmine -c tests/config/nextflow.config + tags: + - sexdeterrmine + files: + - path: output/samtools/test.tsv + md5sum: fa2992ca1ea93a6e1b3e838476191935 + - path: output/samtools/versions.yml + md5sum: dbd04b700335c8ad236bd667254c8dd8 + - path: output/sexdeterrmine/sexdeterrmine.json + md5sum: bafb2419bb8630eda29a251c20e97166 + - path: output/sexdeterrmine/test_sexdet.tsv + md5sum: 1cf8a2b97b38353eb97a96ab872dcca9 + - path: output/sexdeterrmine/versions.yml + md5sum: 077361101e8e7997aec3da8a01e59eee diff --git a/tests/test_versions_yml.py b/tests/test_versions_yml.py index 5d0bb39e..7da6707d 100644 --- a/tests/test_versions_yml.py +++ b/tests/test_versions_yml.py @@ -16,9 +16,9 @@ def _get_workflow_names(): # test_config = yaml.safe_load(f.read_text()) test_config = yaml.load(f.read_text(), Loader=yaml.BaseLoader) for workflow in test_config: - # https://github.com/nf-core/modules/pull/1242 - added to cover tests + # https://github.com/nf-core/modules/pull/1242 - added to cover tests # that expect an error and therefore will not generate a versions.yml - if 'exit_code' not in workflow: + if 'exit_code' not in workflow: yield workflow["name"] @@ -56,5 +56,5 @@ def test_ensure_valid_version_yml(workflow_dir): assert len(software_versions), "There must be at least one version emitted." for tool, version in software_versions.items(): assert re.match( - r"^\d+.*", str(version) - ), f"Version number for {tool} must start with a number. " + r"^\d.*|^[a-f0-9]{40}$", str(version) + ), f"Version number for {tool} must start with a number, or be a Git SHA commit id. "