From 409af2f27cbe45109acc7fee70718d2bf20aa449 Mon Sep 17 00:00:00 2001 From: "Maxime U. Garcia" Date: Tue, 12 Apr 2022 17:15:39 +0200 Subject: [PATCH] Improve syntax/logic coherence in all gatk4 plugins (#1459) * feat: code polishing * Apply suggestions from code review Co-authored-by: FriederikeHanssen * code polishing * more code polishing * code polishing * tests for applybqsrspark * fix typo * no need to check md5sum for versions.yml * fix: use correct syntax * code polishing again * add tests for markduplicatesspark * simplify mergevcfs tests * add tests for baserecalibratorspark * fix: path to entry * code polishing * fix linting * simplify module * update meta.yml * fix pair mode * fix: MITO mode * more tests * fix command * bad copy paste * fix typos * fix tests * fix test * update meta.yml * correct versions.yml in all test.yml * code polishing * code polishing * more code polishing * fix args * add tmpdir for all Co-authored-by: FriederikeHanssen --- modules/gatk4/applybqsr/main.nf | 15 +- modules/gatk4/applybqsr/meta.yml | 4 + modules/gatk4/applybqsrspark/main.nf | 51 +++++++ modules/gatk4/applybqsrspark/meta.yml | 72 ++++++++++ modules/gatk4/applyvqsr/main.nf | 23 ++-- modules/gatk4/applyvqsr/meta.yml | 8 +- modules/gatk4/baserecalibrator/main.nf | 33 +++-- modules/gatk4/baserecalibrator/meta.yml | 10 +- modules/gatk4/baserecalibratorspark/main.nf | 53 +++++++ modules/gatk4/baserecalibratorspark/meta.yml | 72 ++++++++++ modules/gatk4/bedtointervallist/main.nf | 10 +- modules/gatk4/calculatecontamination/main.nf | 11 +- modules/gatk4/calculatecontamination/meta.yml | 6 +- modules/gatk4/combinegvcfs/main.nf | 24 ++-- modules/gatk4/combinegvcfs/meta.yml | 32 +++-- .../gatk4/createsequencedictionary/main.nf | 9 +- .../gatk4/createsomaticpanelofnormals/main.nf | 17 +-- .../createsomaticpanelofnormals/meta.yml | 2 +- .../gatk4/estimatelibrarycomplexity/main.nf | 24 ++-- .../gatk4/estimatelibrarycomplexity/meta.yml | 3 +- modules/gatk4/fastqtosam/main.nf | 10 +- modules/gatk4/fastqtosam/meta.yml | 8 +- modules/gatk4/filtermutectcalls/main.nf | 39 +++--- modules/gatk4/filtermutectcalls/meta.yml | 13 +- modules/gatk4/gatherbqsrreports/main.nf | 15 +- modules/gatk4/gatherbqsrreports/meta.yml | 10 +- modules/gatk4/gatherpileupsummaries/main.nf | 17 +-- modules/gatk4/gatherpileupsummaries/meta.yml | 9 +- modules/gatk4/genomicsdbimport/main.nf | 37 ++--- modules/gatk4/genotypegvcfs/main.nf | 29 ++-- modules/gatk4/genotypegvcfs/meta.yml | 20 ++- modules/gatk4/getpileupsummaries/main.nf | 27 ++-- modules/gatk4/haplotypecaller/main.nf | 35 +++-- modules/gatk4/indexfeaturefile/main.nf | 9 +- modules/gatk4/intervallisttobed/main.nf | 5 +- modules/gatk4/intervallisttools/main.nf | 13 +- .../gatk4/learnreadorientationmodel/main.nf | 12 +- modules/gatk4/markduplicates/main.nf | 10 +- modules/gatk4/markduplicates/meta.yml | 1 + modules/gatk4/markduplicatesspark/main.nf | 50 +++++++ modules/gatk4/markduplicatesspark/meta.yml | 60 ++++++++ modules/gatk4/mergebamalignment/main.nf | 10 +- modules/gatk4/mergemutectstats/main.nf | 8 +- modules/gatk4/mergevcfs/main.nf | 20 ++- modules/gatk4/mutect2/main.nf | 44 ++---- modules/gatk4/mutect2/meta.yml | 16 --- modules/gatk4/revertsam/main.nf | 6 +- modules/gatk4/samtofastq/main.nf | 6 +- modules/gatk4/selectvariants/main.nf | 6 +- modules/gatk4/splitncigarreads/main.nf | 8 +- modules/gatk4/variantfiltration/main.nf | 10 +- modules/gatk4/variantrecalibrator/main.nf | 23 ++-- tests/modules/gatk4/applybqsr/test.yml | 3 - tests/modules/gatk4/applybqsrspark/main.nf | 47 +++++++ .../gatk4/applybqsrspark/nextflow.config | 5 + tests/modules/gatk4/applybqsrspark/test.yml | 29 ++++ tests/modules/gatk4/applyvqsr/test.yml | 2 - tests/modules/gatk4/baserecalibrator/test.yml | 4 + .../gatk4/baserecalibratorspark/main.nf | 69 ++++++++++ .../baserecalibratorspark/nextflow.config | 5 + .../gatk4/baserecalibratorspark/test.yml | 39 ++++++ .../modules/gatk4/bedtointervallist/test.yml | 1 + .../gatk4/calculatecontamination/main.nf | 15 +- .../calculatecontamination/nextflow.config | 4 + .../gatk4/calculatecontamination/test.yml | 3 - tests/modules/gatk4/combinegvcfs/test.yml | 1 - .../gatk4/createsequencedictionary/test.yml | 1 + .../createsomaticpanelofnormals/test.yml | 1 + .../gatk4/estimatelibrarycomplexity/test.yml | 1 + tests/modules/gatk4/fastqtosam/test.yml | 2 - .../modules/gatk4/filtermutectcalls/test.yml | 3 + .../modules/gatk4/gatherbqsrreports/test.yml | 2 - .../gatk4/gatherpileupsummaries/test.yml | 1 + tests/modules/gatk4/genomicsdbimport/test.yml | 9 -- tests/modules/gatk4/genotypegvcfs/main.nf | 129 +++++++++--------- tests/modules/gatk4/genotypegvcfs/test.yml | 9 ++ .../modules/gatk4/getpileupsummaries/test.yml | 3 - tests/modules/gatk4/haplotypecaller/test.yml | 3 + tests/modules/gatk4/indexfeaturefile/test.yml | 4 +- .../modules/gatk4/intervallisttobed/test.yml | 1 + .../modules/gatk4/intervallisttools/test.yml | 1 + .../gatk4/learnreadorientationmodel/test.yml | 1 + .../gatk4/markduplicates/nextflow.config | 4 + tests/modules/gatk4/markduplicates/test.yml | 2 - .../modules/gatk4/markduplicatesspark/main.nf | 28 ++++ .../gatk4/markduplicatesspark/nextflow.config | 5 + .../gatk4/markduplicatesspark/test.yml | 25 ++++ .../modules/gatk4/mergebamalignment/test.yml | 1 + tests/modules/gatk4/mergemutectstats/test.yml | 1 + tests/modules/gatk4/mergevcfs/main.nf | 24 ++-- tests/modules/gatk4/mergevcfs/test.yml | 6 +- tests/modules/gatk4/mutect2/main.nf | 81 +++++------ tests/modules/gatk4/mutect2/nextflow.config | 12 ++ tests/modules/gatk4/mutect2/test.yml | 16 +++ tests/modules/gatk4/revertsam/test.yml | 1 + tests/modules/gatk4/samtofastq/test.yml | 2 + tests/modules/gatk4/selectvariants/test.yml | 2 - tests/modules/gatk4/splitncigarreads/test.yml | 1 - .../modules/gatk4/variantfiltration/test.yml | 2 + .../modules/gatk4/variantrecalibrator/main.nf | 88 ++++++------ .../gatk4/variantrecalibrator/nextflow.config | 1 + .../gatk4/variantrecalibrator/test.yml | 2 + 102 files changed, 1209 insertions(+), 558 deletions(-) create mode 100644 modules/gatk4/applybqsrspark/main.nf create mode 100644 modules/gatk4/applybqsrspark/meta.yml create mode 100644 modules/gatk4/baserecalibratorspark/main.nf create mode 100644 modules/gatk4/baserecalibratorspark/meta.yml create mode 100644 modules/gatk4/markduplicatesspark/main.nf create mode 100644 modules/gatk4/markduplicatesspark/meta.yml create mode 100644 tests/modules/gatk4/applybqsrspark/main.nf create mode 100644 tests/modules/gatk4/applybqsrspark/nextflow.config create mode 100644 tests/modules/gatk4/applybqsrspark/test.yml create mode 100644 tests/modules/gatk4/baserecalibratorspark/main.nf create mode 100644 tests/modules/gatk4/baserecalibratorspark/nextflow.config create mode 100644 tests/modules/gatk4/baserecalibratorspark/test.yml create mode 100644 tests/modules/gatk4/markduplicatesspark/main.nf create mode 100644 tests/modules/gatk4/markduplicatesspark/nextflow.config create mode 100644 tests/modules/gatk4/markduplicatesspark/test.yml diff --git a/modules/gatk4/applybqsr/main.nf b/modules/gatk4/applybqsr/main.nf index 851afc04..7a64dab2 100644 --- a/modules/gatk4/applybqsr/main.nf +++ b/modules/gatk4/applybqsr/main.nf @@ -14,9 +14,9 @@ process GATK4_APPLYBQSR { path dict output: - tuple val(meta), path("*.bam"), emit: bam, optional: true + tuple val(meta), path("*.bam") , emit: bam, optional: true tuple val(meta), path("*.cram"), emit: cram, optional: true - path "versions.yml" , emit: versions + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -24,8 +24,7 @@ process GATK4_APPLYBQSR { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def interval = intervals ? "-L ${intervals}" : "" - def file_type = input.getExtension() + def interval_command = intervals ? "--intervals $intervals" : "" def avail_mem = 3 if (!task.memory) { @@ -35,12 +34,12 @@ process GATK4_APPLYBQSR { } """ gatk --java-options "-Xmx${avail_mem}g" ApplyBQSR \\ - -R $fasta \\ - -I $input \\ + --input $input \\ + --output ${prefix}.${input.getExtension()} \\ + --reference $fasta \\ --bqsr-recal-file $bqsr_table \\ - $interval \\ + $interval_command \\ --tmp-dir . \\ - -O ${prefix}.${file_type} \\ $args cat <<-END_VERSIONS > versions.yml diff --git a/modules/gatk4/applybqsr/meta.yml b/modules/gatk4/applybqsr/meta.yml index 82d3cbf3..3fc93f10 100644 --- a/modules/gatk4/applybqsr/meta.yml +++ b/modules/gatk4/applybqsr/meta.yml @@ -61,6 +61,10 @@ output: type: file description: Recalibrated BAM file pattern: "*.{bam}" + - cram: + type: file + description: Recalibrated CRAM file + pattern: "*.{cram}" authors: - "@yocra3" diff --git a/modules/gatk4/applybqsrspark/main.nf b/modules/gatk4/applybqsrspark/main.nf new file mode 100644 index 00000000..04303c09 --- /dev/null +++ b/modules/gatk4/applybqsrspark/main.nf @@ -0,0 +1,51 @@ +process GATK4_APPLYBQSR_SPARK { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? "bioconda::gatk4=4.2.3.0" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gatk4:4.2.3.0--hdfd78af_0' : + 'quay.io/biocontainers/gatk4:4.2.3.0--hdfd78af_0' }" + + input: + tuple val(meta), path(input), path(input_index), path(bqsr_table), path(intervals) + path fasta + path fai + path dict + + output: + tuple val(meta), path("*.bam") , emit: bam, optional: true + tuple val(meta), path("*.cram"), emit: cram, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def interval_command = intervals ? "--intervals $intervals" : "" + + def avail_mem = 3 + if (!task.memory) { + log.info '[GATK ApplyBQSRSpark] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = task.memory.giga + } + """ + gatk --java-options "-Xmx${avail_mem}g" ApplyBQSRSpark \\ + --input $input \\ + --output ${prefix}.${input.getExtension()} \\ + --reference $fasta \\ + --bqsr-recal-file $bqsr_table \\ + $interval_command \\ + --spark-master local[${task.cpus}] \\ + --tmp-dir . \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/gatk4/applybqsrspark/meta.yml b/modules/gatk4/applybqsrspark/meta.yml new file mode 100644 index 00000000..070b37ab --- /dev/null +++ b/modules/gatk4/applybqsrspark/meta.yml @@ -0,0 +1,72 @@ +name: gatk4_applybqsr_spark +description: Apply base quality score recalibration (BQSR) to a bam file +keywords: + - bqsr + - bam +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM file from alignment + pattern: "*.{bam,cram}" + - input_index: + type: file + description: BAI/CRAI file from alignment + pattern: "*.{bai,crai}" + - bqsr_table: + type: file + description: Recalibration table from gatk4_baserecalibrator + - intervals: + type: file + description: Bed file with the genomic regions included in the library (optional) + - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + - fai: + type: file + description: Index of reference fasta file + pattern: "*.fasta.fai" + - dict: + type: file + description: GATK sequence dictionary + pattern: "*.dict" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - bam: + type: file + description: Recalibrated BAM file + pattern: "*.{bam}" + - cram: + type: file + description: Recalibrated CRAM file + pattern: "*.{cram}" + +authors: + - "@yocra3" + - "@FriederikeHanssen" + - "@maxulysse" diff --git a/modules/gatk4/applyvqsr/main.nf b/modules/gatk4/applyvqsr/main.nf index 3049aa79..8b235809 100644 --- a/modules/gatk4/applyvqsr/main.nf +++ b/modules/gatk4/applyvqsr/main.nf @@ -8,15 +8,15 @@ process GATK4_APPLYVQSR { 'quay.io/biocontainers/gatk4:4.2.5.0--hdfd78af_0' }" input: - tuple val(meta), path(vcf), path(tbi), path(recal), path(recalidx), path(tranches) - path fasta - path fai - path dict + tuple val(meta), path(vcf), path(vcf_tbi), path(recal), path(recal_index), path(tranches) + path fasta + path fai + path dict output: - tuple val(meta), path("*.vcf.gz") , emit: vcf - tuple val(meta), path("*.tbi") , emit: tbi - path "versions.yml" , emit: versions + tuple val(meta), path("*.vcf.gz"), emit: vcf + tuple val(meta), path("*.tbi") , emit: tbi + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -24,7 +24,7 @@ process GATK4_APPLYVQSR { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - refCommand = fasta ? "-R ${fasta} " : '' + def reference_command = fasta ? "--reference $fasta" : '' def avail_mem = 3 if (!task.memory) { @@ -34,11 +34,12 @@ process GATK4_APPLYVQSR { } """ gatk --java-options "-Xmx${avail_mem}g" ApplyVQSR \\ - ${refCommand} \\ - -V ${vcf} \\ - -O ${prefix}.vcf.gz \\ + --variant ${vcf} \\ + --output ${prefix}.vcf.gz \\ + $reference_command \\ --tranches-file $tranches \\ --recal-file $recal \\ + --tmp-dir . \\ $args cat <<-END_VERSIONS > versions.yml diff --git a/modules/gatk4/applyvqsr/meta.yml b/modules/gatk4/applyvqsr/meta.yml index 4a99db45..a05813d1 100644 --- a/modules/gatk4/applyvqsr/meta.yml +++ b/modules/gatk4/applyvqsr/meta.yml @@ -29,20 +29,20 @@ input: type: file description: VCF file to be recalibrated, this should be the same file as used for the first stage VariantRecalibrator. pattern: "*.vcf" - - tbi: + - vcf_tbi: type: file - description: Tbi index for the input vcf file. + description: tabix index for the input vcf file. pattern: "*.vcf.tbi" - recal: type: file description: Recalibration file produced when the input vcf was run through VariantRecalibrator in stage 1. pattern: "*.recal" - - recalidx: + - recal_index: type: file description: Index file for the recalibration file. pattern: ".recal.idx" - tranches: - type: boolean + type: file description: Tranches file produced when the input vcf was run through VariantRecalibrator in stage 1. pattern: ".tranches" - fasta: diff --git a/modules/gatk4/baserecalibrator/main.nf b/modules/gatk4/baserecalibrator/main.nf index ecb41d9b..766a8338 100644 --- a/modules/gatk4/baserecalibrator/main.nf +++ b/modules/gatk4/baserecalibrator/main.nf @@ -9,15 +9,15 @@ process GATK4_BASERECALIBRATOR { input: tuple val(meta), path(input), path(input_index), path(intervals) - path fasta - path fai - path dict - path knownSites - path knownSites_tbi + path fasta + path fai + path dict + path known_sites + path known_sites_tbi output: tuple val(meta), path("*.table"), emit: table - path "versions.yml" , emit: versions + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -25,8 +25,8 @@ process GATK4_BASERECALIBRATOR { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def intervalsCommand = intervals ? "-L ${intervals}" : "" - def sitesCommand = knownSites.collect{"--known-sites ${it}"}.join(' ') + def interval_command = intervals ? "--intervals $intervals" : "" + def sites_command = known_sites.collect{"--known-sites $it"}.join(' ') def avail_mem = 3 if (!task.memory) { @@ -34,16 +34,15 @@ process GATK4_BASERECALIBRATOR { } else { avail_mem = task.memory.giga } - """ - gatk --java-options "-Xmx${avail_mem}g" BaseRecalibrator \ - -R $fasta \ - -I $input \ - $sitesCommand \ - $intervalsCommand \ - --tmp-dir . \ - $args \ - -O ${prefix}.table + gatk --java-options "-Xmx${avail_mem}g" BaseRecalibrator \\ + --input $input \\ + --output ${prefix}.table \\ + --reference $fasta \\ + $interval_command \\ + $sites_command \\ + --tmp-dir . \\ + $args cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/gatk4/baserecalibrator/meta.yml b/modules/gatk4/baserecalibrator/meta.yml index 2e52b8ab..08c1ebbf 100644 --- a/modules/gatk4/baserecalibrator/meta.yml +++ b/modules/gatk4/baserecalibrator/meta.yml @@ -42,9 +42,14 @@ input: type: file description: GATK sequence dictionary pattern: "*.dict" - - knownSites: + - known_sites: type: file - description: Bed file with the genomic regions included in the library (optional) + description: VCF files with known sites for indels / snps (optional) + pattern: "*.vcf.gz" + - known_sites_tbi: + type: file + description: Tabix index of the known_sites (optional) + pattern: "*.vcf.gz.tbi" output: - meta: @@ -64,3 +69,4 @@ output: authors: - "@yocra3" - "@FriederikeHanssen" + - "@maxulysse" diff --git a/modules/gatk4/baserecalibratorspark/main.nf b/modules/gatk4/baserecalibratorspark/main.nf new file mode 100644 index 00000000..70c70181 --- /dev/null +++ b/modules/gatk4/baserecalibratorspark/main.nf @@ -0,0 +1,53 @@ +process GATK4_BASERECALIBRATOR_SPARK { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? "bioconda::gatk4=4.2.3.0" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gatk4:4.2.3.0--hdfd78af_0' : + 'broadinstitute/gatk:4.2.3.0' }" + + input: + tuple val(meta), path(input), path(input_index), path(intervals) + path fasta + path fai + path dict + path known_sites + path known_sites_tbi + + output: + tuple val(meta), path("*.table"), emit: table + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def interval_command = intervals ? "--intervals $intervals" : "" + def sites_command = known_sites.collect{"--known-sites $it"}.join(' ') + + def avail_mem = 3 + if (!task.memory) { + log.info '[GATK BaseRecalibratorSpark] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = task.memory.giga + } + """ + gatk --java-options "-Xmx${avail_mem}g" BaseRecalibratorSpark \\ + --input $input \\ + --output ${prefix}.table \\ + --reference $fasta \\ + $interval_command \\ + $sites_command \\ + --spark-master local[${task.cpus}] \\ + --tmp-dir . \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/gatk4/baserecalibratorspark/meta.yml b/modules/gatk4/baserecalibratorspark/meta.yml new file mode 100644 index 00000000..581c48ef --- /dev/null +++ b/modules/gatk4/baserecalibratorspark/meta.yml @@ -0,0 +1,72 @@ +name: gatk4_baserecalibrator_spark +description: Generate recalibration table for Base Quality Score Recalibration (BQSR) +keywords: + - sort +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM file from alignment + pattern: "*.{bam,cram}" + - input_index: + type: file + description: BAI/CRAI file from alignment + pattern: "*.{bai,crai}" + - intervals: + type: file + description: Bed file with the genomic regions included in the library (optional) + - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + - fai: + type: file + description: Index of reference fasta file + pattern: "*.fasta.fai" + - dict: + type: file + description: GATK sequence dictionary + pattern: "*.dict" + - known_sites: + type: file + description: VCF files with known sites for indels / snps (optional) + pattern: "*.vcf.gz" + - known_sites_tbi: + type: file + description: Tabix index of the known_sites (optional) + pattern: "*.vcf.gz.tbi" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - table: + type: file + description: Recalibration table from BaseRecalibrator + pattern: "*.{table}" + +authors: + - "@yocra3" + - "@FriederikeHanssen" + - "@maxulysse" diff --git a/modules/gatk4/bedtointervallist/main.nf b/modules/gatk4/bedtointervallist/main.nf index c3b624a8..118f535b 100644 --- a/modules/gatk4/bedtointervallist/main.nf +++ b/modules/gatk4/bedtointervallist/main.nf @@ -9,7 +9,7 @@ process GATK4_BEDTOINTERVALLIST { input: tuple val(meta), path(bed) - path sequence_dict + path dict output: tuple val(meta), path('*.interval_list'), emit: interval_list @@ -21,6 +21,7 @@ process GATK4_BEDTOINTERVALLIST { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def avail_mem = 3 if (!task.memory) { log.info '[GATK BedToIntervalList] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' @@ -29,9 +30,10 @@ process GATK4_BEDTOINTERVALLIST { } """ gatk --java-options "-Xmx${avail_mem}g" BedToIntervalList \\ - -I $bed \\ - -SD $sequence_dict \\ - -O ${prefix}.interval_list \\ + --INPUT $bed \\ + --OUTPUT ${prefix}.interval_list \\ + --SEQUENCE_DICTIONARY $dict \\ + --TMP_DIR . \\ $args cat <<-END_VERSIONS > versions.yml diff --git a/modules/gatk4/calculatecontamination/main.nf b/modules/gatk4/calculatecontamination/main.nf index 298739ab..197fe6c2 100644 --- a/modules/gatk4/calculatecontamination/main.nf +++ b/modules/gatk4/calculatecontamination/main.nf @@ -9,7 +9,6 @@ process GATK4_CALCULATECONTAMINATION { input: tuple val(meta), path(pileup), path(matched) - val segmentout output: tuple val(meta), path('*.contamination.table'), emit: contamination @@ -22,8 +21,8 @@ process GATK4_CALCULATECONTAMINATION { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def matched_command = matched ? " -matched ${matched} " : '' - def segment_command = segmentout ? " -segments ${prefix}.segmentation.table" : '' + def matched_command = matched ? "--matched-normal $matched" : '' + def avail_mem = 3 if (!task.memory) { log.info '[GATK CalculateContamination] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' @@ -32,10 +31,10 @@ process GATK4_CALCULATECONTAMINATION { } """ gatk --java-options "-Xmx${avail_mem}g" CalculateContamination \\ - -I $pileup \\ + --input $pileup \\ + --output ${prefix}.contamination.table \\ $matched_command \\ - -O ${prefix}.contamination.table \\ - $segment_command \\ + --tmp-dir . \\ $args cat <<-END_VERSIONS > versions.yml diff --git a/modules/gatk4/calculatecontamination/meta.yml b/modules/gatk4/calculatecontamination/meta.yml index e5e870dc..7767bd08 100644 --- a/modules/gatk4/calculatecontamination/meta.yml +++ b/modules/gatk4/calculatecontamination/meta.yml @@ -32,9 +32,6 @@ input: type: file description: File containing the pileups summary table of a normal sample that matches with the tumor sample specified in pileup argument. This is an optional input. pattern: "*.pileups.table" - - segmentout: - type: boolean - description: specifies whether to output the segmentation table. output: - contamination: @@ -43,7 +40,7 @@ output: pattern: "*.contamination.table" - segmentation: type: file - description: optional output table containing segmentation of tumor minor allele fractions. + description: output table containing segmentation of tumor minor allele fractions (optional) pattern: "*.segmentation.table" - versions: type: file @@ -52,3 +49,4 @@ output: authors: - "@GCJMackenzie" + - "@maxulysse" diff --git a/modules/gatk4/combinegvcfs/main.nf b/modules/gatk4/combinegvcfs/main.nf index c0a7ac45..45bf4372 100644 --- a/modules/gatk4/combinegvcfs/main.nf +++ b/modules/gatk4/combinegvcfs/main.nf @@ -9,9 +9,9 @@ process GATK4_COMBINEGVCFS { input: tuple val(meta), path(vcf), path(vcf_idx) - path (fasta) - path (fasta_fai) - path (fasta_dict) + path fasta + path fai + path dict output: tuple val(meta), path("*.combined.g.vcf.gz"), emit: combined_gvcf @@ -23,21 +23,21 @@ process GATK4_COMBINEGVCFS { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def avail_mem = 3 + def input_list = vcf.collect{"--variant $it"}.join(' ') + + def avail_mem = 3 if (!task.memory) { log.info '[GATK COMBINEGVCFS] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' } else { avail_mem = task.memory.giga } - def input_files = vcf.collect{"-V ${it}"}.join(' ') // add '-V' to each vcf file """ - gatk \\ - --java-options "-Xmx${avail_mem}g" \\ - CombineGVCFs \\ - -R ${fasta} \\ - -O ${prefix}.combined.g.vcf.gz \\ - ${args} \\ - ${input_files} + gatk --java-options "-Xmx${avail_mem}g" CombineGVCFs \\ + $input_list \\ + --output ${prefix}.combined.g.vcf.gz \\ + --reference ${fasta} \\ + --tmp-dir . \\ + $args cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/gatk4/combinegvcfs/meta.yml b/modules/gatk4/combinegvcfs/meta.yml index b891de90..9330e084 100644 --- a/modules/gatk4/combinegvcfs/meta.yml +++ b/modules/gatk4/combinegvcfs/meta.yml @@ -19,18 +19,11 @@ tools: licence: ["Apache-2.0"] input: - - fasta: - type: file - description: The reference fasta file - pattern: "*.fasta" - - fai: - type: file - description: FASTA index file - pattern: "*.{fai}" - - dict: - type: file - description: FASTA dictionary file - pattern: "*.{dict}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] - vcf: type: file description: Compressed VCF files @@ -38,7 +31,19 @@ input: - vcf_idx: type: file description: VCF Index file - pattern: "*.{fai}" + pattern: "*.vcf.gz.idx" + - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + - fai: + type: file + description: FASTA index file + pattern: "*.fasta.fai" + - dict: + type: file + description: FASTA dictionary file + pattern: "*.dict" output: - gvcf: type: file @@ -53,3 +58,4 @@ authors: - "@sateeshperi" - "@mjcipriano" - "@hseabolt" + - "@maxulysse" diff --git a/modules/gatk4/createsequencedictionary/main.nf b/modules/gatk4/createsequencedictionary/main.nf index dea77a1d..dbf37048 100644 --- a/modules/gatk4/createsequencedictionary/main.nf +++ b/modules/gatk4/createsequencedictionary/main.nf @@ -11,14 +11,15 @@ process GATK4_CREATESEQUENCEDICTIONARY { path fasta output: - path "*.dict" , emit: dict - path "versions.yml" , emit: versions + path "*.dict" , emit: dict + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when script: def args = task.ext.args ?: '' + def avail_mem = 6 if (!task.memory) { log.info '[GATK CreateSequenceDictionary] Available memory not known - defaulting to 6GB. Specify process memory requirements to change this.' @@ -26,10 +27,10 @@ process GATK4_CREATESEQUENCEDICTIONARY { avail_mem = task.memory.giga } """ - gatk --java-options "-Xmx${avail_mem}g" \\ - CreateSequenceDictionary \\ + gatk --java-options "-Xmx${avail_mem}g" CreateSequenceDictionary \\ --REFERENCE $fasta \\ --URI $fasta \\ + --TMP_DIR . \\ $args cat <<-END_VERSIONS > versions.yml diff --git a/modules/gatk4/createsomaticpanelofnormals/main.nf b/modules/gatk4/createsomaticpanelofnormals/main.nf index c030f4e3..3df29947 100644 --- a/modules/gatk4/createsomaticpanelofnormals/main.nf +++ b/modules/gatk4/createsomaticpanelofnormals/main.nf @@ -9,9 +9,9 @@ process GATK4_CREATESOMATICPANELOFNORMALS { input: tuple val(meta), path(genomicsdb) - path fasta - path fai - path dict + path fasta + path fai + path dict output: tuple val(meta), path("*.vcf.gz"), emit: vcf @@ -24,6 +24,7 @@ process GATK4_CREATESOMATICPANELOFNORMALS { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def avail_mem = 3 if (!task.memory) { log.info '[GATK CreateSomaticPanelOfNormals] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' @@ -31,11 +32,11 @@ process GATK4_CREATESOMATICPANELOFNORMALS { avail_mem = task.memory.giga } """ - gatk --java-options "-Xmx${avail_mem}g" \\ - CreateSomaticPanelOfNormals \\ - -R $fasta \\ - -V gendb://$genomicsdb \\ - -O ${prefix}.vcf.gz \\ + gatk --java-options "-Xmx${avail_mem}g" CreateSomaticPanelOfNormals \\ + --variant gendb://$genomicsdb \\ + --output ${prefix}.vcf.gz \\ + --reference $fasta \\ + --tmp-dir . \\ $args cat <<-END_VERSIONS > versions.yml diff --git a/modules/gatk4/createsomaticpanelofnormals/meta.yml b/modules/gatk4/createsomaticpanelofnormals/meta.yml index e450c68a..43e675fb 100644 --- a/modules/gatk4/createsomaticpanelofnormals/meta.yml +++ b/modules/gatk4/createsomaticpanelofnormals/meta.yml @@ -44,7 +44,7 @@ output: pattern: "*.vcf.gz" - tbi: type: file - description: Index of vcf file + description: Tabix index of vcf file pattern: "*vcf.gz.tbi" - versions: type: file diff --git a/modules/gatk4/estimatelibrarycomplexity/main.nf b/modules/gatk4/estimatelibrarycomplexity/main.nf index ba68bf70..caa34630 100644 --- a/modules/gatk4/estimatelibrarycomplexity/main.nf +++ b/modules/gatk4/estimatelibrarycomplexity/main.nf @@ -8,14 +8,14 @@ process GATK4_ESTIMATELIBRARYCOMPLEXITY { 'quay.io/biocontainers/gatk4:4.2.5.0--hdfd78af_0' }" input: - tuple val(meta), path(cram) - path(fasta) - path(fai) - path(dict) + tuple val(meta), path(input) + path fasta + path fai + path dict output: tuple val(meta), path('*.metrics'), emit: metrics - path "versions.yml" , emit: versions + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -23,7 +23,7 @@ process GATK4_ESTIMATELIBRARYCOMPLEXITY { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def crams = cram.collect(){ x -> "-I ".concat(x.toString()) }.join(" ") + def input_list = input.collect(){"--INPUT $it"}.join(" ") def avail_mem = 3 if (!task.memory) { @@ -32,12 +32,12 @@ process GATK4_ESTIMATELIBRARYCOMPLEXITY { avail_mem = task.memory.giga } """ - gatk --java-options "-Xmx${avail_mem}g" EstimateLibraryComplexity \ - ${crams} \ - -O ${prefix}.metrics \ - --REFERENCE_SEQUENCE ${fasta} \ - --VALIDATION_STRINGENCY SILENT \ - --TMP_DIR . $args + gatk --java-options "-Xmx${avail_mem}g" EstimateLibraryComplexity \\ + $input_list \\ + --OUTPUT ${prefix}.metrics \\ + --REFERENCE_SEQUENCE ${fasta} \\ + --TMP_DIR . \\ + $args cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/gatk4/estimatelibrarycomplexity/meta.yml b/modules/gatk4/estimatelibrarycomplexity/meta.yml index 9f2dee60..72a679e9 100644 --- a/modules/gatk4/estimatelibrarycomplexity/meta.yml +++ b/modules/gatk4/estimatelibrarycomplexity/meta.yml @@ -20,7 +20,7 @@ input: description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - cram: + - input: type: file description: BAM/CRAM/SAM file pattern: "*.{bam,cram,sam}" @@ -54,3 +54,4 @@ output: authors: - "@FriederikeHanssen" + - "@maxulysse" diff --git a/modules/gatk4/fastqtosam/main.nf b/modules/gatk4/fastqtosam/main.nf index 0c85a74f..199058d0 100644 --- a/modules/gatk4/fastqtosam/main.nf +++ b/modules/gatk4/fastqtosam/main.nf @@ -20,7 +20,8 @@ process GATK4_FASTQTOSAM { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def read_files = meta.single_end ? "-F1 $reads" : "-F1 ${reads[0]} -F2 ${reads[1]}" + def reads_command = meta.single_end ? "--FASTQ $reads" : "--FASTQ ${reads[0]} --FASTQ2 ${reads[1]}" + def avail_mem = 3 if (!task.memory) { log.info '[GATK FastqToSam] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' @@ -29,9 +30,10 @@ process GATK4_FASTQTOSAM { } """ gatk --java-options "-Xmx${avail_mem}g" FastqToSam \\ - $read_files \\ - -O ${prefix}.bam \\ - -SM $prefix \\ + $reads_command \\ + --OUTPUT ${prefix}.bam \\ + --SAMPLE_NAME $prefix \\ + --TMP_DIR . \\ $args cat <<-END_VERSIONS > versions.yml diff --git a/modules/gatk4/fastqtosam/meta.yml b/modules/gatk4/fastqtosam/meta.yml index 59e305b8..6e5bf1cd 100644 --- a/modules/gatk4/fastqtosam/meta.yml +++ b/modules/gatk4/fastqtosam/meta.yml @@ -34,14 +34,14 @@ output: description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - bam: type: file description: Converted BAM file pattern: "*.bam" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@ntoda03" diff --git a/modules/gatk4/filtermutectcalls/main.nf b/modules/gatk4/filtermutectcalls/main.nf index 77175c7d..c1c82e0b 100644 --- a/modules/gatk4/filtermutectcalls/main.nf +++ b/modules/gatk4/filtermutectcalls/main.nf @@ -8,10 +8,10 @@ process GATK4_FILTERMUTECTCALLS { 'quay.io/biocontainers/gatk4:4.2.5.0--hdfd78af_0' }" input: - tuple val(meta), path(vcf), path(tbi), path(stats), path(orientationbias), path(segmentation), path(contaminationfile), val(contaminationest) - path fasta - path fai - path dict + tuple val(meta), path(vcf), path(vcf_tbi), path(stats), path(orientationbias), path(segmentation), path(table), val(estimate) + path fasta + path fai + path dict output: tuple val(meta), path("*.vcf.gz") , emit: vcf @@ -26,20 +26,11 @@ process GATK4_FILTERMUTECTCALLS { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def orientationbias_options = '' - if (orientationbias) { - orientationbias_options = '--orientation-bias-artifact-priors ' + orientationbias.join(' --orientation-bias-artifact-priors ') - } + def orientationbias_command = orientationbias ? orientationbias.collect{"--orientation-bias-artifact-priors $it"}.join(' ') : '' + def segmentation_command = segmentation ? segmentation.collect{"--tumor-segmentation $it"}.join(' ') : '' + def estimate_command = estimate ? " --contamination-estimate ${estimate} " : '' + def table_command = table ? " --contamination-table ${table} " : '' - def segmentation_options = '' - if (segmentation) { - segmentation_options = '--tumor-segmentation ' + segmentation.join(' --tumor-segmentation ') - } - - def contamination_options = contaminationest ? " --contamination-estimate ${contaminationest} " : '' - if (contaminationfile) { - contamination_options = '--contamination-table ' + contaminationfile.join(' --contamination-table ') - } def avail_mem = 3 if (!task.memory) { log.info '[GATK FilterMutectCalls] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' @@ -48,12 +39,14 @@ process GATK4_FILTERMUTECTCALLS { } """ gatk --java-options "-Xmx${avail_mem}g" FilterMutectCalls \\ - -R $fasta \\ - -V $vcf \\ - $orientationbias_options \\ - $segmentation_options \\ - $contamination_options \\ - -O ${prefix}.vcf.gz \\ + --variant $vcf \\ + --output ${prefix}.vcf.gz \\ + --reference $fasta \\ + $orientationbias_command \\ + $segmentation_command \\ + $estimate_command \\ + $table_command \\ + --tmp-dir . \\ $args cat <<-END_VERSIONS > versions.yml diff --git a/modules/gatk4/filtermutectcalls/meta.yml b/modules/gatk4/filtermutectcalls/meta.yml index 5182c89f..d1972d70 100644 --- a/modules/gatk4/filtermutectcalls/meta.yml +++ b/modules/gatk4/filtermutectcalls/meta.yml @@ -26,9 +26,9 @@ input: type: file description: compressed vcf file of mutect2calls pattern: "*.vcf.gz" - - tbi: + - vcf_tbi: type: file - description: Index of vcf file + description: Tabix index of vcf file pattern: "*vcf.gz.tbi" - stats: type: file @@ -42,13 +42,13 @@ input: type: list description: tables containing segmentation information for input vcf. Optional input. pattern: "*.segmentation.table" - - contaminationfile: + - table: type: list - description: table(s) containing contamination contamination data for input vcf. Optional input, takes priority over contaminationest. + description: table(s) containing contamination data for input vcf. Optional input, takes priority over estimate. pattern: "*.contamination.table" - - contaminationest: + - estimate: type: val - description: estimation of contamination value as a double. Optional input, will only be used if contaminationfile is not specified. + description: estimation of contamination value as a double. Optional input, will only be used if table is not specified. - fasta: type: file description: The reference fasta file @@ -82,3 +82,4 @@ output: authors: - "@GCJMackenzie" + - "@maxulysse" diff --git a/modules/gatk4/gatherbqsrreports/main.nf b/modules/gatk4/gatherbqsrreports/main.nf index 279f1ac8..1f5f2e1b 100644 --- a/modules/gatk4/gatherbqsrreports/main.nf +++ b/modules/gatk4/gatherbqsrreports/main.nf @@ -8,7 +8,7 @@ process GATK4_GATHERBQSRREPORTS { 'quay.io/biocontainers/gatk4:4.2.5.0--hdfd78af_0' }" input: - tuple val(meta), path(recal_table) + tuple val(meta), path(table) output: tuple val(meta), path("*.table"), emit: table @@ -20,7 +20,7 @@ process GATK4_GATHERBQSRREPORTS { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def input = recal_table.collect{"-I ${it}"}.join(' ') + def input_list = table.collect{"--input $it"}.join(' ') def avail_mem = 3 if (!task.memory) { @@ -29,12 +29,11 @@ process GATK4_GATHERBQSRREPORTS { avail_mem = task.memory.giga } """ - gatk --java-options "-Xmx${avail_mem}g" \\ - GatherBQSRReports \ - ${input} \ - --tmp-dir . \ - $args \ - --output ${prefix}.table + gatk --java-options "-Xmx${avail_mem}g" GatherBQSRReports \\ + $input_list \\ + --output ${prefix}.table \\ + --tmp-dir . \\ + $args cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/gatk4/gatherbqsrreports/meta.yml b/modules/gatk4/gatherbqsrreports/meta.yml index 62d008d2..99e74951 100644 --- a/modules/gatk4/gatherbqsrreports/meta.yml +++ b/modules/gatk4/gatherbqsrreports/meta.yml @@ -19,7 +19,7 @@ input: description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - recal_table: + - table: type: file description: File(s) containing BQSR table(s) pattern: "*.table" @@ -30,14 +30,14 @@ output: description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] + - table: + type: file + description: File containing joined BQSR table + pattern: "*.table" - versions: type: file description: File containing software versions pattern: "versions.yml" - - recal_table: - type: file - description: File containing joined BQSR table - pattern: "*.table" authors: - "@FriederikeHanssen" diff --git a/modules/gatk4/gatherpileupsummaries/main.nf b/modules/gatk4/gatherpileupsummaries/main.nf index 52e57127..f5e9cf22 100644 --- a/modules/gatk4/gatherpileupsummaries/main.nf +++ b/modules/gatk4/gatherpileupsummaries/main.nf @@ -10,11 +10,11 @@ process GATK4_GATHERPILEUPSUMMARIES { input: tuple val(meta), path(pileup) - path dict + path dict output: tuple val(meta), path("*.pileupsummaries.table"), emit: table - path "versions.yml" , emit: versions + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -22,7 +22,7 @@ process GATK4_GATHERPILEUPSUMMARIES { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def input = pileup.collect{ "-I ${it} " }.join(' ') + def input_list = pileup.collect{ "--I $it" }.join(' ') def avail_mem = 3 if (!task.memory) { @@ -31,11 +31,12 @@ process GATK4_GATHERPILEUPSUMMARIES { avail_mem = task.memory.giga } """ - gatk --java-options "-Xmx${avail_mem}g" \ - GatherPileupSummaries \ - --sequence-dictionary ${dict} \ - ${input} \ - -O ${prefix}.pileupsummaries.table + gatk --java-options "-Xmx${avail_mem}g" GatherPileupSummaries \\ + $input_list \\ + --O ${prefix}.pileupsummaries.table \\ + --sequence-dictionary $dict \\ + --tmp-dir . \\ + $args cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/gatk4/gatherpileupsummaries/meta.yml b/modules/gatk4/gatherpileupsummaries/meta.yml index 2dc92d55..823ea365 100644 --- a/modules/gatk4/gatherpileupsummaries/meta.yml +++ b/modules/gatk4/gatherpileupsummaries/meta.yml @@ -28,14 +28,15 @@ output: description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] + - table: + type: file + description: pileup summaries table file + pattern: "*.pileupsummaries.table" - versions: type: file description: File containing software versions pattern: "versions.yml" - - table: - type: file - description: Pileup file - pattern: "*.pileups.table" authors: - "@FriederikeHanssen" + - "@maxulysse" diff --git a/modules/gatk4/genomicsdbimport/main.nf b/modules/gatk4/genomicsdbimport/main.nf index d2d89ccc..d2b78899 100644 --- a/modules/gatk4/genomicsdbimport/main.nf +++ b/modules/gatk4/genomicsdbimport/main.nf @@ -8,13 +8,13 @@ process GATK4_GENOMICSDBIMPORT { 'quay.io/biocontainers/gatk4:4.2.5.0--hdfd78af_0' }" input: - tuple val(meta), path(vcf), path(tbi), path(intervalfile), val(intervalval), path(wspace) - val run_intlist - val run_updatewspace - val input_map + tuple val(meta), path(vcf), path(tbi), path(interval_file), val(interval_value), path(wspace) + val run_intlist + val run_updatewspace + val input_map output: - tuple val(meta), path("${prefix}") , optional:true, emit: genomicsdb + tuple val(meta), path("$prefix") , optional:true, emit: genomicsdb tuple val(meta), path("$updated_db") , optional:true, emit: updatedb tuple val(meta), path("*.interval_list"), optional:true, emit: intervallist path "versions.yml" , emit: versions @@ -27,22 +27,22 @@ process GATK4_GENOMICSDBIMPORT { prefix = task.ext.prefix ?: "${meta.id}" // settings for running default create gendb mode - inputs_command = input_map ? "--sample-name-map ${vcf[0]}" : "${'-V ' + vcf.join(' -V ')}" - dir_command = "--genomicsdb-workspace-path ${prefix}" - intervals_command = intervalfile ? " -L ${intervalfile} " : " -L ${intervalval} " + input_command = input_map ? "--sample-name-map ${vcf[0]}" : vcf.collect(){"--variant $it"}.join(' ') + + genomicsdb_command = "--genomicsdb-workspace-path ${prefix}" + interval_command = interval_file ? "--intervals ${interval_file}" : "--intervals ${interval_value}" // settings changed for running get intervals list mode if run_intlist is true if (run_intlist) { - inputs_command = '' - dir_command = "--genomicsdb-update-workspace-path ${wspace}" - intervals_command = "--output-interval-list-to-file ${prefix}.interval_list" + genomicsdb_command = "--genomicsdb-update-workspace-path ${wspace}" + interval_command = "--output-interval-list-to-file ${prefix}.interval_list" } - // settings changed for running update gendb mode. inputs_command same as default, update_db forces module to emit the updated gendb + // settings changed for running update gendb mode. input_command same as default, update_db forces module to emit the updated gendb if (run_updatewspace) { - dir_command = "--genomicsdb-update-workspace-path ${wspace}" - intervals_command = '' - updated_db = wspace.toString() + genomicsdb_command = "--genomicsdb-update-workspace-path ${wspace}" + interval_command = '' + updated_db = "${wspace}" } def avail_mem = 3 @@ -53,9 +53,10 @@ process GATK4_GENOMICSDBIMPORT { } """ gatk --java-options "-Xmx${avail_mem}g" GenomicsDBImport \\ - $inputs_command \\ - $dir_command \\ - $intervals_command \\ + $input_command \\ + $genomicsdb_command \\ + $interval_command \\ + --tmp-dir . \\ $args cat <<-END_VERSIONS > versions.yml diff --git a/modules/gatk4/genotypegvcfs/main.nf b/modules/gatk4/genotypegvcfs/main.nf index 4a42ad0a..0df88d66 100644 --- a/modules/gatk4/genotypegvcfs/main.nf +++ b/modules/gatk4/genotypegvcfs/main.nf @@ -10,10 +10,10 @@ process GATK4_GENOTYPEGVCFS { input: tuple val(meta), path(gvcf), path(gvcf_index), path(intervals), path(intervals_index) path fasta - path fasta_index - path fasta_dict + path fai + path dict path dbsnp - path dbsnp_index + path dbsnp_tbi output: tuple val(meta), path("*.vcf.gz"), emit: vcf @@ -26,9 +26,10 @@ process GATK4_GENOTYPEGVCFS { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def dbsnp_options = dbsnp ? "-D ${dbsnp}" : "" - def interval_options = intervals ? "-L ${intervals}" : "" - def gvcf_options = gvcf.name.endsWith(".vcf") || gvcf.name.endsWith(".vcf.gz") ? "$gvcf" : "gendb://$gvcf" + def gvcf_command = gvcf.name.endsWith(".vcf") || gvcf.name.endsWith(".vcf.gz") ? "$gvcf" : "gendb://$gvcf" + def dbsnp_command = dbsnp ? "--dbsnp $dbsnp" : "" + def interval_command = intervals ? "--intervals $intervals" : "" + def avail_mem = 3 if (!task.memory) { log.info '[GATK GenotypeGVCFs] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' @@ -36,14 +37,14 @@ process GATK4_GENOTYPEGVCFS { avail_mem = task.memory.giga } """ - gatk --java-options "-Xmx${avail_mem}g" \\ - GenotypeGVCFs \\ - $args \\ - $interval_options \\ - $dbsnp_options \\ - -R $fasta \\ - -V $gvcf_options \\ - -O ${prefix}.vcf.gz + gatk --java-options "-Xmx${avail_mem}g" GenotypeGVCFs \\ + --variant $gvcf_command \\ + --output ${prefix}.vcf.gz \\ + --reference $fasta \\ + $interval_command \\ + $dbsnp_command \\ + --tmp-dir . \\ + $args cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/gatk4/genotypegvcfs/meta.yml b/modules/gatk4/genotypegvcfs/meta.yml index f465f835..7bec10ed 100644 --- a/modules/gatk4/genotypegvcfs/meta.yml +++ b/modules/gatk4/genotypegvcfs/meta.yml @@ -21,10 +21,15 @@ input: Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - gvcf: - type: tuple of files + type: file description: | - Tuple of gVCF(.gz) file (first) and its index (second) or the path to a GenomicsDB (and empty) - pattern: ["*.{vcf,vcf.gz}", "*.{idx,tbi}"] + gVCF(.gz) file or to a GenomicsDB + pattern: "*.{vcf,vcf.gz}" + - gvcf_index: + type: file + description: | + index of gvcf file, or empty when providing GenomicsDB + pattern: "*.{idx,tbi}" - intervals: type: file description: Interval file with the genomic regions included in the library (optional) @@ -35,11 +40,11 @@ input: type: file description: Reference fasta file pattern: "*.fasta" - - fasta_index: + - fai: type: file description: Reference fasta index file pattern: "*.fai" - - fasta_dict: + - dict: type: file description: Reference fasta sequence dict file pattern: "*.dict" @@ -47,8 +52,8 @@ input: type: file description: dbSNP VCF file pattern: "*.vcf.gz" - - dbsnp_index: - type: tuple of files + - dbsnp_tbi: + type: file description: dbSNP VCF index file pattern: "*.tbi" @@ -73,3 +78,4 @@ output: authors: - "@santiagorevale" + - "@maxulysse" diff --git a/modules/gatk4/getpileupsummaries/main.nf b/modules/gatk4/getpileupsummaries/main.nf index 5395c068..c0946f71 100644 --- a/modules/gatk4/getpileupsummaries/main.nf +++ b/modules/gatk4/getpileupsummaries/main.nf @@ -9,15 +9,15 @@ process GATK4_GETPILEUPSUMMARIES { input: tuple val(meta), path(input), path(index), path(intervals) - path fasta - path fai - path dict - path variants - path variants_tbi + path fasta + path fai + path dict + path variants + path variants_tbi output: tuple val(meta), path('*.pileups.table'), emit: table - path "versions.yml" , emit: versions + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -25,8 +25,8 @@ process GATK4_GETPILEUPSUMMARIES { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def sitesCommand = intervals ? " -L ${intervals} " : " -L ${variants} " - def reference = fasta ? " -R ${fasta}" :"" + def interval_command = intervals ? "--intervals $intervals" : "" + def reference_command = fasta ? "--reference $fasta" : '' def avail_mem = 3 if (!task.memory) { @@ -36,11 +36,12 @@ process GATK4_GETPILEUPSUMMARIES { } """ gatk --java-options "-Xmx${avail_mem}g" GetPileupSummaries \\ - -I $input \\ - -V $variants \\ - $sitesCommand \\ - ${reference} \\ - -O ${prefix}.pileups.table \\ + --input $input \\ + --variant $variants \\ + --output ${prefix}.pileups.table \\ + $reference_command \\ + $sites_command \\ + --tmp-dir . \\ $args cat <<-END_VERSIONS > versions.yml diff --git a/modules/gatk4/haplotypecaller/main.nf b/modules/gatk4/haplotypecaller/main.nf index 33871fcf..57f69ecd 100644 --- a/modules/gatk4/haplotypecaller/main.nf +++ b/modules/gatk4/haplotypecaller/main.nf @@ -9,11 +9,11 @@ process GATK4_HAPLOTYPECALLER { input: tuple val(meta), path(input), path(input_index), path(intervals) - path fasta - path fai - path dict - path dbsnp - path dbsnp_tbi + path fasta + path fai + path dict + path dbsnp + path dbsnp_tbi output: tuple val(meta), path("*.vcf.gz"), emit: vcf @@ -26,25 +26,24 @@ process GATK4_HAPLOTYPECALLER { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def interval_option = intervals ? "-L ${intervals}" : "" - def dbsnp_option = dbsnp ? "-D ${dbsnp}" : "" - def avail_mem = 3 + def dbsnp_command = dbsnp ? "--dbsnp $dbsnp" : "" + def interval_command = intervals ? "--intervals $intervals" : "" + + def avail_mem = 3 if (!task.memory) { log.info '[GATK HaplotypeCaller] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' } else { avail_mem = task.memory.giga } """ - gatk \\ - --java-options "-Xmx${avail_mem}g" \\ - HaplotypeCaller \\ - -R $fasta \\ - -I $input \\ - ${dbsnp_option} \\ - ${interval_option} \\ - -O ${prefix}.vcf.gz \\ - $args \\ - --tmp-dir . + gatk --java-options "-Xmx${avail_mem}g" HaplotypeCaller \\ + --input $input \\ + --output ${prefix}.vcf.gz \\ + --reference $fasta \\ + $dbsnp_command \\ + $interval_command \\ + --tmp-dir . \\ + $args cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/gatk4/indexfeaturefile/main.nf b/modules/gatk4/indexfeaturefile/main.nf index 275e51f5..90ff94e6 100644 --- a/modules/gatk4/indexfeaturefile/main.nf +++ b/modules/gatk4/indexfeaturefile/main.nf @@ -19,6 +19,7 @@ process GATK4_INDEXFEATUREFILE { script: def args = task.ext.args ?: '' + def avail_mem = 3 if (!task.memory) { log.info '[GATK IndexFeatureFile] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' @@ -26,10 +27,10 @@ process GATK4_INDEXFEATUREFILE { avail_mem = task.memory.giga } """ - gatk --java-options "-Xmx${avail_mem}g" \\ - IndexFeatureFile \\ - $args \\ - -I $feature_file + gatk --java-options "-Xmx${avail_mem}g" IndexFeatureFile \\ + --input $feature_file \\ + --tmp-dir . \\ + $args cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/gatk4/intervallisttobed/main.nf b/modules/gatk4/intervallisttobed/main.nf index 24d20be1..c0f9df63 100644 --- a/modules/gatk4/intervallisttobed/main.nf +++ b/modules/gatk4/intervallisttobed/main.nf @@ -8,7 +8,7 @@ process GATK4_INTERVALLISTTOBED { 'quay.io/biocontainers/gatk4:4.2.5.0--hdfd78af_0' }" input: - tuple val(meta), path(interval) + tuple val(meta), path(intervals) output: tuple val(meta), path("*.bed"), emit: bed @@ -29,8 +29,9 @@ process GATK4_INTERVALLISTTOBED { } """ gatk --java-options "-Xmx${avail_mem}g" IntervalListToBed \\ - --INPUT ${interval} \\ + --INPUT $intervals \\ --OUTPUT ${prefix}.bed \\ + --TMP_DIR . \\ $args cat <<-END_VERSIONS > versions.yml diff --git a/modules/gatk4/intervallisttools/main.nf b/modules/gatk4/intervallisttools/main.nf index 82c3222c..1b9b37f4 100644 --- a/modules/gatk4/intervallisttools/main.nf +++ b/modules/gatk4/intervallisttools/main.nf @@ -8,11 +8,11 @@ process GATK4_INTERVALLISTTOOLS { 'quay.io/biocontainers/gatk4:4.2.5.0--hdfd78af_0' }" input: - tuple val(meta), path(interval_list) + tuple val(meta), path(intervals) output: tuple val(meta), path("*_split/*/*.interval_list"), emit: interval_list - path "versions.yml" , emit: versions + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -20,6 +20,7 @@ process GATK4_INTERVALLISTTOOLS { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def avail_mem = 3 if (!task.memory) { log.info '[GATK IntervalListTools] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' @@ -30,10 +31,10 @@ process GATK4_INTERVALLISTTOOLS { mkdir ${prefix}_split - gatk --java-options "-Xmx${avail_mem}g" \\ - IntervalListTools \\ - -I ${interval_list} \\ - -O ${prefix}_split \\ + gatk --java-options "-Xmx${avail_mem}g" IntervalListTools \\ + --INPUT $intervals \\ + --OUTPUT ${prefix}_split \\ + --TMP_DIR . \\ $args python3 < inputs_list.add(" -I " + a) } + def input_list = f1r2.collect{"--input $it"}.join(' ') + def avail_mem = 3 if (!task.memory) { log.info '[GATK LearnReadOrientationModel] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' @@ -29,10 +29,10 @@ process GATK4_LEARNREADORIENTATIONMODEL { avail_mem = task.memory.giga } """ - gatk --java-options "-Xmx${avail_mem}g" \\ - LearnReadOrientationModel \\ - ${inputs_list.join(' ')} \\ - -O ${prefix}.tar.gz \\ + gatk --java-options "-Xmx${avail_mem}g" LearnReadOrientationModel \\ + $input_list \\ + --output ${prefix}.tar.gz \\ + --tmp-dir . \\ $args cat <<-END_VERSIONS > versions.yml diff --git a/modules/gatk4/markduplicates/main.nf b/modules/gatk4/markduplicates/main.nf index 6b150655..e8a98156 100644 --- a/modules/gatk4/markduplicates/main.nf +++ b/modules/gatk4/markduplicates/main.nf @@ -8,7 +8,7 @@ process GATK4_MARKDUPLICATES { 'quay.io/biocontainers/gatk4:4.2.5.0--hdfd78af_0' }" input: - tuple val(meta), path(bams) + tuple val(meta), path(bam) output: tuple val(meta), path("*.bam") , emit: bam @@ -22,7 +22,8 @@ process GATK4_MARKDUPLICATES { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def bam_list = bams.collect(){ bam -> "--INPUT ".concat(bam.toString()) }.join(" ") + def input_list = bam.collect{"--INPUT $it"}.join(' ') + def avail_mem = 3 if (!task.memory) { log.info '[GATK MarkDuplicates] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' @@ -31,11 +32,10 @@ process GATK4_MARKDUPLICATES { } """ gatk --java-options "-Xmx${avail_mem}g" MarkDuplicates \\ - $bam_list \\ + $input_list \\ + --OUTPUT ${prefix}.bam \\ --METRICS_FILE ${prefix}.metrics \\ --TMP_DIR . \\ - --CREATE_INDEX true \\ - --OUTPUT ${prefix}.bam \\ $args cat <<-END_VERSIONS > versions.yml diff --git a/modules/gatk4/markduplicates/meta.yml b/modules/gatk4/markduplicates/meta.yml index a7dbe8ec..93877f47 100644 --- a/modules/gatk4/markduplicates/meta.yml +++ b/modules/gatk4/markduplicates/meta.yml @@ -49,3 +49,4 @@ output: authors: - "@ajodeh-juma" - "@FriederikeHanssen" + - "@maxulysse" diff --git a/modules/gatk4/markduplicatesspark/main.nf b/modules/gatk4/markduplicatesspark/main.nf new file mode 100644 index 00000000..01a19e5c --- /dev/null +++ b/modules/gatk4/markduplicatesspark/main.nf @@ -0,0 +1,50 @@ +process GATK4_MARKDUPLICATES_SPARK { + tag "$meta.id" + label 'process_high' + + conda (params.enable_conda ? "bioconda::gatk4=4.2.3.0" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gatk4:4.2.3.0--hdfd78af_0' : + 'broadinstitute/gatk:4.2.3.0' }" + + input: + tuple val(meta), path(bam) + path fasta + path fasta_fai + path dict + + output: + tuple val(meta), path("${prefix}"), emit: output + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def input_list = bam.collect{"--INPUT $it"}.join(' ') + + def avail_mem = 3 + if (!task.memory) { + log.info '[GATK MarkDuplicatesSpark] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = task.memory.giga + } + """ + export SPARK_USER=spark3 + + gatk --java-options "-Xmx${avail_mem}g" MarkDuplicatesSpark \\ + $input_list \\ + --output $prefix \\ + --reference $fasta \\ + --spark-master local[${task.cpus}] \\ + --tmp-dir . \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/gatk4/markduplicatesspark/meta.yml b/modules/gatk4/markduplicatesspark/meta.yml new file mode 100644 index 00000000..bf3e02ba --- /dev/null +++ b/modules/gatk4/markduplicatesspark/meta.yml @@ -0,0 +1,60 @@ +name: gatk4_markduplicates_spark +description: This tool locates and tags duplicate reads in a BAM or SAM file, where duplicate reads are defined as originating from a single fragment of DNA. +keywords: + - markduplicates + - bam + - sort +tools: + - gatk4: + description: + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/articles/360037052812-MarkDuplicates-Picard- + tool_dev_url: https://github.com/broadinstitute/gatk + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["MIT"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: Sorted BAM file + pattern: "*.{bam}" + - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + - fai: + type: file + description: Index of reference fasta file + pattern: "*.fasta.fai" + - dict: + type: file + description: GATK sequence dictionary + pattern: "*.dict" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - bam: + type: file + description: Marked duplicates BAM file + pattern: "*.{bam}" + +authors: + - "@ajodeh-juma" + - "@FriederikeHanssen" + - "@maxulysse" diff --git a/modules/gatk4/mergebamalignment/main.nf b/modules/gatk4/mergebamalignment/main.nf index cfeb23dd..7ba9ccda 100644 --- a/modules/gatk4/mergebamalignment/main.nf +++ b/modules/gatk4/mergebamalignment/main.nf @@ -22,6 +22,7 @@ process GATK4_MERGEBAMALIGNMENT { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def avail_mem = 3 if (!task.memory) { log.info '[GATK MergeBamAlignment] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' @@ -30,10 +31,11 @@ process GATK4_MERGEBAMALIGNMENT { } """ gatk --java-options "-Xmx${avail_mem}g" MergeBamAlignment \\ - -ALIGNED $aligned \\ - -UNMAPPED $unmapped \\ - -R $fasta \\ - -O ${prefix}.bam \\ + --UNMAPPED_BAM $unmapped \\ + --ALIGNED_BAM $aligned \\ + --OUTPUT ${prefix}.bam \\ + --REFERENCE_SEQUENCE $fasta \\ + --TMP_DIR . \\ $args cat <<-END_VERSIONS > versions.yml diff --git a/modules/gatk4/mergemutectstats/main.nf b/modules/gatk4/mergemutectstats/main.nf index bb9f91fb..409e06f6 100644 --- a/modules/gatk4/mergemutectstats/main.nf +++ b/modules/gatk4/mergemutectstats/main.nf @@ -9,6 +9,7 @@ process GATK4_MERGEMUTECTSTATS { input: tuple val(meta), path(stats) + output: tuple val(meta), path("*.vcf.gz.stats"), emit: stats path "versions.yml" , emit: versions @@ -19,7 +20,7 @@ process GATK4_MERGEMUTECTSTATS { script: def args = task.ext.args ?: '' prefix = task.ext.prefix ?: "${meta.id}" - def input = stats.collect{ " -stats ${it} "}.join() + def input_list = stats.collect{ "--stats ${it}"}.join(' ') def avail_mem = 3 if (!task.memory) { @@ -29,8 +30,9 @@ process GATK4_MERGEMUTECTSTATS { } """ gatk --java-options "-Xmx${avail_mem}g" MergeMutectStats \\ - ${input} \\ - -output ${meta.id}.vcf.gz.stats \\ + $input_list \\ + --output ${prefix}.vcf.gz.stats \\ + --tmp-dir . \\ $args cat <<-END_VERSIONS > versions.yml diff --git a/modules/gatk4/mergevcfs/main.nf b/modules/gatk4/mergevcfs/main.nf index 54e38667..06ff3acb 100644 --- a/modules/gatk4/mergevcfs/main.nf +++ b/modules/gatk4/mergevcfs/main.nf @@ -8,9 +8,8 @@ process GATK4_MERGEVCFS { 'quay.io/biocontainers/gatk4:4.2.5.0--hdfd78af_0' }" input: - tuple val(meta), path(vcfs) - path ref_dict - val use_ref_dict + tuple val(meta), path(vcf) + path dict output: tuple val(meta), path('*.vcf.gz'), emit: vcf @@ -22,13 +21,9 @@ process GATK4_MERGEVCFS { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def input_list = vcf.collect{ "--INPUT $it"}.join(' ') + def reference_command = dict ? "--SEQUENCE_DICTIONARY $dict" : "" - // Make list of VCFs to merge - def input = "" - for (vcf in vcfs) { - input += " I=${vcf}" - } - def ref = use_ref_dict ? "D=${ref_dict}" : "" def avail_mem = 3 if (!task.memory) { log.info '[GATK MergeVcfs] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' @@ -37,9 +32,10 @@ process GATK4_MERGEVCFS { } """ gatk --java-options "-Xmx${avail_mem}g" MergeVcfs \\ - $input \\ - O=${prefix}.vcf.gz \\ - $ref \\ + $input_list \\ + --OUTPUT ${prefix}.vcf.gz \\ + $reference_command \\ + --TMP_DIR . \\ $args cat <<-END_VERSIONS > versions.yml diff --git a/modules/gatk4/mutect2/main.nf b/modules/gatk4/mutect2/main.nf index 568d3393..4a1f5768 100644 --- a/modules/gatk4/mutect2/main.nf +++ b/modules/gatk4/mutect2/main.nf @@ -8,10 +8,7 @@ process GATK4_MUTECT2 { 'quay.io/biocontainers/gatk4:4.2.5.0--hdfd78af_0' }" input: - tuple val(meta) , path(input) , path(input_index) , path(intervals), val(which_norm) - val run_single - val run_pon - val run_mito + tuple val(meta), path(input), path(input_index), path(intervals) path fasta path fai path dict @@ -33,28 +30,10 @@ process GATK4_MUTECT2 { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def panels_command = '' - def normals_command = '' - - def inputs_command = '-I ' + input.join( ' -I ') - def interval = intervals ? "-L ${intervals}" : "" - - if(run_pon) { - panels_command = '' - normals_command = '' - - } else if(run_single) { - panels_command = " --germline-resource $germline_resource --panel-of-normals $panel_of_normals" - normals_command = '' - - } else if(run_mito){ - panels_command = "-L ${intervals} --mitochondria-mode" - normals_command = '' - - } else { - panels_command = " --germline-resource $germline_resource --panel-of-normals $panel_of_normals --f1r2-tar-gz ${prefix}.f1r2.tar.gz" - normals_command = '-normal ' + which_norm.join( ' -normal ') - } + def inputs = input.collect{ "--input $it"}.join(" ") + def interval_command = intervals ? "--intervals $intervals" : "" + def pon_command = panel_of_normals ? "--panel-of-normals $panel_of_normals" : "" + def gr_command = germline_resource ? "--germline-resource $germline_resource" : "" def avail_mem = 3 if (!task.memory) { @@ -64,12 +43,13 @@ process GATK4_MUTECT2 { } """ gatk --java-options "-Xmx${avail_mem}g" Mutect2 \\ - -R ${fasta} \\ - ${inputs_command} \\ - ${normals_command} \\ - ${panels_command} \\ - ${interval} \\ - -O ${prefix}.vcf.gz \\ + $inputs \\ + --output ${prefix}.vcf.gz \\ + --reference $fasta \\ + $pon_command \\ + $gr_command \\ + $interval_command \\ + --tmp-dir . \\ $args cat <<-END_VERSIONS > versions.yml diff --git a/modules/gatk4/mutect2/meta.yml b/modules/gatk4/mutect2/meta.yml index 69a4acfe..aa0a02aa 100644 --- a/modules/gatk4/mutect2/meta.yml +++ b/modules/gatk4/mutect2/meta.yml @@ -34,22 +34,6 @@ input: type: File/string description: Specify region the tools is run on. pattern: ".{bed,interval_list}/chrM" - - which_norm: - type: list - description: optional list of sample headers contained in the normal sample bam files (these are required for tumor_normal_pair mode) - pattern: "testN" - - run_single: - type: boolean - description: Specify whether or not to run in tumor_single mode instead of tumor_normal_pair mode (will be ignored if run_pon is also true) - pattern: "true/false" - - run_pon: - type: boolean - description: Specify whether or not to run in panel_of_normal mode instead of tumor_normal_pair mode - pattern: "true/false" - - run_mito: - type: boolean - description: Specify whether or not to run in mitochondria-mode instead of tumor_normal_pair mode - pattern: "true/false" - fasta: type: file description: The reference fasta file diff --git a/modules/gatk4/revertsam/main.nf b/modules/gatk4/revertsam/main.nf index b3bf9f95..4e8e9ddc 100644 --- a/modules/gatk4/revertsam/main.nf +++ b/modules/gatk4/revertsam/main.nf @@ -20,6 +20,7 @@ process GATK4_REVERTSAM { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def avail_mem = 3 if (!task.memory) { log.info '[GATK RevertSam] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' @@ -28,8 +29,9 @@ process GATK4_REVERTSAM { } """ gatk --java-options "-Xmx${avail_mem}g" RevertSam \\ - I=$bam \\ - O=${prefix}.reverted.bam \\ + --INPUT $bam \\ + --OUTPUT ${prefix}.reverted.bam \\ + --TMP_DIR . \\ $args cat <<-END_VERSIONS > versions.yml diff --git a/modules/gatk4/samtofastq/main.nf b/modules/gatk4/samtofastq/main.nf index 53e5013f..8553e419 100644 --- a/modules/gatk4/samtofastq/main.nf +++ b/modules/gatk4/samtofastq/main.nf @@ -20,7 +20,8 @@ process GATK4_SAMTOFASTQ { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def output = meta.single_end ? "FASTQ=${prefix}.fastq.gz" : "FASTQ=${prefix}_1.fastq.gz SECOND_END_FASTQ=${prefix}_2.fastq.gz" + def output = meta.single_end ? "--FASTQ ${prefix}.fastq.gz" : "--FASTQ ${prefix}_1.fastq.gz --SECOND_END_FASTQ ${prefix}_2.fastq.gz" + def avail_mem = 3 if (!task.memory) { log.info '[GATK SamToFastq] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' @@ -29,8 +30,9 @@ process GATK4_SAMTOFASTQ { } """ gatk --java-options "-Xmx${avail_mem}g" SamToFastq \\ - I=$bam \\ + --INPUT $bam \\ $output \\ + --TMP_DIR . \\ $args cat <<-END_VERSIONS > versions.yml diff --git a/modules/gatk4/selectvariants/main.nf b/modules/gatk4/selectvariants/main.nf index fd750a9b..22779211 100644 --- a/modules/gatk4/selectvariants/main.nf +++ b/modules/gatk4/selectvariants/main.nf @@ -21,6 +21,7 @@ process GATK4_SELECTVARIANTS { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def avail_mem = 3 if (!task.memory) { log.info '[GATK VariantFiltration] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' @@ -29,8 +30,9 @@ process GATK4_SELECTVARIANTS { } """ gatk --java-options "-Xmx${avail_mem}G" SelectVariants \\ - -V $vcf \\ - -O ${prefix}.selectvariants.vcf.gz \\ + --variant $vcf \\ + --output ${prefix}.selectvariants.vcf.gz \\ + --tmp-dir . \\ $args cat <<-END_VERSIONS > versions.yml diff --git a/modules/gatk4/splitncigarreads/main.nf b/modules/gatk4/splitncigarreads/main.nf index fdd1d974..f7c559d9 100644 --- a/modules/gatk4/splitncigarreads/main.nf +++ b/modules/gatk4/splitncigarreads/main.nf @@ -23,6 +23,7 @@ process GATK4_SPLITNCIGARREADS { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def avail_mem = 3 if (!task.memory) { log.info '[GATK SplitNCigarReads] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' @@ -31,9 +32,10 @@ process GATK4_SPLITNCIGARREADS { } """ gatk --java-options "-Xmx${avail_mem}g" SplitNCigarReads \\ - -R $fasta \\ - -I $bam \\ - -O ${prefix}.bam \\ + --input $bam \\ + --output ${prefix}.bam \\ + --reference $fasta \\ + --tmp-dir . \\ $args cat <<-END_VERSIONS > versions.yml diff --git a/modules/gatk4/variantfiltration/main.nf b/modules/gatk4/variantfiltration/main.nf index 68f3d636..6beb87ef 100644 --- a/modules/gatk4/variantfiltration/main.nf +++ b/modules/gatk4/variantfiltration/main.nf @@ -8,7 +8,7 @@ process GATK4_VARIANTFILTRATION { 'quay.io/biocontainers/gatk4:4.2.5.0--hdfd78af_0' }" input: - tuple val(meta), path(vcf), path(vcf_tbi) + tuple val(meta), path(vcf), path(tbi) path fasta path fai path dict @@ -24,6 +24,7 @@ process GATK4_VARIANTFILTRATION { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def avail_mem = 3 if (!task.memory) { log.info '[GATK VariantFiltration] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' @@ -32,9 +33,10 @@ process GATK4_VARIANTFILTRATION { } """ gatk --java-options "-Xmx${avail_mem}G" VariantFiltration \\ - -R $fasta \\ - -V $vcf \\ - -O ${prefix}.vcf.gz \\ + --variant $vcf \\ + --output ${prefix}.vcf.gz \\ + --reference $fasta \\ + --tmp-dir . \\ $args cat <<-END_VERSIONS > versions.yml diff --git a/modules/gatk4/variantrecalibrator/main.nf b/modules/gatk4/variantrecalibrator/main.nf index 31c9efbd..cdcc1221 100644 --- a/modules/gatk4/variantrecalibrator/main.nf +++ b/modules/gatk4/variantrecalibrator/main.nf @@ -8,11 +8,11 @@ process GATK4_VARIANTRECALIBRATOR { 'quay.io/biocontainers/gatk4:4.2.5.0--hdfd78af_0' }" input: - tuple val(meta), path(vcf) , path(tbi) - path fasta - path fai - path dict - tuple path(resvcfs), path(restbis), val(reslabels) + tuple val(meta), path(vcf), path(tbi) + tuple path(vcfs), path(tbis), val(labels) + path fasta + path fai + path dict output: tuple val(meta), path("*.recal") , emit: recal @@ -27,8 +27,8 @@ process GATK4_VARIANTRECALIBRATOR { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - refCommand = fasta ? "-R ${fasta} " : '' - resourceCommand = '--resource:' + reslabels.join( ' --resource:') + def reference_command = fasta ? "--reference $fasta " : '' + def resource_command = labels.collect{"--resource:$it"}.join(' ') def avail_mem = 3 if (!task.memory) { @@ -38,11 +38,12 @@ process GATK4_VARIANTRECALIBRATOR { } """ gatk --java-options "-Xmx${avail_mem}g" VariantRecalibrator \\ - ${refCommand} \\ - -V ${vcf} \\ - -O ${prefix}.recal \\ + --variant $vcf \\ + --output ${prefix}.recal \\ --tranches-file ${prefix}.tranches \\ - ${resourceCommand} \\ + $reference_command \\ + $resource_command \\ + --tmp-dir . \\ $args cat <<-END_VERSIONS > versions.yml diff --git a/tests/modules/gatk4/applybqsr/test.yml b/tests/modules/gatk4/applybqsr/test.yml index 4520c34b..eaf1a08e 100644 --- a/tests/modules/gatk4/applybqsr/test.yml +++ b/tests/modules/gatk4/applybqsr/test.yml @@ -7,7 +7,6 @@ - path: output/gatk4/test.bam md5sum: d088422be886dc8507ff97fcc7dd968a - path: output/gatk4/versions.yml - md5sum: d5c6455d8a77aecc63f87c795fc3443e - name: gatk4 applybqsr test_gatk4_applybqsr_intervals command: nextflow run tests/modules/gatk4/applybqsr -entry test_gatk4_applybqsr_intervals -c tests/config/nextflow.config -c ./tests/modules/gatk4/applybqsr/nextflow.config @@ -18,7 +17,6 @@ - path: output/gatk4/test.bam md5sum: 4bfa18d651abd945e240b05e70107716 - path: output/gatk4/versions.yml - md5sum: cb4cb8a62e117b4adc643ae47883d53c - name: gatk4 applybqsr test_gatk4_applybqsr_cram command: nextflow run tests/modules/gatk4/applybqsr -entry test_gatk4_applybqsr_cram -c tests/config/nextflow.config -c ./tests/modules/gatk4/applybqsr/nextflow.config @@ -29,4 +27,3 @@ - path: output/gatk4/test.cram md5sum: 2e0bca197af4f043a4a85152e6edbe04 - path: output/gatk4/versions.yml - md5sum: 1efaa18be943bab4e4c54191d6eaa260 diff --git a/tests/modules/gatk4/applybqsrspark/main.nf b/tests/modules/gatk4/applybqsrspark/main.nf new file mode 100644 index 00000000..ee1f88dd --- /dev/null +++ b/tests/modules/gatk4/applybqsrspark/main.nf @@ -0,0 +1,47 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { GATK4_APPLYBQSR_SPARK } from '../../../../modules/gatk4/applybqsrspark/main.nf' + +workflow test_gatk4_applybqsr_spark { + input = [ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_baserecalibrator_table'], checkIfExists: true), + [] + ] + fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true) + + GATK4_APPLYBQSR_SPARK ( input, fasta, fai, dict ) +} + +workflow test_gatk4_applybqsr_spark_intervals { + input = [ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_baserecalibrator_table'], checkIfExists: true), + file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true) + ] + fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true) + + GATK4_APPLYBQSR_SPARK ( input, fasta, fai, dict ) +} + +workflow test_gatk4_applybqsr_spark_cram { + input = [ [ id:'test' ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_baserecalibrator_table'], checkIfExists: true), + file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true) + ] + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) + + GATK4_APPLYBQSR_SPARK ( input, fasta, fai, dict ) +} diff --git a/tests/modules/gatk4/applybqsrspark/nextflow.config b/tests/modules/gatk4/applybqsrspark/nextflow.config new file mode 100644 index 00000000..8730f1c4 --- /dev/null +++ b/tests/modules/gatk4/applybqsrspark/nextflow.config @@ -0,0 +1,5 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + +} diff --git a/tests/modules/gatk4/applybqsrspark/test.yml b/tests/modules/gatk4/applybqsrspark/test.yml new file mode 100644 index 00000000..d230c000 --- /dev/null +++ b/tests/modules/gatk4/applybqsrspark/test.yml @@ -0,0 +1,29 @@ +- name: gatk4 applybqsr test_gatk4_applybqsr_spark + command: nextflow run tests/modules/gatk4/applybqsrspark -entry test_gatk4_applybqsr_spark -c tests/config/nextflow.config -c ./tests/modules/gatk4/applybqsrspark/nextflow.config + tags: + - gatk4 + - gatk4/applybqsrspark + files: + - path: output/gatk4/test.bam + md5sum: d088422be886dc8507ff97fcc7dd968a + - path: output/gatk4/versions.yml + +- name: gatk4 applybqsr test_gatk4_applybqsr_spark_intervals + command: nextflow run tests/modules/gatk4/applybqsrspark -entry test_gatk4_applybqsr_spark_intervals -c tests/config/nextflow.config -c ./tests/modules/gatk4/applybqsrspark/nextflow.config + tags: + - gatk4 + - gatk4/applybqsrspark + files: + - path: output/gatk4/test.bam + md5sum: 4bfa18d651abd945e240b05e70107716 + - path: output/gatk4/versions.yml + +- name: gatk4 applybqsr test_gatk4_applybqsr_spark_cram + command: nextflow run tests/modules/gatk4/applybqsrspark -entry test_gatk4_applybqsr_spark_cram -c tests/config/nextflow.config -c ./tests/modules/gatk4/applybqsrspark/nextflow.config + tags: + - gatk4 + - gatk4/applybqsrspark + files: + - path: output/gatk4/test.cram + md5sum: 2e0bca197af4f043a4a85152e6edbe04 + - path: output/gatk4/versions.yml diff --git a/tests/modules/gatk4/applyvqsr/test.yml b/tests/modules/gatk4/applyvqsr/test.yml index 7cb91c43..5b367bcc 100644 --- a/tests/modules/gatk4/applyvqsr/test.yml +++ b/tests/modules/gatk4/applyvqsr/test.yml @@ -7,7 +7,6 @@ - path: output/gatk4/test.vcf.gz - path: output/gatk4/test.vcf.gz.tbi - path: output/gatk4/versions.yml - md5sum: ce9c443375683e7f2958fe958759ad29 - name: gatk4 applyvqsr test_gatk4_applyvqsr_allele_specific command: nextflow run tests/modules/gatk4/applyvqsr -entry test_gatk4_applyvqsr_allele_specific -c tests/config/nextflow.config -c ./tests/modules/gatk4/applyvqsr/nextflow.config @@ -18,4 +17,3 @@ - path: output/gatk4/test.vcf.gz - path: output/gatk4/test.vcf.gz.tbi - path: output/gatk4/versions.yml - md5sum: 521353d12d576de2864f1d18a3e54f14 diff --git a/tests/modules/gatk4/baserecalibrator/test.yml b/tests/modules/gatk4/baserecalibrator/test.yml index 163fac08..ec103dd4 100644 --- a/tests/modules/gatk4/baserecalibrator/test.yml +++ b/tests/modules/gatk4/baserecalibrator/test.yml @@ -6,6 +6,7 @@ files: - path: output/gatk4/test.table md5sum: e2e43abdc0c943c1a54dae816d0b9ea7 + - path: output/gatk4/versions.yml - name: gatk4 baserecalibrator test_gatk4_baserecalibrator_cram command: nextflow run ./tests/modules/gatk4/baserecalibrator -entry test_gatk4_baserecalibrator_cram -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/baserecalibrator/nextflow.config @@ -15,6 +16,7 @@ files: - path: output/gatk4/test.table md5sum: 35d89a3811aa31711fc9815b6b80e6ec + - path: output/gatk4/versions.yml - name: gatk4 baserecalibrator test_gatk4_baserecalibrator_intervals command: nextflow run ./tests/modules/gatk4/baserecalibrator -entry test_gatk4_baserecalibrator_intervals -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/baserecalibrator/nextflow.config @@ -24,6 +26,7 @@ files: - path: output/gatk4/test.table md5sum: 9ecb5f00a2229291705addc09c0ec231 + - path: output/gatk4/versions.yml - name: gatk4 baserecalibrator test_gatk4_baserecalibrator_multiple_sites command: nextflow run ./tests/modules/gatk4/baserecalibrator -entry test_gatk4_baserecalibrator_multiple_sites -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/baserecalibrator/nextflow.config @@ -33,3 +36,4 @@ files: - path: output/gatk4/test.table md5sum: e2e43abdc0c943c1a54dae816d0b9ea7 + - path: output/gatk4/versions.yml diff --git a/tests/modules/gatk4/baserecalibratorspark/main.nf b/tests/modules/gatk4/baserecalibratorspark/main.nf new file mode 100644 index 00000000..8419e16b --- /dev/null +++ b/tests/modules/gatk4/baserecalibratorspark/main.nf @@ -0,0 +1,69 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { GATK4_BASERECALIBRATOR_SPARK } from '../../../../modules/gatk4/baserecalibratorspark/main.nf' + +workflow test_gatk4_baserecalibrator_spark { + input = [ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true), + [] + ] + fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true) + sites = file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true) + sites_tbi = file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true) + + GATK4_BASERECALIBRATOR_SPARK ( input, fasta, fai, dict, sites, sites_tbi ) +} + +workflow test_gatk4_baserecalibrator_spark_cram { + input = [ [ id:'test' ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true), + [] + ] + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) + sites = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz'], checkIfExists: true) + sites_tbi = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz_tbi'], checkIfExists: true) + + GATK4_BASERECALIBRATOR_SPARK ( input, fasta, fai, dict, sites, sites_tbi ) +} + +workflow test_gatk4_baserecalibrator_spark_intervals { + input = [ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true), + file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true) + ] + fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true) + sites = file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true) + sites_tbi = file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true) + + GATK4_BASERECALIBRATOR_SPARK ( input, fasta, fai, dict, sites, sites_tbi ) +} + +workflow test_gatk4_baserecalibrator_spark_multiple_sites { + input = [ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true), + [] + ] + fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true) + sites = [ file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test2_vcf_gz'], checkIfExists: true) + ] + sites_tbi = [ file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test2_vcf_gz_tbi'], checkIfExists: true) + ] + + GATK4_BASERECALIBRATOR_SPARK ( input, fasta, fai, dict, sites, sites_tbi ) +} diff --git a/tests/modules/gatk4/baserecalibratorspark/nextflow.config b/tests/modules/gatk4/baserecalibratorspark/nextflow.config new file mode 100644 index 00000000..8730f1c4 --- /dev/null +++ b/tests/modules/gatk4/baserecalibratorspark/nextflow.config @@ -0,0 +1,5 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + +} diff --git a/tests/modules/gatk4/baserecalibratorspark/test.yml b/tests/modules/gatk4/baserecalibratorspark/test.yml new file mode 100644 index 00000000..6eb9d91d --- /dev/null +++ b/tests/modules/gatk4/baserecalibratorspark/test.yml @@ -0,0 +1,39 @@ +- name: gatk4 baserecalibrator test_gatk4_baserecalibrator_spark + command: nextflow run ./tests/modules/gatk4/baserecalibratorspark -entry test_gatk4_baserecalibrator_spark -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/baserecalibratorspark/nextflow.config + tags: + - gatk4 + - gatk4/baserecalibratorspark + files: + - path: output/gatk4/test.table + md5sum: e2e43abdc0c943c1a54dae816d0b9ea7 + - path: output/gatk4/versions.yml + +- name: gatk4 baserecalibrator test_gatk4_baserecalibrator_spark_cram + command: nextflow run ./tests/modules/gatk4/baserecalibratorspark -entry test_gatk4_baserecalibrator_spark_cram -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/baserecalibratorspark/nextflow.config + tags: + - gatk4 + - gatk4/baserecalibratorspark + files: + - path: output/gatk4/test.table + md5sum: 35d89a3811aa31711fc9815b6b80e6ec + - path: output/gatk4/versions.yml + +- name: gatk4 baserecalibrator test_gatk4_baserecalibrator_spark_intervals + command: nextflow run ./tests/modules/gatk4/baserecalibratorspark -entry test_gatk4_baserecalibrator_spark_intervals -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/baserecalibratorspark/nextflow.config + tags: + - gatk4 + - gatk4/baserecalibratorspark + files: + - path: output/gatk4/test.table + md5sum: 9ecb5f00a2229291705addc09c0ec231 + - path: output/gatk4/versions.yml + +- name: gatk4 baserecalibrator test_gatk4_baserecalibrator_spark_multiple_sites + command: nextflow run ./tests/modules/gatk4/baserecalibratorspark -entry test_gatk4_baserecalibrator_spark_multiple_sites -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/baserecalibratorspark/nextflow.config + tags: + - gatk4 + - gatk4/baserecalibratorspark + files: + - path: output/gatk4/test.table + md5sum: e2e43abdc0c943c1a54dae816d0b9ea7 + - path: output/gatk4/versions.yml diff --git a/tests/modules/gatk4/bedtointervallist/test.yml b/tests/modules/gatk4/bedtointervallist/test.yml index 3482fa6c..d8eade51 100644 --- a/tests/modules/gatk4/bedtointervallist/test.yml +++ b/tests/modules/gatk4/bedtointervallist/test.yml @@ -6,3 +6,4 @@ files: - path: output/gatk4/test.interval_list md5sum: e51101c9357fb2d59fd30e370eefa39c + - path: output/gatk4/versions.yml diff --git a/tests/modules/gatk4/calculatecontamination/main.nf b/tests/modules/gatk4/calculatecontamination/main.nf index 4b659ed3..c6e085b1 100644 --- a/tests/modules/gatk4/calculatecontamination/main.nf +++ b/tests/modules/gatk4/calculatecontamination/main.nf @@ -2,7 +2,8 @@ nextflow.enable.dsl = 2 -include { GATK4_CALCULATECONTAMINATION } from '../../../../modules/gatk4/calculatecontamination/main.nf' +include { GATK4_CALCULATECONTAMINATION } from '../../../../modules/gatk4/calculatecontamination/main.nf' +include { GATK4_CALCULATECONTAMINATION as GATK4_CALCULATECONTAMINATION_SEGMENTATION } from '../../../../modules/gatk4/calculatecontamination/main.nf' workflow test_gatk4_calculatecontamination_tumor_only { @@ -10,9 +11,7 @@ workflow test_gatk4_calculatecontamination_tumor_only { file(params.test_data['homo_sapiens']['illumina']['test2_pileups_table'], checkIfExists: true), [] ] - segmentout = false - - GATK4_CALCULATECONTAMINATION ( input, segmentout ) + GATK4_CALCULATECONTAMINATION ( input ) } workflow test_gatk4_calculatecontamination_matched_pair { @@ -21,9 +20,7 @@ workflow test_gatk4_calculatecontamination_matched_pair { file(params.test_data['homo_sapiens']['illumina']['test2_pileups_table'], checkIfExists: true), file(params.test_data['homo_sapiens']['illumina']['test_pileups_table'], checkIfExists: true) ] - segmentout = false - - GATK4_CALCULATECONTAMINATION ( input, segmentout ) + GATK4_CALCULATECONTAMINATION ( input ) } workflow test_gatk4_calculatecontamination_segmentation { @@ -32,7 +29,5 @@ workflow test_gatk4_calculatecontamination_segmentation { file(params.test_data['homo_sapiens']['illumina']['test2_pileups_table'], checkIfExists: true), file(params.test_data['homo_sapiens']['illumina']['test_pileups_table'], checkIfExists: true) ] - segmentout = true - - GATK4_CALCULATECONTAMINATION ( input, segmentout ) + GATK4_CALCULATECONTAMINATION_SEGMENTATION ( input ) } diff --git a/tests/modules/gatk4/calculatecontamination/nextflow.config b/tests/modules/gatk4/calculatecontamination/nextflow.config index 8730f1c4..3789a000 100644 --- a/tests/modules/gatk4/calculatecontamination/nextflow.config +++ b/tests/modules/gatk4/calculatecontamination/nextflow.config @@ -2,4 +2,8 @@ process { publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + withName: GATK4_CALCULATECONTAMINATION_SEGMENTATION { + ext.args = { "--tumor-segmentation ${meta.id}.segmentation.table" } + } + } diff --git a/tests/modules/gatk4/calculatecontamination/test.yml b/tests/modules/gatk4/calculatecontamination/test.yml index 4cd44281..a00e26f0 100644 --- a/tests/modules/gatk4/calculatecontamination/test.yml +++ b/tests/modules/gatk4/calculatecontamination/test.yml @@ -7,7 +7,6 @@ - path: output/gatk4/test.contamination.table md5sum: 46c708c943b453da89a3da08acfdb2a7 - path: output/gatk4/versions.yml - md5sum: 3da8f1c0de968886330a3f7a3a1c6616 - name: gatk4 calculatecontamination test_gatk4_calculatecontamination_matched_pair command: nextflow run tests/modules/gatk4/calculatecontamination -entry test_gatk4_calculatecontamination_matched_pair -c tests/config/nextflow.config -c ./tests/modules/gatk4/calculatecontamination/nextflow.config @@ -18,7 +17,6 @@ - path: output/gatk4/test.contamination.table md5sum: 46c708c943b453da89a3da08acfdb2a7 - path: output/gatk4/versions.yml - md5sum: 14ab12a71b0c2b87d8cd53639a991b3a - name: gatk4 calculatecontamination test_gatk4_calculatecontamination_segmentation command: nextflow run tests/modules/gatk4/calculatecontamination -entry test_gatk4_calculatecontamination_segmentation -c tests/config/nextflow.config -c ./tests/modules/gatk4/calculatecontamination/nextflow.config @@ -31,4 +29,3 @@ - path: output/gatk4/test.segmentation.table md5sum: f4643d9319bde4efbfbe516d6fb13052 - path: output/gatk4/versions.yml - md5sum: d2e61315de31f512e448f0cb4b77db54 diff --git a/tests/modules/gatk4/combinegvcfs/test.yml b/tests/modules/gatk4/combinegvcfs/test.yml index 54948668..762a72f3 100644 --- a/tests/modules/gatk4/combinegvcfs/test.yml +++ b/tests/modules/gatk4/combinegvcfs/test.yml @@ -7,4 +7,3 @@ - path: output/gatk4/test.combined.g.vcf.gz contains: ["VCFv4.2"] - path: output/gatk4/versions.yml - md5sum: 49d9c467f84b6a99a4da3ef161af26bd diff --git a/tests/modules/gatk4/createsequencedictionary/test.yml b/tests/modules/gatk4/createsequencedictionary/test.yml index 134a9d74..3656e0e2 100644 --- a/tests/modules/gatk4/createsequencedictionary/test.yml +++ b/tests/modules/gatk4/createsequencedictionary/test.yml @@ -6,3 +6,4 @@ files: - path: output/gatk4/genome.dict md5sum: 7362679f176e0f52add03c08f457f646 + - path: output/gatk4/versions.yml diff --git a/tests/modules/gatk4/createsomaticpanelofnormals/test.yml b/tests/modules/gatk4/createsomaticpanelofnormals/test.yml index a0e2bf26..00d8cae9 100644 --- a/tests/modules/gatk4/createsomaticpanelofnormals/test.yml +++ b/tests/modules/gatk4/createsomaticpanelofnormals/test.yml @@ -7,3 +7,4 @@ - path: output/gatk4/test.pon.vcf.gz - path: output/gatk4/test.pon.vcf.gz.tbi md5sum: e7ca7e9fe76ce12198fd54ec9a64fad4 + - path: output/gatk4/versions.yml diff --git a/tests/modules/gatk4/estimatelibrarycomplexity/test.yml b/tests/modules/gatk4/estimatelibrarycomplexity/test.yml index a33e4ec1..cf5d187f 100644 --- a/tests/modules/gatk4/estimatelibrarycomplexity/test.yml +++ b/tests/modules/gatk4/estimatelibrarycomplexity/test.yml @@ -5,3 +5,4 @@ - gatk4 files: - path: output/gatk4/test.metrics + - path: output/gatk4/versions.yml diff --git a/tests/modules/gatk4/fastqtosam/test.yml b/tests/modules/gatk4/fastqtosam/test.yml index 07f9af15..f4b2c561 100644 --- a/tests/modules/gatk4/fastqtosam/test.yml +++ b/tests/modules/gatk4/fastqtosam/test.yml @@ -6,7 +6,6 @@ files: - path: output/gatk4/test.bam - path: output/gatk4/versions.yml - md5sum: 381cdb2496b2fcc7bbc371a6e4156c7e - name: gatk4 fastqtosam test_gatk4_fastqtosam_paired_end command: nextflow run tests/modules/gatk4/fastqtosam -entry test_gatk4_fastqtosam_paired_end -c tests/config/nextflow.config -c ./tests/modules/gatk4/fastqtosam/nextflow.config @@ -16,4 +15,3 @@ files: - path: output/gatk4/test.bam - path: output/gatk4/versions.yml - md5sum: 1d07c90cbd31992c9ba003f02d1b3502 diff --git a/tests/modules/gatk4/filtermutectcalls/test.yml b/tests/modules/gatk4/filtermutectcalls/test.yml index 12cf4e69..6f650f32 100644 --- a/tests/modules/gatk4/filtermutectcalls/test.yml +++ b/tests/modules/gatk4/filtermutectcalls/test.yml @@ -8,6 +8,7 @@ - path: output/gatk4/test.filtered.vcf.gz.filteringStats.tsv md5sum: 55f228e5520c8b9fbac017d3a3a6c5fd - path: output/gatk4/test.filtered.vcf.gz.tbi + - path: output/gatk4/versions.yml - name: gatk4 filtermutectcalls test_gatk4_filtermutectcalls_with_files command: nextflow run ./tests/modules/gatk4/filtermutectcalls -entry test_gatk4_filtermutectcalls_with_files -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/filtermutectcalls/nextflow.config @@ -19,6 +20,7 @@ - path: output/gatk4/test.filtered.vcf.gz.filteringStats.tsv md5sum: 9ae27fbd04af1a2ea574e2ff1c3a683b - path: output/gatk4/test.filtered.vcf.gz.tbi + - path: output/gatk4/versions.yml - name: gatk4 filtermutectcalls test_gatk4_filtermutectcalls_use_val command: nextflow run ./tests/modules/gatk4/filtermutectcalls -entry test_gatk4_filtermutectcalls_use_val -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/filtermutectcalls/nextflow.config @@ -30,3 +32,4 @@ - path: output/gatk4/test.filtered.vcf.gz.filteringStats.tsv md5sum: 95cc3e37705bd3b97a292c5d46ab82f3 - path: output/gatk4/test.filtered.vcf.gz.tbi + - path: output/gatk4/versions.yml diff --git a/tests/modules/gatk4/gatherbqsrreports/test.yml b/tests/modules/gatk4/gatherbqsrreports/test.yml index 576889de..2fb4917a 100644 --- a/tests/modules/gatk4/gatherbqsrreports/test.yml +++ b/tests/modules/gatk4/gatherbqsrreports/test.yml @@ -7,7 +7,6 @@ - path: output/gatk4/test.table md5sum: 9603b69fdc3b5090de2e0dd78bfcc4bf - path: output/gatk4/versions.yml - md5sum: 8d52c5aaab73294e9ea5491b95f3e1e1 - name: gatk4 gatherbqsrreports test_gatk4_gatherbqsrreports_multiple command: nextflow run tests/modules/gatk4/gatherbqsrreports -entry test_gatk4_gatherbqsrreports_multiple -c tests/config/nextflow.config @@ -18,4 +17,3 @@ - path: output/gatk4/test.table md5sum: 0c1257eececf95db8ca378272d0f21f9 - path: output/gatk4/versions.yml - md5sum: 91cad396b9f2045c3cd8c0f256672e80 diff --git a/tests/modules/gatk4/gatherpileupsummaries/test.yml b/tests/modules/gatk4/gatherpileupsummaries/test.yml index 0c38a602..efd02f52 100644 --- a/tests/modules/gatk4/gatherpileupsummaries/test.yml +++ b/tests/modules/gatk4/gatherpileupsummaries/test.yml @@ -6,3 +6,4 @@ files: - path: output/gatk4/test.pileupsummaries.table md5sum: 8e0ca6f66e112bd2f7ec1d31a2d62469 + - path: output/gatk4/versions.yml diff --git a/tests/modules/gatk4/genomicsdbimport/test.yml b/tests/modules/gatk4/genomicsdbimport/test.yml index 5c4ea2bb..765cbfbe 100644 --- a/tests/modules/gatk4/genomicsdbimport/test.yml +++ b/tests/modules/gatk4/genomicsdbimport/test.yml @@ -5,11 +5,9 @@ - gatk4/genomicsdbimport files: - path: output/gatk4/test/__tiledb_workspace.tdb - md5sum: d41d8cd98f00b204e9800998ecf8427e - path: output/gatk4/test/callset.json md5sum: a7d07d1c86449bbb1091ff29368da07a - path: output/gatk4/test/chr22$1$40001/.__consolidation_lock - md5sum: d41d8cd98f00b204e9800998ecf8427e - path: output/gatk4/test/chr22$1$40001/__array_schema.tdb - path: output/gatk4/test/chr22$1$40001/genomicsdb_meta_dir/genomicsdb_column_bounds.json md5sum: 2502f79658bc000578ebcfddfc1194c0 @@ -19,7 +17,6 @@ - path: output/gatk4/test/vidmap.json md5sum: 18d3f68bd2cb6f4474990507ff95017a - path: output/gatk4/versions.yml - md5sum: 91f5c3e9529982f9c819860b403576ce - name: gatk4 genomicsdbimport test_gatk4_genomicsdbimport_get_intervalslist command: nextflow run tests/modules/gatk4/genomicsdbimport -entry test_gatk4_genomicsdbimport_get_intervalslist -c tests/config/nextflow.config -c ./tests/modules/gatk4/genomicsdbimport/nextflow.config @@ -30,9 +27,7 @@ - path: output/gatk4/test.interval_list md5sum: 4c85812ac15fc1cd29711a851d23c0bf - path: output/gatk4/versions.yml - md5sum: a898fe1cbc4acfa5936c0ffdcf121401 - path: output/untar/versions.yml - md5sum: 8f080677b109aea2cfca50208b077534 - name: gatk4 genomicsdbimport test_gatk4_genomicsdbimport_update_genomicsdb command: nextflow run tests/modules/gatk4/genomicsdbimport -entry test_gatk4_genomicsdbimport_update_genomicsdb -c tests/config/nextflow.config -c ./tests/modules/gatk4/genomicsdbimport/nextflow.config @@ -41,11 +36,9 @@ - gatk4/genomicsdbimport files: - path: output/gatk4/test_genomicsdb/__tiledb_workspace.tdb - md5sum: d41d8cd98f00b204e9800998ecf8427e - path: output/gatk4/test_genomicsdb/callset.json md5sum: 1ea31b59b9a218dd5681164aff4a5e07 - path: output/gatk4/test_genomicsdb/chr22$1$40001/.__consolidation_lock - md5sum: d41d8cd98f00b204e9800998ecf8427e - path: output/gatk4/test_genomicsdb/chr22$1$40001/__array_schema.tdb md5sum: 6709e67921ae840bf61fbfb192554eda - path: output/gatk4/test_genomicsdb/chr22$1$40001/genomicsdb_meta_dir/genomicsdb_column_bounds.json @@ -55,6 +48,4 @@ - path: output/gatk4/test_genomicsdb/vidmap.json md5sum: 18d3f68bd2cb6f4474990507ff95017a - path: output/gatk4/versions.yml - md5sum: d87baa3f4218c5554cad3c008cb6cbc4 - path: output/untar/versions.yml - md5sum: 9b2916aea9790bdf427c0cb38109110c diff --git a/tests/modules/gatk4/genotypegvcfs/main.nf b/tests/modules/gatk4/genotypegvcfs/main.nf index a5ae8d46..75990958 100644 --- a/tests/modules/gatk4/genotypegvcfs/main.nf +++ b/tests/modules/gatk4/genotypegvcfs/main.nf @@ -9,93 +9,96 @@ include { UNTAR } from '../../../../modules/untar/main.nf' workflow test_gatk4_genotypegvcfs_vcf_input { input = [ [ id:'test' ], // meta map - file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf'], checkIfExists: true), - file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_idx'], checkIfExists: true), - [] - ] + file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_idx'], checkIfExists: true), + [], + [] + ] - fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) - fastaIndex = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) - fastaDict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) - GATK4_GENOTYPEGVCFS ( input, fasta, fastaIndex, fastaDict, [], []) + GATK4_GENOTYPEGVCFS ( input, fasta, fai, dict, [], []) } // Basic parameters with compressed VCF input workflow test_gatk4_genotypegvcfs_gz_input { input = [ [ id:'test' ], // meta map - file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz'], checkIfExists: true), - file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz_tbi'], checkIfExists: true), - [] - ] + file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz_tbi'], checkIfExists: true), + [], + [] + ] - fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) - fastaIndex = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) - fastaDict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) - GATK4_GENOTYPEGVCFS ( input, fasta, fastaIndex, fastaDict, [], []) + GATK4_GENOTYPEGVCFS ( input, fasta, fai, dict, [], []) } // Basic parameters + optional dbSNP workflow test_gatk4_genotypegvcfs_gz_input_dbsnp { input = [ [ id:'test' ], // meta map - file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz'], checkIfExists: true), - file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz_tbi'], checkIfExists: true), - [] - ] + file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz_tbi'], checkIfExists: true), + [], + [] + ] - fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) - fastaIndex = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) - fastaDict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) - dbsnp = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz'], checkIfExists: true) - dbsnpIndex = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz_tbi'], checkIfExists: true) + dbsnp = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz'], checkIfExists: true) + dbsnp_tbi = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz_tbi'], checkIfExists: true) - GATK4_GENOTYPEGVCFS ( input, fasta, fastaIndex, fastaDict, dbsnp, dbsnpIndex) + GATK4_GENOTYPEGVCFS ( input, fasta, fai, dict, dbsnp, dbsnp_tbi) } // Basic parameters + optional intervals workflow test_gatk4_genotypegvcfs_gz_input_intervals { input = [ [ id:'test' ], // meta map - file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz'], checkIfExists: true), - file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz_tbi'], checkIfExists: true), - file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true) ] + file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz_tbi'], checkIfExists: true), + file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true) ] - fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) - fastaIndex = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) - fastaDict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) - GATK4_GENOTYPEGVCFS ( input, fasta, fastaIndex, fastaDict, [], []) + GATK4_GENOTYPEGVCFS ( input, fasta, fai, dict, [], []) } // Basic parameters + optional dbSNP + optional intervals workflow test_gatk4_genotypegvcfs_gz_input_dbsnp_intervals { input = [ [ id:'test' ], // meta map - file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz'], checkIfExists: true), - file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz_tbi'], checkIfExists: true), - file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true) - ] + file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz_tbi'], checkIfExists: true), + file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true) + ] - fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) - fastaIndex = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) - fastaDict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) - dbsnp = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz'], checkIfExists: true) - dbsnpIndex = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz_tbi'], checkIfExists: true) + dbsnp = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz'], checkIfExists: true) + dbsnp_tbi = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz_tbi'], checkIfExists: true) - GATK4_GENOTYPEGVCFS ( input, fasta, fastaIndex, fastaDict, dbsnp, dbsnpIndex ) + GATK4_GENOTYPEGVCFS ( input, fasta, fai, dict, dbsnp, dbsnp_tbi ) } // Basic parameters with GenomicsDB input workflow test_gatk4_genotypegvcfs_gendb_input { - fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) - fastaIndex = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) - fastaDict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) test_genomicsdb = [ [], file(params.test_data['homo_sapiens']['illumina']['test_genomicsdb_tar_gz'], checkIfExists: true) ] @@ -106,18 +109,18 @@ workflow test_gatk4_genotypegvcfs_gendb_input { input = Channel.of([ id:'test' ]).combine(gendb) - GATK4_GENOTYPEGVCFS ( input, fasta, fastaIndex, fastaDict, [], []) + GATK4_GENOTYPEGVCFS ( input, fasta, fai, dict, [], []) } // Basic parameters with GenomicsDB + optional dbSNP workflow test_gatk4_genotypegvcfs_gendb_input_dbsnp { - fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) - fastaIndex = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) - fastaDict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) - dbsnp = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz'], checkIfExists: true) - dbsnpIndex = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz_tbi'], checkIfExists: true) + dbsnp = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz'], checkIfExists: true) + dbsnp_tbi = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz_tbi'], checkIfExists: true) test_genomicsdb = [ [], file(params.test_data['homo_sapiens']['illumina']['test_genomicsdb_tar_gz'], checkIfExists: true) ] @@ -127,15 +130,15 @@ workflow test_gatk4_genotypegvcfs_gendb_input_dbsnp { gendb.add([]) input = Channel.of([ id:'test' ]).combine(gendb) - GATK4_GENOTYPEGVCFS ( input, fasta, fastaIndex, fastaDict, dbsnp, dbsnpIndex) + GATK4_GENOTYPEGVCFS ( input, fasta, fai, dict, dbsnp, dbsnp_tbi) } // Basic parameters with GenomicsDB + optional intervals workflow test_gatk4_genotypegvcfs_gendb_input_intervals { - fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) - fastaIndex = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) - fastaDict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) test_genomicsdb = [ [], file(params.test_data['homo_sapiens']['illumina']['test_genomicsdb_tar_gz'], checkIfExists: true) ] @@ -145,18 +148,18 @@ workflow test_gatk4_genotypegvcfs_gendb_input_intervals { gendb.add([file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true)]) input = Channel.of([ id:'test' ]).combine(gendb) - GATK4_GENOTYPEGVCFS ( input, fasta, fastaIndex, fastaDict, [], [] ) + GATK4_GENOTYPEGVCFS ( input, fasta, fai, dict, [], [] ) } // Basic parameters with GenomicsDB + optional dbSNP + optional intervals workflow test_gatk4_genotypegvcfs_gendb_input_dbsnp_intervals { - fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) - fastaIndex = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) - fastaDict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) - dbsnp = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz'], checkIfExists: true) - dbsnpIndex = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz_tbi'], checkIfExists: true) + dbsnp = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz'], checkIfExists: true) + dbsnp_tbi = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz_tbi'], checkIfExists: true) test_genomicsdb = [ [], file(params.test_data['homo_sapiens']['illumina']['test_genomicsdb_tar_gz'], checkIfExists: true) ] @@ -166,5 +169,5 @@ workflow test_gatk4_genotypegvcfs_gendb_input_dbsnp_intervals { gendb.add([file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true)]) input = Channel.of([ id:'test' ]).combine(gendb) - GATK4_GENOTYPEGVCFS ( input, fasta, fastaIndex, fastaDict, dbsnp, dbsnpIndex ) + GATK4_GENOTYPEGVCFS ( input, fasta, fai, dict, dbsnp, dbsnp_tbi ) } diff --git a/tests/modules/gatk4/genotypegvcfs/test.yml b/tests/modules/gatk4/genotypegvcfs/test.yml index dff79460..ec8ee951 100644 --- a/tests/modules/gatk4/genotypegvcfs/test.yml +++ b/tests/modules/gatk4/genotypegvcfs/test.yml @@ -10,6 +10,7 @@ "AC=1;AF=0.500;AN=2;BaseQRankSum=0.00;DP=211;ExcessHet=0.0000;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=60.00;MQRankSum=0.00;QD=0.95;ReadPosRankSum=1.09;SOR=0.680", ] - path: output/gatk4/test.genotyped.vcf.gz.tbi + - path: output/gatk4/versions.yml - name: gatk4 genotypegvcfs test_gatk4_genotypegvcfs_gz_input command: nextflow run ./tests/modules/gatk4/genotypegvcfs -entry test_gatk4_genotypegvcfs_gz_input -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/genotypegvcfs/nextflow.config @@ -23,6 +24,7 @@ "AC=1;AF=0.500;AN=2;BaseQRankSum=0.00;DP=211;ExcessHet=0.0000;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=60.00;MQRankSum=0.00;QD=0.95;ReadPosRankSum=1.09;SOR=0.680", ] - path: output/gatk4/test.genotyped.vcf.gz.tbi + - path: output/gatk4/versions.yml - name: gatk4 genotypegvcfs test_gatk4_genotypegvcfs_gz_input_dbsnp command: nextflow run ./tests/modules/gatk4/genotypegvcfs -entry test_gatk4_genotypegvcfs_gz_input_dbsnp -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/genotypegvcfs/nextflow.config @@ -36,6 +38,7 @@ "AC=1;AF=0.500;AN=2;BaseQRankSum=0.00;DB;DP=211;ExcessHet=0.0000;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=60.00;MQRankSum=0.00;QD=0.95;ReadPosRankSum=1.09;SOR=0.680", ] - path: output/gatk4/test.genotyped.vcf.gz.tbi + - path: output/gatk4/versions.yml - name: gatk4 genotypegvcfs test_gatk4_genotypegvcfs_gz_input_intervals command: nextflow run ./tests/modules/gatk4/genotypegvcfs -entry test_gatk4_genotypegvcfs_gz_input_intervals -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/genotypegvcfs/nextflow.config @@ -49,6 +52,7 @@ "AC=1;AF=0.500;AN=2;BaseQRankSum=0.00;DP=211;ExcessHet=0.0000;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=60.00;MQRankSum=0.00;QD=0.95;ReadPosRankSum=1.09;SOR=0.680", ] - path: output/gatk4/test.genotyped.vcf.gz.tbi + - path: output/gatk4/versions.yml - name: gatk4 genotypegvcfs test_gatk4_genotypegvcfs_gz_input_dbsnp_intervals command: nextflow run ./tests/modules/gatk4/genotypegvcfs -entry test_gatk4_genotypegvcfs_gz_input_dbsnp_intervals -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/genotypegvcfs/nextflow.config @@ -59,6 +63,7 @@ - path: output/gatk4/test.genotyped.vcf.gz contains: ["AC=2;AF=1.00;AN=2;DB;DP=20;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=60.00;QD=24.05;SOR=0.693"] - path: output/gatk4/test.genotyped.vcf.gz.tbi + - path: output/gatk4/versions.yml - name: gatk4 genotypegvcfs test_gatk4_genotypegvcfs_gendb_input command: nextflow run ./tests/modules/gatk4/genotypegvcfs -entry test_gatk4_genotypegvcfs_gendb_input -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/genotypegvcfs/nextflow.config @@ -72,6 +77,7 @@ "AC=1;AF=0.500;AN=2;BaseQRankSum=0.00;DP=211;ExcessHet=0.0000;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=60.00;MQRankSum=0.00;QD=0.95;ReadPosRankSum=1.09;SOR=0.680", ] - path: output/gatk4/test.genotyped.vcf.gz.tbi + - path: output/gatk4/versions.yml - name: gatk4 genotypegvcfs test_gatk4_genotypegvcfs_gendb_input_dbsnp command: nextflow run ./tests/modules/gatk4/genotypegvcfs -entry test_gatk4_genotypegvcfs_gendb_input_dbsnp -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/genotypegvcfs/nextflow.config @@ -85,6 +91,7 @@ "AC=1;AF=0.500;AN=2;BaseQRankSum=0.00;DB;DP=211;ExcessHet=0.0000;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=60.00;MQRankSum=0.00;QD=0.95;ReadPosRankSum=1.09;SOR=0.680", ] - path: output/gatk4/test.genotyped.vcf.gz.tbi + - path: output/gatk4/versions.yml - name: gatk4 genotypegvcfs test_gatk4_genotypegvcfs_gendb_input_intervals command: nextflow run ./tests/modules/gatk4/genotypegvcfs -entry test_gatk4_genotypegvcfs_gendb_input_intervals -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/genotypegvcfs/nextflow.config @@ -98,6 +105,7 @@ "AC=1;AF=0.500;AN=2;BaseQRankSum=0.00;DP=211;ExcessHet=0.0000;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=60.00;MQRankSum=0.00;QD=0.95;ReadPosRankSum=1.09;SOR=0.680", ] - path: output/gatk4/test.genotyped.vcf.gz.tbi + - path: output/gatk4/versions.yml - name: gatk4 genotypegvcfs test_gatk4_genotypegvcfs_gendb_input_dbsnp_intervals command: nextflow run ./tests/modules/gatk4/genotypegvcfs -entry test_gatk4_genotypegvcfs_gendb_input_dbsnp_intervals -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/genotypegvcfs/nextflow.config @@ -108,3 +116,4 @@ - path: output/gatk4/test.genotyped.vcf.gz contains: ["AC=2;AF=1.00;AN=2;DP=2;ExcessHet=0.0000;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=60.00;QD=18.66;SOR=0.693"] - path: output/gatk4/test.genotyped.vcf.gz.tbi + - path: output/gatk4/versions.yml diff --git a/tests/modules/gatk4/getpileupsummaries/test.yml b/tests/modules/gatk4/getpileupsummaries/test.yml index e305d412..2e4acce9 100644 --- a/tests/modules/gatk4/getpileupsummaries/test.yml +++ b/tests/modules/gatk4/getpileupsummaries/test.yml @@ -7,7 +7,6 @@ - path: output/gatk4/test.pileups.table md5sum: 8e0ca6f66e112bd2f7ec1d31a2d62469 - path: output/gatk4/versions.yml - md5sum: 059123619f3ed8d4cd178c4390b81e69 - name: gatk4 getpileupsummaries test_gatk4_getpileupsummaries_separate_sites command: nextflow run tests/modules/gatk4/getpileupsummaries -entry test_gatk4_getpileupsummaries_separate_sites -c tests/config/nextflow.config @@ -18,7 +17,6 @@ - path: output/gatk4/test.pileups.table md5sum: 8e0ca6f66e112bd2f7ec1d31a2d62469 - path: output/gatk4/versions.yml - md5sum: 76b5388b0c5b5762d8d33e34b23f181d - name: gatk4 getpileupsummaries test_gatk4_getpileupsummaries_separate_sites_cram command: nextflow run tests/modules/gatk4/getpileupsummaries -entry test_gatk4_getpileupsummaries_separate_sites_cram -c tests/config/nextflow.config @@ -29,4 +27,3 @@ - path: output/gatk4/test.pileups.table md5sum: 8e0ca6f66e112bd2f7ec1d31a2d62469 - path: output/gatk4/versions.yml - md5sum: 2fa51319c2b1d678ee00ab09512cf268 diff --git a/tests/modules/gatk4/haplotypecaller/test.yml b/tests/modules/gatk4/haplotypecaller/test.yml index 31dd23fd..3d416a0d 100644 --- a/tests/modules/gatk4/haplotypecaller/test.yml +++ b/tests/modules/gatk4/haplotypecaller/test.yml @@ -6,6 +6,7 @@ files: - path: output/gatk4/test.vcf.gz - path: output/gatk4/test.vcf.gz.tbi + - path: output/gatk4/versions.yml - name: gatk4 haplotypecaller test_gatk4_haplotypecaller_cram command: nextflow run ./tests/modules/gatk4/haplotypecaller -entry test_gatk4_haplotypecaller_cram -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/haplotypecaller/nextflow.config @@ -15,6 +16,7 @@ files: - path: output/gatk4/test.vcf.gz - path: output/gatk4/test.vcf.gz.tbi + - path: output/gatk4/versions.yml - name: gatk4 haplotypecaller test_gatk4_haplotypecaller_intervals_dbsnp command: nextflow run ./tests/modules/gatk4/haplotypecaller -entry test_gatk4_haplotypecaller_intervals_dbsnp -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/haplotypecaller/nextflow.config @@ -24,3 +26,4 @@ files: - path: output/gatk4/test.vcf.gz - path: output/gatk4/test.vcf.gz.tbi + - path: output/gatk4/versions.yml diff --git a/tests/modules/gatk4/indexfeaturefile/test.yml b/tests/modules/gatk4/indexfeaturefile/test.yml index 938c2b91..9187fa4a 100644 --- a/tests/modules/gatk4/indexfeaturefile/test.yml +++ b/tests/modules/gatk4/indexfeaturefile/test.yml @@ -5,6 +5,7 @@ - gatk4/indexfeaturefile files: - path: output/gatk4/genome.bed.idx + - path: output/gatk4/versions.yml - name: gatk4 indexfeaturefile test_gatk4_indexfeaturefile_bed_gz command: nextflow run tests/modules/gatk4/indexfeaturefile -entry test_gatk4_indexfeaturefile_bed_gz -c tests/config/nextflow.config -c ./tests/modules/gatk4/indexfeaturefile/nextflow.config @@ -15,7 +16,6 @@ - path: output/gatk4/genome.bed.gz.tbi md5sum: 4bc51e2351a6e83f20e13be75861f941 - path: output/gatk4/versions.yml - md5sum: e5003204702f83aabdb4141272c704d2 - name: gatk4 indexfeaturefile test_gatk4_indexfeaturefile_vcf command: nextflow run tests/modules/gatk4/indexfeaturefile -entry test_gatk4_indexfeaturefile_vcf -c tests/config/nextflow.config -c ./tests/modules/gatk4/indexfeaturefile/nextflow.config @@ -25,7 +25,6 @@ files: - path: output/gatk4/test.genome.vcf.idx - path: output/gatk4/versions.yml - md5sum: 08cd7c49cfb752fc2905f600106a0345 - name: gatk4 indexfeaturefile test_gatk4_indexfeaturefile_vcf_gz command: nextflow run tests/modules/gatk4/indexfeaturefile -entry test_gatk4_indexfeaturefile_vcf_gz -c tests/config/nextflow.config @@ -36,4 +35,3 @@ - path: output/gatk4/test.genome.vcf.gz.tbi md5sum: fedd68eaddf8d31257853d9da8325bd3 - path: output/gatk4/versions.yml - md5sum: b388d1681831a40264a7a27f67a8b247 diff --git a/tests/modules/gatk4/intervallisttobed/test.yml b/tests/modules/gatk4/intervallisttobed/test.yml index 9e6e38c5..a148f745 100644 --- a/tests/modules/gatk4/intervallisttobed/test.yml +++ b/tests/modules/gatk4/intervallisttobed/test.yml @@ -6,3 +6,4 @@ files: - path: output/gatk4/test.bed md5sum: 9046675d01199fbbee79f2bc1c5dce52 + - path: output/gatk4/versions.yml diff --git a/tests/modules/gatk4/intervallisttools/test.yml b/tests/modules/gatk4/intervallisttools/test.yml index c9cb23b8..5542714b 100644 --- a/tests/modules/gatk4/intervallisttools/test.yml +++ b/tests/modules/gatk4/intervallisttools/test.yml @@ -14,3 +14,4 @@ md5sum: 55da0f3c69504148f4e7002a0e072cfe - path: output/gatk4/test_split/temp_0004_of_6/4scattered.interval_list md5sum: d29ca4447f32547f2936567fa902796a + - path: output/gatk4/versions.yml diff --git a/tests/modules/gatk4/learnreadorientationmodel/test.yml b/tests/modules/gatk4/learnreadorientationmodel/test.yml index b88df15f..d3d64c50 100644 --- a/tests/modules/gatk4/learnreadorientationmodel/test.yml +++ b/tests/modules/gatk4/learnreadorientationmodel/test.yml @@ -5,3 +5,4 @@ - gatk4/learnreadorientationmodel files: - path: output/gatk4/test.artifact-prior.tar.gz + - path: output/gatk4/versions.yml diff --git a/tests/modules/gatk4/markduplicates/nextflow.config b/tests/modules/gatk4/markduplicates/nextflow.config index 8730f1c4..787c9589 100644 --- a/tests/modules/gatk4/markduplicates/nextflow.config +++ b/tests/modules/gatk4/markduplicates/nextflow.config @@ -2,4 +2,8 @@ process { publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + withName: GATK4_MARKDUPLICATES { + ext.args = '--CREATE_INDEX true' + } + } diff --git a/tests/modules/gatk4/markduplicates/test.yml b/tests/modules/gatk4/markduplicates/test.yml index 7bf49b56..1fbd768e 100644 --- a/tests/modules/gatk4/markduplicates/test.yml +++ b/tests/modules/gatk4/markduplicates/test.yml @@ -10,7 +10,6 @@ md5sum: 2efd50b2e6b7fd9bdf242cd9e266cfa9 - path: output/gatk4/test.metrics - path: output/gatk4/versions.yml - md5sum: 0bc949aaa8792cd6c537cdaab0e2c145 - name: gatk4 markduplicates test_gatk4_markduplicates_multiple_bams command: nextflow run tests/modules/gatk4/markduplicates -entry test_gatk4_markduplicates_multiple_bams -c tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicates/nextflow.config @@ -24,4 +23,3 @@ md5sum: 8187febc6108ffef7f907e89b9c091a4 - path: output/gatk4/test.metrics - path: output/gatk4/versions.yml - md5sum: b10d63cf7b2b672915cb30cea081ccd5 diff --git a/tests/modules/gatk4/markduplicatesspark/main.nf b/tests/modules/gatk4/markduplicatesspark/main.nf new file mode 100644 index 00000000..2f294f59 --- /dev/null +++ b/tests/modules/gatk4/markduplicatesspark/main.nf @@ -0,0 +1,28 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { GATK4_MARKDUPLICATES_SPARK } from '../../../../modules/gatk4/markduplicatesspark/main.nf' + +workflow test_gatk4_markduplicates_spark { + input = [ [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) + ] + fasta = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true) + fai = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['homo_sapiens']['genome']['genome_21_dict'], checkIfExists: true) + + GATK4_MARKDUPLICATES_SPARK ( input, fasta, fai, dict ) +} + +workflow test_gatk4_markduplicates_spark_multiple_bams { + input = [ [ id:'test', single_end:false ], // meta map + [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam'], checkIfExists: true) + ] ] + fasta = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true) + fai = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['homo_sapiens']['genome']['genome_21_dict'], checkIfExists: true) + + GATK4_MARKDUPLICATES_SPARK ( input, fasta, fai, dict ) +} diff --git a/tests/modules/gatk4/markduplicatesspark/nextflow.config b/tests/modules/gatk4/markduplicatesspark/nextflow.config new file mode 100644 index 00000000..8730f1c4 --- /dev/null +++ b/tests/modules/gatk4/markduplicatesspark/nextflow.config @@ -0,0 +1,5 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + +} diff --git a/tests/modules/gatk4/markduplicatesspark/test.yml b/tests/modules/gatk4/markduplicatesspark/test.yml new file mode 100644 index 00000000..b0c0b40d --- /dev/null +++ b/tests/modules/gatk4/markduplicatesspark/test.yml @@ -0,0 +1,25 @@ +- name: gatk4 markduplicates test_gatk4_markduplicates_spark + command: nextflow run tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark -c tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config + tags: + - gatk4 + - gatk4/markduplicatesspark + files: + - path: output/gatk4/test.bai + md5sum: e9c125e82553209933883b4fe2b8d7c2 + - path: output/gatk4/test.bam + md5sum: 2efd50b2e6b7fd9bdf242cd9e266cfa9 + - path: output/gatk4/test.metrics + - path: output/gatk4/versions.yml + +- name: gatk4 markduplicates test_gatk4_markduplicates_spark_multiple_bams + command: nextflow run tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark_multiple_bams -c tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config + tags: + - gatk4 + - gatk4/markduplicatesspark + files: + - path: output/gatk4/test.bai + md5sum: bad71df9c876e72a5bc0a3e0fd755f92 + - path: output/gatk4/test.bam + md5sum: 8187febc6108ffef7f907e89b9c091a4 + - path: output/gatk4/test.metrics + - path: output/gatk4/versions.yml diff --git a/tests/modules/gatk4/mergebamalignment/test.yml b/tests/modules/gatk4/mergebamalignment/test.yml index 5e1ab8d5..b1bb32b2 100644 --- a/tests/modules/gatk4/mergebamalignment/test.yml +++ b/tests/modules/gatk4/mergebamalignment/test.yml @@ -6,3 +6,4 @@ files: - path: output/gatk4/test.bam md5sum: e6f1b343700b7ccb94e81ae127433988 + - path: output/gatk4/versions.yml diff --git a/tests/modules/gatk4/mergemutectstats/test.yml b/tests/modules/gatk4/mergemutectstats/test.yml index 44d1c5f2..cc71854e 100644 --- a/tests/modules/gatk4/mergemutectstats/test.yml +++ b/tests/modules/gatk4/mergemutectstats/test.yml @@ -6,3 +6,4 @@ files: - path: output/gatk4/test.vcf.gz.stats md5sum: 17d2091015d04cbd4a26b7a67dc659e6 + - path: output/gatk4/versions.yml diff --git a/tests/modules/gatk4/mergevcfs/main.nf b/tests/modules/gatk4/mergevcfs/main.nf index fa09d758..99a2158d 100644 --- a/tests/modules/gatk4/mergevcfs/main.nf +++ b/tests/modules/gatk4/mergevcfs/main.nf @@ -5,22 +5,22 @@ nextflow.enable.dsl = 2 include { GATK4_MERGEVCFS } from '../../../../modules/gatk4/mergevcfs/main.nf' workflow test_gatk4_mergevcfs { - input = [ [ id:'test' ], // meta map - [ file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test2_vcf'], checkIfExists: true) ] - ] - dict = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true) - - GATK4_MERGEVCFS ( input, dict, false ) -} - -workflow test_gatk4_mergevcfs_refdict { def input = [] input = [ [ id:'test' ], // meta map [ file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test2_vcf'], checkIfExists: true) ] + file(params.test_data['sarscov2']['illumina']['test2_vcf'], checkIfExists: true) ] ] + dict = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true) - GATK4_MERGEVCFS ( input, dict, true ) + GATK4_MERGEVCFS ( input, dict ) +} + +workflow test_gatk4_mergevcfs_no_dict { + input = [ [ id:'test' ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test2_vcf'], checkIfExists: true) ] + ] + + GATK4_MERGEVCFS ( input, [] ) } diff --git a/tests/modules/gatk4/mergevcfs/test.yml b/tests/modules/gatk4/mergevcfs/test.yml index 3ff2bf93..da2f7578 100644 --- a/tests/modules/gatk4/mergevcfs/test.yml +++ b/tests/modules/gatk4/mergevcfs/test.yml @@ -6,12 +6,14 @@ files: - path: output/gatk4/test.vcf.gz md5sum: 5b289bda88d3a3504f2e19ee8cff177c + - path: output/gatk4/versions.yml -- name: gatk4 mergevcfs test_gatk4_mergevcfs_refdict - command: nextflow run ./tests/modules/gatk4/mergevcfs -entry test_gatk4_mergevcfs_refdict -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/mergevcfs/nextflow.config +- name: gatk4 mergevcfs test_gatk4_mergevcfs_no_dict + command: nextflow run ./tests/modules/gatk4/mergevcfs -entry test_gatk4_mergevcfs_no_dict -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/mergevcfs/nextflow.config tags: - gatk4/mergevcfs - gatk4 files: - path: output/gatk4/test.vcf.gz md5sum: 5b289bda88d3a3504f2e19ee8cff177c + - path: output/gatk4/versions.yml diff --git a/tests/modules/gatk4/mutect2/main.nf b/tests/modules/gatk4/mutect2/main.nf index 19b22914..0b4339f0 100644 --- a/tests/modules/gatk4/mutect2/main.nf +++ b/tests/modules/gatk4/mutect2/main.nf @@ -2,22 +2,22 @@ nextflow.enable.dsl = 2 -include { GATK4_MUTECT2 } from '../../../../modules/gatk4/mutect2/main.nf' +include { GATK4_MUTECT2 } from '../../../../modules/gatk4/mutect2/main.nf' +include { GATK4_MUTECT2 as GATK4_MUTECT2_PAIR } from '../../../../modules/gatk4/mutect2/main.nf' +include { GATK4_MUTECT2 as GATK4_MUTECT2_MITO } from '../../../../modules/gatk4/mutect2/main.nf' +include { GATK4_MUTECT2 as GATK4_MUTECT2_F1R2 } from '../../../../modules/gatk4/mutect2/main.nf' workflow test_gatk4_mutect2_tumor_normal_pair { - input = [ [ id:'test'], // meta map + input = [ [ id:'test', normal_id:'normal', tumor_id:'tumour' ], // meta map [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_bam'], checkIfExists: true), file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_bam'], checkIfExists: true) ], [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_bam_bai'], checkIfExists: true), file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_bam_bai'], checkIfExists: true) ], - [], - ["normal"] + [] ] - run_single = false - run_pon = false - run_mito = false + fasta = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true) fai = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta_fai'], checkIfExists: true) dict = file(params.test_data['homo_sapiens']['genome']['genome_21_dict'], checkIfExists: true) @@ -26,19 +26,38 @@ workflow test_gatk4_mutect2_tumor_normal_pair { panel_of_normals = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_21_vcf_gz'], checkIfExists: true) panel_of_normals_tbi = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_21_vcf_gz_tbi'], checkIfExists: true) - GATK4_MUTECT2 ( input, run_single, run_pon, run_mito, fasta, fai, dict, germline_resource, germline_resource_tbi, panel_of_normals, panel_of_normals_tbi ) + GATK4_MUTECT2_PAIR ( input, fasta, fai, dict, germline_resource, germline_resource_tbi, panel_of_normals, panel_of_normals_tbi ) +} + +workflow test_gatk4_mutect2_tumor_normal_pair_f1r2 { + input = [ [ id:'test', normal_id:'normal', tumor_id:'tumour' ], // meta map + [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_bam'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_bam'], checkIfExists: true) + ], + [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_bam_bai'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_bam_bai'], checkIfExists: true) + ], + [] + ] + + fasta = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true) + fai = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['homo_sapiens']['genome']['genome_21_dict'], checkIfExists: true) + germline_resource = file(params.test_data['homo_sapiens']['genome']['gnomad_r2_1_1_21_vcf_gz'], checkIfExists: true) + germline_resource_tbi = file(params.test_data['homo_sapiens']['genome']['gnomad_r2_1_1_21_vcf_gz_tbi'], checkIfExists: true) + panel_of_normals = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_21_vcf_gz'], checkIfExists: true) + panel_of_normals_tbi = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_21_vcf_gz_tbi'], checkIfExists: true) + + GATK4_MUTECT2_F1R2 ( input, fasta, fai, dict, germline_resource, germline_resource_tbi, panel_of_normals, panel_of_normals_tbi ) } workflow test_gatk4_mutect2_tumor_single { input = [ [ id:'test'], // meta map [ file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_bam'], checkIfExists: true)], [ file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_bam_bai'], checkIfExists: true)], - [], [] ] - run_single = true - run_pon = false - run_mito = false + fasta = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true) fai = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta_fai'], checkIfExists: true) dict = file(params.test_data['homo_sapiens']['genome']['genome_21_dict'], checkIfExists: true) @@ -47,19 +66,16 @@ workflow test_gatk4_mutect2_tumor_single { panel_of_normals = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_21_vcf_gz'], checkIfExists: true) panel_of_normals_tbi = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_21_vcf_gz_tbi'], checkIfExists: true) - GATK4_MUTECT2 ( input, run_single, run_pon, run_mito, fasta, fai, dict, germline_resource, germline_resource_tbi, panel_of_normals, panel_of_normals_tbi ) + GATK4_MUTECT2 ( input, fasta, fai, dict, germline_resource, germline_resource_tbi, panel_of_normals, panel_of_normals_tbi ) } workflow test_gatk4_mutect2_cram_input { input = [ [ id:'test'], // meta map [ file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_cram'], checkIfExists: true)], [ file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_cram_crai'], checkIfExists: true)], - [], [] ] - run_single = true - run_pon = false - run_mito = false + fasta = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true) fai = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta_fai'], checkIfExists: true) dict = file(params.test_data['homo_sapiens']['genome']['genome_21_dict'], checkIfExists: true) @@ -68,48 +84,37 @@ workflow test_gatk4_mutect2_cram_input { panel_of_normals = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_21_vcf_gz'], checkIfExists: true) panel_of_normals_tbi = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_21_vcf_gz_tbi'], checkIfExists: true) - GATK4_MUTECT2 ( input, run_single, run_pon, run_mito, fasta, fai, dict, germline_resource, germline_resource_tbi, panel_of_normals, panel_of_normals_tbi ) + GATK4_MUTECT2 ( input, fasta, fai, dict, germline_resource, germline_resource_tbi, panel_of_normals, panel_of_normals_tbi ) } workflow test_gatk4_mutect2_generate_pon { input = [ [ id:'test'], // meta map [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_bam'], checkIfExists: true)], [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_bam_bai'], checkIfExists: true)], - [], [] ] - run_single = false - run_pon = true - run_mito = false + fasta = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true) fai = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta_fai'], checkIfExists: true) dict = file(params.test_data['homo_sapiens']['genome']['genome_21_dict'], checkIfExists: true) - germline_resource = [] - germline_resource_tbi = [] - panel_of_normals = [] - panel_of_normals_tbi = [] - GATK4_MUTECT2 ( input, run_single, run_pon, run_mito, fasta, fai, dict, germline_resource, germline_resource_tbi, panel_of_normals, panel_of_normals_tbi ) + GATK4_MUTECT2 ( input, fasta, fai, dict, [], [], [], [] ) } -// mitochondria mode would ideally have some mitochondria test data, but since the mitochondria settings only increase detection sensitivity, we can use the chr22 data as a stand in as it is already a small dataset, the extra variants detected compared to generate_pon shows the mode is working. +// mitochondria mode would ideally have some mitochondria test data +// but since the mitochondria settings only increase detection sensitivity +// we can use the chr22 data as a stand in as it is already a small dataset +// the extra variants detected compared to generate_pon shows the mode is working workflow test_gatk4_mutect2_mitochondria { input = [ [ id:'test'], // meta map [ file(params.test_data['homo_sapiens']['illumina']['mitochon_standin_recalibrated_sorted_bam'], checkIfExists: true)], [ file(params.test_data['homo_sapiens']['illumina']['mitochon_standin_recalibrated_sorted_bam_bai'], checkIfExists: true)], - [ file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true)], - [] + [ file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true)] ] - run_single = false - run_pon = false - run_mito = true + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) - germline_resource = [] - germline_resource_tbi = [] - panel_of_normals = [] - panel_of_normals_tbi = [] - GATK4_MUTECT2 ( input, run_single, run_pon, run_mito, fasta, fai, dict, germline_resource, germline_resource_tbi, panel_of_normals, panel_of_normals_tbi ) + GATK4_MUTECT2_MITO ( input, fasta, fai, dict, [], [], [], [] ) } diff --git a/tests/modules/gatk4/mutect2/nextflow.config b/tests/modules/gatk4/mutect2/nextflow.config index 8730f1c4..3b5f6c62 100644 --- a/tests/modules/gatk4/mutect2/nextflow.config +++ b/tests/modules/gatk4/mutect2/nextflow.config @@ -2,4 +2,16 @@ process { publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + withName: GATK4_MUTECT2_PAIR { + ext.args = { "--normal-sample $meta.normal_id" } + } + + withName: GATK4_MUTECT2_MITO { + ext.args = { "--mitochondria-mode" } + } + + withName: GATK4_MUTECT2_F1R2 { + ext.args = { "--normal-sample $meta.normal_id --f1r2-tar-gz ${meta.id}.f1r2.tar.gz" } + } + } diff --git a/tests/modules/gatk4/mutect2/test.yml b/tests/modules/gatk4/mutect2/test.yml index f8107e6d..3cefce09 100644 --- a/tests/modules/gatk4/mutect2/test.yml +++ b/tests/modules/gatk4/mutect2/test.yml @@ -1,5 +1,16 @@ - name: gatk4 mutect2 test_gatk4_mutect2_tumor_normal_pair command: nextflow run ./tests/modules/gatk4/mutect2 -entry test_gatk4_mutect2_tumor_normal_pair -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/mutect2/nextflow.config + tags: + - gatk4 + - gatk4/mutect2 + files: + - path: output/gatk4/test.vcf.gz + - path: output/gatk4/test.vcf.gz.stats + md5sum: 17d2091015d04cbd4a26b7a67dc659e6 + - path: output/gatk4/test.vcf.gz.tbi + +- name: gatk4 mutect2 test_gatk4_mutect2_tumor_normal_pair_f1r2 + command: nextflow run ./tests/modules/gatk4/mutect2 -entry test_gatk4_mutect2_tumor_normal_pair_f1r2 -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/mutect2/nextflow.config tags: - gatk4 - gatk4/mutect2 @@ -9,6 +20,7 @@ - path: output/gatk4/test.vcf.gz.stats md5sum: 17d2091015d04cbd4a26b7a67dc659e6 - path: output/gatk4/test.vcf.gz.tbi + - path: output/gatk4/versions.yml - name: gatk4 mutect2 test_gatk4_mutect2_tumor_single command: nextflow run ./tests/modules/gatk4/mutect2 -entry test_gatk4_mutect2_tumor_single -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/mutect2/nextflow.config @@ -20,6 +32,7 @@ - path: output/gatk4/test.vcf.gz.stats md5sum: 55ed641e16089afb33cdbc478e202d3d - path: output/gatk4/test.vcf.gz.tbi + - path: output/gatk4/versions.yml - name: gatk4 mutect2 test_gatk4_mutect2_cram_input command: nextflow run ./tests/modules/gatk4/mutect2 -entry test_gatk4_mutect2_cram_input -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/mutect2/nextflow.config @@ -31,6 +44,7 @@ - path: output/gatk4/test.vcf.gz.stats md5sum: 55ed641e16089afb33cdbc478e202d3d - path: output/gatk4/test.vcf.gz.tbi + - path: output/gatk4/versions.yml - name: gatk4 mutect2 test_gatk4_mutect2_generate_pon command: nextflow run ./tests/modules/gatk4/mutect2 -entry test_gatk4_mutect2_generate_pon -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/mutect2/nextflow.config @@ -42,6 +56,7 @@ - path: output/gatk4/test.vcf.gz.stats md5sum: b569ce66bbffe9588b3d221e821023ee - path: output/gatk4/test.vcf.gz.tbi + - path: output/gatk4/versions.yml - name: gatk4 mutect2 test_gatk4_mutect2_mitochondria command: nextflow run ./tests/modules/gatk4/mutect2 -entry test_gatk4_mutect2_mitochondria -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/mutect2/nextflow.config @@ -53,3 +68,4 @@ - path: output/gatk4/test.vcf.gz.stats md5sum: fc6ea14ca2da346babe78161beea28c9 - path: output/gatk4/test.vcf.gz.tbi + - path: output/gatk4/versions.yml diff --git a/tests/modules/gatk4/revertsam/test.yml b/tests/modules/gatk4/revertsam/test.yml index 4199b118..2ebdb685 100644 --- a/tests/modules/gatk4/revertsam/test.yml +++ b/tests/modules/gatk4/revertsam/test.yml @@ -6,3 +6,4 @@ files: - path: output/gatk4/test.reverted.bam md5sum: f783a88deb45c3a2c20ca12cbe1c5652 + - path: output/gatk4/versions.yml diff --git a/tests/modules/gatk4/samtofastq/test.yml b/tests/modules/gatk4/samtofastq/test.yml index 66d3ee4c..eb25f33b 100644 --- a/tests/modules/gatk4/samtofastq/test.yml +++ b/tests/modules/gatk4/samtofastq/test.yml @@ -6,6 +6,7 @@ files: - path: output/gatk4/test.fastq.gz md5sum: 50ace41d4c24467f24f8b929540a7797 + - path: output/gatk4/versions.yml - name: gatk4 samtofastq test_gatk4_samtofastq_paired_end command: nextflow run ./tests/modules/gatk4/samtofastq -entry test_gatk4_samtofastq_paired_end -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/samtofastq/nextflow.config @@ -17,3 +18,4 @@ md5sum: cfea607c9d75fd9ea9704780ad3a499c - path: output/gatk4/test_2.fastq.gz md5sum: 613bf64c023609e1c62ad6ce9e4be8d7 + - path: output/gatk4/versions.yml diff --git a/tests/modules/gatk4/selectvariants/test.yml b/tests/modules/gatk4/selectvariants/test.yml index 5bc32330..42d9dbd8 100644 --- a/tests/modules/gatk4/selectvariants/test.yml +++ b/tests/modules/gatk4/selectvariants/test.yml @@ -7,7 +7,6 @@ - path: output/gatk4/test.selectvariants.vcf.gz - path: output/gatk4/test.selectvariants.vcf.gz.tbi - path: output/gatk4/versions.yml - md5sum: a35d78af179f43652274bc7405d5a785 - name: gatk4 selectvariants test_gatk4_selectvariants_gz_input command: nextflow run tests/modules/gatk4/selectvariants -entry test_gatk4_selectvariants_gz_input -c tests/config/nextflow.config @@ -18,4 +17,3 @@ - path: output/gatk4/test.selectvariants.vcf.gz - path: output/gatk4/test.selectvariants.vcf.gz.tbi - path: output/gatk4/versions.yml - md5sum: c943f3579a369968ca63444eb43fb6e7 diff --git a/tests/modules/gatk4/splitncigarreads/test.yml b/tests/modules/gatk4/splitncigarreads/test.yml index 059d5e75..c38064e2 100644 --- a/tests/modules/gatk4/splitncigarreads/test.yml +++ b/tests/modules/gatk4/splitncigarreads/test.yml @@ -7,4 +7,3 @@ - path: output/gatk4/test.bam md5sum: ceed15c0bd64ff5c38d3816905933b0b - path: output/gatk4/versions.yml - md5sum: 27fceace2528a905ddca2b4db47c4bf5 diff --git a/tests/modules/gatk4/variantfiltration/test.yml b/tests/modules/gatk4/variantfiltration/test.yml index 068e8d63..0ab91091 100644 --- a/tests/modules/gatk4/variantfiltration/test.yml +++ b/tests/modules/gatk4/variantfiltration/test.yml @@ -10,6 +10,7 @@ "BaseQRankSum=-1.318;DP=17;ExcessHet=3.0103;MLEAC=1,0,0;MLEAF=0.500,0.00,0.00;MQRankSum=0.000;RAW_MQandDP=61200,17;ReadPosRankSum=2.365", ] - path: output/gatk4/test.filtered.vcf.gz.tbi + - path: output/gatk4/versions.yml - name: gatk4 variantfiltration test_gatk4_variantfiltration_gz_input command: nextflow run ./tests/modules/gatk4/variantfiltration -entry test_gatk4_variantfiltration_gz_input -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/variantfiltration/nextflow.config @@ -23,3 +24,4 @@ "BaseQRankSum=-1.318;DP=17;ExcessHet=3.0103;MLEAC=1,0,0;MLEAF=0.500,0.00,0.00;MQRankSum=0.000;RAW_MQandDP=61200,17;ReadPosRankSum=2.365", ] - path: output/gatk4/test.filtered.vcf.gz.tbi + - path: output/gatk4/versions.yml diff --git a/tests/modules/gatk4/variantrecalibrator/main.nf b/tests/modules/gatk4/variantrecalibrator/main.nf index be7004e7..66dde5dd 100644 --- a/tests/modules/gatk4/variantrecalibrator/main.nf +++ b/tests/modules/gatk4/variantrecalibrator/main.nf @@ -2,73 +2,67 @@ nextflow.enable.dsl = 2 -include { GATK4_VARIANTRECALIBRATOR as GATK4_VARIANTRECALIBRATOR_NO_ALLELESPECIFICTY } from '../../../../modules/gatk4/variantrecalibrator/main.nf' +include { GATK4_VARIANTRECALIBRATOR as GATK4_VARIANTRECALIBRATOR_NO_ALLELESPECIFICTY } from '../../../../modules/gatk4/variantrecalibrator/main.nf' include { GATK4_VARIANTRECALIBRATOR as GATK4_VARIANTRECALIBRATOR_WITH_ALLELESPECIFICTY } from '../../../../modules/gatk4/variantrecalibrator/main.nf' workflow test_gatk4_variantrecalibrator { input = [ [ id:'test' ], // meta map - file(params.test_data['homo_sapiens']['illumina']['test2_haplotc_ann_vcf_gz'], checkIfExists: true), - file(params.test_data['homo_sapiens']['illumina']['test2_haplotc_ann_vcf_gz_tbi'], checkIfExists: true) + file(params.test_data['homo_sapiens']['illumina']['test2_haplotc_ann_vcf_gz'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test2_haplotc_ann_vcf_gz_tbi'], checkIfExists: true) ] + resources = [[ + file(params.test_data['homo_sapiens']['genome']['hapmap_3_3_hg38_21_vcf_gz'], checkIfExists: true), + file(params.test_data['homo_sapiens']['genome']['res_1000g_omni2_5_hg38_21_vcf_gz'], checkIfExists: true), + file(params.test_data['homo_sapiens']['genome']['res_1000g_phase1_snps_hg38_21_vcf_gz'], checkIfExists: true), + file(params.test_data['homo_sapiens']['genome']['dbsnp_138_hg38_21_vcf_gz'], checkIfExists: true) + ], [ + file(params.test_data['homo_sapiens']['genome']['hapmap_3_3_hg38_21_vcf_gz_tbi'], checkIfExists: true), + file(params.test_data['homo_sapiens']['genome']['res_1000g_omni2_5_hg38_21_vcf_gz_tbi'], checkIfExists: true), + file(params.test_data['homo_sapiens']['genome']['res_1000g_phase1_snps_hg38_21_vcf_gz_tbi'], checkIfExists: true), + file(params.test_data['homo_sapiens']['genome']['dbsnp_138_hg38_21_vcf_gz_tbi'], checkIfExists: true) + ], [ + 'hapmap,known=false,training=true,truth=true,prior=15.0 hapmap_3.3.hg38.vcf.gz', + 'omni,known=false,training=true,truth=false,prior=12.0 1000G_omni2.5.hg38.vcf.gz', + '1000G,known=false,training=true,truth=false,prior=10.0 1000G_phase1.snps.hg38.vcf.gz', + 'dbsnp,known=true,training=false,truth=false,prior=2.0 dbsnp_138.hg38.vcf.gz' + ]] + fasta = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true) fai = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta_fai'], checkIfExists: true) dict = file(params.test_data['homo_sapiens']['genome']['genome_21_dict'], checkIfExists: true) - resources = [ - [ - file(params.test_data['homo_sapiens']['genome']['hapmap_3_3_hg38_21_vcf_gz'], checkIfExists: true), - file(params.test_data['homo_sapiens']['genome']['res_1000g_omni2_5_hg38_21_vcf_gz'], checkIfExists: true), - file(params.test_data['homo_sapiens']['genome']['res_1000g_phase1_snps_hg38_21_vcf_gz'], checkIfExists: true), - file(params.test_data['homo_sapiens']['genome']['dbsnp_138_hg38_21_vcf_gz'], checkIfExists: true) - ], - [ - file(params.test_data['homo_sapiens']['genome']['hapmap_3_3_hg38_21_vcf_gz_tbi'], checkIfExists: true), - file(params.test_data['homo_sapiens']['genome']['res_1000g_omni2_5_hg38_21_vcf_gz_tbi'], checkIfExists: true), - file(params.test_data['homo_sapiens']['genome']['res_1000g_phase1_snps_hg38_21_vcf_gz_tbi'], checkIfExists: true), - file(params.test_data['homo_sapiens']['genome']['dbsnp_138_hg38_21_vcf_gz_tbi'], checkIfExists: true) - ], - [ - 'hapmap,known=false,training=true,truth=true,prior=15.0 hapmap_3.3.hg38.vcf.gz', - 'omni,known=false,training=true,truth=false,prior=12.0 1000G_omni2.5.hg38.vcf.gz', - '1000G,known=false,training=true,truth=false,prior=10.0 1000G_phase1.snps.hg38.vcf.gz', - 'dbsnp,known=true,training=false,truth=false,prior=2.0 dbsnp_138.hg38.vcf.gz' - ] - ] - GATK4_VARIANTRECALIBRATOR_NO_ALLELESPECIFICTY ( input, fasta, fai, dict, resources) + GATK4_VARIANTRECALIBRATOR_NO_ALLELESPECIFICTY(input, resources, fasta, fai, dict) } workflow test_gatk4_variantrecalibrator_allele_specific { input = [ [ id:'test' ], // meta map - file(params.test_data['homo_sapiens']['illumina']['test2_haplotc_ann_vcf_gz'], checkIfExists: true), - file(params.test_data['homo_sapiens']['illumina']['test2_haplotc_ann_vcf_gz_tbi'], checkIfExists: true) + file(params.test_data['homo_sapiens']['illumina']['test2_haplotc_ann_vcf_gz'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test2_haplotc_ann_vcf_gz_tbi'], checkIfExists: true) ] + resources = [[ + file(params.test_data['homo_sapiens']['genome']['hapmap_3_3_hg38_21_vcf_gz'], checkIfExists: true), + file(params.test_data['homo_sapiens']['genome']['res_1000g_omni2_5_hg38_21_vcf_gz'], checkIfExists: true), + file(params.test_data['homo_sapiens']['genome']['res_1000g_phase1_snps_hg38_21_vcf_gz'], checkIfExists: true), + file(params.test_data['homo_sapiens']['genome']['dbsnp_138_hg38_21_vcf_gz'], checkIfExists: true) + ], [ + file(params.test_data['homo_sapiens']['genome']['hapmap_3_3_hg38_21_vcf_gz_tbi'], checkIfExists: true), + file(params.test_data['homo_sapiens']['genome']['res_1000g_omni2_5_hg38_21_vcf_gz_tbi'], checkIfExists: true), + file(params.test_data['homo_sapiens']['genome']['res_1000g_phase1_snps_hg38_21_vcf_gz_tbi'], checkIfExists: true), + file(params.test_data['homo_sapiens']['genome']['dbsnp_138_hg38_21_vcf_gz_tbi'], checkIfExists: true) + ], [ + 'hapmap,known=false,training=true,truth=true,prior=15.0 hapmap_3.3.hg38.vcf.gz', + 'omni,known=false,training=true,truth=false,prior=12.0 1000G_omni2.5.hg38.vcf.gz', + '1000G,known=false,training=true,truth=false,prior=10.0 1000G_phase1.snps.hg38.vcf.gz', + 'dbsnp,known=true,training=false,truth=false,prior=2.0 dbsnp_138.hg38.vcf.gz' + ]] + fasta = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true) fai = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta_fai'], checkIfExists: true) dict = file(params.test_data['homo_sapiens']['genome']['genome_21_dict'], checkIfExists: true) - resources = [ - [ - file(params.test_data['homo_sapiens']['genome']['hapmap_3_3_hg38_21_vcf_gz'], checkIfExists: true), - file(params.test_data['homo_sapiens']['genome']['res_1000g_omni2_5_hg38_21_vcf_gz'], checkIfExists: true), - file(params.test_data['homo_sapiens']['genome']['res_1000g_phase1_snps_hg38_21_vcf_gz'], checkIfExists: true), - file(params.test_data['homo_sapiens']['genome']['dbsnp_138_hg38_21_vcf_gz'], checkIfExists: true) - ], - [ - file(params.test_data['homo_sapiens']['genome']['hapmap_3_3_hg38_21_vcf_gz_tbi'], checkIfExists: true), - file(params.test_data['homo_sapiens']['genome']['res_1000g_omni2_5_hg38_21_vcf_gz_tbi'], checkIfExists: true), - file(params.test_data['homo_sapiens']['genome']['res_1000g_phase1_snps_hg38_21_vcf_gz_tbi'], checkIfExists: true), - file(params.test_data['homo_sapiens']['genome']['dbsnp_138_hg38_21_vcf_gz_tbi'], checkIfExists: true) - ], - [ - 'hapmap,known=false,training=true,truth=true,prior=15.0 hapmap_3.3.hg38.vcf.gz', - 'omni,known=false,training=true,truth=false,prior=12.0 1000G_omni2.5.hg38.vcf.gz', - '1000G,known=false,training=true,truth=false,prior=10.0 1000G_phase1.snps.hg38.vcf.gz', - 'dbsnp,known=true,training=false,truth=false,prior=2.0 dbsnp_138.hg38.vcf.gz' - ] - ] - GATK4_VARIANTRECALIBRATOR_WITH_ALLELESPECIFICTY ( input, fasta, fai, dict, resources) + GATK4_VARIANTRECALIBRATOR_WITH_ALLELESPECIFICTY(input, resources, fasta, fai, dict) } diff --git a/tests/modules/gatk4/variantrecalibrator/nextflow.config b/tests/modules/gatk4/variantrecalibrator/nextflow.config index 69be3b9c..6c3a9116 100644 --- a/tests/modules/gatk4/variantrecalibrator/nextflow.config +++ b/tests/modules/gatk4/variantrecalibrator/nextflow.config @@ -1,6 +1,7 @@ process { publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + withName: GATK4_VARIANTRECALIBRATOR { ext.args = '--mode SNP -an QD -an MQ -an FS -an SOR' } diff --git a/tests/modules/gatk4/variantrecalibrator/test.yml b/tests/modules/gatk4/variantrecalibrator/test.yml index 42b18e36..bc84bfb3 100644 --- a/tests/modules/gatk4/variantrecalibrator/test.yml +++ b/tests/modules/gatk4/variantrecalibrator/test.yml @@ -10,6 +10,7 @@ - path: output/gatk4/test.recal.idx - path: output/gatk4/test.tranches md5sum: d238e97bf996863969dac7751e345549 + - path: output/gatk4/versions.yml - name: gatk4 variantrecalibrator test_gatk4_variantrecalibrator_allele_specific command: nextflow run tests/modules/gatk4/variantrecalibrator -entry test_gatk4_variantrecalibrator_allele_specific -c tests/config/nextflow.config -c ./tests/modules/gatk4/variantrecalibrator/nextflow.config @@ -23,3 +24,4 @@ - path: output/gatk4/test.recal.idx - path: output/gatk4/test.tranches md5sum: 444438d46716593634a6817958099292 + - path: output/gatk4/versions.yml