Improve syntax/logic coherence in all gatk4 plugins (#1459)

* feat: code polishing

* Apply suggestions from code review

Co-authored-by: FriederikeHanssen <Friederike.hanssen@qbic.uni-tuebingen.de>

* code polishing

* more code polishing

* code polishing

* tests for applybqsrspark

* fix typo

* no need to check md5sum for versions.yml

* fix: use correct syntax

* code polishing again

* add tests for markduplicatesspark

* simplify mergevcfs tests

* add tests for baserecalibratorspark

* fix: path to entry

* code polishing

* fix linting

* simplify module

* update meta.yml

* fix pair mode

* fix: MITO mode

* more tests

* fix command

* bad copy paste

* fix typos

* fix tests

* fix test

* update meta.yml

* correct versions.yml in all test.yml

* code polishing

* code polishing

* more code polishing

* fix args

* add tmpdir for all

Co-authored-by: FriederikeHanssen <Friederike.hanssen@qbic.uni-tuebingen.de>
This commit is contained in:
Maxime U. Garcia 2022-04-12 17:15:39 +02:00 committed by GitHub
parent b59713e623
commit 409af2f27c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
102 changed files with 1209 additions and 558 deletions

View file

@ -24,8 +24,7 @@ process GATK4_APPLYBQSR {
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def interval = intervals ? "-L ${intervals}" : ""
def file_type = input.getExtension()
def interval_command = intervals ? "--intervals $intervals" : ""
def avail_mem = 3
if (!task.memory) {
@ -35,12 +34,12 @@ process GATK4_APPLYBQSR {
}
"""
gatk --java-options "-Xmx${avail_mem}g" ApplyBQSR \\
-R $fasta \\
-I $input \\
--input $input \\
--output ${prefix}.${input.getExtension()} \\
--reference $fasta \\
--bqsr-recal-file $bqsr_table \\
$interval \\
$interval_command \\
--tmp-dir . \\
-O ${prefix}.${file_type} \\
$args
cat <<-END_VERSIONS > versions.yml

View file

@ -61,6 +61,10 @@ output:
type: file
description: Recalibrated BAM file
pattern: "*.{bam}"
- cram:
type: file
description: Recalibrated CRAM file
pattern: "*.{cram}"
authors:
- "@yocra3"

View file

@ -0,0 +1,51 @@
// Runs GATK ApplyBQSRSpark on one alignment file using the supplied recalibration table.
// The output keeps the extension of the input file, so it is picked up by either
// the `bam` or the `cram` output channel (both are declared optional).
process GATK4_APPLYBQSR_SPARK {
    tag "$meta.id"
    label 'process_low'

    conda (params.enable_conda ? "bioconda::gatk4=4.2.3.0" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/gatk4:4.2.3.0--hdfd78af_0' :
        'quay.io/biocontainers/gatk4:4.2.3.0--hdfd78af_0' }"

    input:
    tuple val(meta), path(input), path(input_index), path(bqsr_table), path(intervals)
    path fasta
    path fai
    path dict

    output:
    tuple val(meta), path("*.bam") , emit: bam, optional: true
    tuple val(meta), path("*.cram"), emit: cram, optional: true
    path "versions.yml"            , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args        = task.ext.args ?: ''
    def prefix      = task.ext.prefix ?: "${meta.id}"
    // Output file name: configured prefix plus the extension of the input file.
    def output_name = "${prefix}.${input.getExtension()}"
    // Only restrict processing to intervals when an intervals file was staged.
    def intervals_arg = intervals ? "--intervals $intervals" : ""

    // Java heap size in GB: taken from the task memory directive, 3 when none is set.
    def heap_gb = 3
    if (task.memory) {
        heap_gb = task.memory.giga
    } else {
        log.info '[GATK ApplyBQSRSpark] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
    }
    """
    gatk --java-options "-Xmx${heap_gb}g" ApplyBQSRSpark \\
        --input $input \\
        --output $output_name \\
        --reference $fasta \\
        --bqsr-recal-file $bqsr_table \\
        $intervals_arg \\
        --spark-master local[${task.cpus}] \\
        --tmp-dir . \\
        $args

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//')
    END_VERSIONS
    """
}

View file

@ -0,0 +1,72 @@
name: gatk4_applybqsr_spark
description: Apply base quality score recalibration (BQSR) to a BAM or CRAM file using Spark
keywords:
- bqsr
- bam
tools:
- gatk4:
description: |
Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools
with a primary focus on variant discovery and genotyping. Its powerful processing engine
and high-performance computing features make it capable of taking on projects of any size.
homepage: https://gatk.broadinstitute.org/hc/en-us
documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s
doi: 10.1158/1538-7445.AM2017-3590
licence: ["Apache-2.0"]
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- input:
type: file
description: BAM/CRAM file from alignment
pattern: "*.{bam,cram}"
- input_index:
type: file
description: BAI/CRAI file from alignment
pattern: "*.{bai,crai}"
- bqsr_table:
type: file
description: Recalibration table from gatk4_baserecalibrator
- intervals:
type: file
description: Bed file with the genomic regions included in the library (optional)
- fasta:
type: file
description: The reference fasta file
pattern: "*.fasta"
- fai:
type: file
description: Index of reference fasta file
pattern: "*.fasta.fai"
- dict:
type: file
description: GATK sequence dictionary
pattern: "*.dict"
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- bam:
type: file
description: Recalibrated BAM file
pattern: "*.{bam}"
- cram:
type: file
description: Recalibrated CRAM file
pattern: "*.{cram}"
authors:
- "@yocra3"
- "@FriederikeHanssen"
- "@maxulysse"

View file

@ -8,7 +8,7 @@ process GATK4_APPLYVQSR {
'quay.io/biocontainers/gatk4:4.2.5.0--hdfd78af_0' }"
input:
tuple val(meta), path(vcf), path(tbi), path(recal), path(recalidx), path(tranches)
tuple val(meta), path(vcf), path(vcf_tbi), path(recal), path(recal_index), path(tranches)
path fasta
path fai
path dict
@ -24,7 +24,7 @@ process GATK4_APPLYVQSR {
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
refCommand = fasta ? "-R ${fasta} " : ''
def reference_command = fasta ? "--reference $fasta" : ''
def avail_mem = 3
if (!task.memory) {
@ -34,11 +34,12 @@ process GATK4_APPLYVQSR {
}
"""
gatk --java-options "-Xmx${avail_mem}g" ApplyVQSR \\
${refCommand} \\
-V ${vcf} \\
-O ${prefix}.vcf.gz \\
--variant ${vcf} \\
--output ${prefix}.vcf.gz \\
$reference_command \\
--tranches-file $tranches \\
--recal-file $recal \\
--tmp-dir . \\
$args
cat <<-END_VERSIONS > versions.yml

View file

@ -29,20 +29,20 @@ input:
type: file
description: VCF file to be recalibrated, this should be the same file as used for the first stage VariantRecalibrator.
pattern: "*.vcf"
- tbi:
- vcf_tbi:
type: file
description: Tbi index for the input vcf file.
description: tabix index for the input vcf file.
pattern: "*.vcf.tbi"
- recal:
type: file
description: Recalibration file produced when the input vcf was run through VariantRecalibrator in stage 1.
pattern: "*.recal"
- recalidx:
- recal_index:
type: file
description: Index file for the recalibration file.
pattern: "*.recal.idx"
- tranches:
type: boolean
type: file
description: Tranches file produced when the input vcf was run through VariantRecalibrator in stage 1.
pattern: "*.tranches"
- fasta:

View file

@ -12,8 +12,8 @@ process GATK4_BASERECALIBRATOR {
path fasta
path fai
path dict
path knownSites
path knownSites_tbi
path known_sites
path known_sites_tbi
output:
tuple val(meta), path("*.table"), emit: table
@ -25,8 +25,8 @@ process GATK4_BASERECALIBRATOR {
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def intervalsCommand = intervals ? "-L ${intervals}" : ""
def sitesCommand = knownSites.collect{"--known-sites ${it}"}.join(' ')
def interval_command = intervals ? "--intervals $intervals" : ""
def sites_command = known_sites.collect{"--known-sites $it"}.join(' ')
def avail_mem = 3
if (!task.memory) {
@ -34,16 +34,15 @@ process GATK4_BASERECALIBRATOR {
} else {
avail_mem = task.memory.giga
}
"""
gatk --java-options "-Xmx${avail_mem}g" BaseRecalibrator \
-R $fasta \
-I $input \
$sitesCommand \
$intervalsCommand \
--tmp-dir . \
$args \
-O ${prefix}.table
gatk --java-options "-Xmx${avail_mem}g" BaseRecalibrator \\
--input $input \\
--output ${prefix}.table \\
--reference $fasta \\
$interval_command \\
$sites_command \\
--tmp-dir . \\
$args
cat <<-END_VERSIONS > versions.yml
"${task.process}":

View file

@ -42,9 +42,14 @@ input:
type: file
description: GATK sequence dictionary
pattern: "*.dict"
- knownSites:
- known_sites:
type: file
description: Bed file with the genomic regions included in the library (optional)
description: VCF files with known sites for indels / snps (optional)
pattern: "*.vcf.gz"
- known_sites_tbi:
type: file
description: Tabix index of the known_sites (optional)
pattern: "*.vcf.gz.tbi"
output:
- meta:
@ -64,3 +69,4 @@ output:
authors:
- "@yocra3"
- "@FriederikeHanssen"
- "@maxulysse"

View file

@ -0,0 +1,53 @@
// Runs GATK BaseRecalibratorSpark on one alignment file and writes the
// recalibration table as `<prefix>.table`.
process GATK4_BASERECALIBRATOR_SPARK {
    tag "$meta.id"
    label 'process_low'

    conda (params.enable_conda ? "bioconda::gatk4=4.2.3.0" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/gatk4:4.2.3.0--hdfd78af_0' :
        'broadinstitute/gatk:4.2.3.0' }"

    input:
    tuple val(meta), path(input), path(input_index), path(intervals)
    path fasta
    path fai
    path dict
    path known_sites
    path known_sites_tbi

    output:
    tuple val(meta), path("*.table"), emit: table
    path "versions.yml"             , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args   = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Only restrict processing to intervals when an intervals file was staged.
    def intervals_arg = intervals ? "--intervals $intervals" : ""
    // One `--known-sites` flag per staged known-sites file.
    def known_sites_args = known_sites.collect { site -> "--known-sites ${site}" }.join(' ')

    // Java heap size in GB: taken from the task memory directive, 3 when none is set.
    def heap_gb = 3
    if (task.memory) {
        heap_gb = task.memory.giga
    } else {
        log.info '[GATK BaseRecalibratorSpark] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
    }
    """
    gatk --java-options "-Xmx${heap_gb}g" BaseRecalibratorSpark \\
        --input $input \\
        --output ${prefix}.table \\
        --reference $fasta \\
        $intervals_arg \\
        $known_sites_args \\
        --spark-master local[${task.cpus}] \\
        --tmp-dir . \\
        $args

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//')
    END_VERSIONS
    """
}

View file

@ -0,0 +1,72 @@
name: gatk4_baserecalibrator_spark
description: Generate recalibration table for Base Quality Score Recalibration (BQSR)
keywords:
- bqsr
tools:
- gatk4:
description: |
Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools
with a primary focus on variant discovery and genotyping. Its powerful processing engine
and high-performance computing features make it capable of taking on projects of any size.
homepage: https://gatk.broadinstitute.org/hc/en-us
documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s
doi: 10.1158/1538-7445.AM2017-3590
licence: ["Apache-2.0"]
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- input:
type: file
description: BAM/CRAM file from alignment
pattern: "*.{bam,cram}"
- input_index:
type: file
description: BAI/CRAI file from alignment
pattern: "*.{bai,crai}"
- intervals:
type: file
description: Bed file with the genomic regions included in the library (optional)
- fasta:
type: file
description: The reference fasta file
pattern: "*.fasta"
- fai:
type: file
description: Index of reference fasta file
pattern: "*.fasta.fai"
- dict:
type: file
description: GATK sequence dictionary
pattern: "*.dict"
- known_sites:
type: file
description: VCF files with known sites for indels / snps (optional)
pattern: "*.vcf.gz"
- known_sites_tbi:
type: file
description: Tabix index of the known_sites (optional)
pattern: "*.vcf.gz.tbi"
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- table:
type: file
description: Recalibration table from BaseRecalibrator
pattern: "*.{table}"
authors:
- "@yocra3"
- "@FriederikeHanssen"
- "@maxulysse"

View file

@ -9,7 +9,7 @@ process GATK4_BEDTOINTERVALLIST {
input:
tuple val(meta), path(bed)
path sequence_dict
path dict
output:
tuple val(meta), path('*.interval_list'), emit: interval_list
@ -21,6 +21,7 @@ process GATK4_BEDTOINTERVALLIST {
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def avail_mem = 3
if (!task.memory) {
log.info '[GATK BedToIntervalList] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
@ -29,9 +30,10 @@ process GATK4_BEDTOINTERVALLIST {
}
"""
gatk --java-options "-Xmx${avail_mem}g" BedToIntervalList \\
-I $bed \\
-SD $sequence_dict \\
-O ${prefix}.interval_list \\
--INPUT $bed \\
--OUTPUT ${prefix}.interval_list \\
--SEQUENCE_DICTIONARY $dict \\
--TMP_DIR . \\
$args
cat <<-END_VERSIONS > versions.yml

View file

@ -9,7 +9,6 @@ process GATK4_CALCULATECONTAMINATION {
input:
tuple val(meta), path(pileup), path(matched)
val segmentout
output:
tuple val(meta), path('*.contamination.table'), emit: contamination
@ -22,8 +21,8 @@ process GATK4_CALCULATECONTAMINATION {
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def matched_command = matched ? " -matched ${matched} " : ''
def segment_command = segmentout ? " -segments ${prefix}.segmentation.table" : ''
def matched_command = matched ? "--matched-normal $matched" : ''
def avail_mem = 3
if (!task.memory) {
log.info '[GATK CalculateContamination] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
@ -32,10 +31,10 @@ process GATK4_CALCULATECONTAMINATION {
}
"""
gatk --java-options "-Xmx${avail_mem}g" CalculateContamination \\
-I $pileup \\
--input $pileup \\
--output ${prefix}.contamination.table \\
$matched_command \\
-O ${prefix}.contamination.table \\
$segment_command \\
--tmp-dir . \\
$args
cat <<-END_VERSIONS > versions.yml

View file

@ -32,9 +32,6 @@ input:
type: file
description: File containing the pileups summary table of a normal sample that matches with the tumor sample specified in pileup argument. This is an optional input.
pattern: "*.pileups.table"
- segmentout:
type: boolean
description: specifies whether to output the segmentation table.
output:
- contamination:
@ -43,7 +40,7 @@ output:
pattern: "*.contamination.table"
- segmentation:
type: file
description: optional output table containing segmentation of tumor minor allele fractions.
description: output table containing segmentation of tumor minor allele fractions (optional)
pattern: "*.segmentation.table"
- versions:
type: file
@ -52,3 +49,4 @@ output:
authors:
- "@GCJMackenzie"
- "@maxulysse"

View file

@ -9,9 +9,9 @@ process GATK4_COMBINEGVCFS {
input:
tuple val(meta), path(vcf), path(vcf_idx)
path (fasta)
path (fasta_fai)
path (fasta_dict)
path fasta
path fai
path dict
output:
tuple val(meta), path("*.combined.g.vcf.gz"), emit: combined_gvcf
@ -23,21 +23,21 @@ process GATK4_COMBINEGVCFS {
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def input_list = vcf.collect{"--variant $it"}.join(' ')
def avail_mem = 3
if (!task.memory) {
log.info '[GATK COMBINEGVCFS] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
} else {
avail_mem = task.memory.giga
}
def input_files = vcf.collect{"-V ${it}"}.join(' ') // add '-V' to each vcf file
"""
gatk \\
--java-options "-Xmx${avail_mem}g" \\
CombineGVCFs \\
-R ${fasta} \\
-O ${prefix}.combined.g.vcf.gz \\
${args} \\
${input_files}
gatk --java-options "-Xmx${avail_mem}g" CombineGVCFs \\
$input_list \\
--output ${prefix}.combined.g.vcf.gz \\
--reference ${fasta} \\
--tmp-dir . \\
$args
cat <<-END_VERSIONS > versions.yml
"${task.process}":

View file

@ -19,18 +19,11 @@ tools:
licence: ["Apache-2.0"]
input:
- fasta:
type: file
description: The reference fasta file
pattern: "*.fasta"
- fai:
type: file
description: FASTA index file
pattern: "*.{fai}"
- dict:
type: file
description: FASTA dictionary file
pattern: "*.{dict}"
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test' ]
- vcf:
type: file
description: Compressed VCF files
@ -38,7 +31,19 @@ input:
- vcf_idx:
type: file
description: VCF Index file
pattern: "*.{fai}"
pattern: "*.vcf.gz.idx"
- fasta:
type: file
description: The reference fasta file
pattern: "*.fasta"
- fai:
type: file
description: FASTA index file
pattern: "*.fasta.fai"
- dict:
type: file
description: FASTA dictionary file
pattern: "*.dict"
output:
- gvcf:
type: file
@ -53,3 +58,4 @@ authors:
- "@sateeshperi"
- "@mjcipriano"
- "@hseabolt"
- "@maxulysse"

View file

@ -19,6 +19,7 @@ process GATK4_CREATESEQUENCEDICTIONARY {
script:
def args = task.ext.args ?: ''
def avail_mem = 6
if (!task.memory) {
log.info '[GATK CreateSequenceDictionary] Available memory not known - defaulting to 6GB. Specify process memory requirements to change this.'
@ -26,10 +27,10 @@ process GATK4_CREATESEQUENCEDICTIONARY {
avail_mem = task.memory.giga
}
"""
gatk --java-options "-Xmx${avail_mem}g" \\
CreateSequenceDictionary \\
gatk --java-options "-Xmx${avail_mem}g" CreateSequenceDictionary \\
--REFERENCE $fasta \\
--URI $fasta \\
--TMP_DIR . \\
$args
cat <<-END_VERSIONS > versions.yml

View file

@ -24,6 +24,7 @@ process GATK4_CREATESOMATICPANELOFNORMALS {
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def avail_mem = 3
if (!task.memory) {
log.info '[GATK CreateSomaticPanelOfNormals] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
@ -31,11 +32,11 @@ process GATK4_CREATESOMATICPANELOFNORMALS {
avail_mem = task.memory.giga
}
"""
gatk --java-options "-Xmx${avail_mem}g" \\
CreateSomaticPanelOfNormals \\
-R $fasta \\
-V gendb://$genomicsdb \\
-O ${prefix}.vcf.gz \\
gatk --java-options "-Xmx${avail_mem}g" CreateSomaticPanelOfNormals \\
--variant gendb://$genomicsdb \\
--output ${prefix}.vcf.gz \\
--reference $fasta \\
--tmp-dir . \\
$args
cat <<-END_VERSIONS > versions.yml

View file

@ -44,7 +44,7 @@ output:
pattern: "*.vcf.gz"
- tbi:
type: file
description: Index of vcf file
description: Tabix index of vcf file
pattern: "*vcf.gz.tbi"
- versions:
type: file

View file

@ -8,10 +8,10 @@ process GATK4_ESTIMATELIBRARYCOMPLEXITY {
'quay.io/biocontainers/gatk4:4.2.5.0--hdfd78af_0' }"
input:
tuple val(meta), path(cram)
path(fasta)
path(fai)
path(dict)
tuple val(meta), path(input)
path fasta
path fai
path dict
output:
tuple val(meta), path('*.metrics'), emit: metrics
@ -23,7 +23,7 @@ process GATK4_ESTIMATELIBRARYCOMPLEXITY {
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def crams = cram.collect(){ x -> "-I ".concat(x.toString()) }.join(" ")
def input_list = input.collect(){"--INPUT $it"}.join(" ")
def avail_mem = 3
if (!task.memory) {
@ -32,12 +32,12 @@ process GATK4_ESTIMATELIBRARYCOMPLEXITY {
avail_mem = task.memory.giga
}
"""
gatk --java-options "-Xmx${avail_mem}g" EstimateLibraryComplexity \
${crams} \
-O ${prefix}.metrics \
--REFERENCE_SEQUENCE ${fasta} \
--VALIDATION_STRINGENCY SILENT \
--TMP_DIR . $args
gatk --java-options "-Xmx${avail_mem}g" EstimateLibraryComplexity \\
$input_list \\
--OUTPUT ${prefix}.metrics \\
--REFERENCE_SEQUENCE ${fasta} \\
--TMP_DIR . \\
$args
cat <<-END_VERSIONS > versions.yml
"${task.process}":

View file

@ -20,7 +20,7 @@ input:
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- cram:
- input:
type: file
description: BAM/CRAM/SAM file
pattern: "*.{bam,cram,sam}"
@ -54,3 +54,4 @@ output:
authors:
- "@FriederikeHanssen"
- "@maxulysse"

View file

@ -20,7 +20,8 @@ process GATK4_FASTQTOSAM {
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def read_files = meta.single_end ? "-F1 $reads" : "-F1 ${reads[0]} -F2 ${reads[1]}"
def reads_command = meta.single_end ? "--FASTQ $reads" : "--FASTQ ${reads[0]} --FASTQ2 ${reads[1]}"
def avail_mem = 3
if (!task.memory) {
log.info '[GATK FastqToSam] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
@ -29,9 +30,10 @@ process GATK4_FASTQTOSAM {
}
"""
gatk --java-options "-Xmx${avail_mem}g" FastqToSam \\
$read_files \\
-O ${prefix}.bam \\
-SM $prefix \\
$reads_command \\
--OUTPUT ${prefix}.bam \\
--SAMPLE_NAME $prefix \\
--TMP_DIR . \\
$args
cat <<-END_VERSIONS > versions.yml

View file

@ -34,14 +34,14 @@ output:
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- bam:
type: file
description: Converted BAM file
pattern: "*.bam"
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
authors:
- "@ntoda03"

View file

@ -8,7 +8,7 @@ process GATK4_FILTERMUTECTCALLS {
'quay.io/biocontainers/gatk4:4.2.5.0--hdfd78af_0' }"
input:
tuple val(meta), path(vcf), path(tbi), path(stats), path(orientationbias), path(segmentation), path(contaminationfile), val(contaminationest)
tuple val(meta), path(vcf), path(vcf_tbi), path(stats), path(orientationbias), path(segmentation), path(table), val(estimate)
path fasta
path fai
path dict
@ -26,20 +26,11 @@ process GATK4_FILTERMUTECTCALLS {
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def orientationbias_options = ''
if (orientationbias) {
orientationbias_options = '--orientation-bias-artifact-priors ' + orientationbias.join(' --orientation-bias-artifact-priors ')
}
def orientationbias_command = orientationbias ? orientationbias.collect{"--orientation-bias-artifact-priors $it"}.join(' ') : ''
def segmentation_command = segmentation ? segmentation.collect{"--tumor-segmentation $it"}.join(' ') : ''
def estimate_command = estimate ? " --contamination-estimate ${estimate} " : ''
def table_command = table ? " --contamination-table ${table} " : ''
def segmentation_options = ''
if (segmentation) {
segmentation_options = '--tumor-segmentation ' + segmentation.join(' --tumor-segmentation ')
}
def contamination_options = contaminationest ? " --contamination-estimate ${contaminationest} " : ''
if (contaminationfile) {
contamination_options = '--contamination-table ' + contaminationfile.join(' --contamination-table ')
}
def avail_mem = 3
if (!task.memory) {
log.info '[GATK FilterMutectCalls] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
@ -48,12 +39,14 @@ process GATK4_FILTERMUTECTCALLS {
}
"""
gatk --java-options "-Xmx${avail_mem}g" FilterMutectCalls \\
-R $fasta \\
-V $vcf \\
$orientationbias_options \\
$segmentation_options \\
$contamination_options \\
-O ${prefix}.vcf.gz \\
--variant $vcf \\
--output ${prefix}.vcf.gz \\
--reference $fasta \\
$orientationbias_command \\
$segmentation_command \\
$estimate_command \\
$table_command \\
--tmp-dir . \\
$args
cat <<-END_VERSIONS > versions.yml

View file

@ -26,9 +26,9 @@ input:
type: file
description: compressed vcf file of mutect2calls
pattern: "*.vcf.gz"
- tbi:
- vcf_tbi:
type: file
description: Index of vcf file
description: Tabix index of vcf file
pattern: "*vcf.gz.tbi"
- stats:
type: file
@ -42,13 +42,13 @@ input:
type: list
description: tables containing segmentation information for input vcf. Optional input.
pattern: "*.segmentation.table"
- contaminationfile:
- table:
type: list
description: table(s) containing contamination contamination data for input vcf. Optional input, takes priority over contaminationest.
description: table(s) containing contamination data for input vcf. Optional input, takes priority over estimate.
pattern: "*.contamination.table"
- contaminationest:
- estimate:
type: val
description: estimation of contamination value as a double. Optional input, will only be used if contaminationfile is not specified.
description: estimation of contamination value as a double. Optional input, will only be used if table is not specified.
- fasta:
type: file
description: The reference fasta file
@ -82,3 +82,4 @@ output:
authors:
- "@GCJMackenzie"
- "@maxulysse"

View file

@ -8,7 +8,7 @@ process GATK4_GATHERBQSRREPORTS {
'quay.io/biocontainers/gatk4:4.2.5.0--hdfd78af_0' }"
input:
tuple val(meta), path(recal_table)
tuple val(meta), path(table)
output:
tuple val(meta), path("*.table"), emit: table
@ -20,7 +20,7 @@ process GATK4_GATHERBQSRREPORTS {
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def input = recal_table.collect{"-I ${it}"}.join(' ')
def input_list = table.collect{"--input $it"}.join(' ')
def avail_mem = 3
if (!task.memory) {
@ -29,12 +29,11 @@ process GATK4_GATHERBQSRREPORTS {
avail_mem = task.memory.giga
}
"""
gatk --java-options "-Xmx${avail_mem}g" \\
GatherBQSRReports \
${input} \
--tmp-dir . \
$args \
--output ${prefix}.table
gatk --java-options "-Xmx${avail_mem}g" GatherBQSRReports \\
$input_list \\
--output ${prefix}.table \\
--tmp-dir . \\
$args
cat <<-END_VERSIONS > versions.yml
"${task.process}":

View file

@ -19,7 +19,7 @@ input:
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- recal_table:
- table:
type: file
description: File(s) containing BQSR table(s)
pattern: "*.table"
@ -30,14 +30,14 @@ output:
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- table:
type: file
description: File containing joined BQSR table
pattern: "*.table"
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- recal_table:
type: file
description: File containing joined BQSR table
pattern: "*.table"
authors:
- "@FriederikeHanssen"

View file

@ -22,7 +22,7 @@ process GATK4_GATHERPILEUPSUMMARIES {
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def input = pileup.collect{ "-I ${it} " }.join(' ')
def input_list = pileup.collect{ "--I $it" }.join(' ')
def avail_mem = 3
if (!task.memory) {
@ -31,11 +31,12 @@ process GATK4_GATHERPILEUPSUMMARIES {
avail_mem = task.memory.giga
}
"""
gatk --java-options "-Xmx${avail_mem}g" \
GatherPileupSummaries \
--sequence-dictionary ${dict} \
${input} \
-O ${prefix}.pileupsummaries.table
gatk --java-options "-Xmx${avail_mem}g" GatherPileupSummaries \\
$input_list \\
--O ${prefix}.pileupsummaries.table \\
--sequence-dictionary $dict \\
--tmp-dir . \\
$args
cat <<-END_VERSIONS > versions.yml
"${task.process}":

View file

@ -28,14 +28,15 @@ output:
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- table:
type: file
description: pileup summaries table file
pattern: "*.pileupsummaries.table"
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- table:
type: file
description: Pileup file
pattern: "*.pileups.table"
authors:
- "@FriederikeHanssen"
- "@maxulysse"

View file

@ -8,13 +8,13 @@ process GATK4_GENOMICSDBIMPORT {
'quay.io/biocontainers/gatk4:4.2.5.0--hdfd78af_0' }"
input:
tuple val(meta), path(vcf), path(tbi), path(intervalfile), val(intervalval), path(wspace)
tuple val(meta), path(vcf), path(tbi), path(interval_file), val(interval_value), path(wspace)
val run_intlist
val run_updatewspace
val input_map
output:
tuple val(meta), path("${prefix}") , optional:true, emit: genomicsdb
tuple val(meta), path("$prefix") , optional:true, emit: genomicsdb
tuple val(meta), path("$updated_db") , optional:true, emit: updatedb
tuple val(meta), path("*.interval_list"), optional:true, emit: intervallist
path "versions.yml" , emit: versions
@ -27,22 +27,22 @@ process GATK4_GENOMICSDBIMPORT {
prefix = task.ext.prefix ?: "${meta.id}"
// settings for running default create gendb mode
inputs_command = input_map ? "--sample-name-map ${vcf[0]}" : "${'-V ' + vcf.join(' -V ')}"
dir_command = "--genomicsdb-workspace-path ${prefix}"
intervals_command = intervalfile ? " -L ${intervalfile} " : " -L ${intervalval} "
input_command = input_map ? "--sample-name-map ${vcf[0]}" : vcf.collect(){"--variant $it"}.join(' ')
genomicsdb_command = "--genomicsdb-workspace-path ${prefix}"
interval_command = interval_file ? "--intervals ${interval_file}" : "--intervals ${interval_value}"
// settings changed for running get intervals list mode if run_intlist is true
if (run_intlist) {
inputs_command = ''
dir_command = "--genomicsdb-update-workspace-path ${wspace}"
intervals_command = "--output-interval-list-to-file ${prefix}.interval_list"
genomicsdb_command = "--genomicsdb-update-workspace-path ${wspace}"
interval_command = "--output-interval-list-to-file ${prefix}.interval_list"
}
// settings changed for running update gendb mode. inputs_command same as default, update_db forces module to emit the updated gendb
// settings changed for running update gendb mode. input_command same as default, update_db forces module to emit the updated gendb
if (run_updatewspace) {
dir_command = "--genomicsdb-update-workspace-path ${wspace}"
intervals_command = ''
updated_db = wspace.toString()
genomicsdb_command = "--genomicsdb-update-workspace-path ${wspace}"
interval_command = ''
updated_db = "${wspace}"
}
def avail_mem = 3
@ -53,9 +53,10 @@ process GATK4_GENOMICSDBIMPORT {
}
"""
gatk --java-options "-Xmx${avail_mem}g" GenomicsDBImport \\
$inputs_command \\
$dir_command \\
$intervals_command \\
$input_command \\
$genomicsdb_command \\
$interval_command \\
--tmp-dir . \\
$args
cat <<-END_VERSIONS > versions.yml

View file

@ -10,10 +10,10 @@ process GATK4_GENOTYPEGVCFS {
input:
tuple val(meta), path(gvcf), path(gvcf_index), path(intervals), path(intervals_index)
path fasta
path fasta_index
path fasta_dict
path fai
path dict
path dbsnp
path dbsnp_index
path dbsnp_tbi
output:
tuple val(meta), path("*.vcf.gz"), emit: vcf
@ -26,9 +26,10 @@ process GATK4_GENOTYPEGVCFS {
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def dbsnp_options = dbsnp ? "-D ${dbsnp}" : ""
def interval_options = intervals ? "-L ${intervals}" : ""
def gvcf_options = gvcf.name.endsWith(".vcf") || gvcf.name.endsWith(".vcf.gz") ? "$gvcf" : "gendb://$gvcf"
def gvcf_command = gvcf.name.endsWith(".vcf") || gvcf.name.endsWith(".vcf.gz") ? "$gvcf" : "gendb://$gvcf"
def dbsnp_command = dbsnp ? "--dbsnp $dbsnp" : ""
def interval_command = intervals ? "--intervals $intervals" : ""
def avail_mem = 3
if (!task.memory) {
log.info '[GATK GenotypeGVCFs] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
@ -36,14 +37,14 @@ process GATK4_GENOTYPEGVCFS {
avail_mem = task.memory.giga
}
"""
gatk --java-options "-Xmx${avail_mem}g" \\
GenotypeGVCFs \\
$args \\
$interval_options \\
$dbsnp_options \\
-R $fasta \\
-V $gvcf_options \\
-O ${prefix}.vcf.gz
gatk --java-options "-Xmx${avail_mem}g" GenotypeGVCFs \\
--variant $gvcf_command \\
--output ${prefix}.vcf.gz \\
--reference $fasta \\
$interval_command \\
$dbsnp_command \\
--tmp-dir . \\
$args
cat <<-END_VERSIONS > versions.yml
"${task.process}":

View file

@ -21,10 +21,15 @@ input:
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- gvcf:
type: tuple of files
type: file
description: |
Tuple of gVCF(.gz) file (first) and its index (second) or the path to a GenomicsDB (and empty)
pattern: ["*.{vcf,vcf.gz}", "*.{idx,tbi}"]
gVCF(.gz) file or path to a GenomicsDB
pattern: "*.{vcf,vcf.gz}"
- gvcf_index:
type: file
description: |
index of gvcf file, or empty when providing GenomicsDB
pattern: "*.{idx,tbi}"
- intervals:
type: file
description: Interval file with the genomic regions included in the library (optional)
@ -35,11 +40,11 @@ input:
type: file
description: Reference fasta file
pattern: "*.fasta"
- fasta_index:
- fai:
type: file
description: Reference fasta index file
pattern: "*.fai"
- fasta_dict:
- dict:
type: file
description: Reference fasta sequence dict file
pattern: "*.dict"
@ -47,8 +52,8 @@ input:
type: file
description: dbSNP VCF file
pattern: "*.vcf.gz"
- dbsnp_index:
type: tuple of files
- dbsnp_tbi:
type: file
description: dbSNP VCF index file
pattern: "*.tbi"
@ -73,3 +78,4 @@ output:
authors:
- "@santiagorevale"
- "@maxulysse"

View file

@ -25,8 +25,8 @@ process GATK4_GETPILEUPSUMMARIES {
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def sitesCommand = intervals ? " -L ${intervals} " : " -L ${variants} "
def reference = fasta ? " -R ${fasta}" :""
// Restrict the pileup to the supplied intervals; when none are given fall back to the
// variant sites themselves, exactly as the previous sitesCommand did.
def interval_command = intervals ? "--intervals $intervals" : "--intervals $variants"
def reference_command = fasta ? "--reference $fasta" : ''
def avail_mem = 3
if (!task.memory) {
@ -36,11 +36,12 @@ process GATK4_GETPILEUPSUMMARIES {
}
"""
gatk --java-options "-Xmx${avail_mem}g" GetPileupSummaries \\
-I $input \\
-V $variants \\
$sitesCommand \\
${reference} \\
-O ${prefix}.pileups.table \\
--input $input \\
--variant $variants \\
--output ${prefix}.pileups.table \\
$reference_command \\
$interval_command \\
--tmp-dir . \\
$args
cat <<-END_VERSIONS > versions.yml

View file

@ -26,8 +26,9 @@ process GATK4_HAPLOTYPECALLER {
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def interval_option = intervals ? "-L ${intervals}" : ""
def dbsnp_option = dbsnp ? "-D ${dbsnp}" : ""
def dbsnp_command = dbsnp ? "--dbsnp $dbsnp" : ""
def interval_command = intervals ? "--intervals $intervals" : ""
def avail_mem = 3
if (!task.memory) {
log.info '[GATK HaplotypeCaller] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
@ -35,16 +36,14 @@ process GATK4_HAPLOTYPECALLER {
avail_mem = task.memory.giga
}
"""
gatk \\
--java-options "-Xmx${avail_mem}g" \\
HaplotypeCaller \\
-R $fasta \\
-I $input \\
${dbsnp_option} \\
${interval_option} \\
-O ${prefix}.vcf.gz \\
$args \\
--tmp-dir .
gatk --java-options "-Xmx${avail_mem}g" HaplotypeCaller \\
--input $input \\
--output ${prefix}.vcf.gz \\
--reference $fasta \\
$dbsnp_command \\
$interval_command \\
--tmp-dir . \\
$args
cat <<-END_VERSIONS > versions.yml
"${task.process}":

View file

@ -19,6 +19,7 @@ process GATK4_INDEXFEATUREFILE {
script:
def args = task.ext.args ?: ''
def avail_mem = 3
if (!task.memory) {
log.info '[GATK IndexFeatureFile] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
@ -26,10 +27,10 @@ process GATK4_INDEXFEATUREFILE {
avail_mem = task.memory.giga
}
"""
gatk --java-options "-Xmx${avail_mem}g" \\
IndexFeatureFile \\
$args \\
-I $feature_file
gatk --java-options "-Xmx${avail_mem}g" IndexFeatureFile \\
--input $feature_file \\
--tmp-dir . \\
$args
cat <<-END_VERSIONS > versions.yml
"${task.process}":

View file

@ -8,7 +8,7 @@ process GATK4_INTERVALLISTTOBED {
'quay.io/biocontainers/gatk4:4.2.5.0--hdfd78af_0' }"
input:
tuple val(meta), path(interval)
tuple val(meta), path(intervals)
output:
tuple val(meta), path("*.bed"), emit: bed
@ -29,8 +29,9 @@ process GATK4_INTERVALLISTTOBED {
}
"""
gatk --java-options "-Xmx${avail_mem}g" IntervalListToBed \\
--INPUT ${interval} \\
--INPUT $intervals \\
--OUTPUT ${prefix}.bed \\
--TMP_DIR . \\
$args
cat <<-END_VERSIONS > versions.yml

View file

@ -8,7 +8,7 @@ process GATK4_INTERVALLISTTOOLS {
'quay.io/biocontainers/gatk4:4.2.5.0--hdfd78af_0' }"
input:
tuple val(meta), path(interval_list)
tuple val(meta), path(intervals)
output:
tuple val(meta), path("*_split/*/*.interval_list"), emit: interval_list
@ -20,6 +20,7 @@ process GATK4_INTERVALLISTTOOLS {
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def avail_mem = 3
if (!task.memory) {
log.info '[GATK IntervalListTools] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
@ -30,10 +31,10 @@ process GATK4_INTERVALLISTTOOLS {
mkdir ${prefix}_split
gatk --java-options "-Xmx${avail_mem}g" \\
IntervalListTools \\
-I ${interval_list} \\
-O ${prefix}_split \\
gatk --java-options "-Xmx${avail_mem}g" IntervalListTools \\
--INPUT $intervals \\
--OUTPUT ${prefix}_split \\
--TMP_DIR . \\
$args
python3 <<CODE

View file

@ -20,8 +20,8 @@ process GATK4_LEARNREADORIENTATIONMODEL {
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def inputs_list = []
f1r2.each() { a -> inputs_list.add(" -I " + a) }
def input_list = f1r2.collect{"--input $it"}.join(' ')
def avail_mem = 3
if (!task.memory) {
log.info '[GATK LearnReadOrientationModel] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
@ -29,10 +29,10 @@ process GATK4_LEARNREADORIENTATIONMODEL {
avail_mem = task.memory.giga
}
"""
gatk --java-options "-Xmx${avail_mem}g" \\
LearnReadOrientationModel \\
${inputs_list.join(' ')} \\
-O ${prefix}.tar.gz \\
gatk --java-options "-Xmx${avail_mem}g" LearnReadOrientationModel \\
$input_list \\
--output ${prefix}.tar.gz \\
--tmp-dir . \\
$args
cat <<-END_VERSIONS > versions.yml

View file

@ -8,7 +8,7 @@ process GATK4_MARKDUPLICATES {
'quay.io/biocontainers/gatk4:4.2.5.0--hdfd78af_0' }"
input:
tuple val(meta), path(bams)
tuple val(meta), path(bam)
output:
tuple val(meta), path("*.bam") , emit: bam
@ -22,7 +22,8 @@ process GATK4_MARKDUPLICATES {
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def bam_list = bams.collect(){ bam -> "--INPUT ".concat(bam.toString()) }.join(" ")
def input_list = bam.collect{"--INPUT $it"}.join(' ')
def avail_mem = 3
if (!task.memory) {
log.info '[GATK MarkDuplicates] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
@ -31,11 +32,10 @@ process GATK4_MARKDUPLICATES {
}
"""
gatk --java-options "-Xmx${avail_mem}g" MarkDuplicates \\
$bam_list \\
$input_list \\
--OUTPUT ${prefix}.bam \\
--METRICS_FILE ${prefix}.metrics \\
--TMP_DIR . \\
--CREATE_INDEX true \\
--OUTPUT ${prefix}.bam \\
$args
cat <<-END_VERSIONS > versions.yml

View file

@ -49,3 +49,4 @@ output:
authors:
- "@ajodeh-juma"
- "@FriederikeHanssen"
- "@maxulysse"

View file

@ -0,0 +1,50 @@
// Runs GATK4 MarkDuplicatesSpark on one or more alignment files for a sample.
//
// Inputs:
//   meta      - sample map; meta.id is used as the task tag and default prefix
//   bam       - one or more input alignment files, each passed as its own --input
//   fasta     - reference passed to --reference
//   fasta_fai - staged alongside the reference (not referenced in the command)
//   dict      - staged alongside the reference (not referenced in the command)
// Outputs:
//   output    - the file named by `prefix`
//   versions  - versions.yml holding the GATK version string
process GATK4_MARKDUPLICATES_SPARK {
    tag "$meta.id"
    label 'process_high'

    conda (params.enable_conda ? "bioconda::gatk4=4.2.3.0" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/gatk4:4.2.3.0--hdfd78af_0' :
        'broadinstitute/gatk:4.2.3.0' }"

    input:
    tuple val(meta), path(bam)
    path fasta
    path fasta_fai
    path dict

    output:
    tuple val(meta), path("${prefix}"), emit: output
    path "versions.yml" , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    // Deliberately not declared with `def`: the output block above resolves "${prefix}".
    prefix = task.ext.prefix ?: "${meta.id}"
    // One --input per file. The option is lower-case here, matching the other
    // lower-case long options (--output, --reference, --tmp-dir) in this command.
    def input_list = bam.collect{"--input $it"}.join(' ')

    // Java heap in GB: 3 unless the task declares a memory request.
    def avail_mem = 3
    if (!task.memory) {
        log.info '[GATK MarkDuplicatesSpark] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
    } else {
        avail_mem = task.memory.giga
    }
    """
    export SPARK_USER=spark3

    gatk --java-options "-Xmx${avail_mem}g" MarkDuplicatesSpark \\
        $input_list \\
        --output $prefix \\
        --reference $fasta \\
        --spark-master local[${task.cpus}] \\
        --tmp-dir . \\
        $args

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//')
    END_VERSIONS
    """
}

View file

@ -0,0 +1,60 @@
name: gatk4_markduplicates_spark
description: This tool locates and tags duplicate reads in a BAM or SAM file, where duplicate reads are defined as originating from a single fragment of DNA.
keywords:
- markduplicates
- bam
- sort
tools:
- gatk4:
description:
Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools
with a primary focus on variant discovery and genotyping. Its powerful processing engine
and high-performance computing features make it capable of taking on projects of any size.
homepage: https://gatk.broadinstitute.org/hc/en-us
documentation: https://gatk.broadinstitute.org/hc/en-us/articles/360037052812-MarkDuplicates-Picard-
tool_dev_url: https://github.com/broadinstitute/gatk
doi: 10.1158/1538-7445.AM2017-3590
licence: ["MIT"]
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- bam:
type: file
description: Sorted BAM file
pattern: "*.{bam}"
- fasta:
type: file
description: The reference fasta file
pattern: "*.fasta"
- fasta_fai:
type: file
description: Index of reference fasta file
pattern: "*.fasta.fai"
- dict:
type: file
description: GATK sequence dictionary
pattern: "*.dict"
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- output:
type: file
description: Marked duplicates BAM file
pattern: "*.{bam}"
authors:
- "@ajodeh-juma"
- "@FriederikeHanssen"
- "@maxulysse"

View file

@ -22,6 +22,7 @@ process GATK4_MERGEBAMALIGNMENT {
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def avail_mem = 3
if (!task.memory) {
log.info '[GATK MergeBamAlignment] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
@ -30,10 +31,11 @@ process GATK4_MERGEBAMALIGNMENT {
}
"""
gatk --java-options "-Xmx${avail_mem}g" MergeBamAlignment \\
-ALIGNED $aligned \\
-UNMAPPED $unmapped \\
-R $fasta \\
-O ${prefix}.bam \\
--UNMAPPED_BAM $unmapped \\
--ALIGNED_BAM $aligned \\
--OUTPUT ${prefix}.bam \\
--REFERENCE_SEQUENCE $fasta \\
--TMP_DIR . \\
$args
cat <<-END_VERSIONS > versions.yml

View file

@ -9,6 +9,7 @@ process GATK4_MERGEMUTECTSTATS {
input:
tuple val(meta), path(stats)
output:
tuple val(meta), path("*.vcf.gz.stats"), emit: stats
path "versions.yml" , emit: versions
@ -19,7 +20,7 @@ process GATK4_MERGEMUTECTSTATS {
script:
def args = task.ext.args ?: ''
prefix = task.ext.prefix ?: "${meta.id}"
def input = stats.collect{ " -stats ${it} "}.join()
def input_list = stats.collect{ "--stats ${it}"}.join(' ')
def avail_mem = 3
if (!task.memory) {
@ -29,8 +30,9 @@ process GATK4_MERGEMUTECTSTATS {
}
"""
gatk --java-options "-Xmx${avail_mem}g" MergeMutectStats \\
${input} \\
-output ${meta.id}.vcf.gz.stats \\
$input_list \\
--output ${prefix}.vcf.gz.stats \\
--tmp-dir . \\
$args
cat <<-END_VERSIONS > versions.yml

View file

@ -8,9 +8,8 @@ process GATK4_MERGEVCFS {
'quay.io/biocontainers/gatk4:4.2.5.0--hdfd78af_0' }"
input:
tuple val(meta), path(vcfs)
path ref_dict
val use_ref_dict
tuple val(meta), path(vcf)
path dict
output:
tuple val(meta), path('*.vcf.gz'), emit: vcf
@ -22,13 +21,9 @@ process GATK4_MERGEVCFS {
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def input_list = vcf.collect{ "--INPUT $it"}.join(' ')
def reference_command = dict ? "--SEQUENCE_DICTIONARY $dict" : ""
// Make list of VCFs to merge
def input = ""
for (vcf in vcfs) {
input += " I=${vcf}"
}
def ref = use_ref_dict ? "D=${ref_dict}" : ""
def avail_mem = 3
if (!task.memory) {
log.info '[GATK MergeVcfs] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
@ -37,9 +32,10 @@ process GATK4_MERGEVCFS {
}
"""
gatk --java-options "-Xmx${avail_mem}g" MergeVcfs \\
$input \\
O=${prefix}.vcf.gz \\
$ref \\
$input_list \\
--OUTPUT ${prefix}.vcf.gz \\
$reference_command \\
--TMP_DIR . \\
$args
cat <<-END_VERSIONS > versions.yml

View file

@ -8,10 +8,7 @@ process GATK4_MUTECT2 {
'quay.io/biocontainers/gatk4:4.2.5.0--hdfd78af_0' }"
input:
tuple val(meta) , path(input) , path(input_index) , path(intervals), val(which_norm)
val run_single
val run_pon
val run_mito
tuple val(meta), path(input), path(input_index), path(intervals)
path fasta
path fai
path dict
@ -33,28 +30,10 @@ process GATK4_MUTECT2 {
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def panels_command = ''
def normals_command = ''
def inputs_command = '-I ' + input.join( ' -I ')
def interval = intervals ? "-L ${intervals}" : ""
if(run_pon) {
panels_command = ''
normals_command = ''
} else if(run_single) {
panels_command = " --germline-resource $germline_resource --panel-of-normals $panel_of_normals"
normals_command = ''
} else if(run_mito){
panels_command = "-L ${intervals} --mitochondria-mode"
normals_command = ''
} else {
panels_command = " --germline-resource $germline_resource --panel-of-normals $panel_of_normals --f1r2-tar-gz ${prefix}.f1r2.tar.gz"
normals_command = '-normal ' + which_norm.join( ' -normal ')
}
def inputs = input.collect{ "--input $it"}.join(" ")
def interval_command = intervals ? "--intervals $intervals" : ""
def pon_command = panel_of_normals ? "--panel-of-normals $panel_of_normals" : ""
def gr_command = germline_resource ? "--germline-resource $germline_resource" : ""
def avail_mem = 3
if (!task.memory) {
@ -64,12 +43,13 @@ process GATK4_MUTECT2 {
}
"""
gatk --java-options "-Xmx${avail_mem}g" Mutect2 \\
-R ${fasta} \\
${inputs_command} \\
${normals_command} \\
${panels_command} \\
${interval} \\
-O ${prefix}.vcf.gz \\
$inputs \\
--output ${prefix}.vcf.gz \\
--reference $fasta \\
$pon_command \\
$gr_command \\
$interval_command \\
--tmp-dir . \\
$args
cat <<-END_VERSIONS > versions.yml

View file

@ -34,22 +34,6 @@ input:
type: File/string
description: Specify region the tools is run on.
pattern: ".{bed,interval_list}/chrM"
- which_norm:
type: list
description: optional list of sample headers contained in the normal sample bam files (these are required for tumor_normal_pair mode)
pattern: "testN"
- run_single:
type: boolean
description: Specify whether or not to run in tumor_single mode instead of tumor_normal_pair mode (will be ignored if run_pon is also true)
pattern: "true/false"
- run_pon:
type: boolean
description: Specify whether or not to run in panel_of_normal mode instead of tumor_normal_pair mode
pattern: "true/false"
- run_mito:
type: boolean
description: Specify whether or not to run in mitochondria-mode instead of tumor_normal_pair mode
pattern: "true/false"
- fasta:
type: file
description: The reference fasta file

View file

@ -20,6 +20,7 @@ process GATK4_REVERTSAM {
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def avail_mem = 3
if (!task.memory) {
log.info '[GATK RevertSam] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
@ -28,8 +29,9 @@ process GATK4_REVERTSAM {
}
"""
gatk --java-options "-Xmx${avail_mem}g" RevertSam \\
I=$bam \\
O=${prefix}.reverted.bam \\
--INPUT $bam \\
--OUTPUT ${prefix}.reverted.bam \\
--TMP_DIR . \\
$args
cat <<-END_VERSIONS > versions.yml

View file

@ -20,7 +20,8 @@ process GATK4_SAMTOFASTQ {
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def output = meta.single_end ? "FASTQ=${prefix}.fastq.gz" : "FASTQ=${prefix}_1.fastq.gz SECOND_END_FASTQ=${prefix}_2.fastq.gz"
def output = meta.single_end ? "--FASTQ ${prefix}.fastq.gz" : "--FASTQ ${prefix}_1.fastq.gz --SECOND_END_FASTQ ${prefix}_2.fastq.gz"
def avail_mem = 3
if (!task.memory) {
log.info '[GATK SamToFastq] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
@ -29,8 +30,9 @@ process GATK4_SAMTOFASTQ {
}
"""
gatk --java-options "-Xmx${avail_mem}g" SamToFastq \\
I=$bam \\
--INPUT $bam \\
$output \\
--TMP_DIR . \\
$args
cat <<-END_VERSIONS > versions.yml

View file

@ -21,6 +21,7 @@ process GATK4_SELECTVARIANTS {
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def avail_mem = 3
// Warn, tagged with this module's own tool name, when the task has no memory request.
if (!task.memory) {
log.info '[GATK SelectVariants] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
@ -29,8 +30,9 @@ process GATK4_SELECTVARIANTS {
}
"""
gatk --java-options "-Xmx${avail_mem}G" SelectVariants \\
-V $vcf \\
-O ${prefix}.selectvariants.vcf.gz \\
--variant $vcf \\
--output ${prefix}.selectvariants.vcf.gz \\
--tmp-dir . \\
$args
cat <<-END_VERSIONS > versions.yml

View file

@ -23,6 +23,7 @@ process GATK4_SPLITNCIGARREADS {
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def avail_mem = 3
if (!task.memory) {
log.info '[GATK SplitNCigarReads] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
@ -31,9 +32,10 @@ process GATK4_SPLITNCIGARREADS {
}
"""
gatk --java-options "-Xmx${avail_mem}g" SplitNCigarReads \\
-R $fasta \\
-I $bam \\
-O ${prefix}.bam \\
--input $bam \\
--output ${prefix}.bam \\
--reference $fasta \\
--tmp-dir . \\
$args
cat <<-END_VERSIONS > versions.yml

View file

@ -8,7 +8,7 @@ process GATK4_VARIANTFILTRATION {
'quay.io/biocontainers/gatk4:4.2.5.0--hdfd78af_0' }"
input:
tuple val(meta), path(vcf), path(vcf_tbi)
tuple val(meta), path(vcf), path(tbi)
path fasta
path fai
path dict
@ -24,6 +24,7 @@ process GATK4_VARIANTFILTRATION {
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def avail_mem = 3
if (!task.memory) {
log.info '[GATK VariantFiltration] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
@ -32,9 +33,10 @@ process GATK4_VARIANTFILTRATION {
}
"""
gatk --java-options "-Xmx${avail_mem}G" VariantFiltration \\
-R $fasta \\
-V $vcf \\
-O ${prefix}.vcf.gz \\
--variant $vcf \\
--output ${prefix}.vcf.gz \\
--reference $fasta \\
--tmp-dir . \\
$args
cat <<-END_VERSIONS > versions.yml

View file

@ -9,10 +9,10 @@ process GATK4_VARIANTRECALIBRATOR {
input:
tuple val(meta), path(vcf), path(tbi)
tuple path(vcfs), path(tbis), val(labels)
path fasta
path fai
path dict
tuple path(resvcfs), path(restbis), val(reslabels)
output:
tuple val(meta), path("*.recal") , emit: recal
@ -27,8 +27,8 @@ process GATK4_VARIANTRECALIBRATOR {
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
refCommand = fasta ? "-R ${fasta} " : ''
resourceCommand = '--resource:' + reslabels.join( ' --resource:')
def reference_command = fasta ? "--reference $fasta " : ''
def resource_command = labels.collect{"--resource:$it"}.join(' ')
def avail_mem = 3
if (!task.memory) {
@ -38,11 +38,12 @@ process GATK4_VARIANTRECALIBRATOR {
}
"""
gatk --java-options "-Xmx${avail_mem}g" VariantRecalibrator \\
${refCommand} \\
-V ${vcf} \\
-O ${prefix}.recal \\
--variant $vcf \\
--output ${prefix}.recal \\
--tranches-file ${prefix}.tranches \\
${resourceCommand} \\
$reference_command \\
$resource_command \\
--tmp-dir . \\
$args
cat <<-END_VERSIONS > versions.yml

View file

@ -7,7 +7,6 @@
- path: output/gatk4/test.bam
md5sum: d088422be886dc8507ff97fcc7dd968a
- path: output/gatk4/versions.yml
md5sum: d5c6455d8a77aecc63f87c795fc3443e
- name: gatk4 applybqsr test_gatk4_applybqsr_intervals
command: nextflow run tests/modules/gatk4/applybqsr -entry test_gatk4_applybqsr_intervals -c tests/config/nextflow.config -c ./tests/modules/gatk4/applybqsr/nextflow.config
@ -18,7 +17,6 @@
- path: output/gatk4/test.bam
md5sum: 4bfa18d651abd945e240b05e70107716
- path: output/gatk4/versions.yml
md5sum: cb4cb8a62e117b4adc643ae47883d53c
- name: gatk4 applybqsr test_gatk4_applybqsr_cram
command: nextflow run tests/modules/gatk4/applybqsr -entry test_gatk4_applybqsr_cram -c tests/config/nextflow.config -c ./tests/modules/gatk4/applybqsr/nextflow.config
@ -29,4 +27,3 @@
- path: output/gatk4/test.cram
md5sum: 2e0bca197af4f043a4a85152e6edbe04
- path: output/gatk4/versions.yml
md5sum: 1efaa18be943bab4e4c54191d6eaa260

View file

@ -0,0 +1,47 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { GATK4_APPLYBQSR_SPARK } from '../../../../modules/gatk4/applybqsrspark/main.nf'
// Test entry: runs GATK4_APPLYBQSR_SPARK on the sarscov2 paired-end sorted BAM,
// its BAI index and the base-recalibration table, against the sarscov2 reference.
workflow test_gatk4_applybqsr_spark {
input = [ [ id:'test' ], // meta map
file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true),
file(params.test_data['sarscov2']['illumina']['test_baserecalibrator_table'], checkIfExists: true),
[] // empty fifth slot; presumably the optional intervals file (the _intervals test puts a BED here)
]
fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true)
dict = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true)
GATK4_APPLYBQSR_SPARK ( input, fasta, fai, dict )
}
// Test entry: same sarscov2 BAM, index and recalibration table as the basic test,
// but with the sarscov2 test BED supplied as the fifth element of the input tuple.
workflow test_gatk4_applybqsr_spark_intervals {
input = [ [ id:'test' ], // meta map
file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true),
file(params.test_data['sarscov2']['illumina']['test_baserecalibrator_table'], checkIfExists: true),
file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true)
]
fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true)
dict = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true)
GATK4_APPLYBQSR_SPARK ( input, fasta, fai, dict )
}
// Test entry: runs GATK4_APPLYBQSR_SPARK on the homo_sapiens paired-end sorted CRAM,
// its CRAI index, the recalibration table and the genome BED, against the
// homo_sapiens reference fasta, index and dict.
workflow test_gatk4_applybqsr_spark_cram {
input = [ [ id:'test' ], // meta map
file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true),
file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true),
file(params.test_data['homo_sapiens']['illumina']['test_baserecalibrator_table'], checkIfExists: true),
file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true)
]
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
GATK4_APPLYBQSR_SPARK ( input, fasta, fai, dict )
}

View file

@ -0,0 +1,5 @@
// Test configuration applied to every process.
process {
// Publish under <outdir>/<name>, where <name> is the last ':'-separated component of the
// process name, cut at its first '_' and lower-cased.
publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
}

View file

@ -0,0 +1,29 @@
- name: gatk4 applybqsr test_gatk4_applybqsr_spark
command: nextflow run tests/modules/gatk4/applybqsrspark -entry test_gatk4_applybqsr_spark -c tests/config/nextflow.config -c ./tests/modules/gatk4/applybqsrspark/nextflow.config
tags:
- gatk4
- gatk4/applybqsrspark
files:
- path: output/gatk4/test.bam
md5sum: d088422be886dc8507ff97fcc7dd968a
- path: output/gatk4/versions.yml
- name: gatk4 applybqsr test_gatk4_applybqsr_spark_intervals
command: nextflow run tests/modules/gatk4/applybqsrspark -entry test_gatk4_applybqsr_spark_intervals -c tests/config/nextflow.config -c ./tests/modules/gatk4/applybqsrspark/nextflow.config
tags:
- gatk4
- gatk4/applybqsrspark
files:
- path: output/gatk4/test.bam
md5sum: 4bfa18d651abd945e240b05e70107716
- path: output/gatk4/versions.yml
- name: gatk4 applybqsr test_gatk4_applybqsr_spark_cram
command: nextflow run tests/modules/gatk4/applybqsrspark -entry test_gatk4_applybqsr_spark_cram -c tests/config/nextflow.config -c ./tests/modules/gatk4/applybqsrspark/nextflow.config
tags:
- gatk4
- gatk4/applybqsrspark
files:
- path: output/gatk4/test.cram
md5sum: 2e0bca197af4f043a4a85152e6edbe04
- path: output/gatk4/versions.yml

View file

@ -7,7 +7,6 @@
- path: output/gatk4/test.vcf.gz
- path: output/gatk4/test.vcf.gz.tbi
- path: output/gatk4/versions.yml
md5sum: ce9c443375683e7f2958fe958759ad29
- name: gatk4 applyvqsr test_gatk4_applyvqsr_allele_specific
command: nextflow run tests/modules/gatk4/applyvqsr -entry test_gatk4_applyvqsr_allele_specific -c tests/config/nextflow.config -c ./tests/modules/gatk4/applyvqsr/nextflow.config
@ -18,4 +17,3 @@
- path: output/gatk4/test.vcf.gz
- path: output/gatk4/test.vcf.gz.tbi
- path: output/gatk4/versions.yml
md5sum: 521353d12d576de2864f1d18a3e54f14

View file

@ -6,6 +6,7 @@
files:
- path: output/gatk4/test.table
md5sum: e2e43abdc0c943c1a54dae816d0b9ea7
- path: output/gatk4/versions.yml
- name: gatk4 baserecalibrator test_gatk4_baserecalibrator_cram
command: nextflow run ./tests/modules/gatk4/baserecalibrator -entry test_gatk4_baserecalibrator_cram -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/baserecalibrator/nextflow.config
@ -15,6 +16,7 @@
files:
- path: output/gatk4/test.table
md5sum: 35d89a3811aa31711fc9815b6b80e6ec
- path: output/gatk4/versions.yml
- name: gatk4 baserecalibrator test_gatk4_baserecalibrator_intervals
command: nextflow run ./tests/modules/gatk4/baserecalibrator -entry test_gatk4_baserecalibrator_intervals -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/baserecalibrator/nextflow.config
@ -24,6 +26,7 @@
files:
- path: output/gatk4/test.table
md5sum: 9ecb5f00a2229291705addc09c0ec231
- path: output/gatk4/versions.yml
- name: gatk4 baserecalibrator test_gatk4_baserecalibrator_multiple_sites
command: nextflow run ./tests/modules/gatk4/baserecalibrator -entry test_gatk4_baserecalibrator_multiple_sites -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/baserecalibrator/nextflow.config
@ -33,3 +36,4 @@
files:
- path: output/gatk4/test.table
md5sum: e2e43abdc0c943c1a54dae816d0b9ea7
- path: output/gatk4/versions.yml

View file

@ -0,0 +1,69 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { GATK4_BASERECALIBRATOR_SPARK } from '../../../../modules/gatk4/baserecalibratorspark/main.nf'
// Test entry: runs GATK4_BASERECALIBRATOR_SPARK on the sarscov2 paired-end sorted BAM
// and its BAI index, with a single sites VCF and its tabix index.
workflow test_gatk4_baserecalibrator_spark {
input = [ [ id:'test' ], // meta map
file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true),
[] // empty fourth slot; presumably the optional intervals file (the _intervals test puts a BED here)
]
fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true)
dict = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true)
sites = file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true)
sites_tbi = file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true)
GATK4_BASERECALIBRATOR_SPARK ( input, fasta, fai, dict, sites, sites_tbi )
}
// Test entry: runs GATK4_BASERECALIBRATOR_SPARK on the homo_sapiens paired-end sorted CRAM
// and its CRAI index, using the dbSNP 146 hg38 VCF and its tabix index as sites.
workflow test_gatk4_baserecalibrator_spark_cram {
input = [ [ id:'test' ], // meta map
file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true),
file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true),
[] // empty fourth slot; presumably the optional intervals file
]
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
sites = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz'], checkIfExists: true)
sites_tbi = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz_tbi'], checkIfExists: true)
GATK4_BASERECALIBRATOR_SPARK ( input, fasta, fai, dict, sites, sites_tbi )
}
// Test entry: same sarscov2 BAM, index and sites as the basic test, but with the
// sarscov2 test BED supplied as the fourth element of the input tuple.
workflow test_gatk4_baserecalibrator_spark_intervals {
input = [ [ id:'test' ], // meta map
file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true),
file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true)
]
fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true)
dict = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true)
sites = file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true)
sites_tbi = file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true)
GATK4_BASERECALIBRATOR_SPARK ( input, fasta, fai, dict, sites, sites_tbi )
}
// Test entry: runs GATK4_BASERECALIBRATOR_SPARK on the sarscov2 BAM with two sites VCFs
// (test and test2) passed as a list, with their tabix indices in a parallel list.
workflow test_gatk4_baserecalibrator_spark_multiple_sites {
input = [ [ id:'test' ], // meta map
file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true),
[] // empty fourth slot; presumably the optional intervals file
]
fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true)
dict = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true)
sites = [ file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true),
file(params.test_data['sarscov2']['illumina']['test2_vcf_gz'], checkIfExists: true)
]
sites_tbi = [ file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true),
file(params.test_data['sarscov2']['illumina']['test2_vcf_gz_tbi'], checkIfExists: true)
]
GATK4_BASERECALIBRATOR_SPARK ( input, fasta, fai, dict, sites, sites_tbi )
}

View file

@ -0,0 +1,5 @@
// Test configuration applied to every process.
process {
// Publish under <outdir>/<name>, where <name> is the last ':'-separated component of the
// process name, cut at its first '_' and lower-cased.
publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
}

View file

@ -0,0 +1,39 @@
- name: gatk4 baserecalibrator test_gatk4_baserecalibrator_spark
command: nextflow run ./tests/modules/gatk4/baserecalibratorspark -entry test_gatk4_baserecalibrator_spark -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/baserecalibratorspark/nextflow.config
tags:
- gatk4
- gatk4/baserecalibratorspark
files:
- path: output/gatk4/test.table
md5sum: e2e43abdc0c943c1a54dae816d0b9ea7
- path: output/gatk4/versions.yml
- name: gatk4 baserecalibrator test_gatk4_baserecalibrator_spark_cram
command: nextflow run ./tests/modules/gatk4/baserecalibratorspark -entry test_gatk4_baserecalibrator_spark_cram -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/baserecalibratorspark/nextflow.config
tags:
- gatk4
- gatk4/baserecalibratorspark
files:
- path: output/gatk4/test.table
md5sum: 35d89a3811aa31711fc9815b6b80e6ec
- path: output/gatk4/versions.yml
- name: gatk4 baserecalibrator test_gatk4_baserecalibrator_spark_intervals
command: nextflow run ./tests/modules/gatk4/baserecalibratorspark -entry test_gatk4_baserecalibrator_spark_intervals -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/baserecalibratorspark/nextflow.config
tags:
- gatk4
- gatk4/baserecalibratorspark
files:
- path: output/gatk4/test.table
md5sum: 9ecb5f00a2229291705addc09c0ec231
- path: output/gatk4/versions.yml
- name: gatk4 baserecalibrator test_gatk4_baserecalibrator_spark_multiple_sites
command: nextflow run ./tests/modules/gatk4/baserecalibratorspark -entry test_gatk4_baserecalibrator_spark_multiple_sites -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/baserecalibratorspark/nextflow.config
tags:
- gatk4
- gatk4/baserecalibratorspark
files:
- path: output/gatk4/test.table
md5sum: e2e43abdc0c943c1a54dae816d0b9ea7
- path: output/gatk4/versions.yml

View file

@ -6,3 +6,4 @@
files:
- path: output/gatk4/test.interval_list
md5sum: e51101c9357fb2d59fd30e370eefa39c
- path: output/gatk4/versions.yml

View file

@ -3,6 +3,7 @@
nextflow.enable.dsl = 2
include { GATK4_CALCULATECONTAMINATION } from '../../../../modules/gatk4/calculatecontamination/main.nf'
include { GATK4_CALCULATECONTAMINATION as GATK4_CALCULATECONTAMINATION_SEGMENTATION } from '../../../../modules/gatk4/calculatecontamination/main.nf'
workflow test_gatk4_calculatecontamination_tumor_only {
@ -10,9 +11,7 @@ workflow test_gatk4_calculatecontamination_tumor_only {
file(params.test_data['homo_sapiens']['illumina']['test2_pileups_table'], checkIfExists: true),
[] ]
segmentout = false
GATK4_CALCULATECONTAMINATION ( input, segmentout )
GATK4_CALCULATECONTAMINATION ( input )
}
workflow test_gatk4_calculatecontamination_matched_pair {
@ -21,9 +20,7 @@ workflow test_gatk4_calculatecontamination_matched_pair {
file(params.test_data['homo_sapiens']['illumina']['test2_pileups_table'], checkIfExists: true),
file(params.test_data['homo_sapiens']['illumina']['test_pileups_table'], checkIfExists: true) ]
segmentout = false
GATK4_CALCULATECONTAMINATION ( input, segmentout )
GATK4_CALCULATECONTAMINATION ( input )
}
workflow test_gatk4_calculatecontamination_segmentation {
@ -32,7 +29,5 @@ workflow test_gatk4_calculatecontamination_segmentation {
file(params.test_data['homo_sapiens']['illumina']['test2_pileups_table'], checkIfExists: true),
file(params.test_data['homo_sapiens']['illumina']['test_pileups_table'], checkIfExists: true) ]
segmentout = true
GATK4_CALCULATECONTAMINATION ( input, segmentout )
GATK4_CALCULATECONTAMINATION_SEGMENTATION ( input )
}

View file

@ -2,4 +2,8 @@ process {
publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
withName: GATK4_CALCULATECONTAMINATION_SEGMENTATION {
ext.args = { "--tumor-segmentation ${meta.id}.segmentation.table" }
}
}

View file

@ -7,7 +7,6 @@
- path: output/gatk4/test.contamination.table
md5sum: 46c708c943b453da89a3da08acfdb2a7
- path: output/gatk4/versions.yml
md5sum: 3da8f1c0de968886330a3f7a3a1c6616
- name: gatk4 calculatecontamination test_gatk4_calculatecontamination_matched_pair
command: nextflow run tests/modules/gatk4/calculatecontamination -entry test_gatk4_calculatecontamination_matched_pair -c tests/config/nextflow.config -c ./tests/modules/gatk4/calculatecontamination/nextflow.config
@ -18,7 +17,6 @@
- path: output/gatk4/test.contamination.table
md5sum: 46c708c943b453da89a3da08acfdb2a7
- path: output/gatk4/versions.yml
md5sum: 14ab12a71b0c2b87d8cd53639a991b3a
- name: gatk4 calculatecontamination test_gatk4_calculatecontamination_segmentation
command: nextflow run tests/modules/gatk4/calculatecontamination -entry test_gatk4_calculatecontamination_segmentation -c tests/config/nextflow.config -c ./tests/modules/gatk4/calculatecontamination/nextflow.config
@ -31,4 +29,3 @@
- path: output/gatk4/test.segmentation.table
md5sum: f4643d9319bde4efbfbe516d6fb13052
- path: output/gatk4/versions.yml
md5sum: d2e61315de31f512e448f0cb4b77db54

View file

@ -7,4 +7,3 @@
- path: output/gatk4/test.combined.g.vcf.gz
contains: ["VCFv4.2"]
- path: output/gatk4/versions.yml
md5sum: 49d9c467f84b6a99a4da3ef161af26bd

View file

@ -6,3 +6,4 @@
files:
- path: output/gatk4/genome.dict
md5sum: 7362679f176e0f52add03c08f457f646
- path: output/gatk4/versions.yml

View file

@ -7,3 +7,4 @@
- path: output/gatk4/test.pon.vcf.gz
- path: output/gatk4/test.pon.vcf.gz.tbi
md5sum: e7ca7e9fe76ce12198fd54ec9a64fad4
- path: output/gatk4/versions.yml

View file

@ -5,3 +5,4 @@
- gatk4
files:
- path: output/gatk4/test.metrics
- path: output/gatk4/versions.yml

View file

@ -6,7 +6,6 @@
files:
- path: output/gatk4/test.bam
- path: output/gatk4/versions.yml
md5sum: 381cdb2496b2fcc7bbc371a6e4156c7e
- name: gatk4 fastqtosam test_gatk4_fastqtosam_paired_end
command: nextflow run tests/modules/gatk4/fastqtosam -entry test_gatk4_fastqtosam_paired_end -c tests/config/nextflow.config -c ./tests/modules/gatk4/fastqtosam/nextflow.config
@ -16,4 +15,3 @@
files:
- path: output/gatk4/test.bam
- path: output/gatk4/versions.yml
md5sum: 1d07c90cbd31992c9ba003f02d1b3502

View file

@ -8,6 +8,7 @@
- path: output/gatk4/test.filtered.vcf.gz.filteringStats.tsv
md5sum: 55f228e5520c8b9fbac017d3a3a6c5fd
- path: output/gatk4/test.filtered.vcf.gz.tbi
- path: output/gatk4/versions.yml
- name: gatk4 filtermutectcalls test_gatk4_filtermutectcalls_with_files
command: nextflow run ./tests/modules/gatk4/filtermutectcalls -entry test_gatk4_filtermutectcalls_with_files -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/filtermutectcalls/nextflow.config
@ -19,6 +20,7 @@
- path: output/gatk4/test.filtered.vcf.gz.filteringStats.tsv
md5sum: 9ae27fbd04af1a2ea574e2ff1c3a683b
- path: output/gatk4/test.filtered.vcf.gz.tbi
- path: output/gatk4/versions.yml
- name: gatk4 filtermutectcalls test_gatk4_filtermutectcalls_use_val
command: nextflow run ./tests/modules/gatk4/filtermutectcalls -entry test_gatk4_filtermutectcalls_use_val -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/filtermutectcalls/nextflow.config
@ -30,3 +32,4 @@
- path: output/gatk4/test.filtered.vcf.gz.filteringStats.tsv
md5sum: 95cc3e37705bd3b97a292c5d46ab82f3
- path: output/gatk4/test.filtered.vcf.gz.tbi
- path: output/gatk4/versions.yml

View file

@ -7,7 +7,6 @@
- path: output/gatk4/test.table
md5sum: 9603b69fdc3b5090de2e0dd78bfcc4bf
- path: output/gatk4/versions.yml
md5sum: 8d52c5aaab73294e9ea5491b95f3e1e1
- name: gatk4 gatherbqsrreports test_gatk4_gatherbqsrreports_multiple
command: nextflow run tests/modules/gatk4/gatherbqsrreports -entry test_gatk4_gatherbqsrreports_multiple -c tests/config/nextflow.config
@ -18,4 +17,3 @@
- path: output/gatk4/test.table
md5sum: 0c1257eececf95db8ca378272d0f21f9
- path: output/gatk4/versions.yml
md5sum: 91cad396b9f2045c3cd8c0f256672e80

View file

@ -6,3 +6,4 @@
files:
- path: output/gatk4/test.pileupsummaries.table
md5sum: 8e0ca6f66e112bd2f7ec1d31a2d62469
- path: output/gatk4/versions.yml

View file

@ -5,11 +5,9 @@
- gatk4/genomicsdbimport
files:
- path: output/gatk4/test/__tiledb_workspace.tdb
md5sum: d41d8cd98f00b204e9800998ecf8427e
- path: output/gatk4/test/callset.json
md5sum: a7d07d1c86449bbb1091ff29368da07a
- path: output/gatk4/test/chr22$1$40001/.__consolidation_lock
md5sum: d41d8cd98f00b204e9800998ecf8427e
- path: output/gatk4/test/chr22$1$40001/__array_schema.tdb
- path: output/gatk4/test/chr22$1$40001/genomicsdb_meta_dir/genomicsdb_column_bounds.json
md5sum: 2502f79658bc000578ebcfddfc1194c0
@ -19,7 +17,6 @@
- path: output/gatk4/test/vidmap.json
md5sum: 18d3f68bd2cb6f4474990507ff95017a
- path: output/gatk4/versions.yml
md5sum: 91f5c3e9529982f9c819860b403576ce
- name: gatk4 genomicsdbimport test_gatk4_genomicsdbimport_get_intervalslist
command: nextflow run tests/modules/gatk4/genomicsdbimport -entry test_gatk4_genomicsdbimport_get_intervalslist -c tests/config/nextflow.config -c ./tests/modules/gatk4/genomicsdbimport/nextflow.config
@ -30,9 +27,7 @@
- path: output/gatk4/test.interval_list
md5sum: 4c85812ac15fc1cd29711a851d23c0bf
- path: output/gatk4/versions.yml
md5sum: a898fe1cbc4acfa5936c0ffdcf121401
- path: output/untar/versions.yml
md5sum: 8f080677b109aea2cfca50208b077534
- name: gatk4 genomicsdbimport test_gatk4_genomicsdbimport_update_genomicsdb
command: nextflow run tests/modules/gatk4/genomicsdbimport -entry test_gatk4_genomicsdbimport_update_genomicsdb -c tests/config/nextflow.config -c ./tests/modules/gatk4/genomicsdbimport/nextflow.config
@ -41,11 +36,9 @@
- gatk4/genomicsdbimport
files:
- path: output/gatk4/test_genomicsdb/__tiledb_workspace.tdb
md5sum: d41d8cd98f00b204e9800998ecf8427e
- path: output/gatk4/test_genomicsdb/callset.json
md5sum: 1ea31b59b9a218dd5681164aff4a5e07
- path: output/gatk4/test_genomicsdb/chr22$1$40001/.__consolidation_lock
md5sum: d41d8cd98f00b204e9800998ecf8427e
- path: output/gatk4/test_genomicsdb/chr22$1$40001/__array_schema.tdb
md5sum: 6709e67921ae840bf61fbfb192554eda
- path: output/gatk4/test_genomicsdb/chr22$1$40001/genomicsdb_meta_dir/genomicsdb_column_bounds.json
@ -55,6 +48,4 @@
- path: output/gatk4/test_genomicsdb/vidmap.json
md5sum: 18d3f68bd2cb6f4474990507ff95017a
- path: output/gatk4/versions.yml
md5sum: d87baa3f4218c5554cad3c008cb6cbc4
- path: output/untar/versions.yml
md5sum: 9b2916aea9790bdf427c0cb38109110c

View file

@ -11,14 +11,15 @@ workflow test_gatk4_genotypegvcfs_vcf_input {
input = [ [ id:'test' ], // meta map
file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf'], checkIfExists: true),
file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_idx'], checkIfExists: true),
[],
[]
]
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
fastaIndex = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
fastaDict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
GATK4_GENOTYPEGVCFS ( input, fasta, fastaIndex, fastaDict, [], [])
GATK4_GENOTYPEGVCFS ( input, fasta, fai, dict, [], [])
}
// Basic parameters with compressed VCF input
@ -27,14 +28,15 @@ workflow test_gatk4_genotypegvcfs_gz_input {
input = [ [ id:'test' ], // meta map
file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz'], checkIfExists: true),
file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz_tbi'], checkIfExists: true),
[],
[]
]
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
fastaIndex = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
fastaDict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
GATK4_GENOTYPEGVCFS ( input, fasta, fastaIndex, fastaDict, [], [])
GATK4_GENOTYPEGVCFS ( input, fasta, fai, dict, [], [])
}
// Basic parameters + optional dbSNP
@ -43,17 +45,18 @@ workflow test_gatk4_genotypegvcfs_gz_input_dbsnp {
input = [ [ id:'test' ], // meta map
file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz'], checkIfExists: true),
file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz_tbi'], checkIfExists: true),
[],
[]
]
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
fastaIndex = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
fastaDict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
dbsnp = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz'], checkIfExists: true)
dbsnpIndex = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz_tbi'], checkIfExists: true)
dbsnp_tbi = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz_tbi'], checkIfExists: true)
GATK4_GENOTYPEGVCFS ( input, fasta, fastaIndex, fastaDict, dbsnp, dbsnpIndex)
GATK4_GENOTYPEGVCFS ( input, fasta, fai, dict, dbsnp, dbsnp_tbi)
}
// Basic parameters + optional intervals
@ -65,10 +68,10 @@ workflow test_gatk4_genotypegvcfs_gz_input_intervals {
file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true) ]
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
fastaIndex = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
fastaDict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
GATK4_GENOTYPEGVCFS ( input, fasta, fastaIndex, fastaDict, [], [])
GATK4_GENOTYPEGVCFS ( input, fasta, fai, dict, [], [])
}
// Basic parameters + optional dbSNP + optional intervals
@ -81,21 +84,21 @@ workflow test_gatk4_genotypegvcfs_gz_input_dbsnp_intervals {
]
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
fastaIndex = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
fastaDict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
dbsnp = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz'], checkIfExists: true)
dbsnpIndex = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz_tbi'], checkIfExists: true)
dbsnp_tbi = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz_tbi'], checkIfExists: true)
GATK4_GENOTYPEGVCFS ( input, fasta, fastaIndex, fastaDict, dbsnp, dbsnpIndex )
GATK4_GENOTYPEGVCFS ( input, fasta, fai, dict, dbsnp, dbsnp_tbi )
}
// Basic parameters with GenomicsDB input
workflow test_gatk4_genotypegvcfs_gendb_input {
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
fastaIndex = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
fastaDict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
test_genomicsdb = [ [], file(params.test_data['homo_sapiens']['illumina']['test_genomicsdb_tar_gz'], checkIfExists: true) ]
@ -106,18 +109,18 @@ workflow test_gatk4_genotypegvcfs_gendb_input {
input = Channel.of([ id:'test' ]).combine(gendb)
GATK4_GENOTYPEGVCFS ( input, fasta, fastaIndex, fastaDict, [], [])
GATK4_GENOTYPEGVCFS ( input, fasta, fai, dict, [], [])
}
// Basic parameters with GenomicsDB + optional dbSNP
workflow test_gatk4_genotypegvcfs_gendb_input_dbsnp {
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
fastaIndex = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
fastaDict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
dbsnp = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz'], checkIfExists: true)
dbsnpIndex = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz_tbi'], checkIfExists: true)
dbsnp_tbi = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz_tbi'], checkIfExists: true)
test_genomicsdb = [ [], file(params.test_data['homo_sapiens']['illumina']['test_genomicsdb_tar_gz'], checkIfExists: true) ]
@ -127,15 +130,15 @@ workflow test_gatk4_genotypegvcfs_gendb_input_dbsnp {
gendb.add([])
input = Channel.of([ id:'test' ]).combine(gendb)
GATK4_GENOTYPEGVCFS ( input, fasta, fastaIndex, fastaDict, dbsnp, dbsnpIndex)
GATK4_GENOTYPEGVCFS ( input, fasta, fai, dict, dbsnp, dbsnp_tbi)
}
// Basic parameters with GenomicsDB + optional intervals
workflow test_gatk4_genotypegvcfs_gendb_input_intervals {
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
fastaIndex = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
fastaDict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
test_genomicsdb = [ [], file(params.test_data['homo_sapiens']['illumina']['test_genomicsdb_tar_gz'], checkIfExists: true) ]
@ -145,18 +148,18 @@ workflow test_gatk4_genotypegvcfs_gendb_input_intervals {
gendb.add([file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true)])
input = Channel.of([ id:'test' ]).combine(gendb)
GATK4_GENOTYPEGVCFS ( input, fasta, fastaIndex, fastaDict, [], [] )
GATK4_GENOTYPEGVCFS ( input, fasta, fai, dict, [], [] )
}
// Basic parameters with GenomicsDB + optional dbSNP + optional intervals
workflow test_gatk4_genotypegvcfs_gendb_input_dbsnp_intervals {
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
fastaIndex = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
fastaDict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
dbsnp = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz'], checkIfExists: true)
dbsnpIndex = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz_tbi'], checkIfExists: true)
dbsnp_tbi = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz_tbi'], checkIfExists: true)
test_genomicsdb = [ [], file(params.test_data['homo_sapiens']['illumina']['test_genomicsdb_tar_gz'], checkIfExists: true) ]
@ -166,5 +169,5 @@ workflow test_gatk4_genotypegvcfs_gendb_input_dbsnp_intervals {
gendb.add([file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true)])
input = Channel.of([ id:'test' ]).combine(gendb)
GATK4_GENOTYPEGVCFS ( input, fasta, fastaIndex, fastaDict, dbsnp, dbsnpIndex )
GATK4_GENOTYPEGVCFS ( input, fasta, fai, dict, dbsnp, dbsnp_tbi )
}

View file

@ -10,6 +10,7 @@
"AC=1;AF=0.500;AN=2;BaseQRankSum=0.00;DP=211;ExcessHet=0.0000;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=60.00;MQRankSum=0.00;QD=0.95;ReadPosRankSum=1.09;SOR=0.680",
]
- path: output/gatk4/test.genotyped.vcf.gz.tbi
- path: output/gatk4/versions.yml
- name: gatk4 genotypegvcfs test_gatk4_genotypegvcfs_gz_input
command: nextflow run ./tests/modules/gatk4/genotypegvcfs -entry test_gatk4_genotypegvcfs_gz_input -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/genotypegvcfs/nextflow.config
@ -23,6 +24,7 @@
"AC=1;AF=0.500;AN=2;BaseQRankSum=0.00;DP=211;ExcessHet=0.0000;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=60.00;MQRankSum=0.00;QD=0.95;ReadPosRankSum=1.09;SOR=0.680",
]
- path: output/gatk4/test.genotyped.vcf.gz.tbi
- path: output/gatk4/versions.yml
- name: gatk4 genotypegvcfs test_gatk4_genotypegvcfs_gz_input_dbsnp
command: nextflow run ./tests/modules/gatk4/genotypegvcfs -entry test_gatk4_genotypegvcfs_gz_input_dbsnp -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/genotypegvcfs/nextflow.config
@ -36,6 +38,7 @@
"AC=1;AF=0.500;AN=2;BaseQRankSum=0.00;DB;DP=211;ExcessHet=0.0000;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=60.00;MQRankSum=0.00;QD=0.95;ReadPosRankSum=1.09;SOR=0.680",
]
- path: output/gatk4/test.genotyped.vcf.gz.tbi
- path: output/gatk4/versions.yml
- name: gatk4 genotypegvcfs test_gatk4_genotypegvcfs_gz_input_intervals
command: nextflow run ./tests/modules/gatk4/genotypegvcfs -entry test_gatk4_genotypegvcfs_gz_input_intervals -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/genotypegvcfs/nextflow.config
@ -49,6 +52,7 @@
"AC=1;AF=0.500;AN=2;BaseQRankSum=0.00;DP=211;ExcessHet=0.0000;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=60.00;MQRankSum=0.00;QD=0.95;ReadPosRankSum=1.09;SOR=0.680",
]
- path: output/gatk4/test.genotyped.vcf.gz.tbi
- path: output/gatk4/versions.yml
- name: gatk4 genotypegvcfs test_gatk4_genotypegvcfs_gz_input_dbsnp_intervals
command: nextflow run ./tests/modules/gatk4/genotypegvcfs -entry test_gatk4_genotypegvcfs_gz_input_dbsnp_intervals -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/genotypegvcfs/nextflow.config
@ -59,6 +63,7 @@
- path: output/gatk4/test.genotyped.vcf.gz
contains: ["AC=2;AF=1.00;AN=2;DB;DP=20;ExcessHet=0.0000;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=60.00;QD=24.05;SOR=0.693"]
- path: output/gatk4/test.genotyped.vcf.gz.tbi
- path: output/gatk4/versions.yml
- name: gatk4 genotypegvcfs test_gatk4_genotypegvcfs_gendb_input
command: nextflow run ./tests/modules/gatk4/genotypegvcfs -entry test_gatk4_genotypegvcfs_gendb_input -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/genotypegvcfs/nextflow.config
@ -72,6 +77,7 @@
"AC=1;AF=0.500;AN=2;BaseQRankSum=0.00;DP=211;ExcessHet=0.0000;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=60.00;MQRankSum=0.00;QD=0.95;ReadPosRankSum=1.09;SOR=0.680",
]
- path: output/gatk4/test.genotyped.vcf.gz.tbi
- path: output/gatk4/versions.yml
- name: gatk4 genotypegvcfs test_gatk4_genotypegvcfs_gendb_input_dbsnp
command: nextflow run ./tests/modules/gatk4/genotypegvcfs -entry test_gatk4_genotypegvcfs_gendb_input_dbsnp -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/genotypegvcfs/nextflow.config
@ -85,6 +91,7 @@
"AC=1;AF=0.500;AN=2;BaseQRankSum=0.00;DB;DP=211;ExcessHet=0.0000;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=60.00;MQRankSum=0.00;QD=0.95;ReadPosRankSum=1.09;SOR=0.680",
]
- path: output/gatk4/test.genotyped.vcf.gz.tbi
- path: output/gatk4/versions.yml
- name: gatk4 genotypegvcfs test_gatk4_genotypegvcfs_gendb_input_intervals
command: nextflow run ./tests/modules/gatk4/genotypegvcfs -entry test_gatk4_genotypegvcfs_gendb_input_intervals -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/genotypegvcfs/nextflow.config
@ -98,6 +105,7 @@
"AC=1;AF=0.500;AN=2;BaseQRankSum=0.00;DP=211;ExcessHet=0.0000;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=60.00;MQRankSum=0.00;QD=0.95;ReadPosRankSum=1.09;SOR=0.680",
]
- path: output/gatk4/test.genotyped.vcf.gz.tbi
- path: output/gatk4/versions.yml
- name: gatk4 genotypegvcfs test_gatk4_genotypegvcfs_gendb_input_dbsnp_intervals
command: nextflow run ./tests/modules/gatk4/genotypegvcfs -entry test_gatk4_genotypegvcfs_gendb_input_dbsnp_intervals -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/genotypegvcfs/nextflow.config
@ -108,3 +116,4 @@
- path: output/gatk4/test.genotyped.vcf.gz
contains: ["AC=2;AF=1.00;AN=2;DP=2;ExcessHet=0.0000;FS=0.000;MLEAC=1;MLEAF=0.500;MQ=60.00;QD=18.66;SOR=0.693"]
- path: output/gatk4/test.genotyped.vcf.gz.tbi
- path: output/gatk4/versions.yml

View file

@ -7,7 +7,6 @@
- path: output/gatk4/test.pileups.table
md5sum: 8e0ca6f66e112bd2f7ec1d31a2d62469
- path: output/gatk4/versions.yml
md5sum: 059123619f3ed8d4cd178c4390b81e69
- name: gatk4 getpileupsummaries test_gatk4_getpileupsummaries_separate_sites
command: nextflow run tests/modules/gatk4/getpileupsummaries -entry test_gatk4_getpileupsummaries_separate_sites -c tests/config/nextflow.config
@ -18,7 +17,6 @@
- path: output/gatk4/test.pileups.table
md5sum: 8e0ca6f66e112bd2f7ec1d31a2d62469
- path: output/gatk4/versions.yml
md5sum: 76b5388b0c5b5762d8d33e34b23f181d
- name: gatk4 getpileupsummaries test_gatk4_getpileupsummaries_separate_sites_cram
command: nextflow run tests/modules/gatk4/getpileupsummaries -entry test_gatk4_getpileupsummaries_separate_sites_cram -c tests/config/nextflow.config
@ -29,4 +27,3 @@
- path: output/gatk4/test.pileups.table
md5sum: 8e0ca6f66e112bd2f7ec1d31a2d62469
- path: output/gatk4/versions.yml
md5sum: 2fa51319c2b1d678ee00ab09512cf268

View file

@ -6,6 +6,7 @@
files:
- path: output/gatk4/test.vcf.gz
- path: output/gatk4/test.vcf.gz.tbi
- path: output/gatk4/versions.yml
- name: gatk4 haplotypecaller test_gatk4_haplotypecaller_cram
command: nextflow run ./tests/modules/gatk4/haplotypecaller -entry test_gatk4_haplotypecaller_cram -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/haplotypecaller/nextflow.config
@ -15,6 +16,7 @@
files:
- path: output/gatk4/test.vcf.gz
- path: output/gatk4/test.vcf.gz.tbi
- path: output/gatk4/versions.yml
- name: gatk4 haplotypecaller test_gatk4_haplotypecaller_intervals_dbsnp
command: nextflow run ./tests/modules/gatk4/haplotypecaller -entry test_gatk4_haplotypecaller_intervals_dbsnp -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/haplotypecaller/nextflow.config
@ -24,3 +26,4 @@
files:
- path: output/gatk4/test.vcf.gz
- path: output/gatk4/test.vcf.gz.tbi
- path: output/gatk4/versions.yml

View file

@ -5,6 +5,7 @@
- gatk4/indexfeaturefile
files:
- path: output/gatk4/genome.bed.idx
- path: output/gatk4/versions.yml
- name: gatk4 indexfeaturefile test_gatk4_indexfeaturefile_bed_gz
command: nextflow run tests/modules/gatk4/indexfeaturefile -entry test_gatk4_indexfeaturefile_bed_gz -c tests/config/nextflow.config -c ./tests/modules/gatk4/indexfeaturefile/nextflow.config
@ -15,7 +16,6 @@
- path: output/gatk4/genome.bed.gz.tbi
md5sum: 4bc51e2351a6e83f20e13be75861f941
- path: output/gatk4/versions.yml
md5sum: e5003204702f83aabdb4141272c704d2
- name: gatk4 indexfeaturefile test_gatk4_indexfeaturefile_vcf
command: nextflow run tests/modules/gatk4/indexfeaturefile -entry test_gatk4_indexfeaturefile_vcf -c tests/config/nextflow.config -c ./tests/modules/gatk4/indexfeaturefile/nextflow.config
@ -25,7 +25,6 @@
files:
- path: output/gatk4/test.genome.vcf.idx
- path: output/gatk4/versions.yml
md5sum: 08cd7c49cfb752fc2905f600106a0345
- name: gatk4 indexfeaturefile test_gatk4_indexfeaturefile_vcf_gz
command: nextflow run tests/modules/gatk4/indexfeaturefile -entry test_gatk4_indexfeaturefile_vcf_gz -c tests/config/nextflow.config
@ -36,4 +35,3 @@
- path: output/gatk4/test.genome.vcf.gz.tbi
md5sum: fedd68eaddf8d31257853d9da8325bd3
- path: output/gatk4/versions.yml
md5sum: b388d1681831a40264a7a27f67a8b247

View file

@ -6,3 +6,4 @@
files:
- path: output/gatk4/test.bed
md5sum: 9046675d01199fbbee79f2bc1c5dce52
- path: output/gatk4/versions.yml

View file

@ -14,3 +14,4 @@
md5sum: 55da0f3c69504148f4e7002a0e072cfe
- path: output/gatk4/test_split/temp_0004_of_6/4scattered.interval_list
md5sum: d29ca4447f32547f2936567fa902796a
- path: output/gatk4/versions.yml

View file

@ -5,3 +5,4 @@
- gatk4/learnreadorientationmodel
files:
- path: output/gatk4/test.artifact-prior.tar.gz
- path: output/gatk4/versions.yml

View file

@ -2,4 +2,8 @@ process {
publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
withName: GATK4_MARKDUPLICATES {
ext.args = '--CREATE_INDEX true'
}
}

View file

@ -10,7 +10,6 @@
md5sum: 2efd50b2e6b7fd9bdf242cd9e266cfa9
- path: output/gatk4/test.metrics
- path: output/gatk4/versions.yml
md5sum: 0bc949aaa8792cd6c537cdaab0e2c145
- name: gatk4 markduplicates test_gatk4_markduplicates_multiple_bams
command: nextflow run tests/modules/gatk4/markduplicates -entry test_gatk4_markduplicates_multiple_bams -c tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicates/nextflow.config
@ -24,4 +23,3 @@
md5sum: 8187febc6108ffef7f907e89b9c091a4
- path: output/gatk4/test.metrics
- path: output/gatk4/versions.yml
md5sum: b10d63cf7b2b672915cb30cea081ccd5

View file

@ -0,0 +1,28 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl = 2
include { GATK4_MARKDUPLICATES_SPARK } from '../../../../modules/gatk4/markduplicatesspark/main.nf'
// Single-BAM test: passes one sorted paired-end BAM together with a reference
// FASTA, its .fai index and its sequence dictionary to GATK4_MARKDUPLICATES_SPARK.
workflow test_gatk4_markduplicates_spark {

    def meta = [ id:'test', single_end:false ] // meta map

    // NOTE(review): the BAM comes from the sarscov2 test set while the reference
    // files below are homo_sapiens chr21 - confirm this pairing is intended.
    def bam = file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true)

    def genome    = params.test_data['homo_sapiens']['genome']
    def ref_fasta = file(genome['genome_21_fasta'],     checkIfExists: true)
    def ref_fai   = file(genome['genome_21_fasta_fai'], checkIfExists: true)
    def ref_dict  = file(genome['genome_21_dict'],      checkIfExists: true)

    GATK4_MARKDUPLICATES_SPARK ( [ meta, bam ], ref_fasta, ref_fai, ref_dict )
}
// Multi-BAM test: passes a list of two sorted paired-end BAMs under a single
// meta map, plus reference FASTA / .fai / dictionary, to GATK4_MARKDUPLICATES_SPARK.
workflow test_gatk4_markduplicates_spark_multiple_bams {

    def meta = [ id:'test', single_end:false ] // meta map

    // Both alignments travel as one list element of the input tuple.
    def illumina = params.test_data['homo_sapiens']['illumina']
    def bams = [
        file(illumina['test_paired_end_sorted_bam'],  checkIfExists: true),
        file(illumina['test2_paired_end_sorted_bam'], checkIfExists: true)
    ]

    def genome    = params.test_data['homo_sapiens']['genome']
    def ref_fasta = file(genome['genome_21_fasta'],     checkIfExists: true)
    def ref_fai   = file(genome['genome_21_fasta_fai'], checkIfExists: true)
    def ref_dict  = file(genome['genome_21_dict'],      checkIfExists: true)

    GATK4_MARKDUPLICATES_SPARK ( [ meta, bams ], ref_fasta, ref_fai, ref_dict )
}

View file

@ -0,0 +1,5 @@
process {

    // Publish each task's outputs under <outdir>/<tool>, where <tool> is derived
    // from the process name: keep the last ':'-separated component, then the part
    // before the first '_', lower-cased (e.g. GATK4_MARKDUPLICATES_SPARK -> gatk4).
    publishDir = {
        def simple_name = task.process.tokenize(':')[-1]
        def tool_name   = simple_name.tokenize('_')[0].toLowerCase()
        "${params.outdir}/${tool_name}"
    }

}

View file

@ -0,0 +1,25 @@
# Runs the test_gatk4_markduplicates_spark entry workflow.
# The name label matches the module tag (gatk4/markduplicatesspark) so this
# entry is not mistaken for a test of the non-Spark gatk4/markduplicates module.
- name: gatk4 markduplicatesspark test_gatk4_markduplicates_spark
  command: nextflow run tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark -c tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config
  tags:
    - gatk4
    - gatk4/markduplicatesspark
  files:
    # Index and BAM are pinned by checksum.
    - path: output/gatk4/test.bai
      md5sum: e9c125e82553209933883b4fe2b8d7c2
    - path: output/gatk4/test.bam
      md5sum: 2efd50b2e6b7fd9bdf242cd9e266cfa9
    # No md5sum on the metrics file or versions.yml: only the paths are listed.
    - path: output/gatk4/test.metrics
    - path: output/gatk4/versions.yml
# Runs the test_gatk4_markduplicates_spark_multiple_bams entry workflow.
# The name label matches the module tag (gatk4/markduplicatesspark) so this
# entry is not mistaken for a test of the non-Spark gatk4/markduplicates module.
- name: gatk4 markduplicatesspark test_gatk4_markduplicates_spark_multiple_bams
  command: nextflow run tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark_multiple_bams -c tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config
  tags:
    - gatk4
    - gatk4/markduplicatesspark
  files:
    # Index and BAM are pinned by checksum.
    - path: output/gatk4/test.bai
      md5sum: bad71df9c876e72a5bc0a3e0fd755f92
    - path: output/gatk4/test.bam
      md5sum: 8187febc6108ffef7f907e89b9c091a4
    # No md5sum on the metrics file or versions.yml: only the paths are listed.
    - path: output/gatk4/test.metrics
    - path: output/gatk4/versions.yml

View file

@ -6,3 +6,4 @@
files:
- path: output/gatk4/test.bam
md5sum: e6f1b343700b7ccb94e81ae127433988
- path: output/gatk4/versions.yml

View file

@ -6,3 +6,4 @@
files:
- path: output/gatk4/test.vcf.gz.stats
md5sum: 17d2091015d04cbd4a26b7a67dc659e6
- path: output/gatk4/versions.yml

View file

@ -5,22 +5,22 @@ nextflow.enable.dsl = 2
include { GATK4_MERGEVCFS } from '../../../../modules/gatk4/mergevcfs/main.nf'
workflow test_gatk4_mergevcfs {
input = [ [ id:'test' ], // meta map
[ file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true),
file(params.test_data['sarscov2']['illumina']['test2_vcf'], checkIfExists: true) ]
]
dict = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true)
GATK4_MERGEVCFS ( input, dict, false )
}
workflow test_gatk4_mergevcfs_refdict {
def input = []
input = [ [ id:'test' ], // meta map
[ file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true),
file(params.test_data['sarscov2']['illumina']['test2_vcf'], checkIfExists: true) ]
]
dict = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true)
GATK4_MERGEVCFS ( input, dict, true )
GATK4_MERGEVCFS ( input, dict )
}
workflow test_gatk4_mergevcfs_no_dict {
input = [ [ id:'test' ], // meta map
[ file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true),
file(params.test_data['sarscov2']['illumina']['test2_vcf'], checkIfExists: true) ]
]
GATK4_MERGEVCFS ( input, [] )
}

View file

@ -6,12 +6,14 @@
files:
- path: output/gatk4/test.vcf.gz
md5sum: 5b289bda88d3a3504f2e19ee8cff177c
- path: output/gatk4/versions.yml
- name: gatk4 mergevcfs test_gatk4_mergevcfs_refdict
command: nextflow run ./tests/modules/gatk4/mergevcfs -entry test_gatk4_mergevcfs_refdict -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/mergevcfs/nextflow.config
- name: gatk4 mergevcfs test_gatk4_mergevcfs_no_dict
command: nextflow run ./tests/modules/gatk4/mergevcfs -entry test_gatk4_mergevcfs_no_dict -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/mergevcfs/nextflow.config
tags:
- gatk4/mergevcfs
- gatk4
files:
- path: output/gatk4/test.vcf.gz
md5sum: 5b289bda88d3a3504f2e19ee8cff177c
- path: output/gatk4/versions.yml

View file

@ -3,21 +3,21 @@
nextflow.enable.dsl = 2
include { GATK4_MUTECT2 } from '../../../../modules/gatk4/mutect2/main.nf'
include { GATK4_MUTECT2 as GATK4_MUTECT2_PAIR } from '../../../../modules/gatk4/mutect2/main.nf'
include { GATK4_MUTECT2 as GATK4_MUTECT2_MITO } from '../../../../modules/gatk4/mutect2/main.nf'
include { GATK4_MUTECT2 as GATK4_MUTECT2_F1R2 } from '../../../../modules/gatk4/mutect2/main.nf'
workflow test_gatk4_mutect2_tumor_normal_pair {
input = [ [ id:'test'], // meta map
input = [ [ id:'test', normal_id:'normal', tumor_id:'tumour' ], // meta map
[ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_bam'], checkIfExists: true),
file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_bam'], checkIfExists: true)
],
[ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_bam_bai'], checkIfExists: true),
file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_bam_bai'], checkIfExists: true)
],
[],
["normal"]
[]
]
run_single = false
run_pon = false
run_mito = false
fasta = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true)
fai = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta_fai'], checkIfExists: true)
dict = file(params.test_data['homo_sapiens']['genome']['genome_21_dict'], checkIfExists: true)
@ -26,19 +26,38 @@ workflow test_gatk4_mutect2_tumor_normal_pair {
panel_of_normals = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_21_vcf_gz'], checkIfExists: true)
panel_of_normals_tbi = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_21_vcf_gz_tbi'], checkIfExists: true)
GATK4_MUTECT2 ( input, run_single, run_pon, run_mito, fasta, fai, dict, germline_resource, germline_resource_tbi, panel_of_normals, panel_of_normals_tbi )
GATK4_MUTECT2_PAIR ( input, fasta, fai, dict, germline_resource, germline_resource_tbi, panel_of_normals, panel_of_normals_tbi )
}
// Test workflow: tumour/normal pair run through the GATK4_MUTECT2_F1R2 alias.
// The meta map carries normal_id and tumor_id next to the sample id; the
// accompanying test config reads meta.normal_id and meta.id when building
// ext.args for this alias.
workflow test_gatk4_mutect2_tumor_normal_pair_f1r2 {

    def illumina = params.test_data['homo_sapiens']['illumina']

    // Alignments and their indices, listed in the same order
    def bams = [
        file(illumina['test_paired_end_recalibrated_sorted_bam'], checkIfExists: true),
        file(illumina['test2_paired_end_recalibrated_sorted_bam'], checkIfExists: true)
    ]
    def bais = [
        file(illumina['test_paired_end_recalibrated_sorted_bam_bai'], checkIfExists: true),
        file(illumina['test2_paired_end_recalibrated_sorted_bam_bai'], checkIfExists: true)
    ]

    // Tuple layout: [ meta map, alignments, indices, <empty list> ]
    // NOTE(review): the last slot is presumably the optional intervals input -- confirm
    def sample = [
        [ id:'test', normal_id:'normal', tumor_id:'tumour' ], // meta map
        bams,
        bais,
        []
    ]

    def genome = params.test_data['homo_sapiens']['genome']

    // Reference (chr21 subset) with its index and dictionary
    def ref_fasta = file(genome['genome_21_fasta'], checkIfExists: true)
    def ref_fai   = file(genome['genome_21_fasta_fai'], checkIfExists: true)
    def ref_dict  = file(genome['genome_21_dict'], checkIfExists: true)

    // Germline resource and panel of normals, each with its tabix index
    def germline     = file(genome['gnomad_r2_1_1_21_vcf_gz'], checkIfExists: true)
    def germline_tbi = file(genome['gnomad_r2_1_1_21_vcf_gz_tbi'], checkIfExists: true)
    def pon          = file(genome['mills_and_1000g_indels_21_vcf_gz'], checkIfExists: true)
    def pon_tbi      = file(genome['mills_and_1000g_indels_21_vcf_gz_tbi'], checkIfExists: true)

    GATK4_MUTECT2_F1R2 ( sample, ref_fasta, ref_fai, ref_dict, germline, germline_tbi, pon, pon_tbi )
}
workflow test_gatk4_mutect2_tumor_single {
input = [ [ id:'test'], // meta map
[ file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_bam'], checkIfExists: true)],
[ file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_bam_bai'], checkIfExists: true)],
[],
[]
]
run_single = true
run_pon = false
run_mito = false
fasta = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true)
fai = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta_fai'], checkIfExists: true)
dict = file(params.test_data['homo_sapiens']['genome']['genome_21_dict'], checkIfExists: true)
@ -47,19 +66,16 @@ workflow test_gatk4_mutect2_tumor_single {
panel_of_normals = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_21_vcf_gz'], checkIfExists: true)
panel_of_normals_tbi = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_21_vcf_gz_tbi'], checkIfExists: true)
GATK4_MUTECT2 ( input, run_single, run_pon, run_mito, fasta, fai, dict, germline_resource, germline_resource_tbi, panel_of_normals, panel_of_normals_tbi )
GATK4_MUTECT2 ( input, fasta, fai, dict, germline_resource, germline_resource_tbi, panel_of_normals, panel_of_normals_tbi )
}
workflow test_gatk4_mutect2_cram_input {
input = [ [ id:'test'], // meta map
[ file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_cram'], checkIfExists: true)],
[ file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_cram_crai'], checkIfExists: true)],
[],
[]
]
run_single = true
run_pon = false
run_mito = false
fasta = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true)
fai = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta_fai'], checkIfExists: true)
dict = file(params.test_data['homo_sapiens']['genome']['genome_21_dict'], checkIfExists: true)
@ -68,48 +84,37 @@ workflow test_gatk4_mutect2_cram_input {
panel_of_normals = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_21_vcf_gz'], checkIfExists: true)
panel_of_normals_tbi = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_21_vcf_gz_tbi'], checkIfExists: true)
GATK4_MUTECT2 ( input, run_single, run_pon, run_mito, fasta, fai, dict, germline_resource, germline_resource_tbi, panel_of_normals, panel_of_normals_tbi )
GATK4_MUTECT2 ( input, fasta, fai, dict, germline_resource, germline_resource_tbi, panel_of_normals, panel_of_normals_tbi )
}
// Test workflow: single-sample Mutect2 run with no germline resource and no
// panel of normals supplied (all four of those inputs are empty lists).
workflow test_gatk4_mutect2_generate_pon {
// Tuple layout as written: [ meta map, [ bam ], [ bai ], [], [] ]
input = [ [ id:'test'], // meta map
[ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_bam'], checkIfExists: true)],
[ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_bam_bai'], checkIfExists: true)],
[],
[]
]
// Mode flags; only the 11-argument call below consumes them.
run_single = false
run_pon = true
run_mito = false
fasta = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true)
fai = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta_fai'], checkIfExists: true)
dict = file(params.test_data['homo_sapiens']['genome']['genome_21_dict'], checkIfExists: true)
// Empty placeholders; the 8-argument call passes literal [] instead.
germline_resource = []
germline_resource_tbi = []
panel_of_normals = []
panel_of_normals_tbi = []
// NOTE(review): two invocations with different arity (11 vs 8 arguments).
// This looks like the old and new side of a diff rendered together; confirm
// which signature modules/gatk4/mutect2/main.nf currently declares.
GATK4_MUTECT2 ( input, run_single, run_pon, run_mito, fasta, fai, dict, germline_resource, germline_resource_tbi, panel_of_normals, panel_of_normals_tbi )
GATK4_MUTECT2 ( input, fasta, fai, dict, [], [], [], [] )
}
// mitochondria mode would ideally have some mitochondria test data, but since the mitochondria settings only increase detection sensitivity, we can use the chr22 data as a stand in as it is already a small dataset, the extra variants detected compared to generate_pon shows the mode is working.
// Mitochondria mode would ideally be tested with real mitochondrial data.
// However, the mitochondria settings only increase detection sensitivity,
// so the chr22 data can serve as a stand-in: it is already a small dataset.
// The extra variants detected compared to generate_pon show the mode is working.
// Test workflow: Mutect2 on the mitochondria stand-in BAM, restricted by the
// genome BED file, with no germline resource or panel of normals.
workflow test_gatk4_mutect2_mitochondria {
// NOTE(review): the list below contains the genome_bed entry twice with a bare
// [] between them and no comma before the second entry. This looks like old
// and new diff lines merged; confirm the intended tuple shape.
input = [ [ id:'test'], // meta map
[ file(params.test_data['homo_sapiens']['illumina']['mitochon_standin_recalibrated_sorted_bam'], checkIfExists: true)],
[ file(params.test_data['homo_sapiens']['illumina']['mitochon_standin_recalibrated_sorted_bam_bai'], checkIfExists: true)],
[ file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true)],
[]
[ file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true)]
]
// Mode flags; only the 11-argument call below consumes them.
run_single = false
run_pon = false
run_mito = true
// Uses the full genome_* reference entries rather than the genome_21_* ones
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
// Empty placeholders; the GATK4_MUTECT2_MITO call passes literal [] instead.
germline_resource = []
germline_resource_tbi = []
panel_of_normals = []
panel_of_normals_tbi = []
// NOTE(review): both the 11-argument GATK4_MUTECT2 call and the 8-argument
// GATK4_MUTECT2_MITO alias call appear here; presumably only the alias call is
// current -- confirm against the module and the test config.
GATK4_MUTECT2 ( input, run_single, run_pon, run_mito, fasta, fai, dict, germline_resource, germline_resource_tbi, panel_of_normals, panel_of_normals_tbi )
GATK4_MUTECT2_MITO ( input, fasta, fai, dict, [], [], [], [] )
}

View file

@ -2,4 +2,16 @@ process {
publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
withName: GATK4_MUTECT2_PAIR {
ext.args = { "--normal-sample $meta.normal_id" }
}
withName: GATK4_MUTECT2_MITO {
ext.args = { "--mitochondria-mode" }
}
withName: GATK4_MUTECT2_F1R2 {
ext.args = { "--normal-sample $meta.normal_id --f1r2-tar-gz ${meta.id}.f1r2.tar.gz" }
}
}

View file

@ -1,5 +1,16 @@
- name: gatk4 mutect2 test_gatk4_mutect2_tumor_normal_pair
command: nextflow run ./tests/modules/gatk4/mutect2 -entry test_gatk4_mutect2_tumor_normal_pair -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/mutect2/nextflow.config
tags:
- gatk4
- gatk4/mutect2
files:
- path: output/gatk4/test.vcf.gz
- path: output/gatk4/test.vcf.gz.stats
md5sum: 17d2091015d04cbd4a26b7a67dc659e6
- path: output/gatk4/test.vcf.gz.tbi
- name: gatk4 mutect2 test_gatk4_mutect2_tumor_normal_pair_f1r2
command: nextflow run ./tests/modules/gatk4/mutect2 -entry test_gatk4_mutect2_tumor_normal_pair_f1r2 -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/mutect2/nextflow.config
tags:
- gatk4
- gatk4/mutect2
@ -9,6 +20,7 @@
- path: output/gatk4/test.vcf.gz.stats
md5sum: 17d2091015d04cbd4a26b7a67dc659e6
- path: output/gatk4/test.vcf.gz.tbi
- path: output/gatk4/versions.yml
- name: gatk4 mutect2 test_gatk4_mutect2_tumor_single
command: nextflow run ./tests/modules/gatk4/mutect2 -entry test_gatk4_mutect2_tumor_single -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/mutect2/nextflow.config
@ -20,6 +32,7 @@
- path: output/gatk4/test.vcf.gz.stats
md5sum: 55ed641e16089afb33cdbc478e202d3d
- path: output/gatk4/test.vcf.gz.tbi
- path: output/gatk4/versions.yml
- name: gatk4 mutect2 test_gatk4_mutect2_cram_input
command: nextflow run ./tests/modules/gatk4/mutect2 -entry test_gatk4_mutect2_cram_input -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/mutect2/nextflow.config
@ -31,6 +44,7 @@
- path: output/gatk4/test.vcf.gz.stats
md5sum: 55ed641e16089afb33cdbc478e202d3d
- path: output/gatk4/test.vcf.gz.tbi
- path: output/gatk4/versions.yml
- name: gatk4 mutect2 test_gatk4_mutect2_generate_pon
command: nextflow run ./tests/modules/gatk4/mutect2 -entry test_gatk4_mutect2_generate_pon -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/mutect2/nextflow.config
@ -42,6 +56,7 @@
- path: output/gatk4/test.vcf.gz.stats
md5sum: b569ce66bbffe9588b3d221e821023ee
- path: output/gatk4/test.vcf.gz.tbi
- path: output/gatk4/versions.yml
- name: gatk4 mutect2 test_gatk4_mutect2_mitochondria
command: nextflow run ./tests/modules/gatk4/mutect2 -entry test_gatk4_mutect2_mitochondria -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/mutect2/nextflow.config
@ -53,3 +68,4 @@
- path: output/gatk4/test.vcf.gz.stats
md5sum: fc6ea14ca2da346babe78161beea28c9
- path: output/gatk4/test.vcf.gz.tbi
- path: output/gatk4/versions.yml

View file

@ -6,3 +6,4 @@
files:
- path: output/gatk4/test.reverted.bam
md5sum: f783a88deb45c3a2c20ca12cbe1c5652
- path: output/gatk4/versions.yml

View file

@ -6,6 +6,7 @@
files:
- path: output/gatk4/test.fastq.gz
md5sum: 50ace41d4c24467f24f8b929540a7797
- path: output/gatk4/versions.yml
- name: gatk4 samtofastq test_gatk4_samtofastq_paired_end
command: nextflow run ./tests/modules/gatk4/samtofastq -entry test_gatk4_samtofastq_paired_end -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/samtofastq/nextflow.config
@ -17,3 +18,4 @@
md5sum: cfea607c9d75fd9ea9704780ad3a499c
- path: output/gatk4/test_2.fastq.gz
md5sum: 613bf64c023609e1c62ad6ce9e4be8d7
- path: output/gatk4/versions.yml

View file

@ -7,7 +7,6 @@
- path: output/gatk4/test.selectvariants.vcf.gz
- path: output/gatk4/test.selectvariants.vcf.gz.tbi
- path: output/gatk4/versions.yml
md5sum: a35d78af179f43652274bc7405d5a785
- name: gatk4 selectvariants test_gatk4_selectvariants_gz_input
command: nextflow run tests/modules/gatk4/selectvariants -entry test_gatk4_selectvariants_gz_input -c tests/config/nextflow.config
@ -18,4 +17,3 @@
- path: output/gatk4/test.selectvariants.vcf.gz
- path: output/gatk4/test.selectvariants.vcf.gz.tbi
- path: output/gatk4/versions.yml
md5sum: c943f3579a369968ca63444eb43fb6e7

View file

@ -7,4 +7,3 @@
- path: output/gatk4/test.bam
md5sum: ceed15c0bd64ff5c38d3816905933b0b
- path: output/gatk4/versions.yml
md5sum: 27fceace2528a905ddca2b4db47c4bf5

View file

@ -10,6 +10,7 @@
"BaseQRankSum=-1.318;DP=17;ExcessHet=3.0103;MLEAC=1,0,0;MLEAF=0.500,0.00,0.00;MQRankSum=0.000;RAW_MQandDP=61200,17;ReadPosRankSum=2.365",
]
- path: output/gatk4/test.filtered.vcf.gz.tbi
- path: output/gatk4/versions.yml
- name: gatk4 variantfiltration test_gatk4_variantfiltration_gz_input
command: nextflow run ./tests/modules/gatk4/variantfiltration -entry test_gatk4_variantfiltration_gz_input -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/variantfiltration/nextflow.config
@ -23,3 +24,4 @@
"BaseQRankSum=-1.318;DP=17;ExcessHet=3.0103;MLEAC=1,0,0;MLEAF=0.500,0.00,0.00;MQRankSum=0.000;RAW_MQandDP=61200,17;ReadPosRankSum=2.365",
]
- path: output/gatk4/test.filtered.vcf.gz.tbi
- path: output/gatk4/versions.yml

View file

@ -12,31 +12,28 @@ workflow test_gatk4_variantrecalibrator {
file(params.test_data['homo_sapiens']['illumina']['test2_haplotc_ann_vcf_gz_tbi'], checkIfExists: true)
]
fasta = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true)
fai = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta_fai'], checkIfExists: true)
dict = file(params.test_data['homo_sapiens']['genome']['genome_21_dict'], checkIfExists: true)
resources = [
[
resources = [[
file(params.test_data['homo_sapiens']['genome']['hapmap_3_3_hg38_21_vcf_gz'], checkIfExists: true),
file(params.test_data['homo_sapiens']['genome']['res_1000g_omni2_5_hg38_21_vcf_gz'], checkIfExists: true),
file(params.test_data['homo_sapiens']['genome']['res_1000g_phase1_snps_hg38_21_vcf_gz'], checkIfExists: true),
file(params.test_data['homo_sapiens']['genome']['dbsnp_138_hg38_21_vcf_gz'], checkIfExists: true)
],
[
], [
file(params.test_data['homo_sapiens']['genome']['hapmap_3_3_hg38_21_vcf_gz_tbi'], checkIfExists: true),
file(params.test_data['homo_sapiens']['genome']['res_1000g_omni2_5_hg38_21_vcf_gz_tbi'], checkIfExists: true),
file(params.test_data['homo_sapiens']['genome']['res_1000g_phase1_snps_hg38_21_vcf_gz_tbi'], checkIfExists: true),
file(params.test_data['homo_sapiens']['genome']['dbsnp_138_hg38_21_vcf_gz_tbi'], checkIfExists: true)
],
[
], [
'hapmap,known=false,training=true,truth=true,prior=15.0 hapmap_3.3.hg38.vcf.gz',
'omni,known=false,training=true,truth=false,prior=12.0 1000G_omni2.5.hg38.vcf.gz',
'1000G,known=false,training=true,truth=false,prior=10.0 1000G_phase1.snps.hg38.vcf.gz',
'dbsnp,known=true,training=false,truth=false,prior=2.0 dbsnp_138.hg38.vcf.gz'
]
]
]]
GATK4_VARIANTRECALIBRATOR_NO_ALLELESPECIFICTY ( input, fasta, fai, dict, resources)
fasta = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true)
fai = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta_fai'], checkIfExists: true)
dict = file(params.test_data['homo_sapiens']['genome']['genome_21_dict'], checkIfExists: true)
GATK4_VARIANTRECALIBRATOR_NO_ALLELESPECIFICTY(input, resources, fasta, fai, dict)
}
workflow test_gatk4_variantrecalibrator_allele_specific {
@ -46,29 +43,26 @@ workflow test_gatk4_variantrecalibrator_allele_specific {
file(params.test_data['homo_sapiens']['illumina']['test2_haplotc_ann_vcf_gz_tbi'], checkIfExists: true)
]
fasta = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true)
fai = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta_fai'], checkIfExists: true)
dict = file(params.test_data['homo_sapiens']['genome']['genome_21_dict'], checkIfExists: true)
resources = [
[
resources = [[
file(params.test_data['homo_sapiens']['genome']['hapmap_3_3_hg38_21_vcf_gz'], checkIfExists: true),
file(params.test_data['homo_sapiens']['genome']['res_1000g_omni2_5_hg38_21_vcf_gz'], checkIfExists: true),
file(params.test_data['homo_sapiens']['genome']['res_1000g_phase1_snps_hg38_21_vcf_gz'], checkIfExists: true),
file(params.test_data['homo_sapiens']['genome']['dbsnp_138_hg38_21_vcf_gz'], checkIfExists: true)
],
[
], [
file(params.test_data['homo_sapiens']['genome']['hapmap_3_3_hg38_21_vcf_gz_tbi'], checkIfExists: true),
file(params.test_data['homo_sapiens']['genome']['res_1000g_omni2_5_hg38_21_vcf_gz_tbi'], checkIfExists: true),
file(params.test_data['homo_sapiens']['genome']['res_1000g_phase1_snps_hg38_21_vcf_gz_tbi'], checkIfExists: true),
file(params.test_data['homo_sapiens']['genome']['dbsnp_138_hg38_21_vcf_gz_tbi'], checkIfExists: true)
],
[
], [
'hapmap,known=false,training=true,truth=true,prior=15.0 hapmap_3.3.hg38.vcf.gz',
'omni,known=false,training=true,truth=false,prior=12.0 1000G_omni2.5.hg38.vcf.gz',
'1000G,known=false,training=true,truth=false,prior=10.0 1000G_phase1.snps.hg38.vcf.gz',
'dbsnp,known=true,training=false,truth=false,prior=2.0 dbsnp_138.hg38.vcf.gz'
]
]
]]
GATK4_VARIANTRECALIBRATOR_WITH_ALLELESPECIFICTY ( input, fasta, fai, dict, resources)
fasta = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true)
fai = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta_fai'], checkIfExists: true)
dict = file(params.test_data['homo_sapiens']['genome']['genome_21_dict'], checkIfExists: true)
GATK4_VARIANTRECALIBRATOR_WITH_ALLELESPECIFICTY(input, resources, fasta, fai, dict)
}

Some files were not shown because too many files have changed in this diff Show more