Update to allow cram + update needed to use the gatk4 modules in sarek (#976)

* Make samtools/merge cram compliant

* samtools/stats cram compliance

* update yml file

* samtools/view to deal with crams

* Update tests to make sure cram works

* also fix tmp dir and min mem in one go

* basequalityrecal test for cram + min mem + tmpdir

* update haplotypecaller for sarek

* update haplotype yml

* update markdup to allow multiple bams, take out params to be passed with options.args

* remove TODO statement

* Remove variable md5sum

* add empty input to stats module in subworkflows

* subworkflows seem to work now on my side

* Apply code review

Co-authored-by: Maxime U. Garcia <maxime.garcia@scilifelab.se>

* replace bam with input to be more inclusive

* rename everywhere

* rename input

* remove variable checksum

Co-authored-by: Maxime U. Garcia <maxime.garcia@scilifelab.se>
This commit is contained in:
FriederikeHanssen 2021-10-29 13:01:05 +02:00 committed by GitHub
parent 71945a5b5f
commit ac1e6df076
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
39 changed files with 356 additions and 109 deletions

View file

@ -19,7 +19,7 @@ process GATK4_APPLYBQSR {
} }
input: input:
tuple val(meta), path(bam), path(bai), path(bqsr_table) tuple val(meta), path(input), path(input_index), path(bqsr_table)
path fasta path fasta
path fastaidx path fastaidx
path dict path dict
@ -32,12 +32,18 @@ process GATK4_APPLYBQSR {
script: script:
def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
def interval = intervals ? "-L ${intervals}" : "" def interval = intervals ? "-L ${intervals}" : ""
if (!task.memory) {
log.info '[GATK ApplyBQSR] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
} else {
avail_mem = task.memory.giga
}
""" """
gatk ApplyBQSR \\ gatk ApplyBQSR \\
-R $fasta \\ -R $fasta \\
-I $bam \\ -I $input \\
--bqsr-recal-file $bqsr_table \\ --bqsr-recal-file $bqsr_table \\
$interval \\ $interval \\
--tmp-dir . \\
-O ${prefix}.bam \\ -O ${prefix}.bam \\
$options.args $options.args

View file

@ -20,10 +20,14 @@ input:
description: | description: |
Groovy Map containing sample information Groovy Map containing sample information
e.g. [ id:'test', single_end:false ] e.g. [ id:'test', single_end:false ]
- bam: - input:
type: file type: file
description: BAM file from alignment description: BAM/CRAM file from alignment
pattern: "*.{bam}" pattern: "*.{bam,cram}"
- input_index:
type: file
description: BAI/CRAI file from alignment
pattern: "*.{bai,crai}"
- bqsr_table: - bqsr_table:
type: file type: file
description: Recalibration table from gatk4_baserecalibrator description: Recalibration table from gatk4_baserecalibrator

View file

@ -19,7 +19,7 @@ process GATK4_BASERECALIBRATOR {
} }
input: input:
tuple val(meta), path(bam), path(bai) tuple val(meta), path(input), path(input_index)
path fasta path fasta
path fastaidx path fastaidx
path dict path dict
@ -35,12 +35,19 @@ process GATK4_BASERECALIBRATOR {
def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
def intervalsCommand = intervalsBed ? "-L ${intervalsBed}" : "" def intervalsCommand = intervalsBed ? "-L ${intervalsBed}" : ""
def sitesCommand = knownSites.collect{"--known-sites ${it}"}.join(' ') def sitesCommand = knownSites.collect{"--known-sites ${it}"}.join(' ')
if (!task.memory) {
log.info '[GATK BaseRecalibrator] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
} else {
avail_mem = task.memory.giga
}
""" """
gatk BaseRecalibrator \ gatk BaseRecalibrator \
-R $fasta \ -R $fasta \
-I $bam \ -I $input \
$sitesCommand \ $sitesCommand \
$intervalsCommand \ $intervalsCommand \
--tmp-dir . \
$options.args \ $options.args \
-O ${prefix}.table -O ${prefix}.table

View file

@ -20,10 +20,14 @@ input:
description: | description: |
Groovy Map containing sample information Groovy Map containing sample information
e.g. [ id:'test', single_end:false ] e.g. [ id:'test', single_end:false ]
- bam: - input:
type: file type: file
description: BAM file from alignment description: BAM/CRAM file from alignment
pattern: "*.{bam}" pattern: "*.{bam,cram}"
- input_index:
type: file
description: BAI/CRAI file from alignment
pattern: "*.{bai,crai}"
- fasta: - fasta:
type: file type: file
description: The reference fasta file description: The reference fasta file
@ -57,3 +61,4 @@ output:
authors: authors:
- "@yocra3" - "@yocra3"
- "@FriederikeHanssen"

View file

@ -19,10 +19,13 @@ process GATK4_HAPLOTYPECALLER {
} }
input: input:
tuple val(meta), path(bam), path(bai) tuple val(meta), path(input), path(input_index)
path fasta path fasta
path fai path fai
path dict path dict
path dbsnp
path dbsnp_tbi
path interval
output: output:
tuple val(meta), path("*.vcf.gz"), emit: vcf tuple val(meta), path("*.vcf.gz"), emit: vcf
@ -30,8 +33,10 @@ process GATK4_HAPLOTYPECALLER {
path "versions.yml" , emit: versions path "versions.yml" , emit: versions
script: script:
def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
def avail_mem = 3 def interval_option = interval ? "-L ${interval}" : ""
def dbsnp_option = dbsnp ? "-D ${dbsnp}" : ""
def avail_mem = 3
if (!task.memory) { if (!task.memory) {
log.info '[GATK HaplotypeCaller] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' log.info '[GATK HaplotypeCaller] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
} else { } else {
@ -42,9 +47,12 @@ process GATK4_HAPLOTYPECALLER {
--java-options "-Xmx${avail_mem}g" \\ --java-options "-Xmx${avail_mem}g" \\
HaplotypeCaller \\ HaplotypeCaller \\
-R $fasta \\ -R $fasta \\
-I $bam \\ -I $input \\
${dbsnp_option} \\
${interval_option} \\
-O ${prefix}.vcf.gz \\ -O ${prefix}.vcf.gz \\
$options.args $options.args \\
--tmp-dir .
cat <<-END_VERSIONS > versions.yml cat <<-END_VERSIONS > versions.yml
${getProcessName(task.process)}: ${getProcessName(task.process)}:

View file

@ -21,14 +21,14 @@ input:
description: | description: |
Groovy Map containing sample information Groovy Map containing sample information
e.g. [ id:'test', single_end:false ] e.g. [ id:'test', single_end:false ]
- bam: - input:
type: file type: file
description: BAM file description: BAM/CRAM file from alignment
pattern: "*.bam" pattern: "*.{bam,cram}"
- bai: - input_index:
type: file type: file
description: Index of BAM file description: BAI/CRAI file from alignment
pattern: "*.bam.bai" pattern: "*.{bai,crai}"
- fasta: - fasta:
type: file type: file
description: The reference fasta file description: The reference fasta file
@ -41,6 +41,16 @@ input:
type: file type: file
description: GATK sequence dictionary description: GATK sequence dictionary
pattern: "*.dict" pattern: "*.dict"
- dbsnp:
type: file
description: VCF file containing known sites (optional)
- dbsnp_tbi:
type: file
description: VCF index of dbsnp (optional)
- interval:
type: file
description: Bed file with the genomic regions included in the library (optional)
output: output:
- meta: - meta:
type: map type: map
@ -62,3 +72,4 @@ output:
authors: authors:
- "@suzannejin" - "@suzannejin"
- "@FriederikeHanssen"

View file

@ -19,21 +19,28 @@ process GATK4_MARKDUPLICATES {
} }
input: input:
tuple val(meta), path(bam) tuple val(meta), path(bams)
output: output:
tuple val(meta), path("*.bam") , emit: bam tuple val(meta), path("*.bam") , emit: bam
tuple val(meta), path("*.bai") , emit: bai
tuple val(meta), path("*.metrics"), emit: metrics tuple val(meta), path("*.metrics"), emit: metrics
path "versions.yml" , emit: versions path "versions.yml" , emit: versions
script: script:
def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
def bam_list = bams.collect(){ bam -> "--INPUT ".concat(bam.toString()) }.join(" ")
def avail_mem = 3
if (!task.memory) {
log.info '[GATK HaplotypeCaller] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
} else {
avail_mem = task.memory.giga
}
""" """
gatk MarkDuplicates \\ gatk MarkDuplicates \\
--INPUT $bam \\ $bam_list \\
--METRICS_FILE ${prefix}.metrics \\ --METRICS_FILE ${prefix}.metrics \\
--TMP_DIR . \\ --TMP_DIR . \\
--ASSUME_SORT_ORDER coordinate \\
--CREATE_INDEX true \\ --CREATE_INDEX true \\
--OUTPUT ${prefix}.bam \\ --OUTPUT ${prefix}.bam \\
$options.args $options.args

View file

@ -47,3 +47,4 @@ output:
authors: authors:
- "@ajodeh-juma" - "@ajodeh-juma"
- "@FriederikeHanssen"

View file

@ -19,7 +19,7 @@ process MANTA_GERMLINE {
} }
input: input:
tuple val(meta), path(cram), path(crai) tuple val(meta), path(input), path(input_index)
path fasta path fasta
path fai path fai
path target_bed path target_bed
@ -39,7 +39,7 @@ process MANTA_GERMLINE {
def options_manta = target_bed ? "--exome --callRegions $target_bed" : "" def options_manta = target_bed ? "--exome --callRegions $target_bed" : ""
""" """
configManta.py \ configManta.py \
--bam $cram \ --bam $input \
--reference $fasta \ --reference $fasta \
$options_manta \ $options_manta \
--runDir manta --runDir manta

View file

@ -23,11 +23,11 @@ input:
description: | description: |
Groovy Map containing sample information Groovy Map containing sample information
e.g. [ id:'test', single_end:false ] e.g. [ id:'test', single_end:false ]
- cram: - input:
type: file type: file
description: BAM/CRAM/SAM file description: BAM/CRAM/SAM file
pattern: "*.{bam,cram,sam}" pattern: "*.{bam,cram,sam}"
- crai: - input_index:
type: file type: file
description: BAM/CRAM/SAM index file description: BAM/CRAM/SAM index file
pattern: "*.{bai,crai,sai}" pattern: "*.{bai,crai,sai}"

View file

@ -19,7 +19,7 @@ process MANTA_SOMATIC {
} }
input: input:
tuple val(meta), path(cram_normal), path(crai_normal), path(cram_tumor), path(crai_tumor) tuple val(meta), path(input_normal), path(input_index_normal), path(input_tumor), path(input_index_tumor)
path fasta path fasta
path fai path fai
path target_bed path target_bed
@ -42,8 +42,8 @@ process MANTA_SOMATIC {
""" """
configManta.py \ configManta.py \
--tumorBam $cram_tumor \ --tumorBam $input_tumor \
--normalBam $cram_normal \ --normalBam $input_normal \
--reference $fasta \ --reference $fasta \
$options_manta \ $options_manta \
--runDir manta --runDir manta

View file

@ -23,19 +23,19 @@ input:
description: | description: |
Groovy Map containing sample information Groovy Map containing sample information
e.g. [ id:'test', single_end:false ] e.g. [ id:'test', single_end:false ]
- cram_normal: - input_normal:
type: file type: file
description: BAM/CRAM/SAM file description: BAM/CRAM/SAM file
pattern: "*.{bam,cram,sam}" pattern: "*.{bam,cram,sam}"
- crai_normal: - input_index_normal:
type: file type: file
description: BAM/CRAM/SAM index file description: BAM/CRAM/SAM index file
pattern: "*.{bai,crai,sai}" pattern: "*.{bai,crai,sai}"
- cram_tumor: - input_tumor:
type: file type: file
description: BAM/CRAM/SAM file description: BAM/CRAM/SAM file
pattern: "*.{bam,cram,sam}" pattern: "*.{bam,cram,sam}"
- crai_tumor: - input_index_tumor:
type: file type: file
description: BAM/CRAM/SAM index file description: BAM/CRAM/SAM index file
pattern: "*.{bai,crai,sai}" pattern: "*.{bai,crai,sai}"

View file

@ -19,7 +19,7 @@ process MANTA_TUMORONLY {
} }
input: input:
tuple val(meta), path(cram), path(crai) tuple val(meta), path(input), path(input_index)
path fasta path fasta
path fai path fai
path target_bed path target_bed
@ -39,7 +39,7 @@ process MANTA_TUMORONLY {
def options_manta = target_bed ? "--exome --callRegions $target_bed" : "" def options_manta = target_bed ? "--exome --callRegions $target_bed" : ""
""" """
configManta.py \ configManta.py \
--tumorBam $cram \ --tumorBam $input \
--reference $fasta \ --reference $fasta \
$options_manta \ $options_manta \
--runDir manta --runDir manta

View file

@ -23,11 +23,11 @@ input:
description: | description: |
Groovy Map containing sample information Groovy Map containing sample information
e.g. [ id:'test', single_end:false ] e.g. [ id:'test', single_end:false ]
- cram: - input:
type: file type: file
description: BAM/CRAM/SAM file description: BAM/CRAM/SAM file
pattern: "*.{bam,cram,sam}" pattern: "*.{bam,cram,sam}"
- crai: - input_index:
type: file type: file
description: BAM/CRAM/SAM index file description: BAM/CRAM/SAM index file
pattern: "*.{bai,crai,sai}" pattern: "*.{bai,crai,sai}"
@ -54,7 +54,6 @@ output:
description: | description: |
Groovy Map containing sample information Groovy Map containing sample information
e.g. [ id:'test', single_end:false ] e.g. [ id:'test', single_end:false ]
- candidate_small_indels_vcf: - candidate_small_indels_vcf:
type: file type: file
description: Gzipped VCF file containing variants description: Gzipped VCF file containing variants

View file

@ -19,16 +19,20 @@ process SAMTOOLS_MERGE {
} }
input: input:
tuple val(meta), path(bams) tuple val(meta), path(input_files)
path fasta
output: output:
tuple val(meta), path("${prefix}.bam"), emit: bam tuple val(meta), path("${prefix}.bam"), optional:true, emit: bam
path "versions.yml" , emit: versions tuple val(meta), path("${prefix}.cram"), optional:true, emit: cram
path "versions.yml" , emit: versions
script: script:
prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
def file_type = input_files[0].getExtension()
def reference = fasta ? "--reference ${fasta}" : ""
""" """
samtools merge ${prefix}.bam $bams samtools merge ${reference} ${prefix}.${file_type} $input_files
cat <<-END_VERSIONS > versions.yml cat <<-END_VERSIONS > versions.yml
${getProcessName(task.process)}: ${getProcessName(task.process)}:
${getSoftwareName(task.process)}: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') ${getSoftwareName(task.process)}: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')

View file

@ -1,5 +1,5 @@
name: samtools_merge name: samtools_merge
description: Merge BAM file description: Merge BAM or CRAM file
keywords: keywords:
- merge - merge
- bam - bam
@ -21,20 +21,28 @@ input:
description: | description: |
Groovy Map containing sample information Groovy Map containing sample information
e.g. [ id:'test', single_end:false ] e.g. [ id:'test', single_end:false ]
- bam: - input_files:
type: file type: file
description: BAM file description: BAM/CRAM file
pattern: "*.{bam,cram,sam}" pattern: "*.{bam,cram,sam}"
- fasta:
type: optional file
description: Reference file the CRAM was created with
pattern: "*.{fasta,fa}"
output: output:
- meta: - meta:
type: map type: map
description: | description: |
Groovy Map containing sample information Groovy Map containing sample information
e.g. [ id:'test', single_end:false ] e.g. [ id:'test', single_end:false ]
- merged_bam: - bam:
type: file type: file
description: BAM file description: BAM file
pattern: "*.{bam}" pattern: "*.{bam}"
- cram:
type: file
description: CRAM file
pattern: "*.{cram}"
- versions: - versions:
type: file type: file
description: File containing software versions description: File containing software versions
@ -43,3 +51,4 @@ authors:
- "@drpatelh" - "@drpatelh"
- "@yuukiiwa " - "@yuukiiwa "
- "@maxulysse" - "@maxulysse"
- "@FriederikeHanssen"

View file

@ -19,15 +19,17 @@ process SAMTOOLS_STATS {
} }
input: input:
tuple val(meta), path(bam), path(bai) tuple val(meta), path(input), path(input_index)
path fasta
output: output:
tuple val(meta), path("*.stats"), emit: stats tuple val(meta), path("*.stats"), emit: stats
path "versions.yml" , emit: versions path "versions.yml" , emit: versions
script: script:
def reference = fasta ? "--reference ${fasta}" : ""
""" """
samtools stats $bam > ${bam}.stats samtools stats ${reference} ${input} > ${input}.stats
cat <<-END_VERSIONS > versions.yml cat <<-END_VERSIONS > versions.yml
${getProcessName(task.process)}: ${getProcessName(task.process)}:
${getSoftwareName(task.process)}: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') ${getSoftwareName(task.process)}: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')

View file

@ -22,14 +22,18 @@ input:
description: | description: |
Groovy Map containing sample information Groovy Map containing sample information
e.g. [ id:'test', single_end:false ] e.g. [ id:'test', single_end:false ]
- bam: - input:
type: file type: file
description: BAM/CRAM/SAM file description: BAM/CRAM file from alignment
pattern: "*.{bam,cram,sam}" pattern: "*.{bam,cram}"
- bai: - input_index:
type: file type: file
description: Index for BAM/CRAM/SAM file description: BAI/CRAI file from alignment
pattern: "*.{bai,crai,sai}" pattern: "*.{bai,crai}"
- fasta:
type: optional file
description: Reference file the CRAM was created with
pattern: "*.{fasta,fa}"
output: output:
- meta: - meta:
type: map type: map
@ -46,3 +50,4 @@ output:
pattern: "versions.yml" pattern: "versions.yml"
authors: authors:
- "@drpatelh" - "@drpatelh"
- "@FriederikeHanssen"

View file

@ -19,16 +19,20 @@ process SAMTOOLS_VIEW {
} }
input: input:
tuple val(meta), path(bam) tuple val(meta), path(input)
path fasta
output: output:
tuple val(meta), path("*.bam"), emit: bam tuple val(meta), path("*.bam") , optional: true, emit: bam
path "versions.yml" , emit: versions tuple val(meta), path("*.cram"), optional: true, emit: cram
path "versions.yml" , emit: versions
script: script:
def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
def reference = fasta ? "--reference ${fasta} -C" : ""
def file_type = input.getExtension()
""" """
samtools view $options.args $bam > ${prefix}.bam samtools view ${reference} $options.args $input > ${prefix}.${file_type}
cat <<-END_VERSIONS > versions.yml cat <<-END_VERSIONS > versions.yml
${getProcessName(task.process)}: ${getProcessName(task.process)}:
${getSoftwareName(task.process)}: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') ${getSoftwareName(task.process)}: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')

View file

@ -21,10 +21,14 @@ input:
description: | description: |
Groovy Map containing sample information Groovy Map containing sample information
e.g. [ id:'test', single_end:false ] e.g. [ id:'test', single_end:false ]
- bam: - input:
type: file type: file
description: BAM/CRAM/SAM file description: BAM/CRAM/SAM file
pattern: "*.{bam,cram,sam}" pattern: "*.{bam,cram,sam}"
- fasta:
type: optional file
description: Reference file the CRAM was created with
pattern: "*.{fasta,fa}"
output: output:
- meta: - meta:
type: map type: map
@ -33,8 +37,12 @@ output:
e.g. [ id:'test', single_end:false ] e.g. [ id:'test', single_end:false ]
- bam: - bam:
type: file type: file
description: filtered/converted BAM/CRAM/SAM file description: filtered/converted BAM/SAM file
pattern: "*.{bam,cram,sam}" pattern: "*.{bam,sam}"
- cram:
type: file
description: filtered/converted CRAM file
pattern: "*.cram"
- versions: - versions:
type: file type: file
description: File containing software versions description: File containing software versions
@ -42,3 +50,4 @@ output:
authors: authors:
- "@drpatelh" - "@drpatelh"
- "@joseespinosa" - "@joseespinosa"
- "@FriederikeHanssen"

View file

@ -19,7 +19,7 @@ process STRELKA_GERMLINE {
} }
input: input:
tuple val(meta), path(bam), path(bai) tuple val(meta), path(input), path(input_index)
path fasta path fasta
path fai path fai
path target_bed path target_bed
@ -38,7 +38,7 @@ process STRELKA_GERMLINE {
def regions = target_bed ? "--exome --callRegions ${target_bed}" : "" def regions = target_bed ? "--exome --callRegions ${target_bed}" : ""
""" """
configureStrelkaGermlineWorkflow.py \\ configureStrelkaGermlineWorkflow.py \\
--bam $bam \\ --bam $input \\
--referenceFasta $fasta \\ --referenceFasta $fasta \\
$regions \\ $regions \\
$options.args \\ $options.args \\

View file

@ -21,14 +21,14 @@ input:
description: | description: |
Groovy Map containing sample information Groovy Map containing sample information
e.g. [ id:'test'] e.g. [ id:'test']
- bam: - input:
type: file type: file
description: BAM file description: BAM/CRAM file
pattern: "*.{bam}" pattern: "*.{bam,cram}"
- bai: - input_index:
type: file type: file
description: BAM index file description: BAI/CRAI index file
pattern: "*.{bai}" pattern: "*.{bai,crai}"
- target_bed: - target_bed:
type: file type: file
description: An optional bed file description: An optional bed file

View file

@ -19,7 +19,7 @@ process STRELKA_SOMATIC {
} }
input: input:
tuple val(meta), path(cram_normal), path(crai_normal), path(cram_tumor), path(crai_tumor), path(manta_candidate_small_indels), path(manta_candidate_small_indels_tbi) tuple val(meta), path(input_normal), path(input_index_normal), path(input_tumor), path(input_index_tumor), path(manta_candidate_small_indels), path(manta_candidate_small_indels_tbi)
path fasta path fasta
path fai path fai
path target_bed path target_bed
@ -38,8 +38,8 @@ process STRELKA_SOMATIC {
def options_manta = manta_candidate_small_indels ? "--indelCandidates ${manta_candidate_small_indels}" : "" def options_manta = manta_candidate_small_indels ? "--indelCandidates ${manta_candidate_small_indels}" : ""
""" """
configureStrelkaSomaticWorkflow.py \\ configureStrelkaSomaticWorkflow.py \\
--tumor $cram_tumor \\ --tumor $input_tumor \\
--normal $cram_normal \\ --normal $input_normal \\
--referenceFasta $fasta \\ --referenceFasta $fasta \\
$options_target_bed \\ $options_target_bed \\
$options_manta \\ $options_manta \\

View file

@ -21,19 +21,19 @@ input:
description: | description: |
Groovy Map containing sample information Groovy Map containing sample information
e.g. [ id:'test', single_end:false ] e.g. [ id:'test', single_end:false ]
- cram_normal: - input_normal:
type: file type: file
description: BAM/CRAM/SAM file description: BAM/CRAM/SAM file
pattern: "*.{bam,cram,sam}" pattern: "*.{bam,cram,sam}"
- crai_normal: - input_index_normal:
type: file type: file
description: BAM/CRAM/SAM index file description: BAM/CRAM/SAM index file
pattern: "*.{bai,crai,sai}" pattern: "*.{bai,crai,sai}"
- cram_tumor: - input_tumor:
type: file type: file
description: BAM/CRAM/SAM file description: BAM/CRAM/SAM file
pattern: "*.{bam,cram,sam}" pattern: "*.{bam,cram,sam}"
- crai_tumor: - input_index_tumor:
type: file type: file
description: BAM/CRAM/SAM index file description: BAM/CRAM/SAM index file
pattern: "*.{bai,crai,sai}" pattern: "*.{bai,crai,sai}"

View file

@ -15,7 +15,7 @@ workflow BAM_STATS_SAMTOOLS {
main: main:
ch_versions = Channel.empty() ch_versions = Channel.empty()
SAMTOOLS_STATS ( ch_bam_bai ) SAMTOOLS_STATS ( ch_bam_bai, [] )
ch_versions = ch_versions.mix(SAMTOOLS_STATS.out.versions.first()) ch_versions = ch_versions.mix(SAMTOOLS_STATS.out.versions.first())
SAMTOOLS_FLAGSTAT ( ch_bam_bai ) SAMTOOLS_FLAGSTAT ( ch_bam_bai )

View file

@ -30,3 +30,17 @@ workflow test_gatk4_applybqsr_intervals {
GATK4_APPLYBQSR ( input, fasta, fai, dict, intervals ) GATK4_APPLYBQSR ( input, fasta, fai, dict, intervals )
} }
// Test entry: runs GATK4_APPLYBQSR on a CRAM/CRAI pair together with a
// recalibration table, restricted to the regions in a BED file.
workflow test_gatk4_applybqsr_cram {
    // Shorthand handles into the shared test-data map.
    def illumina_data = params.test_data['homo_sapiens']['illumina']
    def genome_data   = params.test_data['homo_sapiens']['genome']

    // Reference files and the intervals BED.
    reference_fasta = file(genome_data['genome_fasta'], checkIfExists: true)
    reference_fai   = file(genome_data['genome_fasta_fai'], checkIfExists: true)
    reference_dict  = file(genome_data['genome_dict'], checkIfExists: true)
    regions_bed     = file(genome_data['genome_bed'], checkIfExists: true)

    // Tuple layout: meta map, alignment file, alignment index, BQSR table.
    cram_with_table = [
        [ id:'test' ], // meta map
        file(illumina_data['test_paired_end_sorted_cram'], checkIfExists: true),
        file(illumina_data['test_paired_end_sorted_cram_crai'], checkIfExists: true),
        file(illumina_data['test_baserecalibrator_table'], checkIfExists: true)
    ]

    GATK4_APPLYBQSR ( cram_with_table, reference_fasta, reference_fai, reference_dict, regions_bed )
}

View file

@ -1,17 +1,26 @@
- name: gatk4 applybqsr test_gatk4_applybqsr - name: gatk4 applybqsr test_gatk4_applybqsr
command: nextflow run tests/modules/gatk4/applybqsr -entry test_gatk4_applybqsr -c tests/config/nextflow.config command: nextflow run tests/modules/gatk4/applybqsr -entry test_gatk4_applybqsr -c tests/config/nextflow.config
tags: tags:
- gatk4
- gatk4/applybqsr - gatk4/applybqsr
- gatk4
files: files:
- path: output/gatk4/test.bam - path: output/gatk4/test.bam
md5sum: dac716c394db5e83c12b44355c098ca7 md5sum: 87a2eabae2b7b41574f966612b5addae
- name: gatk4 applybqsr test_gatk4_applybqsr_intervals - name: gatk4 applybqsr test_gatk4_applybqsr_intervals
command: nextflow run tests/modules/gatk4/applybqsr -entry test_gatk4_applybqsr_intervals -c tests/config/nextflow.config command: nextflow run tests/modules/gatk4/applybqsr -entry test_gatk4_applybqsr_intervals -c tests/config/nextflow.config
tags: tags:
- gatk4
- gatk4/applybqsr - gatk4/applybqsr
- gatk4
files: files:
- path: output/gatk4/test.bam - path: output/gatk4/test.bam
md5sum: 400441dbe5344658580ba0a24ba57069 md5sum: 9c015d3c1dbd9eee793b7386f432b6aa
- name: gatk4 applybqsr test_gatk4_applybqsr_cram
command: nextflow run tests/modules/gatk4/applybqsr -entry test_gatk4_applybqsr_cram -c tests/config/nextflow.config
tags:
- gatk4/applybqsr
- gatk4
files:
- path: output/gatk4/test.bam
md5sum: 02f84815fdbc99c21c8d42ebdcabbbf7

View file

@ -18,6 +18,21 @@ workflow test_gatk4_baserecalibrator {
GATK4_BASERECALIBRATOR ( input, fasta, fai, dict, [], sites, sites_tbi ) GATK4_BASERECALIBRATOR ( input, fasta, fai, dict, [], sites, sites_tbi )
} }
// Test entry: runs GATK4_BASERECALIBRATOR on a CRAM/CRAI pair with a single
// known-sites VCF and no intervals file.
workflow test_gatk4_baserecalibrator_cram {
    // The process declares a 3-element tuple (meta, input, input_index).
    // A recalibration table is an OUTPUT of this step, so it must not be part
    // of the input tuple; a 4th element only triggers a cardinality mismatch.
    input = [ [ id:'test' ], // meta map
              file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true),
              file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true)
            ]
    fasta     = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
    fai       = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
    dict      = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
    sites     = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz'], checkIfExists: true)
    sites_tbi = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz_tbi'], checkIfExists: true)

    // Empty list = no intervals BED supplied.
    GATK4_BASERECALIBRATOR ( input, fasta, fai, dict, [], sites, sites_tbi )
}
workflow test_gatk4_baserecalibrator_intervals { workflow test_gatk4_baserecalibrator_intervals {
input = [ [ id:'test' ], // meta map input = [ [ id:'test' ], // meta map
file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),

View file

@ -1,17 +1,26 @@
- name: gatk4 baserecalibrator test_gatk4_baserecalibrator - name: gatk4 baserecalibrator test_gatk4_baserecalibrator
command: nextflow run tests/modules/gatk4/baserecalibrator -entry test_gatk4_baserecalibrator -c tests/config/nextflow.config command: nextflow run tests/modules/gatk4/baserecalibrator -entry test_gatk4_baserecalibrator -c tests/config/nextflow.config
tags: tags:
- gatk4/baserecalibrator
- gatk4 - gatk4
- gatk4/baserecalibrator
files: files:
- path: output/gatk4/test.table - path: output/gatk4/test.table
md5sum: e2e43abdc0c943c1a54dae816d0b9ea7 md5sum: e2e43abdc0c943c1a54dae816d0b9ea7
- name: gatk4 baserecalibrator test_gatk4_baserecalibrator_cram
command: nextflow run tests/modules/gatk4/baserecalibrator -entry test_gatk4_baserecalibrator_cram -c tests/config/nextflow.config
tags:
- gatk4
- gatk4/baserecalibrator
files:
- path: output/gatk4/test.table
md5sum: 35d89a3811aa31711fc9815b6b80e6ec
- name: gatk4 baserecalibrator test_gatk4_baserecalibrator_intervals - name: gatk4 baserecalibrator test_gatk4_baserecalibrator_intervals
command: nextflow run tests/modules/gatk4/baserecalibrator -entry test_gatk4_baserecalibrator_intervals -c tests/config/nextflow.config command: nextflow run tests/modules/gatk4/baserecalibrator -entry test_gatk4_baserecalibrator_intervals -c tests/config/nextflow.config
tags: tags:
- gatk4/baserecalibrator
- gatk4 - gatk4
- gatk4/baserecalibrator
files: files:
- path: output/gatk4/test.table - path: output/gatk4/test.table
md5sum: 9ecb5f00a2229291705addc09c0ec231 md5sum: 9ecb5f00a2229291705addc09c0ec231
@ -19,8 +28,8 @@
- name: gatk4 baserecalibrator test_gatk4_baserecalibrator_multiple_sites - name: gatk4 baserecalibrator test_gatk4_baserecalibrator_multiple_sites
command: nextflow run tests/modules/gatk4/baserecalibrator -entry test_gatk4_baserecalibrator_multiple_sites -c tests/config/nextflow.config command: nextflow run tests/modules/gatk4/baserecalibrator -entry test_gatk4_baserecalibrator_multiple_sites -c tests/config/nextflow.config
tags: tags:
- gatk4/baserecalibrator
- gatk4 - gatk4
- gatk4/baserecalibrator
files: files:
- path: output/gatk4/test.table - path: output/gatk4/test.table
md5sum: e2e43abdc0c943c1a54dae816d0b9ea7 md5sum: e2e43abdc0c943c1a54dae816d0b9ea7

View file

@ -13,5 +13,33 @@ workflow test_gatk4_haplotypecaller {
fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true)
dict = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true) dict = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true)
GATK4_HAPLOTYPECALLER ( input, fasta, fai, dict ) GATK4_HAPLOTYPECALLER ( input, fasta, fai, dict, [], [], [] )
}
// Test entry: runs GATK4_HAPLOTYPECALLER on a CRAM/CRAI pair without dbSNP
// sites or an intervals file.
workflow test_gatk4_haplotypecaller_cram {
    // Shorthand handles into the shared test-data map.
    def illumina_data = params.test_data['homo_sapiens']['illumina']
    def genome_data   = params.test_data['homo_sapiens']['genome']

    // Reference sequence, its index and sequence dictionary.
    reference_fasta = file(genome_data['genome_fasta'], checkIfExists: true)
    reference_fai   = file(genome_data['genome_fasta_fai'], checkIfExists: true)
    reference_dict  = file(genome_data['genome_dict'], checkIfExists: true)

    // Tuple layout: meta map, alignment file, alignment index.
    cram_sample = [
        [ id:'test' ], // meta map
        file(illumina_data['test_paired_end_sorted_cram'], checkIfExists: true),
        file(illumina_data['test_paired_end_sorted_cram_crai'], checkIfExists: true)
    ]

    // The three trailing empty lists stand in for dbsnp, dbsnp_tbi and interval.
    GATK4_HAPLOTYPECALLER ( cram_sample, reference_fasta, reference_fai, reference_dict, [], [], [] )
}
// Test entry: runs GATK4_HAPLOTYPECALLER on the homo_sapiens CRAM/CRAI pair
// with all optional inputs supplied (known-sites VCF, its index, and a BED file).
workflow test_gatk4_haplotypecaller_intervals_dbsnp {
// Tuple layout: meta map, alignment file, alignment index.
input = [ [ id:'test' ], // meta map
file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true),
file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true)
]
// Reference sequence, its index and sequence dictionary.
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)
// Optional inputs: dbSNP VCF, its tabix index, and the regions BED.
sites = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz'], checkIfExists: true)
sites_tbi = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz_tbi'], checkIfExists: true)
intervals = file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true)
// Positional arguments: input tuple, fasta, fai, dict, dbsnp, dbsnp_tbi, interval.
GATK4_HAPLOTYPECALLER ( input, fasta, fai, dict, sites, sites_tbi, intervals )
} }

View file

@ -1,13 +1,26 @@
- name: gatk4 haplotypecaller test_gatk4_haplotypecaller - name: gatk4 haplotypecaller test_gatk4_haplotypecaller
command: nextflow run tests/modules/gatk4/haplotypecaller -entry test_gatk4_haplotypecaller -c tests/config/nextflow.config command: nextflow run tests/modules/gatk4/haplotypecaller -entry test_gatk4_haplotypecaller -c tests/config/nextflow.config
tags: tags:
- gatk4
- gatk4/haplotypecaller - gatk4/haplotypecaller
- gatk4
files:
- path: output/gatk4/test.vcf.gz
- path: output/gatk4/test.vcf.gz.tbi
# Test case for the test_gatk4_haplotypecaller_cram entry workflow; the VCF and
# its .tbi index are listed by path only (no md5sum).
- name: gatk4 haplotypecaller test_gatk4_haplotypecaller_cram
command: nextflow run tests/modules/gatk4/haplotypecaller -entry test_gatk4_haplotypecaller_cram -c tests/config/nextflow.config
tags:
- gatk4/haplotypecaller
- gatk4
files:
- path: output/gatk4/test.vcf.gz
- path: output/gatk4/test.vcf.gz.tbi
- name: gatk4 haplotypecaller test_gatk4_haplotypecaller_intervals_dbsnp
command: nextflow run tests/modules/gatk4/haplotypecaller -entry test_gatk4_haplotypecaller_intervals_dbsnp -c tests/config/nextflow.config
tags:
- gatk4/haplotypecaller
- gatk4
files: files:
- path: output/gatk4/test.vcf.gz - path: output/gatk4/test.vcf.gz
should_exist: true
contains:
- 'MT192765.1'
- '54.60'
- '37.32'
- path: output/gatk4/test.vcf.gz.tbi - path: output/gatk4/test.vcf.gz.tbi

View file

@ -11,3 +11,12 @@ workflow test_gatk4_markduplicates {
GATK4_MARKDUPLICATES ( input ) GATK4_MARKDUPLICATES ( input )
} }
// Runs GATK4_MARKDUPLICATES with two sorted paired-end BAM files from the
// homo_sapiens test data attached to a single meta map.
workflow test_gatk4_markduplicates_multiple_bams {
    // Shorthand handle into the test-data map to keep the file() calls short.
    def illumina = params.test_data['homo_sapiens']['illumina']

    // NOTE(review): both BAMs are passed as separate top-level tuple elements next
    // to the meta map. If the process declares `tuple val(meta), path(bams)`, they
    // presumably need to be wrapped in one nested list — confirm against the
    // process input declaration (recorded md5sums would need regenerating).
    input = [
        [ id:'test', single_end:false ], // meta map
        file(illumina['test_paired_end_sorted_bam'], checkIfExists: true),
        file(illumina['test2_paired_end_sorted_bam'], checkIfExists: true)
    ]

    GATK4_MARKDUPLICATES ( input )
}

View file

@ -1,8 +1,23 @@
- name: gatk4 markduplicates test_gatk4_markduplicates - name: gatk4 markduplicates test_gatk4_markduplicates
command: nextflow run tests/modules/gatk4/markduplicates -entry test_gatk4_markduplicates -c tests/config/nextflow.config command: nextflow run tests/modules/gatk4/markduplicates -entry test_gatk4_markduplicates -c tests/config/nextflow.config
tags: tags:
- gatk4
- gatk4/markduplicates - gatk4/markduplicates
- gatk4
files: files:
- path: output/gatk4/test.bai
md5sum: e9c125e82553209933883b4fe2b8d7c2
- path: output/gatk4/test.bam - path: output/gatk4/test.bam
md5sum: 3b6facab3afbacfa08a7a975efbd2c6b md5sum: bda9a7bf5057f2288ed70be3eb8a753f
- path: output/gatk4/test.metrics
# Test case for the test_gatk4_markduplicates_multiple_bams entry workflow; the
# BAI and BAM outputs carry md5sum values, the metrics file is listed by path only.
- name: gatk4 markduplicates test_gatk4_markduplicates_multiple_bams
command: nextflow run tests/modules/gatk4/markduplicates -entry test_gatk4_markduplicates_multiple_bams -c tests/config/nextflow.config
tags:
- gatk4/markduplicates
- gatk4
files:
- path: output/gatk4/test.bai
md5sum: 93cebe29e7cca2064262b739235cca9b
- path: output/gatk4/test.bam
md5sum: dcd6f584006b04141fb787001a8ecacc
- path: output/gatk4/test.metrics

View file

@ -11,5 +11,15 @@ workflow test_samtools_merge {
file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam'], checkIfExists: true)] file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam'], checkIfExists: true)]
] ]
SAMTOOLS_MERGE ( input ) SAMTOOLS_MERGE ( input, [] )
}
// Runs SAMTOOLS_MERGE on two recalibrated, sorted CRAM files grouped in one
// nested list, passing the genome FASTA as the second process input.
workflow test_samtools_merge_cram {
    // Shorthand handles into the test-data map to keep the file() calls short.
    def illumina = params.test_data['homo_sapiens']['illumina']
    def genome   = params.test_data['homo_sapiens']['genome']

    // Both CRAMs travel together as a single list element beside the meta map.
    def crams = [
        file(illumina['test_paired_end_recalibrated_sorted_cram'], checkIfExists: true),
        file(illumina['test2_paired_end_recalibrated_sorted_cram'], checkIfExists: true)
    ]

    input = [ [ id: 'test' ], crams ] // meta map + files to merge
    fasta = file(genome['genome_fasta'], checkIfExists: true)

    SAMTOOLS_MERGE ( input, fasta )
}

View file

@ -1,7 +1,15 @@
- name: samtools merge - name: samtools merge test_samtools_merge
command: nextflow run ./tests/modules/samtools/merge -entry test_samtools_merge -c tests/config/nextflow.config command: nextflow run tests/modules/samtools/merge -entry test_samtools_merge -c tests/config/nextflow.config
tags: tags:
- samtools
- samtools/merge - samtools/merge
- samtools
files: files:
- path: output/samtools/test_merged.bam - path: output/samtools/test_merged.bam
# Test case for the test_samtools_merge_cram entry workflow; the merged CRAM is
# listed by path only (no md5sum).
- name: samtools merge test_samtools_merge_cram
command: nextflow run tests/modules/samtools/merge -entry test_samtools_merge_cram -c tests/config/nextflow.config
tags:
- samtools/merge
- samtools
files:
- path: output/samtools/test_merged.cram

View file

@ -10,5 +10,15 @@ workflow test_samtools_stats {
file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true) file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true)
] ]
SAMTOOLS_STATS ( input ) SAMTOOLS_STATS ( input, [])
}
// Runs SAMTOOLS_STATS on a recalibrated, sorted CRAM file with its CRAI index,
// passing the genome FASTA as the second process input.
workflow test_samtools_stats_cram {
    // Shorthand handles into the test-data map to keep the file() calls short.
    def illumina = params.test_data['homo_sapiens']['illumina']
    def genome   = params.test_data['homo_sapiens']['genome']

    input = [
        [ id: 'test' ], // meta map
        file(illumina['test_paired_end_recalibrated_sorted_cram'], checkIfExists: true),
        file(illumina['test_paired_end_recalibrated_sorted_cram_crai'], checkIfExists: true)
    ]
    fasta = file(genome['genome_fasta'], checkIfExists: true)

    SAMTOOLS_STATS ( input, fasta )
}

View file

@ -1,8 +1,17 @@
- name: samtools stats - name: samtools stats test_samtools_stats
command: nextflow run ./tests/modules/samtools/stats -entry test_samtools_stats -c tests/config/nextflow.config command: nextflow run tests/modules/samtools/stats -entry test_samtools_stats -c tests/config/nextflow.config
tags: tags:
- samtools - samtools
- samtools/stats - samtools/stats
files: files:
- path: ./output/samtools/test.paired_end.sorted.bam.stats - path: output/samtools/test.paired_end.sorted.bam.stats
md5sum: a7f36cf11fd3bf97e0a0ae29c0627296 md5sum: a7f36cf11fd3bf97e0a0ae29c0627296
# Test case for the test_samtools_stats_cram entry workflow; the stats file is
# listed together with an md5sum value.
- name: samtools stats test_samtools_stats_cram
command: nextflow run tests/modules/samtools/stats -entry test_samtools_stats_cram -c tests/config/nextflow.config
tags:
- samtools
- samtools/stats
files:
- path: output/samtools/test.paired_end.recalibrated.sorted.cram.stats
md5sum: bd55a1da30028403f4b66dacf7a2a20e

View file

@ -7,8 +7,17 @@ include { SAMTOOLS_VIEW } from '../../../../modules/samtools/view/main.nf' addPa
workflow test_samtools_view { workflow test_samtools_view {
input = [ [ id:'test', single_end:false ], // meta map input = [ [ id:'test', single_end:false ], // meta map
file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true) file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true)
] ]
SAMTOOLS_VIEW ( input ) SAMTOOLS_VIEW ( input, [] )
}
// Runs SAMTOOLS_VIEW on a recalibrated, sorted CRAM file with its CRAI index,
// passing the genome FASTA as the second process input.
workflow test_samtools_view_cram {
    // Shorthand handles into the test-data map to keep the file() calls short.
    def illumina = params.test_data['homo_sapiens']['illumina']
    def genome   = params.test_data['homo_sapiens']['genome']

    input = [
        [ id: 'test' ], // meta map
        file(illumina['test_paired_end_recalibrated_sorted_cram'], checkIfExists: true),
        file(illumina['test_paired_end_recalibrated_sorted_cram_crai'], checkIfExists: true)
    ]
    fasta = file(genome['genome_fasta'], checkIfExists: true)

    SAMTOOLS_VIEW ( input, fasta )
}

View file

@ -1,8 +1,16 @@
- name: samtools view - name: samtools view test_samtools_view
command: nextflow run tests/modules/samtools/view -entry test_samtools_view -c tests/config/nextflow.config command: nextflow run tests/modules/samtools/view -entry test_samtools_view -c tests/config/nextflow.config
tags: tags:
- samtools
- samtools/view - samtools/view
- samtools
files: files:
- path: output/samtools/test.bam - path: output/samtools/test.bam
md5sum: 8fb1e82f76416e9e30fc6b2357e2cf13 md5sum: 8fb1e82f76416e9e30fc6b2357e2cf13
# Test case for the test_samtools_view_cram entry workflow; the CRAM output is
# listed by path only (no md5sum).
- name: samtools view test_samtools_view_cram
command: nextflow run tests/modules/samtools/view -entry test_samtools_view_cram -c tests/config/nextflow.config
tags:
- samtools/view
- samtools
files:
- path: output/samtools/test.cram