From ac1e6df076195cec553a2079c9cebd94026a0d47 Mon Sep 17 00:00:00 2001 From: FriederikeHanssen Date: Fri, 29 Oct 2021 13:01:05 +0200 Subject: [PATCH] Update to allow cram + update needed to use the gatk4 modules in sarek (#976) * Make samtools/merge cram compliant * samtools/stats cram compliance * update yml file * samtools/view to deal with crams * Update tests to make sure cram works * also fix tmp dir and min mem in one go * basequalityrecal test for cram + min mem + tmpdir * update haplotypecaller for sarek * update haplotype yml * update markdup to allow multiple bams, take out params to be passed with options.args * remove TODO statement * Remove variable md5sum * add empty input to stats module in subworkflows * subworkflows seem to work now on my side * Apply code review Co-authored-by: Maxime U. Garcia * replace bam with input to be more inclusive * rename everywhere * rename input * remove variable checksum Co-authored-by: Maxime U. Garcia --- modules/gatk4/applybqsr/main.nf | 10 +++++-- modules/gatk4/applybqsr/meta.yml | 10 +++++-- modules/gatk4/baserecalibrator/main.nf | 11 +++++-- modules/gatk4/baserecalibrator/meta.yml | 11 +++++-- modules/gatk4/haplotypecaller/main.nf | 18 +++++++---- modules/gatk4/haplotypecaller/meta.yml | 23 ++++++++++---- modules/gatk4/markduplicates/main.nf | 13 ++++++-- modules/gatk4/markduplicates/meta.yml | 1 + modules/manta/germline/main.nf | 4 +-- modules/manta/germline/meta.yml | 4 +-- modules/manta/somatic/main.nf | 6 ++-- modules/manta/somatic/meta.yml | 8 ++--- modules/manta/tumoronly/main.nf | 4 +-- modules/manta/tumoronly/meta.yml | 5 ++-- modules/samtools/merge/main.nf | 12 +++++--- modules/samtools/merge/meta.yml | 17 ++++++++--- modules/samtools/stats/main.nf | 6 ++-- modules/samtools/stats/meta.yml | 21 ++++++++----- modules/samtools/view/main.nf | 12 +++++--- modules/samtools/view/meta.yml | 15 ++++++++-- modules/strelka/germline/main.nf | 4 +-- modules/strelka/germline/meta.yml | 12 ++++---- 
modules/strelka/somatic/main.nf | 6 ++-- modules/strelka/somatic/meta.yml | 8 ++--- .../nf-core/bam_stats_samtools/main.nf | 2 +- tests/modules/gatk4/applybqsr/main.nf | 14 +++++++++ tests/modules/gatk4/applybqsr/test.yml | 17 ++++++++--- tests/modules/gatk4/baserecalibrator/main.nf | 15 ++++++++++ tests/modules/gatk4/baserecalibrator/test.yml | 15 ++++++++-- tests/modules/gatk4/haplotypecaller/main.nf | 30 ++++++++++++++++++- tests/modules/gatk4/haplotypecaller/test.yml | 25 ++++++++++++---- tests/modules/gatk4/markduplicates/main.nf | 9 ++++++ tests/modules/gatk4/markduplicates/test.yml | 19 ++++++++++-- tests/modules/samtools/merge/main.nf | 12 +++++++- tests/modules/samtools/merge/test.yml | 14 +++++++-- tests/modules/samtools/stats/main.nf | 12 +++++++- tests/modules/samtools/stats/test.yml | 15 ++++++++-- tests/modules/samtools/view/main.nf | 13 ++++++-- tests/modules/samtools/view/test.yml | 12 ++++++-- 39 files changed, 356 insertions(+), 109 deletions(-) diff --git a/modules/gatk4/applybqsr/main.nf b/modules/gatk4/applybqsr/main.nf index e804bcff..508a29ca 100644 --- a/modules/gatk4/applybqsr/main.nf +++ b/modules/gatk4/applybqsr/main.nf @@ -19,7 +19,7 @@ process GATK4_APPLYBQSR { } input: - tuple val(meta), path(bam), path(bai), path(bqsr_table) + tuple val(meta), path(input), path(input_index), path(bqsr_table) path fasta path fastaidx path dict @@ -32,12 +32,18 @@ process GATK4_APPLYBQSR { script: def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" def interval = intervals ? "-L ${intervals}" : "" + if (!task.memory) { + log.info '[GATK ApplyBQSR] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = task.memory.giga + } """ gatk ApplyBQSR \\ -R $fasta \\ - -I $bam \\ + -I $input \\ --bqsr-recal-file $bqsr_table \\ $interval \\ + --tmp-dir . 
\\ -O ${prefix}.bam \\ $options.args diff --git a/modules/gatk4/applybqsr/meta.yml b/modules/gatk4/applybqsr/meta.yml index e09e8c52..b002dca6 100644 --- a/modules/gatk4/applybqsr/meta.yml +++ b/modules/gatk4/applybqsr/meta.yml @@ -20,10 +20,14 @@ input: description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - bam: + - input: type: file - description: BAM file from alignment - pattern: "*.{bam}" + description: BAM/CRAM file from alignment + pattern: "*.{bam,cram}" + - input_index: + type: file + description: BAI/CRAI file from alignment + pattern: "*.{bai,crai}" - bqsr_table: type: file description: Recalibration table from gatk4_baserecalibrator diff --git a/modules/gatk4/baserecalibrator/main.nf b/modules/gatk4/baserecalibrator/main.nf index 6033fbf1..85c30daf 100644 --- a/modules/gatk4/baserecalibrator/main.nf +++ b/modules/gatk4/baserecalibrator/main.nf @@ -19,7 +19,7 @@ process GATK4_BASERECALIBRATOR { } input: - tuple val(meta), path(bam), path(bai) + tuple val(meta), path(input), path(input_index) path fasta path fastaidx path dict @@ -35,12 +35,19 @@ process GATK4_BASERECALIBRATOR { def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" def intervalsCommand = intervalsBed ? "-L ${intervalsBed}" : "" def sitesCommand = knownSites.collect{"--known-sites ${it}"}.join(' ') + + if (!task.memory) { + log.info '[GATK BaseRecalibrator] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = task.memory.giga + } """ gatk BaseRecalibrator \ -R $fasta \ - -I $bam \ + -I $input \ $sitesCommand \ $intervalsCommand \ + --tmp-dir . 
\ $options.args \ -O ${prefix}.table diff --git a/modules/gatk4/baserecalibrator/meta.yml b/modules/gatk4/baserecalibrator/meta.yml index d579d9e5..7fd273e1 100644 --- a/modules/gatk4/baserecalibrator/meta.yml +++ b/modules/gatk4/baserecalibrator/meta.yml @@ -20,10 +20,14 @@ input: description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - bam: + - input: type: file - description: BAM file from alignment - pattern: "*.{bam}" + description: BAM/CRAM file from alignment + pattern: "*.{bam,cram}" + - input_index: + type: file + description: BAI/CRAI file from alignment + pattern: "*.{bai,crai}" - fasta: type: file description: The reference fasta file @@ -57,3 +61,4 @@ output: authors: - "@yocra3" + - "@FriederikeHanssen" diff --git a/modules/gatk4/haplotypecaller/main.nf b/modules/gatk4/haplotypecaller/main.nf index 01b71ccb..4bddbb6d 100644 --- a/modules/gatk4/haplotypecaller/main.nf +++ b/modules/gatk4/haplotypecaller/main.nf @@ -19,10 +19,13 @@ process GATK4_HAPLOTYPECALLER { } input: - tuple val(meta), path(bam), path(bai) + tuple val(meta), path(input), path(input_index) path fasta path fai path dict + path dbsnp + path dbsnp_tbi + path interval output: tuple val(meta), path("*.vcf.gz"), emit: vcf @@ -30,8 +33,10 @@ process GATK4_HAPLOTYPECALLER { path "versions.yml" , emit: versions script: - def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" - def avail_mem = 3 + def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" + def interval_option = interval ? "-L ${interval}" : "" + def dbsnp_option = dbsnp ? "-D ${dbsnp}" : "" + def avail_mem = 3 if (!task.memory) { log.info '[GATK HaplotypeCaller] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' 
} else { @@ -42,9 +47,12 @@ process GATK4_HAPLOTYPECALLER { --java-options "-Xmx${avail_mem}g" \\ HaplotypeCaller \\ -R $fasta \\ - -I $bam \\ + -I $input \\ + ${dbsnp_option} \\ + ${interval_option} \\ -O ${prefix}.vcf.gz \\ - $options.args + $options.args \\ + --tmp-dir . cat <<-END_VERSIONS > versions.yml ${getProcessName(task.process)}: diff --git a/modules/gatk4/haplotypecaller/meta.yml b/modules/gatk4/haplotypecaller/meta.yml index 6a1bd7ed..6c9d0891 100644 --- a/modules/gatk4/haplotypecaller/meta.yml +++ b/modules/gatk4/haplotypecaller/meta.yml @@ -21,14 +21,14 @@ input: description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - bam: + - input: type: file - description: BAM file - pattern: "*.bam" - - bai: + description: BAM/CRAM file from alignment + pattern: "*.{bam,cram}" + - input_index: type: file - description: Index of BAM file - pattern: "*.bam.bai" + description: BAI/CRAI file from alignment + pattern: "*.{bai,crai}" - fasta: type: file description: The reference fasta file @@ -41,6 +41,16 @@ input: type: file description: GATK sequence dictionary pattern: "*.dict" + - dbsnp: + type: file + description: VCF file containing known sites (optional) + - dbsnp_tbi: + type: file + description: VCF index of dbsnp (optional) + - interval: + type: file + description: Bed file with the genomic regions included in the library (optional) + output: - meta: type: map @@ -62,3 +72,4 @@ output: authors: - "@suzannejin" + - "@FriederikeHanssen" diff --git a/modules/gatk4/markduplicates/main.nf b/modules/gatk4/markduplicates/main.nf index 8f94f4dd..b1ff5222 100644 --- a/modules/gatk4/markduplicates/main.nf +++ b/modules/gatk4/markduplicates/main.nf @@ -19,21 +19,28 @@ process GATK4_MARKDUPLICATES { } input: - tuple val(meta), path(bam) + tuple val(meta), path(bams) output: tuple val(meta), path("*.bam") , emit: bam + tuple val(meta), path("*.bai") , emit: bai tuple val(meta), path("*.metrics"), emit: metrics path "versions.yml" , 
emit: versions script: def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" + def bam_list = bams.collect(){ bam -> "--INPUT ".concat(bam.toString()) }.join(" ") + def avail_mem = 3 + if (!task.memory) { + log.info '[GATK HaplotypeCaller] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = task.memory.giga + } """ gatk MarkDuplicates \\ - --INPUT $bam \\ + $bam_list \\ --METRICS_FILE ${prefix}.metrics \\ --TMP_DIR . \\ - --ASSUME_SORT_ORDER coordinate \\ --CREATE_INDEX true \\ --OUTPUT ${prefix}.bam \\ $options.args diff --git a/modules/gatk4/markduplicates/meta.yml b/modules/gatk4/markduplicates/meta.yml index 59aaad4d..5777067a 100644 --- a/modules/gatk4/markduplicates/meta.yml +++ b/modules/gatk4/markduplicates/meta.yml @@ -47,3 +47,4 @@ output: authors: - "@ajodeh-juma" + - "@FriederikeHanssen" diff --git a/modules/manta/germline/main.nf b/modules/manta/germline/main.nf index ca2ac9dc..f957a7ec 100644 --- a/modules/manta/germline/main.nf +++ b/modules/manta/germline/main.nf @@ -19,7 +19,7 @@ process MANTA_GERMLINE { } input: - tuple val(meta), path(cram), path(crai) + tuple val(meta), path(input), path(input_index) path fasta path fai path target_bed @@ -39,7 +39,7 @@ process MANTA_GERMLINE { def options_manta = target_bed ? "--exome --callRegions $target_bed" : "" """ configManta.py \ - --bam $cram \ + --bam $input \ --reference $fasta \ $options_manta \ --runDir manta diff --git a/modules/manta/germline/meta.yml b/modules/manta/germline/meta.yml index 7933fd6c..3bdb8264 100644 --- a/modules/manta/germline/meta.yml +++ b/modules/manta/germline/meta.yml @@ -23,11 +23,11 @@ input: description: | Groovy Map containing sample information e.g. 
[ id:'test', single_end:false ] - - cram: + - input: type: file description: BAM/CRAM/SAM file pattern: "*.{bam,cram,sam}" - - crai: + - input_index: type: file description: BAM/CRAM/SAM index file pattern: "*.{bai,crai,sai}" diff --git a/modules/manta/somatic/main.nf b/modules/manta/somatic/main.nf index 16a30f17..f912d478 100644 --- a/modules/manta/somatic/main.nf +++ b/modules/manta/somatic/main.nf @@ -19,7 +19,7 @@ process MANTA_SOMATIC { } input: - tuple val(meta), path(cram_normal), path(crai_normal), path(cram_tumor), path(crai_tumor) + tuple val(meta), path(input_normal), path(input_index_normal), path(input_tumor), path(input_index_tumor) path fasta path fai path target_bed @@ -42,8 +42,8 @@ process MANTA_SOMATIC { """ configManta.py \ - --tumorBam $cram_tumor \ - --normalBam $cram_normal \ + --tumorBam $input_tumor \ + --normalBam $input_normal \ --reference $fasta \ $options_manta \ --runDir manta diff --git a/modules/manta/somatic/meta.yml b/modules/manta/somatic/meta.yml index 08103ba7..ddd0eafe 100644 --- a/modules/manta/somatic/meta.yml +++ b/modules/manta/somatic/meta.yml @@ -23,19 +23,19 @@ input: description: | Groovy Map containing sample information e.g. 
[ id:'test', single_end:false ] - - cram_normal: + - input_normal: type: file description: BAM/CRAM/SAM file pattern: "*.{bam,cram,sam}" - - crai_normal: + - input_index_normal: type: file description: BAM/CRAM/SAM index file pattern: "*.{bai,crai,sai}" - - cram_tumor: + - input_tumor: type: file description: BAM/CRAM/SAM file pattern: "*.{bam,cram,sam}" - - crai_tumor: + - input_index_tumor: type: file description: BAM/CRAM/SAM index file pattern: "*.{bai,crai,sai}" diff --git a/modules/manta/tumoronly/main.nf b/modules/manta/tumoronly/main.nf index a86279df..f20e8128 100644 --- a/modules/manta/tumoronly/main.nf +++ b/modules/manta/tumoronly/main.nf @@ -19,7 +19,7 @@ process MANTA_TUMORONLY { } input: - tuple val(meta), path(cram), path(crai) + tuple val(meta), path(input), path(input_index) path fasta path fai path target_bed @@ -39,7 +39,7 @@ process MANTA_TUMORONLY { def options_manta = target_bed ? "--exome --callRegions $target_bed" : "" """ configManta.py \ - --tumorBam $cram \ + --tumorBam $input \ --reference $fasta \ $options_manta \ --runDir manta diff --git a/modules/manta/tumoronly/meta.yml b/modules/manta/tumoronly/meta.yml index d4af9402..86d1c6c0 100644 --- a/modules/manta/tumoronly/meta.yml +++ b/modules/manta/tumoronly/meta.yml @@ -23,11 +23,11 @@ input: description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - cram: + - input: type: file description: BAM/CRAM/SAM file pattern: "*.{bam,cram,sam}" - - crai: + - input_index: type: file description: BAM/CRAM/SAM index file pattern: "*.{bai,crai,sai}" @@ -54,7 +54,6 @@ output: description: | Groovy Map containing sample information e.g. 
[ id:'test', single_end:false ] - - candidate_small_indels_vcf: type: file description: Gzipped VCF file containing variants diff --git a/modules/samtools/merge/main.nf b/modules/samtools/merge/main.nf index 34c40d57..fefb423b 100644 --- a/modules/samtools/merge/main.nf +++ b/modules/samtools/merge/main.nf @@ -19,16 +19,20 @@ process SAMTOOLS_MERGE { } input: - tuple val(meta), path(bams) + tuple val(meta), path(input_files) + path fasta output: - tuple val(meta), path("${prefix}.bam"), emit: bam - path "versions.yml" , emit: versions + tuple val(meta), path("${prefix}.bam"), optional:true, emit: bam + tuple val(meta), path("${prefix}.cram"), optional:true, emit: cram + path "versions.yml" , emit: versions script: prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" + def file_type = input_files[0].getExtension() + def reference = fasta ? "--reference ${fasta}" : "" """ - samtools merge ${prefix}.bam $bams + samtools merge ${reference} ${prefix}.${file_type} $input_files cat <<-END_VERSIONS > versions.yml ${getProcessName(task.process)}: ${getSoftwareName(task.process)}: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') diff --git a/modules/samtools/merge/meta.yml b/modules/samtools/merge/meta.yml index 78b75b36..2576a3a3 100644 --- a/modules/samtools/merge/meta.yml +++ b/modules/samtools/merge/meta.yml @@ -1,5 +1,5 @@ name: samtools_merge -description: Merge BAM file +description: Merge BAM or CRAM file keywords: - merge - bam @@ -21,20 +21,28 @@ input: description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - bam: + - input_files: type: file - description: BAM file + description: BAM/CRAM file pattern: "*.{bam,cram,sam}" + - fasta: + type: optional file + description: Reference file the CRAM was created with + pattern: "*.{fasta,fa}" output: - meta: type: map description: | Groovy Map containing sample information e.g. 
[ id:'test', single_end:false ] - - merged_bam: + - bam: type: file description: BAM file pattern: "*.{bam}" + - cram: + type: file + description: CRAM file + pattern: "*.{cram}" - versions: type: file description: File containing software versions @@ -43,3 +51,4 @@ authors: - "@drpatelh" - "@yuukiiwa " - "@maxulysse" + - "@FriederikeHanssen" diff --git a/modules/samtools/stats/main.nf b/modules/samtools/stats/main.nf index 6218dd2d..aab43410 100644 --- a/modules/samtools/stats/main.nf +++ b/modules/samtools/stats/main.nf @@ -19,15 +19,17 @@ process SAMTOOLS_STATS { } input: - tuple val(meta), path(bam), path(bai) + tuple val(meta), path(input), path(input_index) + path fasta output: tuple val(meta), path("*.stats"), emit: stats path "versions.yml" , emit: versions script: + def reference = fasta ? "--reference ${fasta}" : "" """ - samtools stats $bam > ${bam}.stats + samtools stats ${reference} ${input} > ${input}.stats cat <<-END_VERSIONS > versions.yml ${getProcessName(task.process)}: ${getSoftwareName(task.process)}: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') diff --git a/modules/samtools/stats/meta.yml b/modules/samtools/stats/meta.yml index ae41498a..869e62e3 100644 --- a/modules/samtools/stats/meta.yml +++ b/modules/samtools/stats/meta.yml @@ -22,14 +22,18 @@ input: description: | Groovy Map containing sample information e.g. 
[ id:'test', single_end:false ] - - bam: - type: file - description: BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" - - bai: - type: file - description: Index for BAM/CRAM/SAM file - pattern: "*.{bai,crai,sai}" + - input: + type: file + description: BAM/CRAM file from alignment + pattern: "*.{bam,cram}" + - input_index: + type: file + description: BAI/CRAI file from alignment + pattern: "*.{bai,crai}" + - fasta: + type: optional file + description: Reference file the CRAM was created with + pattern: "*.{fasta,fa}" output: - meta: type: map @@ -46,3 +50,4 @@ output: pattern: "versions.yml" authors: - "@drpatelh" + - "@FriederikeHanssen" diff --git a/modules/samtools/view/main.nf b/modules/samtools/view/main.nf index ec1663e0..b7a047ee 100644 --- a/modules/samtools/view/main.nf +++ b/modules/samtools/view/main.nf @@ -19,16 +19,20 @@ process SAMTOOLS_VIEW { } input: - tuple val(meta), path(bam) + tuple val(meta), path(input) + path fasta output: - tuple val(meta), path("*.bam"), emit: bam - path "versions.yml" , emit: versions + tuple val(meta), path("*.bam") , optional: true, emit: bam + tuple val(meta), path("*.cram"), optional: true, emit: cram + path "versions.yml" , emit: versions script: def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" + def reference = fasta ? "--reference ${fasta} -C" : "" + def file_type = input.getExtension() """ - samtools view $options.args $bam > ${prefix}.bam + samtools view ${reference} $options.args $input > ${prefix}.${file_type} cat <<-END_VERSIONS > versions.yml ${getProcessName(task.process)}: ${getSoftwareName(task.process)}: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') diff --git a/modules/samtools/view/meta.yml b/modules/samtools/view/meta.yml index 29d1ecc1..8abf34af 100644 --- a/modules/samtools/view/meta.yml +++ b/modules/samtools/view/meta.yml @@ -21,10 +21,14 @@ input: description: | Groovy Map containing sample information e.g. 
[ id:'test', single_end:false ] - - bam: + - input: type: file description: BAM/CRAM/SAM file pattern: "*.{bam,cram,sam}" + - fasta: + type: optional file + description: Reference file the CRAM was created with + pattern: "*.{fasta,fa}" output: - meta: type: map @@ -33,8 +37,12 @@ output: e.g. [ id:'test', single_end:false ] - bam: type: file - description: filtered/converted BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" + description: filtered/converted BAM/SAM file + pattern: "*.{bam,sam}" + - cram: + type: file + description: filtered/converted CRAM file + pattern: "*.cram" - versions: type: file description: File containing software versions @@ -42,3 +50,4 @@ output: authors: - "@drpatelh" - "@joseespinosa" + - "@FriederikeHanssen" diff --git a/modules/strelka/germline/main.nf b/modules/strelka/germline/main.nf index 0d201940..5e913c40 100644 --- a/modules/strelka/germline/main.nf +++ b/modules/strelka/germline/main.nf @@ -19,7 +19,7 @@ process STRELKA_GERMLINE { } input: - tuple val(meta), path(bam), path(bai) + tuple val(meta), path(input), path(input_index) path fasta path fai path target_bed @@ -38,7 +38,7 @@ process STRELKA_GERMLINE { def regions = target_bed ? "--exome --callRegions ${target_bed}" : "" """ configureStrelkaGermlineWorkflow.py \\ - --bam $bam \\ + --bam $input \\ --referenceFasta $fasta \\ $regions \\ $options.args \\ diff --git a/modules/strelka/germline/meta.yml b/modules/strelka/germline/meta.yml index 3f86b045..2eeb0f8f 100644 --- a/modules/strelka/germline/meta.yml +++ b/modules/strelka/germline/meta.yml @@ -21,14 +21,14 @@ input: description: | Groovy Map containing sample information e.g. 
[ id:'test'] - - bam: + - input: type: file - description: BAM file - pattern: "*.{bam}" - - bai: + description: BAM/CRAM file + pattern: "*.{bam,cram}" + - input_index: type: file - description: BAM index file - pattern: "*.{bai}" + description: BAI/CRAI index file + pattern: "*.{bai,crai}" - target_bed: type: file description: An optional bed file diff --git a/modules/strelka/somatic/main.nf b/modules/strelka/somatic/main.nf index 02bd5822..633b0a2c 100644 --- a/modules/strelka/somatic/main.nf +++ b/modules/strelka/somatic/main.nf @@ -19,7 +19,7 @@ process STRELKA_SOMATIC { } input: - tuple val(meta), path(cram_normal), path(crai_normal), path(cram_tumor), path(crai_tumor), path(manta_candidate_small_indels), path(manta_candidate_small_indels_tbi) + tuple val(meta), path(input_normal), path(input_index_normal), path(input_tumor), path(input_index_tumor), path(manta_candidate_small_indels), path(manta_candidate_small_indels_tbi) path fasta path fai path target_bed @@ -38,8 +38,8 @@ process STRELKA_SOMATIC { def options_manta = manta_candidate_small_indels ? "--indelCandidates ${manta_candidate_small_indels}" : "" """ configureStrelkaSomaticWorkflow.py \\ - --tumor $cram_tumor \\ - --normal $cram_normal \\ + --tumor $input_tumor \\ + --normal $input_normal \\ --referenceFasta $fasta \\ $options_target_bed \\ $options_manta \\ diff --git a/modules/strelka/somatic/meta.yml b/modules/strelka/somatic/meta.yml index ce5acb33..076c1036 100644 --- a/modules/strelka/somatic/meta.yml +++ b/modules/strelka/somatic/meta.yml @@ -21,19 +21,19 @@ input: description: | Groovy Map containing sample information e.g. 
[ id:'test', single_end:false ] - - cram_normal: + - input_normal: type: file description: BAM/CRAM/SAM file pattern: "*.{bam,cram,sam}" - - crai_normal: + - input_index_normal: type: file description: BAM/CRAM/SAM index file pattern: "*.{bai,crai,sai}" - - cram_tumor: + - input_tumor: type: file description: BAM/CRAM/SAM file pattern: "*.{bam,cram,sam}" - - crai_tumor: + - input_index_tumor: type: file description: BAM/CRAM/SAM index file pattern: "*.{bai,crai,sai}" diff --git a/subworkflows/nf-core/bam_stats_samtools/main.nf b/subworkflows/nf-core/bam_stats_samtools/main.nf index 9276232c..463ec99d 100644 --- a/subworkflows/nf-core/bam_stats_samtools/main.nf +++ b/subworkflows/nf-core/bam_stats_samtools/main.nf @@ -15,7 +15,7 @@ workflow BAM_STATS_SAMTOOLS { main: ch_versions = Channel.empty() - SAMTOOLS_STATS ( ch_bam_bai ) + SAMTOOLS_STATS ( ch_bam_bai, [] ) ch_versions = ch_versions.mix(SAMTOOLS_STATS.out.versions.first()) SAMTOOLS_FLAGSTAT ( ch_bam_bai ) diff --git a/tests/modules/gatk4/applybqsr/main.nf b/tests/modules/gatk4/applybqsr/main.nf index 5fb590b0..80b51015 100644 --- a/tests/modules/gatk4/applybqsr/main.nf +++ b/tests/modules/gatk4/applybqsr/main.nf @@ -30,3 +30,17 @@ workflow test_gatk4_applybqsr_intervals { GATK4_APPLYBQSR ( input, fasta, fai, dict, intervals ) } + +workflow test_gatk4_applybqsr_cram { + input = [ [ id:'test' ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_baserecalibrator_table'], checkIfExists: true) + ] + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) + 
intervals = file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true) + + GATK4_APPLYBQSR ( input, fasta, fai, dict, intervals ) +} diff --git a/tests/modules/gatk4/applybqsr/test.yml b/tests/modules/gatk4/applybqsr/test.yml index 983cc09a..ed89c6ff 100644 --- a/tests/modules/gatk4/applybqsr/test.yml +++ b/tests/modules/gatk4/applybqsr/test.yml @@ -1,17 +1,26 @@ - name: gatk4 applybqsr test_gatk4_applybqsr command: nextflow run tests/modules/gatk4/applybqsr -entry test_gatk4_applybqsr -c tests/config/nextflow.config tags: - - gatk4 - gatk4/applybqsr + - gatk4 files: - path: output/gatk4/test.bam - md5sum: dac716c394db5e83c12b44355c098ca7 + md5sum: 87a2eabae2b7b41574f966612b5addae - name: gatk4 applybqsr test_gatk4_applybqsr_intervals command: nextflow run tests/modules/gatk4/applybqsr -entry test_gatk4_applybqsr_intervals -c tests/config/nextflow.config tags: - - gatk4 - gatk4/applybqsr + - gatk4 files: - path: output/gatk4/test.bam - md5sum: 400441dbe5344658580ba0a24ba57069 + md5sum: 9c015d3c1dbd9eee793b7386f432b6aa + +- name: gatk4 applybqsr test_gatk4_applybqsr_cram + command: nextflow run tests/modules/gatk4/applybqsr -entry test_gatk4_applybqsr_cram -c tests/config/nextflow.config + tags: + - gatk4/applybqsr + - gatk4 + files: + - path: output/gatk4/test.bam + md5sum: 02f84815fdbc99c21c8d42ebdcabbbf7 diff --git a/tests/modules/gatk4/baserecalibrator/main.nf b/tests/modules/gatk4/baserecalibrator/main.nf index 671a1d67..a50c09e3 100644 --- a/tests/modules/gatk4/baserecalibrator/main.nf +++ b/tests/modules/gatk4/baserecalibrator/main.nf @@ -18,6 +18,21 @@ workflow test_gatk4_baserecalibrator { GATK4_BASERECALIBRATOR ( input, fasta, fai, dict, [], sites, sites_tbi ) } +workflow test_gatk4_baserecalibrator_cram { + input = [ [ id:'test' ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true), + 
file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_baserecalibrator_table'], checkIfExists: true) + ] + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) + sites = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz'], checkIfExists: true) + sites_tbi = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz_tbi'], checkIfExists: true) + + GATK4_BASERECALIBRATOR ( input, fasta, fai, dict, [], sites, sites_tbi ) +} + workflow test_gatk4_baserecalibrator_intervals { input = [ [ id:'test' ], // meta map file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), diff --git a/tests/modules/gatk4/baserecalibrator/test.yml b/tests/modules/gatk4/baserecalibrator/test.yml index 3c30d78f..a15c9ee3 100644 --- a/tests/modules/gatk4/baserecalibrator/test.yml +++ b/tests/modules/gatk4/baserecalibrator/test.yml @@ -1,17 +1,26 @@ - name: gatk4 baserecalibrator test_gatk4_baserecalibrator command: nextflow run tests/modules/gatk4/baserecalibrator -entry test_gatk4_baserecalibrator -c tests/config/nextflow.config tags: - - gatk4/baserecalibrator - gatk4 + - gatk4/baserecalibrator files: - path: output/gatk4/test.table md5sum: e2e43abdc0c943c1a54dae816d0b9ea7 +- name: gatk4 baserecalibrator test_gatk4_baserecalibrator_cram + command: nextflow run tests/modules/gatk4/baserecalibrator -entry test_gatk4_baserecalibrator_cram -c tests/config/nextflow.config + tags: + - gatk4 + - gatk4/baserecalibrator + files: + - path: output/gatk4/test.table + md5sum: 35d89a3811aa31711fc9815b6b80e6ec + - name: gatk4 baserecalibrator test_gatk4_baserecalibrator_intervals command: 
nextflow run tests/modules/gatk4/baserecalibrator -entry test_gatk4_baserecalibrator_intervals -c tests/config/nextflow.config tags: - - gatk4/baserecalibrator - gatk4 + - gatk4/baserecalibrator files: - path: output/gatk4/test.table md5sum: 9ecb5f00a2229291705addc09c0ec231 @@ -19,8 +28,8 @@ - name: gatk4 baserecalibrator test_gatk4_baserecalibrator_multiple_sites command: nextflow run tests/modules/gatk4/baserecalibrator -entry test_gatk4_baserecalibrator_multiple_sites -c tests/config/nextflow.config tags: - - gatk4/baserecalibrator - gatk4 + - gatk4/baserecalibrator files: - path: output/gatk4/test.table md5sum: e2e43abdc0c943c1a54dae816d0b9ea7 diff --git a/tests/modules/gatk4/haplotypecaller/main.nf b/tests/modules/gatk4/haplotypecaller/main.nf index 76059074..fd5f30fa 100644 --- a/tests/modules/gatk4/haplotypecaller/main.nf +++ b/tests/modules/gatk4/haplotypecaller/main.nf @@ -13,5 +13,33 @@ workflow test_gatk4_haplotypecaller { fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) dict = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true) - GATK4_HAPLOTYPECALLER ( input, fasta, fai, dict ) + GATK4_HAPLOTYPECALLER ( input, fasta, fai, dict, [], [], [] ) +} + +workflow test_gatk4_haplotypecaller_cram { + input = [ [ id:'test' ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true) + ] + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) + + GATK4_HAPLOTYPECALLER ( input, fasta, fai, dict, [], [], [] ) +} + +workflow test_gatk4_haplotypecaller_intervals_dbsnp { + input = [ [ id:'test' ], // 
meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true) + ] + + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) + sites = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz'], checkIfExists: true) + sites_tbi = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz_tbi'], checkIfExists: true) + intervals = file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true) + + GATK4_HAPLOTYPECALLER ( input, fasta, fai, dict, sites, sites_tbi, intervals ) } diff --git a/tests/modules/gatk4/haplotypecaller/test.yml b/tests/modules/gatk4/haplotypecaller/test.yml index dee2a2ab..480ff8f0 100644 --- a/tests/modules/gatk4/haplotypecaller/test.yml +++ b/tests/modules/gatk4/haplotypecaller/test.yml @@ -1,13 +1,26 @@ - name: gatk4 haplotypecaller test_gatk4_haplotypecaller command: nextflow run tests/modules/gatk4/haplotypecaller -entry test_gatk4_haplotypecaller -c tests/config/nextflow.config tags: - - gatk4 - gatk4/haplotypecaller + - gatk4 + files: + - path: output/gatk4/test.vcf.gz + - path: output/gatk4/test.vcf.gz.tbi + +- name: gatk4 haplotypecaller test_gatk4_haplotypecaller_cram + command: nextflow run tests/modules/gatk4/haplotypecaller -entry test_gatk4_haplotypecaller_cram -c tests/config/nextflow.config + tags: + - gatk4/haplotypecaller + - gatk4 + files: + - path: output/gatk4/test.vcf.gz + - path: output/gatk4/test.vcf.gz.tbi + +- name: gatk4 haplotypecaller test_gatk4_haplotypecaller_intervals_dbsnp + command: nextflow run tests/modules/gatk4/haplotypecaller -entry test_gatk4_haplotypecaller_intervals_dbsnp 
-c tests/config/nextflow.config + tags: + - gatk4/haplotypecaller + - gatk4 files: - path: output/gatk4/test.vcf.gz - should_exist: true - contains: - - 'MT192765.1' - - '54.60' - - '37.32' - path: output/gatk4/test.vcf.gz.tbi diff --git a/tests/modules/gatk4/markduplicates/main.nf b/tests/modules/gatk4/markduplicates/main.nf index 06425088..b9709dc0 100644 --- a/tests/modules/gatk4/markduplicates/main.nf +++ b/tests/modules/gatk4/markduplicates/main.nf @@ -11,3 +11,12 @@ workflow test_gatk4_markduplicates { GATK4_MARKDUPLICATES ( input ) } + +workflow test_gatk4_markduplicates_multiple_bams { + input = [ [ id:'test', single_end:false ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam'], checkIfExists: true) + ] + + GATK4_MARKDUPLICATES ( input ) +} diff --git a/tests/modules/gatk4/markduplicates/test.yml b/tests/modules/gatk4/markduplicates/test.yml index 028147e6..99296ca4 100644 --- a/tests/modules/gatk4/markduplicates/test.yml +++ b/tests/modules/gatk4/markduplicates/test.yml @@ -1,8 +1,23 @@ - name: gatk4 markduplicates test_gatk4_markduplicates command: nextflow run tests/modules/gatk4/markduplicates -entry test_gatk4_markduplicates -c tests/config/nextflow.config tags: - - gatk4 - gatk4/markduplicates + - gatk4 files: + - path: output/gatk4/test.bai + md5sum: e9c125e82553209933883b4fe2b8d7c2 - path: output/gatk4/test.bam - md5sum: 3b6facab3afbacfa08a7a975efbd2c6b + md5sum: bda9a7bf5057f2288ed70be3eb8a753f + - path: output/gatk4/test.metrics + +- name: gatk4 markduplicates test_gatk4_markduplicates_multiple_bams + command: nextflow run tests/modules/gatk4/markduplicates -entry test_gatk4_markduplicates_multiple_bams -c tests/config/nextflow.config + tags: + - gatk4/markduplicates + - gatk4 + files: + - path: output/gatk4/test.bai + md5sum: 93cebe29e7cca2064262b739235cca9b + - path: output/gatk4/test.bam + 
md5sum: dcd6f584006b04141fb787001a8ecacc + - path: output/gatk4/test.metrics diff --git a/tests/modules/samtools/merge/main.nf b/tests/modules/samtools/merge/main.nf index a4511a34..07485df1 100644 --- a/tests/modules/samtools/merge/main.nf +++ b/tests/modules/samtools/merge/main.nf @@ -11,5 +11,15 @@ workflow test_samtools_merge { file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam'], checkIfExists: true)] ] - SAMTOOLS_MERGE ( input ) + SAMTOOLS_MERGE ( input, [] ) +} + +workflow test_samtools_merge_cram { + input = [ [ id: 'test' ], // meta map + [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_cram'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_cram'], checkIfExists: true), + ] + ] + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + SAMTOOLS_MERGE ( input, fasta ) } diff --git a/tests/modules/samtools/merge/test.yml b/tests/modules/samtools/merge/test.yml index d0674ca4..b39ca2ec 100644 --- a/tests/modules/samtools/merge/test.yml +++ b/tests/modules/samtools/merge/test.yml @@ -1,7 +1,15 @@ -- name: samtools merge - command: nextflow run ./tests/modules/samtools/merge -entry test_samtools_merge -c tests/config/nextflow.config +- name: samtools merge test_samtools_merge + command: nextflow run tests/modules/samtools/merge -entry test_samtools_merge -c tests/config/nextflow.config tags: - - samtools - samtools/merge + - samtools files: - path: output/samtools/test_merged.bam + +- name: samtools merge test_samtools_merge_cram + command: nextflow run tests/modules/samtools/merge -entry test_samtools_merge_cram -c tests/config/nextflow.config + tags: + - samtools/merge + - samtools + files: + - path: output/samtools/test_merged.cram diff --git a/tests/modules/samtools/stats/main.nf b/tests/modules/samtools/stats/main.nf index 04a689fe..8e8b0c88 100644 --- a/tests/modules/samtools/stats/main.nf 
+++ b/tests/modules/samtools/stats/main.nf @@ -10,5 +10,15 @@ workflow test_samtools_stats { file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true) ] - SAMTOOLS_STATS ( input ) + SAMTOOLS_STATS ( input, []) +} + +workflow test_samtools_stats_cram { + input = [ [ id: 'test' ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_cram'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_cram_crai'], checkIfExists: true) + ] + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + + SAMTOOLS_STATS ( input, fasta ) } diff --git a/tests/modules/samtools/stats/test.yml b/tests/modules/samtools/stats/test.yml index cf44b846..a194c666 100644 --- a/tests/modules/samtools/stats/test.yml +++ b/tests/modules/samtools/stats/test.yml @@ -1,8 +1,17 @@ -- name: samtools stats - command: nextflow run ./tests/modules/samtools/stats -entry test_samtools_stats -c tests/config/nextflow.config +- name: samtools stats test_samtools_stats + command: nextflow run tests/modules/samtools/stats -entry test_samtools_stats -c tests/config/nextflow.config tags: - samtools - samtools/stats files: - - path: ./output/samtools/test.paired_end.sorted.bam.stats + - path: output/samtools/test.paired_end.sorted.bam.stats md5sum: a7f36cf11fd3bf97e0a0ae29c0627296 + +- name: samtools stats test_samtools_stats_cram + command: nextflow run tests/modules/samtools/stats -entry test_samtools_stats_cram -c tests/config/nextflow.config + tags: + - samtools + - samtools/stats + files: + - path: output/samtools/test.paired_end.recalibrated.sorted.cram.stats + md5sum: bd55a1da30028403f4b66dacf7a2a20e diff --git a/tests/modules/samtools/view/main.nf b/tests/modules/samtools/view/main.nf index c60acb73..bd270cd8 100644 --- a/tests/modules/samtools/view/main.nf +++ b/tests/modules/samtools/view/main.nf @@ -7,8 +7,17 @@ 
include { SAMTOOLS_VIEW } from '../../../../modules/samtools/view/main.nf' addPa workflow test_samtools_view { input = [ [ id:'test', single_end:false ], // meta map file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true) - ] - SAMTOOLS_VIEW ( input ) + SAMTOOLS_VIEW ( input, [] ) +} + +workflow test_samtools_view_cram { + input = [ [ id: 'test' ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_cram'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_cram_crai'], checkIfExists: true) + ] + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + + SAMTOOLS_VIEW ( input, fasta ) } diff --git a/tests/modules/samtools/view/test.yml b/tests/modules/samtools/view/test.yml index 383dfa87..ceaa0e89 100644 --- a/tests/modules/samtools/view/test.yml +++ b/tests/modules/samtools/view/test.yml @@ -1,8 +1,16 @@ -- name: samtools view +- name: samtools view test_samtools_view command: nextflow run tests/modules/samtools/view -entry test_samtools_view -c tests/config/nextflow.config tags: - - samtools - samtools/view + - samtools files: - path: output/samtools/test.bam md5sum: 8fb1e82f76416e9e30fc6b2357e2cf13 + +- name: samtools view test_samtools_view_cram + command: nextflow run tests/modules/samtools/view -entry test_samtools_view_cram -c tests/config/nextflow.config + tags: + - samtools/view + - samtools + files: + - path: output/samtools/test.cram