From 71945a5b5f4126593aef76abdf1a2f82aa468566 Mon Sep 17 00:00:00 2001 From: GCJMackenzie <43276267+GCJMackenzie@users.noreply.github.com> Date: Fri, 29 Oct 2021 11:27:56 +0100 Subject: [PATCH 1/5] Mutect2 add mitochondria mode and update tests (#967) * new mitochondria mode added, tests updated to allow for temp fix for test data * add cram test * bam/bam_idx renamed to input and input_index Co-authored-by: GCJMackenzie --- modules/gatk4/mutect2/main.nf | 41 ++++++++++---------- modules/gatk4/mutect2/meta.yml | 23 ++++++++--- tests/modules/gatk4/mutect2/main.nf | 57 ++++++++++++++++++++++++++-- tests/modules/gatk4/mutect2/test.yml | 26 ++++++++++++- 4 files changed, 116 insertions(+), 31 deletions(-) diff --git a/modules/gatk4/mutect2/main.nf b/modules/gatk4/mutect2/main.nf index 9b3f8b3f..7999eec3 100644 --- a/modules/gatk4/mutect2/main.nf +++ b/modules/gatk4/mutect2/main.nf @@ -19,9 +19,11 @@ process GATK4_MUTECT2 { } input: - tuple val(meta) , path(bam) , path(bai) , val(which_norm) - val run_single - val run_pon + tuple val(meta) , path(input) , path(input_index) , val(which_norm) + val run_single + val run_pon + val run_mito + val interval_label path fasta path fastaidx path dict @@ -39,35 +41,34 @@ process GATK4_MUTECT2 { script: def prefix = options.suffix ? 
"${meta.id}${options.suffix}" : "${meta.id}" - def inputsList = [] - def normalsList = [] - def inputsCommand = '' - def panelsCommand = '' - def normalsCommand = '' + def panels_command = '' + def normals_command = '' - bam.each() {a -> inputsList.add(" -I " + a ) } - inputsCommand = inputsList.join( ' ') + def inputs_command = '-I ' + input.join( ' -I ') if(run_pon) { - panelsCommand = '' - normalsCommand = '' + panels_command = '' + normals_command = '' } else if(run_single) { - panelsCommand = " --germline-resource $germline_resource --panel-of-normals $panel_of_normals" - normalsCommand = '' + panels_command = " --germline-resource $germline_resource --panel-of-normals $panel_of_normals" + normals_command = '' + + } else if(run_mito){ + panels_command = "-L ${interval_label} --mitochondria-mode" + normals_command = '' } else { - panelsCommand = " --germline-resource $germline_resource --panel-of-normals $panel_of_normals --f1r2-tar-gz ${prefix}.f1r2.tar.gz" - which_norm.each() {a -> normalsList.add(" -normal " + a ) } - normalsCommand = normalsList.join( ' ') + panels_command = " --germline-resource $germline_resource --panel-of-normals $panel_of_normals --f1r2-tar-gz ${prefix}.f1r2.tar.gz" + normals_command = '-normal ' + which_norm.join( ' -normal ') } """ gatk Mutect2 \\ -R ${fasta} \\ - ${inputsCommand} \\ - ${normalsCommand} \\ - ${panelsCommand} \\ + ${inputs_command} \\ + ${normals_command} \\ + ${panels_command} \\ -O ${prefix}.vcf.gz \\ $options.args diff --git a/modules/gatk4/mutect2/meta.yml b/modules/gatk4/mutect2/meta.yml index 4c38a049..44601e41 100644 --- a/modules/gatk4/mutect2/meta.yml +++ b/modules/gatk4/mutect2/meta.yml @@ -22,23 +22,34 @@ input: description: | Groovy Map containing sample information e.g. 
[ id:'test'] - - bam: + - input: type: list - description: list of BAM files - pattern: "*.bam" - - bai: + description: list of BAM files, also able to take CRAM as an input + pattern: "*.{bam,cram}" + - input_index: type: list - description: list of BAM file indexes - pattern: "*.bam.bai" + description: list of BAM file indexes, also able to take CRAM indexes as an input + pattern: "*.{bam.bai,cram.crai}" - which_norm: type: list description: optional list of sample headers contained in the normal sample bam files (these are required for tumor_normal_pair mode) + pattern: "testN" - run_single: type: boolean description: Specify whether or not to run in tumor_single mode instead of tumor_normal_pair mode (will be ignored if run_pon is also true) + pattern: "true/false" - run_pon: type: boolean description: Specify whether or not to run in panel_of_normal mode instead of tumor_normal_pair mode + pattern: "true/false" + - run_mito: + type: boolean + description: Specify whether or not to run in mitochondria-mode instead of tumor_normal_pair mode + pattern: "true/false" + - interval_label: + type: string + description: Specify the label used for mitochondrial chromosome when mutect2 is run in mitochondria mode. + pattern: "chrM" - fasta: type: file description: The reference fasta file diff --git a/tests/modules/gatk4/mutect2/main.nf b/tests/modules/gatk4/mutect2/main.nf index 072b3125..293739e4 100644 --- a/tests/modules/gatk4/mutect2/main.nf +++ b/tests/modules/gatk4/mutect2/main.nf @@ -3,6 +3,8 @@ nextflow.enable.dsl = 2 include { GATK4_MUTECT2 } from '../../../../modules/gatk4/mutect2/main.nf' addParams( options: [:] ) +// used to run with the mitochondria mode setting as this increases sensitivity, allowing for some tumor_normal variants to be detected while the old test data is still in use, will be removed when new test data for sarek is available. 
+include { GATK4_MUTECT2 as GATK4_TEMPFIX_MUTECT2 } from '../../../../modules/gatk4/mutect2/main.nf' addParams( options: [args: '--mitochondria-mode'] ) workflow test_gatk4_mutect2_tumor_normal_pair { input = [ [ id:'test'], // meta map @@ -12,6 +14,8 @@ workflow test_gatk4_mutect2_tumor_normal_pair { ] run_single = false run_pon = false + run_mito = false + interval_label = [] fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) fastaidx = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) @@ -20,7 +24,7 @@ workflow test_gatk4_mutect2_tumor_normal_pair { panel_of_normals = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_vcf_gz'], checkIfExists: true) panel_of_normals_idx = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_vcf_gz_tbi'], checkIfExists: true) - GATK4_MUTECT2 ( input , run_single , run_pon , fasta , fastaidx , dict , germline_resource, germline_resource_idx , panel_of_normals , panel_of_normals_idx ) + GATK4_TEMPFIX_MUTECT2 ( input , run_single , run_pon , run_mito , interval_label , fasta , fastaidx , dict , germline_resource, germline_resource_idx , panel_of_normals , panel_of_normals_idx ) } workflow test_gatk4_mutect2_tumor_single { @@ -31,6 +35,8 @@ workflow test_gatk4_mutect2_tumor_single { ] run_single = true run_pon = false + run_mito = false + interval_label = [] fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) fastaidx = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) @@ -39,7 +45,28 @@ workflow test_gatk4_mutect2_tumor_single { panel_of_normals = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_vcf_gz'], checkIfExists: true) 
panel_of_normals_idx = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_vcf_gz_tbi'], checkIfExists: true) - GATK4_MUTECT2 ( input , run_single , run_pon , fasta , fastaidx , dict , germline_resource, germline_resource_idx , panel_of_normals , panel_of_normals_idx ) + GATK4_MUTECT2 ( input , run_single , run_pon , run_mito , interval_label , fasta , fastaidx , dict , germline_resource, germline_resource_idx , panel_of_normals , panel_of_normals_idx ) +} + +workflow test_gatk4_mutect2_cram_input { + input = [ [ id:'test'], // meta map + [ file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_cram'], checkIfExists: true)], + [ file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_cram_crai'], checkIfExists: true)], + [] + ] + run_single = true + run_pon = false + run_mito = false + interval_label = [] + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + fastaidx = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) + germline_resource = file(params.test_data['homo_sapiens']['genome']['gnomad_r2_1_1_vcf_gz'], checkIfExists: true) + germline_resource_idx = file(params.test_data['homo_sapiens']['genome']['gnomad_r2_1_1_vcf_gz_tbi'], checkIfExists: true) + panel_of_normals = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_vcf_gz'], checkIfExists: true) + panel_of_normals_idx = file(params.test_data['homo_sapiens']['genome']['mills_and_1000g_indels_vcf_gz_tbi'], checkIfExists: true) + + GATK4_MUTECT2 ( input , run_single , run_pon , run_mito , interval_label , fasta , fastaidx , dict , germline_resource, germline_resource_idx , panel_of_normals , panel_of_normals_idx ) } workflow test_gatk4_mutect2_generate_pon { @@ -50,6 +77,8 @@ workflow test_gatk4_mutect2_generate_pon { ] run_single 
= false run_pon = true + run_mito = false + interval_label = [] fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) fastaidx = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) @@ -58,5 +87,27 @@ workflow test_gatk4_mutect2_generate_pon { panel_of_normals = [] panel_of_normals_idx = [] - GATK4_MUTECT2 ( input , run_single , run_pon , fasta , fastaidx , dict , germline_resource, germline_resource_idx , panel_of_normals , panel_of_normals_idx ) + GATK4_MUTECT2 ( input , run_single , run_pon, run_mito , interval_label , fasta , fastaidx , dict , germline_resource, germline_resource_idx , panel_of_normals , panel_of_normals_idx ) +} + +// mitochondria mode would ideally have some mitochondria test data, but since the mitochondria settings only increase detection sensitivity, we can use the chr22 data as a stand in as it is already a small dataset, the extra variants detected compared to generate_pon shows the mode is working. 
+workflow test_gatk4_mutect2_mitochondria { + input = [ [ id:'test'], // meta map + [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_bam'], checkIfExists: true)], + [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_bam_bai'], checkIfExists: true)], + [] + ] + run_single = false + run_pon = false + run_mito = true + interval_label = 'chr22' + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + fastaidx = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) + germline_resource = [] + germline_resource_idx = [] + panel_of_normals = [] + panel_of_normals_idx = [] + + GATK4_MUTECT2 ( input , run_single , run_pon, run_mito , interval_label , fasta , fastaidx , dict , germline_resource, germline_resource_idx , panel_of_normals , panel_of_normals_idx ) } diff --git a/tests/modules/gatk4/mutect2/test.yml b/tests/modules/gatk4/mutect2/test.yml index 16f39875..031ed072 100644 --- a/tests/modules/gatk4/mutect2/test.yml +++ b/tests/modules/gatk4/mutect2/test.yml @@ -7,7 +7,7 @@ - path: output/gatk4/test.f1r2.tar.gz - path: output/gatk4/test.vcf.gz - path: output/gatk4/test.vcf.gz.stats - md5sum: 6ecb874e6a95aa48233587b876c2a7a9 + md5sum: 887d54e393510f1d0aa2c33bc6155161 - path: output/gatk4/test.vcf.gz.tbi - name: gatk4 mutect2 test_gatk4_mutect2_tumor_single @@ -18,7 +18,18 @@ files: - path: output/gatk4/test.vcf.gz - path: output/gatk4/test.vcf.gz.stats - md5sum: e7ef613f7d158b8a0adf44abe5db2029 + md5sum: 106c5828b02b906c97922618b6072169 + - path: output/gatk4/test.vcf.gz.tbi + +- name: gatk4 mutect2 test_gatk4_mutect2_cram_input + command: nextflow run tests/modules/gatk4/mutect2 -entry test_gatk4_mutect2_cram_input -c tests/config/nextflow.config + tags: + - gatk4 + - gatk4/mutect2 + files: + - path: output/gatk4/test.vcf.gz 
+ - path: output/gatk4/test.vcf.gz.stats + md5sum: 106c5828b02b906c97922618b6072169 - path: output/gatk4/test.vcf.gz.tbi - name: gatk4 mutect2 test_gatk4_mutect2_generate_pon @@ -31,3 +42,14 @@ - path: output/gatk4/test.vcf.gz.stats md5sum: 4f77301a125913170b8e9e7828b4ca3f - path: output/gatk4/test.vcf.gz.tbi + +- name: gatk4 mutect2 test_gatk4_mutect2_mitochondria + command: nextflow run tests/modules/gatk4/mutect2 -entry test_gatk4_mutect2_mitochondria -c tests/config/nextflow.config + tags: + - gatk4 + - gatk4/mutect2 + files: + - path: output/gatk4/test.vcf.gz + - path: output/gatk4/test.vcf.gz.stats + md5sum: fc6ea14ca2da346babe78161beea28c9 + - path: output/gatk4/test.vcf.gz.tbi From ac1e6df076195cec553a2079c9cebd94026a0d47 Mon Sep 17 00:00:00 2001 From: FriederikeHanssen Date: Fri, 29 Oct 2021 13:01:05 +0200 Subject: [PATCH 2/5] Update to allow cram + update needed to use the gatk4 modules in sarek (#976) * Make samtools/merge cram compliant * samtools/stats cram compliance * update yml file * samtools/view to deal with crams * Update tests to make sure cram works * also fix tmp dir and min mem in one go * basequalityrecal test for cram + min mem + tmpdir * update haplotypecaller for sarek * update haplotype yml * update markdup to allow multiple bams, take out params to be passed with options.args * remove TODO statement * Remove variable md5sum * add empty input to stats module in subworkflows * subworkflows seem to work now on my side * Apply code review Co-authored-by: Maxime U. Garcia * replace bam with input to be more inclusive * rename everywhere * rename input * remove variable checksum Co-authored-by: Maxime U. 
Garcia --- modules/gatk4/applybqsr/main.nf | 10 +++++-- modules/gatk4/applybqsr/meta.yml | 10 +++++-- modules/gatk4/baserecalibrator/main.nf | 11 +++++-- modules/gatk4/baserecalibrator/meta.yml | 11 +++++-- modules/gatk4/haplotypecaller/main.nf | 18 +++++++---- modules/gatk4/haplotypecaller/meta.yml | 23 ++++++++++---- modules/gatk4/markduplicates/main.nf | 13 ++++++-- modules/gatk4/markduplicates/meta.yml | 1 + modules/manta/germline/main.nf | 4 +-- modules/manta/germline/meta.yml | 4 +-- modules/manta/somatic/main.nf | 6 ++-- modules/manta/somatic/meta.yml | 8 ++--- modules/manta/tumoronly/main.nf | 4 +-- modules/manta/tumoronly/meta.yml | 5 ++-- modules/samtools/merge/main.nf | 12 +++++--- modules/samtools/merge/meta.yml | 17 ++++++++--- modules/samtools/stats/main.nf | 6 ++-- modules/samtools/stats/meta.yml | 21 ++++++++----- modules/samtools/view/main.nf | 12 +++++--- modules/samtools/view/meta.yml | 15 ++++++++-- modules/strelka/germline/main.nf | 4 +-- modules/strelka/germline/meta.yml | 12 ++++---- modules/strelka/somatic/main.nf | 6 ++-- modules/strelka/somatic/meta.yml | 8 ++--- .../nf-core/bam_stats_samtools/main.nf | 2 +- tests/modules/gatk4/applybqsr/main.nf | 14 +++++++++ tests/modules/gatk4/applybqsr/test.yml | 17 ++++++++--- tests/modules/gatk4/baserecalibrator/main.nf | 15 ++++++++++ tests/modules/gatk4/baserecalibrator/test.yml | 15 ++++++++-- tests/modules/gatk4/haplotypecaller/main.nf | 30 ++++++++++++++++++- tests/modules/gatk4/haplotypecaller/test.yml | 25 ++++++++++++---- tests/modules/gatk4/markduplicates/main.nf | 9 ++++++ tests/modules/gatk4/markduplicates/test.yml | 19 ++++++++++-- tests/modules/samtools/merge/main.nf | 12 +++++++- tests/modules/samtools/merge/test.yml | 14 +++++++-- tests/modules/samtools/stats/main.nf | 12 +++++++- tests/modules/samtools/stats/test.yml | 15 ++++++++-- tests/modules/samtools/view/main.nf | 13 ++++++-- tests/modules/samtools/view/test.yml | 12 ++++++-- 39 files changed, 356 insertions(+), 109 deletions(-) 
diff --git a/modules/gatk4/applybqsr/main.nf b/modules/gatk4/applybqsr/main.nf index e804bcff..508a29ca 100644 --- a/modules/gatk4/applybqsr/main.nf +++ b/modules/gatk4/applybqsr/main.nf @@ -19,7 +19,7 @@ process GATK4_APPLYBQSR { } input: - tuple val(meta), path(bam), path(bai), path(bqsr_table) + tuple val(meta), path(input), path(input_index), path(bqsr_table) path fasta path fastaidx path dict @@ -32,12 +32,18 @@ process GATK4_APPLYBQSR { script: def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" def interval = intervals ? "-L ${intervals}" : "" + if (!task.memory) { + log.info '[GATK ApplyBQSR] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = task.memory.giga + } """ gatk ApplyBQSR \\ -R $fasta \\ - -I $bam \\ + -I $input \\ --bqsr-recal-file $bqsr_table \\ $interval \\ + --tmp-dir . \\ -O ${prefix}.bam \\ $options.args diff --git a/modules/gatk4/applybqsr/meta.yml b/modules/gatk4/applybqsr/meta.yml index e09e8c52..b002dca6 100644 --- a/modules/gatk4/applybqsr/meta.yml +++ b/modules/gatk4/applybqsr/meta.yml @@ -20,10 +20,14 @@ input: description: | Groovy Map containing sample information e.g. 
[ id:'test', single_end:false ] - - bam: + - input: type: file - description: BAM file from alignment - pattern: "*.{bam}" + description: BAM/CRAM file from alignment + pattern: "*.{bam,cram}" + - input_index: + type: file + description: BAI/CRAI file from alignment + pattern: "*.{bai,crai}" - bqsr_table: type: file description: Recalibration table from gatk4_baserecalibrator diff --git a/modules/gatk4/baserecalibrator/main.nf b/modules/gatk4/baserecalibrator/main.nf index 6033fbf1..85c30daf 100644 --- a/modules/gatk4/baserecalibrator/main.nf +++ b/modules/gatk4/baserecalibrator/main.nf @@ -19,7 +19,7 @@ process GATK4_BASERECALIBRATOR { } input: - tuple val(meta), path(bam), path(bai) + tuple val(meta), path(input), path(input_index) path fasta path fastaidx path dict @@ -35,12 +35,19 @@ process GATK4_BASERECALIBRATOR { def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" def intervalsCommand = intervalsBed ? "-L ${intervalsBed}" : "" def sitesCommand = knownSites.collect{"--known-sites ${it}"}.join(' ') + + if (!task.memory) { + log.info '[GATK BaseRecalibrator] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = task.memory.giga + } """ gatk BaseRecalibrator \ -R $fasta \ - -I $bam \ + -I $input \ $sitesCommand \ $intervalsCommand \ + --tmp-dir . \ $options.args \ -O ${prefix}.table diff --git a/modules/gatk4/baserecalibrator/meta.yml b/modules/gatk4/baserecalibrator/meta.yml index d579d9e5..7fd273e1 100644 --- a/modules/gatk4/baserecalibrator/meta.yml +++ b/modules/gatk4/baserecalibrator/meta.yml @@ -20,10 +20,14 @@ input: description: | Groovy Map containing sample information e.g. 
[ id:'test', single_end:false ] - - bam: + - input: type: file - description: BAM file from alignment - pattern: "*.{bam}" + description: BAM/CRAM file from alignment + pattern: "*.{bam,cram}" + - input_index: + type: file + description: BAI/CRAI file from alignment + pattern: "*.{bai,crai}" - fasta: type: file description: The reference fasta file @@ -57,3 +61,4 @@ output: authors: - "@yocra3" + - "@FriederikeHanssen" diff --git a/modules/gatk4/haplotypecaller/main.nf b/modules/gatk4/haplotypecaller/main.nf index 01b71ccb..4bddbb6d 100644 --- a/modules/gatk4/haplotypecaller/main.nf +++ b/modules/gatk4/haplotypecaller/main.nf @@ -19,10 +19,13 @@ process GATK4_HAPLOTYPECALLER { } input: - tuple val(meta), path(bam), path(bai) + tuple val(meta), path(input), path(input_index) path fasta path fai path dict + path dbsnp + path dbsnp_tbi + path interval output: tuple val(meta), path("*.vcf.gz"), emit: vcf @@ -30,8 +33,10 @@ process GATK4_HAPLOTYPECALLER { path "versions.yml" , emit: versions script: - def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" - def avail_mem = 3 + def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" + def interval_option = interval ? "-L ${interval}" : "" + def dbsnp_option = dbsnp ? "-D ${dbsnp}" : "" + def avail_mem = 3 if (!task.memory) { log.info '[GATK HaplotypeCaller] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' } else { @@ -42,9 +47,12 @@ process GATK4_HAPLOTYPECALLER { --java-options "-Xmx${avail_mem}g" \\ HaplotypeCaller \\ -R $fasta \\ - -I $bam \\ + -I $input \\ + ${dbsnp_option} \\ + ${interval_option} \\ -O ${prefix}.vcf.gz \\ - $options.args + $options.args \\ + --tmp-dir . 
cat <<-END_VERSIONS > versions.yml ${getProcessName(task.process)}: diff --git a/modules/gatk4/haplotypecaller/meta.yml b/modules/gatk4/haplotypecaller/meta.yml index 6a1bd7ed..6c9d0891 100644 --- a/modules/gatk4/haplotypecaller/meta.yml +++ b/modules/gatk4/haplotypecaller/meta.yml @@ -21,14 +21,14 @@ input: description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - bam: + - input: type: file - description: BAM file - pattern: "*.bam" - - bai: + description: BAM/CRAM file from alignment + pattern: "*.{bam,cram}" + - input_index: type: file - description: Index of BAM file - pattern: "*.bam.bai" + description: BAI/CRAI file from alignment + pattern: "*.{bai,crai}" - fasta: type: file description: The reference fasta file @@ -41,6 +41,16 @@ input: type: file description: GATK sequence dictionary pattern: "*.dict" + - dbsnp: + type: file + description: VCF file containing known sites (optional) + - dbsnp_tbi: + type: file + description: VCF index of dbsnp (optional) + - interval: + type: file + description: Bed file with the genomic regions included in the library (optional) + output: - meta: type: map @@ -62,3 +72,4 @@ output: authors: - "@suzannejin" + - "@FriederikeHanssen" diff --git a/modules/gatk4/markduplicates/main.nf b/modules/gatk4/markduplicates/main.nf index 8f94f4dd..b1ff5222 100644 --- a/modules/gatk4/markduplicates/main.nf +++ b/modules/gatk4/markduplicates/main.nf @@ -19,21 +19,28 @@ process GATK4_MARKDUPLICATES { } input: - tuple val(meta), path(bam) + tuple val(meta), path(bams) output: tuple val(meta), path("*.bam") , emit: bam + tuple val(meta), path("*.bai") , emit: bai tuple val(meta), path("*.metrics"), emit: metrics path "versions.yml" , emit: versions script: def prefix = options.suffix ? 
"${meta.id}${options.suffix}" : "${meta.id}" + def bam_list = bams.collect(){ bam -> "--INPUT ".concat(bam.toString()) }.join(" ") + def avail_mem = 3 + if (!task.memory) { + log.info '[GATK HaplotypeCaller] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = task.memory.giga + } """ gatk MarkDuplicates \\ - --INPUT $bam \\ + $bam_list \\ --METRICS_FILE ${prefix}.metrics \\ --TMP_DIR . \\ - --ASSUME_SORT_ORDER coordinate \\ --CREATE_INDEX true \\ --OUTPUT ${prefix}.bam \\ $options.args diff --git a/modules/gatk4/markduplicates/meta.yml b/modules/gatk4/markduplicates/meta.yml index 59aaad4d..5777067a 100644 --- a/modules/gatk4/markduplicates/meta.yml +++ b/modules/gatk4/markduplicates/meta.yml @@ -47,3 +47,4 @@ output: authors: - "@ajodeh-juma" + - "@FriederikeHanssen" diff --git a/modules/manta/germline/main.nf b/modules/manta/germline/main.nf index ca2ac9dc..f957a7ec 100644 --- a/modules/manta/germline/main.nf +++ b/modules/manta/germline/main.nf @@ -19,7 +19,7 @@ process MANTA_GERMLINE { } input: - tuple val(meta), path(cram), path(crai) + tuple val(meta), path(input), path(input_index) path fasta path fai path target_bed @@ -39,7 +39,7 @@ process MANTA_GERMLINE { def options_manta = target_bed ? "--exome --callRegions $target_bed" : "" """ configManta.py \ - --bam $cram \ + --bam $input \ --reference $fasta \ $options_manta \ --runDir manta diff --git a/modules/manta/germline/meta.yml b/modules/manta/germline/meta.yml index 7933fd6c..3bdb8264 100644 --- a/modules/manta/germline/meta.yml +++ b/modules/manta/germline/meta.yml @@ -23,11 +23,11 @@ input: description: | Groovy Map containing sample information e.g. 
[ id:'test', single_end:false ] - - cram: + - input: type: file description: BAM/CRAM/SAM file pattern: "*.{bam,cram,sam}" - - crai: + - input_index: type: file description: BAM/CRAM/SAM index file pattern: "*.{bai,crai,sai}" diff --git a/modules/manta/somatic/main.nf b/modules/manta/somatic/main.nf index 16a30f17..f912d478 100644 --- a/modules/manta/somatic/main.nf +++ b/modules/manta/somatic/main.nf @@ -19,7 +19,7 @@ process MANTA_SOMATIC { } input: - tuple val(meta), path(cram_normal), path(crai_normal), path(cram_tumor), path(crai_tumor) + tuple val(meta), path(input_normal), path(input_index_normal), path(input_tumor), path(input_index_tumor) path fasta path fai path target_bed @@ -42,8 +42,8 @@ process MANTA_SOMATIC { """ configManta.py \ - --tumorBam $cram_tumor \ - --normalBam $cram_normal \ + --tumorBam $input_tumor \ + --normalBam $input_normal \ --reference $fasta \ $options_manta \ --runDir manta diff --git a/modules/manta/somatic/meta.yml b/modules/manta/somatic/meta.yml index 08103ba7..ddd0eafe 100644 --- a/modules/manta/somatic/meta.yml +++ b/modules/manta/somatic/meta.yml @@ -23,19 +23,19 @@ input: description: | Groovy Map containing sample information e.g. 
[ id:'test', single_end:false ] - - cram_normal: + - input_normal: type: file description: BAM/CRAM/SAM file pattern: "*.{bam,cram,sam}" - - crai_normal: + - input_index_normal: type: file description: BAM/CRAM/SAM index file pattern: "*.{bai,crai,sai}" - - cram_tumor: + - input_tumor: type: file description: BAM/CRAM/SAM file pattern: "*.{bam,cram,sam}" - - crai_tumor: + - input_index_tumor: type: file description: BAM/CRAM/SAM index file pattern: "*.{bai,crai,sai}" diff --git a/modules/manta/tumoronly/main.nf b/modules/manta/tumoronly/main.nf index a86279df..f20e8128 100644 --- a/modules/manta/tumoronly/main.nf +++ b/modules/manta/tumoronly/main.nf @@ -19,7 +19,7 @@ process MANTA_TUMORONLY { } input: - tuple val(meta), path(cram), path(crai) + tuple val(meta), path(input), path(input_index) path fasta path fai path target_bed @@ -39,7 +39,7 @@ process MANTA_TUMORONLY { def options_manta = target_bed ? "--exome --callRegions $target_bed" : "" """ configManta.py \ - --tumorBam $cram \ + --tumorBam $input \ --reference $fasta \ $options_manta \ --runDir manta diff --git a/modules/manta/tumoronly/meta.yml b/modules/manta/tumoronly/meta.yml index d4af9402..86d1c6c0 100644 --- a/modules/manta/tumoronly/meta.yml +++ b/modules/manta/tumoronly/meta.yml @@ -23,11 +23,11 @@ input: description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - cram: + - input: type: file description: BAM/CRAM/SAM file pattern: "*.{bam,cram,sam}" - - crai: + - input_index: type: file description: BAM/CRAM/SAM index file pattern: "*.{bai,crai,sai}" @@ -54,7 +54,6 @@ output: description: | Groovy Map containing sample information e.g. 
[ id:'test', single_end:false ] - - candidate_small_indels_vcf: type: file description: Gzipped VCF file containing variants diff --git a/modules/samtools/merge/main.nf b/modules/samtools/merge/main.nf index 34c40d57..fefb423b 100644 --- a/modules/samtools/merge/main.nf +++ b/modules/samtools/merge/main.nf @@ -19,16 +19,20 @@ process SAMTOOLS_MERGE { } input: - tuple val(meta), path(bams) + tuple val(meta), path(input_files) + path fasta output: - tuple val(meta), path("${prefix}.bam"), emit: bam - path "versions.yml" , emit: versions + tuple val(meta), path("${prefix}.bam"), optional:true, emit: bam + tuple val(meta), path("${prefix}.cram"), optional:true, emit: cram + path "versions.yml" , emit: versions script: prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" + def file_type = input_files[0].getExtension() + def reference = fasta ? "--reference ${fasta}" : "" """ - samtools merge ${prefix}.bam $bams + samtools merge ${reference} ${prefix}.${file_type} $input_files cat <<-END_VERSIONS > versions.yml ${getProcessName(task.process)}: ${getSoftwareName(task.process)}: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') diff --git a/modules/samtools/merge/meta.yml b/modules/samtools/merge/meta.yml index 78b75b36..2576a3a3 100644 --- a/modules/samtools/merge/meta.yml +++ b/modules/samtools/merge/meta.yml @@ -1,5 +1,5 @@ name: samtools_merge -description: Merge BAM file +description: Merge BAM or CRAM file keywords: - merge - bam @@ -21,20 +21,28 @@ input: description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - bam: + - input_files: type: file - description: BAM file + description: BAM/CRAM file pattern: "*.{bam,cram,sam}" + - fasta: + type: optional file + description: Reference file the CRAM was created with + pattern: "*.{fasta,fa}" output: - meta: type: map description: | Groovy Map containing sample information e.g. 
[ id:'test', single_end:false ] - - merged_bam: + - bam: type: file description: BAM file pattern: "*.{bam}" + - cram: + type: file + description: CRAM file + pattern: "*.{cram}" - versions: type: file description: File containing software versions @@ -43,3 +51,4 @@ authors: - "@drpatelh" - "@yuukiiwa " - "@maxulysse" + - "@FriederikeHanssen" diff --git a/modules/samtools/stats/main.nf b/modules/samtools/stats/main.nf index 6218dd2d..aab43410 100644 --- a/modules/samtools/stats/main.nf +++ b/modules/samtools/stats/main.nf @@ -19,15 +19,17 @@ process SAMTOOLS_STATS { } input: - tuple val(meta), path(bam), path(bai) + tuple val(meta), path(input), path(input_index) + path fasta output: tuple val(meta), path("*.stats"), emit: stats path "versions.yml" , emit: versions script: + def reference = fasta ? "--reference ${fasta}" : "" """ - samtools stats $bam > ${bam}.stats + samtools stats ${reference} ${input} > ${input}.stats cat <<-END_VERSIONS > versions.yml ${getProcessName(task.process)}: ${getSoftwareName(task.process)}: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') diff --git a/modules/samtools/stats/meta.yml b/modules/samtools/stats/meta.yml index ae41498a..869e62e3 100644 --- a/modules/samtools/stats/meta.yml +++ b/modules/samtools/stats/meta.yml @@ -22,14 +22,18 @@ input: description: | Groovy Map containing sample information e.g. 
[ id:'test', single_end:false ] - - bam: - type: file - description: BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" - - bai: - type: file - description: Index for BAM/CRAM/SAM file - pattern: "*.{bai,crai,sai}" + - input: + type: file + description: BAM/CRAM file from alignment + pattern: "*.{bam,cram}" + - input_index: + type: file + description: BAI/CRAI file from alignment + pattern: "*.{bai,crai}" + - fasta: + type: optional file + description: Reference file the CRAM was created with + pattern: "*.{fasta,fa}" output: - meta: type: map @@ -46,3 +50,4 @@ output: pattern: "versions.yml" authors: - "@drpatelh" + - "@FriederikeHanssen" diff --git a/modules/samtools/view/main.nf b/modules/samtools/view/main.nf index ec1663e0..b7a047ee 100644 --- a/modules/samtools/view/main.nf +++ b/modules/samtools/view/main.nf @@ -19,16 +19,20 @@ process SAMTOOLS_VIEW { } input: - tuple val(meta), path(bam) + tuple val(meta), path(input) + path fasta output: - tuple val(meta), path("*.bam"), emit: bam - path "versions.yml" , emit: versions + tuple val(meta), path("*.bam") , optional: true, emit: bam + tuple val(meta), path("*.cram"), optional: true, emit: cram + path "versions.yml" , emit: versions script: def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" + def reference = fasta ? "--reference ${fasta} -C" : "" + def file_type = input.getExtension() """ - samtools view $options.args $bam > ${prefix}.bam + samtools view ${reference} $options.args $input > ${prefix}.${file_type} cat <<-END_VERSIONS > versions.yml ${getProcessName(task.process)}: ${getSoftwareName(task.process)}: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') diff --git a/modules/samtools/view/meta.yml b/modules/samtools/view/meta.yml index 29d1ecc1..8abf34af 100644 --- a/modules/samtools/view/meta.yml +++ b/modules/samtools/view/meta.yml @@ -21,10 +21,14 @@ input: description: | Groovy Map containing sample information e.g. 
[ id:'test', single_end:false ] - - bam: + - input: type: file description: BAM/CRAM/SAM file pattern: "*.{bam,cram,sam}" + - fasta: + type: optional file + description: Reference file the CRAM was created with + pattern: "*.{fasta,fa}" output: - meta: type: map @@ -33,8 +37,12 @@ output: e.g. [ id:'test', single_end:false ] - bam: type: file - description: filtered/converted BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" + description: filtered/converted BAM/SAM file + pattern: "*.{bam,sam}" + - cram: + type: file + description: filtered/converted CRAM file + pattern: "*.cram" - versions: type: file description: File containing software versions @@ -42,3 +50,4 @@ output: authors: - "@drpatelh" - "@joseespinosa" + - "@FriederikeHanssen" diff --git a/modules/strelka/germline/main.nf b/modules/strelka/germline/main.nf index 0d201940..5e913c40 100644 --- a/modules/strelka/germline/main.nf +++ b/modules/strelka/germline/main.nf @@ -19,7 +19,7 @@ process STRELKA_GERMLINE { } input: - tuple val(meta), path(bam), path(bai) + tuple val(meta), path(input), path(input_index) path fasta path fai path target_bed @@ -38,7 +38,7 @@ process STRELKA_GERMLINE { def regions = target_bed ? "--exome --callRegions ${target_bed}" : "" """ configureStrelkaGermlineWorkflow.py \\ - --bam $bam \\ + --bam $input \\ --referenceFasta $fasta \\ $regions \\ $options.args \\ diff --git a/modules/strelka/germline/meta.yml b/modules/strelka/germline/meta.yml index 3f86b045..2eeb0f8f 100644 --- a/modules/strelka/germline/meta.yml +++ b/modules/strelka/germline/meta.yml @@ -21,14 +21,14 @@ input: description: | Groovy Map containing sample information e.g. 
[ id:'test'] - - bam: + - input: type: file - description: BAM file - pattern: "*.{bam}" - - bai: + description: BAM/CRAM file + pattern: "*.{bam,cram}" + - input_index: type: file - description: BAM index file - pattern: "*.{bai}" + description: BAI/CRAI index file + pattern: "*.{bai,crai}" - target_bed: type: file description: An optional bed file diff --git a/modules/strelka/somatic/main.nf b/modules/strelka/somatic/main.nf index 02bd5822..633b0a2c 100644 --- a/modules/strelka/somatic/main.nf +++ b/modules/strelka/somatic/main.nf @@ -19,7 +19,7 @@ process STRELKA_SOMATIC { } input: - tuple val(meta), path(cram_normal), path(crai_normal), path(cram_tumor), path(crai_tumor), path(manta_candidate_small_indels), path(manta_candidate_small_indels_tbi) + tuple val(meta), path(input_normal), path(input_index_normal), path(input_tumor), path(input_index_tumor), path(manta_candidate_small_indels), path(manta_candidate_small_indels_tbi) path fasta path fai path target_bed @@ -38,8 +38,8 @@ process STRELKA_SOMATIC { def options_manta = manta_candidate_small_indels ? "--indelCandidates ${manta_candidate_small_indels}" : "" """ configureStrelkaSomaticWorkflow.py \\ - --tumor $cram_tumor \\ - --normal $cram_normal \\ + --tumor $input_tumor \\ + --normal $input_normal \\ --referenceFasta $fasta \\ $options_target_bed \\ $options_manta \\ diff --git a/modules/strelka/somatic/meta.yml b/modules/strelka/somatic/meta.yml index ce5acb33..076c1036 100644 --- a/modules/strelka/somatic/meta.yml +++ b/modules/strelka/somatic/meta.yml @@ -21,19 +21,19 @@ input: description: | Groovy Map containing sample information e.g. 
[ id:'test', single_end:false ] - - cram_normal: + - input_normal: type: file description: BAM/CRAM/SAM file pattern: "*.{bam,cram,sam}" - - crai_normal: + - input_index_normal: type: file description: BAM/CRAM/SAM index file pattern: "*.{bai,crai,sai}" - - cram_tumor: + - input_tumor: type: file description: BAM/CRAM/SAM file pattern: "*.{bam,cram,sam}" - - crai_tumor: + - input_index_tumor: type: file description: BAM/CRAM/SAM index file pattern: "*.{bai,crai,sai}" diff --git a/subworkflows/nf-core/bam_stats_samtools/main.nf b/subworkflows/nf-core/bam_stats_samtools/main.nf index 9276232c..463ec99d 100644 --- a/subworkflows/nf-core/bam_stats_samtools/main.nf +++ b/subworkflows/nf-core/bam_stats_samtools/main.nf @@ -15,7 +15,7 @@ workflow BAM_STATS_SAMTOOLS { main: ch_versions = Channel.empty() - SAMTOOLS_STATS ( ch_bam_bai ) + SAMTOOLS_STATS ( ch_bam_bai, [] ) ch_versions = ch_versions.mix(SAMTOOLS_STATS.out.versions.first()) SAMTOOLS_FLAGSTAT ( ch_bam_bai ) diff --git a/tests/modules/gatk4/applybqsr/main.nf b/tests/modules/gatk4/applybqsr/main.nf index 5fb590b0..80b51015 100644 --- a/tests/modules/gatk4/applybqsr/main.nf +++ b/tests/modules/gatk4/applybqsr/main.nf @@ -30,3 +30,17 @@ workflow test_gatk4_applybqsr_intervals { GATK4_APPLYBQSR ( input, fasta, fai, dict, intervals ) } + +workflow test_gatk4_applybqsr_cram { + input = [ [ id:'test' ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_baserecalibrator_table'], checkIfExists: true) + ] + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) + 
intervals = file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true) + + GATK4_APPLYBQSR ( input, fasta, fai, dict, intervals ) +} diff --git a/tests/modules/gatk4/applybqsr/test.yml b/tests/modules/gatk4/applybqsr/test.yml index 983cc09a..ed89c6ff 100644 --- a/tests/modules/gatk4/applybqsr/test.yml +++ b/tests/modules/gatk4/applybqsr/test.yml @@ -1,17 +1,26 @@ - name: gatk4 applybqsr test_gatk4_applybqsr command: nextflow run tests/modules/gatk4/applybqsr -entry test_gatk4_applybqsr -c tests/config/nextflow.config tags: - - gatk4 - gatk4/applybqsr + - gatk4 files: - path: output/gatk4/test.bam - md5sum: dac716c394db5e83c12b44355c098ca7 + md5sum: 87a2eabae2b7b41574f966612b5addae - name: gatk4 applybqsr test_gatk4_applybqsr_intervals command: nextflow run tests/modules/gatk4/applybqsr -entry test_gatk4_applybqsr_intervals -c tests/config/nextflow.config tags: - - gatk4 - gatk4/applybqsr + - gatk4 files: - path: output/gatk4/test.bam - md5sum: 400441dbe5344658580ba0a24ba57069 + md5sum: 9c015d3c1dbd9eee793b7386f432b6aa + +- name: gatk4 applybqsr test_gatk4_applybqsr_cram + command: nextflow run tests/modules/gatk4/applybqsr -entry test_gatk4_applybqsr_cram -c tests/config/nextflow.config + tags: + - gatk4/applybqsr + - gatk4 + files: + - path: output/gatk4/test.bam + md5sum: 02f84815fdbc99c21c8d42ebdcabbbf7 diff --git a/tests/modules/gatk4/baserecalibrator/main.nf b/tests/modules/gatk4/baserecalibrator/main.nf index 671a1d67..a50c09e3 100644 --- a/tests/modules/gatk4/baserecalibrator/main.nf +++ b/tests/modules/gatk4/baserecalibrator/main.nf @@ -18,6 +18,21 @@ workflow test_gatk4_baserecalibrator { GATK4_BASERECALIBRATOR ( input, fasta, fai, dict, [], sites, sites_tbi ) } +workflow test_gatk4_baserecalibrator_cram { + input = [ [ id:'test' ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true), + 
file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_baserecalibrator_table'], checkIfExists: true) + ] + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) + sites = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz'], checkIfExists: true) + sites_tbi = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz_tbi'], checkIfExists: true) + + GATK4_BASERECALIBRATOR ( input, fasta, fai, dict, [], sites, sites_tbi ) +} + workflow test_gatk4_baserecalibrator_intervals { input = [ [ id:'test' ], // meta map file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), diff --git a/tests/modules/gatk4/baserecalibrator/test.yml b/tests/modules/gatk4/baserecalibrator/test.yml index 3c30d78f..a15c9ee3 100644 --- a/tests/modules/gatk4/baserecalibrator/test.yml +++ b/tests/modules/gatk4/baserecalibrator/test.yml @@ -1,17 +1,26 @@ - name: gatk4 baserecalibrator test_gatk4_baserecalibrator command: nextflow run tests/modules/gatk4/baserecalibrator -entry test_gatk4_baserecalibrator -c tests/config/nextflow.config tags: - - gatk4/baserecalibrator - gatk4 + - gatk4/baserecalibrator files: - path: output/gatk4/test.table md5sum: e2e43abdc0c943c1a54dae816d0b9ea7 +- name: gatk4 baserecalibrator test_gatk4_baserecalibrator_cram + command: nextflow run tests/modules/gatk4/baserecalibrator -entry test_gatk4_baserecalibrator_cram -c tests/config/nextflow.config + tags: + - gatk4 + - gatk4/baserecalibrator + files: + - path: output/gatk4/test.table + md5sum: 35d89a3811aa31711fc9815b6b80e6ec + - name: gatk4 baserecalibrator test_gatk4_baserecalibrator_intervals command: 
nextflow run tests/modules/gatk4/baserecalibrator -entry test_gatk4_baserecalibrator_intervals -c tests/config/nextflow.config tags: - - gatk4/baserecalibrator - gatk4 + - gatk4/baserecalibrator files: - path: output/gatk4/test.table md5sum: 9ecb5f00a2229291705addc09c0ec231 @@ -19,8 +28,8 @@ - name: gatk4 baserecalibrator test_gatk4_baserecalibrator_multiple_sites command: nextflow run tests/modules/gatk4/baserecalibrator -entry test_gatk4_baserecalibrator_multiple_sites -c tests/config/nextflow.config tags: - - gatk4/baserecalibrator - gatk4 + - gatk4/baserecalibrator files: - path: output/gatk4/test.table md5sum: e2e43abdc0c943c1a54dae816d0b9ea7 diff --git a/tests/modules/gatk4/haplotypecaller/main.nf b/tests/modules/gatk4/haplotypecaller/main.nf index 76059074..fd5f30fa 100644 --- a/tests/modules/gatk4/haplotypecaller/main.nf +++ b/tests/modules/gatk4/haplotypecaller/main.nf @@ -13,5 +13,33 @@ workflow test_gatk4_haplotypecaller { fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) dict = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true) - GATK4_HAPLOTYPECALLER ( input, fasta, fai, dict ) + GATK4_HAPLOTYPECALLER ( input, fasta, fai, dict, [], [], [] ) +} + +workflow test_gatk4_haplotypecaller_cram { + input = [ [ id:'test' ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true) + ] + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) + + GATK4_HAPLOTYPECALLER ( input, fasta, fai, dict, [], [], [] ) +} + +workflow test_gatk4_haplotypecaller_intervals_dbsnp { + input = [ [ id:'test' ], // 
meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true) + ] + + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) + sites = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz'], checkIfExists: true) + sites_tbi = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz_tbi'], checkIfExists: true) + intervals = file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true) + + GATK4_HAPLOTYPECALLER ( input, fasta, fai, dict, sites, sites_tbi, intervals ) } diff --git a/tests/modules/gatk4/haplotypecaller/test.yml b/tests/modules/gatk4/haplotypecaller/test.yml index dee2a2ab..480ff8f0 100644 --- a/tests/modules/gatk4/haplotypecaller/test.yml +++ b/tests/modules/gatk4/haplotypecaller/test.yml @@ -1,13 +1,26 @@ - name: gatk4 haplotypecaller test_gatk4_haplotypecaller command: nextflow run tests/modules/gatk4/haplotypecaller -entry test_gatk4_haplotypecaller -c tests/config/nextflow.config tags: - - gatk4 - gatk4/haplotypecaller + - gatk4 + files: + - path: output/gatk4/test.vcf.gz + - path: output/gatk4/test.vcf.gz.tbi + +- name: gatk4 haplotypecaller test_gatk4_haplotypecaller_cram + command: nextflow run tests/modules/gatk4/haplotypecaller -entry test_gatk4_haplotypecaller_cram -c tests/config/nextflow.config + tags: + - gatk4/haplotypecaller + - gatk4 + files: + - path: output/gatk4/test.vcf.gz + - path: output/gatk4/test.vcf.gz.tbi + +- name: gatk4 haplotypecaller test_gatk4_haplotypecaller_intervals_dbsnp + command: nextflow run tests/modules/gatk4/haplotypecaller -entry test_gatk4_haplotypecaller_intervals_dbsnp 
-c tests/config/nextflow.config + tags: + - gatk4/haplotypecaller + - gatk4 files: - path: output/gatk4/test.vcf.gz - should_exist: true - contains: - - 'MT192765.1' - - '54.60' - - '37.32' - path: output/gatk4/test.vcf.gz.tbi diff --git a/tests/modules/gatk4/markduplicates/main.nf b/tests/modules/gatk4/markduplicates/main.nf index 06425088..b9709dc0 100644 --- a/tests/modules/gatk4/markduplicates/main.nf +++ b/tests/modules/gatk4/markduplicates/main.nf @@ -11,3 +11,12 @@ workflow test_gatk4_markduplicates { GATK4_MARKDUPLICATES ( input ) } + +workflow test_gatk4_markduplicates_multiple_bams { + input = [ [ id:'test', single_end:false ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam'], checkIfExists: true) + ] + + GATK4_MARKDUPLICATES ( input ) +} diff --git a/tests/modules/gatk4/markduplicates/test.yml b/tests/modules/gatk4/markduplicates/test.yml index 028147e6..99296ca4 100644 --- a/tests/modules/gatk4/markduplicates/test.yml +++ b/tests/modules/gatk4/markduplicates/test.yml @@ -1,8 +1,23 @@ - name: gatk4 markduplicates test_gatk4_markduplicates command: nextflow run tests/modules/gatk4/markduplicates -entry test_gatk4_markduplicates -c tests/config/nextflow.config tags: - - gatk4 - gatk4/markduplicates + - gatk4 files: + - path: output/gatk4/test.bai + md5sum: e9c125e82553209933883b4fe2b8d7c2 - path: output/gatk4/test.bam - md5sum: 3b6facab3afbacfa08a7a975efbd2c6b + md5sum: bda9a7bf5057f2288ed70be3eb8a753f + - path: output/gatk4/test.metrics + +- name: gatk4 markduplicates test_gatk4_markduplicates_multiple_bams + command: nextflow run tests/modules/gatk4/markduplicates -entry test_gatk4_markduplicates_multiple_bams -c tests/config/nextflow.config + tags: + - gatk4/markduplicates + - gatk4 + files: + - path: output/gatk4/test.bai + md5sum: 93cebe29e7cca2064262b739235cca9b + - path: output/gatk4/test.bam + 
md5sum: dcd6f584006b04141fb787001a8ecacc + - path: output/gatk4/test.metrics diff --git a/tests/modules/samtools/merge/main.nf b/tests/modules/samtools/merge/main.nf index a4511a34..07485df1 100644 --- a/tests/modules/samtools/merge/main.nf +++ b/tests/modules/samtools/merge/main.nf @@ -11,5 +11,15 @@ workflow test_samtools_merge { file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam'], checkIfExists: true)] ] - SAMTOOLS_MERGE ( input ) + SAMTOOLS_MERGE ( input, [] ) +} + +workflow test_samtools_merge_cram { + input = [ [ id: 'test' ], // meta map + [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_cram'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_cram'], checkIfExists: true), + ] + ] + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + SAMTOOLS_MERGE ( input, fasta ) } diff --git a/tests/modules/samtools/merge/test.yml b/tests/modules/samtools/merge/test.yml index d0674ca4..b39ca2ec 100644 --- a/tests/modules/samtools/merge/test.yml +++ b/tests/modules/samtools/merge/test.yml @@ -1,7 +1,15 @@ -- name: samtools merge - command: nextflow run ./tests/modules/samtools/merge -entry test_samtools_merge -c tests/config/nextflow.config +- name: samtools merge test_samtools_merge + command: nextflow run tests/modules/samtools/merge -entry test_samtools_merge -c tests/config/nextflow.config tags: - - samtools - samtools/merge + - samtools files: - path: output/samtools/test_merged.bam + +- name: samtools merge test_samtools_merge_cram + command: nextflow run tests/modules/samtools/merge -entry test_samtools_merge_cram -c tests/config/nextflow.config + tags: + - samtools/merge + - samtools + files: + - path: output/samtools/test_merged.cram diff --git a/tests/modules/samtools/stats/main.nf b/tests/modules/samtools/stats/main.nf index 04a689fe..8e8b0c88 100644 --- a/tests/modules/samtools/stats/main.nf 
+++ b/tests/modules/samtools/stats/main.nf @@ -10,5 +10,15 @@ workflow test_samtools_stats { file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true) ] - SAMTOOLS_STATS ( input ) + SAMTOOLS_STATS ( input, []) +} + +workflow test_samtools_stats_cram { + input = [ [ id: 'test' ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_cram'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_cram_crai'], checkIfExists: true) + ] + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + + SAMTOOLS_STATS ( input, fasta ) } diff --git a/tests/modules/samtools/stats/test.yml b/tests/modules/samtools/stats/test.yml index cf44b846..a194c666 100644 --- a/tests/modules/samtools/stats/test.yml +++ b/tests/modules/samtools/stats/test.yml @@ -1,8 +1,17 @@ -- name: samtools stats - command: nextflow run ./tests/modules/samtools/stats -entry test_samtools_stats -c tests/config/nextflow.config +- name: samtools stats test_samtools_stats + command: nextflow run tests/modules/samtools/stats -entry test_samtools_stats -c tests/config/nextflow.config tags: - samtools - samtools/stats files: - - path: ./output/samtools/test.paired_end.sorted.bam.stats + - path: output/samtools/test.paired_end.sorted.bam.stats md5sum: a7f36cf11fd3bf97e0a0ae29c0627296 + +- name: samtools stats test_samtools_stats_cram + command: nextflow run tests/modules/samtools/stats -entry test_samtools_stats_cram -c tests/config/nextflow.config + tags: + - samtools + - samtools/stats + files: + - path: output/samtools/test.paired_end.recalibrated.sorted.cram.stats + md5sum: bd55a1da30028403f4b66dacf7a2a20e diff --git a/tests/modules/samtools/view/main.nf b/tests/modules/samtools/view/main.nf index c60acb73..bd270cd8 100644 --- a/tests/modules/samtools/view/main.nf +++ b/tests/modules/samtools/view/main.nf @@ -7,8 +7,17 @@ 
include { SAMTOOLS_VIEW } from '../../../../modules/samtools/view/main.nf' addPa workflow test_samtools_view { input = [ [ id:'test', single_end:false ], // meta map file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true) - ] - SAMTOOLS_VIEW ( input ) + SAMTOOLS_VIEW ( input, [] ) +} + +workflow test_samtools_view_cram { + input = [ [ id: 'test' ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_cram'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_cram_crai'], checkIfExists: true) + ] + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + + SAMTOOLS_VIEW ( input, fasta ) } diff --git a/tests/modules/samtools/view/test.yml b/tests/modules/samtools/view/test.yml index 383dfa87..ceaa0e89 100644 --- a/tests/modules/samtools/view/test.yml +++ b/tests/modules/samtools/view/test.yml @@ -1,8 +1,16 @@ -- name: samtools view +- name: samtools view test_samtools_view command: nextflow run tests/modules/samtools/view -entry test_samtools_view -c tests/config/nextflow.config tags: - - samtools - samtools/view + - samtools files: - path: output/samtools/test.bam md5sum: 8fb1e82f76416e9e30fc6b2357e2cf13 + +- name: samtools view test_samtools_view_cram + command: nextflow run tests/modules/samtools/view -entry test_samtools_view_cram -c tests/config/nextflow.config + tags: + - samtools/view + - samtools + files: + - path: output/samtools/test.cram From 84cb78cc98ba7fe5c9de7227cf9824b13624ce88 Mon Sep 17 00:00:00 2001 From: Daniel Lundin Date: Fri, 29 Oct 2021 13:23:34 +0200 Subject: [PATCH 3/5] Khmer normalizebymedian (#985) * Templates for new module * pe only test passing * only_pe and only_se passing * only_pe, only_se, mixed passes * Multiple pe + se tc passes * Passing args works * Add 'interleaved' to description * Fixed linting message * Update 
modules/khmer/normalizebymedian/main.nf Good point. Co-authored-by: Daniel Straub <42973691+d4straub@users.noreply.github.com> * Update meta.yml Co-authored-by: Daniel Straub <42973691+d4straub@users.noreply.github.com> --- modules/khmer/normalizebymedian/functions.nf | 78 +++++++++++++++++ modules/khmer/normalizebymedian/main.nf | 49 +++++++++++ modules/khmer/normalizebymedian/meta.yml | 39 +++++++++ tests/config/pytest_modules.yml | 4 + tests/modules/khmer/normalizebymedian/main.nf | 85 +++++++++++++++++++ .../modules/khmer/normalizebymedian/test.yml | 42 +++++++++ 6 files changed, 297 insertions(+) create mode 100644 modules/khmer/normalizebymedian/functions.nf create mode 100644 modules/khmer/normalizebymedian/main.nf create mode 100644 modules/khmer/normalizebymedian/meta.yml create mode 100644 tests/modules/khmer/normalizebymedian/main.nf create mode 100644 tests/modules/khmer/normalizebymedian/test.yml diff --git a/modules/khmer/normalizebymedian/functions.nf b/modules/khmer/normalizebymedian/functions.nf new file mode 100644 index 00000000..85628ee0 --- /dev/null +++ b/modules/khmer/normalizebymedian/functions.nf @@ -0,0 +1,78 @@ +// +// Utility functions used in nf-core DSL2 module files +// + +// +// Extract name of software tool from process name using $task.process +// +def getSoftwareName(task_process) { + return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() +} + +// +// Extract name of module from process name using $task.process +// +def getProcessName(task_process) { + return task_process.tokenize(':')[-1] +} + +// +// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules +// +def initOptions(Map args) { + def Map options = [:] + options.args = args.args ?: '' + options.args2 = args.args2 ?: '' + options.args3 = args.args3 ?: '' + options.publish_by_meta = args.publish_by_meta ?: [] + options.publish_dir = args.publish_dir ?: '' + options.publish_files = args.publish_files + 
options.suffix = args.suffix ?: '' + return options +} + +// +// Tidy up and join elements of a list to return a path string +// +def getPathFromList(path_list) { + def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries + paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes + return paths.join('/') +} + +// +// Function to save/publish module results +// +def saveFiles(Map args) { + def ioptions = initOptions(args.options) + def path_list = [ ioptions.publish_dir ?: args.publish_dir ] + + // Do not publish versions.yml unless running from pytest workflow + if (args.filename.equals('versions.yml') && !System.getenv("NF_CORE_MODULES_TEST")) { + return null + } + if (ioptions.publish_by_meta) { + def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta + for (key in key_list) { + if (args.meta && key instanceof String) { + def path = key + if (args.meta.containsKey(key)) { + path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key] + } + path = path instanceof String ? 
path : '' + path_list.add(path) + } + } + } + if (ioptions.publish_files instanceof Map) { + for (ext in ioptions.publish_files) { + if (args.filename.endsWith(ext.key)) { + def ext_list = path_list.collect() + ext_list.add(ext.value) + return "${getPathFromList(ext_list)}/$args.filename" + } + } + } else if (ioptions.publish_files == null) { + return "${getPathFromList(path_list)}/$args.filename" + } +} diff --git a/modules/khmer/normalizebymedian/main.nf b/modules/khmer/normalizebymedian/main.nf new file mode 100644 index 00000000..234d172b --- /dev/null +++ b/modules/khmer/normalizebymedian/main.nf @@ -0,0 +1,49 @@ +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName; getProcessName } from './functions' + +params.options = [:] +options = initOptions(params.options) + +process KHMER_NORMALIZEBYMEDIAN { + tag "${name}" + label 'process_long' + publishDir "${params.outdir}", + mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:[:], publish_by_meta:[]) } + + conda (params.enable_conda ? "bioconda::khmer=3.0.0a3" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/khmer:3.0.0a3--py37haa7609a_2" + } else { + container "quay.io/biocontainers/khmer:3.0.0a3--py37haa7609a_2" + } + + input: + path pe_reads + path se_reads + val name + + output: + path "${name}.fastq.gz", emit: reads + path "versions.yml" , emit: versions + + script: + pe_args = pe_reads ? "--paired" : "" + se_args = se_reads ? "--unpaired-reads ${se_reads}" : "" + files = pe_reads ? 
pe_reads : se_reads + + """ + normalize-by-median.py \\ + -M ${task.memory.toGiga()}e9 \\ + --gzip ${options.args} \\ + -o ${name}.fastq.gz \\ + ${pe_args} \\ + ${se_args} \\ + ${files} + + cat <<-END_VERSIONS > versions.yml + ${getProcessName(task.process)}: + ${getSoftwareName(task.process)}: \$( normalize-by-median.py --version 2>&1 | grep ^khmer | sed 's/^khmer //' ) + END_VERSIONS + """ +} diff --git a/modules/khmer/normalizebymedian/meta.yml b/modules/khmer/normalizebymedian/meta.yml new file mode 100644 index 00000000..2227750f --- /dev/null +++ b/modules/khmer/normalizebymedian/meta.yml @@ -0,0 +1,39 @@ +name: khmer_normalizebymedian +description: Module that calls normalize-by-median.py from khmer. The module can take a mix of paired end (interleaved) and single end reads. If both types are provided, only a single file with single ends is possible. +keywords: + - digital normalization + - khmer +tools: + - khmer: + description: khmer k-mer counting library + homepage: https://github.com/dib-lab/khmer + documentation: https://khmer.readthedocs.io/en/latest/ + tool_dev_url: https://github.com/dib-lab/khmer + doi: "https://doi.org/10.12688/f1000research.6924.1" + licence: ['BSD License'] + +input: + - pe_reads: + type: files + description: Paired-end interleaved fastq files + pattern: "*.{fq,fastq}.gz" + - se_reads: + type: files + description: Single-end fastq files + pattern: "*.{fq,fastq}.gz" + - name: + type: string + description: filename for output file(s); ".fastq.gz" will be appended + +output: + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - reads: + type: file + description: Interleaved fastq files + pattern: "*.{fq,fastq}.gz" + +authors: + - "@erikrikarddaniel" diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index da9de7aa..0fd84d24 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -629,6 +629,10 @@ kallistobustools/ref: - 
modules/kallistobustools/ref/** - tests/modules/kallistobustools/ref/** +khmer/normalizebymedian: + - modules/khmer/normalizebymedian/** + - tests/modules/khmer/normalizebymedian/** + kleborate: - modules/kleborate/** - tests/modules/kleborate/** diff --git a/tests/modules/khmer/normalizebymedian/main.nf b/tests/modules/khmer/normalizebymedian/main.nf new file mode 100644 index 00000000..3a3b348c --- /dev/null +++ b/tests/modules/khmer/normalizebymedian/main.nf @@ -0,0 +1,85 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { SEQTK_MERGEPE } from '../../../../modules/seqtk/mergepe/main.nf' addParams( options: [:] ) +include { KHMER_NORMALIZEBYMEDIAN } from '../../../../modules/khmer/normalizebymedian/main.nf' addParams( options: [:] ) +include { KHMER_NORMALIZEBYMEDIAN as KHMER_NORMALIZEBYMEDIAN_ARGS } from '../../../../modules/khmer/normalizebymedian/main.nf' addParams( options: [args: '-C 20 -k 32'] ) + +workflow test_khmer_normalizebymedian_only_pe { + + pe_reads = [ + [ id:'khmer_test', single_end:false ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] + + SEQTK_MERGEPE(pe_reads) + + KHMER_NORMALIZEBYMEDIAN ( SEQTK_MERGEPE.out.reads.collect { it[1] }, [], 'only_pe' ) +} + +workflow test_khmer_normalizebymedian_only_se { + + se_reads = [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + + KHMER_NORMALIZEBYMEDIAN ( [], se_reads, 'only_se' ) +} + +workflow test_khmer_normalizebymedian_mixed { + + pe_reads = [ + [ id:'khmer_test', single_end:false ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] + se_reads = 
file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + + SEQTK_MERGEPE(pe_reads) + + KHMER_NORMALIZEBYMEDIAN ( SEQTK_MERGEPE.out.reads.map { it[1] }, se_reads, 'mixed' ) +} + +workflow test_khmer_normalizebymedian_multiple_pe { + + pe_reads = [ + [ id:'khmer_test0', single_end:false ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ], + [ id:'khmer_test1', single_end:false ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] + se_reads = file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + + SEQTK_MERGEPE(pe_reads) + + KHMER_NORMALIZEBYMEDIAN ( SEQTK_MERGEPE.out.reads.collect { it[1] }, se_reads, 'multiple_pe' ) +} + +workflow test_khmer_normalizebymedian_args { + + pe_reads = [ + [ id:'khmer_test0', single_end:false ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] + se_reads = file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + + SEQTK_MERGEPE(pe_reads) + + KHMER_NORMALIZEBYMEDIAN_ARGS ( SEQTK_MERGEPE.out.reads.collect { it[1] }, se_reads, 'args' ) +} diff --git a/tests/modules/khmer/normalizebymedian/test.yml b/tests/modules/khmer/normalizebymedian/test.yml new file mode 100644 index 00000000..a914a8ef --- /dev/null +++ b/tests/modules/khmer/normalizebymedian/test.yml @@ -0,0 +1,42 @@ +# nf-core modules create-test-yml khmer/normalizebymedian +- name: khmer normalizebymedian only pe reads + command: nextflow run ./tests/modules/khmer/normalizebymedian -entry test_khmer_normalizebymedian_only_pe -c 
tests/config/nextflow.config + tags: + - khmer + - khmer/normalizebymedian + files: + - path: output/khmer/only_pe.fastq.gz + # md5sum not stable even locally with docker (gzip done by tool) + #md5sum: 75e05f2e80cf4bd0b534d4b73f7c059c + +- name: khmer normalizebymedian only se reads + command: nextflow run ./tests/modules/khmer/normalizebymedian -entry test_khmer_normalizebymedian_only_se -c tests/config/nextflow.config + tags: + - khmer + - khmer/normalizebymedian + files: + - path: output/khmer/only_se.fastq.gz + +- name: khmer normalizebymedian mixed reads + command: nextflow run ./tests/modules/khmer/normalizebymedian -entry test_khmer_normalizebymedian_mixed -c tests/config/nextflow.config + tags: + - khmer + - khmer/normalizebymedian + files: + - path: output/khmer/mixed.fastq.gz + +- name: khmer normalizebymedian multiple pe reads + command: nextflow run ./tests/modules/khmer/normalizebymedian -entry test_khmer_normalizebymedian_multiple_pe -c tests/config/nextflow.config + tags: + - khmer + - khmer/normalizebymedian + files: + - path: output/khmer/multiple_pe.fastq.gz + +- name: khmer normalizebymedian args + command: nextflow run ./tests/modules/khmer/normalizebymedian -entry test_khmer_normalizebymedian_args -c tests/config/nextflow.config + tags: + - khmer + - khmer/normalizebymedian + files: + - path: output/khmer/args.fastq.gz From 460a3ed87bcd918aee869256cae298457752f921 Mon Sep 17 00:00:00 2001 From: Francesco L <53608000+lescai@users.noreply.github.com> Date: Fri, 29 Oct 2021 14:00:54 +0200 Subject: [PATCH 4/5] Fgbio group reads by umi (#952) * adding template for module groupreadsbyumi * update modules with code * strategy is required argument so moving it to input rather than options.args * tests successful committing yml * added meta to output Co-authored-by: Gregor Sturm --- modules/fgbio/groupreadsbyumi/functions.nf | 78 ++++++++++++++++++++ modules/fgbio/groupreadsbyumi/main.nf | 50 +++++++++++++ modules/fgbio/groupreadsbyumi/meta.yml | 59 
+++++++++++++++ tests/config/pytest_modules.yml | 4 + tests/modules/fgbio/groupreadsbyumi/main.nf | 15 ++++ tests/modules/fgbio/groupreadsbyumi/test.yml | 10 +++ 6 files changed, 216 insertions(+) create mode 100644 modules/fgbio/groupreadsbyumi/functions.nf create mode 100644 modules/fgbio/groupreadsbyumi/main.nf create mode 100644 modules/fgbio/groupreadsbyumi/meta.yml create mode 100644 tests/modules/fgbio/groupreadsbyumi/main.nf create mode 100644 tests/modules/fgbio/groupreadsbyumi/test.yml diff --git a/modules/fgbio/groupreadsbyumi/functions.nf b/modules/fgbio/groupreadsbyumi/functions.nf new file mode 100644 index 00000000..85628ee0 --- /dev/null +++ b/modules/fgbio/groupreadsbyumi/functions.nf @@ -0,0 +1,78 @@ +// +// Utility functions used in nf-core DSL2 module files +// + +// +// Extract name of software tool from process name using $task.process +// +def getSoftwareName(task_process) { + return task_process.tokenize(':')[-1].tokenize('_')[0].toLowerCase() +} + +// +// Extract name of module from process name using $task.process +// +def getProcessName(task_process) { + return task_process.tokenize(':')[-1] +} + +// +// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules +// +def initOptions(Map args) { + def Map options = [:] + options.args = args.args ?: '' + options.args2 = args.args2 ?: '' + options.args3 = args.args3 ?: '' + options.publish_by_meta = args.publish_by_meta ?: [] + options.publish_dir = args.publish_dir ?: '' + options.publish_files = args.publish_files + options.suffix = args.suffix ?: '' + return options +} + +// +// Tidy up and join elements of a list to return a path string +// +def getPathFromList(path_list) { + def paths = path_list.findAll { item -> !item?.trim().isEmpty() } // Remove empty entries + paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and trailing slashes + return paths.join('/') +} + +// +// Function to save/publish module 
results +// +def saveFiles(Map args) { + def ioptions = initOptions(args.options) + def path_list = [ ioptions.publish_dir ?: args.publish_dir ] + + // Do not publish versions.yml unless running from pytest workflow + if (args.filename.equals('versions.yml') && !System.getenv("NF_CORE_MODULES_TEST")) { + return null + } + if (ioptions.publish_by_meta) { + def key_list = ioptions.publish_by_meta instanceof List ? ioptions.publish_by_meta : args.publish_by_meta + for (key in key_list) { + if (args.meta && key instanceof String) { + def path = key + if (args.meta.containsKey(key)) { + path = args.meta[key] instanceof Boolean ? "${key}_${args.meta[key]}".toString() : args.meta[key] + } + path = path instanceof String ? path : '' + path_list.add(path) + } + } + } + if (ioptions.publish_files instanceof Map) { + for (ext in ioptions.publish_files) { + if (args.filename.endsWith(ext.key)) { + def ext_list = path_list.collect() + ext_list.add(ext.value) + return "${getPathFromList(ext_list)}/$args.filename" + } + } + } else if (ioptions.publish_files == null) { + return "${getPathFromList(path_list)}/$args.filename" + } +} diff --git a/modules/fgbio/groupreadsbyumi/main.nf b/modules/fgbio/groupreadsbyumi/main.nf new file mode 100644 index 00000000..8e16f0a5 --- /dev/null +++ b/modules/fgbio/groupreadsbyumi/main.nf @@ -0,0 +1,50 @@ +// Import generic module functions +include { initOptions; saveFiles; getSoftwareName; getProcessName } from './functions' + +params.options = [:] +options = initOptions(params.options) + +process FGBIO_GROUPREADSBYUMI { + tag "$meta.id" + label 'process_low' + publishDir "${params.outdir}", + mode: params.publish_dir_mode, + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) } + + conda (params.enable_conda ? 
"bioconda::fgbio=1.4.0" : null) + if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { + container "https://depot.galaxyproject.org/singularity/fgbio:1.4.0--hdfd78af_0" + } else { + container "quay.io/biocontainers/fgbio:1.4.0--hdfd78af_0" + } + + input: + tuple val(meta), path(taggedbam) + val(strategy) + + output: + tuple val(meta), path("*_umi-grouped.bam") , emit: bam + tuple val(meta), path("*_umi_histogram.txt"), emit: histogram + path "versions.yml" , emit: versions + + script: + def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" + + """ + mkdir tmp + + fgbio \\ + --tmp-dir=\${PWD}/tmp \\ + GroupReadsByUmi \\ + -s $strategy \\ + ${options.args} \\ + -i $taggedbam \\ + -o ${prefix}_umi-grouped.bam \\ + -f ${prefix}_umi_histogram.txt + + cat <<-END_VERSIONS > versions.yml + ${getProcessName(task.process)}: + ${getSoftwareName(task.process)}: \$( echo \$(fgbio --version 2>&1 | tr -d '[:cntrl:]' ) | sed -e 's/^.*Version: //;s/\\[.*\$//') + END_VERSIONS + """ +} diff --git a/modules/fgbio/groupreadsbyumi/meta.yml b/modules/fgbio/groupreadsbyumi/meta.yml new file mode 100644 index 00000000..18ce149e --- /dev/null +++ b/modules/fgbio/groupreadsbyumi/meta.yml @@ -0,0 +1,59 @@ +name: fgbio_groupreadsbyumi +description: | + Groups reads together that appear to have come from the same original molecule. + Reads are grouped by template, and then templates are sorted by the 5’ mapping positions + of the reads from the template, used from earliest mapping position to latest. + Reads that have the same end positions are then sub-grouped by UMI sequence. + (!) Note: the MQ tag is required on reads with mapped mates (!) + This can be added using samblaster with the optional argument --addMateTags. 
+keywords: + - UMI + - groupreads + - fgbio +tools: + - fgbio: + description: A set of tools for working with genomic and high throughput sequencing data, including UMIs + homepage: http://fulcrumgenomics.github.io/fgbio/ + documentation: http://fulcrumgenomics.github.io/fgbio/tools/latest/ + tool_dev_url: https://github.com/fulcrumgenomics/fgbio + doi: "" + licence: ['MIT'] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: | + BAM file. Note: the MQ tag is required on reads with mapped mates (!) + pattern: "*.bam" + - strategy: + type: value + description: | + Required argument: defines the UMI assignment strategy. + Must be chosen among: Identity, Edit, Adjacency, Paired. + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - bam: + type: file + description: UMI-grouped BAM + pattern: "*.bam" + - histogram: + type: file + description: A text file containing the tag family size counts + pattern: "*.txt" + +authors: + - "@lescai" diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index 0fd84d24..4fdd8303 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -394,6 +394,10 @@ fgbio/fastqtobam: - modules/fgbio/fastqtobam/** - tests/modules/fgbio/fastqtobam/** +fgbio/groupreadsbyumi: + - modules/fgbio/groupreadsbyumi/** + - tests/modules/fgbio/groupreadsbyumi/** + fgbio/sortbam: - modules/fgbio/sortbam/** - tests/modules/fgbio/sortbam/** diff --git a/tests/modules/fgbio/groupreadsbyumi/main.nf b/tests/modules/fgbio/groupreadsbyumi/main.nf new file mode 100644 index 00000000..31f55724 --- /dev/null +++ b/tests/modules/fgbio/groupreadsbyumi/main.nf @@ -0,0 +1,15 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 
+ +include { FGBIO_GROUPREADSBYUMI } from '../../../../modules/fgbio/groupreadsbyumi/main.nf' addParams( options: [:] ) + +workflow test_fgbio_groupreadsbyumi { + + input = [ [ id:'test', single_end:false ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_umi_unsorted_tagged_bam'], checkIfExists: true) ] + + strategy = "Adjacency" + + FGBIO_GROUPREADSBYUMI ( input, strategy ) +} diff --git a/tests/modules/fgbio/groupreadsbyumi/test.yml b/tests/modules/fgbio/groupreadsbyumi/test.yml new file mode 100644 index 00000000..ce70f129 --- /dev/null +++ b/tests/modules/fgbio/groupreadsbyumi/test.yml @@ -0,0 +1,10 @@ +- name: fgbio groupreadsbyumi test_fgbio_groupreadsbyumi + command: nextflow run tests/modules/fgbio/groupreadsbyumi -entry test_fgbio_groupreadsbyumi -c tests/config/nextflow.config + tags: + - fgbio + - fgbio/groupreadsbyumi + files: + - path: output/fgbio/test_umi-grouped.bam + md5sum: f1e53fc845fd99a3da172eb8063dff0b + - path: output/fgbio/test_umi_histogram.txt + md5sum: d17fd167b2a765d46e4b01bf08ece01b From 2959b4ba070d138d1577acc48d35cc669fbef972 Mon Sep 17 00:00:00 2001 From: Chris Cheshire Date: Fri, 29 Oct 2021 13:22:17 +0100 Subject: [PATCH 5/5] Bedtools sort add extension choice input (#984) * hifiasm copied from fastqc * hifiasm tests init from fastqc * meta.yml init; test.yml and main.nf for printing version * Add hifiasm version printing * Removed spaced on an empty line * Reverted hifiasm from main * Added extension input for bedtools sort * whitespace * Updated docs Co-authored-by: Sviatoslav Sidorov Co-authored-by: Svyatoslav Sidorov --- modules/bedtools/sort/main.nf | 11 ++++++----- modules/bedtools/sort/meta.yml | 19 +++++++++++++------ tests/modules/bedtools/sort/main.nf | 2 +- tests/modules/bedtools/sort/test.yml | 2 +- 4 files changed, 21 insertions(+), 13 deletions(-) diff --git a/modules/bedtools/sort/main.nf b/modules/bedtools/sort/main.nf index bdba3376..4a51c4b2 100644 --- 
a/modules/bedtools/sort/main.nf +++ b/modules/bedtools/sort/main.nf @@ -19,20 +19,21 @@ process BEDTOOLS_SORT { } input: - tuple val(meta), path(bed) + tuple val(meta), path(intervals) + val extension output: - tuple val(meta), path('*.bed'), emit: bed - path "versions.yml" , emit: versions + tuple val(meta), path("*.${extension}"), emit: sorted + path "versions.yml" , emit: versions script: def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}" """ bedtools \\ sort \\ - -i $bed \\ + -i $intervals \\ $options.args \\ - > ${prefix}.bed + > ${prefix}.${extension} cat <<-END_VERSIONS > versions.yml ${getProcessName(task.process)}: diff --git a/modules/bedtools/sort/meta.yml b/modules/bedtools/sort/meta.yml index 5b8b41d7..c7b1b098 100644 --- a/modules/bedtools/sort/meta.yml +++ b/modules/bedtools/sort/meta.yml @@ -15,20 +15,26 @@ input: description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - bed: + - intervals: type: file - description: Input BED file - pattern: "*.{bed}" + description: BED/BEDGRAPH + pattern: "*.{bed,bedGraph}" + + - extension: + type: string + description: Extension of the output file, without the leading dot (e.g., "bg", "bedgraph", "txt", "tab", etc.). It is set arbitrarily by the user and should correspond to the output file format, which depends on the arguments. output: - meta: type: map description: | Groovy Map containing sample information e.g. 
[ id:'test', single_end:false ] - - bed: + + - sorted: type: file - description: Sorted BED file - pattern: "*.{bed}" + description: Sorted output file + pattern: "*.${extension}" + - versions: type: file description: File containing software versions @@ -37,3 +43,4 @@ authors: - "@Emiller88" - "@sruthipsuresh" - "@drpatelh" + - "@chris-cheshire" diff --git a/tests/modules/bedtools/sort/main.nf b/tests/modules/bedtools/sort/main.nf index ad1a3df4..b5d34e2f 100644 --- a/tests/modules/bedtools/sort/main.nf +++ b/tests/modules/bedtools/sort/main.nf @@ -9,5 +9,5 @@ workflow test_bedtools_sort { file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true) ] - BEDTOOLS_SORT ( input ) + BEDTOOLS_SORT ( input, "testext" ) } diff --git a/tests/modules/bedtools/sort/test.yml b/tests/modules/bedtools/sort/test.yml index ceb25f7d..1dd04507 100644 --- a/tests/modules/bedtools/sort/test.yml +++ b/tests/modules/bedtools/sort/test.yml @@ -4,5 +4,5 @@ - bedtools - bedtools/sort files: - - path: ./output/bedtools/test_out.bed + - path: ./output/bedtools/test_out.testext md5sum: fe4053cf4de3aebbdfc3be2efb125a74