From 5320ab7e6c59c04fd58b018f0d399682ed40adac Mon Sep 17 00:00:00 2001 From: "Robert A. Petit III" Date: Wed, 22 Dec 2021 06:39:35 -0500 Subject: [PATCH 1/4] Fix kleborate inputs (#1172) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Fix kleborate inputs * Update test.yml Co-authored-by: Sébastien Guizard --- modules/kleborate/main.nf | 2 +- tests/modules/kleborate/test.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/kleborate/main.nf b/modules/kleborate/main.nf index 998eced1..4a793793 100644 --- a/modules/kleborate/main.nf +++ b/modules/kleborate/main.nf @@ -21,7 +21,7 @@ process KLEBORATE { kleborate \\ $args \\ --outfile ${prefix}.results.txt \\ - --assemblies *.fasta + --assemblies $fastas cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/tests/modules/kleborate/test.yml b/tests/modules/kleborate/test.yml index c7b25778..30d854d5 100644 --- a/tests/modules/kleborate/test.yml +++ b/tests/modules/kleborate/test.yml @@ -4,4 +4,4 @@ - kleborate files: - path: output/kleborate/test.results.txt - md5sum: b7979a71170736098fb8403cd92748f5 + contains: ['strain', 'genome', 'scaffolds'] From a6e0629e2430a9d4061ddc8afdf4cff8c93080b5 Mon Sep 17 00:00:00 2001 From: FriederikeHanssen Date: Thu, 23 Dec 2021 12:58:20 +0100 Subject: [PATCH 2/4] Change intervals to be part of sample specific input (#1189) * Change intervals to be part of sample specific input * Fix some tests * Update checksum * Update intervals * Update intervals * Try out gavins idea for adding the file * update test line * update test line * update test line * revert contains line --- modules/gatk4/applybqsr/main.nf | 12 +++-- modules/gatk4/applybqsr/meta.yml | 7 +-- modules/gatk4/baserecalibrator/main.nf | 7 +-- modules/gatk4/baserecalibrator/meta.yml | 6 +-- modules/gatk4/genotypegvcfs/main.nf | 5 +- modules/gatk4/genotypegvcfs/meta.yml | 7 ++- modules/gatk4/haplotypecaller/main.nf | 5 +- modules/gatk4/haplotypecaller/meta.yml | 6 +-- tests/modules/gatk4/applybqsr/main.nf | 21 ++++---- tests/modules/gatk4/applybqsr/test.yml | 4 +- tests/modules/gatk4/baserecalibrator/main.nf | 20 ++++---- tests/modules/gatk4/genotypegvcfs/main.nf | 50 +++++++++++--------- tests/modules/gatk4/haplotypecaller/main.nf | 16 ++++--- 13 files changed, 89 insertions(+), 77 deletions(-) diff --git a/modules/gatk4/applybqsr/main.nf b/modules/gatk4/applybqsr/main.nf index 20294c27..22702211 100644 --- a/modules/gatk4/applybqsr/main.nf +++ b/modules/gatk4/applybqsr/main.nf @@ -8,20 +8,22 @@ process GATK4_APPLYBQSR { 'quay.io/biocontainers/gatk4:4.2.4.0--hdfd78af_0' }" input: - tuple val(meta), path(input), path(input_index), path(bqsr_table) + tuple val(meta), path(input), path(input_index), path(bqsr_table), path(intervals) path fasta path fai path dict - path intervals output: - tuple val(meta), path("*.bam"), emit: bam - path "versions.yml" , emit: versions + tuple val(meta), path("*.bam"), emit: bam, optional: true + tuple val(meta), path("*.cram"), emit: cram, optional: true + path "versions.yml" , emit: versions script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def interval = intervals ? "-L ${intervals}" : "" + def file_type = input.getExtension() + def avail_mem = 3 if (!task.memory) { log.info '[GATK ApplyBQSR] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' @@ -35,7 +37,7 @@ process GATK4_APPLYBQSR { --bqsr-recal-file $bqsr_table \\ $interval \\ --tmp-dir . \\ - -O ${prefix}.bam \\ + -O ${prefix}.${file_type} \\ $args cat <<-END_VERSIONS > versions.yml diff --git a/modules/gatk4/applybqsr/meta.yml b/modules/gatk4/applybqsr/meta.yml index 4e3b2f9a..ad1f82a1 100644 --- a/modules/gatk4/applybqsr/meta.yml +++ b/modules/gatk4/applybqsr/meta.yml @@ -31,6 +31,9 @@ input: - bqsr_table: type: file description: Recalibration table from gatk4_baserecalibrator + - intervals: + type: file + description: Bed file with the genomic regions included in the library (optional) - fasta: type: file description: The reference fasta file @@ -43,9 +46,7 @@ input: type: file description: GATK sequence dictionary pattern: "*.dict" - - intervalsBed: - type: file - description: Bed file with the genomic regions included in the library (optional) + output: - meta: diff --git a/modules/gatk4/baserecalibrator/main.nf b/modules/gatk4/baserecalibrator/main.nf index 8a45d6e2..3a26d6cc 100644 --- a/modules/gatk4/baserecalibrator/main.nf +++ b/modules/gatk4/baserecalibrator/main.nf @@ -8,11 +8,10 @@ process GATK4_BASERECALIBRATOR { 'quay.io/biocontainers/gatk4:4.2.4.0--hdfd78af_0' }" input: - tuple val(meta), path(input), path(input_index) + tuple val(meta), path(input), path(input_index), path(intervals) path fasta path fai path dict - path intervalsBed path knownSites path knownSites_tbi @@ -23,14 +22,16 @@ process GATK4_BASERECALIBRATOR { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def intervalsCommand = intervalsBed ? "-L ${intervalsBed}" : "" + def intervalsCommand = intervals ? "-L ${intervals}" : "" def sitesCommand = knownSites.collect{"--known-sites ${it}"}.join(' ') + def avail_mem = 3 if (!task.memory) { log.info '[GATK BaseRecalibrator] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' } else { avail_mem = task.memory.giga } + """ gatk --java-options "-Xmx${avail_mem}g" BaseRecalibrator \ -R $fasta \ diff --git a/modules/gatk4/baserecalibrator/meta.yml b/modules/gatk4/baserecalibrator/meta.yml index 188340b4..641a50df 100644 --- a/modules/gatk4/baserecalibrator/meta.yml +++ b/modules/gatk4/baserecalibrator/meta.yml @@ -28,6 +28,9 @@ input: type: file description: BAI/CRAI file from alignment pattern: "*.{bai,crai}" + - intervals: + type: file + description: Bed file with the genomic regions included in the library (optional) - fasta: type: file description: The reference fasta file @@ -40,9 +43,6 @@ input: type: file description: GATK sequence dictionary pattern: "*.dict" - - intervalsBed: - type: file - description: Bed file with the genomic regions included in the library (optional) - knownSites: type: file description: Bed file with the genomic regions included in the library (optional) diff --git a/modules/gatk4/genotypegvcfs/main.nf b/modules/gatk4/genotypegvcfs/main.nf index b1c15a5e..f9107b1e 100644 --- a/modules/gatk4/genotypegvcfs/main.nf +++ b/modules/gatk4/genotypegvcfs/main.nf @@ -8,13 +8,12 @@ process GATK4_GENOTYPEGVCFS { 'quay.io/biocontainers/gatk4:4.2.4.0--hdfd78af_0' }" input: - tuple val(meta), path(gvcf), path(gvcf_index) + tuple val(meta), path(gvcf), path(gvcf_index), path(intervals) path fasta path fasta_index path fasta_dict path dbsnp path dbsnp_index - path intervals_bed output: tuple val(meta), path("*.vcf.gz"), emit: vcf @@ -25,7 +24,7 @@ process GATK4_GENOTYPEGVCFS { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def dbsnp_options = dbsnp ? "-D ${dbsnp}" : "" - def interval_options = intervals_bed ? "-L ${intervals_bed}" : "" + def interval_options = intervals ? "-L ${intervals}" : "" def gvcf_options = gvcf.name.endsWith(".vcf") || gvcf.name.endsWith(".vcf.gz") ? "$gvcf" : "gendb://$gvcf" def avail_mem = 3 if (!task.memory) { diff --git a/modules/gatk4/genotypegvcfs/meta.yml b/modules/gatk4/genotypegvcfs/meta.yml index e6b38863..2c9767b2 100644 --- a/modules/gatk4/genotypegvcfs/meta.yml +++ b/modules/gatk4/genotypegvcfs/meta.yml @@ -25,6 +25,9 @@ input: description: | Tuple of gVCF(.gz) file (first) and its index (second) or the path to a GenomicsDB (and empty) pattern: ["*.{vcf,vcf.gz}", "*.{idx,tbi}"] + - intervals: + type: file + description: Bed file with the genomic regions included in the library (optional) - fasta: type: file description: Reference fasta file @@ -45,10 +48,6 @@ input: type: tuple of files description: dbSNP VCF index file pattern: "*.tbi" - - intervals_bed: - type: file - description: An intevals BED file - pattern: "*.bed" output: - meta: diff --git a/modules/gatk4/haplotypecaller/main.nf b/modules/gatk4/haplotypecaller/main.nf index d6cf514d..8b4c0e64 100644 --- a/modules/gatk4/haplotypecaller/main.nf +++ b/modules/gatk4/haplotypecaller/main.nf @@ -8,13 +8,12 @@ process GATK4_HAPLOTYPECALLER { 'quay.io/biocontainers/gatk4:4.2.4.0--hdfd78af_0' }" input: - tuple val(meta), path(input), path(input_index) + tuple val(meta), path(input), path(input_index), path(intervals) path fasta path fai path dict path dbsnp path dbsnp_tbi - path interval output: tuple val(meta), path("*.vcf.gz"), emit: vcf @@ -24,7 +23,7 @@ process GATK4_HAPLOTYPECALLER { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def interval_option = interval ? "-L ${interval}" : "" + def interval_option = intervals ? "-L ${intervals}" : "" def dbsnp_option = dbsnp ? "-D ${dbsnp}" : "" def avail_mem = 3 if (!task.memory) { diff --git a/modules/gatk4/haplotypecaller/meta.yml b/modules/gatk4/haplotypecaller/meta.yml index 6c9d0891..869bd1d2 100644 --- a/modules/gatk4/haplotypecaller/meta.yml +++ b/modules/gatk4/haplotypecaller/meta.yml @@ -29,6 +29,9 @@ input: type: file description: BAI/CRAI file from alignment pattern: "*.{bai,crai}" + - intervals: + type: file + description: Bed file with the genomic regions included in the library (optional) - fasta: type: file description: The reference fasta file @@ -47,9 +50,6 @@ input: - dbsnp_tbi: type: file description: VCF index of dbsnp (optional) - - interval: - type: file - description: Bed file with the genomic regions included in the library (optional) output: - meta: diff --git a/tests/modules/gatk4/applybqsr/main.nf b/tests/modules/gatk4/applybqsr/main.nf index da85b11b..66711574 100644 --- a/tests/modules/gatk4/applybqsr/main.nf +++ b/tests/modules/gatk4/applybqsr/main.nf @@ -8,39 +8,40 @@ workflow test_gatk4_applybqsr { input = [ [ id:'test' ], // meta map file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_baserecalibrator_table'], checkIfExists: true) + file(params.test_data['sarscov2']['illumina']['test_baserecalibrator_table'], checkIfExists: true), + [] ] fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) dict = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true) - GATK4_APPLYBQSR ( input, fasta, fai, dict, [] ) + GATK4_APPLYBQSR ( input, fasta, fai, dict ) } workflow test_gatk4_applybqsr_intervals { input = [ [ id:'test' ], // meta map file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_baserecalibrator_table'], checkIfExists: true) + file(params.test_data['sarscov2']['illumina']['test_baserecalibrator_table'], checkIfExists: true), + file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true) ] fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) - fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) - dict = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true) - intervals = file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true) + fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true) - GATK4_APPLYBQSR ( input, fasta, fai, dict, intervals ) + GATK4_APPLYBQSR ( input, fasta, fai, dict ) } workflow test_gatk4_applybqsr_cram { input = [ [ id:'test' ], // meta map file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true), file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true), - file(params.test_data['homo_sapiens']['illumina']['test_baserecalibrator_table'], checkIfExists: true) + file(params.test_data['homo_sapiens']['illumina']['test_baserecalibrator_table'], checkIfExists: true), + file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true) ] fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) - intervals = file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true) - GATK4_APPLYBQSR ( input, fasta, fai, dict, intervals ) + GATK4_APPLYBQSR ( input, fasta, fai, dict ) } diff --git a/tests/modules/gatk4/applybqsr/test.yml b/tests/modules/gatk4/applybqsr/test.yml index af29be71..b4aef7ab 100644 --- a/tests/modules/gatk4/applybqsr/test.yml +++ b/tests/modules/gatk4/applybqsr/test.yml @@ -26,7 +26,7 @@ - gatk4 - gatk4/applybqsr files: - - path: output/gatk4/test.bam - md5sum: a333f80284a89a8daab28d3686a0b365 + - path: output/gatk4/test.cram + md5sum: b7659b3b2adaabbe73658dc059dbfdf6 - path: output/gatk4/versions.yml md5sum: 57933f27b3a31b05af3f7c248d365396 diff --git a/tests/modules/gatk4/baserecalibrator/main.nf b/tests/modules/gatk4/baserecalibrator/main.nf index 2675d04b..05c24ae3 100644 --- a/tests/modules/gatk4/baserecalibrator/main.nf +++ b/tests/modules/gatk4/baserecalibrator/main.nf @@ -7,7 +7,8 @@ include { GATK4_BASERECALIBRATOR } from '../../../../modules/gatk4/baserecalibra workflow test_gatk4_baserecalibrator { input = [ [ id:'test' ], // meta map file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true) + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true), + [] ] fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) @@ -15,14 +16,14 @@ workflow test_gatk4_baserecalibrator { sites = file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true) sites_tbi = file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true) - GATK4_BASERECALIBRATOR ( input, fasta, fai, dict, [], sites, sites_tbi ) + GATK4_BASERECALIBRATOR ( input, fasta, fai, dict, sites, sites_tbi ) } workflow test_gatk4_baserecalibrator_cram { input = [ [ id:'test' ], // meta map file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true), file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true), - file(params.test_data['homo_sapiens']['illumina']['test_baserecalibrator_table'], checkIfExists: true) + [] ] fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) @@ -30,28 +31,29 @@ workflow test_gatk4_baserecalibrator_cram { sites = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz'], checkIfExists: true) sites_tbi = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz_tbi'], checkIfExists: true) - GATK4_BASERECALIBRATOR ( input, fasta, fai, dict, [], sites, sites_tbi ) + GATK4_BASERECALIBRATOR ( input, fasta, fai, dict, sites, sites_tbi ) } workflow test_gatk4_baserecalibrator_intervals { input = [ [ id:'test' ], // meta map file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true) + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true), + file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true) ] fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) dict = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true) - intervals = file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true) sites = file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true) sites_tbi = file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true) - GATK4_BASERECALIBRATOR ( input, fasta, fai, dict, intervals, sites, sites_tbi ) + GATK4_BASERECALIBRATOR ( input, fasta, fai, dict, sites, sites_tbi ) } workflow test_gatk4_baserecalibrator_multiple_sites { input = [ [ id:'test' ], // meta map file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true) + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true), + [] ] fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) @@ -63,5 +65,5 @@ workflow test_gatk4_baserecalibrator_multiple_sites { file(params.test_data['sarscov2']['illumina']['test2_vcf_gz_tbi'], checkIfExists: true) ] - GATK4_BASERECALIBRATOR ( input, fasta, fai, dict, [], sites, sites_tbi ) + GATK4_BASERECALIBRATOR ( input, fasta, fai, dict, sites, sites_tbi ) } diff --git a/tests/modules/gatk4/genotypegvcfs/main.nf b/tests/modules/gatk4/genotypegvcfs/main.nf index ce13ea2d..6191df26 100644 --- a/tests/modules/gatk4/genotypegvcfs/main.nf +++ b/tests/modules/gatk4/genotypegvcfs/main.nf @@ -10,13 +10,15 @@ workflow test_gatk4_genotypegvcfs_vcf_input { input = [ [ id:'test' ], // meta map file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf'], checkIfExists: true), - file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_idx'], checkIfExists: true) ] + file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_idx'], checkIfExists: true), + [] + ] fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) fastaIndex = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) fastaDict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) - GATK4_GENOTYPEGVCFS ( input, fasta, fastaIndex, fastaDict, [], [], [] ) + GATK4_GENOTYPEGVCFS ( input, fasta, fastaIndex, fastaDict, [], []) } // Basic parameters with compressed VCF input @@ -24,13 +26,15 @@ workflow test_gatk4_genotypegvcfs_gz_input { input = [ [ id:'test' ], // meta map file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz'], checkIfExists: true), - file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz_tbi'], checkIfExists: true) ] + file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz_tbi'], checkIfExists: true), + [] + ] fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) fastaIndex = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) fastaDict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) - GATK4_GENOTYPEGVCFS ( input, fasta, fastaIndex, fastaDict, [], [], [] ) + GATK4_GENOTYPEGVCFS ( input, fasta, fastaIndex, fastaDict, [], []) } // Basic parameters + optional dbSNP @@ -38,7 +42,9 @@ workflow test_gatk4_genotypegvcfs_gz_input_dbsnp { input = [ [ id:'test' ], // meta map file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz'], checkIfExists: true), - file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz_tbi'], checkIfExists: true) ] + file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz_tbi'], checkIfExists: true), + [] + ] fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) fastaIndex = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) @@ -47,7 +53,7 @@ workflow test_gatk4_genotypegvcfs_gz_input_dbsnp { dbsnp = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz'], checkIfExists: true) dbsnpIndex = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz_tbi'], checkIfExists: true) - GATK4_GENOTYPEGVCFS ( input, fasta, fastaIndex, fastaDict, dbsnp, dbsnpIndex, [] ) + GATK4_GENOTYPEGVCFS ( input, fasta, fastaIndex, fastaDict, dbsnp, dbsnpIndex) } // Basic parameters + optional intervals @@ -55,15 +61,14 @@ workflow test_gatk4_genotypegvcfs_gz_input_intervals { input = [ [ id:'test' ], // meta map file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz'], checkIfExists: true), - file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz_tbi'], checkIfExists: true) ] + file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz_tbi'], checkIfExists: true), + file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true) ] fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) fastaIndex = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) fastaDict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) - intervalsBed = file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true) - - GATK4_GENOTYPEGVCFS ( input, fasta, fastaIndex, fastaDict, [], [], intervalsBed ) + GATK4_GENOTYPEGVCFS ( input, fasta, fastaIndex, fastaDict, [], []) } // Basic parameters + optional dbSNP + optional intervals @@ -71,7 +76,9 @@ workflow test_gatk4_genotypegvcfs_gz_input_dbsnp_intervals { input = [ [ id:'test' ], // meta map file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz'], checkIfExists: true), - file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz_tbi'], checkIfExists: true) ] + file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz_tbi'], checkIfExists: true), + file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true) + ] fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) fastaIndex = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) @@ -80,9 +87,7 @@ workflow test_gatk4_genotypegvcfs_gz_input_dbsnp_intervals { dbsnp = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz'], checkIfExists: true) dbsnpIndex = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz_tbi'], checkIfExists: true) - intervalsBed = file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true) - - GATK4_GENOTYPEGVCFS ( input, fasta, fastaIndex, fastaDict, dbsnp, dbsnpIndex, intervalsBed ) + GATK4_GENOTYPEGVCFS ( input, fasta, fastaIndex, fastaDict, dbsnp, dbsnpIndex ) } // Basic parameters with GenomicsDB input @@ -97,9 +102,11 @@ workflow test_gatk4_genotypegvcfs_gendb_input { UNTAR ( test_genomicsdb ) gendb = UNTAR.out.untar.collect() gendb.add([]) + gendb.add([]) + input = Channel.of([ id:'test' ]).combine(gendb) - GATK4_GENOTYPEGVCFS ( input, fasta, fastaIndex, fastaDict, [], [], [] ) + GATK4_GENOTYPEGVCFS ( input, fasta, fastaIndex, fastaDict, [], []) } // Basic parameters with GenomicsDB + optional dbSNP @@ -117,9 +124,10 @@ workflow test_gatk4_genotypegvcfs_gendb_input_dbsnp { UNTAR ( test_genomicsdb ) gendb = UNTAR.out.untar.collect() gendb.add([]) + gendb.add([]) input = Channel.of([ id:'test' ]).combine(gendb) - GATK4_GENOTYPEGVCFS ( input, fasta, fastaIndex, fastaDict, dbsnp, dbsnpIndex, [] ) + GATK4_GENOTYPEGVCFS ( input, fasta, fastaIndex, fastaDict, dbsnp, dbsnpIndex) } // Basic parameters with GenomicsDB + optional intervals @@ -129,16 +137,15 @@ workflow test_gatk4_genotypegvcfs_gendb_input_intervals { fastaIndex = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) fastaDict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) - intervalsBed = file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true) - test_genomicsdb = file(params.test_data['homo_sapiens']['illumina']['test_genomicsdb_tar_gz'], checkIfExists: true) UNTAR ( test_genomicsdb ) gendb = UNTAR.out.untar.collect() gendb.add([]) + gendb.add([file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true)]) input = Channel.of([ id:'test' ]).combine(gendb) - GATK4_GENOTYPEGVCFS ( input, fasta, fastaIndex, fastaDict, [], [], intervalsBed ) + GATK4_GENOTYPEGVCFS ( input, fasta, fastaIndex, fastaDict, [], [] ) } // Basic parameters with GenomicsDB + optional dbSNP + optional intervals @@ -151,14 +158,13 @@ workflow test_gatk4_genotypegvcfs_gendb_input_dbsnp_intervals { dbsnp = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz'], checkIfExists: true) dbsnpIndex = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz_tbi'], checkIfExists: true) - intervalsBed = file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true) - test_genomicsdb = file(params.test_data['homo_sapiens']['illumina']['test_genomicsdb_tar_gz'], checkIfExists: true) UNTAR ( test_genomicsdb ) gendb = UNTAR.out.untar.collect() gendb.add([]) + gendb.add([file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true)]) input = Channel.of([ id:'test' ]).combine(gendb) - GATK4_GENOTYPEGVCFS ( input, fasta, fastaIndex, fastaDict, dbsnp, dbsnpIndex, intervalsBed ) + GATK4_GENOTYPEGVCFS ( input, fasta, fastaIndex, fastaDict, dbsnp, dbsnpIndex ) } diff --git a/tests/modules/gatk4/haplotypecaller/main.nf b/tests/modules/gatk4/haplotypecaller/main.nf index dc6bec67..7b60baab 100644 --- a/tests/modules/gatk4/haplotypecaller/main.nf +++ b/tests/modules/gatk4/haplotypecaller/main.nf @@ -7,31 +7,34 @@ include { GATK4_HAPLOTYPECALLER } from '../../../../modules/gatk4/haplotypecalle workflow test_gatk4_haplotypecaller { input = [ [ id:'test' ], // meta map file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true) + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true), + [] ] fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) dict = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true) - GATK4_HAPLOTYPECALLER ( input, fasta, fai, dict, [], [], [] ) + GATK4_HAPLOTYPECALLER ( input, fasta, fai, dict, [], []) } workflow test_gatk4_haplotypecaller_cram { input = [ [ id:'test' ], // meta map file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true), - file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true) + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true), + [] ] fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) - GATK4_HAPLOTYPECALLER ( input, fasta, fai, dict, [], [], [] ) + GATK4_HAPLOTYPECALLER ( input, fasta, fai, dict, [], []) } workflow test_gatk4_haplotypecaller_intervals_dbsnp { input = [ [ id:'test' ], // meta map file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true), - file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true) + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true), + file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true) ] fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) @@ -39,7 +42,6 @@ workflow test_gatk4_haplotypecaller_intervals_dbsnp { dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) sites = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz'], checkIfExists: true) sites_tbi = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz_tbi'], checkIfExists: true) - intervals = file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true) - GATK4_HAPLOTYPECALLER ( input, fasta, fai, dict, sites, sites_tbi, intervals ) + GATK4_HAPLOTYPECALLER ( input, fasta, fai, dict, sites, sites_tbi ) } From cde237e7cec07798e5754b72aeca44efe89fc6db Mon Sep 17 00:00:00 2001 From: Jose Espinosa-Carrasco Date: Thu, 23 Dec 2021 18:22:07 +0100 Subject: [PATCH 3/4] Fix error messages mentioning suffix (#1190) --- modules/last/postmask/main.nf | 2 +- modules/plink/extract/main.nf | 2 +- modules/pmdtools/filter/main.nf | 2 +- modules/samblaster/main.nf | 2 +- modules/samtools/fixmate/main.nf | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/modules/last/postmask/main.nf b/modules/last/postmask/main.nf index fb097a11..ca981e1c 100644 --- a/modules/last/postmask/main.nf +++ b/modules/last/postmask/main.nf @@ -17,7 +17,7 @@ process LAST_POSTMASK { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - if( "$maf" == "${prefix}.maf.gz" ) error "Input and output names are the same, use the suffix option to disambiguate" + if( "$maf" == "${prefix}.maf.gz" ) error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" """ last-postmask $args $maf | gzip --no-name > ${prefix}.maf.gz diff --git a/modules/plink/extract/main.nf b/modules/plink/extract/main.nf index 9b8a52f3..688ada67 100644 --- a/modules/plink/extract/main.nf +++ b/modules/plink/extract/main.nf @@ -19,7 +19,7 @@ process PLINK_EXTRACT { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - if( "$bed" == "${prefix}.bed" ) error "Input and output names are the same, use the suffix option to disambiguate" + if( "$bed" == "${prefix}.bed" ) error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" """ plink \\ --bfile ${meta.id} \\ diff --git a/modules/pmdtools/filter/main.nf b/modules/pmdtools/filter/main.nf index 0b3bcbc6..b647b6d3 100644 --- a/modules/pmdtools/filter/main.nf +++ b/modules/pmdtools/filter/main.nf @@ -22,7 +22,7 @@ process PMDTOOLS_FILTER { def args3 = task.ext.args3 ?: '' def split_cpus = Math.floor(task.cpus/2) def prefix = task.ext.prefix ?: "${meta.id}" - if ("$bam" == "${prefix}.bam") error "[pmdtools/filter] Input and output names are the same, use the suffix option to disambiguate!" + if ("$bam" == "${prefix}.bam") error "[pmdtools/filter] Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" //threshold and header flags activate filtering function of pmdtools """ samtools \\ diff --git a/modules/samblaster/main.nf b/modules/samblaster/main.nf index c6573283..4627b978 100644 --- a/modules/samblaster/main.nf +++ b/modules/samblaster/main.nf @@ -19,7 +19,7 @@ process SAMBLASTER { def args2 = task.ext.args2 ?: '' def args3 = task.ext.args3 ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - if( "$bam" == "${prefix}.bam" ) error "Input and output names are the same, use the suffix option to disambiguate" + if( "$bam" == "${prefix}.bam" ) error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" """ samtools view -h $args2 $bam | \\ samblaster $args | \\ diff --git a/modules/samtools/fixmate/main.nf b/modules/samtools/fixmate/main.nf index 8f86c1c4..d108f8d6 100644 --- a/modules/samtools/fixmate/main.nf +++ b/modules/samtools/fixmate/main.nf @@ -17,7 +17,7 @@ process SAMTOOLS_FIXMATE { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - if ("$bam" == "${prefix}.bam") error "Input and output names are the same, use the suffix option to disambiguate!" + if ("$bam" == "${prefix}.bam") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" """ samtools \\ From c0e1489133ee9f3c1b1eebccb5344b681b8c1c3f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Guizard?= Date: Fri, 24 Dec 2021 11:32:30 +0000 Subject: [PATCH 4/4] =?UTF-8?q?=F0=9F=90=9B=20FIX:=20remove=20suffix=20opt?= =?UTF-8?q?ion=20(#1188)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- modules/lima/main.nf | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/modules/lima/main.nf b/modules/lima/main.nf index a662a7bb..691e43f2 100644 --- a/modules/lima/main.nf +++ b/modules/lima/main.nf @@ -12,9 +12,7 @@ process LIMA { path primers output: - tuple val(meta), path("*.clips") , emit: clips tuple val(meta), path("*.counts") , emit: counts - tuple val(meta), path("*.guess") , emit: guess tuple val(meta), path("*.report") , emit: report tuple val(meta), path("*.summary"), emit: summary path "versions.yml" , emit: versions @@ -27,10 +25,18 @@ process LIMA { tuple val(meta), path("*.fastq.gz") , optional: true, emit: fastqgz tuple val(meta), path("*.xml") , optional: true, emit: xml tuple val(meta), path("*.json") , optional: true, emit: json + tuple val(meta), path("*.clips") , optional: true, emit: clips + tuple val(meta), path("*.guess") , optional: true, emit: guess script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + if( "$ccs" == "${prefix}.bam" ) error "Input and output names are the same, set prefix in module configuration" + if( "$ccs" == "${prefix}.fasta" ) error "Input and output names are the same, set prefix in module configuration" + if( "$ccs" == "${prefix}.fasta.gz" ) error "Input and output names are the same, set prefix in module configuration" + if( "$ccs" == "${prefix}.fastq" ) error "Input and output names are the same, set prefix in module configuration" + if( "$ccs" == "${prefix}.fastq.gz" ) error "Input and output names are the same, set prefix in module configuration" + """ OUT_EXT="" @@ -46,7 +52,6 @@ process LIMA { OUT_EXT="fastq.gz" fi - echo \$OUT_EXT lima \\ $ccs \\ $primers \\