From 44e13ae439e0788db8afb227e2801815c459982a Mon Sep 17 00:00:00 2001 From: "Robert A. Petit III" Date: Tue, 10 May 2022 21:31:43 +0000 Subject: [PATCH 001/114] add module for snippy --- modules/snippy/run/main.nf | 55 ++++++++++++ modules/snippy/run/meta.yml | 110 +++++++++++++++++++++++ tests/config/pytest_modules.yml | 12 ++- tests/modules/snippy/run/main.nf | 16 ++++ tests/modules/snippy/run/nextflow.config | 5 ++ tests/modules/snippy/run/test.yml | 39 ++++++++ 6 files changed, 233 insertions(+), 4 deletions(-) create mode 100644 modules/snippy/run/main.nf create mode 100644 modules/snippy/run/meta.yml create mode 100644 tests/modules/snippy/run/main.nf create mode 100644 tests/modules/snippy/run/nextflow.config create mode 100644 tests/modules/snippy/run/test.yml diff --git a/modules/snippy/run/main.nf b/modules/snippy/run/main.nf new file mode 100644 index 00000000..73c4b94d --- /dev/null +++ b/modules/snippy/run/main.nf @@ -0,0 +1,55 @@ +process SNIPPY_RUN { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? "bioconda::snippy=4.6.0" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/snippy:4.6.0--hdfd78af_2 ': + 'quay.io/biocontainers/snippy:4.6.0--hdfd78af_2' }" + + input: + tuple val(meta), path(reads) + path reference + + output: + tuple val(meta), path("${prefix}/${prefix}.tab") , emit: tab + tuple val(meta), path("${prefix}/${prefix}.csv") , emit: csv + tuple val(meta), path("${prefix}/${prefix}.html") , emit: html + tuple val(meta), path("${prefix}/${prefix}.vcf") , emit: vcf + tuple val(meta), path("${prefix}/${prefix}.bed") , emit: bed + tuple val(meta), path("${prefix}/${prefix}.gff") , emit: gff + tuple val(meta), path("${prefix}/${prefix}.bam") , emit: bam + tuple val(meta), path("${prefix}/${prefix}.bam.bai") , emit: bai + tuple val(meta), path("${prefix}/${prefix}.log") , emit: log + tuple val(meta), path("${prefix}/${prefix}.aligned.fa") , emit: aligned_fa + tuple val(meta), path("${prefix}/${prefix}.consensus.fa") , emit: consensus_fa + tuple val(meta), path("${prefix}/${prefix}.consensus.subs.fa"), emit: consensus_subs_fa + tuple val(meta), path("${prefix}/${prefix}.raw.vcf") , emit: raw_vcf + tuple val(meta), path("${prefix}/${prefix}.filt.vcf") , emit: filt_vcf + tuple val(meta), path("${prefix}/${prefix}.vcf.gz") , emit: vcf_gz + tuple val(meta), path("${prefix}/${prefix}.vcf.gz.csi") , emit: vcf_csi + tuple val(meta), path("${prefix}/${prefix}.txt") , emit: txt + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def read_inputs = meta.single_end ? "--se ${reads[0]}" : "--R1 ${reads[0]} --R2 ${reads[1]}" + """ + snippy \\ + $args \\ + --cpus $task.cpus \\ + --outdir $prefix \\ + --reference $reference \\ + --prefix $prefix \\ + $read_inputs + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + snippy: \$(echo \$(snippy --version 2>&1) | sed 's/snippy //') + END_VERSIONS + """ +} diff --git a/modules/snippy/run/meta.yml b/modules/snippy/run/meta.yml new file mode 100644 index 00000000..fa176399 --- /dev/null +++ b/modules/snippy/run/meta.yml @@ -0,0 +1,110 @@ +name: snippy +description: Rapid haploid variant calling +keywords: + - variant + - fastq + - bacteria +tools: + - snippy: + description: "Rapid bacterial SNP calling and core genome alignments" + homepage: "https://github.com/tseemann/snippy" + documentation: "https://github.com/tseemann/snippy" + tool_dev_url: "https://github.com/tseemann/snippy" + doi: "" + licence: "['GPL v2']" +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + pattern: "*.{fq,fastq,fq.gz,fastq.gz}" + - index: + type: file + description: Reference genome in GenBank (preferred) or FASTA format + pattern: "*.{gbk,gbk.gz,fa,fa.gz}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - tab: + type: file + description: A simple tab-separated summary of all the variants + pattern: "*.tab" + - csv: + type: file + description: A comma-separated version of the .tab file + pattern: "*.csv" + - html: + type: file + description: A HTML version of the .tab file + pattern: "*.html" + - vcf: + type: file + description: The final annotated variants in VCF format + pattern: "*.vcf" + - bed: + type: file + description: The variants in BED format + pattern: "*.bed" + - gff: + type: file + description: The variants in GFF3 format + pattern: "*.gff" + - bam: + type: file + description: The alignments in BAM format. Includes unmapped, multimapping reads. Excludes duplicates. + pattern: "*.bam" + - bai: + type: file + description: Index for the .bam file + pattern: "*.bam.bai" + - log: + type: file + description: A log file with the commands run and their outputs + pattern: "*.log" + - aligned_fa: + type: file + description: A version of the reference but with - at position with depth=0 and N for 0 < depth < --mincov (does not have variants) + pattern: "*.aligned.fa" + - consensus_fa: + type: file + description: A version of the reference genome with all variants instantiated + pattern: "*.consensus.fa" + - consensus_subs_fa: + type: file + description: A version of the reference genome with only substitution variants instantiated + pattern: "*.consensus.subs.fa" + - raw_vcf: + type: file + description: The unfiltered variant calls from Freebayes + pattern: "*.raw.vcf" + - filt_vcf: + type: file + description: The filtered variant calls from Freebayes + pattern: "*.filt.vcf" + - vcf_gz: + type: file + description: Compressed .vcf file via BGZIP + pattern: "*.vcf.gz" + - vcf_csi: + type: file + description: Index for the .vcf.gz via bcftools index + pattern: "*.vcf.gz.csi" + - txt: + type: file + description: Tab-separated columnar list of statistics + pattern: "*.txt" +authors: + - "@rpetit3" diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index 9afe83fd..5a204ac4 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -1643,14 +1643,14 @@ samtools/bam2fq: - modules/samtools/bam2fq/** - tests/modules/samtools/bam2fq/** -samtools/convert: - - modules/samtools/convert/** - - tests/modules/samtools/convert/** - samtools/collatefastq: - modules/samtools/collatefastq/** - tests/modules/samtools/collatefastq/** +samtools/convert: + - modules/samtools/convert/** + - tests/modules/samtools/convert/** + samtools/depth: - modules/samtools/depth/** - tests/modules/samtools/depth/** @@ -1787,6 +1787,10 @@ snapaligner/index: - modules/snapaligner/index/** - tests/modules/snapaligner/index/** +snippy/run: + - modules/snippy/run/** + - tests/modules/snippy/run/** + snpdists: - modules/snpdists/** - tests/modules/snpdists/** diff --git a/tests/modules/snippy/run/main.nf b/tests/modules/snippy/run/main.nf new file mode 100644 index 00000000..bfc4082e --- /dev/null +++ b/tests/modules/snippy/run/main.nf @@ -0,0 +1,16 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { SNIPPY_RUN } from '../../../../modules/snippy/run/main.nf' + +workflow test_snippy_run { + + input = [ [ id:'test', single_end:false ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ] + ] + reference = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + + SNIPPY_RUN ( input, reference ) +} diff --git a/tests/modules/snippy/run/nextflow.config b/tests/modules/snippy/run/nextflow.config new file mode 100644 index 00000000..50f50a7a --- /dev/null +++ b/tests/modules/snippy/run/nextflow.config @@ -0,0 +1,5 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + +} \ No newline at end of file diff --git a/tests/modules/snippy/run/test.yml b/tests/modules/snippy/run/test.yml new file mode 100644 index 00000000..e38e8b9e --- /dev/null +++ b/tests/modules/snippy/run/test.yml @@ -0,0 +1,39 @@ +- name: snippy run test_snippy_run + command: nextflow run tests/modules/snippy/run -entry test_snippy_run -c tests/config/nextflow.config -c tests/modules/snippy/run/nextflow.config + tags: + - snippy + - snippy/run + files: + - path: output/snippy/test/test.aligned.fa + md5sum: 47e3390d4167edf1955d162d37aca5e3 + - path: output/snippy/test/test.bam + - path: output/snippy/test/test.bam.bai + - path: output/snippy/test/test.bed + md5sum: d41d8cd98f00b204e9800998ecf8427e + - path: output/snippy/test/test.consensus.fa + md5sum: 483f4a5dfe60171c86ee9b7e6dff908b + - path: output/snippy/test/test.consensus.subs.fa + md5sum: 483f4a5dfe60171c86ee9b7e6dff908b + - path: output/snippy/test/test.csv + md5sum: 322f942115e5945c2041a88246166703 + - path: output/snippy/test/test.filt.vcf + contains: ['fileformat', 'freebayes', 'CHROM'] + - path: output/snippy/test/test.gff + md5sum: df19e1b84ba6f691d20c72b397c88abf + - path: output/snippy/test/test.html + md5sum: 1ccbf0ffcadae1a6b2e11681d24c9938 + - path: output/snippy/test/test.log + contains: ['snippy', 'consensus', 'subs'] + - path: output/snippy/test/test.raw.vcf + contains: ['fileformat', 'freebayes', 'CHROM'] + - path: output/snippy/test/test.tab + md5sum: beb9bde3bce985e53e8feba9ec5b136e + - path: output/snippy/test/test.txt + contains: ['DateTime', 'ReadFiles', 'VariantTotal'] + - path: output/snippy/test/test.vcf + contains: ['fileformat', 'freebayes', 'CHROM'] + - path: output/snippy/test/test.vcf.gz + - path: output/snippy/test/test.vcf.gz.csi + md5sum: bed9fa291c220a1ba04eb2d448932ffc + - path: output/snippy/versions.yml + md5sum: 518aad56c4dbefb6cbcde5ab38cf7b5d From f15848d4774f095f43594516769fde889a7a3d12 Mon Sep 17 00:00:00 2001 From: "Robert A. Petit III" Date: Tue, 10 May 2022 21:44:40 +0000 Subject: [PATCH 002/114] fix typo --- modules/snippy/run/meta.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/snippy/run/meta.yml b/modules/snippy/run/meta.yml index fa176399..13c923d8 100644 --- a/modules/snippy/run/meta.yml +++ b/modules/snippy/run/meta.yml @@ -100,7 +100,7 @@ output: pattern: "*.vcf.gz" - vcf_csi: type: file - description: Index for the .vcf.gz via bcftools index + description: Index for the .vcf.gz via bcftools index pattern: "*.vcf.gz.csi" - txt: type: file From 56e94f625e69e48de7c218b79857cc3a59cb9220 Mon Sep 17 00:00:00 2001 From: "Robert A. Petit III" Date: Tue, 10 May 2022 21:47:11 +0000 Subject: [PATCH 003/114] remove empty file md5 --- tests/modules/snippy/run/test.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/modules/snippy/run/test.yml b/tests/modules/snippy/run/test.yml index e38e8b9e..ac945e88 100644 --- a/tests/modules/snippy/run/test.yml +++ b/tests/modules/snippy/run/test.yml @@ -9,7 +9,6 @@ - path: output/snippy/test/test.bam - path: output/snippy/test/test.bam.bai - path: output/snippy/test/test.bed - md5sum: d41d8cd98f00b204e9800998ecf8427e - path: output/snippy/test/test.consensus.fa md5sum: 483f4a5dfe60171c86ee9b7e6dff908b - path: output/snippy/test/test.consensus.subs.fa From ef2996e4a5f4f74b4b49bbbd308c3142de8d894d Mon Sep 17 00:00:00 2001 From: "Robert A. Petit III" Date: Tue, 10 May 2022 21:47:57 +0000 Subject: [PATCH 004/114] match names in meta and main --- modules/snippy/run/meta.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/snippy/run/meta.yml b/modules/snippy/run/meta.yml index 13c923d8..2eb008a1 100644 --- a/modules/snippy/run/meta.yml +++ b/modules/snippy/run/meta.yml @@ -1,4 +1,4 @@ -name: snippy +name: snippy_run description: Rapid haploid variant calling keywords: - variant From 7ced6e20de5a69a25fb57b00244a49329709186a Mon Sep 17 00:00:00 2001 From: "Robert A. Petit III" Date: Wed, 11 May 2022 18:00:36 +0000 Subject: [PATCH 005/114] make prettier --- tests/modules/snippy/run/test.yml | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/tests/modules/snippy/run/test.yml b/tests/modules/snippy/run/test.yml index ac945e88..10e24f55 100644 --- a/tests/modules/snippy/run/test.yml +++ b/tests/modules/snippy/run/test.yml @@ -1,8 +1,9 @@ - name: snippy run test_snippy_run - command: nextflow run tests/modules/snippy/run -entry test_snippy_run -c tests/config/nextflow.config -c tests/modules/snippy/run/nextflow.config + command: | + nextflow run tests/modules/snippy/run -entry test_snippy_run -c tests/config/nextflow.config -c tests/modules/snippy/run/nextflow.config tags: - - snippy - snippy/run + - snippy files: - path: output/snippy/test/test.aligned.fa md5sum: 47e3390d4167edf1955d162d37aca5e3 @@ -16,21 +17,21 @@ - path: output/snippy/test/test.csv md5sum: 322f942115e5945c2041a88246166703 - path: output/snippy/test/test.filt.vcf - contains: ['fileformat', 'freebayes', 'CHROM'] + contains: ["fileformat", "freebayes", "CHROM"] - path: output/snippy/test/test.gff md5sum: df19e1b84ba6f691d20c72b397c88abf - path: output/snippy/test/test.html md5sum: 1ccbf0ffcadae1a6b2e11681d24c9938 - path: output/snippy/test/test.log - contains: ['snippy', 'consensus', 'subs'] + contains: ["snippy", "consensus", "subs"] - path: output/snippy/test/test.raw.vcf - contains: ['fileformat', 'freebayes', 'CHROM'] + contains: ["fileformat", "freebayes", "CHROM"] - path: output/snippy/test/test.tab md5sum: beb9bde3bce985e53e8feba9ec5b136e - path: output/snippy/test/test.txt - contains: ['DateTime', 'ReadFiles', 'VariantTotal'] + contains: ["DateTime", "ReadFiles", "VariantTotal"] - path: output/snippy/test/test.vcf - contains: ['fileformat', 'freebayes', 'CHROM'] + contains: ["fileformat", "freebayes", "CHROM"] - path: output/snippy/test/test.vcf.gz - path: output/snippy/test/test.vcf.gz.csi md5sum: bed9fa291c220a1ba04eb2d448932ffc From dd6d838cd7ac2b15dd5e56ecb1dd5b3cea63d78c Mon Sep 17 00:00:00 2001 From: SusiJo Date: Wed, 25 May 2022 16:37:44 +0200 Subject: [PATCH 006/114] override userEmulation --- tests/modules/gatk4/markduplicatesspark/nextflow.config | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/modules/gatk4/markduplicatesspark/nextflow.config b/tests/modules/gatk4/markduplicatesspark/nextflow.config index 8730f1c4..c66f00e6 100644 --- a/tests/modules/gatk4/markduplicatesspark/nextflow.config +++ b/tests/modules/gatk4/markduplicatesspark/nextflow.config @@ -3,3 +3,6 @@ process { publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } } +// necessary to override tests/config/nextflow.config +docker.userEmulation = false + From 1400f2e68361fc04642b7c95a0cdf50e098bf926 Mon Sep 17 00:00:00 2001 From: SusiJo Date: Wed, 25 May 2022 16:38:30 +0200 Subject: [PATCH 007/114] fixed tests - mixed chr21 + chr22 --- .../modules/gatk4/markduplicatesspark/main.nf | 24 +++++++++++++++---- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/tests/modules/gatk4/markduplicatesspark/main.nf b/tests/modules/gatk4/markduplicatesspark/main.nf index 2f294f59..cd02c240 100644 --- a/tests/modules/gatk4/markduplicatesspark/main.nf +++ b/tests/modules/gatk4/markduplicatesspark/main.nf @@ -6,18 +6,32 @@ include { GATK4_MARKDUPLICATES_SPARK } from '../../../../modules/gatk4/markdupli workflow test_gatk4_markduplicates_spark { input = [ [ id:'test', single_end:false ], // meta map - file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) ] - fasta = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true) - fai = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta_fai'], checkIfExists: true) - dict = file(params.test_data['homo_sapiens']['genome']['genome_21_dict'], checkIfExists: true) + fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true) GATK4_MARKDUPLICATES_SPARK ( input, fasta, fai, dict ) } +// chr 22 workflow test_gatk4_markduplicates_spark_multiple_bams { input = [ [ id:'test', single_end:false ], // meta map - [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam'], checkIfExists: true) + ] ] + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) + + GATK4_MARKDUPLICATES_SPARK ( input, fasta, fai, dict ) +} + +// chr 21 +workflow test_gatk4_markduplicates_spark_multiple_bams_21 { + input = [ [ id:'test', single_end:false ], // meta map + [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam'], checkIfExists: true) ] ] fasta = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true) From b34e5ea6bf56d25536324f593e9797a3d9c4ccc7 Mon Sep 17 00:00:00 2001 From: SusiJo Date: Wed, 25 May 2022 16:38:48 +0200 Subject: [PATCH 008/114] rm unnecessary spark_user --- modules/gatk4/markduplicatesspark/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/gatk4/markduplicatesspark/main.nf b/modules/gatk4/markduplicatesspark/main.nf index 77e135db..8265e16f 100644 --- a/modules/gatk4/markduplicatesspark/main.nf +++ b/modules/gatk4/markduplicatesspark/main.nf @@ -25,6 +25,7 @@ process GATK4_MARKDUPLICATES_SPARK { prefix = task.ext.prefix ?: "${meta.id}" def input_list = bam.collect{"--input $it"}.join(' ') + def avail_mem = 3 if (!task.memory) { log.info '[GATK MarkDuplicatesSpark] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' @@ -32,7 +33,6 @@ process GATK4_MARKDUPLICATES_SPARK { avail_mem = task.memory.giga } """ - export SPARK_USER=spark3 gatk --java-options "-Xmx${avail_mem}g" MarkDuplicatesSpark \\ $input_list \\ From 63db63757cab03cfa7a02c0d0f134b66fbfadea6 Mon Sep 17 00:00:00 2001 From: Matthias De Smet <11850640+matthdsm@users.noreply.github.com> Date: Mon, 30 May 2022 10:15:40 +0200 Subject: [PATCH 009/114] bump picard versions --- modules/picard/addorreplacereadgroups/main.nf | 6 +++--- modules/picard/cleansam/main.nf | 6 +++--- modules/picard/collecthsmetrics/main.nf | 6 +++--- modules/picard/collectmultiplemetrics/main.nf | 6 +++--- modules/picard/collectwgsmetrics/main.nf | 6 +++--- modules/picard/createsequencedictionary/main.nf | 6 +++--- modules/picard/crosscheckfingerprints/main.nf | 6 +++--- modules/picard/filtersamreads/main.nf | 6 +++--- modules/picard/fixmateinformation/main.nf | 6 +++--- modules/picard/liftovervcf/main.nf | 6 +++--- modules/picard/markduplicates/main.nf | 6 +++--- modules/picard/mergesamfiles/main.nf | 6 +++--- modules/picard/sortsam/main.nf | 6 +++--- modules/picard/sortvcf/main.nf | 6 +++--- 14 files changed, 42 insertions(+), 42 deletions(-) diff --git a/modules/picard/addorreplacereadgroups/main.nf b/modules/picard/addorreplacereadgroups/main.nf index fd102f67..aff83c9f 100644 --- a/modules/picard/addorreplacereadgroups/main.nf +++ b/modules/picard/addorreplacereadgroups/main.nf @@ -2,10 +2,10 @@ process PICARD_ADDORREPLACEREADGROUPS { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::picard=2.27.1" : null) + conda (params.enable_conda ? "bioconda::picard=2.27.2" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/picard:2.27.1--hdfd78af_0' : - 'quay.io/biocontainers/picard:2.27.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/picard:2.27.2--hdfd78af_0' : + 'quay.io/biocontainers/picard:2.27.2--hdfd78af_0' }" input: tuple val(meta), path(bam) diff --git a/modules/picard/cleansam/main.nf b/modules/picard/cleansam/main.nf index 62989565..0042c3f1 100644 --- a/modules/picard/cleansam/main.nf +++ b/modules/picard/cleansam/main.nf @@ -2,10 +2,10 @@ process PICARD_CLEANSAM { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::picard=2.27.1" : null) + conda (params.enable_conda ? "bioconda::picard=2.27.2" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/picard:2.27.1--hdfd78af_0' : - 'quay.io/biocontainers/picard:2.27.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/picard:2.27.2--hdfd78af_0' : + 'quay.io/biocontainers/picard:2.27.2--hdfd78af_0' }" input: tuple val(meta), path(bam) diff --git a/modules/picard/collecthsmetrics/main.nf b/modules/picard/collecthsmetrics/main.nf index 06b6b8f3..52d5c84a 100644 --- a/modules/picard/collecthsmetrics/main.nf +++ b/modules/picard/collecthsmetrics/main.nf @@ -2,10 +2,10 @@ process PICARD_COLLECTHSMETRICS { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::picard=2.27.1" : null) + conda (params.enable_conda ? "bioconda::picard=2.27.2" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/picard:2.27.1--hdfd78af_0' : - 'quay.io/biocontainers/picard:2.27.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/picard:2.27.2--hdfd78af_0' : + 'quay.io/biocontainers/picard:2.27.2--hdfd78af_0' }" input: tuple val(meta), path(bam) diff --git a/modules/picard/collectmultiplemetrics/main.nf b/modules/picard/collectmultiplemetrics/main.nf index 3c3152df..6fe9881b 100644 --- a/modules/picard/collectmultiplemetrics/main.nf +++ b/modules/picard/collectmultiplemetrics/main.nf @@ -2,10 +2,10 @@ process PICARD_COLLECTMULTIPLEMETRICS { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::picard=2.27.1" : null) + conda (params.enable_conda ? "bioconda::picard=2.27.2" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/picard:2.27.1--hdfd78af_0' : - 'quay.io/biocontainers/picard:2.27.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/picard:2.27.2--hdfd78af_0' : + 'quay.io/biocontainers/picard:2.27.2--hdfd78af_0' }" input: tuple val(meta), path(bam) diff --git a/modules/picard/collectwgsmetrics/main.nf b/modules/picard/collectwgsmetrics/main.nf index cab1fe0c..e231afe2 100644 --- a/modules/picard/collectwgsmetrics/main.nf +++ b/modules/picard/collectwgsmetrics/main.nf @@ -2,10 +2,10 @@ process PICARD_COLLECTWGSMETRICS { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::picard=2.27.1" : null) + conda (params.enable_conda ? "bioconda::picard=2.27.2" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/picard:2.27.1--hdfd78af_0' : - 'quay.io/biocontainers/picard:2.27.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/picard:2.27.2--hdfd78af_0' : + 'quay.io/biocontainers/picard:2.27.2--hdfd78af_0' }" input: tuple val(meta), path(bam) diff --git a/modules/picard/createsequencedictionary/main.nf b/modules/picard/createsequencedictionary/main.nf index 2348c496..16a5ff19 100644 --- a/modules/picard/createsequencedictionary/main.nf +++ b/modules/picard/createsequencedictionary/main.nf @@ -2,10 +2,10 @@ process PICARD_CREATESEQUENCEDICTIONARY { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::picard=2.27.1" : null) + conda (params.enable_conda ? "bioconda::picard=2.27.2" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/picard:2.27.1--hdfd78af_0' : - 'quay.io/biocontainers/picard:2.27.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/picard:2.27.2--hdfd78af_0' : + 'quay.io/biocontainers/picard:2.27.2--hdfd78af_0' }" input: tuple val(meta), path(fasta) diff --git a/modules/picard/crosscheckfingerprints/main.nf b/modules/picard/crosscheckfingerprints/main.nf index d3b59402..c7c4766c 100644 --- a/modules/picard/crosscheckfingerprints/main.nf +++ b/modules/picard/crosscheckfingerprints/main.nf @@ -2,10 +2,10 @@ process PICARD_CROSSCHECKFINGERPRINTS { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::picard=2.27.1" : null) + conda (params.enable_conda ? "bioconda::picard=2.27.2" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/picard:2.27.1--hdfd78af_0' : - 'quay.io/biocontainers/picard:2.27.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/picard:2.27.2--hdfd78af_0' : + 'quay.io/biocontainers/picard:2.27.2--hdfd78af_0' }" input: tuple val(meta), path(input1) diff --git a/modules/picard/filtersamreads/main.nf b/modules/picard/filtersamreads/main.nf index adedcdc2..e17c4686 100644 --- a/modules/picard/filtersamreads/main.nf +++ b/modules/picard/filtersamreads/main.nf @@ -2,10 +2,10 @@ process PICARD_FILTERSAMREADS { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::picard=2.27.1" : null) + conda (params.enable_conda ? "bioconda::picard=2.27.2" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/picard:2.27.1--hdfd78af_0' : - 'quay.io/biocontainers/picard:2.27.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/picard:2.27.2--hdfd78af_0' : + 'quay.io/biocontainers/picard:2.27.2--hdfd78af_0' }" input: tuple val(meta), path(bam), path(readlist) diff --git a/modules/picard/fixmateinformation/main.nf b/modules/picard/fixmateinformation/main.nf index 539b1082..616acce8 100644 --- a/modules/picard/fixmateinformation/main.nf +++ b/modules/picard/fixmateinformation/main.nf @@ -2,10 +2,10 @@ process PICARD_FIXMATEINFORMATION { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::picard=2.27.1" : null) + conda (params.enable_conda ? "bioconda::picard=2.27.2" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/picard:2.27.1--hdfd78af_0' : - 'quay.io/biocontainers/picard:2.27.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/picard:2.27.2--hdfd78af_0' : + 'quay.io/biocontainers/picard:2.27.2--hdfd78af_0' }" input: tuple val(meta), path(bam) diff --git a/modules/picard/liftovervcf/main.nf b/modules/picard/liftovervcf/main.nf index c92abfeb..a82f1466 100644 --- a/modules/picard/liftovervcf/main.nf +++ b/modules/picard/liftovervcf/main.nf @@ -2,10 +2,10 @@ process PICARD_LIFTOVERVCF { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::picard=2.27.1" : null) + conda (params.enable_conda ? "bioconda::picard=2.27.2" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/picard:2.27.1--hdfd78af_0' : - 'quay.io/biocontainers/picard:2.27.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/picard:2.27.2--hdfd78af_0' : + 'quay.io/biocontainers/picard:2.27.2--hdfd78af_0' }" input: tuple val(meta), path(input_vcf) diff --git a/modules/picard/markduplicates/main.nf b/modules/picard/markduplicates/main.nf index 1565c647..87f913d4 100644 --- a/modules/picard/markduplicates/main.nf +++ b/modules/picard/markduplicates/main.nf @@ -2,10 +2,10 @@ process PICARD_MARKDUPLICATES { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::picard=2.27.1" : null) + conda (params.enable_conda ? "bioconda::picard=2.27.2" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/picard:2.27.1--hdfd78af_0' : - 'quay.io/biocontainers/picard:2.27.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/picard:2.27.2--hdfd78af_0' : + 'quay.io/biocontainers/picard:2.27.2--hdfd78af_0' }" input: tuple val(meta), path(bam) diff --git a/modules/picard/mergesamfiles/main.nf b/modules/picard/mergesamfiles/main.nf index 1e32c63a..d65f518c 100644 --- a/modules/picard/mergesamfiles/main.nf +++ b/modules/picard/mergesamfiles/main.nf @@ -2,10 +2,10 @@ process PICARD_MERGESAMFILES { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::picard=2.27.1" : null) + conda (params.enable_conda ? "bioconda::picard=2.27.2" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/picard:2.27.1--hdfd78af_0' : - 'quay.io/biocontainers/picard:2.27.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/picard:2.27.2--hdfd78af_0' : + 'quay.io/biocontainers/picard:2.27.2--hdfd78af_0' }" input: tuple val(meta), path(bams) diff --git a/modules/picard/sortsam/main.nf b/modules/picard/sortsam/main.nf index cee60fd1..5e09d907 100644 --- a/modules/picard/sortsam/main.nf +++ b/modules/picard/sortsam/main.nf @@ -2,10 +2,10 @@ process PICARD_SORTSAM { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::picard=2.27.1" : null) + conda (params.enable_conda ? "bioconda::picard=2.27.2" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/picard:2.27.1--hdfd78af_0' : - 'quay.io/biocontainers/picard:2.27.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/picard:2.27.2--hdfd78af_0' : + 'quay.io/biocontainers/picard:2.27.2--hdfd78af_0' }" input: tuple val(meta), path(bam) diff --git a/modules/picard/sortvcf/main.nf b/modules/picard/sortvcf/main.nf index fb8dbb79..8f886393 100644 --- a/modules/picard/sortvcf/main.nf +++ b/modules/picard/sortvcf/main.nf @@ -2,10 +2,10 @@ process PICARD_SORTVCF { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::picard=2.27.1" : null) + conda (params.enable_conda ? "bioconda::picard=2.27.2" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/picard:2.27.1--hdfd78af_0' : - 'quay.io/biocontainers/picard:2.27.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/picard:2.27.2--hdfd78af_0' : + 'quay.io/biocontainers/picard:2.27.2--hdfd78af_0' }" input: tuple val(meta), path(vcf) From 7ffe98ac8bd3463087a3878c924c653330bfd0c9 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Mon, 30 May 2022 21:12:21 +0200 Subject: [PATCH 010/114] Add gatk/indelrealigner --- modules/gatk/indelrealigner/main.nf | 43 +++++++++++++ modules/gatk/indelrealigner/meta.yml | 63 +++++++++++++++++++ tests/config/pytest_modules.yml | 4 ++ tests/modules/gatk/indelrealigner/main.nf | 22 +++++++ .../gatk/indelrealigner/nextflow.config | 6 ++ tests/modules/gatk/indelrealigner/test.yml | 14 +++++ 6 files changed, 152 insertions(+) create mode 100644 modules/gatk/indelrealigner/main.nf create mode 100644 modules/gatk/indelrealigner/meta.yml create mode 100644 tests/modules/gatk/indelrealigner/main.nf create mode 100644 tests/modules/gatk/indelrealigner/nextflow.config create mode 100644 tests/modules/gatk/indelrealigner/test.yml diff --git a/modules/gatk/indelrealigner/main.nf b/modules/gatk/indelrealigner/main.nf new file mode 100644 index 00000000..117a298b --- /dev/null +++ b/modules/gatk/indelrealigner/main.nf @@ -0,0 +1,43 @@ +process GATK_INDELREALIGNER { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? "bioconda::gatk=3.5" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gatk:3.5--hdfd78af_11': + 'quay.io/biocontainers/gatk:3.5--hdfd78af_11' }" + + input: + tuple val(meta), path(bam), path(bai), path(intervals) + tuple val(meta), path(fasta) + tuple val(meta), path(known_vcf) + + output: + tuple val(meta), path("*.bam"), emit: bam + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + if ("$bam" == "${prefix}.bam") error "Input and output names are the same, set prefix in module configuration to disambiguate!" + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def known = known_vcf ? "-known ${known_vcf}" : "" + """ + gatk3 \\ + -T RealignerTargetCreator \\ + -R ${fasta} \\ + -nt ${task.cpus} + -I ${bam} \\ + -targetIntervals ${intervals} \\ + ${known} \\ + -o ${prefix}.bam \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk: \$(echo \$(gatk3 --version)) + END_VERSIONS + """ +} diff --git a/modules/gatk/indelrealigner/meta.yml b/modules/gatk/indelrealigner/meta.yml new file mode 100644 index 00000000..80abc6aa --- /dev/null +++ b/modules/gatk/indelrealigner/meta.yml @@ -0,0 +1,63 @@ +name: "gatk_indelrealigner" +description: Performs local realignment around indels to correct for mapping errors +keywords: + - bam + - vcf + - variant calling + - indel + - realignment +tools: + - "gatk": + description: "The full Genome Analysis Toolkit (GATK) framework, license restricted." + homepage: "https://gatk.broadinstitute.org/hc/en-us" + documentation: "https://github.com/broadinstitute/gatk-docs" + licence: "['https://software.broadinstitute.org/gatk/download/licensing', 'BSD', 'https://www.broadinstitute.org/gatk/about/#licensing']" + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: Sorted and indexed BAM/CRAM/SAM file + pattern: "*.bam" + - bai: + type: file + description: BAM index file + pattern: "*.bai" + - intervals: + type: file + description: Intervals file created by gatk3 RealignerTargetCreator + pattern: "*.{intervals,list}" + - fasta: + type: file + description: Reference file used to generate BAM file + pattern: ".{fasta,fa,fna}" + - known_vcf: + type: file + description: Optional input VCF file(s) with known indels + pattern: ".vcf" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - bam: + type: file + description: Sorted and indexed BAM/CRAM/SAM file with local realignment around variants + pattern: "*.bam" + - bai: + type: file + description: Output BAM Index file + pattern: "*.bai" + +authors: + - "@jfy133" diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index 3ff58b5c..8bb9d305 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -715,6 +715,10 @@ gamma/gamma: - modules/gamma/gamma/** - tests/modules/gamma/gamma/** +gatk/indelrealigner: + - modules/gatk/indelrealigner/** + - tests/modules/gatk/indelrealigner/** + gatk4/applybqsr: - modules/gatk4/applybqsr/** - tests/modules/gatk4/applybqsr/** diff --git a/tests/modules/gatk/indelrealigner/main.nf b/tests/modules/gatk/indelrealigner/main.nf new file mode 100644 index 00000000..687e12a9 --- /dev/null +++ b/tests/modules/gatk/indelrealigner/main.nf @@ -0,0 +1,22 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { GATK_INDELREALIGNER } from '../../../../modules/gatk/indelrealigner/main.nf' + +// TODO add REalignerTargetCrator + + +workflow test_gatk_indelrealigner { + + input = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_paired_end_bai'], checkIfExists: true), + GATK_REALIGNERTARGETCREATOR.out.intervals + ] + + reference = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + + GATK_INDELREALIGNER ( input, reference, [] ) +} diff --git a/tests/modules/gatk/indelrealigner/nextflow.config b/tests/modules/gatk/indelrealigner/nextflow.config new file mode 100644 index 00000000..2e6ae57f --- /dev/null +++ b/tests/modules/gatk/indelrealigner/nextflow.config @@ -0,0 +1,6 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + ext.prefix = "${meta.id}.realigned" + +} diff --git a/tests/modules/gatk/indelrealigner/test.yml b/tests/modules/gatk/indelrealigner/test.yml new file mode 100644 index 00000000..59fc394f --- /dev/null +++ b/tests/modules/gatk/indelrealigner/test.yml @@ -0,0 +1,14 @@ +## TODO nf-core: Please run the following command to build this file: +# nf-core modules create-test-yml gatk/indelrealigner +- name: "gatk indelrealigner" + command: nextflow run ./tests/modules/gatk/indelrealigner -entry test_gatk_indelrealigner -c ./tests/config/nextflow.config -c ./tests/modules/gatk/indelrealigner/nextflow.config + tags: + - "gatk" + # + - "gatk/indelrealigner" + # + files: + - path: "output/gatk/test.bam" + md5sum: e667c7caad0bc4b7ac383fd023c654fc + - path: output/gatk/versions.yml + md5sum: a01fe51bc4c6a3a6226fbf77b2c7cf3b From 39e34418c90f822f301ab4b2959ced36a4f70f05 Mon Sep 17 00:00:00 2001 From: SusiJo Date: Tue, 31 May 2022 14:34:50 +0200 Subject: [PATCH 011/114] added metrics as optional output --- modules/gatk4/markduplicatesspark/main.nf | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/modules/gatk4/markduplicatesspark/main.nf b/modules/gatk4/markduplicatesspark/main.nf index 8265e16f..eeaf0445 100644 --- a/modules/gatk4/markduplicatesspark/main.nf +++ b/modules/gatk4/markduplicatesspark/main.nf @@ -14,15 +14,16 @@ process GATK4_MARKDUPLICATES_SPARK { path dict output: - tuple val(meta), path("${prefix}"), emit: output - path "versions.yml" , emit: versions + tuple val(meta), path("*.bam"), emit: bam + tuple val(meta), path("*.metrics"), emit: metrics, optional: true + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when script: def args = task.ext.args ?: '' - prefix = task.ext.prefix ?: "${meta.id}" + def prefix = task.ext.prefix ?: "${meta.id}" + ".bam" def input_list = bam.collect{"--input $it"}.join(' ') From e0e8273f7462926b6e58e12183d2ca3484f9e07e Mon Sep 17 00:00:00 2001 From: SusiJo Date: Tue, 31 May 2022 14:35:18 +0200 Subject: [PATCH 012/114] adjusted input to use queryname sorted bams --- .../modules/gatk4/markduplicatesspark/main.nf | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/tests/modules/gatk4/markduplicatesspark/main.nf b/tests/modules/gatk4/markduplicatesspark/main.nf index cd02c240..ac89b1b3 100644 --- a/tests/modules/gatk4/markduplicatesspark/main.nf +++ b/tests/modules/gatk4/markduplicatesspark/main.nf @@ -3,6 +3,7 @@ nextflow.enable.dsl = 2 include { GATK4_MARKDUPLICATES_SPARK } from '../../../../modules/gatk4/markduplicatesspark/main.nf' +include { GATK4_MARKDUPLICATES_SPARK as GATK4_MARKDUPLICATES_SPARK_METRICS } from '../../../../modules/gatk4/markduplicatesspark/main.nf' workflow test_gatk4_markduplicates_spark { input = [ [ id:'test', single_end:false ], // meta map @@ -18,8 +19,8 @@ workflow test_gatk4_markduplicates_spark { // chr 22 workflow test_gatk4_markduplicates_spark_multiple_bams { input = [ [ id:'test', single_end:false ], // meta map - [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), - file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam'], checkIfExists: true) + [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_name_sorted_bam'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_name_sorted_bam'], checkIfExists: true) ] ] fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) @@ -28,15 +29,15 @@ workflow test_gatk4_markduplicates_spark_multiple_bams { GATK4_MARKDUPLICATES_SPARK ( input, fasta, fai, dict ) } -// chr 21 -workflow test_gatk4_markduplicates_spark_multiple_bams_21 { +// chr 22 +workflow test_gatk4_markduplicates_spark_multiple_bams_metrics { input = [ [ id:'test', single_end:false ], // meta map - [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), - file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam'], checkIfExists: true) + [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_name_sorted_bam'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_name_sorted_bam'], checkIfExists: true) ] ] - fasta = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true) - fai = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta_fai'], checkIfExists: true) - dict = file(params.test_data['homo_sapiens']['genome']['genome_21_dict'], checkIfExists: true) + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) - GATK4_MARKDUPLICATES_SPARK ( input, fasta, fai, dict ) + GATK4_MARKDUPLICATES_SPARK_METRICS ( input, fasta, fai, dict ) } From 3117b2973c44677217c2e128890949eba6c94451 Mon Sep 17 00:00:00 2001 From: SusiJo Date: Tue, 31 May 2022 14:35:29 +0200 Subject: [PATCH 013/114] added test for metrics --- tests/modules/gatk4/markduplicatesspark/nextflow.config | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/modules/gatk4/markduplicatesspark/nextflow.config b/tests/modules/gatk4/markduplicatesspark/nextflow.config index c66f00e6..495a42f3 100644 --- a/tests/modules/gatk4/markduplicatesspark/nextflow.config +++ b/tests/modules/gatk4/markduplicatesspark/nextflow.config @@ -2,7 +2,10 @@ process { publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + withName: GATK4_MARKDUPLICATES_SPARK_METRICS { + ext.args = '--metrics-file test.metrics' + } } -// necessary to override tests/config/nextflow.config +// override tests/config/nextflow.config docker.userEmulation = false From ea526051a44a8b25a943f6d58f789a0745b853d5 Mon Sep 17 00:00:00 2001 From: SusiJo Date: Tue, 31 May 2022 14:35:42 +0200 Subject: [PATCH 014/114] new test.yml --- .../gatk4/markduplicatesspark/test.yml | 32 +++++++++++-------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/tests/modules/gatk4/markduplicatesspark/test.yml b/tests/modules/gatk4/markduplicatesspark/test.yml index b0c0b40d..34fe0101 100644 --- a/tests/modules/gatk4/markduplicatesspark/test.yml +++ b/tests/modules/gatk4/markduplicatesspark/test.yml @@ -1,25 +1,31 @@ -- name: gatk4 markduplicates test_gatk4_markduplicates_spark - command: nextflow run tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark -c tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config +- name: gatk4 markduplicatesspark test_gatk4_markduplicates_spark + command: nextflow run ./tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config tags: - gatk4 - gatk4/markduplicatesspark files: - - path: output/gatk4/test.bai - md5sum: e9c125e82553209933883b4fe2b8d7c2 - path: output/gatk4/test.bam - md5sum: 2efd50b2e6b7fd9bdf242cd9e266cfa9 - - path: output/gatk4/test.metrics - - path: output/gatk4/versions.yml + md5sum: dc1a09ac6371aab7c50d1a554baa06d3 -- name: gatk4 markduplicates test_gatk4_markduplicates_spark_multiple_bams - command: nextflow run tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark_multiple_bams -c tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config +- name: gatk4 markduplicatesspark test_gatk4_markduplicates_spark_multiple_bams + command: nextflow run ./tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark_multiple_bams -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config tags: - gatk4 - gatk4/markduplicatesspark files: - - path: output/gatk4/test.bai - md5sum: bad71df9c876e72a5bc0a3e0fd755f92 - path: output/gatk4/test.bam - md5sum: 8187febc6108ffef7f907e89b9c091a4 + md5sum: 898cb0a6616897d8ada90bab53bf0837 + +- name: gatk4 markduplicatesspark test_gatk4_markduplicates_spark_multiple_bams_metrics + command: nextflow run ./tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark_multiple_bams_metrics -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config + tags: + - gatk4 + - gatk4/markduplicatesspark + files: + - path: output/gatk4/test.bam + md5sum: 898cb0a6616897d8ada90bab53bf0837 - path: output/gatk4/test.metrics - - path: output/gatk4/versions.yml + contains: '[ ## METRICS CLASS org.broadinstitute.hellbender.utils.read.markduplicates.GATKDuplicationMetrics + LIBRARY UNPAIRED_READS_EXAMINED READ_PAIRS_EXAMINED SECONDARY_OR_SUPPLEMENTARY_RDS UNMAPPED_READS UNPAIRED_READ_DUPLICATES READ_PAIR_DUPLICATES READ_PAIR_OPTICAL_DUPLICATESPERCENT_DUPLICATION ESTIMATED_LIBRARY_SIZE + testN 0 2820 2 2 0 828 0 0.293617 3807 + testT 0 2718 0 0 0 716 0 0.263429 4206 ]' From e083bfafa0b34f69ba1dfa00e38d58d0b565572b Mon Sep 17 00:00:00 2001 From: SusiJo Date: Tue, 31 May 2022 14:38:45 +0200 Subject: [PATCH 015/114] ran prettier --- tests/modules/gatk4/markduplicatesspark/test.yml | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/modules/gatk4/markduplicatesspark/test.yml b/tests/modules/gatk4/markduplicatesspark/test.yml index 34fe0101..a0631183 100644 --- a/tests/modules/gatk4/markduplicatesspark/test.yml +++ b/tests/modules/gatk4/markduplicatesspark/test.yml @@ -25,7 +25,8 @@ - path: output/gatk4/test.bam md5sum: 898cb0a6616897d8ada90bab53bf0837 - path: output/gatk4/test.metrics - contains: '[ ## METRICS CLASS org.broadinstitute.hellbender.utils.read.markduplicates.GATKDuplicationMetrics - LIBRARY UNPAIRED_READS_EXAMINED READ_PAIRS_EXAMINED SECONDARY_OR_SUPPLEMENTARY_RDS UNMAPPED_READS UNPAIRED_READ_DUPLICATES READ_PAIR_DUPLICATES READ_PAIR_OPTICAL_DUPLICATESPERCENT_DUPLICATION ESTIMATED_LIBRARY_SIZE - testN 0 2820 2 2 0 828 0 0.293617 3807 - testT 0 2718 0 0 0 716 0 0.263429 4206 ]' + contains: + "[ ## METRICS CLASS org.broadinstitute.hellbender.utils.read.markduplicates.GATKDuplicationMetrics + LIBRARY UNPAIRED_READS_EXAMINED READ_PAIRS_EXAMINED SECONDARY_OR_SUPPLEMENTARY_RDS UNMAPPED_READS UNPAIRED_READ_DUPLICATES READ_PAIR_DUPLICATES READ_PAIR_OPTICAL_DUPLICATESPERCENT_DUPLICATION ESTIMATED_LIBRARY_SIZE + testN 0 2820 2 2 0 828 0 0.293617 3807 + testT 0 2718 0 0 0 716 0 0.263429 4206 ]" From d484aa190636690a58941aa90f95a08010fa6f4f Mon Sep 17 00:00:00 2001 From: SusiJo Date: Tue, 31 May 2022 14:45:23 +0200 Subject: [PATCH 016/114] adjusted tabs --- tests/modules/gatk4/markduplicatesspark/test.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/modules/gatk4/markduplicatesspark/test.yml b/tests/modules/gatk4/markduplicatesspark/test.yml index a0631183..862b8a16 100644 --- a/tests/modules/gatk4/markduplicatesspark/test.yml +++ b/tests/modules/gatk4/markduplicatesspark/test.yml @@ -26,7 +26,7 @@ md5sum: 898cb0a6616897d8ada90bab53bf0837 - path: output/gatk4/test.metrics contains: - "[ ## METRICS CLASS org.broadinstitute.hellbender.utils.read.markduplicates.GATKDuplicationMetrics - LIBRARY UNPAIRED_READS_EXAMINED READ_PAIRS_EXAMINED SECONDARY_OR_SUPPLEMENTARY_RDS UNMAPPED_READS UNPAIRED_READ_DUPLICATES READ_PAIR_DUPLICATES READ_PAIR_OPTICAL_DUPLICATESPERCENT_DUPLICATION ESTIMATED_LIBRARY_SIZE - testN 0 2820 2 2 0 828 0 0.293617 3807 - testT 0 2718 0 0 0 716 0 0.263429 4206 ]" + "## METRICS CLASS\torg.broadinstitute.hellbender.utils.read.markduplicates.GATKDuplicationMetrics + LIBRARY\tUNPAIRED_READS_EXAMINED READ_PAIRS_EXAMINED\tSECONDARY_OR_SUPPLEMENTARY_RDS\tUNMAPPED_READS\tUNPAIRED_READ_DUPLICATES\tREAD_PAIR_DUPLICATES\tREAD_PAIR_OPTICAL_DUPLICATESPERCENT_DUPLICATION\tESTIMATED_LIBRARY_SIZE + testN\t0\t2820\t2\t2\t0\t828\t0\t0.293617\t3807 + testT\t0\t2718\t0\t0\t0\t716\t0\t0.263429\t4206" From 205576279fa63e7d9ffe3a2ec72eed6eb54b7476 Mon Sep 17 00:00:00 2001 From: SusiJo Date: Tue, 31 May 2022 14:50:55 +0200 Subject: [PATCH 017/114] ran prettier again --- tests/modules/gatk4/markduplicatesspark/test.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/modules/gatk4/markduplicatesspark/test.yml b/tests/modules/gatk4/markduplicatesspark/test.yml index 862b8a16..a240c65e 100644 --- a/tests/modules/gatk4/markduplicatesspark/test.yml +++ b/tests/modules/gatk4/markduplicatesspark/test.yml @@ -25,8 +25,7 @@ - path: output/gatk4/test.bam md5sum: 898cb0a6616897d8ada90bab53bf0837 - path: output/gatk4/test.metrics - contains: - "## METRICS CLASS\torg.broadinstitute.hellbender.utils.read.markduplicates.GATKDuplicationMetrics + contains: "## METRICS CLASS\torg.broadinstitute.hellbender.utils.read.markduplicates.GATKDuplicationMetrics LIBRARY\tUNPAIRED_READS_EXAMINED READ_PAIRS_EXAMINED\tSECONDARY_OR_SUPPLEMENTARY_RDS\tUNMAPPED_READS\tUNPAIRED_READ_DUPLICATES\tREAD_PAIR_DUPLICATES\tREAD_PAIR_OPTICAL_DUPLICATESPERCENT_DUPLICATION\tESTIMATED_LIBRARY_SIZE testN\t0\t2820\t2\t2\t0\t828\t0\t0.293617\t3807 testT\t0\t2718\t0\t0\t0\t716\t0\t0.263429\t4206" From 9ef09e6add67b9125c69c989c9d2b1f426ad6244 Mon Sep 17 00:00:00 2001 From: SusiJo Date: Tue, 31 May 2022 14:54:39 +0200 Subject: [PATCH 018/114] adjusted newlines --- tests/modules/gatk4/markduplicatesspark/test.yml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tests/modules/gatk4/markduplicatesspark/test.yml b/tests/modules/gatk4/markduplicatesspark/test.yml index a240c65e..ea7f7d4b 100644 --- a/tests/modules/gatk4/markduplicatesspark/test.yml +++ b/tests/modules/gatk4/markduplicatesspark/test.yml @@ -25,7 +25,4 @@ - path: output/gatk4/test.bam md5sum: 898cb0a6616897d8ada90bab53bf0837 - path: output/gatk4/test.metrics - contains: "## METRICS CLASS\torg.broadinstitute.hellbender.utils.read.markduplicates.GATKDuplicationMetrics - LIBRARY\tUNPAIRED_READS_EXAMINED READ_PAIRS_EXAMINED\tSECONDARY_OR_SUPPLEMENTARY_RDS\tUNMAPPED_READS\tUNPAIRED_READ_DUPLICATES\tREAD_PAIR_DUPLICATES\tREAD_PAIR_OPTICAL_DUPLICATESPERCENT_DUPLICATION\tESTIMATED_LIBRARY_SIZE - testN\t0\t2820\t2\t2\t0\t828\t0\t0.293617\t3807 - testT\t0\t2718\t0\t0\t0\t716\t0\t0.263429\t4206" + contains: "## METRICS CLASS\torg.broadinstitute.hellbender.utils.read.markduplicates.GATKDuplicationMetrics\nLIBRARY\tUNPAIRED_READS_EXAMINED READ_PAIRS_EXAMINED\tSECONDARY_OR_SUPPLEMENTARY_RDS\tUNMAPPED_READS\tUNPAIRED_READ_DUPLICATES\tREAD_PAIR_DUPLICATES\tREAD_PAIR_OPTICAL_DUPLICATESPERCENT_DUPLICATION\tESTIMATED_LIBRARY_SIZE\ntestN\t0\t2820\t2\t2\t0\t828\t0\t0.293617\t3807\ntestT\t0\t2718\t0\t0\t0\t716\t0\t0.263429\t4206" From 37b543e3e70c5474bbdcdb834bf9d51c52e743ee Mon Sep 17 00:00:00 2001 From: SusiJo Date: Tue, 31 May 2022 15:29:27 +0200 Subject: [PATCH 019/114] changed to tab --- tests/modules/gatk4/markduplicatesspark/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/modules/gatk4/markduplicatesspark/test.yml b/tests/modules/gatk4/markduplicatesspark/test.yml index ea7f7d4b..5ee751c4 100644 --- a/tests/modules/gatk4/markduplicatesspark/test.yml +++ b/tests/modules/gatk4/markduplicatesspark/test.yml @@ -25,4 +25,4 @@ - path: output/gatk4/test.bam md5sum: 898cb0a6616897d8ada90bab53bf0837 - path: output/gatk4/test.metrics - contains: "## METRICS CLASS\torg.broadinstitute.hellbender.utils.read.markduplicates.GATKDuplicationMetrics\nLIBRARY\tUNPAIRED_READS_EXAMINED READ_PAIRS_EXAMINED\tSECONDARY_OR_SUPPLEMENTARY_RDS\tUNMAPPED_READS\tUNPAIRED_READ_DUPLICATES\tREAD_PAIR_DUPLICATES\tREAD_PAIR_OPTICAL_DUPLICATESPERCENT_DUPLICATION\tESTIMATED_LIBRARY_SIZE\ntestN\t0\t2820\t2\t2\t0\t828\t0\t0.293617\t3807\ntestT\t0\t2718\t0\t0\t0\t716\t0\t0.263429\t4206" + contains: "## METRICS CLASS org.broadinstitute.hellbender.utils.read.markduplicates.GATKDuplicationMetrics" From 35e296cd7d52225fbceb4f928c86f1433203835e Mon Sep 17 00:00:00 2001 From: SusiJo Date: Tue, 31 May 2022 15:35:34 +0200 Subject: [PATCH 020/114] added brackets --- tests/modules/gatk4/markduplicatesspark/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/modules/gatk4/markduplicatesspark/test.yml b/tests/modules/gatk4/markduplicatesspark/test.yml index 5ee751c4..25c8ec38 100644 --- a/tests/modules/gatk4/markduplicatesspark/test.yml +++ b/tests/modules/gatk4/markduplicatesspark/test.yml @@ -25,4 +25,4 @@ - path: output/gatk4/test.bam md5sum: 898cb0a6616897d8ada90bab53bf0837 - path: output/gatk4/test.metrics - contains: "## METRICS CLASS org.broadinstitute.hellbender.utils.read.markduplicates.GATKDuplicationMetrics" + contains: '[ ## METRICS CLASS org.broadinstitute.hellbender.utils.read.markduplicates.GATKDuplicationMetrics ]' From 941760f75d04b863d0534c210b41505d64972e95 Mon Sep 17 00:00:00 2001 From: SusiJo Date: Tue, 31 May 2022 15:36:02 +0200 Subject: [PATCH 021/114] ran prettier --- tests/modules/gatk4/markduplicatesspark/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/modules/gatk4/markduplicatesspark/test.yml b/tests/modules/gatk4/markduplicatesspark/test.yml index 25c8ec38..9c235d7a 100644 --- a/tests/modules/gatk4/markduplicatesspark/test.yml +++ b/tests/modules/gatk4/markduplicatesspark/test.yml @@ -25,4 +25,4 @@ - path: output/gatk4/test.bam md5sum: 898cb0a6616897d8ada90bab53bf0837 - path: output/gatk4/test.metrics - contains: '[ ## METRICS CLASS org.broadinstitute.hellbender.utils.read.markduplicates.GATKDuplicationMetrics ]' + contains: "[ ## METRICS CLASS org.broadinstitute.hellbender.utils.read.markduplicates.GATKDuplicationMetrics ]" From 58389e550d9d091be164ac5fef440edb286e52c0 Mon Sep 17 00:00:00 2001 From: SusiJo Date: Tue, 31 May 2022 15:41:50 +0200 Subject: [PATCH 022/114] test json array --- tests/modules/gatk4/markduplicatesspark/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/modules/gatk4/markduplicatesspark/test.yml b/tests/modules/gatk4/markduplicatesspark/test.yml index 9c235d7a..24ff6d84 100644 --- a/tests/modules/gatk4/markduplicatesspark/test.yml +++ b/tests/modules/gatk4/markduplicatesspark/test.yml @@ -25,4 +25,4 @@ - path: output/gatk4/test.bam md5sum: 898cb0a6616897d8ada90bab53bf0837 - path: output/gatk4/test.metrics - contains: "[ ## METRICS CLASS org.broadinstitute.hellbender.utils.read.markduplicates.GATKDuplicationMetrics ]" + contains: "[ "## METRICS CLASS", "org.broadinstitute.hellbender.utils.read.markduplicates.GATKDuplicationMetrics" ]" From 84f584875e542e6b624ac751034af2ddab7860ef Mon Sep 17 00:00:00 2001 From: SusiJo Date: Tue, 31 May 2022 15:50:31 +0200 Subject: [PATCH 023/114] change string array --- tests/modules/gatk4/markduplicatesspark/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/modules/gatk4/markduplicatesspark/test.yml b/tests/modules/gatk4/markduplicatesspark/test.yml index 24ff6d84..21323ada 100644 --- a/tests/modules/gatk4/markduplicatesspark/test.yml +++ b/tests/modules/gatk4/markduplicatesspark/test.yml @@ -25,4 +25,4 @@ - path: output/gatk4/test.bam md5sum: 898cb0a6616897d8ada90bab53bf0837 - path: output/gatk4/test.metrics - contains: "[ "## METRICS CLASS", "org.broadinstitute.hellbender.utils.read.markduplicates.GATKDuplicationMetrics" ]" + contains: ["## METRICS CLASS", "org.broadinstitute.hellbender.utils.read.markduplicates.GATKDuplicationMetrics"] From 80366924fdd0b0183cebe2258f25807db5b93c10 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Tue, 31 May 2022 16:08:13 +0200 Subject: [PATCH 024/114] added gatk4/composestrtablefile --- modules/gatk4/composestrtablefile/main.nf | 51 +++++++++++++++++++ modules/gatk4/composestrtablefile/meta.yml | 43 ++++++++++++++++ tests/config/pytest_modules.yml | 4 ++ .../modules/gatk4/composestrtablefile/main.nf | 16 ++++++ .../gatk4/composestrtablefile/nextflow.config | 5 ++ .../gatk4/composestrtablefile/test.yml | 8 +++ 6 files changed, 127 insertions(+) create mode 100644 modules/gatk4/composestrtablefile/main.nf create mode 100644 modules/gatk4/composestrtablefile/meta.yml create mode 100644 tests/modules/gatk4/composestrtablefile/main.nf create mode 100644 tests/modules/gatk4/composestrtablefile/nextflow.config create mode 100644 tests/modules/gatk4/composestrtablefile/test.yml diff --git a/modules/gatk4/composestrtablefile/main.nf b/modules/gatk4/composestrtablefile/main.nf new file mode 100644 index 00000000..a1114f2a --- /dev/null +++ b/modules/gatk4/composestrtablefile/main.nf @@ -0,0 +1,51 @@ +process GATK4_COMPOSESTRTABLEFILE { + tag "$fasta" + label 'process_low' + + conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gatk4:4.2.6.1--hdfd78af_0': + 'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }" + + input: + tuple path(fasta), path(fasta_fai), path(dict) + + output: + path "*.zip" , emit: str_table + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + def avail_mem = 6 + if (!task.memory) { + log.info '[GATK ComposeSTRTableFile] Available memory not known - defaulting to 6GB. Specify process memory requirements to change this.' + } else { + avail_mem = task.memory.giga + } + """ + gatk --java-options "-Xmx${avail_mem}g" ComposeSTRTableFile \\ + --reference $fasta \\ + --output ${fasta.baseName}.zip \\ + --tmp-dir . \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + """ + touch test.zip + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/gatk4/composestrtablefile/meta.yml b/modules/gatk4/composestrtablefile/meta.yml new file mode 100644 index 00000000..eb825ef4 --- /dev/null +++ b/modules/gatk4/composestrtablefile/meta.yml @@ -0,0 +1,43 @@ +name: "gatk4_composestrtablefile" +description: This tool looks for low-complexity STR sequences along the reference that are later used to estimate the Dragstr model during single sample auto calibration CalibrateDragstrModel. +keywords: + - gatk4 + - composestrtablefile +tools: + - gatk4: + description: + Genome Analysis Toolkit (GATK4). Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/articles/4405451249819-ComposeSTRTableFile + tool_dev_url: https://github.com/broadinstitute/gatk + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] + +input: + - fasta: + type: file + description: FASTA reference file + pattern: "*.{fasta,fa}" + - fasta_fai: + type: file + description: index of the FASTA reference file + pattern: "*.fai" + - dict: + type: file + description: Sequence dictionary of the FASTA reference file + pattern: "*.dict" + +output: + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - str_table: + type: file + description: A zipped folder containing the STR table files + pattern: "*.zip" + +authors: + - "@nvnieuwk" diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index 3ff58b5c..bf4a6137 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -751,6 +751,10 @@ gatk4/combinegvcfs: - modules/gatk4/combinegvcfs/** - tests/modules/gatk4/combinegvcfs/** +gatk4/composestrtablefile: + - modules/gatk4/composestrtablefile/** + - tests/modules/gatk4/composestrtablefile/** + gatk4/createsequencedictionary: - modules/gatk4/createsequencedictionary/** - tests/modules/gatk4/createsequencedictionary/** diff --git a/tests/modules/gatk4/composestrtablefile/main.nf b/tests/modules/gatk4/composestrtablefile/main.nf new file mode 100644 index 00000000..9fc2e5f5 --- /dev/null +++ b/tests/modules/gatk4/composestrtablefile/main.nf @@ -0,0 +1,16 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { GATK4_COMPOSESTRTABLEFILE } from '../../../../modules/gatk4/composestrtablefile/main.nf' + +workflow test_gatk4_composestrtablefile { + + input = [ + file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true), + file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true), + file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) + ] + + GATK4_COMPOSESTRTABLEFILE ( input ) +} diff --git a/tests/modules/gatk4/composestrtablefile/nextflow.config b/tests/modules/gatk4/composestrtablefile/nextflow.config new file mode 100644 index 00000000..50f50a7a --- /dev/null +++ b/tests/modules/gatk4/composestrtablefile/nextflow.config @@ -0,0 +1,5 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + +} \ No newline at end of file diff --git a/tests/modules/gatk4/composestrtablefile/test.yml b/tests/modules/gatk4/composestrtablefile/test.yml new file mode 100644 index 00000000..3bafb348 --- /dev/null +++ b/tests/modules/gatk4/composestrtablefile/test.yml @@ -0,0 +1,8 @@ +- name: gatk4 composestrtablefile test_gatk4_composestrtablefile + command: nextflow run ./tests/modules/gatk4/composestrtablefile -entry test_gatk4_composestrtablefile -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/composestrtablefile/nextflow.config + tags: + - gatk4/composestrtablefile + - gatk4 + files: + - path: output/gatk4/genome.zip + contains: "[ # TODO nf-core: file md5sum was variable, please replace this text with a string found in the file instead ]" From ec41ea6082fcfaadcb8a6a4f02979144de86cd91 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Tue, 31 May 2022 16:12:05 +0200 Subject: [PATCH 025/114] fix test.yml --- tests/modules/gatk4/composestrtablefile/test.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/modules/gatk4/composestrtablefile/test.yml b/tests/modules/gatk4/composestrtablefile/test.yml index 3bafb348..514ae79c 100644 --- a/tests/modules/gatk4/composestrtablefile/test.yml +++ b/tests/modules/gatk4/composestrtablefile/test.yml @@ -1,8 +1,7 @@ - name: gatk4 composestrtablefile test_gatk4_composestrtablefile command: nextflow run ./tests/modules/gatk4/composestrtablefile -entry test_gatk4_composestrtablefile -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/composestrtablefile/nextflow.config tags: - - gatk4/composestrtablefile - gatk4 + - gatk4/composestrtablefile files: - - path: output/gatk4/genome.zip - contains: "[ # TODO nf-core: file md5sum was variable, please replace this text with a string found in the file instead ]" + - path: output/gatk4/genome.zip \ No newline at end of file From 1166fb51505b39983af8ee16ecb382222eec22bb Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Tue, 31 May 2022 16:13:38 +0200 Subject: [PATCH 026/114] prettier --- tests/modules/gatk4/composestrtablefile/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/modules/gatk4/composestrtablefile/test.yml b/tests/modules/gatk4/composestrtablefile/test.yml index 514ae79c..597ae144 100644 --- a/tests/modules/gatk4/composestrtablefile/test.yml +++ b/tests/modules/gatk4/composestrtablefile/test.yml @@ -4,4 +4,4 @@ - gatk4 - gatk4/composestrtablefile files: - - path: output/gatk4/genome.zip \ No newline at end of file + - path: output/gatk4/genome.zip From c1ac3fbb592b8e19bd66110fbe77bd47785cce6d Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Tue, 31 May 2022 20:13:48 +0200 Subject: [PATCH 027/114] Skeleton --- modules/gatk/realignertargetcreator/main.nf | 75 +++++++++++++++++++ modules/gatk/realignertargetcreator/meta.yml | 51 +++++++++++++ tests/config/pytest_modules.yml | 4 + .../gatk/realignertargetcreator/main.nf | 15 ++++ .../realignertargetcreator/nextflow.config | 5 ++ .../gatk/realignertargetcreator/test.yml | 14 ++++ 6 files changed, 164 insertions(+) create mode 100644 modules/gatk/realignertargetcreator/main.nf create mode 100644 modules/gatk/realignertargetcreator/meta.yml create mode 100644 tests/modules/gatk/realignertargetcreator/main.nf create mode 100644 tests/modules/gatk/realignertargetcreator/nextflow.config create mode 100644 tests/modules/gatk/realignertargetcreator/test.yml diff --git a/modules/gatk/realignertargetcreator/main.nf b/modules/gatk/realignertargetcreator/main.nf new file mode 100644 index 00000000..59dee1a8 --- /dev/null +++ b/modules/gatk/realignertargetcreator/main.nf @@ -0,0 +1,75 @@ +// TODO nf-core: If in doubt look at other nf-core/modules to see how we are doing things! :) +// https://github.com/nf-core/modules/tree/master/modules +// You can also ask for help via your pull request or on the #modules channel on the nf-core Slack workspace: +// https://nf-co.re/join +// TODO nf-core: A module file SHOULD only define input and output files as command-line parameters. +// All other parameters MUST be provided using the "task.ext" directive, see here: +// https://www.nextflow.io/docs/latest/process.html#ext +// where "task.ext" is a string. +// Any parameters that need to be evaluated in the context of a particular sample +// e.g. single-end/paired-end data MUST also be defined and evaluated appropriately. +// TODO nf-core: Software that can be piped together SHOULD be added to separate module files +// unless there is a run-time, storage advantage in implementing in this way +// e.g. it's ok to have a single module for bwa to output BAM instead of SAM: +// bwa mem | samtools view -B -T ref.fasta +// TODO nf-core: Optional inputs are not currently supported by Nextflow. However, using an empty +// list (`[]`) instead of a file can be used to work around this issue. + +process GATK_REALIGNERTARGETCREATOR { + tag "$meta.id" + label 'process_low' + + // TODO nf-core: List required Conda package(s). + // Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10"). + // For Conda, the build (i.e. "h9402c20_2") must be EXCLUDED to support installation on different operating systems. + // TODO nf-core: See section in main README for further information regarding finding and adding container addresses to the section below. + conda (params.enable_conda ? "bioconda::gatk=3.8" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gatk:3.8--hdfd78af_11': + 'quay.io/biocontainers/gatk:3.8--hdfd78af_11' }" + + input: + // TODO nf-core: Where applicable all sample-specific information e.g. "id", "single_end", "read_group" + // MUST be provided as an input via a Groovy Map called "meta". + // This information may not be required in some instances e.g. indexing reference genome files: + // https://github.com/nf-core/modules/blob/master/modules/bwa/index/main.nf + // TODO nf-core: Where applicable please provide/convert compressed files as input/output + // e.g. "*.fastq.gz" and NOT "*.fastq", "*.bam" and NOT "*.sam" etc. + tuple val(meta), path(bam) + + output: + // TODO nf-core: Named file extensions MUST be emitted for ALL output channels + tuple val(meta), path("*.bam"), emit: bam + // TODO nf-core: List additional required output channels/values here + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + // TODO nf-core: Where possible, a command MUST be provided to obtain the version number of the software e.g. 1.10 + // If the software is unable to output a version number on the command-line then it can be manually specified + // e.g. https://github.com/nf-core/modules/blob/master/modules/homer/annotatepeaks/main.nf + // Each software used MUST provide the software name and version number in the YAML version file (versions.yml) + // TODO nf-core: It MUST be possible to pass additional parameters to the tool as a command-line string via the "task.ext.args" directive + // TODO nf-core: If the tool supports multi-threading then you MUST provide the appropriate parameter + // using the Nextflow "task" variable e.g. "--threads $task.cpus" + // TODO nf-core: Please replace the example samtools command below with your module's command + // TODO nf-core: Please indent the command appropriately (4 spaces!!) to help with readability ;) + """ + samtools \\ + sort \\ + $args \\ + -@ $task.cpus \\ + -o ${prefix}.bam \\ + -T $prefix \\ + $bam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//' )) + END_VERSIONS + """ +} diff --git a/modules/gatk/realignertargetcreator/meta.yml b/modules/gatk/realignertargetcreator/meta.yml new file mode 100644 index 00000000..a33db8f1 --- /dev/null +++ b/modules/gatk/realignertargetcreator/meta.yml @@ -0,0 +1,51 @@ +name: "gatk_realignertargetcreator" +## TODO nf-core: Add a description of the module and list keywords +description: write your description here +keywords: + - sort +tools: + - "gatk": + ## TODO nf-core: Add a description and other details for the software below + description: "The full Genome Analysis Toolkit (GATK) framework, license restricted." + homepage: "None" + documentation: "None" + tool_dev_url: "None" + doi: "" + licence: "['https://software.broadinstitute.org/gatk/download/licensing', 'https://www.broadinstitute.org/gatk/about/#licensing', 'BSD']" + +## TODO nf-core: Add a description of all of the variables used as input +input: + # Only when we have meta + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + # + ## TODO nf-core: Delete / customise this example input + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + +## TODO nf-core: Add a description of all of the variables used as output +output: + #Only when we have meta + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + # + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + ## TODO nf-core: Delete / customise this example output + - bam: + type: file + description: Sorted BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + +authors: + - "@jfy133" diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index 3ff58b5c..80dcb9ec 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -715,6 +715,10 @@ gamma/gamma: - modules/gamma/gamma/** - tests/modules/gamma/gamma/** +gatk/realignertargetcreator: + - modules/gatk/realignertargetcreator/** + - tests/modules/gatk/realignertargetcreator/** + gatk4/applybqsr: - modules/gatk4/applybqsr/** - tests/modules/gatk4/applybqsr/** diff --git a/tests/modules/gatk/realignertargetcreator/main.nf b/tests/modules/gatk/realignertargetcreator/main.nf new file mode 100644 index 00000000..63908069 --- /dev/null +++ b/tests/modules/gatk/realignertargetcreator/main.nf @@ -0,0 +1,15 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { GATK_REALIGNERTARGETCREATOR } from '../../../../modules/gatk/realignertargetcreator/main.nf' + +workflow test_gatk_realignertargetcreator { + + input = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true) + ] + + GATK_REALIGNERTARGETCREATOR ( input ) +} diff --git a/tests/modules/gatk/realignertargetcreator/nextflow.config b/tests/modules/gatk/realignertargetcreator/nextflow.config new file mode 100644 index 00000000..50f50a7a --- /dev/null +++ b/tests/modules/gatk/realignertargetcreator/nextflow.config @@ -0,0 +1,5 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + +} \ No newline at end of file diff --git a/tests/modules/gatk/realignertargetcreator/test.yml b/tests/modules/gatk/realignertargetcreator/test.yml new file mode 100644 index 00000000..a45fbb93 --- /dev/null +++ b/tests/modules/gatk/realignertargetcreator/test.yml @@ -0,0 +1,14 @@ +## TODO nf-core: Please run the following command to build this file: +# nf-core modules create-test-yml gatk/realignertargetcreator +- name: "gatk realignertargetcreator" + command: nextflow run ./tests/modules/gatk/realignertargetcreator -entry test_gatk_realignertargetcreator -c ./tests/config/nextflow.config -c ./tests/modules/gatk/realignertargetcreator/nextflow.config + tags: + - "gatk" + # + - "gatk/realignertargetcreator" + # + files: + - path: "output/gatk/test.bam" + md5sum: e667c7caad0bc4b7ac383fd023c654fc + - path: output/gatk/versions.yml + md5sum: a01fe51bc4c6a3a6226fbf77b2c7cf3b From d39ba08e02b78001f851b737bc832203a60dc7f1 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Tue, 31 May 2022 20:19:49 +0200 Subject: [PATCH 028/114] Prepare main module code --- modules/gatk/realignertargetcreator/main.nf | 64 ++++++--------------- 1 file changed, 16 insertions(+), 48 deletions(-) diff --git a/modules/gatk/realignertargetcreator/main.nf b/modules/gatk/realignertargetcreator/main.nf index 59dee1a8..76ac2dd7 100644 --- a/modules/gatk/realignertargetcreator/main.nf +++ b/modules/gatk/realignertargetcreator/main.nf @@ -1,46 +1,19 @@ -// TODO nf-core: If in doubt look at other nf-core/modules to see how we are doing things! :) -// https://github.com/nf-core/modules/tree/master/modules -// You can also ask for help via your pull request or on the #modules channel on the nf-core Slack workspace: -// https://nf-co.re/join -// TODO nf-core: A module file SHOULD only define input and output files as command-line parameters. -// All other parameters MUST be provided using the "task.ext" directive, see here: -// https://www.nextflow.io/docs/latest/process.html#ext -// where "task.ext" is a string. -// Any parameters that need to be evaluated in the context of a particular sample -// e.g. single-end/paired-end data MUST also be defined and evaluated appropriately. -// TODO nf-core: Software that can be piped together SHOULD be added to separate module files -// unless there is a run-time, storage advantage in implementing in this way -// e.g. it's ok to have a single module for bwa to output BAM instead of SAM: -// bwa mem | samtools view -B -T ref.fasta -// TODO nf-core: Optional inputs are not currently supported by Nextflow. However, using an empty -// list (`[]`) instead of a file can be used to work around this issue. - process GATK_REALIGNERTARGETCREATOR { tag "$meta.id" label 'process_low' - // TODO nf-core: List required Conda package(s). - // Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10"). - // For Conda, the build (i.e. "h9402c20_2") must be EXCLUDED to support installation on different operating systems. - // TODO nf-core: See section in main README for further information regarding finding and adding container addresses to the section below. - conda (params.enable_conda ? "bioconda::gatk=3.8" : null) + conda (params.enable_conda ? "bioconda::gatk=3.5" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk:3.8--hdfd78af_11': - 'quay.io/biocontainers/gatk:3.8--hdfd78af_11' }" + 'https://depot.galaxyproject.org/singularity/gatk:3.5--hdfd78af_11': + 'quay.io/biocontainers/gatk:3.5--hdfd78af_11' }" input: - // TODO nf-core: Where applicable all sample-specific information e.g. "id", "single_end", "read_group" - // MUST be provided as an input via a Groovy Map called "meta". - // This information may not be required in some instances e.g. indexing reference genome files: - // https://github.com/nf-core/modules/blob/master/modules/bwa/index/main.nf - // TODO nf-core: Where applicable please provide/convert compressed files as input/output - // e.g. "*.fastq.gz" and NOT "*.fastq", "*.bam" and NOT "*.sam" etc. tuple val(meta), path(bam) + tuple val(meta), path(reference) + tuple val(meta), path(known_vcf) output: - // TODO nf-core: Named file extensions MUST be emitted for ALL output channels tuple val(meta), path("*.bam"), emit: bam - // TODO nf-core: List additional required output channels/values here path "versions.yml" , emit: versions when: @@ -49,27 +22,22 @@ process GATK_REALIGNERTARGETCREATOR { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - // TODO nf-core: Where possible, a command MUST be provided to obtain the version number of the software e.g. 1.10 - // If the software is unable to output a version number on the command-line then it can be manually specified - // e.g. https://github.com/nf-core/modules/blob/master/modules/homer/annotatepeaks/main.nf - // Each software used MUST provide the software name and version number in the YAML version file (versions.yml) - // TODO nf-core: It MUST be possible to pass additional parameters to the tool as a command-line string via the "task.ext.args" directive - // TODO nf-core: If the tool supports multi-threading then you MUST provide the appropriate parameter - // using the Nextflow "task" variable e.g. "--threads $task.cpus" - // TODO nf-core: Please replace the example samtools command below with your module's command - // TODO nf-core: Please indent the command appropriately (4 spaces!!) to help with readability ;) + def known = known_vcf ? "-known ${known_vcf}" ? "" + if ("$bam" == "${prefix}.bam") error "Input and output names are the same, set prefix in module configuration to disambiguate!" + """ - samtools \\ - sort \\ - $args \\ - -@ $task.cpus \\ + gatk3 \\ + -T RealigerTargetCreator \\ + -nt ${task.cpus} + -I ${bam} \\ + -R ${reference} \\ -o ${prefix}.bam \\ - -T $prefix \\ - $bam + ${known} \\ + $args cat <<-END_VERSIONS > versions.yml "${task.process}": - gatk: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//' )) + gatk: \$(echo \$(gatk3 --version)) END_VERSIONS """ } From 7633d7816b10d8bd1fb97a748ee1006dd0ea5d09 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Tue, 31 May 2022 20:35:54 +0200 Subject: [PATCH 029/114] Add GATK(3)/realignertargetcreator --- modules/gatk/realignertargetcreator/main.nf | 20 +++---- modules/gatk/realignertargetcreator/meta.yml | 57 ++++++++++++------- .../gatk/realignertargetcreator/main.nf | 12 +++- .../gatk/realignertargetcreator/test.yml | 18 ++---- 4 files changed, 60 insertions(+), 47 deletions(-) diff --git a/modules/gatk/realignertargetcreator/main.nf b/modules/gatk/realignertargetcreator/main.nf index 76ac2dd7..e3a03a5f 100644 --- a/modules/gatk/realignertargetcreator/main.nf +++ b/modules/gatk/realignertargetcreator/main.nf @@ -8,13 +8,13 @@ process GATK_REALIGNERTARGETCREATOR { 'quay.io/biocontainers/gatk:3.5--hdfd78af_11' }" input: - tuple val(meta), path(bam) - tuple val(meta), path(reference) - tuple val(meta), path(known_vcf) + tuple val(meta), path(bam), path(bai) + tuple path(fasta), path(fasta_fai), path(fasta_dict) + path(known_vcf) output: - tuple val(meta), path("*.bam"), emit: bam - path "versions.yml" , emit: versions + tuple val(meta), path("*.intervals"), emit: intervals + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -22,16 +22,16 @@ process GATK_REALIGNERTARGETCREATOR { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def known = known_vcf ? "-known ${known_vcf}" ? "" + def known = known_vcf ? "-known ${known_vcf}" : "" if ("$bam" == "${prefix}.bam") error "Input and output names are the same, set prefix in module configuration to disambiguate!" """ gatk3 \\ - -T RealigerTargetCreator \\ - -nt ${task.cpus} + -T RealignerTargetCreator \\ + -nt ${task.cpus} \\ -I ${bam} \\ - -R ${reference} \\ - -o ${prefix}.bam \\ + -R ${fasta} \\ + -o ${prefix}.intervals \\ ${known} \\ $args diff --git a/modules/gatk/realignertargetcreator/meta.yml b/modules/gatk/realignertargetcreator/meta.yml index a33db8f1..70df7b78 100644 --- a/modules/gatk/realignertargetcreator/meta.yml +++ b/modules/gatk/realignertargetcreator/meta.yml @@ -1,51 +1,64 @@ name: "gatk_realignertargetcreator" -## TODO nf-core: Add a description of the module and list keywords -description: write your description here +description: Generates a list of locations that should be considered for local realignment prior genotyping. keywords: - - sort + - bam + - vcf + - variant calling + - indel + - realignment + - targets tools: - "gatk": - ## TODO nf-core: Add a description and other details for the software below description: "The full Genome Analysis Toolkit (GATK) framework, license restricted." - homepage: "None" - documentation: "None" - tool_dev_url: "None" - doi: "" - licence: "['https://software.broadinstitute.org/gatk/download/licensing', 'https://www.broadinstitute.org/gatk/about/#licensing', 'BSD']" + homepage: "https://gatk.broadinstitute.org/hc/en-us" + documentation: "https://github.com/broadinstitute/gatk-docs" + licence: "['https://software.broadinstitute.org/gatk/download/licensing', 'BSD', 'https://www.broadinstitute.org/gatk/about/#licensing']" -## TODO nf-core: Add a description of all of the variables used as input input: - # Only when we have meta - meta: type: map description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - # - ## TODO nf-core: Delete / customise this example input - bam: type: file - description: BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" + description: Sorted and indexed BAM/CRAM/SAM file + pattern: "*.bam" + - bai: + type: file + description: BAM index file + pattern: "*.bai" + - fasta: + type: file + description: Reference file used to generate BAM file + pattern: ".{fasta,fa,fna}" + - fasta_fai: + type: file + description: Index of reference file used to generate BAM file + pattern: ".fai" + - dict: + type: file + description: GATK dict file for reference + pattern: ".dict" + - known_vcf: + type: file + description: Optional input VCF file(s) with known indels + pattern: ".vcf" -## TODO nf-core: Add a description of all of the variables used as output output: - #Only when we have meta - meta: type: map description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - # - versions: type: file description: File containing software versions pattern: "versions.yml" - ## TODO nf-core: Delete / customise this example output - - bam: + - intervals: type: file - description: Sorted BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" + description: File containg intervals that represent sites of extant and potential indels. + pattern: "*.intervals" authors: - "@jfy133" diff --git a/tests/modules/gatk/realignertargetcreator/main.nf b/tests/modules/gatk/realignertargetcreator/main.nf index 63908069..02e62d93 100644 --- a/tests/modules/gatk/realignertargetcreator/main.nf +++ b/tests/modules/gatk/realignertargetcreator/main.nf @@ -5,11 +5,17 @@ nextflow.enable.dsl = 2 include { GATK_REALIGNERTARGETCREATOR } from '../../../../modules/gatk/realignertargetcreator/main.nf' workflow test_gatk_realignertargetcreator { - + input = [ [ id:'test', single_end:false ], // meta map - file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true) + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true) + ] + reference = [ + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true), + file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true), + file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true), ] - GATK_REALIGNERTARGETCREATOR ( input ) + GATK_REALIGNERTARGETCREATOR ( input, reference, [] ) } diff --git a/tests/modules/gatk/realignertargetcreator/test.yml b/tests/modules/gatk/realignertargetcreator/test.yml index a45fbb93..0e247013 100644 --- a/tests/modules/gatk/realignertargetcreator/test.yml +++ b/tests/modules/gatk/realignertargetcreator/test.yml @@ -1,14 +1,8 @@ -## TODO nf-core: Please run the following command to build this file: -# nf-core modules create-test-yml gatk/realignertargetcreator -- name: "gatk realignertargetcreator" - command: nextflow run ./tests/modules/gatk/realignertargetcreator -entry test_gatk_realignertargetcreator -c ./tests/config/nextflow.config -c ./tests/modules/gatk/realignertargetcreator/nextflow.config +- name: gatk realignertargetcreator test_gatk_realignertargetcreator + command: nextflow run ./tests/modules/gatk/realignertargetcreator -entry test_gatk_realignertargetcreator -c ./tests/config/nextflow.config -c ./tests/modules/gatk/realignertargetcreator/nextflow.config tags: - - "gatk" - # - - "gatk/realignertargetcreator" - # + - gatk + - gatk/realignertargetcreator files: - - path: "output/gatk/test.bam" - md5sum: e667c7caad0bc4b7ac383fd023c654fc - - path: output/gatk/versions.yml - md5sum: a01fe51bc4c6a3a6226fbf77b2c7cf3b + - path: output/gatk/test.intervals + md5sum: 7aa7a1b235a510e6591e262382086bf8 From 5b74a179c3323d683065e820b2fb5a80fdaedcb7 Mon Sep 17 00:00:00 2001 From: nvnieuwk <101190534+nvnieuwk@users.noreply.github.com> Date: Wed, 1 Jun 2022 09:14:08 +0200 Subject: [PATCH 030/114] Update modules/gatk4/composestrtablefile/main.nf Co-authored-by: FriederikeHanssen --- modules/gatk4/composestrtablefile/main.nf | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/modules/gatk4/composestrtablefile/main.nf b/modules/gatk4/composestrtablefile/main.nf index a1114f2a..dae61166 100644 --- a/modules/gatk4/composestrtablefile/main.nf +++ b/modules/gatk4/composestrtablefile/main.nf @@ -8,7 +8,9 @@ process GATK4_COMPOSESTRTABLEFILE { 'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }" input: - tuple path(fasta), path(fasta_fai), path(dict) + path(fasta) + path(fasta_fai) + path(dict) output: path "*.zip" , emit: str_table From d4f3df4677fb0aff66db16c896e59fc4ef93a01b Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Wed, 1 Jun 2022 09:17:05 +0200 Subject: [PATCH 031/114] Updated the test --- tests/modules/gatk4/composestrtablefile/main.nf | 14 ++++++++++---- tests/modules/gatk4/composestrtablefile/test.yml | 2 +- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/tests/modules/gatk4/composestrtablefile/main.nf b/tests/modules/gatk4/composestrtablefile/main.nf index 9fc2e5f5..da8967cc 100644 --- a/tests/modules/gatk4/composestrtablefile/main.nf +++ b/tests/modules/gatk4/composestrtablefile/main.nf @@ -6,11 +6,17 @@ include { GATK4_COMPOSESTRTABLEFILE } from '../../../../modules/gatk4/composestr workflow test_gatk4_composestrtablefile { - input = [ - file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true), - file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true), + fasta = [ + file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + ] + + fasta_fai = [ + file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + ] + + dict = [ file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) ] - GATK4_COMPOSESTRTABLEFILE ( input ) + GATK4_COMPOSESTRTABLEFILE ( fasta, fasta_fai, dict ) } diff --git a/tests/modules/gatk4/composestrtablefile/test.yml b/tests/modules/gatk4/composestrtablefile/test.yml index 597ae144..0a87372a 100644 --- a/tests/modules/gatk4/composestrtablefile/test.yml +++ b/tests/modules/gatk4/composestrtablefile/test.yml @@ -1,7 +1,7 @@ - name: gatk4 composestrtablefile test_gatk4_composestrtablefile command: nextflow run ./tests/modules/gatk4/composestrtablefile -entry test_gatk4_composestrtablefile -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/composestrtablefile/nextflow.config tags: - - gatk4 - gatk4/composestrtablefile + - gatk4 files: - path: output/gatk4/genome.zip From 8470f3cb7e790701401523c58a72e183ac47ecac Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Wed, 1 Jun 2022 09:20:20 +0200 Subject: [PATCH 032/114] linting --- tests/modules/gatk4/composestrtablefile/main.nf | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/tests/modules/gatk4/composestrtablefile/main.nf b/tests/modules/gatk4/composestrtablefile/main.nf index da8967cc..8fccadb9 100644 --- a/tests/modules/gatk4/composestrtablefile/main.nf +++ b/tests/modules/gatk4/composestrtablefile/main.nf @@ -6,17 +6,11 @@ include { GATK4_COMPOSESTRTABLEFILE } from '../../../../modules/gatk4/composestr workflow test_gatk4_composestrtablefile { - fasta = [ - file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) - ] + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) - fasta_fai = [ - file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) - ] + fasta_fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) - dict = [ - file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) - ] + dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) GATK4_COMPOSESTRTABLEFILE ( fasta, fasta_fai, dict ) } From 114a54c8d5a8e898a126c2804e3e221286eb2682 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Wed, 1 Jun 2022 09:22:09 +0200 Subject: [PATCH 033/114] spaces fix --- modules/gatk4/composestrtablefile/main.nf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/gatk4/composestrtablefile/main.nf b/modules/gatk4/composestrtablefile/main.nf index dae61166..8f2f00f2 100644 --- a/modules/gatk4/composestrtablefile/main.nf +++ b/modules/gatk4/composestrtablefile/main.nf @@ -8,9 +8,9 @@ process GATK4_COMPOSESTRTABLEFILE { 'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }" input: - path(fasta) - path(fasta_fai) - path(dict) + path(fasta) + path(fasta_fai) + path(dict) output: path "*.zip" , emit: str_table From f6953b51478d24a4caf84b40d92bba3350a38507 Mon Sep 17 00:00:00 2001 From: SusiJo Date: Wed, 1 Jun 2022 17:35:02 +0200 Subject: [PATCH 034/114] adjust cnvkit/batch for germline mode --- modules/cnvkit/batch/main.nf | 38 +++++++++++------ tests/config/test_data.config | 6 +-- tests/modules/cnvkit/batch/main.nf | 22 ++++++++-- tests/modules/cnvkit/batch/test.yml | 63 ++++++++++++++++++++++++++--- 4 files changed, 104 insertions(+), 25 deletions(-) diff --git a/modules/cnvkit/batch/main.nf b/modules/cnvkit/batch/main.nf index c1f9ef87..b467c6f2 100644 --- a/modules/cnvkit/batch/main.nf +++ b/modules/cnvkit/batch/main.nf @@ -28,34 +28,48 @@ process CNVKIT_BATCH { script: def args = task.ext.args ?: '' - // execute samtools only when cram files are input, cnvkit runs natively on bam but is prohibitively slow - // input pair is assumed to have same extension if both exist - def is_cram = tumor.Extension == "cram" ? true : false - def tumor_out = is_cram ? tumor.BaseName + ".bam" : "${tumor}" + def tumor_exists = tumor ? true : false + def normal_exists = normal ? true : false + + // execute samtools only when cram files are input, cnvkit runs natively on bam but is prohibitively slow + def tumor_cram = tumor_exists && tumor.Extension == "cram" ? true : false + def normal_cram = normal_exists && normal.Extension == "cram" ? true : false + + def tumor_out = tumor_cram ? tumor.BaseName + ".bam" : "${tumor}" - // do not run samtools on normal samples in tumor_only mode - def normal_exists = normal ? true: false // tumor_only mode does not need fasta & target // instead it requires a pre-computed reference.cnn which is built from fasta & target def (normal_out, normal_args, fasta_args) = ["", "", ""] if (normal_exists){ def normal_prefix = normal.BaseName - normal_out = is_cram ? "${normal_prefix}" + ".bam" : "${normal}" - normal_args = normal_prefix ? "--normal $normal_out" : "" + normal_out = normal_cram ? "${normal_prefix}" + ".bam" : "${normal}" fasta_args = fasta ? "--fasta $fasta" : "" + + // germline mode + // normal samples must be input without a flag + // requires flag --normal to be empty + if(!tumor_exists){ + tumor_out = normal.BaseName + ".bam" + normal_args = "--normal " + } + // somatic mode + else { + normal_args = normal_prefix ? "--normal $normal_out" : "" + } } def target_args = targets ? "--targets $targets" : "" def reference_args = reference ? "--reference $reference" : "" """ - if $is_cram; then + if $tumor_cram; then samtools view -T $fasta $tumor -@ $task.cpus -o $tumor_out - if $normal_exists; then - samtools view -T $fasta $normal -@ $task.cpus -o $normal_out - fi fi + if $normal_cram; then + samtools view -T $fasta $normal -@ $task.cpus -o $normal_out + fi + cnvkit.py \\ batch \\ diff --git a/tests/config/test_data.config b/tests/config/test_data.config index cf7d45f6..bef6f0c9 100644 --- a/tests/config/test_data.config +++ b/tests/config/test_data.config @@ -23,6 +23,8 @@ params { test_bed12 = "${test_data_dir}/genomics/sarscov2/genome/bed/test.bed12" baits_bed = "${test_data_dir}/genomics/sarscov2/genome/bed/baits.bed" + reference_cnn = "${test_data_dir}/genomics/sarscov2/genome/cnn/reference.cnn" + kraken2 = "${test_data_dir}/genomics/sarscov2/genome/db/kraken2" kraken2_tar_gz = "${test_data_dir}/genomics/sarscov2/genome/db/kraken2.tar.gz" @@ -145,6 +147,7 @@ params { genome_21_multi_interval_bed_gz = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed.gz" genome_21_multi_interval_bed_gz_tbi = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed.gz.tbi" genome_21_chromosomes_dir = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/sequence/chromosomes.tar.gz" + genome_21_reference_cnn = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/sequence/reference_chr21.cnn" dbsnp_146_hg38_elsites = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.elsites" dbsnp_146_hg38_vcf_gz = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz" @@ -414,9 +417,6 @@ params { 'txt' { hello = "${test_data_dir}/generic/txt/hello.txt" } - 'cnn' { - reference = "${test_data_dir}/generic/cnn/reference.cnn" - } 'cooler'{ test_pairix_pair_gz = "${test_data_dir}/genomics/homo_sapiens/cooler/cload/hg19/hg19.GM12878-MboI.pairs.subsample.blksrt.txt.gz" test_pairix_pair_gz_px2 = "${test_data_dir}/genomics/homo_sapiens/cooler/cload/hg19/hg19.GM12878-MboI.pairs.subsample.blksrt.txt.gz.px2" diff --git a/tests/modules/cnvkit/batch/main.nf b/tests/modules/cnvkit/batch/main.nf index 988ed3ad..c93e3a00 100755 --- a/tests/modules/cnvkit/batch/main.nf +++ b/tests/modules/cnvkit/batch/main.nf @@ -5,6 +5,7 @@ nextflow.enable.dsl = 2 include { CNVKIT_BATCH as CNVKIT_HYBRID } from '../../../../modules/cnvkit/batch/main.nf' include { CNVKIT_BATCH as CNVKIT_WGS } from '../../../../modules/cnvkit/batch/main.nf' include { CNVKIT_BATCH as CNVKIT_TUMORONLY } from '../../../../modules/cnvkit/batch/main.nf' +include { CNVKIT_BATCH as CNVKIT_GERMLINE } from '../../../../modules/cnvkit/batch/main.nf' workflow test_cnvkit_hybrid { @@ -47,10 +48,10 @@ workflow test_cnvkit_tumoronly { input = [ [ id:'test'], // meta map - file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_bam'], checkIfExists: true), [] ] - reference = file(params.test_data['generic']['cnn']['reference'], checkIfExists: true) + reference = file(params.test_data['homo_sapiens']['genome']['genome_21_reference_cnn'], checkIfExists: true) CNVKIT_TUMORONLY ( input, [], [], reference ) } @@ -59,11 +60,24 @@ workflow test_cnvkit_tumoronly_cram { input = [ [ id:'test'], // meta map - file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_cram'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_cram'], checkIfExists: true), [] ] fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) - reference = file(params.test_data['generic']['cnn']['reference'], checkIfExists: true) + reference = file(params.test_data['homo_sapiens']['genome']['genome_21_reference_cnn'], checkIfExists: true) CNVKIT_TUMORONLY ( input, fasta, [], reference ) } + +workflow test_cnvkit_germline_cram { + + input = [ + [ id:'test'], // meta map + [], + file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_cram'], checkIfExists: true) + ] + fasta = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true) + targets = file(params.test_data['homo_sapiens']['genome']['genome_21_multi_interval_bed'], checkIfExists: true) + + CNVKIT_GERMLINE ( input, fasta, targets, []) +} diff --git a/tests/modules/cnvkit/batch/test.yml b/tests/modules/cnvkit/batch/test.yml index 00d6a767..928a08de 100755 --- a/tests/modules/cnvkit/batch/test.yml +++ b/tests/modules/cnvkit/batch/test.yml @@ -92,9 +92,22 @@ - cnvkit - cnvkit/batch files: - - path: output/cnvkit/reference.antitarget-tmp.bed - - path: output/cnvkit/reference.target-tmp.bed - md5sum: 26d25ff2d6c45b6d92169b3559c6acdb + - path: output/cnvkit/reference_chr21.antitarget-tmp.bed + md5sum: 3d4d20f9f23b39970865d29ef239d20b + - path: output/cnvkit/reference_chr21.target-tmp.bed + md5sum: 657b25dbda8516624efa8cb2cf3716ca + - path: output/cnvkit/test2.paired_end.recalibrated.sorted.antitargetcoverage.cnn + md5sum: 067115082c4af4b64d58c0dc3a3642e4 + - path: output/cnvkit/test2.paired_end.recalibrated.sorted.bintest.cns + md5sum: f6adc75a0a86b7a921eca1b79a394cb0 + - path: output/cnvkit/test2.paired_end.recalibrated.sorted.call.cns + md5sum: f7caeca04aba28b125ce26b511f42afb + - path: output/cnvkit/test2.paired_end.recalibrated.sorted.cnr + md5sum: d9bdb71ce807051369577ee7f807a40c + - path: output/cnvkit/test2.paired_end.recalibrated.sorted.cns + md5sum: 2b56aac606ba6183d018b30ca58afcec + - path: output/cnvkit/test2.paired_end.recalibrated.sorted.targetcoverage.cnn + md5sum: e6d0190c1c37ce6e41f76ca5b24ccca3 - name: cnvkit batch test_cnvkit_tumoronly_cram command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_tumoronly_cram -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config @@ -102,6 +115,44 @@ - cnvkit - cnvkit/batch files: - - path: output/cnvkit/reference.antitarget-tmp.bed - - path: output/cnvkit/reference.target-tmp.bed - md5sum: 26d25ff2d6c45b6d92169b3559c6acdb + - path: output/cnvkit/reference_chr21.antitarget-tmp.bed + md5sum: 3d4d20f9f23b39970865d29ef239d20b + - path: output/cnvkit/reference_chr21.target-tmp.bed + md5sum: 657b25dbda8516624efa8cb2cf3716ca + - path: output/cnvkit/test2.paired_end.recalibrated.sorted.antitargetcoverage.cnn + md5sum: 067115082c4af4b64d58c0dc3a3642e4 + - path: output/cnvkit/test2.paired_end.recalibrated.sorted.bintest.cns + md5sum: f6adc75a0a86b7a921eca1b79a394cb0 + - path: output/cnvkit/test2.paired_end.recalibrated.sorted.call.cns + md5sum: f7caeca04aba28b125ce26b511f42afb + - path: output/cnvkit/test2.paired_end.recalibrated.sorted.cnr + md5sum: d9bdb71ce807051369577ee7f807a40c + - path: output/cnvkit/test2.paired_end.recalibrated.sorted.cns + md5sum: 2b56aac606ba6183d018b30ca58afcec + - path: output/cnvkit/test2.paired_end.recalibrated.sorted.targetcoverage.cnn + md5sum: e6d0190c1c37ce6e41f76ca5b24ccca3 + +- name: cnvkit batch test_cnvkit_germline_cram + command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_germline_cram -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config + tags: + - cnvkit + - cnvkit/batch + files: + - path: output/cnvkit/multi_intervals.antitarget.bed + md5sum: 3d4d20f9f23b39970865d29ef239d20b + - path: output/cnvkit/multi_intervals.target.bed + md5sum: 86d30493bb2e619a93f4ebc2923d29f3 + - path: output/cnvkit/reference.cnn + md5sum: a09ee4be5dda1cf0f68073bdb3aad8ec + - path: output/cnvkit/test2.paired_end.recalibrated.sorted.antitargetcoverage.cnn + md5sum: 067115082c4af4b64d58c0dc3a3642e4 + - path: output/cnvkit/test2.paired_end.recalibrated.sorted.bintest.cns + md5sum: 68b62b75cd91b2ffe5633686fb943490 + - path: output/cnvkit/test2.paired_end.recalibrated.sorted.call.cns + md5sum: df196edd72613c59186f4d87df3dc4a4 + - path: output/cnvkit/test2.paired_end.recalibrated.sorted.cnr + md5sum: 3b4fc0cc73be78f978cfe2422470753e + - path: output/cnvkit/test2.paired_end.recalibrated.sorted.cns + md5sum: 4e67451dbcb6601fc3fa5dd7e570f1d4 + - path: output/cnvkit/test2.paired_end.recalibrated.sorted.targetcoverage.cnn + md5sum: b4a49faf170e436ec32dcc21ccc3ce8f From a21cfb666eed7517060f1d2d61a21c340753b0ec Mon Sep 17 00:00:00 2001 From: SusiJo Date: Thu, 2 Jun 2022 10:38:05 +0200 Subject: [PATCH 035/114] changed container + add cram2bam conversion --- modules/deeptools/bamcoverage/main.nf | 58 +++++++++++++++----- modules/deeptools/bamcoverage/meta.yml | 9 +++ tests/modules/deeptools/bamcoverage/main.nf | 18 +++++- tests/modules/deeptools/bamcoverage/test.yml | 21 ++++--- 4 files changed, 82 insertions(+), 24 deletions(-) diff --git a/modules/deeptools/bamcoverage/main.nf b/modules/deeptools/bamcoverage/main.nf index 926bf0ad..04073ed9 100644 --- a/modules/deeptools/bamcoverage/main.nf +++ b/modules/deeptools/bamcoverage/main.nf @@ -2,13 +2,15 @@ process DEEPTOOLS_BAMCOVERAGE { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::deeptools=3.5.1" : null) + conda (params.enable_conda ? "bioconda::deeptools=3.5.1 bioconda::samtools=1.15.1" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/deeptools:3.5.1--py_0': - 'quay.io/biocontainers/deeptools:3.5.1--py_0' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-eb9e7907c7a753917c1e4d7a64384c047429618a:2c687053c0252667cca265c9f4118f2c205a604c-0': + 'quay.io/biocontainers/mulled-v2-eb9e7907c7a753917c1e4d7a64384c047429618a:2c687053c0252667cca265c9f4118f2c205a604c-0' }" input: tuple val(meta), path(input), path(input_index) + path(fasta) + path(fasta_fai) output: tuple val(meta), path("*.bigWig") , emit: bigwig, optional: true @@ -22,16 +24,44 @@ process DEEPTOOLS_BAMCOVERAGE { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}.bigWig" - """ - bamCoverage \\ - --bam $input \\ - $args \\ - --numberOfProcessors ${task.cpus} \\ - --outFileName ${prefix} + // cram_input is currently not working with deeptools + // therefore it's required to convert cram to bam first + def is_cram = input.Extension == "cram" ? true : false + def input_out = is_cram ? input.BaseName + ".bam" : "${input}" + def fai_reference = fasta_fai ? "--fai-reference ${fasta_fai}" : "" + + if (is_cram){ + """ + samtools view -T $fasta $input $fai_reference -@ $task.cpus -o $input_out + samtools index -b $input_out -@ $task.cpus + + bamCoverage \\ + --bam $input_out \\ + $args \\ + --numberOfProcessors ${task.cpus} \\ + --outFileName ${prefix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + deeptools: \$(bamCoverage --version | sed -e "s/bamCoverage //g") + END_VERSIONS + """ + + } + else { + """ + bamCoverage \\ + --bam $input_out \\ + $args \\ + --numberOfProcessors ${task.cpus} \\ + --outFileName ${prefix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + deeptools: \$(bamCoverage --version | sed -e "s/bamCoverage //g") + END_VERSIONS + """ + } - cat <<-END_VERSIONS > versions.yml - "${task.process}": - deeptools: \$(bamCoverage --version | sed -e "s/bamCoverage //g") - END_VERSIONS - """ } diff --git a/modules/deeptools/bamcoverage/meta.yml b/modules/deeptools/bamcoverage/meta.yml index fb92168f..c6566910 100644 --- a/modules/deeptools/bamcoverage/meta.yml +++ b/modules/deeptools/bamcoverage/meta.yml @@ -25,6 +25,14 @@ input: type: file description: BAM/CRAM index file pattern: "*.{bai,crai}" + - fasta: + type: file + description: Reference file the CRAM file was created with (required with CRAM input) + pattern: "*.{fasta,fa}" + - fasta_fai: + type: file + description: Index of the reference file (optional, but recommended) + pattern: "*.{fai}" output: - meta: @@ -47,3 +55,4 @@ output: authors: - "@FriederikeHanssen" + - "@SusiJo" diff --git a/tests/modules/deeptools/bamcoverage/main.nf b/tests/modules/deeptools/bamcoverage/main.nf index fb5c1c2d..97a79508 100644 --- a/tests/modules/deeptools/bamcoverage/main.nf +++ b/tests/modules/deeptools/bamcoverage/main.nf @@ -12,7 +12,7 @@ workflow test_deeptools_bamcoverage_bam { file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true) ] - DEEPTOOLS_BAMCOVERAGE ( input ) + DEEPTOOLS_BAMCOVERAGE ( input, [], [] ) } workflow test_deeptools_bamcoverage_cram { @@ -22,6 +22,20 @@ workflow test_deeptools_bamcoverage_cram { file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true), file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true) ] + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + fasta_fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) - DEEPTOOLS_BAMCOVERAGE ( input ) + DEEPTOOLS_BAMCOVERAGE ( input, fasta, fasta_fai) +} + +workflow test_deeptools_bamcoverage_cram_no_fasta_fai { + + input = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true) + ] + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + + DEEPTOOLS_BAMCOVERAGE ( input, fasta, []) } diff --git a/tests/modules/deeptools/bamcoverage/test.yml b/tests/modules/deeptools/bamcoverage/test.yml index 736c2e61..63ef6442 100644 --- a/tests/modules/deeptools/bamcoverage/test.yml +++ b/tests/modules/deeptools/bamcoverage/test.yml @@ -1,21 +1,26 @@ - name: deeptools bamcoverage test_deeptools_bamcoverage_bam - command: nextflow run tests/modules/deeptools/bamcoverage -entry test_deeptools_bamcoverage_bam -c tests/config/nextflow.config + command: nextflow run ./tests/modules/deeptools/bamcoverage -entry test_deeptools_bamcoverage_bam -c ./tests/config/nextflow.config -c ./tests/modules/deeptools/bamcoverage/nextflow.config tags: - - deeptools - deeptools/bamcoverage + - deeptools files: - path: output/deeptools/test.bigWig md5sum: 95fe9383a9e6c02aea6b785cf074274f - - path: output/deeptools/versions.yml - md5sum: 68c94e73b7a8c0935578bad61fea54c1 - name: deeptools bamcoverage test_deeptools_bamcoverage_cram - command: nextflow run tests/modules/deeptools/bamcoverage -entry test_deeptools_bamcoverage_cram -c tests/config/nextflow.config + command: nextflow run ./tests/modules/deeptools/bamcoverage -entry test_deeptools_bamcoverage_cram -c ./tests/config/nextflow.config -c ./tests/modules/deeptools/bamcoverage/nextflow.config tags: - - deeptools - deeptools/bamcoverage + - deeptools + files: + - path: output/deeptools/test.bigWig + md5sum: 95fe9383a9e6c02aea6b785cf074274f + +- name: deeptools bamcoverage test_deeptools_bamcoverage_cram_no_fasta_fai + command: nextflow run ./tests/modules/deeptools/bamcoverage -entry test_deeptools_bamcoverage_cram_no_fasta_fai -c ./tests/config/nextflow.config -c ./tests/modules/deeptools/bamcoverage/nextflow.config + tags: + - deeptools/bamcoverage + - deeptools files: - path: output/deeptools/test.bigWig md5sum: 95fe9383a9e6c02aea6b785cf074274f - - path: output/deeptools/versions.yml - md5sum: 665bbd2979c49bf3974a24bd44a88e94 From 51be617b1ca9bff973655eb899d591ed6ab253b5 Mon Sep 17 00:00:00 2001 From: Matthias De Smet <11850640+matthdsm@users.noreply.github.com> Date: Thu, 2 Jun 2022 11:09:55 +0200 Subject: [PATCH 036/114] fix untar output dir --- modules/untar/main.nf | 8 +++++++- tests/config/test_data.config | 2 ++ tests/modules/untar/main.nf | 10 ++++++++++ tests/modules/untar/test.yml | 12 ++++++++++-- 4 files changed, 29 insertions(+), 3 deletions(-) diff --git a/modules/untar/main.nf b/modules/untar/main.nf index 058d1764..29ab10a5 100644 --- a/modules/untar/main.nf +++ b/modules/untar/main.nf @@ -21,12 +21,18 @@ process UNTAR { def args = task.ext.args ?: '' def args2 = task.ext.args2 ?: '' untar = archive.toString() - '.tar.gz' + """ + mkdir output + tar \\ + -C output --strip-components 1 \\ -xzvf \\ $args \\ $archive \\ - $args2 \\ + $args2 + + mv output ${untar} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/tests/config/test_data.config b/tests/config/test_data.config index 39331664..4af77d92 100644 --- a/tests/config/test_data.config +++ b/tests/config/test_data.config @@ -323,6 +323,8 @@ params { test_sv_vcf = "${test_data_dir}/genomics/homo_sapiens/illumina/vcf/sv_query.vcf.gz" test_pytor = "${test_data_dir}/genomics/homo_sapiens/illumina/pytor/test.pytor" + + test_flowcell = "${test_data_dir}/genomics/homo_sapiens/illumina/bcl/flowcell.tar.gz" } 'pacbio' { primers = "${test_data_dir}/genomics/homo_sapiens/pacbio/fasta/primers.fasta" diff --git a/tests/modules/untar/main.nf b/tests/modules/untar/main.nf index 9d6d4c6c..3b7daedd 100644 --- a/tests/modules/untar/main.nf +++ b/tests/modules/untar/main.nf @@ -12,3 +12,13 @@ workflow test_untar { UNTAR ( input ) } + + +workflow test_untar_different_output_path { + input = [ + [], + file(params.test_data['homo_sapiens']['illumina']['test_flowcell'], checkIfExists: true) + ] + + UNTAR ( input ) +} diff --git a/tests/modules/untar/test.yml b/tests/modules/untar/test.yml index 6d0d1d12..6599f105 100644 --- a/tests/modules/untar/test.yml +++ b/tests/modules/untar/test.yml @@ -1,5 +1,5 @@ -- name: untar - command: nextflow run ./tests/modules/untar -entry test_untar -c ./tests/config/nextflow.config -c ./tests/modules/untar/nextflow.config +- name: untar test_untar + command: nextflow run ./tests/modules/untar -entry test_untar -c ./tests/config/nextflow.config -c ./tests/modules/untar/nextflow.config tags: - untar files: @@ -9,3 +9,11 @@ md5sum: a033d00cf6759407010b21700938f543 - path: output/untar/kraken2/taxo.k2d md5sum: 094d5891cdccf2f1468088855c214b2c + +- name: untar test_untar_different_output_path + command: nextflow run ./tests/modules/untar -entry test_untar_different_output_path -c ./tests/config/nextflow.config -c ./tests/modules/untar/nextflow.config + tags: + - untar + files: + - path: output/untar/flowcell/RunInfo.xml + md5sum: 03038959f4dd181c86bc97ae71fe270a From 2f3ddc90fbeda9fcf54f25b84465f5ac149d2b75 Mon Sep 17 00:00:00 2001 From: SusiJo Date: Thu, 2 Jun 2022 12:57:50 +0200 Subject: [PATCH 037/114] added fasta_fai as input + changed if statements --- modules/cnvkit/batch/main.nf | 152 +++++++++++++++++++++++----- modules/cnvkit/batch/meta.yml | 4 + tests/modules/cnvkit/batch/main.nf | 44 +++++--- tests/modules/cnvkit/batch/test.yml | 49 ++++++--- 4 files changed, 200 insertions(+), 49 deletions(-) diff --git a/modules/cnvkit/batch/main.nf b/modules/cnvkit/batch/main.nf index b467c6f2..090f7860 100644 --- a/modules/cnvkit/batch/main.nf +++ b/modules/cnvkit/batch/main.nf @@ -10,6 +10,7 @@ process CNVKIT_BATCH { input: tuple val(meta), path(tumor), path(normal) path fasta + path fasta_fai path targets path reference @@ -34,12 +35,15 @@ process CNVKIT_BATCH { // execute samtools only when cram files are input, cnvkit runs natively on bam but is prohibitively slow def tumor_cram = tumor_exists && tumor.Extension == "cram" ? true : false def normal_cram = normal_exists && normal.Extension == "cram" ? true : false + def tumor_bam = tumor_exists && tumor.Extension == "bam" ? true : false + def normal_bam = normal_exists && normal.Extension == "bam" ? true : false def tumor_out = tumor_cram ? tumor.BaseName + ".bam" : "${tumor}" // tumor_only mode does not need fasta & target // instead it requires a pre-computed reference.cnn which is built from fasta & target def (normal_out, normal_args, fasta_args) = ["", "", ""] + def fai_reference = fasta_fai ? "--fai-reference ${fasta_fai}" : "" if (normal_exists){ def normal_prefix = normal.BaseName @@ -48,9 +52,9 @@ process CNVKIT_BATCH { // germline mode // normal samples must be input without a flag - // requires flag --normal to be empty + // requires flag --normal to be empty [] if(!tumor_exists){ - tumor_out = normal.BaseName + ".bam" + tumor_out = "${normal_prefix}" + ".bam" normal_args = "--normal " } // somatic mode @@ -62,28 +66,130 @@ process CNVKIT_BATCH { def target_args = targets ? "--targets $targets" : "" def reference_args = reference ? "--reference $reference" : "" - """ - if $tumor_cram; then - samtools view -T $fasta $tumor -@ $task.cpus -o $tumor_out - fi - if $normal_cram; then - samtools view -T $fasta $normal -@ $task.cpus -o $normal_out - fi + // somatic_mode cram_input + if (tumor_cram && normal_cram){ + """ + samtools view -T $fasta $fai_reference $tumor -@ $task.cpus -o $tumor_out + samtools view -T $fasta $fai_reference $normal -@ $task.cpus -o $normal_out + cnvkit.py \\ + batch \\ + $tumor_out \\ + $normal_args \\ + $fasta_args \\ + $reference_args \\ + $target_args \\ + --processes $task.cpus \\ + $args - cnvkit.py \\ - batch \\ - $tumor_out \\ - $normal_args \\ - $fasta_args \\ - $reference_args \\ - $target_args \\ - --processes $task.cpus \\ - $args + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + cnvkit: \$(cnvkit.py version | sed -e "s/cnvkit v//g") + END_VERSIONS + """ + } + // somatic_mode bam_input + else if (tumor_bam && normal_bam){ + """ + cnvkit.py \\ + batch \\ + $tumor_out \\ + $normal_args \\ + $fasta_args \\ + $reference_args \\ + $target_args \\ + --processes $task.cpus \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cnvkit: \$(cnvkit.py version | sed -e "s/cnvkit v//g") + END_VERSIONS + """ + } + // tumor_only_mode cram_input + else if(tumor_cram && !normal_exists){ + """ + samtools view -T $fasta $fai_reference $tumor -@ $task.cpus -o $tumor_out + + cnvkit.py \\ + batch \\ + $tumor_out \\ + $normal_args \\ + $fasta_args \\ + $reference_args \\ + $target_args \\ + --processes $task.cpus \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + cnvkit: \$(cnvkit.py version | sed -e "s/cnvkit v//g") + END_VERSIONS + """ + } + // tumor_only bam_input + else if(tumor_bam && !normal_exists){ + """ + cnvkit.py \\ + batch \\ + $tumor_out \\ + $normal_args \\ + $fasta_args \\ + $reference_args \\ + $target_args \\ + --processes $task.cpus \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cnvkit: \$(cnvkit.py version | sed -e "s/cnvkit v//g") + END_VERSIONS + """ + } + // germline mode cram_input + // normal_args must be --normal [] + else if (normal_cram && !tumor_exists){ + """ + samtools view -T $fasta $fai_reference $normal -@ $task.cpus -o $tumor_out + + cnvkit.py \\ + batch \\ + $tumor_out \\ + $normal_args \\ + $fasta_args \\ + $reference_args \\ + $target_args \\ + --processes $task.cpus \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + cnvkit: \$(cnvkit.py version | sed -e "s/cnvkit v//g") + END_VERSIONS + """ + } + // germline mode bam_input + else if (normal_bam && !tumor_exists){ + """ + cnvkit.py \\ + batch \\ + $tumor_out \\ + $normal_args \\ + $fasta_args \\ + $reference_args \\ + $target_args \\ + --processes $task.cpus \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cnvkit: \$(cnvkit.py version | sed -e "s/cnvkit v//g") + END_VERSIONS + """ + } - cat <<-END_VERSIONS > versions.yml - "${task.process}": - cnvkit: \$(cnvkit.py version | sed -e "s/cnvkit v//g") - END_VERSIONS - """ } diff --git a/modules/cnvkit/batch/meta.yml b/modules/cnvkit/batch/meta.yml index 2cd675c7..89762192 100644 --- a/modules/cnvkit/batch/meta.yml +++ b/modules/cnvkit/batch/meta.yml @@ -29,6 +29,10 @@ input: type: file description: | Input reference genome fasta file (only needed for cram_input and/or when normal_samples are provided) + - fasta_fai: + type: file + description: | + Input reference genome fasta index (optional, but recommended for cram_input) - targetfile: type: file description: | diff --git a/tests/modules/cnvkit/batch/main.nf b/tests/modules/cnvkit/batch/main.nf index c93e3a00..daf87a37 100755 --- a/tests/modules/cnvkit/batch/main.nf +++ b/tests/modules/cnvkit/batch/main.nf @@ -7,7 +7,7 @@ include { CNVKIT_BATCH as CNVKIT_WGS } from '../../../../modules/cnvkit/ba include { CNVKIT_BATCH as CNVKIT_TUMORONLY } from '../../../../modules/cnvkit/batch/main.nf' include { CNVKIT_BATCH as CNVKIT_GERMLINE } from '../../../../modules/cnvkit/batch/main.nf' -workflow test_cnvkit_hybrid { +workflow test_cnvkit_hybrid_somatic { input = [ [ id:'test' ], // meta map @@ -17,10 +17,10 @@ workflow test_cnvkit_hybrid { fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) targets = file(params.test_data['sarscov2']['genome']['baits_bed'], checkIfExists: true) - CNVKIT_HYBRID ( input, fasta, targets, [] ) + CNVKIT_HYBRID ( input, fasta, [], targets, [] ) } -workflow test_cnvkit_wgs { +workflow test_cnvkit_wgs_somatic { input = [ [ id:'test'], // meta map @@ -29,22 +29,24 @@ workflow test_cnvkit_wgs { ] fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) - CNVKIT_WGS ( input, fasta, [], [] ) + CNVKIT_WGS ( input, fasta, [], [], [] ) } -workflow test_cnvkit_cram { +workflow test_cnvkit_cram_wgs_somatic { input = [ [ id:'test'], // meta map file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_cram'], checkIfExists: true), file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true) ] - fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + fasta_fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) - CNVKIT_WGS ( input, fasta, [], [] ) + CNVKIT_WGS ( input, fasta, fasta_fai, [], [] ) } -workflow test_cnvkit_tumoronly { + +workflow test_cnvkit_tumoronly_hybrid_bam { input = [ [ id:'test'], // meta map @@ -53,10 +55,10 @@ workflow test_cnvkit_tumoronly { ] reference = file(params.test_data['homo_sapiens']['genome']['genome_21_reference_cnn'], checkIfExists: true) - CNVKIT_TUMORONLY ( input, [], [], reference ) + CNVKIT_TUMORONLY ( input, [], [], [], reference ) } -workflow test_cnvkit_tumoronly_cram { +workflow test_cnvkit_tumoronly_hybrid_cram { input = [ [ id:'test'], // meta map @@ -66,10 +68,10 @@ workflow test_cnvkit_tumoronly_cram { fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) reference = file(params.test_data['homo_sapiens']['genome']['genome_21_reference_cnn'], checkIfExists: true) - CNVKIT_TUMORONLY ( input, fasta, [], reference ) + CNVKIT_TUMORONLY ( input, fasta, [], [], reference ) } -workflow test_cnvkit_germline_cram { +workflow test_cnvkit_germline_hybrid_cram { input = [ [ id:'test'], // meta map @@ -77,7 +79,21 @@ workflow test_cnvkit_germline_cram { file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_cram'], checkIfExists: true) ] fasta = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true) - targets = file(params.test_data['homo_sapiens']['genome']['genome_21_multi_interval_bed'], checkIfExists: true) + fasta_fai = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta_fai'], checkIfExists: true) + targets = file(params.test_data['homo_sapiens']['genome']['genome_21_multi_interval_bed'], checkIfExists: true) - CNVKIT_GERMLINE ( input, fasta, targets, []) + CNVKIT_GERMLINE ( input, fasta, fasta_fai, targets, []) +} + +workflow test_cnvkit_germline_hybrid_bam { + + input = [ + [ id:'test'], // meta map + [], + file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_bam'], checkIfExists: true) + ] + fasta = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true) + targets = file(params.test_data['homo_sapiens']['genome']['genome_21_multi_interval_bed'], checkIfExists: true) + + CNVKIT_GERMLINE ( input, fasta, [], targets, []) } diff --git a/tests/modules/cnvkit/batch/test.yml b/tests/modules/cnvkit/batch/test.yml index 928a08de..006e142f 100755 --- a/tests/modules/cnvkit/batch/test.yml +++ b/tests/modules/cnvkit/batch/test.yml @@ -1,5 +1,5 @@ -- name: cnvkit batch test_cnvkit_hybrid - command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_hybrid -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config +- name: cnvkit batch test_cnvkit_hybrid_somatic + command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_hybrid_somatic -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config tags: - cnvkit - cnvkit/batch @@ -26,8 +26,8 @@ - path: output/cnvkit/test.single_end.sorted.targetcoverage.cnn md5sum: aa8a018b1d4d1e688c9f9f6ae01bf4d7 -- name: cnvkit batch test_cnvkit_wgs - command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_wgs -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config +- name: cnvkit batch test_cnvkit_wgs_somatic + command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_wgs_somatic -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config tags: - cnvkit - cnvkit/batch @@ -56,8 +56,8 @@ - path: output/cnvkit/test2.paired_end.sorted.targetcoverage.cnn md5sum: 6ae6b3fce7299eedca6133d911c38fe1 -- name: cnvkit batch test_cnvkit_cram - command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_cram -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config +- name: cnvkit batch test_cnvkit_cram_wgs_somatic + command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_cram_wgs_somatic -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config tags: - cnvkit - cnvkit/batch @@ -86,8 +86,8 @@ - path: output/cnvkit/test2.paired_end.sorted.targetcoverage.cnn md5sum: 6ae6b3fce7299eedca6133d911c38fe1 -- name: cnvkit batch test_cnvkit_tumoronly - command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_tumoronly -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config +- name: cnvkit batch test_cnvkit_tumoronly_hybrid_bam + command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_tumoronly_hybrid_bam -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config tags: - cnvkit - cnvkit/batch @@ -109,8 +109,8 @@ - path: output/cnvkit/test2.paired_end.recalibrated.sorted.targetcoverage.cnn md5sum: e6d0190c1c37ce6e41f76ca5b24ccca3 -- name: cnvkit batch test_cnvkit_tumoronly_cram - command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_tumoronly_cram -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config +- name: cnvkit batch test_cnvkit_tumoronly_hybrid_cram + command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_tumoronly_hybrid_cram -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config tags: - cnvkit - cnvkit/batch @@ -132,8 +132,33 @@ - path: output/cnvkit/test2.paired_end.recalibrated.sorted.targetcoverage.cnn md5sum: e6d0190c1c37ce6e41f76ca5b24ccca3 -- name: cnvkit batch test_cnvkit_germline_cram - command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_germline_cram -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config +- name: cnvkit batch test_cnvkit_germline_hybrid_cram + command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_germline_hybrid_cram -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config + tags: + - cnvkit + - cnvkit/batch + files: + - path: output/cnvkit/multi_intervals.antitarget.bed + md5sum: 3d4d20f9f23b39970865d29ef239d20b + - path: output/cnvkit/multi_intervals.target.bed + md5sum: 86d30493bb2e619a93f4ebc2923d29f3 + - path: output/cnvkit/reference.cnn + md5sum: a09ee4be5dda1cf0f68073bdb3aad8ec + - path: output/cnvkit/test2.paired_end.recalibrated.sorted.antitargetcoverage.cnn + md5sum: 067115082c4af4b64d58c0dc3a3642e4 + - path: output/cnvkit/test2.paired_end.recalibrated.sorted.bintest.cns + md5sum: 68b62b75cd91b2ffe5633686fb943490 + - path: output/cnvkit/test2.paired_end.recalibrated.sorted.call.cns + md5sum: df196edd72613c59186f4d87df3dc4a4 + - path: output/cnvkit/test2.paired_end.recalibrated.sorted.cnr + md5sum: 3b4fc0cc73be78f978cfe2422470753e + - path: output/cnvkit/test2.paired_end.recalibrated.sorted.cns + md5sum: 4e67451dbcb6601fc3fa5dd7e570f1d4 + - path: output/cnvkit/test2.paired_end.recalibrated.sorted.targetcoverage.cnn + md5sum: b4a49faf170e436ec32dcc21ccc3ce8f + +- name: cnvkit batch test_cnvkit_germline_hybrid_bam + command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_germline_hybrid_bam -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config tags: - cnvkit - cnvkit/batch From 2a45d4286895d77d47b3a3dd9bc845b4d13276df Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Thu, 2 Jun 2022 13:17:06 +0200 Subject: [PATCH 038/114] added the calibratedragstrmodel module --- modules/gatk4/calibratedragstrmodel/main.nf | 48 +++++++++++++ modules/gatk4/calibratedragstrmodel/meta.yml | 70 +++++++++++++++++++ tests/config/pytest_modules.yml | 4 ++ tests/config/test_data.config | 1 + .../gatk4/calibratedragstrmodel/main.nf | 44 ++++++++++++ .../calibratedragstrmodel/nextflow.config | 5 ++ .../gatk4/calibratedragstrmodel/test.yml | 17 +++++ 7 files changed, 189 insertions(+) create mode 100644 modules/gatk4/calibratedragstrmodel/main.nf create mode 100644 modules/gatk4/calibratedragstrmodel/meta.yml create mode 100644 tests/modules/gatk4/calibratedragstrmodel/main.nf create mode 100644 tests/modules/gatk4/calibratedragstrmodel/nextflow.config create mode 100644 tests/modules/gatk4/calibratedragstrmodel/test.yml diff --git a/modules/gatk4/calibratedragstrmodel/main.nf b/modules/gatk4/calibratedragstrmodel/main.nf new file mode 100644 index 00000000..6682753d --- /dev/null +++ b/modules/gatk4/calibratedragstrmodel/main.nf @@ -0,0 +1,48 @@ +process GATK4_CALIBRATEDRAGSTRMODEL{ + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gatk4:4.2.6.1--hdfd78af_0': + 'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }" + + input: + tuple val(meta), path(bam), path(bam_index) + path fasta + path fasta_fai + path dict + path strtablefile + + output: + tuple val(meta), path("*.txt") , emit: dragstr_model + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + def avail_mem = 3 + if (!task.memory) { + log.info '[GATK CalibrateDragstrModel] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = task.memory.giga + } + """ + gatk --java-options "-Xmx${avail_mem}g" CalibrateDragstrModel \\ + --input $bam \\ + --output ${prefix}.txt \\ + --reference $fasta \\ + --str-table-path $strtablefile \\ + --tmp-dir . \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/gatk4/calibratedragstrmodel/meta.yml b/modules/gatk4/calibratedragstrmodel/meta.yml new file mode 100644 index 00000000..f72a4f60 --- /dev/null +++ b/modules/gatk4/calibratedragstrmodel/meta.yml @@ -0,0 +1,70 @@ +name: "gatk4_calibratedragstrmodel" +description: estimates the parameters for the DRAGstr model +keywords: + - gatk4 + - bam + - cram + - sam + - calibratedragstrmodel +tools: + - gatk4: + description: + Genome Analysis Toolkit (GATK4). Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/articles/360057441571-CalibrateDragstrModel-BETA- + tool_dev_url: https://github.com/broadinstitute/gatk + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] + +input: + # Only when we have meta + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - bam_index: + type: file + description: index of the BAM/CRAM/SAM file + pattern: "*.{bai,crai,sai}" + - fasta: + type: file + description: The reference FASTA file + pattern: "*.{fasta,fa}" + - fasta_fai: + type: file + description: The index of the reference FASTA file + pattern: "*.fai" + - dict: + type: file + description: The sequence dictionary of the reference FASTA file + pattern: "*.dict" + - strtablefile: + type: file + description: The StrTableFile zip folder of the reference FASTA file + pattern: "*.zip" + +output: + #Only when we have meta + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - dragstr_model: + type: file + description: The DragSTR model + pattern: "*.txt" + +authors: + - "@nvnieuwk" diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index bf4a6137..d4bfac5b 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -743,6 +743,10 @@ gatk4/calculatecontamination: - modules/gatk4/calculatecontamination/** - tests/modules/gatk4/calculatecontamination/** +gatk4/calibratedragstrmodel: + - modules/gatk4/calibratedragstrmodel/** + - tests/modules/gatk4/calibratedragstrmodel/** + gatk4/cnnscorevariants: - modules/gatk4/cnnscorevariants/** - tests/modules/gatk4/cnnscorevariants/** diff --git a/tests/config/test_data.config b/tests/config/test_data.config index 39331664..b1326544 100644 --- a/tests/config/test_data.config +++ b/tests/config/test_data.config @@ -121,6 +121,7 @@ params { genome_elfasta = "${test_data_dir}/genomics/homo_sapiens/genome/genome.elfasta" genome_fasta = "${test_data_dir}/genomics/homo_sapiens/genome/genome.fasta" genome_fasta_fai = "${test_data_dir}/genomics/homo_sapiens/genome/genome.fasta.fai" + genome_strtablefile = "${test_data_dir}/genomics/homo_sapiens/genome/genome_strtablefile.zip" genome_dict = "${test_data_dir}/genomics/homo_sapiens/genome/genome.dict" genome_gff3 = "${test_data_dir}/genomics/homo_sapiens/genome/genome.gff3" genome_gtf = "${test_data_dir}/genomics/homo_sapiens/genome/genome.gtf" diff --git a/tests/modules/gatk4/calibratedragstrmodel/main.nf b/tests/modules/gatk4/calibratedragstrmodel/main.nf new file mode 100644 index 00000000..4d4e9896 --- /dev/null +++ b/tests/modules/gatk4/calibratedragstrmodel/main.nf @@ -0,0 +1,44 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { GATK4_CALIBRATEDRAGSTRMODEL } from '../../../../modules/gatk4/calibratedragstrmodel/main.nf' + +workflow test_gatk4_calibratedragstrmodel_bam { + + input = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true) + ] + + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + + fasta_fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + + dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) + + strtablefile = file(params.test_data['homo_sapiens']['genome']['genome_strtablefile'], checkIfExists: true) + + GATK4_CALIBRATEDRAGSTRMODEL ( input, fasta, fasta_fai, dict, strtablefile ) +} + +workflow test_gatk4_calibratedragstrmodel_cram { + + input = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true) + ] + + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + + fasta_fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + + dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) + + strtablefile = file(params.test_data['homo_sapiens']['genome']['genome_strtablefile'], checkIfExists: true) + + GATK4_CALIBRATEDRAGSTRMODEL ( input, fasta, fasta_fai, dict, strtablefile ) +} + diff --git a/tests/modules/gatk4/calibratedragstrmodel/nextflow.config b/tests/modules/gatk4/calibratedragstrmodel/nextflow.config new file mode 100644 index 00000000..50f50a7a --- /dev/null +++ b/tests/modules/gatk4/calibratedragstrmodel/nextflow.config @@ -0,0 +1,5 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + +} \ No newline at end of file diff --git a/tests/modules/gatk4/calibratedragstrmodel/test.yml b/tests/modules/gatk4/calibratedragstrmodel/test.yml new file mode 100644 index 00000000..bc46dead --- /dev/null +++ b/tests/modules/gatk4/calibratedragstrmodel/test.yml @@ -0,0 +1,17 @@ +- name: gatk4 calibratedragstrmodel test_gatk4_calibratedragstrmodel_bam + command: nextflow run ./tests/modules/gatk4/calibratedragstrmodel -entry test_gatk4_calibratedragstrmodel_bam -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/calibratedragstrmodel/nextflow.config + tags: + - gatk4 + - gatk4/calibratedragstrmodel + files: + - path: output/gatk4/test.txt + md5sum: 0a1a1583b157fa2251dd931ed165da4f + +- name: gatk4 calibratedragstrmodel test_gatk4_calibratedragstrmodel_cram + command: nextflow run ./tests/modules/gatk4/calibratedragstrmodel -entry test_gatk4_calibratedragstrmodel_cram -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/calibratedragstrmodel/nextflow.config + tags: + - gatk4 + - gatk4/calibratedragstrmodel + files: + - path: output/gatk4/test.txt + md5sum: 1aa7ab38023f724877b3323c5e6b9a4e From d3874bbae9035e6be95be42a712acfff4d28d00e Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Thu, 2 Jun 2022 13:22:55 +0200 Subject: [PATCH 039/114] linting --- modules/gatk4/calibratedragstrmodel/meta.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/gatk4/calibratedragstrmodel/meta.yml b/modules/gatk4/calibratedragstrmodel/meta.yml index f72a4f60..e6d2b0b1 100644 --- a/modules/gatk4/calibratedragstrmodel/meta.yml +++ b/modules/gatk4/calibratedragstrmodel/meta.yml @@ -1,4 +1,4 @@ -name: "gatk4_calibratedragstrmodel" +name: gatk4_calibratedragstrmodel description: estimates the parameters for the DRAGstr model keywords: - gatk4 From 3a49b82863d6dc71fcfcb36a01b04cb58dcb6f12 Mon Sep 17 00:00:00 2001 From: nvnieuwk <101190534+nvnieuwk@users.noreply.github.com> Date: Thu, 2 Jun 2022 13:39:44 +0200 Subject: [PATCH 040/114] Update modules/gatk4/calibratedragstrmodel/main.nf Co-authored-by: Matthias De Smet <11850640+matthdsm@users.noreply.github.com> --- modules/gatk4/calibratedragstrmodel/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/gatk4/calibratedragstrmodel/main.nf b/modules/gatk4/calibratedragstrmodel/main.nf index 6682753d..00a11de0 100644 --- a/modules/gatk4/calibratedragstrmodel/main.nf +++ b/modules/gatk4/calibratedragstrmodel/main.nf @@ -1,4 +1,4 @@ -process GATK4_CALIBRATEDRAGSTRMODEL{ +process GATK4_CALIBRATEDRAGSTRMODEL { tag "$meta.id" label 'process_low' From e53d091a6de1ae9fd681351c085d8abe076ba1ec Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Thu, 2 Jun 2022 14:08:09 +0200 Subject: [PATCH 041/114] added dragstr model to haplotypecaller --- modules/gatk4/haplotypecaller/main.nf | 4 +++- modules/gatk4/haplotypecaller/meta.yml | 4 ++++ tests/config/test_data.config | 2 ++ tests/modules/gatk4/haplotypecaller/main.nf | 22 +++++++++++++++++++- tests/modules/gatk4/haplotypecaller/test.yml | 18 ++++++++++------ 5 files changed, 42 insertions(+), 8 deletions(-) diff --git a/modules/gatk4/haplotypecaller/main.nf b/modules/gatk4/haplotypecaller/main.nf index 6dd3f69e..19cd57bb 100644 --- a/modules/gatk4/haplotypecaller/main.nf +++ b/modules/gatk4/haplotypecaller/main.nf @@ -8,7 +8,7 @@ process GATK4_HAPLOTYPECALLER { 'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }" input: - tuple val(meta), path(input), path(input_index), path(intervals) + tuple val(meta), path(input), path(input_index), path(intervals), path(dragstr_model) path fasta path fai path dict @@ -28,6 +28,7 @@ process GATK4_HAPLOTYPECALLER { def prefix = task.ext.prefix ?: "${meta.id}" def dbsnp_command = dbsnp ? "--dbsnp $dbsnp" : "" def interval_command = intervals ? "--intervals $intervals" : "" + def dragstr_command = dragstr_model ? "--dragstr-params-path $dragstr_model" : "" def avail_mem = 3 if (!task.memory) { @@ -42,6 +43,7 @@ process GATK4_HAPLOTYPECALLER { --reference $fasta \\ $dbsnp_command \\ $interval_command \\ + $dragstr_command \\ --tmp-dir . \\ $args diff --git a/modules/gatk4/haplotypecaller/meta.yml b/modules/gatk4/haplotypecaller/meta.yml index 81851a96..48193d91 100644 --- a/modules/gatk4/haplotypecaller/meta.yml +++ b/modules/gatk4/haplotypecaller/meta.yml @@ -32,6 +32,10 @@ input: - intervals: type: file description: Bed file with the genomic regions included in the library (optional) + - dragstr_model: + type: file + description: Text file containing the DragSTR model of the used BAM/CRAM file (optional) + pattern: "*.txt" - fasta: type: file description: The reference fasta file diff --git a/tests/config/test_data.config b/tests/config/test_data.config index 363903f0..fa7017b1 100644 --- a/tests/config/test_data.config +++ b/tests/config/test_data.config @@ -266,6 +266,8 @@ params { test_pileups_table = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/test.pileups.table" test2_pileups_table = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/test2.pileups.table" + test_paired_end_sorted_dragstrmodel = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/test_paired_end_sorted_dragstrmodel.txt" + test_genomicsdb_tar_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/test_genomicsdb.tar.gz" test_pon_genomicsdb_tar_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/test_pon_genomicsdb.tar.gz" diff --git a/tests/modules/gatk4/haplotypecaller/main.nf b/tests/modules/gatk4/haplotypecaller/main.nf index 7b60baab..3ad3dfb5 100644 --- a/tests/modules/gatk4/haplotypecaller/main.nf +++ b/tests/modules/gatk4/haplotypecaller/main.nf @@ -8,6 +8,7 @@ workflow test_gatk4_haplotypecaller { input = [ [ id:'test' ], // meta map file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true), + [], [] ] fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) @@ -21,6 +22,7 @@ workflow test_gatk4_haplotypecaller_cram { input = [ [ id:'test' ], // meta map file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true), file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true), + [], [] ] fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) @@ -34,7 +36,8 @@ workflow test_gatk4_haplotypecaller_intervals_dbsnp { input = [ [ id:'test' ], // meta map file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true), file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true), - file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true) + file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true), + [] ] fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) @@ -45,3 +48,20 @@ workflow test_gatk4_haplotypecaller_intervals_dbsnp { GATK4_HAPLOTYPECALLER ( input, fasta, fai, dict, sites, sites_tbi ) } + +workflow test_gatk4_haplotypecaller_dragstr_model { + input = [ [ id:'test' ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true), + [], + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_dragstrmodel'], checkIfExists: true) + ] + + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) + sites = [] + sites_tbi = [] + + GATK4_HAPLOTYPECALLER ( input, fasta, fai, dict, sites, sites_tbi ) +} diff --git a/tests/modules/gatk4/haplotypecaller/test.yml b/tests/modules/gatk4/haplotypecaller/test.yml index 3d416a0d..b566ee66 100644 --- a/tests/modules/gatk4/haplotypecaller/test.yml +++ b/tests/modules/gatk4/haplotypecaller/test.yml @@ -1,29 +1,35 @@ - name: gatk4 haplotypecaller test_gatk4_haplotypecaller - command: nextflow run ./tests/modules/gatk4/haplotypecaller -entry test_gatk4_haplotypecaller -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/haplotypecaller/nextflow.config + command: nextflow run ./tests/modules/gatk4/haplotypecaller -entry test_gatk4_haplotypecaller -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/haplotypecaller/nextflow.config tags: - gatk4/haplotypecaller - gatk4 files: - path: output/gatk4/test.vcf.gz - path: output/gatk4/test.vcf.gz.tbi - - path: output/gatk4/versions.yml - name: gatk4 haplotypecaller test_gatk4_haplotypecaller_cram - command: nextflow run ./tests/modules/gatk4/haplotypecaller -entry test_gatk4_haplotypecaller_cram -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/haplotypecaller/nextflow.config + command: nextflow run ./tests/modules/gatk4/haplotypecaller -entry test_gatk4_haplotypecaller_cram -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/haplotypecaller/nextflow.config tags: - gatk4/haplotypecaller - gatk4 files: - path: output/gatk4/test.vcf.gz - path: output/gatk4/test.vcf.gz.tbi - - path: output/gatk4/versions.yml - name: gatk4 haplotypecaller test_gatk4_haplotypecaller_intervals_dbsnp - command: nextflow run ./tests/modules/gatk4/haplotypecaller -entry test_gatk4_haplotypecaller_intervals_dbsnp -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/haplotypecaller/nextflow.config + command: nextflow run ./tests/modules/gatk4/haplotypecaller -entry test_gatk4_haplotypecaller_intervals_dbsnp -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/haplotypecaller/nextflow.config + tags: + - gatk4/haplotypecaller + - gatk4 + files: + - path: output/gatk4/test.vcf.gz + - path: output/gatk4/test.vcf.gz.tbi + +- name: gatk4 haplotypecaller test_gatk4_haplotypecaller_dragstr_model + command: nextflow run ./tests/modules/gatk4/haplotypecaller -entry test_gatk4_haplotypecaller_dragstr_model -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/haplotypecaller/nextflow.config tags: - gatk4/haplotypecaller - gatk4 files: - path: output/gatk4/test.vcf.gz - path: output/gatk4/test.vcf.gz.tbi - - path: output/gatk4/versions.yml From a6c4276ad74eb965fd9d8eb831f0786b85abc9c8 Mon Sep 17 00:00:00 2001 From: nvnieuwk <101190534+nvnieuwk@users.noreply.github.com> Date: Thu, 2 Jun 2022 14:19:57 +0200 Subject: [PATCH 042/114] Update tests/modules/gatk4/haplotypecaller/test.yml Co-authored-by: Maxime U. Garcia --- tests/modules/gatk4/haplotypecaller/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/modules/gatk4/haplotypecaller/test.yml b/tests/modules/gatk4/haplotypecaller/test.yml index b566ee66..ca3110d7 100644 --- a/tests/modules/gatk4/haplotypecaller/test.yml +++ b/tests/modules/gatk4/haplotypecaller/test.yml @@ -1,5 +1,5 @@ - name: gatk4 haplotypecaller test_gatk4_haplotypecaller - command: nextflow run ./tests/modules/gatk4/haplotypecaller -entry test_gatk4_haplotypecaller -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/haplotypecaller/nextflow.config + command: nextflow run ./tests/modules/gatk4/haplotypecaller -entry test_gatk4_haplotypecaller -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/haplotypecaller/nextflow.config tags: - gatk4/haplotypecaller - gatk4 From 2ce1cf3329eecc636ddfcbf27f86b398b45ac0ad Mon Sep 17 00:00:00 2001 From: nvnieuwk <101190534+nvnieuwk@users.noreply.github.com> Date: Thu, 2 Jun 2022 14:20:04 +0200 Subject: [PATCH 043/114] Update tests/modules/gatk4/haplotypecaller/test.yml Co-authored-by: Maxime U. Garcia --- tests/modules/gatk4/haplotypecaller/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/modules/gatk4/haplotypecaller/test.yml b/tests/modules/gatk4/haplotypecaller/test.yml index ca3110d7..094beaef 100644 --- a/tests/modules/gatk4/haplotypecaller/test.yml +++ b/tests/modules/gatk4/haplotypecaller/test.yml @@ -8,7 +8,7 @@ - path: output/gatk4/test.vcf.gz.tbi - name: gatk4 haplotypecaller test_gatk4_haplotypecaller_cram - command: nextflow run ./tests/modules/gatk4/haplotypecaller -entry test_gatk4_haplotypecaller_cram -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/haplotypecaller/nextflow.config + command: nextflow run ./tests/modules/gatk4/haplotypecaller -entry test_gatk4_haplotypecaller_cram -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/haplotypecaller/nextflow.config tags: - gatk4/haplotypecaller - gatk4 From e419d306dfeb8bab0e1d3ac07151585c0fcbee01 Mon Sep 17 00:00:00 2001 From: nvnieuwk <101190534+nvnieuwk@users.noreply.github.com> Date: Thu, 2 Jun 2022 14:20:10 +0200 Subject: [PATCH 044/114] Update tests/modules/gatk4/haplotypecaller/test.yml Co-authored-by: Maxime U. Garcia --- tests/modules/gatk4/haplotypecaller/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/modules/gatk4/haplotypecaller/test.yml b/tests/modules/gatk4/haplotypecaller/test.yml index 094beaef..17c4f6bf 100644 --- a/tests/modules/gatk4/haplotypecaller/test.yml +++ b/tests/modules/gatk4/haplotypecaller/test.yml @@ -17,7 +17,7 @@ - path: output/gatk4/test.vcf.gz.tbi - name: gatk4 haplotypecaller test_gatk4_haplotypecaller_intervals_dbsnp - command: nextflow run ./tests/modules/gatk4/haplotypecaller -entry test_gatk4_haplotypecaller_intervals_dbsnp -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/haplotypecaller/nextflow.config + command: nextflow run ./tests/modules/gatk4/haplotypecaller -entry test_gatk4_haplotypecaller_intervals_dbsnp -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/haplotypecaller/nextflow.config tags: - gatk4/haplotypecaller - gatk4 From e107d61069612fca5aaabcd4f056d06c732d0c18 Mon Sep 17 00:00:00 2001 From: nvnieuwk <101190534+nvnieuwk@users.noreply.github.com> Date: Thu, 2 Jun 2022 14:20:15 +0200 Subject: [PATCH 045/114] Update tests/modules/gatk4/haplotypecaller/test.yml Co-authored-by: Maxime U. Garcia --- tests/modules/gatk4/haplotypecaller/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/modules/gatk4/haplotypecaller/test.yml b/tests/modules/gatk4/haplotypecaller/test.yml index 17c4f6bf..96270644 100644 --- a/tests/modules/gatk4/haplotypecaller/test.yml +++ b/tests/modules/gatk4/haplotypecaller/test.yml @@ -26,7 +26,7 @@ - path: output/gatk4/test.vcf.gz.tbi - name: gatk4 haplotypecaller test_gatk4_haplotypecaller_dragstr_model - command: nextflow run ./tests/modules/gatk4/haplotypecaller -entry test_gatk4_haplotypecaller_dragstr_model -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/haplotypecaller/nextflow.config + command: nextflow run ./tests/modules/gatk4/haplotypecaller -entry test_gatk4_haplotypecaller_dragstr_model -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/haplotypecaller/nextflow.config tags: - gatk4/haplotypecaller - gatk4 From 046fa3958223ce4542578ef26502d39f4c5d71ef Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Thu, 2 Jun 2022 16:09:09 +0200 Subject: [PATCH 046/114] update VEP version + cache --- modules/ensemblvep/Dockerfile | 4 ++-- modules/ensemblvep/build.sh | 12 ++++++------ modules/ensemblvep/environment.yml | 4 ++-- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/modules/ensemblvep/Dockerfile b/modules/ensemblvep/Dockerfile index b4a1c664..4ada7c6b 100644 --- a/modules/ensemblvep/Dockerfile +++ b/modules/ensemblvep/Dockerfile @@ -11,8 +11,8 @@ RUN conda env create -f /environment.yml && conda clean -a # Setup default ARG variables ARG GENOME=GRCh38 ARG SPECIES=homo_sapiens -ARG VEP_VERSION=104 -ARG VEP_TAG=104.3 +ARG VEP_VERSION=105 +ARG VEP_TAG=105.0 # Add conda installation dir to PATH (instead of doing 'conda activate') ENV PATH /opt/conda/envs/nf-core-vep-${VEP_TAG}/bin:$PATH diff --git a/modules/ensemblvep/build.sh b/modules/ensemblvep/build.sh index 650c8704..402f2434 100755 --- a/modules/ensemblvep/build.sh +++ b/modules/ensemblvep/build.sh @@ -20,9 +20,9 @@ build_push() { docker push nfcore/vep:${VEP_TAG}.${GENOME} } -build_push "GRCh37" "homo_sapiens" "104" "104.3" -build_push "GRCh38" "homo_sapiens" "104" "104.3" -build_push "GRCm38" "mus_musculus" "102" "104.3" -build_push "GRCm39" "mus_musculus" "104" "104.3" -build_push "CanFam3.1" "canis_lupus_familiaris" "104" "104.3" -build_push "WBcel235" "caenorhabditis_elegans" "104" "104.3" +build_push "GRCh37" "homo_sapiens" "105" "105.0" +build_push "GRCh38" "homo_sapiens" "105" "105.0" +build_push "GRCm38" "mus_musculus" "102" "105.0" +build_push "GRCm39" "mus_musculus" "105" "105.0" +build_push "CanFam3.1" "canis_lupus_familiaris" "105" "105.0" +build_push "WBcel235" "caenorhabditis_elegans" "105" "105.0" diff --git a/modules/ensemblvep/environment.yml b/modules/ensemblvep/environment.yml index c0731c26..5df85b80 100644 --- a/modules/ensemblvep/environment.yml +++ b/modules/ensemblvep/environment.yml @@ -1,10 +1,10 @@ # You can use this file to create a conda environment for this module: # conda env create -f environment.yml -name: nf-core-vep-104.3 +name: nf-core-vep-105.0 channels: - conda-forge - bioconda - defaults dependencies: - - bioconda::ensembl-vep=104.3 + - bioconda::ensembl-vep=105.0 From bc0f52bcd3a64f27d8fb2da3a6424b427e15e53c Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Thu, 2 Jun 2022 16:11:11 +0200 Subject: [PATCH 047/114] fix cache version --- modules/ensemblvep/build.sh | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/modules/ensemblvep/build.sh b/modules/ensemblvep/build.sh index 402f2434..e21a7c0d 100755 --- a/modules/ensemblvep/build.sh +++ b/modules/ensemblvep/build.sh @@ -20,9 +20,9 @@ build_push() { docker push nfcore/vep:${VEP_TAG}.${GENOME} } -build_push "GRCh37" "homo_sapiens" "105" "105.0" -build_push "GRCh38" "homo_sapiens" "105" "105.0" -build_push "GRCm38" "mus_musculus" "102" "105.0" -build_push "GRCm39" "mus_musculus" "105" "105.0" -build_push "CanFam3.1" "canis_lupus_familiaris" "105" "105.0" -build_push "WBcel235" "caenorhabditis_elegans" "105" "105.0" +# build_push "GRCh37" "homo_sapiens" "105" "105.0" +# build_push "GRCh38" "homo_sapiens" "105" "105.0" +# build_push "GRCm38" "mus_musculus" "102" "105.0" +# build_push "GRCm39" "mus_musculus" "105" "105.0" +build_push "CanFam3.1" "canis_lupus_familiaris" "104" "105.0" +# build_push "WBcel235" "caenorhabditis_elegans" "105" "105.0" From b623ff5d2798b8c955baeb3890f0baee6077db66 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Thu, 2 Jun 2022 16:11:45 +0200 Subject: [PATCH 048/114] fix comments --- modules/ensemblvep/build.sh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/modules/ensemblvep/build.sh b/modules/ensemblvep/build.sh index e21a7c0d..6f340c0f 100755 --- a/modules/ensemblvep/build.sh +++ b/modules/ensemblvep/build.sh @@ -20,9 +20,9 @@ build_push() { docker push nfcore/vep:${VEP_TAG}.${GENOME} } -# build_push "GRCh37" "homo_sapiens" "105" "105.0" -# build_push "GRCh38" "homo_sapiens" "105" "105.0" -# build_push "GRCm38" "mus_musculus" "102" "105.0" -# build_push "GRCm39" "mus_musculus" "105" "105.0" +build_push "GRCh37" "homo_sapiens" "105" "105.0" +build_push "GRCh38" "homo_sapiens" "105" "105.0" +build_push "GRCm38" "mus_musculus" "102" "105.0" +build_push "GRCm39" "mus_musculus" "105" "105.0" build_push "CanFam3.1" "canis_lupus_familiaris" "104" "105.0" -# build_push "WBcel235" "caenorhabditis_elegans" "105" "105.0" +build_push "WBcel235" "caenorhabditis_elegans" "105" "105.0" From b4b1f3b81d897b97da0655cf48107b70aa4b2f73 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Thu, 2 Jun 2022 16:20:12 +0200 Subject: [PATCH 049/114] add intervals to calibratedragstrmodel --- modules/gatk4/calibratedragstrmodel/main.nf | 4 ++- modules/gatk4/calibratedragstrmodel/meta.yml | 4 +++ .../gatk4/calibratedragstrmodel/main.nf | 26 +++++++++++++++++-- .../gatk4/calibratedragstrmodel/test.yml | 13 ++++++++-- 4 files changed, 42 insertions(+), 5 deletions(-) diff --git a/modules/gatk4/calibratedragstrmodel/main.nf b/modules/gatk4/calibratedragstrmodel/main.nf index 00a11de0..08ba7008 100644 --- a/modules/gatk4/calibratedragstrmodel/main.nf +++ b/modules/gatk4/calibratedragstrmodel/main.nf @@ -8,7 +8,7 @@ process GATK4_CALIBRATEDRAGSTRMODEL { 'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }" input: - tuple val(meta), path(bam), path(bam_index) + tuple val(meta), path(bam), path(bam_index), path(intervals) path fasta path fasta_fai path dict @@ -24,6 +24,7 @@ process GATK4_CALIBRATEDRAGSTRMODEL { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def intervals_command = intervals ? "--intervals $intervals" : "" def avail_mem = 3 if (!task.memory) { @@ -37,6 +38,7 @@ process GATK4_CALIBRATEDRAGSTRMODEL { --output ${prefix}.txt \\ --reference $fasta \\ --str-table-path $strtablefile \\ + $intervals_command \\ --tmp-dir . \\ $args diff --git a/modules/gatk4/calibratedragstrmodel/meta.yml b/modules/gatk4/calibratedragstrmodel/meta.yml index e6d2b0b1..c3c40d81 100644 --- a/modules/gatk4/calibratedragstrmodel/meta.yml +++ b/modules/gatk4/calibratedragstrmodel/meta.yml @@ -33,6 +33,10 @@ input: type: file description: index of the BAM/CRAM/SAM file pattern: "*.{bai,crai,sai}" + - intervals: + type: file + description: BED file or interval list containing regions + pattern: "*.{bed,interval_list}" - fasta: type: file description: The reference FASTA file diff --git a/tests/modules/gatk4/calibratedragstrmodel/main.nf b/tests/modules/gatk4/calibratedragstrmodel/main.nf index 4d4e9896..136a9742 100644 --- a/tests/modules/gatk4/calibratedragstrmodel/main.nf +++ b/tests/modules/gatk4/calibratedragstrmodel/main.nf @@ -9,7 +9,8 @@ workflow test_gatk4_calibratedragstrmodel_bam { input = [ [ id:'test', single_end:false ], // meta map file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), - file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true) + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true), + [] ] fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) @@ -28,7 +29,28 @@ workflow test_gatk4_calibratedragstrmodel_cram { input = [ [ id:'test', single_end:false ], // meta map file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true), - file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true) + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true), + [] + ] + + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + + fasta_fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + + dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) + + strtablefile = file(params.test_data['homo_sapiens']['genome']['genome_strtablefile'], checkIfExists: true) + + GATK4_CALIBRATEDRAGSTRMODEL ( input, fasta, fasta_fai, dict, strtablefile ) +} + +workflow test_gatk4_calibratedragstrmodel_beds { + + input = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true), + file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true) ] fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) diff --git a/tests/modules/gatk4/calibratedragstrmodel/test.yml b/tests/modules/gatk4/calibratedragstrmodel/test.yml index bc46dead..53c428a9 100644 --- a/tests/modules/gatk4/calibratedragstrmodel/test.yml +++ b/tests/modules/gatk4/calibratedragstrmodel/test.yml @@ -1,8 +1,8 @@ - name: gatk4 calibratedragstrmodel test_gatk4_calibratedragstrmodel_bam command: nextflow run ./tests/modules/gatk4/calibratedragstrmodel -entry test_gatk4_calibratedragstrmodel_bam -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/calibratedragstrmodel/nextflow.config tags: - - gatk4 - gatk4/calibratedragstrmodel + - gatk4 files: - path: output/gatk4/test.txt md5sum: 0a1a1583b157fa2251dd931ed165da4f @@ -10,8 +10,17 @@ - name: gatk4 calibratedragstrmodel test_gatk4_calibratedragstrmodel_cram command: nextflow run ./tests/modules/gatk4/calibratedragstrmodel -entry test_gatk4_calibratedragstrmodel_cram -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/calibratedragstrmodel/nextflow.config tags: - - gatk4 - gatk4/calibratedragstrmodel + - gatk4 files: - path: output/gatk4/test.txt md5sum: 1aa7ab38023f724877b3323c5e6b9a4e + +- name: gatk4 calibratedragstrmodel test_gatk4_calibratedragstrmodel_beds + command: nextflow run ./tests/modules/gatk4/calibratedragstrmodel -entry test_gatk4_calibratedragstrmodel_beds -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/calibratedragstrmodel/nextflow.config + tags: + - gatk4/calibratedragstrmodel + - gatk4 + files: + - path: output/gatk4/test.txt + md5sum: def8baccad7bd59006f08fcb0a6721bf From 44daa5d6874f655862524a29a5579aea6f6af52b Mon Sep 17 00:00:00 2001 From: SusiJo <43847534+SusiJo@users.noreply.github.com> Date: Thu, 2 Jun 2022 16:23:09 +0200 Subject: [PATCH 050/114] update prefix Co-authored-by: Maxime U. Garcia --- modules/gatk4/markduplicatesspark/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/gatk4/markduplicatesspark/main.nf b/modules/gatk4/markduplicatesspark/main.nf index eeaf0445..5acfd45f 100644 --- a/modules/gatk4/markduplicatesspark/main.nf +++ b/modules/gatk4/markduplicatesspark/main.nf @@ -23,7 +23,7 @@ process GATK4_MARKDUPLICATES_SPARK { script: def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" + ".bam" + def prefix = task.ext.prefix ?: "${meta.id}.bam" def input_list = bam.collect{"--input $it"}.join(' ') From efc1e1bb71592704e5f2631dabdef48c7c3af1d7 Mon Sep 17 00:00:00 2001 From: SusiJo <43847534+SusiJo@users.noreply.github.com> Date: Thu, 2 Jun 2022 16:23:18 +0200 Subject: [PATCH 051/114] rm line Co-authored-by: Maxime U. Garcia --- modules/gatk4/markduplicatesspark/main.nf | 1 - 1 file changed, 1 deletion(-) diff --git a/modules/gatk4/markduplicatesspark/main.nf b/modules/gatk4/markduplicatesspark/main.nf index 5acfd45f..703623d5 100644 --- a/modules/gatk4/markduplicatesspark/main.nf +++ b/modules/gatk4/markduplicatesspark/main.nf @@ -34,7 +34,6 @@ process GATK4_MARKDUPLICATES_SPARK { avail_mem = task.memory.giga } """ - gatk --java-options "-Xmx${avail_mem}g" MarkDuplicatesSpark \\ $input_list \\ --output $prefix \\ From f3c0107fa2ecdc510b4f569dc3e062c7e028ac7a Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Thu, 2 Jun 2022 16:24:03 +0200 Subject: [PATCH 052/114] added optional to intervals --- modules/gatk4/calibratedragstrmodel/meta.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/gatk4/calibratedragstrmodel/meta.yml b/modules/gatk4/calibratedragstrmodel/meta.yml index c3c40d81..e71dac5e 100644 --- a/modules/gatk4/calibratedragstrmodel/meta.yml +++ b/modules/gatk4/calibratedragstrmodel/meta.yml @@ -35,7 +35,7 @@ input: pattern: "*.{bai,crai,sai}" - intervals: type: file - description: BED file or interval list containing regions + description: BED file or interval list containing regions (optional) pattern: "*.{bed,interval_list}" - fasta: type: file From 14c63f9ccc50735464e963c558424efde668f164 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Thu, 2 Jun 2022 20:24:14 +0200 Subject: [PATCH 053/114] Changes after review --- modules/gatk/realignertargetcreator/main.nf | 14 ++++++++++++-- modules/gatk/realignertargetcreator/meta.yml | 6 +++--- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/modules/gatk/realignertargetcreator/main.nf b/modules/gatk/realignertargetcreator/main.nf index e3a03a5f..866ff8a5 100644 --- a/modules/gatk/realignertargetcreator/main.nf +++ b/modules/gatk/realignertargetcreator/main.nf @@ -8,8 +8,10 @@ process GATK_REALIGNERTARGETCREATOR { 'quay.io/biocontainers/gatk:3.5--hdfd78af_11' }" input: - tuple val(meta), path(bam), path(bai) - tuple path(fasta), path(fasta_fai), path(fasta_dict) + tuple val(meta), path(input), path(index) + path path(fasta) + path(fai) + path(dict) path(known_vcf) output: @@ -25,8 +27,16 @@ process GATK_REALIGNERTARGETCREATOR { def known = known_vcf ? "-known ${known_vcf}" : "" if ("$bam" == "${prefix}.bam") error "Input and output names are the same, set prefix in module configuration to disambiguate!" + def avail_mem = 3 + if (!task.memory) { + log.info '[GATK HaplotypeCaller] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = task.memory.giga + } + """ gatk3 \\ + -Xmx${avail_mem}g \\ -T RealignerTargetCreator \\ -nt ${task.cpus} \\ -I ${bam} \\ diff --git a/modules/gatk/realignertargetcreator/meta.yml b/modules/gatk/realignertargetcreator/meta.yml index 70df7b78..c49d2a8d 100644 --- a/modules/gatk/realignertargetcreator/meta.yml +++ b/modules/gatk/realignertargetcreator/meta.yml @@ -20,11 +20,11 @@ input: description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - bam: + - input: type: file description: Sorted and indexed BAM/CRAM/SAM file pattern: "*.bam" - - bai: + - index: type: file description: BAM index file pattern: "*.bai" @@ -32,7 +32,7 @@ input: type: file description: Reference file used to generate BAM file pattern: ".{fasta,fa,fna}" - - fasta_fai: + - fai: type: file description: Index of reference file used to generate BAM file pattern: ".fai" From 43ef3841d1249aa20793b783bb961f568af2b8a4 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Thu, 2 Jun 2022 20:25:12 +0200 Subject: [PATCH 054/114] remove duplciate path --- modules/gatk/realignertargetcreator/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/gatk/realignertargetcreator/main.nf b/modules/gatk/realignertargetcreator/main.nf index 866ff8a5..7454430c 100644 --- a/modules/gatk/realignertargetcreator/main.nf +++ b/modules/gatk/realignertargetcreator/main.nf @@ -9,7 +9,7 @@ process GATK_REALIGNERTARGETCREATOR { input: tuple val(meta), path(input), path(index) - path path(fasta) + path(fasta) path(fai) path(dict) path(known_vcf) From b1edcc6e94124fc7f2473655268f6d72b12d7209 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Thu, 2 Jun 2022 20:26:09 +0200 Subject: [PATCH 055/114] Update modules/gatk/realignertargetcreator/main.nf --- modules/gatk/realignertargetcreator/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/gatk/realignertargetcreator/main.nf b/modules/gatk/realignertargetcreator/main.nf index 7454430c..96b8806c 100644 --- a/modules/gatk/realignertargetcreator/main.nf +++ b/modules/gatk/realignertargetcreator/main.nf @@ -29,7 +29,7 @@ process GATK_REALIGNERTARGETCREATOR { def avail_mem = 3 if (!task.memory) { - log.info '[GATK HaplotypeCaller] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + log.info '[GATK RealignerTargetCreator] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' } else { avail_mem = task.memory.giga } From 599b264280d55f76eebdfd9cb1c474c338fcb5d0 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Thu, 2 Jun 2022 20:27:16 +0200 Subject: [PATCH 056/114] Standaridse parameters --- modules/gatk/indelrealigner/main.nf | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/modules/gatk/indelrealigner/main.nf b/modules/gatk/indelrealigner/main.nf index 117a298b..22391879 100644 --- a/modules/gatk/indelrealigner/main.nf +++ b/modules/gatk/indelrealigner/main.nf @@ -8,9 +8,11 @@ process GATK_INDELREALIGNER { 'quay.io/biocontainers/gatk:3.5--hdfd78af_11' }" input: - tuple val(meta), path(bam), path(bai), path(intervals) - tuple val(meta), path(fasta) - tuple val(meta), path(known_vcf) + tuple val(meta), path(input), path(index), path(intervals) + path path(fasta) + path(fai) + path(dict) + path(known_vcf) output: tuple val(meta), path("*.bam"), emit: bam @@ -24,9 +26,17 @@ process GATK_INDELREALIGNER { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def known = known_vcf ? "-known ${known_vcf}" : "" + + def avail_mem = 3 + if (!task.memory) { + log.info '[GATK IndelRealigner] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = task.memory.giga + } + """ gatk3 \\ - -T RealignerTargetCreator \\ + -T IndelRealigner \\ -R ${fasta} \\ -nt ${task.cpus} -I ${bam} \\ From c469a2b35ed2e2948c9d3cbde5672143086b7e35 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Thu, 2 Jun 2022 20:32:00 +0200 Subject: [PATCH 057/114] Fux tests --- modules/gatk/realignertargetcreator/main.nf | 6 +++--- .../gatk/realignertargetcreator/main.nf | 19 ++++++++----------- 2 files changed, 11 insertions(+), 14 deletions(-) diff --git a/modules/gatk/realignertargetcreator/main.nf b/modules/gatk/realignertargetcreator/main.nf index 866ff8a5..e356c826 100644 --- a/modules/gatk/realignertargetcreator/main.nf +++ b/modules/gatk/realignertargetcreator/main.nf @@ -9,7 +9,7 @@ process GATK_REALIGNERTARGETCREATOR { input: tuple val(meta), path(input), path(index) - path path(fasta) + path(fasta) path(fai) path(dict) path(known_vcf) @@ -25,7 +25,7 @@ process GATK_REALIGNERTARGETCREATOR { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def known = known_vcf ? "-known ${known_vcf}" : "" - if ("$bam" == "${prefix}.bam") error "Input and output names are the same, set prefix in module configuration to disambiguate!" + if ("$input" == "${prefix}.bam") error "Input and output names are the same, set prefix in module configuration to disambiguate!" def avail_mem = 3 if (!task.memory) { @@ -39,7 +39,7 @@ process GATK_REALIGNERTARGETCREATOR { -Xmx${avail_mem}g \\ -T RealignerTargetCreator \\ -nt ${task.cpus} \\ - -I ${bam} \\ + -I ${input} \\ -R ${fasta} \\ -o ${prefix}.intervals \\ ${known} \\ diff --git a/tests/modules/gatk/realignertargetcreator/main.nf b/tests/modules/gatk/realignertargetcreator/main.nf index 02e62d93..4b9f8eff 100644 --- a/tests/modules/gatk/realignertargetcreator/main.nf +++ b/tests/modules/gatk/realignertargetcreator/main.nf @@ -6,16 +6,13 @@ include { GATK_REALIGNERTARGETCREATOR } from '../../../../modules/gatk/realigner workflow test_gatk_realignertargetcreator { - input = [ - [ id:'test', single_end:false ], // meta map - file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true) - ] - reference = [ - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true), - file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true), - file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true), - ] + input = [ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true), + ] + fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true) - GATK_REALIGNERTARGETCREATOR ( input, reference, [] ) + GATK_REALIGNERTARGETCREATOR ( input, fasta, fai, dict, [] ) } From de88adc9269d3ae65d626baa164201037df8cac7 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Thu, 2 Jun 2022 20:35:34 +0200 Subject: [PATCH 058/114] Standardise parameters (no tests yet until realigntarget creator ready) --- modules/gatk/indelrealigner/main.nf | 9 +++++---- modules/gatk/indelrealigner/meta.yml | 12 ++++++++++-- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/modules/gatk/indelrealigner/main.nf b/modules/gatk/indelrealigner/main.nf index 22391879..f98a99b7 100644 --- a/modules/gatk/indelrealigner/main.nf +++ b/modules/gatk/indelrealigner/main.nf @@ -9,20 +9,20 @@ process GATK_INDELREALIGNER { input: tuple val(meta), path(input), path(index), path(intervals) - path path(fasta) + path(fasta) path(fai) path(dict) path(known_vcf) output: - tuple val(meta), path("*.bam"), emit: bam + tuple val(meta), path("*.bam"), path("*.bai"), emit: bam path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when script: - if ("$bam" == "${prefix}.bam") error "Input and output names are the same, set prefix in module configuration to disambiguate!" + if ("$input" == "${prefix}.bam") error "Input and output names are the same, set prefix in module configuration to disambiguate!" def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def known = known_vcf ? "-known ${known_vcf}" : "" @@ -36,10 +36,11 @@ process GATK_INDELREALIGNER { """ gatk3 \\ + -Xmx${avail_mem}g \\ -T IndelRealigner \\ -R ${fasta} \\ -nt ${task.cpus} - -I ${bam} \\ + -I ${input} \\ -targetIntervals ${intervals} \\ ${known} \\ -o ${prefix}.bam \\ diff --git a/modules/gatk/indelrealigner/meta.yml b/modules/gatk/indelrealigner/meta.yml index 80abc6aa..5abfbfb1 100644 --- a/modules/gatk/indelrealigner/meta.yml +++ b/modules/gatk/indelrealigner/meta.yml @@ -19,11 +19,11 @@ input: description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - bam: + - input: type: file description: Sorted and indexed BAM/CRAM/SAM file pattern: "*.bam" - - bai: + - index: type: file description: BAM index file pattern: "*.bai" @@ -35,6 +35,14 @@ input: type: file description: Reference file used to generate BAM file pattern: ".{fasta,fa,fna}" + - fai: + type: file + description: Index of reference file used to generate BAM file + pattern: ".fai" + - dict: + type: file + description: GATK dict file for reference + pattern: ".dict" - known_vcf: type: file description: Optional input VCF file(s) with known indels From 544b62df45d3043ca2c5ca3a131284fe0d74026b Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Thu, 2 Jun 2022 21:11:17 +0200 Subject: [PATCH 059/114] Star work on GATK UG --- modules/gatk/unifiedgenotyper/main.nf | 50 ++++++++++++++++++ modules/gatk/unifiedgenotyper/meta.yml | 51 +++++++++++++++++++ tests/config/pytest_modules.yml | 4 ++ tests/modules/gatk/unifiedgenotyper/main.nf | 15 ++++++ .../gatk/unifiedgenotyper/nextflow.config | 5 ++ tests/modules/gatk/unifiedgenotyper/test.yml | 14 +++++ 6 files changed, 139 insertions(+) create mode 100644 modules/gatk/unifiedgenotyper/main.nf create mode 100644 modules/gatk/unifiedgenotyper/meta.yml create mode 100644 tests/modules/gatk/unifiedgenotyper/main.nf create mode 100644 tests/modules/gatk/unifiedgenotyper/nextflow.config create mode 100644 tests/modules/gatk/unifiedgenotyper/test.yml diff --git a/modules/gatk/unifiedgenotyper/main.nf b/modules/gatk/unifiedgenotyper/main.nf new file mode 100644 index 00000000..4dd6e408 --- /dev/null +++ b/modules/gatk/unifiedgenotyper/main.nf @@ -0,0 +1,50 @@ +process GATK_UNIFIEDGENOTYPER { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? "bioconda::gatk=3.5" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gatk:3.5--hdfd78af_11': + 'quay.io/biocontainers/gatk:3.5--hdfd78af_11' }" + + input: + tuple val(meta), path(input), path(index) + path(fasta) + path(fai) + path(dict) + path(known_vcf) + + output: + tuple val(meta), path("*.bam"), emit: bam + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + def avail_mem = 3 + if (!task.memory) { + log.info '[GATK RealignerTargetCreator] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = task.memory.giga + } + + """ + gatk3 \\ + -Xmx${avail_mem}g \\ + -nt ${task.cpus} \\ + -T UnifiedGenotyper \\ + -I ${input} \\ + -R ${fasta} \\ + -o ${prefix}.vcf \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//' )) + END_VERSIONS + """ +} diff --git a/modules/gatk/unifiedgenotyper/meta.yml b/modules/gatk/unifiedgenotyper/meta.yml new file mode 100644 index 00000000..7a992591 --- /dev/null +++ b/modules/gatk/unifiedgenotyper/meta.yml @@ -0,0 +1,51 @@ +name: "gatk_unifiedgenotyper" +## TODO nf-core: Add a description of the module and list keywords +description: write your description here +keywords: + - sort +tools: + - "gatk": + ## TODO nf-core: Add a description and other details for the software below + description: "The full Genome Analysis Toolkit (GATK) framework, license restricted." + homepage: "None" + documentation: "None" + tool_dev_url: "None" + doi: "" + licence: "['BSD', 'https://www.broadinstitute.org/gatk/about/#licensing', 'https://software.broadinstitute.org/gatk/download/licensing']" + +## TODO nf-core: Add a description of all of the variables used as input +input: + # Only when we have meta + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + # + ## TODO nf-core: Delete / customise this example input + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + +## TODO nf-core: Add a description of all of the variables used as output +output: + #Only when we have meta + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + # + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + ## TODO nf-core: Delete / customise this example output + - bam: + type: file + description: Sorted BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + +authors: + - "@jfy133" diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index d4bfac5b..0d3a21c2 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -715,6 +715,10 @@ gamma/gamma: - modules/gamma/gamma/** - tests/modules/gamma/gamma/** +gatk/unifiedgenotyper: + - modules/gatk/unifiedgenotyper/** + - tests/modules/gatk/unifiedgenotyper/** + gatk4/applybqsr: - modules/gatk4/applybqsr/** - tests/modules/gatk4/applybqsr/** diff --git a/tests/modules/gatk/unifiedgenotyper/main.nf b/tests/modules/gatk/unifiedgenotyper/main.nf new file mode 100644 index 00000000..86a23790 --- /dev/null +++ b/tests/modules/gatk/unifiedgenotyper/main.nf @@ -0,0 +1,15 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { GATK_UNIFIEDGENOTYPER } from '../../../../modules/gatk/unifiedgenotyper/main.nf' + +workflow test_gatk_unifiedgenotyper { + + input = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true) + ] + + GATK_UNIFIEDGENOTYPER ( input ) +} diff --git a/tests/modules/gatk/unifiedgenotyper/nextflow.config b/tests/modules/gatk/unifiedgenotyper/nextflow.config new file mode 100644 index 00000000..50f50a7a --- /dev/null +++ b/tests/modules/gatk/unifiedgenotyper/nextflow.config @@ -0,0 +1,5 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + +} \ No newline at end of file diff --git a/tests/modules/gatk/unifiedgenotyper/test.yml b/tests/modules/gatk/unifiedgenotyper/test.yml new file mode 100644 index 00000000..aad9aec9 --- /dev/null +++ b/tests/modules/gatk/unifiedgenotyper/test.yml @@ -0,0 +1,14 @@ +## TODO nf-core: Please run the following command to build this file: +# nf-core modules create-test-yml gatk/unifiedgenotyper +- name: "gatk unifiedgenotyper" + command: nextflow run ./tests/modules/gatk/unifiedgenotyper -entry test_gatk_unifiedgenotyper -c ./tests/config/nextflow.config -c ./tests/modules/gatk/unifiedgenotyper/nextflow.config + tags: + - "gatk" + # + - "gatk/unifiedgenotyper" + # + files: + - path: "output/gatk/test.bam" + md5sum: e667c7caad0bc4b7ac383fd023c654fc + - path: output/gatk/versions.yml + md5sum: a01fe51bc4c6a3a6226fbf77b2c7cf3b From a14855dbe8648ca445759f57a1a2805ec4812615 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Thu, 2 Jun 2022 21:22:21 +0200 Subject: [PATCH 060/114] Start meta --- modules/gatk/unifiedgenotyper/main.nf | 16 ++++++- modules/gatk/unifiedgenotyper/meta.yml | 59 ++++++++++++++++---------- 2 files changed, 51 insertions(+), 24 deletions(-) diff --git a/modules/gatk/unifiedgenotyper/main.nf b/modules/gatk/unifiedgenotyper/main.nf index 4dd6e408..896a347e 100644 --- a/modules/gatk/unifiedgenotyper/main.nf +++ b/modules/gatk/unifiedgenotyper/main.nf @@ -13,9 +13,13 @@ process GATK_UNIFIEDGENOTYPER { path(fai) path(dict) path(known_vcf) + path(intervals) + path(contamination) + path(dbsnps) + path(comp) output: - tuple val(meta), path("*.bam"), emit: bam + tuple val(meta), path("*.vcf.gz"), emit: vcf path "versions.yml" , emit: versions when: @@ -24,6 +28,10 @@ process GATK_UNIFIEDGENOTYPER { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def contamination_file = contamination ? "-contaminationFile ${contamination}" : "" + def dbsnps_file = dbsnps ? "--dbsnp ${dbsnps}" : "" + def comp_file = comp ? "--comp ${comp}" : "" + def intervals_file = intervals ? "--intervals ${intervals}" : "" def avail_mem = 3 if (!task.memory) { @@ -39,9 +47,15 @@ process GATK_UNIFIEDGENOTYPER { -T UnifiedGenotyper \\ -I ${input} \\ -R ${fasta} \\ + ${contamination_file} \\ + ${dbsnps_file} \\ + ${comp_file} \\ + ${intervals_file} -o ${prefix}.vcf \\ $args + gzip -n *.vcf + cat <<-END_VERSIONS > versions.yml "${task.process}": gatk: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//' )) diff --git a/modules/gatk/unifiedgenotyper/meta.yml b/modules/gatk/unifiedgenotyper/meta.yml index 7a992591..5f85c451 100644 --- a/modules/gatk/unifiedgenotyper/meta.yml +++ b/modules/gatk/unifiedgenotyper/meta.yml @@ -1,51 +1,64 @@ name: "gatk_unifiedgenotyper" -## TODO nf-core: Add a description of the module and list keywords -description: write your description here keywords: - - sort + - bam + - vcf + - variant calling + - indel + - realignment + - targets tools: - "gatk": - ## TODO nf-core: Add a description and other details for the software below description: "The full Genome Analysis Toolkit (GATK) framework, license restricted." - homepage: "None" - documentation: "None" - tool_dev_url: "None" - doi: "" - licence: "['BSD', 'https://www.broadinstitute.org/gatk/about/#licensing', 'https://software.broadinstitute.org/gatk/download/licensing']" + homepage: "https://gatk.broadinstitute.org/hc/en-us" + documentation: "https://github.com/broadinstitute/gatk-docs" + licence: "['https://software.broadinstitute.org/gatk/download/licensing', 'BSD', 'https://www.broadinstitute.org/gatk/about/#licensing']" -## TODO nf-core: Add a description of all of the variables used as input input: - # Only when we have meta - meta: type: map description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - # - ## TODO nf-core: Delete / customise this example input - - bam: + - input: type: file - description: BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" + description: Sorted and indexed BAM/CRAM/SAM file + pattern: "*.bam" + - index: + type: file + description: BAM index file + pattern: "*.bai" + - fasta: + type: file + description: Reference file used to generate BAM file + pattern: ".{fasta,fa,fna}" + - fai: + type: file + description: Index of reference file used to generate BAM file + pattern: ".fai" + - dict: + type: file + description: GATK dict file for reference + pattern: ".dict" + - known_vcf: + type: file + description: Optional input VCF file(s) with known indels + pattern: ".vcf" -## TODO nf-core: Add a description of all of the variables used as output output: - #Only when we have meta - meta: type: map description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - # - versions: type: file description: File containing software versions pattern: "versions.yml" - ## TODO nf-core: Delete / customise this example output - - bam: + - vcf: type: file - description: Sorted BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" + description: VCF file containing called variants + pattern: "*.vcf.gz" authors: + - "@ilight1542" - "@jfy133" From a565d9072aba07b369c6e3c43f4168556d32678c Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Thu, 2 Jun 2022 21:39:39 +0200 Subject: [PATCH 061/114] Add GATK UnifiedGenotyper --- modules/gatk/unifiedgenotyper/main.nf | 13 ++++++------ modules/gatk/unifiedgenotyper/meta.yml | 21 ++++++++++++++------ tests/modules/gatk/unifiedgenotyper/main.nf | 15 ++++++++------ tests/modules/gatk/unifiedgenotyper/test.yml | 19 +++++++----------- 4 files changed, 37 insertions(+), 31 deletions(-) diff --git a/modules/gatk/unifiedgenotyper/main.nf b/modules/gatk/unifiedgenotyper/main.nf index 896a347e..47d83def 100644 --- a/modules/gatk/unifiedgenotyper/main.nf +++ b/modules/gatk/unifiedgenotyper/main.nf @@ -12,15 +12,14 @@ process GATK_UNIFIEDGENOTYPER { path(fasta) path(fai) path(dict) - path(known_vcf) path(intervals) path(contamination) - path(dbsnps) + path(dbsnp) path(comp) output: tuple val(meta), path("*.vcf.gz"), emit: vcf - path "versions.yml" , emit: versions + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -29,7 +28,7 @@ process GATK_UNIFIEDGENOTYPER { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def contamination_file = contamination ? "-contaminationFile ${contamination}" : "" - def dbsnps_file = dbsnps ? "--dbsnp ${dbsnps}" : "" + def dbsnp_file = dbsnp ? "--dbsnp ${dbsnp}" : "" def comp_file = comp ? "--comp ${comp}" : "" def intervals_file = intervals ? "--intervals ${intervals}" : "" @@ -48,9 +47,9 @@ process GATK_UNIFIEDGENOTYPER { -I ${input} \\ -R ${fasta} \\ ${contamination_file} \\ - ${dbsnps_file} \\ + ${dbsnp_file} \\ ${comp_file} \\ - ${intervals_file} + ${intervals_file} \\ -o ${prefix}.vcf \\ $args @@ -58,7 +57,7 @@ process GATK_UNIFIEDGENOTYPER { cat <<-END_VERSIONS > versions.yml "${task.process}": - gatk: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//' )) + gatk: \$(echo \$(gatk3 --version)) END_VERSIONS """ } diff --git a/modules/gatk/unifiedgenotyper/meta.yml b/modules/gatk/unifiedgenotyper/meta.yml index 5f85c451..e414d146 100644 --- a/modules/gatk/unifiedgenotyper/meta.yml +++ b/modules/gatk/unifiedgenotyper/meta.yml @@ -3,9 +3,6 @@ keywords: - bam - vcf - variant calling - - indel - - realignment - - targets tools: - "gatk": description: "The full Genome Analysis Toolkit (GATK) framework, license restricted." @@ -39,10 +36,22 @@ input: type: file description: GATK dict file for reference pattern: ".dict" - - known_vcf: + - intervals: type: file - description: Optional input VCF file(s) with known indels - pattern: ".vcf" + description: Bed file with the genomic regions included in the library (optional) + pattern: "*.intervals" + - contamination: + type: file + description: Tab-separated file containing fraction of contamination in sequencing data (per sample) to aggressively remove + pattern: "*" + - dbsnps: + type: file + description: VCF file containing known sites (optional) + pattern: "*" + - comp: + type: file + description: Comparison VCF file (optional) + pattern: "*" output: - meta: diff --git a/tests/modules/gatk/unifiedgenotyper/main.nf b/tests/modules/gatk/unifiedgenotyper/main.nf index 86a23790..6d145e76 100644 --- a/tests/modules/gatk/unifiedgenotyper/main.nf +++ b/tests/modules/gatk/unifiedgenotyper/main.nf @@ -5,11 +5,14 @@ nextflow.enable.dsl = 2 include { GATK_UNIFIEDGENOTYPER } from '../../../../modules/gatk/unifiedgenotyper/main.nf' workflow test_gatk_unifiedgenotyper { - - input = [ - [ id:'test', single_end:false ], // meta map - file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true) - ] - GATK_UNIFIEDGENOTYPER ( input ) + input = [ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true), + ] + fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true) + + GATK_UNIFIEDGENOTYPER ( input, fasta, fai, dict, [], [], [], []) } diff --git a/tests/modules/gatk/unifiedgenotyper/test.yml b/tests/modules/gatk/unifiedgenotyper/test.yml index aad9aec9..498e7991 100644 --- a/tests/modules/gatk/unifiedgenotyper/test.yml +++ b/tests/modules/gatk/unifiedgenotyper/test.yml @@ -1,14 +1,9 @@ -## TODO nf-core: Please run the following command to build this file: -# nf-core modules create-test-yml gatk/unifiedgenotyper -- name: "gatk unifiedgenotyper" - command: nextflow run ./tests/modules/gatk/unifiedgenotyper -entry test_gatk_unifiedgenotyper -c ./tests/config/nextflow.config -c ./tests/modules/gatk/unifiedgenotyper/nextflow.config +- name: gatk unifiedgenotyper test_gatk_unifiedgenotyper + command: nextflow run ./tests/modules/gatk/unifiedgenotyper -entry test_gatk_unifiedgenotyper -c ./tests/config/nextflow.config -c ./tests/modules/gatk/unifiedgenotyper/nextflow.config tags: - - "gatk" - # - - "gatk/unifiedgenotyper" - # + - gatk + - gatk/unifiedgenotyper files: - - path: "output/gatk/test.bam" - md5sum: e667c7caad0bc4b7ac383fd023c654fc - - path: output/gatk/versions.yml - md5sum: a01fe51bc4c6a3a6226fbf77b2c7cf3b + - path: output/gatk/test.vcf.gz + contains: + - "#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT test" From 2c9e2f5e73e5a802a6cfca6d72e51f297a372fb1 Mon Sep 17 00:00:00 2001 From: Rike Date: Fri, 3 Jun 2022 10:52:50 +0200 Subject: [PATCH 062/114] Update TIDDIT to 3.0.0 --- modules/tiddit/sv/main.nf | 13 ++++++++----- tests/modules/tiddit/sv/main.nf | 27 +++++++++++++++++++++------ 2 files changed, 29 insertions(+), 11 deletions(-) diff --git a/modules/tiddit/sv/main.nf b/modules/tiddit/sv/main.nf index b3e3813c..cff3dbfb 100644 --- a/modules/tiddit/sv/main.nf +++ b/modules/tiddit/sv/main.nf @@ -2,15 +2,16 @@ process TIDDIT_SV { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::tiddit=2.12.1" : null) + conda (params.enable_conda ? "bioconda::tiddit=3.0.0" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/tiddit:2.12.1--py38h1773678_0' : - 'quay.io/biocontainers/tiddit:2.12.1--py38h1773678_0' }" + 'https://depot.galaxyproject.org/singularity/tiddit:3.0.0--py39h59fae87_1' : + 'quay.io/biocontainers/tiddit:3.0.0--py39h59fae87_1' }" input: - tuple val(meta), path(bam) + tuple val(meta), path(input) path fasta path fai + path bwa_index output: tuple val(meta), path("*.vcf") , emit: vcf @@ -26,10 +27,12 @@ process TIDDIT_SV { def prefix = task.ext.prefix ?: "${meta.id}" def reference = fasta ? "--ref $fasta" : "" """ + INDEX=`find -L ./ -name "*.amb" | sed 's/.amb//'` + tiddit \\ --sv \\ $args \\ - --bam $bam \\ + --bam $input \\ $reference \\ -o $prefix diff --git a/tests/modules/tiddit/sv/main.nf b/tests/modules/tiddit/sv/main.nf index 8dae4950..72113427 100644 --- a/tests/modules/tiddit/sv/main.nf +++ b/tests/modules/tiddit/sv/main.nf @@ -2,25 +2,40 @@ nextflow.enable.dsl = 2 +include { BWA_INDEX } from '../../../../modules/bwa/index/main.nf' include { TIDDIT_SV } from '../../../../modules/tiddit/sv/main.nf' workflow test_tiddit_sv { - input = [ + input = [ [ id:'test' ], // meta map - [ file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) ] + [ file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) ] ] - + fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) - TIDDIT_SV ( input, fasta, fai ) + BWA_INDEX( fasta ) + + TIDDIT_SV ( input, fasta, fai , BWA_INDEX.out.index) } workflow test_tiddit_sv_no_ref { - input = [ + input = [ [ id:'test' ], // meta map - [ file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) ] + [ file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) ] ] TIDDIT_SV ( input, [], [] ) } + +workflow test_tiddit_sv_cram { + input = [ + [ id:'test' ], // meta map + [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true) ] + ] + + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + + TIDDIT_SV ( input, fasta, fai ) +} From 40996cdbe1874f9c2ba693fee76d2b4f316f95f2 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Fri, 3 Jun 2022 12:07:16 +0200 Subject: [PATCH 063/114] update tiddit/cov + more tests --- modules/tiddit/cov/main.nf | 16 +++++------ modules/tiddit/cov/meta.yml | 2 +- tests/modules/tiddit/cov/main.nf | 35 ++++++++++++++++++------ tests/modules/tiddit/cov/nextflow.config | 4 +++ tests/modules/tiddit/cov/test.yml | 34 +++++++++++++++++------ 5 files changed, 66 insertions(+), 25 deletions(-) diff --git a/modules/tiddit/cov/main.nf b/modules/tiddit/cov/main.nf index 578c4043..4d7e2827 100644 --- a/modules/tiddit/cov/main.nf +++ b/modules/tiddit/cov/main.nf @@ -2,17 +2,17 @@ process TIDDIT_COV { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::tiddit=2.12.1" : null) + conda (params.enable_conda ? "bioconda::tiddit=3.0.0" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/tiddit:2.12.1--py38h1773678_0' : - 'quay.io/biocontainers/tiddit:2.12.1--py38h1773678_0' }" + 'https://depot.galaxyproject.org/singularity/tiddit:3.0.0--py39h59fae87_1' : + 'quay.io/biocontainers/tiddit:3.0.0--py39h59fae87_1' }" input: - tuple val(meta), path(bam) + tuple val(meta), path(input) path fasta output: - tuple val(meta), path("*.tab"), optional: true, emit: cov + tuple val(meta), path("*.bed"), optional: true, emit: cov tuple val(meta), path("*.wig"), optional: true, emit: wig path "versions.yml" , emit: versions @@ -28,12 +28,12 @@ process TIDDIT_COV { --cov \\ -o $prefix \\ $args \\ - --bam $bam \\ + --bam $input \\ $reference cat <<-END_VERSIONS > versions.yml "${task.process}": - tiddit: \$(echo \$(tiddit 2>&1) | sed 's/^.*TIDDIT-//; s/ .*\$//') + tiddit: \$(echo \$(tiddit 2>&1) | sed 's/^.*tiddit-//; s/ .*\$//') END_VERSIONS """ @@ -45,7 +45,7 @@ process TIDDIT_COV { cat <<-END_VERSIONS > versions.yml "${task.process}": - tiddit: \$(echo \$(tiddit 2>&1) | sed 's/^.*TIDDIT-//; s/ .*\$//') + tiddit: \$(echo \$(tiddit 2>&1) | sed 's/^.*tiddit-//; s/ .*\$//') END_VERSIONS """ } diff --git a/modules/tiddit/cov/meta.yml b/modules/tiddit/cov/meta.yml index d925b783..98ea27c2 100644 --- a/modules/tiddit/cov/meta.yml +++ b/modules/tiddit/cov/meta.yml @@ -19,7 +19,7 @@ input: description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - bam: + - input: type: file description: BAM/CRAM file pattern: "*.{bam,cram}" diff --git a/tests/modules/tiddit/cov/main.nf b/tests/modules/tiddit/cov/main.nf index 1bb35145..0032c759 100644 --- a/tests/modules/tiddit/cov/main.nf +++ b/tests/modules/tiddit/cov/main.nf @@ -2,22 +2,41 @@ nextflow.enable.dsl = 2 -include { TIDDIT_COV } from '../../../../modules/tiddit/cov/main.nf' +include { TIDDIT_COV as TIDDIT_COV_BED } from '../../../../modules/tiddit/cov/main.nf' +include { TIDDIT_COV as TIDDIT_COV_WIG } from '../../../../modules/tiddit/cov/main.nf' -workflow test_tiddit_cov { +workflow test_tiddit_cov_cram_bed { input = [ [ id:'test', single_end:false ], // meta map - file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) ] + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true) ] - fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) - TIDDIT_COV ( input, fasta ) + TIDDIT_COV_BED ( input, fasta ) } -workflow test_tiddit_cov_no_ref { +workflow test_tiddit_cov_bam_bed { input = [ [ id:'test', single_end:false ], // meta map - file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) ] + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) ] - TIDDIT_COV ( input, [] ) + TIDDIT_COV_BED ( input, [] ) +} + +workflow test_tiddit_cov_cram_wig { + + input = [ [ id:'test', single_end:false ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true) ] + + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + + TIDDIT_COV_WIG ( input, fasta ) +} + +workflow test_tiddit_cov_bam_wig { + + input = [ [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) ] + + TIDDIT_COV_WIG ( input, [] ) } diff --git a/tests/modules/tiddit/cov/nextflow.config b/tests/modules/tiddit/cov/nextflow.config index 8730f1c4..e185a5cf 100644 --- a/tests/modules/tiddit/cov/nextflow.config +++ b/tests/modules/tiddit/cov/nextflow.config @@ -2,4 +2,8 @@ process { publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + withName: TIDDIT_COV_WIG { + ext.args = '-w' + } + } diff --git a/tests/modules/tiddit/cov/test.yml b/tests/modules/tiddit/cov/test.yml index 90c4cbb3..916cc9e3 100644 --- a/tests/modules/tiddit/cov/test.yml +++ b/tests/modules/tiddit/cov/test.yml @@ -1,17 +1,35 @@ -- name: tiddit cov test_tiddit_cov - command: nextflow run ./tests/modules/tiddit/cov -entry test_tiddit_cov -c ./tests/config/nextflow.config -c ./tests/modules/tiddit/cov/nextflow.config +- name: tiddit cov test_tiddit_cov_cram_bed + command: nextflow run ./tests/modules/tiddit/cov -entry test_tiddit_cov_cram_bed -c ./tests/config/nextflow.config -c ./tests/modules/tiddit/cov/nextflow.config tags: - tiddit - tiddit/cov files: - - path: output/tiddit/test.tab - md5sum: f7974948f809f94879d8a60b726194f5 + - path: output/tiddit/test.bed + md5sum: 3b1a28c62a5f25bbba77c1042e9abdf7 -- name: tiddit cov test_tiddit_cov_no_ref - command: nextflow run ./tests/modules/tiddit/cov -entry test_tiddit_cov_no_ref -c ./tests/config/nextflow.config -c ./tests/modules/tiddit/cov/nextflow.config +- name: tiddit cov test_tiddit_cov_bam_bed + command: nextflow run ./tests/modules/tiddit/cov -entry test_tiddit_cov_bam_bed -c ./tests/config/nextflow.config -c ./tests/modules/tiddit/cov/nextflow.config tags: - tiddit - tiddit/cov files: - - path: output/tiddit/test.tab - md5sum: f7974948f809f94879d8a60b726194f5 + - path: output/tiddit/test.bed + md5sum: 9d1474f1c7c6516205254077087bb026 + +- name: tiddit cov test_tiddit_cov_cram_wig + command: nextflow run ./tests/modules/tiddit/cov -entry test_tiddit_cov_cram_wig -c ./tests/config/nextflow.config -c ./tests/modules/tiddit/cov/nextflow.config + tags: + - tiddit + - tiddit/cov + files: + - path: output/tiddit/test.wig + md5sum: ca3645fd0c3491c86c075c91d16d57c4 + +- name: tiddit cov test_tiddit_cov_bam_wig + command: nextflow run ./tests/modules/tiddit/cov -entry test_tiddit_cov_bam_wig -c ./tests/config/nextflow.config -c ./tests/modules/tiddit/cov/nextflow.config + tags: + - tiddit + - tiddit/cov + files: + - path: output/tiddit/test.wig + md5sum: 44bea2ac6a56774738e65773065da670 From f1a5cc07dbcf173654c793ac4eb63e3fb83281ab Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Fri, 3 Jun 2022 13:00:29 +0200 Subject: [PATCH 064/114] restructure tiddit/sv + test (just one for now) --- modules/tiddit/sv/main.nf | 19 +++++++++++-------- tests/modules/tiddit/sv/main.nf | 26 +++----------------------- tests/modules/tiddit/sv/test.yml | 25 +++++-------------------- 3 files changed, 19 insertions(+), 51 deletions(-) diff --git a/modules/tiddit/sv/main.nf b/modules/tiddit/sv/main.nf index cff3dbfb..d0d5362e 100644 --- a/modules/tiddit/sv/main.nf +++ b/modules/tiddit/sv/main.nf @@ -8,16 +8,15 @@ process TIDDIT_SV { 'quay.io/biocontainers/tiddit:3.0.0--py39h59fae87_1' }" input: - tuple val(meta), path(input) + tuple val(meta), path(input), path(index) path fasta path fai path bwa_index output: - tuple val(meta), path("*.vcf") , emit: vcf - tuple val(meta), path("*.ploidy.tab") , emit: ploidy - tuple val(meta), path("*.signals.tab"), emit: signals - path "versions.yml" , emit: versions + tuple val(meta), path("*.vcf") , emit: vcf + tuple val(meta), path("*.ploidies.tab"), emit: ploidy + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -27,7 +26,11 @@ process TIDDIT_SV { def prefix = task.ext.prefix ?: "${meta.id}" def reference = fasta ? "--ref $fasta" : "" """ - INDEX=`find -L ./ -name "*.amb" | sed 's/.amb//'` + for i in `ls ${bwa_index}` + do + ln -s ${bwa_index}/\$i ${fasta}.\${i##*.} + done + tiddit \\ --sv \\ @@ -38,7 +41,7 @@ process TIDDIT_SV { cat <<-END_VERSIONS > versions.yml "${task.process}": - tiddit: \$(echo \$(tiddit 2>&1) | sed 's/^.*TIDDIT-//; s/ .*\$//') + tiddit: \$(echo \$(tiddit 2>&1) | sed 's/^.*tiddit-//; s/ .*\$//') END_VERSIONS """ @@ -51,7 +54,7 @@ process TIDDIT_SV { cat <<-END_VERSIONS > versions.yml "${task.process}": - tiddit: \$(echo \$(tiddit 2>&1) | sed 's/^.*TIDDIT-//; s/ .*\$//') + tiddit: \$(echo \$(tiddit 2>&1) | sed 's/^.*tiddit-//; s/ .*\$//') END_VERSIONS """ } diff --git a/tests/modules/tiddit/sv/main.nf b/tests/modules/tiddit/sv/main.nf index 72113427..2c09404e 100644 --- a/tests/modules/tiddit/sv/main.nf +++ b/tests/modules/tiddit/sv/main.nf @@ -5,10 +5,11 @@ nextflow.enable.dsl = 2 include { BWA_INDEX } from '../../../../modules/bwa/index/main.nf' include { TIDDIT_SV } from '../../../../modules/tiddit/sv/main.nf' -workflow test_tiddit_sv { +workflow test_tiddit_sv_bam { input = [ [ id:'test' ], // meta map - [ file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) ] + [ file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) ], + [ file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true) ] ] fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) @@ -18,24 +19,3 @@ workflow test_tiddit_sv { TIDDIT_SV ( input, fasta, fai , BWA_INDEX.out.index) } - -workflow test_tiddit_sv_no_ref { - input = [ - [ id:'test' ], // meta map - [ file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) ] - ] - - TIDDIT_SV ( input, [], [] ) -} - -workflow test_tiddit_sv_cram { - input = [ - [ id:'test' ], // meta map - [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true) ] - ] - - fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) - fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) - - TIDDIT_SV ( input, fasta, fai ) -} diff --git a/tests/modules/tiddit/sv/test.yml b/tests/modules/tiddit/sv/test.yml index 40ea5b4d..732b8434 100644 --- a/tests/modules/tiddit/sv/test.yml +++ b/tests/modules/tiddit/sv/test.yml @@ -1,25 +1,10 @@ -- name: tiddit sv - command: nextflow run ./tests/modules/tiddit/sv -entry test_tiddit_sv -c ./tests/config/nextflow.config -c ./tests/modules/tiddit/sv/nextflow.config +- name: tiddit sv test_tiddit_sv_bam + command: nextflow run ./tests/modules/tiddit/sv -entry test_tiddit_sv_bam -c ./tests/config/nextflow.config -c ./tests/modules/tiddit/sv/nextflow.config tags: - tiddit - tiddit/sv files: - - path: output/tiddit/test.ploidy.tab - md5sum: 45e050b0e204f0a5a3a99627cc440eaa - - path: output/tiddit/test.signals.tab - md5sum: dab4b2fec4ddf8eb1c23005b0770150e + - path: output/tiddit/test.ploidies.tab + md5sum: 6319d3611f7b6b94425a184d274b3dfc - path: output/tiddit/test.vcf - md5sum: bdce14ae8292bf3deb81f6f255baf859 - -- name: tiddit sv no ref - command: nextflow run ./tests/modules/tiddit/sv -entry test_tiddit_sv_no_ref -c ./tests/config/nextflow.config -c ./tests/modules/tiddit/sv/nextflow.config - tags: - - tiddit - - tiddit/sv - files: - - path: output/tiddit/test.ploidy.tab - md5sum: 45e050b0e204f0a5a3a99627cc440eaa - - path: output/tiddit/test.signals.tab - md5sum: dab4b2fec4ddf8eb1c23005b0770150e - - path: output/tiddit/test.vcf - md5sum: 3d0e83a8199b2bdb81cfe3e6b12bf64b + md5sum: 41d3f8746f0420f894104321b7e64f67 From 230224700edcdf3a8c2d241cafc974e4724e48f4 Mon Sep 17 00:00:00 2001 From: SusiJo Date: Fri, 3 Jun 2022 12:45:52 +0200 Subject: [PATCH 065/114] change prefix --- modules/gatk4/markduplicatesspark/main.nf | 2 +- tests/modules/gatk4/markduplicatesspark/main.nf | 14 ++++++++++++++ .../gatk4/markduplicatesspark/nextflow.config | 8 ++++++++ 3 files changed, 23 insertions(+), 1 deletion(-) diff --git a/modules/gatk4/markduplicatesspark/main.nf b/modules/gatk4/markduplicatesspark/main.nf index 703623d5..55049fba 100644 --- a/modules/gatk4/markduplicatesspark/main.nf +++ b/modules/gatk4/markduplicatesspark/main.nf @@ -23,7 +23,7 @@ process GATK4_MARKDUPLICATES_SPARK { script: def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}.bam" + def prefix = task.ext.prefix ?: "${meta.id}" def input_list = bam.collect{"--input $it"}.join(' ') diff --git a/tests/modules/gatk4/markduplicatesspark/main.nf b/tests/modules/gatk4/markduplicatesspark/main.nf index ac89b1b3..004fbb1e 100644 --- a/tests/modules/gatk4/markduplicatesspark/main.nf +++ b/tests/modules/gatk4/markduplicatesspark/main.nf @@ -3,6 +3,7 @@ nextflow.enable.dsl = 2 include { GATK4_MARKDUPLICATES_SPARK } from '../../../../modules/gatk4/markduplicatesspark/main.nf' +include { GATK4_MARKDUPLICATES_SPARK as GATK4_MARKDUPLICATES_SPARK_CRAM } from '../../../../modules/gatk4/markduplicatesspark/main.nf' include { GATK4_MARKDUPLICATES_SPARK as GATK4_MARKDUPLICATES_SPARK_METRICS } from '../../../../modules/gatk4/markduplicatesspark/main.nf' workflow test_gatk4_markduplicates_spark { @@ -29,6 +30,19 @@ workflow test_gatk4_markduplicates_spark_multiple_bams { GATK4_MARKDUPLICATES_SPARK ( input, fasta, fai, dict ) } +// chr 22 +workflow test_gatk4_markduplicates_spark_multiple_bams_cram_out { + input = [ [ id:'test', single_end:false ], // meta map + [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_name_sorted_bam'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_name_sorted_bam'], checkIfExists: true) + ] ] + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) + + GATK4_MARKDUPLICATES_SPARK_CRAM ( input, fasta, fai, dict ) +} + // chr 22 workflow test_gatk4_markduplicates_spark_multiple_bams_metrics { input = [ [ id:'test', single_end:false ], // meta map diff --git a/tests/modules/gatk4/markduplicatesspark/nextflow.config b/tests/modules/gatk4/markduplicatesspark/nextflow.config index 495a42f3..565016b7 100644 --- a/tests/modules/gatk4/markduplicatesspark/nextflow.config +++ b/tests/modules/gatk4/markduplicatesspark/nextflow.config @@ -2,9 +2,17 @@ process { publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + withName: GATK4_MARKDUPLICATES_SPARK { + ext.prefix = { "${meta.id}.bam" } + } + withName: GATK4_MARKDUPLICATES_SPARK_CRAM { + ext.prefix = { "${meta.id}.cram" } + } withName: GATK4_MARKDUPLICATES_SPARK_METRICS { + ext.prefix = { "${meta.id}.bam" } ext.args = '--metrics-file test.metrics' } + } // override tests/config/nextflow.config docker.userEmulation = false From e2bb56608c6934f5228e38a157eae3f8fc58742f Mon Sep 17 00:00:00 2001 From: SusiJo Date: Fri, 3 Jun 2022 13:50:45 +0200 Subject: [PATCH 066/114] change prefix output --- modules/gatk4/markduplicatesspark/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/gatk4/markduplicatesspark/main.nf b/modules/gatk4/markduplicatesspark/main.nf index 55049fba..79e12814 100644 --- a/modules/gatk4/markduplicatesspark/main.nf +++ b/modules/gatk4/markduplicatesspark/main.nf @@ -14,7 +14,7 @@ process GATK4_MARKDUPLICATES_SPARK { path dict output: - tuple val(meta), path("*.bam"), emit: bam + tuple val(meta), path("${prefix}"), emit: output tuple val(meta), path("*.metrics"), emit: metrics, optional: true path "versions.yml" , emit: versions From f9ac4e28e99d3fdd499d15bfb82ec529f35aacb4 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Fri, 3 Jun 2022 15:37:10 +0200 Subject: [PATCH 067/114] cover all use cases --- modules/tiddit/sv/main.nf | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/modules/tiddit/sv/main.nf b/modules/tiddit/sv/main.nf index d0d5362e..712f364b 100644 --- a/modules/tiddit/sv/main.nf +++ b/modules/tiddit/sv/main.nf @@ -26,11 +26,7 @@ process TIDDIT_SV { def prefix = task.ext.prefix ?: "${meta.id}" def reference = fasta ? "--ref $fasta" : "" """ - for i in `ls ${bwa_index}` - do - ln -s ${bwa_index}/\$i ${fasta}.\${i##*.} - done - + [[ -d $bwa_index ]] && for i in `ls $bwa_index`; do [[ -f $fasta && ! "\$i" =~ .*"$fasta".* ]] && ln -s $bwa_index/\$i ${fasta}.\${i##*.} || ln -s $bwa_index/\$i \$i; done tiddit \\ --sv \\ From 97e66015caec35f7fd6d095b9fc797549fee9efc Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Fri, 3 Jun 2022 15:51:02 +0200 Subject: [PATCH 068/114] add test + fix issues --- modules/tiddit/sv/main.nf | 7 ++----- modules/tiddit/sv/meta.yml | 20 ++++++++++++-------- tests/modules/tiddit/sv/main.nf | 17 +++++++++++++++-- tests/modules/tiddit/sv/test.yml | 11 +++++++++++ 4 files changed, 40 insertions(+), 15 deletions(-) diff --git a/modules/tiddit/sv/main.nf b/modules/tiddit/sv/main.nf index 712f364b..ee2a8d75 100644 --- a/modules/tiddit/sv/main.nf +++ b/modules/tiddit/sv/main.nf @@ -10,7 +10,6 @@ process TIDDIT_SV { input: tuple val(meta), path(input), path(index) path fasta - path fai path bwa_index output: @@ -24,7 +23,6 @@ process TIDDIT_SV { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def reference = fasta ? "--ref $fasta" : "" """ [[ -d $bwa_index ]] && for i in `ls $bwa_index`; do [[ -f $fasta && ! "\$i" =~ .*"$fasta".* ]] && ln -s $bwa_index/\$i ${fasta}.\${i##*.} || ln -s $bwa_index/\$i \$i; done @@ -32,7 +30,7 @@ process TIDDIT_SV { --sv \\ $args \\ --bam $input \\ - $reference \\ + --ref $fasta \\ -o $prefix cat <<-END_VERSIONS > versions.yml @@ -45,8 +43,7 @@ process TIDDIT_SV { def prefix = task.ext.prefix ?: "${meta.id}" """ touch ${prefix}.vcf - touch ${prefix}.ploidy.tab - touch ${prefix}.signals.tab + touch ${prefix}.ploidies.tab cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/tiddit/sv/meta.yml b/modules/tiddit/sv/meta.yml index fc307081..8b41c69c 100644 --- a/modules/tiddit/sv/meta.yml +++ b/modules/tiddit/sv/meta.yml @@ -17,14 +17,22 @@ input: description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM file + pattern: "*.{bam,cram}" + - index: + type: file + description: BAM/CRAM index file + pattern: "*.{bai,crai}" - fasta: type: file description: Input FASTA file pattern: "*.{fasta,fa}" - - fai: + - bwa_index: type: file - description: FASTA index file - pattern: "*.{fai}" + description: BWA genome index files + pattern: "Directory containing BWA index *.{amb,ann,bwt,pac,sa}" output: - meta: type: map @@ -38,11 +46,7 @@ output: - ploidy: type: file description: tab - pattern: "*.{ploidy.tab}" - - signals: - type: file - description: tab - pattern: "*.{signals.tab}" + pattern: "*.{ploidies.tab}" - versions: type: file description: File containing software versions diff --git a/tests/modules/tiddit/sv/main.nf b/tests/modules/tiddit/sv/main.nf index 2c09404e..c9dbf4ea 100644 --- a/tests/modules/tiddit/sv/main.nf +++ b/tests/modules/tiddit/sv/main.nf @@ -13,9 +13,22 @@ workflow test_tiddit_sv_bam { ] fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) - fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) BWA_INDEX( fasta ) - TIDDIT_SV ( input, fasta, fai , BWA_INDEX.out.index) + TIDDIT_SV ( input, fasta, BWA_INDEX.out.index) +} + +workflow test_tiddit_sv_cram { + input = [ + [ id:'test' ], // meta map + [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true) ], + [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true) ] + ] + + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + + BWA_INDEX( fasta ) + + TIDDIT_SV ( input, fasta, BWA_INDEX.out.index) } diff --git a/tests/modules/tiddit/sv/test.yml b/tests/modules/tiddit/sv/test.yml index 732b8434..557a0913 100644 --- a/tests/modules/tiddit/sv/test.yml +++ b/tests/modules/tiddit/sv/test.yml @@ -8,3 +8,14 @@ md5sum: 6319d3611f7b6b94425a184d274b3dfc - path: output/tiddit/test.vcf md5sum: 41d3f8746f0420f894104321b7e64f67 + +- name: tiddit sv test_tiddit_sv_cram + command: nextflow run ./tests/modules/tiddit/sv -entry test_tiddit_sv_cram -c ./tests/config/nextflow.config -c ./tests/modules/tiddit/sv/nextflow.config + tags: + - tiddit + - tiddit/sv + files: + - path: output/tiddit/test.ploidies.tab + md5sum: f1162a940ddc8b963f6e0e506bb5c136 + - path: output/tiddit/test.vcf + md5sum: 34db59578991285d6b62dc1500272fca From d7cec801b244d8aa0a98aa94934b191ed5720f92 Mon Sep 17 00:00:00 2001 From: "Robert A. Petit III" Date: Fri, 3 Jun 2022 13:05:43 -0600 Subject: [PATCH 069/114] Update main.nf --- modules/snippy/run/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/snippy/run/main.nf b/modules/snippy/run/main.nf index 73c4b94d..5ecdbea8 100644 --- a/modules/snippy/run/main.nf +++ b/modules/snippy/run/main.nf @@ -4,7 +4,7 @@ process SNIPPY_RUN { conda (params.enable_conda ? "bioconda::snippy=4.6.0" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/snippy:4.6.0--hdfd78af_2 ': + 'https://depot.galaxyproject.org/singularity/snippy:4.6.0--hdfd78af_2' : 'quay.io/biocontainers/snippy:4.6.0--hdfd78af_2' }" input: From e827a042ce1f55a37a8e771944b566faa5108e8e Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Fri, 3 Jun 2022 21:24:03 +0200 Subject: [PATCH 070/114] Update modules/gatk/unifiedgenotyper/main.nf Co-authored-by: Maxime U. Garcia --- modules/gatk/unifiedgenotyper/main.nf | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/modules/gatk/unifiedgenotyper/main.nf b/modules/gatk/unifiedgenotyper/main.nf index 47d83def..e88ac7c5 100644 --- a/modules/gatk/unifiedgenotyper/main.nf +++ b/modules/gatk/unifiedgenotyper/main.nf @@ -9,13 +9,13 @@ process GATK_UNIFIEDGENOTYPER { input: tuple val(meta), path(input), path(index) - path(fasta) - path(fai) - path(dict) - path(intervals) - path(contamination) - path(dbsnp) - path(comp) + path fasta + path fai + path dict + path intervals + path contamination + path dbsnp + path comp output: tuple val(meta), path("*.vcf.gz"), emit: vcf From 0d087b4890e080394c3c0fd5971d4e3364841380 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Fri, 3 Jun 2022 21:33:54 +0200 Subject: [PATCH 071/114] Update modules/gatk/realignertargetcreator/main.nf --- modules/gatk/realignertargetcreator/main.nf | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/gatk/realignertargetcreator/main.nf b/modules/gatk/realignertargetcreator/main.nf index 5d2da4b3..ba6a2592 100644 --- a/modules/gatk/realignertargetcreator/main.nf +++ b/modules/gatk/realignertargetcreator/main.nf @@ -9,10 +9,10 @@ process GATK_REALIGNERTARGETCREATOR { input: tuple val(meta), path(input), path(index) - path(fasta) - path(fai) - path(dict) - path(known_vcf) + path fasta + path fai + path dict + path known_vcf output: tuple val(meta), path("*.intervals"), emit: intervals From 771425e7a050acf0e1be41d9e6fc800f675c3e58 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Tue, 7 Jun 2022 12:11:18 +0200 Subject: [PATCH 072/114] add threads to calibratedragstrmodel --- modules/gatk4/calibratedragstrmodel/main.nf | 1 + tests/modules/gatk4/calibratedragstrmodel/test.yml | 12 ++++++------ 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/modules/gatk4/calibratedragstrmodel/main.nf b/modules/gatk4/calibratedragstrmodel/main.nf index 08ba7008..b4e47edc 100644 --- a/modules/gatk4/calibratedragstrmodel/main.nf +++ b/modules/gatk4/calibratedragstrmodel/main.nf @@ -38,6 +38,7 @@ process GATK4_CALIBRATEDRAGSTRMODEL { --output ${prefix}.txt \\ --reference $fasta \\ --str-table-path $strtablefile \\ + --threads $task.cpus \\ $intervals_command \\ --tmp-dir . \\ $args diff --git a/tests/modules/gatk4/calibratedragstrmodel/test.yml b/tests/modules/gatk4/calibratedragstrmodel/test.yml index 53c428a9..285d04ca 100644 --- a/tests/modules/gatk4/calibratedragstrmodel/test.yml +++ b/tests/modules/gatk4/calibratedragstrmodel/test.yml @@ -1,26 +1,26 @@ - name: gatk4 calibratedragstrmodel test_gatk4_calibratedragstrmodel_bam command: nextflow run ./tests/modules/gatk4/calibratedragstrmodel -entry test_gatk4_calibratedragstrmodel_bam -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/calibratedragstrmodel/nextflow.config tags: - - gatk4/calibratedragstrmodel - gatk4 + - gatk4/calibratedragstrmodel files: - path: output/gatk4/test.txt - md5sum: 0a1a1583b157fa2251dd931ed165da4f + md5sum: e16fa32906c74bb18b93e98a86718ff1 - name: gatk4 calibratedragstrmodel test_gatk4_calibratedragstrmodel_cram command: nextflow run ./tests/modules/gatk4/calibratedragstrmodel -entry test_gatk4_calibratedragstrmodel_cram -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/calibratedragstrmodel/nextflow.config tags: - - gatk4/calibratedragstrmodel - gatk4 + - gatk4/calibratedragstrmodel files: - path: output/gatk4/test.txt - md5sum: 1aa7ab38023f724877b3323c5e6b9a4e + md5sum: 81c7bf338886cb4d5c2cc07fc56afe44 - name: gatk4 calibratedragstrmodel test_gatk4_calibratedragstrmodel_beds command: nextflow run ./tests/modules/gatk4/calibratedragstrmodel -entry test_gatk4_calibratedragstrmodel_beds -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/calibratedragstrmodel/nextflow.config tags: - - gatk4/calibratedragstrmodel - gatk4 + - gatk4/calibratedragstrmodel files: - path: output/gatk4/test.txt - md5sum: def8baccad7bd59006f08fcb0a6721bf + md5sum: cb6a9acdee042302b54fd1f59b5f54ee From 9d6697bdead652a8e3caf4b382b8dc7e7f2ce1d6 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Tue, 7 Jun 2022 12:14:40 +0200 Subject: [PATCH 073/114] template --- modules/rhocall/annotate/main.nf | 75 +++++++++++++++++++ modules/rhocall/annotate/meta.yml | 51 +++++++++++++ tests/config/pytest_modules.yml | 4 + tests/modules/rhocall/annotate/main.nf | 15 ++++ .../modules/rhocall/annotate/nextflow.config | 5 ++ tests/modules/rhocall/annotate/test.yml | 14 ++++ 6 files changed, 164 insertions(+) create mode 100644 modules/rhocall/annotate/main.nf create mode 100644 modules/rhocall/annotate/meta.yml create mode 100644 tests/modules/rhocall/annotate/main.nf create mode 100644 tests/modules/rhocall/annotate/nextflow.config create mode 100644 tests/modules/rhocall/annotate/test.yml diff --git a/modules/rhocall/annotate/main.nf b/modules/rhocall/annotate/main.nf new file mode 100644 index 00000000..8ca395f2 --- /dev/null +++ b/modules/rhocall/annotate/main.nf @@ -0,0 +1,75 @@ +// TODO nf-core: If in doubt look at other nf-core/modules to see how we are doing things! :) +// https://github.com/nf-core/modules/tree/master/modules +// You can also ask for help via your pull request or on the #modules channel on the nf-core Slack workspace: +// https://nf-co.re/join +// TODO nf-core: A module file SHOULD only define input and output files as command-line parameters. +// All other parameters MUST be provided using the "task.ext" directive, see here: +// https://www.nextflow.io/docs/latest/process.html#ext +// where "task.ext" is a string. +// Any parameters that need to be evaluated in the context of a particular sample +// e.g. single-end/paired-end data MUST also be defined and evaluated appropriately. +// TODO nf-core: Software that can be piped together SHOULD be added to separate module files +// unless there is a run-time, storage advantage in implementing in this way +// e.g. it's ok to have a single module for bwa to output BAM instead of SAM: +// bwa mem | samtools view -B -T ref.fasta +// TODO nf-core: Optional inputs are not currently supported by Nextflow. However, using an empty +// list (`[]`) instead of a file can be used to work around this issue. + +process RHOCALL_ANNOTATE { + tag "$meta.id" + label 'process_medium' + + // TODO nf-core: List required Conda package(s). + // Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10"). + // For Conda, the build (i.e. "h9402c20_2") must be EXCLUDED to support installation on different operating systems. + // TODO nf-core: See section in main README for further information regarding finding and adding container addresses to the section below. + conda (params.enable_conda ? "bioconda::rhocall=0.5.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/YOUR-TOOL-HERE': + 'quay.io/biocontainers/YOUR-TOOL-HERE' }" + + input: + // TODO nf-core: Where applicable all sample-specific information e.g. "id", "single_end", "read_group" + // MUST be provided as an input via a Groovy Map called "meta". + // This information may not be required in some instances e.g. indexing reference genome files: + // https://github.com/nf-core/modules/blob/master/modules/bwa/index/main.nf + // TODO nf-core: Where applicable please provide/convert compressed files as input/output + // e.g. "*.fastq.gz" and NOT "*.fastq", "*.bam" and NOT "*.sam" etc. + tuple val(meta), path(bam) + + output: + // TODO nf-core: Named file extensions MUST be emitted for ALL output channels + tuple val(meta), path("*.bam"), emit: bam + // TODO nf-core: List additional required output channels/values here + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + // TODO nf-core: Where possible, a command MUST be provided to obtain the version number of the software e.g. 1.10 + // If the software is unable to output a version number on the command-line then it can be manually specified + // e.g. https://github.com/nf-core/modules/blob/master/modules/homer/annotatepeaks/main.nf + // Each software used MUST provide the software name and version number in the YAML version file (versions.yml) + // TODO nf-core: It MUST be possible to pass additional parameters to the tool as a command-line string via the "task.ext.args" directive + // TODO nf-core: If the tool supports multi-threading then you MUST provide the appropriate parameter + // using the Nextflow "task" variable e.g. "--threads $task.cpus" + // TODO nf-core: Please replace the example samtools command below with your module's command + // TODO nf-core: Please indent the command appropriately (4 spaces!!) to help with readability ;) + """ + samtools \\ + sort \\ + $args \\ + -@ $task.cpus \\ + -o ${prefix}.bam \\ + -T $prefix \\ + $bam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + rhocall: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//' )) + END_VERSIONS + """ +} diff --git a/modules/rhocall/annotate/meta.yml b/modules/rhocall/annotate/meta.yml new file mode 100644 index 00000000..cf7c8c49 --- /dev/null +++ b/modules/rhocall/annotate/meta.yml @@ -0,0 +1,51 @@ +name: "rhocall_annotate" +## TODO nf-core: Add a description of the module and list keywords +description: write your description here +keywords: + - sort +tools: + - "rhocall": + ## TODO nf-core: Add a description and other details for the software below + description: "Call regions of homozygosity and make tentative UPD calls." + homepage: "https://github.com/dnil/rhocall" + documentation: "https://github.com/dnil/rhocall" + tool_dev_url: "https://github.com/dnil" + doi: "" + licence: "['GPL v3']" + +## TODO nf-core: Add a description of all of the variables used as input +input: + # Only when we have meta + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + # + ## TODO nf-core: Delete / customise this example input + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + +## TODO nf-core: Add a description of all of the variables used as output +output: + #Only when we have meta + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + # + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + ## TODO nf-core: Delete / customise this example output + - bam: + type: file + description: Sorted BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + +authors: + - "@ramprasadn" diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index d8227203..ae3c1d2f 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -1631,6 +1631,10 @@ rgi/main: - modules/rgi/main/** - tests/modules/rgi/main/** +rhocall/annotate: + - modules/rhocall/annotate/** + - tests/modules/rhocall/annotate/** + rmarkdownnotebook: - modules/rmarkdownnotebook/** - tests/modules/rmarkdownnotebook/** diff --git a/tests/modules/rhocall/annotate/main.nf b/tests/modules/rhocall/annotate/main.nf new file mode 100644 index 00000000..65f1b5a0 --- /dev/null +++ b/tests/modules/rhocall/annotate/main.nf @@ -0,0 +1,15 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { RHOCALL_ANNOTATE } from '../../../../modules/rhocall/annotate/main.nf' + +workflow test_rhocall_annotate { + + input = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true) + ] + + RHOCALL_ANNOTATE ( input ) +} diff --git a/tests/modules/rhocall/annotate/nextflow.config b/tests/modules/rhocall/annotate/nextflow.config new file mode 100644 index 00000000..50f50a7a --- /dev/null +++ b/tests/modules/rhocall/annotate/nextflow.config @@ -0,0 +1,5 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + +} \ No newline at end of file diff --git a/tests/modules/rhocall/annotate/test.yml b/tests/modules/rhocall/annotate/test.yml new file mode 100644 index 00000000..110ec1b4 --- /dev/null +++ b/tests/modules/rhocall/annotate/test.yml @@ -0,0 +1,14 @@ +## TODO nf-core: Please run the following command to build this file: +# nf-core modules create-test-yml rhocall/annotate +- name: "rhocall annotate" + command: nextflow run ./tests/modules/rhocall/annotate -entry test_rhocall_annotate -c ./tests/config/nextflow.config -c ./tests/modules/rhocall/annotate/nextflow.config + tags: + - "rhocall" + # + - "rhocall/annotate" + # + files: + - path: "output/rhocall/test.bam" + md5sum: e667c7caad0bc4b7ac383fd023c654fc + - path: output/rhocall/versions.yml + md5sum: a01fe51bc4c6a3a6226fbf77b2c7cf3b From 4c7ef30fb64f75ba4499d3b8fba24a068b1ce586 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Tue, 7 Jun 2022 12:17:14 +0200 Subject: [PATCH 074/114] changed to label to medium --- modules/gatk4/calibratedragstrmodel/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/gatk4/calibratedragstrmodel/main.nf b/modules/gatk4/calibratedragstrmodel/main.nf index b4e47edc..37a54de4 100644 --- a/modules/gatk4/calibratedragstrmodel/main.nf +++ b/modules/gatk4/calibratedragstrmodel/main.nf @@ -1,6 +1,6 @@ process GATK4_CALIBRATEDRAGSTRMODEL { tag "$meta.id" - label 'process_low' + label 'process_medium' conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? From e5bf87aec3b7de144c7f2599154b6bb2620b5c84 Mon Sep 17 00:00:00 2001 From: Matthias De Smet <11850640+matthdsm@users.noreply.github.com> Date: Tue, 7 Jun 2022 13:58:27 +0200 Subject: [PATCH 075/114] possible fix for name collisions --- modules/multiqc/main.nf | 2 +- tests/modules/multiqc/main.nf | 23 +++++++++++++++++++---- tests/modules/multiqc/nextflow.config | 4 +++- tests/modules/multiqc/test.yml | 11 +++++++++-- 4 files changed, 32 insertions(+), 8 deletions(-) diff --git a/modules/multiqc/main.nf b/modules/multiqc/main.nf index ae019dbf..bd3c9dc8 100644 --- a/modules/multiqc/main.nf +++ b/modules/multiqc/main.nf @@ -7,7 +7,7 @@ process MULTIQC { 'quay.io/biocontainers/multiqc:1.12--pyhdfd78af_0' }" input: - path multiqc_files + path "??/*" output: path "*multiqc_report.html", emit: report diff --git a/tests/modules/multiqc/main.nf b/tests/modules/multiqc/main.nf index 43643985..3719d423 100644 --- a/tests/modules/multiqc/main.nf +++ b/tests/modules/multiqc/main.nf @@ -2,15 +2,30 @@ nextflow.enable.dsl = 2 -include { FASTQC } from '../../../modules/fastqc/main.nf' -include { MULTIQC } from '../../../modules/multiqc/main.nf' +include { FASTQC } from '../../../modules/fastqc/main.nf' +include { FASTQC as FASTQC2 } from '../../../modules/fastqc/main.nf' +include { MULTIQC } from '../../../modules/multiqc/main.nf' workflow test_multiqc { input = [ [ id: 'test', single_end: false ], - [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ] + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)] ] FASTQC ( input ) MULTIQC ( FASTQC.out.zip.collect { it[1] } ) } + +workflow test_multiqc_fn_collision { + fqc_input = [ [ id: 'test', single_end: false ], + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)] + ] + mqc_input = Channel.empty() + + FASTQC ( fqc_input ) + mqc_input = mqc_input.mix(FASTQC.out.zip.collect { it[1] }) + + FASTQC2 ( fqc_input ) + mqc_input = mqc_input.mix(FASTQC2.out.zip.collect { it[1] }) + + MULTIQC ( mqc_input ) +} diff --git a/tests/modules/multiqc/nextflow.config b/tests/modules/multiqc/nextflow.config index 8730f1c4..76d125e5 100644 --- a/tests/modules/multiqc/nextflow.config +++ b/tests/modules/multiqc/nextflow.config @@ -1,5 +1,7 @@ process { publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } - + withName: "FASTQC*" { + publishDir = [ enabled: false ] + } } diff --git a/tests/modules/multiqc/test.yml b/tests/modules/multiqc/test.yml index 39796872..33129c79 100644 --- a/tests/modules/multiqc/test.yml +++ b/tests/modules/multiqc/test.yml @@ -1,5 +1,12 @@ -- name: multiqc - command: nextflow run ./tests/modules/multiqc -entry test_multiqc -c ./tests/config/nextflow.config -c ./tests/modules/multiqc/nextflow.config +- name: multiqc test_multiqc + command: nextflow run ./tests/modules/multiqc -entry test_multiqc -c ./tests/config/nextflow.config -c ./tests/modules/multiqc/nextflow.config + tags: + - multiqc + files: + - path: output/multiqc/multiqc_report.html + +- name: multiqc test_multiqc_fn_collision + command: nextflow run ./tests/modules/multiqc -entry test_multiqc_fn_collision -c ./tests/config/nextflow.config -c ./tests/modules/multiqc/nextflow.config tags: - multiqc files: From dbca9e8b361187bf4c297045927cbcbf5034c312 Mon Sep 17 00:00:00 2001 From: SusiJo Date: Tue, 7 Jun 2022 13:58:57 +0200 Subject: [PATCH 076/114] adjusted prefix + test + authors --- modules/gatk4/markduplicatesspark/main.nf | 2 +- modules/gatk4/markduplicatesspark/meta.yml | 1 + tests/modules/gatk4/markduplicatesspark/nextflow.config | 2 +- tests/modules/gatk4/markduplicatesspark/test.yml | 9 +++++++++ 4 files changed, 12 insertions(+), 2 deletions(-) diff --git a/modules/gatk4/markduplicatesspark/main.nf b/modules/gatk4/markduplicatesspark/main.nf index 79e12814..945f105d 100644 --- a/modules/gatk4/markduplicatesspark/main.nf +++ b/modules/gatk4/markduplicatesspark/main.nf @@ -23,7 +23,7 @@ process GATK4_MARKDUPLICATES_SPARK { script: def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" + prefix = task.ext.prefix ?: "${meta.id}" def input_list = bam.collect{"--input $it"}.join(' ') diff --git a/modules/gatk4/markduplicatesspark/meta.yml b/modules/gatk4/markduplicatesspark/meta.yml index bf3e02ba..59be9b6d 100644 --- a/modules/gatk4/markduplicatesspark/meta.yml +++ b/modules/gatk4/markduplicatesspark/meta.yml @@ -58,3 +58,4 @@ authors: - "@ajodeh-juma" - "@FriederikeHanssen" - "@maxulysse" + - "@SusiJo" diff --git a/tests/modules/gatk4/markduplicatesspark/nextflow.config b/tests/modules/gatk4/markduplicatesspark/nextflow.config index 565016b7..e0455b4b 100644 --- a/tests/modules/gatk4/markduplicatesspark/nextflow.config +++ b/tests/modules/gatk4/markduplicatesspark/nextflow.config @@ -9,8 +9,8 @@ process { ext.prefix = { "${meta.id}.cram" } } withName: GATK4_MARKDUPLICATES_SPARK_METRICS { - ext.prefix = { "${meta.id}.bam" } ext.args = '--metrics-file test.metrics' + ext.prefix = { "${meta.id}.bam" } } } diff --git a/tests/modules/gatk4/markduplicatesspark/test.yml b/tests/modules/gatk4/markduplicatesspark/test.yml index 21323ada..31d8d34d 100644 --- a/tests/modules/gatk4/markduplicatesspark/test.yml +++ b/tests/modules/gatk4/markduplicatesspark/test.yml @@ -16,6 +16,15 @@ - path: output/gatk4/test.bam md5sum: 898cb0a6616897d8ada90bab53bf0837 +- name: gatk4 markduplicatesspark test_gatk4_markduplicates_spark_multiple_bams_cram_out + command: nextflow run ./tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark_multiple_bams_cram_out -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config + tags: + - gatk4 + - gatk4/markduplicatesspark + files: + - path: output/gatk4/test.cram + md5sum: 2271016de5e4199736598f39d12d7587 + - name: gatk4 markduplicatesspark test_gatk4_markduplicates_spark_multiple_bams_metrics command: nextflow run ./tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark_multiple_bams_metrics -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config tags: From 65a0fb959f77e44a09080f178ce06602270f3a0f Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Tue, 7 Jun 2022 14:37:03 +0200 Subject: [PATCH 077/114] rhocall --- modules/rhocall/annotate/main.nf | 78 +++++++++---------------- modules/rhocall/annotate/meta.yml | 41 +++++++------ tests/modules/rhocall/annotate/main.nf | 37 ++++++++++-- tests/modules/rhocall/annotate/test.yml | 19 +++--- 4 files changed, 91 insertions(+), 84 deletions(-) diff --git a/modules/rhocall/annotate/main.nf b/modules/rhocall/annotate/main.nf index 8ca395f2..e59795a4 100644 --- a/modules/rhocall/annotate/main.nf +++ b/modules/rhocall/annotate/main.nf @@ -1,75 +1,51 @@ -// TODO nf-core: If in doubt look at other nf-core/modules to see how we are doing things! :) -// https://github.com/nf-core/modules/tree/master/modules -// You can also ask for help via your pull request or on the #modules channel on the nf-core Slack workspace: -// https://nf-co.re/join -// TODO nf-core: A module file SHOULD only define input and output files as command-line parameters. -// All other parameters MUST be provided using the "task.ext" directive, see here: -// https://www.nextflow.io/docs/latest/process.html#ext -// where "task.ext" is a string. -// Any parameters that need to be evaluated in the context of a particular sample -// e.g. single-end/paired-end data MUST also be defined and evaluated appropriately. -// TODO nf-core: Software that can be piped together SHOULD be added to separate module files -// unless there is a run-time, storage advantage in implementing in this way -// e.g. it's ok to have a single module for bwa to output BAM instead of SAM: -// bwa mem | samtools view -B -T ref.fasta -// TODO nf-core: Optional inputs are not currently supported by Nextflow. However, using an empty -// list (`[]`) instead of a file can be used to work around this issue. - process RHOCALL_ANNOTATE { tag "$meta.id" label 'process_medium' - // TODO nf-core: List required Conda package(s). - // Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10"). - // For Conda, the build (i.e. "h9402c20_2") must be EXCLUDED to support installation on different operating systems. - // TODO nf-core: See section in main README for further information regarding finding and adding container addresses to the section below. conda (params.enable_conda ? "bioconda::rhocall=0.5.1" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/YOUR-TOOL-HERE': - 'quay.io/biocontainers/YOUR-TOOL-HERE' }" + 'https://depot.galaxyproject.org/singularity/rhocall:0.5.1--py39hbf8eff0_0': + 'quay.io/biocontainers/rhocall:0.5.1--py39hbf8eff0_0' }" input: - // TODO nf-core: Where applicable all sample-specific information e.g. "id", "single_end", "read_group" - // MUST be provided as an input via a Groovy Map called "meta". - // This information may not be required in some instances e.g. indexing reference genome files: - // https://github.com/nf-core/modules/blob/master/modules/bwa/index/main.nf - // TODO nf-core: Where applicable please provide/convert compressed files as input/output - // e.g. "*.fastq.gz" and NOT "*.fastq", "*.bam" and NOT "*.sam" etc. - tuple val(meta), path(bam) + tuple val(meta), path(vcf), path(tbi) + tuple val(meta), path(roh) + path bed output: - // TODO nf-core: Named file extensions MUST be emitted for ALL output channels - tuple val(meta), path("*.bam"), emit: bam - // TODO nf-core: List additional required output channels/values here - path "versions.yml" , emit: versions + tuple val(meta), path("*_rhocall.vcf"), emit: vcf + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' + def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - // TODO nf-core: Where possible, a command MUST be provided to obtain the version number of the software e.g. 1.10 - // If the software is unable to output a version number on the command-line then it can be manually specified - // e.g. https://github.com/nf-core/modules/blob/master/modules/homer/annotatepeaks/main.nf - // Each software used MUST provide the software name and version number in the YAML version file (versions.yml) - // TODO nf-core: It MUST be possible to pass additional parameters to the tool as a command-line string via the "task.ext.args" directive - // TODO nf-core: If the tool supports multi-threading then you MUST provide the appropriate parameter - // using the Nextflow "task" variable e.g. "--threads $task.cpus" - // TODO nf-core: Please replace the example samtools command below with your module's command - // TODO nf-core: Please indent the command appropriately (4 spaces!!) to help with readability ;) + def az_bed = bed ? "-b ${bed}" : '' """ - samtools \\ - sort \\ + rhocall \\ + annotate \\ $args \\ - -@ $task.cpus \\ - -o ${prefix}.bam \\ - -T $prefix \\ - $bam + $az_bed \\ + -r $roh \\ + -o ${prefix}_rhocall.vcf \\ + $vcf cat <<-END_VERSIONS > versions.yml "${task.process}": - rhocall: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//' )) + rhocall: \$(echo \$(rhocall --version 2>&1) | sed 's/rhocall, version //' )) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}_rhocall.vcf + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + rhocall: \$(echo \$(rhocall --version 2>&1) | sed 's/rhocall, version //' )) END_VERSIONS """ } diff --git a/modules/rhocall/annotate/meta.yml b/modules/rhocall/annotate/meta.yml index cf7c8c49..4ece9d2b 100644 --- a/modules/rhocall/annotate/meta.yml +++ b/modules/rhocall/annotate/meta.yml @@ -1,11 +1,10 @@ name: "rhocall_annotate" -## TODO nf-core: Add a description of the module and list keywords -description: write your description here +description: "Markup VCF file using rho-calls." keywords: - - sort + - roh + - rhocall tools: - "rhocall": - ## TODO nf-core: Add a description and other details for the software below description: "Call regions of homozygosity and make tentative UPD calls." homepage: "https://github.com/dnil/rhocall" documentation: "https://github.com/dnil/rhocall" @@ -13,39 +12,43 @@ tools: doi: "" licence: "['GPL v3']" -## TODO nf-core: Add a description of all of the variables used as input input: - # Only when we have meta - meta: type: map description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - # - ## TODO nf-core: Delete / customise this example input - - bam: + - vcf: type: file - description: BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" + description: vcf file + pattern: "*.{vcf,vcf.gz}" + - tbi: + type: file + description: vcf index file + pattern: "*.{tbi}" + - roh: + type: file + description: Bcftools roh style TSV file with CHR,POS,AZ,QUAL + pattern: "*.{roh}" + - bed: + type: file + description: BED file with AZ windows. + pattern: "*.{bed}" -## TODO nf-core: Add a description of all of the variables used as output output: - #Only when we have meta - meta: type: map description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - # + - vcf: + type: file + description: vcf file + pattern: "*.{vcf,vcf.gz}" - versions: type: file description: File containing software versions pattern: "versions.yml" - ## TODO nf-core: Delete / customise this example output - - bam: - type: file - description: Sorted BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" authors: - "@ramprasadn" diff --git a/tests/modules/rhocall/annotate/main.nf b/tests/modules/rhocall/annotate/main.nf index 65f1b5a0..a77dc19a 100644 --- a/tests/modules/rhocall/annotate/main.nf +++ b/tests/modules/rhocall/annotate/main.nf @@ -3,13 +3,38 @@ nextflow.enable.dsl = 2 include { RHOCALL_ANNOTATE } from '../../../../modules/rhocall/annotate/main.nf' +include { BCFTOOLS_ROH } from '../../../../modules/bcftools/roh/main.nf' workflow test_rhocall_annotate { - - input = [ - [ id:'test', single_end:false ], // meta map - file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true) - ] - RHOCALL_ANNOTATE ( input ) + input = [ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true)] + + af_file = [] + gen_map = [] + regions = [] + targets = [] + samples = [] + + BCFTOOLS_ROH ( input, af_file, gen_map, regions, samples, targets ) + RHOCALL_ANNOTATE ( input, BCFTOOLS_ROH.out.roh, []) + +} + +workflow test_rhocall_annotate_stub { + + input = [ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true)] + + af_file = [] + gen_map = [] + regions = [] + targets = [] + samples = [] + + BCFTOOLS_ROH ( input, af_file, gen_map, regions, samples, targets ) + RHOCALL_ANNOTATE ( input, BCFTOOLS_ROH.out.roh, []) + } diff --git a/tests/modules/rhocall/annotate/test.yml b/tests/modules/rhocall/annotate/test.yml index 110ec1b4..77c9f8ec 100644 --- a/tests/modules/rhocall/annotate/test.yml +++ b/tests/modules/rhocall/annotate/test.yml @@ -1,14 +1,17 @@ -## TODO nf-core: Please run the following command to build this file: -# nf-core modules create-test-yml rhocall/annotate - name: "rhocall annotate" command: nextflow run ./tests/modules/rhocall/annotate -entry test_rhocall_annotate -c ./tests/config/nextflow.config -c ./tests/modules/rhocall/annotate/nextflow.config tags: - "rhocall" - # - "rhocall/annotate" - # files: - - path: "output/rhocall/test.bam" - md5sum: e667c7caad0bc4b7ac383fd023c654fc - - path: output/rhocall/versions.yml - md5sum: a01fe51bc4c6a3a6226fbf77b2c7cf3b + - path: "output/rhocall/test_rhocall.vcf" + - path: "output/rhocall/versions.yml" + +- name: "rhocall annotate stub" + command: nextflow run ./tests/modules/rhocall/annotate -entry test_rhocall_annotate_stub -c ./tests/config/nextflow.config -c ./tests/modules/rhocall/annotate/nextflow.config -stub-run + tags: + - "rhocall" + - "rhocall/annotate" + files: + - path: "output/rhocall/test_rhocall.vcf" + - path: "output/rhocall/versions.yml" From 745e47cd33bad1303257b669d22c913abd4bf94e Mon Sep 17 00:00:00 2001 From: SusiJo Date: Tue, 7 Jun 2022 14:38:43 +0200 Subject: [PATCH 078/114] add versions.yml --- tests/modules/gatk4/markduplicatesspark/test.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/modules/gatk4/markduplicatesspark/test.yml b/tests/modules/gatk4/markduplicatesspark/test.yml index 31d8d34d..4ef9b916 100644 --- a/tests/modules/gatk4/markduplicatesspark/test.yml +++ b/tests/modules/gatk4/markduplicatesspark/test.yml @@ -6,6 +6,7 @@ files: - path: output/gatk4/test.bam md5sum: dc1a09ac6371aab7c50d1a554baa06d3 + - path: output/gatk4/versions.yml - name: gatk4 markduplicatesspark test_gatk4_markduplicates_spark_multiple_bams command: nextflow run ./tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark_multiple_bams -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config @@ -15,6 +16,7 @@ files: - path: output/gatk4/test.bam md5sum: 898cb0a6616897d8ada90bab53bf0837 + - path: output/gatk4/versions.yml - name: gatk4 markduplicatesspark test_gatk4_markduplicates_spark_multiple_bams_cram_out command: nextflow run ./tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark_multiple_bams_cram_out -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config @@ -24,6 +26,7 @@ files: - path: output/gatk4/test.cram md5sum: 2271016de5e4199736598f39d12d7587 + - path: output/gatk4/versions.yml - name: gatk4 markduplicatesspark test_gatk4_markduplicates_spark_multiple_bams_metrics command: nextflow run ./tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark_multiple_bams_metrics -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config @@ -35,3 +38,4 @@ md5sum: 898cb0a6616897d8ada90bab53bf0837 - path: output/gatk4/test.metrics contains: ["## METRICS CLASS", "org.broadinstitute.hellbender.utils.read.markduplicates.GATKDuplicationMetrics"] + - path: output/gatk4/versions.yml From 17a4e931427b5b144c0d0227a3ea49347ba7457a Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Tue, 7 Jun 2022 14:43:35 +0200 Subject: [PATCH 079/114] fix versions.yaml --- modules/rhocall/annotate/main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/rhocall/annotate/main.nf b/modules/rhocall/annotate/main.nf index e59795a4..95d647b1 100644 --- a/modules/rhocall/annotate/main.nf +++ b/modules/rhocall/annotate/main.nf @@ -34,7 +34,7 @@ process RHOCALL_ANNOTATE { cat <<-END_VERSIONS > versions.yml "${task.process}": - rhocall: \$(echo \$(rhocall --version 2>&1) | sed 's/rhocall, version //' )) + rhocall: \$(echo \$(rhocall --version 2>&1) | sed 's/rhocall, version //' ) END_VERSIONS """ @@ -45,7 +45,7 @@ process RHOCALL_ANNOTATE { cat <<-END_VERSIONS > versions.yml "${task.process}": - rhocall: \$(echo \$(rhocall --version 2>&1) | sed 's/rhocall, version //' )) + rhocall: \$(echo \$(rhocall --version 2>&1) | sed 's/rhocall, version //' ) END_VERSIONS """ } From d196552e064b6979109931e4d48a68617f090254 Mon Sep 17 00:00:00 2001 From: Matthias De Smet <11850640+matthdsm@users.noreply.github.com> Date: Tue, 7 Jun 2022 16:33:04 +0200 Subject: [PATCH 080/114] update input --- modules/multiqc/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/multiqc/main.nf b/modules/multiqc/main.nf index bd3c9dc8..d376db60 100644 --- a/modules/multiqc/main.nf +++ b/modules/multiqc/main.nf @@ -7,7 +7,7 @@ process MULTIQC { 'quay.io/biocontainers/multiqc:1.12--pyhdfd78af_0' }" input: - path "??/*" + path "*/*" output: path "*multiqc_report.html", emit: report From 4935bb98cbc05472da3e89a0b0f04423a105f94f Mon Sep 17 00:00:00 2001 From: "Maxime U. Garcia" Date: Wed, 8 Jun 2022 11:38:02 +0200 Subject: [PATCH 081/114] Apply suggestions from code review Co-authored-by: Mahesh Binzer-Panchal --- modules/tiddit/sv/main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/tiddit/sv/main.nf b/modules/tiddit/sv/main.nf index ee2a8d75..ec4affe3 100644 --- a/modules/tiddit/sv/main.nf +++ b/modules/tiddit/sv/main.nf @@ -8,7 +8,7 @@ process TIDDIT_SV { 'quay.io/biocontainers/tiddit:3.0.0--py39h59fae87_1' }" input: - tuple val(meta), path(input), path(index) + tuple val(meta), path(input), path(input_index) path fasta path bwa_index @@ -24,7 +24,7 @@ process TIDDIT_SV { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" """ - [[ -d $bwa_index ]] && for i in `ls $bwa_index`; do [[ -f $fasta && ! "\$i" =~ .*"$fasta".* ]] && ln -s $bwa_index/\$i ${fasta}.\${i##*.} || ln -s $bwa_index/\$i \$i; done + [[ -d $bwa_index ]] && for i in $bwa_index/*; do [[ -f $fasta && ! "\$i" =~ .*"$fasta".* ]] && ln -s $bwa_index/\$i ${fasta}.\${i##*.} || ln -s $bwa_index/\$i \$i; done tiddit \\ --sv \\ From 07eff4346f3ccf0990dd785f95a02059d18a3b64 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Wed, 8 Jun 2022 11:46:45 +0200 Subject: [PATCH 082/114] fix path to symbolic links --- modules/tiddit/sv/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/tiddit/sv/main.nf b/modules/tiddit/sv/main.nf index ec4affe3..275ceb3f 100644 --- a/modules/tiddit/sv/main.nf +++ b/modules/tiddit/sv/main.nf @@ -24,7 +24,7 @@ process TIDDIT_SV { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" """ - [[ -d $bwa_index ]] && for i in $bwa_index/*; do [[ -f $fasta && ! "\$i" =~ .*"$fasta".* ]] && ln -s $bwa_index/\$i ${fasta}.\${i##*.} || ln -s $bwa_index/\$i \$i; done + [[ -d $bwa_index ]] && for i in $bwa_index/*; do [[ -f $fasta && ! "\$i" =~ .*"$fasta".* ]] && ln -s \$i ${fasta}.\${i##*.} || ln -s \$i .; done tiddit \\ --sv \\ From 439d2a9e679977add51da377f3dbfdf68ab5c6a9 Mon Sep 17 00:00:00 2001 From: Matthias De Smet <11850640+matthdsm@users.noreply.github.com> Date: Wed, 8 Jun 2022 13:48:27 +0200 Subject: [PATCH 083/114] update mqc inputs --- modules/multiqc/main.nf | 3 ++- modules/multiqc/meta.yml | 8 ++++++++ tests/modules/multiqc/main.nf | 4 ++-- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/modules/multiqc/main.nf b/modules/multiqc/main.nf index d376db60..df68d853 100644 --- a/modules/multiqc/main.nf +++ b/modules/multiqc/main.nf @@ -7,7 +7,8 @@ process MULTIQC { 'quay.io/biocontainers/multiqc:1.12--pyhdfd78af_0' }" input: - path "*/*" + path multiqc_files, stageAs: "?/*" + tuple path(mqc_config), path(mqc_logo) output: path "*multiqc_report.html", emit: report diff --git a/modules/multiqc/meta.yml b/modules/multiqc/meta.yml index 6fa891ef..bf3a27fe 100644 --- a/modules/multiqc/meta.yml +++ b/modules/multiqc/meta.yml @@ -17,6 +17,14 @@ input: type: file description: | List of reports / files recognised by MultiQC, for example the html and zip output of FastQC + - multiqc_config: + type: file + description: Config yml for MultiQC + pattern: "*.{yml,yaml}" + - multiqc_logo: + type: file + description: Logo file for MultiQC + pattern: "*.{png}" output: - report: type: file diff --git a/tests/modules/multiqc/main.nf b/tests/modules/multiqc/main.nf index 3719d423..e015cbc3 100644 --- a/tests/modules/multiqc/main.nf +++ b/tests/modules/multiqc/main.nf @@ -12,7 +12,7 @@ workflow test_multiqc { ] FASTQC ( input ) - MULTIQC ( FASTQC.out.zip.collect { it[1] } ) + MULTIQC ( FASTQC.out.zip.collect { it[1] }, [[],[]] ) } workflow test_multiqc_fn_collision { @@ -27,5 +27,5 @@ workflow test_multiqc_fn_collision { FASTQC2 ( fqc_input ) mqc_input = mqc_input.mix(FASTQC2.out.zip.collect { it[1] }) - MULTIQC ( mqc_input ) + MULTIQC ( mqc_input, [[],[]] ) } From f1e33ca6e0d7d40fda3f78822b7789c6c13eb21c Mon Sep 17 00:00:00 2001 From: Matthias De Smet <11850640+matthdsm@users.noreply.github.com> Date: Wed, 8 Jun 2022 14:02:04 +0200 Subject: [PATCH 084/114] update mqc command --- modules/multiqc/main.nf | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/modules/multiqc/main.nf b/modules/multiqc/main.nf index df68d853..de4de537 100644 --- a/modules/multiqc/main.nf +++ b/modules/multiqc/main.nf @@ -8,7 +8,7 @@ process MULTIQC { input: path multiqc_files, stageAs: "?/*" - tuple path(mqc_config), path(mqc_logo) + tuple path(multiqc_config), path(multiqc_logo) output: path "*multiqc_report.html", emit: report @@ -21,8 +21,13 @@ process MULTIQC { script: def args = task.ext.args ?: '' + def config = multiqc_config ?: '--config $multiqc_config' """ - multiqc -f $args . + multiqc \ + --force \ + $config \ + $args \ + . cat <<-END_VERSIONS > versions.yml "${task.process}": From 100b3f244c1dced5a6fa74af77829d7ef759e648 Mon Sep 17 00:00:00 2001 From: Matthias De Smet <11850640+matthdsm@users.noreply.github.com> Date: Wed, 8 Jun 2022 14:24:55 +0200 Subject: [PATCH 085/114] Apply suggestions from code review Co-authored-by: Mahesh Binzer-Panchal --- modules/multiqc/main.nf | 2 +- tests/modules/multiqc/main.nf | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/modules/multiqc/main.nf b/modules/multiqc/main.nf index de4de537..52069c21 100644 --- a/modules/multiqc/main.nf +++ b/modules/multiqc/main.nf @@ -21,7 +21,7 @@ process MULTIQC { script: def args = task.ext.args ?: '' - def config = multiqc_config ?: '--config $multiqc_config' + def config = multiqc_config ?: "--config $multiqc_config" """ multiqc \ --force \ diff --git a/tests/modules/multiqc/main.nf b/tests/modules/multiqc/main.nf index e015cbc3..236d8ad0 100644 --- a/tests/modules/multiqc/main.nf +++ b/tests/modules/multiqc/main.nf @@ -16,9 +16,10 @@ workflow test_multiqc { } workflow test_multiqc_fn_collision { - fqc_input = [ [ id: 'test', single_end: false ], - [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)] - ] + fqc_input = [ + [ id: 'test', single_end: false ], + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)] + ] mqc_input = Channel.empty() FASTQC ( fqc_input ) From 7b018acabbdf4ab893e7a7d9f321f6e87748cb02 Mon Sep 17 00:00:00 2001 From: Matthias De Smet <11850640+matthdsm@users.noreply.github.com> Date: Wed, 8 Jun 2022 14:36:13 +0200 Subject: [PATCH 086/114] Update modules/multiqc/main.nf Co-authored-by: Mahesh Binzer-Panchal --- modules/multiqc/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/multiqc/main.nf b/modules/multiqc/main.nf index 52069c21..6960bd4c 100644 --- a/modules/multiqc/main.nf +++ b/modules/multiqc/main.nf @@ -21,7 +21,7 @@ process MULTIQC { script: def args = task.ext.args ?: '' - def config = multiqc_config ?: "--config $multiqc_config" + def config = multiqc_config ? "--config $multiqc_config" : '' """ multiqc \ --force \ From 1648d2fcba4951266c25ddef2da843820d53fe15 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Wed, 8 Jun 2022 14:36:25 +0200 Subject: [PATCH 087/114] add index --- modules/bcftools/roh/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/bcftools/roh/main.nf b/modules/bcftools/roh/main.nf index 890b6fad..83ac4e65 100644 --- a/modules/bcftools/roh/main.nf +++ b/modules/bcftools/roh/main.nf @@ -9,7 +9,7 @@ process BCFTOOLS_ROH { input: tuple val(meta), path(vcf), path(tbi) - path af_file + tuple path(af_file), path(af_file_tbi) path genetic_map path regions_file path samples_file From ea4dc866e09834704302c7f4cd8dd0401678b2e7 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Wed, 8 Jun 2022 14:38:55 +0200 Subject: [PATCH 088/114] add index --- modules/bcftools/roh/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/bcftools/roh/main.nf b/modules/bcftools/roh/main.nf index 890b6fad..83ac4e65 100644 --- a/modules/bcftools/roh/main.nf +++ b/modules/bcftools/roh/main.nf @@ -9,7 +9,7 @@ process BCFTOOLS_ROH { input: tuple val(meta), path(vcf), path(tbi) - path af_file + tuple path(af_file), path(af_file_tbi) path genetic_map path regions_file path samples_file From e5f8924fabf4c8380f55fb7aee89fd2c268161b1 Mon Sep 17 00:00:00 2001 From: Matthias De Smet <11850640+matthdsm@users.noreply.github.com> Date: Wed, 8 Jun 2022 14:41:10 +0200 Subject: [PATCH 089/114] Update modules/multiqc/main.nf Co-authored-by: Mahesh Binzer-Panchal --- modules/multiqc/main.nf | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/multiqc/main.nf b/modules/multiqc/main.nf index 6960bd4c..b0831b5d 100644 --- a/modules/multiqc/main.nf +++ b/modules/multiqc/main.nf @@ -23,10 +23,10 @@ process MULTIQC { def args = task.ext.args ?: '' def config = multiqc_config ? "--config $multiqc_config" : '' """ - multiqc \ - --force \ - $config \ - $args \ + multiqc \\ + --force \\ + $config \\ + $args \\ . cat <<-END_VERSIONS > versions.yml From c005a5e00777d577d661e1c2a81a779de1bda3fc Mon Sep 17 00:00:00 2001 From: Matthias De Smet <11850640+matthdsm@users.noreply.github.com> Date: Wed, 8 Jun 2022 14:42:58 +0200 Subject: [PATCH 090/114] formatting --- tests/modules/multiqc/main.nf | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/modules/multiqc/main.nf b/tests/modules/multiqc/main.nf index 236d8ad0..6c21d061 100644 --- a/tests/modules/multiqc/main.nf +++ b/tests/modules/multiqc/main.nf @@ -7,16 +7,17 @@ include { FASTQC as FASTQC2 } from '../../../modules/fastqc/main.nf' include { MULTIQC } from '../../../modules/multiqc/main.nf' workflow test_multiqc { - input = [ [ id: 'test', single_end: false ], - [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)] - ] + input = [ + [ id: 'test', single_end: false ], + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)] + ] FASTQC ( input ) MULTIQC ( FASTQC.out.zip.collect { it[1] }, [[],[]] ) } workflow test_multiqc_fn_collision { - fqc_input = [ + fqc_input = [ [ id: 'test', single_end: false ], [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)] ] From c079ee20250f5676c53b145fdd00f9defe91f112 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Wed, 8 Jun 2022 14:48:16 +0200 Subject: [PATCH 091/114] fix test --- modules/bcftools/roh/meta.yml | 3 +++ tests/modules/bcftools/roh/main.nf | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/modules/bcftools/roh/meta.yml b/modules/bcftools/roh/meta.yml index fd03d4ce..fa0fd088 100644 --- a/modules/bcftools/roh/meta.yml +++ b/modules/bcftools/roh/meta.yml @@ -23,6 +23,9 @@ input: - af_file: type: file description: "Read allele frequencies from a tab-delimited file containing the columns: CHROM\tPOS\tREF,ALT\tAF." + - af_file_tbi: + type: file + description: "tbi index of af_file." - genetic_map: type: file description: "Genetic map in the format required also by IMPUTE2." diff --git a/tests/modules/bcftools/roh/main.nf b/tests/modules/bcftools/roh/main.nf index 3eb534b6..3a2ef640 100644 --- a/tests/modules/bcftools/roh/main.nf +++ b/tests/modules/bcftools/roh/main.nf @@ -10,7 +10,7 @@ workflow test_bcftools_roh { file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true), file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true)] - af_file = [] + af_file = [[],[]] gen_map = [] regions = [] targets = [] @@ -25,7 +25,7 @@ workflow test_bcftools_roh_stub { file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true), file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true)] - af_file = [] + af_file = [[],[]] gen_map = [] regions = [] targets = [] From 1d674890565bc2a018176388c46e71f9c0bb74f0 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Wed, 8 Jun 2022 16:36:02 +0200 Subject: [PATCH 092/114] update test --- tests/modules/rhocall/annotate/main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/modules/rhocall/annotate/main.nf b/tests/modules/rhocall/annotate/main.nf index a77dc19a..9cca1520 100644 --- a/tests/modules/rhocall/annotate/main.nf +++ b/tests/modules/rhocall/annotate/main.nf @@ -11,7 +11,7 @@ workflow test_rhocall_annotate { file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true), file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true)] - af_file = [] + af_file = [[],[]] gen_map = [] regions = [] targets = [] @@ -28,7 +28,7 @@ workflow test_rhocall_annotate_stub { file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true), file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true)] - af_file = [] + af_file = [[],[]] gen_map = [] regions = [] targets = [] From 4d7afa21e2f9e64d8bab5e9865cbde170bc920db Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Wed, 8 Jun 2022 21:14:08 +0200 Subject: [PATCH 093/114] Add GATK indelrealigner tests --- modules/gatk/indelrealigner/main.nf | 6 ++-- tests/modules/gatk/indelrealigner/main.nf | 31 +++++++++++++------ .../gatk/indelrealigner/nextflow.config | 2 +- tests/modules/gatk/indelrealigner/test.yml | 22 ++++++------- 4 files changed, 35 insertions(+), 26 deletions(-) diff --git a/modules/gatk/indelrealigner/main.nf b/modules/gatk/indelrealigner/main.nf index f98a99b7..10246082 100644 --- a/modules/gatk/indelrealigner/main.nf +++ b/modules/gatk/indelrealigner/main.nf @@ -22,11 +22,12 @@ process GATK_INDELREALIGNER { task.ext.when == null || task.ext.when script: - if ("$input" == "${prefix}.bam") error "Input and output names are the same, set prefix in module configuration to disambiguate!" def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def known = known_vcf ? "-known ${known_vcf}" : "" + if ("$input" == "${prefix}.bam") error "Input and output names are the same, set prefix in module configuration to disambiguate!" + def avail_mem = 3 if (!task.memory) { log.info '[GATK IndelRealigner] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' @@ -39,9 +40,8 @@ process GATK_INDELREALIGNER { -Xmx${avail_mem}g \\ -T IndelRealigner \\ -R ${fasta} \\ - -nt ${task.cpus} -I ${input} \\ - -targetIntervals ${intervals} \\ + --targetIntervals ${intervals} \\ ${known} \\ -o ${prefix}.bam \\ $args diff --git a/tests/modules/gatk/indelrealigner/main.nf b/tests/modules/gatk/indelrealigner/main.nf index 687e12a9..d4e01e12 100644 --- a/tests/modules/gatk/indelrealigner/main.nf +++ b/tests/modules/gatk/indelrealigner/main.nf @@ -2,21 +2,32 @@ nextflow.enable.dsl = 2 +include { GATK_REALIGNERTARGETCREATOR } from '../../../../modules/gatk/realignertargetcreator/main.nf' include { GATK_INDELREALIGNER } from '../../../../modules/gatk/indelrealigner/main.nf' -// TODO add REalignerTargetCrator - workflow test_gatk_indelrealigner { - input = [ - [ id:'test', single_end:false ], // meta map - file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_paired_end_bai'], checkIfExists: true), - GATK_REALIGNERTARGETCREATOR.out.intervals - ] - reference = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true) - GATK_INDELREALIGNER ( input, reference, [] ) + input_realignertargetcreator = [ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true), + ] + + GATK_REALIGNERTARGETCREATOR ( input_realignertargetcreator, fasta, fai, dict, [] ) + + ch_intervals = GATK_REALIGNERTARGETCREATOR.out.intervals + + ch_bams_indelrealigner = Channel.of([ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true) + ]) + + ch_input_indelrealigner = ch_bams_indelrealigner.mix(ch_intervals).groupTuple(by: 0).map{ [it[0], it[1][0], it[2], it[1][1] ] }.dump(tag: "input") + + GATK_INDELREALIGNER ( ch_input_indelrealigner, fasta, fai, dict, [] ) } diff --git a/tests/modules/gatk/indelrealigner/nextflow.config b/tests/modules/gatk/indelrealigner/nextflow.config index 2e6ae57f..489a478f 100644 --- a/tests/modules/gatk/indelrealigner/nextflow.config +++ b/tests/modules/gatk/indelrealigner/nextflow.config @@ -1,6 +1,6 @@ process { publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } - ext.prefix = "${meta.id}.realigned" + ext.prefix = { "${meta.id}.realigned" } } diff --git a/tests/modules/gatk/indelrealigner/test.yml b/tests/modules/gatk/indelrealigner/test.yml index 59fc394f..b7f0c465 100644 --- a/tests/modules/gatk/indelrealigner/test.yml +++ b/tests/modules/gatk/indelrealigner/test.yml @@ -1,14 +1,12 @@ -## TODO nf-core: Please run the following command to build this file: -# nf-core modules create-test-yml gatk/indelrealigner -- name: "gatk indelrealigner" - command: nextflow run ./tests/modules/gatk/indelrealigner -entry test_gatk_indelrealigner -c ./tests/config/nextflow.config -c ./tests/modules/gatk/indelrealigner/nextflow.config +- name: gatk indelrealigner test_gatk_indelrealigner + command: nextflow run ./tests/modules/gatk/indelrealigner -entry test_gatk_indelrealigner -c ./tests/config/nextflow.config -c ./tests/modules/gatk/indelrealigner/nextflow.config tags: - - "gatk" - # - - "gatk/indelrealigner" - # + - gatk/indelrealigner + - gatk files: - - path: "output/gatk/test.bam" - md5sum: e667c7caad0bc4b7ac383fd023c654fc - - path: output/gatk/versions.yml - md5sum: a01fe51bc4c6a3a6226fbf77b2c7cf3b + - path: output/gatk/test.realigned.bai + md5sum: 85a67df8827fe426e7f3a458134c0551 + - path: output/gatk/test.realigned.bam + md5sum: ea1df6f7fcafc408fae4dc1574813d8a + - path: output/gatk/test.realigned.intervals + md5sum: 7aa7a1b235a510e6591e262382086bf8 From 939653a47eca27d160f6d2342f3a63c44c29004b Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Thu, 9 Jun 2022 08:26:04 +0200 Subject: [PATCH 094/114] Changes after code review --- modules/gatk/indelrealigner/main.nf | 2 +- modules/gatk/indelrealigner/meta.yml | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/modules/gatk/indelrealigner/main.nf b/modules/gatk/indelrealigner/main.nf index 10246082..2ff012b4 100644 --- a/modules/gatk/indelrealigner/main.nf +++ b/modules/gatk/indelrealigner/main.nf @@ -8,7 +8,7 @@ process GATK_INDELREALIGNER { 'quay.io/biocontainers/gatk:3.5--hdfd78af_11' }" input: - tuple val(meta), path(input), path(index), path(intervals) + tuple val(meta), path(bam), path(bai), path(intervals) path(fasta) path(fai) path(dict) diff --git a/modules/gatk/indelrealigner/meta.yml b/modules/gatk/indelrealigner/meta.yml index 5abfbfb1..35ad28e8 100644 --- a/modules/gatk/indelrealigner/meta.yml +++ b/modules/gatk/indelrealigner/meta.yml @@ -19,11 +19,11 @@ input: description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - input: + - bam: type: file - description: Sorted and indexed BAM/CRAM/SAM file + description: Sorted and indexed BAM file pattern: "*.bam" - - index: + - bai: type: file description: BAM index file pattern: "*.bai" @@ -60,7 +60,7 @@ output: pattern: "versions.yml" - bam: type: file - description: Sorted and indexed BAM/CRAM/SAM file with local realignment around variants + description: Sorted and indexed BAM file with local realignment around variants pattern: "*.bam" - bai: type: file From 70f7f07a371c2c553026d5b0fbaadec9a7423e53 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Thu, 9 Jun 2022 08:33:37 +0200 Subject: [PATCH 095/114] Apply suggestions from code review Co-authored-by: FriederikeHanssen --- modules/gatk/indelrealigner/main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/gatk/indelrealigner/main.nf b/modules/gatk/indelrealigner/main.nf index 2ff012b4..94cf78da 100644 --- a/modules/gatk/indelrealigner/main.nf +++ b/modules/gatk/indelrealigner/main.nf @@ -26,7 +26,7 @@ process GATK_INDELREALIGNER { def prefix = task.ext.prefix ?: "${meta.id}" def known = known_vcf ? "-known ${known_vcf}" : "" - if ("$input" == "${prefix}.bam") error "Input and output names are the same, set prefix in module configuration to disambiguate!" + if ("$bam" == "${prefix}.bam") error "Input and output names are the same, set prefix in module configuration to disambiguate!" def avail_mem = 3 if (!task.memory) { @@ -40,7 +40,7 @@ process GATK_INDELREALIGNER { -Xmx${avail_mem}g \\ -T IndelRealigner \\ -R ${fasta} \\ - -I ${input} \\ + -I ${bam} \\ --targetIntervals ${intervals} \\ ${known} \\ -o ${prefix}.bam \\ From cda434611f851bbc4fe3d876e6b702cf721a0175 Mon Sep 17 00:00:00 2001 From: SusiJo Date: Thu, 9 Jun 2022 10:24:39 +0200 Subject: [PATCH 096/114] fix conda with openjdk 1.8 --- modules/gatk4/markduplicatesspark/main.nf | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modules/gatk4/markduplicatesspark/main.nf b/modules/gatk4/markduplicatesspark/main.nf index 945f105d..db6a1aa1 100644 --- a/modules/gatk4/markduplicatesspark/main.nf +++ b/modules/gatk4/markduplicatesspark/main.nf @@ -2,7 +2,7 @@ process GATK4_MARKDUPLICATES_SPARK { tag "$meta.id" label 'process_high' - conda (params.enable_conda ? "bioconda::gatk4=4.2.3.0" : null) + conda (params.enable_conda ? "bioconda::gatk4=4.2.3.0 conda-forge::openjdk=8.0.312" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/gatk4:4.2.3.0--hdfd78af_0' : 'broadinstitute/gatk:4.2.3.0' }" @@ -45,6 +45,7 @@ process GATK4_MARKDUPLICATES_SPARK { cat <<-END_VERSIONS > versions.yml "${task.process}": gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + openjdk: \$(echo \$(java -version 2>&1) | grep version | sed 's/\"//g' | cut -f3 -d ' ') END_VERSIONS """ } From 5ccf6fbcc913f34ee2897689081d1cf60cecdb35 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Thu, 9 Jun 2022 11:45:27 +0200 Subject: [PATCH 097/114] add optional fasta argument to ensemblvep --- modules/ensemblvep/main.nf | 4 ++++ modules/ensemblvep/meta.yml | 5 +++++ tests/modules/ensemblvep/main.nf | 4 +++- tests/modules/ensemblvep/test.yml | 4 ++-- 4 files changed, 14 insertions(+), 3 deletions(-) diff --git a/modules/ensemblvep/main.nf b/modules/ensemblvep/main.nf index a5a9b1ab..d2efe35f 100644 --- a/modules/ensemblvep/main.nf +++ b/modules/ensemblvep/main.nf @@ -13,6 +13,7 @@ process ENSEMBLVEP { val species val cache_version path cache + path fasta path extra_files output: @@ -27,6 +28,8 @@ process ENSEMBLVEP { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def dir_cache = cache ? "\${PWD}/${cache}" : "/.vep" + def reference = fasta ? "--fasta $fasta" : "" + """ mkdir $prefix @@ -34,6 +37,7 @@ process ENSEMBLVEP { -i $vcf \\ -o ${prefix}.ann.vcf \\ $args \\ + $reference \\ --assembly $genome \\ --species $species \\ --cache \\ diff --git a/modules/ensemblvep/meta.yml b/modules/ensemblvep/meta.yml index 418bb970..9891815d 100644 --- a/modules/ensemblvep/meta.yml +++ b/modules/ensemblvep/meta.yml @@ -36,6 +36,11 @@ input: type: file description: | path to VEP cache (optional) + - fasta: + type: file + description: | + reference FASTA file (optional) + pattern: "*.{fasta,fa}" - extra_files: type: tuple description: | diff --git a/tests/modules/ensemblvep/main.nf b/tests/modules/ensemblvep/main.nf index 30d19957..ad9fa097 100644 --- a/tests/modules/ensemblvep/main.nf +++ b/tests/modules/ensemblvep/main.nf @@ -10,5 +10,7 @@ workflow test_ensemblvep { file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true) ] - ENSEMBLVEP ( input, "WBcel235", "caenorhabditis_elegans", "104", [], [] ) + fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + + ENSEMBLVEP ( input, "WBcel235", "caenorhabditis_elegans", "104", [], fasta, [] ) } diff --git a/tests/modules/ensemblvep/test.yml b/tests/modules/ensemblvep/test.yml index 42384d6e..fdc2aa6a 100644 --- a/tests/modules/ensemblvep/test.yml +++ b/tests/modules/ensemblvep/test.yml @@ -1,7 +1,7 @@ - name: ensemblvep test_ensemblvep - command: nextflow run ./tests/modules/ensemblvep -entry test_ensemblvep -c ./tests/config/nextflow.config -c ./tests/modules/ensemblvep/nextflow.config + command: nextflow run ./tests/modules/ensemblvep -entry test_ensemblvep -c ./tests/config/nextflow.config -c ./tests/modules/ensemblvep/nextflow.config tags: - ensemblvep files: - path: output/ensemblvep/test.ann.vcf - - path: output/ensemblvep/test.summary.html + - path: output/ensemblvep/test.summary.html \ No newline at end of file From a3202e41ec15a57f2e80a69fb580996b3cf9f29b Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Thu, 9 Jun 2022 11:47:24 +0200 Subject: [PATCH 098/114] prettier --- tests/modules/ensemblvep/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/modules/ensemblvep/test.yml b/tests/modules/ensemblvep/test.yml index fdc2aa6a..de854bd2 100644 --- a/tests/modules/ensemblvep/test.yml +++ b/tests/modules/ensemblvep/test.yml @@ -4,4 +4,4 @@ - ensemblvep files: - path: output/ensemblvep/test.ann.vcf - - path: output/ensemblvep/test.summary.html \ No newline at end of file + - path: output/ensemblvep/test.summary.html From 7947bc662cdefa528a552e3a682a6c57f5a2cbe9 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Thu, 9 Jun 2022 11:53:16 +0200 Subject: [PATCH 099/114] added an extra test without fasta --- tests/modules/ensemblvep/main.nf | 13 ++++++++++++- tests/modules/ensemblvep/test.yml | 12 ++++++++++-- 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/tests/modules/ensemblvep/main.nf b/tests/modules/ensemblvep/main.nf index ad9fa097..6b7cc307 100644 --- a/tests/modules/ensemblvep/main.nf +++ b/tests/modules/ensemblvep/main.nf @@ -4,7 +4,7 @@ nextflow.enable.dsl = 2 include { ENSEMBLVEP } from '../../../modules/ensemblvep/main.nf' -workflow test_ensemblvep { +workflow test_ensemblvep_fasta { input = [ [ id:'test' ], // meta map file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true) @@ -14,3 +14,14 @@ workflow test_ensemblvep { ENSEMBLVEP ( input, "WBcel235", "caenorhabditis_elegans", "104", [], fasta, [] ) } + +workflow test_ensemblvep_no_fasta { + input = [ + [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true) + ] + + fasta = [] + + ENSEMBLVEP ( input, "WBcel235", "caenorhabditis_elegans", "104", [], fasta, [] ) +} diff --git a/tests/modules/ensemblvep/test.yml b/tests/modules/ensemblvep/test.yml index de854bd2..effea9d0 100644 --- a/tests/modules/ensemblvep/test.yml +++ b/tests/modules/ensemblvep/test.yml @@ -1,5 +1,13 @@ -- name: ensemblvep test_ensemblvep - command: nextflow run ./tests/modules/ensemblvep -entry test_ensemblvep -c ./tests/config/nextflow.config -c ./tests/modules/ensemblvep/nextflow.config +- name: ensemblvep test_ensemblvep_fasta + command: nextflow run ./tests/modules/ensemblvep -entry test_ensemblvep_fasta -c ./tests/config/nextflow.config -c ./tests/modules/ensemblvep/nextflow.config + tags: + - ensemblvep + files: + - path: output/ensemblvep/test.ann.vcf + - path: output/ensemblvep/test.summary.html + +- name: ensemblvep test_ensemblvep_no_fasta + command: nextflow run ./tests/modules/ensemblvep -entry test_ensemblvep_no_fasta -c ./tests/config/nextflow.config -c ./tests/modules/ensemblvep/nextflow.config tags: - ensemblvep files: From e60323b757faceb0a3135b5aaa29d10653a756ff Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Thu, 9 Jun 2022 11:53:59 +0200 Subject: [PATCH 100/114] typo --- tests/modules/ensemblvep/test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/modules/ensemblvep/test.yml b/tests/modules/ensemblvep/test.yml index effea9d0..7e94d5fc 100644 --- a/tests/modules/ensemblvep/test.yml +++ b/tests/modules/ensemblvep/test.yml @@ -1,5 +1,5 @@ - name: ensemblvep test_ensemblvep_fasta - command: nextflow run ./tests/modules/ensemblvep -entry test_ensemblvep_fasta -c ./tests/config/nextflow.config -c ./tests/modules/ensemblvep/nextflow.config + command: nextflow run ./tests/modules/ensemblvep -entry test_ensemblvep_fasta -c ./tests/config/nextflow.config -c ./tests/modules/ensemblvep/nextflow.config tags: - ensemblvep files: @@ -7,7 +7,7 @@ - path: output/ensemblvep/test.summary.html - name: ensemblvep test_ensemblvep_no_fasta - command: nextflow run ./tests/modules/ensemblvep -entry test_ensemblvep_no_fasta -c ./tests/config/nextflow.config -c ./tests/modules/ensemblvep/nextflow.config + command: nextflow run ./tests/modules/ensemblvep -entry test_ensemblvep_no_fasta -c ./tests/config/nextflow.config -c ./tests/modules/ensemblvep/nextflow.config tags: - ensemblvep files: From 6d10909e9f2d7e6ab42096711c3d666aff981718 Mon Sep 17 00:00:00 2001 From: nvnieuwk <101190534+nvnieuwk@users.noreply.github.com> Date: Thu, 9 Jun 2022 11:55:41 +0200 Subject: [PATCH 101/114] Update tests/modules/ensemblvep/main.nf Co-authored-by: Maxime U. Garcia --- tests/modules/ensemblvep/main.nf | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/modules/ensemblvep/main.nf b/tests/modules/ensemblvep/main.nf index 6b7cc307..3c8afada 100644 --- a/tests/modules/ensemblvep/main.nf +++ b/tests/modules/ensemblvep/main.nf @@ -21,7 +21,5 @@ workflow test_ensemblvep_no_fasta { file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true) ] - fasta = [] - - ENSEMBLVEP ( input, "WBcel235", "caenorhabditis_elegans", "104", [], fasta, [] ) + ENSEMBLVEP ( input, "WBcel235", "caenorhabditis_elegans", "104", [], [], [] ) } From ef5b0dd977d19cbd2a233bb0783cd02aaedc208b Mon Sep 17 00:00:00 2001 From: Rike Date: Thu, 9 Jun 2022 14:54:19 +0200 Subject: [PATCH 102/114] gatk4 version update to 4.2.6.1 --- modules/gatk4/markduplicatesspark/main.nf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/gatk4/markduplicatesspark/main.nf b/modules/gatk4/markduplicatesspark/main.nf index db6a1aa1..0bafe7ce 100644 --- a/modules/gatk4/markduplicatesspark/main.nf +++ b/modules/gatk4/markduplicatesspark/main.nf @@ -2,10 +2,10 @@ process GATK4_MARKDUPLICATES_SPARK { tag "$meta.id" label 'process_high' - conda (params.enable_conda ? "bioconda::gatk4=4.2.3.0 conda-forge::openjdk=8.0.312" : null) + conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1 conda-forge::openjdk=8.0.312" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.2.3.0--hdfd78af_0' : - 'broadinstitute/gatk:4.2.3.0' }" + 'https://depot.galaxyproject.org/singularity/gatk4:4.2.6.1--hdfd78af_0': + 'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }" input: tuple val(meta), path(bam) From fe9e4ece00c03c1082e0c9f911cf1f2fdc065941 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Thu, 9 Jun 2022 14:56:55 +0200 Subject: [PATCH 103/114] new module gatk4/reblockgvcf --- modules/gatk4/reblockgvcf/main.nf | 52 +++++++++++++ modules/gatk4/reblockgvcf/meta.yml | 74 +++++++++++++++++++ tests/config/pytest_modules.yml | 4 + tests/modules/gatk4/reblockgvcf/main.nf | 55 ++++++++++++++ .../modules/gatk4/reblockgvcf/nextflow.config | 5 ++ tests/modules/gatk4/reblockgvcf/test.yml | 32 ++++++++ 6 files changed, 222 insertions(+) create mode 100644 modules/gatk4/reblockgvcf/main.nf create mode 100644 modules/gatk4/reblockgvcf/meta.yml create mode 100644 tests/modules/gatk4/reblockgvcf/main.nf create mode 100644 tests/modules/gatk4/reblockgvcf/nextflow.config create mode 100644 tests/modules/gatk4/reblockgvcf/test.yml diff --git a/modules/gatk4/reblockgvcf/main.nf b/modules/gatk4/reblockgvcf/main.nf new file mode 100644 index 00000000..d27ac222 --- /dev/null +++ b/modules/gatk4/reblockgvcf/main.nf @@ -0,0 +1,52 @@ +process GATK4_REBLOCKGVCF { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gatk4:4.2.6.1--hdfd78af_0': + 'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }" + + input: + tuple val(meta), path(gvcf), path(tbi), path(intervals) + path fasta + path fai + path dict + path dbsnp + path dbsnp_tbi + + output: + tuple val(meta), path("*.reblock.g.vcf.gz"), path("*.tbi") , emit: bam + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def dbsnp_command = dbsnp ? "--dbsnp $dbsnp" : "" + def interval_command = intervals ? "--intervals $intervals" : "" + + def avail_mem = 3 + if (!task.memory) { + log.info '[GATK ReblockGVCF] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = task.memory.giga + } + """ + gatk --java-options "-Xmx${avail_mem}g" ReblockGVCF \\ + --variant $gvcf \\ + --output ${prefix}.reblock.g.vcf.gz \\ + --reference $fasta \\ + $dbsnp_command \\ + $interval_command \\ + --tmp-dir . \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/gatk4/reblockgvcf/meta.yml b/modules/gatk4/reblockgvcf/meta.yml new file mode 100644 index 00000000..407eeec2 --- /dev/null +++ b/modules/gatk4/reblockgvcf/meta.yml @@ -0,0 +1,74 @@ +name: "gatk4_reblockgvcf" +description: Condenses homRef blocks in a single-sample GVCF +keywords: + - gatk4 + - reblockgvcf + - gvcf +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - gvcf: + type: file + description: GVCF file created using HaplotypeCaller using the '-ERC GVCF' or '-ERC BP_RESOLUTION' mode + pattern: "*.{vcf,gvcf}.gz" + - tbi: + type: file + description: Index of the GVCF file + pattern: "*.tbi" + - intervals: + type: file + description: Bed file with the genomic regions included in the library (optional) + - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + - fai: + type: file + description: Index of reference fasta file + pattern: "fasta.fai" + - dict: + type: file + description: GATK sequence dictionary + pattern: "*.dict" + - dbsnp: + type: file + description: VCF file containing known sites (optional) + - dbsnp_tbi: + type: file + description: VCF index of dbsnp (optional) + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - gvcf: + type: file + description: Filtered GVCF + pattern: "*reblock.g.vcf.gz" + - tbi: + type: file + description: Index of the filtered GVCF + pattern: "*reblock.g.vcf.gz.tbi" + +authors: + - "@nvnieuwk" diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index b3a5a24c..7325f02d 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -859,6 +859,10 @@ gatk4/mutect2: - modules/gatk4/mutect2/** - tests/modules/gatk4/mutect2/** +gatk4/reblockgvcf: + - modules/gatk4/reblockgvcf/** + - tests/modules/gatk4/reblockgvcf/** + gatk4/revertsam: - modules/gatk4/revertsam/** - tests/modules/gatk4/revertsam/** diff --git a/tests/modules/gatk4/reblockgvcf/main.nf b/tests/modules/gatk4/reblockgvcf/main.nf new file mode 100644 index 00000000..2233a5c5 --- /dev/null +++ b/tests/modules/gatk4/reblockgvcf/main.nf @@ -0,0 +1,55 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { GATK4_REBLOCKGVCF } from '../../../../modules/gatk4/reblockgvcf/main.nf' + +workflow test_gatk4_reblockgvcf { + + input = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true), + [] + ] + + fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + fasta_index = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true) + + GATK4_REBLOCKGVCF ( input, fasta, fasta_index, dict, [], [] ) +} + +workflow test_gatk4_reblockgvcf_intervals { + + input = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true), + file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true) + ] + + fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + fasta_index = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true) + + GATK4_REBLOCKGVCF ( input, fasta, fasta_index, dict, [], [] ) +} + +workflow test_gatk4_reblockgvcf_dbsnp { + + input = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_haplotc_cnn_vcf_gz'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_haplotc_cnn_vcf_gz_tbi'], checkIfExists: true), + [] + ] + + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + fasta_index = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) + dbsnp = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz'], checkIfExists: true) + dbsnp_tbi = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz_tbi'], checkIfExists: true) + + GATK4_REBLOCKGVCF ( input, fasta, fasta_index, dict, dbsnp, dbsnp_tbi ) +} \ No newline at end of file diff --git a/tests/modules/gatk4/reblockgvcf/nextflow.config b/tests/modules/gatk4/reblockgvcf/nextflow.config new file mode 100644 index 00000000..50f50a7a --- /dev/null +++ b/tests/modules/gatk4/reblockgvcf/nextflow.config @@ -0,0 +1,5 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + +} \ No newline at end of file diff --git a/tests/modules/gatk4/reblockgvcf/test.yml b/tests/modules/gatk4/reblockgvcf/test.yml new file mode 100644 index 00000000..e5461341 --- /dev/null +++ b/tests/modules/gatk4/reblockgvcf/test.yml @@ -0,0 +1,32 @@ +- name: gatk4 reblockgvcf test_gatk4_reblockgvcf + command: nextflow run ./tests/modules/gatk4/reblockgvcf -entry test_gatk4_reblockgvcf -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/reblockgvcf/nextflow.config + tags: + - gatk4/reblockgvcf + - gatk4 + files: + - path: output/gatk4/test.reblock.g.vcf.gz + contains: '[ # TODO nf-core: file md5sum was variable, please replace this text with a string found in the file instead ]' + - path: output/gatk4/test.reblock.g.vcf.gz.tbi + md5sum: e1aab7d826a151828fd0671ec5aed2e0 + +- name: gatk4 reblockgvcf test_gatk4_reblockgvcf_intervals + command: nextflow run ./tests/modules/gatk4/reblockgvcf -entry test_gatk4_reblockgvcf_intervals -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/reblockgvcf/nextflow.config + tags: + - gatk4/reblockgvcf + - gatk4 + files: + - path: output/gatk4/test.reblock.g.vcf.gz + contains: '[ # TODO nf-core: file md5sum was variable, please replace this text with a string found in the file instead ]' + - path: output/gatk4/test.reblock.g.vcf.gz.tbi + md5sum: e7ca7e9fe76ce12198fd54ec9a64fad4 + +- name: gatk4 reblockgvcf test_gatk4_reblockgvcf_dbsnp + command: nextflow run ./tests/modules/gatk4/reblockgvcf -entry test_gatk4_reblockgvcf_dbsnp -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/reblockgvcf/nextflow.config + tags: + - gatk4/reblockgvcf + - gatk4 + files: + - path: output/gatk4/test.reblock.g.vcf.gz + contains: '[ # TODO nf-core: file md5sum was variable, please replace this text with a string found in the file instead ]' + - path: output/gatk4/test.reblock.g.vcf.gz.tbi + md5sum: 017edea27a253eb51cc4505d00dcb295 From 14d38e43cf00c42a09a9b6e80914daa0ef135de1 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Thu, 9 Jun 2022 14:58:20 +0200 Subject: [PATCH 104/114] fix test.yml --- tests/modules/gatk4/reblockgvcf/test.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/modules/gatk4/reblockgvcf/test.yml b/tests/modules/gatk4/reblockgvcf/test.yml index e5461341..97f1515f 100644 --- a/tests/modules/gatk4/reblockgvcf/test.yml +++ b/tests/modules/gatk4/reblockgvcf/test.yml @@ -5,7 +5,6 @@ - gatk4 files: - path: output/gatk4/test.reblock.g.vcf.gz - contains: '[ # TODO nf-core: file md5sum was variable, please replace this text with a string found in the file instead ]' - path: output/gatk4/test.reblock.g.vcf.gz.tbi md5sum: e1aab7d826a151828fd0671ec5aed2e0 @@ -16,7 +15,6 @@ - gatk4 files: - path: output/gatk4/test.reblock.g.vcf.gz - contains: '[ # TODO nf-core: file md5sum was variable, please replace this text with a string found in the file instead ]' - path: output/gatk4/test.reblock.g.vcf.gz.tbi md5sum: e7ca7e9fe76ce12198fd54ec9a64fad4 @@ -27,6 +25,5 @@ - gatk4 files: - path: output/gatk4/test.reblock.g.vcf.gz - contains: '[ # TODO nf-core: file md5sum was variable, please replace this text with a string found in the file instead ]' - path: output/gatk4/test.reblock.g.vcf.gz.tbi md5sum: 017edea27a253eb51cc4505d00dcb295 From b9e397ff15a98c8b70099579f7645a32c467d65b Mon Sep 17 00:00:00 2001 From: Rike Date: Thu, 9 Jun 2022 15:06:46 +0200 Subject: [PATCH 105/114] use broadinstitute container --- modules/gatk4/markduplicatesspark/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/gatk4/markduplicatesspark/main.nf b/modules/gatk4/markduplicatesspark/main.nf index 0bafe7ce..9f77821e 100644 --- a/modules/gatk4/markduplicatesspark/main.nf +++ b/modules/gatk4/markduplicatesspark/main.nf @@ -5,7 +5,7 @@ process GATK4_MARKDUPLICATES_SPARK { conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1 conda-forge::openjdk=8.0.312" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/gatk4:4.2.6.1--hdfd78af_0': - 'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }" + 'broadinstitute/gatk4:4.2.6.1' }" input: tuple val(meta), path(bam) From 81fff7384d8342439cf4fc4cce51433d979c97a2 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Thu, 9 Jun 2022 15:08:02 +0200 Subject: [PATCH 106/114] small fix and typo --- modules/gatk4/reblockgvcf/main.nf | 6 +++--- modules/gatk4/reblockgvcf/meta.yml | 4 ++-- tests/modules/gatk4/reblockgvcf/test.yml | 3 --- 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/modules/gatk4/reblockgvcf/main.nf b/modules/gatk4/reblockgvcf/main.nf index d27ac222..bbd8e2ae 100644 --- a/modules/gatk4/reblockgvcf/main.nf +++ b/modules/gatk4/reblockgvcf/main.nf @@ -16,8 +16,8 @@ process GATK4_REBLOCKGVCF { path dbsnp_tbi output: - tuple val(meta), path("*.reblock.g.vcf.gz"), path("*.tbi") , emit: bam - path "versions.yml" , emit: versions + tuple val(meta), path("*.rb.g.vcf.gz"), path("*.tbi") , emit: bam + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -37,7 +37,7 @@ process GATK4_REBLOCKGVCF { """ gatk --java-options "-Xmx${avail_mem}g" ReblockGVCF \\ --variant $gvcf \\ - --output ${prefix}.reblock.g.vcf.gz \\ + --output ${prefix}.rb.g.vcf.gz \\ --reference $fasta \\ $dbsnp_command \\ $interval_command \\ diff --git a/modules/gatk4/reblockgvcf/meta.yml b/modules/gatk4/reblockgvcf/meta.yml index 407eeec2..23518416 100644 --- a/modules/gatk4/reblockgvcf/meta.yml +++ b/modules/gatk4/reblockgvcf/meta.yml @@ -64,11 +64,11 @@ output: - gvcf: type: file description: Filtered GVCF - pattern: "*reblock.g.vcf.gz" + pattern: "*rb.g.vcf.gz" - tbi: type: file description: Index of the filtered GVCF - pattern: "*reblock.g.vcf.gz.tbi" + pattern: "*rb.g.vcf.gz.tbi" authors: - "@nvnieuwk" diff --git a/tests/modules/gatk4/reblockgvcf/test.yml b/tests/modules/gatk4/reblockgvcf/test.yml index 97f1515f..9cef81ae 100644 --- a/tests/modules/gatk4/reblockgvcf/test.yml +++ b/tests/modules/gatk4/reblockgvcf/test.yml @@ -6,7 +6,6 @@ files: - path: output/gatk4/test.reblock.g.vcf.gz - path: output/gatk4/test.reblock.g.vcf.gz.tbi - md5sum: e1aab7d826a151828fd0671ec5aed2e0 - name: gatk4 reblockgvcf test_gatk4_reblockgvcf_intervals command: nextflow run ./tests/modules/gatk4/reblockgvcf -entry test_gatk4_reblockgvcf_intervals -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/reblockgvcf/nextflow.config @@ -16,7 +15,6 @@ files: - path: output/gatk4/test.reblock.g.vcf.gz - path: output/gatk4/test.reblock.g.vcf.gz.tbi - md5sum: e7ca7e9fe76ce12198fd54ec9a64fad4 - name: gatk4 reblockgvcf test_gatk4_reblockgvcf_dbsnp command: nextflow run ./tests/modules/gatk4/reblockgvcf -entry test_gatk4_reblockgvcf_dbsnp -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/reblockgvcf/nextflow.config @@ -26,4 +24,3 @@ files: - path: output/gatk4/test.reblock.g.vcf.gz - path: output/gatk4/test.reblock.g.vcf.gz.tbi - md5sum: 017edea27a253eb51cc4505d00dcb295 From 89a6a03e38b8b5869a2e1e5abfe4b4cc4938c622 Mon Sep 17 00:00:00 2001 From: Rike Date: Thu, 9 Jun 2022 15:09:19 +0200 Subject: [PATCH 107/114] fix container tag --- modules/gatk4/markduplicatesspark/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/gatk4/markduplicatesspark/main.nf b/modules/gatk4/markduplicatesspark/main.nf index 9f77821e..52faf5bc 100644 --- a/modules/gatk4/markduplicatesspark/main.nf +++ b/modules/gatk4/markduplicatesspark/main.nf @@ -5,7 +5,7 @@ process GATK4_MARKDUPLICATES_SPARK { conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1 conda-forge::openjdk=8.0.312" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/gatk4:4.2.6.1--hdfd78af_0': - 'broadinstitute/gatk4:4.2.6.1' }" + 'broadinstitute/gatk:4.2.6.1' }" input: tuple val(meta), path(bam) From 873215c8ae3882e3ce1c8c62fbae16e74d631270 Mon Sep 17 00:00:00 2001 From: nvnieuwk <101190534+nvnieuwk@users.noreply.github.com> Date: Thu, 9 Jun 2022 15:11:18 +0200 Subject: [PATCH 108/114] Update modules/gatk4/reblockgvcf/main.nf Co-authored-by: Maxime U. Garcia --- modules/gatk4/reblockgvcf/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/gatk4/reblockgvcf/main.nf b/modules/gatk4/reblockgvcf/main.nf index bbd8e2ae..5640e8ae 100644 --- a/modules/gatk4/reblockgvcf/main.nf +++ b/modules/gatk4/reblockgvcf/main.nf @@ -16,7 +16,7 @@ process GATK4_REBLOCKGVCF { path dbsnp_tbi output: - tuple val(meta), path("*.rb.g.vcf.gz"), path("*.tbi") , emit: bam + tuple val(meta), path("*.rb.g.vcf.gz"), path("*.tbi") , emit: vcf path "versions.yml" , emit: versions when: From c50df9ad04900067adf6ddfc855a485633813bc6 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Thu, 9 Jun 2022 15:14:37 +0200 Subject: [PATCH 109/114] fix test.yml --- tests/modules/gatk4/reblockgvcf/test.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/modules/gatk4/reblockgvcf/test.yml b/tests/modules/gatk4/reblockgvcf/test.yml index 9cef81ae..623f58a6 100644 --- a/tests/modules/gatk4/reblockgvcf/test.yml +++ b/tests/modules/gatk4/reblockgvcf/test.yml @@ -4,8 +4,8 @@ - gatk4/reblockgvcf - gatk4 files: - - path: output/gatk4/test.reblock.g.vcf.gz - - path: output/gatk4/test.reblock.g.vcf.gz.tbi + - path: output/gatk4/test.rb.g.vcf.gz + - path: output/gatk4/test.rb.g.vcf.gz.tbi - name: gatk4 reblockgvcf test_gatk4_reblockgvcf_intervals command: nextflow run ./tests/modules/gatk4/reblockgvcf -entry test_gatk4_reblockgvcf_intervals -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/reblockgvcf/nextflow.config @@ -13,8 +13,8 @@ - gatk4/reblockgvcf - gatk4 files: - - path: output/gatk4/test.reblock.g.vcf.gz - - path: output/gatk4/test.reblock.g.vcf.gz.tbi + - path: output/gatk4/test.rb.g.vcf.gz + - path: output/gatk4/test.rb.g.vcf.gz.tbi - name: gatk4 reblockgvcf test_gatk4_reblockgvcf_dbsnp command: nextflow run ./tests/modules/gatk4/reblockgvcf -entry test_gatk4_reblockgvcf_dbsnp -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/reblockgvcf/nextflow.config @@ -22,5 +22,5 @@ - gatk4/reblockgvcf - gatk4 files: - - path: output/gatk4/test.reblock.g.vcf.gz - - path: output/gatk4/test.reblock.g.vcf.gz.tbi + - path: output/gatk4/test.rb.g.vcf.gz + - path: output/gatk4/test.rb.g.vcf.gz.tbi From 8e8f4c9c51a7b229dd45e9b287d48115c238baf3 Mon Sep 17 00:00:00 2001 From: Rike Date: Thu, 9 Jun 2022 15:16:11 +0200 Subject: [PATCH 110/114] remove singulairty container --- modules/gatk4/markduplicatesspark/main.nf | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/modules/gatk4/markduplicatesspark/main.nf b/modules/gatk4/markduplicatesspark/main.nf index 52faf5bc..b8c315fc 100644 --- a/modules/gatk4/markduplicatesspark/main.nf +++ b/modules/gatk4/markduplicatesspark/main.nf @@ -3,9 +3,7 @@ process GATK4_MARKDUPLICATES_SPARK { label 'process_high' conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1 conda-forge::openjdk=8.0.312" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.2.6.1--hdfd78af_0': - 'broadinstitute/gatk:4.2.6.1' }" + container 'broadinstitute/gatk:4.2.6.1' input: tuple val(meta), path(bam) From 6a4732ef3b76b54d75533e6be9ba57e3008d4853 Mon Sep 17 00:00:00 2001 From: Rike Date: Thu, 9 Jun 2022 15:29:00 +0200 Subject: [PATCH 111/114] similar issues arise with bqsrspark, also use broadinstitute container --- modules/gatk4/applybqsrspark/main.nf | 6 ++---- modules/gatk4/baserecalibratorspark/main.nf | 6 ++---- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/modules/gatk4/applybqsrspark/main.nf b/modules/gatk4/applybqsrspark/main.nf index 9d7891ba..5890278a 100644 --- a/modules/gatk4/applybqsrspark/main.nf +++ b/modules/gatk4/applybqsrspark/main.nf @@ -2,10 +2,8 @@ process GATK4_APPLYBQSR_SPARK { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.2.6.1--hdfd78af_0': - 'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }" + conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1 conda-forge::openjdk=8.0.312" : null) + container 'broadinstitute/gatk:4.2.6.1' input: tuple val(meta), path(input), path(input_index), path(bqsr_table), path(intervals) diff --git a/modules/gatk4/baserecalibratorspark/main.nf b/modules/gatk4/baserecalibratorspark/main.nf index 79988b72..755122cb 100644 --- a/modules/gatk4/baserecalibratorspark/main.nf +++ b/modules/gatk4/baserecalibratorspark/main.nf @@ -2,10 +2,8 @@ process GATK4_BASERECALIBRATOR_SPARK { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.2.6.1--hdfd78af_0': - 'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }" + conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1 conda-forge::openjdk=8.0.312" : null) + container 'broadinstitute/gatk:4.2.6.1' input: tuple val(meta), path(input), path(input_index), path(intervals) From 95633c6853c3a33a7cf65720381874582cd3b7c8 Mon Sep 17 00:00:00 2001 From: Rike Date: Thu, 9 Jun 2022 15:30:47 +0200 Subject: [PATCH 112/114] update checksum --- tests/modules/gatk4/markduplicatesspark/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/modules/gatk4/markduplicatesspark/test.yml b/tests/modules/gatk4/markduplicatesspark/test.yml index 4ef9b916..1ef7f543 100644 --- a/tests/modules/gatk4/markduplicatesspark/test.yml +++ b/tests/modules/gatk4/markduplicatesspark/test.yml @@ -5,7 +5,7 @@ - gatk4/markduplicatesspark files: - path: output/gatk4/test.bam - md5sum: dc1a09ac6371aab7c50d1a554baa06d3 + md5sum: 4b5abb065d498bece619f9bd4cc929e1 - path: output/gatk4/versions.yml - name: gatk4 markduplicatesspark test_gatk4_markduplicates_spark_multiple_bams From a5583143c46e2b7b56b513e61141a36b5c0e29d3 Mon Sep 17 00:00:00 2001 From: Rike Date: Thu, 9 Jun 2022 15:38:28 +0200 Subject: [PATCH 113/114] update checksum --- tests/modules/gatk4/applybqsrspark/test.yml | 2 +- tests/modules/gatk4/markduplicatesspark/test.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/modules/gatk4/applybqsrspark/test.yml b/tests/modules/gatk4/applybqsrspark/test.yml index 1e4f8b47..5a1c3df6 100644 --- a/tests/modules/gatk4/applybqsrspark/test.yml +++ b/tests/modules/gatk4/applybqsrspark/test.yml @@ -15,7 +15,7 @@ - gatk4/applybqsrspark files: - path: output/gatk4/test.bam - md5sum: 2ca2446f0125890280056fd7da822732 + md5sum: db27196dbd626edb0509aac4589dedcf - path: output/gatk4/versions.yml - name: gatk4 applybqsr test_gatk4_applybqsr_spark_cram diff --git a/tests/modules/gatk4/markduplicatesspark/test.yml b/tests/modules/gatk4/markduplicatesspark/test.yml index 1ef7f543..4ef9b916 100644 --- a/tests/modules/gatk4/markduplicatesspark/test.yml +++ b/tests/modules/gatk4/markduplicatesspark/test.yml @@ -5,7 +5,7 @@ - gatk4/markduplicatesspark files: - path: output/gatk4/test.bam - md5sum: 4b5abb065d498bece619f9bd4cc929e1 + md5sum: dc1a09ac6371aab7c50d1a554baa06d3 - path: output/gatk4/versions.yml - name: gatk4 markduplicatesspark test_gatk4_markduplicates_spark_multiple_bams From 30262d1fdd032df12c2dedca55099d8eaaf7b136 Mon Sep 17 00:00:00 2001 From: Rike Date: Thu, 9 Jun 2022 15:44:12 +0200 Subject: [PATCH 114/114] remove variable checksum --- tests/modules/gatk4/applybqsrspark/test.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/modules/gatk4/applybqsrspark/test.yml b/tests/modules/gatk4/applybqsrspark/test.yml index 5a1c3df6..b7b8c72d 100644 --- a/tests/modules/gatk4/applybqsrspark/test.yml +++ b/tests/modules/gatk4/applybqsrspark/test.yml @@ -15,7 +15,6 @@ - gatk4/applybqsrspark files: - path: output/gatk4/test.bam - md5sum: db27196dbd626edb0509aac4589dedcf - path: output/gatk4/versions.yml - name: gatk4 applybqsr test_gatk4_applybqsr_spark_cram