From 44e13ae439e0788db8afb227e2801815c459982a Mon Sep 17 00:00:00 2001 From: "Robert A. Petit III" Date: Tue, 10 May 2022 21:31:43 +0000 Subject: [PATCH 01/18] add module for snippy --- modules/snippy/run/main.nf | 55 ++++++++++++ modules/snippy/run/meta.yml | 110 +++++++++++++++++++++++ tests/config/pytest_modules.yml | 12 ++- tests/modules/snippy/run/main.nf | 16 ++++ tests/modules/snippy/run/nextflow.config | 5 ++ tests/modules/snippy/run/test.yml | 39 ++++++++ 6 files changed, 233 insertions(+), 4 deletions(-) create mode 100644 modules/snippy/run/main.nf create mode 100644 modules/snippy/run/meta.yml create mode 100644 tests/modules/snippy/run/main.nf create mode 100644 tests/modules/snippy/run/nextflow.config create mode 100644 tests/modules/snippy/run/test.yml diff --git a/modules/snippy/run/main.nf b/modules/snippy/run/main.nf new file mode 100644 index 00000000..73c4b94d --- /dev/null +++ b/modules/snippy/run/main.nf @@ -0,0 +1,55 @@ +process SNIPPY_RUN { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? "bioconda::snippy=4.6.0" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/snippy:4.6.0--hdfd78af_2' : + 'quay.io/biocontainers/snippy:4.6.0--hdfd78af_2' }" + + input: + tuple val(meta), path(reads) + path reference + + output: + tuple val(meta), path("${prefix}/${prefix}.tab") , emit: tab + tuple val(meta), path("${prefix}/${prefix}.csv") , emit: csv + tuple val(meta), path("${prefix}/${prefix}.html") , emit: html + tuple val(meta), path("${prefix}/${prefix}.vcf") , emit: vcf + tuple val(meta), path("${prefix}/${prefix}.bed") , emit: bed + tuple val(meta), path("${prefix}/${prefix}.gff") , emit: gff + tuple val(meta), path("${prefix}/${prefix}.bam") , emit: bam + tuple val(meta), path("${prefix}/${prefix}.bam.bai") , emit: bai + tuple val(meta), path("${prefix}/${prefix}.log") , emit: log + tuple val(meta), path("${prefix}/${prefix}.aligned.fa") , emit: aligned_fa + tuple val(meta), path("${prefix}/${prefix}.consensus.fa") , emit: consensus_fa + tuple val(meta), path("${prefix}/${prefix}.consensus.subs.fa"), emit: consensus_subs_fa + tuple val(meta), path("${prefix}/${prefix}.raw.vcf") , emit: raw_vcf + tuple val(meta), path("${prefix}/${prefix}.filt.vcf") , emit: filt_vcf + tuple val(meta), path("${prefix}/${prefix}.vcf.gz") , emit: vcf_gz + tuple val(meta), path("${prefix}/${prefix}.vcf.gz.csi") , emit: vcf_csi + tuple val(meta), path("${prefix}/${prefix}.txt") , emit: txt + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def read_inputs = meta.single_end ?
"--se ${reads[0]}" : "--R1 ${reads[0]} --R2 ${reads[1]}" + """ + snippy \\ + $args \\ + --cpus $task.cpus \\ + --outdir $prefix \\ + --reference $reference \\ + --prefix $prefix \\ + $read_inputs + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + snippy: \$(echo \$(snippy --version 2>&1) | sed 's/snippy //') + END_VERSIONS + """ +} diff --git a/modules/snippy/run/meta.yml b/modules/snippy/run/meta.yml new file mode 100644 index 00000000..fa176399 --- /dev/null +++ b/modules/snippy/run/meta.yml @@ -0,0 +1,110 @@ +name: snippy +description: Rapid haploid variant calling +keywords: + - variant + - fastq + - bacteria +tools: + - snippy: + description: "Rapid bacterial SNP calling and core genome alignments" + homepage: "https://github.com/tseemann/snippy" + documentation: "https://github.com/tseemann/snippy" + tool_dev_url: "https://github.com/tseemann/snippy" + doi: "" + licence: "['GPL v2']" +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + pattern: "*.{fq,fastq,fq.gz,fastq.gz}" + - reference: + type: file + description: Reference genome in GenBank (preferred) or FASTA format + pattern: "*.{gbk,gbk.gz,fa,fa.gz}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g.
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - tab: + type: file + description: A simple tab-separated summary of all the variants + pattern: "*.tab" + - csv: + type: file + description: A comma-separated version of the .tab file + pattern: "*.csv" + - html: + type: file + description: A HTML version of the .tab file + pattern: "*.html" + - vcf: + type: file + description: The final annotated variants in VCF format + pattern: "*.vcf" + - bed: + type: file + description: The variants in BED format + pattern: "*.bed" + - gff: + type: file + description: The variants in GFF3 format + pattern: "*.gff" + - bam: + type: file + description: The alignments in BAM format. Includes unmapped, multimapping reads. Excludes duplicates. + pattern: "*.bam" + - bai: + type: file + description: Index for the .bam file + pattern: "*.bam.bai" + - log: + type: file + description: A log file with the commands run and their outputs + pattern: "*.log" + - aligned_fa: + type: file + description: A version of the reference but with - at position with depth=0 and N for 0 < depth < --mincov (does not have variants) + pattern: "*.aligned.fa" + - consensus_fa: + type: file + description: A version of the reference genome with all variants instantiated + pattern: "*.consensus.fa" + - consensus_subs_fa: + type: file + description: A version of the reference genome with only substitution variants instantiated + pattern: "*.consensus.subs.fa" + - raw_vcf: + type: file + description: The unfiltered variant calls from Freebayes + pattern: "*.raw.vcf" + - filt_vcf: + type: file + description: The filtered variant calls from Freebayes + pattern: "*.filt.vcf" + - vcf_gz: + type: file + description: Compressed .vcf file via BGZIP + pattern: "*.vcf.gz" + - vcf_csi: + type: file + description: Index for the .vcf.gz via bcftools index + pattern: "*.vcf.gz.csi" + - txt: + type: file + description: Tab-separated 
columnar list of statistics + pattern: "*.txt" +authors: + - "@rpetit3" diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index 9afe83fd..5a204ac4 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -1643,14 +1643,14 @@ samtools/bam2fq: - modules/samtools/bam2fq/** - tests/modules/samtools/bam2fq/** -samtools/convert: - - modules/samtools/convert/** - - tests/modules/samtools/convert/** - samtools/collatefastq: - modules/samtools/collatefastq/** - tests/modules/samtools/collatefastq/** +samtools/convert: + - modules/samtools/convert/** + - tests/modules/samtools/convert/** + samtools/depth: - modules/samtools/depth/** - tests/modules/samtools/depth/** @@ -1787,6 +1787,10 @@ snapaligner/index: - modules/snapaligner/index/** - tests/modules/snapaligner/index/** +snippy/run: + - modules/snippy/run/** + - tests/modules/snippy/run/** + snpdists: - modules/snpdists/** - tests/modules/snpdists/** diff --git a/tests/modules/snippy/run/main.nf b/tests/modules/snippy/run/main.nf new file mode 100644 index 00000000..bfc4082e --- /dev/null +++ b/tests/modules/snippy/run/main.nf @@ -0,0 +1,16 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { SNIPPY_RUN } from '../../../../modules/snippy/run/main.nf' + +workflow test_snippy_run { + + input = [ [ id:'test', single_end:false ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ] + ] + reference = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + + SNIPPY_RUN ( input, reference ) +} diff --git a/tests/modules/snippy/run/nextflow.config b/tests/modules/snippy/run/nextflow.config new file mode 100644 index 00000000..50f50a7a --- /dev/null +++ b/tests/modules/snippy/run/nextflow.config @@ -0,0 +1,5 @@ +process { + + publishDir = { 
"${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + +} \ No newline at end of file diff --git a/tests/modules/snippy/run/test.yml b/tests/modules/snippy/run/test.yml new file mode 100644 index 00000000..e38e8b9e --- /dev/null +++ b/tests/modules/snippy/run/test.yml @@ -0,0 +1,39 @@ +- name: snippy run test_snippy_run + command: nextflow run tests/modules/snippy/run -entry test_snippy_run -c tests/config/nextflow.config -c tests/modules/snippy/run/nextflow.config + tags: + - snippy + - snippy/run + files: + - path: output/snippy/test/test.aligned.fa + md5sum: 47e3390d4167edf1955d162d37aca5e3 + - path: output/snippy/test/test.bam + - path: output/snippy/test/test.bam.bai + - path: output/snippy/test/test.bed + md5sum: d41d8cd98f00b204e9800998ecf8427e + - path: output/snippy/test/test.consensus.fa + md5sum: 483f4a5dfe60171c86ee9b7e6dff908b + - path: output/snippy/test/test.consensus.subs.fa + md5sum: 483f4a5dfe60171c86ee9b7e6dff908b + - path: output/snippy/test/test.csv + md5sum: 322f942115e5945c2041a88246166703 + - path: output/snippy/test/test.filt.vcf + contains: ['fileformat', 'freebayes', 'CHROM'] + - path: output/snippy/test/test.gff + md5sum: df19e1b84ba6f691d20c72b397c88abf + - path: output/snippy/test/test.html + md5sum: 1ccbf0ffcadae1a6b2e11681d24c9938 + - path: output/snippy/test/test.log + contains: ['snippy', 'consensus', 'subs'] + - path: output/snippy/test/test.raw.vcf + contains: ['fileformat', 'freebayes', 'CHROM'] + - path: output/snippy/test/test.tab + md5sum: beb9bde3bce985e53e8feba9ec5b136e + - path: output/snippy/test/test.txt + contains: ['DateTime', 'ReadFiles', 'VariantTotal'] + - path: output/snippy/test/test.vcf + contains: ['fileformat', 'freebayes', 'CHROM'] + - path: output/snippy/test/test.vcf.gz + - path: output/snippy/test/test.vcf.gz.csi + md5sum: bed9fa291c220a1ba04eb2d448932ffc + - path: output/snippy/versions.yml + md5sum: 518aad56c4dbefb6cbcde5ab38cf7b5d From 
f15848d4774f095f43594516769fde889a7a3d12 Mon Sep 17 00:00:00 2001 From: "Robert A. Petit III" Date: Tue, 10 May 2022 21:44:40 +0000 Subject: [PATCH 02/18] fix typo --- modules/snippy/run/meta.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/snippy/run/meta.yml b/modules/snippy/run/meta.yml index fa176399..13c923d8 100644 --- a/modules/snippy/run/meta.yml +++ b/modules/snippy/run/meta.yml @@ -100,7 +100,7 @@ output: pattern: "*.vcf.gz" - vcf_csi: type: file - description: Index for the .vcf.gz via bcftools index + description: Index for the .vcf.gz via bcftools index pattern: "*.vcf.gz.csi" - txt: type: file From 56e94f625e69e48de7c218b79857cc3a59cb9220 Mon Sep 17 00:00:00 2001 From: "Robert A. Petit III" Date: Tue, 10 May 2022 21:47:11 +0000 Subject: [PATCH 03/18] remove empty file md5 --- tests/modules/snippy/run/test.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/modules/snippy/run/test.yml b/tests/modules/snippy/run/test.yml index e38e8b9e..ac945e88 100644 --- a/tests/modules/snippy/run/test.yml +++ b/tests/modules/snippy/run/test.yml @@ -9,7 +9,6 @@ - path: output/snippy/test/test.bam - path: output/snippy/test/test.bam.bai - path: output/snippy/test/test.bed - md5sum: d41d8cd98f00b204e9800998ecf8427e - path: output/snippy/test/test.consensus.fa md5sum: 483f4a5dfe60171c86ee9b7e6dff908b - path: output/snippy/test/test.consensus.subs.fa From ef2996e4a5f4f74b4b49bbbd308c3142de8d894d Mon Sep 17 00:00:00 2001 From: "Robert A. 
Petit III" Date: Tue, 10 May 2022 21:47:57 +0000 Subject: [PATCH 04/18] match names in meta and main --- modules/snippy/run/meta.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/snippy/run/meta.yml b/modules/snippy/run/meta.yml index 13c923d8..2eb008a1 100644 --- a/modules/snippy/run/meta.yml +++ b/modules/snippy/run/meta.yml @@ -1,4 +1,4 @@ -name: snippy +name: snippy_run description: Rapid haploid variant calling keywords: - variant From 7ced6e20de5a69a25fb57b00244a49329709186a Mon Sep 17 00:00:00 2001 From: "Robert A. Petit III" Date: Wed, 11 May 2022 18:00:36 +0000 Subject: [PATCH 05/18] make prettier --- tests/modules/snippy/run/test.yml | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/tests/modules/snippy/run/test.yml b/tests/modules/snippy/run/test.yml index ac945e88..10e24f55 100644 --- a/tests/modules/snippy/run/test.yml +++ b/tests/modules/snippy/run/test.yml @@ -1,8 +1,9 @@ - name: snippy run test_snippy_run - command: nextflow run tests/modules/snippy/run -entry test_snippy_run -c tests/config/nextflow.config -c tests/modules/snippy/run/nextflow.config + command: | + nextflow run tests/modules/snippy/run -entry test_snippy_run -c tests/config/nextflow.config -c tests/modules/snippy/run/nextflow.config tags: - - snippy - snippy/run + - snippy files: - path: output/snippy/test/test.aligned.fa md5sum: 47e3390d4167edf1955d162d37aca5e3 @@ -16,21 +17,21 @@ - path: output/snippy/test/test.csv md5sum: 322f942115e5945c2041a88246166703 - path: output/snippy/test/test.filt.vcf - contains: ['fileformat', 'freebayes', 'CHROM'] + contains: ["fileformat", "freebayes", "CHROM"] - path: output/snippy/test/test.gff md5sum: df19e1b84ba6f691d20c72b397c88abf - path: output/snippy/test/test.html md5sum: 1ccbf0ffcadae1a6b2e11681d24c9938 - path: output/snippy/test/test.log - contains: ['snippy', 'consensus', 'subs'] + contains: ["snippy", "consensus", "subs"] - path: output/snippy/test/test.raw.vcf - contains: 
['fileformat', 'freebayes', 'CHROM'] + contains: ["fileformat", "freebayes", "CHROM"] - path: output/snippy/test/test.tab md5sum: beb9bde3bce985e53e8feba9ec5b136e - path: output/snippy/test/test.txt - contains: ['DateTime', 'ReadFiles', 'VariantTotal'] + contains: ["DateTime", "ReadFiles", "VariantTotal"] - path: output/snippy/test/test.vcf - contains: ['fileformat', 'freebayes', 'CHROM'] + contains: ["fileformat", "freebayes", "CHROM"] - path: output/snippy/test/test.vcf.gz - path: output/snippy/test/test.vcf.gz.csi md5sum: bed9fa291c220a1ba04eb2d448932ffc From 63db63757cab03cfa7a02c0d0f134b66fbfadea6 Mon Sep 17 00:00:00 2001 From: Matthias De Smet <11850640+matthdsm@users.noreply.github.com> Date: Mon, 30 May 2022 10:15:40 +0200 Subject: [PATCH 06/18] bump picard versions --- modules/picard/addorreplacereadgroups/main.nf | 6 +++--- modules/picard/cleansam/main.nf | 6 +++--- modules/picard/collecthsmetrics/main.nf | 6 +++--- modules/picard/collectmultiplemetrics/main.nf | 6 +++--- modules/picard/collectwgsmetrics/main.nf | 6 +++--- modules/picard/createsequencedictionary/main.nf | 6 +++--- modules/picard/crosscheckfingerprints/main.nf | 6 +++--- modules/picard/filtersamreads/main.nf | 6 +++--- modules/picard/fixmateinformation/main.nf | 6 +++--- modules/picard/liftovervcf/main.nf | 6 +++--- modules/picard/markduplicates/main.nf | 6 +++--- modules/picard/mergesamfiles/main.nf | 6 +++--- modules/picard/sortsam/main.nf | 6 +++--- modules/picard/sortvcf/main.nf | 6 +++--- 14 files changed, 42 insertions(+), 42 deletions(-) diff --git a/modules/picard/addorreplacereadgroups/main.nf b/modules/picard/addorreplacereadgroups/main.nf index fd102f67..aff83c9f 100644 --- a/modules/picard/addorreplacereadgroups/main.nf +++ b/modules/picard/addorreplacereadgroups/main.nf @@ -2,10 +2,10 @@ process PICARD_ADDORREPLACEREADGROUPS { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::picard=2.27.1" : null) + conda (params.enable_conda ? 
"bioconda::picard=2.27.2" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/picard:2.27.1--hdfd78af_0' : - 'quay.io/biocontainers/picard:2.27.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/picard:2.27.2--hdfd78af_0' : + 'quay.io/biocontainers/picard:2.27.2--hdfd78af_0' }" input: tuple val(meta), path(bam) diff --git a/modules/picard/cleansam/main.nf b/modules/picard/cleansam/main.nf index 62989565..0042c3f1 100644 --- a/modules/picard/cleansam/main.nf +++ b/modules/picard/cleansam/main.nf @@ -2,10 +2,10 @@ process PICARD_CLEANSAM { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::picard=2.27.1" : null) + conda (params.enable_conda ? "bioconda::picard=2.27.2" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/picard:2.27.1--hdfd78af_0' : - 'quay.io/biocontainers/picard:2.27.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/picard:2.27.2--hdfd78af_0' : + 'quay.io/biocontainers/picard:2.27.2--hdfd78af_0' }" input: tuple val(meta), path(bam) diff --git a/modules/picard/collecthsmetrics/main.nf b/modules/picard/collecthsmetrics/main.nf index 06b6b8f3..52d5c84a 100644 --- a/modules/picard/collecthsmetrics/main.nf +++ b/modules/picard/collecthsmetrics/main.nf @@ -2,10 +2,10 @@ process PICARD_COLLECTHSMETRICS { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::picard=2.27.1" : null) + conda (params.enable_conda ? "bioconda::picard=2.27.2" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/picard:2.27.1--hdfd78af_0' : - 'quay.io/biocontainers/picard:2.27.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/picard:2.27.2--hdfd78af_0' : + 'quay.io/biocontainers/picard:2.27.2--hdfd78af_0' }" input: tuple val(meta), path(bam) diff --git a/modules/picard/collectmultiplemetrics/main.nf b/modules/picard/collectmultiplemetrics/main.nf index 3c3152df..6fe9881b 100644 --- a/modules/picard/collectmultiplemetrics/main.nf +++ b/modules/picard/collectmultiplemetrics/main.nf @@ -2,10 +2,10 @@ process PICARD_COLLECTMULTIPLEMETRICS { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::picard=2.27.1" : null) + conda (params.enable_conda ? "bioconda::picard=2.27.2" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/picard:2.27.1--hdfd78af_0' : - 'quay.io/biocontainers/picard:2.27.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/picard:2.27.2--hdfd78af_0' : + 'quay.io/biocontainers/picard:2.27.2--hdfd78af_0' }" input: tuple val(meta), path(bam) diff --git a/modules/picard/collectwgsmetrics/main.nf b/modules/picard/collectwgsmetrics/main.nf index cab1fe0c..e231afe2 100644 --- a/modules/picard/collectwgsmetrics/main.nf +++ b/modules/picard/collectwgsmetrics/main.nf @@ -2,10 +2,10 @@ process PICARD_COLLECTWGSMETRICS { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::picard=2.27.1" : null) + conda (params.enable_conda ? "bioconda::picard=2.27.2" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/picard:2.27.1--hdfd78af_0' : - 'quay.io/biocontainers/picard:2.27.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/picard:2.27.2--hdfd78af_0' : + 'quay.io/biocontainers/picard:2.27.2--hdfd78af_0' }" input: tuple val(meta), path(bam) diff --git a/modules/picard/createsequencedictionary/main.nf b/modules/picard/createsequencedictionary/main.nf index 2348c496..16a5ff19 100644 --- a/modules/picard/createsequencedictionary/main.nf +++ b/modules/picard/createsequencedictionary/main.nf @@ -2,10 +2,10 @@ process PICARD_CREATESEQUENCEDICTIONARY { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::picard=2.27.1" : null) + conda (params.enable_conda ? "bioconda::picard=2.27.2" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/picard:2.27.1--hdfd78af_0' : - 'quay.io/biocontainers/picard:2.27.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/picard:2.27.2--hdfd78af_0' : + 'quay.io/biocontainers/picard:2.27.2--hdfd78af_0' }" input: tuple val(meta), path(fasta) diff --git a/modules/picard/crosscheckfingerprints/main.nf b/modules/picard/crosscheckfingerprints/main.nf index d3b59402..c7c4766c 100644 --- a/modules/picard/crosscheckfingerprints/main.nf +++ b/modules/picard/crosscheckfingerprints/main.nf @@ -2,10 +2,10 @@ process PICARD_CROSSCHECKFINGERPRINTS { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::picard=2.27.1" : null) + conda (params.enable_conda ? "bioconda::picard=2.27.2" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/picard:2.27.1--hdfd78af_0' : - 'quay.io/biocontainers/picard:2.27.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/picard:2.27.2--hdfd78af_0' : + 'quay.io/biocontainers/picard:2.27.2--hdfd78af_0' }" input: tuple val(meta), path(input1) diff --git a/modules/picard/filtersamreads/main.nf b/modules/picard/filtersamreads/main.nf index adedcdc2..e17c4686 100644 --- a/modules/picard/filtersamreads/main.nf +++ b/modules/picard/filtersamreads/main.nf @@ -2,10 +2,10 @@ process PICARD_FILTERSAMREADS { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::picard=2.27.1" : null) + conda (params.enable_conda ? "bioconda::picard=2.27.2" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/picard:2.27.1--hdfd78af_0' : - 'quay.io/biocontainers/picard:2.27.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/picard:2.27.2--hdfd78af_0' : + 'quay.io/biocontainers/picard:2.27.2--hdfd78af_0' }" input: tuple val(meta), path(bam), path(readlist) diff --git a/modules/picard/fixmateinformation/main.nf b/modules/picard/fixmateinformation/main.nf index 539b1082..616acce8 100644 --- a/modules/picard/fixmateinformation/main.nf +++ b/modules/picard/fixmateinformation/main.nf @@ -2,10 +2,10 @@ process PICARD_FIXMATEINFORMATION { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::picard=2.27.1" : null) + conda (params.enable_conda ? "bioconda::picard=2.27.2" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/picard:2.27.1--hdfd78af_0' : - 'quay.io/biocontainers/picard:2.27.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/picard:2.27.2--hdfd78af_0' : + 'quay.io/biocontainers/picard:2.27.2--hdfd78af_0' }" input: tuple val(meta), path(bam) diff --git a/modules/picard/liftovervcf/main.nf b/modules/picard/liftovervcf/main.nf index c92abfeb..a82f1466 100644 --- a/modules/picard/liftovervcf/main.nf +++ b/modules/picard/liftovervcf/main.nf @@ -2,10 +2,10 @@ process PICARD_LIFTOVERVCF { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::picard=2.27.1" : null) + conda (params.enable_conda ? "bioconda::picard=2.27.2" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/picard:2.27.1--hdfd78af_0' : - 'quay.io/biocontainers/picard:2.27.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/picard:2.27.2--hdfd78af_0' : + 'quay.io/biocontainers/picard:2.27.2--hdfd78af_0' }" input: tuple val(meta), path(input_vcf) diff --git a/modules/picard/markduplicates/main.nf b/modules/picard/markduplicates/main.nf index 1565c647..87f913d4 100644 --- a/modules/picard/markduplicates/main.nf +++ b/modules/picard/markduplicates/main.nf @@ -2,10 +2,10 @@ process PICARD_MARKDUPLICATES { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::picard=2.27.1" : null) + conda (params.enable_conda ? "bioconda::picard=2.27.2" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/picard:2.27.1--hdfd78af_0' : - 'quay.io/biocontainers/picard:2.27.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/picard:2.27.2--hdfd78af_0' : + 'quay.io/biocontainers/picard:2.27.2--hdfd78af_0' }" input: tuple val(meta), path(bam) diff --git a/modules/picard/mergesamfiles/main.nf b/modules/picard/mergesamfiles/main.nf index 1e32c63a..d65f518c 100644 --- a/modules/picard/mergesamfiles/main.nf +++ b/modules/picard/mergesamfiles/main.nf @@ -2,10 +2,10 @@ process PICARD_MERGESAMFILES { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::picard=2.27.1" : null) + conda (params.enable_conda ? "bioconda::picard=2.27.2" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/picard:2.27.1--hdfd78af_0' : - 'quay.io/biocontainers/picard:2.27.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/picard:2.27.2--hdfd78af_0' : + 'quay.io/biocontainers/picard:2.27.2--hdfd78af_0' }" input: tuple val(meta), path(bams) diff --git a/modules/picard/sortsam/main.nf b/modules/picard/sortsam/main.nf index cee60fd1..5e09d907 100644 --- a/modules/picard/sortsam/main.nf +++ b/modules/picard/sortsam/main.nf @@ -2,10 +2,10 @@ process PICARD_SORTSAM { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::picard=2.27.1" : null) + conda (params.enable_conda ? "bioconda::picard=2.27.2" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/picard:2.27.1--hdfd78af_0' : - 'quay.io/biocontainers/picard:2.27.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/picard:2.27.2--hdfd78af_0' : + 'quay.io/biocontainers/picard:2.27.2--hdfd78af_0' }" input: tuple val(meta), path(bam) diff --git a/modules/picard/sortvcf/main.nf b/modules/picard/sortvcf/main.nf index fb8dbb79..8f886393 100644 --- a/modules/picard/sortvcf/main.nf +++ b/modules/picard/sortvcf/main.nf @@ -2,10 +2,10 @@ process PICARD_SORTVCF { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::picard=2.27.1" : null) + conda (params.enable_conda ? "bioconda::picard=2.27.2" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/picard:2.27.1--hdfd78af_0' : - 'quay.io/biocontainers/picard:2.27.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/picard:2.27.2--hdfd78af_0' : + 'quay.io/biocontainers/picard:2.27.2--hdfd78af_0' }" input: tuple val(meta), path(vcf) From 046fa3958223ce4542578ef26502d39f4c5d71ef Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Thu, 2 Jun 2022 16:09:09 +0200 Subject: [PATCH 07/18] update VEP version + cache --- modules/ensemblvep/Dockerfile | 4 ++-- modules/ensemblvep/build.sh | 12 ++++++------ modules/ensemblvep/environment.yml | 4 ++-- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/modules/ensemblvep/Dockerfile b/modules/ensemblvep/Dockerfile index b4a1c664..4ada7c6b 100644 --- a/modules/ensemblvep/Dockerfile +++ b/modules/ensemblvep/Dockerfile @@ -11,8 +11,8 @@ RUN conda env create -f /environment.yml && conda clean -a # Setup default ARG variables ARG GENOME=GRCh38 ARG SPECIES=homo_sapiens -ARG VEP_VERSION=104 -ARG VEP_TAG=104.3 +ARG VEP_VERSION=105 +ARG VEP_TAG=105.0 # Add conda installation dir to PATH (instead of doing 'conda activate') ENV PATH /opt/conda/envs/nf-core-vep-${VEP_TAG}/bin:$PATH diff --git 
a/modules/ensemblvep/build.sh b/modules/ensemblvep/build.sh index 650c8704..402f2434 100755 --- a/modules/ensemblvep/build.sh +++ b/modules/ensemblvep/build.sh @@ -20,9 +20,9 @@ build_push() { docker push nfcore/vep:${VEP_TAG}.${GENOME} } -build_push "GRCh37" "homo_sapiens" "104" "104.3" -build_push "GRCh38" "homo_sapiens" "104" "104.3" -build_push "GRCm38" "mus_musculus" "102" "104.3" -build_push "GRCm39" "mus_musculus" "104" "104.3" -build_push "CanFam3.1" "canis_lupus_familiaris" "104" "104.3" -build_push "WBcel235" "caenorhabditis_elegans" "104" "104.3" +build_push "GRCh37" "homo_sapiens" "105" "105.0" +build_push "GRCh38" "homo_sapiens" "105" "105.0" +build_push "GRCm38" "mus_musculus" "102" "105.0" +build_push "GRCm39" "mus_musculus" "105" "105.0" +build_push "CanFam3.1" "canis_lupus_familiaris" "105" "105.0" +build_push "WBcel235" "caenorhabditis_elegans" "105" "105.0" diff --git a/modules/ensemblvep/environment.yml b/modules/ensemblvep/environment.yml index c0731c26..5df85b80 100644 --- a/modules/ensemblvep/environment.yml +++ b/modules/ensemblvep/environment.yml @@ -1,10 +1,10 @@ # You can use this file to create a conda environment for this module: # conda env create -f environment.yml -name: nf-core-vep-104.3 +name: nf-core-vep-105.0 channels: - conda-forge - bioconda - defaults dependencies: - - bioconda::ensembl-vep=104.3 + - bioconda::ensembl-vep=105.0 From bc0f52bcd3a64f27d8fb2da3a6424b427e15e53c Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Thu, 2 Jun 2022 16:11:11 +0200 Subject: [PATCH 08/18] fix cache version --- modules/ensemblvep/build.sh | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/modules/ensemblvep/build.sh b/modules/ensemblvep/build.sh index 402f2434..e21a7c0d 100755 --- a/modules/ensemblvep/build.sh +++ b/modules/ensemblvep/build.sh @@ -20,9 +20,9 @@ build_push() { docker push nfcore/vep:${VEP_TAG}.${GENOME} } -build_push "GRCh37" "homo_sapiens" "105" "105.0" -build_push "GRCh38" "homo_sapiens" "105" 
"105.0" -build_push "GRCm38" "mus_musculus" "102" "105.0" -build_push "GRCm39" "mus_musculus" "105" "105.0" -build_push "CanFam3.1" "canis_lupus_familiaris" "105" "105.0" -build_push "WBcel235" "caenorhabditis_elegans" "105" "105.0" +# build_push "GRCh37" "homo_sapiens" "105" "105.0" +# build_push "GRCh38" "homo_sapiens" "105" "105.0" +# build_push "GRCm38" "mus_musculus" "102" "105.0" +# build_push "GRCm39" "mus_musculus" "105" "105.0" +build_push "CanFam3.1" "canis_lupus_familiaris" "104" "105.0" +# build_push "WBcel235" "caenorhabditis_elegans" "105" "105.0" From b623ff5d2798b8c955baeb3890f0baee6077db66 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Thu, 2 Jun 2022 16:11:45 +0200 Subject: [PATCH 09/18] fix comments --- modules/ensemblvep/build.sh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/modules/ensemblvep/build.sh b/modules/ensemblvep/build.sh index e21a7c0d..6f340c0f 100755 --- a/modules/ensemblvep/build.sh +++ b/modules/ensemblvep/build.sh @@ -20,9 +20,9 @@ build_push() { docker push nfcore/vep:${VEP_TAG}.${GENOME} } -# build_push "GRCh37" "homo_sapiens" "105" "105.0" -# build_push "GRCh38" "homo_sapiens" "105" "105.0" -# build_push "GRCm38" "mus_musculus" "102" "105.0" -# build_push "GRCm39" "mus_musculus" "105" "105.0" +build_push "GRCh37" "homo_sapiens" "105" "105.0" +build_push "GRCh38" "homo_sapiens" "105" "105.0" +build_push "GRCm38" "mus_musculus" "102" "105.0" +build_push "GRCm39" "mus_musculus" "105" "105.0" build_push "CanFam3.1" "canis_lupus_familiaris" "104" "105.0" -# build_push "WBcel235" "caenorhabditis_elegans" "105" "105.0" +build_push "WBcel235" "caenorhabditis_elegans" "105" "105.0" From b4b1f3b81d897b97da0655cf48107b70aa4b2f73 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Thu, 2 Jun 2022 16:20:12 +0200 Subject: [PATCH 10/18] add intervals to calibratedragstrmodel --- modules/gatk4/calibratedragstrmodel/main.nf | 4 ++- modules/gatk4/calibratedragstrmodel/meta.yml | 4 +++ 
.../gatk4/calibratedragstrmodel/main.nf | 26 +++++++++++++++++-- .../gatk4/calibratedragstrmodel/test.yml | 13 ++++++++-- 4 files changed, 42 insertions(+), 5 deletions(-) diff --git a/modules/gatk4/calibratedragstrmodel/main.nf b/modules/gatk4/calibratedragstrmodel/main.nf index 00a11de0..08ba7008 100644 --- a/modules/gatk4/calibratedragstrmodel/main.nf +++ b/modules/gatk4/calibratedragstrmodel/main.nf @@ -8,7 +8,7 @@ process GATK4_CALIBRATEDRAGSTRMODEL { 'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }" input: - tuple val(meta), path(bam), path(bam_index) + tuple val(meta), path(bam), path(bam_index), path(intervals) path fasta path fasta_fai path dict @@ -24,6 +24,7 @@ process GATK4_CALIBRATEDRAGSTRMODEL { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def intervals_command = intervals ? "--intervals $intervals" : "" def avail_mem = 3 if (!task.memory) { @@ -37,6 +38,7 @@ process GATK4_CALIBRATEDRAGSTRMODEL { --output ${prefix}.txt \\ --reference $fasta \\ --str-table-path $strtablefile \\ + $intervals_command \\ --tmp-dir . 
\\ $args diff --git a/modules/gatk4/calibratedragstrmodel/meta.yml b/modules/gatk4/calibratedragstrmodel/meta.yml index e6d2b0b1..c3c40d81 100644 --- a/modules/gatk4/calibratedragstrmodel/meta.yml +++ b/modules/gatk4/calibratedragstrmodel/meta.yml @@ -33,6 +33,10 @@ input: type: file description: index of the BAM/CRAM/SAM file pattern: "*.{bai,crai,sai}" + - intervals: + type: file + description: BED file or interval list containing regions + pattern: "*.{bed,interval_list}" - fasta: type: file description: The reference FASTA file diff --git a/tests/modules/gatk4/calibratedragstrmodel/main.nf b/tests/modules/gatk4/calibratedragstrmodel/main.nf index 4d4e9896..136a9742 100644 --- a/tests/modules/gatk4/calibratedragstrmodel/main.nf +++ b/tests/modules/gatk4/calibratedragstrmodel/main.nf @@ -9,7 +9,8 @@ workflow test_gatk4_calibratedragstrmodel_bam { input = [ [ id:'test', single_end:false ], // meta map file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), - file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true) + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true), + [] ] fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) @@ -28,7 +29,28 @@ workflow test_gatk4_calibratedragstrmodel_cram { input = [ [ id:'test', single_end:false ], // meta map file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true), - file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true) + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true), + [] + ] + + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + + fasta_fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + 
+ dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) + + strtablefile = file(params.test_data['homo_sapiens']['genome']['genome_strtablefile'], checkIfExists: true) + + GATK4_CALIBRATEDRAGSTRMODEL ( input, fasta, fasta_fai, dict, strtablefile ) +} + +workflow test_gatk4_calibratedragstrmodel_beds { + + input = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true), + file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true) ] fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) diff --git a/tests/modules/gatk4/calibratedragstrmodel/test.yml b/tests/modules/gatk4/calibratedragstrmodel/test.yml index bc46dead..53c428a9 100644 --- a/tests/modules/gatk4/calibratedragstrmodel/test.yml +++ b/tests/modules/gatk4/calibratedragstrmodel/test.yml @@ -1,8 +1,8 @@ - name: gatk4 calibratedragstrmodel test_gatk4_calibratedragstrmodel_bam command: nextflow run ./tests/modules/gatk4/calibratedragstrmodel -entry test_gatk4_calibratedragstrmodel_bam -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/calibratedragstrmodel/nextflow.config tags: - - gatk4 - gatk4/calibratedragstrmodel + - gatk4 files: - path: output/gatk4/test.txt md5sum: 0a1a1583b157fa2251dd931ed165da4f @@ -10,8 +10,17 @@ - name: gatk4 calibratedragstrmodel test_gatk4_calibratedragstrmodel_cram command: nextflow run ./tests/modules/gatk4/calibratedragstrmodel -entry test_gatk4_calibratedragstrmodel_cram -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/calibratedragstrmodel/nextflow.config tags: - - gatk4 - gatk4/calibratedragstrmodel + - gatk4 files: - path: output/gatk4/test.txt md5sum: 1aa7ab38023f724877b3323c5e6b9a4e + +- name: gatk4 calibratedragstrmodel 
test_gatk4_calibratedragstrmodel_beds + command: nextflow run ./tests/modules/gatk4/calibratedragstrmodel -entry test_gatk4_calibratedragstrmodel_beds -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/calibratedragstrmodel/nextflow.config + tags: + - gatk4/calibratedragstrmodel + - gatk4 + files: + - path: output/gatk4/test.txt + md5sum: def8baccad7bd59006f08fcb0a6721bf From f3c0107fa2ecdc510b4f569dc3e062c7e028ac7a Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Thu, 2 Jun 2022 16:24:03 +0200 Subject: [PATCH 11/18] added optional to intervals --- modules/gatk4/calibratedragstrmodel/meta.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/gatk4/calibratedragstrmodel/meta.yml b/modules/gatk4/calibratedragstrmodel/meta.yml index c3c40d81..e71dac5e 100644 --- a/modules/gatk4/calibratedragstrmodel/meta.yml +++ b/modules/gatk4/calibratedragstrmodel/meta.yml @@ -35,7 +35,7 @@ input: pattern: "*.{bai,crai,sai}" - intervals: type: file - description: BED file or interval list containing regions + description: BED file or interval list containing regions (optional) pattern: "*.{bed,interval_list}" - fasta: type: file description: The reference FASTA file From 544b62df45d3043ca2c5ca3a131284fe0d74026b Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Thu, 2 Jun 2022 21:11:17 +0200 Subject: [PATCH 12/18] Start work on GATK UG --- modules/gatk/unifiedgenotyper/main.nf | 50 ++++++++++++++++++ modules/gatk/unifiedgenotyper/meta.yml | 51 +++++++++++++++++++ tests/config/pytest_modules.yml | 4 ++ tests/modules/gatk/unifiedgenotyper/main.nf | 15 ++++++ .../gatk/unifiedgenotyper/nextflow.config | 5 ++ tests/modules/gatk/unifiedgenotyper/test.yml | 14 +++++ 6 files changed, 139 insertions(+) create mode 100644 modules/gatk/unifiedgenotyper/main.nf create mode 100644 modules/gatk/unifiedgenotyper/meta.yml create mode 100644 tests/modules/gatk/unifiedgenotyper/main.nf create mode 100644 tests/modules/gatk/unifiedgenotyper/nextflow.config create mode 100644
tests/modules/gatk/unifiedgenotyper/test.yml diff --git a/modules/gatk/unifiedgenotyper/main.nf b/modules/gatk/unifiedgenotyper/main.nf new file mode 100644 index 00000000..4dd6e408 --- /dev/null +++ b/modules/gatk/unifiedgenotyper/main.nf @@ -0,0 +1,50 @@ +process GATK_UNIFIEDGENOTYPER { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? "bioconda::gatk=3.5" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gatk:3.5--hdfd78af_11': + 'quay.io/biocontainers/gatk:3.5--hdfd78af_11' }" + + input: + tuple val(meta), path(input), path(index) + path(fasta) + path(fai) + path(dict) + path(known_vcf) + + output: + tuple val(meta), path("*.bam"), emit: bam + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + def avail_mem = 3 + if (!task.memory) { + log.info '[GATK RealignerTargetCreator] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' 
+ } else { + avail_mem = task.memory.giga + } + + """ + gatk3 \\ + -Xmx${avail_mem}g \\ + -nt ${task.cpus} \\ + -T UnifiedGenotyper \\ + -I ${input} \\ + -R ${fasta} \\ + -o ${prefix}.vcf \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//' )) + END_VERSIONS + """ +} diff --git a/modules/gatk/unifiedgenotyper/meta.yml b/modules/gatk/unifiedgenotyper/meta.yml new file mode 100644 index 00000000..7a992591 --- /dev/null +++ b/modules/gatk/unifiedgenotyper/meta.yml @@ -0,0 +1,51 @@ +name: "gatk_unifiedgenotyper" +## TODO nf-core: Add a description of the module and list keywords +description: write your description here +keywords: + - sort +tools: + - "gatk": + ## TODO nf-core: Add a description and other details for the software below + description: "The full Genome Analysis Toolkit (GATK) framework, license restricted." + homepage: "None" + documentation: "None" + tool_dev_url: "None" + doi: "" + licence: "['BSD', 'https://www.broadinstitute.org/gatk/about/#licensing', 'https://software.broadinstitute.org/gatk/download/licensing']" + +## TODO nf-core: Add a description of all of the variables used as input +input: + # Only when we have meta + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + # + ## TODO nf-core: Delete / customise this example input + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + +## TODO nf-core: Add a description of all of the variables used as output +output: + #Only when we have meta + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + # + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + ## TODO nf-core: Delete / customise this example output + - bam: + type: file + description: Sorted BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + +authors: + - "@jfy133" diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index d4bfac5b..0d3a21c2 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -715,6 +715,10 @@ gamma/gamma: - modules/gamma/gamma/** - tests/modules/gamma/gamma/** +gatk/unifiedgenotyper: + - modules/gatk/unifiedgenotyper/** + - tests/modules/gatk/unifiedgenotyper/** + gatk4/applybqsr: - modules/gatk4/applybqsr/** - tests/modules/gatk4/applybqsr/** diff --git a/tests/modules/gatk/unifiedgenotyper/main.nf b/tests/modules/gatk/unifiedgenotyper/main.nf new file mode 100644 index 00000000..86a23790 --- /dev/null +++ b/tests/modules/gatk/unifiedgenotyper/main.nf @@ -0,0 +1,15 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { GATK_UNIFIEDGENOTYPER } from '../../../../modules/gatk/unifiedgenotyper/main.nf' + +workflow test_gatk_unifiedgenotyper { + + input = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true) + ] + + GATK_UNIFIEDGENOTYPER ( input ) +} diff --git a/tests/modules/gatk/unifiedgenotyper/nextflow.config b/tests/modules/gatk/unifiedgenotyper/nextflow.config new file mode 100644 index 00000000..50f50a7a --- /dev/null +++ b/tests/modules/gatk/unifiedgenotyper/nextflow.config @@ -0,0 +1,5 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + +} \ No newline at end of file diff --git a/tests/modules/gatk/unifiedgenotyper/test.yml b/tests/modules/gatk/unifiedgenotyper/test.yml new file mode 100644 index 00000000..aad9aec9 --- /dev/null +++ 
b/tests/modules/gatk/unifiedgenotyper/test.yml @@ -0,0 +1,14 @@ +## TODO nf-core: Please run the following command to build this file: +# nf-core modules create-test-yml gatk/unifiedgenotyper +- name: "gatk unifiedgenotyper" + command: nextflow run ./tests/modules/gatk/unifiedgenotyper -entry test_gatk_unifiedgenotyper -c ./tests/config/nextflow.config -c ./tests/modules/gatk/unifiedgenotyper/nextflow.config + tags: + - "gatk" + # + - "gatk/unifiedgenotyper" + # + files: + - path: "output/gatk/test.bam" + md5sum: e667c7caad0bc4b7ac383fd023c654fc + - path: output/gatk/versions.yml + md5sum: a01fe51bc4c6a3a6226fbf77b2c7cf3b From a14855dbe8648ca445759f57a1a2805ec4812615 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Thu, 2 Jun 2022 21:22:21 +0200 Subject: [PATCH 13/18] Start meta --- modules/gatk/unifiedgenotyper/main.nf | 16 ++++++- modules/gatk/unifiedgenotyper/meta.yml | 59 ++++++++++++++++---------- 2 files changed, 51 insertions(+), 24 deletions(-) diff --git a/modules/gatk/unifiedgenotyper/main.nf b/modules/gatk/unifiedgenotyper/main.nf index 4dd6e408..896a347e 100644 --- a/modules/gatk/unifiedgenotyper/main.nf +++ b/modules/gatk/unifiedgenotyper/main.nf @@ -13,9 +13,13 @@ process GATK_UNIFIEDGENOTYPER { path(fai) path(dict) path(known_vcf) + path(intervals) + path(contamination) + path(dbsnps) + path(comp) output: - tuple val(meta), path("*.bam"), emit: bam + tuple val(meta), path("*.vcf.gz"), emit: vcf path "versions.yml" , emit: versions when: @@ -24,6 +28,10 @@ process GATK_UNIFIEDGENOTYPER { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def contamination_file = contamination ? "-contaminationFile ${contamination}" : "" + def dbsnps_file = dbsnps ? "--dbsnp ${dbsnps}" : "" + def comp_file = comp ? "--comp ${comp}" : "" + def intervals_file = intervals ? 
"--intervals ${intervals}" : "" def avail_mem = 3 if (!task.memory) { @@ -39,9 +47,15 @@ process GATK_UNIFIEDGENOTYPER { -T UnifiedGenotyper \\ -I ${input} \\ -R ${fasta} \\ + ${contamination_file} \\ + ${dbsnps_file} \\ + ${comp_file} \\ + ${intervals_file} -o ${prefix}.vcf \\ $args + gzip -n *.vcf + cat <<-END_VERSIONS > versions.yml "${task.process}": gatk: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//' )) diff --git a/modules/gatk/unifiedgenotyper/meta.yml b/modules/gatk/unifiedgenotyper/meta.yml index 7a992591..5f85c451 100644 --- a/modules/gatk/unifiedgenotyper/meta.yml +++ b/modules/gatk/unifiedgenotyper/meta.yml @@ -1,51 +1,64 @@ name: "gatk_unifiedgenotyper" -## TODO nf-core: Add a description of the module and list keywords -description: write your description here keywords: - - sort + - bam + - vcf + - variant calling + - indel + - realignment + - targets tools: - "gatk": - ## TODO nf-core: Add a description and other details for the software below description: "The full Genome Analysis Toolkit (GATK) framework, license restricted." - homepage: "None" - documentation: "None" - tool_dev_url: "None" - doi: "" - licence: "['BSD', 'https://www.broadinstitute.org/gatk/about/#licensing', 'https://software.broadinstitute.org/gatk/download/licensing']" + homepage: "https://gatk.broadinstitute.org/hc/en-us" + documentation: "https://github.com/broadinstitute/gatk-docs" + licence: "['https://software.broadinstitute.org/gatk/download/licensing', 'BSD', 'https://www.broadinstitute.org/gatk/about/#licensing']" -## TODO nf-core: Add a description of all of the variables used as input input: - # Only when we have meta - meta: type: map description: | Groovy Map containing sample information e.g. 
[ id:'test', single_end:false ] - # - ## TODO nf-core: Delete / customise this example input - - bam: + - input: type: file - description: BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" + description: Sorted and indexed BAM/CRAM/SAM file + pattern: "*.bam" + - index: + type: file + description: BAM index file + pattern: "*.bai" + - fasta: + type: file + description: Reference file used to generate BAM file + pattern: ".{fasta,fa,fna}" + - fai: + type: file + description: Index of reference file used to generate BAM file + pattern: ".fai" + - dict: + type: file + description: GATK dict file for reference + pattern: ".dict" + - known_vcf: + type: file + description: Optional input VCF file(s) with known indels + pattern: ".vcf" -## TODO nf-core: Add a description of all of the variables used as output output: - #Only when we have meta - meta: type: map description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - # - versions: type: file description: File containing software versions pattern: "versions.yml" - ## TODO nf-core: Delete / customise this example output - - bam: + - vcf: type: file - description: Sorted BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" + description: VCF file containing called variants + pattern: "*.vcf.gz" authors: + - "@ilight1542" - "@jfy133" From a565d9072aba07b369c6e3c43f4168556d32678c Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Thu, 2 Jun 2022 21:39:39 +0200 Subject: [PATCH 14/18] Add GATK UnifiedGenotyper --- modules/gatk/unifiedgenotyper/main.nf | 13 ++++++------ modules/gatk/unifiedgenotyper/meta.yml | 21 ++++++++++++++------ tests/modules/gatk/unifiedgenotyper/main.nf | 15 ++++++++------ tests/modules/gatk/unifiedgenotyper/test.yml | 19 +++++++----------- 4 files changed, 37 insertions(+), 31 deletions(-) diff --git a/modules/gatk/unifiedgenotyper/main.nf b/modules/gatk/unifiedgenotyper/main.nf index 896a347e..47d83def 100644 --- a/modules/gatk/unifiedgenotyper/main.nf +++ 
b/modules/gatk/unifiedgenotyper/main.nf @@ -12,15 +12,14 @@ process GATK_UNIFIEDGENOTYPER { path(fasta) path(fai) path(dict) - path(known_vcf) path(intervals) path(contamination) - path(dbsnps) + path(dbsnp) path(comp) output: tuple val(meta), path("*.vcf.gz"), emit: vcf - path "versions.yml" , emit: versions + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -29,7 +28,7 @@ process GATK_UNIFIEDGENOTYPER { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def contamination_file = contamination ? "-contaminationFile ${contamination}" : "" - def dbsnps_file = dbsnps ? "--dbsnp ${dbsnps}" : "" + def dbsnp_file = dbsnp ? "--dbsnp ${dbsnp}" : "" def comp_file = comp ? "--comp ${comp}" : "" def intervals_file = intervals ? "--intervals ${intervals}" : "" @@ -48,9 +47,9 @@ process GATK_UNIFIEDGENOTYPER { -I ${input} \\ -R ${fasta} \\ ${contamination_file} \\ - ${dbsnps_file} \\ + ${dbsnp_file} \\ ${comp_file} \\ - ${intervals_file} + ${intervals_file} \\ -o ${prefix}.vcf \\ $args @@ -58,7 +57,7 @@ process GATK_UNIFIEDGENOTYPER { cat <<-END_VERSIONS > versions.yml "${task.process}": - gatk: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//' )) + gatk: \$(echo \$(gatk3 --version)) END_VERSIONS """ } diff --git a/modules/gatk/unifiedgenotyper/meta.yml b/modules/gatk/unifiedgenotyper/meta.yml index 5f85c451..e414d146 100644 --- a/modules/gatk/unifiedgenotyper/meta.yml +++ b/modules/gatk/unifiedgenotyper/meta.yml @@ -3,9 +3,6 @@ keywords: - bam - vcf - variant calling - - indel - - realignment - - targets tools: - "gatk": description: "The full Genome Analysis Toolkit (GATK) framework, license restricted." 
@@ -39,10 +36,22 @@ input: type: file description: GATK dict file for reference pattern: ".dict" - - known_vcf: + - intervals: type: file - description: Optional input VCF file(s) with known indels - pattern: ".vcf" + description: Bed file with the genomic regions included in the library (optional) + pattern: "*.intervals" + - contamination: + type: file + description: Tab-separated file containing fraction of contamination in sequencing data (per sample) to aggressively remove + pattern: "*" + - dbsnp: + type: file + description: VCF file containing known sites (optional) + pattern: "*" + - comp: + type: file + description: Comparison VCF file (optional) + pattern: "*" output: - meta: diff --git a/tests/modules/gatk/unifiedgenotyper/main.nf b/tests/modules/gatk/unifiedgenotyper/main.nf index 86a23790..6d145e76 100644 --- a/tests/modules/gatk/unifiedgenotyper/main.nf +++ b/tests/modules/gatk/unifiedgenotyper/main.nf @@ -5,11 +5,14 @@ nextflow.enable.dsl = 2 include { GATK_UNIFIEDGENOTYPER } from '../../../../modules/gatk/unifiedgenotyper/main.nf' workflow test_gatk_unifiedgenotyper { - - input = [ - [ id:'test', single_end:false ], // meta map - file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true) - ] - GATK_UNIFIEDGENOTYPER ( input ) + input = [ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true), + ] + fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true) + + GATK_UNIFIEDGENOTYPER ( input, fasta, fai, dict, [], [], [], []) } diff --git a/tests/modules/gatk/unifiedgenotyper/test.yml b/tests/modules/gatk/unifiedgenotyper/test.yml
index aad9aec9..498e7991 100644 --- a/tests/modules/gatk/unifiedgenotyper/test.yml +++ b/tests/modules/gatk/unifiedgenotyper/test.yml @@ -1,14 +1,9 @@ -## TODO nf-core: Please run the following command to build this file: -# nf-core modules create-test-yml gatk/unifiedgenotyper -- name: "gatk unifiedgenotyper" - command: nextflow run ./tests/modules/gatk/unifiedgenotyper -entry test_gatk_unifiedgenotyper -c ./tests/config/nextflow.config -c ./tests/modules/gatk/unifiedgenotyper/nextflow.config +- name: gatk unifiedgenotyper test_gatk_unifiedgenotyper + command: nextflow run ./tests/modules/gatk/unifiedgenotyper -entry test_gatk_unifiedgenotyper -c ./tests/config/nextflow.config -c ./tests/modules/gatk/unifiedgenotyper/nextflow.config tags: - - "gatk" - # - - "gatk/unifiedgenotyper" - # + - gatk + - gatk/unifiedgenotyper files: - - path: "output/gatk/test.bam" - md5sum: e667c7caad0bc4b7ac383fd023c654fc - - path: output/gatk/versions.yml - md5sum: a01fe51bc4c6a3a6226fbf77b2c7cf3b + - path: output/gatk/test.vcf.gz + contains: + - "#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT test" From d7cec801b244d8aa0a98aa94934b191ed5720f92 Mon Sep 17 00:00:00 2001 From: "Robert A. Petit III" Date: Fri, 3 Jun 2022 13:05:43 -0600 Subject: [PATCH 15/18] Update main.nf --- modules/snippy/run/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/snippy/run/main.nf b/modules/snippy/run/main.nf index 73c4b94d..5ecdbea8 100644 --- a/modules/snippy/run/main.nf +++ b/modules/snippy/run/main.nf @@ -4,7 +4,7 @@ process SNIPPY_RUN { conda (params.enable_conda ? "bioconda::snippy=4.6.0" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/snippy:4.6.0--hdfd78af_2 ': + 'https://depot.galaxyproject.org/singularity/snippy:4.6.0--hdfd78af_2' : 'quay.io/biocontainers/snippy:4.6.0--hdfd78af_2' }" input: From e827a042ce1f55a37a8e771944b566faa5108e8e Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Fri, 3 Jun 2022 21:24:03 +0200 Subject: [PATCH 16/18] Update modules/gatk/unifiedgenotyper/main.nf Co-authored-by: Maxime U. Garcia --- modules/gatk/unifiedgenotyper/main.nf | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/modules/gatk/unifiedgenotyper/main.nf b/modules/gatk/unifiedgenotyper/main.nf index 47d83def..e88ac7c5 100644 --- a/modules/gatk/unifiedgenotyper/main.nf +++ b/modules/gatk/unifiedgenotyper/main.nf @@ -9,13 +9,13 @@ process GATK_UNIFIEDGENOTYPER { input: tuple val(meta), path(input), path(index) - path(fasta) - path(fai) - path(dict) - path(intervals) - path(contamination) - path(dbsnp) - path(comp) + path fasta + path fai + path dict + path intervals + path contamination + path dbsnp + path comp output: tuple val(meta), path("*.vcf.gz"), emit: vcf From 771425e7a050acf0e1be41d9e6fc800f675c3e58 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Tue, 7 Jun 2022 12:11:18 +0200 Subject: [PATCH 17/18] add threads to calibratedragstrmodel --- modules/gatk4/calibratedragstrmodel/main.nf | 1 + tests/modules/gatk4/calibratedragstrmodel/test.yml | 12 ++++++------ 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/modules/gatk4/calibratedragstrmodel/main.nf b/modules/gatk4/calibratedragstrmodel/main.nf index 08ba7008..b4e47edc 100644 --- a/modules/gatk4/calibratedragstrmodel/main.nf +++ b/modules/gatk4/calibratedragstrmodel/main.nf @@ -38,6 +38,7 @@ process GATK4_CALIBRATEDRAGSTRMODEL { --output ${prefix}.txt \\ --reference $fasta \\ --str-table-path $strtablefile \\ + --threads $task.cpus \\ $intervals_command \\ --tmp-dir . 
\\ $args diff --git a/tests/modules/gatk4/calibratedragstrmodel/test.yml b/tests/modules/gatk4/calibratedragstrmodel/test.yml index 53c428a9..285d04ca 100644 --- a/tests/modules/gatk4/calibratedragstrmodel/test.yml +++ b/tests/modules/gatk4/calibratedragstrmodel/test.yml @@ -1,26 +1,26 @@ - name: gatk4 calibratedragstrmodel test_gatk4_calibratedragstrmodel_bam command: nextflow run ./tests/modules/gatk4/calibratedragstrmodel -entry test_gatk4_calibratedragstrmodel_bam -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/calibratedragstrmodel/nextflow.config tags: - - gatk4/calibratedragstrmodel - gatk4 + - gatk4/calibratedragstrmodel files: - path: output/gatk4/test.txt - md5sum: 0a1a1583b157fa2251dd931ed165da4f + md5sum: e16fa32906c74bb18b93e98a86718ff1 - name: gatk4 calibratedragstrmodel test_gatk4_calibratedragstrmodel_cram command: nextflow run ./tests/modules/gatk4/calibratedragstrmodel -entry test_gatk4_calibratedragstrmodel_cram -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/calibratedragstrmodel/nextflow.config tags: - - gatk4/calibratedragstrmodel - gatk4 + - gatk4/calibratedragstrmodel files: - path: output/gatk4/test.txt - md5sum: 1aa7ab38023f724877b3323c5e6b9a4e + md5sum: 81c7bf338886cb4d5c2cc07fc56afe44 - name: gatk4 calibratedragstrmodel test_gatk4_calibratedragstrmodel_beds command: nextflow run ./tests/modules/gatk4/calibratedragstrmodel -entry test_gatk4_calibratedragstrmodel_beds -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/calibratedragstrmodel/nextflow.config tags: - - gatk4/calibratedragstrmodel - gatk4 + - gatk4/calibratedragstrmodel files: - path: output/gatk4/test.txt - md5sum: def8baccad7bd59006f08fcb0a6721bf + md5sum: cb6a9acdee042302b54fd1f59b5f54ee From 4c7ef30fb64f75ba4499d3b8fba24a068b1ce586 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Tue, 7 Jun 2022 12:17:14 +0200 Subject: [PATCH 18/18] changed to label to medium --- modules/gatk4/calibratedragstrmodel/main.nf | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/modules/gatk4/calibratedragstrmodel/main.nf b/modules/gatk4/calibratedragstrmodel/main.nf index b4e47edc..37a54de4 100644 --- a/modules/gatk4/calibratedragstrmodel/main.nf +++ b/modules/gatk4/calibratedragstrmodel/main.nf @@ -1,6 +1,6 @@ process GATK4_CALIBRATEDRAGSTRMODEL { tag "$meta.id" - label 'process_low' + label 'process_medium' conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?