From 9194fb845e1ec6a92ac340a15a22e47f3ad22781 Mon Sep 17 00:00:00 2001 From: Mahesh Binzer-Panchal Date: Fri, 6 May 2022 07:54:10 +0000 Subject: [PATCH 001/168] Add module kat hist --- modules/kat/hist/main.nf | 42 +++++++++++++++++ modules/kat/hist/meta.yml | 64 ++++++++++++++++++++++++++ tests/config/pytest_modules.yml | 4 ++ tests/modules/kat/hist/main.nf | 28 +++++++++++ tests/modules/kat/hist/nextflow.config | 9 ++++ tests/modules/kat/hist/test.yml | 30 ++++++++++++ 6 files changed, 177 insertions(+) create mode 100644 modules/kat/hist/main.nf create mode 100644 modules/kat/hist/meta.yml create mode 100644 tests/modules/kat/hist/main.nf create mode 100644 tests/modules/kat/hist/nextflow.config create mode 100644 tests/modules/kat/hist/test.yml diff --git a/modules/kat/hist/main.nf b/modules/kat/hist/main.nf new file mode 100644 index 00000000..e9a486f9 --- /dev/null +++ b/modules/kat/hist/main.nf @@ -0,0 +1,42 @@ +process KAT_HIST { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? "bioconda::kat=2.4.2" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/kat:2.4.2--py38hfc5f9d8_2': + 'quay.io/biocontainers/kat:2.4.2--py38hfc5f9d8_2' }" + + input: + tuple val(meta), path(reads) + + output: + tuple val(meta), path("*.hist") , emit: hist + tuple val(meta), path("*.hist.dist_analysis.json"), emit: json + tuple val(meta), path("*.png") , emit: png , optional: true + tuple val(meta), path("*.ps") , emit: ps , optional: true + tuple val(meta), path("*.pdf") , emit: pdf , optional: true + tuple val(meta), path("*-hash.jf*") , emit: jellyfish_hash, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + kat hist \\ + --threads $task.cpus \\ + --output_prefix ${prefix}.hist \\ + $args \\ + $reads + + ls -l + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + kat: \$( kat hist --version | sed 's/kat //' ) + END_VERSIONS + """ +} diff --git a/modules/kat/hist/meta.yml b/modules/kat/hist/meta.yml new file mode 100644 index 00000000..a7b45347 --- /dev/null +++ b/modules/kat/hist/meta.yml @@ -0,0 +1,64 @@ +name: "kat_hist" +description: Creates a histogram of the number of distinct k-mers having a given frequency. +keywords: + - k-mer + - histogram + - count +tools: + - "kat": + description: "KAT is a suite of tools that analyse jellyfish hashes or sequence files (fasta or fastq) using kmer counts" + homepage: https://www.earlham.ac.uk/kat-tools + documentation: https://kat.readthedocs.io/en/latest/index.html + tool_dev_url: https://github.com/TGAC/KAT + doi: http://bioinformatics.oxfordjournals.org/content/early/2016/10/20/bioinformatics.btw663.abstract + licence: "['GPL v3']" + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - hist: + type: file + description: KAT histogram of k-mer counts + pattern: "*.hist" + - json: + type: file + description: KAT histogram summary of distance analysis + pattern: "*.hist.dist_analysis.json" + - png: + type: file + description: KAT plot of k-mer histogram in PNG format + pattern: "*.png" + - ps: + type: file + description: KAT plot of k-mer histogram in PS format + pattern: "*.ps" + - pdf: + type: file + description: KAT plot of k-mer histogram in PDF format + pattern: "*.pdf" + - jellyfish_hash: + type: file + description: Jellyfish hash file + pattern: "*-hist.jf*" + +authors: + - "@mahesh-panchal" diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index 32a28477..21b112d0 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -1041,6 +1041,10 @@ kallistobustools/ref: - modules/kallistobustools/ref/** - tests/modules/kallistobustools/ref/** +kat/hist: + - modules/kat/hist/** + - tests/modules/kat/hist/** + khmer/normalizebymedian: - modules/khmer/normalizebymedian/** - tests/modules/khmer/normalizebymedian/** diff --git a/tests/modules/kat/hist/main.nf b/tests/modules/kat/hist/main.nf new file mode 100644 index 00000000..06be6150 --- /dev/null +++ b/tests/modules/kat/hist/main.nf @@ -0,0 +1,28 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { KAT_HIST } from '../../../../modules/kat/hist/main.nf' + +workflow test_kat_hist_single_end { + + input = [ + [ id:'test', single_end:true ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_1_fastq_gz'], 
checkIfExists: true) + ] + + KAT_HIST ( input ) +} + +workflow test_kat_hist_paired_end { + + input = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.test_data['homo_sapiens']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_2_fastq_gz'], checkIfExists: true), + ] + ] + + KAT_HIST ( input ) +} diff --git a/tests/modules/kat/hist/nextflow.config b/tests/modules/kat/hist/nextflow.config new file mode 100644 index 00000000..a2af9dff --- /dev/null +++ b/tests/modules/kat/hist/nextflow.config @@ -0,0 +1,9 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + + withName: 'test_kat_hist_single_end:KAT_HIST' { + ext.args = '-d' + } + +} diff --git a/tests/modules/kat/hist/test.yml b/tests/modules/kat/hist/test.yml new file mode 100644 index 00000000..994cffa4 --- /dev/null +++ b/tests/modules/kat/hist/test.yml @@ -0,0 +1,30 @@ +- name: kat hist test_kat_hist_single_end + command: nextflow run tests/modules/kat/hist -entry test_kat_hist_single_end -c tests/config/nextflow.config + tags: + - kat/hist + - kat + files: + - path: output/kat/test.hist + md5sum: 1aceb823b6774f14f5cca15954138efd + - path: output/kat/test.hist-hash.jf27 + - path: output/kat/test.hist.dist_analysis.json + md5sum: ec4317d510f752855411d13c0f24dea9 + - path: output/kat/test.hist.png + md5sum: 30daaf4d2dd4b1574e387df973c39d03 + - path: output/kat/versions.yml + md5sum: 296f28c007bc55f8f5490702cf3f81d4 + +- name: kat hist test_kat_hist_paired_end + command: nextflow run tests/modules/kat/hist -entry test_kat_hist_paired_end -c tests/config/nextflow.config + tags: + - kat/hist + - kat + files: + - path: output/kat/test.hist + md5sum: c4e8e01996bd8f676e02690220e1def7 + - path: output/kat/test.hist.dist_analysis.json + md5sum: 954a3d1c3fc56f80f0929315ed299824 + - path: output/kat/test.hist.png + md5sum: 8a21ce0965342234f5982f947f4f10b0 + - path: 
output/kat/versions.yml + md5sum: 3a07329af7ce78e76b021996cd08eacb From c19e7da59decb430b3e57c5d2b5175005850b0af Mon Sep 17 00:00:00 2001 From: Mahesh Binzer-Panchal Date: Fri, 6 May 2022 10:55:48 +0200 Subject: [PATCH 002/168] Apply suggestions from code review Remove versions md5sum Co-authored-by: FriederikeHanssen --- tests/modules/kat/hist/test.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/modules/kat/hist/test.yml b/tests/modules/kat/hist/test.yml index 994cffa4..131e8a3a 100644 --- a/tests/modules/kat/hist/test.yml +++ b/tests/modules/kat/hist/test.yml @@ -12,7 +12,6 @@ - path: output/kat/test.hist.png md5sum: 30daaf4d2dd4b1574e387df973c39d03 - path: output/kat/versions.yml - md5sum: 296f28c007bc55f8f5490702cf3f81d4 - name: kat hist test_kat_hist_paired_end command: nextflow run tests/modules/kat/hist -entry test_kat_hist_paired_end -c tests/config/nextflow.config @@ -27,4 +26,3 @@ - path: output/kat/test.hist.png md5sum: 8a21ce0965342234f5982f947f4f10b0 - path: output/kat/versions.yml - md5sum: 3a07329af7ce78e76b021996cd08eacb From 9b4923dcd5334ad44590180abfc73b0dee387874 Mon Sep 17 00:00:00 2001 From: Mahesh Binzer-Panchal Date: Tue, 10 May 2022 15:02:37 +0000 Subject: [PATCH 003/168] Change test files --- tests/modules/kat/hist/main.nf | 6 +++--- tests/modules/kat/hist/test.yml | 14 ++++++++------ 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/tests/modules/kat/hist/main.nf b/tests/modules/kat/hist/main.nf index 06be6150..88136749 100644 --- a/tests/modules/kat/hist/main.nf +++ b/tests/modules/kat/hist/main.nf @@ -8,7 +8,7 @@ workflow test_kat_hist_single_end { input = [ [ id:'test', single_end:true ], // meta map - file(params.test_data['homo_sapiens']['illumina']['test_1_fastq_gz'], checkIfExists: true) + file(params.test_data['homo_sapiens']['illumina']['test2_1_fastq_gz'], checkIfExists: true) ] KAT_HIST ( input ) @@ -19,8 +19,8 @@ workflow test_kat_hist_paired_end { input = [ [ id:'test', single_end:false ], // meta map 
[ - file(params.test_data['homo_sapiens']['illumina']['test_1_fastq_gz'], checkIfExists: true), - file(params.test_data['homo_sapiens']['illumina']['test_2_fastq_gz'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test2_1_fastq_gz'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test2_2_fastq_gz'], checkIfExists: true), ] ] diff --git a/tests/modules/kat/hist/test.yml b/tests/modules/kat/hist/test.yml index 131e8a3a..94f545e6 100644 --- a/tests/modules/kat/hist/test.yml +++ b/tests/modules/kat/hist/test.yml @@ -5,13 +5,14 @@ - kat files: - path: output/kat/test.hist - md5sum: 1aceb823b6774f14f5cca15954138efd + md5sum: c6eba52b3a2653a684577a8ae20b74c1 - path: output/kat/test.hist-hash.jf27 - path: output/kat/test.hist.dist_analysis.json - md5sum: ec4317d510f752855411d13c0f24dea9 + md5sum: 52a5a2d91c71b940f36f1f0a7fd5ef10 - path: output/kat/test.hist.png - md5sum: 30daaf4d2dd4b1574e387df973c39d03 + md5sum: 49861ef1a265e0edde3550b39c64a274 - path: output/kat/versions.yml + md5sum: 296f28c007bc55f8f5490702cf3f81d4 - name: kat hist test_kat_hist_paired_end command: nextflow run tests/modules/kat/hist -entry test_kat_hist_paired_end -c tests/config/nextflow.config @@ -20,9 +21,10 @@ - kat files: - path: output/kat/test.hist - md5sum: c4e8e01996bd8f676e02690220e1def7 + md5sum: 91429091e74b1718051591d83a1ccb5d - path: output/kat/test.hist.dist_analysis.json - md5sum: 954a3d1c3fc56f80f0929315ed299824 + md5sum: 8b0dabeaff4ba706b33aa8964d687e13 - path: output/kat/test.hist.png - md5sum: 8a21ce0965342234f5982f947f4f10b0 + md5sum: e20774d0d2b979cb6ead7b7fb5ad36d9 - path: output/kat/versions.yml + md5sum: 3a07329af7ce78e76b021996cd08eacb From 7a16ece747c402c23cf1a8b36b2edf0ee468dad3 Mon Sep 17 00:00:00 2001 From: Mahesh Binzer-Panchal Date: Tue, 10 May 2022 15:13:25 +0000 Subject: [PATCH 004/168] Change md5sum to contains --- tests/modules/kat/hist/test.yml | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 
deletions(-) diff --git a/tests/modules/kat/hist/test.yml b/tests/modules/kat/hist/test.yml index 94f545e6..19d92ee2 100644 --- a/tests/modules/kat/hist/test.yml +++ b/tests/modules/kat/hist/test.yml @@ -8,7 +8,14 @@ md5sum: c6eba52b3a2653a684577a8ae20b74c1 - path: output/kat/test.hist-hash.jf27 - path: output/kat/test.hist.dist_analysis.json - md5sum: 52a5a2d91c71b940f36f1f0a7fd5ef10 + # md5sum: 52a5a2d91c71b940f36f1f0a7fd5ef10 # This is variable for an unknown reason + contains: + - "nb_peaks" + - "global_minima" + - "global_maxima" + - "mean_freq" + - "est_genome_size" + - "est_het_rate" - path: output/kat/test.hist.png md5sum: 49861ef1a265e0edde3550b39c64a274 - path: output/kat/versions.yml @@ -23,7 +30,14 @@ - path: output/kat/test.hist md5sum: 91429091e74b1718051591d83a1ccb5d - path: output/kat/test.hist.dist_analysis.json - md5sum: 8b0dabeaff4ba706b33aa8964d687e13 + # md5sum: 8b0dabeaff4ba706b33aa8964d687e13 # This is variable for an unknown reason + contains: + - "nb_peaks" + - "global_minima" + - "global_maxima" + - "mean_freq" + - "est_genome_size" + - "est_het_rate" - path: output/kat/test.hist.png md5sum: e20774d0d2b979cb6ead7b7fb5ad36d9 - path: output/kat/versions.yml From 44e13ae439e0788db8afb227e2801815c459982a Mon Sep 17 00:00:00 2001 From: "Robert A. 
Petit III" Date: Tue, 10 May 2022 21:31:43 +0000 Subject: [PATCH 005/168] add module for snippy --- modules/snippy/run/main.nf | 55 ++++++++++++ modules/snippy/run/meta.yml | 110 +++++++++++++++++++++++ tests/config/pytest_modules.yml | 12 ++- tests/modules/snippy/run/main.nf | 16 ++++ tests/modules/snippy/run/nextflow.config | 5 ++ tests/modules/snippy/run/test.yml | 39 ++++++++ 6 files changed, 233 insertions(+), 4 deletions(-) create mode 100644 modules/snippy/run/main.nf create mode 100644 modules/snippy/run/meta.yml create mode 100644 tests/modules/snippy/run/main.nf create mode 100644 tests/modules/snippy/run/nextflow.config create mode 100644 tests/modules/snippy/run/test.yml diff --git a/modules/snippy/run/main.nf b/modules/snippy/run/main.nf new file mode 100644 index 00000000..73c4b94d --- /dev/null +++ b/modules/snippy/run/main.nf @@ -0,0 +1,55 @@ +process SNIPPY_RUN { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? "bioconda::snippy=4.6.0" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/snippy:4.6.0--hdfd78af_2 ': + 'quay.io/biocontainers/snippy:4.6.0--hdfd78af_2' }" + + input: + tuple val(meta), path(reads) + path reference + + output: + tuple val(meta), path("${prefix}/${prefix}.tab") , emit: tab + tuple val(meta), path("${prefix}/${prefix}.csv") , emit: csv + tuple val(meta), path("${prefix}/${prefix}.html") , emit: html + tuple val(meta), path("${prefix}/${prefix}.vcf") , emit: vcf + tuple val(meta), path("${prefix}/${prefix}.bed") , emit: bed + tuple val(meta), path("${prefix}/${prefix}.gff") , emit: gff + tuple val(meta), path("${prefix}/${prefix}.bam") , emit: bam + tuple val(meta), path("${prefix}/${prefix}.bam.bai") , emit: bai + tuple val(meta), path("${prefix}/${prefix}.log") , emit: log + tuple val(meta), path("${prefix}/${prefix}.aligned.fa") , emit: aligned_fa + tuple val(meta), path("${prefix}/${prefix}.consensus.fa") , emit: consensus_fa + tuple val(meta), path("${prefix}/${prefix}.consensus.subs.fa"), emit: consensus_subs_fa + tuple val(meta), path("${prefix}/${prefix}.raw.vcf") , emit: raw_vcf + tuple val(meta), path("${prefix}/${prefix}.filt.vcf") , emit: filt_vcf + tuple val(meta), path("${prefix}/${prefix}.vcf.gz") , emit: vcf_gz + tuple val(meta), path("${prefix}/${prefix}.vcf.gz.csi") , emit: vcf_csi + tuple val(meta), path("${prefix}/${prefix}.txt") , emit: txt + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def read_inputs = meta.single_end ? 
"--se ${reads[0]}" : "--R1 ${reads[0]} --R2 ${reads[1]}" + """ + snippy \\ + $args \\ + --cpus $task.cpus \\ + --outdir $prefix \\ + --reference $reference \\ + --prefix $prefix \\ + $read_inputs + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + snippy: \$(echo \$(snippy --version 2>&1) | sed 's/snippy //') + END_VERSIONS + """ +} diff --git a/modules/snippy/run/meta.yml b/modules/snippy/run/meta.yml new file mode 100644 index 00000000..fa176399 --- /dev/null +++ b/modules/snippy/run/meta.yml @@ -0,0 +1,110 @@ +name: snippy +description: Rapid haploid variant calling +keywords: + - variant + - fastq + - bacteria +tools: + - snippy: + description: "Rapid bacterial SNP calling and core genome alignments" + homepage: "https://github.com/tseemann/snippy" + documentation: "https://github.com/tseemann/snippy" + tool_dev_url: "https://github.com/tseemann/snippy" + doi: "" + licence: "['GPL v2']" +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + pattern: "*.{fq,fastq,fq.gz,fastq.gz}" + - index: + type: file + description: Reference genome in GenBank (preferred) or FASTA format + pattern: "*.{gbk,gbk.gz,fa,fa.gz}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - tab: + type: file + description: A simple tab-separated summary of all the variants + pattern: "*.tab" + - csv: + type: file + description: A comma-separated version of the .tab file + pattern: "*.csv" + - html: + type: file + description: A HTML version of the .tab file + pattern: "*.html" + - vcf: + type: file + description: The final annotated variants in VCF format + pattern: "*.vcf" + - bed: + type: file + description: The variants in BED format + pattern: "*.bed" + - gff: + type: file + description: The variants in GFF3 format + pattern: "*.gff" + - bam: + type: file + description: The alignments in BAM format. Includes unmapped, multimapping reads. Excludes duplicates. + pattern: "*.bam" + - bai: + type: file + description: Index for the .bam file + pattern: "*.bam.bai" + - log: + type: file + description: A log file with the commands run and their outputs + pattern: "*.log" + - aligned_fa: + type: file + description: A version of the reference but with - at position with depth=0 and N for 0 < depth < --mincov (does not have variants) + pattern: "*.aligned.fa" + - consensus_fa: + type: file + description: A version of the reference genome with all variants instantiated + pattern: "*.consensus.fa" + - consensus_subs_fa: + type: file + description: A version of the reference genome with only substitution variants instantiated + pattern: "*.consensus.subs.fa" + - raw_vcf: + type: file + description: The unfiltered variant calls from Freebayes + pattern: "*.raw.vcf" + - filt_vcf: + type: file + description: The filtered variant calls from Freebayes + pattern: "*.filt.vcf" + - vcf_gz: + type: file + description: Compressed .vcf file via BGZIP + pattern: "*.vcf.gz" + - vcf_csi: + type: file + description: Index for the .vcf.gz via bcftools index + pattern: "*.vcf.gz.csi" + - txt: + type: file + description: Tab-separated 
columnar list of statistics + pattern: "*.txt" +authors: + - "@rpetit3" diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index 9afe83fd..5a204ac4 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -1643,14 +1643,14 @@ samtools/bam2fq: - modules/samtools/bam2fq/** - tests/modules/samtools/bam2fq/** -samtools/convert: - - modules/samtools/convert/** - - tests/modules/samtools/convert/** - samtools/collatefastq: - modules/samtools/collatefastq/** - tests/modules/samtools/collatefastq/** +samtools/convert: + - modules/samtools/convert/** + - tests/modules/samtools/convert/** + samtools/depth: - modules/samtools/depth/** - tests/modules/samtools/depth/** @@ -1787,6 +1787,10 @@ snapaligner/index: - modules/snapaligner/index/** - tests/modules/snapaligner/index/** +snippy/run: + - modules/snippy/run/** + - tests/modules/snippy/run/** + snpdists: - modules/snpdists/** - tests/modules/snpdists/** diff --git a/tests/modules/snippy/run/main.nf b/tests/modules/snippy/run/main.nf new file mode 100644 index 00000000..bfc4082e --- /dev/null +++ b/tests/modules/snippy/run/main.nf @@ -0,0 +1,16 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { SNIPPY_RUN } from '../../../../modules/snippy/run/main.nf' + +workflow test_snippy_run { + + input = [ [ id:'test', single_end:false ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ] + ] + reference = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + + SNIPPY_RUN ( input, reference ) +} diff --git a/tests/modules/snippy/run/nextflow.config b/tests/modules/snippy/run/nextflow.config new file mode 100644 index 00000000..50f50a7a --- /dev/null +++ b/tests/modules/snippy/run/nextflow.config @@ -0,0 +1,5 @@ +process { + + publishDir = { 
"${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + +} \ No newline at end of file diff --git a/tests/modules/snippy/run/test.yml b/tests/modules/snippy/run/test.yml new file mode 100644 index 00000000..e38e8b9e --- /dev/null +++ b/tests/modules/snippy/run/test.yml @@ -0,0 +1,39 @@ +- name: snippy run test_snippy_run + command: nextflow run tests/modules/snippy/run -entry test_snippy_run -c tests/config/nextflow.config -c tests/modules/snippy/run/nextflow.config + tags: + - snippy + - snippy/run + files: + - path: output/snippy/test/test.aligned.fa + md5sum: 47e3390d4167edf1955d162d37aca5e3 + - path: output/snippy/test/test.bam + - path: output/snippy/test/test.bam.bai + - path: output/snippy/test/test.bed + md5sum: d41d8cd98f00b204e9800998ecf8427e + - path: output/snippy/test/test.consensus.fa + md5sum: 483f4a5dfe60171c86ee9b7e6dff908b + - path: output/snippy/test/test.consensus.subs.fa + md5sum: 483f4a5dfe60171c86ee9b7e6dff908b + - path: output/snippy/test/test.csv + md5sum: 322f942115e5945c2041a88246166703 + - path: output/snippy/test/test.filt.vcf + contains: ['fileformat', 'freebayes', 'CHROM'] + - path: output/snippy/test/test.gff + md5sum: df19e1b84ba6f691d20c72b397c88abf + - path: output/snippy/test/test.html + md5sum: 1ccbf0ffcadae1a6b2e11681d24c9938 + - path: output/snippy/test/test.log + contains: ['snippy', 'consensus', 'subs'] + - path: output/snippy/test/test.raw.vcf + contains: ['fileformat', 'freebayes', 'CHROM'] + - path: output/snippy/test/test.tab + md5sum: beb9bde3bce985e53e8feba9ec5b136e + - path: output/snippy/test/test.txt + contains: ['DateTime', 'ReadFiles', 'VariantTotal'] + - path: output/snippy/test/test.vcf + contains: ['fileformat', 'freebayes', 'CHROM'] + - path: output/snippy/test/test.vcf.gz + - path: output/snippy/test/test.vcf.gz.csi + md5sum: bed9fa291c220a1ba04eb2d448932ffc + - path: output/snippy/versions.yml + md5sum: 518aad56c4dbefb6cbcde5ab38cf7b5d From 
f15848d4774f095f43594516769fde889a7a3d12 Mon Sep 17 00:00:00 2001 From: "Robert A. Petit III" Date: Tue, 10 May 2022 21:44:40 +0000 Subject: [PATCH 006/168] fix typo --- modules/snippy/run/meta.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/snippy/run/meta.yml b/modules/snippy/run/meta.yml index fa176399..13c923d8 100644 --- a/modules/snippy/run/meta.yml +++ b/modules/snippy/run/meta.yml @@ -100,7 +100,7 @@ output: pattern: "*.vcf.gz" - vcf_csi: type: file - description: Index for the .vcf.gz via bcftools index + description: Index for the .vcf.gz via bcftools index pattern: "*.vcf.gz.csi" - txt: type: file From 56e94f625e69e48de7c218b79857cc3a59cb9220 Mon Sep 17 00:00:00 2001 From: "Robert A. Petit III" Date: Tue, 10 May 2022 21:47:11 +0000 Subject: [PATCH 007/168] remove empty file md5 --- tests/modules/snippy/run/test.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/modules/snippy/run/test.yml b/tests/modules/snippy/run/test.yml index e38e8b9e..ac945e88 100644 --- a/tests/modules/snippy/run/test.yml +++ b/tests/modules/snippy/run/test.yml @@ -9,7 +9,6 @@ - path: output/snippy/test/test.bam - path: output/snippy/test/test.bam.bai - path: output/snippy/test/test.bed - md5sum: d41d8cd98f00b204e9800998ecf8427e - path: output/snippy/test/test.consensus.fa md5sum: 483f4a5dfe60171c86ee9b7e6dff908b - path: output/snippy/test/test.consensus.subs.fa From ef2996e4a5f4f74b4b49bbbd308c3142de8d894d Mon Sep 17 00:00:00 2001 From: "Robert A. 
Petit III" Date: Tue, 10 May 2022 21:47:57 +0000 Subject: [PATCH 008/168] match names in meta and main --- modules/snippy/run/meta.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/snippy/run/meta.yml b/modules/snippy/run/meta.yml index 13c923d8..2eb008a1 100644 --- a/modules/snippy/run/meta.yml +++ b/modules/snippy/run/meta.yml @@ -1,4 +1,4 @@ -name: snippy +name: snippy_run description: Rapid haploid variant calling keywords: - variant From 7ced6e20de5a69a25fb57b00244a49329709186a Mon Sep 17 00:00:00 2001 From: "Robert A. Petit III" Date: Wed, 11 May 2022 18:00:36 +0000 Subject: [PATCH 009/168] make prettier --- tests/modules/snippy/run/test.yml | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/tests/modules/snippy/run/test.yml b/tests/modules/snippy/run/test.yml index ac945e88..10e24f55 100644 --- a/tests/modules/snippy/run/test.yml +++ b/tests/modules/snippy/run/test.yml @@ -1,8 +1,9 @@ - name: snippy run test_snippy_run - command: nextflow run tests/modules/snippy/run -entry test_snippy_run -c tests/config/nextflow.config -c tests/modules/snippy/run/nextflow.config + command: | + nextflow run tests/modules/snippy/run -entry test_snippy_run -c tests/config/nextflow.config -c tests/modules/snippy/run/nextflow.config tags: - - snippy - snippy/run + - snippy files: - path: output/snippy/test/test.aligned.fa md5sum: 47e3390d4167edf1955d162d37aca5e3 @@ -16,21 +17,21 @@ - path: output/snippy/test/test.csv md5sum: 322f942115e5945c2041a88246166703 - path: output/snippy/test/test.filt.vcf - contains: ['fileformat', 'freebayes', 'CHROM'] + contains: ["fileformat", "freebayes", "CHROM"] - path: output/snippy/test/test.gff md5sum: df19e1b84ba6f691d20c72b397c88abf - path: output/snippy/test/test.html md5sum: 1ccbf0ffcadae1a6b2e11681d24c9938 - path: output/snippy/test/test.log - contains: ['snippy', 'consensus', 'subs'] + contains: ["snippy", "consensus", "subs"] - path: output/snippy/test/test.raw.vcf - contains: 
['fileformat', 'freebayes', 'CHROM'] + contains: ["fileformat", "freebayes", "CHROM"] - path: output/snippy/test/test.tab md5sum: beb9bde3bce985e53e8feba9ec5b136e - path: output/snippy/test/test.txt - contains: ['DateTime', 'ReadFiles', 'VariantTotal'] + contains: ["DateTime", "ReadFiles", "VariantTotal"] - path: output/snippy/test/test.vcf - contains: ['fileformat', 'freebayes', 'CHROM'] + contains: ["fileformat", "freebayes", "CHROM"] - path: output/snippy/test/test.vcf.gz - path: output/snippy/test/test.vcf.gz.csi md5sum: bed9fa291c220a1ba04eb2d448932ffc From c70b169f61620f35bd6633426f4b236ced9e067d Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 20 May 2022 13:34:54 +0200 Subject: [PATCH 010/168] create modules --- modules/bcftools/rhocall/main.nf | 75 +++++++++++++++++++ modules/bcftools/rhocall/meta.yml | 51 +++++++++++++ tests/config/pytest_modules.yml | 4 + tests/modules/bcftools/rhocall/main.nf | 15 ++++ .../modules/bcftools/rhocall/nextflow.config | 5 ++ tests/modules/bcftools/rhocall/test.yml | 14 ++++ 6 files changed, 164 insertions(+) create mode 100644 modules/bcftools/rhocall/main.nf create mode 100644 modules/bcftools/rhocall/meta.yml create mode 100644 tests/modules/bcftools/rhocall/main.nf create mode 100644 tests/modules/bcftools/rhocall/nextflow.config create mode 100644 tests/modules/bcftools/rhocall/test.yml diff --git a/modules/bcftools/rhocall/main.nf b/modules/bcftools/rhocall/main.nf new file mode 100644 index 00000000..7036963e --- /dev/null +++ b/modules/bcftools/rhocall/main.nf @@ -0,0 +1,75 @@ +// TODO nf-core: If in doubt look at other nf-core/modules to see how we are doing things! 
:) +// https://github.com/nf-core/modules/tree/master/modules +// You can also ask for help via your pull request or on the #modules channel on the nf-core Slack workspace: +// https://nf-co.re/join +// TODO nf-core: A module file SHOULD only define input and output files as command-line parameters. +// All other parameters MUST be provided using the "task.ext" directive, see here: +// https://www.nextflow.io/docs/latest/process.html#ext +// where "task.ext" is a string. +// Any parameters that need to be evaluated in the context of a particular sample +// e.g. single-end/paired-end data MUST also be defined and evaluated appropriately. +// TODO nf-core: Software that can be piped together SHOULD be added to separate module files +// unless there is a run-time, storage advantage in implementing in this way +// e.g. it's ok to have a single module for bwa to output BAM instead of SAM: +// bwa mem | samtools view -B -T ref.fasta +// TODO nf-core: Optional inputs are not currently supported by Nextflow. However, using an empty +// list (`[]`) instead of a file can be used to work around this issue. + +process BCFTOOLS_RHOCALL { + tag "$meta.id" + label 'process_medium' + + // TODO nf-core: List required Conda package(s). + // Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10"). + // For Conda, the build (i.e. "h9402c20_2") must be EXCLUDED to support installation on different operating systems. + // TODO nf-core: See section in main README for further information regarding finding and adding container addresses to the section below. + conda (params.enable_conda ? "bioconda::bcftools=1.15.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bcftools:1.15.1--h0ea216a_0': + 'quay.io/biocontainers/bcftools:1.15.1--h0ea216a_0' }" + + input: + // TODO nf-core: Where applicable all sample-specific information e.g. 
"id", "single_end", "read_group" + // MUST be provided as an input via a Groovy Map called "meta". + // This information may not be required in some instances e.g. indexing reference genome files: + // https://github.com/nf-core/modules/blob/master/modules/bwa/index/main.nf + // TODO nf-core: Where applicable please provide/convert compressed files as input/output + // e.g. "*.fastq.gz" and NOT "*.fastq", "*.bam" and NOT "*.sam" etc. + tuple val(meta), path(bam) + + output: + // TODO nf-core: Named file extensions MUST be emitted for ALL output channels + tuple val(meta), path("*.bam"), emit: bam + // TODO nf-core: List additional required output channels/values here + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + // TODO nf-core: Where possible, a command MUST be provided to obtain the version number of the software e.g. 1.10 + // If the software is unable to output a version number on the command-line then it can be manually specified + // e.g. https://github.com/nf-core/modules/blob/master/modules/homer/annotatepeaks/main.nf + // Each software used MUST provide the software name and version number in the YAML version file (versions.yml) + // TODO nf-core: It MUST be possible to pass additional parameters to the tool as a command-line string via the "task.ext.args" directive + // TODO nf-core: If the tool supports multi-threading then you MUST provide the appropriate parameter + // using the Nextflow "task" variable e.g. "--threads $task.cpus" + // TODO nf-core: Please replace the example samtools command below with your module's command + // TODO nf-core: Please indent the command appropriately (4 spaces!!) 
to help with readability ;) + """ + samtools \\ + sort \\ + $args \\ + -@ $task.cpus \\ + -o ${prefix}.bam \\ + -T $prefix \\ + $bam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//' )) + END_VERSIONS + """ +} diff --git a/modules/bcftools/rhocall/meta.yml b/modules/bcftools/rhocall/meta.yml new file mode 100644 index 00000000..fa7baf2a --- /dev/null +++ b/modules/bcftools/rhocall/meta.yml @@ -0,0 +1,51 @@ +name: "bcftools_rhocall" +## TODO nf-core: Add a description of the module and list keywords +description: write your description here +keywords: + - sort +tools: + - "bcftools": + ## TODO nf-core: Add a description and other details for the software below + description: "BCFtools is a set of utilities that manipulate variant calls in the Variant Call Format (VCF) and its binary counterpart BCF. All commands work transparently with both VCFs and BCFs, both uncompressed and BGZF-compressed. Most commands accept VCF, bgzipped VCF and BCF with filetype detected automatically even when streaming from a pipe. Indexed VCF and BCF will work in all situations. Un-indexed VCF and BCF and streams will work in most, but not all situations." + homepage: "None" + documentation: "None" + tool_dev_url: "None" + doi: "" + licence: "['GPL']" + +## TODO nf-core: Add a description of all of the variables used as input +input: + # Only when we have meta + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + # + ## TODO nf-core: Delete / customise this example input + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + +## TODO nf-core: Add a description of all of the variables used as output +output: + #Only when we have meta + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + # + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + ## TODO nf-core: Delete / customise this example output + - bam: + type: file + description: Sorted BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + +authors: + - "@ramprasadn" diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index 16411798..858cbd47 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -166,6 +166,10 @@ bcftools/reheader: - modules/bcftools/reheader/** - tests/modules/bcftools/reheader/** +bcftools/rhocall: + - modules/bcftools/rhocall/** + - tests/modules/bcftools/rhocall/** + bcftools/sort: - modules/bcftools/sort/** - tests/modules/bcftools/sort/** diff --git a/tests/modules/bcftools/rhocall/main.nf b/tests/modules/bcftools/rhocall/main.nf new file mode 100644 index 00000000..2630e4f7 --- /dev/null +++ b/tests/modules/bcftools/rhocall/main.nf @@ -0,0 +1,15 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { BCFTOOLS_RHOCALL } from '../../../../modules/bcftools/rhocall/main.nf' + +workflow test_bcftools_rhocall { + + input = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true) + ] + + BCFTOOLS_RHOCALL ( input ) +} diff --git a/tests/modules/bcftools/rhocall/nextflow.config b/tests/modules/bcftools/rhocall/nextflow.config new file mode 100644 index 00000000..50f50a7a --- /dev/null +++ b/tests/modules/bcftools/rhocall/nextflow.config @@ -0,0 +1,5 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + +} \ No newline at end of file diff --git a/tests/modules/bcftools/rhocall/test.yml b/tests/modules/bcftools/rhocall/test.yml new file mode 100644 index 00000000..5d5f44f6 --- /dev/null +++ b/tests/modules/bcftools/rhocall/test.yml @@ -0,0 +1,14 @@ +## TODO nf-core: Please run the 
following command to build this file: +# nf-core modules create-test-yml bcftools/rhocall +- name: "bcftools rhocall" + command: nextflow run ./tests/modules/bcftools/rhocall -entry test_bcftools_rhocall -c ./tests/config/nextflow.config -c ./tests/modules/bcftools/rhocall/nextflow.config + tags: + - "bcftools" + # + - "bcftools/rhocall" + # + files: + - path: "output/bcftools/test.bam" + md5sum: e667c7caad0bc4b7ac383fd023c654fc + - path: output/bcftools/versions.yml + md5sum: a01fe51bc4c6a3a6226fbf77b2c7cf3b From 2542c9d1767dbb374c6fb07583eda166f5287e18 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 20 May 2022 16:46:19 +0200 Subject: [PATCH 011/168] add tests --- modules/bcftools/rhocall/main.nf | 75 ------------------- modules/bcftools/rhocall/meta.yml | 51 ------------- modules/bcftools/roh/main.nf | 61 +++++++++++++++ modules/bcftools/roh/meta.yml | 56 ++++++++++++++ tests/config/pytest_modules.yml | 6 +- tests/modules/bcftools/rhocall/main.nf | 15 ---- tests/modules/bcftools/rhocall/test.yml | 14 ---- tests/modules/bcftools/roh/main.nf | 20 +++++ .../bcftools/{rhocall => roh}/nextflow.config | 0 tests/modules/bcftools/roh/test.yml | 8 ++ 10 files changed, 148 insertions(+), 158 deletions(-) delete mode 100644 modules/bcftools/rhocall/main.nf delete mode 100644 modules/bcftools/rhocall/meta.yml create mode 100644 modules/bcftools/roh/main.nf create mode 100644 modules/bcftools/roh/meta.yml delete mode 100644 tests/modules/bcftools/rhocall/main.nf delete mode 100644 tests/modules/bcftools/rhocall/test.yml create mode 100644 tests/modules/bcftools/roh/main.nf rename tests/modules/bcftools/{rhocall => roh}/nextflow.config (100%) create mode 100644 tests/modules/bcftools/roh/test.yml diff --git a/modules/bcftools/rhocall/main.nf b/modules/bcftools/rhocall/main.nf deleted file mode 100644 index 7036963e..00000000 --- a/modules/bcftools/rhocall/main.nf +++ /dev/null @@ -1,75 +0,0 @@ -// TODO nf-core: If 
in doubt look at other nf-core/modules to see how we are doing things! :) -// https://github.com/nf-core/modules/tree/master/modules -// You can also ask for help via your pull request or on the #modules channel on the nf-core Slack workspace: -// https://nf-co.re/join -// TODO nf-core: A module file SHOULD only define input and output files as command-line parameters. -// All other parameters MUST be provided using the "task.ext" directive, see here: -// https://www.nextflow.io/docs/latest/process.html#ext -// where "task.ext" is a string. -// Any parameters that need to be evaluated in the context of a particular sample -// e.g. single-end/paired-end data MUST also be defined and evaluated appropriately. -// TODO nf-core: Software that can be piped together SHOULD be added to separate module files -// unless there is a run-time, storage advantage in implementing in this way -// e.g. it's ok to have a single module for bwa to output BAM instead of SAM: -// bwa mem | samtools view -B -T ref.fasta -// TODO nf-core: Optional inputs are not currently supported by Nextflow. However, using an empty -// list (`[]`) instead of a file can be used to work around this issue. - -process BCFTOOLS_RHOCALL { - tag "$meta.id" - label 'process_medium' - - // TODO nf-core: List required Conda package(s). - // Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10"). - // For Conda, the build (i.e. "h9402c20_2") must be EXCLUDED to support installation on different operating systems. - // TODO nf-core: See section in main README for further information regarding finding and adding container addresses to the section below. - conda (params.enable_conda ? "bioconda::bcftools=1.15.1" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/bcftools:1.15.1--h0ea216a_0': - 'quay.io/biocontainers/bcftools:1.15.1--h0ea216a_0' }" - - input: - // TODO nf-core: Where applicable all sample-specific information e.g. "id", "single_end", "read_group" - // MUST be provided as an input via a Groovy Map called "meta". - // This information may not be required in some instances e.g. indexing reference genome files: - // https://github.com/nf-core/modules/blob/master/modules/bwa/index/main.nf - // TODO nf-core: Where applicable please provide/convert compressed files as input/output - // e.g. "*.fastq.gz" and NOT "*.fastq", "*.bam" and NOT "*.sam" etc. - tuple val(meta), path(bam) - - output: - // TODO nf-core: Named file extensions MUST be emitted for ALL output channels - tuple val(meta), path("*.bam"), emit: bam - // TODO nf-core: List additional required output channels/values here - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - // TODO nf-core: Where possible, a command MUST be provided to obtain the version number of the software e.g. 1.10 - // If the software is unable to output a version number on the command-line then it can be manually specified - // e.g. https://github.com/nf-core/modules/blob/master/modules/homer/annotatepeaks/main.nf - // Each software used MUST provide the software name and version number in the YAML version file (versions.yml) - // TODO nf-core: It MUST be possible to pass additional parameters to the tool as a command-line string via the "task.ext.args" directive - // TODO nf-core: If the tool supports multi-threading then you MUST provide the appropriate parameter - // using the Nextflow "task" variable e.g. "--threads $task.cpus" - // TODO nf-core: Please replace the example samtools command below with your module's command - // TODO nf-core: Please indent the command appropriately (4 spaces!!) 
to help with readability ;) - """ - samtools \\ - sort \\ - $args \\ - -@ $task.cpus \\ - -o ${prefix}.bam \\ - -T $prefix \\ - $bam - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bcftools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//' )) - END_VERSIONS - """ -} diff --git a/modules/bcftools/rhocall/meta.yml b/modules/bcftools/rhocall/meta.yml deleted file mode 100644 index fa7baf2a..00000000 --- a/modules/bcftools/rhocall/meta.yml +++ /dev/null @@ -1,51 +0,0 @@ -name: "bcftools_rhocall" -## TODO nf-core: Add a description of the module and list keywords -description: write your description here -keywords: - - sort -tools: - - "bcftools": - ## TODO nf-core: Add a description and other details for the software below - description: "BCFtools is a set of utilities that manipulate variant calls in the Variant Call Format (VCF) and its binary counterpart BCF. All commands work transparently with both VCFs and BCFs, both uncompressed and BGZF-compressed. Most commands accept VCF, bgzipped VCF and BCF with filetype detected automatically even when streaming from a pipe. Indexed VCF and BCF will work in all situations. Un-indexed VCF and BCF and streams will work in most, but not all situations." - homepage: "None" - documentation: "None" - tool_dev_url: "None" - doi: "" - licence: "['GPL']" - -## TODO nf-core: Add a description of all of the variables used as input -input: - # Only when we have meta - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - # - ## TODO nf-core: Delete / customise this example input - - bam: - type: file - description: BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" - -## TODO nf-core: Add a description of all of the variables used as output -output: - #Only when we have meta - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - # - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - ## TODO nf-core: Delete / customise this example output - - bam: - type: file - description: Sorted BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" - -authors: - - "@ramprasadn" diff --git a/modules/bcftools/roh/main.nf b/modules/bcftools/roh/main.nf new file mode 100644 index 00000000..55d8c4a3 --- /dev/null +++ b/modules/bcftools/roh/main.nf @@ -0,0 +1,61 @@ +process BCFTOOLS_ROH { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? "bioconda::bcftools=1.15.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bcftools:1.15.1--h0ea216a_0': + 'quay.io/biocontainers/bcftools:1.15.1--h0ea216a_0' }" + + input: + tuple val(meta), path(vcf), path(tbi) + path af_file + path genetic_map + path regions_file + path samples_file + path targets_file + + output: + tuple val(meta), path("*.roh"), emit: roh + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def af_read = af_file ? "--AF-file ${af_file}" : '' + def gen_map = genetic_map ? "--genetic-map ${genetic_map}" : '' + def reg_file = regions_file ? "--regions-file ${regions_file}" : '' + def samp_file = samples_file ? "--samples-file ${samples_file}" : '' + def targ_file = targets_file ? 
"--targets-file ${targets_file}" : '' + """ + bcftools \\ + roh \\ + $args \\ + $af_read \\ + $gen_map \\ + $reg_file \\ + $samp_file \\ + $targ_file \\ + -o ${prefix}.roh.gz \\ + $vcf + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.roh + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/bcftools/roh/meta.yml b/modules/bcftools/roh/meta.yml new file mode 100644 index 00000000..09535fe8 --- /dev/null +++ b/modules/bcftools/roh/meta.yml @@ -0,0 +1,56 @@ +name: "bcftools_roh" +## TODO nf-core: Add a description of the module and list keywords +description: write your description here +keywords: + - sort +tools: + - "roh": + description: "A program for detecting runs of homo/autozygosity. Only bi-allelic sites are considered." + homepage: https://www.htslib.org/ + documentation: http://www.htslib.org/doc/bcftools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcf: + type: file + description: VCF file + pattern: "*.{vcf,.vcf.gz}" + - af_file: + type: file + description: "Read allele frequencies from a tab-delimited file containing the columns: CHROM\tPOS\tREF,ALT\tAF." + - genetic_map: + type: file + description: "Genetic map in the format required also by IMPUTE2." + - regions_file: + type: file + description: "Regions can be specified either on command line or in a VCF, BED, or tab-delimited file (the default)." + - samples_file: + type: file + description: "File of sample names to include or exclude if prefixed with '^'." 
+ - targets_file: + type: file + description: "Targets can be specified either on command line or in a VCF, BED, or tab-delimited file (the default)." + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - roh: + type: file + description: Contains site-specific and/or per-region runs of homo/autozygosity calls. + pattern: "*.{roh}" + +authors: + - "@ramprasadn" diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index 858cbd47..e15315e9 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -166,9 +166,9 @@ bcftools/reheader: - modules/bcftools/reheader/** - tests/modules/bcftools/reheader/** -bcftools/rhocall: - - modules/bcftools/rhocall/** - - tests/modules/bcftools/rhocall/** +bcftools/roh: + - modules/bcftools/roh/** + - tests/modules/bcftools/roh/** bcftools/sort: - modules/bcftools/sort/** diff --git a/tests/modules/bcftools/rhocall/main.nf b/tests/modules/bcftools/rhocall/main.nf deleted file mode 100644 index 2630e4f7..00000000 --- a/tests/modules/bcftools/rhocall/main.nf +++ /dev/null @@ -1,15 +0,0 @@ -#!/usr/bin/env nextflow - -nextflow.enable.dsl = 2 - -include { BCFTOOLS_RHOCALL } from '../../../../modules/bcftools/rhocall/main.nf' - -workflow test_bcftools_rhocall { - - input = [ - [ id:'test', single_end:false ], // meta map - file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true) - ] - - BCFTOOLS_RHOCALL ( input ) -} diff --git a/tests/modules/bcftools/rhocall/test.yml b/tests/modules/bcftools/rhocall/test.yml deleted file mode 100644 index 5d5f44f6..00000000 --- a/tests/modules/bcftools/rhocall/test.yml +++ /dev/null @@ -1,14 +0,0 @@ -## TODO nf-core: Please run the following command to build this file: -# nf-core modules create-test-yml bcftools/rhocall -- name: "bcftools 
rhocall" - command: nextflow run ./tests/modules/bcftools/rhocall -entry test_bcftools_rhocall -c ./tests/config/nextflow.config -c ./tests/modules/bcftools/rhocall/nextflow.config - tags: - - "bcftools" - # - - "bcftools/rhocall" - # - files: - - path: "output/bcftools/test.bam" - md5sum: e667c7caad0bc4b7ac383fd023c654fc - - path: output/bcftools/versions.yml - md5sum: a01fe51bc4c6a3a6226fbf77b2c7cf3b diff --git a/tests/modules/bcftools/roh/main.nf b/tests/modules/bcftools/roh/main.nf new file mode 100644 index 00000000..5dd6f897 --- /dev/null +++ b/tests/modules/bcftools/roh/main.nf @@ -0,0 +1,20 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { BCFTOOLS_ROH } from '../../../../modules/bcftools/roh/main.nf' + +workflow test_bcftools_roh { + + input = [ [ id:'out' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true)] + + af_file = [] + gen_map = [] + regions = [] + targets = [] + samples = [] + + BCFTOOLS_ROH ( input, af_file, gen_map, regions, samples, targets ) +} diff --git a/tests/modules/bcftools/rhocall/nextflow.config b/tests/modules/bcftools/roh/nextflow.config similarity index 100% rename from tests/modules/bcftools/rhocall/nextflow.config rename to tests/modules/bcftools/roh/nextflow.config diff --git a/tests/modules/bcftools/roh/test.yml b/tests/modules/bcftools/roh/test.yml new file mode 100644 index 00000000..7a8c754b --- /dev/null +++ b/tests/modules/bcftools/roh/test.yml @@ -0,0 +1,8 @@ +- name: "bcftools roh" + command: nextflow run ./tests/modules/bcftools/roh -entry test_bcftools_roh -c ./tests/config/nextflow.config -c ./tests/modules/bcftools/roh/nextflow.config + tags: + - "bcftools" + - "bcftools/rhoh" + files: + - path: "output/bcftools/test.roh" + - path: output/bcftools/versions.yml From 635edc0022a39acd295a65f14dec482c6a36b9d1 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj 
<20065894+ramprasadn@users.noreply.github.com> Date: Fri, 20 May 2022 17:07:42 +0200 Subject: [PATCH 012/168] fix typo --- modules/bcftools/roh/main.nf | 2 +- tests/modules/bcftools/roh/test.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/bcftools/roh/main.nf b/modules/bcftools/roh/main.nf index 55d8c4a3..890b6fad 100644 --- a/modules/bcftools/roh/main.nf +++ b/modules/bcftools/roh/main.nf @@ -39,7 +39,7 @@ process BCFTOOLS_ROH { $reg_file \\ $samp_file \\ $targ_file \\ - -o ${prefix}.roh.gz \\ + -o ${prefix}.roh \\ $vcf cat <<-END_VERSIONS > versions.yml diff --git a/tests/modules/bcftools/roh/test.yml b/tests/modules/bcftools/roh/test.yml index 7a8c754b..2387ee39 100644 --- a/tests/modules/bcftools/roh/test.yml +++ b/tests/modules/bcftools/roh/test.yml @@ -2,7 +2,7 @@ command: nextflow run ./tests/modules/bcftools/roh -entry test_bcftools_roh -c ./tests/config/nextflow.config -c ./tests/modules/bcftools/roh/nextflow.config tags: - "bcftools" - - "bcftools/rhoh" + - "bcftools/roh" files: - path: "output/bcftools/test.roh" - path: output/bcftools/versions.yml From 995bf88327b5f4dad1af9abbe3a2d8d24968fb72 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Fri, 20 May 2022 17:10:09 +0200 Subject: [PATCH 013/168] add stub --- tests/modules/bcftools/roh/main.nf | 17 ++++++++++++++++- tests/modules/bcftools/roh/test.yml | 11 ++++++++++- 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/tests/modules/bcftools/roh/main.nf b/tests/modules/bcftools/roh/main.nf index 5dd6f897..3eb534b6 100644 --- a/tests/modules/bcftools/roh/main.nf +++ b/tests/modules/bcftools/roh/main.nf @@ -6,7 +6,22 @@ include { BCFTOOLS_ROH } from '../../../../modules/bcftools/roh/main.nf' workflow test_bcftools_roh { - input = [ [ id:'out' ], // meta map + input = [ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true), + 
file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true)] + + af_file = [] + gen_map = [] + regions = [] + targets = [] + samples = [] + + BCFTOOLS_ROH ( input, af_file, gen_map, regions, samples, targets ) +} + +workflow test_bcftools_roh_stub { + + input = [ [ id:'test' ], // meta map file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true), file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true)] diff --git a/tests/modules/bcftools/roh/test.yml b/tests/modules/bcftools/roh/test.yml index 2387ee39..9cc50a66 100644 --- a/tests/modules/bcftools/roh/test.yml +++ b/tests/modules/bcftools/roh/test.yml @@ -5,4 +5,13 @@ - "bcftools/roh" files: - path: "output/bcftools/test.roh" - - path: output/bcftools/versions.yml + - path: "output/bcftools/versions.yml" + +- name: "bcftools roh stub" + command: nextflow run ./tests/modules/bcftools/roh -entry test_bcftools_roh_stub -c ./tests/config/nextflow.config -c ./tests/modules/bcftools/roh/nextflow.config + tags: + - "bcftools" + - "bcftools/roh" + files: + - path: "output/bcftools/test.roh" + - path: "output/bcftools/versions.yml" From b40b147004d02af5c4c0fb7ae17986d8727514db Mon Sep 17 00:00:00 2001 From: Mahesh Binzer-Panchal Date: Mon, 23 May 2022 08:51:29 +0200 Subject: [PATCH 014/168] Update tests/modules/kat/hist/test.yml Co-authored-by: Sateesh Peri <105730406+sateeshblue@users.noreply.github.com> --- tests/modules/kat/hist/test.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/modules/kat/hist/test.yml b/tests/modules/kat/hist/test.yml index 19d92ee2..90502485 100644 --- a/tests/modules/kat/hist/test.yml +++ b/tests/modules/kat/hist/test.yml @@ -19,7 +19,6 @@ - path: output/kat/test.hist.png md5sum: 49861ef1a265e0edde3550b39c64a274 - path: output/kat/versions.yml - md5sum: 296f28c007bc55f8f5490702cf3f81d4 - name: kat hist test_kat_hist_paired_end command: nextflow run tests/modules/kat/hist -entry test_kat_hist_paired_end 
-c tests/config/nextflow.config From d7c1fe14778375e0751372b553e77fc84efb0541 Mon Sep 17 00:00:00 2001 From: Mahesh Binzer-Panchal Date: Mon, 23 May 2022 08:51:49 +0200 Subject: [PATCH 015/168] Update tests/modules/kat/hist/test.yml Co-authored-by: Sateesh Peri <105730406+sateeshblue@users.noreply.github.com> --- tests/modules/kat/hist/test.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/modules/kat/hist/test.yml b/tests/modules/kat/hist/test.yml index 90502485..391a3a21 100644 --- a/tests/modules/kat/hist/test.yml +++ b/tests/modules/kat/hist/test.yml @@ -40,4 +40,3 @@ - path: output/kat/test.hist.png md5sum: e20774d0d2b979cb6ead7b7fb5ad36d9 - path: output/kat/versions.yml - md5sum: 3a07329af7ce78e76b021996cd08eacb From 8d5680a6b782d97b359a4d2702259b71f96f5441 Mon Sep 17 00:00:00 2001 From: jtangrot Date: Mon, 23 May 2022 10:01:55 +0200 Subject: [PATCH 016/168] Add vserach usearch_global --- modules/vsearch/usearchglobal/main.nf | 51 +++++++++++++++++++ modules/vsearch/usearchglobal/meta.yml | 38 ++++++++++++++ tests/config/pytest_modules.yml | 4 ++ tests/modules/vsearch/usearchglobal/main.nf | 12 +++++ .../vsearch/usearchglobal/nextflow.config | 5 ++ tests/modules/vsearch/usearchglobal/test.yml | 7 +++ .../vsearch/usearchglobal/test.yml.ori | 14 +++++ 7 files changed, 131 insertions(+) create mode 100644 modules/vsearch/usearchglobal/main.nf create mode 100644 modules/vsearch/usearchglobal/meta.yml create mode 100644 tests/modules/vsearch/usearchglobal/main.nf create mode 100644 tests/modules/vsearch/usearchglobal/nextflow.config create mode 100644 tests/modules/vsearch/usearchglobal/test.yml create mode 100644 tests/modules/vsearch/usearchglobal/test.yml.ori diff --git a/modules/vsearch/usearchglobal/main.nf b/modules/vsearch/usearchglobal/main.nf new file mode 100644 index 00000000..65ee62cc --- /dev/null +++ b/modules/vsearch/usearchglobal/main.nf @@ -0,0 +1,51 @@ +// TODO nf-core: A module file SHOULD only define input and output files as 
command-line parameters. +// All other parameters MUST be provided using the "task.ext" directive, see here: +// https://www.nextflow.io/docs/latest/process.html#ext +// where "task.ext" is a string. +// Any parameters that need to be evaluated in the context of a particular sample +// e.g. single-end/paired-end data MUST also be defined and evaluated appropriately. +// TODO nf-core: Software that can be piped together SHOULD be added to separate module files +// unless there is a run-time, storage advantage in implementing in this way +// e.g. it's ok to have a single module for bwa to output BAM instead of SAM: +// bwa mem | samtools view -B -T ref.fasta + +process VSEARCH_USEARCHGLOBAL { + tag '$queryfasta' + label 'process_low' + + conda (params.enable_conda ? "bioconda::vsearch=2.21.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/vsearch:2.21.1--hf1761c0_1': + 'quay.io/biocontainers/vsearch:2.21.1--h95f258a_0' }" + + input: + path queryfasta + path db + val outprefix + + // TODO nf-core: Where applicable please provide/convert compressed files as input/output + // e.g. "*.fastq.gz" and NOT "*.fastq", "*.bam" and NOT "*.sam" etc. 
+ + output: + path ("*.tsv") , emit: tsv + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + vsearch \\ + --usearch_global $queryfasta \\ + --db $db \\ + --threads $task.cpus \\ + $args \\ + --blast6out ${outprefix}.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + vsearch: \$(vsearch --version 2>&1 | head -n 1 | sed 's/vsearch //g' | sed 's/,.*//g' | sed 's/^v//' | sed 's/_.*//') + END_VERSIONS + """ +} diff --git a/modules/vsearch/usearchglobal/meta.yml b/modules/vsearch/usearchglobal/meta.yml new file mode 100644 index 00000000..8cb97446 --- /dev/null +++ b/modules/vsearch/usearchglobal/meta.yml @@ -0,0 +1,38 @@ +name: "vsearch_usearchglobal" +description: Compare target sequences to fasta-formatted query sequences using global pairwise alignment. +keywords: + - vsearch + - usearch + - alignment + - fasta +tools: + - "vsearch": + description: "VSEARCH is a versatile open-source tool for microbiome analysis, including chimera detection, clustering, dereplication and rereplication, extraction, FASTA/FASTQ/SFF file processing, masking, orienting, pair-wise alignment, restriction site cutting, searching, shuffling, sorting, subsampling, and taxonomic classification of amplicon sequences for metagenomics, genomics, and population genetics. (USEARCH alternative)" + homepage: "https://github.com/torognes/vsearch" + documentation: "None" + tool_dev_url: "https://github.com/torognes/vsearch" + doi: "doi: 10.7717/peerj.2584" + licence: "['GPL v3-or-later OR BSD-2-clause']" + +input: + - queryfasta: + type: file + description: Query sequences in FASTA format + pattern: "*.{fasta,fa,fna,faa}" + - db: + type: file + description: Reference database file. It may be in FASTA or UDB format. 
+ pattern: "*" + +output: + - blast6out: + type: file + description: Tab delimited results in blast-like format + pattern: "*.{tsv}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@jtangrot" diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index 2d0d3226..05188329 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -2024,6 +2024,10 @@ vcftools: - modules/vcftools/** - tests/modules/vcftools/** +vsearch/usearchglobal: + - modules/vsearch/usearchglobal/** + - tests/modules/vsearch/usearchglobal/** + yara/index: - modules/yara/index/** - tests/modules/yara/index/** diff --git a/tests/modules/vsearch/usearchglobal/main.nf b/tests/modules/vsearch/usearchglobal/main.nf new file mode 100644 index 00000000..02becd21 --- /dev/null +++ b/tests/modules/vsearch/usearchglobal/main.nf @@ -0,0 +1,12 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { VSEARCH_USEARCHGLOBAL } from '../../../../modules/vsearch/usearchglobal/main.nf' + +workflow test_vsearch_usearchglobal { + + query = file(params.test_data['sarscov2']['genome']['transcriptome_fasta'], checkIfExists: true) + db = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + VSEARCH_USEARCHGLOBAL ( query, db, "blast6out_results" ) +} diff --git a/tests/modules/vsearch/usearchglobal/nextflow.config b/tests/modules/vsearch/usearchglobal/nextflow.config new file mode 100644 index 00000000..aab29979 --- /dev/null +++ b/tests/modules/vsearch/usearchglobal/nextflow.config @@ -0,0 +1,5 @@ +process { + ext.args = '--id 0.985' + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + +} \ No newline at end of file diff --git a/tests/modules/vsearch/usearchglobal/test.yml b/tests/modules/vsearch/usearchglobal/test.yml new file mode 100644 index 00000000..b013519d --- /dev/null +++ 
b/tests/modules/vsearch/usearchglobal/test.yml @@ -0,0 +1,7 @@ +- name: vsearch usearchglobal test_vsearch_usearchglobal + command: nextflow run ./tests/modules/vsearch/usearchglobal -entry test_vsearch_usearchglobal -c ./tests/config/nextflow.config -c ./tests/modules/vsearch/usearchglobal/nextflow.config + tags: + - vsearch/usearchglobal + files: + - path: output/vsearch/blast6out_results.tsv + md5sum: 09733131643f1d951321a6e17a35eb8c diff --git a/tests/modules/vsearch/usearchglobal/test.yml.ori b/tests/modules/vsearch/usearchglobal/test.yml.ori new file mode 100644 index 00000000..df644165 --- /dev/null +++ b/tests/modules/vsearch/usearchglobal/test.yml.ori @@ -0,0 +1,14 @@ +## TODO nf-core: Please run the following command to build this file: +# nf-core modules create-test-yml vsearch/usearchglobal +- name: "vsearch usearchglobal" + command: nextflow run ./tests/modules/vsearch/usearchglobal -entry test_vsearch_usearchglobal -c ./tests/config/nextflow.config -c ./tests/modules/vsearch/usearchglobal/nextflow.config + tags: + - "vsearch" + # + - "vsearch/usearchglobal" + # + files: + - path: "output/vsearch/blast6out_results.tsv" + md5sum: e667c7caad0bc4b7ac383fd023c654fc + - path: output/vsearch/versions.yml + md5sum: a01fe51bc4c6a3a6226fbf77b2c7cf3b From 06a3bff20135aa898d373ef410fa96809fd78aef Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Mon, 23 May 2022 12:59:48 +0200 Subject: [PATCH 017/168] fix lint error --- tests/modules/bcftools/roh/nextflow.config | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/modules/bcftools/roh/nextflow.config b/tests/modules/bcftools/roh/nextflow.config index 50f50a7a..8730f1c4 100644 --- a/tests/modules/bcftools/roh/nextflow.config +++ b/tests/modules/bcftools/roh/nextflow.config @@ -1,5 +1,5 @@ process { publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } - -} \ No newline at end of file + +} 
From 853b76d16a263b314e5807f675d532b5cc1a68d6 Mon Sep 17 00:00:00 2001 From: jtangrot Date: Mon, 23 May 2022 14:02:10 +0200 Subject: [PATCH 018/168] Add options for output file type --- modules/vsearch/usearchglobal/main.nf | 38 +++++++++++++++---- tests/modules/vsearch/usearchglobal/main.nf | 14 ++++++- .../vsearch/usearchglobal/nextflow.config | 4 +- tests/modules/vsearch/usearchglobal/test.yml | 14 ++++++- 4 files changed, 58 insertions(+), 12 deletions(-) diff --git a/modules/vsearch/usearchglobal/main.nf b/modules/vsearch/usearchglobal/main.nf index 65ee62cc..0b31842f 100644 --- a/modules/vsearch/usearchglobal/main.nf +++ b/modules/vsearch/usearchglobal/main.nf @@ -10,7 +10,7 @@ // bwa mem | samtools view -B -T ref.fasta process VSEARCH_USEARCHGLOBAL { - tag '$queryfasta' + tag "$meta.id" label 'process_low' conda (params.enable_conda ? "bioconda::vsearch=2.21.1" : null) @@ -19,29 +19,53 @@ process VSEARCH_USEARCHGLOBAL { 'quay.io/biocontainers/vsearch:2.21.1--h95f258a_0' }" input: - path queryfasta + tuple val(meta), path(queryfasta) path db - val outprefix + val outoption + val user_columns // TODO nf-core: Where applicable please provide/convert compressed files as input/output // e.g. "*.fastq.gz" and NOT "*.fastq", "*.bam" and NOT "*.sam" etc. output: - path ("*.tsv") , emit: tsv - path "versions.yml" , emit: versions - + tuple val(meta), path('*.aln') , optional: true, emit: aln + tuple val(meta), path('*.biom') , optional: true, emit: biom + tuple val(meta), path('*.sam') , optional: true, emit: sam + tuple val(meta), path('*.tsv') , optional: true, emit: tsv + tuple val(meta), path('*.uc') , optional: true, emit: uc + path "versions.yml" , emit: versions + when: task.ext.when == null || task.ext.when script: def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def columns = user_columns ? 
"--userfields ${user_columns}" : '' + switch ( outoption ) { + case "alnout": outfmt = "--alnout"; out_ext = 'aln'; break + case "biomout": outfmt = "--biomout"; out_ext = 'biom'; break + case "blast6out": outfmt = "--blast6out"; out_ext = 'blast6out.tsv'; break + case "mothur_shared_out": outfmt = "--mothur_shared_out"; out_ext = 'mothur.tsv'; break + case "otutabout": outfmt = "--otutabout"; out_ext = 'otu.tsv'; break + case "samout": outfmt = "--samout"; out_ext = 'sam'; break + case "uc": outfmt = "--uc"; out_ext = 'uc'; break + case "userout": outfmt = "--userout"; out_ext = 'user.tsv'; break + case "lcaout": outfmt = "--lcaout"; out_ext = 'lca.tsv'; break + default: + outfmt = "--alnout"; + out_ext = 'aln'; + log.warn("Unknown output file format provided (${outoption}): selectingpairwise alignments (alnout)"); + break + } """ vsearch \\ --usearch_global $queryfasta \\ --db $db \\ --threads $task.cpus \\ $args \\ - --blast6out ${outprefix}.tsv + ${columns} \\ + ${outfmt} ${prefix}.${out_ext} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/tests/modules/vsearch/usearchglobal/main.nf b/tests/modules/vsearch/usearchglobal/main.nf index 02becd21..cdf0fb2f 100644 --- a/tests/modules/vsearch/usearchglobal/main.nf +++ b/tests/modules/vsearch/usearchglobal/main.nf @@ -8,5 +8,17 @@ workflow test_vsearch_usearchglobal { query = file(params.test_data['sarscov2']['genome']['transcriptome_fasta'], checkIfExists: true) db = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) - VSEARCH_USEARCHGLOBAL ( query, db, "blast6out_results" ) + + outoption = "xcfert" // Nonsense text to check default case. 
+ columns = "" + VSEARCH_USEARCHGLOBAL ( [ [id:'test'], query ], db, outoption, columns ) +} + +workflow test_vsearch_usearchglobal_userout { + + query = file(params.test_data['sarscov2']['genome']['transcriptome_fasta'], checkIfExists: true) + db = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + outoption = "userout" + columns = "query+target+id" + VSEARCH_USEARCHGLOBAL ( [ [id:'test'], query ], db, outoption, columns ) } diff --git a/tests/modules/vsearch/usearchglobal/nextflow.config b/tests/modules/vsearch/usearchglobal/nextflow.config index aab29979..3148a59e 100644 --- a/tests/modules/vsearch/usearchglobal/nextflow.config +++ b/tests/modules/vsearch/usearchglobal/nextflow.config @@ -1,5 +1,5 @@ process { ext.args = '--id 0.985' publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } - -} \ No newline at end of file +} + diff --git a/tests/modules/vsearch/usearchglobal/test.yml b/tests/modules/vsearch/usearchglobal/test.yml index b013519d..b7ebec4a 100644 --- a/tests/modules/vsearch/usearchglobal/test.yml +++ b/tests/modules/vsearch/usearchglobal/test.yml @@ -1,7 +1,17 @@ - name: vsearch usearchglobal test_vsearch_usearchglobal command: nextflow run ./tests/modules/vsearch/usearchglobal -entry test_vsearch_usearchglobal -c ./tests/config/nextflow.config -c ./tests/modules/vsearch/usearchglobal/nextflow.config tags: + - vsearch - vsearch/usearchglobal files: - - path: output/vsearch/blast6out_results.tsv - md5sum: 09733131643f1d951321a6e17a35eb8c + - path: output/vsearch/test.aln + md5sum: 7b7479c16e0ecb503913da8bde48d6c5 + +- name: vsearch usearchglobal test_vsearch_usearchglobal_userout + command: nextflow run ./tests/modules/vsearch/usearchglobal -entry test_vsearch_usearchglobal_userout -c ./tests/config/nextflow.config -c ./tests/modules/vsearch/usearchglobal/nextflow.config + tags: + - vsearch + - vsearch/usearchglobal + files: + - path: output/vsearch/test.user.tsv + md5sum: 
b6cc50f7c8d18cb82e74dab70ed4baab From 5db94ea0ee8f80319e307ecf89b121e82e8c2ba8 Mon Sep 17 00:00:00 2001 From: SusiJo Date: Tue, 24 May 2022 10:17:41 +0200 Subject: [PATCH 019/168] changed tag of module --- modules/cnvkit/reference/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/cnvkit/reference/main.nf b/modules/cnvkit/reference/main.nf index 1f0b20d8..992d768f 100644 --- a/modules/cnvkit/reference/main.nf +++ b/modules/cnvkit/reference/main.nf @@ -1,5 +1,5 @@ process CNVKIT_REFERENCE { - tag "$reference" + tag "$fasta" label 'process_low' conda (params.enable_conda ? "bioconda::cnvkit=0.9.9" : null) From 01fb8851c3e778052d342059736ecc09abab2e0d Mon Sep 17 00:00:00 2001 From: Matthias De Smet <11850640+matthdsm@users.noreply.github.com> Date: Tue, 24 May 2022 11:47:58 +0200 Subject: [PATCH 020/168] mosdept: cram compatiblility --- modules/mosdepth/main.nf | 33 +++-------- modules/mosdepth/meta.yml | 13 +++-- tests/modules/mosdepth/main.nf | 55 +++++++++++------- tests/modules/mosdepth/test.yml | 98 +++++++++++++++++++-------------- 4 files changed, 106 insertions(+), 93 deletions(-) diff --git a/modules/mosdepth/main.nf b/modules/mosdepth/main.nf index ff91e06f..894df1ae 100644 --- a/modules/mosdepth/main.nf +++ b/modules/mosdepth/main.nf @@ -10,13 +10,13 @@ process MOSDEPTH { input: tuple val(meta), path(bam), path(bai) path bed - val window_size + path fasta output: tuple val(meta), path('*.global.dist.txt') , emit: global_txt tuple val(meta), path('*.region.dist.txt') , emit: regions_txt , optional:true tuple val(meta), path('*.summary.txt') , emit: summary_txt - tuple val(meta), path('*.per-base.d4') , emit: d4 , optional:true + tuple val(meta), path('*.per-base.d4') , emit: per_base_d4 , optional:true tuple val(meta), path('*.per-base.bed.gz') , emit: per_base_bed, optional:true tuple val(meta), path('*.per-base.bed.gz.csi'), emit: per_base_csi, optional:true tuple val(meta), path('*.regions.bed.gz') , emit: regions_bed , 
optional:true @@ -29,36 +29,17 @@ process MOSDEPTH { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - if (window_size) { - interval = "--by ${window_size}" - } else if ( bed ) { - interval = "--by ${bed}" - } else { - interval = "" - } + def reference = fasta ? "--fasta ${fasta}" : "" + def interval = bed ? "--by ${bed}" : "" + """ mosdepth \\ + --threads ${task.cpus} \\ $interval \\ + $reference \\ $args \\ $prefix \\ $bam - cat <<-END_VERSIONS > versions.yml - "${task.process}": - mosdepth: \$(mosdepth --version 2>&1 | sed 's/^.*mosdepth //; s/ .*\$//') - END_VERSIONS - """ - - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${prefix}.global.dist.txt - touch ${prefix}.region.dist.txt - touch ${prefix}.summary.txt - touch ${prefix}.per-base.d4 - touch ${prefix}.per-base.bed.gz - touch ${prefix}.per-base.bed.gz.csi - touch ${prefix}.regions.bed.gz - touch ${prefix}.regions.bed.gz.csi cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/mosdepth/meta.yml b/modules/mosdepth/meta.yml index 636e966b..6e9e34c9 100644 --- a/modules/mosdepth/meta.yml +++ b/modules/mosdepth/meta.yml @@ -30,10 +30,10 @@ input: type: file description: BED file with intersected intervals pattern: "*.{bed}" - - window_size: - type: integer - description: Window size - pattern: "[0-9]+" + - fasta: + type: file + description: Reference genome FASTA file + pattern: "*.{fa,fasta}" output: - meta: type: map @@ -60,6 +60,10 @@ output: type: file description: Index file for BED file with per-base coverage pattern: "*.{per-base.bed.gz.csi}" + - per_base_d4: + type: file + description: D4 file with per-base coverage + pattern: "*.{per-base.d4}" - regions_bed: type: file description: BED file with per-region coverage @@ -76,3 +80,4 @@ authors: - "@joseespinosa" - "@drpatelh" - "@ramprasadn" + - "@matthdsm" diff --git a/tests/modules/mosdepth/main.nf b/tests/modules/mosdepth/main.nf index ddd68129..86ca0e86 100644 --- 
a/tests/modules/mosdepth/main.nf +++ b/tests/modules/mosdepth/main.nf @@ -5,32 +5,45 @@ nextflow.enable.dsl = 2 include { MOSDEPTH } from '../../../modules/mosdepth/main.nf' workflow test_mosdepth { - input = [ [ id:'test', single_end:true ], - [ file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) ], - [ file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true) ] - ] + input = [ + [ id:'test', single_end:true ], + [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) ], + [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true) ] + ] MOSDEPTH ( input, [], [] ) } - -workflow test_mosdepth_window { - input = [ [ id:'test', single_end:true ], - [ file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) ], - [ file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true) ] - ] - window = 100 - - MOSDEPTH ( input, [], window ) -} - - workflow test_mosdepth_bed { - input = [ [ id:'test', single_end:true ], - [ file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) ], - [ file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true) ] - ] - bed = [ file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true) ] + input = [ + [ id:'test', single_end:true ], + [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) ], + [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true) ] + ] + bed = [ file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true) ] MOSDEPTH ( input, bed, [] ) } + +workflow test_mosdepth_cram { + input = [ + [ id:'test', single_end:true ], + [ 
file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true) ], + [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true) ] + ] + fasta = [ file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) ] + + MOSDEPTH ( input, [], fasta ) +} + +workflow test_mosdepth_cram_bed { + input = [ + [ id:'test', single_end:true ], + [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true) ], + [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true) ] + ] + bed = [ file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true) ] + fasta = [ file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) ] + + MOSDEPTH ( input, bed, fasta ) +} diff --git a/tests/modules/mosdepth/test.yml b/tests/modules/mosdepth/test.yml index c66e0b89..2cacb185 100644 --- a/tests/modules/mosdepth/test.yml +++ b/tests/modules/mosdepth/test.yml @@ -1,53 +1,67 @@ -- name: mosdepth - command: nextflow run ./tests/modules/mosdepth -entry test_mosdepth -c ./tests/config/nextflow.config -c ./tests/modules/mosdepth/nextflow.config +- name: mosdepth test_mosdepth + command: nextflow run ./tests/modules/mosdepth -entry test_mosdepth -c ./tests/config/nextflow.config -c ./tests/modules/mosdepth/nextflow.config tags: - mosdepth files: - - path: ./output/mosdepth/test.per-base.bed.gz.csi - md5sum: b2aad62c41a7146680d31df505fcc8c5 - - path: ./output/mosdepth/test.per-base.bed.gz - md5sum: 11b3f649072c2c7453febb085b1a9c33 - - path: ./output/mosdepth/test.mosdepth.global.dist.txt - md5sum: 2a1de1b0ecc361a21cd296ec4e1efd6a - - path: ./output/mosdepth/test.mosdepth.summary.txt - md5sum: 7b249dd3b3e58cc122fbd25ea84aa25d + - path: output/mosdepth/test.mosdepth.global.dist.txt + md5sum: e82e90c7d508a135b5a8a7cd6933452e + - path: 
output/mosdepth/test.mosdepth.summary.txt + md5sum: 4f0d231060cbde4efdd673863bd2fb59 + - path: output/mosdepth/test.per-base.bed.gz + md5sum: bc1df47d46f818fee5275975925d769a + - path: output/mosdepth/test.per-base.bed.gz.csi + md5sum: 9e649ac749ff6c6073bef5ab63e8aaa4 -- name: mosdepth window - command: nextflow run ./tests/modules/mosdepth -entry test_mosdepth_window -c ./tests/config/nextflow.config -c ./tests/modules/mosdepth/nextflow.config +- name: mosdepth test_mosdepth_bed + command: nextflow run ./tests/modules/mosdepth -entry test_mosdepth_bed -c ./tests/config/nextflow.config -c ./tests/modules/mosdepth/nextflow.config tags: - mosdepth files: - - path: ./output/mosdepth/test.per-base.bed.gz.csi - md5sum: b2aad62c41a7146680d31df505fcc8c5 - - path: ./output/mosdepth/test.per-base.bed.gz - md5sum: 11b3f649072c2c7453febb085b1a9c33 - - path: ./output/mosdepth/test.mosdepth.global.dist.txt - md5sum: 2a1de1b0ecc361a21cd296ec4e1efd6a - - path: ./output/mosdepth/test.regions.bed.gz - md5sum: 64e1ced01c4443d7c1796ef553992f0c - - path: ./output/mosdepth/test.regions.bed.gz.csi - md5sum: 9e312b4b0784bd46dfbd23b3a8afed6a - - path: ./output/mosdepth/test.mosdepth.region.dist.txt - md5sum: 65fbc824c4212c6884354d8ac72ad37e - - path: ./output/mosdepth/test.mosdepth.summary.txt - md5sum: 11804907dab069ddb99ca97bf2698572 + - path: output/mosdepth/test.mosdepth.global.dist.txt + md5sum: e82e90c7d508a135b5a8a7cd6933452e + - path: output/mosdepth/test.mosdepth.region.dist.txt + md5sum: e82e90c7d508a135b5a8a7cd6933452e + - path: output/mosdepth/test.mosdepth.summary.txt + md5sum: 96c037f769974b904beb53edc4f56d82 + - path: output/mosdepth/test.per-base.bed.gz + md5sum: bc1df47d46f818fee5275975925d769a + - path: output/mosdepth/test.per-base.bed.gz.csi + md5sum: 9e649ac749ff6c6073bef5ab63e8aaa4 + - path: output/mosdepth/test.regions.bed.gz + md5sum: 5d398caf7171ec4406278e2add3009ae + - path: output/mosdepth/test.regions.bed.gz.csi + md5sum: 47669cfe41f3e222e74d81e1b1be191f -- 
name: mosdepth bed - command: nextflow run ./tests/modules/mosdepth -entry test_mosdepth_bed -c ./tests/config/nextflow.config -c ./tests/modules/mosdepth/nextflow.config +- name: mosdepth test_mosdepth_cram + command: nextflow run ./tests/modules/mosdepth -entry test_mosdepth_cram -c ./tests/config/nextflow.config -c ./tests/modules/mosdepth/nextflow.config tags: - mosdepth files: - - path: ./output/mosdepth/test.per-base.bed.gz.csi - md5sum: b2aad62c41a7146680d31df505fcc8c5 - - path: ./output/mosdepth/test.per-base.bed.gz - md5sum: 11b3f649072c2c7453febb085b1a9c33 - - path: ./output/mosdepth/test.mosdepth.global.dist.txt - md5sum: 2a1de1b0ecc361a21cd296ec4e1efd6a - - path: ./output/mosdepth/test.regions.bed.gz - md5sum: 347f877700d1dc42c95157199eff25d5 - - path: ./output/mosdepth/test.regions.bed.gz.csi - md5sum: ed5fbf46e3bdcbf60094df295bc40356 - - path: ./output/mosdepth/test.mosdepth.region.dist.txt - md5sum: 295564628113d2ec0ca34d7f661cfea8 - - path: ./output/mosdepth/test.mosdepth.summary.txt - md5sum: b07817412fd17819c14541e63bc4926c + - path: output/mosdepth/test.mosdepth.global.dist.txt + md5sum: e82e90c7d508a135b5a8a7cd6933452e + - path: output/mosdepth/test.mosdepth.summary.txt + md5sum: 4f0d231060cbde4efdd673863bd2fb59 + - path: output/mosdepth/test.per-base.bed.gz + md5sum: bc1df47d46f818fee5275975925d769a + - path: output/mosdepth/test.per-base.bed.gz.csi + md5sum: 9e649ac749ff6c6073bef5ab63e8aaa4 + +- name: mosdepth test_mosdepth_cram_bed + command: nextflow run ./tests/modules/mosdepth -entry test_mosdepth_cram_bed -c ./tests/config/nextflow.config -c ./tests/modules/mosdepth/nextflow.config + tags: + - mosdepth + files: + - path: output/mosdepth/test.mosdepth.global.dist.txt + md5sum: e82e90c7d508a135b5a8a7cd6933452e + - path: output/mosdepth/test.mosdepth.region.dist.txt + md5sum: e82e90c7d508a135b5a8a7cd6933452e + - path: output/mosdepth/test.mosdepth.summary.txt + md5sum: 96c037f769974b904beb53edc4f56d82 + - path: 
output/mosdepth/test.per-base.bed.gz + md5sum: bc1df47d46f818fee5275975925d769a + - path: output/mosdepth/test.per-base.bed.gz.csi + md5sum: 9e649ac749ff6c6073bef5ab63e8aaa4 + - path: output/mosdepth/test.regions.bed.gz + md5sum: 5d398caf7171ec4406278e2add3009ae + - path: output/mosdepth/test.regions.bed.gz.csi + md5sum: 47669cfe41f3e222e74d81e1b1be191f From 29a4f112be0a28006af99564a149711a9e08af91 Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Tue, 24 May 2022 11:14:32 +0100 Subject: [PATCH 021/168] Update main.nf --- modules/cnvkit/reference/main.nf | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/modules/cnvkit/reference/main.nf b/modules/cnvkit/reference/main.nf index 992d768f..71d9ed28 100644 --- a/modules/cnvkit/reference/main.nf +++ b/modules/cnvkit/reference/main.nf @@ -8,20 +8,19 @@ process CNVKIT_REFERENCE { 'quay.io/biocontainers/cnvkit:0.9.9--pyhdfd78af_0' }" input: - path fasta - path targets - path antitargets + path fasta + path targets + path antitargets output: - path("*.cnn") , emit: cnn - path "versions.yml" , emit: versions + path "*.cnn" , emit: cnn + path "versions.yml", emit: versions when: task.ext.when == null || task.ext.when script: def args = task.ext.args ?: '' - """ cnvkit.py \\ reference \\ From b5850fd47157627b43fe6c80fd9defb097df893e Mon Sep 17 00:00:00 2001 From: Matthias De Smet <11850640+matthdsm@users.noreply.github.com> Date: Tue, 24 May 2022 12:16:03 +0200 Subject: [PATCH 022/168] add stub --- modules/mosdepth/main.nf | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/modules/mosdepth/main.nf b/modules/mosdepth/main.nf index 894df1ae..1f503c6b 100644 --- a/modules/mosdepth/main.nf +++ b/modules/mosdepth/main.nf @@ -46,4 +46,22 @@ process MOSDEPTH { mosdepth: \$(mosdepth --version 2>&1 | sed 's/^.*mosdepth //; s/ .*\$//') END_VERSIONS """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.global.dist.txt + touch ${prefix}.region.dist.txt + touch 
${prefix}.summary.txt + touch ${prefix}.per-base.d4 + touch ${prefix}.per-base.bed.gz + touch ${prefix}.per-base.bed.gz.csi + touch ${prefix}.regions.bed.gz + touch ${prefix}.regions.bed.gz.csi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + mosdepth: \$(mosdepth --version 2>&1 | sed 's/^.*mosdepth //; s/ .*\$//') + END_VERSIONS + """ } From 7e777c6626064d7463ecbf91c2c599db85eec647 Mon Sep 17 00:00:00 2001 From: Matthias De Smet <11850640+matthdsm@users.noreply.github.com> Date: Tue, 24 May 2022 12:25:31 +0200 Subject: [PATCH 023/168] add error is is provided twice --- modules/mosdepth/main.nf | 3 +++ 1 file changed, 3 insertions(+) diff --git a/modules/mosdepth/main.nf b/modules/mosdepth/main.nf index 1f503c6b..a5218286 100644 --- a/modules/mosdepth/main.nf +++ b/modules/mosdepth/main.nf @@ -31,6 +31,9 @@ process MOSDEPTH { def prefix = task.ext.prefix ?: "${meta.id}" def reference = fasta ? "--fasta ${fasta}" : "" def interval = bed ? "--by ${bed}" : "" + if (bed && ext.args.contains("--by")) { + exit 1, "--by can only be specified once." + } """ mosdepth \\ From 858cbe80c67093a39486bbb545b91c95077615bd Mon Sep 17 00:00:00 2001 From: Matthias De Smet <11850640+matthdsm@users.noreply.github.com> Date: Tue, 24 May 2022 12:43:49 +0200 Subject: [PATCH 024/168] add must fail test --- modules/mosdepth/main.nf | 2 +- tests/modules/mosdepth/main.nf | 14 +++++++++++++- tests/modules/mosdepth/nextflow.config | 4 +++- tests/modules/mosdepth/test.yml | 6 ++++++ 4 files changed, 23 insertions(+), 3 deletions(-) diff --git a/modules/mosdepth/main.nf b/modules/mosdepth/main.nf index a5218286..39586797 100644 --- a/modules/mosdepth/main.nf +++ b/modules/mosdepth/main.nf @@ -31,7 +31,7 @@ process MOSDEPTH { def prefix = task.ext.prefix ?: "${meta.id}" def reference = fasta ? "--fasta ${fasta}" : "" def interval = bed ? "--by ${bed}" : "" - if (bed && ext.args.contains("--by")) { + if (bed && args.contains("--by")) { exit 1, "--by can only be specified once." 
} diff --git a/tests/modules/mosdepth/main.nf b/tests/modules/mosdepth/main.nf index 86ca0e86..eab47e89 100644 --- a/tests/modules/mosdepth/main.nf +++ b/tests/modules/mosdepth/main.nf @@ -2,7 +2,8 @@ nextflow.enable.dsl = 2 -include { MOSDEPTH } from '../../../modules/mosdepth/main.nf' +include { MOSDEPTH } from '../../../modules/mosdepth/main.nf' +include { MOSDEPTH as MOSDEPTH_FAIL } from '../../../modules/mosdepth/main.nf' workflow test_mosdepth { input = [ @@ -47,3 +48,14 @@ workflow test_mosdepth_cram_bed { MOSDEPTH ( input, bed, fasta ) } + +workflow test_mosdepth_fail { + input = [ + [ id:'test', single_end:true ], + [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) ], + [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true) ] + ] + bed = [ file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true) ] + + MOSDEPTH_FAIL ( input, bed, [] ) +} diff --git a/tests/modules/mosdepth/nextflow.config b/tests/modules/mosdepth/nextflow.config index 8730f1c4..85674a3c 100644 --- a/tests/modules/mosdepth/nextflow.config +++ b/tests/modules/mosdepth/nextflow.config @@ -1,5 +1,7 @@ process { publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } - + withName: MOSDEPTH_FAIL { + ext.args = "--by 100" + } } diff --git a/tests/modules/mosdepth/test.yml b/tests/modules/mosdepth/test.yml index 2cacb185..4eaf9bf1 100644 --- a/tests/modules/mosdepth/test.yml +++ b/tests/modules/mosdepth/test.yml @@ -65,3 +65,9 @@ md5sum: 5d398caf7171ec4406278e2add3009ae - path: output/mosdepth/test.regions.bed.gz.csi md5sum: 47669cfe41f3e222e74d81e1b1be191f + +- name: mosdepth test_mosdepth_fail + command: nextflow run ./tests/modules/mosdepth -entry test_mosdepth_fail -c ./tests/config/nextflow.config -c ./tests/modules/mosdepth/nextflow.config + tags: + - mosdepth + exitcode: 1 From 
5bc2d419a94ec8b916739205961c115f62eec9c6 Mon Sep 17 00:00:00 2001 From: Matthias De Smet <11850640+matthdsm@users.noreply.github.com> Date: Tue, 24 May 2022 13:23:00 +0200 Subject: [PATCH 025/168] fix fail test --- tests/modules/mosdepth/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/modules/mosdepth/test.yml b/tests/modules/mosdepth/test.yml index 4eaf9bf1..a927396c 100644 --- a/tests/modules/mosdepth/test.yml +++ b/tests/modules/mosdepth/test.yml @@ -70,4 +70,4 @@ command: nextflow run ./tests/modules/mosdepth -entry test_mosdepth_fail -c ./tests/config/nextflow.config -c ./tests/modules/mosdepth/nextflow.config tags: - mosdepth - exitcode: 1 + exit_code: 1 From a2a8e09762a5aec8985f0d93125eafe3a6d2d365 Mon Sep 17 00:00:00 2001 From: Matthias De Smet <11850640+matthdsm@users.noreply.github.com> Date: Tue, 24 May 2022 13:45:52 +0200 Subject: [PATCH 026/168] Update modules/mosdepth/main.nf Co-authored-by: Harshil Patel --- modules/mosdepth/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/mosdepth/main.nf b/modules/mosdepth/main.nf index 39586797..5f6538e7 100644 --- a/modules/mosdepth/main.nf +++ b/modules/mosdepth/main.nf @@ -32,7 +32,7 @@ process MOSDEPTH { def reference = fasta ? "--fasta ${fasta}" : "" def interval = bed ? "--by ${bed}" : "" if (bed && args.contains("--by")) { - exit 1, "--by can only be specified once." + exit 1, "'--by' can only be specified once when running mosdepth! 
Either remove input BED file definition or remove '--by' from 'ext.args' definition" } """ From 749edce06945870a708397e37be4042048aaeb50 Mon Sep 17 00:00:00 2001 From: SusiJo Date: Tue, 24 May 2022 14:20:19 +0200 Subject: [PATCH 027/168] add prefix --- modules/cnvkit/reference/main.nf | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modules/cnvkit/reference/main.nf b/modules/cnvkit/reference/main.nf index 992d768f..bf4b3c21 100644 --- a/modules/cnvkit/reference/main.nf +++ b/modules/cnvkit/reference/main.nf @@ -21,6 +21,7 @@ process CNVKIT_REFERENCE { script: def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" """ cnvkit.py \\ @@ -28,7 +29,7 @@ process CNVKIT_REFERENCE { --fasta $fasta \\ --targets $targets \\ --antitargets $antitargets \\ - --output reference.cnn \\ + --output ${prefix}.reference.cnn \\ $args cat <<-END_VERSIONS > versions.yml From 8381a5e5dd73228b3fe744e51f1964d62c28c1e5 Mon Sep 17 00:00:00 2001 From: Matthias De Smet <11850640+matthdsm@users.noreply.github.com> Date: Tue, 24 May 2022 14:43:40 +0200 Subject: [PATCH 028/168] add window test --- tests/modules/mosdepth/main.nf | 16 ++++++++++++++-- tests/modules/mosdepth/nextflow.config | 3 +++ tests/modules/mosdepth/test.yml | 20 ++++++++++++++++++++ 3 files changed, 37 insertions(+), 2 deletions(-) diff --git a/tests/modules/mosdepth/main.nf b/tests/modules/mosdepth/main.nf index eab47e89..96a9ef20 100644 --- a/tests/modules/mosdepth/main.nf +++ b/tests/modules/mosdepth/main.nf @@ -2,8 +2,9 @@ nextflow.enable.dsl = 2 -include { MOSDEPTH } from '../../../modules/mosdepth/main.nf' -include { MOSDEPTH as MOSDEPTH_FAIL } from '../../../modules/mosdepth/main.nf' +include { MOSDEPTH } from '../../../modules/mosdepth/main.nf' +include { MOSDEPTH as MOSDEPTH_FAIL } from '../../../modules/mosdepth/main.nf' +include { MOSDEPTH as MOSDEPTH_WINDOW } from '../../../modules/mosdepth/main.nf' workflow test_mosdepth { input = [ @@ -49,6 +50,17 @@ workflow 
test_mosdepth_cram_bed { MOSDEPTH ( input, bed, fasta ) } +workflow test_mosdepth_window { + input = [ + [ id:'test', single_end:true ], + [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) ], + [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true) ] + ] + bed = [ file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true) ] + + MOSDEPTH_WINDOW ( input, [], [] ) +} + workflow test_mosdepth_fail { input = [ [ id:'test', single_end:true ], diff --git a/tests/modules/mosdepth/nextflow.config b/tests/modules/mosdepth/nextflow.config index 85674a3c..4a6153e6 100644 --- a/tests/modules/mosdepth/nextflow.config +++ b/tests/modules/mosdepth/nextflow.config @@ -4,4 +4,7 @@ process { withName: MOSDEPTH_FAIL { ext.args = "--by 100" } + withName: MOSDEPTH_WINDOW { + ext.args = "--by 100" + } } diff --git a/tests/modules/mosdepth/test.yml b/tests/modules/mosdepth/test.yml index a927396c..a3115d6e 100644 --- a/tests/modules/mosdepth/test.yml +++ b/tests/modules/mosdepth/test.yml @@ -66,6 +66,26 @@ - path: output/mosdepth/test.regions.bed.gz.csi md5sum: 47669cfe41f3e222e74d81e1b1be191f +- name: mosdepth test_mosdepth_window + command: nextflow run ./tests/modules/mosdepth -entry test_mosdepth_window -c ./tests/config/nextflow.config -c ./tests/modules/mosdepth/nextflow.config + tags: + - mosdepth + files: + - path: output/mosdepth/test.mosdepth.global.dist.txt + md5sum: e82e90c7d508a135b5a8a7cd6933452e + - path: output/mosdepth/test.mosdepth.region.dist.txt + md5sum: 39e0e707ec32feb5176fd20a95f1f468 + - path: output/mosdepth/test.mosdepth.summary.txt + md5sum: 96c037f769974b904beb53edc4f56d82 + - path: output/mosdepth/test.per-base.bed.gz + md5sum: bc1df47d46f818fee5275975925d769a + - path: output/mosdepth/test.per-base.bed.gz.csi + md5sum: 9e649ac749ff6c6073bef5ab63e8aaa4 + - path: output/mosdepth/test.regions.bed.gz + md5sum: 
f02e2cb49cc050e13d76942d6960827a + - path: output/mosdepth/test.regions.bed.gz.csi + md5sum: 257d67678136963d9dd904330079609d + - name: mosdepth test_mosdepth_fail command: nextflow run ./tests/modules/mosdepth -entry test_mosdepth_fail -c ./tests/config/nextflow.config -c ./tests/modules/mosdepth/nextflow.config tags: From 8208140d21f3a754fff2e177db7a0e570fa2af6e Mon Sep 17 00:00:00 2001 From: SusiJo Date: Tue, 24 May 2022 14:59:52 +0200 Subject: [PATCH 029/168] missing meta.id changed --- modules/cnvkit/reference/main.nf | 2 +- tests/modules/cnvkit/reference/test.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/cnvkit/reference/main.nf b/modules/cnvkit/reference/main.nf index d5b8baa0..10458f27 100644 --- a/modules/cnvkit/reference/main.nf +++ b/modules/cnvkit/reference/main.nf @@ -21,7 +21,7 @@ process CNVKIT_REFERENCE { script: def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" + def prefix = task.ext.prefix ?: targets.BaseName """ cnvkit.py \\ diff --git a/tests/modules/cnvkit/reference/test.yml b/tests/modules/cnvkit/reference/test.yml index b1b8c896..a5baf0a2 100644 --- a/tests/modules/cnvkit/reference/test.yml +++ b/tests/modules/cnvkit/reference/test.yml @@ -4,5 +4,5 @@ - cnvkit/reference - cnvkit files: - - path: output/cnvkit/reference.cnn + - path: output/cnvkit/multi_intervals.reference.cnn md5sum: 7c4a7902f5ab101b1f9d6038d331b3d9 From b0dba3ee7543dc89a00575fb619408de97cd889d Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Tue, 24 May 2022 16:17:15 +0200 Subject: [PATCH 030/168] update description --- modules/bcftools/roh/meta.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/modules/bcftools/roh/meta.yml b/modules/bcftools/roh/meta.yml index 09535fe8..fd03d4ce 100644 --- a/modules/bcftools/roh/meta.yml +++ b/modules/bcftools/roh/meta.yml @@ -1,8 +1,7 @@ name: "bcftools_roh" -## TODO nf-core: Add a description of 
the module and list keywords -description: write your description here +description: A program for detecting runs of homo/autozygosity. Only bi-allelic sites are considered. keywords: - - sort + - roh tools: - "roh": description: "A program for detecting runs of homo/autozygosity. Only bi-allelic sites are considered." From f1fc7fc38eb65782cdf2d05bc4d56a11ec8922a8 Mon Sep 17 00:00:00 2001 From: jvhagey Date: Tue, 24 May 2022 15:15:07 -0400 Subject: [PATCH 031/168] adding upzip --- modules/gamma/{ => gamma}/main.nf | 15 ++++++++-- modules/gamma/{ => gamma}/meta.yml | 0 tests/modules/gamma/gamma/main.nf | 29 +++++++++++++++++++ .../modules/gamma/{ => gamma}/nextflow.config | 0 tests/modules/gamma/gamma/test.yml | 29 +++++++++++++++++++ tests/modules/gamma/main.nf | 17 ----------- tests/modules/gamma/test.yml | 13 --------- 7 files changed, 71 insertions(+), 32 deletions(-) rename modules/gamma/{ => gamma}/main.nf (83%) rename modules/gamma/{ => gamma}/meta.yml (100%) create mode 100644 tests/modules/gamma/gamma/main.nf rename tests/modules/gamma/{ => gamma}/nextflow.config (100%) create mode 100644 tests/modules/gamma/gamma/test.yml delete mode 100644 tests/modules/gamma/main.nf delete mode 100644 tests/modules/gamma/test.yml diff --git a/modules/gamma/main.nf b/modules/gamma/gamma/main.nf similarity index 83% rename from modules/gamma/main.nf rename to modules/gamma/gamma/main.nf index e176ee68..51a5f6c8 100644 --- a/modules/gamma/main.nf +++ b/modules/gamma/gamma/main.nf @@ -26,13 +26,24 @@ process GAMMA { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + """ - GAMMA.py \\ + if [[ ${fasta} == *.gz ]] + then + FNAME=\$(basename ${fasta} .gz) + gunzip -f ${fasta} + GAMMA.py \\ + $args \\ + "\${FNAME}" \\ + $db \\ + $prefix + else + GAMMA.py \\ $args \\ $fasta \\ $db \\ $prefix - + fi cat <<-END_VERSIONS > versions.yml "${task.process}": gamma: $VERSION diff --git a/modules/gamma/meta.yml b/modules/gamma/gamma/meta.yml similarity 
index 100% rename from modules/gamma/meta.yml rename to modules/gamma/gamma/meta.yml diff --git a/tests/modules/gamma/gamma/main.nf b/tests/modules/gamma/gamma/main.nf new file mode 100644 index 00000000..070a3bf1 --- /dev/null +++ b/tests/modules/gamma/gamma/main.nf @@ -0,0 +1,29 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { GAMMA } from '../../../../modules/gamma/gamma/main.nf' + +workflow test_unzip { + + input = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['bacteroides_fragilis']['illumina']['test1_contigs_fa_gz'], checkIfExists: true), + ] + + db = [ file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/srst2/ResGANNCBI_20210507_srst2.fasta", checkIfExists: true), ] + + GAMMA ( input, db ) +} + +workflow test_gamma { + + input = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + + db = [ file(params.test_data['sarscov2']['genome']['transcriptome_fasta'], checkIfExists: true) ] + + GAMMA ( input, db ) +} diff --git a/tests/modules/gamma/nextflow.config b/tests/modules/gamma/gamma/nextflow.config similarity index 100% rename from tests/modules/gamma/nextflow.config rename to tests/modules/gamma/gamma/nextflow.config diff --git a/tests/modules/gamma/gamma/test.yml b/tests/modules/gamma/gamma/test.yml new file mode 100644 index 00000000..308d890f --- /dev/null +++ b/tests/modules/gamma/gamma/test.yml @@ -0,0 +1,29 @@ +- name: gamma gamma test_unzip + command: nextflow run tests/modules/gamma/gamma -entry test_unzip -c tests/config/nextflow.config + tags: + - gamma/gamma + - gamma + files: + - path: /tmp/tmppm1ybfln/gamma/test.fasta + md5sum: 5b3b831d863fffaa3410a9ee7bfa12ce + - path: /tmp/tmppm1ybfln/gamma/test.gamma + md5sum: 46165a89e10b7315d3a9b0aa6c561626 + - path: /tmp/tmppm1ybfln/gamma/test.psl + md5sum: f489ce4602ddbcb692d5781ee3fbf449 + - path: /tmp/tmppm1ybfln/gamma/versions.yml + md5sum: 
ad23c3ec60663598543cbe8bc74bc4ab + +- name: gamma gamma test_gamma + command: nextflow run tests/modules/gamma/gamma -entry test_gamma -c tests/config/nextflow.config + tags: + - gamma/gamma + - gamma + files: + - path: /tmp/tmpw3chm6da/gamma/test.fasta + md5sum: df37b48466181311e0a679f3c5878484 + - path: /tmp/tmpw3chm6da/gamma/test.gamma + md5sum: 3256708fa517a65ed01d99e0e3c762ae + - path: /tmp/tmpw3chm6da/gamma/test.psl + md5sum: 162a2757ed3b167ae1e0cdb24213f940 + - path: /tmp/tmpw3chm6da/gamma/versions.yml + md5sum: 3fefb5b46c94993362243c5f9a472057 diff --git a/tests/modules/gamma/main.nf b/tests/modules/gamma/main.nf deleted file mode 100644 index f9477706..00000000 --- a/tests/modules/gamma/main.nf +++ /dev/null @@ -1,17 +0,0 @@ -#!/usr/bin/env nextflow - -nextflow.enable.dsl = 2 - -include { GAMMA } from '../../../modules/gamma/main.nf' - -workflow test_gamma { - - input = [ - [ id:'test', single_end:false ], // meta map - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) - ] - - db = [ file(params.test_data['sarscov2']['genome']['transcriptome_fasta'], checkIfExists: true) ] - - GAMMA ( input, db ) -} diff --git a/tests/modules/gamma/test.yml b/tests/modules/gamma/test.yml deleted file mode 100644 index 1b493b49..00000000 --- a/tests/modules/gamma/test.yml +++ /dev/null @@ -1,13 +0,0 @@ -- name: gamma test_gamma - command: nextflow run tests/modules/gamma -entry test_gamma -c tests/config/nextflow.config - tags: - - gamma - files: - - path: output/gamma/test.fasta - md5sum: df37b48466181311e0a679f3c5878484 - - path: output/gamma/test.gamma - md5sum: 3256708fa517a65ed01d99e0e3c762ae - - path: output/gamma/test.psl - md5sum: 162a2757ed3b167ae1e0cdb24213f940 - - path: output/gamma/versions.yml - md5sum: 3fefb5b46c94993362243c5f9a472057 From b8aaea7429e74cb23acff21b03ba3814d7905ed1 Mon Sep 17 00:00:00 2001 From: jvhagey Date: Tue, 24 May 2022 16:27:00 -0400 Subject: [PATCH 032/168] updating paths in test.yml --- 
modules/gamma/gamma/main.nf | 2 +- modules/gamma/gamma/meta.yml | 3 ++- tests/modules/gamma/gamma/main.nf | 6 +++--- tests/modules/gamma/gamma/test.yml | 20 ++++++++++---------- 4 files changed, 16 insertions(+), 15 deletions(-) diff --git a/modules/gamma/gamma/main.nf b/modules/gamma/gamma/main.nf index 51a5f6c8..fddced62 100644 --- a/modules/gamma/gamma/main.nf +++ b/modules/gamma/gamma/main.nf @@ -1,6 +1,6 @@ def VERSION = '2.1' // Version information not provided by tool on CLI -process GAMMA { +process GAMMA_GAMMA { tag "$meta.id" label 'process_low' diff --git a/modules/gamma/gamma/meta.yml b/modules/gamma/gamma/meta.yml index 316b685b..d9e272e2 100644 --- a/modules/gamma/gamma/meta.yml +++ b/modules/gamma/gamma/meta.yml @@ -1,4 +1,4 @@ -name: "gamma" +name: "gamma_gamma" description: Gene Allele Mutation Microbial Assessment keywords: - gamma @@ -61,3 +61,4 @@ output: authors: - "@sateeshperi" - "@rastanton" + - "@jvhagey" diff --git a/tests/modules/gamma/gamma/main.nf b/tests/modules/gamma/gamma/main.nf index 070a3bf1..97e58be7 100644 --- a/tests/modules/gamma/gamma/main.nf +++ b/tests/modules/gamma/gamma/main.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl = 2 -include { GAMMA } from '../../../../modules/gamma/gamma/main.nf' +include { GAMMA_GAMMA } from '../../../../modules/gamma/gamma/main.nf' workflow test_unzip { @@ -13,7 +13,7 @@ workflow test_unzip { db = [ file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/srst2/ResGANNCBI_20210507_srst2.fasta", checkIfExists: true), ] - GAMMA ( input, db ) + GAMMA_GAMMA ( input, db ) } workflow test_gamma { @@ -25,5 +25,5 @@ workflow test_gamma { db = [ file(params.test_data['sarscov2']['genome']['transcriptome_fasta'], checkIfExists: true) ] - GAMMA ( input, db ) + GAMMA_GAMMA ( input, db ) } diff --git a/tests/modules/gamma/gamma/test.yml b/tests/modules/gamma/gamma/test.yml index 308d890f..2c649105 100644 --- a/tests/modules/gamma/gamma/test.yml +++ b/tests/modules/gamma/gamma/test.yml 
@@ -4,14 +4,14 @@ - gamma/gamma - gamma files: - - path: /tmp/tmppm1ybfln/gamma/test.fasta + - path: output/gamma/gamma/test.fasta md5sum: 5b3b831d863fffaa3410a9ee7bfa12ce - - path: /tmp/tmppm1ybfln/gamma/test.gamma + - path: output/gamma/gamma/test.gamma md5sum: 46165a89e10b7315d3a9b0aa6c561626 - - path: /tmp/tmppm1ybfln/gamma/test.psl + - path: output/gamma/gamma/test.psl md5sum: f489ce4602ddbcb692d5781ee3fbf449 - - path: /tmp/tmppm1ybfln/gamma/versions.yml - md5sum: ad23c3ec60663598543cbe8bc74bc4ab + - path: output/gamma/gamma/versions.yml + md5sum: 8baafec7b3b87f788f69e30d317c9722 - name: gamma gamma test_gamma command: nextflow run tests/modules/gamma/gamma -entry test_gamma -c tests/config/nextflow.config @@ -19,11 +19,11 @@ - gamma/gamma - gamma files: - - path: /tmp/tmpw3chm6da/gamma/test.fasta + - path: output/gamma/gamma/test.fasta md5sum: df37b48466181311e0a679f3c5878484 - - path: /tmp/tmpw3chm6da/gamma/test.gamma + - path: output/gamma/gamma/test.gamma md5sum: 3256708fa517a65ed01d99e0e3c762ae - - path: /tmp/tmpw3chm6da/gamma/test.psl + - path: output/gamma/gamma/test.psl md5sum: 162a2757ed3b167ae1e0cdb24213f940 - - path: /tmp/tmpw3chm6da/gamma/versions.yml - md5sum: 3fefb5b46c94993362243c5f9a472057 + - path: output/gamma/gamma/versions.yml + md5sum: b75c2871d8cac2f8ac67c0fbd22babd6 From bb999395aa4d03d02336794c655cfcdf34855b05 Mon Sep 17 00:00:00 2001 From: jvhagey Date: Tue, 24 May 2022 19:19:38 -0400 Subject: [PATCH 033/168] fixed pytest_modules.yml --- tests/config/pytest_modules.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index 2063d691..80711bae 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -707,9 +707,9 @@ freebayes: - modules/freebayes/** - tests/modules/freebayes/** -gamma: - - modules/gamma/** - - tests/modules/gamma/** +gamma/gamma: + - modules/gamma/gamma/** + - tests/modules/gamma/gamma/** gatk4/applybqsr: 
- modules/gatk4/applybqsr/** From 127665d27ba5512bfe1596f86c2b11a197556233 Mon Sep 17 00:00:00 2001 From: jvhagey Date: Tue, 24 May 2022 19:27:09 -0400 Subject: [PATCH 034/168] fixed test.yml --- tests/modules/gamma/gamma/test.yml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/modules/gamma/gamma/test.yml b/tests/modules/gamma/gamma/test.yml index 2c649105..163eda5f 100644 --- a/tests/modules/gamma/gamma/test.yml +++ b/tests/modules/gamma/gamma/test.yml @@ -4,13 +4,13 @@ - gamma/gamma - gamma files: - - path: output/gamma/gamma/test.fasta + - path: output/gamma/test.fasta md5sum: 5b3b831d863fffaa3410a9ee7bfa12ce - - path: output/gamma/gamma/test.gamma + - path: output/gamma/test.gamma md5sum: 46165a89e10b7315d3a9b0aa6c561626 - - path: output/gamma/gamma/test.psl + - path: output/gamma/test.psl md5sum: f489ce4602ddbcb692d5781ee3fbf449 - - path: output/gamma/gamma/versions.yml + - path: output/gamma/versions.yml md5sum: 8baafec7b3b87f788f69e30d317c9722 - name: gamma gamma test_gamma @@ -19,11 +19,11 @@ - gamma/gamma - gamma files: - - path: output/gamma/gamma/test.fasta + - path: output/gamma/test.fasta md5sum: df37b48466181311e0a679f3c5878484 - - path: output/gamma/gamma/test.gamma + - path: output/gamma/test.gamma md5sum: 3256708fa517a65ed01d99e0e3c762ae - - path: output/gamma/gamma/test.psl + - path: output/gamma/test.psl md5sum: 162a2757ed3b167ae1e0cdb24213f940 - - path: output/gamma/gamma/versions.yml + - path: output/gamma/versions.yml md5sum: b75c2871d8cac2f8ac67c0fbd22babd6 From c452f562890534ca3d0a331de7c35b744037fdae Mon Sep 17 00:00:00 2001 From: Mahesh Binzer-Panchal Date: Wed, 25 May 2022 12:29:14 +0000 Subject: [PATCH 035/168] Patch mash screen --- modules/mash/screen/main.nf | 8 ++++---- modules/mash/screen/meta.yml | 9 +++++---- tests/modules/mash/screen/main.nf | 9 ++++++--- tests/modules/mash/screen/test.yml | 10 +++++----- 4 files changed, 20 insertions(+), 16 deletions(-) diff --git 
a/modules/mash/screen/main.nf b/modules/mash/screen/main.nf index 3e7859ed..be272cbb 100644 --- a/modules/mash/screen/main.nf +++ b/modules/mash/screen/main.nf @@ -8,8 +8,8 @@ process MASH_SCREEN { 'quay.io/biocontainers/mash:2.3--he348c14_1' }" input: - tuple val(meta), path(query_sketch) - path fastx_db + tuple val(meta), path(query) + path sequences_sketch output: tuple val(meta), path("*.screen"), emit: screen @@ -26,8 +26,8 @@ process MASH_SCREEN { screen \\ $args \\ -p $task.cpus \\ - $query_sketch \\ - $fastx_db \\ + $sequences_sketch \\ + $query \\ > ${prefix}.screen cat <<-END_VERSIONS > versions.yml diff --git a/modules/mash/screen/meta.yml b/modules/mash/screen/meta.yml index 81e455bc..063aca6c 100644 --- a/modules/mash/screen/meta.yml +++ b/modules/mash/screen/meta.yml @@ -20,13 +20,14 @@ input: description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - query_sketch: + - query: type: file - description: MinHash sketch of query sequences - pattern: "*.msh" - - fastx_db: + description: Query sequences + pattern: "*.fastq.gz" + - sequence_sketch: type: file description: Sequence files to match against + pattern: "*.msh" output: - meta: diff --git a/tests/modules/mash/screen/main.nf b/tests/modules/mash/screen/main.nf index 7f0b8bd9..5338d932 100644 --- a/tests/modules/mash/screen/main.nf +++ b/tests/modules/mash/screen/main.nf @@ -14,8 +14,11 @@ workflow test_mash_screen { file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ] ] - fastx_db = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + sars_db = [ + [ id: 'sars_db' ], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] - MASH_SKETCH ( input ) - MASH_SCREEN ( MASH_SKETCH.out.mash, fastx_db ) + MASH_SKETCH ( sars_db ) + MASH_SCREEN ( input, MASH_SKETCH.out.mash.map { meta, sketch -> sketch } ) } diff --git a/tests/modules/mash/screen/test.yml 
b/tests/modules/mash/screen/test.yml index f622817a..e4907ce9 100644 --- a/tests/modules/mash/screen/test.yml +++ b/tests/modules/mash/screen/test.yml @@ -4,9 +4,9 @@ - mash - mash/screen files: - - path: output/mash/test.mash_stats - md5sum: 2a6f297d8e69a5e4160243bc6c89129c - - path: output/mash/test.msh - md5sum: d747145a43dad5f82342036f8f5d9133 + - path: output/mash/sars_db.mash_stats + md5sum: 1dafbd23e36e18bf4c87a007d0fc98f7 + - path: output/mash/sars_db.msh + md5sum: 24289e4a13526e88eeb2abfca4a0f0a8 - path: output/mash/test.screen - md5sum: d3c871dccd5cd57ab54781fa5c5d7278 + md5sum: ac8701e1aab651b2f36c6380b1351b11 From dd6d838cd7ac2b15dd5e56ecb1dd5b3cea63d78c Mon Sep 17 00:00:00 2001 From: SusiJo Date: Wed, 25 May 2022 16:37:44 +0200 Subject: [PATCH 036/168] override userEmulation --- tests/modules/gatk4/markduplicatesspark/nextflow.config | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/modules/gatk4/markduplicatesspark/nextflow.config b/tests/modules/gatk4/markduplicatesspark/nextflow.config index 8730f1c4..c66f00e6 100644 --- a/tests/modules/gatk4/markduplicatesspark/nextflow.config +++ b/tests/modules/gatk4/markduplicatesspark/nextflow.config @@ -3,3 +3,6 @@ process { publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } } +// necessary to override tests/config/nextflow.config +docker.userEmulation = false + From 1400f2e68361fc04642b7c95a0cdf50e098bf926 Mon Sep 17 00:00:00 2001 From: SusiJo Date: Wed, 25 May 2022 16:38:30 +0200 Subject: [PATCH 037/168] fixed tests - mixed chr21 + chr22 --- .../modules/gatk4/markduplicatesspark/main.nf | 24 +++++++++++++++---- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/tests/modules/gatk4/markduplicatesspark/main.nf b/tests/modules/gatk4/markduplicatesspark/main.nf index 2f294f59..cd02c240 100644 --- a/tests/modules/gatk4/markduplicatesspark/main.nf +++ b/tests/modules/gatk4/markduplicatesspark/main.nf @@ -6,18 +6,32 @@ include { 
GATK4_MARKDUPLICATES_SPARK } from '../../../../modules/gatk4/markdupli workflow test_gatk4_markduplicates_spark { input = [ [ id:'test', single_end:false ], // meta map - file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) ] - fasta = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true) - fai = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta_fai'], checkIfExists: true) - dict = file(params.test_data['homo_sapiens']['genome']['genome_21_dict'], checkIfExists: true) + fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true) GATK4_MARKDUPLICATES_SPARK ( input, fasta, fai, dict ) } +// chr 22 workflow test_gatk4_markduplicates_spark_multiple_bams { input = [ [ id:'test', single_end:false ], // meta map - [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam'], checkIfExists: true) + ] ] + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) + + GATK4_MARKDUPLICATES_SPARK ( input, fasta, fai, dict ) +} + +// chr 21 +workflow test_gatk4_markduplicates_spark_multiple_bams_21 { + input = [ [ id:'test', single_end:false ], // meta map + [ 
file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam'], checkIfExists: true) ] ] fasta = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true) From b34e5ea6bf56d25536324f593e9797a3d9c4ccc7 Mon Sep 17 00:00:00 2001 From: SusiJo Date: Wed, 25 May 2022 16:38:48 +0200 Subject: [PATCH 038/168] rm unnecessary spark_user --- modules/gatk4/markduplicatesspark/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/gatk4/markduplicatesspark/main.nf b/modules/gatk4/markduplicatesspark/main.nf index 77e135db..8265e16f 100644 --- a/modules/gatk4/markduplicatesspark/main.nf +++ b/modules/gatk4/markduplicatesspark/main.nf @@ -25,6 +25,7 @@ process GATK4_MARKDUPLICATES_SPARK { prefix = task.ext.prefix ?: "${meta.id}" def input_list = bam.collect{"--input $it"}.join(' ') + def avail_mem = 3 if (!task.memory) { log.info '[GATK MarkDuplicatesSpark] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' 
@@ -32,7 +33,6 @@ process GATK4_MARKDUPLICATES_SPARK { avail_mem = task.memory.giga } """ - export SPARK_USER=spark3 gatk --java-options "-Xmx${avail_mem}g" MarkDuplicatesSpark \\ $input_list \\ From 72ff2732e3177b041234aa49c1775376a00d59f4 Mon Sep 17 00:00:00 2001 From: Matthias De Smet <11850640+matthdsm@users.noreply.github.com> Date: Wed, 25 May 2022 19:19:39 +0200 Subject: [PATCH 039/168] add all remaining outputs to mosdepth --- modules/mosdepth/main.nf | 29 ++++++++++++------ modules/mosdepth/meta.yml | 16 ++++++++++ tests/modules/mosdepth/main.nf | 30 ++++++++++++++++-- tests/modules/mosdepth/nextflow.config | 6 ++++ tests/modules/mosdepth/test.yml | 42 ++++++++++++++++++++++++++ 5 files changed, 111 insertions(+), 12 deletions(-) diff --git a/modules/mosdepth/main.nf b/modules/mosdepth/main.nf index 5f6538e7..96471cfc 100644 --- a/modules/mosdepth/main.nf +++ b/modules/mosdepth/main.nf @@ -13,15 +13,19 @@ process MOSDEPTH { path fasta output: - tuple val(meta), path('*.global.dist.txt') , emit: global_txt - tuple val(meta), path('*.region.dist.txt') , emit: regions_txt , optional:true - tuple val(meta), path('*.summary.txt') , emit: summary_txt - tuple val(meta), path('*.per-base.d4') , emit: per_base_d4 , optional:true - tuple val(meta), path('*.per-base.bed.gz') , emit: per_base_bed, optional:true - tuple val(meta), path('*.per-base.bed.gz.csi'), emit: per_base_csi, optional:true - tuple val(meta), path('*.regions.bed.gz') , emit: regions_bed , optional:true - tuple val(meta), path('*.regions.bed.gz.csi') , emit: regions_csi , optional:true - path "versions.yml" , emit: versions + tuple val(meta), path('*.global.dist.txt') , emit: global_txt + tuple val(meta), path('*.region.dist.txt') , emit: regions_txt , optional:true + tuple val(meta), path('*.summary.txt') , emit: summary_txt + tuple val(meta), path('*.per-base.d4') , emit: per_base_d4 , optional:true + tuple val(meta), path('*.per-base.bed.gz') , emit: per_base_bed, optional:true + tuple val(meta), 
path('*.per-base.bed.gz.csi') , emit: per_base_csi, optional:true + tuple val(meta), path('*.regions.bed.gz') , emit: regions_bed , optional:true + tuple val(meta), path('*.regions.bed.gz.csi') , emit: regions_csi , optional:true + tuple val(meta), path('*.quantized.bed.gz') , emit: quantized_bed , optional:true + tuple val(meta), path('*.quantized.bed.gz.csi') , emit: quantized_csi , optional:true + tuple val(meta), path('*.thresholds.bed.gz') , emit: thresholds_bed , optional:true + tuple val(meta), path('*.thresholds.bed.gz.csi'), emit: thresholds_csi , optional:true + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -34,6 +38,9 @@ process MOSDEPTH { if (bed && args.contains("--by")) { exit 1, "'--by' can only be specified once when running mosdepth! Either remove input BED file definition or remove '--by' from 'ext.args' definition" } + if (!bed && args.contains("--thresholds")) { + exit 1, "'--thresholds' can only be specified in conjunction with '--by'" + } """ mosdepth \\ @@ -61,6 +68,10 @@ process MOSDEPTH { touch ${prefix}.per-base.bed.gz.csi touch ${prefix}.regions.bed.gz touch ${prefix}.regions.bed.gz.csi + touch ${prefix}.quantized.bed.gz + touch ${prefix}.quantized.bed.gz.csi + touch ${prefix}.thresholds.bed.gz + touch ${prefix}.thresholds.bed.gz.csi cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/mosdepth/meta.yml b/modules/mosdepth/meta.yml index 6e9e34c9..d1e33447 100644 --- a/modules/mosdepth/meta.yml +++ b/modules/mosdepth/meta.yml @@ -72,6 +72,22 @@ output: type: file description: Index file for BED file with per-region coverage pattern: "*.{regions.bed.gz.csi}" + - quantized_bed: + type: file + description: BED file with binned coverage + pattern: "*.{quantized.bed.gz}" + - quantized_csi: + type: file + description: Index file for BED file with binned coverage + pattern: "*.{quantized.bed.gz.csi}" + - thresholds_bed: + type: file + description: BED file with the number of bases in 
each region that are covered at or above each threshold + pattern: "*.{thresholds.bed.gz}" + - thresholds_csi: + type: file + description: Index file for BED file with threshold coverage + pattern: "*.{thresholds.bed.gz.csi}" - versions: type: file description: File containing software versions diff --git a/tests/modules/mosdepth/main.nf b/tests/modules/mosdepth/main.nf index 96a9ef20..dd4a01c4 100644 --- a/tests/modules/mosdepth/main.nf +++ b/tests/modules/mosdepth/main.nf @@ -2,9 +2,12 @@ nextflow.enable.dsl = 2 -include { MOSDEPTH } from '../../../modules/mosdepth/main.nf' -include { MOSDEPTH as MOSDEPTH_FAIL } from '../../../modules/mosdepth/main.nf' -include { MOSDEPTH as MOSDEPTH_WINDOW } from '../../../modules/mosdepth/main.nf' +include { MOSDEPTH } from '../../../modules/mosdepth/main.nf' +include { MOSDEPTH as MOSDEPTH_FAIL } from '../../../modules/mosdepth/main.nf' +include { MOSDEPTH as MOSDEPTH_WINDOW } from '../../../modules/mosdepth/main.nf' +include { MOSDEPTH as MOSDEPTH_THRESHOLD } from '../../../modules/mosdepth/main.nf' +include { MOSDEPTH as MOSDEPTH_QUANTIZED } from '../../../modules/mosdepth/main.nf' + workflow test_mosdepth { input = [ @@ -61,6 +64,27 @@ workflow test_mosdepth_window { MOSDEPTH_WINDOW ( input, [], [] ) } +workflow test_mosdepth_quantized { + input = [ + [ id:'test', single_end:true ], + [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) ], + [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true) ] + ] + + MOSDEPTH_QUANTIZED ( input, [], [] ) +} + +workflow test_mosdepth_thresholds { + input = [ + [ id:'test', single_end:true ], + [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) ], + [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true) ] + ] + bed = [ file(params.test_data['homo_sapiens']['genome']['genome_bed'], 
checkIfExists: true) ] + + MOSDEPTH_THRESHOLD ( input, bed, [] ) +} + workflow test_mosdepth_fail { input = [ [ id:'test', single_end:true ], diff --git a/tests/modules/mosdepth/nextflow.config b/tests/modules/mosdepth/nextflow.config index 4a6153e6..66e9291f 100644 --- a/tests/modules/mosdepth/nextflow.config +++ b/tests/modules/mosdepth/nextflow.config @@ -7,4 +7,10 @@ process { withName: MOSDEPTH_WINDOW { ext.args = "--by 100" } + withName: MOSDEPTH_QUANTIZED { + ext.args = "--quantize 0:1:4:100:200" + } + withName: MOSDEPTH_THRESHOLD { + ext.args = "--thresholds 1,10,20,30" + } } diff --git a/tests/modules/mosdepth/test.yml b/tests/modules/mosdepth/test.yml index a3115d6e..8efadb9d 100644 --- a/tests/modules/mosdepth/test.yml +++ b/tests/modules/mosdepth/test.yml @@ -86,6 +86,48 @@ - path: output/mosdepth/test.regions.bed.gz.csi md5sum: 257d67678136963d9dd904330079609d +- name: mosdepth test_mosdepth_quantized + command: nextflow run ./tests/modules/mosdepth -entry test_mosdepth_quantized -c ./tests/config/nextflow.config -c ./tests/modules/mosdepth/nextflow.config + tags: + - mosdepth + files: + - path: output/mosdepth/test.mosdepth.global.dist.txt + md5sum: e82e90c7d508a135b5a8a7cd6933452e + - path: output/mosdepth/test.mosdepth.summary.txt + md5sum: 4f0d231060cbde4efdd673863bd2fb59 + - path: output/mosdepth/test.per-base.bed.gz + md5sum: bc1df47d46f818fee5275975925d769a + - path: output/mosdepth/test.per-base.bed.gz.csi + md5sum: 9e649ac749ff6c6073bef5ab63e8aaa4 + - path: output/mosdepth/test.quantized.bed.gz + md5sum: 3e434a8bafcf59a67841ae3d4d752838 + - path: output/mosdepth/test.quantized.bed.gz.csi + md5sum: be9617f551f19a33923f1e886eaefb93 + +- name: mosdepth test_mosdepth_thresholds + command: nextflow run ./tests/modules/mosdepth -entry test_mosdepth_thresholds -c ./tests/config/nextflow.config -c ./tests/modules/mosdepth/nextflow.config + tags: + - mosdepth + files: + - path: output/mosdepth/test.mosdepth.global.dist.txt + md5sum: 
e82e90c7d508a135b5a8a7cd6933452e + - path: output/mosdepth/test.mosdepth.region.dist.txt + md5sum: e82e90c7d508a135b5a8a7cd6933452e + - path: output/mosdepth/test.mosdepth.summary.txt + md5sum: 96c037f769974b904beb53edc4f56d82 + - path: output/mosdepth/test.per-base.bed.gz + md5sum: bc1df47d46f818fee5275975925d769a + - path: output/mosdepth/test.per-base.bed.gz.csi + md5sum: 9e649ac749ff6c6073bef5ab63e8aaa4 + - path: output/mosdepth/test.regions.bed.gz + md5sum: 5d398caf7171ec4406278e2add3009ae + - path: output/mosdepth/test.regions.bed.gz.csi + md5sum: 47669cfe41f3e222e74d81e1b1be191f + - path: output/mosdepth/test.thresholds.bed.gz + md5sum: 13101e326eea3cbfa1d569b69f494f4c + - path: output/mosdepth/test.thresholds.bed.gz.csi + md5sum: 912055ee9452229439df6fae95644196 + - name: mosdepth test_mosdepth_fail command: nextflow run ./tests/modules/mosdepth -entry test_mosdepth_fail -c ./tests/config/nextflow.config -c ./tests/modules/mosdepth/nextflow.config tags: From 241a1de035d47529fd09835f3f619490308afc21 Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Wed, 25 May 2022 19:38:12 +0100 Subject: [PATCH 040/168] Update main.nf --- tests/modules/mosdepth/main.nf | 93 +++++++++++++++++----------------- 1 file changed, 46 insertions(+), 47 deletions(-) diff --git a/tests/modules/mosdepth/main.nf b/tests/modules/mosdepth/main.nf index dd4a01c4..1bb8e8ff 100644 --- a/tests/modules/mosdepth/main.nf +++ b/tests/modules/mosdepth/main.nf @@ -8,90 +8,89 @@ include { MOSDEPTH as MOSDEPTH_WINDOW } from '../../../modules/mosdepth/main. 
include { MOSDEPTH as MOSDEPTH_THRESHOLD } from '../../../modules/mosdepth/main.nf' include { MOSDEPTH as MOSDEPTH_QUANTIZED } from '../../../modules/mosdepth/main.nf' - workflow test_mosdepth { - input = [ - [ id:'test', single_end:true ], - [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) ], - [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true) ] - ] + input = [ + [ id:'test', single_end:true ], + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true) + ] MOSDEPTH ( input, [], [] ) } workflow test_mosdepth_bed { - input = [ - [ id:'test', single_end:true ], - [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) ], - [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true) ] - ] - bed = [ file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true) ] + input = [ + [ id:'test', single_end:true ], + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true) + ] + bed = file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true) MOSDEPTH ( input, bed, [] ) } workflow test_mosdepth_cram { - input = [ - [ id:'test', single_end:true ], - [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true) ], - [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true) ] - ] - fasta = [ file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) ] + input = [ + [ id:'test', single_end:true ], + 
file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true) + ] + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) MOSDEPTH ( input, [], fasta ) } workflow test_mosdepth_cram_bed { input = [ - [ id:'test', single_end:true ], - [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true) ], - [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true) ] - ] - bed = [ file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true) ] - fasta = [ file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) ] + [ id:'test', single_end:true ], + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true) + ] + bed = file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true) + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) MOSDEPTH ( input, bed, fasta ) } workflow test_mosdepth_window { - input = [ - [ id:'test', single_end:true ], - [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) ], - [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true) ] - ] - bed = [ file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true) ] + input = [ + [ id:'test', single_end:true ], + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true) + ] + bed = 
file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true) MOSDEPTH_WINDOW ( input, [], [] ) } workflow test_mosdepth_quantized { - input = [ - [ id:'test', single_end:true ], - [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) ], - [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true) ] - ] + input = [ + [ id:'test', single_end:true ], + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true) + ] MOSDEPTH_QUANTIZED ( input, [], [] ) } workflow test_mosdepth_thresholds { - input = [ - [ id:'test', single_end:true ], - [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) ], - [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true) ] - ] - bed = [ file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true) ] + input = [ + [ id:'test', single_end:true ], + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true) + ] + bed = file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true) MOSDEPTH_THRESHOLD ( input, bed, [] ) } workflow test_mosdepth_fail { - input = [ - [ id:'test', single_end:true ], - [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) ], - [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true) ] - ] - bed = [ file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true) ] + input = [ + [ id:'test', single_end:true ], + 
file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true) + ] + bed = file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true) MOSDEPTH_FAIL ( input, bed, [] ) } From 72a31b76eb1b58879e0d91fb1d992e0118693098 Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Wed, 25 May 2022 19:42:15 +0100 Subject: [PATCH 041/168] Update main.nf --- modules/mosdepth/main.nf | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/modules/mosdepth/main.nf b/modules/mosdepth/main.nf index 96471cfc..d7e3c929 100644 --- a/modules/mosdepth/main.nf +++ b/modules/mosdepth/main.nf @@ -14,17 +14,17 @@ process MOSDEPTH { output: tuple val(meta), path('*.global.dist.txt') , emit: global_txt - tuple val(meta), path('*.region.dist.txt') , emit: regions_txt , optional:true tuple val(meta), path('*.summary.txt') , emit: summary_txt - tuple val(meta), path('*.per-base.d4') , emit: per_base_d4 , optional:true - tuple val(meta), path('*.per-base.bed.gz') , emit: per_base_bed, optional:true - tuple val(meta), path('*.per-base.bed.gz.csi') , emit: per_base_csi, optional:true - tuple val(meta), path('*.regions.bed.gz') , emit: regions_bed , optional:true - tuple val(meta), path('*.regions.bed.gz.csi') , emit: regions_csi , optional:true - tuple val(meta), path('*.quantized.bed.gz') , emit: quantized_bed , optional:true - tuple val(meta), path('*.quantized.bed.gz.csi') , emit: quantized_csi , optional:true - tuple val(meta), path('*.thresholds.bed.gz') , emit: thresholds_bed , optional:true - tuple val(meta), path('*.thresholds.bed.gz.csi'), emit: thresholds_csi , optional:true + tuple val(meta), path('*.region.dist.txt') , optional:true, emit: regions_txt + tuple val(meta), path('*.per-base.d4') , optional:true, emit: per_base_d4 + tuple val(meta), path('*.per-base.bed.gz') , optional:true, 
emit: per_base_bed + tuple val(meta), path('*.per-base.bed.gz.csi') , optional:true, emit: per_base_csi + tuple val(meta), path('*.regions.bed.gz') , optional:true, emit: regions_bed + tuple val(meta), path('*.regions.bed.gz.csi') , optional:true, emit: regions_csi + tuple val(meta), path('*.quantized.bed.gz') , optional:true, emit: quantized_bed + tuple val(meta), path('*.quantized.bed.gz.csi') , optional:true, emit: quantized_csi + tuple val(meta), path('*.thresholds.bed.gz') , optional:true, emit: thresholds_bed + tuple val(meta), path('*.thresholds.bed.gz.csi'), optional:true, emit: thresholds_csi path "versions.yml" , emit: versions when: @@ -44,7 +44,7 @@ process MOSDEPTH { """ mosdepth \\ - --threads ${task.cpus} \\ + --threads $task.cpus \\ $interval \\ $reference \\ $args \\ From 05e4e2cddec05c58e7a807e03a8e751456eef572 Mon Sep 17 00:00:00 2001 From: jtangrot Date: Thu, 26 May 2022 20:16:45 +0200 Subject: [PATCH 042/168] Add idcutoff as input --- modules/vsearch/usearchglobal/main.nf | 53 ++++++++----------- modules/vsearch/usearchglobal/meta.yml | 51 ++++++++++++++++-- tests/modules/vsearch/usearchglobal/main.nf | 7 +-- .../vsearch/usearchglobal/nextflow.config | 1 - tests/modules/vsearch/usearchglobal/test.yml | 8 +-- .../vsearch/usearchglobal/test.yml.ori | 14 ----- 6 files changed, 79 insertions(+), 55 deletions(-) delete mode 100644 tests/modules/vsearch/usearchglobal/test.yml.ori diff --git a/modules/vsearch/usearchglobal/main.nf b/modules/vsearch/usearchglobal/main.nf index 0b31842f..ed95b7c3 100644 --- a/modules/vsearch/usearchglobal/main.nf +++ b/modules/vsearch/usearchglobal/main.nf @@ -1,39 +1,31 @@ -// TODO nf-core: A module file SHOULD only define input and output files as command-line parameters. -// All other parameters MUST be provided using the "task.ext" directive, see here: -// https://www.nextflow.io/docs/latest/process.html#ext -// where "task.ext" is a string. 
-// Any parameters that need to be evaluated in the context of a particular sample -// e.g. single-end/paired-end data MUST also be defined and evaluated appropriately. -// TODO nf-core: Software that can be piped together SHOULD be added to separate module files -// unless there is a run-time, storage advantage in implementing in this way -// e.g. it's ok to have a single module for bwa to output BAM instead of SAM: -// bwa mem | samtools view -B -T ref.fasta - process VSEARCH_USEARCHGLOBAL { - tag "$meta.id" + tag "${meta.id}" label 'process_low' conda (params.enable_conda ? "bioconda::vsearch=2.21.1" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/vsearch:2.21.1--hf1761c0_1': + 'https://depot.galaxyproject.org/singularity/vsearch:2.21.1--h95f258a_0': 'quay.io/biocontainers/vsearch:2.21.1--h95f258a_0' }" input: - tuple val(meta), path(queryfasta) + val(meta) + path(queryfasta) path db + val idcutoff val outoption val user_columns - // TODO nf-core: Where applicable please provide/convert compressed files as input/output - // e.g. "*.fastq.gz" and NOT "*.fastq", "*.bam" and NOT "*.sam" etc. 
- output: - tuple val(meta), path('*.aln') , optional: true, emit: aln - tuple val(meta), path('*.biom') , optional: true, emit: biom - tuple val(meta), path('*.sam') , optional: true, emit: sam - tuple val(meta), path('*.tsv') , optional: true, emit: tsv - tuple val(meta), path('*.uc') , optional: true, emit: uc - path "versions.yml" , emit: versions + tuple val(meta), path('*.aln') , optional: true, emit: aln + tuple val(meta), path('*.biom') , optional: true, emit: biom + tuple val(meta), path('*.lca') , optional: true, emit: lca + tuple val(meta), path('*.mothur') , optional: true, emit: mothur + tuple val(meta), path('*.otu') , optional: true, emit: otu + tuple val(meta), path('*.sam') , optional: true, emit: sam + tuple val(meta), path('*.tsv') , optional: true, emit: tsv + tuple val(meta), path('*.txt') , optional: true, emit: txt + tuple val(meta), path('*.uc') , optional: true, emit: uc + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -45,13 +37,13 @@ process VSEARCH_USEARCHGLOBAL { switch ( outoption ) { case "alnout": outfmt = "--alnout"; out_ext = 'aln'; break case "biomout": outfmt = "--biomout"; out_ext = 'biom'; break - case "blast6out": outfmt = "--blast6out"; out_ext = 'blast6out.tsv'; break - case "mothur_shared_out": outfmt = "--mothur_shared_out"; out_ext = 'mothur.tsv'; break - case "otutabout": outfmt = "--otutabout"; out_ext = 'otu.tsv'; break - case "samout": outfmt = "--samout"; out_ext = 'sam'; break - case "uc": outfmt = "--uc"; out_ext = 'uc'; break - case "userout": outfmt = "--userout"; out_ext = 'user.tsv'; break - case "lcaout": outfmt = "--lcaout"; out_ext = 'lca.tsv'; break + case "blast6out": outfmt = "--blast6out"; out_ext = 'txt'; break + case "mothur_shared_out": outfmt = "--mothur_shared_out"; out_ext = 'mothur'; break + case "otutabout": outfmt = "--otutabout"; out_ext = 'otu'; break + case "samout": outfmt = "--samout"; out_ext = 'sam'; break + case "uc": outfmt = "--uc"; out_ext = 'uc'; 
break + case "userout": outfmt = "--userout"; out_ext = 'tsv'; break + case "lcaout": outfmt = "--lcaout"; out_ext = 'lca'; break default: outfmt = "--alnout"; out_ext = 'aln'; @@ -62,6 +54,7 @@ process VSEARCH_USEARCHGLOBAL { vsearch \\ --usearch_global $queryfasta \\ --db $db \\ + --id $idcutoff \\ --threads $task.cpus \\ $args \\ ${columns} \\ diff --git a/modules/vsearch/usearchglobal/meta.yml b/modules/vsearch/usearchglobal/meta.yml index 8cb97446..848fb65a 100644 --- a/modules/vsearch/usearchglobal/meta.yml +++ b/modules/vsearch/usearchglobal/meta.yml @@ -15,20 +15,65 @@ tools: licence: "['GPL v3-or-later OR BSD-2-clause']" input: + - meta: + type: map + description: Groovy Map containing sample information e.g. [ id:'test' ] - queryfasta: type: file description: Query sequences in FASTA format pattern: "*.{fasta,fa,fna,faa}" - db: type: file - description: Reference database file. It may be in FASTA or UDB format. + description: Reference database file in FASTA or UDB format pattern: "*" + - idcutoff: + type: real + description: Reject the sequence match if the pairwise identity is lower than the given id cutoff value (value ranging from 0.0 to 1.0 included) + - outoption: + type: string + description: Specify the type of output file to be generated by selecting one of the vsearch output file options + pattern: "alnout|biomout|blast6out|mothur_shared_out|otutabout|samout|uc|userout|lcaout" + - user_columns: + type: string + description: If using the `userout` option, specify which columns to include in output, with fields separated with `+` (e.g. query+target+id). See USEARCH manual for valid options. For other output options, use an empty string. 
output: - - blast6out: + - aln: type: file - description: Tab delimited results in blast-like format + description: Results in pairwise alignment format + pattern: "*.{aln}" + - biom: + type: file + description: Results in an OTU table in the biom version 1.0 file format + pattern: "*.{biom}" + - lca: + type: file + description: Last common ancestor (LCA) information about the hits of each query in tab-separated format + pattern: "*.{lca}" + - mothur: + type: file + description: Results in an OTU table in the mothur ’shared’ tab-separated plain text file format + pattern: "*.{mothur}" + - otu: + type: file + description: Results in an OTU table in the classic tab-separated plain text format + pattern: "*.{otu}" + - sam: + type: file + description: Results written in sam format + pattern: "*.{sam}" + - tsv: + type: file + description: Results in tab-separated output, columns defined by user pattern: "*.{tsv}" + - txt: + type: file + description: Tab delimited results in blast-like tabular format + pattern: "*.{txt}" + - uc: + type: file + description: Tab delimited results in a uclust-like format with 10 columns + pattern: "*.{uc}" - versions: type: file description: File containing software versions diff --git a/tests/modules/vsearch/usearchglobal/main.nf b/tests/modules/vsearch/usearchglobal/main.nf index cdf0fb2f..f4afec32 100644 --- a/tests/modules/vsearch/usearchglobal/main.nf +++ b/tests/modules/vsearch/usearchglobal/main.nf @@ -8,17 +8,18 @@ workflow test_vsearch_usearchglobal { query = file(params.test_data['sarscov2']['genome']['transcriptome_fasta'], checkIfExists: true) db = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) - + idcutoff = 0.985 outoption = "xcfert" // Nonsense text to check default case. 
columns = "" - VSEARCH_USEARCHGLOBAL ( [ [id:'test'], query ], db, outoption, columns ) + VSEARCH_USEARCHGLOBAL ( [id:'test'], query, db, idcutoff, outoption, columns ) } workflow test_vsearch_usearchglobal_userout { query = file(params.test_data['sarscov2']['genome']['transcriptome_fasta'], checkIfExists: true) db = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + idcutoff = 0.985 outoption = "userout" columns = "query+target+id" - VSEARCH_USEARCHGLOBAL ( [ [id:'test'], query ], db, outoption, columns ) + VSEARCH_USEARCHGLOBAL ( [id:'test'], query, db, idcutoff, outoption, columns ) } diff --git a/tests/modules/vsearch/usearchglobal/nextflow.config b/tests/modules/vsearch/usearchglobal/nextflow.config index 3148a59e..14f46dea 100644 --- a/tests/modules/vsearch/usearchglobal/nextflow.config +++ b/tests/modules/vsearch/usearchglobal/nextflow.config @@ -1,5 +1,4 @@ process { - ext.args = '--id 0.985' publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } } diff --git a/tests/modules/vsearch/usearchglobal/test.yml b/tests/modules/vsearch/usearchglobal/test.yml index b7ebec4a..79bcd50e 100644 --- a/tests/modules/vsearch/usearchglobal/test.yml +++ b/tests/modules/vsearch/usearchglobal/test.yml @@ -1,17 +1,17 @@ - name: vsearch usearchglobal test_vsearch_usearchglobal command: nextflow run ./tests/modules/vsearch/usearchglobal -entry test_vsearch_usearchglobal -c ./tests/config/nextflow.config -c ./tests/modules/vsearch/usearchglobal/nextflow.config tags: - - vsearch - vsearch/usearchglobal + - vsearch files: - path: output/vsearch/test.aln - md5sum: 7b7479c16e0ecb503913da8bde48d6c5 + md5sum: 2543c4147530dcb1ba4550d3fdb1502a - name: vsearch usearchglobal test_vsearch_usearchglobal_userout command: nextflow run ./tests/modules/vsearch/usearchglobal -entry test_vsearch_usearchglobal_userout -c ./tests/config/nextflow.config -c ./tests/modules/vsearch/usearchglobal/nextflow.config tags: - - 
vsearch - vsearch/usearchglobal + - vsearch files: - - path: output/vsearch/test.user.tsv + - path: output/vsearch/test.tsv md5sum: b6cc50f7c8d18cb82e74dab70ed4baab diff --git a/tests/modules/vsearch/usearchglobal/test.yml.ori b/tests/modules/vsearch/usearchglobal/test.yml.ori deleted file mode 100644 index df644165..00000000 --- a/tests/modules/vsearch/usearchglobal/test.yml.ori +++ /dev/null @@ -1,14 +0,0 @@ -## TODO nf-core: Please run the following command to build this file: -# nf-core modules create-test-yml vsearch/usearchglobal -- name: "vsearch usearchglobal" - command: nextflow run ./tests/modules/vsearch/usearchglobal -entry test_vsearch_usearchglobal -c ./tests/config/nextflow.config -c ./tests/modules/vsearch/usearchglobal/nextflow.config - tags: - - "vsearch" - # - - "vsearch/usearchglobal" - # - files: - - path: "output/vsearch/blast6out_results.tsv" - md5sum: e667c7caad0bc4b7ac383fd023c654fc - - path: output/vsearch/versions.yml - md5sum: a01fe51bc4c6a3a6226fbf77b2c7cf3b From fb6c7bca3d55c19a793372513395e3a567bdd7ba Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Fri, 27 May 2022 11:35:04 +0100 Subject: [PATCH 043/168] Bump container version for STAR to 2.7.10a --- modules/star/align/main.nf | 13 +++++++------ modules/star/genomegenerate/main.nf | 13 ++++++------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/modules/star/align/main.nf b/modules/star/align/main.nf index 762b84f6..e5aa36f5 100644 --- a/modules/star/align/main.nf +++ b/modules/star/align/main.nf @@ -2,16 +2,15 @@ process STAR_ALIGN { tag "$meta.id" label 'process_high' - // Note: 2.7X indices incompatible with AWS iGenomes. - conda (params.enable_conda ? 'bioconda::star=2.7.9a' : null) + conda (params.enable_conda ? "bioconda::star=2.7.10a bioconda::samtools=1.15.1 conda-forge::gawk=5.1.0" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/star:2.7.9a--h9ee0642_0' : - 'quay.io/biocontainers/star:2.7.9a--h9ee0642_0' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:afaaa4c6f5b308b4b6aa2dd8e99e1466b2a6b0cd-0' : + 'quay.io/biocontainers/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:afaaa4c6f5b308b4b6aa2dd8e99e1466b2a6b0cd-0' }" input: tuple val(meta), path(reads) - path index - path gtf + path index + path gtf val star_ignore_sjdbgtf val seq_platform val seq_center @@ -67,6 +66,8 @@ process STAR_ALIGN { cat <<-END_VERSIONS > versions.yml "${task.process}": star: \$(STAR --version | sed -e "s/STAR_//g") + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + gawk: \$(echo \$(gawk --version 2>&1) | sed 's/^.*GNU Awk //; s/, .*\$//') END_VERSIONS """ } diff --git a/modules/star/genomegenerate/main.nf b/modules/star/genomegenerate/main.nf index e5568f1d..3c298016 100644 --- a/modules/star/genomegenerate/main.nf +++ b/modules/star/genomegenerate/main.nf @@ -2,19 +2,18 @@ process STAR_GENOMEGENERATE { tag "$fasta" label 'process_high' - // Note: 2.7X indices incompatible with AWS iGenomes. - conda (params.enable_conda ? "bioconda::star=2.7.9a bioconda::samtools=1.15.1 conda-forge::gawk=5.1.0" : null) + conda (params.enable_conda ? "bioconda::star=2.7.10a bioconda::samtools=1.15.1 conda-forge::gawk=5.1.0" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:1c4c32d87798d425c970ececfbadd155e7560277-0' : - 'quay.io/biocontainers/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:1c4c32d87798d425c970ececfbadd155e7560277-0' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:afaaa4c6f5b308b4b6aa2dd8e99e1466b2a6b0cd-0' : + 'quay.io/biocontainers/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:afaaa4c6f5b308b4b6aa2dd8e99e1466b2a6b0cd-0' }" input: path fasta path gtf output: - path "star" , emit: index - path "versions.yml" , emit: versions + path "star" , emit: index + path "versions.yml", emit: versions when: task.ext.when == null || task.ext.when @@ -22,7 +21,7 @@ process STAR_GENOMEGENERATE { script: def args = task.ext.args ?: '' def args_list = args.tokenize() - def memory = task.memory ? "--limitGenomeGenerateRAM ${task.memory.toBytes() - 100000000}" : '' + def memory = task.memory ? "--limitGenomeGenerateRAM ${task.memory.toBytes() - 100000000}" : '' if (args_list.contains('--genomeSAindexNbases')) { """ mkdir star From ad578cafcc559b8a6a99257b91e85ba044d9f619 Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Fri, 27 May 2022 12:14:10 +0100 Subject: [PATCH 044/168] Fix tests --- tests/modules/star/align/test.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/modules/star/align/test.yml b/tests/modules/star/align/test.yml index af5bebe5..f7cb0f62 100644 --- a/tests/modules/star/align/test.yml +++ b/tests/modules/star/align/test.yml @@ -36,7 +36,7 @@ - path: output/star/star/transcriptInfo.tab md5sum: 0c3a5adb49d15e5feff81db8e29f2e36 - path: output/star/test.Aligned.out.bam - md5sum: b9f5e2f6a624b64c300fe25dc3ac801f + md5sum: 63de6af2210e138b49d7b4d570c6e67f - path: output/star/test.Log.final.out - path: output/star/test.Log.out - path: output/star/test.Log.progress.out @@ -80,7 +80,7 @@ - path: output/star/star/transcriptInfo.tab 
md5sum: 0c3a5adb49d15e5feff81db8e29f2e36 - path: output/star/test.Aligned.out.bam - md5sum: 38d08f0b944a2a1b981a250d675aa0d9 + md5sum: 7cdef439bc8092bfefb4d091bf8ee6ab - path: output/star/test.Log.final.out - path: output/star/test.Log.out - path: output/star/test.Log.progress.out @@ -124,7 +124,7 @@ - path: output/star/star/transcriptInfo.tab md5sum: 0c3a5adb49d15e5feff81db8e29f2e36 - path: output/star/test.Aligned.out.bam - md5sum: c740d5177067c1fcc48ab7a16cd639d7 + md5sum: 5dbc36fce7b72628c809bbc7d3d67973 - path: output/star/test.Log.final.out - path: output/star/test.Log.out - path: output/star/test.Log.progress.out @@ -168,9 +168,9 @@ - path: output/star/star/transcriptInfo.tab md5sum: 0c3a5adb49d15e5feff81db8e29f2e36 - path: output/star/test.Aligned.out.bam - md5sum: a1bd1b40950a58ea2776908076160052 + md5sum: d85858bf55a523121dde762046a34c5c - path: output/star/test.Chimeric.out.junction - md5sum: 327629eb54032212f29e1c32cbac6975 + md5sum: ae87d1a24180f5a35cf6b47fdfdd0539 - path: output/star/test.Log.final.out - path: output/star/test.Log.out - path: output/star/test.Log.progress.out From 1c99ffd876a3a91c13764b7b9c26959d74eb1bea Mon Sep 17 00:00:00 2001 From: jtangrot Date: Mon, 23 May 2022 10:01:55 +0200 Subject: [PATCH 045/168] Add vserach usearch_global --- modules/vsearch/usearchglobal/main.nf | 51 +++++++++++++++++++ modules/vsearch/usearchglobal/meta.yml | 38 ++++++++++++++ tests/config/pytest_modules.yml | 4 ++ tests/modules/vsearch/usearchglobal/main.nf | 12 +++++ .../vsearch/usearchglobal/nextflow.config | 5 ++ tests/modules/vsearch/usearchglobal/test.yml | 7 +++ .../vsearch/usearchglobal/test.yml.ori | 14 +++++ 7 files changed, 131 insertions(+) create mode 100644 modules/vsearch/usearchglobal/main.nf create mode 100644 modules/vsearch/usearchglobal/meta.yml create mode 100644 tests/modules/vsearch/usearchglobal/main.nf create mode 100644 tests/modules/vsearch/usearchglobal/nextflow.config create mode 100644 
tests/modules/vsearch/usearchglobal/test.yml create mode 100644 tests/modules/vsearch/usearchglobal/test.yml.ori diff --git a/modules/vsearch/usearchglobal/main.nf b/modules/vsearch/usearchglobal/main.nf new file mode 100644 index 00000000..65ee62cc --- /dev/null +++ b/modules/vsearch/usearchglobal/main.nf @@ -0,0 +1,51 @@ +// TODO nf-core: A module file SHOULD only define input and output files as command-line parameters. +// All other parameters MUST be provided using the "task.ext" directive, see here: +// https://www.nextflow.io/docs/latest/process.html#ext +// where "task.ext" is a string. +// Any parameters that need to be evaluated in the context of a particular sample +// e.g. single-end/paired-end data MUST also be defined and evaluated appropriately. +// TODO nf-core: Software that can be piped together SHOULD be added to separate module files +// unless there is a run-time, storage advantage in implementing in this way +// e.g. it's ok to have a single module for bwa to output BAM instead of SAM: +// bwa mem | samtools view -B -T ref.fasta + +process VSEARCH_USEARCHGLOBAL { + tag '$queryfasta' + label 'process_low' + + conda (params.enable_conda ? "bioconda::vsearch=2.21.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/vsearch:2.21.1--hf1761c0_1': + 'quay.io/biocontainers/vsearch:2.21.1--h95f258a_0' }" + + input: + path queryfasta + path db + val outprefix + + // TODO nf-core: Where applicable please provide/convert compressed files as input/output + // e.g. "*.fastq.gz" and NOT "*.fastq", "*.bam" and NOT "*.sam" etc. 
+ + output: + path ("*.tsv") , emit: tsv + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + vsearch \\ + --usearch_global $queryfasta \\ + --db $db \\ + --threads $task.cpus \\ + $args \\ + --blast6out ${outprefix}.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + vsearch: \$(vsearch --version 2>&1 | head -n 1 | sed 's/vsearch //g' | sed 's/,.*//g' | sed 's/^v//' | sed 's/_.*//') + END_VERSIONS + """ +} diff --git a/modules/vsearch/usearchglobal/meta.yml b/modules/vsearch/usearchglobal/meta.yml new file mode 100644 index 00000000..8cb97446 --- /dev/null +++ b/modules/vsearch/usearchglobal/meta.yml @@ -0,0 +1,38 @@ +name: "vsearch_usearchglobal" +description: Compare target sequences to fasta-formatted query sequences using global pairwise alignment. +keywords: + - vsearch + - usearch + - alignment + - fasta +tools: + - "vsearch": + description: "VSEARCH is a versatile open-source tool for microbiome analysis, including chimera detection, clustering, dereplication and rereplication, extraction, FASTA/FASTQ/SFF file processing, masking, orienting, pair-wise alignment, restriction site cutting, searching, shuffling, sorting, subsampling, and taxonomic classification of amplicon sequences for metagenomics, genomics, and population genetics. (USEARCH alternative)" + homepage: "https://github.com/torognes/vsearch" + documentation: "None" + tool_dev_url: "https://github.com/torognes/vsearch" + doi: "doi: 10.7717/peerj.2584" + licence: "['GPL v3-or-later OR BSD-2-clause']" + +input: + - queryfasta: + type: file + description: Query sequences in FASTA format + pattern: "*.{fasta,fa,fna,faa}" + - db: + type: file + description: Reference database file. It may be in FASTA or UDB format. 
+ pattern: "*" + +output: + - blast6out: + type: file + description: Tab delimited results in blast-like format + pattern: "*.{tsv}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@jtangrot" diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index 529f3e02..3ff58b5c 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -2052,6 +2052,10 @@ vcftools: - modules/vcftools/** - tests/modules/vcftools/** +vsearch/usearchglobal: + - modules/vsearch/usearchglobal/** + - tests/modules/vsearch/usearchglobal/** + yara/index: - modules/yara/index/** - tests/modules/yara/index/** diff --git a/tests/modules/vsearch/usearchglobal/main.nf b/tests/modules/vsearch/usearchglobal/main.nf new file mode 100644 index 00000000..02becd21 --- /dev/null +++ b/tests/modules/vsearch/usearchglobal/main.nf @@ -0,0 +1,12 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { VSEARCH_USEARCHGLOBAL } from '../../../../modules/vsearch/usearchglobal/main.nf' + +workflow test_vsearch_usearchglobal { + + query = file(params.test_data['sarscov2']['genome']['transcriptome_fasta'], checkIfExists: true) + db = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + VSEARCH_USEARCHGLOBAL ( query, db, "blast6out_results" ) +} diff --git a/tests/modules/vsearch/usearchglobal/nextflow.config b/tests/modules/vsearch/usearchglobal/nextflow.config new file mode 100644 index 00000000..aab29979 --- /dev/null +++ b/tests/modules/vsearch/usearchglobal/nextflow.config @@ -0,0 +1,5 @@ +process { + ext.args = '--id 0.985' + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + +} \ No newline at end of file diff --git a/tests/modules/vsearch/usearchglobal/test.yml b/tests/modules/vsearch/usearchglobal/test.yml new file mode 100644 index 00000000..b013519d --- /dev/null +++ 
b/tests/modules/vsearch/usearchglobal/test.yml @@ -0,0 +1,7 @@ +- name: vsearch usearchglobal test_vsearch_usearchglobal + command: nextflow run ./tests/modules/vsearch/usearchglobal -entry test_vsearch_usearchglobal -c ./tests/config/nextflow.config -c ./tests/modules/vsearch/usearchglobal/nextflow.config + tags: + - vsearch/usearchglobal + files: + - path: output/vsearch/blast6out_results.tsv + md5sum: 09733131643f1d951321a6e17a35eb8c diff --git a/tests/modules/vsearch/usearchglobal/test.yml.ori b/tests/modules/vsearch/usearchglobal/test.yml.ori new file mode 100644 index 00000000..df644165 --- /dev/null +++ b/tests/modules/vsearch/usearchglobal/test.yml.ori @@ -0,0 +1,14 @@ +## TODO nf-core: Please run the following command to build this file: +# nf-core modules create-test-yml vsearch/usearchglobal +- name: "vsearch usearchglobal" + command: nextflow run ./tests/modules/vsearch/usearchglobal -entry test_vsearch_usearchglobal -c ./tests/config/nextflow.config -c ./tests/modules/vsearch/usearchglobal/nextflow.config + tags: + - "vsearch" + # + - "vsearch/usearchglobal" + # + files: + - path: "output/vsearch/blast6out_results.tsv" + md5sum: e667c7caad0bc4b7ac383fd023c654fc + - path: output/vsearch/versions.yml + md5sum: a01fe51bc4c6a3a6226fbf77b2c7cf3b From 589ce2d73a0b9a3109f5291587eff425ca7fbfa5 Mon Sep 17 00:00:00 2001 From: jtangrot Date: Mon, 23 May 2022 14:02:10 +0200 Subject: [PATCH 046/168] Add options for output file type --- modules/vsearch/usearchglobal/main.nf | 38 +++++++++++++++---- tests/modules/vsearch/usearchglobal/main.nf | 14 ++++++- .../vsearch/usearchglobal/nextflow.config | 4 +- tests/modules/vsearch/usearchglobal/test.yml | 14 ++++++- 4 files changed, 58 insertions(+), 12 deletions(-) diff --git a/modules/vsearch/usearchglobal/main.nf b/modules/vsearch/usearchglobal/main.nf index 65ee62cc..0b31842f 100644 --- a/modules/vsearch/usearchglobal/main.nf +++ b/modules/vsearch/usearchglobal/main.nf @@ -10,7 +10,7 @@ // bwa mem | samtools view -B -T 
ref.fasta process VSEARCH_USEARCHGLOBAL { - tag '$queryfasta' + tag "$meta.id" label 'process_low' conda (params.enable_conda ? "bioconda::vsearch=2.21.1" : null) @@ -19,29 +19,53 @@ process VSEARCH_USEARCHGLOBAL { 'quay.io/biocontainers/vsearch:2.21.1--h95f258a_0' }" input: - path queryfasta + tuple val(meta), path(queryfasta) path db - val outprefix + val outoption + val user_columns // TODO nf-core: Where applicable please provide/convert compressed files as input/output // e.g. "*.fastq.gz" and NOT "*.fastq", "*.bam" and NOT "*.sam" etc. output: - path ("*.tsv") , emit: tsv - path "versions.yml" , emit: versions - + tuple val(meta), path('*.aln') , optional: true, emit: aln + tuple val(meta), path('*.biom') , optional: true, emit: biom + tuple val(meta), path('*.sam') , optional: true, emit: sam + tuple val(meta), path('*.tsv') , optional: true, emit: tsv + tuple val(meta), path('*.uc') , optional: true, emit: uc + path "versions.yml" , emit: versions + when: task.ext.when == null || task.ext.when script: def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def columns = user_columns ? 
"--userfields ${user_columns}" : '' + switch ( outoption ) { + case "alnout": outfmt = "--alnout"; out_ext = 'aln'; break + case "biomout": outfmt = "--biomout"; out_ext = 'biom'; break + case "blast6out": outfmt = "--blast6out"; out_ext = 'blast6out.tsv'; break + case "mothur_shared_out": outfmt = "--mothur_shared_out"; out_ext = 'mothur.tsv'; break + case "otutabout": outfmt = "--otutabout"; out_ext = 'otu.tsv'; break + case "samout": outfmt = "--samout"; out_ext = 'sam'; break + case "uc": outfmt = "--uc"; out_ext = 'uc'; break + case "userout": outfmt = "--userout"; out_ext = 'user.tsv'; break + case "lcaout": outfmt = "--lcaout"; out_ext = 'lca.tsv'; break + default: + outfmt = "--alnout"; + out_ext = 'aln'; + log.warn("Unknown output file format provided (${outoption}): selectingpairwise alignments (alnout)"); + break + } """ vsearch \\ --usearch_global $queryfasta \\ --db $db \\ --threads $task.cpus \\ $args \\ - --blast6out ${outprefix}.tsv + ${columns} \\ + ${outfmt} ${prefix}.${out_ext} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/tests/modules/vsearch/usearchglobal/main.nf b/tests/modules/vsearch/usearchglobal/main.nf index 02becd21..cdf0fb2f 100644 --- a/tests/modules/vsearch/usearchglobal/main.nf +++ b/tests/modules/vsearch/usearchglobal/main.nf @@ -8,5 +8,17 @@ workflow test_vsearch_usearchglobal { query = file(params.test_data['sarscov2']['genome']['transcriptome_fasta'], checkIfExists: true) db = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) - VSEARCH_USEARCHGLOBAL ( query, db, "blast6out_results" ) + + outoption = "xcfert" // Nonsense text to check default case. 
+ columns = "" + VSEARCH_USEARCHGLOBAL ( [ [id:'test'], query ], db, outoption, columns ) +} + +workflow test_vsearch_usearchglobal_userout { + + query = file(params.test_data['sarscov2']['genome']['transcriptome_fasta'], checkIfExists: true) + db = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + outoption = "userout" + columns = "query+target+id" + VSEARCH_USEARCHGLOBAL ( [ [id:'test'], query ], db, outoption, columns ) } diff --git a/tests/modules/vsearch/usearchglobal/nextflow.config b/tests/modules/vsearch/usearchglobal/nextflow.config index aab29979..3148a59e 100644 --- a/tests/modules/vsearch/usearchglobal/nextflow.config +++ b/tests/modules/vsearch/usearchglobal/nextflow.config @@ -1,5 +1,5 @@ process { ext.args = '--id 0.985' publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } - -} \ No newline at end of file +} + diff --git a/tests/modules/vsearch/usearchglobal/test.yml b/tests/modules/vsearch/usearchglobal/test.yml index b013519d..b7ebec4a 100644 --- a/tests/modules/vsearch/usearchglobal/test.yml +++ b/tests/modules/vsearch/usearchglobal/test.yml @@ -1,7 +1,17 @@ - name: vsearch usearchglobal test_vsearch_usearchglobal command: nextflow run ./tests/modules/vsearch/usearchglobal -entry test_vsearch_usearchglobal -c ./tests/config/nextflow.config -c ./tests/modules/vsearch/usearchglobal/nextflow.config tags: + - vsearch - vsearch/usearchglobal files: - - path: output/vsearch/blast6out_results.tsv - md5sum: 09733131643f1d951321a6e17a35eb8c + - path: output/vsearch/test.aln + md5sum: 7b7479c16e0ecb503913da8bde48d6c5 + +- name: vsearch usearchglobal test_vsearch_usearchglobal_userout + command: nextflow run ./tests/modules/vsearch/usearchglobal -entry test_vsearch_usearchglobal_userout -c ./tests/config/nextflow.config -c ./tests/modules/vsearch/usearchglobal/nextflow.config + tags: + - vsearch + - vsearch/usearchglobal + files: + - path: output/vsearch/test.user.tsv + md5sum: 
b6cc50f7c8d18cb82e74dab70ed4baab From 01d86439f5a25cd6b46006420ad7eb35049f4b27 Mon Sep 17 00:00:00 2001 From: jtangrot Date: Thu, 26 May 2022 20:16:45 +0200 Subject: [PATCH 047/168] Add idcutoff as input --- modules/vsearch/usearchglobal/main.nf | 53 ++++++++----------- modules/vsearch/usearchglobal/meta.yml | 51 ++++++++++++++++-- tests/modules/vsearch/usearchglobal/main.nf | 7 +-- .../vsearch/usearchglobal/nextflow.config | 1 - tests/modules/vsearch/usearchglobal/test.yml | 8 +-- .../vsearch/usearchglobal/test.yml.ori | 14 ----- 6 files changed, 79 insertions(+), 55 deletions(-) delete mode 100644 tests/modules/vsearch/usearchglobal/test.yml.ori diff --git a/modules/vsearch/usearchglobal/main.nf b/modules/vsearch/usearchglobal/main.nf index 0b31842f..ed95b7c3 100644 --- a/modules/vsearch/usearchglobal/main.nf +++ b/modules/vsearch/usearchglobal/main.nf @@ -1,39 +1,31 @@ -// TODO nf-core: A module file SHOULD only define input and output files as command-line parameters. -// All other parameters MUST be provided using the "task.ext" directive, see here: -// https://www.nextflow.io/docs/latest/process.html#ext -// where "task.ext" is a string. -// Any parameters that need to be evaluated in the context of a particular sample -// e.g. single-end/paired-end data MUST also be defined and evaluated appropriately. -// TODO nf-core: Software that can be piped together SHOULD be added to separate module files -// unless there is a run-time, storage advantage in implementing in this way -// e.g. it's ok to have a single module for bwa to output BAM instead of SAM: -// bwa mem | samtools view -B -T ref.fasta - process VSEARCH_USEARCHGLOBAL { - tag "$meta.id" + tag "${meta.id}" label 'process_low' conda (params.enable_conda ? "bioconda::vsearch=2.21.1" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/vsearch:2.21.1--hf1761c0_1': + 'https://depot.galaxyproject.org/singularity/vsearch:2.21.1--h95f258a_0': 'quay.io/biocontainers/vsearch:2.21.1--h95f258a_0' }" input: - tuple val(meta), path(queryfasta) + val(meta) + path(queryfasta) path db + val idcutoff val outoption val user_columns - // TODO nf-core: Where applicable please provide/convert compressed files as input/output - // e.g. "*.fastq.gz" and NOT "*.fastq", "*.bam" and NOT "*.sam" etc. - output: - tuple val(meta), path('*.aln') , optional: true, emit: aln - tuple val(meta), path('*.biom') , optional: true, emit: biom - tuple val(meta), path('*.sam') , optional: true, emit: sam - tuple val(meta), path('*.tsv') , optional: true, emit: tsv - tuple val(meta), path('*.uc') , optional: true, emit: uc - path "versions.yml" , emit: versions + tuple val(meta), path('*.aln') , optional: true, emit: aln + tuple val(meta), path('*.biom') , optional: true, emit: biom + tuple val(meta), path('*.lca') , optional: true, emit: lca + tuple val(meta), path('*.mothur') , optional: true, emit: mothur + tuple val(meta), path('*.otu') , optional: true, emit: otu + tuple val(meta), path('*.sam') , optional: true, emit: sam + tuple val(meta), path('*.tsv') , optional: true, emit: tsv + tuple val(meta), path('*.txt') , optional: true, emit: txt + tuple val(meta), path('*.uc') , optional: true, emit: uc + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -45,13 +37,13 @@ process VSEARCH_USEARCHGLOBAL { switch ( outoption ) { case "alnout": outfmt = "--alnout"; out_ext = 'aln'; break case "biomout": outfmt = "--biomout"; out_ext = 'biom'; break - case "blast6out": outfmt = "--blast6out"; out_ext = 'blast6out.tsv'; break - case "mothur_shared_out": outfmt = "--mothur_shared_out"; out_ext = 'mothur.tsv'; break - case "otutabout": outfmt = "--otutabout"; out_ext = 'otu.tsv'; break - case "samout": outfmt = "--samout"; out_ext = 'sam'; break - case "uc": 
outfmt = "--uc"; out_ext = 'uc'; break - case "userout": outfmt = "--userout"; out_ext = 'user.tsv'; break - case "lcaout": outfmt = "--lcaout"; out_ext = 'lca.tsv'; break + case "blast6out": outfmt = "--blast6out"; out_ext = 'txt'; break + case "mothur_shared_out": outfmt = "--mothur_shared_out"; out_ext = 'mothur'; break + case "otutabout": outfmt = "--otutabout"; out_ext = 'otu'; break + case "samout": outfmt = "--samout"; out_ext = 'sam'; break + case "uc": outfmt = "--uc"; out_ext = 'uc'; break + case "userout": outfmt = "--userout"; out_ext = 'tsv'; break + case "lcaout": outfmt = "--lcaout"; out_ext = 'lca'; break default: outfmt = "--alnout"; out_ext = 'aln'; @@ -62,6 +54,7 @@ process VSEARCH_USEARCHGLOBAL { vsearch \\ --usearch_global $queryfasta \\ --db $db \\ + --id $idcutoff \\ --threads $task.cpus \\ $args \\ ${columns} \\ diff --git a/modules/vsearch/usearchglobal/meta.yml b/modules/vsearch/usearchglobal/meta.yml index 8cb97446..848fb65a 100644 --- a/modules/vsearch/usearchglobal/meta.yml +++ b/modules/vsearch/usearchglobal/meta.yml @@ -15,20 +15,65 @@ tools: licence: "['GPL v3-or-later OR BSD-2-clause']" input: + - meta: + type: map + description: Groovy Map containing sample information e.g. [ id:'test' ] - queryfasta: type: file description: Query sequences in FASTA format pattern: "*.{fasta,fa,fna,faa}" - db: type: file - description: Reference database file. It may be in FASTA or UDB format. 
+ description: Reference database file in FASTA or UDB format pattern: "*" + - idcutoff: + type: real + description: Reject the sequence match if the pairwise identity is lower than the given id cutoff value (value ranging from 0.0 to 1.0 included) + - outoption: + type: string + description: Specify the type of output file to be generated by selecting one of the vsearch output file options + pattern: "alnout|biomout|blast6out|mothur_shared_out|otutabout|samout|uc|userout|lcaout" + - user_columns: + type: string + description: If using the `userout` option, specify which columns to include in output, with fields separated with `+` (e.g. query+target+id). See USEARCH manual for valid options. For other output options, use an empty string. output: - - blast6out: + - aln: type: file - description: Tab delimited results in blast-like format + description: Results in pairwise alignment format + pattern: "*.{aln}" + - biom: + type: file + description: Results in an OTU table in the biom version 1.0 file format + pattern: "*.{biom}" + - lca: + type: file + description: Last common ancestor (LCA) information about the hits of each query in tab-separated format + pattern: "*.{lca}" + - mothur: + type: file + description: Results in an OTU table in the mothur ’shared’ tab-separated plain text file format + pattern: "*.{mothur}" + - otu: + type: file + description: Results in an OTU table in the classic tab-separated plain text format + pattern: "*.{otu}" + - sam: + type: file + description: Results written in sam format + pattern: "*.{sam}" + - tsv: + type: file + description: Results in tab-separated output, columns defined by user pattern: "*.{tsv}" + - txt: + type: file + description: Tab delimited results in blast-like tabular format + pattern: "*.{txt}" + - uc: + type: file + description: Tab delimited results in a uclust-like format with 10 columns + pattern: "*.{uc}" - versions: type: file description: File containing software versions diff --git 
a/tests/modules/vsearch/usearchglobal/main.nf b/tests/modules/vsearch/usearchglobal/main.nf index cdf0fb2f..f4afec32 100644 --- a/tests/modules/vsearch/usearchglobal/main.nf +++ b/tests/modules/vsearch/usearchglobal/main.nf @@ -8,17 +8,18 @@ workflow test_vsearch_usearchglobal { query = file(params.test_data['sarscov2']['genome']['transcriptome_fasta'], checkIfExists: true) db = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) - + idcutoff = 0.985 outoption = "xcfert" // Nonsense text to check default case. columns = "" - VSEARCH_USEARCHGLOBAL ( [ [id:'test'], query ], db, outoption, columns ) + VSEARCH_USEARCHGLOBAL ( [id:'test'], query, db, idcutoff, outoption, columns ) } workflow test_vsearch_usearchglobal_userout { query = file(params.test_data['sarscov2']['genome']['transcriptome_fasta'], checkIfExists: true) db = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + idcutoff = 0.985 outoption = "userout" columns = "query+target+id" - VSEARCH_USEARCHGLOBAL ( [ [id:'test'], query ], db, outoption, columns ) + VSEARCH_USEARCHGLOBAL ( [id:'test'], query, db, idcutoff, outoption, columns ) } diff --git a/tests/modules/vsearch/usearchglobal/nextflow.config b/tests/modules/vsearch/usearchglobal/nextflow.config index 3148a59e..14f46dea 100644 --- a/tests/modules/vsearch/usearchglobal/nextflow.config +++ b/tests/modules/vsearch/usearchglobal/nextflow.config @@ -1,5 +1,4 @@ process { - ext.args = '--id 0.985' publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } } diff --git a/tests/modules/vsearch/usearchglobal/test.yml b/tests/modules/vsearch/usearchglobal/test.yml index b7ebec4a..79bcd50e 100644 --- a/tests/modules/vsearch/usearchglobal/test.yml +++ b/tests/modules/vsearch/usearchglobal/test.yml @@ -1,17 +1,17 @@ - name: vsearch usearchglobal test_vsearch_usearchglobal command: nextflow run ./tests/modules/vsearch/usearchglobal -entry 
test_vsearch_usearchglobal -c ./tests/config/nextflow.config -c ./tests/modules/vsearch/usearchglobal/nextflow.config tags: - - vsearch - vsearch/usearchglobal + - vsearch files: - path: output/vsearch/test.aln - md5sum: 7b7479c16e0ecb503913da8bde48d6c5 + md5sum: 2543c4147530dcb1ba4550d3fdb1502a - name: vsearch usearchglobal test_vsearch_usearchglobal_userout command: nextflow run ./tests/modules/vsearch/usearchglobal -entry test_vsearch_usearchglobal_userout -c ./tests/config/nextflow.config -c ./tests/modules/vsearch/usearchglobal/nextflow.config tags: - - vsearch - vsearch/usearchglobal + - vsearch files: - - path: output/vsearch/test.user.tsv + - path: output/vsearch/test.tsv md5sum: b6cc50f7c8d18cb82e74dab70ed4baab diff --git a/tests/modules/vsearch/usearchglobal/test.yml.ori b/tests/modules/vsearch/usearchglobal/test.yml.ori deleted file mode 100644 index df644165..00000000 --- a/tests/modules/vsearch/usearchglobal/test.yml.ori +++ /dev/null @@ -1,14 +0,0 @@ -## TODO nf-core: Please run the following command to build this file: -# nf-core modules create-test-yml vsearch/usearchglobal -- name: "vsearch usearchglobal" - command: nextflow run ./tests/modules/vsearch/usearchglobal -entry test_vsearch_usearchglobal -c ./tests/config/nextflow.config -c ./tests/modules/vsearch/usearchglobal/nextflow.config - tags: - - "vsearch" - # - - "vsearch/usearchglobal" - # - files: - - path: "output/vsearch/blast6out_results.tsv" - md5sum: e667c7caad0bc4b7ac383fd023c654fc - - path: output/vsearch/versions.yml - md5sum: a01fe51bc4c6a3a6226fbf77b2c7cf3b From 2e16c90fce782bda854d2d638d0e216fc613ca39 Mon Sep 17 00:00:00 2001 From: jtangrot Date: Mon, 30 May 2022 09:57:05 +0200 Subject: [PATCH 048/168] Fix whitespace --- modules/vsearch/usearchglobal/main.nf | 2 +- modules/vsearch/usearchglobal/meta.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/vsearch/usearchglobal/main.nf b/modules/vsearch/usearchglobal/main.nf index ed95b7c3..804ef223 100644 
--- a/modules/vsearch/usearchglobal/main.nf +++ b/modules/vsearch/usearchglobal/main.nf @@ -26,7 +26,7 @@ process VSEARCH_USEARCHGLOBAL { tuple val(meta), path('*.txt') , optional: true, emit: txt tuple val(meta), path('*.uc') , optional: true, emit: uc path "versions.yml" , emit: versions - + when: task.ext.when == null || task.ext.when diff --git a/modules/vsearch/usearchglobal/meta.yml b/modules/vsearch/usearchglobal/meta.yml index 848fb65a..3b22e9cd 100644 --- a/modules/vsearch/usearchglobal/meta.yml +++ b/modules/vsearch/usearchglobal/meta.yml @@ -65,7 +65,7 @@ output: - tsv: type: file description: Results in tab-separated output, columns defined by user - pattern: "*.{tsv}" + pattern: "*.{tsv}" - txt: type: file description: Tab delimited results in blast-like tabular format From 63db63757cab03cfa7a02c0d0f134b66fbfadea6 Mon Sep 17 00:00:00 2001 From: Matthias De Smet <11850640+matthdsm@users.noreply.github.com> Date: Mon, 30 May 2022 10:15:40 +0200 Subject: [PATCH 049/168] bump picard versions --- modules/picard/addorreplacereadgroups/main.nf | 6 +++--- modules/picard/cleansam/main.nf | 6 +++--- modules/picard/collecthsmetrics/main.nf | 6 +++--- modules/picard/collectmultiplemetrics/main.nf | 6 +++--- modules/picard/collectwgsmetrics/main.nf | 6 +++--- modules/picard/createsequencedictionary/main.nf | 6 +++--- modules/picard/crosscheckfingerprints/main.nf | 6 +++--- modules/picard/filtersamreads/main.nf | 6 +++--- modules/picard/fixmateinformation/main.nf | 6 +++--- modules/picard/liftovervcf/main.nf | 6 +++--- modules/picard/markduplicates/main.nf | 6 +++--- modules/picard/mergesamfiles/main.nf | 6 +++--- modules/picard/sortsam/main.nf | 6 +++--- modules/picard/sortvcf/main.nf | 6 +++--- 14 files changed, 42 insertions(+), 42 deletions(-) diff --git a/modules/picard/addorreplacereadgroups/main.nf b/modules/picard/addorreplacereadgroups/main.nf index fd102f67..aff83c9f 100644 --- a/modules/picard/addorreplacereadgroups/main.nf +++ 
b/modules/picard/addorreplacereadgroups/main.nf @@ -2,10 +2,10 @@ process PICARD_ADDORREPLACEREADGROUPS { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::picard=2.27.1" : null) + conda (params.enable_conda ? "bioconda::picard=2.27.2" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/picard:2.27.1--hdfd78af_0' : - 'quay.io/biocontainers/picard:2.27.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/picard:2.27.2--hdfd78af_0' : + 'quay.io/biocontainers/picard:2.27.2--hdfd78af_0' }" input: tuple val(meta), path(bam) diff --git a/modules/picard/cleansam/main.nf b/modules/picard/cleansam/main.nf index 62989565..0042c3f1 100644 --- a/modules/picard/cleansam/main.nf +++ b/modules/picard/cleansam/main.nf @@ -2,10 +2,10 @@ process PICARD_CLEANSAM { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::picard=2.27.1" : null) + conda (params.enable_conda ? "bioconda::picard=2.27.2" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/picard:2.27.1--hdfd78af_0' : - 'quay.io/biocontainers/picard:2.27.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/picard:2.27.2--hdfd78af_0' : + 'quay.io/biocontainers/picard:2.27.2--hdfd78af_0' }" input: tuple val(meta), path(bam) diff --git a/modules/picard/collecthsmetrics/main.nf b/modules/picard/collecthsmetrics/main.nf index 06b6b8f3..52d5c84a 100644 --- a/modules/picard/collecthsmetrics/main.nf +++ b/modules/picard/collecthsmetrics/main.nf @@ -2,10 +2,10 @@ process PICARD_COLLECTHSMETRICS { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::picard=2.27.1" : null) + conda (params.enable_conda ? 
"bioconda::picard=2.27.2" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/picard:2.27.1--hdfd78af_0' : - 'quay.io/biocontainers/picard:2.27.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/picard:2.27.2--hdfd78af_0' : + 'quay.io/biocontainers/picard:2.27.2--hdfd78af_0' }" input: tuple val(meta), path(bam) diff --git a/modules/picard/collectmultiplemetrics/main.nf b/modules/picard/collectmultiplemetrics/main.nf index 3c3152df..6fe9881b 100644 --- a/modules/picard/collectmultiplemetrics/main.nf +++ b/modules/picard/collectmultiplemetrics/main.nf @@ -2,10 +2,10 @@ process PICARD_COLLECTMULTIPLEMETRICS { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::picard=2.27.1" : null) + conda (params.enable_conda ? "bioconda::picard=2.27.2" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/picard:2.27.1--hdfd78af_0' : - 'quay.io/biocontainers/picard:2.27.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/picard:2.27.2--hdfd78af_0' : + 'quay.io/biocontainers/picard:2.27.2--hdfd78af_0' }" input: tuple val(meta), path(bam) diff --git a/modules/picard/collectwgsmetrics/main.nf b/modules/picard/collectwgsmetrics/main.nf index cab1fe0c..e231afe2 100644 --- a/modules/picard/collectwgsmetrics/main.nf +++ b/modules/picard/collectwgsmetrics/main.nf @@ -2,10 +2,10 @@ process PICARD_COLLECTWGSMETRICS { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::picard=2.27.1" : null) + conda (params.enable_conda ? "bioconda::picard=2.27.2" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/picard:2.27.1--hdfd78af_0' : - 'quay.io/biocontainers/picard:2.27.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/picard:2.27.2--hdfd78af_0' : + 'quay.io/biocontainers/picard:2.27.2--hdfd78af_0' }" input: tuple val(meta), path(bam) diff --git a/modules/picard/createsequencedictionary/main.nf b/modules/picard/createsequencedictionary/main.nf index 2348c496..16a5ff19 100644 --- a/modules/picard/createsequencedictionary/main.nf +++ b/modules/picard/createsequencedictionary/main.nf @@ -2,10 +2,10 @@ process PICARD_CREATESEQUENCEDICTIONARY { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::picard=2.27.1" : null) + conda (params.enable_conda ? "bioconda::picard=2.27.2" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/picard:2.27.1--hdfd78af_0' : - 'quay.io/biocontainers/picard:2.27.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/picard:2.27.2--hdfd78af_0' : + 'quay.io/biocontainers/picard:2.27.2--hdfd78af_0' }" input: tuple val(meta), path(fasta) diff --git a/modules/picard/crosscheckfingerprints/main.nf b/modules/picard/crosscheckfingerprints/main.nf index d3b59402..c7c4766c 100644 --- a/modules/picard/crosscheckfingerprints/main.nf +++ b/modules/picard/crosscheckfingerprints/main.nf @@ -2,10 +2,10 @@ process PICARD_CROSSCHECKFINGERPRINTS { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::picard=2.27.1" : null) + conda (params.enable_conda ? "bioconda::picard=2.27.2" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/picard:2.27.1--hdfd78af_0' : - 'quay.io/biocontainers/picard:2.27.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/picard:2.27.2--hdfd78af_0' : + 'quay.io/biocontainers/picard:2.27.2--hdfd78af_0' }" input: tuple val(meta), path(input1) diff --git a/modules/picard/filtersamreads/main.nf b/modules/picard/filtersamreads/main.nf index adedcdc2..e17c4686 100644 --- a/modules/picard/filtersamreads/main.nf +++ b/modules/picard/filtersamreads/main.nf @@ -2,10 +2,10 @@ process PICARD_FILTERSAMREADS { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::picard=2.27.1" : null) + conda (params.enable_conda ? "bioconda::picard=2.27.2" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/picard:2.27.1--hdfd78af_0' : - 'quay.io/biocontainers/picard:2.27.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/picard:2.27.2--hdfd78af_0' : + 'quay.io/biocontainers/picard:2.27.2--hdfd78af_0' }" input: tuple val(meta), path(bam), path(readlist) diff --git a/modules/picard/fixmateinformation/main.nf b/modules/picard/fixmateinformation/main.nf index 539b1082..616acce8 100644 --- a/modules/picard/fixmateinformation/main.nf +++ b/modules/picard/fixmateinformation/main.nf @@ -2,10 +2,10 @@ process PICARD_FIXMATEINFORMATION { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::picard=2.27.1" : null) + conda (params.enable_conda ? "bioconda::picard=2.27.2" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/picard:2.27.1--hdfd78af_0' : - 'quay.io/biocontainers/picard:2.27.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/picard:2.27.2--hdfd78af_0' : + 'quay.io/biocontainers/picard:2.27.2--hdfd78af_0' }" input: tuple val(meta), path(bam) diff --git a/modules/picard/liftovervcf/main.nf b/modules/picard/liftovervcf/main.nf index c92abfeb..a82f1466 100644 --- a/modules/picard/liftovervcf/main.nf +++ b/modules/picard/liftovervcf/main.nf @@ -2,10 +2,10 @@ process PICARD_LIFTOVERVCF { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::picard=2.27.1" : null) + conda (params.enable_conda ? "bioconda::picard=2.27.2" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/picard:2.27.1--hdfd78af_0' : - 'quay.io/biocontainers/picard:2.27.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/picard:2.27.2--hdfd78af_0' : + 'quay.io/biocontainers/picard:2.27.2--hdfd78af_0' }" input: tuple val(meta), path(input_vcf) diff --git a/modules/picard/markduplicates/main.nf b/modules/picard/markduplicates/main.nf index 1565c647..87f913d4 100644 --- a/modules/picard/markduplicates/main.nf +++ b/modules/picard/markduplicates/main.nf @@ -2,10 +2,10 @@ process PICARD_MARKDUPLICATES { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::picard=2.27.1" : null) + conda (params.enable_conda ? "bioconda::picard=2.27.2" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/picard:2.27.1--hdfd78af_0' : - 'quay.io/biocontainers/picard:2.27.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/picard:2.27.2--hdfd78af_0' : + 'quay.io/biocontainers/picard:2.27.2--hdfd78af_0' }" input: tuple val(meta), path(bam) diff --git a/modules/picard/mergesamfiles/main.nf b/modules/picard/mergesamfiles/main.nf index 1e32c63a..d65f518c 100644 --- a/modules/picard/mergesamfiles/main.nf +++ b/modules/picard/mergesamfiles/main.nf @@ -2,10 +2,10 @@ process PICARD_MERGESAMFILES { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::picard=2.27.1" : null) + conda (params.enable_conda ? "bioconda::picard=2.27.2" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/picard:2.27.1--hdfd78af_0' : - 'quay.io/biocontainers/picard:2.27.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/picard:2.27.2--hdfd78af_0' : + 'quay.io/biocontainers/picard:2.27.2--hdfd78af_0' }" input: tuple val(meta), path(bams) diff --git a/modules/picard/sortsam/main.nf b/modules/picard/sortsam/main.nf index cee60fd1..5e09d907 100644 --- a/modules/picard/sortsam/main.nf +++ b/modules/picard/sortsam/main.nf @@ -2,10 +2,10 @@ process PICARD_SORTSAM { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::picard=2.27.1" : null) + conda (params.enable_conda ? "bioconda::picard=2.27.2" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/picard:2.27.1--hdfd78af_0' : - 'quay.io/biocontainers/picard:2.27.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/picard:2.27.2--hdfd78af_0' : + 'quay.io/biocontainers/picard:2.27.2--hdfd78af_0' }" input: tuple val(meta), path(bam) diff --git a/modules/picard/sortvcf/main.nf b/modules/picard/sortvcf/main.nf index fb8dbb79..8f886393 100644 --- a/modules/picard/sortvcf/main.nf +++ b/modules/picard/sortvcf/main.nf @@ -2,10 +2,10 @@ process PICARD_SORTVCF { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::picard=2.27.1" : null) + conda (params.enable_conda ? "bioconda::picard=2.27.2" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/picard:2.27.1--hdfd78af_0' : - 'quay.io/biocontainers/picard:2.27.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/picard:2.27.2--hdfd78af_0' : + 'quay.io/biocontainers/picard:2.27.2--hdfd78af_0' }" input: tuple val(meta), path(vcf) From c67c15f243b6e590fb28275dbcc29f0d4ae6053c Mon Sep 17 00:00:00 2001 From: jtangrot Date: Mon, 30 May 2022 13:01:06 +0200 Subject: [PATCH 050/168] Make input tuple --- modules/vsearch/usearchglobal/main.nf | 3 +-- tests/modules/vsearch/usearchglobal/main.nf | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/modules/vsearch/usearchglobal/main.nf b/modules/vsearch/usearchglobal/main.nf index 804ef223..88d945ea 100644 --- a/modules/vsearch/usearchglobal/main.nf +++ b/modules/vsearch/usearchglobal/main.nf @@ -8,8 +8,7 @@ process VSEARCH_USEARCHGLOBAL { 'quay.io/biocontainers/vsearch:2.21.1--h95f258a_0' }" input: - val(meta) - path(queryfasta) + tuple val(meta), path(queryfasta) path db val idcutoff val outoption diff --git a/tests/modules/vsearch/usearchglobal/main.nf b/tests/modules/vsearch/usearchglobal/main.nf index f4afec32..90dc3191 100644 --- 
a/tests/modules/vsearch/usearchglobal/main.nf +++ b/tests/modules/vsearch/usearchglobal/main.nf @@ -11,7 +11,7 @@ workflow test_vsearch_usearchglobal { idcutoff = 0.985 outoption = "xcfert" // Nonsense text to check default case. columns = "" - VSEARCH_USEARCHGLOBAL ( [id:'test'], query, db, idcutoff, outoption, columns ) + VSEARCH_USEARCHGLOBAL ( [[id:'test'], query], db, idcutoff, outoption, columns ) } workflow test_vsearch_usearchglobal_userout { @@ -21,5 +21,5 @@ workflow test_vsearch_usearchglobal_userout { idcutoff = 0.985 outoption = "userout" columns = "query+target+id" - VSEARCH_USEARCHGLOBAL ( [id:'test'], query, db, idcutoff, outoption, columns ) + VSEARCH_USEARCHGLOBAL ( [[id:'test'], query], db, idcutoff, outoption, columns ) } From 69e00b55d39c2d4c1f4ef94fd5be972d8478c07c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jeanette=20T=C3=A5ngrot?= Date: Mon, 30 May 2022 13:03:45 +0200 Subject: [PATCH 051/168] Update modules/vsearch/usearchglobal/main.nf Co-authored-by: Daniel Straub <42973691+d4straub@users.noreply.github.com> --- modules/vsearch/usearchglobal/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/vsearch/usearchglobal/main.nf b/modules/vsearch/usearchglobal/main.nf index 804ef223..01e379d2 100644 --- a/modules/vsearch/usearchglobal/main.nf +++ b/modules/vsearch/usearchglobal/main.nf @@ -47,7 +47,7 @@ process VSEARCH_USEARCHGLOBAL { default: outfmt = "--alnout"; out_ext = 'aln'; - log.warn("Unknown output file format provided (${outoption}): selectingpairwise alignments (alnout)"); + log.warn("Unknown output file format provided (${outoption}): selecting pairwise alignments (alnout)"); break } """ From 6aac5c2c4099f950f43f9db7fa3b4b639c4c7ffc Mon Sep 17 00:00:00 2001 From: jtangrot Date: Mon, 30 May 2022 13:30:27 +0200 Subject: [PATCH 052/168] Update test --- tests/modules/vsearch/usearchglobal/test.yml | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git 
a/tests/modules/vsearch/usearchglobal/test.yml b/tests/modules/vsearch/usearchglobal/test.yml index 79bcd50e..a593f78f 100644 --- a/tests/modules/vsearch/usearchglobal/test.yml +++ b/tests/modules/vsearch/usearchglobal/test.yml @@ -5,7 +5,16 @@ - vsearch files: - path: output/vsearch/test.aln - md5sum: 2543c4147530dcb1ba4550d3fdb1502a + contains: + - "vsearch --usearch_global transcriptome.fasta --db genome.fasta --id 0.985 --threads 2 --alnout test.aln" + - "Query >lcl|MT192765.1_cds_QIK50427.1_2" + - "%Id TLen Target" + - "100% 29829 MT192765.1" + - "Query 3822nt >lcl|MT192765.1_cds_QIK50427.1_2" + - "Target 29829nt >MT192765.1" + - "Qry 21249 + CAACAGAGTTGTTATTTCTAGTGATGTTCTTGTTAACAACTAA 21291" + - "Tgt 21506 + CAACAGAGTTGTTATTTCTAGTGATGTTCTTGTTAACAACTAA 21548" + - "21291 cols, 21290 ids (100.0%), 1 gaps (0.0%)" - name: vsearch usearchglobal test_vsearch_usearchglobal_userout command: nextflow run ./tests/modules/vsearch/usearchglobal -entry test_vsearch_usearchglobal_userout -c ./tests/config/nextflow.config -c ./tests/modules/vsearch/usearchglobal/nextflow.config From 6ae405c5dab57ca9ce3761bfd6f39d6bb8df9e20 Mon Sep 17 00:00:00 2001 From: jtangrot Date: Mon, 30 May 2022 13:35:12 +0200 Subject: [PATCH 053/168] prettier --- tests/modules/vsearch/usearchglobal/test.yml | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/modules/vsearch/usearchglobal/test.yml b/tests/modules/vsearch/usearchglobal/test.yml index a593f78f..227d728f 100644 --- a/tests/modules/vsearch/usearchglobal/test.yml +++ b/tests/modules/vsearch/usearchglobal/test.yml @@ -6,15 +6,15 @@ files: - path: output/vsearch/test.aln contains: - - "vsearch --usearch_global transcriptome.fasta --db genome.fasta --id 0.985 --threads 2 --alnout test.aln" - - "Query >lcl|MT192765.1_cds_QIK50427.1_2" - - "%Id TLen Target" - - "100% 29829 MT192765.1" - - "Query 3822nt >lcl|MT192765.1_cds_QIK50427.1_2" - - "Target 29829nt >MT192765.1" - - "Qry 21249 + 
CAACAGAGTTGTTATTTCTAGTGATGTTCTTGTTAACAACTAA 21291" - - "Tgt 21506 + CAACAGAGTTGTTATTTCTAGTGATGTTCTTGTTAACAACTAA 21548" - - "21291 cols, 21290 ids (100.0%), 1 gaps (0.0%)" + - "vsearch --usearch_global transcriptome.fasta --db genome.fasta --id 0.985 --threads 2 --alnout test.aln" + - "Query >lcl|MT192765.1_cds_QIK50427.1_2" + - "%Id TLen Target" + - "100% 29829 MT192765.1" + - "Query 3822nt >lcl|MT192765.1_cds_QIK50427.1_2" + - "Target 29829nt >MT192765.1" + - "Qry 21249 + CAACAGAGTTGTTATTTCTAGTGATGTTCTTGTTAACAACTAA 21291" + - "Tgt 21506 + CAACAGAGTTGTTATTTCTAGTGATGTTCTTGTTAACAACTAA 21548" + - "21291 cols, 21290 ids (100.0%), 1 gaps (0.0%)" - name: vsearch usearchglobal test_vsearch_usearchglobal_userout command: nextflow run ./tests/modules/vsearch/usearchglobal -entry test_vsearch_usearchglobal_userout -c ./tests/config/nextflow.config -c ./tests/modules/vsearch/usearchglobal/nextflow.config From d463cd5dd676738e95363e46302918d366a6b6d2 Mon Sep 17 00:00:00 2001 From: Sebastien Guizard Date: Mon, 30 May 2022 17:26:51 +0100 Subject: [PATCH 054/168] =?UTF-8?q?=F0=9F=91=8C=20IMPROVE:=20Remove=20last?= =?UTF-8?q?=20TODO?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- modules/bamtools/convert/meta.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/modules/bamtools/convert/meta.yml b/modules/bamtools/convert/meta.yml index 5796a4ab..dcd1cc65 100644 --- a/modules/bamtools/convert/meta.yml +++ b/modules/bamtools/convert/meta.yml @@ -42,7 +42,6 @@ output: type: file description: File containing software versions pattern: "versions.yml" - ## TODO nf-core: Delete / customise this example output - out: type: file description: The data in the asked format (bed, fasta, fastq, json, pileup, sam, yaml) From 7ffe98ac8bd3463087a3878c924c653330bfd0c9 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Mon, 30 May 2022 21:12:21 +0200 Subject: [PATCH 055/168] Add gatk/indelrealigner --- modules/gatk/indelrealigner/main.nf | 43 
+++++++++++++ modules/gatk/indelrealigner/meta.yml | 63 +++++++++++++++++++ tests/config/pytest_modules.yml | 4 ++ tests/modules/gatk/indelrealigner/main.nf | 22 +++++++ .../gatk/indelrealigner/nextflow.config | 6 ++ tests/modules/gatk/indelrealigner/test.yml | 14 +++++ 6 files changed, 152 insertions(+) create mode 100644 modules/gatk/indelrealigner/main.nf create mode 100644 modules/gatk/indelrealigner/meta.yml create mode 100644 tests/modules/gatk/indelrealigner/main.nf create mode 100644 tests/modules/gatk/indelrealigner/nextflow.config create mode 100644 tests/modules/gatk/indelrealigner/test.yml diff --git a/modules/gatk/indelrealigner/main.nf b/modules/gatk/indelrealigner/main.nf new file mode 100644 index 00000000..117a298b --- /dev/null +++ b/modules/gatk/indelrealigner/main.nf @@ -0,0 +1,43 @@ +process GATK_INDELREALIGNER { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? "bioconda::gatk=3.5" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gatk:3.5--hdfd78af_11': + 'quay.io/biocontainers/gatk:3.5--hdfd78af_11' }" + + input: + tuple val(meta), path(bam), path(bai), path(intervals) + tuple val(meta), path(fasta) + tuple val(meta), path(known_vcf) + + output: + tuple val(meta), path("*.bam"), emit: bam + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + if ("$bam" == "${prefix}.bam") error "Input and output names are the same, set prefix in module configuration to disambiguate!" + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def known = known_vcf ? 
"-known ${known_vcf}" : "" + """ + gatk3 \\ + -T RealignerTargetCreator \\ + -R ${fasta} \\ + -nt ${task.cpus} + -I ${bam} \\ + -targetIntervals ${intervals} \\ + ${known} \\ + -o ${prefix}.bam \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk: \$(echo \$(gatk3 --version)) + END_VERSIONS + """ +} diff --git a/modules/gatk/indelrealigner/meta.yml b/modules/gatk/indelrealigner/meta.yml new file mode 100644 index 00000000..80abc6aa --- /dev/null +++ b/modules/gatk/indelrealigner/meta.yml @@ -0,0 +1,63 @@ +name: "gatk_indelrealigner" +description: Performs local realignment around indels to correct for mapping errors +keywords: + - bam + - vcf + - variant calling + - indel + - realignment +tools: + - "gatk": + description: "The full Genome Analysis Toolkit (GATK) framework, license restricted." + homepage: "https://gatk.broadinstitute.org/hc/en-us" + documentation: "https://github.com/broadinstitute/gatk-docs" + licence: "['https://software.broadinstitute.org/gatk/download/licensing', 'BSD', 'https://www.broadinstitute.org/gatk/about/#licensing']" + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: Sorted and indexed BAM/CRAM/SAM file + pattern: "*.bam" + - bai: + type: file + description: BAM index file + pattern: "*.bai" + - intervals: + type: file + description: Intervals file created by gatk3 RealignerTargetCreator + pattern: "*.{intervals,list}" + - fasta: + type: file + description: Reference file used to generate BAM file + pattern: ".{fasta,fa,fna}" + - known_vcf: + type: file + description: Optional input VCF file(s) with known indels + pattern: ".vcf" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - bam: + type: file + description: Sorted and indexed BAM/CRAM/SAM file with local realignment around variants + pattern: "*.bam" + - bai: + type: file + description: Output BAM Index file + pattern: "*.bai" + +authors: + - "@jfy133" diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index 3ff58b5c..8bb9d305 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -715,6 +715,10 @@ gamma/gamma: - modules/gamma/gamma/** - tests/modules/gamma/gamma/** +gatk/indelrealigner: + - modules/gatk/indelrealigner/** + - tests/modules/gatk/indelrealigner/** + gatk4/applybqsr: - modules/gatk4/applybqsr/** - tests/modules/gatk4/applybqsr/** diff --git a/tests/modules/gatk/indelrealigner/main.nf b/tests/modules/gatk/indelrealigner/main.nf new file mode 100644 index 00000000..687e12a9 --- /dev/null +++ b/tests/modules/gatk/indelrealigner/main.nf @@ -0,0 +1,22 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { GATK_INDELREALIGNER } from '../../../../modules/gatk/indelrealigner/main.nf' + +// TODO add REalignerTargetCrator + + +workflow test_gatk_indelrealigner { + + input = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_paired_end_bai'], checkIfExists: true), + GATK_REALIGNERTARGETCREATOR.out.intervals + ] + + reference = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + + GATK_INDELREALIGNER ( input, reference, [] ) +} diff --git a/tests/modules/gatk/indelrealigner/nextflow.config b/tests/modules/gatk/indelrealigner/nextflow.config new file mode 100644 index 00000000..2e6ae57f --- /dev/null +++ b/tests/modules/gatk/indelrealigner/nextflow.config @@ -0,0 +1,6 @@ +process { + + publishDir = { 
"${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + ext.prefix = "${meta.id}.realigned" + +} diff --git a/tests/modules/gatk/indelrealigner/test.yml b/tests/modules/gatk/indelrealigner/test.yml new file mode 100644 index 00000000..59fc394f --- /dev/null +++ b/tests/modules/gatk/indelrealigner/test.yml @@ -0,0 +1,14 @@ +## TODO nf-core: Please run the following command to build this file: +# nf-core modules create-test-yml gatk/indelrealigner +- name: "gatk indelrealigner" + command: nextflow run ./tests/modules/gatk/indelrealigner -entry test_gatk_indelrealigner -c ./tests/config/nextflow.config -c ./tests/modules/gatk/indelrealigner/nextflow.config + tags: + - "gatk" + # + - "gatk/indelrealigner" + # + files: + - path: "output/gatk/test.bam" + md5sum: e667c7caad0bc4b7ac383fd023c654fc + - path: output/gatk/versions.yml + md5sum: a01fe51bc4c6a3a6226fbf77b2c7cf3b From 723852bf3d3b12059b2f53da8bc055206f3019d7 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Tue, 31 May 2022 13:38:18 +0200 Subject: [PATCH 056/168] Fixes running without shortreads, and also ensures all tests produce non-empty fastq.gz files --- modules/filtlong/main.nf | 2 +- tests/modules/filtlong/nextflow.config | 2 ++ tests/modules/filtlong/test.yml | 12 ++++++------ 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/modules/filtlong/main.nf b/modules/filtlong/main.nf index 0e6fdd53..67500053 100644 --- a/modules/filtlong/main.nf +++ b/modules/filtlong/main.nf @@ -20,7 +20,7 @@ process FILTLONG { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def short_reads = meta.single_end ? "-1 $shortreads" : "-1 ${shortreads[0]} -2 ${shortreads[1]}" + def short_reads = !shortreads ? "" : meta.single_end ? 
"-1 $shortreads" : "-1 ${shortreads[0]} -2 ${shortreads[1]}" """ filtlong \\ $short_reads \\ diff --git a/tests/modules/filtlong/nextflow.config b/tests/modules/filtlong/nextflow.config index 8730f1c4..ec9854c2 100644 --- a/tests/modules/filtlong/nextflow.config +++ b/tests/modules/filtlong/nextflow.config @@ -2,4 +2,6 @@ process { publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + ext.args = "--min_length 10" + } diff --git a/tests/modules/filtlong/test.yml b/tests/modules/filtlong/test.yml index dc5fa5a9..9c465303 100644 --- a/tests/modules/filtlong/test.yml +++ b/tests/modules/filtlong/test.yml @@ -1,23 +1,23 @@ - name: filtlong test_filtlong - command: nextflow run ./tests/modules/filtlong -entry test_filtlong -c ./tests/config/nextflow.config -c ./tests/modules/filtlong/nextflow.config + command: nextflow run ./tests/modules/filtlong -entry test_filtlong -c ./tests/config/nextflow.config -c ./tests/modules/filtlong/nextflow.config tags: - filtlong files: - path: output/filtlong/test_lr_filtlong.fastq.gz - md5sum: 7029066c27ac6f5ef18d660d5741979a + md5sum: ff2b6e10fea0c45f10e8739a5bca25ed - name: filtlong test_filtlong_illumina_se - command: nextflow run ./tests/modules/filtlong -entry test_filtlong_illumina_se -c ./tests/config/nextflow.config -c ./tests/modules/filtlong/nextflow.config + command: nextflow run ./tests/modules/filtlong -entry test_filtlong_illumina_se -c ./tests/config/nextflow.config -c ./tests/modules/filtlong/nextflow.config tags: - filtlong files: - path: output/filtlong/test_lr_filtlong.fastq.gz - md5sum: 7029066c27ac6f5ef18d660d5741979a + md5sum: ff2b6e10fea0c45f10e8739a5bca25ed - name: filtlong test_filtlong_illumina_pe - command: nextflow run ./tests/modules/filtlong -entry test_filtlong_illumina_pe -c ./tests/config/nextflow.config -c ./tests/modules/filtlong/nextflow.config + command: nextflow run ./tests/modules/filtlong -entry test_filtlong_illumina_pe -c 
./tests/config/nextflow.config -c ./tests/modules/filtlong/nextflow.config tags: - filtlong files: - path: output/filtlong/test_lr_filtlong.fastq.gz - md5sum: 7029066c27ac6f5ef18d660d5741979a + md5sum: ff2b6e10fea0c45f10e8739a5bca25ed From 39e34418c90f822f301ab4b2959ced36a4f70f05 Mon Sep 17 00:00:00 2001 From: SusiJo Date: Tue, 31 May 2022 14:34:50 +0200 Subject: [PATCH 057/168] added metrics as optional output --- modules/gatk4/markduplicatesspark/main.nf | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/modules/gatk4/markduplicatesspark/main.nf b/modules/gatk4/markduplicatesspark/main.nf index 8265e16f..eeaf0445 100644 --- a/modules/gatk4/markduplicatesspark/main.nf +++ b/modules/gatk4/markduplicatesspark/main.nf @@ -14,15 +14,16 @@ process GATK4_MARKDUPLICATES_SPARK { path dict output: - tuple val(meta), path("${prefix}"), emit: output - path "versions.yml" , emit: versions + tuple val(meta), path("*.bam"), emit: bam + tuple val(meta), path("*.metrics"), emit: metrics, optional: true + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when script: def args = task.ext.args ?: '' - prefix = task.ext.prefix ?: "${meta.id}" + def prefix = task.ext.prefix ?: "${meta.id}" + ".bam" def input_list = bam.collect{"--input $it"}.join(' ') From e0e8273f7462926b6e58e12183d2ca3484f9e07e Mon Sep 17 00:00:00 2001 From: SusiJo Date: Tue, 31 May 2022 14:35:18 +0200 Subject: [PATCH 058/168] adjusted input to use queryname sorted bams --- .../modules/gatk4/markduplicatesspark/main.nf | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/tests/modules/gatk4/markduplicatesspark/main.nf b/tests/modules/gatk4/markduplicatesspark/main.nf index cd02c240..ac89b1b3 100644 --- a/tests/modules/gatk4/markduplicatesspark/main.nf +++ b/tests/modules/gatk4/markduplicatesspark/main.nf @@ -3,6 +3,7 @@ nextflow.enable.dsl = 2 include { GATK4_MARKDUPLICATES_SPARK } from 
'../../../../modules/gatk4/markduplicatesspark/main.nf' +include { GATK4_MARKDUPLICATES_SPARK as GATK4_MARKDUPLICATES_SPARK_METRICS } from '../../../../modules/gatk4/markduplicatesspark/main.nf' workflow test_gatk4_markduplicates_spark { input = [ [ id:'test', single_end:false ], // meta map @@ -18,8 +19,8 @@ workflow test_gatk4_markduplicates_spark { // chr 22 workflow test_gatk4_markduplicates_spark_multiple_bams { input = [ [ id:'test', single_end:false ], // meta map - [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), - file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam'], checkIfExists: true) + [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_name_sorted_bam'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_name_sorted_bam'], checkIfExists: true) ] ] fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) @@ -28,15 +29,15 @@ workflow test_gatk4_markduplicates_spark_multiple_bams { GATK4_MARKDUPLICATES_SPARK ( input, fasta, fai, dict ) } -// chr 21 -workflow test_gatk4_markduplicates_spark_multiple_bams_21 { +// chr 22 +workflow test_gatk4_markduplicates_spark_multiple_bams_metrics { input = [ [ id:'test', single_end:false ], // meta map - [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), - file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam'], checkIfExists: true) + [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_name_sorted_bam'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_name_sorted_bam'], checkIfExists: true) ] ] - fasta = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true) - fai = 
file(params.test_data['homo_sapiens']['genome']['genome_21_fasta_fai'], checkIfExists: true) - dict = file(params.test_data['homo_sapiens']['genome']['genome_21_dict'], checkIfExists: true) + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) - GATK4_MARKDUPLICATES_SPARK ( input, fasta, fai, dict ) + GATK4_MARKDUPLICATES_SPARK_METRICS ( input, fasta, fai, dict ) } From 3117b2973c44677217c2e128890949eba6c94451 Mon Sep 17 00:00:00 2001 From: SusiJo Date: Tue, 31 May 2022 14:35:29 +0200 Subject: [PATCH 059/168] added test for metrics --- tests/modules/gatk4/markduplicatesspark/nextflow.config | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/modules/gatk4/markduplicatesspark/nextflow.config b/tests/modules/gatk4/markduplicatesspark/nextflow.config index c66f00e6..495a42f3 100644 --- a/tests/modules/gatk4/markduplicatesspark/nextflow.config +++ b/tests/modules/gatk4/markduplicatesspark/nextflow.config @@ -2,7 +2,10 @@ process { publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + withName: GATK4_MARKDUPLICATES_SPARK_METRICS { + ext.args = '--metrics-file test.metrics' + } } -// necessary to override tests/config/nextflow.config +// override tests/config/nextflow.config docker.userEmulation = false From ea526051a44a8b25a943f6d58f789a0745b853d5 Mon Sep 17 00:00:00 2001 From: SusiJo Date: Tue, 31 May 2022 14:35:42 +0200 Subject: [PATCH 060/168] new test.yml --- .../gatk4/markduplicatesspark/test.yml | 32 +++++++++++-------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/tests/modules/gatk4/markduplicatesspark/test.yml b/tests/modules/gatk4/markduplicatesspark/test.yml index b0c0b40d..34fe0101 100644 --- 
a/tests/modules/gatk4/markduplicatesspark/test.yml +++ b/tests/modules/gatk4/markduplicatesspark/test.yml @@ -1,25 +1,31 @@ -- name: gatk4 markduplicates test_gatk4_markduplicates_spark - command: nextflow run tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark -c tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config +- name: gatk4 markduplicatesspark test_gatk4_markduplicates_spark + command: nextflow run ./tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config tags: - gatk4 - gatk4/markduplicatesspark files: - - path: output/gatk4/test.bai - md5sum: e9c125e82553209933883b4fe2b8d7c2 - path: output/gatk4/test.bam - md5sum: 2efd50b2e6b7fd9bdf242cd9e266cfa9 - - path: output/gatk4/test.metrics - - path: output/gatk4/versions.yml + md5sum: dc1a09ac6371aab7c50d1a554baa06d3 -- name: gatk4 markduplicates test_gatk4_markduplicates_spark_multiple_bams - command: nextflow run tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark_multiple_bams -c tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config +- name: gatk4 markduplicatesspark test_gatk4_markduplicates_spark_multiple_bams + command: nextflow run ./tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark_multiple_bams -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config tags: - gatk4 - gatk4/markduplicatesspark files: - - path: output/gatk4/test.bai - md5sum: bad71df9c876e72a5bc0a3e0fd755f92 - path: output/gatk4/test.bam - md5sum: 8187febc6108ffef7f907e89b9c091a4 + md5sum: 898cb0a6616897d8ada90bab53bf0837 + +- name: gatk4 markduplicatesspark test_gatk4_markduplicates_spark_multiple_bams_metrics + command: nextflow run ./tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark_multiple_bams_metrics 
-c ./tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config + tags: + - gatk4 + - gatk4/markduplicatesspark + files: + - path: output/gatk4/test.bam + md5sum: 898cb0a6616897d8ada90bab53bf0837 - path: output/gatk4/test.metrics - - path: output/gatk4/versions.yml + contains: '[ ## METRICS CLASS org.broadinstitute.hellbender.utils.read.markduplicates.GATKDuplicationMetrics + LIBRARY UNPAIRED_READS_EXAMINED READ_PAIRS_EXAMINED SECONDARY_OR_SUPPLEMENTARY_RDS UNMAPPED_READS UNPAIRED_READ_DUPLICATES READ_PAIR_DUPLICATES READ_PAIR_OPTICAL_DUPLICATESPERCENT_DUPLICATION ESTIMATED_LIBRARY_SIZE + testN 0 2820 2 2 0 828 0 0.293617 3807 + testT 0 2718 0 0 0 716 0 0.263429 4206 ]' From e083bfafa0b34f69ba1dfa00e38d58d0b565572b Mon Sep 17 00:00:00 2001 From: SusiJo Date: Tue, 31 May 2022 14:38:45 +0200 Subject: [PATCH 061/168] ran prettier --- tests/modules/gatk4/markduplicatesspark/test.yml | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/modules/gatk4/markduplicatesspark/test.yml b/tests/modules/gatk4/markduplicatesspark/test.yml index 34fe0101..a0631183 100644 --- a/tests/modules/gatk4/markduplicatesspark/test.yml +++ b/tests/modules/gatk4/markduplicatesspark/test.yml @@ -25,7 +25,8 @@ - path: output/gatk4/test.bam md5sum: 898cb0a6616897d8ada90bab53bf0837 - path: output/gatk4/test.metrics - contains: '[ ## METRICS CLASS org.broadinstitute.hellbender.utils.read.markduplicates.GATKDuplicationMetrics - LIBRARY UNPAIRED_READS_EXAMINED READ_PAIRS_EXAMINED SECONDARY_OR_SUPPLEMENTARY_RDS UNMAPPED_READS UNPAIRED_READ_DUPLICATES READ_PAIR_DUPLICATES READ_PAIR_OPTICAL_DUPLICATESPERCENT_DUPLICATION ESTIMATED_LIBRARY_SIZE - testN 0 2820 2 2 0 828 0 0.293617 3807 - testT 0 2718 0 0 0 716 0 0.263429 4206 ]' + contains: + "[ ## METRICS CLASS org.broadinstitute.hellbender.utils.read.markduplicates.GATKDuplicationMetrics + LIBRARY UNPAIRED_READS_EXAMINED READ_PAIRS_EXAMINED SECONDARY_OR_SUPPLEMENTARY_RDS UNMAPPED_READS 
UNPAIRED_READ_DUPLICATES READ_PAIR_DUPLICATES READ_PAIR_OPTICAL_DUPLICATESPERCENT_DUPLICATION ESTIMATED_LIBRARY_SIZE + testN 0 2820 2 2 0 828 0 0.293617 3807 + testT 0 2718 0 0 0 716 0 0.263429 4206 ]" From d484aa190636690a58941aa90f95a08010fa6f4f Mon Sep 17 00:00:00 2001 From: SusiJo Date: Tue, 31 May 2022 14:45:23 +0200 Subject: [PATCH 062/168] adjusted tabs --- tests/modules/gatk4/markduplicatesspark/test.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/modules/gatk4/markduplicatesspark/test.yml b/tests/modules/gatk4/markduplicatesspark/test.yml index a0631183..862b8a16 100644 --- a/tests/modules/gatk4/markduplicatesspark/test.yml +++ b/tests/modules/gatk4/markduplicatesspark/test.yml @@ -26,7 +26,7 @@ md5sum: 898cb0a6616897d8ada90bab53bf0837 - path: output/gatk4/test.metrics contains: - "[ ## METRICS CLASS org.broadinstitute.hellbender.utils.read.markduplicates.GATKDuplicationMetrics - LIBRARY UNPAIRED_READS_EXAMINED READ_PAIRS_EXAMINED SECONDARY_OR_SUPPLEMENTARY_RDS UNMAPPED_READS UNPAIRED_READ_DUPLICATES READ_PAIR_DUPLICATES READ_PAIR_OPTICAL_DUPLICATESPERCENT_DUPLICATION ESTIMATED_LIBRARY_SIZE - testN 0 2820 2 2 0 828 0 0.293617 3807 - testT 0 2718 0 0 0 716 0 0.263429 4206 ]" + "## METRICS CLASS\torg.broadinstitute.hellbender.utils.read.markduplicates.GATKDuplicationMetrics + LIBRARY\tUNPAIRED_READS_EXAMINED READ_PAIRS_EXAMINED\tSECONDARY_OR_SUPPLEMENTARY_RDS\tUNMAPPED_READS\tUNPAIRED_READ_DUPLICATES\tREAD_PAIR_DUPLICATES\tREAD_PAIR_OPTICAL_DUPLICATESPERCENT_DUPLICATION\tESTIMATED_LIBRARY_SIZE + testN\t0\t2820\t2\t2\t0\t828\t0\t0.293617\t3807 + testT\t0\t2718\t0\t0\t0\t716\t0\t0.263429\t4206" From 430def06faabbbbfe03da68661c329b85a1296ed Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Tue, 31 May 2022 14:45:41 +0200 Subject: [PATCH 063/168] Switch from md5sum to string due to conda flakiness --- tests/modules/filtlong/test.yml | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git 
a/tests/modules/filtlong/test.yml b/tests/modules/filtlong/test.yml index 9c465303..93f847ec 100644 --- a/tests/modules/filtlong/test.yml +++ b/tests/modules/filtlong/test.yml @@ -4,7 +4,8 @@ - filtlong files: - path: output/filtlong/test_lr_filtlong.fastq.gz - md5sum: ff2b6e10fea0c45f10e8739a5bca25ed + contains: + - "@00068f7a-51b3-4933-8fc6-7d6e29181ff9" - name: filtlong test_filtlong_illumina_se command: nextflow run ./tests/modules/filtlong -entry test_filtlong_illumina_se -c ./tests/config/nextflow.config -c ./tests/modules/filtlong/nextflow.config @@ -12,7 +13,8 @@ - filtlong files: - path: output/filtlong/test_lr_filtlong.fastq.gz - md5sum: ff2b6e10fea0c45f10e8739a5bca25ed + contains: + - "@00068f7a-51b3-4933-8fc6-7d6e29181ff9" - name: filtlong test_filtlong_illumina_pe command: nextflow run ./tests/modules/filtlong -entry test_filtlong_illumina_pe -c ./tests/config/nextflow.config -c ./tests/modules/filtlong/nextflow.config @@ -20,4 +22,5 @@ - filtlong files: - path: output/filtlong/test_lr_filtlong.fastq.gz - md5sum: ff2b6e10fea0c45f10e8739a5bca25ed + contains: + - "@00068f7a-51b3-4933-8fc6-7d6e29181ff9" From 205576279fa63e7d9ffe3a2ec72eed6eb54b7476 Mon Sep 17 00:00:00 2001 From: SusiJo Date: Tue, 31 May 2022 14:50:55 +0200 Subject: [PATCH 064/168] ran prettier again --- tests/modules/gatk4/markduplicatesspark/test.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/modules/gatk4/markduplicatesspark/test.yml b/tests/modules/gatk4/markduplicatesspark/test.yml index 862b8a16..a240c65e 100644 --- a/tests/modules/gatk4/markduplicatesspark/test.yml +++ b/tests/modules/gatk4/markduplicatesspark/test.yml @@ -25,8 +25,7 @@ - path: output/gatk4/test.bam md5sum: 898cb0a6616897d8ada90bab53bf0837 - path: output/gatk4/test.metrics - contains: - "## METRICS CLASS\torg.broadinstitute.hellbender.utils.read.markduplicates.GATKDuplicationMetrics + contains: "## METRICS 
CLASS\torg.broadinstitute.hellbender.utils.read.markduplicates.GATKDuplicationMetrics LIBRARY\tUNPAIRED_READS_EXAMINED READ_PAIRS_EXAMINED\tSECONDARY_OR_SUPPLEMENTARY_RDS\tUNMAPPED_READS\tUNPAIRED_READ_DUPLICATES\tREAD_PAIR_DUPLICATES\tREAD_PAIR_OPTICAL_DUPLICATESPERCENT_DUPLICATION\tESTIMATED_LIBRARY_SIZE testN\t0\t2820\t2\t2\t0\t828\t0\t0.293617\t3807 testT\t0\t2718\t0\t0\t0\t716\t0\t0.263429\t4206" From 9ef09e6add67b9125c69c989c9d2b1f426ad6244 Mon Sep 17 00:00:00 2001 From: SusiJo Date: Tue, 31 May 2022 14:54:39 +0200 Subject: [PATCH 065/168] adjusted newlines --- tests/modules/gatk4/markduplicatesspark/test.yml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tests/modules/gatk4/markduplicatesspark/test.yml b/tests/modules/gatk4/markduplicatesspark/test.yml index a240c65e..ea7f7d4b 100644 --- a/tests/modules/gatk4/markduplicatesspark/test.yml +++ b/tests/modules/gatk4/markduplicatesspark/test.yml @@ -25,7 +25,4 @@ - path: output/gatk4/test.bam md5sum: 898cb0a6616897d8ada90bab53bf0837 - path: output/gatk4/test.metrics - contains: "## METRICS CLASS\torg.broadinstitute.hellbender.utils.read.markduplicates.GATKDuplicationMetrics - LIBRARY\tUNPAIRED_READS_EXAMINED READ_PAIRS_EXAMINED\tSECONDARY_OR_SUPPLEMENTARY_RDS\tUNMAPPED_READS\tUNPAIRED_READ_DUPLICATES\tREAD_PAIR_DUPLICATES\tREAD_PAIR_OPTICAL_DUPLICATESPERCENT_DUPLICATION\tESTIMATED_LIBRARY_SIZE - testN\t0\t2820\t2\t2\t0\t828\t0\t0.293617\t3807 - testT\t0\t2718\t0\t0\t0\t716\t0\t0.263429\t4206" + contains: "## METRICS CLASS\torg.broadinstitute.hellbender.utils.read.markduplicates.GATKDuplicationMetrics\nLIBRARY\tUNPAIRED_READS_EXAMINED READ_PAIRS_EXAMINED\tSECONDARY_OR_SUPPLEMENTARY_RDS\tUNMAPPED_READS\tUNPAIRED_READ_DUPLICATES\tREAD_PAIR_DUPLICATES\tREAD_PAIR_OPTICAL_DUPLICATESPERCENT_DUPLICATION\tESTIMATED_LIBRARY_SIZE\ntestN\t0\t2820\t2\t2\t0\t828\t0\t0.293617\t3807\ntestT\t0\t2718\t0\t0\t0\t716\t0\t0.263429\t4206" From 51b0a6e4f3b048a1eebeb86896fa0374771ca554 Mon Sep 17 00:00:00 2001 
From: Nicolas Vannieuwkerke Date: Tue, 31 May 2022 15:15:58 +0200 Subject: [PATCH 066/168] Added optional input of index files to bcftools/concat --- modules/bcftools/concat/main.nf | 2 +- modules/bcftools/concat/meta.yml | 5 +++++ tests/modules/bcftools/concat/main.nf | 18 +++++++++++++++--- tests/modules/bcftools/concat/test.yml | 15 ++++++++++++--- 4 files changed, 33 insertions(+), 7 deletions(-) diff --git a/modules/bcftools/concat/main.nf b/modules/bcftools/concat/main.nf index c0633ef7..257ee36f 100644 --- a/modules/bcftools/concat/main.nf +++ b/modules/bcftools/concat/main.nf @@ -8,7 +8,7 @@ process BCFTOOLS_CONCAT { 'quay.io/biocontainers/bcftools:1.14--h88f3f91_0' }" input: - tuple val(meta), path(vcfs) + tuple val(meta), path(vcfs), path(tbi) output: tuple val(meta), path("*.gz"), emit: vcf diff --git a/modules/bcftools/concat/meta.yml b/modules/bcftools/concat/meta.yml index 3984276f..167dbe5a 100644 --- a/modules/bcftools/concat/meta.yml +++ b/modules/bcftools/concat/meta.yml @@ -25,6 +25,11 @@ input: description: | List containing 2 or more vcf files e.g. [ 'file1.vcf', 'file2.vcf' ] + - tbi: + type: files + description: | + List containing 2 or more index files (optional) + e.g. 
[ 'file1.tbi', 'file2.tbi' ] output: - meta: type: map diff --git a/tests/modules/bcftools/concat/main.nf b/tests/modules/bcftools/concat/main.nf index 8441d488..777dc5b8 100644 --- a/tests/modules/bcftools/concat/main.nf +++ b/tests/modules/bcftools/concat/main.nf @@ -4,13 +4,25 @@ nextflow.enable.dsl = 2 include { BCFTOOLS_CONCAT } from '../../../../modules/bcftools/concat/main.nf' -workflow test_bcftools_concat { +workflow test_bcftools_concat_tbi { input = [ [ id:'test3' ], // meta map [ file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test2_vcf_gz'], checkIfExists: true) ] + file(params.test_data['sarscov2']['illumina']['test2_vcf_gz'], checkIfExists: true) ], + [ file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test2_vcf_gz_tbi'], checkIfExists: true) ] + ] + + BCFTOOLS_CONCAT ( input ) +} + +workflow test_bcftools_concat_no_tbi { + + input = [ [ id:'test3' ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test2_vcf_gz'], checkIfExists: true) ], + [] ] - BCFTOOLS_CONCAT ( input ) } diff --git a/tests/modules/bcftools/concat/test.yml b/tests/modules/bcftools/concat/test.yml index 7d04ebc6..0d12badc 100644 --- a/tests/modules/bcftools/concat/test.yml +++ b/tests/modules/bcftools/concat/test.yml @@ -1,8 +1,17 @@ -- name: bcftools concat test_bcftools_concat - command: nextflow run ./tests/modules/bcftools/concat -entry test_bcftools_concat -c ./tests/config/nextflow.config -c ./tests/modules/bcftools/concat/nextflow.config +- name: bcftools concat test_bcftools_concat_tbi + command: nextflow run ./tests/modules/bcftools/concat -entry test_bcftools_concat_tbi -c ./tests/config/nextflow.config -c ./tests/modules/bcftools/concat/nextflow.config tags: - - bcftools/concat - bcftools + - 
bcftools/concat + files: + - path: output/bcftools/test3.vcf.gz + md5sum: 35c88bfaad20101062e98beb217d7137 + +- name: bcftools concat test_bcftools_concat_no_tbi + command: nextflow run ./tests/modules/bcftools/concat -entry test_bcftools_concat_no_tbi -c ./tests/config/nextflow.config -c ./tests/modules/bcftools/concat/nextflow.config + tags: + - bcftools + - bcftools/concat files: - path: output/bcftools/test3.vcf.gz md5sum: 35c88bfaad20101062e98beb217d7137 From 37b543e3e70c5474bbdcdb834bf9d51c52e743ee Mon Sep 17 00:00:00 2001 From: SusiJo Date: Tue, 31 May 2022 15:29:27 +0200 Subject: [PATCH 067/168] changed to tab --- tests/modules/gatk4/markduplicatesspark/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/modules/gatk4/markduplicatesspark/test.yml b/tests/modules/gatk4/markduplicatesspark/test.yml index ea7f7d4b..5ee751c4 100644 --- a/tests/modules/gatk4/markduplicatesspark/test.yml +++ b/tests/modules/gatk4/markduplicatesspark/test.yml @@ -25,4 +25,4 @@ - path: output/gatk4/test.bam md5sum: 898cb0a6616897d8ada90bab53bf0837 - path: output/gatk4/test.metrics - contains: "## METRICS CLASS\torg.broadinstitute.hellbender.utils.read.markduplicates.GATKDuplicationMetrics\nLIBRARY\tUNPAIRED_READS_EXAMINED READ_PAIRS_EXAMINED\tSECONDARY_OR_SUPPLEMENTARY_RDS\tUNMAPPED_READS\tUNPAIRED_READ_DUPLICATES\tREAD_PAIR_DUPLICATES\tREAD_PAIR_OPTICAL_DUPLICATESPERCENT_DUPLICATION\tESTIMATED_LIBRARY_SIZE\ntestN\t0\t2820\t2\t2\t0\t828\t0\t0.293617\t3807\ntestT\t0\t2718\t0\t0\t0\t716\t0\t0.263429\t4206" + contains: "## METRICS CLASS org.broadinstitute.hellbender.utils.read.markduplicates.GATKDuplicationMetrics" From 35e296cd7d52225fbceb4f928c86f1433203835e Mon Sep 17 00:00:00 2001 From: SusiJo Date: Tue, 31 May 2022 15:35:34 +0200 Subject: [PATCH 068/168] added brackets --- tests/modules/gatk4/markduplicatesspark/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/modules/gatk4/markduplicatesspark/test.yml 
b/tests/modules/gatk4/markduplicatesspark/test.yml index 5ee751c4..25c8ec38 100644 --- a/tests/modules/gatk4/markduplicatesspark/test.yml +++ b/tests/modules/gatk4/markduplicatesspark/test.yml @@ -25,4 +25,4 @@ - path: output/gatk4/test.bam md5sum: 898cb0a6616897d8ada90bab53bf0837 - path: output/gatk4/test.metrics - contains: "## METRICS CLASS org.broadinstitute.hellbender.utils.read.markduplicates.GATKDuplicationMetrics" + contains: '[ ## METRICS CLASS org.broadinstitute.hellbender.utils.read.markduplicates.GATKDuplicationMetrics ]' From 941760f75d04b863d0534c210b41505d64972e95 Mon Sep 17 00:00:00 2001 From: SusiJo Date: Tue, 31 May 2022 15:36:02 +0200 Subject: [PATCH 069/168] ran prettier --- tests/modules/gatk4/markduplicatesspark/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/modules/gatk4/markduplicatesspark/test.yml b/tests/modules/gatk4/markduplicatesspark/test.yml index 25c8ec38..9c235d7a 100644 --- a/tests/modules/gatk4/markduplicatesspark/test.yml +++ b/tests/modules/gatk4/markduplicatesspark/test.yml @@ -25,4 +25,4 @@ - path: output/gatk4/test.bam md5sum: 898cb0a6616897d8ada90bab53bf0837 - path: output/gatk4/test.metrics - contains: '[ ## METRICS CLASS org.broadinstitute.hellbender.utils.read.markduplicates.GATKDuplicationMetrics ]' + contains: "[ ## METRICS CLASS org.broadinstitute.hellbender.utils.read.markduplicates.GATKDuplicationMetrics ]" From 5513d095f5240a0240cd327e3b86fa619819d3d4 Mon Sep 17 00:00:00 2001 From: Anders Sune Pedersen Date: Tue, 31 May 2022 15:37:26 +0200 Subject: [PATCH 070/168] Adding tbi-files to the output from GATK4_MERGEVCFS. 
#1709 --- modules/gatk4/mergevcfs/main.nf | 1 + modules/gatk4/mergevcfs/meta.yml | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/modules/gatk4/mergevcfs/main.nf b/modules/gatk4/mergevcfs/main.nf index 964c1a3b..35930a6e 100644 --- a/modules/gatk4/mergevcfs/main.nf +++ b/modules/gatk4/mergevcfs/main.nf @@ -13,6 +13,7 @@ process GATK4_MERGEVCFS { output: tuple val(meta), path('*.vcf.gz'), emit: vcf + tuple val(meta), path("*.tbi") , emit: tbi path "versions.yml" , emit: versions when: diff --git a/modules/gatk4/mergevcfs/meta.yml b/modules/gatk4/mergevcfs/meta.yml index 8d4123d9..6110e944 100644 --- a/modules/gatk4/mergevcfs/meta.yml +++ b/modules/gatk4/mergevcfs/meta.yml @@ -35,6 +35,12 @@ output: type: file description: merged vcf file pattern: "*.vcf.gz" + - tbi: + type: file + description: index files for the merged vcf files + pattern: "*.tbi" + + - versions: type: file description: File containing software versions From 58389e550d9d091be164ac5fef440edb286e52c0 Mon Sep 17 00:00:00 2001 From: SusiJo Date: Tue, 31 May 2022 15:41:50 +0200 Subject: [PATCH 071/168] test json array --- tests/modules/gatk4/markduplicatesspark/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/modules/gatk4/markduplicatesspark/test.yml b/tests/modules/gatk4/markduplicatesspark/test.yml index 9c235d7a..24ff6d84 100644 --- a/tests/modules/gatk4/markduplicatesspark/test.yml +++ b/tests/modules/gatk4/markduplicatesspark/test.yml @@ -25,4 +25,4 @@ - path: output/gatk4/test.bam md5sum: 898cb0a6616897d8ada90bab53bf0837 - path: output/gatk4/test.metrics - contains: "[ ## METRICS CLASS org.broadinstitute.hellbender.utils.read.markduplicates.GATKDuplicationMetrics ]" + contains: "[ "## METRICS CLASS", "org.broadinstitute.hellbender.utils.read.markduplicates.GATKDuplicationMetrics" ]" From 3e20ca7e9a0400b3d878e6c846a3b3733bfb867c Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Tue, 31 May 2022 15:46:28 +0200 Subject: [PATCH 072/168] Don't hardcode 
forced output names --- tests/modules/filtlong/nextflow.config | 1 + tests/modules/filtlong/test.yml | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/modules/filtlong/nextflow.config b/tests/modules/filtlong/nextflow.config index ec9854c2..f0b90e67 100644 --- a/tests/modules/filtlong/nextflow.config +++ b/tests/modules/filtlong/nextflow.config @@ -3,5 +3,6 @@ process { publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } ext.args = "--min_length 10" + ext.prefix = "${meta.id}_lr" } diff --git a/tests/modules/filtlong/test.yml b/tests/modules/filtlong/test.yml index 93f847ec..87bd491b 100644 --- a/tests/modules/filtlong/test.yml +++ b/tests/modules/filtlong/test.yml @@ -3,7 +3,7 @@ tags: - filtlong files: - - path: output/filtlong/test_lr_filtlong.fastq.gz + - path: output/filtlong/test_lr.fastq.gz contains: - "@00068f7a-51b3-4933-8fc6-7d6e29181ff9" @@ -12,7 +12,7 @@ tags: - filtlong files: - - path: output/filtlong/test_lr_filtlong.fastq.gz + - path: output/filtlong/test_lr.fastq.gz contains: - "@00068f7a-51b3-4933-8fc6-7d6e29181ff9" @@ -21,6 +21,6 @@ tags: - filtlong files: - - path: output/filtlong/test_lr_filtlong.fastq.gz + - path: output/filtlong/test_lr.fastq.gz contains: - "@00068f7a-51b3-4933-8fc6-7d6e29181ff9" From 089f761f0bf79c4a486f1df9b6205f650196a2c1 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Tue, 31 May 2022 15:49:57 +0200 Subject: [PATCH 073/168] Actually add the changes --- modules/filtlong/main.nf | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/modules/filtlong/main.nf b/modules/filtlong/main.nf index 67500053..9dbf05b2 100644 --- a/modules/filtlong/main.nf +++ b/modules/filtlong/main.nf @@ -11,7 +11,7 @@ process FILTLONG { tuple val(meta), path(shortreads), path(longreads) output: - tuple val(meta), path("${meta.id}_lr_filtlong.fastq.gz"), emit: reads + tuple val(meta), path("*.fastq.gz"), emit: reads path "versions.yml" , emit: 
versions when: @@ -21,12 +21,13 @@ process FILTLONG { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def short_reads = !shortreads ? "" : meta.single_end ? "-1 $shortreads" : "-1 ${shortreads[0]} -2 ${shortreads[1]}" + if ("$longreads" == "${prefix}.fastq.gz") error "Longread FASTQ input and output names are the same, set prefix in module configuration to disambiguate!" """ filtlong \\ $short_reads \\ $args \\ $longreads \\ - | gzip -n > ${prefix}_lr_filtlong.fastq.gz + | gzip -n > ${prefix}.fastq.gz cat <<-END_VERSIONS > versions.yml "${task.process}": From 84f584875e542e6b624ac751034af2ddab7860ef Mon Sep 17 00:00:00 2001 From: SusiJo Date: Tue, 31 May 2022 15:50:31 +0200 Subject: [PATCH 074/168] change string array --- tests/modules/gatk4/markduplicatesspark/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/modules/gatk4/markduplicatesspark/test.yml b/tests/modules/gatk4/markduplicatesspark/test.yml index 24ff6d84..21323ada 100644 --- a/tests/modules/gatk4/markduplicatesspark/test.yml +++ b/tests/modules/gatk4/markduplicatesspark/test.yml @@ -25,4 +25,4 @@ - path: output/gatk4/test.bam md5sum: 898cb0a6616897d8ada90bab53bf0837 - path: output/gatk4/test.metrics - contains: "[ "## METRICS CLASS", "org.broadinstitute.hellbender.utils.read.markduplicates.GATKDuplicationMetrics" ]" + contains: ["## METRICS CLASS", "org.broadinstitute.hellbender.utils.read.markduplicates.GATKDuplicationMetrics"] From 98bb45cf4e93801116d9435f2c1e644f91f0628d Mon Sep 17 00:00:00 2001 From: asp8200 Date: Tue, 31 May 2022 13:59:56 +0000 Subject: [PATCH 075/168] Updating test for gatk4/mergevcfs --- tests/modules/gatk4/mergevcfs/test.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/modules/gatk4/mergevcfs/test.yml b/tests/modules/gatk4/mergevcfs/test.yml index da2f7578..eb43a184 100644 --- a/tests/modules/gatk4/mergevcfs/test.yml +++ b/tests/modules/gatk4/mergevcfs/test.yml @@ -6,6 +6,8 @@ files: - path: 
output/gatk4/test.vcf.gz md5sum: 5b289bda88d3a3504f2e19ee8cff177c + - path: output/gatk4/test.vcf.gz.tbi + md5sum: a81673763b13086cfce9a23e72a35a16 - path: output/gatk4/versions.yml - name: gatk4 mergevcfs test_gatk4_mergevcfs_no_dict From 80366924fdd0b0183cebe2258f25807db5b93c10 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Tue, 31 May 2022 16:08:13 +0200 Subject: [PATCH 076/168] added gatk4/composestrtablefile --- modules/gatk4/composestrtablefile/main.nf | 51 +++++++++++++++++++ modules/gatk4/composestrtablefile/meta.yml | 43 ++++++++++++++++ tests/config/pytest_modules.yml | 4 ++ .../modules/gatk4/composestrtablefile/main.nf | 16 ++++++ .../gatk4/composestrtablefile/nextflow.config | 5 ++ .../gatk4/composestrtablefile/test.yml | 8 +++ 6 files changed, 127 insertions(+) create mode 100644 modules/gatk4/composestrtablefile/main.nf create mode 100644 modules/gatk4/composestrtablefile/meta.yml create mode 100644 tests/modules/gatk4/composestrtablefile/main.nf create mode 100644 tests/modules/gatk4/composestrtablefile/nextflow.config create mode 100644 tests/modules/gatk4/composestrtablefile/test.yml diff --git a/modules/gatk4/composestrtablefile/main.nf b/modules/gatk4/composestrtablefile/main.nf new file mode 100644 index 00000000..a1114f2a --- /dev/null +++ b/modules/gatk4/composestrtablefile/main.nf @@ -0,0 +1,51 @@ +process GATK4_COMPOSESTRTABLEFILE { + tag "$fasta" + label 'process_low' + + conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/gatk4:4.2.6.1--hdfd78af_0': + 'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }" + + input: + tuple path(fasta), path(fasta_fai), path(dict) + + output: + path "*.zip" , emit: str_table + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + def avail_mem = 6 + if (!task.memory) { + log.info '[GATK ComposeSTRTableFile] Available memory not known - defaulting to 6GB. Specify process memory requirements to change this.' + } else { + avail_mem = task.memory.giga + } + """ + gatk --java-options "-Xmx${avail_mem}g" ComposeSTRTableFile \\ + --reference $fasta \\ + --output ${fasta.baseName}.zip \\ + --tmp-dir . \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + """ + touch test.zip + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/gatk4/composestrtablefile/meta.yml b/modules/gatk4/composestrtablefile/meta.yml new file mode 100644 index 00000000..eb825ef4 --- /dev/null +++ b/modules/gatk4/composestrtablefile/meta.yml @@ -0,0 +1,43 @@ +name: "gatk4_composestrtablefile" +description: This tool looks for low-complexity STR sequences along the reference that are later used to estimate the Dragstr model during single sample auto calibration CalibrateDragstrModel. +keywords: + - gatk4 + - composestrtablefile +tools: + - gatk4: + description: + Genome Analysis Toolkit (GATK4). Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. 
+ homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/articles/4405451249819-ComposeSTRTableFile + tool_dev_url: https://github.com/broadinstitute/gatk + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] + +input: + - fasta: + type: file + description: FASTA reference file + pattern: "*.{fasta,fa}" + - fasta_fai: + type: file + description: index of the FASTA reference file + pattern: "*.fai" + - dict: + type: file + description: Sequence dictionary of the FASTA reference file + pattern: "*.dict" + +output: + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - str_table: + type: file + description: A zipped folder containing the STR table files + pattern: "*.zip" + +authors: + - "@nvnieuwk" diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index 3ff58b5c..bf4a6137 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -751,6 +751,10 @@ gatk4/combinegvcfs: - modules/gatk4/combinegvcfs/** - tests/modules/gatk4/combinegvcfs/** +gatk4/composestrtablefile: + - modules/gatk4/composestrtablefile/** + - tests/modules/gatk4/composestrtablefile/** + gatk4/createsequencedictionary: - modules/gatk4/createsequencedictionary/** - tests/modules/gatk4/createsequencedictionary/** diff --git a/tests/modules/gatk4/composestrtablefile/main.nf b/tests/modules/gatk4/composestrtablefile/main.nf new file mode 100644 index 00000000..9fc2e5f5 --- /dev/null +++ b/tests/modules/gatk4/composestrtablefile/main.nf @@ -0,0 +1,16 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { GATK4_COMPOSESTRTABLEFILE } from '../../../../modules/gatk4/composestrtablefile/main.nf' + +workflow test_gatk4_composestrtablefile { + + input = [ + file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true), + file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true), 
+ file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) + ] + + GATK4_COMPOSESTRTABLEFILE ( input ) +} diff --git a/tests/modules/gatk4/composestrtablefile/nextflow.config b/tests/modules/gatk4/composestrtablefile/nextflow.config new file mode 100644 index 00000000..50f50a7a --- /dev/null +++ b/tests/modules/gatk4/composestrtablefile/nextflow.config @@ -0,0 +1,5 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + +} \ No newline at end of file diff --git a/tests/modules/gatk4/composestrtablefile/test.yml b/tests/modules/gatk4/composestrtablefile/test.yml new file mode 100644 index 00000000..3bafb348 --- /dev/null +++ b/tests/modules/gatk4/composestrtablefile/test.yml @@ -0,0 +1,8 @@ +- name: gatk4 composestrtablefile test_gatk4_composestrtablefile + command: nextflow run ./tests/modules/gatk4/composestrtablefile -entry test_gatk4_composestrtablefile -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/composestrtablefile/nextflow.config + tags: + - gatk4/composestrtablefile + - gatk4 + files: + - path: output/gatk4/genome.zip + contains: "[ # TODO nf-core: file md5sum was variable, please replace this text with a string found in the file instead ]" From ec41ea6082fcfaadcb8a6a4f02979144de86cd91 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Tue, 31 May 2022 16:12:05 +0200 Subject: [PATCH 077/168] fix test.yml --- tests/modules/gatk4/composestrtablefile/test.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/modules/gatk4/composestrtablefile/test.yml b/tests/modules/gatk4/composestrtablefile/test.yml index 3bafb348..514ae79c 100644 --- a/tests/modules/gatk4/composestrtablefile/test.yml +++ b/tests/modules/gatk4/composestrtablefile/test.yml @@ -1,8 +1,7 @@ - name: gatk4 composestrtablefile test_gatk4_composestrtablefile command: nextflow run ./tests/modules/gatk4/composestrtablefile -entry test_gatk4_composestrtablefile -c 
./tests/config/nextflow.config -c ./tests/modules/gatk4/composestrtablefile/nextflow.config tags: - - gatk4/composestrtablefile - gatk4 + - gatk4/composestrtablefile files: - - path: output/gatk4/genome.zip - contains: "[ # TODO nf-core: file md5sum was variable, please replace this text with a string found in the file instead ]" + - path: output/gatk4/genome.zip \ No newline at end of file From 1166fb51505b39983af8ee16ecb382222eec22bb Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Tue, 31 May 2022 16:13:38 +0200 Subject: [PATCH 078/168] prettier --- tests/modules/gatk4/composestrtablefile/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/modules/gatk4/composestrtablefile/test.yml b/tests/modules/gatk4/composestrtablefile/test.yml index 514ae79c..597ae144 100644 --- a/tests/modules/gatk4/composestrtablefile/test.yml +++ b/tests/modules/gatk4/composestrtablefile/test.yml @@ -4,4 +4,4 @@ - gatk4 - gatk4/composestrtablefile files: - - path: output/gatk4/genome.zip \ No newline at end of file + - path: output/gatk4/genome.zip From 4199a05aeb0ec277d40cb112949bb85893310873 Mon Sep 17 00:00:00 2001 From: asp8200 Date: Tue, 31 May 2022 15:34:34 +0000 Subject: [PATCH 079/168] Removing redundant empty line --- modules/gatk4/mergevcfs/meta.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/modules/gatk4/mergevcfs/meta.yml b/modules/gatk4/mergevcfs/meta.yml index 6110e944..3ebce0b9 100644 --- a/modules/gatk4/mergevcfs/meta.yml +++ b/modules/gatk4/mergevcfs/meta.yml @@ -40,7 +40,6 @@ output: description: index files for the merged vcf files pattern: "*.tbi" - - versions: type: file description: File containing software versions From df22cd80df8bee4d3f1d5750026c26c560be7392 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Tue, 31 May 2022 20:07:52 +0200 Subject: [PATCH 080/168] Fix config for tests --- tests/modules/filtlong/nextflow.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/tests/modules/filtlong/nextflow.config b/tests/modules/filtlong/nextflow.config index f0b90e67..5e4c9fbb 100644 --- a/tests/modules/filtlong/nextflow.config +++ b/tests/modules/filtlong/nextflow.config @@ -3,6 +3,6 @@ process { publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } ext.args = "--min_length 10" - ext.prefix = "${meta.id}_lr" + ext.prefix = "test_lr" } From c1ac3fbb592b8e19bd66110fbe77bd47785cce6d Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Tue, 31 May 2022 20:13:48 +0200 Subject: [PATCH 081/168] Skeleton --- modules/gatk/realignertargetcreator/main.nf | 75 +++++++++++++++++++ modules/gatk/realignertargetcreator/meta.yml | 51 +++++++++++++ tests/config/pytest_modules.yml | 4 + .../gatk/realignertargetcreator/main.nf | 15 ++++ .../realignertargetcreator/nextflow.config | 5 ++ .../gatk/realignertargetcreator/test.yml | 14 ++++ 6 files changed, 164 insertions(+) create mode 100644 modules/gatk/realignertargetcreator/main.nf create mode 100644 modules/gatk/realignertargetcreator/meta.yml create mode 100644 tests/modules/gatk/realignertargetcreator/main.nf create mode 100644 tests/modules/gatk/realignertargetcreator/nextflow.config create mode 100644 tests/modules/gatk/realignertargetcreator/test.yml diff --git a/modules/gatk/realignertargetcreator/main.nf b/modules/gatk/realignertargetcreator/main.nf new file mode 100644 index 00000000..59dee1a8 --- /dev/null +++ b/modules/gatk/realignertargetcreator/main.nf @@ -0,0 +1,75 @@ +// TODO nf-core: If in doubt look at other nf-core/modules to see how we are doing things! :) +// https://github.com/nf-core/modules/tree/master/modules +// You can also ask for help via your pull request or on the #modules channel on the nf-core Slack workspace: +// https://nf-co.re/join +// TODO nf-core: A module file SHOULD only define input and output files as command-line parameters. 
+// All other parameters MUST be provided using the "task.ext" directive, see here: +// https://www.nextflow.io/docs/latest/process.html#ext +// where "task.ext" is a string. +// Any parameters that need to be evaluated in the context of a particular sample +// e.g. single-end/paired-end data MUST also be defined and evaluated appropriately. +// TODO nf-core: Software that can be piped together SHOULD be added to separate module files +// unless there is a run-time, storage advantage in implementing in this way +// e.g. it's ok to have a single module for bwa to output BAM instead of SAM: +// bwa mem | samtools view -B -T ref.fasta +// TODO nf-core: Optional inputs are not currently supported by Nextflow. However, using an empty +// list (`[]`) instead of a file can be used to work around this issue. + +process GATK_REALIGNERTARGETCREATOR { + tag "$meta.id" + label 'process_low' + + // TODO nf-core: List required Conda package(s). + // Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10"). + // For Conda, the build (i.e. "h9402c20_2") must be EXCLUDED to support installation on different operating systems. + // TODO nf-core: See section in main README for further information regarding finding and adding container addresses to the section below. + conda (params.enable_conda ? "bioconda::gatk=3.8" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gatk:3.8--hdfd78af_11': + 'quay.io/biocontainers/gatk:3.8--hdfd78af_11' }" + + input: + // TODO nf-core: Where applicable all sample-specific information e.g. "id", "single_end", "read_group" + // MUST be provided as an input via a Groovy Map called "meta". + // This information may not be required in some instances e.g. 
indexing reference genome files: + // https://github.com/nf-core/modules/blob/master/modules/bwa/index/main.nf + // TODO nf-core: Where applicable please provide/convert compressed files as input/output + // e.g. "*.fastq.gz" and NOT "*.fastq", "*.bam" and NOT "*.sam" etc. + tuple val(meta), path(bam) + + output: + // TODO nf-core: Named file extensions MUST be emitted for ALL output channels + tuple val(meta), path("*.bam"), emit: bam + // TODO nf-core: List additional required output channels/values here + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + // TODO nf-core: Where possible, a command MUST be provided to obtain the version number of the software e.g. 1.10 + // If the software is unable to output a version number on the command-line then it can be manually specified + // e.g. https://github.com/nf-core/modules/blob/master/modules/homer/annotatepeaks/main.nf + // Each software used MUST provide the software name and version number in the YAML version file (versions.yml) + // TODO nf-core: It MUST be possible to pass additional parameters to the tool as a command-line string via the "task.ext.args" directive + // TODO nf-core: If the tool supports multi-threading then you MUST provide the appropriate parameter + // using the Nextflow "task" variable e.g. "--threads $task.cpus" + // TODO nf-core: Please replace the example samtools command below with your module's command + // TODO nf-core: Please indent the command appropriately (4 spaces!!) 
to help with readability ;) + """ + samtools \\ + sort \\ + $args \\ + -@ $task.cpus \\ + -o ${prefix}.bam \\ + -T $prefix \\ + $bam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//' )) + END_VERSIONS + """ +} diff --git a/modules/gatk/realignertargetcreator/meta.yml b/modules/gatk/realignertargetcreator/meta.yml new file mode 100644 index 00000000..a33db8f1 --- /dev/null +++ b/modules/gatk/realignertargetcreator/meta.yml @@ -0,0 +1,51 @@ +name: "gatk_realignertargetcreator" +## TODO nf-core: Add a description of the module and list keywords +description: write your description here +keywords: + - sort +tools: + - "gatk": + ## TODO nf-core: Add a description and other details for the software below + description: "The full Genome Analysis Toolkit (GATK) framework, license restricted." + homepage: "None" + documentation: "None" + tool_dev_url: "None" + doi: "" + licence: "['https://software.broadinstitute.org/gatk/download/licensing', 'https://www.broadinstitute.org/gatk/about/#licensing', 'BSD']" + +## TODO nf-core: Add a description of all of the variables used as input +input: + # Only when we have meta + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + # + ## TODO nf-core: Delete / customise this example input + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + +## TODO nf-core: Add a description of all of the variables used as output +output: + #Only when we have meta + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + # + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + ## TODO nf-core: Delete / customise this example output + - bam: + type: file + description: Sorted BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + +authors: + - "@jfy133" diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index 3ff58b5c..80dcb9ec 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -715,6 +715,10 @@ gamma/gamma: - modules/gamma/gamma/** - tests/modules/gamma/gamma/** +gatk/realignertargetcreator: + - modules/gatk/realignertargetcreator/** + - tests/modules/gatk/realignertargetcreator/** + gatk4/applybqsr: - modules/gatk4/applybqsr/** - tests/modules/gatk4/applybqsr/** diff --git a/tests/modules/gatk/realignertargetcreator/main.nf b/tests/modules/gatk/realignertargetcreator/main.nf new file mode 100644 index 00000000..63908069 --- /dev/null +++ b/tests/modules/gatk/realignertargetcreator/main.nf @@ -0,0 +1,15 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { GATK_REALIGNERTARGETCREATOR } from '../../../../modules/gatk/realignertargetcreator/main.nf' + +workflow test_gatk_realignertargetcreator { + + input = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true) + ] + + GATK_REALIGNERTARGETCREATOR ( input ) +} diff --git a/tests/modules/gatk/realignertargetcreator/nextflow.config b/tests/modules/gatk/realignertargetcreator/nextflow.config new file mode 100644 index 00000000..50f50a7a --- /dev/null +++ b/tests/modules/gatk/realignertargetcreator/nextflow.config @@ -0,0 +1,5 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + +} \ No newline at end of file diff --git a/tests/modules/gatk/realignertargetcreator/test.yml b/tests/modules/gatk/realignertargetcreator/test.yml new file 
mode 100644 index 00000000..a45fbb93 --- /dev/null +++ b/tests/modules/gatk/realignertargetcreator/test.yml @@ -0,0 +1,14 @@ +## TODO nf-core: Please run the following command to build this file: +# nf-core modules create-test-yml gatk/realignertargetcreator +- name: "gatk realignertargetcreator" + command: nextflow run ./tests/modules/gatk/realignertargetcreator -entry test_gatk_realignertargetcreator -c ./tests/config/nextflow.config -c ./tests/modules/gatk/realignertargetcreator/nextflow.config + tags: + - "gatk" + # + - "gatk/realignertargetcreator" + # + files: + - path: "output/gatk/test.bam" + md5sum: e667c7caad0bc4b7ac383fd023c654fc + - path: output/gatk/versions.yml + md5sum: a01fe51bc4c6a3a6226fbf77b2c7cf3b From d39ba08e02b78001f851b737bc832203a60dc7f1 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Tue, 31 May 2022 20:19:49 +0200 Subject: [PATCH 082/168] Prepare main module code --- modules/gatk/realignertargetcreator/main.nf | 64 ++++++--------------- 1 file changed, 16 insertions(+), 48 deletions(-) diff --git a/modules/gatk/realignertargetcreator/main.nf b/modules/gatk/realignertargetcreator/main.nf index 59dee1a8..76ac2dd7 100644 --- a/modules/gatk/realignertargetcreator/main.nf +++ b/modules/gatk/realignertargetcreator/main.nf @@ -1,46 +1,19 @@ -// TODO nf-core: If in doubt look at other nf-core/modules to see how we are doing things! :) -// https://github.com/nf-core/modules/tree/master/modules -// You can also ask for help via your pull request or on the #modules channel on the nf-core Slack workspace: -// https://nf-co.re/join -// TODO nf-core: A module file SHOULD only define input and output files as command-line parameters. -// All other parameters MUST be provided using the "task.ext" directive, see here: -// https://www.nextflow.io/docs/latest/process.html#ext -// where "task.ext" is a string. -// Any parameters that need to be evaluated in the context of a particular sample -// e.g. 
single-end/paired-end data MUST also be defined and evaluated appropriately. -// TODO nf-core: Software that can be piped together SHOULD be added to separate module files -// unless there is a run-time, storage advantage in implementing in this way -// e.g. it's ok to have a single module for bwa to output BAM instead of SAM: -// bwa mem | samtools view -B -T ref.fasta -// TODO nf-core: Optional inputs are not currently supported by Nextflow. However, using an empty -// list (`[]`) instead of a file can be used to work around this issue. - process GATK_REALIGNERTARGETCREATOR { tag "$meta.id" label 'process_low' - // TODO nf-core: List required Conda package(s). - // Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10"). - // For Conda, the build (i.e. "h9402c20_2") must be EXCLUDED to support installation on different operating systems. - // TODO nf-core: See section in main README for further information regarding finding and adding container addresses to the section below. - conda (params.enable_conda ? "bioconda::gatk=3.8" : null) + conda (params.enable_conda ? "bioconda::gatk=3.5" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk:3.8--hdfd78af_11': - 'quay.io/biocontainers/gatk:3.8--hdfd78af_11' }" + 'https://depot.galaxyproject.org/singularity/gatk:3.5--hdfd78af_11': + 'quay.io/biocontainers/gatk:3.5--hdfd78af_11' }" input: - // TODO nf-core: Where applicable all sample-specific information e.g. "id", "single_end", "read_group" - // MUST be provided as an input via a Groovy Map called "meta". - // This information may not be required in some instances e.g. indexing reference genome files: - // https://github.com/nf-core/modules/blob/master/modules/bwa/index/main.nf - // TODO nf-core: Where applicable please provide/convert compressed files as input/output - // e.g. "*.fastq.gz" and NOT "*.fastq", "*.bam" and NOT "*.sam" etc. 
tuple val(meta), path(bam) + tuple val(meta), path(reference) + tuple val(meta), path(known_vcf) output: - // TODO nf-core: Named file extensions MUST be emitted for ALL output channels tuple val(meta), path("*.bam"), emit: bam - // TODO nf-core: List additional required output channels/values here path "versions.yml" , emit: versions when: @@ -49,27 +22,22 @@ process GATK_REALIGNERTARGETCREATOR { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - // TODO nf-core: Where possible, a command MUST be provided to obtain the version number of the software e.g. 1.10 - // If the software is unable to output a version number on the command-line then it can be manually specified - // e.g. https://github.com/nf-core/modules/blob/master/modules/homer/annotatepeaks/main.nf - // Each software used MUST provide the software name and version number in the YAML version file (versions.yml) - // TODO nf-core: It MUST be possible to pass additional parameters to the tool as a command-line string via the "task.ext.args" directive - // TODO nf-core: If the tool supports multi-threading then you MUST provide the appropriate parameter - // using the Nextflow "task" variable e.g. "--threads $task.cpus" - // TODO nf-core: Please replace the example samtools command below with your module's command - // TODO nf-core: Please indent the command appropriately (4 spaces!!) to help with readability ;) + def known = known_vcf ? "-known ${known_vcf}" ? "" + if ("$bam" == "${prefix}.bam") error "Input and output names are the same, set prefix in module configuration to disambiguate!" 
+ """ - samtools \\ - sort \\ - $args \\ - -@ $task.cpus \\ + gatk3 \\ + -T RealigerTargetCreator \\ + -nt ${task.cpus} + -I ${bam} \\ + -R ${reference} \\ -o ${prefix}.bam \\ - -T $prefix \\ - $bam + ${known} \\ + $args cat <<-END_VERSIONS > versions.yml "${task.process}": - gatk: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//' )) + gatk: \$(echo \$(gatk3 --version)) END_VERSIONS """ } From 7633d7816b10d8bd1fb97a748ee1006dd0ea5d09 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Tue, 31 May 2022 20:35:54 +0200 Subject: [PATCH 083/168] Add GATK(3)/realignertargetcreator --- modules/gatk/realignertargetcreator/main.nf | 20 +++---- modules/gatk/realignertargetcreator/meta.yml | 57 ++++++++++++------- .../gatk/realignertargetcreator/main.nf | 12 +++- .../gatk/realignertargetcreator/test.yml | 18 ++---- 4 files changed, 60 insertions(+), 47 deletions(-) diff --git a/modules/gatk/realignertargetcreator/main.nf b/modules/gatk/realignertargetcreator/main.nf index 76ac2dd7..e3a03a5f 100644 --- a/modules/gatk/realignertargetcreator/main.nf +++ b/modules/gatk/realignertargetcreator/main.nf @@ -8,13 +8,13 @@ process GATK_REALIGNERTARGETCREATOR { 'quay.io/biocontainers/gatk:3.5--hdfd78af_11' }" input: - tuple val(meta), path(bam) - tuple val(meta), path(reference) - tuple val(meta), path(known_vcf) + tuple val(meta), path(bam), path(bai) + tuple path(fasta), path(fasta_fai), path(fasta_dict) + path(known_vcf) output: - tuple val(meta), path("*.bam"), emit: bam - path "versions.yml" , emit: versions + tuple val(meta), path("*.intervals"), emit: intervals + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -22,16 +22,16 @@ process GATK_REALIGNERTARGETCREATOR { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def known = known_vcf ? "-known ${known_vcf}" ? "" + def known = known_vcf ? 
"-known ${known_vcf}" : "" if ("$bam" == "${prefix}.bam") error "Input and output names are the same, set prefix in module configuration to disambiguate!" """ gatk3 \\ - -T RealigerTargetCreator \\ - -nt ${task.cpus} + -T RealignerTargetCreator \\ + -nt ${task.cpus} \\ -I ${bam} \\ - -R ${reference} \\ - -o ${prefix}.bam \\ + -R ${fasta} \\ + -o ${prefix}.intervals \\ ${known} \\ $args diff --git a/modules/gatk/realignertargetcreator/meta.yml b/modules/gatk/realignertargetcreator/meta.yml index a33db8f1..70df7b78 100644 --- a/modules/gatk/realignertargetcreator/meta.yml +++ b/modules/gatk/realignertargetcreator/meta.yml @@ -1,51 +1,64 @@ name: "gatk_realignertargetcreator" -## TODO nf-core: Add a description of the module and list keywords -description: write your description here +description: Generates a list of locations that should be considered for local realignment prior genotyping. keywords: - - sort + - bam + - vcf + - variant calling + - indel + - realignment + - targets tools: - "gatk": - ## TODO nf-core: Add a description and other details for the software below description: "The full Genome Analysis Toolkit (GATK) framework, license restricted." - homepage: "None" - documentation: "None" - tool_dev_url: "None" - doi: "" - licence: "['https://software.broadinstitute.org/gatk/download/licensing', 'https://www.broadinstitute.org/gatk/about/#licensing', 'BSD']" + homepage: "https://gatk.broadinstitute.org/hc/en-us" + documentation: "https://github.com/broadinstitute/gatk-docs" + licence: "['https://software.broadinstitute.org/gatk/download/licensing', 'BSD', 'https://www.broadinstitute.org/gatk/about/#licensing']" -## TODO nf-core: Add a description of all of the variables used as input input: - # Only when we have meta - meta: type: map description: | Groovy Map containing sample information e.g. 
[ id:'test', single_end:false ] - # - ## TODO nf-core: Delete / customise this example input - bam: type: file - description: BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" + description: Sorted and indexed BAM/CRAM/SAM file + pattern: "*.bam" + - bai: + type: file + description: BAM index file + pattern: "*.bai" + - fasta: + type: file + description: Reference file used to generate BAM file + pattern: ".{fasta,fa,fna}" + - fasta_fai: + type: file + description: Index of reference file used to generate BAM file + pattern: ".fai" + - dict: + type: file + description: GATK dict file for reference + pattern: ".dict" + - known_vcf: + type: file + description: Optional input VCF file(s) with known indels + pattern: ".vcf" -## TODO nf-core: Add a description of all of the variables used as output output: - #Only when we have meta - meta: type: map description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - # - versions: type: file description: File containing software versions pattern: "versions.yml" - ## TODO nf-core: Delete / customise this example output - - bam: + - intervals: type: file - description: Sorted BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" + description: File containg intervals that represent sites of extant and potential indels. 
+ pattern: "*.intervals" authors: - "@jfy133" diff --git a/tests/modules/gatk/realignertargetcreator/main.nf b/tests/modules/gatk/realignertargetcreator/main.nf index 63908069..02e62d93 100644 --- a/tests/modules/gatk/realignertargetcreator/main.nf +++ b/tests/modules/gatk/realignertargetcreator/main.nf @@ -5,11 +5,17 @@ nextflow.enable.dsl = 2 include { GATK_REALIGNERTARGETCREATOR } from '../../../../modules/gatk/realignertargetcreator/main.nf' workflow test_gatk_realignertargetcreator { - + input = [ [ id:'test', single_end:false ], // meta map - file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true) + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true) + ] + reference = [ + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true), + file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true), + file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true), ] - GATK_REALIGNERTARGETCREATOR ( input ) + GATK_REALIGNERTARGETCREATOR ( input, reference, [] ) } diff --git a/tests/modules/gatk/realignertargetcreator/test.yml b/tests/modules/gatk/realignertargetcreator/test.yml index a45fbb93..0e247013 100644 --- a/tests/modules/gatk/realignertargetcreator/test.yml +++ b/tests/modules/gatk/realignertargetcreator/test.yml @@ -1,14 +1,8 @@ -## TODO nf-core: Please run the following command to build this file: -# nf-core modules create-test-yml gatk/realignertargetcreator -- name: "gatk realignertargetcreator" - command: nextflow run ./tests/modules/gatk/realignertargetcreator -entry test_gatk_realignertargetcreator -c ./tests/config/nextflow.config -c ./tests/modules/gatk/realignertargetcreator/nextflow.config +- name: gatk realignertargetcreator test_gatk_realignertargetcreator + command: nextflow run 
./tests/modules/gatk/realignertargetcreator -entry test_gatk_realignertargetcreator -c ./tests/config/nextflow.config -c ./tests/modules/gatk/realignertargetcreator/nextflow.config tags: - - "gatk" - # - - "gatk/realignertargetcreator" - # + - gatk + - gatk/realignertargetcreator files: - - path: "output/gatk/test.bam" - md5sum: e667c7caad0bc4b7ac383fd023c654fc - - path: output/gatk/versions.yml - md5sum: a01fe51bc4c6a3a6226fbf77b2c7cf3b + - path: output/gatk/test.intervals + md5sum: 7aa7a1b235a510e6591e262382086bf8 From 5b74a179c3323d683065e820b2fb5a80fdaedcb7 Mon Sep 17 00:00:00 2001 From: nvnieuwk <101190534+nvnieuwk@users.noreply.github.com> Date: Wed, 1 Jun 2022 09:14:08 +0200 Subject: [PATCH 084/168] Update modules/gatk4/composestrtablefile/main.nf Co-authored-by: FriederikeHanssen --- modules/gatk4/composestrtablefile/main.nf | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/modules/gatk4/composestrtablefile/main.nf b/modules/gatk4/composestrtablefile/main.nf index a1114f2a..dae61166 100644 --- a/modules/gatk4/composestrtablefile/main.nf +++ b/modules/gatk4/composestrtablefile/main.nf @@ -8,7 +8,9 @@ process GATK4_COMPOSESTRTABLEFILE { 'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }" input: - tuple path(fasta), path(fasta_fai), path(dict) + path(fasta) + path(fasta_fai) + path(dict) output: path "*.zip" , emit: str_table From d4f3df4677fb0aff66db16c896e59fc4ef93a01b Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Wed, 1 Jun 2022 09:17:05 +0200 Subject: [PATCH 085/168] Updated the test --- tests/modules/gatk4/composestrtablefile/main.nf | 14 ++++++++++---- tests/modules/gatk4/composestrtablefile/test.yml | 2 +- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/tests/modules/gatk4/composestrtablefile/main.nf b/tests/modules/gatk4/composestrtablefile/main.nf index 9fc2e5f5..da8967cc 100644 --- a/tests/modules/gatk4/composestrtablefile/main.nf +++ b/tests/modules/gatk4/composestrtablefile/main.nf @@ -6,11 +6,17 @@ 
include { GATK4_COMPOSESTRTABLEFILE } from '../../../../modules/gatk4/composestr workflow test_gatk4_composestrtablefile { - input = [ - file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true), - file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true), + fasta = [ + file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + ] + + fasta_fai = [ + file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + ] + + dict = [ file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) ] - GATK4_COMPOSESTRTABLEFILE ( input ) + GATK4_COMPOSESTRTABLEFILE ( fasta, fasta_fai, dict ) } diff --git a/tests/modules/gatk4/composestrtablefile/test.yml b/tests/modules/gatk4/composestrtablefile/test.yml index 597ae144..0a87372a 100644 --- a/tests/modules/gatk4/composestrtablefile/test.yml +++ b/tests/modules/gatk4/composestrtablefile/test.yml @@ -1,7 +1,7 @@ - name: gatk4 composestrtablefile test_gatk4_composestrtablefile command: nextflow run ./tests/modules/gatk4/composestrtablefile -entry test_gatk4_composestrtablefile -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/composestrtablefile/nextflow.config tags: - - gatk4 - gatk4/composestrtablefile + - gatk4 files: - path: output/gatk4/genome.zip From 8470f3cb7e790701401523c58a72e183ac47ecac Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Wed, 1 Jun 2022 09:20:20 +0200 Subject: [PATCH 086/168] linting --- tests/modules/gatk4/composestrtablefile/main.nf | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/tests/modules/gatk4/composestrtablefile/main.nf b/tests/modules/gatk4/composestrtablefile/main.nf index da8967cc..8fccadb9 100644 --- a/tests/modules/gatk4/composestrtablefile/main.nf +++ b/tests/modules/gatk4/composestrtablefile/main.nf @@ -6,17 +6,11 @@ include { GATK4_COMPOSESTRTABLEFILE } from '../../../../modules/gatk4/composestr 
workflow test_gatk4_composestrtablefile { - fasta = [ - file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) - ] + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) - fasta_fai = [ - file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) - ] + fasta_fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) - dict = [ - file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) - ] + dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) GATK4_COMPOSESTRTABLEFILE ( fasta, fasta_fai, dict ) } From 114a54c8d5a8e898a126c2804e3e221286eb2682 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Wed, 1 Jun 2022 09:22:09 +0200 Subject: [PATCH 087/168] spaces fix --- modules/gatk4/composestrtablefile/main.nf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/gatk4/composestrtablefile/main.nf b/modules/gatk4/composestrtablefile/main.nf index dae61166..8f2f00f2 100644 --- a/modules/gatk4/composestrtablefile/main.nf +++ b/modules/gatk4/composestrtablefile/main.nf @@ -8,9 +8,9 @@ process GATK4_COMPOSESTRTABLEFILE { 'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }" input: - path(fasta) - path(fasta_fai) - path(dict) + path(fasta) + path(fasta_fai) + path(dict) output: path "*.zip" , emit: str_table From f6953b51478d24a4caf84b40d92bba3350a38507 Mon Sep 17 00:00:00 2001 From: SusiJo Date: Wed, 1 Jun 2022 17:35:02 +0200 Subject: [PATCH 088/168] adjust cnvkit/batch for germline mode --- modules/cnvkit/batch/main.nf | 38 +++++++++++------ tests/config/test_data.config | 6 +-- tests/modules/cnvkit/batch/main.nf | 22 ++++++++-- tests/modules/cnvkit/batch/test.yml | 63 ++++++++++++++++++++++++++--- 4 files changed, 104 insertions(+), 25 deletions(-) diff --git a/modules/cnvkit/batch/main.nf b/modules/cnvkit/batch/main.nf index 
c1f9ef87..b467c6f2 100644 --- a/modules/cnvkit/batch/main.nf +++ b/modules/cnvkit/batch/main.nf @@ -28,34 +28,48 @@ process CNVKIT_BATCH { script: def args = task.ext.args ?: '' - // execute samtools only when cram files are input, cnvkit runs natively on bam but is prohibitively slow - // input pair is assumed to have same extension if both exist - def is_cram = tumor.Extension == "cram" ? true : false - def tumor_out = is_cram ? tumor.BaseName + ".bam" : "${tumor}" + def tumor_exists = tumor ? true : false + def normal_exists = normal ? true : false + + // execute samtools only when cram files are input, cnvkit runs natively on bam but is prohibitively slow + def tumor_cram = tumor_exists && tumor.Extension == "cram" ? true : false + def normal_cram = normal_exists && normal.Extension == "cram" ? true : false + + def tumor_out = tumor_cram ? tumor.BaseName + ".bam" : "${tumor}" - // do not run samtools on normal samples in tumor_only mode - def normal_exists = normal ? true: false // tumor_only mode does not need fasta & target // instead it requires a pre-computed reference.cnn which is built from fasta & target def (normal_out, normal_args, fasta_args) = ["", "", ""] if (normal_exists){ def normal_prefix = normal.BaseName - normal_out = is_cram ? "${normal_prefix}" + ".bam" : "${normal}" - normal_args = normal_prefix ? "--normal $normal_out" : "" + normal_out = normal_cram ? "${normal_prefix}" + ".bam" : "${normal}" fasta_args = fasta ? "--fasta $fasta" : "" + + // germline mode + // normal samples must be input without a flag + // requires flag --normal to be empty + if(!tumor_exists){ + tumor_out = normal.BaseName + ".bam" + normal_args = "--normal " + } + // somatic mode + else { + normal_args = normal_prefix ? "--normal $normal_out" : "" + } } def target_args = targets ? "--targets $targets" : "" def reference_args = reference ? 
"--reference $reference" : "" """ - if $is_cram; then + if $tumor_cram; then samtools view -T $fasta $tumor -@ $task.cpus -o $tumor_out - if $normal_exists; then - samtools view -T $fasta $normal -@ $task.cpus -o $normal_out - fi fi + if $normal_cram; then + samtools view -T $fasta $normal -@ $task.cpus -o $normal_out + fi + cnvkit.py \\ batch \\ diff --git a/tests/config/test_data.config b/tests/config/test_data.config index cf7d45f6..bef6f0c9 100644 --- a/tests/config/test_data.config +++ b/tests/config/test_data.config @@ -23,6 +23,8 @@ params { test_bed12 = "${test_data_dir}/genomics/sarscov2/genome/bed/test.bed12" baits_bed = "${test_data_dir}/genomics/sarscov2/genome/bed/baits.bed" + reference_cnn = "${test_data_dir}/genomics/sarscov2/genome/cnn/reference.cnn" + kraken2 = "${test_data_dir}/genomics/sarscov2/genome/db/kraken2" kraken2_tar_gz = "${test_data_dir}/genomics/sarscov2/genome/db/kraken2.tar.gz" @@ -145,6 +147,7 @@ params { genome_21_multi_interval_bed_gz = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed.gz" genome_21_multi_interval_bed_gz_tbi = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed.gz.tbi" genome_21_chromosomes_dir = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/sequence/chromosomes.tar.gz" + genome_21_reference_cnn = "${test_data_dir}/genomics/homo_sapiens/genome/chr21/sequence/reference_chr21.cnn" dbsnp_146_hg38_elsites = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.elsites" dbsnp_146_hg38_vcf_gz = "${test_data_dir}/genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz" @@ -414,9 +417,6 @@ params { 'txt' { hello = "${test_data_dir}/generic/txt/hello.txt" } - 'cnn' { - reference = "${test_data_dir}/generic/cnn/reference.cnn" - } 'cooler'{ test_pairix_pair_gz = "${test_data_dir}/genomics/homo_sapiens/cooler/cload/hg19/hg19.GM12878-MboI.pairs.subsample.blksrt.txt.gz" test_pairix_pair_gz_px2 = 
"${test_data_dir}/genomics/homo_sapiens/cooler/cload/hg19/hg19.GM12878-MboI.pairs.subsample.blksrt.txt.gz.px2" diff --git a/tests/modules/cnvkit/batch/main.nf b/tests/modules/cnvkit/batch/main.nf index 988ed3ad..c93e3a00 100755 --- a/tests/modules/cnvkit/batch/main.nf +++ b/tests/modules/cnvkit/batch/main.nf @@ -5,6 +5,7 @@ nextflow.enable.dsl = 2 include { CNVKIT_BATCH as CNVKIT_HYBRID } from '../../../../modules/cnvkit/batch/main.nf' include { CNVKIT_BATCH as CNVKIT_WGS } from '../../../../modules/cnvkit/batch/main.nf' include { CNVKIT_BATCH as CNVKIT_TUMORONLY } from '../../../../modules/cnvkit/batch/main.nf' +include { CNVKIT_BATCH as CNVKIT_GERMLINE } from '../../../../modules/cnvkit/batch/main.nf' workflow test_cnvkit_hybrid { @@ -47,10 +48,10 @@ workflow test_cnvkit_tumoronly { input = [ [ id:'test'], // meta map - file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_bam'], checkIfExists: true), [] ] - reference = file(params.test_data['generic']['cnn']['reference'], checkIfExists: true) + reference = file(params.test_data['homo_sapiens']['genome']['genome_21_reference_cnn'], checkIfExists: true) CNVKIT_TUMORONLY ( input, [], [], reference ) } @@ -59,11 +60,24 @@ workflow test_cnvkit_tumoronly_cram { input = [ [ id:'test'], // meta map - file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_cram'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_cram'], checkIfExists: true), [] ] fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) - reference = file(params.test_data['generic']['cnn']['reference'], checkIfExists: true) + reference = file(params.test_data['homo_sapiens']['genome']['genome_21_reference_cnn'], checkIfExists: true) CNVKIT_TUMORONLY ( input, fasta, [], reference ) } + +workflow 
test_cnvkit_germline_cram { + + input = [ + [ id:'test'], // meta map + [], + file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_cram'], checkIfExists: true) + ] + fasta = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true) + targets = file(params.test_data['homo_sapiens']['genome']['genome_21_multi_interval_bed'], checkIfExists: true) + + CNVKIT_GERMLINE ( input, fasta, targets, []) +} diff --git a/tests/modules/cnvkit/batch/test.yml b/tests/modules/cnvkit/batch/test.yml index 00d6a767..928a08de 100755 --- a/tests/modules/cnvkit/batch/test.yml +++ b/tests/modules/cnvkit/batch/test.yml @@ -92,9 +92,22 @@ - cnvkit - cnvkit/batch files: - - path: output/cnvkit/reference.antitarget-tmp.bed - - path: output/cnvkit/reference.target-tmp.bed - md5sum: 26d25ff2d6c45b6d92169b3559c6acdb + - path: output/cnvkit/reference_chr21.antitarget-tmp.bed + md5sum: 3d4d20f9f23b39970865d29ef239d20b + - path: output/cnvkit/reference_chr21.target-tmp.bed + md5sum: 657b25dbda8516624efa8cb2cf3716ca + - path: output/cnvkit/test2.paired_end.recalibrated.sorted.antitargetcoverage.cnn + md5sum: 067115082c4af4b64d58c0dc3a3642e4 + - path: output/cnvkit/test2.paired_end.recalibrated.sorted.bintest.cns + md5sum: f6adc75a0a86b7a921eca1b79a394cb0 + - path: output/cnvkit/test2.paired_end.recalibrated.sorted.call.cns + md5sum: f7caeca04aba28b125ce26b511f42afb + - path: output/cnvkit/test2.paired_end.recalibrated.sorted.cnr + md5sum: d9bdb71ce807051369577ee7f807a40c + - path: output/cnvkit/test2.paired_end.recalibrated.sorted.cns + md5sum: 2b56aac606ba6183d018b30ca58afcec + - path: output/cnvkit/test2.paired_end.recalibrated.sorted.targetcoverage.cnn + md5sum: e6d0190c1c37ce6e41f76ca5b24ccca3 - name: cnvkit batch test_cnvkit_tumoronly_cram command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_tumoronly_cram -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config @@ -102,6 +115,44 @@ - 
cnvkit - cnvkit/batch files: - - path: output/cnvkit/reference.antitarget-tmp.bed - - path: output/cnvkit/reference.target-tmp.bed - md5sum: 26d25ff2d6c45b6d92169b3559c6acdb + - path: output/cnvkit/reference_chr21.antitarget-tmp.bed + md5sum: 3d4d20f9f23b39970865d29ef239d20b + - path: output/cnvkit/reference_chr21.target-tmp.bed + md5sum: 657b25dbda8516624efa8cb2cf3716ca + - path: output/cnvkit/test2.paired_end.recalibrated.sorted.antitargetcoverage.cnn + md5sum: 067115082c4af4b64d58c0dc3a3642e4 + - path: output/cnvkit/test2.paired_end.recalibrated.sorted.bintest.cns + md5sum: f6adc75a0a86b7a921eca1b79a394cb0 + - path: output/cnvkit/test2.paired_end.recalibrated.sorted.call.cns + md5sum: f7caeca04aba28b125ce26b511f42afb + - path: output/cnvkit/test2.paired_end.recalibrated.sorted.cnr + md5sum: d9bdb71ce807051369577ee7f807a40c + - path: output/cnvkit/test2.paired_end.recalibrated.sorted.cns + md5sum: 2b56aac606ba6183d018b30ca58afcec + - path: output/cnvkit/test2.paired_end.recalibrated.sorted.targetcoverage.cnn + md5sum: e6d0190c1c37ce6e41f76ca5b24ccca3 + +- name: cnvkit batch test_cnvkit_germline_cram + command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_germline_cram -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config + tags: + - cnvkit + - cnvkit/batch + files: + - path: output/cnvkit/multi_intervals.antitarget.bed + md5sum: 3d4d20f9f23b39970865d29ef239d20b + - path: output/cnvkit/multi_intervals.target.bed + md5sum: 86d30493bb2e619a93f4ebc2923d29f3 + - path: output/cnvkit/reference.cnn + md5sum: a09ee4be5dda1cf0f68073bdb3aad8ec + - path: output/cnvkit/test2.paired_end.recalibrated.sorted.antitargetcoverage.cnn + md5sum: 067115082c4af4b64d58c0dc3a3642e4 + - path: output/cnvkit/test2.paired_end.recalibrated.sorted.bintest.cns + md5sum: 68b62b75cd91b2ffe5633686fb943490 + - path: output/cnvkit/test2.paired_end.recalibrated.sorted.call.cns + md5sum: df196edd72613c59186f4d87df3dc4a4 + - path: 
output/cnvkit/test2.paired_end.recalibrated.sorted.cnr + md5sum: 3b4fc0cc73be78f978cfe2422470753e + - path: output/cnvkit/test2.paired_end.recalibrated.sorted.cns + md5sum: 4e67451dbcb6601fc3fa5dd7e570f1d4 + - path: output/cnvkit/test2.paired_end.recalibrated.sorted.targetcoverage.cnn + md5sum: b4a49faf170e436ec32dcc21ccc3ce8f From a21cfb666eed7517060f1d2d61a21c340753b0ec Mon Sep 17 00:00:00 2001 From: SusiJo Date: Thu, 2 Jun 2022 10:38:05 +0200 Subject: [PATCH 089/168] changed container + add cram2bam conversion --- modules/deeptools/bamcoverage/main.nf | 58 +++++++++++++++----- modules/deeptools/bamcoverage/meta.yml | 9 +++ tests/modules/deeptools/bamcoverage/main.nf | 18 +++++- tests/modules/deeptools/bamcoverage/test.yml | 21 ++++--- 4 files changed, 82 insertions(+), 24 deletions(-) diff --git a/modules/deeptools/bamcoverage/main.nf b/modules/deeptools/bamcoverage/main.nf index 926bf0ad..04073ed9 100644 --- a/modules/deeptools/bamcoverage/main.nf +++ b/modules/deeptools/bamcoverage/main.nf @@ -2,13 +2,15 @@ process DEEPTOOLS_BAMCOVERAGE { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::deeptools=3.5.1" : null) + conda (params.enable_conda ? "bioconda::deeptools=3.5.1 bioconda::samtools=1.15.1" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/deeptools:3.5.1--py_0': - 'quay.io/biocontainers/deeptools:3.5.1--py_0' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-eb9e7907c7a753917c1e4d7a64384c047429618a:2c687053c0252667cca265c9f4118f2c205a604c-0': + 'quay.io/biocontainers/mulled-v2-eb9e7907c7a753917c1e4d7a64384c047429618a:2c687053c0252667cca265c9f4118f2c205a604c-0' }" input: tuple val(meta), path(input), path(input_index) + path(fasta) + path(fasta_fai) output: tuple val(meta), path("*.bigWig") , emit: bigwig, optional: true @@ -22,16 +24,44 @@ process DEEPTOOLS_BAMCOVERAGE { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}.bigWig" - """ - bamCoverage \\ - --bam $input \\ - $args \\ - --numberOfProcessors ${task.cpus} \\ - --outFileName ${prefix} + // cram_input is currently not working with deeptools + // therefore it's required to convert cram to bam first + def is_cram = input.Extension == "cram" ? true : false + def input_out = is_cram ? input.BaseName + ".bam" : "${input}" + def fai_reference = fasta_fai ? 
"--fai-reference ${fasta_fai}" : "" + + if (is_cram){ + """ + samtools view -T $fasta $input $fai_reference -@ $task.cpus -o $input_out + samtools index -b $input_out -@ $task.cpus + + bamCoverage \\ + --bam $input_out \\ + $args \\ + --numberOfProcessors ${task.cpus} \\ + --outFileName ${prefix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + deeptools: \$(bamCoverage --version | sed -e "s/bamCoverage //g") + END_VERSIONS + """ + + } + else { + """ + bamCoverage \\ + --bam $input_out \\ + $args \\ + --numberOfProcessors ${task.cpus} \\ + --outFileName ${prefix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + deeptools: \$(bamCoverage --version | sed -e "s/bamCoverage //g") + END_VERSIONS + """ + } - cat <<-END_VERSIONS > versions.yml - "${task.process}": - deeptools: \$(bamCoverage --version | sed -e "s/bamCoverage //g") - END_VERSIONS - """ } diff --git a/modules/deeptools/bamcoverage/meta.yml b/modules/deeptools/bamcoverage/meta.yml index fb92168f..c6566910 100644 --- a/modules/deeptools/bamcoverage/meta.yml +++ b/modules/deeptools/bamcoverage/meta.yml @@ -25,6 +25,14 @@ input: type: file description: BAM/CRAM index file pattern: "*.{bai,crai}" + - fasta: + type: file + description: Reference file the CRAM file was created with (required with CRAM input) + pattern: "*.{fasta,fa}" + - fasta_fai: + type: file + description: Index of the reference file (optional, but recommended) + pattern: "*.{fai}" output: - meta: @@ -47,3 +55,4 @@ output: authors: - "@FriederikeHanssen" + - "@SusiJo" diff --git a/tests/modules/deeptools/bamcoverage/main.nf b/tests/modules/deeptools/bamcoverage/main.nf index fb5c1c2d..97a79508 100644 --- a/tests/modules/deeptools/bamcoverage/main.nf +++ b/tests/modules/deeptools/bamcoverage/main.nf @@ -12,7 +12,7 @@ workflow test_deeptools_bamcoverage_bam { 
file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true) ] - DEEPTOOLS_BAMCOVERAGE ( input ) + DEEPTOOLS_BAMCOVERAGE ( input, [], [] ) } workflow test_deeptools_bamcoverage_cram { @@ -22,6 +22,20 @@ workflow test_deeptools_bamcoverage_cram { file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true), file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true) ] + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + fasta_fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) - DEEPTOOLS_BAMCOVERAGE ( input ) + DEEPTOOLS_BAMCOVERAGE ( input, fasta, fasta_fai) +} + +workflow test_deeptools_bamcoverage_cram_no_fasta_fai { + + input = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true) + ] + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + + DEEPTOOLS_BAMCOVERAGE ( input, fasta, []) } diff --git a/tests/modules/deeptools/bamcoverage/test.yml b/tests/modules/deeptools/bamcoverage/test.yml index 736c2e61..63ef6442 100644 --- a/tests/modules/deeptools/bamcoverage/test.yml +++ b/tests/modules/deeptools/bamcoverage/test.yml @@ -1,21 +1,26 @@ - name: deeptools bamcoverage test_deeptools_bamcoverage_bam - command: nextflow run tests/modules/deeptools/bamcoverage -entry test_deeptools_bamcoverage_bam -c tests/config/nextflow.config + command: nextflow run ./tests/modules/deeptools/bamcoverage -entry test_deeptools_bamcoverage_bam -c ./tests/config/nextflow.config -c ./tests/modules/deeptools/bamcoverage/nextflow.config tags: - - deeptools - deeptools/bamcoverage + - deeptools files: - path: 
output/deeptools/test.bigWig md5sum: 95fe9383a9e6c02aea6b785cf074274f - - path: output/deeptools/versions.yml - md5sum: 68c94e73b7a8c0935578bad61fea54c1 - name: deeptools bamcoverage test_deeptools_bamcoverage_cram - command: nextflow run tests/modules/deeptools/bamcoverage -entry test_deeptools_bamcoverage_cram -c tests/config/nextflow.config + command: nextflow run ./tests/modules/deeptools/bamcoverage -entry test_deeptools_bamcoverage_cram -c ./tests/config/nextflow.config -c ./tests/modules/deeptools/bamcoverage/nextflow.config tags: - - deeptools - deeptools/bamcoverage + - deeptools + files: + - path: output/deeptools/test.bigWig + md5sum: 95fe9383a9e6c02aea6b785cf074274f + +- name: deeptools bamcoverage test_deeptools_bamcoverage_cram_no_fasta_fai + command: nextflow run ./tests/modules/deeptools/bamcoverage -entry test_deeptools_bamcoverage_cram_no_fasta_fai -c ./tests/config/nextflow.config -c ./tests/modules/deeptools/bamcoverage/nextflow.config + tags: + - deeptools/bamcoverage + - deeptools files: - path: output/deeptools/test.bigWig md5sum: 95fe9383a9e6c02aea6b785cf074274f - - path: output/deeptools/versions.yml - md5sum: 665bbd2979c49bf3974a24bd44a88e94 From 51be617b1ca9bff973655eb899d591ed6ab253b5 Mon Sep 17 00:00:00 2001 From: Matthias De Smet <11850640+matthdsm@users.noreply.github.com> Date: Thu, 2 Jun 2022 11:09:55 +0200 Subject: [PATCH 090/168] fix untar output dir --- modules/untar/main.nf | 8 +++++++- tests/config/test_data.config | 2 ++ tests/modules/untar/main.nf | 10 ++++++++++ tests/modules/untar/test.yml | 12 ++++++++++-- 4 files changed, 29 insertions(+), 3 deletions(-) diff --git a/modules/untar/main.nf b/modules/untar/main.nf index 058d1764..29ab10a5 100644 --- a/modules/untar/main.nf +++ b/modules/untar/main.nf @@ -21,12 +21,18 @@ process UNTAR { def args = task.ext.args ?: '' def args2 = task.ext.args2 ?: '' untar = archive.toString() - '.tar.gz' + """ + mkdir output + tar \\ + -C output --strip-components 1 \\ -xzvf \\ $args \\ 
$archive \\ - $args2 \\ + $args2 + + mv output ${untar} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/tests/config/test_data.config b/tests/config/test_data.config index 39331664..4af77d92 100644 --- a/tests/config/test_data.config +++ b/tests/config/test_data.config @@ -323,6 +323,8 @@ params { test_sv_vcf = "${test_data_dir}/genomics/homo_sapiens/illumina/vcf/sv_query.vcf.gz" test_pytor = "${test_data_dir}/genomics/homo_sapiens/illumina/pytor/test.pytor" + + test_flowcell = "${test_data_dir}/genomics/homo_sapiens/illumina/bcl/flowcell.tar.gz" } 'pacbio' { primers = "${test_data_dir}/genomics/homo_sapiens/pacbio/fasta/primers.fasta" diff --git a/tests/modules/untar/main.nf b/tests/modules/untar/main.nf index 9d6d4c6c..3b7daedd 100644 --- a/tests/modules/untar/main.nf +++ b/tests/modules/untar/main.nf @@ -12,3 +12,13 @@ workflow test_untar { UNTAR ( input ) } + + +workflow test_untar_different_output_path { + input = [ + [], + file(params.test_data['homo_sapiens']['illumina']['test_flowcell'], checkIfExists: true) + ] + + UNTAR ( input ) +} diff --git a/tests/modules/untar/test.yml b/tests/modules/untar/test.yml index 6d0d1d12..6599f105 100644 --- a/tests/modules/untar/test.yml +++ b/tests/modules/untar/test.yml @@ -1,5 +1,5 @@ -- name: untar - command: nextflow run ./tests/modules/untar -entry test_untar -c ./tests/config/nextflow.config -c ./tests/modules/untar/nextflow.config +- name: untar test_untar + command: nextflow run ./tests/modules/untar -entry test_untar -c ./tests/config/nextflow.config -c ./tests/modules/untar/nextflow.config tags: - untar files: @@ -9,3 +9,11 @@ md5sum: a033d00cf6759407010b21700938f543 - path: output/untar/kraken2/taxo.k2d md5sum: 094d5891cdccf2f1468088855c214b2c + +- name: untar test_untar_different_output_path + command: nextflow run ./tests/modules/untar -entry test_untar_different_output_path -c ./tests/config/nextflow.config -c ./tests/modules/untar/nextflow.config + tags: + - untar + files: + - path: 
output/untar/flowcell/RunInfo.xml + md5sum: 03038959f4dd181c86bc97ae71fe270a From 2f3ddc90fbeda9fcf54f25b84465f5ac149d2b75 Mon Sep 17 00:00:00 2001 From: SusiJo Date: Thu, 2 Jun 2022 12:57:50 +0200 Subject: [PATCH 091/168] added fasta_fai as input + changed if statements --- modules/cnvkit/batch/main.nf | 152 +++++++++++++++++++++++----- modules/cnvkit/batch/meta.yml | 4 + tests/modules/cnvkit/batch/main.nf | 44 +++++--- tests/modules/cnvkit/batch/test.yml | 49 ++++++--- 4 files changed, 200 insertions(+), 49 deletions(-) diff --git a/modules/cnvkit/batch/main.nf b/modules/cnvkit/batch/main.nf index b467c6f2..090f7860 100644 --- a/modules/cnvkit/batch/main.nf +++ b/modules/cnvkit/batch/main.nf @@ -10,6 +10,7 @@ process CNVKIT_BATCH { input: tuple val(meta), path(tumor), path(normal) path fasta + path fasta_fai path targets path reference @@ -34,12 +35,15 @@ process CNVKIT_BATCH { // execute samtools only when cram files are input, cnvkit runs natively on bam but is prohibitively slow def tumor_cram = tumor_exists && tumor.Extension == "cram" ? true : false def normal_cram = normal_exists && normal.Extension == "cram" ? true : false + def tumor_bam = tumor_exists && tumor.Extension == "bam" ? true : false + def normal_bam = normal_exists && normal.Extension == "bam" ? true : false def tumor_out = tumor_cram ? tumor.BaseName + ".bam" : "${tumor}" // tumor_only mode does not need fasta & target // instead it requires a pre-computed reference.cnn which is built from fasta & target def (normal_out, normal_args, fasta_args) = ["", "", ""] + def fai_reference = fasta_fai ? 
"--fai-reference ${fasta_fai}" : "" if (normal_exists){ def normal_prefix = normal.BaseName @@ -48,9 +52,9 @@ process CNVKIT_BATCH { // germline mode // normal samples must be input without a flag - // requires flag --normal to be empty + // requires flag --normal to be empty [] if(!tumor_exists){ - tumor_out = normal.BaseName + ".bam" + tumor_out = "${normal_prefix}" + ".bam" normal_args = "--normal " } // somatic mode @@ -62,28 +66,130 @@ process CNVKIT_BATCH { def target_args = targets ? "--targets $targets" : "" def reference_args = reference ? "--reference $reference" : "" - """ - if $tumor_cram; then - samtools view -T $fasta $tumor -@ $task.cpus -o $tumor_out - fi - if $normal_cram; then - samtools view -T $fasta $normal -@ $task.cpus -o $normal_out - fi + // somatic_mode cram_input + if (tumor_cram && normal_cram){ + """ + samtools view -T $fasta $fai_reference $tumor -@ $task.cpus -o $tumor_out + samtools view -T $fasta $fai_reference $normal -@ $task.cpus -o $normal_out + cnvkit.py \\ + batch \\ + $tumor_out \\ + $normal_args \\ + $fasta_args \\ + $reference_args \\ + $target_args \\ + --processes $task.cpus \\ + $args - cnvkit.py \\ - batch \\ - $tumor_out \\ - $normal_args \\ - $fasta_args \\ - $reference_args \\ - $target_args \\ - --processes $task.cpus \\ - $args + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + cnvkit: \$(cnvkit.py version | sed -e "s/cnvkit v//g") + END_VERSIONS + """ + } + // somatic_mode bam_input + else if (tumor_bam && normal_bam){ + """ + cnvkit.py \\ + batch \\ + $tumor_out \\ + $normal_args \\ + $fasta_args \\ + $reference_args \\ + $target_args \\ + --processes $task.cpus \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cnvkit: \$(cnvkit.py version | sed -e "s/cnvkit v//g") + END_VERSIONS + """ + } + // tumor_only_mode cram_input + else if(tumor_cram && !normal_exists){ + """ + samtools view -T $fasta 
$fai_reference $tumor -@ $task.cpus -o $tumor_out + + cnvkit.py \\ + batch \\ + $tumor_out \\ + $normal_args \\ + $fasta_args \\ + $reference_args \\ + $target_args \\ + --processes $task.cpus \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + cnvkit: \$(cnvkit.py version | sed -e "s/cnvkit v//g") + END_VERSIONS + """ + } + // tumor_only bam_input + else if(tumor_bam && !normal_exists){ + """ + cnvkit.py \\ + batch \\ + $tumor_out \\ + $normal_args \\ + $fasta_args \\ + $reference_args \\ + $target_args \\ + --processes $task.cpus \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cnvkit: \$(cnvkit.py version | sed -e "s/cnvkit v//g") + END_VERSIONS + """ + } + // germline mode cram_input + // normal_args must be --normal [] + else if (normal_cram && !tumor_exists){ + """ + samtools view -T $fasta $fai_reference $normal -@ $task.cpus -o $tumor_out + + cnvkit.py \\ + batch \\ + $tumor_out \\ + $normal_args \\ + $fasta_args \\ + $reference_args \\ + $target_args \\ + --processes $task.cpus \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + cnvkit: \$(cnvkit.py version | sed -e "s/cnvkit v//g") + END_VERSIONS + """ + } + // germline mode bam_input + else if (normal_bam && !tumor_exists){ + """ + cnvkit.py \\ + batch \\ + $tumor_out \\ + $normal_args \\ + $fasta_args \\ + $reference_args \\ + $target_args \\ + --processes $task.cpus \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cnvkit: \$(cnvkit.py version | sed -e "s/cnvkit v//g") + END_VERSIONS + """ + } - cat <<-END_VERSIONS > versions.yml - "${task.process}": - cnvkit: \$(cnvkit.py version | sed -e "s/cnvkit v//g") - END_VERSIONS - """ } diff --git a/modules/cnvkit/batch/meta.yml b/modules/cnvkit/batch/meta.yml index 2cd675c7..89762192 100644 --- 
a/modules/cnvkit/batch/meta.yml +++ b/modules/cnvkit/batch/meta.yml @@ -29,6 +29,10 @@ input: type: file description: | Input reference genome fasta file (only needed for cram_input and/or when normal_samples are provided) + - fasta_fai: + type: file + description: | + Input reference genome fasta index (optional, but recommended for cram_input) - targetfile: type: file description: | diff --git a/tests/modules/cnvkit/batch/main.nf b/tests/modules/cnvkit/batch/main.nf index c93e3a00..daf87a37 100755 --- a/tests/modules/cnvkit/batch/main.nf +++ b/tests/modules/cnvkit/batch/main.nf @@ -7,7 +7,7 @@ include { CNVKIT_BATCH as CNVKIT_WGS } from '../../../../modules/cnvkit/ba include { CNVKIT_BATCH as CNVKIT_TUMORONLY } from '../../../../modules/cnvkit/batch/main.nf' include { CNVKIT_BATCH as CNVKIT_GERMLINE } from '../../../../modules/cnvkit/batch/main.nf' -workflow test_cnvkit_hybrid { +workflow test_cnvkit_hybrid_somatic { input = [ [ id:'test' ], // meta map @@ -17,10 +17,10 @@ workflow test_cnvkit_hybrid { fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) targets = file(params.test_data['sarscov2']['genome']['baits_bed'], checkIfExists: true) - CNVKIT_HYBRID ( input, fasta, targets, [] ) + CNVKIT_HYBRID ( input, fasta, [], targets, [] ) } -workflow test_cnvkit_wgs { +workflow test_cnvkit_wgs_somatic { input = [ [ id:'test'], // meta map @@ -29,22 +29,24 @@ workflow test_cnvkit_wgs { ] fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) - CNVKIT_WGS ( input, fasta, [], [] ) + CNVKIT_WGS ( input, fasta, [], [], [] ) } -workflow test_cnvkit_cram { +workflow test_cnvkit_cram_wgs_somatic { input = [ [ id:'test'], // meta map file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_cram'], checkIfExists: true), file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true) ] - fasta = 
file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + fasta_fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) - CNVKIT_WGS ( input, fasta, [], [] ) + CNVKIT_WGS ( input, fasta, fasta_fai, [], [] ) } -workflow test_cnvkit_tumoronly { + +workflow test_cnvkit_tumoronly_hybrid_bam { input = [ [ id:'test'], // meta map @@ -53,10 +55,10 @@ workflow test_cnvkit_tumoronly { ] reference = file(params.test_data['homo_sapiens']['genome']['genome_21_reference_cnn'], checkIfExists: true) - CNVKIT_TUMORONLY ( input, [], [], reference ) + CNVKIT_TUMORONLY ( input, [], [], [], reference ) } -workflow test_cnvkit_tumoronly_cram { +workflow test_cnvkit_tumoronly_hybrid_cram { input = [ [ id:'test'], // meta map @@ -66,10 +68,10 @@ workflow test_cnvkit_tumoronly_cram { fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) reference = file(params.test_data['homo_sapiens']['genome']['genome_21_reference_cnn'], checkIfExists: true) - CNVKIT_TUMORONLY ( input, fasta, [], reference ) + CNVKIT_TUMORONLY ( input, fasta, [], [], reference ) } -workflow test_cnvkit_germline_cram { +workflow test_cnvkit_germline_hybrid_cram { input = [ [ id:'test'], // meta map @@ -77,7 +79,21 @@ workflow test_cnvkit_germline_cram { file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_cram'], checkIfExists: true) ] fasta = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true) - targets = file(params.test_data['homo_sapiens']['genome']['genome_21_multi_interval_bed'], checkIfExists: true) + fasta_fai = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta_fai'], checkIfExists: true) + targets = file(params.test_data['homo_sapiens']['genome']['genome_21_multi_interval_bed'], checkIfExists: true) - CNVKIT_GERMLINE ( 
input, fasta, targets, []) + CNVKIT_GERMLINE ( input, fasta, fasta_fai, targets, []) +} + +workflow test_cnvkit_germline_hybrid_bam { + + input = [ + [ id:'test'], // meta map + [], + file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_bam'], checkIfExists: true) + ] + fasta = file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true) + targets = file(params.test_data['homo_sapiens']['genome']['genome_21_multi_interval_bed'], checkIfExists: true) + + CNVKIT_GERMLINE ( input, fasta, [], targets, []) } diff --git a/tests/modules/cnvkit/batch/test.yml b/tests/modules/cnvkit/batch/test.yml index 928a08de..006e142f 100755 --- a/tests/modules/cnvkit/batch/test.yml +++ b/tests/modules/cnvkit/batch/test.yml @@ -1,5 +1,5 @@ -- name: cnvkit batch test_cnvkit_hybrid - command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_hybrid -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config +- name: cnvkit batch test_cnvkit_hybrid_somatic + command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_hybrid_somatic -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config tags: - cnvkit - cnvkit/batch @@ -26,8 +26,8 @@ - path: output/cnvkit/test.single_end.sorted.targetcoverage.cnn md5sum: aa8a018b1d4d1e688c9f9f6ae01bf4d7 -- name: cnvkit batch test_cnvkit_wgs - command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_wgs -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config +- name: cnvkit batch test_cnvkit_wgs_somatic + command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_wgs_somatic -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config tags: - cnvkit - cnvkit/batch @@ -56,8 +56,8 @@ - path: output/cnvkit/test2.paired_end.sorted.targetcoverage.cnn md5sum: 6ae6b3fce7299eedca6133d911c38fe1 -- name: cnvkit batch test_cnvkit_cram - command: nextflow run 
./tests/modules/cnvkit/batch -entry test_cnvkit_cram -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config +- name: cnvkit batch test_cnvkit_cram_wgs_somatic + command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_cram_wgs_somatic -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config tags: - cnvkit - cnvkit/batch @@ -86,8 +86,8 @@ - path: output/cnvkit/test2.paired_end.sorted.targetcoverage.cnn md5sum: 6ae6b3fce7299eedca6133d911c38fe1 -- name: cnvkit batch test_cnvkit_tumoronly - command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_tumoronly -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config +- name: cnvkit batch test_cnvkit_tumoronly_hybrid_bam + command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_tumoronly_hybrid_bam -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config tags: - cnvkit - cnvkit/batch @@ -109,8 +109,8 @@ - path: output/cnvkit/test2.paired_end.recalibrated.sorted.targetcoverage.cnn md5sum: e6d0190c1c37ce6e41f76ca5b24ccca3 -- name: cnvkit batch test_cnvkit_tumoronly_cram - command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_tumoronly_cram -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config +- name: cnvkit batch test_cnvkit_tumoronly_hybrid_cram + command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_tumoronly_hybrid_cram -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config tags: - cnvkit - cnvkit/batch @@ -132,8 +132,33 @@ - path: output/cnvkit/test2.paired_end.recalibrated.sorted.targetcoverage.cnn md5sum: e6d0190c1c37ce6e41f76ca5b24ccca3 -- name: cnvkit batch test_cnvkit_germline_cram - command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_germline_cram -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config +- name: cnvkit batch 
test_cnvkit_germline_hybrid_cram + command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_germline_hybrid_cram -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config + tags: + - cnvkit + - cnvkit/batch + files: + - path: output/cnvkit/multi_intervals.antitarget.bed + md5sum: 3d4d20f9f23b39970865d29ef239d20b + - path: output/cnvkit/multi_intervals.target.bed + md5sum: 86d30493bb2e619a93f4ebc2923d29f3 + - path: output/cnvkit/reference.cnn + md5sum: a09ee4be5dda1cf0f68073bdb3aad8ec + - path: output/cnvkit/test2.paired_end.recalibrated.sorted.antitargetcoverage.cnn + md5sum: 067115082c4af4b64d58c0dc3a3642e4 + - path: output/cnvkit/test2.paired_end.recalibrated.sorted.bintest.cns + md5sum: 68b62b75cd91b2ffe5633686fb943490 + - path: output/cnvkit/test2.paired_end.recalibrated.sorted.call.cns + md5sum: df196edd72613c59186f4d87df3dc4a4 + - path: output/cnvkit/test2.paired_end.recalibrated.sorted.cnr + md5sum: 3b4fc0cc73be78f978cfe2422470753e + - path: output/cnvkit/test2.paired_end.recalibrated.sorted.cns + md5sum: 4e67451dbcb6601fc3fa5dd7e570f1d4 + - path: output/cnvkit/test2.paired_end.recalibrated.sorted.targetcoverage.cnn + md5sum: b4a49faf170e436ec32dcc21ccc3ce8f + +- name: cnvkit batch test_cnvkit_germline_hybrid_bam + command: nextflow run ./tests/modules/cnvkit/batch -entry test_cnvkit_germline_hybrid_bam -c ./tests/config/nextflow.config -c ./tests/modules/cnvkit/batch/nextflow.config tags: - cnvkit - cnvkit/batch From 2a45d4286895d77d47b3a3dd9bc845b4d13276df Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Thu, 2 Jun 2022 13:17:06 +0200 Subject: [PATCH 092/168] added the calibratedragstrmodel module --- modules/gatk4/calibratedragstrmodel/main.nf | 48 +++++++++++++ modules/gatk4/calibratedragstrmodel/meta.yml | 70 +++++++++++++++++++ tests/config/pytest_modules.yml | 4 ++ tests/config/test_data.config | 1 + .../gatk4/calibratedragstrmodel/main.nf | 44 ++++++++++++ .../calibratedragstrmodel/nextflow.config | 5 ++ 
.../gatk4/calibratedragstrmodel/test.yml | 17 +++++ 7 files changed, 189 insertions(+) create mode 100644 modules/gatk4/calibratedragstrmodel/main.nf create mode 100644 modules/gatk4/calibratedragstrmodel/meta.yml create mode 100644 tests/modules/gatk4/calibratedragstrmodel/main.nf create mode 100644 tests/modules/gatk4/calibratedragstrmodel/nextflow.config create mode 100644 tests/modules/gatk4/calibratedragstrmodel/test.yml diff --git a/modules/gatk4/calibratedragstrmodel/main.nf b/modules/gatk4/calibratedragstrmodel/main.nf new file mode 100644 index 00000000..6682753d --- /dev/null +++ b/modules/gatk4/calibratedragstrmodel/main.nf @@ -0,0 +1,48 @@ +process GATK4_CALIBRATEDRAGSTRMODEL{ + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gatk4:4.2.6.1--hdfd78af_0': + 'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }" + + input: + tuple val(meta), path(bam), path(bam_index) + path fasta + path fasta_fai + path dict + path strtablefile + + output: + tuple val(meta), path("*.txt") , emit: dragstr_model + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + def avail_mem = 3 + if (!task.memory) { + log.info '[GATK CalibrateDragstrModel] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = task.memory.giga + } + """ + gatk --java-options "-Xmx${avail_mem}g" CalibrateDragstrModel \\ + --input $bam \\ + --output ${prefix}.txt \\ + --reference $fasta \\ + --str-table-path $strtablefile \\ + --tmp-dir . 
\\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/gatk4/calibratedragstrmodel/meta.yml b/modules/gatk4/calibratedragstrmodel/meta.yml new file mode 100644 index 00000000..f72a4f60 --- /dev/null +++ b/modules/gatk4/calibratedragstrmodel/meta.yml @@ -0,0 +1,70 @@ +name: "gatk4_calibratedragstrmodel" +description: estimates the parameters for the DRAGstr model +keywords: + - gatk4 + - bam + - cram + - sam + - calibratedragstrmodel +tools: + - gatk4: + description: + Genome Analysis Toolkit (GATK4). Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/articles/360057441571-CalibrateDragstrModel-BETA- + tool_dev_url: https://github.com/broadinstitute/gatk + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] + +input: + # Only when we have meta + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - bam_index: + type: file + description: index of the BAM/CRAM/SAM file + pattern: "*.{bai,crai,sai}" + - fasta: + type: file + description: The reference FASTA file + pattern: "*.{fasta,fa}" + - fasta_fai: + type: file + description: The index of the reference FASTA file + pattern: "*.fai" + - dict: + type: file + description: The sequence dictionary of the reference FASTA file + pattern: "*.dict" + - strtablefile: + type: file + description: The StrTableFile zip folder of the reference FASTA file + pattern: "*.zip" + +output: + #Only when we have meta + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - dragstr_model: + type: file + description: The DragSTR model + pattern: "*.txt" + +authors: + - "@nvnieuwk" diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index bf4a6137..d4bfac5b 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -743,6 +743,10 @@ gatk4/calculatecontamination: - modules/gatk4/calculatecontamination/** - tests/modules/gatk4/calculatecontamination/** +gatk4/calibratedragstrmodel: + - modules/gatk4/calibratedragstrmodel/** + - tests/modules/gatk4/calibratedragstrmodel/** + gatk4/cnnscorevariants: - modules/gatk4/cnnscorevariants/** - tests/modules/gatk4/cnnscorevariants/** diff --git a/tests/config/test_data.config b/tests/config/test_data.config index 39331664..b1326544 100644 --- a/tests/config/test_data.config +++ b/tests/config/test_data.config @@ -121,6 +121,7 @@ params { genome_elfasta = "${test_data_dir}/genomics/homo_sapiens/genome/genome.elfasta" genome_fasta = "${test_data_dir}/genomics/homo_sapiens/genome/genome.fasta" genome_fasta_fai = 
"${test_data_dir}/genomics/homo_sapiens/genome/genome.fasta.fai" + genome_strtablefile = "${test_data_dir}/genomics/homo_sapiens/genome/genome_strtablefile.zip" genome_dict = "${test_data_dir}/genomics/homo_sapiens/genome/genome.dict" genome_gff3 = "${test_data_dir}/genomics/homo_sapiens/genome/genome.gff3" genome_gtf = "${test_data_dir}/genomics/homo_sapiens/genome/genome.gtf" diff --git a/tests/modules/gatk4/calibratedragstrmodel/main.nf b/tests/modules/gatk4/calibratedragstrmodel/main.nf new file mode 100644 index 00000000..4d4e9896 --- /dev/null +++ b/tests/modules/gatk4/calibratedragstrmodel/main.nf @@ -0,0 +1,44 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { GATK4_CALIBRATEDRAGSTRMODEL } from '../../../../modules/gatk4/calibratedragstrmodel/main.nf' + +workflow test_gatk4_calibratedragstrmodel_bam { + + input = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true) + ] + + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + + fasta_fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + + dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) + + strtablefile = file(params.test_data['homo_sapiens']['genome']['genome_strtablefile'], checkIfExists: true) + + GATK4_CALIBRATEDRAGSTRMODEL ( input, fasta, fasta_fai, dict, strtablefile ) +} + +workflow test_gatk4_calibratedragstrmodel_cram { + + input = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true) + ] + + fasta = 
file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + + fasta_fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + + dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) + + strtablefile = file(params.test_data['homo_sapiens']['genome']['genome_strtablefile'], checkIfExists: true) + + GATK4_CALIBRATEDRAGSTRMODEL ( input, fasta, fasta_fai, dict, strtablefile ) +} + diff --git a/tests/modules/gatk4/calibratedragstrmodel/nextflow.config b/tests/modules/gatk4/calibratedragstrmodel/nextflow.config new file mode 100644 index 00000000..50f50a7a --- /dev/null +++ b/tests/modules/gatk4/calibratedragstrmodel/nextflow.config @@ -0,0 +1,5 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + +} \ No newline at end of file diff --git a/tests/modules/gatk4/calibratedragstrmodel/test.yml b/tests/modules/gatk4/calibratedragstrmodel/test.yml new file mode 100644 index 00000000..bc46dead --- /dev/null +++ b/tests/modules/gatk4/calibratedragstrmodel/test.yml @@ -0,0 +1,17 @@ +- name: gatk4 calibratedragstrmodel test_gatk4_calibratedragstrmodel_bam + command: nextflow run ./tests/modules/gatk4/calibratedragstrmodel -entry test_gatk4_calibratedragstrmodel_bam -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/calibratedragstrmodel/nextflow.config + tags: + - gatk4 + - gatk4/calibratedragstrmodel + files: + - path: output/gatk4/test.txt + md5sum: 0a1a1583b157fa2251dd931ed165da4f + +- name: gatk4 calibratedragstrmodel test_gatk4_calibratedragstrmodel_cram + command: nextflow run ./tests/modules/gatk4/calibratedragstrmodel -entry test_gatk4_calibratedragstrmodel_cram -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/calibratedragstrmodel/nextflow.config + tags: + - gatk4 + - gatk4/calibratedragstrmodel + files: + - path: output/gatk4/test.txt + md5sum: 1aa7ab38023f724877b3323c5e6b9a4e From 
d3874bbae9035e6be95be42a712acfff4d28d00e Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Thu, 2 Jun 2022 13:22:55 +0200 Subject: [PATCH 093/168] linting --- modules/gatk4/calibratedragstrmodel/meta.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/gatk4/calibratedragstrmodel/meta.yml b/modules/gatk4/calibratedragstrmodel/meta.yml index f72a4f60..e6d2b0b1 100644 --- a/modules/gatk4/calibratedragstrmodel/meta.yml +++ b/modules/gatk4/calibratedragstrmodel/meta.yml @@ -1,4 +1,4 @@ -name: "gatk4_calibratedragstrmodel" +name: gatk4_calibratedragstrmodel description: estimates the parameters for the DRAGstr model keywords: - gatk4 From 3a49b82863d6dc71fcfcb36a01b04cb58dcb6f12 Mon Sep 17 00:00:00 2001 From: nvnieuwk <101190534+nvnieuwk@users.noreply.github.com> Date: Thu, 2 Jun 2022 13:39:44 +0200 Subject: [PATCH 094/168] Update modules/gatk4/calibratedragstrmodel/main.nf Co-authored-by: Matthias De Smet <11850640+matthdsm@users.noreply.github.com> --- modules/gatk4/calibratedragstrmodel/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/gatk4/calibratedragstrmodel/main.nf b/modules/gatk4/calibratedragstrmodel/main.nf index 6682753d..00a11de0 100644 --- a/modules/gatk4/calibratedragstrmodel/main.nf +++ b/modules/gatk4/calibratedragstrmodel/main.nf @@ -1,4 +1,4 @@ -process GATK4_CALIBRATEDRAGSTRMODEL{ +process GATK4_CALIBRATEDRAGSTRMODEL { tag "$meta.id" label 'process_low' From e53d091a6de1ae9fd681351c085d8abe076ba1ec Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Thu, 2 Jun 2022 14:08:09 +0200 Subject: [PATCH 095/168] added dragstr model to haplotypecaller --- modules/gatk4/haplotypecaller/main.nf | 4 +++- modules/gatk4/haplotypecaller/meta.yml | 4 ++++ tests/config/test_data.config | 2 ++ tests/modules/gatk4/haplotypecaller/main.nf | 22 +++++++++++++++++++- tests/modules/gatk4/haplotypecaller/test.yml | 18 ++++++++++------ 5 files changed, 42 insertions(+), 8 deletions(-) diff --git 
a/modules/gatk4/haplotypecaller/main.nf b/modules/gatk4/haplotypecaller/main.nf index 6dd3f69e..19cd57bb 100644 --- a/modules/gatk4/haplotypecaller/main.nf +++ b/modules/gatk4/haplotypecaller/main.nf @@ -8,7 +8,7 @@ process GATK4_HAPLOTYPECALLER { 'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }" input: - tuple val(meta), path(input), path(input_index), path(intervals) + tuple val(meta), path(input), path(input_index), path(intervals), path(dragstr_model) path fasta path fai path dict @@ -28,6 +28,7 @@ process GATK4_HAPLOTYPECALLER { def prefix = task.ext.prefix ?: "${meta.id}" def dbsnp_command = dbsnp ? "--dbsnp $dbsnp" : "" def interval_command = intervals ? "--intervals $intervals" : "" + def dragstr_command = dragstr_model ? "--dragstr-params-path $dragstr_model" : "" def avail_mem = 3 if (!task.memory) { @@ -42,6 +43,7 @@ process GATK4_HAPLOTYPECALLER { --reference $fasta \\ $dbsnp_command \\ $interval_command \\ + $dragstr_command \\ --tmp-dir . \\ $args diff --git a/modules/gatk4/haplotypecaller/meta.yml b/modules/gatk4/haplotypecaller/meta.yml index 81851a96..48193d91 100644 --- a/modules/gatk4/haplotypecaller/meta.yml +++ b/modules/gatk4/haplotypecaller/meta.yml @@ -32,6 +32,10 @@ input: - intervals: type: file description: Bed file with the genomic regions included in the library (optional) + - dragstr_model: + type: file + description: Text file containing the DragSTR model of the used BAM/CRAM file (optional) + pattern: "*.txt" - fasta: type: file description: The reference fasta file diff --git a/tests/config/test_data.config b/tests/config/test_data.config index 363903f0..fa7017b1 100644 --- a/tests/config/test_data.config +++ b/tests/config/test_data.config @@ -266,6 +266,8 @@ params { test_pileups_table = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/test.pileups.table" test2_pileups_table = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/test2.pileups.table" + test_paired_end_sorted_dragstrmodel = 
"${test_data_dir}/genomics/homo_sapiens/illumina/gatk/test_paired_end_sorted_dragstrmodel.txt" + test_genomicsdb_tar_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/test_genomicsdb.tar.gz" test_pon_genomicsdb_tar_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/gatk/test_pon_genomicsdb.tar.gz" diff --git a/tests/modules/gatk4/haplotypecaller/main.nf b/tests/modules/gatk4/haplotypecaller/main.nf index 7b60baab..3ad3dfb5 100644 --- a/tests/modules/gatk4/haplotypecaller/main.nf +++ b/tests/modules/gatk4/haplotypecaller/main.nf @@ -8,6 +8,7 @@ workflow test_gatk4_haplotypecaller { input = [ [ id:'test' ], // meta map file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true), + [], [] ] fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) @@ -21,6 +22,7 @@ workflow test_gatk4_haplotypecaller_cram { input = [ [ id:'test' ], // meta map file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true), file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true), + [], [] ] fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) @@ -34,7 +36,8 @@ workflow test_gatk4_haplotypecaller_intervals_dbsnp { input = [ [ id:'test' ], // meta map file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true), file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true), - file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true) + file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true), + [] ] fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) @@ -45,3 +48,20 @@ workflow 
test_gatk4_haplotypecaller_intervals_dbsnp { GATK4_HAPLOTYPECALLER ( input, fasta, fai, dict, sites, sites_tbi ) } + +workflow test_gatk4_haplotypecaller_dragstr_model { + input = [ [ id:'test' ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true), + [], + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_dragstrmodel'], checkIfExists: true) + ] + + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) + sites = [] + sites_tbi = [] + + GATK4_HAPLOTYPECALLER ( input, fasta, fai, dict, sites, sites_tbi ) +} diff --git a/tests/modules/gatk4/haplotypecaller/test.yml b/tests/modules/gatk4/haplotypecaller/test.yml index 3d416a0d..b566ee66 100644 --- a/tests/modules/gatk4/haplotypecaller/test.yml +++ b/tests/modules/gatk4/haplotypecaller/test.yml @@ -1,29 +1,35 @@ - name: gatk4 haplotypecaller test_gatk4_haplotypecaller - command: nextflow run ./tests/modules/gatk4/haplotypecaller -entry test_gatk4_haplotypecaller -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/haplotypecaller/nextflow.config + command: nextflow run ./tests/modules/gatk4/haplotypecaller -entry test_gatk4_haplotypecaller -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/haplotypecaller/nextflow.config tags: - gatk4/haplotypecaller - gatk4 files: - path: output/gatk4/test.vcf.gz - path: output/gatk4/test.vcf.gz.tbi - - path: output/gatk4/versions.yml - name: gatk4 haplotypecaller test_gatk4_haplotypecaller_cram - command: nextflow run ./tests/modules/gatk4/haplotypecaller -entry test_gatk4_haplotypecaller_cram -c ./tests/config/nextflow.config -c 
./tests/modules/gatk4/haplotypecaller/nextflow.config + command: nextflow run ./tests/modules/gatk4/haplotypecaller -entry test_gatk4_haplotypecaller_cram -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/haplotypecaller/nextflow.config tags: - gatk4/haplotypecaller - gatk4 files: - path: output/gatk4/test.vcf.gz - path: output/gatk4/test.vcf.gz.tbi - - path: output/gatk4/versions.yml - name: gatk4 haplotypecaller test_gatk4_haplotypecaller_intervals_dbsnp - command: nextflow run ./tests/modules/gatk4/haplotypecaller -entry test_gatk4_haplotypecaller_intervals_dbsnp -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/haplotypecaller/nextflow.config + command: nextflow run ./tests/modules/gatk4/haplotypecaller -entry test_gatk4_haplotypecaller_intervals_dbsnp -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/haplotypecaller/nextflow.config + tags: + - gatk4/haplotypecaller + - gatk4 + files: + - path: output/gatk4/test.vcf.gz + - path: output/gatk4/test.vcf.gz.tbi + +- name: gatk4 haplotypecaller test_gatk4_haplotypecaller_dragstr_model + command: nextflow run ./tests/modules/gatk4/haplotypecaller -entry test_gatk4_haplotypecaller_dragstr_model -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/haplotypecaller/nextflow.config tags: - gatk4/haplotypecaller - gatk4 files: - path: output/gatk4/test.vcf.gz - path: output/gatk4/test.vcf.gz.tbi - - path: output/gatk4/versions.yml From a6c4276ad74eb965fd9d8eb831f0786b85abc9c8 Mon Sep 17 00:00:00 2001 From: nvnieuwk <101190534+nvnieuwk@users.noreply.github.com> Date: Thu, 2 Jun 2022 14:19:57 +0200 Subject: [PATCH 096/168] Update tests/modules/gatk4/haplotypecaller/test.yml Co-authored-by: Maxime U. 
Garcia --- tests/modules/gatk4/haplotypecaller/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/modules/gatk4/haplotypecaller/test.yml b/tests/modules/gatk4/haplotypecaller/test.yml index b566ee66..ca3110d7 100644 --- a/tests/modules/gatk4/haplotypecaller/test.yml +++ b/tests/modules/gatk4/haplotypecaller/test.yml @@ -1,5 +1,5 @@ - name: gatk4 haplotypecaller test_gatk4_haplotypecaller - command: nextflow run ./tests/modules/gatk4/haplotypecaller -entry test_gatk4_haplotypecaller -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/haplotypecaller/nextflow.config + command: nextflow run ./tests/modules/gatk4/haplotypecaller -entry test_gatk4_haplotypecaller -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/haplotypecaller/nextflow.config tags: - gatk4/haplotypecaller - gatk4 From 2ce1cf3329eecc636ddfcbf27f86b398b45ac0ad Mon Sep 17 00:00:00 2001 From: nvnieuwk <101190534+nvnieuwk@users.noreply.github.com> Date: Thu, 2 Jun 2022 14:20:04 +0200 Subject: [PATCH 097/168] Update tests/modules/gatk4/haplotypecaller/test.yml Co-authored-by: Maxime U. 
Garcia --- tests/modules/gatk4/haplotypecaller/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/modules/gatk4/haplotypecaller/test.yml b/tests/modules/gatk4/haplotypecaller/test.yml index ca3110d7..094beaef 100644 --- a/tests/modules/gatk4/haplotypecaller/test.yml +++ b/tests/modules/gatk4/haplotypecaller/test.yml @@ -8,7 +8,7 @@ - path: output/gatk4/test.vcf.gz.tbi - name: gatk4 haplotypecaller test_gatk4_haplotypecaller_cram - command: nextflow run ./tests/modules/gatk4/haplotypecaller -entry test_gatk4_haplotypecaller_cram -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/haplotypecaller/nextflow.config + command: nextflow run ./tests/modules/gatk4/haplotypecaller -entry test_gatk4_haplotypecaller_cram -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/haplotypecaller/nextflow.config tags: - gatk4/haplotypecaller - gatk4 From e419d306dfeb8bab0e1d3ac07151585c0fcbee01 Mon Sep 17 00:00:00 2001 From: nvnieuwk <101190534+nvnieuwk@users.noreply.github.com> Date: Thu, 2 Jun 2022 14:20:10 +0200 Subject: [PATCH 098/168] Update tests/modules/gatk4/haplotypecaller/test.yml Co-authored-by: Maxime U. 
Garcia --- tests/modules/gatk4/haplotypecaller/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/modules/gatk4/haplotypecaller/test.yml b/tests/modules/gatk4/haplotypecaller/test.yml index 094beaef..17c4f6bf 100644 --- a/tests/modules/gatk4/haplotypecaller/test.yml +++ b/tests/modules/gatk4/haplotypecaller/test.yml @@ -17,7 +17,7 @@ - path: output/gatk4/test.vcf.gz.tbi - name: gatk4 haplotypecaller test_gatk4_haplotypecaller_intervals_dbsnp - command: nextflow run ./tests/modules/gatk4/haplotypecaller -entry test_gatk4_haplotypecaller_intervals_dbsnp -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/haplotypecaller/nextflow.config + command: nextflow run ./tests/modules/gatk4/haplotypecaller -entry test_gatk4_haplotypecaller_intervals_dbsnp -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/haplotypecaller/nextflow.config tags: - gatk4/haplotypecaller - gatk4 From e107d61069612fca5aaabcd4f056d06c732d0c18 Mon Sep 17 00:00:00 2001 From: nvnieuwk <101190534+nvnieuwk@users.noreply.github.com> Date: Thu, 2 Jun 2022 14:20:15 +0200 Subject: [PATCH 099/168] Update tests/modules/gatk4/haplotypecaller/test.yml Co-authored-by: Maxime U. 
Garcia --- tests/modules/gatk4/haplotypecaller/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/modules/gatk4/haplotypecaller/test.yml b/tests/modules/gatk4/haplotypecaller/test.yml index 17c4f6bf..96270644 100644 --- a/tests/modules/gatk4/haplotypecaller/test.yml +++ b/tests/modules/gatk4/haplotypecaller/test.yml @@ -26,7 +26,7 @@ - path: output/gatk4/test.vcf.gz.tbi - name: gatk4 haplotypecaller test_gatk4_haplotypecaller_dragstr_model - command: nextflow run ./tests/modules/gatk4/haplotypecaller -entry test_gatk4_haplotypecaller_dragstr_model -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/haplotypecaller/nextflow.config + command: nextflow run ./tests/modules/gatk4/haplotypecaller -entry test_gatk4_haplotypecaller_dragstr_model -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/haplotypecaller/nextflow.config tags: - gatk4/haplotypecaller - gatk4 From 046fa3958223ce4542578ef26502d39f4c5d71ef Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Thu, 2 Jun 2022 16:09:09 +0200 Subject: [PATCH 100/168] update VEP version + cache --- modules/ensemblvep/Dockerfile | 4 ++-- modules/ensemblvep/build.sh | 12 ++++++------ modules/ensemblvep/environment.yml | 4 ++-- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/modules/ensemblvep/Dockerfile b/modules/ensemblvep/Dockerfile index b4a1c664..4ada7c6b 100644 --- a/modules/ensemblvep/Dockerfile +++ b/modules/ensemblvep/Dockerfile @@ -11,8 +11,8 @@ RUN conda env create -f /environment.yml && conda clean -a # Setup default ARG variables ARG GENOME=GRCh38 ARG SPECIES=homo_sapiens -ARG VEP_VERSION=104 -ARG VEP_TAG=104.3 +ARG VEP_VERSION=105 +ARG VEP_TAG=105.0 # Add conda installation dir to PATH (instead of doing 'conda activate') ENV PATH /opt/conda/envs/nf-core-vep-${VEP_TAG}/bin:$PATH diff --git a/modules/ensemblvep/build.sh b/modules/ensemblvep/build.sh index 650c8704..402f2434 100755 --- a/modules/ensemblvep/build.sh +++ b/modules/ensemblvep/build.sh @@ -20,9 +20,9 
@@ build_push() { docker push nfcore/vep:${VEP_TAG}.${GENOME} } -build_push "GRCh37" "homo_sapiens" "104" "104.3" -build_push "GRCh38" "homo_sapiens" "104" "104.3" -build_push "GRCm38" "mus_musculus" "102" "104.3" -build_push "GRCm39" "mus_musculus" "104" "104.3" -build_push "CanFam3.1" "canis_lupus_familiaris" "104" "104.3" -build_push "WBcel235" "caenorhabditis_elegans" "104" "104.3" +build_push "GRCh37" "homo_sapiens" "105" "105.0" +build_push "GRCh38" "homo_sapiens" "105" "105.0" +build_push "GRCm38" "mus_musculus" "102" "105.0" +build_push "GRCm39" "mus_musculus" "105" "105.0" +build_push "CanFam3.1" "canis_lupus_familiaris" "105" "105.0" +build_push "WBcel235" "caenorhabditis_elegans" "105" "105.0" diff --git a/modules/ensemblvep/environment.yml b/modules/ensemblvep/environment.yml index c0731c26..5df85b80 100644 --- a/modules/ensemblvep/environment.yml +++ b/modules/ensemblvep/environment.yml @@ -1,10 +1,10 @@ # You can use this file to create a conda environment for this module: # conda env create -f environment.yml -name: nf-core-vep-104.3 +name: nf-core-vep-105.0 channels: - conda-forge - bioconda - defaults dependencies: - - bioconda::ensembl-vep=104.3 + - bioconda::ensembl-vep=105.0 From bc0f52bcd3a64f27d8fb2da3a6424b427e15e53c Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Thu, 2 Jun 2022 16:11:11 +0200 Subject: [PATCH 101/168] fix cache version --- modules/ensemblvep/build.sh | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/modules/ensemblvep/build.sh b/modules/ensemblvep/build.sh index 402f2434..e21a7c0d 100755 --- a/modules/ensemblvep/build.sh +++ b/modules/ensemblvep/build.sh @@ -20,9 +20,9 @@ build_push() { docker push nfcore/vep:${VEP_TAG}.${GENOME} } -build_push "GRCh37" "homo_sapiens" "105" "105.0" -build_push "GRCh38" "homo_sapiens" "105" "105.0" -build_push "GRCm38" "mus_musculus" "102" "105.0" -build_push "GRCm39" "mus_musculus" "105" "105.0" -build_push "CanFam3.1" "canis_lupus_familiaris" "105" "105.0" 
-build_push "WBcel235" "caenorhabditis_elegans" "105" "105.0" +# build_push "GRCh37" "homo_sapiens" "105" "105.0" +# build_push "GRCh38" "homo_sapiens" "105" "105.0" +# build_push "GRCm38" "mus_musculus" "102" "105.0" +# build_push "GRCm39" "mus_musculus" "105" "105.0" +build_push "CanFam3.1" "canis_lupus_familiaris" "104" "105.0" +# build_push "WBcel235" "caenorhabditis_elegans" "105" "105.0" From b623ff5d2798b8c955baeb3890f0baee6077db66 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Thu, 2 Jun 2022 16:11:45 +0200 Subject: [PATCH 102/168] fix comments --- modules/ensemblvep/build.sh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/modules/ensemblvep/build.sh b/modules/ensemblvep/build.sh index e21a7c0d..6f340c0f 100755 --- a/modules/ensemblvep/build.sh +++ b/modules/ensemblvep/build.sh @@ -20,9 +20,9 @@ build_push() { docker push nfcore/vep:${VEP_TAG}.${GENOME} } -# build_push "GRCh37" "homo_sapiens" "105" "105.0" -# build_push "GRCh38" "homo_sapiens" "105" "105.0" -# build_push "GRCm38" "mus_musculus" "102" "105.0" -# build_push "GRCm39" "mus_musculus" "105" "105.0" +build_push "GRCh37" "homo_sapiens" "105" "105.0" +build_push "GRCh38" "homo_sapiens" "105" "105.0" +build_push "GRCm38" "mus_musculus" "102" "105.0" +build_push "GRCm39" "mus_musculus" "105" "105.0" build_push "CanFam3.1" "canis_lupus_familiaris" "104" "105.0" -# build_push "WBcel235" "caenorhabditis_elegans" "105" "105.0" +build_push "WBcel235" "caenorhabditis_elegans" "105" "105.0" From b4b1f3b81d897b97da0655cf48107b70aa4b2f73 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Thu, 2 Jun 2022 16:20:12 +0200 Subject: [PATCH 103/168] add intervals to calibratedragstrmodel --- modules/gatk4/calibratedragstrmodel/main.nf | 4 ++- modules/gatk4/calibratedragstrmodel/meta.yml | 4 +++ .../gatk4/calibratedragstrmodel/main.nf | 26 +++++++++++++++++-- .../gatk4/calibratedragstrmodel/test.yml | 13 ++++++++-- 4 files changed, 42 insertions(+), 5 deletions(-) diff --git 
a/modules/gatk4/calibratedragstrmodel/main.nf b/modules/gatk4/calibratedragstrmodel/main.nf index 00a11de0..08ba7008 100644 --- a/modules/gatk4/calibratedragstrmodel/main.nf +++ b/modules/gatk4/calibratedragstrmodel/main.nf @@ -8,7 +8,7 @@ process GATK4_CALIBRATEDRAGSTRMODEL { 'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }" input: - tuple val(meta), path(bam), path(bam_index) + tuple val(meta), path(bam), path(bam_index), path(intervals) path fasta path fasta_fai path dict @@ -24,6 +24,7 @@ process GATK4_CALIBRATEDRAGSTRMODEL { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def intervals_command = intervals ? "--intervals $intervals" : "" def avail_mem = 3 if (!task.memory) { @@ -37,6 +38,7 @@ process GATK4_CALIBRATEDRAGSTRMODEL { --output ${prefix}.txt \\ --reference $fasta \\ --str-table-path $strtablefile \\ + $intervals_command \\ --tmp-dir . \\ $args diff --git a/modules/gatk4/calibratedragstrmodel/meta.yml b/modules/gatk4/calibratedragstrmodel/meta.yml index e6d2b0b1..c3c40d81 100644 --- a/modules/gatk4/calibratedragstrmodel/meta.yml +++ b/modules/gatk4/calibratedragstrmodel/meta.yml @@ -33,6 +33,10 @@ input: type: file description: index of the BAM/CRAM/SAM file pattern: "*.{bai,crai,sai}" + - intervals: + type: file + description: BED file or interval list containing regions + pattern: "*.{bed,interval_list}" - fasta: type: file description: The reference FASTA file diff --git a/tests/modules/gatk4/calibratedragstrmodel/main.nf b/tests/modules/gatk4/calibratedragstrmodel/main.nf index 4d4e9896..136a9742 100644 --- a/tests/modules/gatk4/calibratedragstrmodel/main.nf +++ b/tests/modules/gatk4/calibratedragstrmodel/main.nf @@ -9,7 +9,8 @@ workflow test_gatk4_calibratedragstrmodel_bam { input = [ [ id:'test', single_end:false ], // meta map file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), - 
file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true) + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true), + [] ] fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) @@ -28,7 +29,28 @@ workflow test_gatk4_calibratedragstrmodel_cram { input = [ [ id:'test', single_end:false ], // meta map file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true), - file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true) + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true), + [] + ] + + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + + fasta_fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + + dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) + + strtablefile = file(params.test_data['homo_sapiens']['genome']['genome_strtablefile'], checkIfExists: true) + + GATK4_CALIBRATEDRAGSTRMODEL ( input, fasta, fasta_fai, dict, strtablefile ) +} + +workflow test_gatk4_calibratedragstrmodel_beds { + + input = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true), + file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true) ] fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) diff --git a/tests/modules/gatk4/calibratedragstrmodel/test.yml b/tests/modules/gatk4/calibratedragstrmodel/test.yml index bc46dead..53c428a9 100644 --- a/tests/modules/gatk4/calibratedragstrmodel/test.yml +++ 
b/tests/modules/gatk4/calibratedragstrmodel/test.yml @@ -1,8 +1,8 @@ - name: gatk4 calibratedragstrmodel test_gatk4_calibratedragstrmodel_bam command: nextflow run ./tests/modules/gatk4/calibratedragstrmodel -entry test_gatk4_calibratedragstrmodel_bam -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/calibratedragstrmodel/nextflow.config tags: - - gatk4 - gatk4/calibratedragstrmodel + - gatk4 files: - path: output/gatk4/test.txt md5sum: 0a1a1583b157fa2251dd931ed165da4f @@ -10,8 +10,17 @@ - name: gatk4 calibratedragstrmodel test_gatk4_calibratedragstrmodel_cram command: nextflow run ./tests/modules/gatk4/calibratedragstrmodel -entry test_gatk4_calibratedragstrmodel_cram -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/calibratedragstrmodel/nextflow.config tags: - - gatk4 - gatk4/calibratedragstrmodel + - gatk4 files: - path: output/gatk4/test.txt md5sum: 1aa7ab38023f724877b3323c5e6b9a4e + +- name: gatk4 calibratedragstrmodel test_gatk4_calibratedragstrmodel_beds + command: nextflow run ./tests/modules/gatk4/calibratedragstrmodel -entry test_gatk4_calibratedragstrmodel_beds -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/calibratedragstrmodel/nextflow.config + tags: + - gatk4/calibratedragstrmodel + - gatk4 + files: + - path: output/gatk4/test.txt + md5sum: def8baccad7bd59006f08fcb0a6721bf From 44daa5d6874f655862524a29a5579aea6f6af52b Mon Sep 17 00:00:00 2001 From: SusiJo <43847534+SusiJo@users.noreply.github.com> Date: Thu, 2 Jun 2022 16:23:09 +0200 Subject: [PATCH 104/168] update prefix Co-authored-by: Maxime U. 
Garcia --- modules/gatk4/markduplicatesspark/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/gatk4/markduplicatesspark/main.nf b/modules/gatk4/markduplicatesspark/main.nf index eeaf0445..5acfd45f 100644 --- a/modules/gatk4/markduplicatesspark/main.nf +++ b/modules/gatk4/markduplicatesspark/main.nf @@ -23,7 +23,7 @@ process GATK4_MARKDUPLICATES_SPARK { script: def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" + ".bam" + def prefix = task.ext.prefix ?: "${meta.id}.bam" def input_list = bam.collect{"--input $it"}.join(' ') From efc1e1bb71592704e5f2631dabdef48c7c3af1d7 Mon Sep 17 00:00:00 2001 From: SusiJo <43847534+SusiJo@users.noreply.github.com> Date: Thu, 2 Jun 2022 16:23:18 +0200 Subject: [PATCH 105/168] rm line Co-authored-by: Maxime U. Garcia --- modules/gatk4/markduplicatesspark/main.nf | 1 - 1 file changed, 1 deletion(-) diff --git a/modules/gatk4/markduplicatesspark/main.nf b/modules/gatk4/markduplicatesspark/main.nf index 5acfd45f..703623d5 100644 --- a/modules/gatk4/markduplicatesspark/main.nf +++ b/modules/gatk4/markduplicatesspark/main.nf @@ -34,7 +34,6 @@ process GATK4_MARKDUPLICATES_SPARK { avail_mem = task.memory.giga } """ - gatk --java-options "-Xmx${avail_mem}g" MarkDuplicatesSpark \\ $input_list \\ --output $prefix \\ From f3c0107fa2ecdc510b4f569dc3e062c7e028ac7a Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Thu, 2 Jun 2022 16:24:03 +0200 Subject: [PATCH 106/168] added optional to intervals --- modules/gatk4/calibratedragstrmodel/meta.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/gatk4/calibratedragstrmodel/meta.yml b/modules/gatk4/calibratedragstrmodel/meta.yml index c3c40d81..e71dac5e 100644 --- a/modules/gatk4/calibratedragstrmodel/meta.yml +++ b/modules/gatk4/calibratedragstrmodel/meta.yml @@ -35,7 +35,7 @@ input: pattern: "*.{bai,crai,sai}" - intervals: type: file - description: BED file or interval list containing regions + description: 
BED file or interval list containing regions (optional) pattern: "*.{bed,interval_list}" - fasta: type: file From 14c63f9ccc50735464e963c558424efde668f164 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Thu, 2 Jun 2022 20:24:14 +0200 Subject: [PATCH 107/168] Changes after review --- modules/gatk/realignertargetcreator/main.nf | 14 ++++++++++++-- modules/gatk/realignertargetcreator/meta.yml | 6 +++--- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/modules/gatk/realignertargetcreator/main.nf b/modules/gatk/realignertargetcreator/main.nf index e3a03a5f..866ff8a5 100644 --- a/modules/gatk/realignertargetcreator/main.nf +++ b/modules/gatk/realignertargetcreator/main.nf @@ -8,8 +8,10 @@ process GATK_REALIGNERTARGETCREATOR { 'quay.io/biocontainers/gatk:3.5--hdfd78af_11' }" input: - tuple val(meta), path(bam), path(bai) - tuple path(fasta), path(fasta_fai), path(fasta_dict) + tuple val(meta), path(input), path(index) + path path(fasta) + path(fai) + path(dict) path(known_vcf) output: @@ -25,8 +27,16 @@ process GATK_REALIGNERTARGETCREATOR { def known = known_vcf ? "-known ${known_vcf}" : "" if ("$bam" == "${prefix}.bam") error "Input and output names are the same, set prefix in module configuration to disambiguate!" + def avail_mem = 3 + if (!task.memory) { + log.info '[GATK HaplotypeCaller] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = task.memory.giga + } + """ gatk3 \\ + -Xmx${avail_mem}g \\ -T RealignerTargetCreator \\ -nt ${task.cpus} \\ -I ${bam} \\ diff --git a/modules/gatk/realignertargetcreator/meta.yml b/modules/gatk/realignertargetcreator/meta.yml index 70df7b78..c49d2a8d 100644 --- a/modules/gatk/realignertargetcreator/meta.yml +++ b/modules/gatk/realignertargetcreator/meta.yml @@ -20,11 +20,11 @@ input: description: | Groovy Map containing sample information e.g. 
[ id:'test', single_end:false ] - - bam: + - input: type: file description: Sorted and indexed BAM/CRAM/SAM file pattern: "*.bam" - - bai: + - index: type: file description: BAM index file pattern: "*.bai" @@ -32,7 +32,7 @@ input: type: file description: Reference file used to generate BAM file pattern: ".{fasta,fa,fna}" - - fasta_fai: + - fai: type: file description: Index of reference file used to generate BAM file pattern: ".fai" From 43ef3841d1249aa20793b783bb961f568af2b8a4 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Thu, 2 Jun 2022 20:25:12 +0200 Subject: [PATCH 108/168] remove duplciate path --- modules/gatk/realignertargetcreator/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/gatk/realignertargetcreator/main.nf b/modules/gatk/realignertargetcreator/main.nf index 866ff8a5..7454430c 100644 --- a/modules/gatk/realignertargetcreator/main.nf +++ b/modules/gatk/realignertargetcreator/main.nf @@ -9,7 +9,7 @@ process GATK_REALIGNERTARGETCREATOR { input: tuple val(meta), path(input), path(index) - path path(fasta) + path(fasta) path(fai) path(dict) path(known_vcf) From b1edcc6e94124fc7f2473655268f6d72b12d7209 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Thu, 2 Jun 2022 20:26:09 +0200 Subject: [PATCH 109/168] Update modules/gatk/realignertargetcreator/main.nf --- modules/gatk/realignertargetcreator/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/gatk/realignertargetcreator/main.nf b/modules/gatk/realignertargetcreator/main.nf index 7454430c..96b8806c 100644 --- a/modules/gatk/realignertargetcreator/main.nf +++ b/modules/gatk/realignertargetcreator/main.nf @@ -29,7 +29,7 @@ process GATK_REALIGNERTARGETCREATOR { def avail_mem = 3 if (!task.memory) { - log.info '[GATK HaplotypeCaller] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' 
+ log.info '[GATK RealignerTargetCreator] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' } else { avail_mem = task.memory.giga } From 599b264280d55f76eebdfd9cb1c474c338fcb5d0 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Thu, 2 Jun 2022 20:27:16 +0200 Subject: [PATCH 110/168] Standaridse parameters --- modules/gatk/indelrealigner/main.nf | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/modules/gatk/indelrealigner/main.nf b/modules/gatk/indelrealigner/main.nf index 117a298b..22391879 100644 --- a/modules/gatk/indelrealigner/main.nf +++ b/modules/gatk/indelrealigner/main.nf @@ -8,9 +8,11 @@ process GATK_INDELREALIGNER { 'quay.io/biocontainers/gatk:3.5--hdfd78af_11' }" input: - tuple val(meta), path(bam), path(bai), path(intervals) - tuple val(meta), path(fasta) - tuple val(meta), path(known_vcf) + tuple val(meta), path(input), path(index), path(intervals) + path path(fasta) + path(fai) + path(dict) + path(known_vcf) output: tuple val(meta), path("*.bam"), emit: bam @@ -24,9 +26,17 @@ process GATK_INDELREALIGNER { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def known = known_vcf ? "-known ${known_vcf}" : "" + + def avail_mem = 3 + if (!task.memory) { + log.info '[GATK IndelRealigner] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' 
+ } else { + avail_mem = task.memory.giga + } + """ gatk3 \\ - -T RealignerTargetCreator \\ + -T IndelRealigner \\ -R ${fasta} \\ -nt ${task.cpus} -I ${bam} \\ From c469a2b35ed2e2948c9d3cbde5672143086b7e35 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Thu, 2 Jun 2022 20:32:00 +0200 Subject: [PATCH 111/168] Fux tests --- modules/gatk/realignertargetcreator/main.nf | 6 +++--- .../gatk/realignertargetcreator/main.nf | 19 ++++++++----------- 2 files changed, 11 insertions(+), 14 deletions(-) diff --git a/modules/gatk/realignertargetcreator/main.nf b/modules/gatk/realignertargetcreator/main.nf index 866ff8a5..e356c826 100644 --- a/modules/gatk/realignertargetcreator/main.nf +++ b/modules/gatk/realignertargetcreator/main.nf @@ -9,7 +9,7 @@ process GATK_REALIGNERTARGETCREATOR { input: tuple val(meta), path(input), path(index) - path path(fasta) + path(fasta) path(fai) path(dict) path(known_vcf) @@ -25,7 +25,7 @@ process GATK_REALIGNERTARGETCREATOR { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def known = known_vcf ? "-known ${known_vcf}" : "" - if ("$bam" == "${prefix}.bam") error "Input and output names are the same, set prefix in module configuration to disambiguate!" + if ("$input" == "${prefix}.bam") error "Input and output names are the same, set prefix in module configuration to disambiguate!" 
def avail_mem = 3 if (!task.memory) { @@ -39,7 +39,7 @@ process GATK_REALIGNERTARGETCREATOR { -Xmx${avail_mem}g \\ -T RealignerTargetCreator \\ -nt ${task.cpus} \\ - -I ${bam} \\ + -I ${input} \\ -R ${fasta} \\ -o ${prefix}.intervals \\ ${known} \\ diff --git a/tests/modules/gatk/realignertargetcreator/main.nf b/tests/modules/gatk/realignertargetcreator/main.nf index 02e62d93..4b9f8eff 100644 --- a/tests/modules/gatk/realignertargetcreator/main.nf +++ b/tests/modules/gatk/realignertargetcreator/main.nf @@ -6,16 +6,13 @@ include { GATK_REALIGNERTARGETCREATOR } from '../../../../modules/gatk/realigner workflow test_gatk_realignertargetcreator { - input = [ - [ id:'test', single_end:false ], // meta map - file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true) - ] - reference = [ - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true), - file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true), - file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true), - ] + input = [ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true), + ] + fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true) - GATK_REALIGNERTARGETCREATOR ( input, reference, [] ) + GATK_REALIGNERTARGETCREATOR ( input, fasta, fai, dict, [] ) } From de88adc9269d3ae65d626baa164201037df8cac7 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Thu, 2 Jun 2022 20:35:34 +0200 Subject: [PATCH 112/168] 
Standardise parameters (no tests yet until realigntarget creator ready) --- modules/gatk/indelrealigner/main.nf | 9 +++++---- modules/gatk/indelrealigner/meta.yml | 12 ++++++++++-- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/modules/gatk/indelrealigner/main.nf b/modules/gatk/indelrealigner/main.nf index 22391879..f98a99b7 100644 --- a/modules/gatk/indelrealigner/main.nf +++ b/modules/gatk/indelrealigner/main.nf @@ -9,20 +9,20 @@ process GATK_INDELREALIGNER { input: tuple val(meta), path(input), path(index), path(intervals) - path path(fasta) + path(fasta) path(fai) path(dict) path(known_vcf) output: - tuple val(meta), path("*.bam"), emit: bam + tuple val(meta), path("*.bam"), path("*.bai"), emit: bam path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when script: - if ("$bam" == "${prefix}.bam") error "Input and output names are the same, set prefix in module configuration to disambiguate!" + if ("$input" == "${prefix}.bam") error "Input and output names are the same, set prefix in module configuration to disambiguate!" def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def known = known_vcf ? "-known ${known_vcf}" : "" @@ -36,10 +36,11 @@ process GATK_INDELREALIGNER { """ gatk3 \\ + -Xmx${avail_mem}g \\ -T IndelRealigner \\ -R ${fasta} \\ -nt ${task.cpus} - -I ${bam} \\ + -I ${input} \\ -targetIntervals ${intervals} \\ ${known} \\ -o ${prefix}.bam \\ diff --git a/modules/gatk/indelrealigner/meta.yml b/modules/gatk/indelrealigner/meta.yml index 80abc6aa..5abfbfb1 100644 --- a/modules/gatk/indelrealigner/meta.yml +++ b/modules/gatk/indelrealigner/meta.yml @@ -19,11 +19,11 @@ input: description: | Groovy Map containing sample information e.g. 
[ id:'test', single_end:false ] - - bam: + - input: type: file description: Sorted and indexed BAM/CRAM/SAM file pattern: "*.bam" - - bai: + - index: type: file description: BAM index file pattern: "*.bai" @@ -35,6 +35,14 @@ input: type: file description: Reference file used to generate BAM file pattern: ".{fasta,fa,fna}" + - fai: + type: file + description: Index of reference file used to generate BAM file + pattern: ".fai" + - dict: + type: file + description: GATK dict file for reference + pattern: ".dict" - known_vcf: type: file description: Optional input VCF file(s) with known indels From 544b62df45d3043ca2c5ca3a131284fe0d74026b Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Thu, 2 Jun 2022 21:11:17 +0200 Subject: [PATCH 113/168] Star work on GATK UG --- modules/gatk/unifiedgenotyper/main.nf | 50 ++++++++++++++++++ modules/gatk/unifiedgenotyper/meta.yml | 51 +++++++++++++++++++ tests/config/pytest_modules.yml | 4 ++ tests/modules/gatk/unifiedgenotyper/main.nf | 15 ++++++ .../gatk/unifiedgenotyper/nextflow.config | 5 ++ tests/modules/gatk/unifiedgenotyper/test.yml | 14 +++++ 6 files changed, 139 insertions(+) create mode 100644 modules/gatk/unifiedgenotyper/main.nf create mode 100644 modules/gatk/unifiedgenotyper/meta.yml create mode 100644 tests/modules/gatk/unifiedgenotyper/main.nf create mode 100644 tests/modules/gatk/unifiedgenotyper/nextflow.config create mode 100644 tests/modules/gatk/unifiedgenotyper/test.yml diff --git a/modules/gatk/unifiedgenotyper/main.nf b/modules/gatk/unifiedgenotyper/main.nf new file mode 100644 index 00000000..4dd6e408 --- /dev/null +++ b/modules/gatk/unifiedgenotyper/main.nf @@ -0,0 +1,50 @@ +process GATK_UNIFIEDGENOTYPER { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? "bioconda::gatk=3.5" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/gatk:3.5--hdfd78af_11': + 'quay.io/biocontainers/gatk:3.5--hdfd78af_11' }" + + input: + tuple val(meta), path(input), path(index) + path(fasta) + path(fai) + path(dict) + path(known_vcf) + + output: + tuple val(meta), path("*.bam"), emit: bam + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + def avail_mem = 3 + if (!task.memory) { + log.info '[GATK RealignerTargetCreator] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = task.memory.giga + } + + """ + gatk3 \\ + -Xmx${avail_mem}g \\ + -nt ${task.cpus} \\ + -T UnifiedGenotyper \\ + -I ${input} \\ + -R ${fasta} \\ + -o ${prefix}.vcf \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//' )) + END_VERSIONS + """ +} diff --git a/modules/gatk/unifiedgenotyper/meta.yml b/modules/gatk/unifiedgenotyper/meta.yml new file mode 100644 index 00000000..7a992591 --- /dev/null +++ b/modules/gatk/unifiedgenotyper/meta.yml @@ -0,0 +1,51 @@ +name: "gatk_unifiedgenotyper" +## TODO nf-core: Add a description of the module and list keywords +description: write your description here +keywords: + - sort +tools: + - "gatk": + ## TODO nf-core: Add a description and other details for the software below + description: "The full Genome Analysis Toolkit (GATK) framework, license restricted." 
+ homepage: "None" + documentation: "None" + tool_dev_url: "None" + doi: "" + licence: "['BSD', 'https://www.broadinstitute.org/gatk/about/#licensing', 'https://software.broadinstitute.org/gatk/download/licensing']" + +## TODO nf-core: Add a description of all of the variables used as input +input: + # Only when we have meta + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + # + ## TODO nf-core: Delete / customise this example input + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + +## TODO nf-core: Add a description of all of the variables used as output +output: + #Only when we have meta + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + # + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + ## TODO nf-core: Delete / customise this example output + - bam: + type: file + description: Sorted BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + +authors: + - "@jfy133" diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index d4bfac5b..0d3a21c2 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -715,6 +715,10 @@ gamma/gamma: - modules/gamma/gamma/** - tests/modules/gamma/gamma/** +gatk/unifiedgenotyper: + - modules/gatk/unifiedgenotyper/** + - tests/modules/gatk/unifiedgenotyper/** + gatk4/applybqsr: - modules/gatk4/applybqsr/** - tests/modules/gatk4/applybqsr/** diff --git a/tests/modules/gatk/unifiedgenotyper/main.nf b/tests/modules/gatk/unifiedgenotyper/main.nf new file mode 100644 index 00000000..86a23790 --- /dev/null +++ b/tests/modules/gatk/unifiedgenotyper/main.nf @@ -0,0 +1,15 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { GATK_UNIFIEDGENOTYPER } from '../../../../modules/gatk/unifiedgenotyper/main.nf' + +workflow test_gatk_unifiedgenotyper { + + input 
= [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true) + ] + + GATK_UNIFIEDGENOTYPER ( input ) +} diff --git a/tests/modules/gatk/unifiedgenotyper/nextflow.config b/tests/modules/gatk/unifiedgenotyper/nextflow.config new file mode 100644 index 00000000..50f50a7a --- /dev/null +++ b/tests/modules/gatk/unifiedgenotyper/nextflow.config @@ -0,0 +1,5 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + +} \ No newline at end of file diff --git a/tests/modules/gatk/unifiedgenotyper/test.yml b/tests/modules/gatk/unifiedgenotyper/test.yml new file mode 100644 index 00000000..aad9aec9 --- /dev/null +++ b/tests/modules/gatk/unifiedgenotyper/test.yml @@ -0,0 +1,14 @@ +## TODO nf-core: Please run the following command to build this file: +# nf-core modules create-test-yml gatk/unifiedgenotyper +- name: "gatk unifiedgenotyper" + command: nextflow run ./tests/modules/gatk/unifiedgenotyper -entry test_gatk_unifiedgenotyper -c ./tests/config/nextflow.config -c ./tests/modules/gatk/unifiedgenotyper/nextflow.config + tags: + - "gatk" + # + - "gatk/unifiedgenotyper" + # + files: + - path: "output/gatk/test.bam" + md5sum: e667c7caad0bc4b7ac383fd023c654fc + - path: output/gatk/versions.yml + md5sum: a01fe51bc4c6a3a6226fbf77b2c7cf3b From a14855dbe8648ca445759f57a1a2805ec4812615 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Thu, 2 Jun 2022 21:22:21 +0200 Subject: [PATCH 114/168] Start meta --- modules/gatk/unifiedgenotyper/main.nf | 16 ++++++- modules/gatk/unifiedgenotyper/meta.yml | 59 ++++++++++++++++---------- 2 files changed, 51 insertions(+), 24 deletions(-) diff --git a/modules/gatk/unifiedgenotyper/main.nf b/modules/gatk/unifiedgenotyper/main.nf index 4dd6e408..896a347e 100644 --- a/modules/gatk/unifiedgenotyper/main.nf +++ b/modules/gatk/unifiedgenotyper/main.nf @@ -13,9 +13,13 @@ process GATK_UNIFIEDGENOTYPER { 
path(fai) path(dict) path(known_vcf) + path(intervals) + path(contamination) + path(dbsnps) + path(comp) output: - tuple val(meta), path("*.bam"), emit: bam + tuple val(meta), path("*.vcf.gz"), emit: vcf path "versions.yml" , emit: versions when: @@ -24,6 +28,10 @@ process GATK_UNIFIEDGENOTYPER { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def contamination_file = contamination ? "-contaminationFile ${contamination}" : "" + def dbsnps_file = dbsnps ? "--dbsnp ${dbsnps}" : "" + def comp_file = comp ? "--comp ${comp}" : "" + def intervals_file = intervals ? "--intervals ${intervals}" : "" def avail_mem = 3 if (!task.memory) { @@ -39,9 +47,15 @@ process GATK_UNIFIEDGENOTYPER { -T UnifiedGenotyper \\ -I ${input} \\ -R ${fasta} \\ + ${contamination_file} \\ + ${dbsnps_file} \\ + ${comp_file} \\ + ${intervals_file} -o ${prefix}.vcf \\ $args + gzip -n *.vcf + cat <<-END_VERSIONS > versions.yml "${task.process}": gatk: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//' )) diff --git a/modules/gatk/unifiedgenotyper/meta.yml b/modules/gatk/unifiedgenotyper/meta.yml index 7a992591..5f85c451 100644 --- a/modules/gatk/unifiedgenotyper/meta.yml +++ b/modules/gatk/unifiedgenotyper/meta.yml @@ -1,51 +1,64 @@ name: "gatk_unifiedgenotyper" -## TODO nf-core: Add a description of the module and list keywords -description: write your description here keywords: - - sort + - bam + - vcf + - variant calling + - indel + - realignment + - targets tools: - "gatk": - ## TODO nf-core: Add a description and other details for the software below description: "The full Genome Analysis Toolkit (GATK) framework, license restricted." 
- homepage: "None" - documentation: "None" - tool_dev_url: "None" - doi: "" - licence: "['BSD', 'https://www.broadinstitute.org/gatk/about/#licensing', 'https://software.broadinstitute.org/gatk/download/licensing']" + homepage: "https://gatk.broadinstitute.org/hc/en-us" + documentation: "https://github.com/broadinstitute/gatk-docs" + licence: "['https://software.broadinstitute.org/gatk/download/licensing', 'BSD', 'https://www.broadinstitute.org/gatk/about/#licensing']" -## TODO nf-core: Add a description of all of the variables used as input input: - # Only when we have meta - meta: type: map description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - # - ## TODO nf-core: Delete / customise this example input - - bam: + - input: type: file - description: BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" + description: Sorted and indexed BAM/CRAM/SAM file + pattern: "*.bam" + - index: + type: file + description: BAM index file + pattern: "*.bai" + - fasta: + type: file + description: Reference file used to generate BAM file + pattern: ".{fasta,fa,fna}" + - fai: + type: file + description: Index of reference file used to generate BAM file + pattern: ".fai" + - dict: + type: file + description: GATK dict file for reference + pattern: ".dict" + - known_vcf: + type: file + description: Optional input VCF file(s) with known indels + pattern: ".vcf" -## TODO nf-core: Add a description of all of the variables used as output output: - #Only when we have meta - meta: type: map description: | Groovy Map containing sample information e.g. 
[ id:'test', single_end:false ] - # - versions: type: file description: File containing software versions pattern: "versions.yml" - ## TODO nf-core: Delete / customise this example output - - bam: + - vcf: type: file - description: Sorted BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" + description: VCF file containing called variants + pattern: "*.vcf.gz" authors: + - "@ilight1542" - "@jfy133" From a565d9072aba07b369c6e3c43f4168556d32678c Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Thu, 2 Jun 2022 21:39:39 +0200 Subject: [PATCH 115/168] Add GATK UnifiedGenotyper --- modules/gatk/unifiedgenotyper/main.nf | 13 ++++++------ modules/gatk/unifiedgenotyper/meta.yml | 21 ++++++++++++++------ tests/modules/gatk/unifiedgenotyper/main.nf | 15 ++++++++------ tests/modules/gatk/unifiedgenotyper/test.yml | 19 +++++++----------- 4 files changed, 37 insertions(+), 31 deletions(-) diff --git a/modules/gatk/unifiedgenotyper/main.nf b/modules/gatk/unifiedgenotyper/main.nf index 896a347e..47d83def 100644 --- a/modules/gatk/unifiedgenotyper/main.nf +++ b/modules/gatk/unifiedgenotyper/main.nf @@ -12,15 +12,14 @@ process GATK_UNIFIEDGENOTYPER { path(fasta) path(fai) path(dict) - path(known_vcf) path(intervals) path(contamination) - path(dbsnps) + path(dbsnp) path(comp) output: tuple val(meta), path("*.vcf.gz"), emit: vcf - path "versions.yml" , emit: versions + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -29,7 +28,7 @@ process GATK_UNIFIEDGENOTYPER { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def contamination_file = contamination ? "-contaminationFile ${contamination}" : "" - def dbsnps_file = dbsnps ? "--dbsnp ${dbsnps}" : "" + def dbsnp_file = dbsnp ? "--dbsnp ${dbsnp}" : "" def comp_file = comp ? "--comp ${comp}" : "" def intervals_file = intervals ? 
"--intervals ${intervals}" : "" @@ -48,9 +47,9 @@ process GATK_UNIFIEDGENOTYPER { -I ${input} \\ -R ${fasta} \\ ${contamination_file} \\ - ${dbsnps_file} \\ + ${dbsnp_file} \\ ${comp_file} \\ - ${intervals_file} + ${intervals_file} \\ -o ${prefix}.vcf \\ $args @@ -58,7 +57,7 @@ process GATK_UNIFIEDGENOTYPER { cat <<-END_VERSIONS > versions.yml "${task.process}": - gatk: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//' )) + gatk: \$(echo \$(gatk3 --version)) END_VERSIONS """ } diff --git a/modules/gatk/unifiedgenotyper/meta.yml b/modules/gatk/unifiedgenotyper/meta.yml index 5f85c451..e414d146 100644 --- a/modules/gatk/unifiedgenotyper/meta.yml +++ b/modules/gatk/unifiedgenotyper/meta.yml @@ -3,9 +3,6 @@ keywords: - bam - vcf - variant calling - - indel - - realignment - - targets tools: - "gatk": description: "The full Genome Analysis Toolkit (GATK) framework, license restricted." @@ -39,10 +36,22 @@ input: type: file description: GATK dict file for reference pattern: ".dict" - - known_vcf: + - intervals: type: file - description: Optional input VCF file(s) with known indels - pattern: ".vcf" + description: Bed file with the genomic regions included in the library (optional) + pattern: "*.intervals" + - contamination: + type: file + description: Tab-separated file containing fraction of contamination in sequencing data (per sample) to aggressively remove + pattern: "*" + - dbsnps: + type: file + description: VCF file containing known sites (optional) + pattern: "*" + - comp: + type: file + description: Comparison VCF file (optional) + pattern: "*" output: - meta: diff --git a/tests/modules/gatk/unifiedgenotyper/main.nf b/tests/modules/gatk/unifiedgenotyper/main.nf index 86a23790..6d145e76 100644 --- a/tests/modules/gatk/unifiedgenotyper/main.nf +++ b/tests/modules/gatk/unifiedgenotyper/main.nf @@ -5,11 +5,14 @@ nextflow.enable.dsl = 2 include { GATK_UNIFIEDGENOTYPER } from '../../../../modules/gatk/unifiedgenotyper/main.nf' workflow 
test_gatk_unifiedgenotyper { - - input = [ - [ id:'test', single_end:false ], // meta map - file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true) - ] - GATK_UNIFIEDGENOTYPER ( input ) + input = [ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true), + ] + fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true) + + GATK_UNIFIEDGENOTYPER ( input, fasta, fai, dict, [], [], [], []) } diff --git a/tests/modules/gatk/unifiedgenotyper/test.yml b/tests/modules/gatk/unifiedgenotyper/test.yml index aad9aec9..498e7991 100644 --- a/tests/modules/gatk/unifiedgenotyper/test.yml +++ b/tests/modules/gatk/unifiedgenotyper/test.yml @@ -1,14 +1,9 @@ -## TODO nf-core: Please run the following command to build this file: -# nf-core modules create-test-yml gatk/unifiedgenotyper -- name: "gatk unifiedgenotyper" - command: nextflow run ./tests/modules/gatk/unifiedgenotyper -entry test_gatk_unifiedgenotyper -c ./tests/config/nextflow.config -c ./tests/modules/gatk/unifiedgenotyper/nextflow.config +- name: gatk unifiedgenotyper test_gatk_unifiedgenotyper + command: nextflow run ./tests/modules/gatk/unifiedgenotyper -entry test_gatk_unifiedgenotyper -c ./tests/config/nextflow.config -c ./tests/modules/gatk/unifiedgenotyper/nextflow.config tags: - - "gatk" - # - - "gatk/unifiedgenotyper" - # + - gatk + - gatk/unifiedgenotyper files: - - path: "output/gatk/test.bam" - md5sum: e667c7caad0bc4b7ac383fd023c654fc - - path: output/gatk/versions.yml - md5sum: a01fe51bc4c6a3a6226fbf77b2c7cf3b + - path: output/gatk/test.vcf.gz + contains: + - "#CHROM POS ID REF ALT QUAL FILTER 
INFO FORMAT test" From 2c9e2f5e73e5a802a6cfca6d72e51f297a372fb1 Mon Sep 17 00:00:00 2001 From: Rike Date: Fri, 3 Jun 2022 10:52:50 +0200 Subject: [PATCH 116/168] Update TIDDIT to 3.0.0 --- modules/tiddit/sv/main.nf | 13 ++++++++----- tests/modules/tiddit/sv/main.nf | 27 +++++++++++++++++++++------ 2 files changed, 29 insertions(+), 11 deletions(-) diff --git a/modules/tiddit/sv/main.nf b/modules/tiddit/sv/main.nf index b3e3813c..cff3dbfb 100644 --- a/modules/tiddit/sv/main.nf +++ b/modules/tiddit/sv/main.nf @@ -2,15 +2,16 @@ process TIDDIT_SV { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::tiddit=2.12.1" : null) + conda (params.enable_conda ? "bioconda::tiddit=3.0.0" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/tiddit:2.12.1--py38h1773678_0' : - 'quay.io/biocontainers/tiddit:2.12.1--py38h1773678_0' }" + 'https://depot.galaxyproject.org/singularity/tiddit:3.0.0--py39h59fae87_1' : + 'quay.io/biocontainers/tiddit:3.0.0--py39h59fae87_1' }" input: - tuple val(meta), path(bam) + tuple val(meta), path(input) path fasta path fai + path bwa_index output: tuple val(meta), path("*.vcf") , emit: vcf @@ -26,10 +27,12 @@ process TIDDIT_SV { def prefix = task.ext.prefix ?: "${meta.id}" def reference = fasta ? 
"--ref $fasta" : "" """ + INDEX=`find -L ./ -name "*.amb" | sed 's/.amb//'` + tiddit \\ --sv \\ $args \\ - --bam $bam \\ + --bam $input \\ $reference \\ -o $prefix diff --git a/tests/modules/tiddit/sv/main.nf b/tests/modules/tiddit/sv/main.nf index 8dae4950..72113427 100644 --- a/tests/modules/tiddit/sv/main.nf +++ b/tests/modules/tiddit/sv/main.nf @@ -2,25 +2,40 @@ nextflow.enable.dsl = 2 +include { BWA_INDEX } from '../../../../modules/bwa/index/main.nf' include { TIDDIT_SV } from '../../../../modules/tiddit/sv/main.nf' workflow test_tiddit_sv { - input = [ + input = [ [ id:'test' ], // meta map - [ file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) ] + [ file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) ] ] - + fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) - TIDDIT_SV ( input, fasta, fai ) + BWA_INDEX( fasta ) + + TIDDIT_SV ( input, fasta, fai , BWA_INDEX.out.index) } workflow test_tiddit_sv_no_ref { - input = [ + input = [ [ id:'test' ], // meta map - [ file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) ] + [ file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) ] ] TIDDIT_SV ( input, [], [] ) } + +workflow test_tiddit_sv_cram { + input = [ + [ id:'test' ], // meta map + [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true) ] + ] + + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + + TIDDIT_SV ( input, fasta, fai ) +} From 40996cdbe1874f9c2ba693fee76d2b4f316f95f2 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Fri, 3 Jun 2022 12:07:16 +0200 Subject: 
[PATCH 117/168] update tiddit/cov + more tests --- modules/tiddit/cov/main.nf | 16 +++++------ modules/tiddit/cov/meta.yml | 2 +- tests/modules/tiddit/cov/main.nf | 35 ++++++++++++++++++------ tests/modules/tiddit/cov/nextflow.config | 4 +++ tests/modules/tiddit/cov/test.yml | 34 +++++++++++++++++------ 5 files changed, 66 insertions(+), 25 deletions(-) diff --git a/modules/tiddit/cov/main.nf b/modules/tiddit/cov/main.nf index 578c4043..4d7e2827 100644 --- a/modules/tiddit/cov/main.nf +++ b/modules/tiddit/cov/main.nf @@ -2,17 +2,17 @@ process TIDDIT_COV { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::tiddit=2.12.1" : null) + conda (params.enable_conda ? "bioconda::tiddit=3.0.0" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/tiddit:2.12.1--py38h1773678_0' : - 'quay.io/biocontainers/tiddit:2.12.1--py38h1773678_0' }" + 'https://depot.galaxyproject.org/singularity/tiddit:3.0.0--py39h59fae87_1' : + 'quay.io/biocontainers/tiddit:3.0.0--py39h59fae87_1' }" input: - tuple val(meta), path(bam) + tuple val(meta), path(input) path fasta output: - tuple val(meta), path("*.tab"), optional: true, emit: cov + tuple val(meta), path("*.bed"), optional: true, emit: cov tuple val(meta), path("*.wig"), optional: true, emit: wig path "versions.yml" , emit: versions @@ -28,12 +28,12 @@ process TIDDIT_COV { --cov \\ -o $prefix \\ $args \\ - --bam $bam \\ + --bam $input \\ $reference cat <<-END_VERSIONS > versions.yml "${task.process}": - tiddit: \$(echo \$(tiddit 2>&1) | sed 's/^.*TIDDIT-//; s/ .*\$//') + tiddit: \$(echo \$(tiddit 2>&1) | sed 's/^.*tiddit-//; s/ .*\$//') END_VERSIONS """ @@ -45,7 +45,7 @@ process TIDDIT_COV { cat <<-END_VERSIONS > versions.yml "${task.process}": - tiddit: \$(echo \$(tiddit 2>&1) | sed 's/^.*TIDDIT-//; s/ .*\$//') + tiddit: \$(echo \$(tiddit 2>&1) | sed 's/^.*tiddit-//; s/ .*\$//') END_VERSIONS """ } diff --git 
a/modules/tiddit/cov/meta.yml b/modules/tiddit/cov/meta.yml index d925b783..98ea27c2 100644 --- a/modules/tiddit/cov/meta.yml +++ b/modules/tiddit/cov/meta.yml @@ -19,7 +19,7 @@ input: description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - bam: + - input: type: file description: BAM/CRAM file pattern: "*.{bam,cram}" diff --git a/tests/modules/tiddit/cov/main.nf b/tests/modules/tiddit/cov/main.nf index 1bb35145..0032c759 100644 --- a/tests/modules/tiddit/cov/main.nf +++ b/tests/modules/tiddit/cov/main.nf @@ -2,22 +2,41 @@ nextflow.enable.dsl = 2 -include { TIDDIT_COV } from '../../../../modules/tiddit/cov/main.nf' +include { TIDDIT_COV as TIDDIT_COV_BED } from '../../../../modules/tiddit/cov/main.nf' +include { TIDDIT_COV as TIDDIT_COV_WIG } from '../../../../modules/tiddit/cov/main.nf' -workflow test_tiddit_cov { +workflow test_tiddit_cov_cram_bed { input = [ [ id:'test', single_end:false ], // meta map - file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) ] + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true) ] - fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) - TIDDIT_COV ( input, fasta ) + TIDDIT_COV_BED ( input, fasta ) } -workflow test_tiddit_cov_no_ref { +workflow test_tiddit_cov_bam_bed { input = [ [ id:'test', single_end:false ], // meta map - file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) ] + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) ] - TIDDIT_COV ( input, [] ) + TIDDIT_COV_BED ( input, [] ) +} + +workflow test_tiddit_cov_cram_wig { + + input = [ [ id:'test', single_end:false ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], 
checkIfExists: true) ] + + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + + TIDDIT_COV_WIG ( input, fasta ) +} + +workflow test_tiddit_cov_bam_wig { + + input = [ [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) ] + + TIDDIT_COV_WIG ( input, [] ) } diff --git a/tests/modules/tiddit/cov/nextflow.config b/tests/modules/tiddit/cov/nextflow.config index 8730f1c4..e185a5cf 100644 --- a/tests/modules/tiddit/cov/nextflow.config +++ b/tests/modules/tiddit/cov/nextflow.config @@ -2,4 +2,8 @@ process { publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + withName: TIDDIT_COV_WIG { + ext.args = '-w' + } + } diff --git a/tests/modules/tiddit/cov/test.yml b/tests/modules/tiddit/cov/test.yml index 90c4cbb3..916cc9e3 100644 --- a/tests/modules/tiddit/cov/test.yml +++ b/tests/modules/tiddit/cov/test.yml @@ -1,17 +1,35 @@ -- name: tiddit cov test_tiddit_cov - command: nextflow run ./tests/modules/tiddit/cov -entry test_tiddit_cov -c ./tests/config/nextflow.config -c ./tests/modules/tiddit/cov/nextflow.config +- name: tiddit cov test_tiddit_cov_cram_bed + command: nextflow run ./tests/modules/tiddit/cov -entry test_tiddit_cov_cram_bed -c ./tests/config/nextflow.config -c ./tests/modules/tiddit/cov/nextflow.config tags: - tiddit - tiddit/cov files: - - path: output/tiddit/test.tab - md5sum: f7974948f809f94879d8a60b726194f5 + - path: output/tiddit/test.bed + md5sum: 3b1a28c62a5f25bbba77c1042e9abdf7 -- name: tiddit cov test_tiddit_cov_no_ref - command: nextflow run ./tests/modules/tiddit/cov -entry test_tiddit_cov_no_ref -c ./tests/config/nextflow.config -c ./tests/modules/tiddit/cov/nextflow.config +- name: tiddit cov test_tiddit_cov_bam_bed + command: nextflow run ./tests/modules/tiddit/cov -entry test_tiddit_cov_bam_bed -c ./tests/config/nextflow.config -c 
./tests/modules/tiddit/cov/nextflow.config tags: - tiddit - tiddit/cov files: - - path: output/tiddit/test.tab - md5sum: f7974948f809f94879d8a60b726194f5 + - path: output/tiddit/test.bed + md5sum: 9d1474f1c7c6516205254077087bb026 + +- name: tiddit cov test_tiddit_cov_cram_wig + command: nextflow run ./tests/modules/tiddit/cov -entry test_tiddit_cov_cram_wig -c ./tests/config/nextflow.config -c ./tests/modules/tiddit/cov/nextflow.config + tags: + - tiddit + - tiddit/cov + files: + - path: output/tiddit/test.wig + md5sum: ca3645fd0c3491c86c075c91d16d57c4 + +- name: tiddit cov test_tiddit_cov_bam_wig + command: nextflow run ./tests/modules/tiddit/cov -entry test_tiddit_cov_bam_wig -c ./tests/config/nextflow.config -c ./tests/modules/tiddit/cov/nextflow.config + tags: + - tiddit + - tiddit/cov + files: + - path: output/tiddit/test.wig + md5sum: 44bea2ac6a56774738e65773065da670 From f1a5cc07dbcf173654c793ac4eb63e3fb83281ab Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Fri, 3 Jun 2022 13:00:29 +0200 Subject: [PATCH 118/168] restructure tiddit/sv + test (just one for now) --- modules/tiddit/sv/main.nf | 19 +++++++++++-------- tests/modules/tiddit/sv/main.nf | 26 +++----------------------- tests/modules/tiddit/sv/test.yml | 25 +++++-------------------- 3 files changed, 19 insertions(+), 51 deletions(-) diff --git a/modules/tiddit/sv/main.nf b/modules/tiddit/sv/main.nf index cff3dbfb..d0d5362e 100644 --- a/modules/tiddit/sv/main.nf +++ b/modules/tiddit/sv/main.nf @@ -8,16 +8,15 @@ process TIDDIT_SV { 'quay.io/biocontainers/tiddit:3.0.0--py39h59fae87_1' }" input: - tuple val(meta), path(input) + tuple val(meta), path(input), path(index) path fasta path fai path bwa_index output: - tuple val(meta), path("*.vcf") , emit: vcf - tuple val(meta), path("*.ploidy.tab") , emit: ploidy - tuple val(meta), path("*.signals.tab"), emit: signals - path "versions.yml" , emit: versions + tuple val(meta), path("*.vcf") , emit: vcf + tuple val(meta), path("*.ploidies.tab"), emit: ploidy + 
path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -27,7 +26,11 @@ process TIDDIT_SV { def prefix = task.ext.prefix ?: "${meta.id}" def reference = fasta ? "--ref $fasta" : "" """ - INDEX=`find -L ./ -name "*.amb" | sed 's/.amb//'` + for i in `ls ${bwa_index}` + do + ln -s ${bwa_index}/\$i ${fasta}.\${i##*.} + done + tiddit \\ --sv \\ @@ -38,7 +41,7 @@ process TIDDIT_SV { cat <<-END_VERSIONS > versions.yml "${task.process}": - tiddit: \$(echo \$(tiddit 2>&1) | sed 's/^.*TIDDIT-//; s/ .*\$//') + tiddit: \$(echo \$(tiddit 2>&1) | sed 's/^.*tiddit-//; s/ .*\$//') END_VERSIONS """ @@ -51,7 +54,7 @@ process TIDDIT_SV { cat <<-END_VERSIONS > versions.yml "${task.process}": - tiddit: \$(echo \$(tiddit 2>&1) | sed 's/^.*TIDDIT-//; s/ .*\$//') + tiddit: \$(echo \$(tiddit 2>&1) | sed 's/^.*tiddit-//; s/ .*\$//') END_VERSIONS """ } diff --git a/tests/modules/tiddit/sv/main.nf b/tests/modules/tiddit/sv/main.nf index 72113427..2c09404e 100644 --- a/tests/modules/tiddit/sv/main.nf +++ b/tests/modules/tiddit/sv/main.nf @@ -5,10 +5,11 @@ nextflow.enable.dsl = 2 include { BWA_INDEX } from '../../../../modules/bwa/index/main.nf' include { TIDDIT_SV } from '../../../../modules/tiddit/sv/main.nf' -workflow test_tiddit_sv { +workflow test_tiddit_sv_bam { input = [ [ id:'test' ], // meta map - [ file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) ] + [ file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) ], + [ file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true) ] ] fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) @@ -18,24 +19,3 @@ workflow test_tiddit_sv { TIDDIT_SV ( input, fasta, fai , BWA_INDEX.out.index) } - -workflow test_tiddit_sv_no_ref { - input = [ - [ id:'test' ], // meta map - [ file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], 
checkIfExists: true) ] - ] - - TIDDIT_SV ( input, [], [] ) -} - -workflow test_tiddit_sv_cram { - input = [ - [ id:'test' ], // meta map - [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true) ] - ] - - fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) - fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) - - TIDDIT_SV ( input, fasta, fai ) -} diff --git a/tests/modules/tiddit/sv/test.yml b/tests/modules/tiddit/sv/test.yml index 40ea5b4d..732b8434 100644 --- a/tests/modules/tiddit/sv/test.yml +++ b/tests/modules/tiddit/sv/test.yml @@ -1,25 +1,10 @@ -- name: tiddit sv - command: nextflow run ./tests/modules/tiddit/sv -entry test_tiddit_sv -c ./tests/config/nextflow.config -c ./tests/modules/tiddit/sv/nextflow.config +- name: tiddit sv test_tiddit_sv_bam + command: nextflow run ./tests/modules/tiddit/sv -entry test_tiddit_sv_bam -c ./tests/config/nextflow.config -c ./tests/modules/tiddit/sv/nextflow.config tags: - tiddit - tiddit/sv files: - - path: output/tiddit/test.ploidy.tab - md5sum: 45e050b0e204f0a5a3a99627cc440eaa - - path: output/tiddit/test.signals.tab - md5sum: dab4b2fec4ddf8eb1c23005b0770150e + - path: output/tiddit/test.ploidies.tab + md5sum: 6319d3611f7b6b94425a184d274b3dfc - path: output/tiddit/test.vcf - md5sum: bdce14ae8292bf3deb81f6f255baf859 - -- name: tiddit sv no ref - command: nextflow run ./tests/modules/tiddit/sv -entry test_tiddit_sv_no_ref -c ./tests/config/nextflow.config -c ./tests/modules/tiddit/sv/nextflow.config - tags: - - tiddit - - tiddit/sv - files: - - path: output/tiddit/test.ploidy.tab - md5sum: 45e050b0e204f0a5a3a99627cc440eaa - - path: output/tiddit/test.signals.tab - md5sum: dab4b2fec4ddf8eb1c23005b0770150e - - path: output/tiddit/test.vcf - md5sum: 3d0e83a8199b2bdb81cfe3e6b12bf64b + md5sum: 41d3f8746f0420f894104321b7e64f67 From 230224700edcdf3a8c2d241cafc974e4724e48f4 Mon Sep 17 00:00:00 
2001 From: SusiJo Date: Fri, 3 Jun 2022 12:45:52 +0200 Subject: [PATCH 119/168] change prefix --- modules/gatk4/markduplicatesspark/main.nf | 2 +- tests/modules/gatk4/markduplicatesspark/main.nf | 14 ++++++++++++++ .../gatk4/markduplicatesspark/nextflow.config | 8 ++++++++ 3 files changed, 23 insertions(+), 1 deletion(-) diff --git a/modules/gatk4/markduplicatesspark/main.nf b/modules/gatk4/markduplicatesspark/main.nf index 703623d5..55049fba 100644 --- a/modules/gatk4/markduplicatesspark/main.nf +++ b/modules/gatk4/markduplicatesspark/main.nf @@ -23,7 +23,7 @@ process GATK4_MARKDUPLICATES_SPARK { script: def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}.bam" + def prefix = task.ext.prefix ?: "${meta.id}" def input_list = bam.collect{"--input $it"}.join(' ') diff --git a/tests/modules/gatk4/markduplicatesspark/main.nf b/tests/modules/gatk4/markduplicatesspark/main.nf index ac89b1b3..004fbb1e 100644 --- a/tests/modules/gatk4/markduplicatesspark/main.nf +++ b/tests/modules/gatk4/markduplicatesspark/main.nf @@ -3,6 +3,7 @@ nextflow.enable.dsl = 2 include { GATK4_MARKDUPLICATES_SPARK } from '../../../../modules/gatk4/markduplicatesspark/main.nf' +include { GATK4_MARKDUPLICATES_SPARK as GATK4_MARKDUPLICATES_SPARK_CRAM } from '../../../../modules/gatk4/markduplicatesspark/main.nf' include { GATK4_MARKDUPLICATES_SPARK as GATK4_MARKDUPLICATES_SPARK_METRICS } from '../../../../modules/gatk4/markduplicatesspark/main.nf' workflow test_gatk4_markduplicates_spark { @@ -29,6 +30,19 @@ workflow test_gatk4_markduplicates_spark_multiple_bams { GATK4_MARKDUPLICATES_SPARK ( input, fasta, fai, dict ) } +// chr 22 +workflow test_gatk4_markduplicates_spark_multiple_bams_cram_out { + input = [ [ id:'test', single_end:false ], // meta map + [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_name_sorted_bam'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_name_sorted_bam'], checkIfExists: true) + ] ] 
+ fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) + + GATK4_MARKDUPLICATES_SPARK_CRAM ( input, fasta, fai, dict ) +} + // chr 22 workflow test_gatk4_markduplicates_spark_multiple_bams_metrics { input = [ [ id:'test', single_end:false ], // meta map diff --git a/tests/modules/gatk4/markduplicatesspark/nextflow.config b/tests/modules/gatk4/markduplicatesspark/nextflow.config index 495a42f3..565016b7 100644 --- a/tests/modules/gatk4/markduplicatesspark/nextflow.config +++ b/tests/modules/gatk4/markduplicatesspark/nextflow.config @@ -2,9 +2,17 @@ process { publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + withName: GATK4_MARKDUPLICATES_SPARK { + ext.prefix = { "${meta.id}.bam" } + } + withName: GATK4_MARKDUPLICATES_SPARK_CRAM { + ext.prefix = { "${meta.id}.cram" } + } withName: GATK4_MARKDUPLICATES_SPARK_METRICS { + ext.prefix = { "${meta.id}.bam" } ext.args = '--metrics-file test.metrics' } + } // override tests/config/nextflow.config docker.userEmulation = false From e2bb56608c6934f5228e38a157eae3f8fc58742f Mon Sep 17 00:00:00 2001 From: SusiJo Date: Fri, 3 Jun 2022 13:50:45 +0200 Subject: [PATCH 120/168] change prefix output --- modules/gatk4/markduplicatesspark/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/gatk4/markduplicatesspark/main.nf b/modules/gatk4/markduplicatesspark/main.nf index 55049fba..79e12814 100644 --- a/modules/gatk4/markduplicatesspark/main.nf +++ b/modules/gatk4/markduplicatesspark/main.nf @@ -14,7 +14,7 @@ process GATK4_MARKDUPLICATES_SPARK { path dict output: - tuple val(meta), path("*.bam"), emit: bam + tuple val(meta), path("${prefix}"), emit: output tuple val(meta), path("*.metrics"), emit: metrics, optional: true path 
"versions.yml" , emit: versions From f9ac4e28e99d3fdd499d15bfb82ec529f35aacb4 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Fri, 3 Jun 2022 15:37:10 +0200 Subject: [PATCH 121/168] cover all use cases --- modules/tiddit/sv/main.nf | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/modules/tiddit/sv/main.nf b/modules/tiddit/sv/main.nf index d0d5362e..712f364b 100644 --- a/modules/tiddit/sv/main.nf +++ b/modules/tiddit/sv/main.nf @@ -26,11 +26,7 @@ process TIDDIT_SV { def prefix = task.ext.prefix ?: "${meta.id}" def reference = fasta ? "--ref $fasta" : "" """ - for i in `ls ${bwa_index}` - do - ln -s ${bwa_index}/\$i ${fasta}.\${i##*.} - done - + [[ -d $bwa_index ]] && for i in `ls $bwa_index`; do [[ -f $fasta && ! "\$i" =~ .*"$fasta".* ]] && ln -s $bwa_index/\$i ${fasta}.\${i##*.} || ln -s $bwa_index/\$i \$i; done tiddit \\ --sv \\ From 97e66015caec35f7fd6d095b9fc797549fee9efc Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Fri, 3 Jun 2022 15:51:02 +0200 Subject: [PATCH 122/168] add test + fix issues --- modules/tiddit/sv/main.nf | 7 ++----- modules/tiddit/sv/meta.yml | 20 ++++++++++++-------- tests/modules/tiddit/sv/main.nf | 17 +++++++++++++++-- tests/modules/tiddit/sv/test.yml | 11 +++++++++++ 4 files changed, 40 insertions(+), 15 deletions(-) diff --git a/modules/tiddit/sv/main.nf b/modules/tiddit/sv/main.nf index 712f364b..ee2a8d75 100644 --- a/modules/tiddit/sv/main.nf +++ b/modules/tiddit/sv/main.nf @@ -10,7 +10,6 @@ process TIDDIT_SV { input: tuple val(meta), path(input), path(index) path fasta - path fai path bwa_index output: @@ -24,7 +23,6 @@ process TIDDIT_SV { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def reference = fasta ? "--ref $fasta" : "" """ [[ -d $bwa_index ]] && for i in `ls $bwa_index`; do [[ -f $fasta && ! 
"\$i" =~ .*"$fasta".* ]] && ln -s $bwa_index/\$i ${fasta}.\${i##*.} || ln -s $bwa_index/\$i \$i; done @@ -32,7 +30,7 @@ process TIDDIT_SV { --sv \\ $args \\ --bam $input \\ - $reference \\ + --ref $fasta \\ -o $prefix cat <<-END_VERSIONS > versions.yml @@ -45,8 +43,7 @@ process TIDDIT_SV { def prefix = task.ext.prefix ?: "${meta.id}" """ touch ${prefix}.vcf - touch ${prefix}.ploidy.tab - touch ${prefix}.signals.tab + touch ${prefix}.ploidies.tab cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/tiddit/sv/meta.yml b/modules/tiddit/sv/meta.yml index fc307081..8b41c69c 100644 --- a/modules/tiddit/sv/meta.yml +++ b/modules/tiddit/sv/meta.yml @@ -17,14 +17,22 @@ input: description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM file + pattern: "*.{bam,cram}" + - index: + type: file + description: BAM/CRAM index file + pattern: "*.{bai,crai}" - fasta: type: file description: Input FASTA file pattern: "*.{fasta,fa}" - - fai: + - bwa_index: type: file - description: FASTA index file - pattern: "*.{fai}" + description: BWA genome index files + pattern: "Directory containing BWA index *.{amb,ann,bwt,pac,sa}" output: - meta: type: map @@ -38,11 +46,7 @@ output: - ploidy: type: file description: tab - pattern: "*.{ploidy.tab}" - - signals: - type: file - description: tab - pattern: "*.{signals.tab}" + pattern: "*.{ploidies.tab}" - versions: type: file description: File containing software versions diff --git a/tests/modules/tiddit/sv/main.nf b/tests/modules/tiddit/sv/main.nf index 2c09404e..c9dbf4ea 100644 --- a/tests/modules/tiddit/sv/main.nf +++ b/tests/modules/tiddit/sv/main.nf @@ -13,9 +13,22 @@ workflow test_tiddit_sv_bam { ] fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) - fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) BWA_INDEX( fasta ) - TIDDIT_SV ( input, fasta, fai , 
BWA_INDEX.out.index) + TIDDIT_SV ( input, fasta, BWA_INDEX.out.index) +} + +workflow test_tiddit_sv_cram { + input = [ + [ id:'test' ], // meta map + [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true) ], + [ file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true) ] + ] + + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + + BWA_INDEX( fasta ) + + TIDDIT_SV ( input, fasta, BWA_INDEX.out.index) } diff --git a/tests/modules/tiddit/sv/test.yml b/tests/modules/tiddit/sv/test.yml index 732b8434..557a0913 100644 --- a/tests/modules/tiddit/sv/test.yml +++ b/tests/modules/tiddit/sv/test.yml @@ -8,3 +8,14 @@ md5sum: 6319d3611f7b6b94425a184d274b3dfc - path: output/tiddit/test.vcf md5sum: 41d3f8746f0420f894104321b7e64f67 + +- name: tiddit sv test_tiddit_sv_cram + command: nextflow run ./tests/modules/tiddit/sv -entry test_tiddit_sv_cram -c ./tests/config/nextflow.config -c ./tests/modules/tiddit/sv/nextflow.config + tags: + - tiddit + - tiddit/sv + files: + - path: output/tiddit/test.ploidies.tab + md5sum: f1162a940ddc8b963f6e0e506bb5c136 + - path: output/tiddit/test.vcf + md5sum: 34db59578991285d6b62dc1500272fca From d7cec801b244d8aa0a98aa94934b191ed5720f92 Mon Sep 17 00:00:00 2001 From: "Robert A. Petit III" Date: Fri, 3 Jun 2022 13:05:43 -0600 Subject: [PATCH 123/168] Update main.nf --- modules/snippy/run/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/snippy/run/main.nf b/modules/snippy/run/main.nf index 73c4b94d..5ecdbea8 100644 --- a/modules/snippy/run/main.nf +++ b/modules/snippy/run/main.nf @@ -4,7 +4,7 @@ process SNIPPY_RUN { conda (params.enable_conda ? "bioconda::snippy=4.6.0" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/snippy:4.6.0--hdfd78af_2 ': + 'https://depot.galaxyproject.org/singularity/snippy:4.6.0--hdfd78af_2' : 'quay.io/biocontainers/snippy:4.6.0--hdfd78af_2' }" input: From e827a042ce1f55a37a8e771944b566faa5108e8e Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Fri, 3 Jun 2022 21:24:03 +0200 Subject: [PATCH 124/168] Update modules/gatk/unifiedgenotyper/main.nf Co-authored-by: Maxime U. Garcia --- modules/gatk/unifiedgenotyper/main.nf | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/modules/gatk/unifiedgenotyper/main.nf b/modules/gatk/unifiedgenotyper/main.nf index 47d83def..e88ac7c5 100644 --- a/modules/gatk/unifiedgenotyper/main.nf +++ b/modules/gatk/unifiedgenotyper/main.nf @@ -9,13 +9,13 @@ process GATK_UNIFIEDGENOTYPER { input: tuple val(meta), path(input), path(index) - path(fasta) - path(fai) - path(dict) - path(intervals) - path(contamination) - path(dbsnp) - path(comp) + path fasta + path fai + path dict + path intervals + path contamination + path dbsnp + path comp output: tuple val(meta), path("*.vcf.gz"), emit: vcf From 0d087b4890e080394c3c0fd5971d4e3364841380 Mon Sep 17 00:00:00 2001 From: "James A. 
Fellows Yates" Date: Fri, 3 Jun 2022 21:33:54 +0200 Subject: [PATCH 125/168] Update modules/gatk/realignertargetcreator/main.nf --- modules/gatk/realignertargetcreator/main.nf | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/gatk/realignertargetcreator/main.nf b/modules/gatk/realignertargetcreator/main.nf index 5d2da4b3..ba6a2592 100644 --- a/modules/gatk/realignertargetcreator/main.nf +++ b/modules/gatk/realignertargetcreator/main.nf @@ -9,10 +9,10 @@ process GATK_REALIGNERTARGETCREATOR { input: tuple val(meta), path(input), path(index) - path(fasta) - path(fai) - path(dict) - path(known_vcf) + path fasta + path fai + path dict + path known_vcf output: tuple val(meta), path("*.intervals"), emit: intervals From 771425e7a050acf0e1be41d9e6fc800f675c3e58 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Tue, 7 Jun 2022 12:11:18 +0200 Subject: [PATCH 126/168] add threads to calibratedragstrmodel --- modules/gatk4/calibratedragstrmodel/main.nf | 1 + tests/modules/gatk4/calibratedragstrmodel/test.yml | 12 ++++++------ 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/modules/gatk4/calibratedragstrmodel/main.nf b/modules/gatk4/calibratedragstrmodel/main.nf index 08ba7008..b4e47edc 100644 --- a/modules/gatk4/calibratedragstrmodel/main.nf +++ b/modules/gatk4/calibratedragstrmodel/main.nf @@ -38,6 +38,7 @@ process GATK4_CALIBRATEDRAGSTRMODEL { --output ${prefix}.txt \\ --reference $fasta \\ --str-table-path $strtablefile \\ + --threads $task.cpus \\ $intervals_command \\ --tmp-dir . 
\\ $args diff --git a/tests/modules/gatk4/calibratedragstrmodel/test.yml b/tests/modules/gatk4/calibratedragstrmodel/test.yml index 53c428a9..285d04ca 100644 --- a/tests/modules/gatk4/calibratedragstrmodel/test.yml +++ b/tests/modules/gatk4/calibratedragstrmodel/test.yml @@ -1,26 +1,26 @@ - name: gatk4 calibratedragstrmodel test_gatk4_calibratedragstrmodel_bam command: nextflow run ./tests/modules/gatk4/calibratedragstrmodel -entry test_gatk4_calibratedragstrmodel_bam -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/calibratedragstrmodel/nextflow.config tags: - - gatk4/calibratedragstrmodel - gatk4 + - gatk4/calibratedragstrmodel files: - path: output/gatk4/test.txt - md5sum: 0a1a1583b157fa2251dd931ed165da4f + md5sum: e16fa32906c74bb18b93e98a86718ff1 - name: gatk4 calibratedragstrmodel test_gatk4_calibratedragstrmodel_cram command: nextflow run ./tests/modules/gatk4/calibratedragstrmodel -entry test_gatk4_calibratedragstrmodel_cram -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/calibratedragstrmodel/nextflow.config tags: - - gatk4/calibratedragstrmodel - gatk4 + - gatk4/calibratedragstrmodel files: - path: output/gatk4/test.txt - md5sum: 1aa7ab38023f724877b3323c5e6b9a4e + md5sum: 81c7bf338886cb4d5c2cc07fc56afe44 - name: gatk4 calibratedragstrmodel test_gatk4_calibratedragstrmodel_beds command: nextflow run ./tests/modules/gatk4/calibratedragstrmodel -entry test_gatk4_calibratedragstrmodel_beds -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/calibratedragstrmodel/nextflow.config tags: - - gatk4/calibratedragstrmodel - gatk4 + - gatk4/calibratedragstrmodel files: - path: output/gatk4/test.txt - md5sum: def8baccad7bd59006f08fcb0a6721bf + md5sum: cb6a9acdee042302b54fd1f59b5f54ee From 9d6697bdead652a8e3caf4b382b8dc7e7f2ce1d6 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Tue, 7 Jun 2022 12:14:40 +0200 Subject: [PATCH 127/168] template --- modules/rhocall/annotate/main.nf | 75 
+++++++++++++++++++ modules/rhocall/annotate/meta.yml | 51 +++++++++++++ tests/config/pytest_modules.yml | 4 + tests/modules/rhocall/annotate/main.nf | 15 ++++ .../modules/rhocall/annotate/nextflow.config | 5 ++ tests/modules/rhocall/annotate/test.yml | 14 ++++ 6 files changed, 164 insertions(+) create mode 100644 modules/rhocall/annotate/main.nf create mode 100644 modules/rhocall/annotate/meta.yml create mode 100644 tests/modules/rhocall/annotate/main.nf create mode 100644 tests/modules/rhocall/annotate/nextflow.config create mode 100644 tests/modules/rhocall/annotate/test.yml diff --git a/modules/rhocall/annotate/main.nf b/modules/rhocall/annotate/main.nf new file mode 100644 index 00000000..8ca395f2 --- /dev/null +++ b/modules/rhocall/annotate/main.nf @@ -0,0 +1,75 @@ +// TODO nf-core: If in doubt look at other nf-core/modules to see how we are doing things! :) +// https://github.com/nf-core/modules/tree/master/modules +// You can also ask for help via your pull request or on the #modules channel on the nf-core Slack workspace: +// https://nf-co.re/join +// TODO nf-core: A module file SHOULD only define input and output files as command-line parameters. +// All other parameters MUST be provided using the "task.ext" directive, see here: +// https://www.nextflow.io/docs/latest/process.html#ext +// where "task.ext" is a string. +// Any parameters that need to be evaluated in the context of a particular sample +// e.g. single-end/paired-end data MUST also be defined and evaluated appropriately. +// TODO nf-core: Software that can be piped together SHOULD be added to separate module files +// unless there is a run-time, storage advantage in implementing in this way +// e.g. it's ok to have a single module for bwa to output BAM instead of SAM: +// bwa mem | samtools view -B -T ref.fasta +// TODO nf-core: Optional inputs are not currently supported by Nextflow. However, using an empty +// list (`[]`) instead of a file can be used to work around this issue. 
+ +process RHOCALL_ANNOTATE { + tag "$meta.id" + label 'process_medium' + + // TODO nf-core: List required Conda package(s). + // Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10"). + // For Conda, the build (i.e. "h9402c20_2") must be EXCLUDED to support installation on different operating systems. + // TODO nf-core: See section in main README for further information regarding finding and adding container addresses to the section below. + conda (params.enable_conda ? "bioconda::rhocall=0.5.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/YOUR-TOOL-HERE': + 'quay.io/biocontainers/YOUR-TOOL-HERE' }" + + input: + // TODO nf-core: Where applicable all sample-specific information e.g. "id", "single_end", "read_group" + // MUST be provided as an input via a Groovy Map called "meta". + // This information may not be required in some instances e.g. indexing reference genome files: + // https://github.com/nf-core/modules/blob/master/modules/bwa/index/main.nf + // TODO nf-core: Where applicable please provide/convert compressed files as input/output + // e.g. "*.fastq.gz" and NOT "*.fastq", "*.bam" and NOT "*.sam" etc. + tuple val(meta), path(bam) + + output: + // TODO nf-core: Named file extensions MUST be emitted for ALL output channels + tuple val(meta), path("*.bam"), emit: bam + // TODO nf-core: List additional required output channels/values here + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + // TODO nf-core: Where possible, a command MUST be provided to obtain the version number of the software e.g. 1.10 + // If the software is unable to output a version number on the command-line then it can be manually specified + // e.g. 
https://github.com/nf-core/modules/blob/master/modules/homer/annotatepeaks/main.nf + // Each software used MUST provide the software name and version number in the YAML version file (versions.yml) + // TODO nf-core: It MUST be possible to pass additional parameters to the tool as a command-line string via the "task.ext.args" directive + // TODO nf-core: If the tool supports multi-threading then you MUST provide the appropriate parameter + // using the Nextflow "task" variable e.g. "--threads $task.cpus" + // TODO nf-core: Please replace the example samtools command below with your module's command + // TODO nf-core: Please indent the command appropriately (4 spaces!!) to help with readability ;) + """ + samtools \\ + sort \\ + $args \\ + -@ $task.cpus \\ + -o ${prefix}.bam \\ + -T $prefix \\ + $bam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + rhocall: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//' )) + END_VERSIONS + """ +} diff --git a/modules/rhocall/annotate/meta.yml b/modules/rhocall/annotate/meta.yml new file mode 100644 index 00000000..cf7c8c49 --- /dev/null +++ b/modules/rhocall/annotate/meta.yml @@ -0,0 +1,51 @@ +name: "rhocall_annotate" +## TODO nf-core: Add a description of the module and list keywords +description: write your description here +keywords: + - sort +tools: + - "rhocall": + ## TODO nf-core: Add a description and other details for the software below + description: "Call regions of homozygosity and make tentative UPD calls." + homepage: "https://github.com/dnil/rhocall" + documentation: "https://github.com/dnil/rhocall" + tool_dev_url: "https://github.com/dnil" + doi: "" + licence: "['GPL v3']" + +## TODO nf-core: Add a description of all of the variables used as input +input: + # Only when we have meta + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + # + ## TODO nf-core: Delete / customise this example input + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + +## TODO nf-core: Add a description of all of the variables used as output +output: + #Only when we have meta + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + # + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + ## TODO nf-core: Delete / customise this example output + - bam: + type: file + description: Sorted BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + +authors: + - "@ramprasadn" diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index d8227203..ae3c1d2f 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -1631,6 +1631,10 @@ rgi/main: - modules/rgi/main/** - tests/modules/rgi/main/** +rhocall/annotate: + - modules/rhocall/annotate/** + - tests/modules/rhocall/annotate/** + rmarkdownnotebook: - modules/rmarkdownnotebook/** - tests/modules/rmarkdownnotebook/** diff --git a/tests/modules/rhocall/annotate/main.nf b/tests/modules/rhocall/annotate/main.nf new file mode 100644 index 00000000..65f1b5a0 --- /dev/null +++ b/tests/modules/rhocall/annotate/main.nf @@ -0,0 +1,15 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { RHOCALL_ANNOTATE } from '../../../../modules/rhocall/annotate/main.nf' + +workflow test_rhocall_annotate { + + input = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true) + ] + + RHOCALL_ANNOTATE ( input ) +} diff --git a/tests/modules/rhocall/annotate/nextflow.config b/tests/modules/rhocall/annotate/nextflow.config new file mode 100644 index 00000000..50f50a7a --- /dev/null +++ b/tests/modules/rhocall/annotate/nextflow.config @@ -0,0 +1,5 @@ +process { + + publishDir = { 
"${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + +} \ No newline at end of file diff --git a/tests/modules/rhocall/annotate/test.yml b/tests/modules/rhocall/annotate/test.yml new file mode 100644 index 00000000..110ec1b4 --- /dev/null +++ b/tests/modules/rhocall/annotate/test.yml @@ -0,0 +1,14 @@ +## TODO nf-core: Please run the following command to build this file: +# nf-core modules create-test-yml rhocall/annotate +- name: "rhocall annotate" + command: nextflow run ./tests/modules/rhocall/annotate -entry test_rhocall_annotate -c ./tests/config/nextflow.config -c ./tests/modules/rhocall/annotate/nextflow.config + tags: + - "rhocall" + # + - "rhocall/annotate" + # + files: + - path: "output/rhocall/test.bam" + md5sum: e667c7caad0bc4b7ac383fd023c654fc + - path: output/rhocall/versions.yml + md5sum: a01fe51bc4c6a3a6226fbf77b2c7cf3b From 4c7ef30fb64f75ba4499d3b8fba24a068b1ce586 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Tue, 7 Jun 2022 12:17:14 +0200 Subject: [PATCH 128/168] changed to label to medium --- modules/gatk4/calibratedragstrmodel/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/gatk4/calibratedragstrmodel/main.nf b/modules/gatk4/calibratedragstrmodel/main.nf index b4e47edc..37a54de4 100644 --- a/modules/gatk4/calibratedragstrmodel/main.nf +++ b/modules/gatk4/calibratedragstrmodel/main.nf @@ -1,6 +1,6 @@ process GATK4_CALIBRATEDRAGSTRMODEL { tag "$meta.id" - label 'process_low' + label 'process_medium' conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
From e5bf87aec3b7de144c7f2599154b6bb2620b5c84 Mon Sep 17 00:00:00 2001 From: Matthias De Smet <11850640+matthdsm@users.noreply.github.com> Date: Tue, 7 Jun 2022 13:58:27 +0200 Subject: [PATCH 129/168] possible fix for name collisions --- modules/multiqc/main.nf | 2 +- tests/modules/multiqc/main.nf | 23 +++++++++++++++++++---- tests/modules/multiqc/nextflow.config | 4 +++- tests/modules/multiqc/test.yml | 11 +++++++++-- 4 files changed, 32 insertions(+), 8 deletions(-) diff --git a/modules/multiqc/main.nf b/modules/multiqc/main.nf index ae019dbf..bd3c9dc8 100644 --- a/modules/multiqc/main.nf +++ b/modules/multiqc/main.nf @@ -7,7 +7,7 @@ process MULTIQC { 'quay.io/biocontainers/multiqc:1.12--pyhdfd78af_0' }" input: - path multiqc_files + path "??/*" output: path "*multiqc_report.html", emit: report diff --git a/tests/modules/multiqc/main.nf b/tests/modules/multiqc/main.nf index 43643985..3719d423 100644 --- a/tests/modules/multiqc/main.nf +++ b/tests/modules/multiqc/main.nf @@ -2,15 +2,30 @@ nextflow.enable.dsl = 2 -include { FASTQC } from '../../../modules/fastqc/main.nf' -include { MULTIQC } from '../../../modules/multiqc/main.nf' +include { FASTQC } from '../../../modules/fastqc/main.nf' +include { FASTQC as FASTQC2 } from '../../../modules/fastqc/main.nf' +include { MULTIQC } from '../../../modules/multiqc/main.nf' workflow test_multiqc { input = [ [ id: 'test', single_end: false ], - [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ] + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)] ] FASTQC ( input ) MULTIQC ( FASTQC.out.zip.collect { it[1] } ) } + +workflow test_multiqc_fn_collision { + fqc_input = [ [ id: 'test', single_end: false ], + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)] + ] + mqc_input = Channel.empty() + + FASTQC ( fqc_input ) + 
mqc_input = mqc_input.mix(FASTQC.out.zip.collect { it[1] }) + + FASTQC2 ( fqc_input ) + mqc_input = mqc_input.mix(FASTQC2.out.zip.collect { it[1] }) + + MULTIQC ( mqc_input ) +} diff --git a/tests/modules/multiqc/nextflow.config b/tests/modules/multiqc/nextflow.config index 8730f1c4..76d125e5 100644 --- a/tests/modules/multiqc/nextflow.config +++ b/tests/modules/multiqc/nextflow.config @@ -1,5 +1,7 @@ process { publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } - + withName: "FASTQC*" { + publishDir = [ enabled: false ] + } } diff --git a/tests/modules/multiqc/test.yml b/tests/modules/multiqc/test.yml index 39796872..33129c79 100644 --- a/tests/modules/multiqc/test.yml +++ b/tests/modules/multiqc/test.yml @@ -1,5 +1,12 @@ -- name: multiqc - command: nextflow run ./tests/modules/multiqc -entry test_multiqc -c ./tests/config/nextflow.config -c ./tests/modules/multiqc/nextflow.config +- name: multiqc test_multiqc + command: nextflow run ./tests/modules/multiqc -entry test_multiqc -c ./tests/config/nextflow.config -c ./tests/modules/multiqc/nextflow.config + tags: + - multiqc + files: + - path: output/multiqc/multiqc_report.html + +- name: multiqc test_multiqc_fn_collision + command: nextflow run ./tests/modules/multiqc -entry test_multiqc_fn_collision -c ./tests/config/nextflow.config -c ./tests/modules/multiqc/nextflow.config tags: - multiqc files: From dbca9e8b361187bf4c297045927cbcbf5034c312 Mon Sep 17 00:00:00 2001 From: SusiJo Date: Tue, 7 Jun 2022 13:58:57 +0200 Subject: [PATCH 130/168] adjusted prefix + test + authors --- modules/gatk4/markduplicatesspark/main.nf | 2 +- modules/gatk4/markduplicatesspark/meta.yml | 1 + tests/modules/gatk4/markduplicatesspark/nextflow.config | 2 +- tests/modules/gatk4/markduplicatesspark/test.yml | 9 +++++++++ 4 files changed, 12 insertions(+), 2 deletions(-) diff --git a/modules/gatk4/markduplicatesspark/main.nf b/modules/gatk4/markduplicatesspark/main.nf index 79e12814..945f105d 
100644 --- a/modules/gatk4/markduplicatesspark/main.nf +++ b/modules/gatk4/markduplicatesspark/main.nf @@ -23,7 +23,7 @@ process GATK4_MARKDUPLICATES_SPARK { script: def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" + prefix = task.ext.prefix ?: "${meta.id}" def input_list = bam.collect{"--input $it"}.join(' ') diff --git a/modules/gatk4/markduplicatesspark/meta.yml b/modules/gatk4/markduplicatesspark/meta.yml index bf3e02ba..59be9b6d 100644 --- a/modules/gatk4/markduplicatesspark/meta.yml +++ b/modules/gatk4/markduplicatesspark/meta.yml @@ -58,3 +58,4 @@ authors: - "@ajodeh-juma" - "@FriederikeHanssen" - "@maxulysse" + - "@SusiJo" diff --git a/tests/modules/gatk4/markduplicatesspark/nextflow.config b/tests/modules/gatk4/markduplicatesspark/nextflow.config index 565016b7..e0455b4b 100644 --- a/tests/modules/gatk4/markduplicatesspark/nextflow.config +++ b/tests/modules/gatk4/markduplicatesspark/nextflow.config @@ -9,8 +9,8 @@ process { ext.prefix = { "${meta.id}.cram" } } withName: GATK4_MARKDUPLICATES_SPARK_METRICS { - ext.prefix = { "${meta.id}.bam" } ext.args = '--metrics-file test.metrics' + ext.prefix = { "${meta.id}.bam" } } } diff --git a/tests/modules/gatk4/markduplicatesspark/test.yml b/tests/modules/gatk4/markduplicatesspark/test.yml index 21323ada..31d8d34d 100644 --- a/tests/modules/gatk4/markduplicatesspark/test.yml +++ b/tests/modules/gatk4/markduplicatesspark/test.yml @@ -16,6 +16,15 @@ - path: output/gatk4/test.bam md5sum: 898cb0a6616897d8ada90bab53bf0837 +- name: gatk4 markduplicatesspark test_gatk4_markduplicates_spark_multiple_bams_cram_out + command: nextflow run ./tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark_multiple_bams_cram_out -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config + tags: + - gatk4 + - gatk4/markduplicatesspark + files: + - path: output/gatk4/test.cram + md5sum: 2271016de5e4199736598f39d12d7587 + - name: gatk4 
markduplicatesspark test_gatk4_markduplicates_spark_multiple_bams_metrics command: nextflow run ./tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark_multiple_bams_metrics -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config tags: From 65a0fb959f77e44a09080f178ce06602270f3a0f Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Tue, 7 Jun 2022 14:37:03 +0200 Subject: [PATCH 131/168] rhocall --- modules/rhocall/annotate/main.nf | 78 +++++++++---------------- modules/rhocall/annotate/meta.yml | 41 +++++++------ tests/modules/rhocall/annotate/main.nf | 37 ++++++++++-- tests/modules/rhocall/annotate/test.yml | 19 +++--- 4 files changed, 91 insertions(+), 84 deletions(-) diff --git a/modules/rhocall/annotate/main.nf b/modules/rhocall/annotate/main.nf index 8ca395f2..e59795a4 100644 --- a/modules/rhocall/annotate/main.nf +++ b/modules/rhocall/annotate/main.nf @@ -1,75 +1,51 @@ -// TODO nf-core: If in doubt look at other nf-core/modules to see how we are doing things! :) -// https://github.com/nf-core/modules/tree/master/modules -// You can also ask for help via your pull request or on the #modules channel on the nf-core Slack workspace: -// https://nf-co.re/join -// TODO nf-core: A module file SHOULD only define input and output files as command-line parameters. -// All other parameters MUST be provided using the "task.ext" directive, see here: -// https://www.nextflow.io/docs/latest/process.html#ext -// where "task.ext" is a string. -// Any parameters that need to be evaluated in the context of a particular sample -// e.g. single-end/paired-end data MUST also be defined and evaluated appropriately. -// TODO nf-core: Software that can be piped together SHOULD be added to separate module files -// unless there is a run-time, storage advantage in implementing in this way -// e.g. 
it's ok to have a single module for bwa to output BAM instead of SAM: -// bwa mem | samtools view -B -T ref.fasta -// TODO nf-core: Optional inputs are not currently supported by Nextflow. However, using an empty -// list (`[]`) instead of a file can be used to work around this issue. - process RHOCALL_ANNOTATE { tag "$meta.id" label 'process_medium' - // TODO nf-core: List required Conda package(s). - // Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10"). - // For Conda, the build (i.e. "h9402c20_2") must be EXCLUDED to support installation on different operating systems. - // TODO nf-core: See section in main README for further information regarding finding and adding container addresses to the section below. conda (params.enable_conda ? "bioconda::rhocall=0.5.1" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/YOUR-TOOL-HERE': - 'quay.io/biocontainers/YOUR-TOOL-HERE' }" + 'https://depot.galaxyproject.org/singularity/rhocall:0.5.1--py39hbf8eff0_0': + 'quay.io/biocontainers/rhocall:0.5.1--py39hbf8eff0_0' }" input: - // TODO nf-core: Where applicable all sample-specific information e.g. "id", "single_end", "read_group" - // MUST be provided as an input via a Groovy Map called "meta". - // This information may not be required in some instances e.g. indexing reference genome files: - // https://github.com/nf-core/modules/blob/master/modules/bwa/index/main.nf - // TODO nf-core: Where applicable please provide/convert compressed files as input/output - // e.g. "*.fastq.gz" and NOT "*.fastq", "*.bam" and NOT "*.sam" etc. 
- tuple val(meta), path(bam) + tuple val(meta), path(vcf), path(tbi) + tuple val(meta), path(roh) + path bed output: - // TODO nf-core: Named file extensions MUST be emitted for ALL output channels - tuple val(meta), path("*.bam"), emit: bam - // TODO nf-core: List additional required output channels/values here - path "versions.yml" , emit: versions + tuple val(meta), path("*_rhocall.vcf"), emit: vcf + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' + def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - // TODO nf-core: Where possible, a command MUST be provided to obtain the version number of the software e.g. 1.10 - // If the software is unable to output a version number on the command-line then it can be manually specified - // e.g. https://github.com/nf-core/modules/blob/master/modules/homer/annotatepeaks/main.nf - // Each software used MUST provide the software name and version number in the YAML version file (versions.yml) - // TODO nf-core: It MUST be possible to pass additional parameters to the tool as a command-line string via the "task.ext.args" directive - // TODO nf-core: If the tool supports multi-threading then you MUST provide the appropriate parameter - // using the Nextflow "task" variable e.g. "--threads $task.cpus" - // TODO nf-core: Please replace the example samtools command below with your module's command - // TODO nf-core: Please indent the command appropriately (4 spaces!!) to help with readability ;) + def az_bed = bed ? 
"-b ${bed}" : '' """ - samtools \\ - sort \\ + rhocall \\ + annotate \\ $args \\ - -@ $task.cpus \\ - -o ${prefix}.bam \\ - -T $prefix \\ - $bam + $az_bed \\ + -r $roh \\ + -o ${prefix}_rhocall.vcf \\ + $vcf cat <<-END_VERSIONS > versions.yml "${task.process}": - rhocall: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//' )) + rhocall: \$(echo \$(rhocall --version 2>&1) | sed 's/rhocall, version //' )) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}_rhocall.vcf + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + rhocall: \$(echo \$(rhocall --version 2>&1) | sed 's/rhocall, version //' )) END_VERSIONS """ } diff --git a/modules/rhocall/annotate/meta.yml b/modules/rhocall/annotate/meta.yml index cf7c8c49..4ece9d2b 100644 --- a/modules/rhocall/annotate/meta.yml +++ b/modules/rhocall/annotate/meta.yml @@ -1,11 +1,10 @@ name: "rhocall_annotate" -## TODO nf-core: Add a description of the module and list keywords -description: write your description here +description: "Markup VCF file using rho-calls." keywords: - - sort + - roh + - rhocall tools: - "rhocall": - ## TODO nf-core: Add a description and other details for the software below description: "Call regions of homozygosity and make tentative UPD calls." homepage: "https://github.com/dnil/rhocall" documentation: "https://github.com/dnil/rhocall" @@ -13,39 +12,43 @@ tools: doi: "" licence: "['GPL v3']" -## TODO nf-core: Add a description of all of the variables used as input input: - # Only when we have meta - meta: type: map description: | Groovy Map containing sample information e.g. 
[ id:'test', single_end:false ] - # - ## TODO nf-core: Delete / customise this example input - - bam: + - vcf: type: file - description: BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" + description: vcf file + pattern: "*.{vcf,vcf.gz}" + - tbi: + type: file + description: vcf index file + pattern: "*.{tbi}" + - roh: + type: file + description: Bcftools roh style TSV file with CHR,POS,AZ,QUAL + pattern: "*.{roh}" + - bed: + type: file + description: BED file with AZ windows. + pattern: "*.{bed}" -## TODO nf-core: Add a description of all of the variables used as output output: - #Only when we have meta - meta: type: map description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - # + - vcf: + type: file + description: vcf file + pattern: "*.{vcf,vcf.gz}" - versions: type: file description: File containing software versions pattern: "versions.yml" - ## TODO nf-core: Delete / customise this example output - - bam: - type: file - description: Sorted BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" authors: - "@ramprasadn" diff --git a/tests/modules/rhocall/annotate/main.nf b/tests/modules/rhocall/annotate/main.nf index 65f1b5a0..a77dc19a 100644 --- a/tests/modules/rhocall/annotate/main.nf +++ b/tests/modules/rhocall/annotate/main.nf @@ -3,13 +3,38 @@ nextflow.enable.dsl = 2 include { RHOCALL_ANNOTATE } from '../../../../modules/rhocall/annotate/main.nf' +include { BCFTOOLS_ROH } from '../../../../modules/bcftools/roh/main.nf' workflow test_rhocall_annotate { - - input = [ - [ id:'test', single_end:false ], // meta map - file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true) - ] - RHOCALL_ANNOTATE ( input ) + input = [ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true)] + + af_file = [] + gen_map = [] + regions = [] + targets = [] + samples = [] + + 
BCFTOOLS_ROH ( input, af_file, gen_map, regions, samples, targets ) + RHOCALL_ANNOTATE ( input, BCFTOOLS_ROH.out.roh, []) + +} + +workflow test_rhocall_annotate_stub { + + input = [ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true)] + + af_file = [] + gen_map = [] + regions = [] + targets = [] + samples = [] + + BCFTOOLS_ROH ( input, af_file, gen_map, regions, samples, targets ) + RHOCALL_ANNOTATE ( input, BCFTOOLS_ROH.out.roh, []) + } diff --git a/tests/modules/rhocall/annotate/test.yml b/tests/modules/rhocall/annotate/test.yml index 110ec1b4..77c9f8ec 100644 --- a/tests/modules/rhocall/annotate/test.yml +++ b/tests/modules/rhocall/annotate/test.yml @@ -1,14 +1,17 @@ -## TODO nf-core: Please run the following command to build this file: -# nf-core modules create-test-yml rhocall/annotate - name: "rhocall annotate" command: nextflow run ./tests/modules/rhocall/annotate -entry test_rhocall_annotate -c ./tests/config/nextflow.config -c ./tests/modules/rhocall/annotate/nextflow.config tags: - "rhocall" - # - "rhocall/annotate" - # files: - - path: "output/rhocall/test.bam" - md5sum: e667c7caad0bc4b7ac383fd023c654fc - - path: output/rhocall/versions.yml - md5sum: a01fe51bc4c6a3a6226fbf77b2c7cf3b + - path: "output/rhocall/test_rhocall.vcf" + - path: "output/rhocall/versions.yml" + +- name: "rhocall annotate stub" + command: nextflow run ./tests/modules/rhocall/annotate -entry test_rhocall_annotate_stub -c ./tests/config/nextflow.config -c ./tests/modules/rhocall/annotate/nextflow.config -stub-run + tags: + - "rhocall" + - "rhocall/annotate" + files: + - path: "output/rhocall/test_rhocall.vcf" + - path: "output/rhocall/versions.yml" From 745e47cd33bad1303257b669d22c913abd4bf94e Mon Sep 17 00:00:00 2001 From: SusiJo Date: Tue, 7 Jun 2022 14:38:43 +0200 Subject: [PATCH 132/168] add versions.yml --- 
tests/modules/gatk4/markduplicatesspark/test.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/modules/gatk4/markduplicatesspark/test.yml b/tests/modules/gatk4/markduplicatesspark/test.yml index 31d8d34d..4ef9b916 100644 --- a/tests/modules/gatk4/markduplicatesspark/test.yml +++ b/tests/modules/gatk4/markduplicatesspark/test.yml @@ -6,6 +6,7 @@ files: - path: output/gatk4/test.bam md5sum: dc1a09ac6371aab7c50d1a554baa06d3 + - path: output/gatk4/versions.yml - name: gatk4 markduplicatesspark test_gatk4_markduplicates_spark_multiple_bams command: nextflow run ./tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark_multiple_bams -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config @@ -15,6 +16,7 @@ files: - path: output/gatk4/test.bam md5sum: 898cb0a6616897d8ada90bab53bf0837 + - path: output/gatk4/versions.yml - name: gatk4 markduplicatesspark test_gatk4_markduplicates_spark_multiple_bams_cram_out command: nextflow run ./tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark_multiple_bams_cram_out -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config @@ -24,6 +26,7 @@ files: - path: output/gatk4/test.cram md5sum: 2271016de5e4199736598f39d12d7587 + - path: output/gatk4/versions.yml - name: gatk4 markduplicatesspark test_gatk4_markduplicates_spark_multiple_bams_metrics command: nextflow run ./tests/modules/gatk4/markduplicatesspark -entry test_gatk4_markduplicates_spark_multiple_bams_metrics -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/markduplicatesspark/nextflow.config @@ -35,3 +38,4 @@ md5sum: 898cb0a6616897d8ada90bab53bf0837 - path: output/gatk4/test.metrics contains: ["## METRICS CLASS", "org.broadinstitute.hellbender.utils.read.markduplicates.GATKDuplicationMetrics"] + - path: output/gatk4/versions.yml From 17a4e931427b5b144c0d0227a3ea49347ba7457a Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj 
<20065894+ramprasadn@users.noreply.github.com> Date: Tue, 7 Jun 2022 14:43:35 +0200 Subject: [PATCH 133/168] fix versions.yaml --- modules/rhocall/annotate/main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/rhocall/annotate/main.nf b/modules/rhocall/annotate/main.nf index e59795a4..95d647b1 100644 --- a/modules/rhocall/annotate/main.nf +++ b/modules/rhocall/annotate/main.nf @@ -34,7 +34,7 @@ process RHOCALL_ANNOTATE { cat <<-END_VERSIONS > versions.yml "${task.process}": - rhocall: \$(echo \$(rhocall --version 2>&1) | sed 's/rhocall, version //' )) + rhocall: \$(echo \$(rhocall --version 2>&1) | sed 's/rhocall, version //' ) END_VERSIONS """ @@ -45,7 +45,7 @@ process RHOCALL_ANNOTATE { cat <<-END_VERSIONS > versions.yml "${task.process}": - rhocall: \$(echo \$(rhocall --version 2>&1) | sed 's/rhocall, version //' )) + rhocall: \$(echo \$(rhocall --version 2>&1) | sed 's/rhocall, version //' ) END_VERSIONS """ } From d196552e064b6979109931e4d48a68617f090254 Mon Sep 17 00:00:00 2001 From: Matthias De Smet <11850640+matthdsm@users.noreply.github.com> Date: Tue, 7 Jun 2022 16:33:04 +0200 Subject: [PATCH 134/168] update input --- modules/multiqc/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/multiqc/main.nf b/modules/multiqc/main.nf index bd3c9dc8..d376db60 100644 --- a/modules/multiqc/main.nf +++ b/modules/multiqc/main.nf @@ -7,7 +7,7 @@ process MULTIQC { 'quay.io/biocontainers/multiqc:1.12--pyhdfd78af_0' }" input: - path "??/*" + path "*/*" output: path "*multiqc_report.html", emit: report From 4935bb98cbc05472da3e89a0b0f04423a105f94f Mon Sep 17 00:00:00 2001 From: "Maxime U. 
Garcia" Date: Wed, 8 Jun 2022 11:38:02 +0200 Subject: [PATCH 135/168] Apply suggestions from code review Co-authored-by: Mahesh Binzer-Panchal --- modules/tiddit/sv/main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/tiddit/sv/main.nf b/modules/tiddit/sv/main.nf index ee2a8d75..ec4affe3 100644 --- a/modules/tiddit/sv/main.nf +++ b/modules/tiddit/sv/main.nf @@ -8,7 +8,7 @@ process TIDDIT_SV { 'quay.io/biocontainers/tiddit:3.0.0--py39h59fae87_1' }" input: - tuple val(meta), path(input), path(index) + tuple val(meta), path(input), path(input_index) path fasta path bwa_index @@ -24,7 +24,7 @@ process TIDDIT_SV { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" """ - [[ -d $bwa_index ]] && for i in `ls $bwa_index`; do [[ -f $fasta && ! "\$i" =~ .*"$fasta".* ]] && ln -s $bwa_index/\$i ${fasta}.\${i##*.} || ln -s $bwa_index/\$i \$i; done + [[ -d $bwa_index ]] && for i in $bwa_index/*; do [[ -f $fasta && ! "\$i" =~ .*"$fasta".* ]] && ln -s $bwa_index/\$i ${fasta}.\${i##*.} || ln -s $bwa_index/\$i \$i; done tiddit \\ --sv \\ From 07eff4346f3ccf0990dd785f95a02059d18a3b64 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Wed, 8 Jun 2022 11:46:45 +0200 Subject: [PATCH 136/168] fix path to symbolic links --- modules/tiddit/sv/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/tiddit/sv/main.nf b/modules/tiddit/sv/main.nf index ec4affe3..275ceb3f 100644 --- a/modules/tiddit/sv/main.nf +++ b/modules/tiddit/sv/main.nf @@ -24,7 +24,7 @@ process TIDDIT_SV { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" """ - [[ -d $bwa_index ]] && for i in $bwa_index/*; do [[ -f $fasta && ! "\$i" =~ .*"$fasta".* ]] && ln -s $bwa_index/\$i ${fasta}.\${i##*.} || ln -s $bwa_index/\$i \$i; done + [[ -d $bwa_index ]] && for i in $bwa_index/*; do [[ -f $fasta && ! 
"\$i" =~ .*"$fasta".* ]] && ln -s \$i ${fasta}.\${i##*.} || ln -s \$i .; done tiddit \\ --sv \\ From 439d2a9e679977add51da377f3dbfdf68ab5c6a9 Mon Sep 17 00:00:00 2001 From: Matthias De Smet <11850640+matthdsm@users.noreply.github.com> Date: Wed, 8 Jun 2022 13:48:27 +0200 Subject: [PATCH 137/168] update mqc inputs --- modules/multiqc/main.nf | 3 ++- modules/multiqc/meta.yml | 8 ++++++++ tests/modules/multiqc/main.nf | 4 ++-- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/modules/multiqc/main.nf b/modules/multiqc/main.nf index d376db60..df68d853 100644 --- a/modules/multiqc/main.nf +++ b/modules/multiqc/main.nf @@ -7,7 +7,8 @@ process MULTIQC { 'quay.io/biocontainers/multiqc:1.12--pyhdfd78af_0' }" input: - path "*/*" + path multiqc_files, stageAs: "?/*" + tuple path(mqc_config), path(mqc_logo) output: path "*multiqc_report.html", emit: report diff --git a/modules/multiqc/meta.yml b/modules/multiqc/meta.yml index 6fa891ef..bf3a27fe 100644 --- a/modules/multiqc/meta.yml +++ b/modules/multiqc/meta.yml @@ -17,6 +17,14 @@ input: type: file description: | List of reports / files recognised by MultiQC, for example the html and zip output of FastQC + - multiqc_config: + type: file + description: Config yml for MultiQC + pattern: "*.{yml,yaml}" + - multiqc_logo: + type: file + description: Logo file for MultiQC + pattern: "*.{png}" output: - report: type: file diff --git a/tests/modules/multiqc/main.nf b/tests/modules/multiqc/main.nf index 3719d423..e015cbc3 100644 --- a/tests/modules/multiqc/main.nf +++ b/tests/modules/multiqc/main.nf @@ -12,7 +12,7 @@ workflow test_multiqc { ] FASTQC ( input ) - MULTIQC ( FASTQC.out.zip.collect { it[1] } ) + MULTIQC ( FASTQC.out.zip.collect { it[1] }, [[],[]] ) } workflow test_multiqc_fn_collision { @@ -27,5 +27,5 @@ workflow test_multiqc_fn_collision { FASTQC2 ( fqc_input ) mqc_input = mqc_input.mix(FASTQC2.out.zip.collect { it[1] }) - MULTIQC ( mqc_input ) + MULTIQC ( mqc_input, [[],[]] ) } From 
f1e33ca6e0d7d40fda3f78822b7789c6c13eb21c Mon Sep 17 00:00:00 2001 From: Matthias De Smet <11850640+matthdsm@users.noreply.github.com> Date: Wed, 8 Jun 2022 14:02:04 +0200 Subject: [PATCH 138/168] update mqc command --- modules/multiqc/main.nf | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/modules/multiqc/main.nf b/modules/multiqc/main.nf index df68d853..de4de537 100644 --- a/modules/multiqc/main.nf +++ b/modules/multiqc/main.nf @@ -8,7 +8,7 @@ process MULTIQC { input: path multiqc_files, stageAs: "?/*" - tuple path(mqc_config), path(mqc_logo) + tuple path(multiqc_config), path(multiqc_logo) output: path "*multiqc_report.html", emit: report @@ -21,8 +21,13 @@ process MULTIQC { script: def args = task.ext.args ?: '' + def config = multiqc_config ?: '--config $multiqc_config' """ - multiqc -f $args . + multiqc \ + --force \ + $config \ + $args \ + . cat <<-END_VERSIONS > versions.yml "${task.process}": From 100b3f244c1dced5a6fa74af77829d7ef759e648 Mon Sep 17 00:00:00 2001 From: Matthias De Smet <11850640+matthdsm@users.noreply.github.com> Date: Wed, 8 Jun 2022 14:24:55 +0200 Subject: [PATCH 139/168] Apply suggestions from code review Co-authored-by: Mahesh Binzer-Panchal --- modules/multiqc/main.nf | 2 +- tests/modules/multiqc/main.nf | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/modules/multiqc/main.nf b/modules/multiqc/main.nf index de4de537..52069c21 100644 --- a/modules/multiqc/main.nf +++ b/modules/multiqc/main.nf @@ -21,7 +21,7 @@ process MULTIQC { script: def args = task.ext.args ?: '' - def config = multiqc_config ?: '--config $multiqc_config' + def config = multiqc_config ?: "--config $multiqc_config" """ multiqc \ --force \ diff --git a/tests/modules/multiqc/main.nf b/tests/modules/multiqc/main.nf index e015cbc3..236d8ad0 100644 --- a/tests/modules/multiqc/main.nf +++ b/tests/modules/multiqc/main.nf @@ -16,9 +16,10 @@ workflow test_multiqc { } workflow test_multiqc_fn_collision { - fqc_input = [ [ id: 
'test', single_end: false ], - [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)] - ] + fqc_input = [ + [ id: 'test', single_end: false ], + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)] + ] mqc_input = Channel.empty() FASTQC ( fqc_input ) From 7b018acabbdf4ab893e7a7d9f321f6e87748cb02 Mon Sep 17 00:00:00 2001 From: Matthias De Smet <11850640+matthdsm@users.noreply.github.com> Date: Wed, 8 Jun 2022 14:36:13 +0200 Subject: [PATCH 140/168] Update modules/multiqc/main.nf Co-authored-by: Mahesh Binzer-Panchal --- modules/multiqc/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/multiqc/main.nf b/modules/multiqc/main.nf index 52069c21..6960bd4c 100644 --- a/modules/multiqc/main.nf +++ b/modules/multiqc/main.nf @@ -21,7 +21,7 @@ process MULTIQC { script: def args = task.ext.args ?: '' - def config = multiqc_config ?: "--config $multiqc_config" + def config = multiqc_config ? "--config $multiqc_config" : '' """ multiqc \ --force \ From 1648d2fcba4951266c25ddef2da843820d53fe15 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Wed, 8 Jun 2022 14:36:25 +0200 Subject: [PATCH 141/168] add index --- modules/bcftools/roh/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/bcftools/roh/main.nf b/modules/bcftools/roh/main.nf index 890b6fad..83ac4e65 100644 --- a/modules/bcftools/roh/main.nf +++ b/modules/bcftools/roh/main.nf @@ -9,7 +9,7 @@ process BCFTOOLS_ROH { input: tuple val(meta), path(vcf), path(tbi) - path af_file + tuple path(af_file), path(af_file_tbi) path genetic_map path regions_file path samples_file From ea4dc866e09834704302c7f4cd8dd0401678b2e7 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Wed, 8 Jun 2022 14:38:55 +0200 Subject: [PATCH 142/168] add index --- modules/bcftools/roh/main.nf | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/modules/bcftools/roh/main.nf b/modules/bcftools/roh/main.nf index 890b6fad..83ac4e65 100644 --- a/modules/bcftools/roh/main.nf +++ b/modules/bcftools/roh/main.nf @@ -9,7 +9,7 @@ process BCFTOOLS_ROH { input: tuple val(meta), path(vcf), path(tbi) - path af_file + tuple path(af_file), path(af_file_tbi) path genetic_map path regions_file path samples_file From e5f8924fabf4c8380f55fb7aee89fd2c268161b1 Mon Sep 17 00:00:00 2001 From: Matthias De Smet <11850640+matthdsm@users.noreply.github.com> Date: Wed, 8 Jun 2022 14:41:10 +0200 Subject: [PATCH 143/168] Update modules/multiqc/main.nf Co-authored-by: Mahesh Binzer-Panchal --- modules/multiqc/main.nf | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/multiqc/main.nf b/modules/multiqc/main.nf index 6960bd4c..b0831b5d 100644 --- a/modules/multiqc/main.nf +++ b/modules/multiqc/main.nf @@ -23,10 +23,10 @@ process MULTIQC { def args = task.ext.args ?: '' def config = multiqc_config ? "--config $multiqc_config" : '' """ - multiqc \ - --force \ - $config \ - $args \ + multiqc \\ + --force \\ + $config \\ + $args \\ . 
cat <<-END_VERSIONS > versions.yml From c005a5e00777d577d661e1c2a81a779de1bda3fc Mon Sep 17 00:00:00 2001 From: Matthias De Smet <11850640+matthdsm@users.noreply.github.com> Date: Wed, 8 Jun 2022 14:42:58 +0200 Subject: [PATCH 144/168] formatting --- tests/modules/multiqc/main.nf | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/modules/multiqc/main.nf b/tests/modules/multiqc/main.nf index 236d8ad0..6c21d061 100644 --- a/tests/modules/multiqc/main.nf +++ b/tests/modules/multiqc/main.nf @@ -7,16 +7,17 @@ include { FASTQC as FASTQC2 } from '../../../modules/fastqc/main.nf' include { MULTIQC } from '../../../modules/multiqc/main.nf' workflow test_multiqc { - input = [ [ id: 'test', single_end: false ], - [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)] - ] + input = [ + [ id: 'test', single_end: false ], + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)] + ] FASTQC ( input ) MULTIQC ( FASTQC.out.zip.collect { it[1] }, [[],[]] ) } workflow test_multiqc_fn_collision { - fqc_input = [ + fqc_input = [ [ id: 'test', single_end: false ], [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)] ] From c079ee20250f5676c53b145fdd00f9defe91f112 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Wed, 8 Jun 2022 14:48:16 +0200 Subject: [PATCH 145/168] fix test --- modules/bcftools/roh/meta.yml | 3 +++ tests/modules/bcftools/roh/main.nf | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/modules/bcftools/roh/meta.yml b/modules/bcftools/roh/meta.yml index fd03d4ce..fa0fd088 100644 --- a/modules/bcftools/roh/meta.yml +++ b/modules/bcftools/roh/meta.yml @@ -23,6 +23,9 @@ input: - af_file: type: file description: "Read allele frequencies from a tab-delimited file containing the columns: CHROM\tPOS\tREF,ALT\tAF." 
+ - af_file_tbi: + type: file + description: "tbi index of af_file." - genetic_map: type: file description: "Genetic map in the format required also by IMPUTE2." diff --git a/tests/modules/bcftools/roh/main.nf b/tests/modules/bcftools/roh/main.nf index 3eb534b6..3a2ef640 100644 --- a/tests/modules/bcftools/roh/main.nf +++ b/tests/modules/bcftools/roh/main.nf @@ -10,7 +10,7 @@ workflow test_bcftools_roh { file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true), file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true)] - af_file = [] + af_file = [[],[]] gen_map = [] regions = [] targets = [] @@ -25,7 +25,7 @@ workflow test_bcftools_roh_stub { file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true), file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true)] - af_file = [] + af_file = [[],[]] gen_map = [] regions = [] targets = [] From 1d674890565bc2a018176388c46e71f9c0bb74f0 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Wed, 8 Jun 2022 16:36:02 +0200 Subject: [PATCH 146/168] update test --- tests/modules/rhocall/annotate/main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/modules/rhocall/annotate/main.nf b/tests/modules/rhocall/annotate/main.nf index a77dc19a..9cca1520 100644 --- a/tests/modules/rhocall/annotate/main.nf +++ b/tests/modules/rhocall/annotate/main.nf @@ -11,7 +11,7 @@ workflow test_rhocall_annotate { file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true), file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true)] - af_file = [] + af_file = [[],[]] gen_map = [] regions = [] targets = [] @@ -28,7 +28,7 @@ workflow test_rhocall_annotate_stub { file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true), file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], 
checkIfExists: true)] - af_file = [] + af_file = [[],[]] gen_map = [] regions = [] targets = [] From 4d7afa21e2f9e64d8bab5e9865cbde170bc920db Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Wed, 8 Jun 2022 21:14:08 +0200 Subject: [PATCH 147/168] Add GATK indelrealigner tests --- modules/gatk/indelrealigner/main.nf | 6 ++-- tests/modules/gatk/indelrealigner/main.nf | 31 +++++++++++++------ .../gatk/indelrealigner/nextflow.config | 2 +- tests/modules/gatk/indelrealigner/test.yml | 22 ++++++------- 4 files changed, 35 insertions(+), 26 deletions(-) diff --git a/modules/gatk/indelrealigner/main.nf b/modules/gatk/indelrealigner/main.nf index f98a99b7..10246082 100644 --- a/modules/gatk/indelrealigner/main.nf +++ b/modules/gatk/indelrealigner/main.nf @@ -22,11 +22,12 @@ process GATK_INDELREALIGNER { task.ext.when == null || task.ext.when script: - if ("$input" == "${prefix}.bam") error "Input and output names are the same, set prefix in module configuration to disambiguate!" def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def known = known_vcf ? "-known ${known_vcf}" : "" + if ("$input" == "${prefix}.bam") error "Input and output names are the same, set prefix in module configuration to disambiguate!" + def avail_mem = 3 if (!task.memory) { log.info '[GATK IndelRealigner] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' 
@@ -39,9 +40,8 @@ process GATK_INDELREALIGNER { -Xmx${avail_mem}g \\ -T IndelRealigner \\ -R ${fasta} \\ - -nt ${task.cpus} -I ${input} \\ - -targetIntervals ${intervals} \\ + --targetIntervals ${intervals} \\ ${known} \\ -o ${prefix}.bam \\ $args diff --git a/tests/modules/gatk/indelrealigner/main.nf b/tests/modules/gatk/indelrealigner/main.nf index 687e12a9..d4e01e12 100644 --- a/tests/modules/gatk/indelrealigner/main.nf +++ b/tests/modules/gatk/indelrealigner/main.nf @@ -2,21 +2,32 @@ nextflow.enable.dsl = 2 +include { GATK_REALIGNERTARGETCREATOR } from '../../../../modules/gatk/realignertargetcreator/main.nf' include { GATK_INDELREALIGNER } from '../../../../modules/gatk/indelrealigner/main.nf' -// TODO add REalignerTargetCrator - workflow test_gatk_indelrealigner { - input = [ - [ id:'test', single_end:false ], // meta map - file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_paired_end_bai'], checkIfExists: true), - GATK_REALIGNERTARGETCREATOR.out.intervals - ] - reference = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + fai = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true) - GATK_INDELREALIGNER ( input, reference, [] ) + input_realignertargetcreator = [ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true), + ] + + GATK_REALIGNERTARGETCREATOR ( input_realignertargetcreator, fasta, fai, dict, [] ) + + ch_intervals = GATK_REALIGNERTARGETCREATOR.out.intervals + + ch_bams_indelrealigner = Channel.of([ [ id:'test' ], // meta map + 
file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true) + ]) + + ch_input_indelrealigner = ch_bams_indelrealigner.mix(ch_intervals).groupTuple(by: 0).map{ [it[0], it[1][0], it[2], it[1][1] ] }.dump(tag: "input") + + GATK_INDELREALIGNER ( ch_input_indelrealigner, fasta, fai, dict, [] ) } diff --git a/tests/modules/gatk/indelrealigner/nextflow.config b/tests/modules/gatk/indelrealigner/nextflow.config index 2e6ae57f..489a478f 100644 --- a/tests/modules/gatk/indelrealigner/nextflow.config +++ b/tests/modules/gatk/indelrealigner/nextflow.config @@ -1,6 +1,6 @@ process { publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } - ext.prefix = "${meta.id}.realigned" + ext.prefix = { "${meta.id}.realigned" } } diff --git a/tests/modules/gatk/indelrealigner/test.yml b/tests/modules/gatk/indelrealigner/test.yml index 59fc394f..b7f0c465 100644 --- a/tests/modules/gatk/indelrealigner/test.yml +++ b/tests/modules/gatk/indelrealigner/test.yml @@ -1,14 +1,12 @@ -## TODO nf-core: Please run the following command to build this file: -# nf-core modules create-test-yml gatk/indelrealigner -- name: "gatk indelrealigner" - command: nextflow run ./tests/modules/gatk/indelrealigner -entry test_gatk_indelrealigner -c ./tests/config/nextflow.config -c ./tests/modules/gatk/indelrealigner/nextflow.config +- name: gatk indelrealigner test_gatk_indelrealigner + command: nextflow run ./tests/modules/gatk/indelrealigner -entry test_gatk_indelrealigner -c ./tests/config/nextflow.config -c ./tests/modules/gatk/indelrealigner/nextflow.config tags: - - "gatk" - # - - "gatk/indelrealigner" - # + - gatk/indelrealigner + - gatk files: - - path: "output/gatk/test.bam" - md5sum: e667c7caad0bc4b7ac383fd023c654fc - - path: output/gatk/versions.yml - md5sum: a01fe51bc4c6a3a6226fbf77b2c7cf3b + - path: 
output/gatk/test.realigned.bai + md5sum: 85a67df8827fe426e7f3a458134c0551 + - path: output/gatk/test.realigned.bam + md5sum: ea1df6f7fcafc408fae4dc1574813d8a + - path: output/gatk/test.realigned.intervals + md5sum: 7aa7a1b235a510e6591e262382086bf8 From 939653a47eca27d160f6d2342f3a63c44c29004b Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Thu, 9 Jun 2022 08:26:04 +0200 Subject: [PATCH 148/168] Changes after code review --- modules/gatk/indelrealigner/main.nf | 2 +- modules/gatk/indelrealigner/meta.yml | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/modules/gatk/indelrealigner/main.nf b/modules/gatk/indelrealigner/main.nf index 10246082..2ff012b4 100644 --- a/modules/gatk/indelrealigner/main.nf +++ b/modules/gatk/indelrealigner/main.nf @@ -8,7 +8,7 @@ process GATK_INDELREALIGNER { 'quay.io/biocontainers/gatk:3.5--hdfd78af_11' }" input: - tuple val(meta), path(input), path(index), path(intervals) + tuple val(meta), path(bam), path(bai), path(intervals) path(fasta) path(fai) path(dict) diff --git a/modules/gatk/indelrealigner/meta.yml b/modules/gatk/indelrealigner/meta.yml index 5abfbfb1..35ad28e8 100644 --- a/modules/gatk/indelrealigner/meta.yml +++ b/modules/gatk/indelrealigner/meta.yml @@ -19,11 +19,11 @@ input: description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - input: + - bam: type: file - description: Sorted and indexed BAM/CRAM/SAM file + description: Sorted and indexed BAM file pattern: "*.bam" - - index: + - bai: type: file description: BAM index file pattern: "*.bai" @@ -60,7 +60,7 @@ output: pattern: "versions.yml" - bam: type: file - description: Sorted and indexed BAM/CRAM/SAM file with local realignment around variants + description: Sorted and indexed BAM file with local realignment around variants pattern: "*.bam" - bai: type: file From 70f7f07a371c2c553026d5b0fbaadec9a7423e53 Mon Sep 17 00:00:00 2001 From: "James A. 
Fellows Yates" Date: Thu, 9 Jun 2022 08:33:37 +0200 Subject: [PATCH 149/168] Apply suggestions from code review Co-authored-by: FriederikeHanssen --- modules/gatk/indelrealigner/main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/gatk/indelrealigner/main.nf b/modules/gatk/indelrealigner/main.nf index 2ff012b4..94cf78da 100644 --- a/modules/gatk/indelrealigner/main.nf +++ b/modules/gatk/indelrealigner/main.nf @@ -26,7 +26,7 @@ process GATK_INDELREALIGNER { def prefix = task.ext.prefix ?: "${meta.id}" def known = known_vcf ? "-known ${known_vcf}" : "" - if ("$input" == "${prefix}.bam") error "Input and output names are the same, set prefix in module configuration to disambiguate!" + if ("$bam" == "${prefix}.bam") error "Input and output names are the same, set prefix in module configuration to disambiguate!" def avail_mem = 3 if (!task.memory) { @@ -40,7 +40,7 @@ process GATK_INDELREALIGNER { -Xmx${avail_mem}g \\ -T IndelRealigner \\ -R ${fasta} \\ - -I ${input} \\ + -I ${bam} \\ --targetIntervals ${intervals} \\ ${known} \\ -o ${prefix}.bam \\ From cda434611f851bbc4fe3d876e6b702cf721a0175 Mon Sep 17 00:00:00 2001 From: SusiJo Date: Thu, 9 Jun 2022 10:24:39 +0200 Subject: [PATCH 150/168] fix conda with openjdk 1.8 --- modules/gatk4/markduplicatesspark/main.nf | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modules/gatk4/markduplicatesspark/main.nf b/modules/gatk4/markduplicatesspark/main.nf index 945f105d..db6a1aa1 100644 --- a/modules/gatk4/markduplicatesspark/main.nf +++ b/modules/gatk4/markduplicatesspark/main.nf @@ -2,7 +2,7 @@ process GATK4_MARKDUPLICATES_SPARK { tag "$meta.id" label 'process_high' - conda (params.enable_conda ? "bioconda::gatk4=4.2.3.0" : null) + conda (params.enable_conda ? "bioconda::gatk4=4.2.3.0 conda-forge::openjdk=8.0.312" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/gatk4:4.2.3.0--hdfd78af_0' : 'broadinstitute/gatk:4.2.3.0' }" @@ -45,6 +45,7 @@ process GATK4_MARKDUPLICATES_SPARK { cat <<-END_VERSIONS > versions.yml "${task.process}": gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + openjdk: \$(echo \$(java -version 2>&1) | grep version | sed 's/\"//g' | cut -f3 -d ' ') END_VERSIONS """ } From 5ccf6fbcc913f34ee2897689081d1cf60cecdb35 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Thu, 9 Jun 2022 11:45:27 +0200 Subject: [PATCH 151/168] add optional fasta argument to ensemblvep --- modules/ensemblvep/main.nf | 4 ++++ modules/ensemblvep/meta.yml | 5 +++++ tests/modules/ensemblvep/main.nf | 4 +++- tests/modules/ensemblvep/test.yml | 4 ++-- 4 files changed, 14 insertions(+), 3 deletions(-) diff --git a/modules/ensemblvep/main.nf b/modules/ensemblvep/main.nf index a5a9b1ab..d2efe35f 100644 --- a/modules/ensemblvep/main.nf +++ b/modules/ensemblvep/main.nf @@ -13,6 +13,7 @@ process ENSEMBLVEP { val species val cache_version path cache + path fasta path extra_files output: @@ -27,6 +28,8 @@ process ENSEMBLVEP { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def dir_cache = cache ? "\${PWD}/${cache}" : "/.vep" + def reference = fasta ? 
"--fasta $fasta" : "" + """ mkdir $prefix @@ -34,6 +37,7 @@ process ENSEMBLVEP { -i $vcf \\ -o ${prefix}.ann.vcf \\ $args \\ + $reference \\ --assembly $genome \\ --species $species \\ --cache \\ diff --git a/modules/ensemblvep/meta.yml b/modules/ensemblvep/meta.yml index 418bb970..9891815d 100644 --- a/modules/ensemblvep/meta.yml +++ b/modules/ensemblvep/meta.yml @@ -36,6 +36,11 @@ input: type: file description: | path to VEP cache (optional) + - fasta: + type: file + description: | + reference FASTA file (optional) + pattern: "*.{fasta,fa}" - extra_files: type: tuple description: | diff --git a/tests/modules/ensemblvep/main.nf b/tests/modules/ensemblvep/main.nf index 30d19957..ad9fa097 100644 --- a/tests/modules/ensemblvep/main.nf +++ b/tests/modules/ensemblvep/main.nf @@ -10,5 +10,7 @@ workflow test_ensemblvep { file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true) ] - ENSEMBLVEP ( input, "WBcel235", "caenorhabditis_elegans", "104", [], [] ) + fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + + ENSEMBLVEP ( input, "WBcel235", "caenorhabditis_elegans", "104", [], fasta, [] ) } diff --git a/tests/modules/ensemblvep/test.yml b/tests/modules/ensemblvep/test.yml index 42384d6e..fdc2aa6a 100644 --- a/tests/modules/ensemblvep/test.yml +++ b/tests/modules/ensemblvep/test.yml @@ -1,7 +1,7 @@ - name: ensemblvep test_ensemblvep - command: nextflow run ./tests/modules/ensemblvep -entry test_ensemblvep -c ./tests/config/nextflow.config -c ./tests/modules/ensemblvep/nextflow.config + command: nextflow run ./tests/modules/ensemblvep -entry test_ensemblvep -c ./tests/config/nextflow.config -c ./tests/modules/ensemblvep/nextflow.config tags: - ensemblvep files: - path: output/ensemblvep/test.ann.vcf - - path: output/ensemblvep/test.summary.html + - path: output/ensemblvep/test.summary.html \ No newline at end of file From a3202e41ec15a57f2e80a69fb580996b3cf9f29b Mon Sep 17 00:00:00 2001 From: Nicolas 
Vannieuwkerke Date: Thu, 9 Jun 2022 11:47:24 +0200 Subject: [PATCH 152/168] prettier --- tests/modules/ensemblvep/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/modules/ensemblvep/test.yml b/tests/modules/ensemblvep/test.yml index fdc2aa6a..de854bd2 100644 --- a/tests/modules/ensemblvep/test.yml +++ b/tests/modules/ensemblvep/test.yml @@ -4,4 +4,4 @@ - ensemblvep files: - path: output/ensemblvep/test.ann.vcf - - path: output/ensemblvep/test.summary.html \ No newline at end of file + - path: output/ensemblvep/test.summary.html From 7947bc662cdefa528a552e3a682a6c57f5a2cbe9 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Thu, 9 Jun 2022 11:53:16 +0200 Subject: [PATCH 153/168] added an extra test without fasta --- tests/modules/ensemblvep/main.nf | 13 ++++++++++++- tests/modules/ensemblvep/test.yml | 12 ++++++++++-- 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/tests/modules/ensemblvep/main.nf b/tests/modules/ensemblvep/main.nf index ad9fa097..6b7cc307 100644 --- a/tests/modules/ensemblvep/main.nf +++ b/tests/modules/ensemblvep/main.nf @@ -4,7 +4,7 @@ nextflow.enable.dsl = 2 include { ENSEMBLVEP } from '../../../modules/ensemblvep/main.nf' -workflow test_ensemblvep { +workflow test_ensemblvep_fasta { input = [ [ id:'test' ], // meta map file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true) @@ -14,3 +14,14 @@ workflow test_ensemblvep { ENSEMBLVEP ( input, "WBcel235", "caenorhabditis_elegans", "104", [], fasta, [] ) } + +workflow test_ensemblvep_no_fasta { + input = [ + [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true) + ] + + fasta = [] + + ENSEMBLVEP ( input, "WBcel235", "caenorhabditis_elegans", "104", [], fasta, [] ) +} diff --git a/tests/modules/ensemblvep/test.yml b/tests/modules/ensemblvep/test.yml index de854bd2..effea9d0 100644 --- a/tests/modules/ensemblvep/test.yml +++ b/tests/modules/ensemblvep/test.yml @@ -1,5 
+1,13 @@ -- name: ensemblvep test_ensemblvep - command: nextflow run ./tests/modules/ensemblvep -entry test_ensemblvep -c ./tests/config/nextflow.config -c ./tests/modules/ensemblvep/nextflow.config +- name: ensemblvep test_ensemblvep_fasta + command: nextflow run ./tests/modules/ensemblvep -entry test_ensemblvep_fasta -c ./tests/config/nextflow.config -c ./tests/modules/ensemblvep/nextflow.config + tags: + - ensemblvep + files: + - path: output/ensemblvep/test.ann.vcf + - path: output/ensemblvep/test.summary.html + +- name: ensemblvep test_ensemblvep_no_fasta + command: nextflow run ./tests/modules/ensemblvep -entry test_ensemblvep_no_fasta -c ./tests/config/nextflow.config -c ./tests/modules/ensemblvep/nextflow.config tags: - ensemblvep files: From e60323b757faceb0a3135b5aaa29d10653a756ff Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Thu, 9 Jun 2022 11:53:59 +0200 Subject: [PATCH 154/168] typo --- tests/modules/ensemblvep/test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/modules/ensemblvep/test.yml b/tests/modules/ensemblvep/test.yml index effea9d0..7e94d5fc 100644 --- a/tests/modules/ensemblvep/test.yml +++ b/tests/modules/ensemblvep/test.yml @@ -1,5 +1,5 @@ - name: ensemblvep test_ensemblvep_fasta - command: nextflow run ./tests/modules/ensemblvep -entry test_ensemblvep_fasta -c ./tests/config/nextflow.config -c ./tests/modules/ensemblvep/nextflow.config + command: nextflow run ./tests/modules/ensemblvep -entry test_ensemblvep_fasta -c ./tests/config/nextflow.config -c ./tests/modules/ensemblvep/nextflow.config tags: - ensemblvep files: @@ -7,7 +7,7 @@ - path: output/ensemblvep/test.summary.html - name: ensemblvep test_ensemblvep_no_fasta - command: nextflow run ./tests/modules/ensemblvep -entry test_ensemblvep_no_fasta -c ./tests/config/nextflow.config -c ./tests/modules/ensemblvep/nextflow.config + command: nextflow run ./tests/modules/ensemblvep -entry test_ensemblvep_no_fasta -c ./tests/config/nextflow.config 
-c ./tests/modules/ensemblvep/nextflow.config tags: - ensemblvep files: From 6d10909e9f2d7e6ab42096711c3d666aff981718 Mon Sep 17 00:00:00 2001 From: nvnieuwk <101190534+nvnieuwk@users.noreply.github.com> Date: Thu, 9 Jun 2022 11:55:41 +0200 Subject: [PATCH 155/168] Update tests/modules/ensemblvep/main.nf Co-authored-by: Maxime U. Garcia --- tests/modules/ensemblvep/main.nf | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/modules/ensemblvep/main.nf b/tests/modules/ensemblvep/main.nf index 6b7cc307..3c8afada 100644 --- a/tests/modules/ensemblvep/main.nf +++ b/tests/modules/ensemblvep/main.nf @@ -21,7 +21,5 @@ workflow test_ensemblvep_no_fasta { file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true) ] - fasta = [] - - ENSEMBLVEP ( input, "WBcel235", "caenorhabditis_elegans", "104", [], fasta, [] ) + ENSEMBLVEP ( input, "WBcel235", "caenorhabditis_elegans", "104", [], [], [] ) } From ef5b0dd977d19cbd2a233bb0783cd02aaedc208b Mon Sep 17 00:00:00 2001 From: Rike Date: Thu, 9 Jun 2022 14:54:19 +0200 Subject: [PATCH 156/168] gatk4 version update to 4.2.6.1 --- modules/gatk4/markduplicatesspark/main.nf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/gatk4/markduplicatesspark/main.nf b/modules/gatk4/markduplicatesspark/main.nf index db6a1aa1..0bafe7ce 100644 --- a/modules/gatk4/markduplicatesspark/main.nf +++ b/modules/gatk4/markduplicatesspark/main.nf @@ -2,10 +2,10 @@ process GATK4_MARKDUPLICATES_SPARK { tag "$meta.id" label 'process_high' - conda (params.enable_conda ? "bioconda::gatk4=4.2.3.0 conda-forge::openjdk=8.0.312" : null) + conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1 conda-forge::openjdk=8.0.312" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/gatk4:4.2.3.0--hdfd78af_0' : - 'broadinstitute/gatk:4.2.3.0' }" + 'https://depot.galaxyproject.org/singularity/gatk4:4.2.6.1--hdfd78af_0': + 'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }" input: tuple val(meta), path(bam) From fe9e4ece00c03c1082e0c9f911cf1f2fdc065941 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Thu, 9 Jun 2022 14:56:55 +0200 Subject: [PATCH 157/168] new module gatk4/reblockgvcf --- modules/gatk4/reblockgvcf/main.nf | 52 +++++++++++++ modules/gatk4/reblockgvcf/meta.yml | 74 +++++++++++++++++++ tests/config/pytest_modules.yml | 4 + tests/modules/gatk4/reblockgvcf/main.nf | 55 ++++++++++++++ .../modules/gatk4/reblockgvcf/nextflow.config | 5 ++ tests/modules/gatk4/reblockgvcf/test.yml | 32 ++++++++ 6 files changed, 222 insertions(+) create mode 100644 modules/gatk4/reblockgvcf/main.nf create mode 100644 modules/gatk4/reblockgvcf/meta.yml create mode 100644 tests/modules/gatk4/reblockgvcf/main.nf create mode 100644 tests/modules/gatk4/reblockgvcf/nextflow.config create mode 100644 tests/modules/gatk4/reblockgvcf/test.yml diff --git a/modules/gatk4/reblockgvcf/main.nf b/modules/gatk4/reblockgvcf/main.nf new file mode 100644 index 00000000..d27ac222 --- /dev/null +++ b/modules/gatk4/reblockgvcf/main.nf @@ -0,0 +1,52 @@ +process GATK4_REBLOCKGVCF { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/gatk4:4.2.6.1--hdfd78af_0': + 'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }" + + input: + tuple val(meta), path(gvcf), path(tbi), path(intervals) + path fasta + path fai + path dict + path dbsnp + path dbsnp_tbi + + output: + tuple val(meta), path("*.reblock.g.vcf.gz"), path("*.tbi") , emit: bam + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def dbsnp_command = dbsnp ? "--dbsnp $dbsnp" : "" + def interval_command = intervals ? "--intervals $intervals" : "" + + def avail_mem = 3 + if (!task.memory) { + log.info '[GATK ReblockGVCF] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = task.memory.giga + } + """ + gatk --java-options "-Xmx${avail_mem}g" ReblockGVCF \\ + --variant $gvcf \\ + --output ${prefix}.reblock.g.vcf.gz \\ + --reference $fasta \\ + $dbsnp_command \\ + $interval_command \\ + --tmp-dir . \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/gatk4/reblockgvcf/meta.yml b/modules/gatk4/reblockgvcf/meta.yml new file mode 100644 index 00000000..407eeec2 --- /dev/null +++ b/modules/gatk4/reblockgvcf/meta.yml @@ -0,0 +1,74 @@ +name: "gatk4_reblockgvcf" +description: Condenses homRef blocks in a single-sample GVCF +keywords: + - gatk4 + - reblockgvcf + - gvcf +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. 
+ homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + licence: ["Apache-2.0"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - gvcf: + type: file + description: GVCF file created using HaplotypeCaller using the '-ERC GVCF' or '-ERC BP_RESOLUTION' mode + pattern: "*.{vcf,gvcf}.gz" + - tbi: + type: file + description: Index of the GVCF file + pattern: "*.tbi" + - intervals: + type: file + description: Bed file with the genomic regions included in the library (optional) + - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + - fai: + type: file + description: Index of reference fasta file + pattern: "fasta.fai" + - dict: + type: file + description: GATK sequence dictionary + pattern: "*.dict" + - dbsnp: + type: file + description: VCF file containing known sites (optional) + - dbsnp_tbi: + type: file + description: VCF index of dbsnp (optional) + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - gvcf: + type: file + description: Filtered GVCF + pattern: "*reblock.g.vcf.gz" + - tbi: + type: file + description: Index of the filtered GVCF + pattern: "*reblock.g.vcf.gz.tbi" + +authors: + - "@nvnieuwk" diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index b3a5a24c..7325f02d 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -859,6 +859,10 @@ gatk4/mutect2: - modules/gatk4/mutect2/** - tests/modules/gatk4/mutect2/** +gatk4/reblockgvcf: + - modules/gatk4/reblockgvcf/** + - tests/modules/gatk4/reblockgvcf/** + gatk4/revertsam: - modules/gatk4/revertsam/** - tests/modules/gatk4/revertsam/** diff --git a/tests/modules/gatk4/reblockgvcf/main.nf b/tests/modules/gatk4/reblockgvcf/main.nf new file mode 100644 index 00000000..2233a5c5 --- /dev/null +++ b/tests/modules/gatk4/reblockgvcf/main.nf @@ -0,0 +1,55 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { GATK4_REBLOCKGVCF } from '../../../../modules/gatk4/reblockgvcf/main.nf' + +workflow test_gatk4_reblockgvcf { + + input = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true), + [] + ] + + fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + fasta_index = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true) + + GATK4_REBLOCKGVCF ( input, fasta, fasta_index, dict, [], [] ) +} + +workflow test_gatk4_reblockgvcf_intervals { + + input = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true), + 
file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true), + file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true) + ] + + fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + fasta_index = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true) + + GATK4_REBLOCKGVCF ( input, fasta, fasta_index, dict, [], [] ) +} + +workflow test_gatk4_reblockgvcf_dbsnp { + + input = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_haplotc_cnn_vcf_gz'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_haplotc_cnn_vcf_gz_tbi'], checkIfExists: true), + [] + ] + + fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + fasta_index = file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + dict = file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true) + dbsnp = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz'], checkIfExists: true) + dbsnp_tbi = file(params.test_data['homo_sapiens']['genome']['dbsnp_146_hg38_vcf_gz_tbi'], checkIfExists: true) + + GATK4_REBLOCKGVCF ( input, fasta, fasta_index, dict, dbsnp, dbsnp_tbi ) +} \ No newline at end of file diff --git a/tests/modules/gatk4/reblockgvcf/nextflow.config b/tests/modules/gatk4/reblockgvcf/nextflow.config new file mode 100644 index 00000000..50f50a7a --- /dev/null +++ b/tests/modules/gatk4/reblockgvcf/nextflow.config @@ -0,0 +1,5 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + +} \ No newline at end of file diff --git a/tests/modules/gatk4/reblockgvcf/test.yml b/tests/modules/gatk4/reblockgvcf/test.yml new file mode 100644 index 
00000000..e5461341 --- /dev/null +++ b/tests/modules/gatk4/reblockgvcf/test.yml @@ -0,0 +1,32 @@ +- name: gatk4 reblockgvcf test_gatk4_reblockgvcf + command: nextflow run ./tests/modules/gatk4/reblockgvcf -entry test_gatk4_reblockgvcf -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/reblockgvcf/nextflow.config + tags: + - gatk4/reblockgvcf + - gatk4 + files: + - path: output/gatk4/test.reblock.g.vcf.gz + contains: '[ # TODO nf-core: file md5sum was variable, please replace this text with a string found in the file instead ]' + - path: output/gatk4/test.reblock.g.vcf.gz.tbi + md5sum: e1aab7d826a151828fd0671ec5aed2e0 + +- name: gatk4 reblockgvcf test_gatk4_reblockgvcf_intervals + command: nextflow run ./tests/modules/gatk4/reblockgvcf -entry test_gatk4_reblockgvcf_intervals -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/reblockgvcf/nextflow.config + tags: + - gatk4/reblockgvcf + - gatk4 + files: + - path: output/gatk4/test.reblock.g.vcf.gz + contains: '[ # TODO nf-core: file md5sum was variable, please replace this text with a string found in the file instead ]' + - path: output/gatk4/test.reblock.g.vcf.gz.tbi + md5sum: e7ca7e9fe76ce12198fd54ec9a64fad4 + +- name: gatk4 reblockgvcf test_gatk4_reblockgvcf_dbsnp + command: nextflow run ./tests/modules/gatk4/reblockgvcf -entry test_gatk4_reblockgvcf_dbsnp -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/reblockgvcf/nextflow.config + tags: + - gatk4/reblockgvcf + - gatk4 + files: + - path: output/gatk4/test.reblock.g.vcf.gz + contains: '[ # TODO nf-core: file md5sum was variable, please replace this text with a string found in the file instead ]' + - path: output/gatk4/test.reblock.g.vcf.gz.tbi + md5sum: 017edea27a253eb51cc4505d00dcb295 From 14d38e43cf00c42a09a9b6e80914daa0ef135de1 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Thu, 9 Jun 2022 14:58:20 +0200 Subject: [PATCH 158/168] fix test.yml --- tests/modules/gatk4/reblockgvcf/test.yml | 3 --- 1 file changed, 3 deletions(-) 
diff --git a/tests/modules/gatk4/reblockgvcf/test.yml b/tests/modules/gatk4/reblockgvcf/test.yml index e5461341..97f1515f 100644 --- a/tests/modules/gatk4/reblockgvcf/test.yml +++ b/tests/modules/gatk4/reblockgvcf/test.yml @@ -5,7 +5,6 @@ - gatk4 files: - path: output/gatk4/test.reblock.g.vcf.gz - contains: '[ # TODO nf-core: file md5sum was variable, please replace this text with a string found in the file instead ]' - path: output/gatk4/test.reblock.g.vcf.gz.tbi md5sum: e1aab7d826a151828fd0671ec5aed2e0 @@ -16,7 +15,6 @@ - gatk4 files: - path: output/gatk4/test.reblock.g.vcf.gz - contains: '[ # TODO nf-core: file md5sum was variable, please replace this text with a string found in the file instead ]' - path: output/gatk4/test.reblock.g.vcf.gz.tbi md5sum: e7ca7e9fe76ce12198fd54ec9a64fad4 @@ -27,6 +25,5 @@ - gatk4 files: - path: output/gatk4/test.reblock.g.vcf.gz - contains: '[ # TODO nf-core: file md5sum was variable, please replace this text with a string found in the file instead ]' - path: output/gatk4/test.reblock.g.vcf.gz.tbi md5sum: 017edea27a253eb51cc4505d00dcb295 From b9e397ff15a98c8b70099579f7645a32c467d65b Mon Sep 17 00:00:00 2001 From: Rike Date: Thu, 9 Jun 2022 15:06:46 +0200 Subject: [PATCH 159/168] use broadinstitute container --- modules/gatk4/markduplicatesspark/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/gatk4/markduplicatesspark/main.nf b/modules/gatk4/markduplicatesspark/main.nf index 0bafe7ce..9f77821e 100644 --- a/modules/gatk4/markduplicatesspark/main.nf +++ b/modules/gatk4/markduplicatesspark/main.nf @@ -5,7 +5,7 @@ process GATK4_MARKDUPLICATES_SPARK { conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1 conda-forge::openjdk=8.0.312" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/gatk4:4.2.6.1--hdfd78af_0': - 'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }" + 'broadinstitute/gatk4:4.2.6.1' }" input: tuple val(meta), path(bam) From 81fff7384d8342439cf4fc4cce51433d979c97a2 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Thu, 9 Jun 2022 15:08:02 +0200 Subject: [PATCH 160/168] small fix and typo --- modules/gatk4/reblockgvcf/main.nf | 6 +++--- modules/gatk4/reblockgvcf/meta.yml | 4 ++-- tests/modules/gatk4/reblockgvcf/test.yml | 3 --- 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/modules/gatk4/reblockgvcf/main.nf b/modules/gatk4/reblockgvcf/main.nf index d27ac222..bbd8e2ae 100644 --- a/modules/gatk4/reblockgvcf/main.nf +++ b/modules/gatk4/reblockgvcf/main.nf @@ -16,8 +16,8 @@ process GATK4_REBLOCKGVCF { path dbsnp_tbi output: - tuple val(meta), path("*.reblock.g.vcf.gz"), path("*.tbi") , emit: bam - path "versions.yml" , emit: versions + tuple val(meta), path("*.rb.g.vcf.gz"), path("*.tbi") , emit: bam + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -37,7 +37,7 @@ process GATK4_REBLOCKGVCF { """ gatk --java-options "-Xmx${avail_mem}g" ReblockGVCF \\ --variant $gvcf \\ - --output ${prefix}.reblock.g.vcf.gz \\ + --output ${prefix}.rb.g.vcf.gz \\ --reference $fasta \\ $dbsnp_command \\ $interval_command \\ diff --git a/modules/gatk4/reblockgvcf/meta.yml b/modules/gatk4/reblockgvcf/meta.yml index 407eeec2..23518416 100644 --- a/modules/gatk4/reblockgvcf/meta.yml +++ b/modules/gatk4/reblockgvcf/meta.yml @@ -64,11 +64,11 @@ output: - gvcf: type: file description: Filtered GVCF - pattern: "*reblock.g.vcf.gz" + pattern: "*rb.g.vcf.gz" - tbi: type: file description: Index of the filtered GVCF - pattern: "*reblock.g.vcf.gz.tbi" + pattern: "*rb.g.vcf.gz.tbi" authors: - "@nvnieuwk" diff --git a/tests/modules/gatk4/reblockgvcf/test.yml b/tests/modules/gatk4/reblockgvcf/test.yml index 97f1515f..9cef81ae 100644 --- 
a/tests/modules/gatk4/reblockgvcf/test.yml +++ b/tests/modules/gatk4/reblockgvcf/test.yml @@ -6,7 +6,6 @@ files: - path: output/gatk4/test.reblock.g.vcf.gz - path: output/gatk4/test.reblock.g.vcf.gz.tbi - md5sum: e1aab7d826a151828fd0671ec5aed2e0 - name: gatk4 reblockgvcf test_gatk4_reblockgvcf_intervals command: nextflow run ./tests/modules/gatk4/reblockgvcf -entry test_gatk4_reblockgvcf_intervals -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/reblockgvcf/nextflow.config @@ -16,7 +15,6 @@ files: - path: output/gatk4/test.reblock.g.vcf.gz - path: output/gatk4/test.reblock.g.vcf.gz.tbi - md5sum: e7ca7e9fe76ce12198fd54ec9a64fad4 - name: gatk4 reblockgvcf test_gatk4_reblockgvcf_dbsnp command: nextflow run ./tests/modules/gatk4/reblockgvcf -entry test_gatk4_reblockgvcf_dbsnp -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/reblockgvcf/nextflow.config @@ -26,4 +24,3 @@ files: - path: output/gatk4/test.reblock.g.vcf.gz - path: output/gatk4/test.reblock.g.vcf.gz.tbi - md5sum: 017edea27a253eb51cc4505d00dcb295 From 89a6a03e38b8b5869a2e1e5abfe4b4cc4938c622 Mon Sep 17 00:00:00 2001 From: Rike Date: Thu, 9 Jun 2022 15:09:19 +0200 Subject: [PATCH 161/168] fix container tag --- modules/gatk4/markduplicatesspark/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/gatk4/markduplicatesspark/main.nf b/modules/gatk4/markduplicatesspark/main.nf index 9f77821e..52faf5bc 100644 --- a/modules/gatk4/markduplicatesspark/main.nf +++ b/modules/gatk4/markduplicatesspark/main.nf @@ -5,7 +5,7 @@ process GATK4_MARKDUPLICATES_SPARK { conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1 conda-forge::openjdk=8.0.312" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/gatk4:4.2.6.1--hdfd78af_0': - 'broadinstitute/gatk4:4.2.6.1' }" + 'broadinstitute/gatk:4.2.6.1' }" input: tuple val(meta), path(bam) From 873215c8ae3882e3ce1c8c62fbae16e74d631270 Mon Sep 17 00:00:00 2001 From: nvnieuwk <101190534+nvnieuwk@users.noreply.github.com> Date: Thu, 9 Jun 2022 15:11:18 +0200 Subject: [PATCH 162/168] Update modules/gatk4/reblockgvcf/main.nf Co-authored-by: Maxime U. Garcia --- modules/gatk4/reblockgvcf/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/gatk4/reblockgvcf/main.nf b/modules/gatk4/reblockgvcf/main.nf index bbd8e2ae..5640e8ae 100644 --- a/modules/gatk4/reblockgvcf/main.nf +++ b/modules/gatk4/reblockgvcf/main.nf @@ -16,7 +16,7 @@ process GATK4_REBLOCKGVCF { path dbsnp_tbi output: - tuple val(meta), path("*.rb.g.vcf.gz"), path("*.tbi") , emit: bam + tuple val(meta), path("*.rb.g.vcf.gz"), path("*.tbi") , emit: vcf path "versions.yml" , emit: versions when: From c50df9ad04900067adf6ddfc855a485633813bc6 Mon Sep 17 00:00:00 2001 From: Nicolas Vannieuwkerke Date: Thu, 9 Jun 2022 15:14:37 +0200 Subject: [PATCH 163/168] fix test.yml --- tests/modules/gatk4/reblockgvcf/test.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/modules/gatk4/reblockgvcf/test.yml b/tests/modules/gatk4/reblockgvcf/test.yml index 9cef81ae..623f58a6 100644 --- a/tests/modules/gatk4/reblockgvcf/test.yml +++ b/tests/modules/gatk4/reblockgvcf/test.yml @@ -4,8 +4,8 @@ - gatk4/reblockgvcf - gatk4 files: - - path: output/gatk4/test.reblock.g.vcf.gz - - path: output/gatk4/test.reblock.g.vcf.gz.tbi + - path: output/gatk4/test.rb.g.vcf.gz + - path: output/gatk4/test.rb.g.vcf.gz.tbi - name: gatk4 reblockgvcf test_gatk4_reblockgvcf_intervals command: nextflow run ./tests/modules/gatk4/reblockgvcf -entry test_gatk4_reblockgvcf_intervals -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/reblockgvcf/nextflow.config @@ -13,8 +13,8 @@ - gatk4/reblockgvcf - 
gatk4 files: - - path: output/gatk4/test.reblock.g.vcf.gz - - path: output/gatk4/test.reblock.g.vcf.gz.tbi + - path: output/gatk4/test.rb.g.vcf.gz + - path: output/gatk4/test.rb.g.vcf.gz.tbi - name: gatk4 reblockgvcf test_gatk4_reblockgvcf_dbsnp command: nextflow run ./tests/modules/gatk4/reblockgvcf -entry test_gatk4_reblockgvcf_dbsnp -c ./tests/config/nextflow.config -c ./tests/modules/gatk4/reblockgvcf/nextflow.config @@ -22,5 +22,5 @@ - gatk4/reblockgvcf - gatk4 files: - - path: output/gatk4/test.reblock.g.vcf.gz - - path: output/gatk4/test.reblock.g.vcf.gz.tbi + - path: output/gatk4/test.rb.g.vcf.gz + - path: output/gatk4/test.rb.g.vcf.gz.tbi From 8e8f4c9c51a7b229dd45e9b287d48115c238baf3 Mon Sep 17 00:00:00 2001 From: Rike Date: Thu, 9 Jun 2022 15:16:11 +0200 Subject: [PATCH 164/168] remove singularity container --- modules/gatk4/markduplicatesspark/main.nf | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/modules/gatk4/markduplicatesspark/main.nf b/modules/gatk4/markduplicatesspark/main.nf index 52faf5bc..b8c315fc 100644 --- a/modules/gatk4/markduplicatesspark/main.nf +++ b/modules/gatk4/markduplicatesspark/main.nf @@ -3,9 +3,7 @@ process GATK4_MARKDUPLICATES_SPARK { label 'process_high' conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1 conda-forge::openjdk=8.0.312" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/gatk4:4.2.6.1--hdfd78af_0': - 'broadinstitute/gatk:4.2.6.1' }" + container 'broadinstitute/gatk:4.2.6.1' input: tuple val(meta), path(bam) From 6a4732ef3b76b54d75533e6be9ba57e3008d4853 Mon Sep 17 00:00:00 2001 From: Rike Date: Thu, 9 Jun 2022 15:29:00 +0200 Subject: [PATCH 165/168] similar issues arise with bqsrspark, also use broadinstitute container --- modules/gatk4/applybqsrspark/main.nf | 6 ++---- modules/gatk4/baserecalibratorspark/main.nf | 6 ++---- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/modules/gatk4/applybqsrspark/main.nf b/modules/gatk4/applybqsrspark/main.nf index 9d7891ba..5890278a 100644 --- a/modules/gatk4/applybqsrspark/main.nf +++ b/modules/gatk4/applybqsrspark/main.nf @@ -2,10 +2,8 @@ process GATK4_APPLYBQSR_SPARK { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.2.6.1--hdfd78af_0': - 'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }" + conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1 conda-forge::openjdk=8.0.312" : null) + container 'broadinstitute/gatk:4.2.6.1' input: tuple val(meta), path(input), path(input_index), path(bqsr_table), path(intervals) diff --git a/modules/gatk4/baserecalibratorspark/main.nf b/modules/gatk4/baserecalibratorspark/main.nf index 79988b72..755122cb 100644 --- a/modules/gatk4/baserecalibratorspark/main.nf +++ b/modules/gatk4/baserecalibratorspark/main.nf @@ -2,10 +2,8 @@ process GATK4_BASERECALIBRATOR_SPARK { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/gatk4:4.2.6.1--hdfd78af_0': - 'quay.io/biocontainers/gatk4:4.2.6.1--hdfd78af_0' }" + conda (params.enable_conda ? "bioconda::gatk4=4.2.6.1 conda-forge::openjdk=8.0.312" : null) + container 'broadinstitute/gatk:4.2.6.1' input: tuple val(meta), path(input), path(input_index), path(intervals) From 95633c6853c3a33a7cf65720381874582cd3b7c8 Mon Sep 17 00:00:00 2001 From: Rike Date: Thu, 9 Jun 2022 15:30:47 +0200 Subject: [PATCH 166/168] update checksum --- tests/modules/gatk4/markduplicatesspark/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/modules/gatk4/markduplicatesspark/test.yml b/tests/modules/gatk4/markduplicatesspark/test.yml index 4ef9b916..1ef7f543 100644 --- a/tests/modules/gatk4/markduplicatesspark/test.yml +++ b/tests/modules/gatk4/markduplicatesspark/test.yml @@ -5,7 +5,7 @@ - gatk4/markduplicatesspark files: - path: output/gatk4/test.bam - md5sum: dc1a09ac6371aab7c50d1a554baa06d3 + md5sum: 4b5abb065d498bece619f9bd4cc929e1 - path: output/gatk4/versions.yml - name: gatk4 markduplicatesspark test_gatk4_markduplicates_spark_multiple_bams From a5583143c46e2b7b56b513e61141a36b5c0e29d3 Mon Sep 17 00:00:00 2001 From: Rike Date: Thu, 9 Jun 2022 15:38:28 +0200 Subject: [PATCH 167/168] update checksum --- tests/modules/gatk4/applybqsrspark/test.yml | 2 +- tests/modules/gatk4/markduplicatesspark/test.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/modules/gatk4/applybqsrspark/test.yml b/tests/modules/gatk4/applybqsrspark/test.yml index 1e4f8b47..5a1c3df6 100644 --- a/tests/modules/gatk4/applybqsrspark/test.yml +++ b/tests/modules/gatk4/applybqsrspark/test.yml @@ -15,7 +15,7 @@ - gatk4/applybqsrspark files: - path: output/gatk4/test.bam - md5sum: 2ca2446f0125890280056fd7da822732 + md5sum: db27196dbd626edb0509aac4589dedcf - path: output/gatk4/versions.yml - name: gatk4 applybqsr test_gatk4_applybqsr_spark_cram diff --git 
a/tests/modules/gatk4/markduplicatesspark/test.yml b/tests/modules/gatk4/markduplicatesspark/test.yml index 1ef7f543..4ef9b916 100644 --- a/tests/modules/gatk4/markduplicatesspark/test.yml +++ b/tests/modules/gatk4/markduplicatesspark/test.yml @@ -5,7 +5,7 @@ - gatk4/markduplicatesspark files: - path: output/gatk4/test.bam - md5sum: 4b5abb065d498bece619f9bd4cc929e1 + md5sum: dc1a09ac6371aab7c50d1a554baa06d3 - path: output/gatk4/versions.yml - name: gatk4 markduplicatesspark test_gatk4_markduplicates_spark_multiple_bams From 30262d1fdd032df12c2dedca55099d8eaaf7b136 Mon Sep 17 00:00:00 2001 From: Rike Date: Thu, 9 Jun 2022 15:44:12 +0200 Subject: [PATCH 168/168] remove variable checksum --- tests/modules/gatk4/applybqsrspark/test.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/modules/gatk4/applybqsrspark/test.yml b/tests/modules/gatk4/applybqsrspark/test.yml index 5a1c3df6..b7b8c72d 100644 --- a/tests/modules/gatk4/applybqsrspark/test.yml +++ b/tests/modules/gatk4/applybqsrspark/test.yml @@ -15,7 +15,6 @@ - gatk4/applybqsrspark files: - path: output/gatk4/test.bam - md5sum: db27196dbd626edb0509aac4589dedcf - path: output/gatk4/versions.yml - name: gatk4 applybqsr test_gatk4_applybqsr_spark_cram